qmd.js 139 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123
  1. #!/usr/bin/env node
  2. import { openDatabase } from "../db.js";
  3. import fastGlob from "fast-glob";
  4. import { execSync, spawn as nodeSpawn } from "child_process";
  5. import { fileURLToPath } from "url";
import { dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
  7. import { parseArgs } from "util";
  8. import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
  9. import { createInterface } from "readline/promises";
  10. import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js";
  11. import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
  12. import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
  13. import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, } from "../collections.js";
  14. import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";
  15. import { createEmbeddingProvider, resolveProviderKind, ModelMismatchError, } from "../embedding/index.js";
  16. // Enable production mode - allows using default database path
  17. // Tests must set INDEX_PATH or use createStore() with explicit path
  18. enableProductionMode();
  19. // =============================================================================
  20. // Store/DB lifecycle (no legacy singletons in store.ts)
  21. // =============================================================================
  22. let store = null;
  23. let storeDbPathOverride;
  24. function getStore() {
  25. if (!store) {
  26. store = createStore(storeDbPathOverride);
  27. // Sync YAML config into SQLite store_collections so store.ts reads from DB
  28. try {
  29. const config = loadConfig();
  30. syncConfigToDb(store.db, config);
  31. if (config.models) {
  32. setDefaultLlamaCpp(new LlamaCpp({
  33. embedModel: config.models.embed,
  34. generateModel: config.models.generate,
  35. rerankModel: config.models.rerank,
  36. }));
  37. }
  38. }
  39. catch {
  40. // Config may not exist yet — that's fine, DB works without it
  41. }
  42. }
  43. return store;
  44. }
  45. function getDb() {
  46. return getStore().db;
  47. }
  48. /** Re-sync YAML config into SQLite after CLI mutations (add/remove/rename collection, context changes) */
  49. function resyncConfig() {
  50. const s = getStore();
  51. try {
  52. const config = loadConfig();
  53. // Clear config hash to force re-sync
  54. s.db.prepare(`DELETE FROM store_config WHERE key = 'config_hash'`).run();
  55. syncConfigToDb(s.db, config);
  56. }
  57. catch {
  58. // Config may not exist — that's fine
  59. }
  60. }
  61. function closeDb() {
  62. if (store) {
  63. store.close();
  64. store = null;
  65. }
  66. }
  67. function getDbPath() {
  68. return store?.dbPath ?? storeDbPathOverride ?? getDefaultDbPath();
  69. }
  70. function setIndexName(name) {
  71. let normalizedName = name;
  72. // Normalize relative paths to prevent malformed database paths
  73. if (name && name.includes('/')) {
  74. const { resolve } = require('path');
  75. const { cwd } = require('process');
  76. const absolutePath = resolve(cwd(), name);
  77. // Replace path separators with underscores to create a valid filename
  78. normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
  79. }
  80. storeDbPathOverride = normalizedName ? getDefaultDbPath(normalizedName) : undefined;
  81. // Reset open handle so next use opens the new index
  82. closeDb();
  83. }
  84. function ensureVecTable(_db, dimensions) {
  85. // Store owns the DB; ignore `_db` and ensure vec table on the active store
  86. getStore().ensureVecTable(dimensions);
  87. }
  88. // Terminal colors (respects NO_COLOR env)
  89. const useColor = !process.env.NO_COLOR && process.stdout.isTTY;
  90. const c = {
  91. reset: useColor ? "\x1b[0m" : "",
  92. dim: useColor ? "\x1b[2m" : "",
  93. bold: useColor ? "\x1b[1m" : "",
  94. cyan: useColor ? "\x1b[36m" : "",
  95. yellow: useColor ? "\x1b[33m" : "",
  96. green: useColor ? "\x1b[32m" : "",
  97. magenta: useColor ? "\x1b[35m" : "",
  98. blue: useColor ? "\x1b[34m" : "",
  99. red: useColor ? "\x1b[31m" : "",
  100. };
  101. // Terminal cursor control
  102. const cursor = {
  103. hide() { process.stderr.write('\x1b[?25l'); },
  104. show() { process.stderr.write('\x1b[?25h'); },
  105. };
  106. // Ensure cursor is restored on exit
  107. process.on('SIGINT', () => { cursor.show(); process.exit(130); });
  108. process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
  109. // Terminal progress bar using OSC 9;4 escape sequence (TTY only)
  110. const isTTY = process.stderr.isTTY;
  111. const progress = {
  112. set(percent) {
  113. if (isTTY)
  114. process.stderr.write(`\x1b]9;4;1;${Math.round(percent)}\x07`);
  115. },
  116. clear() {
  117. if (isTTY)
  118. process.stderr.write(`\x1b]9;4;0\x07`);
  119. },
  120. indeterminate() {
  121. if (isTTY)
  122. process.stderr.write(`\x1b]9;4;3\x07`);
  123. },
  124. error() {
  125. if (isTTY)
  126. process.stderr.write(`\x1b]9;4;2\x07`);
  127. },
  128. };
  129. // Format seconds into human-readable ETA
  130. function formatETA(seconds) {
  131. if (seconds < 60)
  132. return `${Math.round(seconds)}s`;
  133. if (seconds < 3600)
  134. return `${Math.floor(seconds / 60)}m ${Math.round(seconds % 60)}s`;
  135. return `${Math.floor(seconds / 3600)}h ${Math.floor((seconds % 3600) / 60)}m`;
  136. }
  137. // Check index health and print warnings/tips
  138. function checkIndexHealth(db) {
  139. const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);
  140. // Warn if many docs need embedding
  141. if (needsEmbedding > 0) {
  142. const pct = Math.round((needsEmbedding / totalDocs) * 100);
  143. if (pct >= 10) {
  144. process.stderr.write(`${c.yellow}Warning: ${needsEmbedding} documents (${pct}%) need embeddings. Run 'qmd embed' for better results.${c.reset}\n`);
  145. }
  146. else {
  147. process.stderr.write(`${c.dim}Tip: ${needsEmbedding} documents need embeddings. Run 'qmd embed' to index them.${c.reset}\n`);
  148. }
  149. }
  150. // Check if most recent document update is older than 2 weeks
  151. if (daysStale !== null && daysStale >= 14) {
  152. process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'qmd update' to refresh.${c.reset}\n`);
  153. }
  154. }
  155. // Compute unique display path for a document
  156. // Always include at least parent folder + filename, add more parent dirs until unique
  157. function computeDisplayPath(filepath, collectionPath, existingPaths) {
  158. // Get path relative to collection (include collection dir name)
  159. const collectionDir = collectionPath.replace(/\/$/, '');
  160. const collectionName = collectionDir.split('/').pop() || '';
  161. let relativePath;
  162. if (filepath.startsWith(collectionDir + '/')) {
  163. // filepath is under collection: use collection name + relative path
  164. relativePath = collectionName + filepath.slice(collectionDir.length);
  165. }
  166. else {
  167. // Fallback: just use the filepath
  168. relativePath = filepath;
  169. }
  170. const parts = relativePath.split('/').filter(p => p.length > 0);
  171. // Always include at least parent folder + filename (minimum 2 parts if available)
  172. // Then add more parent dirs until unique
  173. const minParts = Math.min(2, parts.length);
  174. for (let i = parts.length - minParts; i >= 0; i--) {
  175. const candidate = parts.slice(i).join('/');
  176. if (!existingPaths.has(candidate)) {
  177. return candidate;
  178. }
  179. }
  180. // Absolute fallback: use full path (should be unique)
  181. return filepath;
  182. }
  183. function formatTimeAgo(date) {
  184. const seconds = Math.floor((Date.now() - date.getTime()) / 1000);
  185. if (seconds < 60)
  186. return `${seconds}s ago`;
  187. const minutes = Math.floor(seconds / 60);
  188. if (minutes < 60)
  189. return `${minutes}m ago`;
  190. const hours = Math.floor(minutes / 60);
  191. if (hours < 24)
  192. return `${hours}h ago`;
  193. const days = Math.floor(hours / 24);
  194. return `${days}d ago`;
  195. }
  196. function formatMs(ms) {
  197. if (ms < 1000)
  198. return `${ms}ms`;
  199. return `${(ms / 1000).toFixed(1)}s`;
  200. }
  201. function formatBytes(bytes) {
  202. if (bytes < 1024)
  203. return `${bytes} B`;
  204. if (bytes < 1024 * 1024)
  205. return `${(bytes / 1024).toFixed(1)} KB`;
  206. if (bytes < 1024 * 1024 * 1024)
  207. return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  208. return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
  209. }
  210. async function showStatus() {
  211. const dbPath = getDbPath();
  212. const db = getDb();
  213. // Collections are defined in YAML; no duplicate cleanup needed.
  214. // Collections are defined in YAML; no duplicate cleanup needed.
  215. // Index size
  216. let indexSize = 0;
  217. try {
  218. const stat = statSync(dbPath).size;
  219. indexSize = stat;
  220. }
  221. catch { }
  222. // Collections info (from YAML + database stats)
  223. const collections = listCollections(db);
  224. // Overall stats
  225. const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get();
  226. const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get();
  227. const needsEmbedding = getHashesNeedingEmbedding(db);
  228. // Most recent update across all collections
  229. const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
  230. console.log(`${c.bold}QMD Status${c.reset}\n`);
  231. console.log(`Index: ${dbPath}`);
  232. console.log(`Size: ${formatBytes(indexSize)}`);
  233. // MCP daemon status (check PID file liveness)
  234. const mcpCacheDir = process.env.XDG_CACHE_HOME
  235. ? resolve(process.env.XDG_CACHE_HOME, "qmd")
  236. : resolve(homedir(), ".cache", "qmd");
  237. const mcpPidPath = resolve(mcpCacheDir, "mcp.pid");
  238. if (existsSync(mcpPidPath)) {
  239. const mcpPid = parseInt(readFileSync(mcpPidPath, "utf-8").trim());
  240. try {
  241. process.kill(mcpPid, 0);
  242. console.log(`MCP: ${c.green}running${c.reset} (PID ${mcpPid})`);
  243. }
  244. catch {
  245. unlinkSync(mcpPidPath);
  246. // Stale PID file cleaned up silently
  247. }
  248. }
  249. console.log("");
  250. console.log(`${c.bold}Documents${c.reset}`);
  251. console.log(` Total: ${totalDocs.count} files indexed`);
  252. console.log(` Vectors: ${vectorCount.count} embedded`);
  253. if (needsEmbedding > 0) {
  254. console.log(` ${c.yellow}Pending: ${needsEmbedding} need embedding${c.reset} (run 'qmd embed')`);
  255. }
  256. if (mostRecent.latest) {
  257. const lastUpdate = new Date(mostRecent.latest);
  258. console.log(` Updated: ${formatTimeAgo(lastUpdate)}`);
  259. }
  260. // Get all contexts grouped by collection (from YAML)
  261. const allContexts = listAllContexts();
  262. const contextsByCollection = new Map();
  263. for (const ctx of allContexts) {
  264. // Group contexts by collection name
  265. if (!contextsByCollection.has(ctx.collection)) {
  266. contextsByCollection.set(ctx.collection, []);
  267. }
  268. contextsByCollection.get(ctx.collection).push({
  269. path_prefix: ctx.path,
  270. context: ctx.context
  271. });
  272. }
  273. // AST chunking status
  274. try {
  275. const { getASTStatus } = await import("../ast.js");
  276. const ast = await getASTStatus();
  277. console.log(`\n${c.bold}AST Chunking${c.reset}`);
  278. if (ast.available) {
  279. const ok = ast.languages.filter(l => l.available).map(l => l.language);
  280. const fail = ast.languages.filter(l => !l.available);
  281. console.log(` Status: ${c.green}active${c.reset}`);
  282. console.log(` Languages: ${ok.join(", ")}`);
  283. if (fail.length > 0) {
  284. for (const f of fail) {
  285. console.log(` ${c.yellow}Unavailable: ${f.language} (${f.error})${c.reset}`);
  286. }
  287. }
  288. }
  289. else {
  290. console.log(` Status: ${c.yellow}unavailable${c.reset} (falling back to regex chunking)`);
  291. for (const l of ast.languages) {
  292. if (l.error)
  293. console.log(` ${c.dim}${l.language}: ${l.error}${c.reset}`);
  294. }
  295. }
  296. }
  297. catch {
  298. console.log(`\n${c.bold}AST Chunking${c.reset}`);
  299. console.log(` Status: ${c.dim}not available${c.reset}`);
  300. }
  301. if (collections.length > 0) {
  302. console.log(`\n${c.bold}Collections${c.reset}`);
  303. for (const col of collections) {
  304. const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never";
  305. const contexts = contextsByCollection.get(col.name) || [];
  306. console.log(` ${c.cyan}${col.name}${c.reset} ${c.dim}(qmd://${col.name}/)${c.reset}`);
  307. console.log(` ${c.dim}Pattern:${c.reset} ${col.glob_pattern}`);
  308. console.log(` ${c.dim}Files:${c.reset} ${col.active_count} (updated ${lastMod})`);
  309. if (contexts.length > 0) {
  310. console.log(` ${c.dim}Contexts:${c.reset} ${contexts.length}`);
  311. for (const ctx of contexts) {
  312. // Handle both empty string and '/' as root context
  313. const pathDisplay = (ctx.path_prefix === '' || ctx.path_prefix === '/') ? '/' : `/${ctx.path_prefix}`;
  314. const contextPreview = ctx.context.length > 60
  315. ? ctx.context.substring(0, 57) + '...'
  316. : ctx.context;
  317. console.log(` ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`);
  318. }
  319. }
  320. }
  321. // Show examples of virtual paths
  322. console.log(`\n${c.bold}Examples${c.reset}`);
  323. console.log(` ${c.dim}# List files in a collection${c.reset}`);
  324. if (collections.length > 0 && collections[0]) {
  325. console.log(` qmd ls ${collections[0].name}`);
  326. }
  327. console.log(` ${c.dim}# Get a document${c.reset}`);
  328. if (collections.length > 0 && collections[0]) {
  329. console.log(` qmd get qmd://${collections[0].name}/path/to/file.md`);
  330. }
  331. console.log(` ${c.dim}# Search within a collection${c.reset}`);
  332. if (collections.length > 0 && collections[0]) {
  333. console.log(` qmd search "query" -c ${collections[0].name}`);
  334. }
  335. }
  336. else {
  337. console.log(`\n${c.dim}No collections. Run 'qmd collection add .' to index markdown files.${c.reset}`);
  338. }
  339. // Models
  340. {
  341. // hf:org/repo/file.gguf → https://huggingface.co/org/repo
  342. const hfLink = (uri) => {
  343. const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
  344. return match ? `https://huggingface.co/${match[1]}` : uri;
  345. };
  346. console.log(`\n${c.bold}Models${c.reset}`);
  347. console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
  348. console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
  349. console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
  350. }
  351. // Device / GPU info
  352. try {
  353. const llm = getDefaultLlamaCpp();
  354. const device = await llm.getDeviceInfo();
  355. console.log(`\n${c.bold}Device${c.reset}`);
  356. if (device.gpu) {
  357. console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
  358. if (device.gpuDevices.length > 0) {
  359. // Deduplicate and count GPUs
  360. const counts = new Map();
  361. for (const name of device.gpuDevices) {
  362. counts.set(name, (counts.get(name) || 0) + 1);
  363. }
  364. const deviceStr = Array.from(counts.entries())
  365. .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
  366. .join(', ');
  367. console.log(` Devices: ${deviceStr}`);
  368. }
  369. if (device.vram) {
  370. console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
  371. }
  372. }
  373. else {
  374. console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
  375. console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
  376. }
  377. console.log(` CPU: ${device.cpuCores} math cores`);
  378. }
  379. catch {
  380. // Don't fail status if LLM init fails
  381. }
  382. // Tips section
  383. const tips = [];
  384. // Check for collections without context
  385. const collectionsWithoutContext = collections.filter(col => {
  386. const contexts = contextsByCollection.get(col.name) || [];
  387. return contexts.length === 0;
  388. });
  389. if (collectionsWithoutContext.length > 0) {
  390. const names = collectionsWithoutContext.map(c => c.name).slice(0, 3).join(', ');
  391. const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : '';
  392. tips.push(`Add context to collections for better search results: ${names}${more}`);
  393. tips.push(` ${c.dim}qmd context add qmd://<name>/ "What this collection contains"${c.reset}`);
  394. tips.push(` ${c.dim}qmd context add qmd://<name>/meeting-notes "Weekly team meeting notes"${c.reset}`);
  395. }
  396. // Check for collections without update commands
  397. const collectionsWithoutUpdate = collections.filter(col => {
  398. const yamlCol = getCollectionFromYaml(col.name);
  399. return !yamlCol?.update;
  400. });
  401. if (collectionsWithoutUpdate.length > 0 && collections.length > 1) {
  402. const names = collectionsWithoutUpdate.map(c => c.name).slice(0, 3).join(', ');
  403. const more = collectionsWithoutUpdate.length > 3 ? ` +${collectionsWithoutUpdate.length - 3} more` : '';
  404. tips.push(`Add update commands to keep collections fresh: ${names}${more}`);
  405. tips.push(` ${c.dim}qmd collection update-cmd <name> 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`);
  406. }
  407. if (tips.length > 0) {
  408. console.log(`\n${c.bold}Tips${c.reset}`);
  409. for (const tip of tips) {
  410. console.log(` ${tip}`);
  411. }
  412. }
  413. closeDb();
  414. }
  /**
   * Run a collection's custom update command through `bash -c` and capture
   * everything it writes.
   *
   * @param {string} command - Shell command from the collection's YAML `update` field.
   * @param {string} cwd - Working directory for the command.
   * @returns {Promise<{output: string, errorOutput: string, exitCode: number}>}
   *   Resolves when the child closes (a null exit code is reported as 1).
   *   Rejects when spawning throws or the child emits an `error` event.
   */
  function runCollectionUpdateCommand(command, cwd) {
    return new Promise((resolveResult, rejectResult) => {
      const proc = nodeSpawn("bash", ["-c", command], {
        cwd,
        stdio: ["ignore", "pipe", "pipe"],
      });
      let out = "";
      let err = "";
      proc.stdout?.on("data", (d) => { out += d.toString(); });
      proc.stderr?.on("data", (d) => { err += d.toString(); });
      proc.on("error", rejectResult);
      proc.on("close", (code) => resolveResult({ output: out, errorOutput: err, exitCode: code ?? 1 }));
    });
  }

  /**
   * Re-index one collection (when a name is given) or every known collection.
   * For each collection the YAML `update` command, if any, runs first; the
   * collection is then re-indexed and a summary line is printed.
   *
   * The process exits non-zero when the requested collection is unknown, when
   * an update command cannot be started, or when it exits with a non-zero code.
   * The database is closed before every one of those exits.
   *
   * @param {string} [collectionFilter] - Name of the single collection to update;
   *   `undefined` means "update all collections".
   * @returns {Promise<void>}
   */
  async function updateCollections(collectionFilter) {
    const db = getDb();
    const storeInstance = getStore();
    // Collections are defined in YAML; no duplicate cleanup needed.
    // Clear Ollama cache on update
    clearCache(db);
    const allCollections = listCollections(db);
    if (allCollections.length === 0) {
      console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
      closeDb();
      return;
    }
    // i-ofojj7dy — when a positional collection name is supplied, filter to just
    // that collection. Validate against the known list and exit non-zero on miss
    // (no silent full-fleet fallback). Empty filter = full-fleet (legacy).
    let collections = allCollections;
    if (collectionFilter !== undefined) {
      const match = allCollections.find((col) => col.name === collectionFilter);
      if (!match) {
        // The callback parameter is `col`, not `c`, so the colour palette `c`
        // is never shadowed.
        const known = allCollections.map((col) => col.name).sort().join(", ");
        console.error(`${c.red}Collection not found: "${collectionFilter}"${c.reset}`);
        console.error(`${c.dim}Available collections: ${known || "(none)"}${c.reset}`);
        console.error(`${c.dim}Run 'qmd update --all' (or 'qmd update' with no args) to process every collection.${c.reset}`);
        closeDb();
        process.exit(1);
      }
      collections = [match];
    }
    // Indents every line of captured command output by one space.
    const indentOutput = (text) => text.trim().split('\n').map((line) => ` ${line}`).join('\n');
    console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);
    for (let i = 0; i < collections.length; i++) {
      const col = collections[i];
      if (!col) {
        continue;
      }
      console.log(`${c.cyan}[${i + 1}/${collections.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`);
      // Execute custom update command if specified in YAML
      const yamlCol = getCollectionFromYaml(col.name);
      if (yamlCol?.update) {
        console.log(`${c.dim} Running update command: ${yamlCol.update}${c.reset}`);
        let commandResult;
        try {
          commandResult = await runCollectionUpdateCommand(yamlCol.update, col.pwd);
        }
        catch (err) {
          console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`);
          // Release the database before terminating, like the not-found path above.
          closeDb();
          process.exit(1);
        }
        const { output, errorOutput, exitCode } = commandResult;
        if (output.trim()) {
          console.log(indentOutput(output));
        }
        if (errorOutput.trim()) {
          console.log(indentOutput(errorOutput));
        }
        if (exitCode !== 0) {
          console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`);
          // Release the database before terminating, like the not-found path above.
          closeDb();
          process.exit(exitCode);
        }
      }
      const startTime = Date.now();
      console.log(`Collection: ${col.pwd} (${col.glob_pattern})`);
      progress.indeterminate();
      const result = await reindexCollection(storeInstance, col.pwd, col.glob_pattern, col.name, {
        ignorePatterns: yamlCol?.ignore,
        onProgress: (info) => {
          progress.set((info.current / info.total) * 100);
          const elapsed = (Date.now() - startTime) / 1000;
          const rate = info.current / elapsed;
          const remaining = (info.total - info.current) / rate;
          // The first few samples give a meaningless rate, so the ETA is held back.
          const eta = info.current > 2 ? ` ETA: ${formatETA(remaining)}` : "";
          if (isTTY) {
            process.stderr.write(`\rIndexing: ${info.current}/${info.total}${eta} `);
          }
        },
      });
      progress.clear();
      console.log(`\nIndexed: ${result.indexed} new, ${result.updated} updated, ${result.unchanged} unchanged, ${result.removed} removed`);
      if (result.orphanedCleaned > 0) {
        console.log(`Cleaned up ${result.orphanedCleaned} orphaned content hash(es)`);
      }
      console.log("");
    }
    // Check if any documents need embedding (show once at end)
    const needsEmbedding = getHashesNeedingEmbedding(db);
    closeDb();
    console.log(`${c.green}✓ All collections updated.${c.reset}`);
    if (needsEmbedding > 0) {
      console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
    }
  }
  /**
   * Find the collection whose root directory contains a filesystem path.
   *
   * The path is first passed through getRealPath(). Collections come from
   * yamlListCollections(); when several roots contain the path, the longest
   * root wins, and the first one listed wins a tie.
   *
   * @param db - Not read by this function; kept so existing callers still work.
   * @param {string} fsPath - Filesystem path to classify.
   * @returns {{collectionName: string, relativePath: string} | null}
   *   The owning collection and the path relative to its root ('' for the root
   *   itself), or null when no collection contains the path.
   */
  function detectCollectionFromPath(db, fsPath) {
    const target = getRealPath(fsPath);
    let winner = null;
    for (const candidate of yamlListCollections()) {
      const root = candidate.path;
      const isInside = target === root || target.startsWith(root + '/');
      if (!isInside) {
        continue;
      }
      // Strictly longer only: an equally long later root never replaces the first.
      if (winner === null || root.length > winner.path.length) {
        winner = { name: candidate.name, path: root };
      }
    }
    if (winner === null) {
      return null;
    }
    // A match means target is either the root itself or "<root>/<rest>".
    const relativePath = target === winner.path
      ? ''
      : target.slice(winner.path.length + 1);
    return {
      collectionName: winner.name,
      relativePath,
    };
  }
  /**
   * Attach a context description to a path and persist it to the YAML config.
   *
   * `pathArg` may be:
   *   - "/"                  → sets the global context;
   *   - a `qmd://collection/path` virtual path;
   *   - a filesystem path (absolute, `~/…`, or relative to getPwd());
   *   - empty/undefined      → the current directory.
   *
   * The process exits with status 1 when the virtual path is invalid, the
   * collection is unknown, or the filesystem path is not inside any collection.
   * The database handle opened at the top is closed on every path, including
   * those error exits.
   *
   * @param {string} [pathArg] - Target path (see above).
   * @param {string} contextText - Context text to store.
   * @returns {Promise<void>}
   */
  async function contextAdd(pathArg, contextText) {
    const db = getDb();
    // Prints the error lines, releases the database, then terminates.
    const fail = (...messages) => {
      for (const message of messages) {
        console.error(message);
      }
      closeDb();
      process.exit(1);
    };
    // Shared success report for the virtual-path and filesystem branches.
    const reportAdded = (displayPath) => {
      console.log(`${c.green}✓${c.reset} Added context for: ${displayPath}`);
      console.log(`${c.dim}Context: ${contextText}${c.reset}`);
      closeDb();
    };
    // Handle "/" as global context (applies to all collections)
    if (pathArg === '/') {
      setGlobalContext(contextText);
      resyncConfig();
      console.log(`${c.green}✓${c.reset} Set global context`);
      console.log(`${c.dim}Context: ${contextText}${c.reset}`);
      closeDb();
      return;
    }
    // Resolve path - defaults to current directory if not provided
    let fsPath = pathArg || '.';
    if (fsPath === '.' || fsPath === './') {
      fsPath = getPwd();
    }
    else if (fsPath.startsWith('~/')) {
      fsPath = homedir() + fsPath.slice(1);
    }
    else if (!fsPath.startsWith('/') && !fsPath.startsWith('qmd://')) {
      fsPath = resolve(getPwd(), fsPath);
    }
    // Handle virtual paths (qmd://collection/path)
    if (isVirtualPath(fsPath)) {
      const parsed = parseVirtualPath(fsPath);
      if (!parsed) {
        fail(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
      }
      const coll = getCollectionFromYaml(parsed.collectionName);
      if (!coll) {
        fail(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
      }
      yamlAddContext(parsed.collectionName, parsed.path, contextText);
      resyncConfig();
      reportAdded(parsed.path
        ? `qmd://${parsed.collectionName}/${parsed.path}`
        : `qmd://${parsed.collectionName}/ (collection root)`);
      return;
    }
    // Detect collection from filesystem path
    const detected = detectCollectionFromPath(db, fsPath);
    if (!detected) {
      fail(
        `${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`,
        `${c.dim}Run 'qmd status' to see indexed collections${c.reset}`,
      );
    }
    yamlAddContext(detected.collectionName, detected.relativePath, contextText);
    resyncConfig();
    reportAdded(detected.relativePath
      ? `qmd://${detected.collectionName}/${detected.relativePath}`
      : `qmd://${detected.collectionName}/`);
  }
  /**
   * Print every configured context, grouped under its collection name.
   * Prints a hint instead when no contexts are configured.
   */
  function contextList() {
    // The returned handle is not read here; the call is kept because the
    // function still finishes with closeDb().
    getDb();
    const contexts = listAllContexts();
    if (contexts.length === 0) {
      console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`);
      closeDb();
      return;
    }
    console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);
    let currentCollection = '';
    contexts.forEach((entry) => {
      // Emit the collection heading only when the collection changes.
      if (entry.collection !== currentCollection) {
        currentCollection = entry.collection;
        console.log(`${c.cyan}${entry.collection}${c.reset}`);
      }
      let label = ' / (root)';
      if (entry.path) {
        label = ` ${entry.path}`;
      }
      console.log(label);
      console.log(` ${c.dim}${entry.context}${c.reset}`);
    });
    closeDb();
  }
  /**
   * Remove a previously stored context.
   *
   * `pathArg` is "/" for the global context, a `qmd://collection/path` virtual
   * path, or a filesystem path (absolute, `~/…`, ".", or relative to getPwd()).
   * Exits with status 1 when the path cannot be mapped to a collection or no
   * context is stored for it.
   *
   * @param {string} pathArg - Path whose context should be removed.
   */
  function contextRemove(pathArg) {
    // Print the error line, then terminate with status 1.
    const bail = (message) => {
      console.error(message);
      process.exit(1);
    };
    const reportRemoved = (label) => {
      console.log(`${c.green}✓${c.reset} Removed context for: ${label}`);
    };

    // Global context
    if (pathArg === '/') {
      setGlobalContext(undefined);
      // Resync so SQLite store_config is updated
      getStore();
      resyncConfig();
      closeDb();
      console.log(`${c.green}✓${c.reset} Removed global context`);
      return;
    }

    // Virtual paths (qmd://collection/path)
    if (isVirtualPath(pathArg)) {
      const virtual = parseVirtualPath(pathArg);
      if (!virtual) {
        bail(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`);
      }
      const collection = getCollectionFromYaml(virtual.collectionName);
      if (!collection) {
        bail(`${c.yellow}Collection not found: ${virtual.collectionName}${c.reset}`);
      }
      if (!yamlRemoveContext(collection.name, virtual.path)) {
        bail(`${c.yellow}No context found for: ${pathArg}${c.reset}`);
      }
      reportRemoved(pathArg);
      return;
    }

    // Filesystem paths: expand to an absolute path first
    let absolutePath = pathArg;
    if (absolutePath === '.' || absolutePath === './') {
      absolutePath = getPwd();
    }
    else if (absolutePath.startsWith('~/')) {
      absolutePath = homedir() + absolutePath.slice(1);
    }
    else if (!absolutePath.startsWith('/')) {
      absolutePath = resolve(getPwd(), absolutePath);
    }
    const location = detectCollectionFromPath(getDb(), absolutePath);
    closeDb();
    if (!location) {
      bail(`${c.yellow}Path is not in any indexed collection: ${absolutePath}${c.reset}`);
    }
    if (!yamlRemoveContext(location.collectionName, location.relativePath)) {
      bail(`${c.yellow}No context found for: qmd://${location.collectionName}/${location.relativePath}${c.reset}`);
    }
    reportRemoved(`qmd://${location.collectionName}/${location.relativePath}`);
  }
  /**
   * Look up an active document inside one collection: exact path match first,
   * then the first row whose path ends with the requested path.
   *
   * @param db - Database handle exposing `prepare(sql).get(...)`.
   * @param {string} collectionName - Collection to search.
   * @param {string} docPath - Path (or path suffix) inside the collection.
   * @returns The row `{ collectionName, path, body }`, or a falsy value when
   *   nothing matches.
   */
  function findDocumentInCollection(db, collectionName, docPath) {
    const exact = db.prepare(`
      SELECT d.collection as collectionName, d.path, content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `).get(collectionName, docPath);
    if (exact) {
      return exact;
    }
    // Fuzzy match by path ending
    return db.prepare(`
      SELECT d.collection as collectionName, d.path, content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
      LIMIT 1
    `).get(collectionName, `%${docPath}`);
  }

  /**
   * Print one document (optionally a line range of it) to stdout, preceded by
   * its folder context when one exists.
   *
   * `filename` is resolved in this order:
   *   1. a trailing `:N` is taken as the start line (only if `fromLine` is unset);
   *   2. a docid is translated to its file path;
   *   3. a `qmd://collection/path` virtual path;
   *   4. a relative `collection/path` whose first component is an indexed collection;
   *   5. a filesystem path, matched through the collection that contains it;
   *   6. any active document whose path ends with the last path component.
   *
   * Exits with status 1 (after closing the database) when nothing is found or
   * a virtual path cannot be parsed.
   *
   * @param {string} filename - Document reference (see above).
   * @param {number} [fromLine] - 1-based first line to print.
   * @param {number} [maxLines] - Maximum number of lines to print.
   * @param {boolean} [lineNumbers] - Prefix lines with their numbers.
   */
  function getDocument(filename, fromLine, maxLines, lineNumbers) {
    const db = getDb();
    // Parse :linenum suffix from filename (e.g., "file.md:100")
    let inputPath = filename;
    const colonMatch = inputPath.match(/:(\d+)$/);
    if (colonMatch && !fromLine) {
      fromLine = parseInt(colonMatch[1], 10);
      inputPath = inputPath.slice(0, -colonMatch[0].length);
    }
    // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
    if (isDocid(inputPath)) {
      const docidMatch = findDocumentByDocid(db, inputPath);
      if (!docidMatch) {
        console.error(`Document not found: ${filename}`);
        closeDb();
        process.exit(1);
      }
      inputPath = docidMatch.filepath;
    }
    let doc = null;
    if (isVirtualPath(inputPath)) {
      // Handle virtual paths (qmd://collection/path)
      const parsed = parseVirtualPath(inputPath);
      if (!parsed) {
        console.error(`Invalid virtual path: ${inputPath}`);
        closeDb();
        process.exit(1);
      }
      doc = findDocumentInCollection(db, parsed.collectionName, parsed.path);
    }
    else {
      // Try to interpret as collection/path format first (before filesystem path).
      // If path is relative (no / or ~ prefix), check if first component is a collection name.
      if (!inputPath.startsWith('/') && !inputPath.startsWith('~')) {
        const [possibleCollection, ...rest] = inputPath.split('/');
        if (rest.length >= 1 && possibleCollection) {
          const collExists = db.prepare(`
            SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1
          `).get(possibleCollection);
          if (collExists) {
            doc = findDocumentInCollection(db, possibleCollection, rest.join('/'));
          }
        }
      }
      // If not found as collection/path, handle as filesystem path
      if (!doc) {
        let fsPath = inputPath;
        if (fsPath.startsWith('~/')) {
          // Expand ~ to home directory
          fsPath = homedir() + fsPath.slice(1);
        }
        else if (!fsPath.startsWith('/')) {
          // Relative path - resolve from current directory
          fsPath = resolve(getPwd(), fsPath);
        }
        fsPath = getRealPath(fsPath);
        // Try to detect which collection contains this path
        const detected = detectCollectionFromPath(db, fsPath);
        if (detected) {
          // Found collection - query by collection name + relative path
          doc = db.prepare(`
            SELECT d.collection as collectionName, d.path, content.doc as body
            FROM documents d
            JOIN content ON content.hash = d.hash
            WHERE d.collection = ? AND d.path = ? AND d.active = 1
          `).get(detected.collectionName, detected.relativePath);
        }
        // Fuzzy match by filename (last component of path)
        if (!doc) {
          const baseName = inputPath.split('/').pop() || inputPath;
          doc = db.prepare(`
            SELECT d.collection as collectionName, d.path, content.doc as body
            FROM documents d
            JOIN content ON content.hash = d.hash
            WHERE d.path LIKE ? AND d.active = 1
            LIMIT 1
          `).get(`%${baseName}`);
        }
      }
    }
    // Ensure doc is not null before proceeding
    if (!doc) {
      console.error(`Document not found: ${filename}`);
      closeDb();
      process.exit(1);
    }
    // Get context for this file
    const context = getContextForPath(db, doc.collectionName, doc.path);
    let output = doc.body;
    const startLine = fromLine || 1;
    // Apply line filtering if specified
    if (fromLine !== undefined || maxLines !== undefined) {
      const lines = output.split('\n');
      const start = startLine - 1; // Convert to 0-indexed
      const end = maxLines !== undefined ? start + maxLines : lines.length;
      output = lines.slice(start, end).join('\n');
    }
    // Add line numbers if requested
    if (lineNumbers) {
      output = addLineNumbers(output, startLine);
    }
    // Output context header if exists
    if (context) {
      console.log(`Folder Context: ${context}\n---\n`);
    }
    console.log(output);
    closeDb();
  }
  /**
   * Resolve a list of document names to file descriptors for multiGet().
   * Virtual paths are matched exactly on collection + path; other names are
   * matched on exact path first, then on path suffix. Names that resolve to
   * nothing are reported on stderr and skipped.
   *
   * The three lookup statements are prepared once, not once per name.
   *
   * @param db - Database handle exposing `prepare(sql).get(...)`.
   * @param {string[]} names - Trimmed, non-empty names.
   * @returns {Array<{filepath: string, displayPath: string, bodyLength: number, collection: string, path: string}>}
   */
  function lookupMultiGetFiles(db, names) {
    const selectFile = `
      SELECT
        'qmd://' || d.collection || '/' || d.path as virtual_path,
        LENGTH(content.doc) as body_length,
        d.collection,
        d.path
      FROM documents d
      JOIN content ON content.hash = d.hash`;
    const byCollectionAndPath = db.prepare(`${selectFile}
      WHERE d.collection = ? AND d.path = ? AND d.active = 1`);
    const byExactPath = db.prepare(`${selectFile}
      WHERE d.path = ? AND d.active = 1
      LIMIT 1`);
    const byPathSuffix = db.prepare(`${selectFile}
      WHERE d.path LIKE ? AND d.active = 1
      LIMIT 1`);
    const files = [];
    for (const name of names) {
      let doc = null;
      if (isVirtualPath(name)) {
        // Virtual paths: exact match on collection + path only
        const parsed = parseVirtualPath(name);
        if (parsed) {
          doc = byCollectionAndPath.get(parsed.collectionName, parsed.path);
        }
      }
      else {
        // Exact path first, then suffix match
        doc = byExactPath.get(name) || byPathSuffix.get(`%${name}`);
      }
      if (!doc) {
        console.error(`File not found: ${name}`);
        continue;
      }
      files.push({
        filepath: doc.virtual_path,
        displayPath: doc.virtual_path,
        bodyLength: doc.body_length,
        collection: doc.collection,
        path: doc.path,
      });
    }
    return files;
  }

  /**
   * Write multiGet() results to stdout in the requested format.
   *
   * @param {Array<object>} results - Entries with file, displayPath, title,
   *   body, context, skipped and (when skipped) skipReason.
   * @param {string} format - "json", "csv", "files", "md" or "xml"; anything
   *   else selects the plain CLI layout.
   */
  function printMultiGetResults(results, format) {
    switch (format) {
      case "json": {
        const output = results.map((r) => ({
          file: r.displayPath,
          title: r.title,
          ...(r.context && { context: r.context }),
          ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
        }));
        console.log(JSON.stringify(output, null, 2));
        return;
      }
      case "csv": {
        // Quote a field only when it contains a delimiter, quote or newline.
        const escapeField = (val) => {
          if (val === null || val === undefined) {
            return "";
          }
          const str = String(val);
          if (str.includes(",") || str.includes('"') || str.includes("\n")) {
            return `"${str.replace(/"/g, '""')}"`;
          }
          return str;
        };
        console.log("file,title,context,skipped,body");
        for (const r of results) {
          console.log([r.displayPath, r.title, r.context, r.skipped ? "true" : "false", r.skipped ? r.skipReason : r.body].map(escapeField).join(","));
        }
        return;
      }
      case "files": {
        for (const r of results) {
          const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
          const status = r.skipped ? "[SKIPPED]" : "";
          console.log(`${r.displayPath}${ctx}${status ? `,${status}` : ""}`);
        }
        return;
      }
      case "md": {
        for (const r of results) {
          console.log(`## ${r.displayPath}\n`);
          if (r.title && r.title !== r.displayPath) {
            console.log(`**Title:** ${r.title}\n`);
          }
          if (r.context) {
            console.log(`**Context:** ${r.context}\n`);
          }
          if (r.skipped) {
            console.log(`> ${r.skipReason}\n`);
          }
          else {
            console.log("```");
            console.log(r.body);
            console.log("```\n");
          }
        }
        return;
      }
      case "xml": {
        console.log('<?xml version="1.0" encoding="UTF-8"?>');
        console.log("<documents>");
        for (const r of results) {
          console.log(" <document>");
          console.log(` <file>${escapeXml(r.displayPath)}</file>`);
          console.log(` <title>${escapeXml(r.title)}</title>`);
          if (r.context) {
            console.log(` <context>${escapeXml(r.context)}</context>`);
          }
          if (r.skipped) {
            console.log(` <skipped>true</skipped>`);
            console.log(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
          }
          else {
            console.log(` <body>${escapeXml(r.body)}</body>`);
          }
          console.log(" </document>");
        }
        console.log("</documents>");
        return;
      }
      default: {
        // CLI format (default)
        for (const r of results) {
          console.log(`\n${'='.repeat(60)}`);
          console.log(`File: ${r.displayPath}`);
          console.log(`${'='.repeat(60)}\n`);
          if (r.skipped) {
            console.log(`[SKIPPED: ${r.skipReason}]`);
            continue;
          }
          if (r.context) {
            console.log(`Folder Context: ${r.context}\n---\n`);
          }
          console.log(r.body);
        }
      }
    }
  }

  // Multi-get: fetch multiple documents by glob pattern or comma-separated list
  /**
   * @param {string} pattern - Glob pattern, or a comma-separated list of
   *   virtual paths / relative paths (a list is assumed when the text contains
   *   a comma and none of `*`, `?`, `{`).
   * @param {number} [maxLines] - Keep only the first N lines of each body and
   *   append a truncation note when lines were dropped.
   * @param {number} [maxBytes] - Documents whose stored length exceeds this are
   *   reported as skipped instead of printed.
   * @param {string} [format] - Output format, see printMultiGetResults().
   *
   * Exits with status 1 when a glob pattern matches no files.
   */
  function multiGet(pattern, maxLines, maxBytes = DEFAULT_MULTI_GET_MAX_BYTES, format = "cli") {
    const db = getDb();
    // Check if it's a comma-separated list or a glob pattern
    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?') && !pattern.includes('{');
    let files;
    if (isCommaSeparated) {
      // Comma-separated list of files (can be virtual paths or relative paths)
      const names = pattern.split(',').map((s) => s.trim()).filter(Boolean);
      files = lookupMultiGetFiles(db, names);
    }
    else {
      // Glob pattern - matchFilesByGlob now returns virtual paths
      files = matchFilesByGlob(db, pattern).map((f) => ({
        ...f,
        collection: undefined, // Will be fetched later if needed
        path: undefined,
      }));
      if (files.length === 0) {
        console.error(`No files matched pattern: ${pattern}`);
        closeDb();
        process.exit(1);
      }
    }
    // Prepared once and reused for every file in the loop below.
    const fetchBody = db.prepare(`
      SELECT content.doc as body, d.title
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `);
    // Collect results for structured output
    const results = [];
    for (const file of files) {
      // Parse virtual path to get collection info if not already available
      let collection = file.collection;
      let path = file.path;
      if (!collection || !path) {
        const parsed = parseVirtualPath(file.filepath);
        if (parsed) {
          collection = parsed.collectionName;
          path = parsed.path;
        }
      }
      // Get context using collection-scoped function
      const context = collection && path ? getContextForPath(db, collection, path) : null;
      // Check size limit
      if (file.bodyLength > maxBytes) {
        results.push({
          file: file.filepath,
          displayPath: file.displayPath,
          title: file.displayPath.split('/').pop() || file.displayPath,
          body: "",
          context,
          skipped: true,
          skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'qmd get ${file.displayPath}' to retrieve.`,
        });
        continue;
      }
      // Fetch document content using collection and path
      if (!collection || !path) {
        continue;
      }
      const doc = fetchBody.get(collection, path);
      if (!doc) {
        continue;
      }
      let body = doc.body;
      // Apply line limit if specified
      if (maxLines !== undefined) {
        const lines = body.split('\n');
        body = lines.slice(0, maxLines).join('\n');
        if (lines.length > maxLines) {
          body += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
        }
      }
      results.push({
        file: file.filepath,
        displayPath: file.displayPath,
        title: doc.title || file.displayPath.split('/').pop() || file.displayPath,
        body,
        context,
        skipped: false,
      });
    }
    closeDb();
    // Output based on format
    printMultiGetResults(results, format);
  }
  // List files in virtual file tree
  /**
   * With no argument, prints every YAML-defined collection with its active
   * file count. With an argument, prints the files of one collection in an
   * `ls -l`-like layout (size, modification time, virtual path).
   *
   * @param {string} [pathArg] - `qmd://collection[/prefix]`, `collection`, or
   *   `collection/prefix`. The prefix is applied as a SQL `LIKE 'prefix%'`
   *   filter on the document path.
   *
   * Exits with status 1 when the virtual path is invalid or the collection is
   * not defined in the YAML config.
   */
  function listFiles(pathArg) {
    const db = getDb();
    if (!pathArg) {
      // No argument - list all collections
      const yamlCollections = yamlListCollections();
      if (yamlCollections.length === 0) {
        console.log("No collections found. Run 'qmd collection add .' to index files.");
        closeDb();
        return;
      }
      // Get file counts from database for each collection (statement prepared once)
      const countActive = db.prepare(`
        SELECT COUNT(*) as file_count
        FROM documents d
        WHERE d.collection = ? AND d.active = 1
      `);
      const collections = yamlCollections.map((coll) => ({
        name: coll.name,
        file_count: countActive.get(coll.name)?.file_count || 0,
      }));
      console.log(`${c.bold}Collections:${c.reset}\n`);
      for (const coll of collections) {
        console.log(` ${c.dim}qmd://${c.reset}${c.cyan}${coll.name}/${c.reset} ${c.dim}(${coll.file_count} files)${c.reset}`);
      }
      closeDb();
      return;
    }
    // Parse the path argument
    let collectionName;
    let pathPrefix = null;
    if (pathArg.startsWith('qmd://')) {
      // Virtual path format: qmd://collection/path
      const parsed = parseVirtualPath(pathArg);
      if (!parsed) {
        console.error(`Invalid virtual path: ${pathArg}`);
        closeDb();
        process.exit(1);
      }
      collectionName = parsed.collectionName;
      pathPrefix = parsed.path;
    }
    else {
      // Just collection name or collection/path
      const parts = pathArg.split('/');
      collectionName = parts[0] || '';
      if (parts.length > 1) {
        pathPrefix = parts.slice(1).join('/');
      }
    }
    // Get the collection
    const coll = getCollectionFromYaml(collectionName);
    if (!coll) {
      console.error(`Collection not found: ${collectionName}`);
      console.error(`Run 'qmd ls' to see available collections.`);
      closeDb();
      process.exit(1);
    }
    // List files in the collection with size and modification time; the
    // path filter is only added when a prefix was given.
    const query = `
      SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
      FROM documents d
      JOIN content ct ON d.hash = ct.hash
      WHERE d.collection = ?${pathPrefix ? ' AND d.path LIKE ?' : ''} AND d.active = 1
      ORDER BY d.path
    `;
    const params = pathPrefix ? [coll.name, `${pathPrefix}%`] : [coll.name];
    const files = db.prepare(query).all(...params);
    if (files.length === 0) {
      if (pathPrefix) {
        console.log(`No files found under qmd://${collectionName}/${pathPrefix}`);
      }
      else {
        console.log(`No files found in collection: ${collectionName}`);
      }
      closeDb();
      return;
    }
    // Format each size once, then find the widest label for alignment.
    // reduce() is used instead of Math.max(...array) because spreading a very
    // large array into call arguments can throw a RangeError.
    const sizeLabels = files.map((f) => formatBytes(f.size));
    const maxSize = sizeLabels.reduce((widest, label) => Math.max(widest, label.length), 0);
    // Output in ls -l style
    files.forEach((file, index) => {
      const sizeStr = sizeLabels[index].padStart(maxSize);
      const timeStr = formatLsTime(new Date(file.modified_at));
      // Dim the qmd:// prefix, highlight the filename
      console.log(`${sizeStr} ${timeStr} ${c.dim}qmd://${collectionName}/${c.reset}${c.cyan}${file.path}${c.reset}`);
    });
    closeDb();
  }
  // Format date/time like ls -l
  /**
   * @param {Date} date - Timestamp to render (local time zone).
   * @returns {string} "Mon DD YYYY" for dates more than 180 days in the past,
   *   otherwise "Mon DD HH:MM". The day is space-padded to two characters.
   */
  function formatLsTime(date) {
    const MONTH_LABELS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    const SIX_MONTHS_MS = 6 * 30 * 24 * 60 * 60 * 1000;
    const cutoff = new Date(Date.now() - SIX_MONTHS_MS);
    const twoDigits = (value) => value.toString().padStart(2, '0');

    const datePart = `${MONTH_LABELS[date.getMonth()]} ${date.getDate().toString().padStart(2, ' ')}`;
    // If file is older than 6 months, show year instead of time
    if (date < cutoff) {
      return `${datePart} ${date.getFullYear()}`;
    }
    return `${datePart} ${twoDigits(date.getHours())}:${twoDigits(date.getMinutes())}`;
  }
  1193. // Collection management commands
  /**
   * Print every collection known to the database with its glob pattern,
   * optional ignore list (from YAML), active file count and time since the
   * last modification. Collections whose YAML entry has
   * `includeByDefault: false` are tagged "[excluded]".
   */
  function collectionList() {
    const collections = listCollections(getDb());
    if (collections.length === 0) {
      console.log("No collections found. Run 'qmd collection add .' to create one.");
      closeDb();
      return;
    }
    console.log(`${c.bold}Collections (${collections.length}):${c.reset}\n`);
    collections.forEach((entry) => {
      // A missing timestamp falls back to "now".
      const lastChange = entry.last_modified ? new Date(entry.last_modified) : new Date();
      const age = formatTimeAgo(lastChange);
      // Get YAML config to check includeByDefault
      const config = getCollectionFromYaml(entry.name);
      let excludeTag = '';
      if (config?.includeByDefault === false) {
        excludeTag = ` ${c.yellow}[excluded]${c.reset}`;
      }
      console.log(`${c.cyan}${entry.name}${c.reset} ${c.dim}(qmd://${entry.name}/)${c.reset}${excludeTag}`);
      console.log(` ${c.dim}Pattern:${c.reset} ${entry.glob_pattern}`);
      if (config?.ignore?.length) {
        console.log(` ${c.dim}Ignore:${c.reset} ${config.ignore.join(', ')}`);
      }
      console.log(` ${c.dim}Files:${c.reset} ${entry.active_count}`);
      console.log(` ${c.dim}Updated:${c.reset} ${age}`);
      console.log();
    });
    closeDb();
  }
  /**
   * Register a new collection in the YAML config and index its files.
   *
   * Exits with status 1 when a collection with the same name already exists,
   * or when another collection already covers the same path and pattern.
   *
   * @param {string} pwd - Root directory of the collection.
   * @param {string} globPattern - Glob pattern selecting the files to index.
   * @param {string} [name] - Collection name; defaults to the last component
   *   of `pwd`, or "root" when `pwd` has no components.
   * @returns {Promise<void>}
   */
  async function collectionAdd(pwd, globPattern, name) {
    // If name not provided, generate from pwd basename
    const collName = name || pwd.split('/').filter(Boolean).pop() || 'root';

    // Check if collection with this name already exists in YAML
    if (getCollectionFromYaml(collName)) {
      console.error(`${c.yellow}Collection '${collName}' already exists.${c.reset}`);
      console.error(`Use a different name with --name <name>`);
      process.exit(1);
    }

    // Check if a collection with this pwd+glob already exists in YAML
    const duplicate = yamlListCollections().find(
      (entry) => entry.path === pwd && entry.pattern === globPattern,
    );
    if (duplicate) {
      console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
      console.error(` Name: ${duplicate.name} (qmd://${duplicate.name}/)`);
      console.error(` Pattern: ${globPattern}`);
      console.error(`\nUse 'qmd update' to re-index it, or remove it first with 'qmd collection remove ${duplicate.name}'`);
      process.exit(1);
    }

    // Add to YAML config + sync to SQLite
    const collectionsModule = await import("../collections.js");
    collectionsModule.addCollection(collName, pwd, globPattern);
    resyncConfig();

    // Create the collection and index files
    console.log(`Creating collection '${collName}'...`);
    const created = getCollectionFromYaml(collName);
    await indexFiles(pwd, globPattern, collName, false, created?.ignore);
    console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
  }
  /**
   * Delete a collection from the database and from the YAML config, then
   * report how many documents and orphaned content hashes were removed.
   * Exits with status 1 when the collection is not defined in YAML.
   *
   * @param {string} name - Name of the collection to remove.
   */
  function collectionRemove(name) {
    // Check if collection exists in YAML
    if (!getCollectionFromYaml(name)) {
      console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
      console.error(`Run 'qmd collection list' to see available collections.`);
      process.exit(1);
    }
    const { deletedDocs, cleanedHashes } = removeCollection(getDb(), name);
    // Also remove from YAML config
    yamlRemoveCollectionFn(name);
    closeDb();
    const summary = [
      `${c.green}✓${c.reset} Removed collection '${name}'`,
      ` Deleted ${deletedDocs} documents`,
    ];
    if (cleanedHashes > 0) {
      summary.push(` Cleaned up ${cleanedHashes} orphaned content hashes`);
    }
    for (const line of summary) {
      console.log(line);
    }
  }
  1274. function collectionRename(oldName, newName) {
  1275. // Check if old collection exists in YAML
  1276. const coll = getCollectionFromYaml(oldName);
  1277. if (!coll) {
  1278. console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
  1279. console.error(`Run 'qmd collection list' to see available collections.`);
  1280. process.exit(1);
  1281. }
  1282. // Check if new name already exists in YAML
  1283. const existing = getCollectionFromYaml(newName);
  1284. if (existing) {
  1285. console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
  1286. console.error(`Choose a different name or remove the existing collection first.`);
  1287. process.exit(1);
  1288. }
  1289. const db = getDb();
  1290. renameCollection(db, oldName, newName);
  1291. // Also rename in YAML config
  1292. yamlRenameCollectionFn(oldName, newName);
  1293. closeDb();
  1294. console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
  1295. console.log(` Virtual paths updated: ${c.cyan}qmd://${oldName}/${c.reset} → ${c.cyan}qmd://${newName}/${c.reset}`);
  1296. }
  1297. async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppressEmbedNotice = false, ignorePatterns) {
  1298. const db = getDb();
  1299. const resolvedPwd = pwd || getPwd();
  1300. const now = new Date().toISOString();
  1301. const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
  1302. // Clear Ollama cache on index
  1303. clearCache(db);
  1304. // Collection name must be provided (from YAML)
  1305. if (!collectionName) {
  1306. throw new Error("Collection name is required. Collections must be defined in ~/.config/qmd/index.yml");
  1307. }
  1308. console.log(`Collection: ${resolvedPwd} (${globPattern})`);
  1309. progress.indeterminate();
  1310. const allIgnore = [
  1311. ...excludeDirs.map(d => `**/${d}/**`),
  1312. ...(ignorePatterns || []),
  1313. ];
  1314. const allFiles = await fastGlob(globPattern, {
  1315. cwd: resolvedPwd,
  1316. onlyFiles: true,
  1317. followSymbolicLinks: false,
  1318. dot: false,
  1319. ignore: allIgnore,
  1320. });
  1321. // Filter hidden files/folders (dot: false handles top-level but not nested)
  1322. const files = allFiles.filter(file => {
  1323. const parts = file.split("/");
  1324. return !parts.some(part => part.startsWith("."));
  1325. });
  1326. const total = files.length;
  1327. const hasNoFiles = total === 0;
  1328. if (hasNoFiles) {
  1329. progress.clear();
  1330. console.log("No files found matching pattern.");
  1331. // Continue so the deactivation pass can mark previously indexed docs as inactive.
  1332. }
  1333. let indexed = 0, updated = 0, unchanged = 0, processed = 0;
  1334. const seenPaths = new Set();
  1335. const startTime = Date.now();
  1336. for (const relativeFile of files) {
  1337. const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
  1338. const path = handelize(relativeFile); // Normalize path for token-friendliness
  1339. seenPaths.add(path);
  1340. let content;
  1341. try {
  1342. content = readFileSync(filepath, "utf-8");
  1343. }
  1344. catch (err) {
  1345. // Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
  1346. processed++;
  1347. progress.set((processed / total) * 100);
  1348. continue;
  1349. }
  1350. // Skip empty files - nothing useful to index
  1351. if (!content.trim()) {
  1352. processed++;
  1353. continue;
  1354. }
  1355. const hash = await hashContent(content);
  1356. const title = extractTitle(content, relativeFile);
  1357. // Check if document exists in this collection with this path
  1358. const existing = findActiveDocument(db, collectionName, path);
  1359. if (existing) {
  1360. if (existing.hash === hash) {
  1361. // Hash unchanged, but check if title needs updating
  1362. if (existing.title !== title) {
  1363. updateDocumentTitle(db, existing.id, title, now);
  1364. updated++;
  1365. }
  1366. else {
  1367. unchanged++;
  1368. }
  1369. }
  1370. else {
  1371. // Content changed - insert new content hash and update document
  1372. insertContent(db, hash, content, now);
  1373. const stat = statSync(filepath);
  1374. updateDocument(db, existing.id, title, hash, stat ? new Date(stat.mtime).toISOString() : now);
  1375. updated++;
  1376. }
  1377. }
  1378. else {
  1379. // New document - insert content and document
  1380. indexed++;
  1381. insertContent(db, hash, content, now);
  1382. const stat = statSync(filepath);
  1383. insertDocument(db, collectionName, path, title, hash, stat ? new Date(stat.birthtime).toISOString() : now, stat ? new Date(stat.mtime).toISOString() : now);
  1384. }
  1385. processed++;
  1386. progress.set((processed / total) * 100);
  1387. const elapsed = (Date.now() - startTime) / 1000;
  1388. const rate = processed / elapsed;
  1389. const remaining = (total - processed) / rate;
  1390. const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
  1391. if (isTTY)
  1392. process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
  1393. }
  1394. // Deactivate documents in this collection that no longer exist
  1395. const allActive = getActiveDocumentPaths(db, collectionName);
  1396. let removed = 0;
  1397. for (const path of allActive) {
  1398. if (!seenPaths.has(path)) {
  1399. deactivateDocument(db, collectionName, path);
  1400. removed++;
  1401. }
  1402. }
  1403. // Clean up orphaned content hashes (content not referenced by any document)
  1404. const orphanedContent = cleanupOrphanedContent(db);
  1405. // Check if vector index needs updating
  1406. const needsEmbedding = getHashesNeedingEmbedding(db);
  1407. progress.clear();
  1408. console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
  1409. if (orphanedContent > 0) {
  1410. console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
  1411. }
  1412. if (needsEmbedding > 0 && !suppressEmbedNotice) {
  1413. console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
  1414. }
  1415. closeDb();
  1416. }
  1417. function renderProgressBar(percent, width = 30) {
  1418. const filled = Math.round((percent / 100) * width);
  1419. const empty = width - filled;
  1420. const bar = "█".repeat(filled) + "░".repeat(empty);
  1421. return bar;
  1422. }
  1423. function parseEmbedBatchOption(name, value) {
  1424. if (value === undefined)
  1425. return undefined;
  1426. const parsed = Number(value);
  1427. if (!Number.isInteger(parsed) || parsed < 1) {
  1428. throw new Error(`${name} must be a positive integer`);
  1429. }
  1430. return parsed;
  1431. }
  1432. function parseChunkStrategy(value) {
  1433. if (value === undefined)
  1434. return undefined;
  1435. const s = String(value);
  1436. if (s === "auto" || s === "regex" || s === "function")
  1437. return s;
  1438. throw new Error(`--chunk-strategy must be "auto", "regex", or "function" (got "${s}")`);
  1439. }
  1440. function parseProviderKind(value) {
  1441. if (value === undefined)
  1442. return undefined;
  1443. const s = String(value).toLowerCase();
  1444. if (s === "local" || s === "openai")
  1445. return s;
  1446. throw new Error(`--provider must be "local" or "openai" (got "${s}")`);
  1447. }
  1448. function parseOptionalPositiveInt(name, value) {
  1449. if (value === undefined)
  1450. return undefined;
  1451. const parsed = Number(value);
  1452. if (!Number.isInteger(parsed) || parsed < 1) {
  1453. throw new Error(`${name} must be a positive integer`);
  1454. }
  1455. return parsed;
  1456. }
  1457. /**
  1458. * Build an `EmbeddingProvider` for the QUERY-side path (vsearch / query)
  1459. * if and only if the user has opted into a non-local provider via flags or
  1460. * env vars. Returns `undefined` for the zero-config case so the legacy
  1461. * `getDefaultLlamaCpp().embed(...)` path is used unchanged — preserving
  1462. * pre-patch behavior for callers that have not configured remote embedding
  1463. * (i-loazq6ze DoD #5: backward compat).
  1464. *
  1465. * Resolution mirrors `qmd embed` (factory.resolveProviderKind):
  1466. * 1. Explicit `--provider` flag → build provider
  1467. * 2. Any `--embed-*` flag / `QMD_EMBED_*` env / `embedProvider.endpoint`
  1468. * in `~/.config/qmd/config.json` → build provider
  1469. * 3. Otherwise → return `undefined` (legacy path)
  1470. *
  1471. * Returns `null` on construction failure (e.g. malformed flags) so the
  1472. * caller can warn + fall back to the legacy path.
  1473. */
  1474. function buildQueryEmbedProvider(values) {
  1475. const providerCliKind = parseProviderKind(values["provider"]);
  1476. const opts = buildProviderOpts(values, providerCliKind);
  1477. // Determine whether the user opted into a provider. The factory's resolve
  1478. // step returns "local" by default; without explicit opt-in (flag/env/
  1479. // config), we keep the legacy path with no construction overhead.
  1480. const resolved = resolveProviderKind(opts);
  1481. const hasProviderFlag = providerCliKind !== undefined;
  1482. const hasOpenAiOverride = !!opts.openai && Object.keys(opts.openai).length > 0;
  1483. const envOptIn = !!(process.env.QMD_EMBED_PROVIDER ||
  1484. process.env.QMD_EMBED_ENDPOINT ||
  1485. process.env.QMD_EMBED_AUTO_FALLBACK);
  1486. if (!hasProviderFlag && !hasOpenAiOverride && !envOptIn && resolved === "local") {
  1487. return undefined;
  1488. }
  1489. try {
  1490. return createEmbeddingProvider(opts);
  1491. }
  1492. catch (err) {
  1493. process.stderr.write(`${c.yellow}Warning: failed to build query embedding provider — using local fallback (${err instanceof Error ? err.message : String(err)})${c.reset}\n`);
  1494. return undefined;
  1495. }
  1496. }
  1497. /**
  1498. * Translate `cli.values` into `CreateEmbeddingProviderOptions`. CLI flags
  1499. * win over env vars (the factory itself reads env when these are unset).
  1500. */
  1501. function buildProviderOpts(values, providerCliKind) {
  1502. const endpoint = optionalString(values["embed-endpoint"]);
  1503. const apiKey = optionalString(values["embed-api-key"]);
  1504. const modelId = optionalString(values["embed-model-id"]);
  1505. const upstreamModel = optionalString(values["embed-upstream-model"]);
  1506. const batchSize = parseOptionalPositiveInt("--embed-batch-size", values["embed-batch-size"]);
  1507. const timeoutMs = parseOptionalPositiveInt("--embed-timeout-ms", values["embed-timeout-ms"]);
  1508. // Only build the openai overrides object if the user supplied flags
  1509. const openai = endpoint || apiKey || modelId || upstreamModel || batchSize !== undefined || timeoutMs !== undefined
  1510. ? {
  1511. ...(endpoint !== undefined ? { endpoint } : {}),
  1512. ...(apiKey !== undefined ? { apiKey } : {}),
  1513. ...(modelId !== undefined ? { modelId } : {}),
  1514. ...(upstreamModel !== undefined ? { upstreamModel } : {}),
  1515. ...(batchSize !== undefined ? { batchSize } : {}),
  1516. ...(timeoutMs !== undefined ? { timeoutMs } : {}),
  1517. }
  1518. : undefined;
  1519. // CLI flag for auto-fallback wrapping (only meaningful when kind === openai)
  1520. const autoFallback = values["embed-auto-fallback"] === true ? true : undefined;
  1521. return {
  1522. ...(providerCliKind ? { kind: providerCliKind } : {}),
  1523. ...(openai ? { openai } : {}),
  1524. ...(autoFallback !== undefined ? { autoFallback } : {}),
  1525. };
  1526. }
  1527. function optionalString(v) {
  1528. if (v === undefined || v === null)
  1529. return undefined;
  1530. const s = String(v);
  1531. return s === "" ? undefined : s;
  1532. }
  1533. async function vectorIndex(model = DEFAULT_EMBED_MODEL_URI, force = false, batchOptions) {
  1534. const storeInstance = getStore();
  1535. const db = storeInstance.db;
  1536. // i-ofojj7dy — validate the collection filter against the known list before
  1537. // doing any work. Mirrors `qmd update <name>` ergonomics.
  1538. if (batchOptions?.collection !== undefined) {
  1539. const allCollections = listCollections(db);
  1540. const match = allCollections.find(col => col.name === batchOptions.collection);
  1541. if (!match) {
  1542. const known = allCollections.map(c => c.name).sort().join(", ");
  1543. console.error(`${c.red}Collection not found: "${batchOptions.collection}"${c.reset}`);
  1544. console.error(`${c.dim}Available collections: ${known || "(none)"}${c.reset}`);
  1545. console.error(`${c.dim}Run 'qmd embed --all' (or 'qmd embed' with no args) to embed every collection.${c.reset}`);
  1546. closeDb();
  1547. process.exit(1);
  1548. }
  1549. // i-ofojj7dy — `--force` is fleet-wide (nukes all content_vectors).
  1550. // Combining it with a single-collection filter would silently break
  1551. // every OTHER collection's embeddings. Per-collection force-clear is a
  1552. // distinct feature (out of scope here). Refuse and steer the user.
  1553. if (force) {
  1554. console.error(`${c.red}--force cannot be combined with a positional collection name.${c.reset}`);
  1555. console.error(`${c.dim}--force clears ALL vectors fleet-wide before re-embedding; restricting it to one collection would corrupt the others.${c.reset}`);
  1556. console.error(`${c.dim}Use 'qmd embed --all -f' to force-re-embed every collection, OR drop -f and run 'qmd embed ${batchOptions.collection}' to embed only this collection's pending hashes.${c.reset}`);
  1557. closeDb();
  1558. process.exit(1);
  1559. }
  1560. }
  1561. if (force) {
  1562. console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
  1563. }
  1564. // Check if there's work to do before starting
  1565. const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection);
  1566. if (hashesToEmbed === 0 && !force) {
  1567. if (batchOptions?.collection) {
  1568. console.log(`${c.green}✓ All content hashes in collection "${batchOptions.collection}" already have embeddings.${c.reset}`);
  1569. }
  1570. else {
  1571. console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
  1572. }
  1573. closeDb();
  1574. return;
  1575. }
  1576. console.log(`${c.dim}Model: ${model}${c.reset}\n`);
  1577. if (batchOptions?.embedProvider) {
  1578. const kind = batchOptions.embedProvider.kind;
  1579. const providerModel = batchOptions.embedProvider.getModelId();
  1580. console.log(`${c.dim}Provider: ${kind} (model id "${providerModel}")${c.reset}\n`);
  1581. }
  1582. else if (batchOptions?.providerKind) {
  1583. console.log(`${c.dim}Provider: ${batchOptions.providerKind}${c.reset}\n`);
  1584. }
  1585. if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
  1586. const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
  1587. const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
  1588. console.log(`${c.dim}Batch: ${maxDocsPerBatch} docs / ${formatBytes(maxBatchBytes)}${c.reset}\n`);
  1589. }
  1590. cursor.hide();
  1591. progress.indeterminate();
  1592. const startTime = Date.now();
  1593. const result = await generateEmbeddings(storeInstance, {
  1594. force,
  1595. model,
  1596. maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
  1597. maxBatchBytes: batchOptions?.maxBatchBytes,
  1598. chunkStrategy: batchOptions?.chunkStrategy,
  1599. embedProvider: batchOptions?.embedProvider,
  1600. onProgress: (info) => {
  1601. if (info.totalBytes === 0)
  1602. return;
  1603. const percent = (info.bytesProcessed / info.totalBytes) * 100;
  1604. progress.set(percent);
  1605. const elapsed = (Date.now() - startTime) / 1000;
  1606. const bytesPerSec = info.bytesProcessed / elapsed;
  1607. const remainingBytes = info.totalBytes - info.bytesProcessed;
  1608. const etaSec = remainingBytes / bytesPerSec;
  1609. const bar = renderProgressBar(percent);
  1610. const percentStr = percent.toFixed(0).padStart(3);
  1611. const throughput = `${formatBytes(bytesPerSec)}/s`;
  1612. const eta = elapsed > 2 ? formatETA(etaSec) : "...";
  1613. const errStr = info.errors > 0 ? ` ${c.yellow}${info.errors} err${c.reset}` : "";
  1614. if (isTTY)
  1615. process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${info.chunksEmbedded}/${info.totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
  1616. },
  1617. });
  1618. progress.clear();
  1619. cursor.show();
  1620. const totalTimeSec = result.durationMs / 1000;
  1621. if (result.chunksEmbedded === 0 && result.docsProcessed === 0) {
  1622. console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
  1623. }
  1624. else {
  1625. console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
  1626. console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${result.chunksEmbedded}${c.reset} chunks from ${c.bold}${result.docsProcessed}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset}`);
  1627. if (result.errors > 0) {
  1628. console.log(`${c.yellow}⚠ ${result.errors} chunks failed${c.reset}`);
  1629. }
  1630. }
  1631. closeDb();
  1632. }
  1633. // Sanitize a term for FTS5: remove punctuation except apostrophes
  1634. function sanitizeFTS5Term(term) {
  1635. // Remove all non-alphanumeric except apostrophes (for contractions like "don't")
  1636. return term.replace(/[^\w']/g, '').trim();
  1637. }
  1638. // Build FTS5 query: phrase-aware with fallback to individual terms
  1639. function buildFTS5Query(query) {
  1640. // Sanitize the full query for phrase matching
  1641. const sanitizedQuery = query.replace(/[^\w\s']/g, '').trim();
  1642. const terms = query
  1643. .split(/\s+/)
  1644. .map(sanitizeFTS5Term)
  1645. .filter(term => term.length >= 2); // Skip single chars and empty
  1646. if (terms.length === 0)
  1647. return "";
  1648. if (terms.length === 1)
  1649. return `"${terms[0].replace(/"/g, '""')}"`;
  1650. // Strategy: exact phrase OR proximity match OR individual terms
  1651. // Exact phrase matches rank highest, then close proximity, then any term
  1652. const phrase = `"${sanitizedQuery.replace(/"/g, '""')}"`;
  1653. const quotedTerms = terms.map(t => `"${t.replace(/"/g, '""')}"`);
  1654. // FTS5 NEAR syntax: NEAR(term1 term2, distance)
  1655. const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
  1656. const orTerms = quotedTerms.join(' OR ');
  1657. // Exact phrase > proximity > any term
  1658. return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
  1659. }
  1660. // Normalize BM25 score to 0-1 range using sigmoid
  1661. function normalizeBM25(score) {
  1662. // BM25 scores are negative in SQLite (lower = better)
  1663. // Typical range: -15 (excellent) to -2 (weak match)
  1664. // Map to 0-1 where higher is better
  1665. const absScore = Math.abs(score);
  1666. // Sigmoid-ish normalization: maps ~2-15 range to ~0.1-0.95
  1667. return 1 / (1 + Math.exp(-(absScore - 5) / 3));
  1668. }
  1669. // Highlight query terms in text (skip short words < 3 chars)
  1670. function highlightTerms(text, query) {
  1671. if (!useColor)
  1672. return text;
  1673. const terms = query.toLowerCase().split(/\s+/).filter(t => t.length >= 3);
  1674. let result = text;
  1675. for (const term of terms) {
  1676. const regex = new RegExp(`(${term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')})`, 'gi');
  1677. result = result.replace(regex, `${c.yellow}${c.bold}$1${c.reset}`);
  1678. }
  1679. return result;
  1680. }
  1681. // Format score with color based on value
  1682. function formatScore(score) {
  1683. const pct = (score * 100).toFixed(0).padStart(3);
  1684. if (!useColor)
  1685. return `${pct}%`;
  1686. if (score >= 0.7)
  1687. return `${c.green}${pct}%${c.reset}`;
  1688. if (score >= 0.4)
  1689. return `${c.yellow}${pct}%${c.reset}`;
  1690. return `${c.dim}${pct}%${c.reset}`;
  1691. }
  1692. function formatExplainNumber(value) {
  1693. return value.toFixed(4);
  1694. }
  1695. // Shorten directory path for display - relative to $HOME (used for context paths, not documents)
  1696. function shortPath(dirpath) {
  1697. const home = homedir();
  1698. if (dirpath.startsWith(home)) {
  1699. return '~' + dirpath.slice(home.length);
  1700. }
  1701. return dirpath;
  1702. }
  1703. // Emit format-safe empty output for search commands.
  1704. function printEmptySearchResults(format, reason = "no_results") {
  1705. if (format === "json") {
  1706. console.log("[]");
  1707. return;
  1708. }
  1709. if (format === "csv") {
  1710. console.log("docid,score,file,title,context,line,snippet");
  1711. return;
  1712. }
  1713. if (format === "xml") {
  1714. console.log("<results></results>");
  1715. return;
  1716. }
  1717. if (format === "md" || format === "files") {
  1718. return;
  1719. }
  1720. if (reason === "min_score") {
  1721. console.log("No results found above minimum score threshold.");
  1722. return;
  1723. }
  1724. console.log("No results found.");
  1725. }
  1726. const DEFAULT_EDITOR_URI_TEMPLATE = "vscode://file/{path}:{line}:{col}";
  1727. function encodePathForEditorUri(absolutePath) {
  1728. return encodeURI(absolutePath)
  1729. .replace(/\?/g, "%3F")
  1730. .replace(/#/g, "%23");
  1731. }
  1732. function getEditorUriTemplate() {
  1733. const envTemplate = process.env.QMD_EDITOR_URI?.trim();
  1734. if (envTemplate)
  1735. return envTemplate;
  1736. try {
  1737. const config = loadConfig();
  1738. const configTemplate = (config.editor_uri
  1739. || config.editor_uri_template
  1740. || config.editorUri
  1741. || (typeof config["editor-uri"] === "string" ? config["editor-uri"] : undefined))?.trim();
  1742. if (configTemplate)
  1743. return configTemplate;
  1744. }
  1745. catch {
  1746. // Ignore config parsing issues and use default template.
  1747. }
  1748. return DEFAULT_EDITOR_URI_TEMPLATE;
  1749. }
  1750. export function buildEditorUri(template, absolutePath, line, col) {
  1751. const safeLine = Number.isFinite(line) && line > 0 ? Math.floor(line) : 1;
  1752. const safeCol = Number.isFinite(col) && col > 0 ? Math.floor(col) : 1;
  1753. const encodedPath = encodePathForEditorUri(absolutePath);
  1754. return template
  1755. .replace(/\{path\}/g, encodedPath)
  1756. .replace(/\{line\}/g, String(safeLine))
  1757. .replace(/\{col\}/g, String(safeCol))
  1758. .replace(/\{column\}/g, String(safeCol));
  1759. }
  1760. export function termLink(text, url, isTTY = !!process.stdout.isTTY) {
  1761. if (!isTTY)
  1762. return text;
  1763. return `\x1b]8;;${url}\x07${text}\x1b]8;;\x07`;
  1764. }
  1765. function outputResults(results, query, opts) {
  1766. const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
  1767. if (filtered.length === 0) {
  1768. printEmptySearchResults(opts.format, "min_score");
  1769. return;
  1770. }
  1771. // Helper to create qmd:// URI from displayPath
  1772. const toQmdPath = (displayPath) => `qmd://${displayPath}`;
  1773. if (opts.format === "json") {
  1774. // JSON output for LLM consumption
  1775. const output = filtered.map(row => {
  1776. const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
  1777. let body = opts.full ? row.body : undefined;
  1778. let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
  1779. if (opts.lineNumbers) {
  1780. if (body)
  1781. body = addLineNumbers(body);
  1782. if (snippet)
  1783. snippet = addLineNumbers(snippet);
  1784. }
  1785. return {
  1786. ...(docid && { docid: `#${docid}` }),
  1787. score: Math.round(row.score * 100) / 100,
  1788. file: toQmdPath(row.displayPath),
  1789. title: row.title,
  1790. ...(row.context && { context: row.context }),
  1791. ...(body && { body }),
  1792. ...(snippet && { snippet }),
  1793. ...(opts.explain && row.explain && { explain: row.explain }),
  1794. };
  1795. });
  1796. console.log(JSON.stringify(output, null, 2));
  1797. }
  1798. else if (opts.format === "files") {
  1799. // Simple docid,score,filepath,context output
  1800. for (const row of filtered) {
  1801. const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
  1802. const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
  1803. console.log(`#${docid},${row.score.toFixed(2)},${toQmdPath(row.displayPath)}${ctx}`);
  1804. }
  1805. }
  1806. else if (opts.format === "cli") {
  1807. const editorUriTemplate = getEditorUriTemplate();
  1808. const linkDb = getDb();
  1809. for (let i = 0; i < filtered.length; i++) {
  1810. const row = filtered[i];
  1811. if (!row)
  1812. continue;
  1813. const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
  1814. const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
  1815. // Line 1: filepath with docid
  1816. const virtualPath = row.file.startsWith("qmd://") ? row.file : toQmdPath(row.displayPath);
  1817. const parsed = parseVirtualPath(virtualPath);
  1818. const absolutePath = resolveVirtualPath(linkDb, virtualPath);
  1819. const legacyPath = toQmdPath(row.displayPath);
  1820. const displayPath = parsed?.path || row.displayPath;
  1821. // Only show :line if we actually found a term match in the snippet body (exclude header line).
  1822. const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
  1823. const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
  1824. const lineInfo = hasMatch ? `:${line}` : "";
  1825. const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
  1826. if (process.stdout.isTTY && absolutePath && parsed?.path) {
  1827. const linkLine = hasMatch ? line : 1;
  1828. const linkTarget = buildEditorUri(editorUriTemplate, absolutePath, linkLine, 1);
  1829. const clickable = termLink(`${displayPath}${lineInfo}`, linkTarget);
  1830. console.log(`${c.cyan}${clickable}${c.reset}${docidStr}`);
  1831. }
  1832. else {
  1833. console.log(`${c.cyan}${legacyPath}${c.dim}${lineInfo}${c.reset}${docidStr}`);
  1834. }
  1835. // Line 2: Title (if available)
  1836. if (row.title) {
  1837. console.log(`${c.bold}Title: ${row.title}${c.reset}`);
  1838. }
  1839. // Line 3: Context (if available)
  1840. if (row.context) {
  1841. console.log(`${c.dim}Context: ${row.context}${c.reset}`);
  1842. }
  1843. // Line 4: Score
  1844. const score = formatScore(row.score);
  1845. console.log(`Score: ${c.bold}${score}${c.reset}`);
  1846. if (opts.explain && row.explain) {
  1847. const explain = row.explain;
  1848. const ftsScores = explain.ftsScores.length > 0
  1849. ? explain.ftsScores.map(formatExplainNumber).join(", ")
  1850. : "none";
  1851. const vecScores = explain.vectorScores.length > 0
  1852. ? explain.vectorScores.map(formatExplainNumber).join(", ")
  1853. : "none";
  1854. const contribSummary = explain.rrf.contributions
  1855. .slice()
  1856. .sort((a, b) => b.rrfContribution - a.rrfContribution)
  1857. .slice(0, 3)
  1858. .map(c => `${c.source}/${c.queryType}#${c.rank}:${formatExplainNumber(c.rrfContribution)}`)
  1859. .join(" | ");
  1860. console.log(`${c.dim}Explain: fts=[${ftsScores}] vec=[${vecScores}]${c.reset}`);
  1861. console.log(`${c.dim} RRF: total=${formatExplainNumber(explain.rrf.totalScore)} base=${formatExplainNumber(explain.rrf.baseScore)} bonus=${formatExplainNumber(explain.rrf.topRankBonus)} rank=${explain.rrf.rank}${c.reset}`);
  1862. console.log(`${c.dim} Blend: ${Math.round(explain.rrf.weight * 100)}%*${formatExplainNumber(explain.rrf.positionScore)} + ${Math.round((1 - explain.rrf.weight) * 100)}%*${formatExplainNumber(explain.rerankScore)} = ${formatExplainNumber(explain.blendedScore)}${c.reset}`);
  1863. if (contribSummary.length > 0) {
  1864. console.log(`${c.dim} Top RRF contributions: ${contribSummary}${c.reset}`);
  1865. }
  1866. }
  1867. console.log();
  1868. // Snippet with highlighting (diff-style header included)
  1869. let displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
  1870. const highlighted = highlightTerms(displaySnippet, query);
  1871. console.log(highlighted);
  1872. // Double empty line between results
  1873. if (i < filtered.length - 1)
  1874. console.log('\n');
  1875. }
  1876. }
  1877. else if (opts.format === "md") {
  1878. for (let i = 0; i < filtered.length; i++) {
  1879. const row = filtered[i];
  1880. if (!row)
  1881. continue;
  1882. const heading = row.title || row.displayPath;
  1883. const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
  1884. let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
  1885. if (opts.lineNumbers) {
  1886. content = addLineNumbers(content);
  1887. }
  1888. const docidLine = docid ? `**docid:** \`#${docid}\`\n` : "";
  1889. const contextLine = row.context ? `**context:** ${row.context}\n` : "";
  1890. console.log(`---\n# ${heading}\n${docidLine}${contextLine}\n${content}\n`);
  1891. }
  1892. }
  1893. else if (opts.format === "xml") {
  1894. for (const row of filtered) {
  1895. const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
  1896. const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
  1897. const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
  1898. let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
  1899. if (opts.lineNumbers) {
  1900. content = addLineNumbers(content);
  1901. }
  1902. console.log(`<file docid="#${docid}" name="${toQmdPath(row.displayPath)}"${titleAttr}${contextAttr}>\n${content}\n</file>\n`);
  1903. }
  1904. }
  1905. else {
  1906. // CSV format
  1907. console.log("docid,score,file,title,context,line,snippet");
  1908. for (const row of filtered) {
  1909. const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
  1910. let content = opts.full ? row.body : snippet;
  1911. if (opts.lineNumbers) {
  1912. content = addLineNumbers(content, line);
  1913. }
  1914. const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
  1915. const snippetText = content || "";
  1916. console.log(`#${docid},${row.score.toFixed(4)},${escapeCSV(toQmdPath(row.displayPath))},${escapeCSV(row.title || "")},${escapeCSV(row.context || "")},${line},${escapeCSV(snippetText)}`);
  1917. }
  1918. }
  1919. }
  1920. // Resolve -c collection filter: supports single string, array, or undefined.
  1921. // Returns validated collection names (exits on unknown collection).
  1922. function resolveCollectionFilter(raw, useDefaults = false) {
  1923. // If no filter specified and useDefaults is true, use default collections
  1924. if (!raw && useDefaults) {
  1925. return getDefaultCollectionNames();
  1926. }
  1927. if (!raw)
  1928. return [];
  1929. const names = Array.isArray(raw) ? raw : [raw];
  1930. const validated = [];
  1931. for (const name of names) {
  1932. const coll = getCollectionFromYaml(name);
  1933. if (!coll) {
  1934. console.error(`Collection not found: ${name}`);
  1935. closeDb();
  1936. process.exit(1);
  1937. }
  1938. validated.push(name);
  1939. }
  1940. return validated;
  1941. }
  1942. // Post-filter results to only include files from specified collections.
  1943. function filterByCollections(results, collectionNames) {
  1944. if (collectionNames.length <= 1)
  1945. return results;
  1946. const prefixes = collectionNames.map(n => `qmd://${n}/`);
  1947. return results.filter(r => {
  1948. const path = r.filepath || r.file || '';
  1949. return prefixes.some(p => path.startsWith(p));
  1950. });
  1951. }
  1952. function parseStructuredQuery(query) {
  1953. const rawLines = query.split('\n').map((line, idx) => ({
  1954. raw: line,
  1955. trimmed: line.trim(),
  1956. number: idx + 1,
  1957. })).filter(line => line.trimmed.length > 0);
  1958. if (rawLines.length === 0)
  1959. return null;
  1960. const prefixRe = /^(lex|vec|hyde):\s*/i;
  1961. const expandRe = /^expand:\s*/i;
  1962. const intentRe = /^intent:\s*/i;
  1963. const typed = [];
  1964. let intent;
  1965. for (const line of rawLines) {
  1966. if (expandRe.test(line.trimmed)) {
  1967. if (rawLines.length > 1) {
  1968. throw new Error(`Line ${line.number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
  1969. }
  1970. const text = line.trimmed.replace(expandRe, '').trim();
  1971. if (!text) {
  1972. throw new Error('expand: query must include text.');
  1973. }
  1974. return null; // treat as standalone expand query
  1975. }
  1976. // Parse intent: lines
  1977. if (intentRe.test(line.trimmed)) {
  1978. if (intent !== undefined) {
  1979. throw new Error(`Line ${line.number}: only one intent: line is allowed per query document.`);
  1980. }
  1981. const text = line.trimmed.replace(intentRe, '').trim();
  1982. if (!text) {
  1983. throw new Error(`Line ${line.number}: intent: must include text.`);
  1984. }
  1985. intent = text;
  1986. continue;
  1987. }
  1988. const match = line.trimmed.match(prefixRe);
  1989. if (match) {
  1990. const type = match[1].toLowerCase();
  1991. const text = line.trimmed.slice(match[0].length).trim();
  1992. if (!text) {
  1993. throw new Error(`Line ${line.number} (${type}:) must include text.`);
  1994. }
  1995. if (/\r|\n/.test(text)) {
  1996. throw new Error(`Line ${line.number} (${type}:) contains a newline. Keep each query on a single line.`);
  1997. }
  1998. typed.push({ type, query: text, line: line.number });
  1999. continue;
  2000. }
  2001. if (rawLines.length === 1) {
  2002. // Single plain line -> implicit expand
  2003. return null;
  2004. }
  2005. throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
  2006. }
  2007. // intent: alone is not a valid query — must have at least one search
  2008. if (intent && typed.length === 0) {
  2009. throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
  2010. }
  2011. return typed.length > 0 ? { searches: typed, intent } : null;
  2012. }
  2013. function search(query, opts) {
  2014. const db = getDb();
  2015. // Validate collection filter (supports multiple -c flags)
  2016. // Use default collections if none specified
  2017. const collectionNames = resolveCollectionFilter(opts.collection, true);
  2018. const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
  2019. // Use large limit for --all, otherwise fetch more than needed and let outputResults filter
  2020. const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
  2021. const results = filterByCollections(searchFTS(db, query, fetchLimit, singleCollection), collectionNames);
  2022. // Add context to results
  2023. const resultsWithContext = results.map(r => ({
  2024. file: r.filepath,
  2025. displayPath: r.displayPath,
  2026. title: r.title,
  2027. body: r.body || "",
  2028. score: r.score,
  2029. context: getContextForFile(db, r.filepath),
  2030. hash: r.hash,
  2031. docid: r.docid,
  2032. }));
  2033. closeDb();
  2034. if (resultsWithContext.length === 0) {
  2035. printEmptySearchResults(opts.format);
  2036. return;
  2037. }
  2038. outputResults(resultsWithContext, query, opts);
  2039. }
  2040. // Log query expansion as a tree to stderr (CLI progress feedback)
  2041. function logExpansionTree(originalQuery, expanded) {
  2042. const lines = [];
  2043. lines.push(`${c.dim}├─ ${originalQuery}${c.reset}`);
  2044. for (const q of expanded) {
  2045. let preview = q.query.replace(/\n/g, ' ');
  2046. if (preview.length > 72)
  2047. preview = preview.substring(0, 69) + '...';
  2048. lines.push(`${c.dim}├─ ${q.type}: ${preview}${c.reset}`);
  2049. }
  2050. if (lines.length > 0) {
  2051. lines[lines.length - 1] = lines[lines.length - 1].replace('├─', '└─');
  2052. }
  2053. for (const line of lines)
  2054. process.stderr.write(line + '\n');
  2055. }
  2056. async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
  2057. const store = getStore();
  2058. // Validate collection filter (supports multiple -c flags)
  2059. // Use default collections if none specified
  2060. const collectionNames = resolveCollectionFilter(opts.collection, true);
  2061. const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
  2062. checkIndexHealth(store.db);
  2063. // Build embedding provider for query encoding (i-loazq6ze).
  2064. // Same precedence as `qmd embed`: explicit `--provider` flag → env vars →
  2065. // `~/.config/qmd/config.json` → default LocalLlamaCppProvider. The local
  2066. // default keeps zero-config callers on the legacy llama-cpp path with no
  2067. // observable change.
  2068. const embedProvider = opts.embedProvider;
  2069. await withLLMSession(async () => {
  2070. let results = await vectorSearchQuery(store, query, {
  2071. collection: singleCollection,
  2072. limit: opts.all ? 500 : (opts.limit || 10),
  2073. minScore: opts.minScore || 0.3,
  2074. intent: opts.intent,
  2075. ...(embedProvider ? { embedProvider } : {}),
  2076. hooks: {
  2077. onExpand: (original, expanded) => {
  2078. logExpansionTree(original, expanded);
  2079. process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
  2080. },
  2081. },
  2082. });
  2083. // Post-filter for multi-collection
  2084. if (collectionNames.length > 1) {
  2085. results = results.filter(r => {
  2086. const prefixes = collectionNames.map(n => `qmd://${n}/`);
  2087. return prefixes.some(p => r.file.startsWith(p));
  2088. });
  2089. }
  2090. closeDb();
  2091. if (results.length === 0) {
  2092. printEmptySearchResults(opts.format);
  2093. return;
  2094. }
  2095. outputResults(results.map(r => ({
  2096. file: r.file,
  2097. displayPath: r.displayPath,
  2098. title: r.title,
  2099. body: r.body,
  2100. score: r.score,
  2101. context: r.context,
  2102. docid: r.docid,
  2103. })), query, { ...opts, limit: results.length });
  2104. }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
  2105. }
  2106. async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rerankModel = DEFAULT_RERANK_MODEL) {
  2107. const store = getStore();
  2108. // Validate collection filter (supports multiple -c flags)
  2109. // Use default collections if none specified
  2110. const collectionNames = resolveCollectionFilter(opts.collection, true);
  2111. const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
  2112. checkIndexHealth(store.db);
  2113. // Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes)
  2114. const parsed = parseStructuredQuery(query);
  2115. // Intent can come from --intent flag or from intent: line in query document
  2116. const intent = opts.intent || parsed?.intent;
  2117. await withLLMSession(async () => {
  2118. let results;
  2119. if (parsed) {
  2120. const structuredQueries = parsed.searches;
  2121. // Structured search — user provided their own query expansions
  2122. const typeLabels = structuredQueries.map(s => s.type).join('+');
  2123. process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
  2124. if (intent) {
  2125. process.stderr.write(`${c.dim}├─ intent: ${intent}${c.reset}\n`);
  2126. }
  2127. // Log each sub-query
  2128. for (const s of structuredQueries) {
  2129. let preview = s.query.replace(/\n/g, ' ');
  2130. if (preview.length > 72)
  2131. preview = preview.substring(0, 69) + '...';
  2132. process.stderr.write(`${c.dim}├─ ${s.type}: ${preview}${c.reset}\n`);
  2133. }
  2134. process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`);
  2135. results = await structuredSearch(store, structuredQueries, {
  2136. collections: singleCollection ? [singleCollection] : undefined,
  2137. limit: opts.all ? 500 : (opts.limit || 10),
  2138. minScore: opts.minScore || 0,
  2139. candidateLimit: opts.candidateLimit,
  2140. skipRerank: opts.skipRerank,
  2141. explain: !!opts.explain,
  2142. intent,
  2143. chunkStrategy: opts.chunkStrategy,
  2144. ...(opts.embedProvider ? { embedProvider: opts.embedProvider } : {}),
  2145. hooks: {
  2146. onEmbedStart: (count) => {
  2147. process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
  2148. },
  2149. onEmbedDone: (ms) => {
  2150. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2151. },
  2152. onRerankStart: (chunkCount) => {
  2153. process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
  2154. progress.indeterminate();
  2155. },
  2156. onRerankDone: (ms) => {
  2157. progress.clear();
  2158. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2159. },
  2160. },
  2161. });
  2162. }
  2163. else {
  2164. // Standard hybrid query with automatic expansion
  2165. results = await hybridQuery(store, query, {
  2166. collection: singleCollection,
  2167. limit: opts.all ? 500 : (opts.limit || 10),
  2168. minScore: opts.minScore || 0,
  2169. candidateLimit: opts.candidateLimit,
  2170. skipRerank: opts.skipRerank,
  2171. explain: !!opts.explain,
  2172. intent,
  2173. chunkStrategy: opts.chunkStrategy,
  2174. ...(opts.embedProvider ? { embedProvider: opts.embedProvider } : {}),
  2175. hooks: {
  2176. onStrongSignal: (score) => {
  2177. process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
  2178. },
  2179. onExpandStart: () => {
  2180. process.stderr.write(`${c.dim}Expanding query...${c.reset}`);
  2181. },
  2182. onExpand: (original, expanded, ms) => {
  2183. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2184. logExpansionTree(original, expanded);
  2185. process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
  2186. },
  2187. onEmbedStart: (count) => {
  2188. process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
  2189. },
  2190. onEmbedDone: (ms) => {
  2191. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2192. },
  2193. onRerankStart: (chunkCount) => {
  2194. process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
  2195. progress.indeterminate();
  2196. },
  2197. onRerankDone: (ms) => {
  2198. progress.clear();
  2199. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2200. },
  2201. },
  2202. });
  2203. }
  2204. // Post-filter for multi-collection
  2205. if (collectionNames.length > 1) {
  2206. results = results.filter(r => {
  2207. const prefixes = collectionNames.map(n => `qmd://${n}/`);
  2208. return prefixes.some(p => r.file.startsWith(p));
  2209. });
  2210. }
  2211. closeDb();
  2212. if (results.length === 0) {
  2213. printEmptySearchResults(opts.format);
  2214. return;
  2215. }
  2216. // Use first lex/vec query for output context, or original query
  2217. const structuredQueries = parsed?.searches;
  2218. const displayQuery = structuredQueries
  2219. ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
  2220. : query;
  2221. // Map to CLI output format — use bestChunk for snippet display
  2222. outputResults(results.map(r => ({
  2223. file: r.file,
  2224. displayPath: r.displayPath,
  2225. title: r.title,
  2226. body: r.bestChunk,
  2227. chunkPos: r.bestChunkPos,
  2228. score: r.score,
  2229. context: r.context,
  2230. docid: r.docid,
  2231. explain: r.explain,
  2232. })), displayQuery, { ...opts, limit: results.length });
  2233. }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
  2234. }
  2235. // Parse CLI arguments using util.parseArgs
  2236. function parseCLI() {
  2237. const { values, positionals } = parseArgs({
  2238. args: process.argv.slice(2), // Skip node and script path
  2239. options: {
  2240. // Global options
  2241. index: {
  2242. type: "string",
  2243. },
  2244. context: {
  2245. type: "string",
  2246. },
  2247. help: { type: "boolean", short: "h" },
  2248. version: { type: "boolean", short: "v" },
  2249. skill: { type: "boolean" },
  2250. global: { type: "boolean" },
  2251. yes: { type: "boolean" },
  2252. // Search options
  2253. n: { type: "string" },
  2254. "min-score": { type: "string" },
  2255. all: { type: "boolean" },
  2256. full: { type: "boolean" },
  2257. csv: { type: "boolean" },
  2258. md: { type: "boolean" },
  2259. xml: { type: "boolean" },
  2260. files: { type: "boolean" },
  2261. json: { type: "boolean" },
  2262. explain: { type: "boolean" },
  2263. collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
  2264. // Collection options
  2265. name: { type: "string" }, // collection name
  2266. mask: { type: "string" }, // glob pattern
  2267. // Embed options
  2268. force: { type: "boolean", short: "f" },
  2269. "max-docs-per-batch": { type: "string" },
  2270. "max-batch-mb": { type: "string" },
  2271. provider: { type: "string" }, // "local" | "openai"
  2272. "embed-endpoint": { type: "string" }, // OpenAI-compatible endpoint URL
  2273. "embed-api-key": { type: "string" }, // Bearer token
  2274. "embed-model-id": { type: "string" }, // Stable model id (default: embeddinggemma)
  2275. "embed-upstream-model": { type: "string" }, // Upstream model name in HTTP body
  2276. "embed-batch-size": { type: "string" }, // Batch size for HTTP provider
  2277. "embed-timeout-ms": { type: "string" }, // Per-request timeout
  2278. "embed-auto-fallback": { type: "boolean" }, // Wrap openai in AutoFallback (local fallback)
  2279. // Update options
  2280. pull: { type: "boolean" }, // git pull before update
  2281. refresh: { type: "boolean" },
  2282. // Get options
  2283. l: { type: "string" }, // max lines
  2284. from: { type: "string" }, // start line
  2285. "max-bytes": { type: "string" }, // max bytes for multi-get
  2286. "line-numbers": { type: "boolean" }, // add line numbers to output
  2287. // Query options
  2288. "candidate-limit": { type: "string", short: "C" },
  2289. "no-rerank": { type: "boolean", default: false },
  2290. intent: { type: "string" },
  2291. // Chunking options
  2292. "chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
  2293. // MCP HTTP transport options
  2294. http: { type: "boolean" },
  2295. daemon: { type: "boolean" },
  2296. port: { type: "string" },
  2297. },
  2298. allowPositionals: true,
  2299. strict: false, // Allow unknown options to pass through
  2300. });
  2301. // Select index name (default: "index")
  2302. const indexName = values.index;
  2303. if (indexName) {
  2304. setIndexName(indexName);
  2305. setConfigIndexName(indexName);
  2306. }
  2307. // Determine output format
  2308. let format = "cli";
  2309. if (values.csv)
  2310. format = "csv";
  2311. else if (values.md)
  2312. format = "md";
  2313. else if (values.xml)
  2314. format = "xml";
  2315. else if (values.files)
  2316. format = "files";
  2317. else if (values.json)
  2318. format = "json";
  2319. // Default limit: 20 for --files/--json, 5 otherwise
  2320. // --all means return all results (use very large limit)
  2321. const defaultLimit = (format === "files" || format === "json") ? 20 : 5;
  2322. const isAll = !!values.all;
  2323. const opts = {
  2324. format,
  2325. full: !!values.full,
  2326. limit: isAll ? 100000 : (values.n ? parseInt(String(values.n), 10) || defaultLimit : defaultLimit),
  2327. minScore: values["min-score"] ? parseFloat(String(values["min-score"])) || 0 : 0,
  2328. all: isAll,
  2329. collection: values.collection,
  2330. lineNumbers: !!values["line-numbers"],
  2331. candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
  2332. skipRerank: !!values["no-rerank"],
  2333. explain: !!values.explain,
  2334. intent: values.intent,
  2335. chunkStrategy: parseChunkStrategy(values["chunk-strategy"]),
  2336. };
  2337. return {
  2338. command: positionals[0] || "",
  2339. args: positionals.slice(1),
  2340. query: positionals.slice(1).join(" "),
  2341. opts,
  2342. values,
  2343. };
  2344. }
  2345. function getSkillInstallDir(globalInstall) {
  2346. return globalInstall
  2347. ? resolve(homedir(), ".agents", "skills", "qmd")
  2348. : resolve(getPwd(), ".agents", "skills", "qmd");
  2349. }
  2350. function getClaudeSkillLinkPath(globalInstall) {
  2351. return globalInstall
  2352. ? resolve(homedir(), ".claude", "skills", "qmd")
  2353. : resolve(getPwd(), ".claude", "skills", "qmd");
  2354. }
  2355. function pathExists(path) {
  2356. try {
  2357. lstatSync(path);
  2358. return true;
  2359. }
  2360. catch {
  2361. return false;
  2362. }
  2363. }
  2364. function removePath(path) {
  2365. const stat = lstatSync(path);
  2366. if (stat.isDirectory() && !stat.isSymbolicLink()) {
  2367. rmSync(path, { recursive: true, force: true });
  2368. }
  2369. else {
  2370. unlinkSync(path);
  2371. }
  2372. }
  2373. function showSkill() {
  2374. console.log("QMD Skill (embedded)");
  2375. console.log("");
  2376. const content = getEmbeddedQmdSkillContent();
  2377. process.stdout.write(content.endsWith("\n") ? content : content + "\n");
  2378. }
  2379. function writeEmbeddedSkill(targetDir, force) {
  2380. if (pathExists(targetDir)) {
  2381. if (!force) {
  2382. throw new Error(`Skill already exists: ${targetDir} (use --force to replace it)`);
  2383. }
  2384. removePath(targetDir);
  2385. }
  2386. mkdirSync(targetDir, { recursive: true });
  2387. for (const file of getEmbeddedQmdSkillFiles()) {
  2388. const destination = resolve(targetDir, file.relativePath);
  2389. mkdirSync(dirname(destination), { recursive: true });
  2390. writeFileSync(destination, file.content, "utf-8");
  2391. }
  2392. }
  2393. function ensureClaudeSymlink(linkPath, targetDir, force) {
  2394. const parentDir = dirname(linkPath);
  2395. if (pathExists(parentDir)) {
  2396. const resolvedTargetDir = realpathSync(dirname(targetDir));
  2397. const resolvedLinkParent = realpathSync(parentDir);
  2398. // If .claude/skills already resolves to the same directory as .agents/skills,
  2399. // the skill is already visible to Claude and creating qmd -> qmd would loop.
  2400. if (resolvedTargetDir === resolvedLinkParent) {
  2401. return false;
  2402. }
  2403. }
  2404. const linkTarget = relativePath(parentDir, targetDir) || ".";
  2405. mkdirSync(parentDir, { recursive: true });
  2406. if (pathExists(linkPath)) {
  2407. const stat = lstatSync(linkPath);
  2408. if (stat.isSymbolicLink() && readlinkSync(linkPath) === linkTarget) {
  2409. return true;
  2410. }
  2411. if (!force) {
  2412. throw new Error(`Claude skill path already exists: ${linkPath} (use --force to replace it)`);
  2413. }
  2414. removePath(linkPath);
  2415. }
  2416. symlinkSync(linkTarget, linkPath, "dir");
  2417. return true;
  2418. }
  2419. async function shouldCreateClaudeSymlink(linkPath, autoYes) {
  2420. if (autoYes) {
  2421. return true;
  2422. }
  2423. if (!process.stdin.isTTY || !process.stdout.isTTY) {
  2424. console.log(`Tip: create a Claude symlink manually at ${linkPath}`);
  2425. return false;
  2426. }
  2427. const rl = createInterface({
  2428. input: process.stdin,
  2429. output: process.stdout,
  2430. });
  2431. try {
  2432. const answer = await rl.question(`Create a symlink in ${linkPath}? [y/N] `);
  2433. const normalized = answer.trim().toLowerCase();
  2434. return normalized === "y" || normalized === "yes";
  2435. }
  2436. finally {
  2437. rl.close();
  2438. }
  2439. }
  2440. async function installSkill(globalInstall, force, autoYes) {
  2441. const installDir = getSkillInstallDir(globalInstall);
  2442. writeEmbeddedSkill(installDir, force);
  2443. console.log(`✓ Installed QMD skill to ${installDir}`);
  2444. const claudeLinkPath = getClaudeSkillLinkPath(globalInstall);
  2445. if (!(await shouldCreateClaudeSymlink(claudeLinkPath, autoYes))) {
  2446. return;
  2447. }
  2448. const linked = ensureClaudeSymlink(claudeLinkPath, installDir, force);
  2449. if (linked) {
  2450. console.log(`✓ Linked Claude skill at ${claudeLinkPath}`);
  2451. }
  2452. else {
  2453. console.log(`✓ Claude already sees the skill via ${dirname(claudeLinkPath)}`);
  2454. }
  2455. }
  2456. function showHelp() {
  2457. console.log("qmd — Quick Markdown Search");
  2458. console.log("");
  2459. console.log("Usage:");
  2460. console.log(" qmd <command> [options]");
  2461. console.log("");
  2462. console.log("Primary commands:");
  2463. console.log(" qmd query <query> - Hybrid search with auto expansion + reranking (recommended)");
  2464. console.log(" qmd query 'lex:..\\nvec:...' - Structured query document (you provide lex/vec/hyde lines)");
  2465. console.log(" qmd search <query> - Full-text BM25 keywords (no LLM)");
  2466. console.log(" qmd vsearch <query> - Vector similarity only");
  2467. console.log(" qmd get <file>[:line] [-l N] - Show a single document, optional line slice");
  2468. console.log(" qmd multi-get <pattern> - Batch fetch via glob or comma-separated list");
  2469. console.log(" qmd skill show/install - Show or install the packaged QMD skill");
  2470. console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)");
  2471. console.log(" qmd bench <fixture.json> - Run search quality benchmarks against a fixture file");
  2472. console.log("");
  2473. console.log("Collections & context:");
  2474. console.log(" qmd collection add/list/remove/rename/show - Manage indexed folders");
  2475. console.log(" qmd context add/list/rm - Attach human-written summaries");
  2476. console.log(" qmd ls [collection[/path]] - Inspect indexed files");
  2477. console.log("");
  2478. console.log("Maintenance:");
  2479. console.log(" qmd status - View index + collection health");
  2480. console.log(" qmd update [<collection>|--all] [--pull]");
  2481. console.log(" - Re-index collections (positional name limits to one;");
  2482. console.log(" no arg or --all = every collection; --pull = git pull first)");
  2483. console.log(" qmd embed [<collection>|--all] [-f]");
  2484. console.log(" - Generate/refresh vector embeddings");
  2485. console.log(" (positional name limits to one collection; no arg or --all = all;");
  2486. console.log(" -f clears + re-embeds ALL vectors fleet-wide, incompatible with <collection>)");
  2487. console.log(" --max-docs-per-batch <n> - Cap docs loaded into memory per embedding batch");
  2488. console.log(" --max-batch-mb <n> - Cap UTF-8 MB loaded into memory per embedding batch");
  2489. console.log(" --provider {local,openai} - Embedding backend (default: local llama.cpp)");
  2490. console.log(" --embed-endpoint <url> - OpenAI-compatible endpoint (or QMD_EMBED_ENDPOINT)");
  2491. console.log(" --embed-api-key <key> - Bearer token (or QMD_EMBED_API_KEY)");
  2492. console.log(" --embed-model-id <id> - Stable model id stored in DB (default: embeddinggemma)");
  2493. console.log(" --embed-upstream-model <m> - Model name sent in HTTP body (default: same as model-id)");
  2494. console.log(" --embed-batch-size <n> - Batch size for HTTP provider (default: 64)");
  2495. console.log(" --embed-timeout-ms <n> - Per-request timeout in ms (default: 30000)");
  2496. console.log(" --embed-auto-fallback - Wrap openai provider in local fallback (or QMD_EMBED_AUTO_FALLBACK)");
  2497. console.log(" qmd cleanup - Clear caches, vacuum DB");
  2498. console.log("");
  2499. console.log("Query syntax (qmd query):");
  2500. console.log(" QMD queries are either a single expand query (no prefix) or a multi-line");
  2501. console.log(" document where every line is typed with lex:, vec:, or hyde:. This grammar");
  2502. console.log(" matches the docs in docs/SYNTAX.md and is enforced in the CLI.");
  2503. console.log("");
  2504. const grammar = [
  2505. `query = expand_query | query_document ;`,
  2506. `expand_query = text | explicit_expand ;`,
  2507. `explicit_expand= "expand:" text ;`,
  2508. `query_document = [ intent_line ] { typed_line } ;`,
  2509. `intent_line = "intent:" text newline ;`,
  2510. `typed_line = type ":" text newline ;`,
  2511. `type = "lex" | "vec" | "hyde" ;`,
  2512. `text = quoted_phrase | plain_text ;`,
  2513. `quoted_phrase = '"' { character } '"' ;`,
  2514. `plain_text = { character } ;`,
  2515. `newline = "\\n" ;`,
  2516. ];
  2517. console.log(" Grammar:");
  2518. for (const line of grammar) {
  2519. console.log(` ${line}`);
  2520. }
  2521. console.log("");
  2522. console.log(" Examples:");
  2523. console.log(" qmd query \"how does auth work\" # single-line → implicit expand");
  2524. console.log(" qmd query $'lex: CAP theorem\\nvec: consistency' # typed query document");
  2525. console.log(" qmd query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search");
  2526. console.log(" qmd query $'hyde: Hypothetical answer text' # hyde-only document");
  2527. console.log("");
  2528. console.log(" Constraints:");
  2529. console.log(" - Standalone expand queries cannot mix with typed lines.");
  2530. console.log(" - Query documents allow only lex:, vec:, or hyde: prefixes.");
  2531. console.log(" - Each typed line must be single-line text with balanced quotes.");
  2532. console.log("");
  2533. console.log("AI agents & integrations:");
  2534. console.log(" - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.");
  2535. console.log(" - `qmd skill install` installs the QMD skill into ./.agents/skills/qmd.");
  2536. console.log(" - Use `qmd skill install --global` for ~/.agents/skills/qmd.");
  2537. console.log(" - `qmd --skill` is kept as an alias for `qmd skill show`.");
  2538. console.log(" - Advanced: `qmd mcp --http ...` and `qmd mcp --http --daemon` are optional for custom transports.");
  2539. console.log("");
  2540. console.log("Global options:");
  2541. console.log(" --index <name> - Use a named index (default: index)");
  2542. console.log(" QMD_EDITOR_URI - Editor link template for clickable TTY search output");
  2543. console.log("");
  2544. console.log("Search options:");
  2545. console.log(" -n <num> - Max results (default 5, or 20 for --files/--json)");
  2546. console.log(" --all - Return all matches (pair with --min-score)");
  2547. console.log(" --min-score <num> - Minimum similarity score");
  2548. console.log(" --full - Output full document instead of snippet");
  2549. console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
  2550. console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
  2551. console.log(" --line-numbers - Include line numbers in output");
  2552. console.log(" --explain - Include retrieval score traces (query --json/CLI)");
  2553. console.log(" --files | --json | --csv | --md | --xml - Output format");
  2554. console.log(" -c, --collection <name> - Filter by one or more collections");
  2555. console.log("");
  2556. console.log("Embed/query options:");
  2557. console.log(" --chunk-strategy <auto|regex> - Chunking mode (default: regex; auto uses AST for code files)");
  2558. console.log("");
  2559. console.log("Multi-get options:");
  2560. console.log(" -l <num> - Maximum lines per file");
  2561. console.log(" --max-bytes <num> - Skip files larger than N bytes (default 10240)");
  2562. console.log(" --json/--csv/--md/--xml/--files - Same formats as search");
  2563. console.log("");
  2564. console.log(`Index: ${getDbPath()}`);
  2565. }
  2566. async function showVersion() {
  2567. const scriptDir = dirname(fileURLToPath(import.meta.url));
  2568. const pkgPath = resolve(scriptDir, "..", "..", "package.json");
  2569. const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
  2570. let commit = "";
  2571. try {
  2572. commit = execSync(`git -C ${scriptDir} rev-parse --short HEAD`, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
  2573. }
  2574. catch {
  2575. // Not a git repo or git not available
  2576. }
  2577. const versionStr = commit ? `${pkg.version} (${commit})` : pkg.version;
  2578. console.log(`qmd ${versionStr}`);
  2579. }
  2580. // Main CLI - only run if this is the main module
  2581. const __filename = fileURLToPath(import.meta.url);
  2582. const argv1 = process.argv[1];
  2583. const isMain = argv1 === __filename
  2584. || argv1?.endsWith("/qmd.ts")
  2585. || argv1?.endsWith("/qmd.js")
  2586. || (argv1 != null && realpathSync(argv1) === __filename);
  2587. if (isMain) {
  2588. const cli = parseCLI();
  2589. if (cli.values.version) {
  2590. await showVersion();
  2591. process.exit(0);
  2592. }
  2593. if (cli.values.skill) {
  2594. showSkill();
  2595. process.exit(0);
  2596. }
  2597. if (cli.values.help && cli.command === "skill") {
  2598. console.log("Usage: qmd skill <show|install> [options]");
  2599. console.log("");
  2600. console.log("Commands:");
  2601. console.log(" show Print the packaged QMD skill");
  2602. console.log(" install Install into ./.agents/skills/qmd");
  2603. console.log("");
  2604. console.log("Options:");
  2605. console.log(" --global Install into ~/.agents/skills/qmd");
  2606. console.log(" --yes Also create the .claude/skills/qmd symlink");
  2607. console.log(" -f, --force Replace existing install or symlink");
  2608. process.exit(0);
  2609. }
  2610. if (!cli.command || cli.values.help) {
  2611. showHelp();
  2612. process.exit(cli.values.help ? 0 : 1);
  2613. }
  2614. switch (cli.command) {
  2615. case "context": {
  2616. const subcommand = cli.args[0];
  2617. if (!subcommand) {
  2618. console.error("Usage: qmd context <add|list|rm>");
  2619. console.error("");
  2620. console.error("Commands:");
  2621. console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
  2622. console.error(" qmd context add / \"text\" - Add global context to all collections");
  2623. console.error(" qmd context list - List all contexts");
  2624. console.error(" qmd context rm <path> - Remove context");
  2625. process.exit(1);
  2626. }
  2627. switch (subcommand) {
  2628. case "add": {
  2629. if (cli.args.length < 2) {
  2630. console.error("Usage: qmd context add [path] \"text\"");
  2631. console.error("");
  2632. console.error("Examples:");
  2633. console.error(" qmd context add \"Context for current directory\"");
  2634. console.error(" qmd context add . \"Context for current directory\"");
  2635. console.error(" qmd context add /subfolder \"Context for subfolder\"");
  2636. console.error(" qmd context add / \"Global context for all collections\"");
  2637. console.error("");
  2638. console.error(" Using virtual paths:");
  2639. console.error(" qmd context add qmd://journals/ \"Context for entire journals collection\"");
  2640. console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\"");
  2641. process.exit(1);
  2642. }
  2643. let pathArg;
  2644. let contextText;
  2645. // Check if first arg looks like a path or if it's the context text
  2646. const firstArg = cli.args[1] || '';
  2647. const secondArg = cli.args[2];
  2648. if (secondArg) {
  2649. // Two args: path + context
  2650. pathArg = firstArg;
  2651. contextText = cli.args.slice(2).join(" ");
  2652. }
  2653. else {
  2654. // One arg: context only (use current directory)
  2655. pathArg = undefined;
  2656. contextText = firstArg;
  2657. }
  2658. await contextAdd(pathArg, contextText);
  2659. break;
  2660. }
  2661. case "list": {
  2662. contextList();
  2663. break;
  2664. }
  2665. case "rm":
  2666. case "remove": {
  2667. if (cli.args.length < 2 || !cli.args[1]) {
  2668. console.error("Usage: qmd context rm <path>");
  2669. console.error("Examples:");
  2670. console.error(" qmd context rm /");
  2671. console.error(" qmd context rm qmd://journals/2024");
  2672. process.exit(1);
  2673. }
  2674. contextRemove(cli.args[1]);
  2675. break;
  2676. }
  2677. default:
  2678. console.error(`Unknown subcommand: ${subcommand}`);
  2679. console.error("Available: add, list, rm");
  2680. process.exit(1);
  2681. }
  2682. break;
  2683. }
  2684. case "get": {
  2685. if (!cli.args[0]) {
  2686. console.error("Usage: qmd get <filepath>[:line] [--from <line>] [-l <lines>] [--line-numbers]");
  2687. process.exit(1);
  2688. }
  2689. const fromLine = cli.values.from ? parseInt(cli.values.from, 10) : undefined;
  2690. const maxLines = cli.values.l ? parseInt(cli.values.l, 10) : undefined;
  2691. getDocument(cli.args[0], fromLine, maxLines, cli.opts.lineNumbers);
  2692. break;
  2693. }
  2694. case "multi-get": {
  2695. if (!cli.args[0]) {
  2696. console.error("Usage: qmd multi-get <pattern> [-l <lines>] [--max-bytes <bytes>] [--json|--csv|--md|--xml|--files]");
  2697. console.error(" pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list");
  2698. process.exit(1);
  2699. }
  2700. const maxLinesMulti = cli.values.l ? parseInt(cli.values.l, 10) : undefined;
  2701. const maxBytes = cli.values["max-bytes"] ? parseInt(cli.values["max-bytes"], 10) : DEFAULT_MULTI_GET_MAX_BYTES;
  2702. multiGet(cli.args[0], maxLinesMulti, maxBytes, cli.opts.format);
  2703. break;
  2704. }
  2705. case "ls": {
  2706. listFiles(cli.args[0]);
  2707. break;
  2708. }
  2709. case "collection": {
  2710. const subcommand = cli.args[0];
  2711. switch (subcommand) {
  2712. case "list": {
  2713. collectionList();
  2714. break;
  2715. }
  2716. case "add": {
  2717. const pwd = cli.args[1] || getPwd();
  2718. const resolvedPwd = pwd === '.' ? getPwd() : getRealPath(resolve(pwd));
  2719. const globPattern = cli.values.mask || DEFAULT_GLOB;
  2720. const name = cli.values.name;
  2721. await collectionAdd(resolvedPwd, globPattern, name);
  2722. break;
  2723. }
  2724. case "remove":
  2725. case "rm": {
  2726. if (!cli.args[1]) {
  2727. console.error("Usage: qmd collection remove <name>");
  2728. console.error(" Use 'qmd collection list' to see available collections");
  2729. process.exit(1);
  2730. }
  2731. collectionRemove(cli.args[1]);
  2732. break;
  2733. }
  2734. case "rename":
  2735. case "mv": {
  2736. if (!cli.args[1] || !cli.args[2]) {
  2737. console.error("Usage: qmd collection rename <old-name> <new-name>");
  2738. console.error(" Use 'qmd collection list' to see available collections");
  2739. process.exit(1);
  2740. }
  2741. collectionRename(cli.args[1], cli.args[2]);
  2742. break;
  2743. }
  2744. case "set-update":
  2745. case "update-cmd": {
  2746. const name = cli.args[1];
  2747. const cmd = cli.args.slice(2).join(' ') || null;
  2748. if (!name) {
  2749. console.error("Usage: qmd collection update-cmd <name> [command]");
  2750. console.error(" Set the command to run before indexing (e.g., 'git pull')");
  2751. console.error(" Omit command to clear it");
  2752. process.exit(1);
  2753. }
  2754. const { updateCollectionSettings, getCollection } = await import("../collections.js");
  2755. const col = getCollection(name);
  2756. if (!col) {
  2757. console.error(`Collection not found: ${name}`);
  2758. process.exit(1);
  2759. }
  2760. updateCollectionSettings(name, { update: cmd });
  2761. if (cmd) {
  2762. console.log(`✓ Set update command for '${name}': ${cmd}`);
  2763. }
  2764. else {
  2765. console.log(`✓ Cleared update command for '${name}'`);
  2766. }
  2767. break;
  2768. }
  2769. case "include":
  2770. case "exclude": {
  2771. const name = cli.args[1];
  2772. if (!name) {
  2773. console.error(`Usage: qmd collection ${subcommand} <name>`);
  2774. console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`);
  2775. process.exit(1);
  2776. }
  2777. const { updateCollectionSettings, getCollection } = await import("../collections.js");
  2778. const col = getCollection(name);
  2779. if (!col) {
  2780. console.error(`Collection not found: ${name}`);
  2781. process.exit(1);
  2782. }
  2783. const include = subcommand === 'include';
  2784. updateCollectionSettings(name, { includeByDefault: include });
  2785. console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`);
  2786. break;
  2787. }
  2788. case "show":
  2789. case "info": {
  2790. const name = cli.args[1];
  2791. if (!name) {
  2792. console.error("Usage: qmd collection show <name>");
  2793. process.exit(1);
  2794. }
  2795. const { getCollection } = await import("../collections.js");
  2796. const col = getCollection(name);
  2797. if (!col) {
  2798. console.error(`Collection not found: ${name}`);
  2799. process.exit(1);
  2800. }
  2801. console.log(`Collection: ${name}`);
  2802. console.log(` Path: ${col.path}`);
  2803. console.log(` Pattern: ${col.pattern}`);
  2804. console.log(` Include: ${col.includeByDefault !== false ? 'yes (default)' : 'no'}`);
  2805. if (col.update) {
  2806. console.log(` Update: ${col.update}`);
  2807. }
  2808. if (col.context) {
  2809. const ctxCount = Object.keys(col.context).length;
  2810. console.log(` Contexts: ${ctxCount}`);
  2811. }
  2812. break;
  2813. }
  2814. case "help":
  2815. case undefined: {
  2816. console.log("Usage: qmd collection <command> [options]");
  2817. console.log("");
  2818. console.log("Commands:");
  2819. console.log(" list List all collections");
  2820. console.log(" add <path> [--name NAME] Add a collection");
  2821. console.log(" remove <name> Remove a collection");
  2822. console.log(" rename <old> <new> Rename a collection");
  2823. console.log(" show <name> Show collection details");
  2824. console.log(" update-cmd <name> [cmd] Set pre-update command (e.g., 'git pull')");
  2825. console.log(" include <name> Include in default queries");
  2826. console.log(" exclude <name> Exclude from default queries");
  2827. console.log("");
  2828. console.log("Examples:");
  2829. console.log(" qmd collection add ~/notes --name notes");
  2830. console.log(" qmd collection update-cmd brain 'git pull'");
  2831. console.log(" qmd collection exclude archive");
  2832. process.exit(0);
  2833. }
  2834. default:
  2835. console.error(`Unknown subcommand: ${subcommand}`);
  2836. console.error("Run 'qmd collection help' for usage");
  2837. process.exit(1);
  2838. }
  2839. break;
  2840. }
  2841. case "status":
  2842. await showStatus();
  2843. break;
  2844. case "update": {
  2845. // i-ofojj7dy — `qmd update <collection>` filters to a single collection;
  2846. // `qmd update --all` or `qmd update` (no arg) preserves full-fleet behavior.
  2847. // `--all` together with a positional name errors out to avoid silent
  2848. // disagreement between the two intents.
  2849. const updateCollectionArg = cli.args[0];
  2850. const updateAllFlag = !!cli.values.all;
  2851. if (updateAllFlag && updateCollectionArg !== undefined) {
  2852. console.error(`${c.red}Conflicting arguments: --all cannot be combined with a positional collection name.${c.reset}`);
  2853. console.error(`${c.dim}Use 'qmd update --all' for every collection OR 'qmd update <name>' for one.${c.reset}`);
  2854. process.exit(1);
  2855. }
  2856. const updateFilter = updateAllFlag ? undefined : updateCollectionArg;
  2857. await updateCollections(updateFilter);
  2858. break;
  2859. }
  2860. case "embed":
  2861. try {
  2862. const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
  2863. const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
  2864. const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
  2865. // i-ofojj7dy — `qmd embed <collection>` filters pending-embedding
  2866. // candidates to documents in that collection. `--all` together with a
  2867. // positional name is an explicit error.
  2868. const embedCollectionArg = cli.args[0];
  2869. const embedAllFlag = !!cli.values.all;
  2870. if (embedAllFlag && embedCollectionArg !== undefined) {
  2871. console.error(`${c.red}Conflicting arguments: --all cannot be combined with a positional collection name.${c.reset}`);
  2872. console.error(`${c.dim}Use 'qmd embed --all' for every collection OR 'qmd embed <name>' for one.${c.reset}`);
  2873. process.exit(1);
  2874. }
  2875. const embedCollectionFilter = embedAllFlag ? undefined : embedCollectionArg;
  2876. // Build embedding provider from CLI flags + env + config file.
  2877. // Backward compat: with no flags / env vars, the factory returns
  2878. // a LocalLlamaCppProvider that delegates to the default LlamaCpp
  2879. // singleton — identical to pre-patch behavior.
  2880. const providerCliKind = parseProviderKind(cli.values["provider"]);
  2881. const providerOpts = buildProviderOpts(cli.values, providerCliKind);
  2882. const embedProvider = createEmbeddingProvider(providerOpts);
  2883. await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
  2884. maxDocsPerBatch,
  2885. maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
  2886. chunkStrategy: embedChunkStrategy,
  2887. embedProvider,
  2888. providerKind: embedProvider.kind,
  2889. collection: embedCollectionFilter,
  2890. });
  2891. }
  2892. catch (error) {
  2893. if (error instanceof ModelMismatchError) {
  2894. // Friendlier output for the migration-safety guard
  2895. console.error(`${c.red}Model mismatch:${c.reset} ${error.message}`);
  2896. }
  2897. else {
  2898. console.error(error instanceof Error ? error.message : String(error));
  2899. }
  2900. process.exit(1);
  2901. }
  2902. break;
  2903. case "pull": {
  2904. const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
  2905. const models = [
  2906. DEFAULT_EMBED_MODEL_URI,
  2907. DEFAULT_GENERATE_MODEL_URI,
  2908. DEFAULT_RERANK_MODEL_URI,
  2909. ];
  2910. console.log(`${c.bold}Pulling models${c.reset}`);
  2911. const results = await pullModels(models, {
  2912. refresh,
  2913. cacheDir: DEFAULT_MODEL_CACHE_DIR,
  2914. });
  2915. for (const result of results) {
  2916. const size = formatBytes(result.sizeBytes);
  2917. const note = result.refreshed ? "refreshed" : "cached/checked";
  2918. console.log(`- ${result.model} -> ${result.path} (${size}, ${note})`);
  2919. }
  2920. break;
  2921. }
  2922. case "search":
  2923. if (!cli.query) {
  2924. console.error("Usage: qmd search [options] <query>");
  2925. process.exit(1);
  2926. }
  2927. search(cli.query, cli.opts);
  2928. break;
  2929. case "vsearch":
  2930. case "vector-search": // undocumented alias
  2931. if (!cli.query) {
  2932. console.error("Usage: qmd vsearch [options] <query>");
  2933. process.exit(1);
  2934. }
  2935. // Default min-score for vector search is 0.3
  2936. if (!cli.values["min-score"]) {
  2937. cli.opts.minScore = 0.3;
  2938. }
  2939. // Build query-side embedding provider (i-loazq6ze).
  2940. // Returns undefined for zero-config callers (legacy local path).
  2941. cli.opts.embedProvider = buildQueryEmbedProvider(cli.values);
  2942. await vectorSearch(cli.query, cli.opts);
  2943. break;
  2944. case "query":
  2945. case "deep-search": // undocumented alias
  2946. if (!cli.query) {
  2947. console.error("Usage: qmd query [options] <query>");
  2948. process.exit(1);
  2949. }
  2950. cli.opts.embedProvider = buildQueryEmbedProvider(cli.values);
  2951. await querySearch(cli.query, cli.opts);
  2952. break;
  2953. case "bench": {
  2954. const fixturePath = cli.args[0];
  2955. if (!fixturePath) {
  2956. console.error("Usage: qmd bench <fixture.json> [--json] [-c collection]");
  2957. console.error("");
  2958. console.error("Run search quality benchmarks against a fixture file.");
  2959. console.error("See src/bench/fixtures/example.json for the fixture format.");
  2960. process.exit(1);
  2961. }
  2962. const { runBenchmark } = await import("../bench/bench.js");
  2963. const benchCollection = cli.opts.collection;
  2964. await runBenchmark(fixturePath, {
  2965. json: !!cli.opts.json,
  2966. collection: Array.isArray(benchCollection) ? benchCollection[0] : benchCollection,
  2967. });
  2968. break;
  2969. }
  2970. case "mcp": {
  2971. const sub = cli.args[0]; // stop | status | undefined
  2972. // Cache dir for PID/log files — same dir as the index
  2973. const cacheDir = process.env.XDG_CACHE_HOME
  2974. ? resolve(process.env.XDG_CACHE_HOME, "qmd")
  2975. : resolve(homedir(), ".cache", "qmd");
  2976. const pidPath = resolve(cacheDir, "mcp.pid");
  2977. // Subcommands take priority over flags
  2978. if (sub === "stop") {
  2979. if (!existsSync(pidPath)) {
  2980. console.log("Not running (no PID file).");
  2981. process.exit(0);
  2982. }
  2983. const pid = parseInt(readFileSync(pidPath, "utf-8").trim());
  2984. try {
  2985. process.kill(pid, 0); // alive?
  2986. process.kill(pid, "SIGTERM");
  2987. unlinkSync(pidPath);
  2988. console.log(`Stopped QMD MCP server (PID ${pid}).`);
  2989. }
  2990. catch {
  2991. unlinkSync(pidPath);
  2992. console.log("Cleaned up stale PID file (server was not running).");
  2993. }
  2994. process.exit(0);
  2995. }
  2996. if (cli.values.http) {
  2997. const port = Number(cli.values.port) || 8181;
  2998. if (cli.values.daemon) {
  2999. // Guard: check if already running
  3000. if (existsSync(pidPath)) {
  3001. const existingPid = parseInt(readFileSync(pidPath, "utf-8").trim());
  3002. try {
  3003. process.kill(existingPid, 0); // alive?
  3004. console.error(`Already running (PID ${existingPid}). Run 'qmd mcp stop' first.`);
  3005. process.exit(1);
  3006. }
  3007. catch {
  3008. // Stale PID file — continue
  3009. }
  3010. }
  3011. mkdirSync(cacheDir, { recursive: true });
  3012. const logPath = resolve(cacheDir, "mcp.log");
  3013. const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
  3014. const selfPath = fileURLToPath(import.meta.url);
  3015. const spawnArgs = selfPath.endsWith(".ts")
  3016. ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
  3017. : [selfPath, "mcp", "--http", "--port", String(port)];
  3018. const child = nodeSpawn(process.execPath, spawnArgs, {
  3019. stdio: ["ignore", logFd, logFd],
  3020. detached: true,
  3021. });
  3022. child.unref();
  3023. closeSync(logFd); // parent's copy; child inherited the fd
  3024. writeFileSync(pidPath, String(child.pid));
  3025. console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`);
  3026. console.log(`Logs: ${logPath}`);
  3027. process.exit(0);
  3028. }
  3029. // Foreground HTTP mode — remove top-level cursor handlers so the
  3030. // async cleanup handlers in startMcpHttpServer actually run.
  3031. process.removeAllListeners("SIGTERM");
  3032. process.removeAllListeners("SIGINT");
  3033. const { startMcpHttpServer } = await import("../mcp/server.js");
  3034. try {
  3035. await startMcpHttpServer(port);
  3036. }
  3037. catch (e) {
  3038. if (e?.code === "EADDRINUSE") {
  3039. console.error(`Port ${port} already in use. Try a different port with --port.`);
  3040. process.exit(1);
  3041. }
  3042. throw e;
  3043. }
  3044. }
  3045. else {
  3046. // Default: stdio transport
  3047. const { startMcpServer } = await import("../mcp/server.js");
  3048. await startMcpServer();
  3049. }
  3050. break;
  3051. }
  3052. case "skill": {
  3053. const subcommand = cli.args[0];
  3054. switch (subcommand) {
  3055. case "show": {
  3056. showSkill();
  3057. break;
  3058. }
  3059. case "install": {
  3060. try {
  3061. await installSkill(Boolean(cli.values.global), Boolean(cli.values.force), Boolean(cli.values.yes));
  3062. }
  3063. catch (error) {
  3064. console.error(error instanceof Error ? error.message : String(error));
  3065. process.exit(1);
  3066. }
  3067. break;
  3068. }
  3069. case "help":
  3070. case undefined: {
  3071. console.log("Usage: qmd skill <show|install> [options]");
  3072. console.log("");
  3073. console.log("Commands:");
  3074. console.log(" show Print the packaged QMD skill");
  3075. console.log(" install Install into ./.agents/skills/qmd");
  3076. console.log("");
  3077. console.log("Options:");
  3078. console.log(" --global Install into ~/.agents/skills/qmd");
  3079. console.log(" --yes Also create the .claude/skills/qmd symlink");
  3080. console.log(" -f, --force Replace existing install or symlink");
  3081. process.exit(0);
  3082. }
  3083. default:
  3084. console.error(`Unknown subcommand: ${subcommand}`);
  3085. console.error("Run 'qmd skill help' for usage");
  3086. process.exit(1);
  3087. }
  3088. break;
  3089. }
  3090. case "cleanup": {
  3091. const db = getDb();
  3092. // 1. Clear llm_cache
  3093. const cacheCount = deleteLLMCache(db);
  3094. console.log(`${c.green}✓${c.reset} Cleared ${cacheCount} cached API responses`);
  3095. // 2. Remove orphaned vectors
  3096. const orphanedVecs = cleanupOrphanedVectors(db);
  3097. if (orphanedVecs > 0) {
  3098. console.log(`${c.green}✓${c.reset} Removed ${orphanedVecs} orphaned embedding chunks`);
  3099. }
  3100. else {
  3101. console.log(`${c.dim}No orphaned embeddings to remove${c.reset}`);
  3102. }
  3103. // 3. Remove inactive documents
  3104. const inactiveDocs = deleteInactiveDocuments(db);
  3105. if (inactiveDocs > 0) {
  3106. console.log(`${c.green}✓${c.reset} Removed ${inactiveDocs} inactive document records`);
  3107. }
  3108. // 4. Vacuum to reclaim space
  3109. vacuumDatabase(db);
  3110. console.log(`${c.green}✓${c.reset} Database vacuumed`);
  3111. closeDb();
  3112. break;
  3113. }
  3114. default:
  3115. console.error(`Unknown command: ${cli.command}`);
  3116. console.error("Run 'qmd --help' for usage.");
  3117. process.exit(1);
  3118. }
  // Every command except `mcp` finishes here: await disposeDefaultLlamaCpp(),
  // then exit 0. `mcp` skips this branch — presumably so a running server is
  // not terminated; verify against the mcp startup functions.
  if (cli.command !== "mcp") {
    await disposeDefaultLlamaCpp();
    const exitCode = 0;
    process.exit(exitCode);
  }
  3123. } // end if (main module)