qmd.js 126 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899
  1. #!/usr/bin/env node
  2. import { openDatabase } from "../db.js";
  3. import fastGlob from "fast-glob";
  4. import { execSync, spawn as nodeSpawn } from "child_process";
  5. import { fileURLToPath } from "url";
  import { dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
  7. import { parseArgs } from "util";
  8. import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
  9. import { createInterface } from "readline/promises";
  10. import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js";
  11. import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
  12. import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
  13. import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, } from "../collections.js";
  14. import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";
  // Production mode lets the CLI fall back to the default database path.
  // Tests are expected to set INDEX_PATH or call createStore() with an explicit path.
  enableProductionMode();

  // -----------------------------------------------------------------------------
  // Store/DB lifecycle state (store.ts keeps no legacy singletons of its own)
  // -----------------------------------------------------------------------------

  // Lazily created store handle; null while no store is open.
  let store = null;
  // Explicit database path chosen via setIndexName(); undefined means "use the default".
  let storeDbPathOverride = undefined;
  /**
   * Return the shared store, opening it on first use.
   *
   * On first open the YAML config is synced into the store's database and, when the
   * config declares models, a LlamaCpp instance configured with them becomes the default.
   * Any failure while loading/syncing the config is ignored so the store still opens.
   *
   * @returns {object} The open store created by createStore().
   */
  function getStore() {
    if (store) {
      return store;
    }
    store = createStore(storeDbPathOverride);
    try {
      // Mirror the YAML config into SQLite so store.ts can read collections from the DB
      const config = loadConfig();
      syncConfigToDb(store.db, config);
      const { models } = config;
      if (models) {
        const llm = new LlamaCpp({
          embedModel: models.embed,
          generateModel: models.generate,
          rerankModel: models.rerank,
        });
        setDefaultLlamaCpp(llm);
      }
    }
    catch {
      // A missing config is expected on first run — the DB works without it
    }
    return store;
  }
  /** Convenience accessor for the database handle of the shared store. */
  function getDb() {
    const { db } = getStore();
    return db;
  }
  /**
   * Push the YAML config into SQLite again after a CLI mutation
   * (collection add/remove/rename, context changes).
   * Failures are ignored: the config file may legitimately not exist.
   */
  function resyncConfig() {
    const activeStore = getStore();
    try {
      const config = loadConfig();
      const { db } = activeStore;
      // Dropping the stored hash forces syncConfigToDb to perform a full sync
      const dropHash = db.prepare(`DELETE FROM store_config WHERE key = 'config_hash'`);
      dropHash.run();
      syncConfigToDb(db, config);
    }
    catch {
      // No config available — nothing to sync
    }
  }
  /** Close the shared store if one is open and forget the handle; no-op otherwise. */
  function closeDb() {
    if (!store) {
      return;
    }
    store.close();
    store = null;
  }
  /**
   * Resolve the database path currently in effect.
   * Preference order: the open store's path, then the explicit override, then the default path.
   */
  function getDbPath() {
    const openPath = store?.dbPath;
    if (openPath != null) {
      return openPath;
    }
    if (storeDbPathOverride != null) {
      return storeDbPathOverride;
    }
    return getDefaultDbPath();
  }
  /**
   * Select which index (database) subsequent store operations use.
   *
   * A name containing '/' is treated as a path: it is resolved against the current
   * working directory and its separators are replaced with underscores so the result
   * is usable as a single file name. A falsy name clears the override.
   * Any open store is closed so the next access opens the newly selected index.
   *
   * @param {string | null | undefined} name - Index name or path-like name.
   */
  function setIndexName(name) {
    let normalizedName = name;
    // Normalize relative paths to prevent malformed database paths
    if (name && name.includes('/')) {
      // This file is an ES module, where `require` is not defined — use the
      // imported path resolver and the global `process` instead.
      const absolutePath = pathResolve(process.cwd(), name);
      // Replace path separators with underscores to create a valid filename
      normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
    }
    storeDbPathOverride = normalizedName ? getDefaultDbPath(normalizedName) : undefined;
    // Reset open handle so next use opens the new index
    closeDb();
  }
  /**
   * Make sure the vector table exists with the given dimensionality.
   * The `_db` argument is accepted but unused: the call is forwarded to the shared store.
   *
   * @param {unknown} _db - Ignored.
   * @param {number} dimensions - Passed through to the store's ensureVecTable().
   */
  function ensureVecTable(_db, dimensions) {
    const activeStore = getStore();
    activeStore.ensureVecTable(dimensions);
  }
  // ANSI styling for terminal output. Every entry collapses to "" when the NO_COLOR
  // environment variable is set (non-empty) or stdout is not a TTY.
  const useColor = !process.env.NO_COLOR && process.stdout.isTTY;
  const c = Object.fromEntries([
    ["reset", "\x1b[0m"],
    ["dim", "\x1b[2m"],
    ["bold", "\x1b[1m"],
    ["cyan", "\x1b[36m"],
    ["yellow", "\x1b[33m"],
    ["green", "\x1b[32m"],
    ["magenta", "\x1b[35m"],
    ["blue", "\x1b[34m"],
  ].map(([styleName, sequence]) => [styleName, useColor ? sequence : ""]));
  // Cursor visibility control, written to stderr as escape sequences
  const cursor = {
    hide: () => {
      process.stderr.write('\x1b[?25l');
    },
    show: () => {
      process.stderr.write('\x1b[?25h');
    },
  };
  // On SIGINT/SIGTERM, make the cursor visible again before exiting with 130/143
  for (const [signalName, exitStatus] of [['SIGINT', 130], ['SIGTERM', 143]]) {
    process.on(signalName, () => {
      cursor.show();
      process.exit(exitStatus);
    });
  }
  // Terminal progress reporting through the OSC 9;4 escape sequence.
  // Nothing is written unless stderr is a TTY.
  const isTTY = process.stderr.isTTY;
  const progress = (() => {
    // Build the payload lazily so no work happens when stderr is not a TTY
    const writeOsc = (buildPayload) => {
      if (!isTTY) {
        return;
      }
      process.stderr.write(`\x1b]9;4;${buildPayload()}\x07`);
    };
    return {
      // State 1: show a percentage (rounded to an integer)
      set(percent) {
        writeOsc(() => `1;${Math.round(percent)}`);
      },
      // State 0: remove the indicator
      clear() {
        writeOsc(() => "0");
      },
      // State 3: indeterminate indicator
      indeterminate() {
        writeOsc(() => "3");
      },
      // State 2: error indicator
      error() {
        writeOsc(() => "2");
      },
    };
  })();
  /**
   * Format a duration in seconds as a short human-readable ETA.
   *
   * The value is rounded to whole seconds BEFORE the unit is chosen, so values just
   * below a boundary roll over cleanly (59.6 → "1m 0s") instead of producing
   * impossible fields such as "60s" or "1m 60s".
   *
   * @param {number} seconds - Remaining time in seconds.
   * @returns {string} "Ns", "Mm Ss" or "Hh Mm".
   */
  function formatETA(seconds) {
    const totalSeconds = Math.round(seconds);
    if (totalSeconds < 60) {
      return `${totalSeconds}s`;
    }
    if (totalSeconds < 3600) {
      return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
    }
    return `${Math.floor(totalSeconds / 3600)}h ${Math.floor((totalSeconds % 3600) / 60)}m`;
  }
  /**
   * Inspect index health and print advice to stderr.
   *
   * - Documents lacking embeddings: a warning when they make up at least 10% of all
   *   documents, otherwise a dim tip.
   * - Index not updated for 14 days or more: a dim tip to run 'qmd update'.
   *
   * @param {object} db - Database handle passed to getIndexHealth().
   */
  function checkIndexHealth(db) {
    const health = getIndexHealth(db);
    const { needsEmbedding, totalDocs, daysStale } = health;
    if (needsEmbedding > 0) {
      const pct = Math.round((needsEmbedding / totalDocs) * 100);
      const message = pct >= 10
        ? `${c.yellow}Warning: ${needsEmbedding} documents (${pct}%) need embeddings. Run 'qmd embed' for better results.${c.reset}\n`
        : `${c.dim}Tip: ${needsEmbedding} documents need embeddings. Run 'qmd embed' to index them.${c.reset}\n`;
      process.stderr.write(message);
    }
    // Two weeks without an update counts as stale
    const isStale = daysStale !== null && daysStale >= 14;
    if (isStale) {
      process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'qmd update' to refresh.${c.reset}\n`);
    }
  }
  /**
   * Compute a short display path for a document that is not yet in `existingPaths`.
   *
   * Starts from "parent folder + filename" (or just the filename when that is all there
   * is) and prepends further parent directories — up to and including the collection's
   * directory name — until the candidate is unused. If every candidate is taken, the
   * original `filepath` is returned unchanged.
   *
   * @param {string} filepath - Path of the document.
   * @param {string} collectionPath - Root path of the collection (trailing '/' allowed).
   * @param {{ has(path: string): boolean }} existingPaths - Display paths already in use.
   * @returns {string} The chosen display path.
   */
  function computeDisplayPath(filepath, collectionPath, existingPaths) {
    const root = collectionPath.replace(/\/$/, '');
    const rootName = root.split('/').pop() || '';
    // Inside the collection: collection dir name + path below it; otherwise the path as given
    const scopedPath = filepath.startsWith(`${root}/`)
      ? rootName + filepath.slice(root.length)
      : filepath;
    const segments = scopedPath.split('/').filter((segment) => segment.length > 0);
    // Shortest candidate keeps two trailing segments when available
    const shortestLength = Math.min(2, segments.length);
    let start = segments.length - shortestLength;
    while (start >= 0) {
      const candidate = segments.slice(start).join('/');
      if (!existingPaths.has(candidate)) {
        return candidate;
      }
      start -= 1;
    }
    // Every candidate is taken: fall back to the full path
    return filepath;
  }
  /**
   * Describe how long ago `date` was, using the largest fitting unit
   * among seconds, minutes, hours and days (each value floored).
   *
   * @param {Date} date - The moment to compare against the current time.
   * @returns {string} For example "42s ago", "5m ago", "3h ago" or "12d ago".
   */
  function formatTimeAgo(date) {
    let amount = Math.floor((Date.now() - date.getTime()) / 1000);
    // [how many of this unit make up the next one, suffix]
    const units = [
      [60, 's'],
      [60, 'm'],
      [24, 'h'],
    ];
    for (const [perNextUnit, suffix] of units) {
      if (amount < perNextUnit) {
        return `${amount}${suffix} ago`;
      }
      amount = Math.floor(amount / perNextUnit);
    }
    return `${amount}d ago`;
  }
  /**
   * Format a millisecond duration: raw "Nms" below one second,
   * otherwise seconds with one decimal place.
   *
   * @param {number} ms - Duration in milliseconds.
   * @returns {string}
   */
  function formatMs(ms) {
    return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
  }
  /**
   * Format a byte count using 1024-based units.
   * Values below 1024 are printed verbatim with "B"; larger values use one decimal
   * place in KB, MB or GB (GB is the largest unit used).
   *
   * @param {number} bytes - Size in bytes.
   * @returns {string}
   */
  function formatBytes(bytes) {
    const STEP = 1024;
    if (bytes < STEP) {
      return `${bytes} B`;
    }
    const scales = [
      { unit: 'KB', divisor: STEP },
      { unit: 'MB', divisor: STEP * STEP },
      { unit: 'GB', divisor: STEP * STEP * STEP },
    ];
    // First unit whose next step is still above the value; GB catches everything else
    const scale = scales.find(({ divisor }) => bytes < divisor * STEP) ?? scales[scales.length - 1];
    return `${(bytes / scale.divisor).toFixed(1)} ${scale.unit}`;
  }
  /**
   * Print a status report for the current index to stdout.
   *
   * Sections, in order: index path and size, MCP daemon liveness (from its PID file),
   * document/vector counts, AST chunking availability, collections with their contexts
   * and example commands, model links, device/GPU info, and tips.
   * The store is always closed when the function finishes, even if a step throws.
   *
   * @returns {Promise<void>}
   */
  async function showStatus() {
    const dbPath = getDbPath();
    const db = getDb();
    try {
      // Index size on disk; stays 0 when the file cannot be stat'ed
      let indexSize = 0;
      try {
        indexSize = statSync(dbPath).size;
      }
      catch {
        // Best effort only — a missing/unreadable file is reported as 0 B
      }
      // Collections info (from YAML + database stats)
      const collections = listCollections(db);
      // Overall stats
      const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get();
      const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get();
      const needsEmbedding = getHashesNeedingEmbedding(db);
      // Most recent update across all collections
      const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
      console.log(`${c.bold}QMD Status${c.reset}\n`);
      console.log(`Index: ${dbPath}`);
      console.log(`Size: ${formatBytes(indexSize)}`);
      // MCP daemon status (check PID file liveness)
      const mcpCacheDir = process.env.XDG_CACHE_HOME
        ? resolve(process.env.XDG_CACHE_HOME, "qmd")
        : resolve(homedir(), ".cache", "qmd");
      const mcpPidPath = resolve(mcpCacheDir, "mcp.pid");
      if (existsSync(mcpPidPath)) {
        const mcpPid = Number.parseInt(readFileSync(mcpPidPath, "utf-8").trim(), 10);
        let mcpRunning = false;
        // Only probe real PIDs: 0 and negative values address process groups, not a daemon
        if (Number.isInteger(mcpPid) && mcpPid > 0) {
          try {
            // Signal 0 performs the existence/permission check without delivering a signal
            process.kill(mcpPid, 0);
            mcpRunning = true;
          }
          catch (err) {
            // EPERM means the process exists but belongs to another user — it IS running.
            // Anything else (e.g. ESRCH) means there is no such process.
            mcpRunning = err?.code === "EPERM";
          }
        }
        if (mcpRunning) {
          console.log(`MCP: ${c.green}running${c.reset} (PID ${mcpPid})`);
        }
        else {
          try {
            // Stale PID file cleaned up silently
            unlinkSync(mcpPidPath);
          }
          catch {
            // Could not remove the stale file — not worth failing the status report
          }
        }
      }
      console.log("");
      console.log(`${c.bold}Documents${c.reset}`);
      console.log(` Total: ${totalDocs.count} files indexed`);
      console.log(` Vectors: ${vectorCount.count} embedded`);
      if (needsEmbedding > 0) {
        console.log(` ${c.yellow}Pending: ${needsEmbedding} need embedding${c.reset} (run 'qmd embed')`);
      }
      if (mostRecent.latest) {
        const lastUpdate = new Date(mostRecent.latest);
        console.log(` Updated: ${formatTimeAgo(lastUpdate)}`);
      }
      // Get all contexts grouped by collection (from YAML)
      const allContexts = listAllContexts();
      const contextsByCollection = new Map();
      for (const ctx of allContexts) {
        if (!contextsByCollection.has(ctx.collection)) {
          contextsByCollection.set(ctx.collection, []);
        }
        contextsByCollection.get(ctx.collection).push({
          path_prefix: ctx.path,
          context: ctx.context
        });
      }
      // AST chunking status. The header is printed once up front so a failure after
      // partial output does not print it a second time.
      console.log(`\n${c.bold}AST Chunking${c.reset}`);
      try {
        const { getASTStatus } = await import("../ast.js");
        const ast = await getASTStatus();
        if (ast.available) {
          const ok = ast.languages.filter(l => l.available).map(l => l.language);
          const fail = ast.languages.filter(l => !l.available);
          console.log(` Status: ${c.green}active${c.reset}`);
          console.log(` Languages: ${ok.join(", ")}`);
          for (const f of fail) {
            console.log(` ${c.yellow}Unavailable: ${f.language} (${f.error})${c.reset}`);
          }
        }
        else {
          console.log(` Status: ${c.yellow}unavailable${c.reset} (falling back to regex chunking)`);
          for (const l of ast.languages) {
            if (l.error)
              console.log(` ${c.dim}${l.language}: ${l.error}${c.reset}`);
          }
        }
      }
      catch {
        console.log(` Status: ${c.dim}not available${c.reset}`);
      }
      if (collections.length > 0) {
        console.log(`\n${c.bold}Collections${c.reset}`);
        for (const col of collections) {
          const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never";
          const contexts = contextsByCollection.get(col.name) || [];
          console.log(` ${c.cyan}${col.name}${c.reset} ${c.dim}(qmd://${col.name}/)${c.reset}`);
          console.log(` ${c.dim}Pattern:${c.reset} ${col.glob_pattern}`);
          console.log(` ${c.dim}Files:${c.reset} ${col.active_count} (updated ${lastMod})`);
          if (contexts.length > 0) {
            console.log(` ${c.dim}Contexts:${c.reset} ${contexts.length}`);
            for (const ctx of contexts) {
              // Handle both empty string and '/' as root context
              const pathDisplay = (ctx.path_prefix === '' || ctx.path_prefix === '/') ? '/' : `/${ctx.path_prefix}`;
              // Keep the preview on one line: at most 60 characters including the ellipsis
              const contextPreview = ctx.context.length > 60
                ? ctx.context.substring(0, 57) + '...'
                : ctx.context;
              console.log(` ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`);
            }
          }
        }
        // Show examples of virtual paths, using the first collection as the sample
        const sample = collections[0];
        console.log(`\n${c.bold}Examples${c.reset}`);
        console.log(` ${c.dim}# List files in a collection${c.reset}`);
        if (sample) {
          console.log(` qmd ls ${sample.name}`);
        }
        console.log(` ${c.dim}# Get a document${c.reset}`);
        if (sample) {
          console.log(` qmd get qmd://${sample.name}/path/to/file.md`);
        }
        console.log(` ${c.dim}# Search within a collection${c.reset}`);
        if (sample) {
          console.log(` qmd search "query" -c ${sample.name}`);
        }
      }
      else {
        console.log(`\n${c.dim}No collections. Run 'qmd collection add .' to index markdown files.${c.reset}`);
      }
      // Models
      {
        // hf:org/repo/file.gguf → https://huggingface.co/org/repo
        const hfLink = (uri) => {
          const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
          return match ? `https://huggingface.co/${match[1]}` : uri;
        };
        console.log(`\n${c.bold}Models${c.reset}`);
        console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
        console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
        console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
      }
      // Device / GPU info
      try {
        const llm = getDefaultLlamaCpp();
        const device = await llm.getDeviceInfo();
        console.log(`\n${c.bold}Device${c.reset}`);
        if (device.gpu) {
          console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
          if (device.gpuDevices.length > 0) {
            // Deduplicate and count GPUs
            const counts = new Map();
            for (const name of device.gpuDevices) {
              counts.set(name, (counts.get(name) ?? 0) + 1);
            }
            const deviceStr = Array.from(counts.entries())
              .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
              .join(', ');
            console.log(` Devices: ${deviceStr}`);
          }
          if (device.vram) {
            console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
          }
        }
        else {
          console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
          console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
        }
        console.log(` CPU: ${device.cpuCores} math cores`);
      }
      catch {
        // Don't fail status if LLM init fails
      }
      // Tips section
      const tips = [];
      // Check for collections without context
      const collectionsWithoutContext = collections.filter(col => {
        const contexts = contextsByCollection.get(col.name) || [];
        return contexts.length === 0;
      });
      if (collectionsWithoutContext.length > 0) {
        // Name at most three collections and summarize the rest
        const names = collectionsWithoutContext.map(col => col.name).slice(0, 3).join(', ');
        const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : '';
        tips.push(`Add context to collections for better search results: ${names}${more}`);
        tips.push(` ${c.dim}qmd context add qmd://<name>/ "What this collection contains"${c.reset}`);
        tips.push(` ${c.dim}qmd context add qmd://<name>/meeting-notes "Weekly team meeting notes"${c.reset}`);
      }
      // Check for collections without update commands
      const collectionsWithoutUpdate = collections.filter(col => {
        const yamlCol = getCollectionFromYaml(col.name);
        return !yamlCol?.update;
      });
      if (collectionsWithoutUpdate.length > 0 && collections.length > 1) {
        const names = collectionsWithoutUpdate.map(col => col.name).slice(0, 3).join(', ');
        const more = collectionsWithoutUpdate.length > 3 ? ` +${collectionsWithoutUpdate.length - 3} more` : '';
        tips.push(`Add update commands to keep collections fresh: ${names}${more}`);
        tips.push(` ${c.dim}qmd collection update-cmd <name> 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`);
      }
      if (tips.length > 0) {
        console.log(`\n${c.bold}Tips${c.reset}`);
        for (const tip of tips) {
          console.log(` ${tip}`);
        }
      }
    }
    finally {
      // Release the store even when a section above throws
      closeDb();
    }
  }
  /**
   * Re-index every collection known to the store.
   *
   * For each collection: run its optional YAML `update` shell command (in the
   * collection's directory), then re-index its files while reporting progress.
   * A failing update command terminates the process with the command's exit code
   * (or 1 when the command could not be run).
   * The terminal progress indicator is cleared and the store is closed even when
   * re-indexing throws.
   *
   * @returns {Promise<void>}
   */
  async function updateCollections() {
    const db = getDb();
    const storeInstance = getStore();
    try {
      // Collections are defined in YAML; no duplicate cleanup needed.
      // Clear the cache (clearCache) on update
      clearCache(db);
      const collections = listCollections(db);
      if (collections.length === 0) {
        console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
        return;
      }
      console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);
      for (let i = 0; i < collections.length; i++) {
        const col = collections[i];
        if (!col)
          continue;
        console.log(`${c.cyan}[${i + 1}/${collections.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`);
        // Execute custom update command if specified in YAML
        const yamlCol = getCollectionFromYaml(col.name);
        if (yamlCol?.update) {
          console.log(`${c.dim} Running update command: ${yamlCol.update}${c.reset}`);
          try {
            const proc = nodeSpawn("bash", ["-c", yamlCol.update], {
              cwd: col.pwd,
              stdio: ["ignore", "pipe", "pipe"],
            });
            // Collect stdout/stderr and settle once the process has closed
            const [output, errorOutput, exitCode] = await new Promise((settle, fail) => {
              let out = "";
              let err = "";
              proc.stdout?.on("data", (d) => { out += d.toString(); });
              proc.stderr?.on("data", (d) => { err += d.toString(); });
              proc.on("error", fail);
              // A null exit code is reported as failure (1)
              proc.on("close", (code) => settle([out, err, code ?? 1]));
            });
            if (output.trim()) {
              console.log(output.trim().split('\n').map(l => ` ${l}`).join('\n'));
            }
            if (errorOutput.trim()) {
              console.log(errorOutput.trim().split('\n').map(l => ` ${l}`).join('\n'));
            }
            if (exitCode !== 0) {
              console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`);
              process.exit(exitCode);
            }
          }
          catch (err) {
            console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`);
            process.exit(1);
          }
        }
        const startTime = Date.now();
        console.log(`Collection: ${col.pwd} (${col.glob_pattern})`);
        progress.indeterminate();
        let result;
        try {
          result = await reindexCollection(storeInstance, col.pwd, col.glob_pattern, col.name, {
            ignorePatterns: yamlCol?.ignore,
            onProgress: (info) => {
              // Skip the percentage when there is nothing to divide by
              if (info.total > 0) {
                progress.set((info.current / info.total) * 100);
              }
              const elapsed = (Date.now() - startTime) / 1000;
              const rate = info.current / elapsed;
              const remaining = (info.total - info.current) / rate;
              // The first couple of samples give a meaningless rate, so hold the ETA back
              const eta = info.current > 2 ? ` ETA: ${formatETA(remaining)}` : "";
              if (isTTY)
                process.stderr.write(`\rIndexing: ${info.current}/${info.total}${eta} `);
            },
          });
        }
        finally {
          // Never leave the terminal progress indicator stuck, even on failure
          progress.clear();
        }
        console.log(`\nIndexed: ${result.indexed} new, ${result.updated} updated, ${result.unchanged} unchanged, ${result.removed} removed`);
        if (result.orphanedCleaned > 0) {
          console.log(`Cleaned up ${result.orphanedCleaned} orphaned content hash(es)`);
        }
        console.log("");
      }
      // Check if any documents need embedding (show once at end)
      const needsEmbedding = getHashesNeedingEmbedding(db);
      closeDb();
      console.log(`${c.green}✓ All collections updated.${c.reset}`);
      if (needsEmbedding > 0) {
        console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
      }
    }
    finally {
      // closeDb() is a no-op when the store is already closed
      closeDb();
    }
  }
  /**
   * Detect which collection (if any) contains the given filesystem path.
   *
   * The path is canonicalised with getRealPath() and compared against the
   * `path` of every collection returned by yamlListCollections(). When several
   * collection roots contain the path, the longest root wins.
   *
   * @param {*} db - Not used by this function; kept so existing call sites
   *   that pass a database handle keep working.
   * @param {string} fsPath - Filesystem path to classify.
   * @returns {{ collectionName: string, relativePath: string } | null}
   *   The owning collection and the path relative to its root ('' when the
   *   path is the root itself), or null when no collection contains the path.
   */
  function detectCollectionFromPath(db, fsPath) {
    const realPath = getRealPath(fsPath);
    let best = null;
    for (const coll of yamlListCollections()) {
      // Tolerate roots written with trailing slashes ("/notes/" or "/"):
      // testing for `path + '/'` verbatim would look for "/notes//" and
      // never match anything below the root.
      const root = coll.path.replace(/\/+$/, '');
      const isInside = realPath === root || realPath.startsWith(`${root}/`);
      if (isInside && (!best || root.length > best.root.length)) {
        best = { name: coll.name, root };
      }
    }
    if (!best) {
      return null;
    }
    const relativePath = realPath === best.root ? '' : realPath.slice(best.root.length + 1);
    return { collectionName: best.name, relativePath };
  }
  /**
   * Attach a context description to a location and persist it.
   *
   * `pathArg` may be:
   *  - "/"             sets the global context via setGlobalContext();
   *  - a qmd:// path   stores the context on that collection path;
   *  - a filesystem path (".", "./", "~/...", absolute, or relative to
   *    getPwd()); the owning collection is found with
   *    detectCollectionFromPath().
   * An empty `pathArg` means the current directory.
   *
   * Every successful change is followed by resyncConfig() and closeDb().
   * Failures print to stderr, close the database and exit with code 1.
   *
   * @param {string | undefined} pathArg - Target location (see above).
   * @param {string} contextText - Context text to store.
   * @returns {Promise<void>}
   */
  async function contextAdd(pathArg, contextText) {
    const db = getDb();
    // Close the database before bailing out, as getDocument/listFiles do,
    // instead of leaving the handle open when the process exits.
    const fail = (...messages) => {
      for (const message of messages) {
        console.error(message);
      }
      closeDb();
      process.exit(1);
    };
    const reportAdded = (displayPath) => {
      console.log(`${c.green}✓${c.reset} Added context for: ${displayPath}`);
      console.log(`${c.dim}Context: ${contextText}${c.reset}`);
      closeDb();
    };

    // Handle "/" as global context (applies to all collections)
    if (pathArg === '/') {
      setGlobalContext(contextText);
      resyncConfig();
      console.log(`${c.green}✓${c.reset} Set global context`);
      console.log(`${c.dim}Context: ${contextText}${c.reset}`);
      closeDb();
      return;
    }

    // Resolve path - defaults to current directory if not provided
    let fsPath = pathArg || '.';
    if (fsPath === '.' || fsPath === './') {
      fsPath = getPwd();
    } else if (fsPath.startsWith('~/')) {
      fsPath = `${homedir()}${fsPath.slice(1)}`;
    } else if (!fsPath.startsWith('/') && !fsPath.startsWith('qmd://')) {
      fsPath = resolve(getPwd(), fsPath);
    }

    // Handle virtual paths (qmd://collection/path)
    if (isVirtualPath(fsPath)) {
      const parsed = parseVirtualPath(fsPath);
      if (!parsed) {
        return fail(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
      }
      if (!getCollectionFromYaml(parsed.collectionName)) {
        return fail(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
      }
      yamlAddContext(parsed.collectionName, parsed.path, contextText);
      resyncConfig();
      reportAdded(parsed.path
        ? `qmd://${parsed.collectionName}/${parsed.path}`
        : `qmd://${parsed.collectionName}/ (collection root)`);
      return;
    }

    // Detect collection from filesystem path
    const detected = detectCollectionFromPath(db, fsPath);
    if (!detected) {
      return fail(
        `${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`,
        `${c.dim}Run 'qmd status' to see indexed collections${c.reset}`,
      );
    }
    yamlAddContext(detected.collectionName, detected.relativePath, contextText);
    resyncConfig();
    reportAdded(detected.relativePath
      ? `qmd://${detected.collectionName}/${detected.relativePath}`
      : `qmd://${detected.collectionName}/`);
  }
  /**
   * Print every configured context, grouped under its collection name.
   *
   * Entries come from listAllContexts(); a collection heading is printed
   * whenever the collection differs from the previous entry's. Prints a hint
   * instead when nothing is configured.
   *
   * @returns {void}
   */
  function contextList() {
    // The handle itself is not used here; the database is opened up front and
    // released with closeDb() on every exit path.
    getDb();
    const contexts = listAllContexts();
    if (contexts.length === 0) {
      console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`);
      closeDb();
      return;
    }
    console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);
    let currentGroup = '';
    contexts.forEach((entry) => {
      if (entry.collection !== currentGroup) {
        console.log(`${c.cyan}${entry.collection}${c.reset}`);
        currentGroup = entry.collection;
      }
      console.log(entry.path ? ` ${entry.path}` : ' / (root)');
      console.log(` ${c.dim}${entry.context}${c.reset}`);
    });
    closeDb();
  }
  /**
   * Remove a previously configured context.
   *
   * `pathArg` may be "/" (global context), a qmd:// virtual path, or a
   * filesystem path (".", "./", "~/...", absolute, or relative to getPwd()).
   * After every successful removal resyncConfig() is called, matching what
   * contextAdd() does after every addition. Failures print to stderr and
   * exit with code 1.
   *
   * @param {string} pathArg - Location whose context should be removed.
   * @returns {void}
   */
  function contextRemove(pathArg) {
    if (pathArg === '/') {
      // Remove global context
      setGlobalContext(undefined);
      // Resync so SQLite store_config is updated. getStore() is called only
      // for its side effect (presumably it opens the store that
      // resyncConfig() writes to - TODO confirm).
      getStore();
      resyncConfig();
      closeDb();
      console.log(`${c.green}✓${c.reset} Removed global context`);
      return;
    }

    // Handle virtual paths
    if (isVirtualPath(pathArg)) {
      const parsed = parseVirtualPath(pathArg);
      if (!parsed) {
        console.error(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`);
        process.exit(1);
        return;
      }
      const coll = getCollectionFromYaml(parsed.collectionName);
      if (!coll) {
        console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
        process.exit(1);
        return;
      }
      if (!yamlRemoveContext(coll.name, parsed.path)) {
        console.error(`${c.yellow}No context found for: ${pathArg}${c.reset}`);
        process.exit(1);
        return;
      }
      // Same open/resync/close sequence as the global branch above.
      getStore();
      resyncConfig();
      closeDb();
      console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`);
      return;
    }

    // Handle filesystem paths
    let fsPath = pathArg;
    if (fsPath === '.' || fsPath === './') {
      fsPath = getPwd();
    } else if (fsPath.startsWith('~/')) {
      fsPath = `${homedir()}${fsPath.slice(1)}`;
    } else if (!fsPath.startsWith('/')) {
      fsPath = resolve(getPwd(), fsPath);
    }
    const db = getDb();
    const detected = detectCollectionFromPath(db, fsPath);
    if (!detected) {
      closeDb();
      console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
      process.exit(1);
      return;
    }
    const target = `qmd://${detected.collectionName}/${detected.relativePath}`;
    if (!yamlRemoveContext(detected.collectionName, detected.relativePath)) {
      closeDb();
      console.error(`${c.yellow}No context found for: ${target}${c.reset}`);
      process.exit(1);
      return;
    }
    // The database is still open from getDb() above, as in contextAdd().
    resyncConfig();
    closeDb();
    console.log(`${c.green}✓${c.reset} Removed context for: ${target}`);
  }
  /**
   * Print one indexed document (optionally only a range of its lines).
   *
   * `filename` is resolved in this order:
   *  1. a trailing ":N" is used as the starting line when `fromLine` is unset;
   *  2. a docid (per isDocid) is mapped to a path with findDocumentByDocid();
   *  3. a qmd:// virtual path is looked up in its collection, first exactly,
   *     then by path suffix;
   *  4. a relative "collection/path" is tried when that collection has active
   *     documents (exact, then suffix);
   *  5. otherwise the value is treated as a filesystem path: it is resolved,
   *     mapped to a collection with detectCollectionFromPath(), and as a last
   *     resort matched by file name across all collections.
   * If nothing matches, an error is printed, the database is closed and the
   * process exits with code 1.
   *
   * @param {string} filename - Document reference (see above).
   * @param {number} [fromLine] - 1-based first line to print; values below 1
   *   are treated as 1.
   * @param {number} [maxLines] - Maximum number of lines to print.
   * @param {boolean} [lineNumbers] - Pass the output through addLineNumbers().
   * @returns {void}
   */
  function getDocument(filename, fromLine, maxLines, lineNumbers) {
    const db = getDb();

    const SELECT_DOC = `
      SELECT d.collection as collectionName, d.path, content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash`;
    const findExact = (collection, path) => db.prepare(`${SELECT_DOC}
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `).get(collection, path);
    const findBySuffix = (collection, suffix) => db.prepare(`${SELECT_DOC}
      WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
      LIMIT 1
    `).get(collection, `%${suffix}`);
    const findInCollection = (collection, path) =>
      findExact(collection, path) || findBySuffix(collection, path);
    const bail = (message) => {
      console.error(message);
      closeDb();
      process.exit(1);
    };

    // Parse :linenum suffix from filename (e.g., "file.md:100")
    let inputPath = filename;
    let requestedFrom = fromLine;
    const colonMatch = inputPath.match(/:(\d+)$/);
    if (colonMatch && !requestedFrom) {
      requestedFrom = Number.parseInt(colonMatch[1], 10);
      inputPath = inputPath.slice(0, -colonMatch[0].length);
    }

    // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
    if (isDocid(inputPath)) {
      const docidMatch = findDocumentByDocid(db, inputPath);
      if (!docidMatch) {
        return bail(`Document not found: ${filename}`);
      }
      inputPath = docidMatch.filepath;
    }

    let doc = null;
    if (isVirtualPath(inputPath)) {
      // Virtual paths (qmd://collection/path) never fall back to the filesystem.
      const parsed = parseVirtualPath(inputPath);
      if (!parsed) {
        return bail(`Invalid virtual path: ${inputPath}`);
      }
      doc = findInCollection(parsed.collectionName, parsed.path);
    } else {
      // Try "collection/path" first: only for relative input whose first
      // component names a collection that has active documents.
      if (!inputPath.startsWith('/') && !inputPath.startsWith('~')) {
        const [possibleCollection, ...rest] = inputPath.split('/');
        if (rest.length > 0 && possibleCollection) {
          const collExists = db.prepare(`
            SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1
          `).get(possibleCollection);
          if (collExists) {
            doc = findInCollection(possibleCollection, rest.join('/'));
          }
        }
      }
      // If not found as collection/path, handle as a filesystem path.
      if (!doc) {
        let fsPath = inputPath;
        if (fsPath.startsWith('~/')) {
          fsPath = `${homedir()}${fsPath.slice(1)}`;
        } else if (!fsPath.startsWith('/')) {
          fsPath = resolve(getPwd(), fsPath);
        }
        fsPath = getRealPath(fsPath);
        const detected = detectCollectionFromPath(db, fsPath);
        if (detected) {
          doc = findExact(detected.collectionName, detected.relativePath);
        }
        if (!doc) {
          // Last resort: match on the file name alone, in any collection.
          const baseName = inputPath.split('/').pop() || inputPath;
          doc = db.prepare(`${SELECT_DOC}
            WHERE d.path LIKE ? AND d.active = 1
            LIMIT 1
          `).get(`%${baseName}`);
        }
      }
    }

    if (!doc) {
      return bail(`Document not found: ${filename}`);
    }

    const context = getContextForPath(db, doc.collectionName, doc.path);
    let output = doc.body;
    // A start below 1 would become a negative slice() index and print from
    // the END of the document with wrong line numbers, so clamp it to 1.
    const startLine = Math.max(1, requestedFrom || 1);
    if (requestedFrom !== undefined || maxLines !== undefined) {
      const lines = output.split('\n');
      const start = startLine - 1; // Convert to 0-indexed
      const end = maxLines !== undefined ? start + Math.max(0, maxLines) : lines.length;
      output = lines.slice(start, end).join('\n');
    }
    if (lineNumbers) {
      output = addLineNumbers(output, startLine);
    }
    if (context) {
      console.log(`Folder Context: ${context}\n---\n`);
    }
    console.log(output);
    closeDb();
  }
  /**
   * Multi-get: fetch multiple documents by glob pattern or comma-separated
   * list and print them in the requested format.
   *
   * A pattern containing a comma and none of `*`, `?`, `{` is treated as a
   * list of names (virtual paths are matched exactly; other names exactly by
   * path, then by path suffix). Anything else is handed to matchFilesByGlob();
   * if that yields nothing the process exits with code 1. Documents whose
   * stored length exceeds `maxBytes` are reported as skipped instead of
   * printed.
   *
   * @param {string} pattern - Glob pattern or comma-separated names.
   * @param {number} [maxLines] - Keep only this many leading lines per document.
   * @param {number} [maxBytes] - Size limit per document.
   * @param {string} [format] - "json", "csv", "files", "md", "xml"; any other
   *   value selects the plain CLI layout.
   * @returns {void}
   */
  function multiGet(pattern, maxLines, maxBytes = DEFAULT_MULTI_GET_MAX_BYTES, format = "cli") {
    const db = getDb();
    const summarySql = (where) => `
      SELECT
        'qmd://' || d.collection || '/' || d.path as virtual_path,
        LENGTH(content.doc) as body_length,
        d.collection,
        d.path
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE ${where}
    `;

    // Check if it's a comma-separated list or a glob pattern
    const isCommaSeparated = pattern.includes(',') && !/[*?{]/.test(pattern);
    let files;
    if (!isCommaSeparated) {
      // Glob pattern - matchFilesByGlob returns virtual paths
      files = matchFilesByGlob(db, pattern).map((match) => ({
        ...match,
        collection: undefined, // Resolved from the virtual path below
        path: undefined,
      }));
      if (files.length === 0) {
        console.error(`No files matched pattern: ${pattern}`);
        closeDb();
        process.exit(1);
      }
    } else {
      files = [];
      const names = pattern.split(',').map((part) => part.trim()).filter(Boolean);
      for (const name of names) {
        let row = null;
        if (isVirtualPath(name)) {
          const parsed = parseVirtualPath(name);
          if (parsed) {
            row = db
              .prepare(summarySql('d.collection = ? AND d.path = ? AND d.active = 1'))
              .get(parsed.collectionName, parsed.path);
          }
        } else {
          // Exact path first, then fall back to a suffix match.
          row = db.prepare(summarySql(`d.path = ? AND d.active = 1
            LIMIT 1`)).get(name);
          if (!row) {
            row = db.prepare(summarySql(`d.path LIKE ? AND d.active = 1
              LIMIT 1`)).get(`%${name}`);
          }
        }
        if (!row) {
          console.error(`File not found: ${name}`);
          continue;
        }
        files.push({
          filepath: row.virtual_path,
          displayPath: row.virtual_path,
          bodyLength: row.body_length,
          collection: row.collection,
          path: row.path,
        });
      }
    }

    // Collect results for structured output
    const results = [];
    for (const file of files) {
      let { collection, path } = file;
      if (!collection || !path) {
        const parsed = parseVirtualPath(file.filepath);
        if (parsed) {
          collection = parsed.collectionName;
          path = parsed.path;
        }
      }
      const context = collection && path ? getContextForPath(db, collection, path) : null;
      const baseName = () => file.displayPath.split('/').pop() || file.displayPath;

      // Size limit is checked before the content is fetched.
      if (file.bodyLength > maxBytes) {
        results.push({
          file: file.filepath,
          displayPath: file.displayPath,
          title: baseName(),
          body: "",
          context,
          skipped: true,
          skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'qmd get ${file.displayPath}' to retrieve.`,
        });
        continue;
      }
      if (!collection || !path) {
        continue;
      }
      const doc = db.prepare(`
        SELECT content.doc as body, d.title
        FROM documents d
        JOIN content ON content.hash = d.hash
        WHERE d.collection = ? AND d.path = ? AND d.active = 1
      `).get(collection, path);
      if (!doc) {
        continue;
      }
      let body = doc.body;
      if (maxLines !== undefined) {
        const lines = body.split('\n');
        body = lines.slice(0, maxLines).join('\n');
        if (lines.length > maxLines) {
          body += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
        }
      }
      results.push({
        file: file.filepath,
        displayPath: file.displayPath,
        title: doc.title || baseName(),
        body,
        context,
        skipped: false,
      });
    }
    closeDb();

    // Output based on format
    switch (format) {
      case "json": {
        const payload = results.map((r) => {
          const entry = { file: r.displayPath, title: r.title };
          if (r.context) {
            entry.context = r.context;
          }
          if (r.skipped) {
            entry.skipped = true;
            entry.reason = r.skipReason;
          } else {
            entry.body = r.body;
          }
          return entry;
        });
        console.log(JSON.stringify(payload, null, 2));
        break;
      }
      case "csv": {
        const escapeField = (val) => {
          if (val == null) {
            return "";
          }
          const text = String(val);
          return /[",\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
        };
        console.log("file,title,context,skipped,body");
        results.forEach((r) => {
          const cells = [
            r.displayPath,
            r.title,
            r.context,
            r.skipped ? "true" : "false",
            r.skipped ? r.skipReason : r.body,
          ];
          console.log(cells.map(escapeField).join(","));
        });
        break;
      }
      case "files": {
        results.forEach((r) => {
          const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
          const status = r.skipped ? "[SKIPPED]" : "";
          console.log(`${r.displayPath}${ctx}${status ? `,${status}` : ""}`);
        });
        break;
      }
      case "md": {
        results.forEach((r) => {
          console.log(`## ${r.displayPath}\n`);
          if (r.title && r.title !== r.displayPath) {
            console.log(`**Title:** ${r.title}\n`);
          }
          if (r.context) {
            console.log(`**Context:** ${r.context}\n`);
          }
          if (r.skipped) {
            console.log(`> ${r.skipReason}\n`);
            return;
          }
          console.log("```");
          console.log(r.body);
          console.log("```\n");
        });
        break;
      }
      case "xml": {
        console.log('<?xml version="1.0" encoding="UTF-8"?>');
        console.log("<documents>");
        results.forEach((r) => {
          console.log(" <document>");
          console.log(` <file>${escapeXml(r.displayPath)}</file>`);
          console.log(` <title>${escapeXml(r.title)}</title>`);
          if (r.context) {
            console.log(` <context>${escapeXml(r.context)}</context>`);
          }
          if (r.skipped) {
            console.log(` <skipped>true</skipped>`);
            console.log(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
          } else {
            console.log(` <body>${escapeXml(r.body)}</body>`);
          }
          console.log(" </document>");
        });
        console.log("</documents>");
        break;
      }
      default: {
        // CLI format (default)
        const rule = '='.repeat(60);
        results.forEach((r) => {
          console.log(`\n${rule}`);
          console.log(`File: ${r.displayPath}`);
          console.log(`${rule}\n`);
          if (r.skipped) {
            console.log(`[SKIPPED: ${r.skipReason}]`);
            return;
          }
          if (r.context) {
            console.log(`Folder Context: ${r.context}\n---\n`);
          }
          console.log(r.body);
        });
      }
    }
  }
  /**
   * List files in the virtual file tree.
   *
   * With no argument, prints every collection from yamlListCollections()
   * together with its number of active documents. Otherwise `pathArg` is
   * either "qmd://collection/path" or "collection[/path]"; the active
   * documents of that collection whose path starts with the given prefix are
   * printed one per line (size, modification time, virtual path).
   * An invalid virtual path or unknown collection prints an error, closes the
   * database and exits with code 1.
   *
   * @param {string} [pathArg] - Collection name or virtual path to list.
   * @returns {void}
   */
  function listFiles(pathArg) {
    const db = getDb();
    if (!pathArg) {
      // No argument - list all collections
      const yamlCollections = yamlListCollections();
      if (yamlCollections.length === 0) {
        console.log("No collections found. Run 'qmd collection add .' to index files.");
        closeDb();
        return;
      }
      // Prepared once and reused for every collection.
      const countStmt = db.prepare(`
        SELECT COUNT(*) as file_count
        FROM documents d
        WHERE d.collection = ? AND d.active = 1
      `);
      const collections = yamlCollections.map((coll) => ({
        name: coll.name,
        file_count: countStmt.get(coll.name)?.file_count || 0,
      }));
      console.log(`${c.bold}Collections:${c.reset}\n`);
      for (const coll of collections) {
        console.log(` ${c.dim}qmd://${c.reset}${c.cyan}${coll.name}/${c.reset} ${c.dim}(${coll.file_count} files)${c.reset}`);
      }
      closeDb();
      return;
    }

    // Parse the path argument
    let collectionName;
    let pathPrefix = null;
    if (pathArg.startsWith('qmd://')) {
      // Virtual path format: qmd://collection/path
      const parsed = parseVirtualPath(pathArg);
      if (!parsed) {
        console.error(`Invalid virtual path: ${pathArg}`);
        closeDb();
        process.exit(1);
        return;
      }
      collectionName = parsed.collectionName;
      pathPrefix = parsed.path;
    } else {
      // Just collection name or collection/path
      const [first, ...rest] = pathArg.split('/');
      collectionName = first || '';
      if (rest.length > 0) {
        pathPrefix = rest.join('/');
      }
    }

    // Get the collection
    const coll = getCollectionFromYaml(collectionName);
    if (!coll) {
      console.error(`Collection not found: ${collectionName}`);
      console.error(`Run 'qmd ls' to see available collections.`);
      closeDb();
      process.exit(1);
      return;
    }

    // One query; the path filter is only added when a prefix was given.
    const filters = ['d.collection = ?'];
    const params = [coll.name];
    if (pathPrefix) {
      filters.push('d.path LIKE ?');
      params.push(`${pathPrefix}%`);
    }
    filters.push('d.active = 1');
    const files = db.prepare(`
      SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
      FROM documents d
      JOIN content ct ON d.hash = ct.hash
      WHERE ${filters.join(' AND ')}
      ORDER BY d.path
    `).all(...params);

    if (files.length === 0) {
      if (pathPrefix) {
        console.log(`No files found under qmd://${collectionName}/${pathPrefix}`);
      } else {
        console.log(`No files found in collection: ${collectionName}`);
      }
      closeDb();
      return;
    }

    // Format each size once. The column width is folded with reduce() because
    // Math.max(...array) passes every element as a call argument and throws a
    // RangeError once a collection holds enough files.
    const rows = files.map((file) => ({ file, sizeText: formatBytes(file.size) }));
    const sizeWidth = rows.reduce((width, row) => Math.max(width, row.sizeText.length), 0);

    // Output in ls -l style
    for (const { file, sizeText } of rows) {
      const timeStr = formatLsTime(new Date(file.modified_at));
      console.log(`${sizeText.padStart(sizeWidth)} ${timeStr} ${c.dim}qmd://${collectionName}/${c.reset}${c.cyan}${file.path}${c.reset}`);
    }
    closeDb();
  }
  /**
   * Format a date the way `ls -l` does.
   *
   * Dates more than 180 days (6 x 30) before now are rendered as
   * "Mon DD YYYY"; everything else as "Mon DD HH:MM". The day is
   * space-padded to two characters and all fields use local time.
   *
   * @param {Date} date - Timestamp to format.
   * @returns {string}
   */
  function formatLsTime(date) {
    const MONTH_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    const SIX_MONTHS_MS = 6 * 30 * 24 * 60 * 60 * 1000;
    const twoDigits = (value) => value.toString().padStart(2, '0');

    const cutoff = new Date(new Date().getTime() - SIX_MONTHS_MS);
    const datePart = `${MONTH_NAMES[date.getMonth()]} ${date.getDate().toString().padStart(2, ' ')}`;
    // If file is older than 6 months, show year instead of time
    return date < cutoff
      ? `${datePart} ${date.getFullYear()}`
      : `${datePart} ${twoDigits(date.getHours())}:${twoDigits(date.getMinutes())}`;
  }
  // Collection management commands
  /**
   * Print a summary of every collection returned by listCollections():
   * name, virtual root, glob pattern, ignore patterns (when the YAML entry
   * has any), active file count and time since the last modification.
   * Collections whose YAML entry sets `includeByDefault: false` are tagged
   * "[excluded]".
   *
   * @returns {void}
   */
  function collectionList() {
    const db = getDb();
    const collections = listCollections(db);
    if (collections.length === 0) {
      console.log("No collections found. Run 'qmd collection add .' to create one.");
      closeDb();
      return;
    }
    console.log(`${c.bold}Collections (${collections.length}):${c.reset}\n`);
    collections.forEach((coll) => {
      // Collections without a recorded modification time are shown as "now".
      const timeAgo = formatTimeAgo(coll.last_modified ? new Date(coll.last_modified) : new Date());
      // The YAML entry carries includeByDefault and the ignore list.
      const yamlColl = getCollectionFromYaml(coll.name);
      const excludeTag = yamlColl?.includeByDefault === false ? ` ${c.yellow}[excluded]${c.reset}` : '';
      console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}${excludeTag}`);
      console.log(` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`);
      if (yamlColl?.ignore?.length) {
        console.log(` ${c.dim}Ignore:${c.reset} ${yamlColl.ignore.join(', ')}`);
      }
      console.log(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
      console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
      console.log();
    });
    closeDb();
  }
  /**
   * Register a new collection and index its files.
   *
   * The collection name defaults to the last component of `pwd` ("root" when
   * there is none). The command exits with code 1 when a collection with that
   * name, or one with the same path and glob pattern, already exists in the
   * YAML config. Otherwise the collection is added through addCollection()
   * (loaded from ../collections.js), the config is resynced and indexFiles()
   * is run for it.
   *
   * @param {string} pwd - Root directory of the collection.
   * @param {string} globPattern - Glob pattern selecting the files to index.
   * @param {string} [name] - Explicit collection name.
   * @returns {Promise<void>}
   */
  async function collectionAdd(pwd, globPattern, name) {
    // If name not provided, generate from pwd basename
    const collName = name || pwd.split('/').filter(Boolean).pop() || 'root';

    // Reject a name that is already taken in YAML
    if (getCollectionFromYaml(collName)) {
      console.error(`${c.yellow}Collection '${collName}' already exists.${c.reset}`);
      console.error(`Use a different name with --name <name>`);
      process.exit(1);
    }

    // Reject a second collection over the same path + pattern
    const duplicate = yamlListCollections()
      .find((entry) => entry.path === pwd && entry.pattern === globPattern);
    if (duplicate) {
      console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
      console.error(` Name: ${duplicate.name} (qmd://${duplicate.name}/)`);
      console.error(` Pattern: ${globPattern}`);
      console.error(`\nUse 'qmd update' to re-index it, or remove it first with 'qmd collection remove ${duplicate.name}'`);
      process.exit(1);
    }

    // Add to YAML config + sync to SQLite
    const collectionsModule = await import("../collections.js");
    collectionsModule.addCollection(collName, pwd, globPattern);
    resyncConfig();

    // Create the collection and index files
    console.log(`Creating collection '${collName}'...`);
    const ignorePatterns = getCollectionFromYaml(collName)?.ignore;
    await indexFiles(pwd, globPattern, collName, false, ignorePatterns);
    console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
  }
  /**
   * Delete a collection from both the database and the YAML config.
   *
   * Exits with code 1 when the YAML config has no collection called `name`.
   * Otherwise removeCollection() is run against the database,
   * yamlRemoveCollectionFn() drops the YAML entry, and the counts reported by
   * removeCollection() are printed.
   *
   * @param {string} name - Collection to remove.
   * @returns {void}
   */
  function collectionRemove(name) {
    // The YAML config decides whether the collection exists.
    if (!getCollectionFromYaml(name)) {
      console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
      console.error(`Run 'qmd collection list' to see available collections.`);
      process.exit(1);
    }
    const summary = removeCollection(getDb(), name);
    // Also remove from YAML config
    yamlRemoveCollectionFn(name);
    closeDb();

    const report = [
      `${c.green}✓${c.reset} Removed collection '${name}'`,
      ` Deleted ${summary.deletedDocs} documents`,
    ];
    if (summary.cleanedHashes > 0) {
      report.push(` Cleaned up ${summary.cleanedHashes} orphaned content hashes`);
    }
    report.forEach((line) => console.log(line));
  }
  /**
   * Rename a collection in the database and in the YAML config.
   *
   * Exits with code 1 when `oldName` is not in the YAML config or when
   * `newName` is already taken there. Otherwise renameCollection() is run
   * against the database and yamlRenameCollectionFn() updates the YAML entry.
   *
   * @param {string} oldName - Current collection name.
   * @param {string} newName - Name to give the collection.
   * @returns {void}
   */
  function collectionRename(oldName, newName) {
    // The source collection must exist in YAML...
    if (!getCollectionFromYaml(oldName)) {
      console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
      console.error(`Run 'qmd collection list' to see available collections.`);
      process.exit(1);
    }
    // ...and the target name must still be free.
    if (getCollectionFromYaml(newName)) {
      console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
      console.error(`Choose a different name or remove the existing collection first.`);
      process.exit(1);
    }
    renameCollection(getDb(), oldName, newName);
    // Also rename in YAML config
    yamlRenameCollectionFn(oldName, newName);
    closeDb();

    const oldRoot = `${c.cyan}qmd://${oldName}/${c.reset}`;
    const newRoot = `${c.cyan}qmd://${newName}/${c.reset}`;
    console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
    console.log(` Virtual paths updated: ${oldRoot} → ${newRoot}`);
  }
  /**
   * Scan a collection directory and bring the document index in sync with the
   * files found on disk: new files are inserted, changed files are updated,
   * files that disappeared are deactivated, and orphaned content is cleaned up.
   *
   * @param {string} [pwd] - Directory to scan; falls back to getPwd() when falsy.
   * @param {string} [globPattern] - Glob evaluated relative to the directory.
   * @param {string} collectionName - Collection the documents belong to (required).
   * @param {boolean} [suppressEmbedNotice] - Skip the "run qmd embed" hint when true.
   * @param {string[]} [ignorePatterns] - Extra glob ignore patterns.
   * @throws {Error} When collectionName is missing.
   */
  async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppressEmbedNotice = false, ignorePatterns) {
    // Validate first so an invalid call neither opens the database nor clears the cache.
    if (!collectionName) {
      throw new Error("Collection name is required. Collections must be defined in ~/.config/qmd/index.yml");
    }
    const db = getDb();
    const resolvedPwd = pwd || getPwd();
    const now = new Date().toISOString();
    const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
    // Clear Ollama cache on index
    clearCache(db);
    console.log(`Collection: ${resolvedPwd} (${globPattern})`);
    progress.indeterminate();
    const allIgnore = [
      ...excludeDirs.map(d => `**/${d}/**`),
      ...(ignorePatterns || []),
    ];
    const allFiles = await fastGlob(globPattern, {
      cwd: resolvedPwd,
      onlyFiles: true,
      followSymbolicLinks: false,
      dot: false,
      ignore: allIgnore,
    });
    // Drop anything that has a dot-prefixed path segment at any depth.
    const files = allFiles.filter(file => !file.split("/").some(part => part.startsWith(".")));
    const total = files.length;
    if (total === 0) {
      progress.clear();
      console.log("No files found matching pattern.");
      // Continue so the deactivation pass can mark previously indexed docs as inactive.
    }
    let indexed = 0;
    let updated = 0;
    let unchanged = 0;
    let processed = 0;
    const seenPaths = new Set();
    const startTime = Date.now();
    // Count one file as handled and move the progress indicator forward.
    const markProcessed = () => {
      processed++;
      progress.set((processed / total) * 100);
    };
    for (const relativeFile of files) {
      const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
      const path = handelize(relativeFile); // Normalize path for token-friendliness
      // Recorded before reading, so a skipped file is not deactivated below.
      seenPaths.add(path);
      let content;
      try {
        content = readFileSync(filepath, "utf-8");
      }
      catch {
        // Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
        markProcessed();
        continue;
      }
      // Skip empty files - nothing useful to index
      if (!content.trim()) {
        // Advance the progress indicator here as well, like the unreadable-file path.
        markProcessed();
        continue;
      }
      const hash = await hashContent(content);
      const title = extractTitle(content, relativeFile);
      // Check if document exists in this collection with this path
      const existing = findActiveDocument(db, collectionName, path);
      if (existing) {
        if (existing.hash === hash) {
          // Hash unchanged, but check if title needs updating
          if (existing.title !== title) {
            updateDocumentTitle(db, existing.id, title, now);
            updated++;
          }
          else {
            unchanged++;
          }
        }
        else {
          // Content changed - insert new content hash and update document
          insertContent(db, hash, content, now);
          const stat = statSync(filepath);
          updateDocument(db, existing.id, title, hash, stat ? new Date(stat.mtime).toISOString() : now);
          updated++;
        }
      }
      else {
        // New document - insert content and document
        indexed++;
        insertContent(db, hash, content, now);
        const stat = statSync(filepath);
        insertDocument(db, collectionName, path, title, hash, stat ? new Date(stat.birthtime).toISOString() : now, stat ? new Date(stat.mtime).toISOString() : now);
      }
      markProcessed();
      const elapsed = (Date.now() - startTime) / 1000;
      const rate = processed / elapsed;
      const remaining = (total - processed) / rate;
      // The estimate is only shown once a few files have been measured.
      const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
      if (isTTY) {
        process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
      }
    }
    // Deactivate documents in this collection that no longer exist
    const allActive = getActiveDocumentPaths(db, collectionName);
    let removed = 0;
    for (const path of allActive) {
      if (!seenPaths.has(path)) {
        deactivateDocument(db, collectionName, path);
        removed++;
      }
    }
    // Clean up orphaned content hashes (content not referenced by any document)
    const orphanedContent = cleanupOrphanedContent(db);
    // Check if vector index needs updating
    const needsEmbedding = getHashesNeedingEmbedding(db);
    progress.clear();
    console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
    if (orphanedContent > 0) {
      console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
    }
    if (needsEmbedding > 0 && !suppressEmbedNotice) {
      console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
    }
    closeDb();
  }
  /**
   * Render a fixed-width text progress bar.
   *
   * The percentage is clamped to 0..100 (NaN counts as 0) so an out-of-range
   * value can never produce a negative repeat count.
   *
   * @param {number} percent - Completion percentage.
   * @param {number} [width=30] - Total bar width in characters.
   * @returns {string} `width` characters of filled and empty blocks.
   */
  function renderProgressBar(percent, width = 30) {
    const numeric = Number(percent);
    const clamped = Number.isNaN(numeric) ? 0 : Math.min(100, Math.max(0, numeric));
    const filled = Math.round((clamped / 100) * width);
    return "█".repeat(filled) + "░".repeat(width - filled);
  }
  /**
   * Parse an optional batch-size style option.
   *
   * @param {string} name - Option name used in the error message.
   * @param {*} value - Raw option value; `undefined` means "not provided".
   * @returns {number|undefined} The parsed positive integer, or undefined.
   * @throws {Error} When the value is not a positive integer.
   */
  function parseEmbedBatchOption(name, value) {
    if (value !== undefined) {
      const numeric = Number(value);
      const isPositiveInteger = Number.isInteger(numeric) && numeric >= 1;
      if (isPositiveInteger) {
        return numeric;
      }
      throw new Error(`${name} must be a positive integer`);
    }
    return undefined;
  }
  /**
   * Validate the optional chunk strategy option.
   *
   * @param {*} value - Raw option value; `undefined` means "not provided".
   * @returns {"auto"|"regex"|undefined} The accepted strategy, or undefined.
   * @throws {Error} When the stringified value is neither "auto" nor "regex".
   */
  function parseChunkStrategy(value) {
    if (value === undefined) {
      return undefined;
    }
    const strategy = String(value);
    switch (strategy) {
      case "auto":
      case "regex":
        return strategy;
      default:
        throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${strategy}")`);
    }
  }
  /**
   * Generate embeddings for content that still needs them and report progress
   * on the terminal.
   *
   * @param {string} [model] - Embedding model identifier passed to generateEmbeddings.
   * @param {boolean} [force=false] - Run even when nothing is reported as pending.
   * @param {{maxDocsPerBatch?: number, maxBatchBytes?: number, chunkStrategy?: string}} [batchOptions]
   *   Optional batching/chunking overrides forwarded to generateEmbeddings.
   */
  async function vectorIndex(model = DEFAULT_EMBED_MODEL_URI, force = false, batchOptions) {
    const storeInstance = getStore();
    const db = storeInstance.db;
    if (force) {
      console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
    }
    // Check if there's work to do before starting
    const hashesToEmbed = getHashesNeedingEmbedding(db);
    if (hashesToEmbed === 0 && !force) {
      console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
      closeDb();
      return;
    }
    console.log(`${c.dim}Model: ${model}${c.reset}\n`);
    if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
      const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
      const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
      console.log(`${c.dim}Batch: ${maxDocsPerBatch} docs / ${formatBytes(maxBatchBytes)}${c.reset}\n`);
    }
    cursor.hide();
    progress.indeterminate();
    const startTime = Date.now();
    let result;
    try {
      result = await generateEmbeddings(storeInstance, {
        force,
        model,
        maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
        maxBatchBytes: batchOptions?.maxBatchBytes,
        chunkStrategy: batchOptions?.chunkStrategy,
        onProgress: (info) => {
          if (info.totalBytes === 0) {
            return;
          }
          const percent = (info.bytesProcessed / info.totalBytes) * 100;
          progress.set(percent);
          const elapsed = (Date.now() - startTime) / 1000;
          // Avoid dividing by zero on the very first callback.
          const bytesPerSec = elapsed > 0 ? info.bytesProcessed / elapsed : 0;
          const remainingBytes = info.totalBytes - info.bytesProcessed;
          const bar = renderProgressBar(percent);
          const percentStr = percent.toFixed(0).padStart(3);
          const throughput = `${formatBytes(bytesPerSec)}/s`;
          // No estimate until there is a measurable, non-zero throughput.
          const eta = elapsed > 2 && bytesPerSec > 0 ? formatETA(remainingBytes / bytesPerSec) : "...";
          const errStr = info.errors > 0 ? ` ${c.yellow}${info.errors} err${c.reset}` : "";
          if (isTTY) {
            process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${info.chunksEmbedded}/${info.totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
          }
        },
      });
    }
    finally {
      // Always restore the terminal state, even when embedding fails.
      progress.clear();
      cursor.show();
    }
    const totalTimeSec = result.durationMs / 1000;
    if (result.chunksEmbedded === 0 && result.docsProcessed === 0) {
      console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
    }
    else {
      console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
      console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${result.chunksEmbedded}${c.reset} chunks from ${c.bold}${result.docsProcessed}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset}`);
      if (result.errors > 0) {
        console.log(`${c.yellow}⚠ ${result.errors} chunks failed${c.reset}`);
      }
    }
    closeDb();
  }
  /**
   * Sanitize a single term for FTS5: keep letters, combining marks, digits,
   * underscores and apostrophes (for contractions like "don't") and drop
   * everything else.
   *
   * Unicode property escapes are used instead of `\w` so non-ASCII words
   * ("café", "日本語") are not stripped.
   *
   * @param {string} term - Raw whitespace-delimited term.
   * @returns {string} The sanitized term (possibly empty).
   */
  function sanitizeFTS5Term(term) {
    return term.replace(/[^\p{L}\p{M}\p{N}_']/gu, '');
  }
  /**
   * Build an FTS5 query that is phrase-aware with a fallback to individual terms.
   *
   * @param {string} query - Free-text user query.
   * @returns {string} "" when no usable term remains, a single quoted term, or
   *   `(phrase) OR (NEAR(...)) OR (term OR term ...)` for multi-term queries.
   */
  function buildFTS5Query(query) {
    // Wrap a value in double quotes, doubling any embedded quote.
    const quote = (value) => `"${value.replace(/"/g, '""')}"`;
    // Sanitize the full query for phrase matching (whitespace is kept here).
    const sanitizedQuery = query.replace(/[^\p{L}\p{M}\p{N}_\s']/gu, '').trim();
    const terms = query
      .split(/\s+/)
      .map(term => sanitizeFTS5Term(term))
      .filter(term => term.length >= 2); // Skip single chars and empty
    if (terms.length === 0) {
      return "";
    }
    if (terms.length === 1) {
      return quote(terms[0]);
    }
    // Strategy: exact phrase OR proximity match OR individual terms
    const phrase = quote(sanitizedQuery);
    const quotedTerms = terms.map(quote);
    // FTS5 NEAR syntax: NEAR(term1 term2, distance)
    const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
    const orTerms = quotedTerms.join(' OR ');
    // Exact phrase > proximity > any term
    return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
  }
  /**
   * Map a BM25 score onto the 0-1 range with a logistic curve, where a higher
   * result means a better match.
   *
   * Only the magnitude of the score is used, so the sign of the input does not
   * matter. A magnitude of 5 maps to exactly 0.5.
   *
   * @param {number} score - Raw BM25 score.
   * @returns {number} Normalized score between 0 and 1.
   */
  function normalizeBM25(score) {
    const magnitude = Math.abs(score);
    const exponent = -(magnitude - 5) / 3;
    return 1 / (1 + Math.exp(exponent));
  }
  /**
   * Highlight query terms in text. Words shorter than 3 characters are
   * skipped, matching is case-insensitive, and the text is returned untouched
   * when colour output is disabled.
   *
   * @param {string} text - Text to decorate.
   * @param {string} query - Whitespace-separated query.
   * @returns {string} Text with each match wrapped in colour codes.
   */
  function highlightTerms(text, query) {
    if (!useColor) {
      return text;
    }
    const words = query.toLowerCase().split(/\s+/).filter(word => word.length >= 3);
    return words.reduce((decorated, word) => {
      // Escape regex metacharacters so the word is matched literally.
      const literal = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const matcher = new RegExp(`(${literal})`, 'gi');
      return decorated.replace(matcher, `${c.yellow}${c.bold}$1${c.reset}`);
    }, text);
  }
  /**
   * Format a 0-1 score as a right-aligned percentage, tinted by value when
   * colour output is enabled (>= 0.7 green, >= 0.4 yellow, otherwise dim).
   *
   * @param {number} score - Score between 0 and 1.
   * @returns {string} Percentage label, optionally wrapped in colour codes.
   */
  function formatScore(score) {
    const label = `${(score * 100).toFixed(0).padStart(3)}%`;
    if (!useColor) {
      return label;
    }
    let tint = c.dim;
    if (score >= 0.7) {
      tint = c.green;
    }
    else if (score >= 0.4) {
      tint = c.yellow;
    }
    return `${tint}${label}${c.reset}`;
  }
  /**
   * Format a number for the explain output using four decimal places.
   *
   * @param {number} value - Value to format.
   * @returns {string} Fixed-point representation.
   */
  function formatExplainNumber(value) {
    const decimals = 4;
    return value.toFixed(decimals);
  }
  /**
   * Shorten a directory path for display by replacing the user's home
   * directory with `~` (used for context paths, not documents).
   *
   * The home prefix only counts when it ends on a path boundary, so a sibling
   * such as `/home/bob2` is left alone when home is `/home/bob`.
   *
   * @param {string} dirpath - Absolute directory path.
   * @returns {string} The shortened path, or `dirpath` unchanged.
   */
  function shortPath(dirpath) {
    const home = homedir();
    if (!home || !dirpath.startsWith(home)) {
      return dirpath;
    }
    const rest = dirpath.slice(home.length);
    const endsOnBoundary = rest === "" || rest.startsWith("/") || rest.startsWith("\\");
    return endsOnBoundary ? `~${rest}` : dirpath;
  }
  /**
   * Emit format-safe empty output for search commands.
   *
   * Machine-readable formats get an empty document of their own kind, "md" and
   * "files" print nothing, and every other format gets a human-readable message.
   *
   * @param {string} format - Output format name.
   * @param {string} [reason="no_results"] - "min_score" selects the threshold message.
   */
  function printEmptySearchResults(format, reason = "no_results") {
    switch (format) {
      case "json":
        console.log("[]");
        return;
      case "csv":
        console.log("docid,score,file,title,context,line,snippet");
        return;
      case "xml":
        console.log("<results></results>");
        return;
      case "md":
      case "files":
        return;
      default:
        break;
    }
    const message = reason === "min_score"
      ? "No results found above minimum score threshold."
      : "No results found.";
    console.log(message);
  }
  // Template used when neither the environment nor the config provides one.
  const DEFAULT_EDITOR_URI_TEMPLATE = "vscode://file/{path}:{line}:{col}";
  /**
   * Percent-encode a filesystem path for embedding in an editor URI.
   *
   * `encodeURI` leaves `?` and `#` untouched, so both are encoded explicitly.
   *
   * @param {string} absolutePath - Path to encode.
   * @returns {string} Encoded path.
   */
  function encodePathForEditorUri(absolutePath) {
    const extraEscapes = { "?": "%3F", "#": "%23" };
    return encodeURI(absolutePath).replace(/[?#]/g, (ch) => extraEscapes[ch]);
  }
  /**
   * Resolve the editor URI template.
   *
   * Lookup order: the QMD_EDITOR_URI environment variable, then the first
   * truthy config key among `editor_uri`, `editor_uri_template`, `editorUri`
   * and (strings only) `editor-uri`, then the built-in default. Any error
   * while reading the config falls back to the default.
   *
   * @returns {string} The template to use.
   */
  function getEditorUriTemplate() {
    const fromEnv = process.env.QMD_EDITOR_URI?.trim();
    if (fromEnv) {
      return fromEnv;
    }
    let fromConfig;
    try {
      const config = loadConfig();
      const candidate = config.editor_uri
        || config.editor_uri_template
        || config.editorUri
        || (typeof config["editor-uri"] === "string" ? config["editor-uri"] : undefined);
      fromConfig = candidate?.trim();
    }
    catch {
      // Ignore config parsing issues and use default template.
      fromConfig = undefined;
    }
    return fromConfig || DEFAULT_EDITOR_URI_TEMPLATE;
  }
  /**
   * Expand an editor URI template.
   *
   * Supported placeholders: `{path}`, `{line}`, `{col}` and `{column}`.
   * Line and column fall back to 1 unless they are finite and positive.
   *
   * Replacer functions are used instead of replacement strings so `$`
   * sequences in the encoded path (`$&`, `$$`, ...) are inserted literally
   * rather than being read as replacement patterns.
   *
   * @param {string} template - URI template containing placeholders.
   * @param {string} absolutePath - Absolute file path.
   * @param {number} line - 1-based line number.
   * @param {number} col - 1-based column number.
   * @returns {string} The expanded URI.
   */
  export function buildEditorUri(template, absolutePath, line, col) {
    const safeLine = Number.isFinite(line) && line > 0 ? Math.floor(line) : 1;
    const safeCol = Number.isFinite(col) && col > 0 ? Math.floor(col) : 1;
    const encodedPath = encodePathForEditorUri(absolutePath);
    return template
      .replace(/\{path\}/g, () => encodedPath)
      .replace(/\{line\}/g, () => String(safeLine))
      .replace(/\{col\}/g, () => String(safeCol))
      .replace(/\{column\}/g, () => String(safeCol));
  }
  /**
   * Wrap text in an OSC 8 terminal hyperlink escape sequence.
   *
   * @param {string} text - Visible link text.
   * @param {string} url - Link target.
   * @param {boolean} [isTTY] - When false the text is returned unchanged;
   *   defaults to whether stdout is a TTY.
   * @returns {string} The hyperlinked text, or the plain text.
   */
  export function termLink(text, url, isTTY = !!process.stdout.isTTY) {
    if (isTTY) {
      const open = `\x1b]8;;${url}\x07`;
      const close = `\x1b]8;;\x07`;
      return `${open}${text}${close}`;
    }
    return text;
  }
  /**
   * Print search results in the requested output format.
   *
   * Results scoring below `opts.minScore` are dropped and the remainder is cut
   * to `opts.limit` before printing. Supported formats are "json", "files",
   * "cli", "md" and "xml"; any other value is printed as CSV.
   *
   * XML attribute values (title, context, name) are escaped for `&`, `<`, `>`
   * and `"`. The element body is printed as-is.
   *
   * @param {Array<object>} results - Rows with score, displayPath, body, and
   *   optionally file, title, context, hash, docid, chunkPos, explain.
   * @param {string} query - Query used for snippet extraction and highlighting.
   * @param {object} opts - Reads format, minScore, limit, full, lineNumbers,
   *   intent and explain.
   */
  function outputResults(results, query, opts) {
    const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
    if (filtered.length === 0) {
      printEmptySearchResults(opts.format, "min_score");
      return;
    }
    // Helper to create qmd:// URI from displayPath
    const toQmdPath = (displayPath) => `qmd://${displayPath}`;
    // Escape a value for use inside a double-quoted XML attribute.
    const escapeXmlAttr = (value) => String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");
    if (opts.format === "json") {
      // JSON output for LLM consumption
      const output = filtered.map(row => {
        const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
        let body = opts.full ? row.body : undefined;
        let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
        if (opts.lineNumbers) {
          if (body) {
            body = addLineNumbers(body);
          }
          if (snippet) {
            snippet = addLineNumbers(snippet);
          }
        }
        // Optional fields are only present when they have a value.
        return {
          ...(docid && { docid: `#${docid}` }),
          score: Math.round(row.score * 100) / 100,
          file: toQmdPath(row.displayPath),
          title: row.title,
          ...(row.context && { context: row.context }),
          ...(body && { body }),
          ...(snippet && { snippet }),
          ...(opts.explain && row.explain && { explain: row.explain }),
        };
      });
      console.log(JSON.stringify(output, null, 2));
    }
    else if (opts.format === "files") {
      // Simple docid,score,filepath,context output
      for (const row of filtered) {
        const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
        const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
        console.log(`#${docid},${row.score.toFixed(2)},${toQmdPath(row.displayPath)}${ctx}`);
      }
    }
    else if (opts.format === "cli") {
      const editorUriTemplate = getEditorUriTemplate();
      const linkDb = getDb();
      for (let i = 0; i < filtered.length; i++) {
        const row = filtered[i];
        if (!row) {
          continue;
        }
        const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
        const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
        // Line 1: filepath with docid
        const virtualPath = row.file.startsWith("qmd://") ? row.file : toQmdPath(row.displayPath);
        const parsed = parseVirtualPath(virtualPath);
        const absolutePath = resolveVirtualPath(linkDb, virtualPath);
        const legacyPath = toQmdPath(row.displayPath);
        const displayPath = parsed?.path || row.displayPath;
        // Only show :line if we actually found a term match in the snippet body (exclude header line).
        const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
        const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
        const lineInfo = hasMatch ? `:${line}` : "";
        const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
        if (process.stdout.isTTY && absolutePath && parsed?.path) {
          // Clickable link that opens the file in the configured editor.
          const linkLine = hasMatch ? line : 1;
          const linkTarget = buildEditorUri(editorUriTemplate, absolutePath, linkLine, 1);
          const clickable = termLink(`${displayPath}${lineInfo}`, linkTarget);
          console.log(`${c.cyan}${clickable}${c.reset}${docidStr}`);
        }
        else {
          console.log(`${c.cyan}${legacyPath}${c.dim}${lineInfo}${c.reset}${docidStr}`);
        }
        // Line 2: Title (if available)
        if (row.title) {
          console.log(`${c.bold}Title: ${row.title}${c.reset}`);
        }
        // Line 3: Context (if available)
        if (row.context) {
          console.log(`${c.dim}Context: ${row.context}${c.reset}`);
        }
        // Line 4: Score
        const score = formatScore(row.score);
        console.log(`Score: ${c.bold}${score}${c.reset}`);
        if (opts.explain && row.explain) {
          const explain = row.explain;
          const ftsScores = explain.ftsScores.length > 0
            ? explain.ftsScores.map(formatExplainNumber).join(", ")
            : "none";
          const vecScores = explain.vectorScores.length > 0
            ? explain.vectorScores.map(formatExplainNumber).join(", ")
            : "none";
          // Three largest RRF contributions; sort a copy so the row is untouched.
          const contribSummary = explain.rrf.contributions
            .slice()
            .sort((a, b) => b.rrfContribution - a.rrfContribution)
            .slice(0, 3)
            .map(contribution => `${contribution.source}/${contribution.queryType}#${contribution.rank}:${formatExplainNumber(contribution.rrfContribution)}`)
            .join(" | ");
          console.log(`${c.dim}Explain: fts=[${ftsScores}] vec=[${vecScores}]${c.reset}`);
          console.log(`${c.dim} RRF: total=${formatExplainNumber(explain.rrf.totalScore)} base=${formatExplainNumber(explain.rrf.baseScore)} bonus=${formatExplainNumber(explain.rrf.topRankBonus)} rank=${explain.rrf.rank}${c.reset}`);
          console.log(`${c.dim} Blend: ${Math.round(explain.rrf.weight * 100)}%*${formatExplainNumber(explain.rrf.positionScore)} + ${Math.round((1 - explain.rrf.weight) * 100)}%*${formatExplainNumber(explain.rerankScore)} = ${formatExplainNumber(explain.blendedScore)}${c.reset}`);
          if (contribSummary.length > 0) {
            console.log(`${c.dim} Top RRF contributions: ${contribSummary}${c.reset}`);
          }
        }
        console.log();
        // Snippet with highlighting (diff-style header included)
        const displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
        const highlighted = highlightTerms(displaySnippet, query);
        console.log(highlighted);
        // Double empty line between results
        if (i < filtered.length - 1) {
          console.log('\n');
        }
      }
    }
    else if (opts.format === "md") {
      for (let i = 0; i < filtered.length; i++) {
        const row = filtered[i];
        if (!row) {
          continue;
        }
        const heading = row.title || row.displayPath;
        const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
        let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content);
        }
        const docidLine = docid ? `**docid:** \`#${docid}\`\n` : "";
        const contextLine = row.context ? `**context:** ${row.context}\n` : "";
        console.log(`---\n# ${heading}\n${docidLine}${contextLine}\n${content}\n`);
      }
    }
    else if (opts.format === "xml") {
      for (const row of filtered) {
        const titleAttr = row.title ? ` title="${escapeXmlAttr(row.title)}"` : "";
        const contextAttr = row.context ? ` context="${escapeXmlAttr(row.context)}"` : "";
        const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
        let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content);
        }
        console.log(`<file docid="#${docid}" name="${escapeXmlAttr(toQmdPath(row.displayPath))}"${titleAttr}${contextAttr}>\n${content}\n</file>\n`);
      }
    }
    else {
      // CSV format
      console.log("docid,score,file,title,context,line,snippet");
      for (const row of filtered) {
        const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
        let content = opts.full ? row.body : snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content, line);
        }
        const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
        const snippetText = content || "";
        console.log(`#${docid},${row.score.toFixed(4)},${escapeCSV(toQmdPath(row.displayPath))},${escapeCSV(row.title || "")},${escapeCSV(row.context || "")},${line},${escapeCSV(snippetText)}`);
      }
    }
  }
  /**
   * Resolve the -c collection filter, which may be a single string, an array
   * of strings, or absent.
   *
   * Every requested name is checked against the YAML config; an unknown name
   * prints an error, closes the database and exits with status 1.
   *
   * @param {string|string[]|undefined} raw - Raw filter value.
   * @param {boolean} [useDefaults=false] - Return the default collection names
   *   when no filter was given.
   * @returns {string[]} Validated collection names (empty when there is no filter).
   */
  function resolveCollectionFilter(raw, useDefaults = false) {
    if (!raw) {
      return useDefaults ? getDefaultCollectionNames() : [];
    }
    const requested = Array.isArray(raw) ? raw : [raw];
    const accepted = [];
    for (const candidate of requested) {
      if (!getCollectionFromYaml(candidate)) {
        console.error(`Collection not found: ${candidate}`);
        closeDb();
        process.exit(1);
      }
      accepted.push(candidate);
    }
    return accepted;
  }
  /**
   * Post-filter results so only files from the given collections remain.
   *
   * With zero or one collection name the input array is returned as-is.
   *
   * @param {Array<{filepath?: string, file?: string}>} results - Search results.
   * @param {string[]} collectionNames - Collection names to keep.
   * @returns {Array<object>} Results whose path starts with `qmd://<name>/`.
   */
  function filterByCollections(results, collectionNames) {
    if (collectionNames.length <= 1) {
      return results;
    }
    const allowedPrefixes = collectionNames.map(name => `qmd://${name}/`);
    const belongsToCollection = (result) => {
      const location = result.filepath || result.file || '';
      for (const prefix of allowedPrefixes) {
        if (location.startsWith(prefix)) {
          return true;
        }
      }
      return false;
    };
    return results.filter(result => belongsToCollection(result));
  }
  /**
   * Parse a query document made of `lex:`, `vec:`, `hyde:` and `intent:` lines.
   *
   * Blank lines are ignored, but reported line numbers refer to the original
   * input. A single plain line or a single `expand:` line is not a structured
   * query and yields null.
   *
   * @param {string} query - Raw query text, possibly multi-line.
   * @returns {{searches: Array<{type: string, query: string, line: number}>, intent: (string|undefined)}|null}
   *   The typed searches plus optional intent, or null when not structured.
   * @throws {Error} On mixed expand/typed lines, empty prefixed lines, repeated
   *   or lone intent lines, and unprefixed lines in a multi-line document.
   */
  function parseStructuredQuery(query) {
    const typedPrefix = /^(lex|vec|hyde):\s*/i;
    const expandPrefix = /^expand:\s*/i;
    const intentPrefix = /^intent:\s*/i;
    // Keep non-blank lines together with their 1-based position in the input.
    const entries = [];
    query.split('\n').forEach((raw, idx) => {
      const text = raw.trim();
      if (text.length > 0) {
        entries.push({ text, number: idx + 1 });
      }
    });
    if (entries.length === 0) {
      return null;
    }
    const isSingleLine = entries.length === 1;
    const searches = [];
    let intent;
    for (const { text, number } of entries) {
      if (expandPrefix.test(text)) {
        if (!isSingleLine) {
          throw new Error(`Line ${number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
        }
        if (!text.replace(expandPrefix, '').trim()) {
          throw new Error('expand: query must include text.');
        }
        return null; // treat as standalone expand query
      }
      if (intentPrefix.test(text)) {
        if (intent !== undefined) {
          throw new Error(`Line ${number}: only one intent: line is allowed per query document.`);
        }
        const intentText = text.replace(intentPrefix, '').trim();
        if (!intentText) {
          throw new Error(`Line ${number}: intent: must include text.`);
        }
        intent = intentText;
        continue;
      }
      const prefixMatch = typedPrefix.exec(text);
      if (prefixMatch) {
        const type = prefixMatch[1].toLowerCase();
        const body = text.slice(prefixMatch[0].length).trim();
        if (!body) {
          throw new Error(`Line ${number} (${type}:) must include text.`);
        }
        if (/\r|\n/.test(body)) {
          throw new Error(`Line ${number} (${type}:) contains a newline. Keep each query on a single line.`);
        }
        searches.push({ type, query: body, line: number });
        continue;
      }
      if (isSingleLine) {
        // Single plain line -> implicit expand
        return null;
      }
      throw new Error(`Line ${number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
    }
    // intent: alone is not a valid query — must have at least one search
    if (intent && searches.length === 0) {
      throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
    }
    return searches.length > 0 ? { searches, intent } : null;
  }
  /**
   * Run a full-text search and print the results.
   *
   * @param {string} query - Search query.
   * @param {object} opts - Reads collection, all, limit and format; the object
   *   is also forwarded to outputResults.
   */
  function search(query, opts) {
    const db = getDb();
    // Validate the collection filter (multiple -c flags are supported);
    // default collections are used when none was specified.
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
    // --all uses a large limit; otherwise over-fetch and let outputResults trim.
    const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
    const rawHits = searchFTS(db, query, fetchLimit, singleCollection);
    const hits = filterByCollections(rawHits, collectionNames);
    // Reshape each hit for outputResults and attach its context.
    const rows = [];
    for (const hit of hits) {
      rows.push({
        file: hit.filepath,
        displayPath: hit.displayPath,
        title: hit.title,
        body: hit.body || "",
        score: hit.score,
        context: getContextForFile(db, hit.filepath),
        hash: hit.hash,
        docid: hit.docid,
      });
    }
    closeDb();
    if (rows.length === 0) {
      printEmptySearchResults(opts.format);
      return;
    }
    outputResults(rows, query, opts);
  }
  /**
   * Log a query expansion as a tree on stderr (CLI progress feedback).
   *
   * The original query comes first, followed by one line per expanded query.
   * Previews are flattened to one line and cut to 72 characters; the last line
   * uses the closing branch glyph.
   *
   * @param {string} originalQuery - Query as typed by the user.
   * @param {Array<{type: string, query: string}>} expanded - Expanded queries.
   */
  function logExpansionTree(originalQuery, expanded) {
    const toPreview = (text) => {
      const flat = text.replace(/\n/g, ' ');
      return flat.length > 72 ? flat.substring(0, 69) + '...' : flat;
    };
    const branches = [
      `${c.dim}├─ ${originalQuery}${c.reset}`,
      ...Array.from(expanded, q => `${c.dim}├─ ${q.type}: ${toPreview(q.query)}${c.reset}`),
    ];
    // Close the tree on the final branch.
    const lastIndex = branches.length - 1;
    branches[lastIndex] = branches[lastIndex].replace('├─', '└─');
    branches.forEach((branch) => {
      process.stderr.write(branch + '\n');
    });
  }
  /**
   * Run a vector search inside an LLM session and print the results.
   *
   * With a single collection the filter is passed to the query itself; with
   * several collections the results are filtered afterwards by their
   * `qmd://<name>/` prefix.
   *
   * @param {string} query - Search query.
   * @param {object} opts - Reads collection, all, limit, minScore, intent and
   *   format; forwarded to outputResults with `limit` set to the result count.
   * @param {string} [_model] - Unused in this function.
   */
  async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
    const store = getStore();
    // Validate the collection filter (multiple -c flags are supported);
    // default collections are used when none was specified.
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
    checkIndexHealth(store.db);
    await withLLMSession(async () => {
      let results = await vectorSearchQuery(store, query, {
        collection: singleCollection,
        limit: opts.all ? 500 : (opts.limit || 10),
        minScore: opts.minScore || 0.3,
        intent: opts.intent,
        hooks: {
          onExpand: (original, expanded) => {
            logExpansionTree(original, expanded);
            process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
          },
        },
      });
      // Post-filter for multi-collection
      if (collectionNames.length > 1) {
        // Build the prefix list once instead of once per result.
        const prefixes = collectionNames.map(n => `qmd://${n}/`);
        results = results.filter(r => prefixes.some(p => r.file.startsWith(p)));
      }
      closeDb();
      if (results.length === 0) {
        printEmptySearchResults(opts.format);
        return;
      }
      const rows = results.map(r => ({
        file: r.file,
        displayPath: r.displayPath,
        title: r.title,
        body: r.body,
        score: r.score,
        context: r.context,
        docid: r.docid,
      }));
      // Results are already limited by the query, so print all of them.
      outputResults(rows, query, { ...opts, limit: results.length });
    }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
  }
  1949. async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rerankModel = DEFAULT_RERANK_MODEL) {
  1950. const store = getStore();
  1951. // Validate collection filter (supports multiple -c flags)
  1952. // Use default collections if none specified
  1953. const collectionNames = resolveCollectionFilter(opts.collection, true);
  1954. const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
  1955. checkIndexHealth(store.db);
  1956. // Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes)
  1957. const parsed = parseStructuredQuery(query);
  1958. // Intent can come from --intent flag or from intent: line in query document
  1959. const intent = opts.intent || parsed?.intent;
  1960. await withLLMSession(async () => {
  1961. let results;
  1962. if (parsed) {
  1963. const structuredQueries = parsed.searches;
  1964. // Structured search — user provided their own query expansions
  1965. const typeLabels = structuredQueries.map(s => s.type).join('+');
  1966. process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
  1967. if (intent) {
  1968. process.stderr.write(`${c.dim}├─ intent: ${intent}${c.reset}\n`);
  1969. }
  1970. // Log each sub-query
  1971. for (const s of structuredQueries) {
  1972. let preview = s.query.replace(/\n/g, ' ');
  1973. if (preview.length > 72)
  1974. preview = preview.substring(0, 69) + '...';
  1975. process.stderr.write(`${c.dim}├─ ${s.type}: ${preview}${c.reset}\n`);
  1976. }
  1977. process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`);
  1978. results = await structuredSearch(store, structuredQueries, {
  1979. collections: singleCollection ? [singleCollection] : undefined,
  1980. limit: opts.all ? 500 : (opts.limit || 10),
  1981. minScore: opts.minScore || 0,
  1982. candidateLimit: opts.candidateLimit,
  1983. skipRerank: opts.skipRerank,
  1984. explain: !!opts.explain,
  1985. intent,
  1986. chunkStrategy: opts.chunkStrategy,
  1987. hooks: {
  1988. onEmbedStart: (count) => {
  1989. process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
  1990. },
  1991. onEmbedDone: (ms) => {
  1992. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  1993. },
  1994. onRerankStart: (chunkCount) => {
  1995. process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
  1996. progress.indeterminate();
  1997. },
  1998. onRerankDone: (ms) => {
  1999. progress.clear();
  2000. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2001. },
  2002. },
  2003. });
  2004. }
  2005. else {
  2006. // Standard hybrid query with automatic expansion
  2007. results = await hybridQuery(store, query, {
  2008. collection: singleCollection,
  2009. limit: opts.all ? 500 : (opts.limit || 10),
  2010. minScore: opts.minScore || 0,
  2011. candidateLimit: opts.candidateLimit,
  2012. skipRerank: opts.skipRerank,
  2013. explain: !!opts.explain,
  2014. intent,
  2015. chunkStrategy: opts.chunkStrategy,
  2016. hooks: {
  2017. onStrongSignal: (score) => {
  2018. process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
  2019. },
  2020. onExpandStart: () => {
  2021. process.stderr.write(`${c.dim}Expanding query...${c.reset}`);
  2022. },
  2023. onExpand: (original, expanded, ms) => {
  2024. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2025. logExpansionTree(original, expanded);
  2026. process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
  2027. },
  2028. onEmbedStart: (count) => {
  2029. process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
  2030. },
  2031. onEmbedDone: (ms) => {
  2032. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2033. },
  2034. onRerankStart: (chunkCount) => {
  2035. process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
  2036. progress.indeterminate();
  2037. },
  2038. onRerankDone: (ms) => {
  2039. progress.clear();
  2040. process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
  2041. },
  2042. },
  2043. });
  2044. }
  2045. // Post-filter for multi-collection
  2046. if (collectionNames.length > 1) {
  2047. results = results.filter(r => {
  2048. const prefixes = collectionNames.map(n => `qmd://${n}/`);
  2049. return prefixes.some(p => r.file.startsWith(p));
  2050. });
  2051. }
  2052. closeDb();
  2053. if (results.length === 0) {
  2054. printEmptySearchResults(opts.format);
  2055. return;
  2056. }
  2057. // Use first lex/vec query for output context, or original query
  2058. const structuredQueries = parsed?.searches;
  2059. const displayQuery = structuredQueries
  2060. ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
  2061. : query;
  2062. // Map to CLI output format — use bestChunk for snippet display
  2063. outputResults(results.map(r => ({
  2064. file: r.file,
  2065. displayPath: r.displayPath,
  2066. title: r.title,
  2067. body: r.bestChunk,
  2068. chunkPos: r.bestChunkPos,
  2069. score: r.score,
  2070. context: r.context,
  2071. docid: r.docid,
  2072. explain: r.explain,
  2073. })), displayQuery, { ...opts, limit: results.length });
  2074. }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
  2075. }
  2076. // Parse CLI arguments using util.parseArgs
  /**
   * Parse `process.argv` into the command, its positional arguments and the
   * normalized option bag consumed by the CLI commands.
   *
   * Unknown flags are tolerated (`strict: false`). When `--index <name>` is
   * present the name is forwarded to `setIndexName` and `setConfigIndexName`
   * as a side effect.
   *
   * @returns {{command: string, args: string[], query: string, opts: object, values: object}}
   *   `command` is the first positional ("" when absent), `args` the remaining
   *   positionals, `query` those positionals joined by single spaces, `opts`
   *   the normalized options and `values` the raw `parseArgs` values.
   */
  function parseCLI() {
    const { values, positionals } = parseArgs({
      args: process.argv.slice(2), // Skip node and script path
      options: {
        // Global options
        index: {
          type: "string",
        },
        context: {
          type: "string",
        },
        help: { type: "boolean", short: "h" },
        version: { type: "boolean", short: "v" },
        skill: { type: "boolean" },
        global: { type: "boolean" },
        yes: { type: "boolean" },
        // Search options
        n: { type: "string" },
        "min-score": { type: "string" },
        all: { type: "boolean" },
        full: { type: "boolean" },
        csv: { type: "boolean" },
        md: { type: "boolean" },
        xml: { type: "boolean" },
        files: { type: "boolean" },
        json: { type: "boolean" },
        explain: { type: "boolean" },
        collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
        // Collection options
        name: { type: "string" }, // collection name
        mask: { type: "string" }, // glob pattern
        // Embed options
        force: { type: "boolean", short: "f" },
        "max-docs-per-batch": { type: "string" },
        "max-batch-mb": { type: "string" },
        // Update options
        pull: { type: "boolean" }, // git pull before update
        refresh: { type: "boolean" },
        // Get options
        l: { type: "string" }, // max lines
        from: { type: "string" }, // start line
        "max-bytes": { type: "string" }, // max bytes for multi-get
        "line-numbers": { type: "boolean" }, // add line numbers to output
        // Query options
        "candidate-limit": { type: "string", short: "C" },
        "no-rerank": { type: "boolean", default: false },
        intent: { type: "string" },
        // Chunking options
        "chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
        // MCP HTTP transport options
        http: { type: "boolean" },
        daemon: { type: "boolean" },
        port: { type: "string" },
      },
      allowPositionals: true,
      strict: false, // Allow unknown options to pass through
    });
    // Select index name (default: "index")
    const indexName = values.index;
    if (indexName) {
      setIndexName(indexName);
      setConfigIndexName(indexName);
    }
    // Output format: the first flag set, in this precedence order, wins.
    const format = ["csv", "md", "xml", "files", "json"].find((flag) => values[flag]) ?? "cli";
    // Default limit: 20 for --files/--json, 5 otherwise
    // --all means return all results (use very large limit)
    const defaultLimit = (format === "files" || format === "json") ? 20 : 5;
    const isAll = !!values.all;
    const requestedLimit = values.n ? parseInt(String(values.n), 10) || defaultLimit : defaultLimit;
    // A non-numeric --candidate-limit parses to NaN; report it as "not set"
    // (undefined, the same value used when the flag is absent) rather than
    // handing NaN to the search functions.
    const parsedCandidateLimit = values["candidate-limit"]
      ? parseInt(String(values["candidate-limit"]), 10)
      : undefined;
    const candidateLimit = Number.isNaN(parsedCandidateLimit) ? undefined : parsedCandidateLimit;
    const opts = {
      format,
      full: !!values.full,
      limit: isAll ? 100000 : requestedLimit,
      minScore: values["min-score"] ? parseFloat(String(values["min-score"])) || 0 : 0,
      all: isAll,
      collection: values.collection,
      lineNumbers: !!values["line-numbers"],
      candidateLimit,
      skipRerank: !!values["no-rerank"],
      explain: !!values.explain,
      intent: values.intent,
      chunkStrategy: parseChunkStrategy(values["chunk-strategy"]),
    };
    return {
      command: positionals[0] || "",
      args: positionals.slice(1),
      query: positionals.slice(1).join(" "),
      opts,
      values,
    };
  }
  /**
   * Resolve the directory the QMD skill is installed into.
   *
   * @param {boolean} globalInstall - When truthy the path is rooted at the
   *   user's home directory, otherwise at the directory returned by `getPwd()`.
   * @returns {string} Absolute path ending in `.agents/skills/qmd`.
   */
  function getSkillInstallDir(globalInstall) {
    const rootDir = globalInstall ? homedir() : getPwd();
    return resolve(rootDir, ".agents", "skills", "qmd");
  }
  /**
   * Resolve the path of the Claude skill link for QMD.
   *
   * @param {boolean} globalInstall - When truthy the path is rooted at the
   *   user's home directory, otherwise at the directory returned by `getPwd()`.
   * @returns {string} Absolute path ending in `.claude/skills/qmd`.
   */
  function getClaudeSkillLinkPath(globalInstall) {
    const rootDir = globalInstall ? homedir() : getPwd();
    return resolve(rootDir, ".claude", "skills", "qmd");
  }
  /**
   * Report whether something exists at `path`, without following a symlink
   * at the final path component (uses `lstatSync`).
   *
   * @param {string} path - Path to probe.
   * @returns {boolean} `true` when `lstatSync` succeeds, `false` when it throws.
   */
  function pathExists(path) {
    let present = true;
    try {
      lstatSync(path);
    }
    catch {
      present = false;
    }
    return present;
  }
  /**
   * Delete whatever is at `path`: real directories are removed recursively,
   * anything else (files, symlinks) is unlinked.
   *
   * @param {string} path - Existing path to remove.
   * @throws When `path` does not exist (`lstatSync` fails) or removal fails.
   */
  function removePath(path) {
    const info = lstatSync(path);
    const isRealDirectory = info.isDirectory() && !info.isSymbolicLink();
    if (!isRealDirectory) {
      unlinkSync(path);
      return;
    }
    rmSync(path, { recursive: true, force: true });
  }
  /**
   * Print the embedded QMD skill: a two-line header via `console.log`, then
   * the skill content written to stdout with a trailing newline guaranteed.
   */
  function showSkill() {
    console.log("QMD Skill (embedded)");
    console.log("");
    const skillText = getEmbeddedQmdSkillContent();
    const needsNewline = !skillText.endsWith("\n");
    process.stdout.write(needsNewline ? skillText + "\n" : skillText);
  }
  /**
   * Write every embedded skill file below `targetDir`.
   *
   * @param {string} targetDir - Directory to install into; created if missing.
   * @param {boolean} force - When truthy an existing `targetDir` is removed
   *   first; otherwise an existing path is an error.
   * @throws {Error} When `targetDir` already exists and `force` is falsy.
   */
  function writeEmbeddedSkill(targetDir, force) {
    const alreadyInstalled = pathExists(targetDir);
    if (alreadyInstalled && !force) {
      throw new Error(`Skill already exists: ${targetDir} (use --force to replace it)`);
    }
    if (alreadyInstalled) {
      removePath(targetDir);
    }
    mkdirSync(targetDir, { recursive: true });
    for (const { relativePath: relPath, content } of getEmbeddedQmdSkillFiles()) {
      const outputFile = resolve(targetDir, relPath);
      // Files may live in subdirectories of the skill; make sure those exist.
      mkdirSync(dirname(outputFile), { recursive: true });
      writeFileSync(outputFile, content, "utf-8");
    }
  }
  /**
   * Make `linkPath` a directory symlink pointing (relatively) at `targetDir`.
   *
   * @param {string} linkPath - Where the symlink should live.
   * @param {string} targetDir - Installed skill directory to link to.
   * @param {boolean} force - When truthy, a conflicting entry at `linkPath`
   *   is removed and replaced.
   * @returns {boolean} `false` when the link's parent directory already
   *   resolves to the same directory as the target's parent (no link is
   *   created); `true` when the link exists or was created.
   * @throws {Error} When `linkPath` exists, is not already the expected
   *   symlink, and `force` is falsy.
   */
  function ensureClaudeSymlink(linkPath, targetDir, force) {
    const linkParent = dirname(linkPath);
    // If .claude/skills already resolves to the same directory as .agents/skills,
    // the skill is already visible to Claude and creating qmd -> qmd would loop.
    if (pathExists(linkParent) && realpathSync(dirname(targetDir)) === realpathSync(linkParent)) {
      return false;
    }
    const symlinkValue = relativePath(linkParent, targetDir) || ".";
    mkdirSync(linkParent, { recursive: true });
    if (pathExists(linkPath)) {
      const alreadyLinked = lstatSync(linkPath).isSymbolicLink()
        && readlinkSync(linkPath) === symlinkValue;
      if (alreadyLinked) {
        return true;
      }
      if (!force) {
        throw new Error(`Claude skill path already exists: ${linkPath} (use --force to replace it)`);
      }
      removePath(linkPath);
    }
    symlinkSync(symlinkValue, linkPath, "dir");
    return true;
  }
  /**
   * Decide whether the Claude symlink should be created.
   *
   * @param {string} linkPath - Path shown to the user in the prompt or tip.
   * @param {boolean} autoYes - When truthy the answer is `true` without prompting.
   * @returns {Promise<boolean>} `true` for `autoYes` or an interactive answer
   *   of "y"/"yes" (case-insensitive, trimmed); `false` otherwise, including
   *   when stdin or stdout is not a TTY (a tip is printed instead of prompting).
   */
  async function shouldCreateClaudeSymlink(linkPath, autoYes) {
    if (autoYes) {
      return true;
    }
    const interactive = process.stdin.isTTY && process.stdout.isTTY;
    if (!interactive) {
      console.log(`Tip: create a Claude symlink manually at ${linkPath}`);
      return false;
    }
    const prompt = createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    try {
      const reply = (await prompt.question(`Create a symlink in ${linkPath}? [y/N] `)).trim().toLowerCase();
      return ["y", "yes"].includes(reply);
    }
    finally {
      // Always release stdin, even when the prompt is interrupted.
      prompt.close();
    }
  }
  /**
   * Install the embedded skill and, if the user agrees, link it for Claude.
   *
   * @param {boolean} globalInstall - Passed to `getSkillInstallDir` and
   *   `getClaudeSkillLinkPath` to choose the install location.
   * @param {boolean} force - Allow replacing an existing install or link.
   * @param {boolean} autoYes - Skip the confirmation prompt for the symlink.
   * @returns {Promise<void>}
   */
  async function installSkill(globalInstall, force, autoYes) {
    const skillDir = getSkillInstallDir(globalInstall);
    writeEmbeddedSkill(skillDir, force);
    console.log(`✓ Installed QMD skill to ${skillDir}`);

    const linkPath = getClaudeSkillLinkPath(globalInstall);
    const wantsLink = await shouldCreateClaudeSymlink(linkPath, autoYes);
    if (wantsLink) {
      // ensureClaudeSymlink returns false when no link was needed.
      const outcome = ensureClaudeSymlink(linkPath, skillDir, force)
        ? `✓ Linked Claude skill at ${linkPath}`
        : `✓ Claude already sees the skill via ${dirname(linkPath)}`;
      console.log(outcome);
    }
  }
  /**
   * Print the top-level CLI help to stdout, one `console.log` call per line,
   * ending with the index path reported by `getDbPath()`.
   */
  function showHelp() {
    const printAll = (lines) => {
      for (const text of lines) {
        console.log(text);
      }
    };
    // Commands, maintenance and the introduction to the query syntax.
    printAll([
      "qmd — Quick Markdown Search",
      "",
      "Usage:",
      " qmd <command> [options]",
      "",
      "Primary commands:",
      " qmd query <query> - Hybrid search with auto expansion + reranking (recommended)",
      " qmd query 'lex:..\\nvec:...' - Structured query document (you provide lex/vec/hyde lines)",
      " qmd search <query> - Full-text BM25 keywords (no LLM)",
      " qmd vsearch <query> - Vector similarity only",
      " qmd get <file>[:line] [-l N] - Show a single document, optional line slice",
      " qmd multi-get <pattern> - Batch fetch via glob or comma-separated list",
      " qmd skill show/install - Show or install the packaged QMD skill",
      " qmd mcp - Start the MCP server (stdio transport for AI agents)",
      " qmd bench <fixture.json> - Run search quality benchmarks against a fixture file",
      "",
      "Collections & context:",
      " qmd collection add/list/remove/rename/show - Manage indexed folders",
      " qmd context add/list/rm - Attach human-written summaries",
      " qmd ls [collection[/path]] - Inspect indexed files",
      "",
      "Maintenance:",
      " qmd status - View index + collection health",
      " qmd update [--pull] - Re-index collections (optionally git pull first)",
      " qmd embed [-f] - Generate/refresh vector embeddings",
      " --max-docs-per-batch <n> - Cap docs loaded into memory per embedding batch",
      " --max-batch-mb <n> - Cap UTF-8 MB loaded into memory per embedding batch",
      " qmd cleanup - Clear caches, vacuum DB",
      "",
      "Query syntax (qmd query):",
      " QMD queries are either a single expand query (no prefix) or a multi-line",
      " document where every line is typed with lex:, vec:, or hyde:. This grammar",
      " matches the docs in docs/SYNTAX.md and is enforced in the CLI.",
      "",
    ]);
    // Grammar rules of the query language; each is printed with a one-space indent.
    const grammarRules = [
      `query = expand_query | query_document ;`,
      `expand_query = text | explicit_expand ;`,
      `explicit_expand= "expand:" text ;`,
      `query_document = [ intent_line ] { typed_line } ;`,
      `intent_line = "intent:" text newline ;`,
      `typed_line = type ":" text newline ;`,
      `type = "lex" | "vec" | "hyde" ;`,
      `text = quoted_phrase | plain_text ;`,
      `quoted_phrase = '"' { character } '"' ;`,
      `plain_text = { character } ;`,
      `newline = "\\n" ;`,
    ];
    console.log(" Grammar:");
    printAll(grammarRules.map((rule) => ` ${rule}`));
    // Examples, constraints, integrations and the option reference.
    printAll([
      "",
      " Examples:",
      " qmd query \"how does auth work\" # single-line → implicit expand",
      " qmd query $'lex: CAP theorem\\nvec: consistency' # typed query document",
      " qmd query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search",
      " qmd query $'hyde: Hypothetical answer text' # hyde-only document",
      "",
      " Constraints:",
      " - Standalone expand queries cannot mix with typed lines.",
      " - Query documents allow only lex:, vec:, or hyde: prefixes.",
      " - Each typed line must be single-line text with balanced quotes.",
      "",
      "AI agents & integrations:",
      " - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.",
      " - `qmd skill install` installs the QMD skill into ./.agents/skills/qmd.",
      " - Use `qmd skill install --global` for ~/.agents/skills/qmd.",
      " - `qmd --skill` is kept as an alias for `qmd skill show`.",
      " - Advanced: `qmd mcp --http ...` and `qmd mcp --http --daemon` are optional for custom transports.",
      "",
      "Global options:",
      " --index <name> - Use a named index (default: index)",
      " QMD_EDITOR_URI - Editor link template for clickable TTY search output",
      "",
      "Search options:",
      " -n <num> - Max results (default 5, or 20 for --files/--json)",
      " --all - Return all matches (pair with --min-score)",
      " --min-score <num> - Minimum similarity score",
      " --full - Output full document instead of snippet",
      " -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)",
      " --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)",
      " --line-numbers - Include line numbers in output",
      " --explain - Include retrieval score traces (query --json/CLI)",
      " --files | --json | --csv | --md | --xml - Output format",
      " -c, --collection <name> - Filter by one or more collections",
      "",
      "Embed/query options:",
      " --chunk-strategy <auto|regex> - Chunking mode (default: regex; auto uses AST for code files)",
      "",
      "Multi-get options:",
      " -l <num> - Maximum lines per file",
      " --max-bytes <num> - Skip files larger than N bytes (default 10240)",
      " --json/--csv/--md/--xml/--files - Same formats as search",
      "",
    ]);
    // The index path is looked up last, after all static text has been printed.
    console.log(`Index: ${getDbPath()}`);
  }
  /**
   * Print the CLI version as `qmd <version>`. When a git commit can be read
   * for the script's directory, its short hash is appended in parentheses.
   *
   * The version comes from the package.json two directories above this script.
   *
   * @returns {Promise<void>}
   * @throws When that package.json cannot be read or parsed.
   */
  async function showVersion() {
    const scriptDir = dirname(fileURLToPath(import.meta.url));
    const pkgPath = resolve(scriptDir, "..", "..", "package.json");
    const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
    let commit = "";
    try {
      // Run git without a shell: the install path is passed as a single argv
      // entry, so spaces or shell metacharacters in it cannot split or alter
      // the command.
      const { execFileSync } = await import("node:child_process");
      commit = execFileSync("git", ["-C", scriptDir, "rev-parse", "--short", "HEAD"], {
        encoding: "utf-8",
        stdio: ["pipe", "pipe", "pipe"],
      }).trim();
    }
    catch {
      // Not a git repo or git not available
    }
    const versionStr = commit ? `${pkg.version} (${commit})` : pkg.version;
    console.log(`qmd ${versionStr}`);
  }
  // Main CLI - only run if this is the main module
  // `__filename` is this module's own path; `argv1` is the script path the
  // process was started with (undefined when there is none).
  const __filename = fileURLToPath(import.meta.url);
  const argv1 = process.argv[1];
  // The module counts as "main" when argv[1] names it directly, ends in
  // /qmd.ts or /qmd.js, or resolves to it through symlinks.
  // realpathSync throws for a path that does not exist; such a path cannot be
  // this file, so that case means "not main" instead of an error at load time.
  const isMain = argv1 === __filename
    || argv1?.endsWith("/qmd.ts")
    || argv1?.endsWith("/qmd.js")
    || (argv1 != null && (() => {
      try {
        return realpathSync(argv1) === __filename;
      }
      catch {
        return false;
      }
    })());
  2407. if (isMain) {
  2408. const cli = parseCLI();
  2409. if (cli.values.version) {
  2410. await showVersion();
  2411. process.exit(0);
  2412. }
  2413. if (cli.values.skill) {
  2414. showSkill();
  2415. process.exit(0);
  2416. }
  2417. if (cli.values.help && cli.command === "skill") {
  2418. console.log("Usage: qmd skill <show|install> [options]");
  2419. console.log("");
  2420. console.log("Commands:");
  2421. console.log(" show Print the packaged QMD skill");
  2422. console.log(" install Install into ./.agents/skills/qmd");
  2423. console.log("");
  2424. console.log("Options:");
  2425. console.log(" --global Install into ~/.agents/skills/qmd");
  2426. console.log(" --yes Also create the .claude/skills/qmd symlink");
  2427. console.log(" -f, --force Replace existing install or symlink");
  2428. process.exit(0);
  2429. }
  2430. if (!cli.command || cli.values.help) {
  2431. showHelp();
  2432. process.exit(cli.values.help ? 0 : 1);
  2433. }
  2434. switch (cli.command) {
  2435. case "context": {
  2436. const subcommand = cli.args[0];
  2437. if (!subcommand) {
  2438. console.error("Usage: qmd context <add|list|rm>");
  2439. console.error("");
  2440. console.error("Commands:");
  2441. console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
  2442. console.error(" qmd context add / \"text\" - Add global context to all collections");
  2443. console.error(" qmd context list - List all contexts");
  2444. console.error(" qmd context rm <path> - Remove context");
  2445. process.exit(1);
  2446. }
  2447. switch (subcommand) {
  2448. case "add": {
  2449. if (cli.args.length < 2) {
  2450. console.error("Usage: qmd context add [path] \"text\"");
  2451. console.error("");
  2452. console.error("Examples:");
  2453. console.error(" qmd context add \"Context for current directory\"");
  2454. console.error(" qmd context add . \"Context for current directory\"");
  2455. console.error(" qmd context add /subfolder \"Context for subfolder\"");
  2456. console.error(" qmd context add / \"Global context for all collections\"");
  2457. console.error("");
  2458. console.error(" Using virtual paths:");
  2459. console.error(" qmd context add qmd://journals/ \"Context for entire journals collection\"");
  2460. console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\"");
  2461. process.exit(1);
  2462. }
  2463. let pathArg;
  2464. let contextText;
  2465. // Check if first arg looks like a path or if it's the context text
  2466. const firstArg = cli.args[1] || '';
  2467. const secondArg = cli.args[2];
  2468. if (secondArg) {
  2469. // Two args: path + context
  2470. pathArg = firstArg;
  2471. contextText = cli.args.slice(2).join(" ");
  2472. }
  2473. else {
  2474. // One arg: context only (use current directory)
  2475. pathArg = undefined;
  2476. contextText = firstArg;
  2477. }
  2478. await contextAdd(pathArg, contextText);
  2479. break;
  2480. }
  2481. case "list": {
  2482. contextList();
  2483. break;
  2484. }
  2485. case "rm":
  2486. case "remove": {
  2487. if (cli.args.length < 2 || !cli.args[1]) {
  2488. console.error("Usage: qmd context rm <path>");
  2489. console.error("Examples:");
  2490. console.error(" qmd context rm /");
  2491. console.error(" qmd context rm qmd://journals/2024");
  2492. process.exit(1);
  2493. }
  2494. contextRemove(cli.args[1]);
  2495. break;
  2496. }
  2497. default:
  2498. console.error(`Unknown subcommand: ${subcommand}`);
  2499. console.error("Available: add, list, rm");
  2500. process.exit(1);
  2501. }
  2502. break;
  2503. }
  2504. case "get": {
  2505. if (!cli.args[0]) {
  2506. console.error("Usage: qmd get <filepath>[:line] [--from <line>] [-l <lines>] [--line-numbers]");
  2507. process.exit(1);
  2508. }
  2509. const fromLine = cli.values.from ? parseInt(cli.values.from, 10) : undefined;
  2510. const maxLines = cli.values.l ? parseInt(cli.values.l, 10) : undefined;
  2511. getDocument(cli.args[0], fromLine, maxLines, cli.opts.lineNumbers);
  2512. break;
  2513. }
  2514. case "multi-get": {
  2515. if (!cli.args[0]) {
  2516. console.error("Usage: qmd multi-get <pattern> [-l <lines>] [--max-bytes <bytes>] [--json|--csv|--md|--xml|--files]");
  2517. console.error(" pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list");
  2518. process.exit(1);
  2519. }
  2520. const maxLinesMulti = cli.values.l ? parseInt(cli.values.l, 10) : undefined;
  2521. const maxBytes = cli.values["max-bytes"] ? parseInt(cli.values["max-bytes"], 10) : DEFAULT_MULTI_GET_MAX_BYTES;
  2522. multiGet(cli.args[0], maxLinesMulti, maxBytes, cli.opts.format);
  2523. break;
  2524. }
  2525. case "ls": {
  2526. listFiles(cli.args[0]);
  2527. break;
  2528. }
  2529. case "collection": {
  2530. const subcommand = cli.args[0];
  2531. switch (subcommand) {
  2532. case "list": {
  2533. collectionList();
  2534. break;
  2535. }
  2536. case "add": {
  2537. const pwd = cli.args[1] || getPwd();
  2538. const resolvedPwd = pwd === '.' ? getPwd() : getRealPath(resolve(pwd));
  2539. const globPattern = cli.values.mask || DEFAULT_GLOB;
  2540. const name = cli.values.name;
  2541. await collectionAdd(resolvedPwd, globPattern, name);
  2542. break;
  2543. }
  2544. case "remove":
  2545. case "rm": {
  2546. if (!cli.args[1]) {
  2547. console.error("Usage: qmd collection remove <name>");
  2548. console.error(" Use 'qmd collection list' to see available collections");
  2549. process.exit(1);
  2550. }
  2551. collectionRemove(cli.args[1]);
  2552. break;
  2553. }
  2554. case "rename":
  2555. case "mv": {
  2556. if (!cli.args[1] || !cli.args[2]) {
  2557. console.error("Usage: qmd collection rename <old-name> <new-name>");
  2558. console.error(" Use 'qmd collection list' to see available collections");
  2559. process.exit(1);
  2560. }
  2561. collectionRename(cli.args[1], cli.args[2]);
  2562. break;
  2563. }
  2564. case "set-update":
  2565. case "update-cmd": {
  2566. const name = cli.args[1];
  2567. const cmd = cli.args.slice(2).join(' ') || null;
  2568. if (!name) {
  2569. console.error("Usage: qmd collection update-cmd <name> [command]");
  2570. console.error(" Set the command to run before indexing (e.g., 'git pull')");
  2571. console.error(" Omit command to clear it");
  2572. process.exit(1);
  2573. }
  2574. const { updateCollectionSettings, getCollection } = await import("../collections.js");
  2575. const col = getCollection(name);
  2576. if (!col) {
  2577. console.error(`Collection not found: ${name}`);
  2578. process.exit(1);
  2579. }
  2580. updateCollectionSettings(name, { update: cmd });
  2581. if (cmd) {
  2582. console.log(`✓ Set update command for '${name}': ${cmd}`);
  2583. }
  2584. else {
  2585. console.log(`✓ Cleared update command for '${name}'`);
  2586. }
  2587. break;
  2588. }
  2589. case "include":
  2590. case "exclude": {
  2591. const name = cli.args[1];
  2592. if (!name) {
  2593. console.error(`Usage: qmd collection ${subcommand} <name>`);
  2594. console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`);
  2595. process.exit(1);
  2596. }
  2597. const { updateCollectionSettings, getCollection } = await import("../collections.js");
  2598. const col = getCollection(name);
  2599. if (!col) {
  2600. console.error(`Collection not found: ${name}`);
  2601. process.exit(1);
  2602. }
  2603. const include = subcommand === 'include';
  2604. updateCollectionSettings(name, { includeByDefault: include });
  2605. console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`);
  2606. break;
  2607. }
  2608. case "show":
  2609. case "info": {
  2610. const name = cli.args[1];
  2611. if (!name) {
  2612. console.error("Usage: qmd collection show <name>");
  2613. process.exit(1);
  2614. }
  2615. const { getCollection } = await import("../collections.js");
  2616. const col = getCollection(name);
  2617. if (!col) {
  2618. console.error(`Collection not found: ${name}`);
  2619. process.exit(1);
  2620. }
  2621. console.log(`Collection: ${name}`);
  2622. console.log(` Path: ${col.path}`);
  2623. console.log(` Pattern: ${col.pattern}`);
  2624. console.log(` Include: ${col.includeByDefault !== false ? 'yes (default)' : 'no'}`);
  2625. if (col.update) {
  2626. console.log(` Update: ${col.update}`);
  2627. }
  2628. if (col.context) {
  2629. const ctxCount = Object.keys(col.context).length;
  2630. console.log(` Contexts: ${ctxCount}`);
  2631. }
  2632. break;
  2633. }
  2634. case "help":
  2635. case undefined: {
  2636. console.log("Usage: qmd collection <command> [options]");
  2637. console.log("");
  2638. console.log("Commands:");
  2639. console.log(" list List all collections");
  2640. console.log(" add <path> [--name NAME] Add a collection");
  2641. console.log(" remove <name> Remove a collection");
  2642. console.log(" rename <old> <new> Rename a collection");
  2643. console.log(" show <name> Show collection details");
  2644. console.log(" update-cmd <name> [cmd] Set pre-update command (e.g., 'git pull')");
  2645. console.log(" include <name> Include in default queries");
  2646. console.log(" exclude <name> Exclude from default queries");
  2647. console.log("");
  2648. console.log("Examples:");
  2649. console.log(" qmd collection add ~/notes --name notes");
  2650. console.log(" qmd collection update-cmd brain 'git pull'");
  2651. console.log(" qmd collection exclude archive");
  2652. process.exit(0);
  2653. }
  2654. default:
  2655. console.error(`Unknown subcommand: ${subcommand}`);
  2656. console.error("Run 'qmd collection help' for usage");
  2657. process.exit(1);
  2658. }
  2659. break;
  2660. }
  2661. case "status":
  2662. await showStatus();
  2663. break;
  2664. case "update":
  2665. await updateCollections();
  2666. break;
  2667. case "embed":
  2668. try {
  2669. const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
  2670. const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
  2671. const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
  2672. await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
  2673. maxDocsPerBatch,
  2674. maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
  2675. chunkStrategy: embedChunkStrategy,
  2676. });
  2677. }
  2678. catch (error) {
  2679. console.error(error instanceof Error ? error.message : String(error));
  2680. process.exit(1);
  2681. }
  2682. break;
  2683. case "pull": {
  2684. const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
  2685. const models = [
  2686. DEFAULT_EMBED_MODEL_URI,
  2687. DEFAULT_GENERATE_MODEL_URI,
  2688. DEFAULT_RERANK_MODEL_URI,
  2689. ];
  2690. console.log(`${c.bold}Pulling models${c.reset}`);
  2691. const results = await pullModels(models, {
  2692. refresh,
  2693. cacheDir: DEFAULT_MODEL_CACHE_DIR,
  2694. });
  2695. for (const result of results) {
  2696. const size = formatBytes(result.sizeBytes);
  2697. const note = result.refreshed ? "refreshed" : "cached/checked";
  2698. console.log(`- ${result.model} -> ${result.path} (${size}, ${note})`);
  2699. }
  2700. break;
  2701. }
  2702. case "search":
  2703. if (!cli.query) {
  2704. console.error("Usage: qmd search [options] <query>");
  2705. process.exit(1);
  2706. }
  2707. search(cli.query, cli.opts);
  2708. break;
  2709. case "vsearch":
  2710. case "vector-search": // undocumented alias
  2711. if (!cli.query) {
  2712. console.error("Usage: qmd vsearch [options] <query>");
  2713. process.exit(1);
  2714. }
  2715. // Default min-score for vector search is 0.3
  2716. if (!cli.values["min-score"]) {
  2717. cli.opts.minScore = 0.3;
  2718. }
  2719. await vectorSearch(cli.query, cli.opts);
  2720. break;
  2721. case "query":
  2722. case "deep-search": // undocumented alias
  2723. if (!cli.query) {
  2724. console.error("Usage: qmd query [options] <query>");
  2725. process.exit(1);
  2726. }
  2727. await querySearch(cli.query, cli.opts);
  2728. break;
  2729. case "bench": {
  2730. const fixturePath = cli.args[0];
  2731. if (!fixturePath) {
  2732. console.error("Usage: qmd bench <fixture.json> [--json] [-c collection]");
  2733. console.error("");
  2734. console.error("Run search quality benchmarks against a fixture file.");
  2735. console.error("See src/bench/fixtures/example.json for the fixture format.");
  2736. process.exit(1);
  2737. }
  2738. const { runBenchmark } = await import("../bench/bench.js");
  2739. const benchCollection = cli.opts.collection;
  2740. await runBenchmark(fixturePath, {
  2741. json: !!cli.opts.json,
  2742. collection: Array.isArray(benchCollection) ? benchCollection[0] : benchCollection,
  2743. });
  2744. break;
  2745. }
  2746. case "mcp": {
  2747. const sub = cli.args[0]; // stop | status | undefined
  2748. // Cache dir for PID/log files — same dir as the index
  2749. const cacheDir = process.env.XDG_CACHE_HOME
  2750. ? resolve(process.env.XDG_CACHE_HOME, "qmd")
  2751. : resolve(homedir(), ".cache", "qmd");
  2752. const pidPath = resolve(cacheDir, "mcp.pid");
  2753. // Subcommands take priority over flags
  2754. if (sub === "stop") {
  2755. if (!existsSync(pidPath)) {
  2756. console.log("Not running (no PID file).");
  2757. process.exit(0);
  2758. }
  2759. const pid = parseInt(readFileSync(pidPath, "utf-8").trim());
  2760. try {
  2761. process.kill(pid, 0); // alive?
  2762. process.kill(pid, "SIGTERM");
  2763. unlinkSync(pidPath);
  2764. console.log(`Stopped QMD MCP server (PID ${pid}).`);
  2765. }
  2766. catch {
  2767. unlinkSync(pidPath);
  2768. console.log("Cleaned up stale PID file (server was not running).");
  2769. }
  2770. process.exit(0);
  2771. }
  2772. if (cli.values.http) {
  2773. const port = Number(cli.values.port) || 8181;
  2774. if (cli.values.daemon) {
  2775. // Guard: check if already running
  2776. if (existsSync(pidPath)) {
  2777. const existingPid = parseInt(readFileSync(pidPath, "utf-8").trim());
  2778. try {
  2779. process.kill(existingPid, 0); // alive?
  2780. console.error(`Already running (PID ${existingPid}). Run 'qmd mcp stop' first.`);
  2781. process.exit(1);
  2782. }
  2783. catch {
  2784. // Stale PID file — continue
  2785. }
  2786. }
  2787. mkdirSync(cacheDir, { recursive: true });
  2788. const logPath = resolve(cacheDir, "mcp.log");
  2789. const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
  2790. const selfPath = fileURLToPath(import.meta.url);
  2791. const spawnArgs = selfPath.endsWith(".ts")
  2792. ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
  2793. : [selfPath, "mcp", "--http", "--port", String(port)];
  2794. const child = nodeSpawn(process.execPath, spawnArgs, {
  2795. stdio: ["ignore", logFd, logFd],
  2796. detached: true,
  2797. });
  2798. child.unref();
  2799. closeSync(logFd); // parent's copy; child inherited the fd
  2800. writeFileSync(pidPath, String(child.pid));
  2801. console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`);
  2802. console.log(`Logs: ${logPath}`);
  2803. process.exit(0);
  2804. }
  2805. // Foreground HTTP mode — remove top-level cursor handlers so the
  2806. // async cleanup handlers in startMcpHttpServer actually run.
  2807. process.removeAllListeners("SIGTERM");
  2808. process.removeAllListeners("SIGINT");
  2809. const { startMcpHttpServer } = await import("../mcp/server.js");
  2810. try {
  2811. await startMcpHttpServer(port);
  2812. }
  2813. catch (e) {
  2814. if (e?.code === "EADDRINUSE") {
  2815. console.error(`Port ${port} already in use. Try a different port with --port.`);
  2816. process.exit(1);
  2817. }
  2818. throw e;
  2819. }
  2820. }
  2821. else {
  2822. // Default: stdio transport
  2823. const { startMcpServer } = await import("../mcp/server.js");
  2824. await startMcpServer();
  2825. }
  2826. break;
  2827. }
  2828. case "skill": {
  2829. const subcommand = cli.args[0];
  2830. switch (subcommand) {
  2831. case "show": {
  2832. showSkill();
  2833. break;
  2834. }
  2835. case "install": {
  2836. try {
  2837. await installSkill(Boolean(cli.values.global), Boolean(cli.values.force), Boolean(cli.values.yes));
  2838. }
  2839. catch (error) {
  2840. console.error(error instanceof Error ? error.message : String(error));
  2841. process.exit(1);
  2842. }
  2843. break;
  2844. }
  2845. case "help":
  2846. case undefined: {
  2847. console.log("Usage: qmd skill <show|install> [options]");
  2848. console.log("");
  2849. console.log("Commands:");
  2850. console.log(" show Print the packaged QMD skill");
  2851. console.log(" install Install into ./.agents/skills/qmd");
  2852. console.log("");
  2853. console.log("Options:");
  2854. console.log(" --global Install into ~/.agents/skills/qmd");
  2855. console.log(" --yes Also create the .claude/skills/qmd symlink");
  2856. console.log(" -f, --force Replace existing install or symlink");
  2857. process.exit(0);
  2858. }
  2859. default:
  2860. console.error(`Unknown subcommand: ${subcommand}`);
  2861. console.error("Run 'qmd skill help' for usage");
  2862. process.exit(1);
  2863. }
  2864. break;
  2865. }
  2866. case "cleanup": {
  2867. const db = getDb();
  2868. // 1. Clear llm_cache
  2869. const cacheCount = deleteLLMCache(db);
  2870. console.log(`${c.green}✓${c.reset} Cleared ${cacheCount} cached API responses`);
  2871. // 2. Remove orphaned vectors
  2872. const orphanedVecs = cleanupOrphanedVectors(db);
  2873. if (orphanedVecs > 0) {
  2874. console.log(`${c.green}✓${c.reset} Removed ${orphanedVecs} orphaned embedding chunks`);
  2875. }
  2876. else {
  2877. console.log(`${c.dim}No orphaned embeddings to remove${c.reset}`);
  2878. }
  2879. // 3. Remove inactive documents
  2880. const inactiveDocs = deleteInactiveDocuments(db);
  2881. if (inactiveDocs > 0) {
  2882. console.log(`${c.green}✓${c.reset} Removed ${inactiveDocs} inactive document records`);
  2883. }
  2884. // 4. Vacuum to reclaim space
  2885. vacuumDatabase(db);
  2886. console.log(`${c.green}✓${c.reset} Database vacuumed`);
  2887. closeDb();
  2888. break;
  2889. }
  2890. default:
  2891. console.error(`Unknown command: ${cli.command}`);
  2892. console.error("Run 'qmd --help' for usage.");
  2893. process.exit(1);
  2894. }
  // Every command except "mcp" is finished at this point: dispose the default
  // llama.cpp instance and exit successfully. "mcp" skips this step.
  const isMcpCommand = cli.command === "mcp";
  if (!isMcpCommand) {
    await disposeDefaultLlamaCpp();
    process.exit(0);
  }
  2899. } // end if (main module)