qmd.ts 88 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564
  1. #!/usr/bin/env bun
  2. import { Database } from "bun:sqlite";
  3. import { Glob, $ } from "bun";
  4. import { parseArgs } from "util";
  5. import { readFileSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
  6. import {
  7. getPwd,
  8. getRealPath,
  9. homedir,
  10. resolve,
  11. enableProductionMode,
  12. searchFTS,
  13. extractSnippet,
  14. getContextForFile,
  15. getContextForPath,
  16. listCollections,
  17. removeCollection,
  18. renameCollection,
  19. findSimilarFiles,
  20. findDocumentByDocid,
  21. isDocid,
  22. matchFilesByGlob,
  23. getHashesNeedingEmbedding,
  24. getHashesForEmbedding,
  25. clearAllEmbeddings,
  26. insertEmbedding,
  27. getStatus,
  28. hashContent,
  29. extractTitle,
  30. formatDocForEmbedding,
  31. chunkDocumentByTokens,
  32. clearCache,
  33. getCacheKey,
  34. getCachedResult,
  35. setCachedResult,
  36. getIndexHealth,
  37. parseVirtualPath,
  38. buildVirtualPath,
  39. isVirtualPath,
  40. resolveVirtualPath,
  41. toVirtualPath,
  42. insertContent,
  43. insertDocument,
  44. findActiveDocument,
  45. updateDocumentTitle,
  46. updateDocument,
  47. deactivateDocument,
  48. getActiveDocumentPaths,
  49. cleanupOrphanedContent,
  50. deleteLLMCache,
  51. deleteInactiveDocuments,
  52. cleanupOrphanedVectors,
  53. vacuumDatabase,
  54. getCollectionsWithoutContext,
  55. getTopLevelPathsWithoutContext,
  56. handelize,
  57. hybridQuery,
  58. vectorSearchQuery,
  59. addLineNumbers,
  60. type ExpandedQuery,
  61. DEFAULT_EMBED_MODEL,
  62. DEFAULT_RERANK_MODEL,
  63. DEFAULT_GLOB,
  64. DEFAULT_MULTI_GET_MAX_BYTES,
  65. createStore,
  66. getDefaultDbPath,
  67. } from "./store.js";
  68. import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "./llm.js";
  69. import {
  70. formatSearchResults,
  71. formatDocuments,
  72. escapeXml,
  73. escapeCSV,
  74. type OutputFormat,
  75. } from "./formatter.js";
  76. import {
  77. getCollection as getCollectionFromYaml,
  78. listCollections as yamlListCollections,
  79. addContext as yamlAddContext,
  80. removeContext as yamlRemoveContext,
  81. setGlobalContext,
  82. listAllContexts,
  83. setConfigIndexName,
  84. } from "./collections.js";
  // Production mode permits falling back to the default database path.
  // Tests must set INDEX_PATH or call createStore() with an explicit path instead.
  enableProductionMode();

  // =============================================================================
  // Store/DB lifecycle (no legacy singletons in store.ts)
  // =============================================================================

  // Store handle created on demand by getStore(); null while no index is open.
  let store: ReturnType<typeof createStore> | null = null;
  // Database path chosen through setIndexName(); undefined leaves the choice to createStore().
  let storeDbPathOverride: string | undefined = undefined;
  /**
   * Return the shared store, creating it on first use.
   *
   * A new store is created with `storeDbPathOverride`, so the path selected by
   * `setIndexName()` takes effect the next time the handle is (re)opened.
   */
  function getStore(): ReturnType<typeof createStore> {
    return store ?? (store = createStore(storeDbPathOverride));
  }
  /** Return the database handle of the shared store, opening the store if necessary. */
  function getDb(): Database {
    const { db } = getStore();
    return db;
  }
  /** Close the shared store if one is open and drop the handle so it can be reopened later. */
  function closeDb(): void {
    if (store === null) return;
    store.close();
    store = null;
  }
  /**
   * Resolve the database path currently in effect.
   *
   * Preference order: the path of the open store, then the override set by
   * `setIndexName()`, then the default path.
   */
  function getDbPath(): string {
    const openPath = store?.dbPath;
    if (openPath != null) return openPath;
    if (storeDbPathOverride != null) return storeDbPathOverride;
    return getDefaultDbPath();
  }
  /**
   * Select which named index subsequent store operations use.
   *
   * @param name Index name; `null` (or an empty string) clears the override.
   */
  function setIndexName(name: string | null): void {
    if (name) {
      storeDbPathOverride = getDefaultDbPath(name);
    } else {
      storeDbPathOverride = undefined;
    }
    // Drop any open handle so the next access opens the newly selected index.
    closeDb();
  }
  /**
   * Ensure the vector table exists on the active store for the given dimensionality.
   *
   * @param _db Unused; the store owns the database handle.
   * @param dimensions Dimension count forwarded to the store's `ensureVecTable`.
   */
  function ensureVecTable(_db: Database, dimensions: number): void {
    const activeStore = getStore();
    activeStore.ensureVecTable(dimensions);
  }
  // Terminal colors: enabled only when NO_COLOR is unset/empty and stdout is a TTY.
  const useColor = !process.env.NO_COLOR && process.stdout.isTTY;
  // Escape sequences keyed by style name; every entry is "" when color is disabled.
  const c = (() => {
    const paint = (sequence: string): string => (useColor ? sequence : "");
    return {
      reset: paint("\x1b[0m"),
      dim: paint("\x1b[2m"),
      bold: paint("\x1b[1m"),
      cyan: paint("\x1b[36m"),
      yellow: paint("\x1b[33m"),
      green: paint("\x1b[32m"),
      magenta: paint("\x1b[35m"),
      blue: paint("\x1b[34m"),
    };
  })();
  // Terminal cursor control (escape sequences are written to stderr)
  const cursor = {
    hide: (): void => {
      process.stderr.write('\x1b[?25l');
    },
    show: (): void => {
      process.stderr.write('\x1b[?25h');
    },
  };
  // Ensure cursor is restored on exit: on SIGINT/SIGTERM show it again, then
  // exit with status 130 or 143 respectively.
  for (const [signal, exitCode] of [['SIGINT', 130], ['SIGTERM', 143]] as const) {
    process.on(signal, () => {
      cursor.show();
      process.exit(exitCode);
    });
  }
  // Terminal progress bar using OSC 9;4 escape sequence (all output goes to stderr)
  const progress = (() => {
    const send = (sequence: string): void => {
      process.stderr.write(sequence);
    };
    return {
      // Report a percentage, rounded to the nearest integer.
      set(percent: number): void {
        send(`\x1b]9;4;1;${Math.round(percent)}\x07`);
      },
      // Emit the "0" state sequence.
      clear(): void {
        send(`\x1b]9;4;0\x07`);
      },
      // Emit the "3" state sequence.
      indeterminate(): void {
        send(`\x1b]9;4;3\x07`);
      },
      // Emit the "2" state sequence.
      error(): void {
        send(`\x1b]9;4;2\x07`);
      },
    };
  })();
  /**
   * Format a duration in seconds as a short human-readable ETA.
   *
   * The value is rounded to whole seconds BEFORE it is split into units, so a
   * fractional input can never render as "60s" or "1m 60s" (the rounding carries
   * into the next unit instead). Negative inputs are clamped to zero.
   *
   * @param seconds Remaining time in seconds; may be fractional.
   * @returns "Ns" below one minute, "Nm Ns" below one hour, otherwise "Nh Nm".
   *          Returns "?" when the input is NaN or infinite (e.g. a zero-rate estimate).
   */
  function formatETA(seconds: number): string {
    if (!Number.isFinite(seconds)) return "?";
    const total = Math.max(0, Math.round(seconds));
    if (total < 60) return `${total}s`;
    if (total < 3600) return `${Math.floor(total / 60)}m ${total % 60}s`;
    return `${Math.floor(total / 3600)}h ${Math.floor((total % 3600) / 60)}m`;
  }
  /**
   * Check index health and print warnings/tips to stderr.
   *
   * Two independent checks are made on the values returned by `getIndexHealth`:
   *  - documents still needing embeddings (a warning at 10% or more, a dim tip below that);
   *  - the index not having been updated for 14 days or more.
   *
   * @param db Database handle passed through to `getIndexHealth`.
   */
  function checkIndexHealth(db: Database): void {
    const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);

    // Warn if many docs need embedding
    if (needsEmbedding > 0) {
      // Guard the division: with zero total documents the ratio would be
      // Infinity/NaN and print "Infinity%". Treat that case as 100%.
      const pct = totalDocs > 0 ? Math.round((needsEmbedding / totalDocs) * 100) : 100;
      if (pct >= 10) {
        process.stderr.write(`${c.yellow}Warning: ${needsEmbedding} documents (${pct}%) need embeddings. Run 'qmd embed' for better results.${c.reset}\n`);
      } else {
        process.stderr.write(`${c.dim}Tip: ${needsEmbedding} documents need embeddings. Run 'qmd embed' to index them.${c.reset}\n`);
      }
    }

    // Check if most recent document update is older than 2 weeks
    if (daysStale !== null && daysStale >= 14) {
      process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'qmd update' to refresh.${c.reset}\n`);
    }
  }
  /**
   * Compute a short display path for a document that is not already taken.
   *
   * The path is first made relative to the collection (prefixed with the
   * collection directory's own name) when the file lives under it. Starting
   * from "parent folder + filename" (or just the filename when that is all
   * there is), leading segments are added one at a time until the candidate is
   * not present in `existingPaths`.
   *
   * @param filepath Path of the document.
   * @param collectionPath Root directory of the collection; one trailing slash is ignored.
   * @param existingPaths Display paths that are already in use.
   * @returns The first unused candidate, or `filepath` unchanged if every candidate is taken.
   */
  function computeDisplayPath(
    filepath: string,
    collectionPath: string,
    existingPaths: Set<string>
  ): string {
    const root = collectionPath.replace(/\/$/, '');
    const rootName = root.split('/').pop() || '';

    // Under the collection: collection name + remainder; otherwise the path as given.
    const scoped = filepath.startsWith(root + '/')
      ? rootName + filepath.slice(root.length)
      : filepath;

    const segments = scoped.split('/').filter(Boolean);
    const shortest = Math.min(2, segments.length);

    let start = segments.length - shortest;
    while (start >= 0) {
      const candidate = segments.slice(start).join('/');
      if (!existingPaths.has(candidate)) {
        return candidate;
      }
      start -= 1;
    }

    // Every candidate is taken: fall back to the full path.
    return filepath;
  }
  /**
   * Format how long ago `date` was, relative to the current time, using the
   * largest fitting unit: "Ns ago", "Nm ago", "Nh ago" or "Nd ago".
   *
   * Dates in the future (e.g. from clock skew) are reported as "0s ago" rather
   * than as a negative age, and an invalid Date yields "unknown" instead of
   * "NaNd ago".
   *
   * @param date The moment to describe.
   * @returns The relative-age string.
   */
  function formatTimeAgo(date: Date): string {
    const timestamp = date.getTime();
    if (Number.isNaN(timestamp)) return "unknown";

    const seconds = Math.max(0, Math.floor((Date.now() - timestamp) / 1000));
    if (seconds < 60) return `${seconds}s ago`;
    const minutes = Math.floor(seconds / 60);
    if (minutes < 60) return `${minutes}m ago`;
    const hours = Math.floor(minutes / 60);
    if (hours < 24) return `${hours}h ago`;
    const days = Math.floor(hours / 24);
    return `${days}d ago`;
  }
  /**
   * Format a byte count with a binary-scaled unit.
   *
   * Values below 1024 are printed as-is with "B"; larger values are divided by
   * the matching power of 1024 and printed with one decimal as KB, MB or GB
   * (GB is the largest unit, so bigger values keep growing in GB).
   *
   * @param bytes Size in bytes.
   * @returns The formatted size, e.g. "1.5 KB".
   */
  function formatBytes(bytes: number): string {
    const KIB = 1024;
    const MIB = KIB * 1024;
    const GIB = MIB * 1024;
    const scaled = (divisor: number, unit: string): string =>
      `${(bytes / divisor).toFixed(1)} ${unit}`;

    if (bytes < KIB) return `${bytes} B`;
    if (bytes < MIB) return scaled(KIB, "KB");
    if (bytes < GIB) return scaled(MIB, "MB");
    return scaled(GIB, "GB");
  }
  /**
   * Print a status report for the current index to stdout.
   *
   * Sections, in order: index path and size, MCP daemon liveness (based on the
   * PID file in the qmd cache directory), document and vector counts,
   * per-collection details with their contexts, example commands, and device
   * (GPU/CPU) information. Device information is best-effort and is skipped
   * silently when it cannot be obtained.
   *
   * The database handle is closed before returning, including when a section throws.
   */
  async function showStatus(): Promise<void> {
    const dbPath = getDbPath();
    const db = getDb();

    try {
      // Collections are defined in YAML; no duplicate cleanup needed.

      // Index size (stays 0 when the file cannot be stat'ed)
      let indexSize = 0;
      try {
        indexSize = statSync(dbPath).size;
      } catch { }

      // Collections info (from YAML + database stats)
      const collections = listCollections(db);

      // Overall stats
      const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number };
      const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get() as { count: number };
      const needsEmbedding = getHashesNeedingEmbedding(db);

      // Most recent update across all collections
      const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };

      console.log(`${c.bold}QMD Status${c.reset}\n`);
      console.log(`Index: ${dbPath}`);
      console.log(`Size: ${formatBytes(indexSize)}`);

      // MCP daemon status (check PID file liveness)
      const mcpCacheDir = Bun.env.XDG_CACHE_HOME
        ? resolve(Bun.env.XDG_CACHE_HOME, "qmd")
        : resolve(homedir(), ".cache", "qmd");
      const mcpPidPath = resolve(mcpCacheDir, "mcp.pid");
      if (existsSync(mcpPidPath)) {
        let mcpPid = Number.NaN;
        try {
          mcpPid = Number.parseInt(readFileSync(mcpPidPath, "utf-8").trim(), 10);
        } catch {
          // Unreadable PID file: handled below like a stale one.
        }

        let running = false;
        // Only probe well-formed PIDs; 0 and negative values address process groups.
        if (Number.isInteger(mcpPid) && mcpPid > 0) {
          try {
            process.kill(mcpPid, 0); // signal 0 only tests for existence
            running = true;
          } catch (err) {
            // EPERM means the process exists but belongs to someone else:
            // it is still running, so the PID file must be kept.
            running = (err as { code?: string } | null)?.code === "EPERM";
          }
        }

        if (running) {
          console.log(`MCP: ${c.green}running${c.reset} (PID ${mcpPid})`);
        } else {
          // Stale PID file cleaned up silently; failing to remove it must not
          // abort the status report.
          try {
            unlinkSync(mcpPidPath);
          } catch { }
        }
      }

      console.log("");
      console.log(`${c.bold}Documents${c.reset}`);
      console.log(` Total: ${totalDocs.count} files indexed`);
      console.log(` Vectors: ${vectorCount.count} embedded`);
      if (needsEmbedding > 0) {
        console.log(` ${c.yellow}Pending: ${needsEmbedding} need embedding${c.reset} (run 'qmd embed')`);
      }
      if (mostRecent.latest) {
        const lastUpdate = new Date(mostRecent.latest);
        console.log(` Updated: ${formatTimeAgo(lastUpdate)}`);
      }

      // Get all contexts grouped by collection (from YAML)
      const allContexts = listAllContexts();
      const contextsByCollection = new Map<string, { path_prefix: string; context: string }[]>();
      for (const ctx of allContexts) {
        let group = contextsByCollection.get(ctx.collection);
        if (!group) {
          group = [];
          contextsByCollection.set(ctx.collection, group);
        }
        group.push({
          path_prefix: ctx.path,
          context: ctx.context
        });
      }

      if (collections.length > 0) {
        console.log(`\n${c.bold}Collections${c.reset}`);
        for (const col of collections) {
          const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never";
          const contexts = contextsByCollection.get(col.name) || [];
          console.log(` ${c.cyan}${col.name}${c.reset} ${c.dim}(qmd://${col.name}/)${c.reset}`);
          console.log(` ${c.dim}Pattern:${c.reset} ${col.glob_pattern}`);
          console.log(` ${c.dim}Files:${c.reset} ${col.active_count} (updated ${lastMod})`);
          if (contexts.length > 0) {
            console.log(` ${c.dim}Contexts:${c.reset} ${contexts.length}`);
            for (const ctx of contexts) {
              // Handle both empty string and '/' as root context
              const pathDisplay = (ctx.path_prefix === '' || ctx.path_prefix === '/') ? '/' : `/${ctx.path_prefix}`;
              // Long context texts are cut to 57 characters plus an ellipsis.
              const contextPreview = ctx.context.length > 60
                ? ctx.context.substring(0, 57) + '...'
                : ctx.context;
              console.log(` ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`);
            }
          }
        }

        // Show examples of virtual paths, using the first collection's name
        const firstCollection = collections[0];
        console.log(`\n${c.bold}Examples${c.reset}`);
        console.log(` ${c.dim}# List files in a collection${c.reset}`);
        if (firstCollection) {
          console.log(` qmd ls ${firstCollection.name}`);
        }
        console.log(` ${c.dim}# Get a document${c.reset}`);
        if (firstCollection) {
          console.log(` qmd get qmd://${firstCollection.name}/path/to/file.md`);
        }
        console.log(` ${c.dim}# Search within a collection${c.reset}`);
        if (firstCollection) {
          console.log(` qmd search "query" -c ${firstCollection.name}`);
        }
      } else {
        console.log(`\n${c.dim}No collections. Run 'qmd collection add .' to index markdown files.${c.reset}`);
      }

      // Device / GPU info
      try {
        const llm = getDefaultLlamaCpp();
        const device = await llm.getDeviceInfo();
        console.log(`\n${c.bold}Device${c.reset}`);
        if (device.gpu) {
          console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
          if (device.gpuDevices.length > 0) {
            // Deduplicate and count GPUs
            const counts = new Map<string, number>();
            for (const name of device.gpuDevices) {
              counts.set(name, (counts.get(name) || 0) + 1);
            }
            const deviceStr = Array.from(counts.entries())
              .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
              .join(', ');
            console.log(` Devices: ${deviceStr}`);
          }
          if (device.vram) {
            console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
          }
        } else {
          console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
          console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
        }
        console.log(` CPU: ${device.cpuCores} math cores`);
      } catch {
        // Don't fail status if LLM init fails
      }
    } finally {
      closeDb();
    }
  }
  /**
   * Re-index every configured collection.
   *
   * For each collection, in order:
   *  1. if its YAML entry defines an `update` command, run it through bash in
   *     the collection directory and echo its output; a non-zero exit status or
   *     a spawn failure terminates the process;
   *  2. re-index the collection's files with `indexFiles`.
   *
   * Afterwards a summary is printed, together with a hint to run 'qmd embed'
   * when some content still lacks vectors.
   */
  async function updateCollections(): Promise<void> {
    const db = getDb();
    // Collections are defined in YAML; no duplicate cleanup needed.

    // Clear Ollama cache on update
    clearCache(db);

    const collections = listCollections(db);
    if (collections.length === 0) {
      console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
      closeDb();
      return;
    }

    // Don't close db here - indexFiles will reuse it and close at the end
    console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);

    for (let i = 0; i < collections.length; i++) {
      const col = collections[i];
      if (!col) continue;
      console.log(`${c.cyan}[${i + 1}/${collections.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`);

      // Execute custom update command if specified in YAML
      const yamlCol = getCollectionFromYaml(col.name);
      if (yamlCol?.update) {
        console.log(`${c.dim} Running update command: ${yamlCol.update}${c.reset}`);
        try {
          const proc = Bun.spawn(["/usr/bin/env", "bash", "-c", yamlCol.update], {
            cwd: col.pwd,
            stdout: "pipe",
            stderr: "pipe",
          });

          // Drain both pipes concurrently. Reading stdout to completion before
          // touching stderr can deadlock once the child fills the stderr pipe
          // buffer while still holding stdout open.
          const [output, errorOutput, exitCode] = await Promise.all([
            new Response(proc.stdout).text(),
            new Response(proc.stderr).text(),
            proc.exited,
          ]);

          if (output.trim()) {
            console.log(output.trim().split('\n').map(l => ` ${l}`).join('\n'));
          }
          if (errorOutput.trim()) {
            console.log(errorOutput.trim().split('\n').map(l => ` ${l}`).join('\n'));
          }
          if (exitCode !== 0) {
            console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`);
            process.exit(exitCode);
          }
        } catch (err) {
          console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`);
          process.exit(1);
        }
      }

      await indexFiles(col.pwd, col.glob_pattern, col.name, true);
      console.log("");
    }

    // Check if any documents need embedding (show once at end)
    const finalDb = getDb();
    const needsEmbedding = getHashesNeedingEmbedding(finalDb);
    closeDb();

    console.log(`${c.green}✓ All collections updated.${c.reset}`);
    if (needsEmbedding > 0) {
      console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
    }
  }
  /**
   * Find the collection (from the YAML configuration) that contains a filesystem path.
   *
   * The path is resolved with `getRealPath` first. When several collection
   * roots contain it, the one with the longest root path wins.
   *
   * @param db Not used by this function.
   * @param fsPath Filesystem path to look up.
   * @returns `{ collectionName, relativePath }`, where `relativePath` is the path
   *          below the collection root ('' for the root itself), or `null` when
   *          no collection contains the path.
   */
  function detectCollectionFromPath(db: Database, fsPath: string): { collectionName: string; relativePath: string } | null {
    const target = getRealPath(fsPath);

    let winner: { name: string; path: string } | null = null;
    for (const { name, path } of yamlListCollections()) {
      const contains = target === path || target.startsWith(path + '/');
      if (!contains) continue;
      if (winner === null || path.length > winner.path.length) {
        winner = { name, path };
      }
    }

    if (winner === null) return null;

    // The winner either equals the target or is a strict ancestor of it.
    const relativePath = target === winner.path ? '' : target.slice(winner.path.length + 1);
    return { collectionName: winner.name, relativePath };
  }
  /**
   * Attach a context description to the global scope, a virtual path, or a
   * filesystem path inside a collection.
   *
   * @param pathArg '/' for the global context, a `qmd://collection/path` virtual
   *                path, or a filesystem path ('.', './', '~/...', absolute or
   *                relative). Defaults to the current directory when omitted or empty.
   * @param contextText The context description to store.
   *
   * Exits the process with status 1 when the virtual path is invalid, the
   * collection is unknown, or the filesystem path is not inside any collection.
   */
  async function contextAdd(pathArg: string | undefined, contextText: string): Promise<void> {
    const db = getDb();

    // "/" addresses the global context (applies to all collections)
    if (pathArg === '/') {
      setGlobalContext(contextText);
      console.log(`${c.green}✓${c.reset} Set global context`);
      console.log(`${c.dim}Context: ${contextText}${c.reset}`);
      closeDb();
      return;
    }

    // Normalize the argument; virtual and absolute paths pass through untouched
    const requested = pathArg || '.';
    const fsPath =
      requested === '.' || requested === './'
        ? getPwd()
        : requested.startsWith('~/')
          ? homedir() + requested.slice(1)
          : requested.startsWith('/') || requested.startsWith('qmd://')
            ? requested
            : resolve(getPwd(), requested);

    // Virtual paths (qmd://collection/path) name their collection explicitly
    if (isVirtualPath(fsPath)) {
      const virtual = parseVirtualPath(fsPath);
      if (!virtual) {
        console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
        process.exit(1);
      }

      const collection = getCollectionFromYaml(virtual.collectionName);
      if (!collection) {
        console.error(`${c.yellow}Collection not found: ${virtual.collectionName}${c.reset}`);
        process.exit(1);
      }

      yamlAddContext(virtual.collectionName, virtual.path, contextText);

      let shown: string;
      if (virtual.path) {
        shown = `qmd://${virtual.collectionName}/${virtual.path}`;
      } else {
        shown = `qmd://${virtual.collectionName}/ (collection root)`;
      }
      console.log(`${c.green}✓${c.reset} Added context for: ${shown}`);
      console.log(`${c.dim}Context: ${contextText}${c.reset}`);
      closeDb();
      return;
    }

    // Filesystem paths: work out which collection contains them
    const match = detectCollectionFromPath(db, fsPath);
    if (!match) {
      console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
      console.error(`${c.dim}Run 'qmd status' to see indexed collections${c.reset}`);
      process.exit(1);
    }

    yamlAddContext(match.collectionName, match.relativePath, contextText);

    let shown = `qmd://${match.collectionName}/`;
    if (match.relativePath) {
      shown = `qmd://${match.collectionName}/${match.relativePath}`;
    }
    console.log(`${c.green}✓${c.reset} Added context for: ${shown}`);
    console.log(`${c.dim}Context: ${contextText}${c.reset}`);
    closeDb();
  }
  /**
   * Print every configured context, with a collection heading each time the
   * collection name changes between consecutive entries.
   *
   * The database is opened on entry and closed again before returning.
   */
  function contextList(): void {
    getDb();

    const entries = listAllContexts();
    if (entries.length === 0) {
      console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`);
      closeDb();
      return;
    }

    console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);

    let currentCollection = '';
    entries.forEach((ctx) => {
      // Print the heading only when a new collection starts
      if (ctx.collection !== currentCollection) {
        console.log(`${c.cyan}${ctx.collection}${c.reset}`);
        currentCollection = ctx.collection;
      }

      // An empty path denotes the collection root
      const displayPath = ctx.path ? ` ${ctx.path}` : ' / (root)';
      console.log(`${displayPath}`);
      console.log(` ${c.dim}${ctx.context}${c.reset}`);
    });

    closeDb();
  }
  /**
   * Remove a previously configured context.
   *
   * @param pathArg '/' for the global context, a `qmd://collection/path` virtual
   *                path, or a filesystem path ('.', './', '~/...', absolute or relative).
   *
   * Exits the process with status 1 when the virtual path is invalid, the
   * collection is unknown, the filesystem path is not inside any collection,
   * or no context exists for the target.
   */
  function contextRemove(pathArg: string): void {
    // "/" addresses the global context
    if (pathArg === '/') {
      setGlobalContext(undefined);
      console.log(`${c.green}✓${c.reset} Removed global context`);
      return;
    }

    // Virtual paths name their collection explicitly
    if (isVirtualPath(pathArg)) {
      const virtual = parseVirtualPath(pathArg);
      if (!virtual) {
        console.error(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`);
        process.exit(1);
      }

      const collection = getCollectionFromYaml(virtual.collectionName);
      if (!collection) {
        console.error(`${c.yellow}Collection not found: ${virtual.collectionName}${c.reset}`);
        process.exit(1);
      }

      const removed = yamlRemoveContext(collection.name, virtual.path);
      if (!removed) {
        console.error(`${c.yellow}No context found for: ${pathArg}${c.reset}`);
        process.exit(1);
      }

      console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`);
      return;
    }

    // Filesystem paths: normalize, then work out which collection contains them
    const fsPath =
      pathArg === '.' || pathArg === './'
        ? getPwd()
        : pathArg.startsWith('~/')
          ? homedir() + pathArg.slice(1)
          : pathArg.startsWith('/')
            ? pathArg
            : resolve(getPwd(), pathArg);

    const match = detectCollectionFromPath(getDb(), fsPath);
    closeDb();

    if (!match) {
      console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
      process.exit(1);
    }

    const removed = yamlRemoveContext(match.collectionName, match.relativePath);
    if (!removed) {
      console.error(`${c.yellow}No context found for: qmd://${match.collectionName}/${match.relativePath}${c.reset}`);
      process.exit(1);
    }

    console.log(`${c.green}✓${c.reset} Removed context for: qmd://${match.collectionName}/${match.relativePath}`);
  }
  /**
   * Report which collections, and which top-level paths inside collections that
   * already have some context, are still missing a context description.
   *
   * For each gap a ready-to-run `qmd context add …` suggestion is printed. When
   * nothing is missing, a short confirmation is printed instead.
   */
  function contextCheck(): void {
    const db = getDb();
    const bareCollections = getCollectionsWithoutContext(db);
    const everyCollection = listCollections(db);

    // Names of collections that have no context at all, for quick membership tests.
    const bareNames = new Set<string>();
    for (const bare of bareCollections) {
      bareNames.add(bare.name);
    }

    if (bareCollections.length > 0) {
      console.log(`\n${c.yellow}Collections without any context:${c.reset}\n`);
      for (const bare of bareCollections) {
        console.log(`${c.cyan}${bare.name}${c.reset} ${c.dim}(${bare.doc_count} documents)${c.reset}`);
        console.log(` ${c.dim}Suggestion: qmd context add qmd://${bare.name}/ "Description of ${bare.name}"${c.reset}\n`);
      }
    } else if (everyCollection.length > 0) {
      console.log(`\n${c.green}✓${c.reset} ${c.bold}All collections have context configured${c.reset}\n`);
    }

    // Only collections that DO have context are inspected for uncovered top-level paths.
    let printedPathHeading = false;
    for (const coll of everyCollection) {
      if (!coll || bareNames.has(coll.name)) continue;

      const uncovered = getTopLevelPathsWithoutContext(db, coll.name);
      if (uncovered.length === 0) continue;

      // The section heading is printed once, before the first collection with gaps.
      if (!printedPathHeading) {
        console.log(`${c.yellow}Top-level directories without context:${c.reset}\n`);
        printedPathHeading = true;
      }

      console.log(`${c.cyan}${coll.name}${c.reset}`);
      for (const dir of uncovered) {
        console.log(` ${dir}`);
        console.log(` ${c.dim}Suggestion: qmd context add qmd://${coll.name}/${dir} "Description of ${dir}"${c.reset}`);
      }
      console.log('');
    }

    const nothingMissing = bareCollections.length === 0 && !printedPathHeading;
    if (nothingMissing) {
      console.log(`${c.dim}All collections and major paths have context configured.${c.reset}`);
      console.log(`${c.dim}Use 'qmd context list' to see all configured contexts.${c.reset}\n`);
    }

    closeDb();
  }
  /**
   * Print one document (optionally only a range of its lines) to stdout.
   *
   * A trailing `:<digits>` on `filename` is stripped and used as the first line
   * when `fromLine` was not supplied. The remaining reference is resolved as:
   *   1. a docid (as recognised by `isDocid`), looked up with `findDocumentByDocid`;
   *   2. a virtual path (as recognised by `isVirtualPath`): exact match, then a
   *      path-suffix match inside that collection;
   *   3. `collection/path`, when the argument is relative and its first segment
   *      names a collection with active documents: exact, then suffix match;
   *   4. a filesystem path (`~/…`, absolute, or relative to the working
   *      directory) mapped through `detectCollectionFromPath`, falling back to a
   *      suffix match on the last path segment across all collections.
   *
   * @param filename    Document reference as given by the caller.
   * @param fromLine    1-based first line to print; values below 1 are treated as 1.
   * @param maxLines    Maximum number of lines to print; negative values are treated as 0.
   * @param lineNumbers When true, the output is passed through `addLineNumbers`.
   *
   * On an invalid virtual path or an unresolvable reference the function prints
   * an error, closes the database and exits the process with status 1.
   */
  function getDocument(filename: string, fromLine?: number, maxLines?: number, lineNumbers?: boolean): void {
    type DocRow = { collectionName: string; path: string; body: string };

    const db = getDb();

    /** Report `message` on stderr, close the database and exit with status 1. */
    function die(message: string): never {
      console.error(message);
      closeDb();
      process.exit(1);
    }

    // Shared SELECT head for every document lookup below.
    const selectDoc = `
      SELECT d.collection as collectionName, d.path, content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash`;

    /** Active document stored at exactly `collection` + `path`, or null. */
    const findExact = (collection: string, path: string): DocRow | null => {
      const row = db.prepare(`${selectDoc}
      WHERE d.collection = ? AND d.path = ? AND d.active = 1`).get(collection, path);
      return (row as DocRow | null | undefined) ?? null;
    };

    /**
     * First active document whose path ends with `suffix`, optionally restricted
     * to one collection, or null.
     * NOTE(review): `%` and `_` inside `suffix` act as LIKE wildcards — unchanged
     * from the previous behaviour.
     */
    const findBySuffix = (suffix: string, collection?: string): DocRow | null => {
      const row = collection === undefined
        ? db.prepare(`${selectDoc}
      WHERE d.path LIKE ? AND d.active = 1
      LIMIT 1`).get(`%${suffix}`)
        : db.prepare(`${selectDoc}
      WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
      LIMIT 1`).get(collection, `%${suffix}`);
      return (row as DocRow | null | undefined) ?? null;
    };

    /** Resolve a relative `collection/path` reference, or null when it is not one. */
    const findByCollectionPrefix = (input: string): DocRow | null => {
      if (input.startsWith('/') || input.startsWith('~')) return null;

      const [collection, ...rest] = input.split('/');
      if (!collection || rest.length === 0) return null;

      const collectionExists = db.prepare(`
      SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1
      `).get(collection);
      if (!collectionExists) return null;

      const relativePath = rest.join('/');
      return findExact(collection, relativePath) ?? findBySuffix(relativePath, collection);
    };

    /** Resolve a filesystem reference, falling back to a match on its last segment. */
    const findByFilesystemPath = (input: string): DocRow | null => {
      let fsPath = input;
      if (fsPath.startsWith('~/')) {
        fsPath = homedir() + fsPath.slice(1);
      } else if (!fsPath.startsWith('/')) {
        fsPath = resolve(getPwd(), fsPath);
      }
      fsPath = getRealPath(fsPath);

      const detected = detectCollectionFromPath(db, fsPath);
      const exact = detected ? findExact(detected.collectionName, detected.relativePath) : null;
      if (exact) return exact;

      const baseName = input.split('/').pop() || input;
      return findBySuffix(baseName);
    };

    // Parse a ":linenum" suffix (e.g. "file.md:100"); an explicit fromLine wins.
    let inputPath = filename;
    let requestedLine = fromLine;
    const lineSuffix = /:(\d+)$/.exec(inputPath);
    if (lineSuffix && lineSuffix[1] && !requestedLine) {
      requestedLine = parseInt(lineSuffix[1], 10);
      inputPath = inputPath.slice(0, -lineSuffix[0].length);
    }

    // A docid is first translated into the path it refers to.
    if (isDocid(inputPath)) {
      const docidMatch = findDocumentByDocid(db, inputPath);
      if (!docidMatch) {
        die(`Document not found: ${filename}`);
      }
      inputPath = docidMatch.filepath;
    }

    let doc: DocRow | null;
    if (isVirtualPath(inputPath)) {
      const parsed = parseVirtualPath(inputPath);
      if (!parsed) {
        die(`Invalid virtual path: ${inputPath}`);
      }
      doc = findExact(parsed.collectionName, parsed.path) ?? findBySuffix(parsed.path, parsed.collectionName);
    } else {
      // collection/path is tried before the argument is treated as a filesystem path.
      doc = findByCollectionPrefix(inputPath) ?? findByFilesystemPath(inputPath);
    }

    if (!doc) {
      die(`Document not found: ${filename}`);
    }

    const context = getContextForPath(db, doc.collectionName, doc.path);

    let output = doc.body;
    // Clamp so that a negative value cannot make slice() count from the end.
    const startLine = Math.max(requestedLine || 1, 1);
    if (requestedLine !== undefined || maxLines !== undefined) {
      const lines = output.split('\n');
      const start = startLine - 1; // Convert to 0-indexed
      const end = maxLines !== undefined ? start + Math.max(maxLines, 0) : lines.length;
      output = lines.slice(start, end).join('\n');
    }

    if (lineNumbers) {
      output = addLineNumbers(output, startLine);
    }

    if (context) {
      console.log(`Folder Context: ${context}\n---\n`);
    }
    console.log(output);
    closeDb();
  }
  /**
   * Multi-get: fetch several documents selected by a glob pattern or by a
   * comma-separated list, and print them in the requested format.
   *
   * A pattern is treated as a comma-separated list when it contains `,` and
   * neither `*` nor `?`; each entry may be a virtual path or a document path
   * (exact match first, then path-suffix match). Otherwise the pattern is handed
   * to `matchFilesByGlob`, and the process exits with status 1 if nothing matches.
   *
   * @param pattern  Glob pattern or comma-separated list of document references.
   * @param maxLines When set, each body is cut to this many lines and a
   *                 "[... truncated N more lines]" marker is appended.
   * @param maxBytes Documents whose stored length exceeds this are reported as
   *                 skipped instead of being printed.
   * @param format   One of "json", "csv", "files", "md", "xml"; anything else
   *                 produces the default CLI layout.
   */
  function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT_MULTI_GET_MAX_BYTES, format: OutputFormat = "cli"): void {
    type FileRef = { filepath: string; displayPath: string; bodyLength: number; collection?: string; path?: string };
    type LookupRow = { virtual_path: string; body_length: number; collection: string; path: string };
    type Result = { file: string; displayPath: string; title: string; body: string; context: string | null; skipped: boolean; skipReason?: string };

    const db = getDb();

    // Shared SELECT head for the per-name lookups of the comma-separated form.
    const lookupSelect = `
      SELECT
        'qmd://' || d.collection || '/' || d.path as virtual_path,
        LENGTH(content.doc) as body_length,
        d.collection,
        d.path
      FROM documents d
      JOIN content ON content.hash = d.hash`;

    /** Run the lookup with the given WHERE clause; null when no row matches. */
    const lookup = (where: string, ...args: string[]): LookupRow | null => {
      const row = db.prepare(`${lookupSelect}
      WHERE ${where}`).get(...args);
      return (row as LookupRow | null | undefined) ?? null;
    };

    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
    let files: FileRef[];

    if (isCommaSeparated) {
      files = [];
      const names = pattern.split(',').map(s => s.trim()).filter(Boolean);
      for (const name of names) {
        let row: LookupRow | null = null;

        if (isVirtualPath(name)) {
          const parsed = parseVirtualPath(name);
          if (parsed) {
            row = lookup(`d.collection = ? AND d.path = ? AND d.active = 1`, parsed.collectionName, parsed.path);
          }
        } else {
          // Exact path first, then any path ending with the given name.
          row = lookup(`d.path = ? AND d.active = 1 LIMIT 1`, name)
            ?? lookup(`d.path LIKE ? AND d.active = 1 LIMIT 1`, `%${name}`);
        }

        if (row) {
          files.push({
            filepath: row.virtual_path,
            displayPath: row.virtual_path,
            bodyLength: row.body_length,
            collection: row.collection,
            path: row.path
          });
        } else {
          // A missing entry is reported but does not abort the remaining ones.
          console.error(`File not found: ${name}`);
        }
      }
    } else {
      // Glob pattern - collection/path are derived from the virtual path below.
      files = matchFilesByGlob(db, pattern).map(f => ({
        ...f,
        collection: undefined,
        path: undefined
      }));
      if (files.length === 0) {
        console.error(`No files matched pattern: ${pattern}`);
        closeDb();
        process.exit(1);
      }
    }

    // Collect results first so the database can be closed before printing.
    const results: Result[] = [];
    for (const file of files) {
      let collection = file.collection;
      let path = file.path;
      if (!collection || !path) {
        const parsed = parseVirtualPath(file.filepath);
        if (parsed) {
          collection = parsed.collectionName;
          path = parsed.path;
        }
      }

      const context = collection && path ? getContextForPath(db, collection, path) : null;

      // Oversized documents are listed with a reason instead of their body.
      if (file.bodyLength > maxBytes) {
        results.push({
          file: file.filepath,
          displayPath: file.displayPath,
          title: file.displayPath.split('/').pop() || file.displayPath,
          body: "",
          context,
          skipped: true,
          skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'qmd get ${file.displayPath}' to retrieve.`,
        });
        continue;
      }

      if (!collection || !path) continue;
      const doc = db.prepare(`
        SELECT content.doc as body, d.title
        FROM documents d
        JOIN content ON content.hash = d.hash
        WHERE d.collection = ? AND d.path = ? AND d.active = 1
      `).get(collection, path) as { body: string; title: string } | null;
      if (!doc) continue;

      let body = doc.body;
      if (maxLines !== undefined) {
        const lines = body.split('\n');
        body = lines.slice(0, maxLines).join('\n');
        if (lines.length > maxLines) {
          body += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
        }
      }

      results.push({
        file: file.filepath,
        displayPath: file.displayPath,
        title: doc.title || file.displayPath.split('/').pop() || file.displayPath,
        body,
        context,
        skipped: false,
      });
    }
    closeDb();

    if (format === "json") {
      const output = results.map(r => ({
        file: r.displayPath,
        title: r.title,
        ...(r.context ? { context: r.context } : {}),
        ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
      }));
      console.log(JSON.stringify(output, null, 2));
    } else if (format === "csv") {
      // Quote any field containing a separator, a quote or a line break
      // (CR as well as LF), doubling embedded quotes.
      const escapeField = (val: string | null | undefined): string => {
        if (val === null || val === undefined) return "";
        const str = String(val);
        if (/[",\r\n]/.test(str)) {
          return `"${str.replace(/"/g, '""')}"`;
        }
        return str;
      };
      console.log("file,title,context,skipped,body");
      for (const r of results) {
        console.log([r.displayPath, r.title, r.context, r.skipped ? "true" : "false", r.skipped ? r.skipReason : r.body].map(escapeField).join(","));
      }
    } else if (format === "files") {
      for (const r of results) {
        const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
        const status = r.skipped ? "[SKIPPED]" : "";
        console.log(`${r.displayPath}${ctx}${status ? `,${status}` : ""}`);
      }
    } else if (format === "md") {
      for (const r of results) {
        console.log(`## ${r.displayPath}\n`);
        if (r.title && r.title !== r.displayPath) console.log(`**Title:** ${r.title}\n`);
        if (r.context) console.log(`**Context:** ${r.context}\n`);
        if (r.skipped) {
          console.log(`> ${r.skipReason}\n`);
        } else {
          // The fence must be longer than any backtick run in the body, otherwise
          // a fenced block inside the document would close the wrapper early.
          const backtickRuns: string[] = r.body.match(/`+/g) ?? [];
          const longestRun = backtickRuns.reduce((longest, run) => Math.max(longest, run.length), 0);
          const fence = "`".repeat(Math.max(3, longestRun + 1));
          console.log(fence);
          console.log(r.body);
          console.log(`${fence}\n`);
        }
      }
    } else if (format === "xml") {
      console.log('<?xml version="1.0" encoding="UTF-8"?>');
      console.log("<documents>");
      for (const r of results) {
        console.log(" <document>");
        console.log(` <file>${escapeXml(r.displayPath)}</file>`);
        console.log(` <title>${escapeXml(r.title)}</title>`);
        if (r.context) console.log(` <context>${escapeXml(r.context)}</context>`);
        if (r.skipped) {
          console.log(` <skipped>true</skipped>`);
          console.log(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
        } else {
          console.log(` <body>${escapeXml(r.body)}</body>`);
        }
        console.log(" </document>");
      }
      console.log("</documents>");
    } else {
      // CLI format (default)
      for (const r of results) {
        console.log(`\n${'='.repeat(60)}`);
        console.log(`File: ${r.displayPath}`);
        console.log(`${'='.repeat(60)}\n`);
        if (r.skipped) {
          console.log(`[SKIPPED: ${r.skipReason}]`);
          continue;
        }
        if (r.context) {
          console.log(`Folder Context: ${r.context}\n---\n`);
        }
        console.log(r.body);
      }
    }
  }
  /**
   * List files in the virtual file tree.
   *
   * Without an argument, prints every collection returned by
   * `yamlListCollections()` together with its number of active documents.
   * With an argument — `qmd://collection/path`, `collection`, or
   * `collection/path` — prints the active documents of that collection whose
   * path starts with the given prefix, one per line in an `ls -l`-like layout
   * (size, modification time, virtual path).
   *
   * @param pathArg Optional collection or collection/path selector.
   *
   * Exits with status 1 on an invalid virtual path or an unknown collection.
   */
  function listFiles(pathArg?: string): void {
    const db = getDb();

    if (!pathArg) {
      const yamlCollections = yamlListCollections();
      if (yamlCollections.length === 0) {
        console.log("No collections found. Run 'qmd add .' to index files.");
        closeDb();
        return;
      }

      // One prepared statement is reused for every collection.
      const countStatement = db.prepare(`
        SELECT COUNT(*) as file_count
        FROM documents d
        WHERE d.collection = ? AND d.active = 1
      `);
      const collections: { name: string; file_count: number }[] = [];
      for (const entry of yamlCollections) {
        const stats = countStatement.get(entry.name) as { file_count: number } | null;
        collections.push({ name: entry.name, file_count: stats?.file_count || 0 });
      }

      console.log(`${c.bold}Collections:${c.reset}\n`);
      for (const entry of collections) {
        console.log(` ${c.dim}qmd://${c.reset}${c.cyan}${entry.name}/${c.reset} ${c.dim}(${entry.file_count} files)${c.reset}`);
      }
      closeDb();
      return;
    }

    // Split the argument into a collection name and an optional path prefix.
    let collectionName: string;
    let pathPrefix: string | null = null;
    if (pathArg.startsWith('qmd://')) {
      const parsed = parseVirtualPath(pathArg);
      if (!parsed) {
        console.error(`Invalid virtual path: ${pathArg}`);
        closeDb();
        process.exit(1);
      }
      collectionName = parsed.collectionName;
      pathPrefix = parsed.path;
    } else {
      const parts = pathArg.split('/');
      collectionName = parts[0] || '';
      if (parts.length > 1) {
        pathPrefix = parts.slice(1).join('/');
      }
    }

    const coll = getCollectionFromYaml(collectionName);
    if (!coll) {
      console.error(`Collection not found: ${collectionName}`);
      console.error(`Run 'qmd ls' to see available collections.`);
      closeDb();
      process.exit(1);
    }

    // Build one query; the path filter is added only when a prefix was given.
    const filters = ["d.collection = ?"];
    const params: string[] = [coll.name];
    if (pathPrefix) {
      filters.push("d.path LIKE ?");
      params.push(`${pathPrefix}%`);
    }
    filters.push("d.active = 1");
    const query = `
      SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
      FROM documents d
      JOIN content ct ON d.hash = ct.hash
      WHERE ${filters.join(" AND ")}
      ORDER BY d.path
    `;
    const files = db.prepare(query).all(...params) as { path: string; title: string; modified_at: string; size: number }[];

    if (files.length === 0) {
      if (pathPrefix) {
        console.log(`No files found under qmd://${collectionName}/${pathPrefix}`);
      } else {
        console.log(`No files found in collection: ${collectionName}`);
      }
      closeDb();
      return;
    }

    // Format each size once; a reduce avoids spreading one argument per row
    // into Math.max, which can overflow the call stack on very large listings.
    const sizeLabels = files.map(f => formatBytes(f.size));
    const sizeWidth = sizeLabels.reduce((widest, label) => Math.max(widest, label.length), 0);

    files.forEach((file, index) => {
      const sizeStr = (sizeLabels[index] ?? '').padStart(sizeWidth);
      const timeStr = formatLsTime(new Date(file.modified_at));
      // Dim the qmd:// prefix, highlight the file path
      console.log(`${sizeStr} ${timeStr} ${c.dim}qmd://${collectionName}/${c.reset}${c.cyan}${file.path}${c.reset}`);
    });
    closeDb();
  }
  /**
   * Format a timestamp the way `ls -l` does: "Mon DD HH:MM" for recent dates and
   * "Mon DD YYYY" for dates more than six months (counted as 6 × 30 days) in the
   * past. The day is space-padded to two characters; hours and minutes are
   * zero-padded. All fields use the local time zone.
   *
   * @param date Timestamp to format.
   * @returns The formatted string.
   */
  function formatLsTime(date: Date): string {
    const MONTH_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    const SIX_MONTHS_MS = 6 * 30 * 24 * 60 * 60 * 1000;
    const zeroPad = (value: number): string => value.toString().padStart(2, '0');

    const monthAndDay = `${MONTH_NAMES[date.getMonth()]} ${date.getDate().toString().padStart(2, ' ')}`;

    // Older than the cutoff: the year replaces the time of day.
    const isOld = date.getTime() < Date.now() - SIX_MONTHS_MS;
    const tail = isOld
      ? `${date.getFullYear()}`
      : `${zeroPad(date.getHours())}:${zeroPad(date.getMinutes())}`;

    return `${monthAndDay} ${tail}`;
  }
  1099. // Collection management commands
  /**
   * Print a summary of every collection returned by `listCollections`: name,
   * virtual root, glob pattern, active file count and time since last update.
   */
  function collectionList(): void {
    const db = getDb();
    const known = listCollections(db);

    if (known.length > 0) {
      console.log(`${c.bold}Collections (${known.length}):${c.reset}\n`);

      for (const entry of known) {
        // Entries without a last_modified value are shown relative to "now".
        const lastUpdate = entry.last_modified ? new Date(entry.last_modified) : new Date();
        const age = formatTimeAgo(lastUpdate);

        console.log(`${c.cyan}${entry.name}${c.reset} ${c.dim}(qmd://${entry.name}/)${c.reset}`);
        console.log(` ${c.dim}Pattern:${c.reset} ${entry.glob_pattern}`);
        console.log(` ${c.dim}Files:${c.reset} ${entry.active_count}`);
        console.log(` ${c.dim}Updated:${c.reset} ${age}`);
        console.log();
      }
    } else {
      console.log("No collections found. Run 'qmd add .' to create one.");
    }

    closeDb();
  }
  /**
   * Register a new collection in the YAML config and index its files.
   *
   * @param pwd         Directory the collection is rooted at.
   * @param globPattern Glob pattern selecting the files to index.
   * @param name        Collection name; when omitted (or empty) the last path
   *                    segment of `pwd` is used, or "root" if there is none.
   *
   * Exits with status 1 when the name is already taken or when another
   * collection already covers the same path and pattern.
   */
  async function collectionAdd(pwd: string, globPattern: string, name?: string): Promise<void> {
    const collName = name || pwd.split('/').filter(Boolean).pop() || 'root';

    // Reject a duplicate name.
    if (getCollectionFromYaml(collName)) {
      console.error(`${c.yellow}Collection '${collName}' already exists.${c.reset}`);
      console.error(`Use a different name with --name <name>`);
      process.exit(1);
    }

    // Reject a duplicate path + pattern combination.
    const duplicate = yamlListCollections().find(entry => entry.path === pwd && entry.pattern === globPattern);
    if (duplicate) {
      console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
      console.error(` Name: ${duplicate.name} (qmd://${duplicate.name}/)`);
      console.error(` Pattern: ${globPattern}`);
      console.error(`\nUse 'qmd update' to re-index it, or remove it first with 'qmd collection remove ${duplicate.name}'`);
      process.exit(1);
    }

    // Record the collection in the YAML config, then build its index.
    const collectionsModule = await import("./collections.js");
    collectionsModule.addCollection(collName, pwd, globPattern);

    console.log(`Creating collection '${collName}'...`);
    await indexFiles(pwd, globPattern, collName);
    console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
  }
  /**
   * Remove the collection called `name` and report how many documents (and
   * orphaned content hashes, if any) were deleted.
   *
   * Exits with status 1 when `getCollectionFromYaml` does not know the name.
   */
  function collectionRemove(name: string): void {
    const known = getCollectionFromYaml(name);
    if (!known) {
      console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
      console.error(`Run 'qmd collection list' to see available collections.`);
      process.exit(1);
    }

    const outcome = removeCollection(getDb(), name);
    closeDb();

    console.log(`${c.green}✓${c.reset} Removed collection '${name}'`);
    console.log(` Deleted ${outcome.deletedDocs} documents`);
    const hasOrphans = outcome.cleanedHashes > 0;
    if (hasOrphans) {
      console.log(` Cleaned up ${outcome.cleanedHashes} orphaned content hashes`);
    }
  }
  /**
   * Rename a collection from `oldName` to `newName`.
   *
   * Exits with status 1 when `oldName` is unknown to `getCollectionFromYaml` or
   * when `newName` is already in use.
   */
  function collectionRename(oldName: string, newName: string): void {
    // The source collection must exist ...
    if (!getCollectionFromYaml(oldName)) {
      console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
      console.error(`Run 'qmd collection list' to see available collections.`);
      process.exit(1);
    }

    // ... and the target name must still be free.
    if (getCollectionFromYaml(newName)) {
      console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
      console.error(`Choose a different name or remove the existing collection first.`);
      process.exit(1);
    }

    renameCollection(getDb(), oldName, newName);
    closeDb();

    const before = `${c.cyan}qmd://${oldName}/${c.reset}`;
    const after = `${c.cyan}qmd://${newName}/${c.reset}`;
    console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
    console.log(` Virtual paths updated: ${before} → ${after}`);
  }
  /**
   * Scan a directory for files matching `globPattern` and synchronise the
   * collection's documents with what is on disk: new files are inserted, changed
   * files updated, and documents whose file was not seen are deactivated.
   *
   * @param pwd                 Directory to scan; defaults to `getPwd()`.
   * @param globPattern         Glob pattern selecting candidate files.
   * @param collectionName      Name of the collection being indexed (required).
   * @param suppressEmbedNotice When true, the "Run 'qmd embed'" hint is not printed.
   *
   * @throws Error when `collectionName` is missing. The check runs before the
   *         database is opened, so nothing is left open or cleared in that case.
   *
   * The database is closed on every exit path, including when reading or
   * stat-ing a file throws.
   */
  async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, collectionName?: string, suppressEmbedNotice: boolean = false): Promise<void> {
    // Collection name must be provided (from YAML)
    if (!collectionName) {
      throw new Error("Collection name is required. Collections must be defined in ~/.config/qmd/index.yml");
    }

    const resolvedPwd = pwd || getPwd();
    const now = new Date().toISOString();
    const excludeDirs = new Set(["node_modules", ".git", ".cache", "vendor", "dist", "build"]);

    const db = getDb();
    try {
      // Clear Ollama cache on index
      clearCache(db);

      console.log(`Collection: ${resolvedPwd} (${globPattern})`);
      progress.indeterminate();

      // Collect candidates, skipping excluded directories and any dot-prefixed segment.
      const glob = new Glob(globPattern);
      const files: string[] = [];
      for await (const file of glob.scan({ cwd: resolvedPwd, onlyFiles: true, followSymlinks: false })) {
        const shouldSkip = file.split("/").some(part => part.startsWith(".") || excludeDirs.has(part));
        if (!shouldSkip) {
          files.push(file);
        }
      }

      const total = files.length;
      if (total === 0) {
        progress.clear();
        console.log("No files found matching pattern.");
        return;
      }

      let indexed = 0, updated = 0, unchanged = 0, processed = 0;
      const seenPaths = new Set<string>();
      const startTime = Date.now();

      for (const relativeFile of files) {
        const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
        const path = handelize(relativeFile); // Normalize path for token-friendliness
        // Recorded before the empty-file check, so an emptied file is not deactivated below.
        seenPaths.add(path);

        const content = readFileSync(filepath, "utf-8");

        // Skip empty files - nothing useful to index
        if (!content.trim()) {
          processed++;
          continue;
        }

        const hash = await hashContent(content);
        const title = extractTitle(content, relativeFile);

        const existing = findActiveDocument(db, collectionName, path);
        if (!existing) {
          // New document - insert content and document
          indexed++;
          insertContent(db, hash, content, now);
          const stat = statSync(filepath);
          insertDocument(db, collectionName, path, title, hash,
            new Date(stat.birthtime).toISOString(),
            new Date(stat.mtime).toISOString());
        } else if (existing.hash !== hash) {
          // Content changed - insert new content hash and update document
          insertContent(db, hash, content, now);
          const stat = statSync(filepath);
          updateDocument(db, existing.id, title, hash, new Date(stat.mtime).toISOString());
          updated++;
        } else if (existing.title !== title) {
          // Hash unchanged, but the extracted title differs
          updateDocumentTitle(db, existing.id, title, now);
          updated++;
        } else {
          unchanged++;
        }

        processed++;
        progress.set((processed / total) * 100);
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = processed / elapsed;
        const remaining = (total - processed) / rate;
        // The estimate is only shown once a few files have been processed.
        const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
        process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
      }

      // Deactivate documents in this collection that no longer exist
      let removed = 0;
      for (const activePath of getActiveDocumentPaths(db, collectionName)) {
        if (!seenPaths.has(activePath)) {
          deactivateDocument(db, collectionName, activePath);
          removed++;
        }
      }

      // Clean up orphaned content hashes (content not referenced by any document)
      const orphanedContent = cleanupOrphanedContent(db);

      // Check if vector index needs updating
      const needsEmbedding = getHashesNeedingEmbedding(db);

      progress.clear();
      console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
      if (orphanedContent > 0) {
        console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
      }
      if (needsEmbedding > 0 && !suppressEmbedNotice) {
        console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
      }
    } finally {
      // Runs on normal completion, on the early return above, and on any error.
      closeDb();
    }
  }
  /**
   * Render a fixed-width text progress bar made of "█" (done) and "░" (pending).
   *
   * @param percent Completion in percent. Values outside 0–100 are clamped to
   *                that range and NaN is treated as 0, so the bar never throws.
   * @param width   Total number of cells (default 30). Fractions are truncated;
   *                non-finite or negative widths yield an empty string.
   * @returns A string of exactly `width` cells.
   */
  function renderProgressBar(percent: number, width: number = 30): string {
    const cells = Number.isFinite(width) ? Math.max(0, Math.trunc(width)) : 0;
    // String.prototype.repeat throws a RangeError on a negative count, which an
    // out-of-range percent would otherwise produce for one of the two segments.
    const clamped = Number.isNaN(percent) ? 0 : Math.min(Math.max(percent, 0), 100);
    const filled = Math.round((clamped / 100) * cells);
    return "█".repeat(filled) + "░".repeat(cells - filled);
  }
  /**
   * Create vector embeddings for every content hash returned by getHashesForEmbedding.
   *
   * Steps:
   *  1. Optionally clear all existing embeddings (`force`).
   *  2. Split each non-empty document body into chunks with chunkDocumentByTokens.
   *  3. Inside an LLM session, embed the chunks in batches of 32; if a batch call
   *     throws, retry that batch one chunk at a time.
   *  4. Store each vector with insertEmbedding and report progress on stderr.
   *
   * The database is closed on every exit path, and the progress indicator and
   * terminal cursor are restored if embedding fails part-way through.
   *
   * @param model - Model name passed to insertEmbedding and printed in the summary.
   * @param force - When true, existing embeddings are cleared before re-embedding.
   */
  async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean = false): Promise<void> {
    const db = getDb();
    const now = new Date().toISOString();

    try {
      // If force, clear all vectors
      if (force) {
        console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
        clearAllEmbeddings(db);
      }

      // Find unique hashes that need embedding (from active documents)
      const hashesToEmbed = getHashesForEmbedding(db);
      if (hashesToEmbed.length === 0) {
        console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
        return;
      }

      // Prepare documents with chunks
      type ChunkItem = { hash: string; title: string; text: string; seq: number; pos: number; tokens: number; bytes: number; displayName: string };
      const allChunks: ChunkItem[] = [];
      let multiChunkDocs = 0;
      // One encoder is enough for all byte-length measurements.
      const encoder = new TextEncoder();

      // Chunk all documents using actual token counts
      process.stderr.write(`Chunking ${hashesToEmbed.length} documents by token count...\n`);
      for (const item of hashesToEmbed) {
        if (encoder.encode(item.body).length === 0) continue; // Skip empty

        const title = extractTitle(item.body, item.path);
        const displayName = item.path;
        const chunks = await chunkDocumentByTokens(item.body); // Uses actual tokenizer
        if (chunks.length > 1) multiChunkDocs++;

        for (let seq = 0; seq < chunks.length; seq++) {
          const chunk = chunks[seq]!; // Guaranteed to exist by the loop bound
          allChunks.push({
            hash: item.hash,
            title,
            text: chunk.text,
            seq,
            pos: chunk.pos,
            tokens: chunk.tokens,
            bytes: encoder.encode(chunk.text).length,
            displayName,
          });
        }
      }

      if (allChunks.length === 0) {
        console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
        return;
      }

      const totalBytes = allChunks.reduce((sum, chk) => sum + chk.bytes, 0);
      const totalChunks = allChunks.length;
      const totalDocs = hashesToEmbed.length;

      console.log(`${c.bold}Embedding ${totalDocs} documents${c.reset} ${c.dim}(${totalChunks} chunks, ${formatBytes(totalBytes)})${c.reset}`);
      if (multiChunkDocs > 0) {
        console.log(`${c.dim}${multiChunkDocs} documents split into multiple chunks${c.reset}`);
      }
      console.log(`${c.dim}Model: ${model}${c.reset}\n`);

      // Hide cursor during embedding
      cursor.hide();

      try {
        // Wrap all LLM embedding operations in a session for lifecycle management.
        // maxDuration is set to 30 minutes for large collections.
        await withLLMSession(async (session) => {
          // Get embedding dimensions from first chunk
          progress.indeterminate();
          const firstChunk = allChunks[0];
          if (!firstChunk) {
            throw new Error("No chunks available to embed");
          }
          const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
          const firstResult = await session.embed(firstText);
          if (!firstResult) {
            throw new Error("Failed to get embedding dimensions from first chunk");
          }
          ensureVecTable(db, firstResult.embedding.length);

          let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
          const startTime = Date.now();

          // Batch embedding for better throughput
          // Process in batches of 32 to balance memory usage and efficiency
          const BATCH_SIZE = 32;

          for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
            const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
            const batch = allChunks.slice(batchStart, batchEnd);

            // Format texts for embedding
            const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));

            try {
              // Batch embed all texts at once
              const embeddings = await session.embedBatch(texts);

              // Insert each embedding
              for (let i = 0; i < batch.length; i++) {
                const chunk = batch[i]!;
                const embedding = embeddings[i];
                if (embedding) {
                  insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
                  chunksEmbedded++;
                } else {
                  errors++;
                  console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
                }
                bytesProcessed += chunk.bytes;
              }
            } catch {
              // If batch fails, try individual embeddings as fallback
              for (const chunk of batch) {
                try {
                  const text = formatDocForEmbedding(chunk.text, chunk.title);
                  const result = await session.embed(text);
                  if (result) {
                    insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
                    chunksEmbedded++;
                  } else {
                    // Report the failure the same way the batch path does.
                    errors++;
                    console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
                  }
                } catch (innerErr) {
                  errors++;
                  console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}: ${innerErr}${c.reset}`);
                }
                bytesProcessed += chunk.bytes;
              }
            }

            // Guard every division so a zero elapsed time or zero total never
            // produces NaN/Infinity in the status line.
            const percent = totalBytes > 0 ? (bytesProcessed / totalBytes) * 100 : 100;
            progress.set(percent);

            const elapsed = (Date.now() - startTime) / 1000;
            const bytesPerSec = elapsed > 0 ? bytesProcessed / elapsed : 0;
            const remainingBytes = totalBytes - bytesProcessed;
            const etaSec = bytesPerSec > 0 ? remainingBytes / bytesPerSec : 0;

            const bar = renderProgressBar(percent);
            const percentStr = percent.toFixed(0).padStart(3);
            const throughput = `${formatBytes(bytesPerSec)}/s`;
            const eta = elapsed > 2 ? formatETA(etaSec) : "...";
            const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";

            process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
          }

          progress.clear();
          cursor.show();

          const totalTimeSec = (Date.now() - startTime) / 1000;
          const avgThroughput = formatBytes(totalTimeSec > 0 ? totalBytes / totalTimeSec : 0);

          console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
          console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${chunksEmbedded}${c.reset} chunks from ${c.bold}${totalDocs}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset} ${c.dim}(${avgThroughput}/s)${c.reset}`);
          if (errors > 0) {
            console.log(`${c.yellow}⚠ ${errors} chunks failed${c.reset}`);
          }
        }, { maxDuration: 30 * 60 * 1000, name: 'embed-command' });
      } catch (err) {
        // Restore the terminal before propagating, otherwise a failed run leaves
        // the cursor hidden. NOTE(review): assumes progress.clear() and
        // cursor.show() are safe to call more than once — confirm.
        progress.clear();
        cursor.show();
        throw err;
      }
    } finally {
      closeDb();
    }
  }
  /**
   * Sanitize a single search term for use inside an FTS5 query string.
   *
   * Keeps letters, combining marks and digits from any script, plus underscores
   * and apostrophes (for contractions like "don't"). Everything else, including
   * whitespace, quotes and punctuation, is removed.
   *
   * The character class is Unicode-aware: an ASCII-only `\w` would strip accented
   * and non-Latin letters, turning e.g. "café" into "caf" and deleting CJK terms.
   *
   * @param term - Raw term taken from the user's query.
   * @returns The term with disallowed characters removed (may be empty).
   */
  function sanitizeFTS5Term(term: string): string {
    return term.replace(/[^\p{L}\p{M}\p{N}_']/gu, '');
  }
  /**
   * Build an FTS5 MATCH expression from free-form user input.
   *
   * The query is split on whitespace, each term is passed through
   * sanitizeFTS5Term, and terms shorter than two characters are dropped.
   *
   * - No usable terms: returns "" (callers must treat this as "no query").
   * - One term: returns that term as a quoted string.
   * - Several terms: returns `(phrase) OR (NEAR(terms, 10)) OR (t1 OR t2 ...)`,
   *   intended to favour exact phrase matches, then close proximity, then any term.
   *
   * @param query - Raw user query.
   * @returns An FTS5 query string, or "" when nothing searchable remains.
   */
  function buildFTS5Query(query: string): string {
    const terms = query
      .split(/\s+/)
      .map(sanitizeFTS5Term)
      .filter(term => term.length >= 2); // Skip single chars and empty

    if (terms.length === 0) return "";

    // FTS5 string literal: wrap in double quotes, doubling any embedded quote.
    const quote = (value: string): string => `"${value.replace(/"/g, '""')}"`;

    if (terms.length === 1) return quote(terms[0]!);

    // Sanitize the full query for phrase matching. Unicode-aware so that
    // non-ASCII letters survive, whitespace is kept as the word separator.
    const sanitizedQuery = query.replace(/[^\p{L}\p{M}\p{N}_\s']/gu, '').trim();
    const phrase = quote(sanitizedQuery);
    const quotedTerms = terms.map(quote);

    // FTS5 NEAR syntax: NEAR(term1 term2, distance)
    const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
    const orTerms = quotedTerms.join(' OR ');

    // Exact phrase > proximity > any term
    return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
  }
  /**
   * Squash a BM25 score into the 0-1 range, where higher means a better match.
   *
   * Only the magnitude of the score is used, so the negative scores SQLite
   * reports (lower = better) and their absolute values normalize identically.
   * The mapping is a logistic curve centred on a magnitude of 5 with a scale
   * of 3: a magnitude of 2 gives about 0.27, 5 gives exactly 0.5, and 15 gives
   * about 0.97.
   *
   * @param score - Raw BM25 score.
   * @returns Normalized relevance in the open interval (0, 1).
   */
  function normalizeBM25(score: number): number {
    const magnitude = Math.abs(score);
    const exponent = -(magnitude - 5) / 3;
    return 1 / (1 + Math.exp(exponent));
  }
  /** Options that control how search results are filtered and rendered. */
  type OutputOptions = {
    /** Output renderer to use (cli, json, files, md, xml, csv). */
    format: OutputFormat;
    /** Emit the whole document body instead of an extracted snippet. */
    full: boolean;
    /** Maximum number of results to print. */
    limit: number;
    /** Results scoring below this value are dropped before printing. */
    minScore: number;
    /** Return all matches; the search commands raise their fetch limits when set. */
    all?: boolean;
    /** Restrict the search to this collection name. */
    collection?: string;
    /** Prefix output lines with line numbers. */
    lineNumbers?: boolean;
    /** Optional context for query expansion. */
    context?: string;
  };
  /**
   * Highlight query terms in text for terminal display.
   *
   * Terms are the whitespace-separated words of `query` that are at least three
   * characters long; matching is case-insensitive. When colour output is
   * disabled, or no term qualifies, the text is returned unchanged.
   *
   * All terms are matched in a single pass. Replacing term by term would let a
   * later term match inside the colour codes inserted for an earlier one and
   * corrupt the output.
   *
   * @param text - Text to decorate.
   * @param query - User query whose terms should be highlighted.
   * @returns `text` with each match wrapped in the yellow/bold colour codes.
   */
  function highlightTerms(text: string, query: string): string {
    if (!useColor) return text;

    const terms = [...new Set(query.toLowerCase().split(/\s+/).filter(t => t.length >= 3))];
    if (terms.length === 0) return text;

    // Longest first, so that of two overlapping terms the longer one wins.
    const pattern = terms
      .sort((a, b) => b.length - a.length)
      .map(term => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
      .join('|');

    // A replacer function avoids any special handling of `$` in the replacement.
    return text.replace(new RegExp(pattern, 'gi'), match => `${c.yellow}${c.bold}${match}${c.reset}`);
  }
  /**
   * Format a 0-1 score as a right-aligned percentage, e.g. " 42%".
   *
   * With colour enabled the label is tinted by strength: green from 0.7,
   * yellow from 0.4, dim below that.
   *
   * @param score - Score where 1 corresponds to 100%.
   * @returns The percentage label, with colour codes when colour is enabled.
   */
  function formatScore(score: number): string {
    const label = `${(score * 100).toFixed(0).padStart(3)}%`;
    if (!useColor) return label;

    const tint = score >= 0.7 ? c.green : score >= 0.4 ? c.yellow : c.dim;
    return `${tint}${label}${c.reset}`;
  }
  /**
   * Shorten a directory path for display by replacing the user's home directory
   * with `~` (used for context paths, not documents).
   *
   * The home prefix is only replaced on a path-component boundary, so a sibling
   * directory such as `/home/bobby` is not mangled into `~by` when the home
   * directory is `/home/bob`.
   *
   * @param dirpath - Path to shorten.
   * @returns `~` plus the remainder when `dirpath` is the home directory or lies
   *   inside it; otherwise `dirpath` unchanged.
   */
  function shortPath(dirpath: string): string {
    // Drop trailing separators so the boundary check below is uniform.
    const home = homedir().replace(/[\\/]+$/, '');
    // Home is the filesystem root (or empty): there is nothing to abbreviate.
    if (home.length === 0) return dirpath;

    if (dirpath === home) return '~';
    if (dirpath.startsWith(home)) {
      const boundary = dirpath[home.length];
      if (boundary === '/' || boundary === '\\') {
        return '~' + dirpath.slice(home.length);
      }
    }
    return dirpath;
  }
  /**
   * Print search results to stdout in the format selected by `opts.format`.
   *
   * Results scoring below `opts.minScore` are dropped and the remainder is
   * truncated to `opts.limit`. If nothing is left a notice is printed instead.
   *
   * Formats: "json", "files" (docid,score,path[,context]), "cli" (coloured,
   * human readable), "md", "xml", and CSV for any other value.
   *
   * In the XML format every attribute value (docid, name, title, context) is
   * escaped for `&`, `<`, `>` and `"` so the tag stays well-formed. Element
   * content is emitted verbatim.
   *
   * @param results - Candidate rows; `body` is the text snippets are taken from.
   * @param query - Original query, used for snippet extraction and highlighting.
   * @param opts - Filtering and rendering options.
   */
  function outputResults(results: { file: string; displayPath: string; title: string; body: string; score: number; context?: string | null; chunkPos?: number; hash?: string; docid?: string }[], query: string, opts: OutputOptions): void {
    const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);

    if (filtered.length === 0) {
      console.log("No results found above minimum score threshold.");
      return;
    }

    // Helper to create qmd:// URI from displayPath
    const toQmdPath = (displayPath: string) => `qmd://${displayPath}`;

    // Short id: explicit docid, else the first 6 characters of the hash.
    const shortDocid = (row: { docid?: string; hash?: string }): string | undefined =>
      row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);

    // Escape a value for use inside a double-quoted XML attribute.
    const escapeXmlAttr = (value: string): string =>
      value
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');

    if (opts.format === "json") {
      // JSON output for LLM consumption
      const output = filtered.map(row => {
        const docid = shortDocid(row);
        let body = opts.full ? row.body : undefined;
        let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
        if (opts.lineNumbers) {
          if (body) body = addLineNumbers(body);
          if (snippet) snippet = addLineNumbers(snippet);
        }
        return {
          ...(docid && { docid: `#${docid}` }),
          score: Math.round(row.score * 100) / 100,
          file: toQmdPath(row.displayPath),
          title: row.title,
          ...(row.context && { context: row.context }),
          ...(body && { body }),
          ...(snippet && { snippet }),
        };
      });
      console.log(JSON.stringify(output, null, 2));
    } else if (opts.format === "files") {
      // Simple docid,score,filepath,context output
      for (const row of filtered) {
        const docid = shortDocid(row) ?? "";
        const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
        console.log(`#${docid},${row.score.toFixed(2)},${toQmdPath(row.displayPath)}${ctx}`);
      }
    } else if (opts.format === "cli") {
      for (const [i, row] of filtered.entries()) {
        const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
        const docid = shortDocid(row);

        // Line 1: filepath with docid
        const path = toQmdPath(row.displayPath);
        // Only show :line if we actually found a term match in the snippet body (exclude header line).
        const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
        const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
        const lineInfo = hasMatch ? `:${line}` : "";
        const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
        console.log(`${c.cyan}${path}${c.dim}${lineInfo}${c.reset}${docidStr}`);

        // Line 2: Title (if available)
        if (row.title) {
          console.log(`${c.bold}Title: ${row.title}${c.reset}`);
        }

        // Line 3: Context (if available)
        if (row.context) {
          console.log(`${c.dim}Context: ${row.context}${c.reset}`);
        }

        // Line 4: Score
        const score = formatScore(row.score);
        console.log(`Score: ${c.bold}${score}${c.reset}`);
        console.log();

        // Snippet with highlighting (diff-style header included)
        const displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
        console.log(highlightTerms(displaySnippet, query));

        // Double empty line between results
        if (i < filtered.length - 1) console.log('\n');
      }
    } else if (opts.format === "md") {
      for (const row of filtered) {
        const heading = row.title || row.displayPath;
        const docid = shortDocid(row);
        let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content);
        }
        const docidLine = docid ? `**docid:** \`#${docid}\`\n` : "";
        const contextLine = row.context ? `**context:** ${row.context}\n` : "";
        console.log(`---\n# ${heading}\n${docidLine}${contextLine}\n${content}\n`);
      }
    } else if (opts.format === "xml") {
      for (const row of filtered) {
        const titleAttr = row.title ? ` title="${escapeXmlAttr(row.title)}"` : "";
        const contextAttr = row.context ? ` context="${escapeXmlAttr(row.context)}"` : "";
        const docid = shortDocid(row) ?? "";
        let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content);
        }
        console.log(`<file docid="#${escapeXmlAttr(docid)}" name="${escapeXmlAttr(toQmdPath(row.displayPath))}"${titleAttr}${contextAttr}>\n${content}\n</file>\n`);
      }
    } else {
      // CSV format
      console.log("docid,score,file,title,context,line,snippet");
      for (const row of filtered) {
        const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
        let content = opts.full ? row.body : snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content, line);
        }
        const docid = shortDocid(row) ?? "";
        const snippetText = content || "";
        console.log(`#${docid},${row.score.toFixed(4)},${escapeCSV(toQmdPath(row.displayPath))},${escapeCSV(row.title || "")},${escapeCSV(row.context || "")},${line},${escapeCSV(snippetText)}`);
      }
    }
  }
  /**
   * Run a full-text search (searchFTS) and print the results.
   *
   * If `opts.collection` is set it must resolve via getCollectionFromYaml;
   * otherwise an error is printed and the process exits with status 1. Each
   * hit is paired with the context returned by getContextForFile, then the
   * database is closed before anything is printed.
   *
   * @param query - User query text.
   * @param opts - Output options; `all`, `limit` and `collection` shape the fetch.
   */
  function search(query: string, opts: OutputOptions): void {
    const db = getDb();

    // Reject unknown collection filters up front.
    let collectionName: string | undefined;
    if (opts.collection) {
      if (!getCollectionFromYaml(opts.collection)) {
        console.error(`Collection not found: ${opts.collection}`);
        closeDb();
        process.exit(1);
      }
      collectionName = opts.collection;
    }

    // --all uses a very large limit; otherwise over-fetch and let outputResults trim.
    const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
    const hits = searchFTS(db, query, fetchLimit, collectionName);

    // Attach the stored context to every hit.
    const resultsWithContext = hits.map(hit => {
      const context = getContextForFile(db, hit.filepath);
      return {
        file: hit.filepath,
        displayPath: hit.displayPath,
        title: hit.title,
        body: hit.body || "",
        score: hit.score,
        context,
        hash: hit.hash,
        docid: hit.docid,
      };
    });

    closeDb();

    if (resultsWithContext.length > 0) {
      outputResults(resultsWithContext, query, opts);
    } else {
      console.log("No results found.");
    }
  }
  /**
   * Write the query expansion to stderr as a small tree (CLI progress feedback).
   *
   * The first row is the original query, followed by one row per expanded query
   * showing its type and a single-line preview capped at 72 characters. The
   * last row uses the closing branch glyph.
   *
   * @param originalQuery - Query as typed by the user.
   * @param expanded - Expanded queries to list below it.
   */
  function logExpansionTree(originalQuery: string, expanded: ExpandedQuery[]): void {
    const branch = (label: string): string => `${c.dim}├─ ${label}${c.reset}`;

    const rows = [
      branch(originalQuery),
      ...expanded.map(q => {
        const flat = q.text.replace(/\n/g, ' ');
        const preview = flat.length > 72 ? flat.substring(0, 69) + '...' : flat;
        return branch(`${q.type}: ${preview}`);
      }),
    ];

    // Turn the final branch into a closing corner.
    const lastIndex = rows.length - 1;
    rows[lastIndex] = rows[lastIndex]!.replace('├─', '└─');

    for (const row of rows) {
      process.stderr.write(row + '\n');
    }
  }
  /**
   * Run a vector similarity search inside an LLM session and print the results.
   *
   * An unknown `opts.collection` prints an error and exits with status 1.
   * The query uses a limit of 500 with `--all` (otherwise `opts.limit`, or 10
   * when that is falsy) and a minimum score of `opts.minScore`, or 0.3 when
   * that is falsy. Expansion progress is written to stderr.
   *
   * @param query - User query text.
   * @param opts - Output options.
   * @param _model - Unused; kept for call-site compatibility.
   */
  async function vectorSearch(query: string, opts: OutputOptions, _model: string = DEFAULT_EMBED_MODEL): Promise<void> {
    const store = getStore();

    if (opts.collection && !getCollectionFromYaml(opts.collection)) {
      console.error(`Collection not found: ${opts.collection}`);
      closeDb();
      process.exit(1);
    }

    checkIndexHealth(store.db);

    const limit = opts.all ? 500 : (opts.limit || 10);
    const minScore = opts.minScore || 0.3;

    await withLLMSession(async () => {
      const hits = await vectorSearchQuery(store, query, {
        collection: opts.collection,
        limit,
        minScore,
        hooks: {
          onExpand: (original, expanded) => {
            logExpansionTree(original, expanded);
            process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
          },
        },
      });

      closeDb();

      if (hits.length === 0) {
        console.log("No results found.");
        return;
      }

      const rows = hits.map(hit => ({
        file: hit.file,
        displayPath: hit.displayPath,
        title: hit.title,
        body: hit.body,
        score: hit.score,
        context: hit.context,
        docid: hit.docid,
      }));
      // The query already applied the limit, so print everything it returned.
      outputResults(rows, query, { ...opts, limit: hits.length });
    }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
  }
  /**
   * Run the hybrid query pipeline (hybridQuery) inside an LLM session and
   * print the results.
   *
   * An unknown `opts.collection` prints an error and exits with status 1.
   * Progress for each pipeline stage reported through the hooks (strong BM25
   * signal, expansion, reranking) is written to stderr. Each result's best
   * chunk is used as the body for snippet display.
   *
   * @param query - User query text.
   * @param opts - Output options.
   * @param _embedModel - Unused; kept for call-site compatibility.
   * @param _rerankModel - Unused; kept for call-site compatibility.
   */
  async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
    const store = getStore();

    if (opts.collection && !getCollectionFromYaml(opts.collection)) {
      console.error(`Collection not found: ${opts.collection}`);
      closeDb();
      process.exit(1);
    }

    checkIndexHealth(store.db);

    const limit = opts.all ? 500 : (opts.limit || 10);
    const minScore = opts.minScore || 0;

    await withLLMSession(async () => {
      const hits = await hybridQuery(store, query, {
        collection: opts.collection,
        limit,
        minScore,
        hooks: {
          onStrongSignal: (score) => {
            process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
          },
          onExpand: (original, expanded) => {
            logExpansionTree(original, expanded);
            process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
          },
          onRerankStart: (chunkCount) => {
            process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}\n`);
            progress.indeterminate();
          },
          onRerankDone: () => {
            progress.clear();
          },
        },
      });

      closeDb();

      if (hits.length === 0) {
        console.log("No results found.");
        return;
      }

      // Map to CLI output format — use bestChunk for snippet display
      const rows = hits.map(hit => ({
        file: hit.file,
        displayPath: hit.displayPath,
        title: hit.title,
        body: hit.bestChunk,
        chunkPos: hit.bestChunkPos,
        score: hit.score,
        context: hit.context,
        docid: hit.docid,
      }));
      // The query already applied the limit, so print everything it returned.
      outputResults(rows, query, { ...opts, limit: hits.length });
    }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
  }
  /**
   * Parse the command line (Bun.argv) with util.parseArgs.
   *
   * Side effect: when `--index <name>` is given, the index name is applied via
   * setIndexName and setConfigIndexName before returning.
   *
   * Result limit rules: `--all` uses 100000; otherwise `-n` is used when it
   * parses to a positive integer, else the default (20 for --files/--json,
   * 5 for every other format). Zero, negative and non-numeric values fall back
   * to the default, because a negative limit would make the later
   * `slice(0, limit)` drop results from the end of the list.
   *
   * @returns The command (first positional), its remaining args, those args
   *   joined as a query string, the derived OutputOptions, and the raw values.
   */
  function parseCLI() {
    const { values, positionals } = parseArgs({
      args: Bun.argv.slice(2), // Skip bun and script path
      options: {
        // Global options
        index: {
          type: "string",
        },
        context: {
          type: "string",
        },
        "no-lex": {
          type: "boolean",
        },
        help: { type: "boolean", short: "h" },
        version: { type: "boolean", short: "v" },
        // Search options
        n: { type: "string" },
        "min-score": { type: "string" },
        all: { type: "boolean" },
        full: { type: "boolean" },
        csv: { type: "boolean" },
        md: { type: "boolean" },
        xml: { type: "boolean" },
        files: { type: "boolean" },
        json: { type: "boolean" },
        collection: { type: "string", short: "c" }, // Filter by collection
        // Collection options
        name: { type: "string" }, // collection name
        mask: { type: "string" }, // glob pattern
        // Embed options
        force: { type: "boolean", short: "f" },
        // Update options
        pull: { type: "boolean" }, // git pull before update
        refresh: { type: "boolean" },
        // Get options
        l: { type: "string" }, // max lines
        from: { type: "string" }, // start line
        "max-bytes": { type: "string" }, // max bytes for multi-get
        "line-numbers": { type: "boolean" }, // add line numbers to output
        // MCP HTTP transport options
        http: { type: "boolean" },
        daemon: { type: "boolean" },
        port: { type: "string" },
      },
      allowPositionals: true,
      strict: false, // Allow unknown options to pass through
    });

    // Select index name (default: "index")
    const indexName = values.index as string | undefined;
    if (indexName) {
      setIndexName(indexName);
      setConfigIndexName(indexName);
    }

    // Determine output format
    let format: OutputFormat = "cli";
    if (values.csv) format = "csv";
    else if (values.md) format = "md";
    else if (values.xml) format = "xml";
    else if (values.files) format = "files";
    else if (values.json) format = "json";

    // Default limit: 20 for --files/--json, 5 otherwise
    // --all means return all results (use very large limit)
    const defaultLimit = (format === "files" || format === "json") ? 20 : 5;
    const isAll = !!values.all;

    // NaN (missing or unparsable) and non-positive values fail the `> 0` test.
    const requestedLimit = values.n ? parseInt(String(values.n), 10) : NaN;
    const limit = requestedLimit > 0 ? requestedLimit : defaultLimit;

    const opts: OutputOptions = {
      format,
      full: !!values.full,
      limit: isAll ? 100000 : limit,
      minScore: values["min-score"] ? parseFloat(String(values["min-score"])) || 0 : 0,
      all: isAll,
      collection: values.collection as string | undefined,
      lineNumbers: !!values["line-numbers"],
    };

    return {
      command: positionals[0] || "",
      args: positionals.slice(1),
      query: positionals.slice(1).join(" "),
      opts,
      values,
    };
  }
  /**
   * Print the CLI usage summary to stdout: commands, global/search/multi-get
   * options, the models used, and finally the path of the active index.
   */
  function showHelp(): void {
    const helpLines = [
      "Usage:",
      " qmd collection add [path] --name <name> --mask <pattern> - Create/index collection",
      " qmd collection list - List all collections with details",
      " qmd collection remove <name> - Remove a collection by name",
      " qmd collection rename <old> <new> - Rename a collection",
      " qmd ls [collection[/path]] - List collections or files in a collection",
      " qmd context add [path] \"text\" - Add context for path (defaults to current dir)",
      " qmd context list - List all contexts",
      " qmd context rm <path> - Remove context",
      " qmd get <file>[:line] [-l N] [--from N] - Get document (optionally from line, max N lines)",
      " qmd multi-get <pattern> [-l N] [--max-bytes N] - Get multiple docs by glob or comma-separated list",
      " qmd status - Show index status and collections",
      " qmd update [--pull] - Re-index all collections (--pull: git pull first)",
      " qmd embed [-f] - Create vector embeddings (900 tokens/chunk, 15% overlap)",
      " qmd cleanup - Remove cache and orphaned data, vacuum DB",
      " qmd query <query> - Search with query expansion + reranking (recommended)",
      " qmd search <query> - Full-text keyword search (BM25, no LLM)",
      " qmd vsearch <query> - Vector similarity search (no reranking)",
      " qmd mcp - Start MCP server (stdio transport)",
      " qmd mcp --http [--port N] - Start MCP server (HTTP transport, default port 8181)",
      " qmd mcp --http --daemon - Start MCP server as background daemon",
      " qmd mcp stop - Stop background MCP daemon",
      "",
      "Global options:",
      " --index <name> - Use custom index name (default: index)",
      "",
      "Search options:",
      " -n <num> - Number of results (default: 5, or 20 for --files)",
      " --all - Return all matches (use with --min-score to filter)",
      " --min-score <num> - Minimum similarity score",
      " --full - Output full document instead of snippet",
      " --line-numbers - Add line numbers to output",
      " --files - Output docid,score,filepath,context (default: 20 results)",
      " --json - JSON output with snippets (default: 20 results)",
      " --csv - CSV output with snippets",
      " --md - Markdown output",
      " --xml - XML output",
      " -c, --collection <name> - Filter results to a specific collection",
      "",
      "Multi-get options:",
      " -l <num> - Maximum lines per file",
      " --max-bytes <num> - Skip files larger than N bytes (default: 10240)",
      " --json/--csv/--md/--xml/--files - Output format (same as search)",
      "",
      "Models (auto-downloaded from HuggingFace):",
      " Embedding: embeddinggemma-300M-Q8_0",
      " Reranking: qwen3-reranker-0.6b-q8_0",
      " Generation: Qwen3-0.6B-Q8_0",
      "",
    ];

    for (const helpLine of helpLines) {
      console.log(helpLine);
    }
    // Resolved last, after the static text has been printed.
    console.log(`Index: ${getDbPath()}`);
  }
  /**
   * Print `qmd <version>` to stdout, where the version is read from the
   * package.json one directory above this script. When `git rev-parse`
   * succeeds for the script directory, the short commit hash is appended in
   * parentheses; any git failure is ignored.
   */
  async function showVersion(): Promise<void> {
    const scriptDir = import.meta.dir;
    const manifest = JSON.parse(readFileSync(resolve(scriptDir, "..", "package.json"), "utf-8"));

    let commit = "";
    try {
      const gitOutput = await $`git -C ${scriptDir} rev-parse --short HEAD`.quiet();
      commit = gitOutput.text().trim();
    } catch {
      // Not a git repo or git not available
    }

    const versionLabel = commit === "" ? manifest.version : `${manifest.version} (${commit})`;
    console.log(`qmd ${versionLabel}`);
  }
  1913. // Main CLI - only run if this is the main module
  1914. if (import.meta.main) {
  1915. const cli = parseCLI();
  1916. if (cli.values.version) {
  1917. await showVersion();
  1918. process.exit(0);
  1919. }
  1920. if (!cli.command || cli.values.help) {
  1921. showHelp();
  1922. process.exit(cli.values.help ? 0 : 1);
  1923. }
  1924. switch (cli.command) {
  1925. case "context": {
  1926. const subcommand = cli.args[0];
  1927. if (!subcommand) {
  1928. console.error("Usage: qmd context <add|list|check|rm>");
  1929. console.error("");
  1930. console.error("Commands:");
  1931. console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
  1932. console.error(" qmd context add / \"text\" - Add global context to all collections");
  1933. console.error(" qmd context list - List all contexts");
  1934. console.error(" qmd context check - Check for missing contexts");
  1935. console.error(" qmd context rm <path> - Remove context");
  1936. process.exit(1);
  1937. }
  1938. switch (subcommand) {
  1939. case "add": {
  1940. if (cli.args.length < 2) {
  1941. console.error("Usage: qmd context add [path] \"text\"");
  1942. console.error("");
  1943. console.error("Examples:");
  1944. console.error(" qmd context add \"Context for current directory\"");
  1945. console.error(" qmd context add . \"Context for current directory\"");
  1946. console.error(" qmd context add /subfolder \"Context for subfolder\"");
  1947. console.error(" qmd context add / \"Global context for all collections\"");
  1948. console.error("");
  1949. console.error(" Using virtual paths:");
  1950. console.error(" qmd context add qmd://journals/ \"Context for entire journals collection\"");
  1951. console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\"");
  1952. process.exit(1);
  1953. }
  1954. let pathArg: string | undefined;
  1955. let contextText: string;
  1956. // Check if first arg looks like a path or if it's the context text
  1957. const firstArg = cli.args[1] || '';
  1958. const secondArg = cli.args[2];
  1959. if (secondArg) {
  1960. // Two args: path + context
  1961. pathArg = firstArg;
  1962. contextText = cli.args.slice(2).join(" ");
  1963. } else {
  1964. // One arg: context only (use current directory)
  1965. pathArg = undefined;
  1966. contextText = firstArg;
  1967. }
  1968. await contextAdd(pathArg, contextText);
  1969. break;
  1970. }
  1971. case "list": {
  1972. contextList();
  1973. break;
  1974. }
  1975. case "check": {
  1976. contextCheck();
  1977. break;
  1978. }
  1979. case "rm":
  1980. case "remove": {
  1981. if (cli.args.length < 2 || !cli.args[1]) {
  1982. console.error("Usage: qmd context rm <path>");
  1983. console.error("Examples:");
  1984. console.error(" qmd context rm /");
  1985. console.error(" qmd context rm qmd://journals/2024");
  1986. process.exit(1);
  1987. }
  1988. contextRemove(cli.args[1]);
  1989. break;
  1990. }
  1991. default:
  1992. console.error(`Unknown subcommand: ${subcommand}`);
  1993. console.error("Available: add, list, check, rm");
  1994. process.exit(1);
  1995. }
  1996. break;
  1997. }
  // `qmd get <filepath>[:line]` — requires a file argument; the optional
  // --from / -l flag values are parsed as base-10 integers before being
  // forwarded to getDocument().
  case "get": {
    const target = cli.args[0];
    if (!target) {
      console.error("Usage: qmd get <filepath>[:line] [--from <line>] [-l <lines>] [--line-numbers]");
      process.exit(1);
    }

    // A missing (falsy) flag stays undefined; anything else goes through parseInt.
    const rawFrom = cli.values.from;
    const rawLimit = cli.values.l;
    let startLine: number | undefined;
    if (rawFrom) {
      startLine = parseInt(rawFrom as string, 10);
    }
    let lineLimit: number | undefined;
    if (rawLimit) {
      lineLimit = parseInt(rawLimit as string, 10);
    }

    getDocument(target, startLine, lineLimit, cli.opts.lineNumbers);
    break;
  }
  // `qmd multi-get <pattern>` — requires a pattern; -l and --max-bytes are
  // parsed as base-10 integers, with --max-bytes falling back to
  // DEFAULT_MULTI_GET_MAX_BYTES when not supplied.
  case "multi-get": {
    const pattern = cli.args[0];
    if (!pattern) {
      const usage = [
        "Usage: qmd multi-get <pattern> [-l <lines>] [--max-bytes <bytes>] [--json|--csv|--md|--xml|--files]",
        " pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list",
      ];
      for (const line of usage) console.error(line);
      process.exit(1);
    }

    const rawLines = cli.values.l;
    const rawBytes = cli.values["max-bytes"];
    const lineLimit = rawLines ? parseInt(rawLines as string, 10) : undefined;
    const byteLimit = rawBytes ? parseInt(rawBytes as string, 10) : DEFAULT_MULTI_GET_MAX_BYTES;

    multiGet(pattern, lineLimit, byteLimit, cli.opts.format);
    break;
  }
  // `qmd ls [path]` — the first positional (possibly undefined) is handed to
  // listFiles() unchanged.
  case "ls": {
    const target = cli.args[0];
    listFiles(target);
    break;
  }
  // `qmd collection <list|add|remove|rename>` — dispatch on the first positional.
  case "collection": {
    const action = cli.args[0];
    // Hint line shared by the remove/rename usage messages.
    const listHint = " Use 'qmd collection list' to see available collections";

    switch (action) {
      case "list": {
        collectionList();
        break;
      }

      // add [dir] [--mask <glob>] [--name <name>]
      case "add": {
        // No directory argument means "use the current working directory".
        const dirArg = cli.args[1] || getPwd();
        // "." maps to getPwd() directly; every other value is resolved and
        // then passed through getRealPath().
        const collectionDir = dirArg !== '.' ? getRealPath(resolve(dirArg)) : getPwd();
        const mask = (cli.values.mask as string) || DEFAULT_GLOB;
        const label = cli.values.name as string | undefined;
        await collectionAdd(collectionDir, mask, label);
        break;
      }

      case "remove":
      case "rm": {
        const target = cli.args[1];
        if (!target) {
          console.error("Usage: qmd collection remove <name>");
          console.error(listHint);
          process.exit(1);
        }
        collectionRemove(target);
        break;
      }

      case "rename":
      case "mv": {
        const oldName = cli.args[1];
        const newName = cli.args[2];
        if (!oldName || !newName) {
          console.error("Usage: qmd collection rename <old-name> <new-name>");
          console.error(listHint);
          process.exit(1);
        }
        collectionRename(oldName, newName);
        break;
      }

      default: {
        console.error(`Unknown subcommand: ${action}`);
        console.error("Available: list, add, remove, rename");
        process.exit(1);
      }
    }
    break;
  }
  // `qmd status` — awaits showStatus().
  case "status": {
    await showStatus();
    break;
  }

  // `qmd update` — awaits updateCollections().
  case "update": {
    await updateCollections();
    break;
  }

  // `qmd embed [--force]` — runs vectorIndex() with the default embed model;
  // the force flag is coerced to a strict boolean.
  case "embed": {
    const force = Boolean(cli.values.force);
    await vectorIndex(DEFAULT_EMBED_MODEL, force);
    break;
  }
  // `qmd pull [--refresh]` — pulls the three default models (embed, generate,
  // rerank) into DEFAULT_MODEL_CACHE_DIR and prints one summary line per model.
  case "pull": {
    // An absent flag is undefined, which Boolean() already maps to false.
    const refresh = Boolean(cli.values.refresh);

    console.log(`${c.bold}Pulling models${c.reset}`);
    const pulled = await pullModels(
      [DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI],
      { refresh, cacheDir: DEFAULT_MODEL_CACHE_DIR },
    );

    for (const entry of pulled) {
      const size = formatBytes(entry.sizeBytes);
      let note: string;
      if (entry.refreshed) {
        note = "refreshed";
      } else {
        note = "cached/checked";
      }
      console.log(`- ${entry.model} -> ${entry.path} (${size}, ${note})`);
    }
    break;
  }
  // `qmd search <query>` — requires a query; search() is called without await.
  case "search": {
    const text = cli.query;
    if (!text) {
      console.error("Usage: qmd search [options] <query>");
      process.exit(1);
    }
    search(text, cli.opts);
    break;
  }

  // `qmd vsearch <query>` — requires a query; awaits vectorSearch().
  case "vsearch":
  case "vector-search": { // undocumented alias
    const text = cli.query;
    if (!text) {
      console.error("Usage: qmd vsearch [options] <query>");
      process.exit(1);
    }
    // Default min-score for vector search is 0.3 (applied only when the
    // --min-score value is absent/falsy).
    const minScoreGiven = Boolean(cli.values["min-score"]);
    if (!minScoreGiven) {
      cli.opts.minScore = 0.3;
    }
    await vectorSearch(text, cli.opts);
    break;
  }

  // `qmd query <query>` — requires a query; awaits querySearch().
  case "query":
  case "deep-search": { // undocumented alias
    const text = cli.query;
    if (!text) {
      console.error("Usage: qmd query [options] <query>");
      process.exit(1);
    }
    await querySearch(text, cli.opts);
    break;
  }
  // `qmd mcp [stop] [--http [--port <n>] [--daemon]]`
  //
  //   stop             signal the process recorded in the PID file and remove the file
  //   --http           serve over HTTP on --port (8181 when the port is absent/invalid)
  //   --http --daemon  spawn a detached HTTP server, record its PID, then exit
  //   (no flags)       serve over stdio
  case "mcp": {
    const sub = cli.args[0]; // stop | status | undefined

    // Cache dir for PID/log files — same dir as the index
    const cacheDir = Bun.env.XDG_CACHE_HOME
      ? resolve(Bun.env.XDG_CACHE_HOME, "qmd")
      : resolve(homedir(), ".cache", "qmd");
    const pidPath = resolve(cacheDir, "mcp.pid");

    // Read the PID recorded in the PID file. Returns undefined unless the file
    // holds a positive integer: process.kill() follows kill(2) semantics, where
    // 0 and negative values address process groups rather than one process, so
    // a corrupt file must never reach process.kill().
    const readRecordedPid = (): number | undefined => {
      const parsed = parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
      return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
    };

    // Subcommands take priority over flags
    if (sub === "stop") {
      if (!existsSync(pidPath)) {
        console.log("Not running (no PID file).");
        process.exit(0);
      }

      const pid = readRecordedPid();
      let stopped = false;
      if (pid !== undefined) {
        try {
          process.kill(pid, 0); // alive?
          process.kill(pid, "SIGTERM");
          stopped = true;
        } catch {
          // Signal could not be delivered — treat the PID file as stale.
        }
      }

      // The PID file is removed in both outcomes; an unusable PID is reported
      // as stale, same as a process that is no longer there.
      unlinkSync(pidPath);
      console.log(
        stopped
          ? `Stopped QMD MCP server (PID ${pid}).`
          : "Cleaned up stale PID file (server was not running).",
      );
      process.exit(0);
    }

    if (cli.values.http) {
      const port = Number(cli.values.port) || 8181;

      if (cli.values.daemon) {
        // Guard: check if already running
        if (existsSync(pidPath)) {
          const existingPid = readRecordedPid();
          // An unusable PID cannot belong to a running server — fall through
          // and let the new daemon overwrite the file.
          if (existingPid !== undefined) {
            try {
              process.kill(existingPid, 0); // alive?
              console.error(`Already running (PID ${existingPid}). Run 'qmd mcp stop' first.`);
              process.exit(1);
            } catch {
              // Stale PID file — continue
            }
          }
        }

        mkdirSync(cacheDir, { recursive: true });
        const logPath = resolve(cacheDir, "mcp.log");
        const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
        // Re-run this same script in foreground HTTP mode, with stdout/stderr
        // redirected to the log file.
        const child = Bun.spawn([process.execPath, import.meta.path, "mcp", "--http", "--port", String(port)], {
          stdout: logFd,
          stderr: logFd,
          stdin: "ignore",
        });
        child.unref();
        closeSync(logFd); // parent's copy; child inherited the fd
        writeFileSync(pidPath, String(child.pid));
        console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`);
        console.log(`Logs: ${logPath}`);
        process.exit(0);
      }

      // Foreground HTTP mode — remove top-level cursor handlers so the
      // async cleanup handlers in startMcpHttpServer actually run.
      process.removeAllListeners("SIGTERM");
      process.removeAllListeners("SIGINT");
      const { startMcpHttpServer } = await import("./mcp.js");
      try {
        await startMcpHttpServer(port);
      } catch (e: unknown) {
        // Only a port clash gets a friendly message; everything else propagates.
        const code = (e as { code?: unknown } | null | undefined)?.code;
        if (code === "EADDRINUSE") {
          console.error(`Port ${port} already in use. Try a different port with --port.`);
          process.exit(1);
        }
        throw e;
      }
    } else {
      // Default: stdio transport
      const { startMcpServer } = await import("./mcp.js");
      await startMcpServer();
    }
    break;
  }
  // `qmd cleanup` — four maintenance steps against the database, each reported
  // on stdout, followed by closeDb().
  case "cleanup": {
    const db = getDb();
    // Green check-mark prefix shared by the success lines.
    const tick = `${c.green}✓${c.reset}`;

    // 1. Clear llm_cache
    const clearedEntries = deleteLLMCache(db);
    console.log(`${tick} Cleared ${clearedEntries} cached API responses`);

    // 2. Remove orphaned vectors
    const orphanCount = cleanupOrphanedVectors(db);
    console.log(
      orphanCount > 0
        ? `${tick} Removed ${orphanCount} orphaned embedding chunks`
        : `${c.dim}No orphaned embeddings to remove${c.reset}`,
    );

    // 3. Remove inactive documents (silent when nothing was removed)
    const inactiveCount = deleteInactiveDocuments(db);
    if (inactiveCount > 0) {
      console.log(`${tick} Removed ${inactiveCount} inactive document records`);
    }

    // 4. Vacuum to reclaim space
    vacuumDatabase(db);
    console.log(`${tick} Database vacuumed`);

    closeDb();
    break;
  }
  // Unrecognised top-level command: name it, point at --help, exit non-zero.
  default: {
    const messages = [
      `Unknown command: ${cli.command}`,
      "Run 'qmd --help' for usage.",
    ];
    for (const line of messages) console.error(line);
    process.exit(1);
  }
  2222. }
  // Every command except `mcp` ends here: dispose the default llama.cpp
  // instance, then exit with status 0.
  // NOTE(review): `mcp` is presumably excluded because its server must keep
  // running after the switch — confirm.
  const isMcpCommand = cli.command === "mcp";
  if (!isMcpCommand) {
    await disposeDefaultLlamaCpp();
    process.exit(0);
  }
  2227. } // end if (import.meta.main)