qmd.ts 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939
  1. import { openDatabase } from "./db.js";
  2. import type { Database } from "./db.js";
  3. import fastGlob from "fast-glob";
  4. import { execSync, spawn as nodeSpawn } from "child_process";
  5. import { fileURLToPath } from "url";
  6. import { dirname, join as pathJoin } from "path";
  7. import { parseArgs } from "util";
  8. import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
  9. import {
  10. getPwd,
  11. getRealPath,
  12. homedir,
  13. resolve,
  14. enableProductionMode,
  15. searchFTS,
  16. extractSnippet,
  17. getContextForFile,
  18. getContextForPath,
  19. listCollections,
  20. removeCollection,
  21. renameCollection,
  22. findSimilarFiles,
  23. findDocumentByDocid,
  24. isDocid,
  25. matchFilesByGlob,
  26. getHashesNeedingEmbedding,
  27. getHashesForEmbedding,
  28. clearAllEmbeddings,
  29. insertEmbedding,
  30. getStatus,
  31. hashContent,
  32. extractTitle,
  33. formatDocForEmbedding,
  34. chunkDocumentByTokens,
  35. clearCache,
  36. getCacheKey,
  37. getCachedResult,
  38. setCachedResult,
  39. getIndexHealth,
  40. parseVirtualPath,
  41. buildVirtualPath,
  42. isVirtualPath,
  43. resolveVirtualPath,
  44. toVirtualPath,
  45. insertContent,
  46. insertDocument,
  47. findActiveDocument,
  48. updateDocumentTitle,
  49. updateDocument,
  50. deactivateDocument,
  51. getActiveDocumentPaths,
  52. cleanupOrphanedContent,
  53. deleteLLMCache,
  54. deleteInactiveDocuments,
  55. cleanupOrphanedVectors,
  56. vacuumDatabase,
  57. getCollectionsWithoutContext,
  58. getTopLevelPathsWithoutContext,
  59. handelize,
  60. hybridQuery,
  61. vectorSearchQuery,
  62. structuredSearch,
  63. addLineNumbers,
  64. type ExpandedQuery,
  65. type StructuredSubSearch,
  66. DEFAULT_EMBED_MODEL,
  67. DEFAULT_RERANK_MODEL,
  68. DEFAULT_GLOB,
  69. DEFAULT_MULTI_GET_MAX_BYTES,
  70. createStore,
  71. getDefaultDbPath,
  72. } from "./store.js";
  73. import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "./llm.js";
  74. import {
  75. formatSearchResults,
  76. formatDocuments,
  77. escapeXml,
  78. escapeCSV,
  79. type OutputFormat,
  80. } from "./formatter.js";
  81. import {
  82. getCollection as getCollectionFromYaml,
  83. listCollections as yamlListCollections,
  84. getDefaultCollectionNames,
  85. addContext as yamlAddContext,
  86. removeContext as yamlRemoveContext,
  87. setGlobalContext,
  88. listAllContexts,
  89. setConfigIndexName,
  90. } from "./collections.js";
// Enable production mode - allows using default database path
// Tests must set INDEX_PATH or use createStore() with explicit path
enableProductionMode();
// =============================================================================
// Store/DB lifecycle (no legacy singletons in store.ts)
// =============================================================================
// Lazily created store handle: null until getStore() first creates it, and
// reset to null again by closeDb().
let store: ReturnType<typeof createStore> | null = null;
// Database path chosen via setIndexName(); undefined means "no override", in
// which case createStore()/getDbPath() fall back to their default path.
let storeDbPathOverride: string | undefined;
  99. function getStore(): ReturnType<typeof createStore> {
  100. if (!store) {
  101. store = createStore(storeDbPathOverride);
  102. }
  103. return store;
  104. }
  105. function getDb(): Database {
  106. return getStore().db;
  107. }
  108. function closeDb(): void {
  109. if (store) {
  110. store.close();
  111. store = null;
  112. }
  113. }
  114. function getDbPath(): string {
  115. return store?.dbPath ?? storeDbPathOverride ?? getDefaultDbPath();
  116. }
  117. function setIndexName(name: string | null): void {
  118. let normalizedName = name;
  119. // Normalize relative paths to prevent malformed database paths
  120. if (name && name.includes('/')) {
  121. const { resolve } = require('path');
  122. const { cwd } = require('process');
  123. const absolutePath = resolve(cwd(), name);
  124. // Replace path separators with underscores to create a valid filename
  125. normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
  126. }
  127. storeDbPathOverride = normalizedName ? getDefaultDbPath(normalizedName) : undefined;
  128. // Reset open handle so next use opens the new index
  129. closeDb();
  130. }
  131. function ensureVecTable(_db: Database, dimensions: number): void {
  132. // Store owns the DB; ignore `_db` and ensure vec table on the active store
  133. getStore().ensureVecTable(dimensions);
  134. }
  135. // Terminal colors (respects NO_COLOR env)
  136. const useColor = !process.env.NO_COLOR && process.stdout.isTTY;
  137. const c = {
  138. reset: useColor ? "\x1b[0m" : "",
  139. dim: useColor ? "\x1b[2m" : "",
  140. bold: useColor ? "\x1b[1m" : "",
  141. cyan: useColor ? "\x1b[36m" : "",
  142. yellow: useColor ? "\x1b[33m" : "",
  143. green: useColor ? "\x1b[32m" : "",
  144. magenta: useColor ? "\x1b[35m" : "",
  145. blue: useColor ? "\x1b[34m" : "",
  146. };
  147. // Terminal cursor control
  148. const cursor = {
  149. hide() { process.stderr.write('\x1b[?25l'); },
  150. show() { process.stderr.write('\x1b[?25h'); },
  151. };
  152. // Ensure cursor is restored on exit
  153. process.on('SIGINT', () => { cursor.show(); process.exit(130); });
  154. process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
  155. // Terminal progress bar using OSC 9;4 escape sequence (TTY only)
  156. const isTTY = process.stderr.isTTY;
  157. const progress = {
  158. set(percent: number) {
  159. if (isTTY) process.stderr.write(`\x1b]9;4;1;${Math.round(percent)}\x07`);
  160. },
  161. clear() {
  162. if (isTTY) process.stderr.write(`\x1b]9;4;0\x07`);
  163. },
  164. indeterminate() {
  165. if (isTTY) process.stderr.write(`\x1b]9;4;3\x07`);
  166. },
  167. error() {
  168. if (isTTY) process.stderr.write(`\x1b]9;4;2\x07`);
  169. },
  170. };
  171. // Format seconds into human-readable ETA
  172. function formatETA(seconds: number): string {
  173. if (seconds < 60) return `${Math.round(seconds)}s`;
  174. if (seconds < 3600) return `${Math.floor(seconds / 60)}m ${Math.round(seconds % 60)}s`;
  175. return `${Math.floor(seconds / 3600)}h ${Math.floor((seconds % 3600) / 60)}m`;
  176. }
  177. // Check index health and print warnings/tips
  178. function checkIndexHealth(db: Database): void {
  179. const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);
  180. // Warn if many docs need embedding
  181. if (needsEmbedding > 0) {
  182. const pct = Math.round((needsEmbedding / totalDocs) * 100);
  183. if (pct >= 10) {
  184. process.stderr.write(`${c.yellow}Warning: ${needsEmbedding} documents (${pct}%) need embeddings. Run 'qmd embed' for better results.${c.reset}\n`);
  185. } else {
  186. process.stderr.write(`${c.dim}Tip: ${needsEmbedding} documents need embeddings. Run 'qmd embed' to index them.${c.reset}\n`);
  187. }
  188. }
  189. // Check if most recent document update is older than 2 weeks
  190. if (daysStale !== null && daysStale >= 14) {
  191. process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'qmd update' to refresh.${c.reset}\n`);
  192. }
  193. }
  194. // Compute unique display path for a document
  195. // Always include at least parent folder + filename, add more parent dirs until unique
  196. function computeDisplayPath(
  197. filepath: string,
  198. collectionPath: string,
  199. existingPaths: Set<string>
  200. ): string {
  201. // Get path relative to collection (include collection dir name)
  202. const collectionDir = collectionPath.replace(/\/$/, '');
  203. const collectionName = collectionDir.split('/').pop() || '';
  204. let relativePath: string;
  205. if (filepath.startsWith(collectionDir + '/')) {
  206. // filepath is under collection: use collection name + relative path
  207. relativePath = collectionName + filepath.slice(collectionDir.length);
  208. } else {
  209. // Fallback: just use the filepath
  210. relativePath = filepath;
  211. }
  212. const parts = relativePath.split('/').filter(p => p.length > 0);
  213. // Always include at least parent folder + filename (minimum 2 parts if available)
  214. // Then add more parent dirs until unique
  215. const minParts = Math.min(2, parts.length);
  216. for (let i = parts.length - minParts; i >= 0; i--) {
  217. const candidate = parts.slice(i).join('/');
  218. if (!existingPaths.has(candidate)) {
  219. return candidate;
  220. }
  221. }
  222. // Absolute fallback: use full path (should be unique)
  223. return filepath;
  224. }
  225. function formatTimeAgo(date: Date): string {
  226. const seconds = Math.floor((Date.now() - date.getTime()) / 1000);
  227. if (seconds < 60) return `${seconds}s ago`;
  228. const minutes = Math.floor(seconds / 60);
  229. if (minutes < 60) return `${minutes}m ago`;
  230. const hours = Math.floor(minutes / 60);
  231. if (hours < 24) return `${hours}h ago`;
  232. const days = Math.floor(hours / 24);
  233. return `${days}d ago`;
  234. }
  235. function formatMs(ms: number): string {
  236. if (ms < 1000) return `${ms}ms`;
  237. return `${(ms / 1000).toFixed(1)}s`;
  238. }
  239. function formatBytes(bytes: number): string {
  240. if (bytes < 1024) return `${bytes} B`;
  241. if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  242. if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  243. return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
  244. }
/**
 * Print a status report for the current index to stdout.
 *
 * Sections, in output order: index path and file size, MCP daemon liveness
 * (from a PID file), document/vector counts, per-collection details with their
 * contexts, example commands, model links, device/GPU info, and tips.
 * The database is closed at the end of the function.
 */
async function showStatus(): Promise<void> {
  const dbPath = getDbPath();
  const db = getDb();
  // Collections are defined in YAML; no duplicate cleanup needed.
  // Index size
  let indexSize = 0;
  try {
    const stat = statSync(dbPath).size;
    indexSize = stat;
  } catch { }
  // (stat failures are ignored on purpose: the size is then reported as 0 B)
  // Collections info (from YAML + database stats)
  const collections = listCollections(db);
  // Overall stats
  const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number };
  const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get() as { count: number };
  const needsEmbedding = getHashesNeedingEmbedding(db);
  // Most recent update across all collections
  const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };
  console.log(`${c.bold}QMD Status${c.reset}\n`);
  console.log(`Index: ${dbPath}`);
  console.log(`Size: ${formatBytes(indexSize)}`);
  // MCP daemon status (check PID file liveness)
  // PID file lives in $XDG_CACHE_HOME/qmd when that variable is set, else ~/.cache/qmd
  const mcpCacheDir = process.env.XDG_CACHE_HOME
    ? resolve(process.env.XDG_CACHE_HOME, "qmd")
    : resolve(homedir(), ".cache", "qmd");
  const mcpPidPath = resolve(mcpCacheDir, "mcp.pid");
  if (existsSync(mcpPidPath)) {
    // NOTE(review): parseInt has no radix, and a non-numeric file yields NaN;
    // that case ends up in the catch branch below as well.
    const mcpPid = parseInt(readFileSync(mcpPidPath, "utf-8").trim());
    try {
      // Signal 0 sends nothing; the call only throws when the process cannot be signalled
      process.kill(mcpPid, 0);
      console.log(`MCP: ${c.green}running${c.reset} (PID ${mcpPid})`);
    } catch {
      // NOTE(review): any exception from process.kill is treated as a stale PID
      // file, and an unlinkSync failure here would propagate to the caller.
      unlinkSync(mcpPidPath);
      // Stale PID file cleaned up silently
    }
  }
  console.log("");
  console.log(`${c.bold}Documents${c.reset}`);
  console.log(` Total: ${totalDocs.count} files indexed`);
  console.log(` Vectors: ${vectorCount.count} embedded`);
  if (needsEmbedding > 0) {
    console.log(` ${c.yellow}Pending: ${needsEmbedding} need embedding${c.reset} (run 'qmd embed')`);
  }
  if (mostRecent.latest) {
    const lastUpdate = new Date(mostRecent.latest);
    console.log(` Updated: ${formatTimeAgo(lastUpdate)}`);
  }
  // Get all contexts grouped by collection (from YAML)
  const allContexts = listAllContexts();
  const contextsByCollection = new Map<string, { path_prefix: string; context: string }[]>();
  for (const ctx of allContexts) {
    // Group contexts by collection name
    if (!contextsByCollection.has(ctx.collection)) {
      contextsByCollection.set(ctx.collection, []);
    }
    contextsByCollection.get(ctx.collection)!.push({
      path_prefix: ctx.path,
      context: ctx.context
    });
  }
  if (collections.length > 0) {
    console.log(`\n${c.bold}Collections${c.reset}`);
    for (const col of collections) {
      const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never";
      const contexts = contextsByCollection.get(col.name) || [];
      console.log(` ${c.cyan}${col.name}${c.reset} ${c.dim}(qmd://${col.name}/)${c.reset}`);
      console.log(` ${c.dim}Pattern:${c.reset} ${col.glob_pattern}`);
      console.log(` ${c.dim}Files:${c.reset} ${col.active_count} (updated ${lastMod})`);
      if (contexts.length > 0) {
        console.log(` ${c.dim}Contexts:${c.reset} ${contexts.length}`);
        for (const ctx of contexts) {
          // Handle both empty string and '/' as root context
          const pathDisplay = (ctx.path_prefix === '' || ctx.path_prefix === '/') ? '/' : `/${ctx.path_prefix}`;
          // Truncate long context text to 60 characters (57 + "...")
          const contextPreview = ctx.context.length > 60
            ? ctx.context.substring(0, 57) + '...'
            : ctx.context;
          console.log(` ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`);
        }
      }
    }
    // Show examples of virtual paths
    // (all examples use the first collection's name)
    console.log(`\n${c.bold}Examples${c.reset}`);
    console.log(` ${c.dim}# List files in a collection${c.reset}`);
    if (collections.length > 0 && collections[0]) {
      console.log(` qmd ls ${collections[0].name}`);
    }
    console.log(` ${c.dim}# Get a document${c.reset}`);
    if (collections.length > 0 && collections[0]) {
      console.log(` qmd get qmd://${collections[0].name}/path/to/file.md`);
    }
    console.log(` ${c.dim}# Search within a collection${c.reset}`);
    if (collections.length > 0 && collections[0]) {
      console.log(` qmd search "query" -c ${collections[0].name}`);
    }
  } else {
    console.log(`\n${c.dim}No collections. Run 'qmd collection add .' to index markdown files.${c.reset}`);
  }
  // Models
  {
    // hf:org/repo/file.gguf → https://huggingface.co/org/repo
    // (URIs that do not match the hf: pattern are printed unchanged)
    const hfLink = (uri: string) => {
      const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
      return match ? `https://huggingface.co/${match[1]}` : uri;
    };
    console.log(`\n${c.bold}Models${c.reset}`);
    console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
    console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
    console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
  }
  // Device / GPU info
  try {
    const llm = getDefaultLlamaCpp();
    const device = await llm.getDeviceInfo();
    console.log(`\n${c.bold}Device${c.reset}`);
    if (device.gpu) {
      console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
      if (device.gpuDevices.length > 0) {
        // Deduplicate and count GPUs
        const counts = new Map<string, number>();
        for (const name of device.gpuDevices) {
          counts.set(name, (counts.get(name) || 0) + 1);
        }
        // Identical device names collapse to "N× name"
        const deviceStr = Array.from(counts.entries())
          .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
          .join(', ');
        console.log(` Devices: ${deviceStr}`);
      }
      if (device.vram) {
        console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
      }
    } else {
      console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
      console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
    }
    console.log(` CPU: ${device.cpuCores} math cores`);
  } catch {
    // Don't fail status if LLM init fails
  }
  // Tips section
  const tips: string[] = [];
  // Check for collections without context
  const collectionsWithoutContext = collections.filter(col => {
    const contexts = contextsByCollection.get(col.name) || [];
    return contexts.length === 0;
  });
  if (collectionsWithoutContext.length > 0) {
    // List at most three names and summarize the rest as "+N more".
    // Note: the arrow parameter `c` below shadows the module-level color table inside the callback.
    const names = collectionsWithoutContext.map(c => c.name).slice(0, 3).join(', ');
    const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : '';
    tips.push(`Add context to collections for better search results: ${names}${more}`);
    tips.push(` ${c.dim}qmd context add qmd://<name>/ "What this collection contains"${c.reset}`);
    tips.push(` ${c.dim}qmd context add qmd://<name>/meeting-notes "Weekly team meeting notes"${c.reset}`);
  }
  // Check for collections without update commands
  const collectionsWithoutUpdate = collections.filter(col => {
    const yamlCol = getCollectionFromYaml(col.name);
    return !yamlCol?.update;
  });
  // This tip is only shown when more than one collection exists
  if (collectionsWithoutUpdate.length > 0 && collections.length > 1) {
    const names = collectionsWithoutUpdate.map(c => c.name).slice(0, 3).join(', ');
    const more = collectionsWithoutUpdate.length > 3 ? ` +${collectionsWithoutUpdate.length - 3} more` : '';
    tips.push(`Add update commands to keep collections fresh: ${names}${more}`);
    tips.push(` ${c.dim}qmd collection update-cmd <name> 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`);
  }
  if (tips.length > 0) {
    console.log(`\n${c.bold}Tips${c.reset}`);
    for (const tip of tips) {
      console.log(` ${tip}`);
    }
  }
  // NOTE(review): not in a finally block, so an exception above leaves the store open.
  closeDb();
}
/**
 * Re-index every collection, running each collection's optional YAML-defined
 * update command first.
 *
 * For each collection: print a header, run `update` (if configured) through
 * `bash -c` in the collection directory, then call indexFiles(). A failing
 * update command terminates the process with that command's exit code.
 * After all collections are processed, the number of hashes still needing
 * embeddings is reported.
 */
async function updateCollections(): Promise<void> {
  const db = getDb();
  // Collections are defined in YAML; no duplicate cleanup needed.
  // Clear Ollama cache on update
  clearCache(db);
  const collections = listCollections(db);
  if (collections.length === 0) {
    console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
    closeDb();
    return;
  }
  // Don't close db here - indexFiles will reuse it and close at the end
  console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);
  for (let i = 0; i < collections.length; i++) {
    const col = collections[i];
    if (!col) continue;
    console.log(`${c.cyan}[${i + 1}/${collections.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`);
    // Execute custom update command if specified in YAML
    const yamlCol = getCollectionFromYaml(col.name);
    if (yamlCol?.update) {
      console.log(`${c.dim} Running update command: ${yamlCol.update}${c.reset}`);
      try {
        // Run the command through bash in the collection directory, capturing stdout and stderr
        const proc = nodeSpawn("bash", ["-c", yamlCol.update], {
          cwd: col.pwd,
          stdio: ["ignore", "pipe", "pipe"],
        });
        // Collect both streams fully; resolves on "close", rejects if the process emits "error".
        // A null exit code is mapped to 1.
        const [output, errorOutput, exitCode] = await new Promise<[string, string, number]>((resolve, reject) => {
          let out = "";
          let err = "";
          proc.stdout?.on("data", (d: Buffer) => { out += d.toString(); });
          proc.stderr?.on("data", (d: Buffer) => { err += d.toString(); });
          proc.on("error", reject);
          proc.on("close", (code) => resolve([out, err, code ?? 1]));
        });
        // Echo captured output, each line prefixed with one space
        if (output.trim()) {
          console.log(output.trim().split('\n').map(l => ` ${l}`).join('\n'));
        }
        // Note: the command's stderr is echoed through console.log (stdout) as well
        if (errorOutput.trim()) {
          console.log(errorOutput.trim().split('\n').map(l => ` ${l}`).join('\n'));
        }
        if (exitCode !== 0) {
          console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`);
          // NOTE(review): exits without calling closeDb()
          process.exit(exitCode);
        }
      } catch (err) {
        // Reached when the awaited promise rejects (process "error" event) or the code above throws
        console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`);
        process.exit(1);
      }
    }
    await indexFiles(col.pwd, col.glob_pattern, col.name, true, yamlCol?.ignore);
    console.log("");
  }
  // Check if any documents need embedding (show once at end)
  // getDb() is called again here rather than reusing `db` (see the note above about indexFiles closing it)
  const finalDb = getDb();
  const needsEmbedding = getHashesNeedingEmbedding(finalDb);
  closeDb();
  console.log(`${c.green}✓ All collections updated.${c.reset}`);
  if (needsEmbedding > 0) {
    console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
  }
}
  478. /**
  479. * Detect which collection (if any) contains the given filesystem path.
  480. * Returns { collectionId, collectionName, relativePath } or null if not in any collection.
  481. */
  482. function detectCollectionFromPath(db: Database, fsPath: string): { collectionName: string; relativePath: string } | null {
  483. const realPath = getRealPath(fsPath);
  484. // Find collections that this path is under from YAML
  485. const allCollections = yamlListCollections();
  486. // Find longest matching path
  487. let bestMatch: { name: string; path: string } | null = null;
  488. for (const coll of allCollections) {
  489. if (realPath.startsWith(coll.path + '/') || realPath === coll.path) {
  490. if (!bestMatch || coll.path.length > bestMatch.path.length) {
  491. bestMatch = { name: coll.name, path: coll.path };
  492. }
  493. }
  494. }
  495. if (!bestMatch) return null;
  496. // Calculate relative path
  497. let relativePath = realPath;
  498. if (relativePath.startsWith(bestMatch.path + '/')) {
  499. relativePath = relativePath.slice(bestMatch.path.length + 1);
  500. } else if (relativePath === bestMatch.path) {
  501. relativePath = '';
  502. }
  503. return {
  504. collectionName: bestMatch.name,
  505. relativePath
  506. };
  507. }
  508. async function contextAdd(pathArg: string | undefined, contextText: string): Promise<void> {
  509. const db = getDb();
  510. // Handle "/" as global context (applies to all collections)
  511. if (pathArg === '/') {
  512. setGlobalContext(contextText);
  513. console.log(`${c.green}✓${c.reset} Set global context`);
  514. console.log(`${c.dim}Context: ${contextText}${c.reset}`);
  515. closeDb();
  516. return;
  517. }
  518. // Resolve path - defaults to current directory if not provided
  519. let fsPath = pathArg || '.';
  520. if (fsPath === '.' || fsPath === './') {
  521. fsPath = getPwd();
  522. } else if (fsPath.startsWith('~/')) {
  523. fsPath = homedir() + fsPath.slice(1);
  524. } else if (!fsPath.startsWith('/') && !fsPath.startsWith('qmd://')) {
  525. fsPath = resolve(getPwd(), fsPath);
  526. }
  527. // Handle virtual paths (qmd://collection/path)
  528. if (isVirtualPath(fsPath)) {
  529. const parsed = parseVirtualPath(fsPath);
  530. if (!parsed) {
  531. console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
  532. process.exit(1);
  533. }
  534. const coll = getCollectionFromYaml(parsed.collectionName);
  535. if (!coll) {
  536. console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
  537. process.exit(1);
  538. }
  539. yamlAddContext(parsed.collectionName, parsed.path, contextText);
  540. const displayPath = parsed.path
  541. ? `qmd://${parsed.collectionName}/${parsed.path}`
  542. : `qmd://${parsed.collectionName}/ (collection root)`;
  543. console.log(`${c.green}✓${c.reset} Added context for: ${displayPath}`);
  544. console.log(`${c.dim}Context: ${contextText}${c.reset}`);
  545. closeDb();
  546. return;
  547. }
  548. // Detect collection from filesystem path
  549. const detected = detectCollectionFromPath(db, fsPath);
  550. if (!detected) {
  551. console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
  552. console.error(`${c.dim}Run 'qmd status' to see indexed collections${c.reset}`);
  553. process.exit(1);
  554. }
  555. yamlAddContext(detected.collectionName, detected.relativePath, contextText);
  556. const displayPath = detected.relativePath ? `qmd://${detected.collectionName}/${detected.relativePath}` : `qmd://${detected.collectionName}/`;
  557. console.log(`${c.green}✓${c.reset} Added context for: ${displayPath}`);
  558. console.log(`${c.dim}Context: ${contextText}${c.reset}`);
  559. closeDb();
  560. }
  561. function contextList(): void {
  562. const db = getDb();
  563. const allContexts = listAllContexts();
  564. if (allContexts.length === 0) {
  565. console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`);
  566. closeDb();
  567. return;
  568. }
  569. console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);
  570. let lastCollection = '';
  571. for (const ctx of allContexts) {
  572. if (ctx.collection !== lastCollection) {
  573. console.log(`${c.cyan}${ctx.collection}${c.reset}`);
  574. lastCollection = ctx.collection;
  575. }
  576. const displayPath = ctx.path ? ` ${ctx.path}` : ' / (root)';
  577. console.log(`${displayPath}`);
  578. console.log(` ${c.dim}${ctx.context}${c.reset}`);
  579. }
  580. closeDb();
  581. }
  582. function contextRemove(pathArg: string): void {
  583. if (pathArg === '/') {
  584. // Remove global context
  585. setGlobalContext(undefined);
  586. console.log(`${c.green}✓${c.reset} Removed global context`);
  587. return;
  588. }
  589. // Handle virtual paths
  590. if (isVirtualPath(pathArg)) {
  591. const parsed = parseVirtualPath(pathArg);
  592. if (!parsed) {
  593. console.error(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`);
  594. process.exit(1);
  595. }
  596. const coll = getCollectionFromYaml(parsed.collectionName);
  597. if (!coll) {
  598. console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
  599. process.exit(1);
  600. }
  601. const success = yamlRemoveContext(coll.name, parsed.path);
  602. if (!success) {
  603. console.error(`${c.yellow}No context found for: ${pathArg}${c.reset}`);
  604. process.exit(1);
  605. }
  606. console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`);
  607. return;
  608. }
  609. // Handle filesystem paths
  610. let fsPath = pathArg;
  611. if (fsPath === '.' || fsPath === './') {
  612. fsPath = getPwd();
  613. } else if (fsPath.startsWith('~/')) {
  614. fsPath = homedir() + fsPath.slice(1);
  615. } else if (!fsPath.startsWith('/')) {
  616. fsPath = resolve(getPwd(), fsPath);
  617. }
  618. const db = getDb();
  619. const detected = detectCollectionFromPath(db, fsPath);
  620. closeDb();
  621. if (!detected) {
  622. console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
  623. process.exit(1);
  624. }
  625. const success = yamlRemoveContext(detected.collectionName, detected.relativePath);
  626. if (!success) {
  627. console.error(`${c.yellow}No context found for: qmd://${detected.collectionName}/${detected.relativePath}${c.reset}`);
  628. process.exit(1);
  629. }
  630. console.log(`${c.green}✓${c.reset} Removed context for: qmd://${detected.collectionName}/${detected.relativePath}`);
  631. }
  632. function getDocument(filename: string, fromLine?: number, maxLines?: number, lineNumbers?: boolean): void {
  633. const db = getDb();
  634. // Parse :linenum suffix from filename (e.g., "file.md:100")
  635. let inputPath = filename;
  636. const colonMatch = inputPath.match(/:(\d+)$/);
  637. if (colonMatch && !fromLine) {
  638. const matched = colonMatch[1];
  639. if (matched) {
  640. fromLine = parseInt(matched, 10);
  641. inputPath = inputPath.slice(0, -colonMatch[0].length);
  642. }
  643. }
  644. // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
  645. if (isDocid(inputPath)) {
  646. const docidMatch = findDocumentByDocid(db, inputPath);
  647. if (docidMatch) {
  648. inputPath = docidMatch.filepath;
  649. } else {
  650. console.error(`Document not found: ${filename}`);
  651. closeDb();
  652. process.exit(1);
  653. }
  654. }
  655. let doc: { collectionName: string; path: string; body: string } | null = null;
  656. let virtualPath: string;
  657. // Handle virtual paths (qmd://collection/path)
  658. if (isVirtualPath(inputPath)) {
  659. const parsed = parseVirtualPath(inputPath);
  660. if (!parsed) {
  661. console.error(`Invalid virtual path: ${inputPath}`);
  662. closeDb();
  663. process.exit(1);
  664. }
  665. // Try exact match on collection + path
  666. doc = db.prepare(`
  667. SELECT d.collection as collectionName, d.path, content.doc as body
  668. FROM documents d
  669. JOIN content ON content.hash = d.hash
  670. WHERE d.collection = ? AND d.path = ? AND d.active = 1
  671. `).get(parsed.collectionName, parsed.path) as typeof doc;
  672. if (!doc) {
  673. // Try fuzzy match by path ending
  674. doc = db.prepare(`
  675. SELECT d.collection as collectionName, d.path, content.doc as body
  676. FROM documents d
  677. JOIN content ON content.hash = d.hash
  678. WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
  679. LIMIT 1
  680. `).get(parsed.collectionName, `%${parsed.path}`) as typeof doc;
  681. }
  682. virtualPath = inputPath;
  683. } else {
  684. // Try to interpret as collection/path format first (before filesystem path)
  685. // If path is relative (no / or ~ prefix), check if first component is a collection name
  686. if (!inputPath.startsWith('/') && !inputPath.startsWith('~')) {
  687. const parts = inputPath.split('/');
  688. if (parts.length >= 2) {
  689. const possibleCollection = parts[0];
  690. const possiblePath = parts.slice(1).join('/');
  691. // Check if this collection exists
  692. const collExists = possibleCollection ? db.prepare(`
  693. SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1
  694. `).get(possibleCollection) : null;
  695. if (collExists) {
  696. // Try exact match on collection + path
  697. doc = db.prepare(`
  698. SELECT d.collection as collectionName, d.path, content.doc as body
  699. FROM documents d
  700. JOIN content ON content.hash = d.hash
  701. WHERE d.collection = ? AND d.path = ? AND d.active = 1
  702. `).get(possibleCollection || "", possiblePath || "") as { collectionName: string; path: string; body: string } | null;
  703. if (!doc) {
  704. // Try fuzzy match by path ending
  705. doc = db.prepare(`
  706. SELECT d.collection as collectionName, d.path, content.doc as body
  707. FROM documents d
  708. JOIN content ON content.hash = d.hash
  709. WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
  710. LIMIT 1
  711. `).get(possibleCollection || "", `%${possiblePath}`) as { collectionName: string; path: string; body: string } | null;
  712. }
  713. if (doc) {
  714. virtualPath = buildVirtualPath(doc.collectionName, doc.path);
  715. // Skip the filesystem path handling below
  716. }
  717. }
  718. }
  719. }
  720. // If not found as collection/path, handle as filesystem paths
  721. if (!doc) {
  722. let fsPath = inputPath;
  723. // Expand ~ to home directory
  724. if (fsPath.startsWith('~/')) {
  725. fsPath = homedir() + fsPath.slice(1);
  726. } else if (!fsPath.startsWith('/')) {
  727. // Relative path - resolve from current directory
  728. fsPath = resolve(getPwd(), fsPath);
  729. }
  730. fsPath = getRealPath(fsPath);
  731. // Try to detect which collection contains this path
  732. const detected = detectCollectionFromPath(db, fsPath);
  733. if (detected) {
  734. // Found collection - query by collection name + relative path
  735. doc = db.prepare(`
  736. SELECT d.collection as collectionName, d.path, content.doc as body
  737. FROM documents d
  738. JOIN content ON content.hash = d.hash
  739. WHERE d.collection = ? AND d.path = ? AND d.active = 1
  740. `).get(detected.collectionName, detected.relativePath) as { collectionName: string; path: string; body: string } | null;
  741. }
  742. // Fuzzy match by filename (last component of path)
  743. if (!doc) {
  744. const filename = inputPath.split('/').pop() || inputPath;
  745. doc = db.prepare(`
  746. SELECT d.collection as collectionName, d.path, content.doc as body
  747. FROM documents d
  748. JOIN content ON content.hash = d.hash
  749. WHERE d.path LIKE ? AND d.active = 1
  750. LIMIT 1
  751. `).get(`%${filename}`) as { collectionName: string; path: string; body: string } | null;
  752. }
  753. if (doc) {
  754. virtualPath = buildVirtualPath(doc.collectionName, doc.path);
  755. } else {
  756. virtualPath = inputPath;
  757. }
  758. }
  759. }
  760. // Ensure doc is not null before proceeding
  761. if (!doc) {
  762. console.error(`Document not found: ${filename}`);
  763. closeDb();
  764. process.exit(1);
  765. }
  766. // Get context for this file
  767. const context = getContextForPath(db, doc.collectionName, doc.path);
  768. let output = doc.body;
  769. const startLine = fromLine || 1;
  770. // Apply line filtering if specified
  771. if (fromLine !== undefined || maxLines !== undefined) {
  772. const lines = output.split('\n');
  773. const start = startLine - 1; // Convert to 0-indexed
  774. const end = maxLines !== undefined ? start + maxLines : lines.length;
  775. output = lines.slice(start, end).join('\n');
  776. }
  777. // Add line numbers if requested
  778. if (lineNumbers) {
  779. output = addLineNumbers(output, startLine);
  780. }
  781. // Output context header if exists
  782. if (context) {
  783. console.log(`Folder Context: ${context}\n---\n`);
  784. }
  785. console.log(output);
  786. closeDb();
  787. }
  788. // Multi-get: fetch multiple documents by glob pattern or comma-separated list
  789. function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT_MULTI_GET_MAX_BYTES, format: OutputFormat = "cli"): void {
  790. const db = getDb();
  791. // Check if it's a comma-separated list or a glob pattern
  792. const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
  793. let files: { filepath: string; displayPath: string; bodyLength: number; collection?: string; path?: string }[];
  794. if (isCommaSeparated) {
  795. // Comma-separated list of files (can be virtual paths or relative paths)
  796. const names = pattern.split(',').map(s => s.trim()).filter(Boolean);
  797. files = [];
  798. for (const name of names) {
  799. let doc: { virtual_path: string; body_length: number; collection: string; path: string } | null = null;
  800. // Handle virtual paths
  801. if (isVirtualPath(name)) {
  802. const parsed = parseVirtualPath(name);
  803. if (parsed) {
  804. // Try exact match on collection + path
  805. doc = db.prepare(`
  806. SELECT
  807. 'qmd://' || d.collection || '/' || d.path as virtual_path,
  808. LENGTH(content.doc) as body_length,
  809. d.collection,
  810. d.path
  811. FROM documents d
  812. JOIN content ON content.hash = d.hash
  813. WHERE d.collection = ? AND d.path = ? AND d.active = 1
  814. `).get(parsed.collectionName, parsed.path) as typeof doc;
  815. }
  816. } else {
  817. // Try exact match on path
  818. doc = db.prepare(`
  819. SELECT
  820. 'qmd://' || d.collection || '/' || d.path as virtual_path,
  821. LENGTH(content.doc) as body_length,
  822. d.collection,
  823. d.path
  824. FROM documents d
  825. JOIN content ON content.hash = d.hash
  826. WHERE d.path = ? AND d.active = 1
  827. LIMIT 1
  828. `).get(name) as { virtual_path: string; body_length: number; collection: string; path: string } | null;
  829. // Try suffix match
  830. if (!doc) {
  831. doc = db.prepare(`
  832. SELECT
  833. 'qmd://' || d.collection || '/' || d.path as virtual_path,
  834. LENGTH(content.doc) as body_length,
  835. d.collection,
  836. d.path
  837. FROM documents d
  838. JOIN content ON content.hash = d.hash
  839. WHERE d.path LIKE ? AND d.active = 1
  840. LIMIT 1
  841. `).get(`%${name}`) as { virtual_path: string; body_length: number; collection: string; path: string } | null;
  842. }
  843. }
  844. if (doc) {
  845. files.push({
  846. filepath: doc.virtual_path,
  847. displayPath: doc.virtual_path,
  848. bodyLength: doc.body_length,
  849. collection: doc.collection,
  850. path: doc.path
  851. });
  852. } else {
  853. console.error(`File not found: ${name}`);
  854. }
  855. }
  856. } else {
  857. // Glob pattern - matchFilesByGlob now returns virtual paths
  858. files = matchFilesByGlob(db, pattern).map(f => ({
  859. ...f,
  860. collection: undefined, // Will be fetched later if needed
  861. path: undefined
  862. }));
  863. if (files.length === 0) {
  864. console.error(`No files matched pattern: ${pattern}`);
  865. closeDb();
  866. process.exit(1);
  867. }
  868. }
  869. // Collect results for structured output
  870. const results: { file: string; displayPath: string; title: string; body: string; context: string | null; skipped: boolean; skipReason?: string }[] = [];
  871. for (const file of files) {
  872. // Parse virtual path to get collection info if not already available
  873. let collection = file.collection;
  874. let path = file.path;
  875. if (!collection || !path) {
  876. const parsed = parseVirtualPath(file.filepath);
  877. if (parsed) {
  878. collection = parsed.collectionName;
  879. path = parsed.path;
  880. }
  881. }
  882. // Get context using collection-scoped function
  883. const context = collection && path ? getContextForPath(db, collection, path) : null;
  884. // Check size limit
  885. if (file.bodyLength > maxBytes) {
  886. results.push({
  887. file: file.filepath,
  888. displayPath: file.displayPath,
  889. title: file.displayPath.split('/').pop() || file.displayPath,
  890. body: "",
  891. context,
  892. skipped: true,
  893. skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'qmd get ${file.displayPath}' to retrieve.`,
  894. });
  895. continue;
  896. }
  897. // Fetch document content using collection and path
  898. if (!collection || !path) continue;
  899. const doc = db.prepare(`
  900. SELECT content.doc as body, d.title
  901. FROM documents d
  902. JOIN content ON content.hash = d.hash
  903. WHERE d.collection = ? AND d.path = ? AND d.active = 1
  904. `).get(collection, path) as { body: string; title: string } | null;
  905. if (!doc) continue;
  906. let body = doc.body;
  907. // Apply line limit if specified
  908. if (maxLines !== undefined) {
  909. const lines = body.split('\n');
  910. body = lines.slice(0, maxLines).join('\n');
  911. if (lines.length > maxLines) {
  912. body += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
  913. }
  914. }
  915. results.push({
  916. file: file.filepath,
  917. displayPath: file.displayPath,
  918. title: doc.title || file.displayPath.split('/').pop() || file.displayPath,
  919. body,
  920. context,
  921. skipped: false,
  922. });
  923. }
  924. closeDb();
  925. // Output based on format
  926. if (format === "json") {
  927. const output = results.map(r => ({
  928. file: r.displayPath,
  929. title: r.title,
  930. ...(r.context && { context: r.context }),
  931. ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
  932. }));
  933. console.log(JSON.stringify(output, null, 2));
  934. } else if (format === "csv") {
  935. const escapeField = (val: string | null | undefined): string => {
  936. if (val === null || val === undefined) return "";
  937. const str = String(val);
  938. if (str.includes(",") || str.includes('"') || str.includes("\n")) {
  939. return `"${str.replace(/"/g, '""')}"`;
  940. }
  941. return str;
  942. };
  943. console.log("file,title,context,skipped,body");
  944. for (const r of results) {
  945. console.log([r.displayPath, r.title, r.context, r.skipped ? "true" : "false", r.skipped ? r.skipReason : r.body].map(escapeField).join(","));
  946. }
  947. } else if (format === "files") {
  948. for (const r of results) {
  949. const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
  950. const status = r.skipped ? "[SKIPPED]" : "";
  951. console.log(`${r.displayPath}${ctx}${status ? `,${status}` : ""}`);
  952. }
  953. } else if (format === "md") {
  954. for (const r of results) {
  955. console.log(`## ${r.displayPath}\n`);
  956. if (r.title && r.title !== r.displayPath) console.log(`**Title:** ${r.title}\n`);
  957. if (r.context) console.log(`**Context:** ${r.context}\n`);
  958. if (r.skipped) {
  959. console.log(`> ${r.skipReason}\n`);
  960. } else {
  961. console.log("```");
  962. console.log(r.body);
  963. console.log("```\n");
  964. }
  965. }
  966. } else if (format === "xml") {
  967. console.log('<?xml version="1.0" encoding="UTF-8"?>');
  968. console.log("<documents>");
  969. for (const r of results) {
  970. console.log(" <document>");
  971. console.log(` <file>${escapeXml(r.displayPath)}</file>`);
  972. console.log(` <title>${escapeXml(r.title)}</title>`);
  973. if (r.context) console.log(` <context>${escapeXml(r.context)}</context>`);
  974. if (r.skipped) {
  975. console.log(` <skipped>true</skipped>`);
  976. console.log(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
  977. } else {
  978. console.log(` <body>${escapeXml(r.body)}</body>`);
  979. }
  980. console.log(" </document>");
  981. }
  982. console.log("</documents>");
  983. } else {
  984. // CLI format (default)
  985. for (const r of results) {
  986. console.log(`\n${'='.repeat(60)}`);
  987. console.log(`File: ${r.displayPath}`);
  988. console.log(`${'='.repeat(60)}\n`);
  989. if (r.skipped) {
  990. console.log(`[SKIPPED: ${r.skipReason}]`);
  991. continue;
  992. }
  993. if (r.context) {
  994. console.log(`Folder Context: ${r.context}\n---\n`);
  995. }
  996. console.log(r.body);
  997. }
  998. }
  999. }
  1000. // List files in virtual file tree
  1001. function listFiles(pathArg?: string): void {
  1002. const db = getDb();
  1003. if (!pathArg) {
  1004. // No argument - list all collections
  1005. const yamlCollections = yamlListCollections();
  1006. if (yamlCollections.length === 0) {
  1007. console.log("No collections found. Run 'qmd collection add .' to index files.");
  1008. closeDb();
  1009. return;
  1010. }
  1011. // Get file counts from database for each collection
  1012. const collections = yamlCollections.map(coll => {
  1013. const stats = db.prepare(`
  1014. SELECT COUNT(*) as file_count
  1015. FROM documents d
  1016. WHERE d.collection = ? AND d.active = 1
  1017. `).get(coll.name) as { file_count: number } | null;
  1018. return {
  1019. name: coll.name,
  1020. file_count: stats?.file_count || 0
  1021. };
  1022. });
  1023. console.log(`${c.bold}Collections:${c.reset}\n`);
  1024. for (const coll of collections) {
  1025. console.log(` ${c.dim}qmd://${c.reset}${c.cyan}${coll.name}/${c.reset} ${c.dim}(${coll.file_count} files)${c.reset}`);
  1026. }
  1027. closeDb();
  1028. return;
  1029. }
  1030. // Parse the path argument
  1031. let collectionName: string;
  1032. let pathPrefix: string | null = null;
  1033. if (pathArg.startsWith('qmd://')) {
  1034. // Virtual path format: qmd://collection/path
  1035. const parsed = parseVirtualPath(pathArg);
  1036. if (!parsed) {
  1037. console.error(`Invalid virtual path: ${pathArg}`);
  1038. closeDb();
  1039. process.exit(1);
  1040. }
  1041. collectionName = parsed.collectionName;
  1042. pathPrefix = parsed.path;
  1043. } else {
  1044. // Just collection name or collection/path
  1045. const parts = pathArg.split('/');
  1046. collectionName = parts[0] || '';
  1047. if (parts.length > 1) {
  1048. pathPrefix = parts.slice(1).join('/');
  1049. }
  1050. }
  1051. // Get the collection
  1052. const coll = getCollectionFromYaml(collectionName);
  1053. if (!coll) {
  1054. console.error(`Collection not found: ${collectionName}`);
  1055. console.error(`Run 'qmd ls' to see available collections.`);
  1056. closeDb();
  1057. process.exit(1);
  1058. }
  1059. // List files in the collection with size and modification time
  1060. let query: string;
  1061. let params: any[];
  1062. if (pathPrefix) {
  1063. // List files under a specific path
  1064. query = `
  1065. SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
  1066. FROM documents d
  1067. JOIN content ct ON d.hash = ct.hash
  1068. WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
  1069. ORDER BY d.path
  1070. `;
  1071. params = [coll.name, `${pathPrefix}%`];
  1072. } else {
  1073. // List all files in the collection
  1074. query = `
  1075. SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
  1076. FROM documents d
  1077. JOIN content ct ON d.hash = ct.hash
  1078. WHERE d.collection = ? AND d.active = 1
  1079. ORDER BY d.path
  1080. `;
  1081. params = [coll.name];
  1082. }
  1083. const files = db.prepare(query).all(...params) as { path: string; title: string; modified_at: string; size: number }[];
  1084. if (files.length === 0) {
  1085. if (pathPrefix) {
  1086. console.log(`No files found under qmd://${collectionName}/${pathPrefix}`);
  1087. } else {
  1088. console.log(`No files found in collection: ${collectionName}`);
  1089. }
  1090. closeDb();
  1091. return;
  1092. }
  1093. // Calculate max widths for alignment
  1094. const maxSize = Math.max(...files.map(f => formatBytes(f.size).length));
  1095. // Output in ls -l style
  1096. for (const file of files) {
  1097. const sizeStr = formatBytes(file.size).padStart(maxSize);
  1098. const date = new Date(file.modified_at);
  1099. const timeStr = formatLsTime(date);
  1100. // Dim the qmd:// prefix, highlight the filename
  1101. console.log(`${sizeStr} ${timeStr} ${c.dim}qmd://${collectionName}/${c.reset}${c.cyan}${file.path}${c.reset}`);
  1102. }
  1103. closeDb();
  1104. }
  1105. // Format date/time like ls -l
  1106. function formatLsTime(date: Date): string {
  1107. const now = new Date();
  1108. const sixMonthsAgo = new Date(now.getTime() - 6 * 30 * 24 * 60 * 60 * 1000);
  1109. const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
  1110. const month = months[date.getMonth()];
  1111. const day = date.getDate().toString().padStart(2, ' ');
  1112. // If file is older than 6 months, show year instead of time
  1113. if (date < sixMonthsAgo) {
  1114. const year = date.getFullYear();
  1115. return `${month} ${day} ${year}`;
  1116. } else {
  1117. const hours = date.getHours().toString().padStart(2, '0');
  1118. const minutes = date.getMinutes().toString().padStart(2, '0');
  1119. return `${month} ${day} ${hours}:${minutes}`;
  1120. }
  1121. }
  1122. // Collection management commands
  1123. function collectionList(): void {
  1124. const db = getDb();
  1125. const collections = listCollections(db);
  1126. if (collections.length === 0) {
  1127. console.log("No collections found. Run 'qmd collection add .' to create one.");
  1128. closeDb();
  1129. return;
  1130. }
  1131. console.log(`${c.bold}Collections (${collections.length}):${c.reset}\n`);
  1132. for (const coll of collections) {
  1133. const updatedAt = coll.last_modified ? new Date(coll.last_modified) : new Date();
  1134. const timeAgo = formatTimeAgo(updatedAt);
  1135. // Get YAML config to check includeByDefault
  1136. const yamlColl = getCollectionFromYaml(coll.name);
  1137. const excluded = yamlColl?.includeByDefault === false;
  1138. const excludeTag = excluded ? ` ${c.yellow}[excluded]${c.reset}` : '';
  1139. console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}${excludeTag}`);
  1140. console.log(` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`);
  1141. if (yamlColl?.ignore?.length) {
  1142. console.log(` ${c.dim}Ignore:${c.reset} ${yamlColl.ignore.join(', ')}`);
  1143. }
  1144. console.log(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
  1145. console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
  1146. console.log();
  1147. }
  1148. closeDb();
  1149. }
  1150. async function collectionAdd(pwd: string, globPattern: string, name?: string): Promise<void> {
  1151. // If name not provided, generate from pwd basename
  1152. let collName = name;
  1153. if (!collName) {
  1154. const parts = pwd.split('/').filter(Boolean);
  1155. collName = parts[parts.length - 1] || 'root';
  1156. }
  1157. // Check if collection with this name already exists in YAML
  1158. const existing = getCollectionFromYaml(collName);
  1159. if (existing) {
  1160. console.error(`${c.yellow}Collection '${collName}' already exists.${c.reset}`);
  1161. console.error(`Use a different name with --name <name>`);
  1162. process.exit(1);
  1163. }
  1164. // Check if a collection with this pwd+glob already exists in YAML
  1165. const allCollections = yamlListCollections();
  1166. const existingPwdGlob = allCollections.find(c => c.path === pwd && c.pattern === globPattern);
  1167. if (existingPwdGlob) {
  1168. console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
  1169. console.error(` Name: ${existingPwdGlob.name} (qmd://${existingPwdGlob.name}/)`);
  1170. console.error(` Pattern: ${globPattern}`);
  1171. console.error(`\nUse 'qmd update' to re-index it, or remove it first with 'qmd collection remove ${existingPwdGlob.name}'`);
  1172. process.exit(1);
  1173. }
  1174. // Add to YAML config
  1175. const { addCollection } = await import("./collections.js");
  1176. addCollection(collName, pwd, globPattern);
  1177. // Create the collection and index files
  1178. console.log(`Creating collection '${collName}'...`);
  1179. const newColl = getCollectionFromYaml(collName);
  1180. await indexFiles(pwd, globPattern, collName, false, newColl?.ignore);
  1181. console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
  1182. }
  1183. function collectionRemove(name: string): void {
  1184. // Check if collection exists in YAML
  1185. const coll = getCollectionFromYaml(name);
  1186. if (!coll) {
  1187. console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
  1188. console.error(`Run 'qmd collection list' to see available collections.`);
  1189. process.exit(1);
  1190. }
  1191. const db = getDb();
  1192. const result = removeCollection(db, name);
  1193. closeDb();
  1194. console.log(`${c.green}✓${c.reset} Removed collection '${name}'`);
  1195. console.log(` Deleted ${result.deletedDocs} documents`);
  1196. if (result.cleanedHashes > 0) {
  1197. console.log(` Cleaned up ${result.cleanedHashes} orphaned content hashes`);
  1198. }
  1199. }
  1200. function collectionRename(oldName: string, newName: string): void {
  1201. // Check if old collection exists in YAML
  1202. const coll = getCollectionFromYaml(oldName);
  1203. if (!coll) {
  1204. console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
  1205. console.error(`Run 'qmd collection list' to see available collections.`);
  1206. process.exit(1);
  1207. }
  1208. // Check if new name already exists in YAML
  1209. const existing = getCollectionFromYaml(newName);
  1210. if (existing) {
  1211. console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
  1212. console.error(`Choose a different name or remove the existing collection first.`);
  1213. process.exit(1);
  1214. }
  1215. const db = getDb();
  1216. renameCollection(db, oldName, newName);
  1217. closeDb();
  1218. console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
  1219. console.log(` Virtual paths updated: ${c.cyan}qmd://${oldName}/${c.reset} → ${c.cyan}qmd://${newName}/${c.reset}`);
  1220. }
  /**
   * Scan a directory for files matching a glob and sync them into a collection's index.
   *
   * New files are inserted, files whose content hash changed are updated, files whose
   * only difference is the extracted title get a title update, and previously active
   * documents that were not seen in this scan are deactivated. Orphaned content is
   * cleaned up afterwards and a summary is printed.
   *
   * @param pwd Directory to scan; falls back to getPwd() when omitted.
   * @param globPattern Glob (relative to the directory) selecting the files to index.
   * @param collectionName Target collection; required even though the signature marks it optional.
   * @param suppressEmbedNotice When true, the "Run 'qmd embed'" hint is not printed.
   * @param ignorePatterns Extra glob patterns to exclude, in addition to the built-in directory excludes.
   * @throws Error if `collectionName` is missing.
   */
  async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, collectionName?: string, suppressEmbedNotice: boolean = false, ignorePatterns?: string[]): Promise<void> {
    // Collection name must be provided (from YAML).
    // Validate before touching the database so a bad call does not leave a handle open.
    if (!collectionName) {
      throw new Error("Collection name is required. Collections must be defined in ~/.config/qmd/index.yml");
    }

    const db = getDb();
    try {
      const resolvedPwd = pwd || getPwd();
      const now = new Date().toISOString();
      const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];

      // Clear Ollama cache on index
      clearCache(db);

      console.log(`Collection: ${resolvedPwd} (${globPattern})`);
      progress.indeterminate();

      const allIgnore = [
        ...excludeDirs.map(d => `**/${d}/**`),
        ...(ignorePatterns || []),
      ];
      const allFiles: string[] = await fastGlob(globPattern, {
        cwd: resolvedPwd,
        onlyFiles: true,
        followSymbolicLinks: false,
        dot: false,
        ignore: allIgnore,
      });

      // Filter hidden files/folders (dot: false handles top-level but not nested)
      const files = allFiles.filter(file => !file.split("/").some(part => part.startsWith(".")));

      const total = files.length;
      if (total === 0) {
        progress.clear();
        console.log("No files found matching pattern.");
        // Continue so the deactivation pass can mark previously indexed docs as inactive.
      }

      // statSync throws when the file vanished after it was read; callers fall back to `now`.
      const statOrUndefined = (target: string) => {
        try {
          return statSync(target);
        } catch {
          return undefined;
        }
      };

      let indexed = 0, updated = 0, unchanged = 0, processed = 0;
      const seenPaths = new Set<string>();
      const startTime = Date.now();

      for (const relativeFile of files) {
        const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
        const docPath = handelize(relativeFile); // Normalize path for token-friendliness
        // Recorded before reading, so unreadable or empty files are not deactivated below.
        seenPaths.add(docPath);

        let content: string;
        try {
          content = readFileSync(filepath, "utf-8");
        } catch {
          // Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
          processed++;
          progress.set((processed / total) * 100);
          continue;
        }

        // Skip empty files - nothing useful to index
        if (!content.trim()) {
          processed++;
          progress.set((processed / total) * 100);
          continue;
        }

        const hash = await hashContent(content);
        const title = extractTitle(content, relativeFile);

        // Check if document exists in this collection with this path
        const existing = findActiveDocument(db, collectionName, docPath);
        if (!existing) {
          // New document - insert content and document
          indexed++;
          insertContent(db, hash, content, now);
          const stat = statOrUndefined(filepath);
          insertDocument(db, collectionName, docPath, title, hash,
            stat ? new Date(stat.birthtime).toISOString() : now,
            stat ? new Date(stat.mtime).toISOString() : now);
        } else if (existing.hash !== hash) {
          // Content changed - insert new content hash and update document
          insertContent(db, hash, content, now);
          const stat = statOrUndefined(filepath);
          updateDocument(db, existing.id, title, hash,
            stat ? new Date(stat.mtime).toISOString() : now);
          updated++;
        } else if (existing.title !== title) {
          // Hash unchanged, but the extracted title differs
          updateDocumentTitle(db, existing.id, title, now);
          updated++;
        } else {
          unchanged++;
        }

        processed++;
        progress.set((processed / total) * 100);
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = processed / elapsed;
        const remaining = (total - processed) / rate;
        const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
        if (isTTY) process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
      }

      // Deactivate documents in this collection that no longer exist
      const allActive = getActiveDocumentPaths(db, collectionName);
      let removed = 0;
      for (const activePath of allActive) {
        if (!seenPaths.has(activePath)) {
          deactivateDocument(db, collectionName, activePath);
          removed++;
        }
      }

      // Clean up orphaned content hashes (content not referenced by any document)
      const orphanedContent = cleanupOrphanedContent(db);

      // Check if vector index needs updating
      const needsEmbedding = getHashesNeedingEmbedding(db);

      progress.clear();
      console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
      if (orphanedContent > 0) {
        console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
      }
      if (needsEmbedding > 0 && !suppressEmbedNotice) {
        console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
      }
    } finally {
      // Always release the database, including when a step above throws.
      closeDb();
    }
  }
  /**
   * Render a fixed-width text progress bar made of filled and empty block characters.
   *
   * @param percent Completion in percent. Values outside 0-100 are clamped and NaN is
   *                treated as 0, so the function never throws on out-of-range input.
   * @param width Total number of characters in the bar (default 30).
   * @returns A string of exactly `width` characters.
   */
  function renderProgressBar(percent: number, width: number = 30): string {
    // String.prototype.repeat throws RangeError on negative counts, so clamp both inputs.
    const cells = Number.isFinite(width) ? Math.max(0, Math.floor(width)) : 0;
    const ratio = Number.isNaN(percent) ? 0 : Math.min(1, Math.max(0, percent / 100));
    const filled = Math.round(ratio * cells);
    return "█".repeat(filled) + "░".repeat(cells - filled);
  }
  /**
   * Chunk every content hash that still needs vectors and store an embedding per chunk.
   *
   * Chunks are embedded in batches; if a batch call throws, each chunk of that batch is
   * retried individually. Progress, throughput and an ETA are written to stderr while
   * running, and a summary is printed at the end.
   *
   * The terminal cursor is restored and the database is closed even when embedding fails.
   *
   * @param model Model name recorded with each stored embedding and shown in the output.
   * @param force When true, all existing embeddings are cleared first.
   */
  async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean = false): Promise<void> {
    const db = getDb();
    try {
      const now = new Date().toISOString();

      // If force, clear all vectors
      if (force) {
        console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
        clearAllEmbeddings(db);
      }

      // Find unique hashes that need embedding (from active documents)
      const hashesToEmbed = getHashesForEmbedding(db);
      if (hashesToEmbed.length === 0) {
        console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
        return;
      }

      // Prepare documents with chunks
      type ChunkItem = { hash: string; title: string; text: string; seq: number; pos: number; tokens: number; bytes: number; displayName: string };
      const allChunks: ChunkItem[] = [];
      let multiChunkDocs = 0;
      // One encoder is enough; it is only used to measure UTF-8 byte lengths.
      const encoder = new TextEncoder();

      // Chunk all documents using actual token counts
      process.stderr.write(`Chunking ${hashesToEmbed.length} documents by token count...\n`);
      for (const item of hashesToEmbed) {
        const bodyBytes = encoder.encode(item.body).length;
        if (bodyBytes === 0) continue; // Skip empty

        const title = extractTitle(item.body, item.path);
        const displayName = item.path;
        const chunks = await chunkDocumentByTokens(item.body); // Uses actual tokenizer
        if (chunks.length > 1) multiChunkDocs++;

        chunks.forEach((chunk, seq) => {
          allChunks.push({
            hash: item.hash,
            title,
            text: chunk.text,
            seq,
            pos: chunk.pos,
            tokens: chunk.tokens,
            bytes: encoder.encode(chunk.text).length,
            displayName,
          });
        });
      }

      if (allChunks.length === 0) {
        console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
        return;
      }

      const totalBytes = allChunks.reduce((sum, chk) => sum + chk.bytes, 0);
      const totalChunks = allChunks.length;
      const totalDocs = hashesToEmbed.length;

      console.log(`${c.bold}Embedding ${totalDocs} documents${c.reset} ${c.dim}(${totalChunks} chunks, ${formatBytes(totalBytes)})${c.reset}`);
      if (multiChunkDocs > 0) {
        console.log(`${c.dim}${multiChunkDocs} documents split into multiple chunks${c.reset}`);
      }
      console.log(`${c.dim}Model: ${model}${c.reset}\n`);

      // Hide cursor during embedding; restoreTerminal() undoes this exactly once.
      cursor.hide();
      let terminalRestored = false;
      const restoreTerminal = (): void => {
        if (terminalRestored) return;
        terminalRestored = true;
        progress.clear();
        cursor.show();
      };

      try {
        // Wrap all LLM embedding operations in a session for lifecycle management
        // Use 30 minute timeout for large collections
        await withLLMSession(async (session) => {
          // Get embedding dimensions from first chunk
          progress.indeterminate();
          const firstChunk = allChunks[0];
          if (!firstChunk) {
            throw new Error("No chunks available to embed");
          }
          const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
          const firstResult = await session.embed(firstText);
          if (!firstResult) {
            throw new Error("Failed to get embedding dimensions from first chunk");
          }
          ensureVecTable(db, firstResult.embedding.length);

          let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
          const startTime = Date.now();

          // Batch embedding for better throughput
          // Process in batches of 32 to balance memory usage and efficiency
          const BATCH_SIZE = 32;
          for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
            const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
            const batch = allChunks.slice(batchStart, batchEnd);

            // Format texts for embedding
            const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));

            try {
              // Batch embed all texts at once
              const embeddings = await session.embedBatch(texts);

              // Insert each embedding
              for (let i = 0; i < batch.length; i++) {
                const chunk = batch[i]!;
                const embedding = embeddings[i];
                if (embedding) {
                  insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
                  chunksEmbedded++;
                } else {
                  errors++;
                  console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
                }
                bytesProcessed += chunk.bytes;
              }
            } catch {
              // If batch fails, try individual embeddings as fallback
              for (const chunk of batch) {
                try {
                  const text = formatDocForEmbedding(chunk.text, chunk.title);
                  const result = await session.embed(text);
                  if (result) {
                    insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
                    chunksEmbedded++;
                  } else {
                    errors++;
                    // Report the failure the same way the batch path does.
                    console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
                  }
                } catch (innerErr) {
                  errors++;
                  console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}: ${innerErr}${c.reset}`);
                }
                bytesProcessed += chunk.bytes;
              }
            }

            const percent = (bytesProcessed / totalBytes) * 100;
            progress.set(percent);

            const elapsed = (Date.now() - startTime) / 1000;
            const bytesPerSec = bytesProcessed / elapsed;
            const remainingBytes = totalBytes - bytesProcessed;
            const etaSec = remainingBytes / bytesPerSec;

            const bar = renderProgressBar(percent);
            const percentStr = percent.toFixed(0).padStart(3);
            const throughput = `${formatBytes(bytesPerSec)}/s`;
            const eta = elapsed > 2 ? formatETA(etaSec) : "...";
            const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";

            if (isTTY) process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
          }

          restoreTerminal();

          const totalTimeSec = (Date.now() - startTime) / 1000;
          const avgThroughput = formatBytes(totalBytes / totalTimeSec);

          console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
          console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${chunksEmbedded}${c.reset} chunks from ${c.bold}${totalDocs}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset} ${c.dim}(${avgThroughput}/s)${c.reset}`);
          if (errors > 0) {
            console.log(`${c.yellow}⚠ ${errors} chunks failed${c.reset}`);
          }
        }, { maxDuration: 30 * 60 * 1000, name: 'embed-command' });
      } finally {
        // Without this the cursor stays hidden when the session throws.
        restoreTerminal();
      }
    } finally {
      closeDb();
    }
  }
  // Sanitize a term for FTS5: remove punctuation except apostrophes.
  // Letters, digits and combining marks of any script are kept (plus underscore),
  // so non-ASCII terms such as "café" or "日本語" survive intact.
  function sanitizeFTS5Term(term: string): string {
    // Apostrophes are kept for contractions like "don't".
    return term.replace(/[^\p{L}\p{N}\p{M}_']/gu, '').trim();
  }

  // Build FTS5 query: phrase-aware with fallback to individual terms.
  // Returns "" when no usable term (2+ characters after sanitizing) remains,
  // a single quoted term for one-term queries, and otherwise
  //   (exact phrase) OR (NEAR proximity match) OR (any individual term).
  function buildFTS5Query(query: string): string {
    // Double any embedded quote. Sanitizing already removes quotes; this is defensive.
    const quote = (text: string): string => `"${text.replace(/"/g, '""')}"`;

    // Sanitize the full query for phrase matching (whitespace is preserved here).
    const sanitizedQuery = query.replace(/[^\p{L}\p{N}\p{M}_\s']/gu, '').trim();

    const terms = query
      .split(/\s+/)
      .map(part => sanitizeFTS5Term(part))
      .filter(term => term.length >= 2); // Skip single chars and empty

    if (terms.length === 0) return "";

    const quotedTerms = terms.map(quote);
    if (quotedTerms.length === 1) return quotedTerms[0]!;

    // Strategy: exact phrase OR proximity match OR individual terms
    // Exact phrase matches rank highest, then close proximity, then any term
    const phrase = quote(sanitizedQuery);
    // FTS5 NEAR syntax: NEAR(term1 term2, distance)
    const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
    const orTerms = quotedTerms.join(' OR ');

    // Exact phrase > proximity > any term
    return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
  }
  // Squash a raw BM25 rank into the open interval (0, 1), higher meaning better.
  // SQLite reports BM25 as negative numbers (lower = better), so only the
  // magnitude is used. The logistic curve is centred on 5 with a scale of 3:
  // a magnitude of 2 lands near 0.27, 5 at exactly 0.5, and 15 near 0.97.
  function normalizeBM25(score: number): number {
    const magnitude = Math.abs(score);
    const exponent = (5 - magnitude) / 3;
    return 1 / (1 + Math.exp(exponent));
  }
  /** Options shared by the search commands when fetching and printing results. */
  type OutputOptions = {
    /** Output format selector. */
    format: OutputFormat;
    /** Emit the full body instead of a snippet. */
    full: boolean;
    /** Maximum number of results to print. */
    limit: number;
    /** Results scoring below this value are dropped. */
    minScore: number;
    /** Fetch with a much larger limit instead of `limit`. */
    all?: boolean;
    /** Filter by collection name(s). */
    collection?: string | string[];
    /** Add line numbers to output. */
    lineNumbers?: boolean;
    /** Optional context for query expansion. */
    context?: string;
    /** Max candidates to rerank (default: 40). */
    candidateLimit?: number;
  };
  // Highlight query terms in text (skip short words < 3 chars).
  // Returns the text unchanged when colour output is disabled or no term qualifies.
  // All terms are matched in a single pass, so colour codes inserted for one term are
  // never re-scanned for another, and a longer term wins over a shorter one it contains.
  function highlightTerms(text: string, query: string): string {
    if (!useColor) return text;

    const terms = Array.from(new Set(query.toLowerCase().split(/\s+/).filter(t => t.length >= 3)))
      .sort((a, b) => b.length - a.length); // longest first so alternation prefers the longer match
    if (terms.length === 0) return text;

    // Escape regex metacharacters so terms are matched literally.
    const alternation = terms
      .map(term => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
      .join('|');
    const matcher = new RegExp(alternation, 'gi');

    // A replacer function keeps "$" in the matched text from being read as a pattern.
    return text.replace(matcher, found => `${c.yellow}${c.bold}${found}${c.reset}`);
  }
  // Render a 0-1 score as a right-aligned percentage ("  7%", " 42%", "100%").
  // With colour enabled the tint depends on the score: green from 0.7,
  // yellow from 0.4, dim below that.
  function formatScore(score: number): string {
    const label = `${(score * 100).toFixed(0).padStart(3)}%`;
    if (!useColor) return label;

    const tint = score >= 0.7 ? c.green : score >= 0.4 ? c.yellow : c.dim;
    return `${tint}${label}${c.reset}`;
  }
  // Shorten directory path for display - relative to $HOME (used for context paths, not documents).
  // Only paths that are the home directory itself or live underneath it are rewritten;
  // a sibling such as "/home/user2" is left alone when home is "/home/user".
  function shortPath(dirpath: string): string {
    const home = homedir();
    if (!home || !dirpath.startsWith(home)) return dirpath;

    const rest = dirpath.slice(home.length);
    // The prefix must end on a path boundary, otherwise it is a different directory.
    const endsOnBoundary =
      rest === '' ||
      rest.startsWith('/') ||
      rest.startsWith('\\') ||
      home.endsWith('/') ||
      home.endsWith('\\');

    return endsOnBoundary ? '~' + rest : dirpath;
  }
  type EmptySearchReason = "no_results" | "min_score";

  // Print the "nothing found" output in a form that stays valid for the chosen format:
  // an empty JSON array, a CSV header row, an empty XML element, nothing at all for
  // md/files, and a human-readable sentence for every other format.
  function printEmptySearchResults(format: OutputFormat, reason: EmptySearchReason = "no_results"): void {
    switch (format) {
      case "json":
        console.log("[]");
        return;
      case "csv":
        console.log("docid,score,file,title,context,line,snippet");
        return;
      case "xml":
        console.log("<results></results>");
        return;
      case "md":
      case "files":
        return;
    }

    const message = reason === "min_score"
      ? "No results found above minimum score threshold."
      : "No results found.";
    console.log(message);
  }
  /**
   * Print search results in the format requested by `opts.format`.
   *
   * Results scoring below `opts.minScore` are dropped and the rest is capped at
   * `opts.limit`. If nothing remains, the format-specific empty output is printed.
   * Supported formats: "json", "files", "cli", "md", "xml"; anything else is
   * written as CSV.
   *
   * @param results Rows to print; `body` is the text snippets are extracted from.
   * @param query Query text used for snippet extraction and term highlighting.
   * @param opts Output options (format, limits, line numbers, full bodies).
   */
  function outputResults(results: { file: string; displayPath: string; title: string; body: string; score: number; context?: string | null; chunkPos?: number; hash?: string; docid?: string }[], query: string, opts: OutputOptions): void {
    const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
    if (filtered.length === 0) {
      printEmptySearchResults(opts.format, "min_score");
      return;
    }

    // Helper to create qmd:// URI from displayPath
    const toQmdPath = (displayPath: string) => `qmd://${displayPath}`;

    // Explicit docid if present, otherwise the first 6 characters of the content hash.
    const shortDocid = (row: { docid?: string; hash?: string }): string | undefined =>
      row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);

    // Escape a value for use inside a double-quoted XML attribute.
    // "&" must be replaced first so the other entities are not double-escaped.
    const escapeXmlAttr = (value: string): string =>
      value
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');

    if (opts.format === "json") {
      // JSON output for LLM consumption
      const output = filtered.map(row => {
        const docid = shortDocid(row);
        let body = opts.full ? row.body : undefined;
        let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
        if (opts.lineNumbers) {
          if (body) body = addLineNumbers(body);
          if (snippet) snippet = addLineNumbers(snippet);
        }
        return {
          ...(docid && { docid: `#${docid}` }),
          score: Math.round(row.score * 100) / 100,
          file: toQmdPath(row.displayPath),
          title: row.title,
          ...(row.context && { context: row.context }),
          ...(body && { body }),
          ...(snippet && { snippet }),
        };
      });
      console.log(JSON.stringify(output, null, 2));
    } else if (opts.format === "files") {
      // Simple docid,score,filepath,context output
      for (const row of filtered) {
        const docid = shortDocid(row) ?? "";
        const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
        console.log(`#${docid},${row.score.toFixed(2)},${toQmdPath(row.displayPath)}${ctx}`);
      }
    } else if (opts.format === "cli") {
      for (const [i, row] of filtered.entries()) {
        const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
        const docid = shortDocid(row);

        // Line 1: filepath with docid
        const path = toQmdPath(row.displayPath);
        // Only show :line if we actually found a term match in the snippet body (exclude header line).
        const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
        const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
        const lineInfo = hasMatch ? `:${line}` : "";
        const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
        console.log(`${c.cyan}${path}${c.dim}${lineInfo}${c.reset}${docidStr}`);

        // Line 2: Title (if available)
        if (row.title) {
          console.log(`${c.bold}Title: ${row.title}${c.reset}`);
        }

        // Line 3: Context (if available)
        if (row.context) {
          console.log(`${c.dim}Context: ${row.context}${c.reset}`);
        }

        // Line 4: Score
        const score = formatScore(row.score);
        console.log(`Score: ${c.bold}${score}${c.reset}`);
        console.log();

        // Snippet with highlighting (diff-style header included)
        const displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
        const highlighted = highlightTerms(displaySnippet, query);
        console.log(highlighted);

        // Double empty line between results
        if (i < filtered.length - 1) console.log('\n');
      }
    } else if (opts.format === "md") {
      for (const row of filtered) {
        const heading = row.title || row.displayPath;
        const docid = shortDocid(row);
        let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content);
        }
        const docidLine = docid ? `**docid:** \`#${docid}\`\n` : "";
        const contextLine = row.context ? `**context:** ${row.context}\n` : "";
        console.log(`---\n# ${heading}\n${docidLine}${contextLine}\n${content}\n`);
      }
    } else if (opts.format === "xml") {
      for (const row of filtered) {
        // Attribute values are escaped so "&", "<" or quotes cannot break the markup.
        const titleAttr = row.title ? ` title="${escapeXmlAttr(row.title)}"` : "";
        const contextAttr = row.context ? ` context="${escapeXmlAttr(row.context)}"` : "";
        const docid = shortDocid(row) ?? "";
        let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content);
        }
        // The element content is emitted as-is (not escaped), as before.
        console.log(`<file docid="#${docid}" name="${escapeXmlAttr(toQmdPath(row.displayPath))}"${titleAttr}${contextAttr}>\n${content}\n</file>\n`);
      }
    } else {
      // CSV format
      console.log("docid,score,file,title,context,line,snippet");
      for (const row of filtered) {
        const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
        let content = opts.full ? row.body : snippet;
        if (opts.lineNumbers) {
          content = addLineNumbers(content, line);
        }
        const docid = shortDocid(row) ?? "";
        const snippetText = content || "";
        console.log(`#${docid},${row.score.toFixed(4)},${escapeCSV(toQmdPath(row.displayPath))},${escapeCSV(row.title || "")},${escapeCSV(row.context || "")},${line},${escapeCSV(snippetText)}`);
      }
    }
  }
  // Turn the raw -c value (one name, several names, or nothing) into a list of
  // collection names. With no filter given, the default collections are returned
  // when useDefaults is set, otherwise an empty list. Every requested name is
  // looked up in the YAML config; an unknown name prints an error, closes the
  // database and exits the process with status 1.
  function resolveCollectionFilter(raw: string | string[] | undefined, useDefaults: boolean = false): string[] {
    if (!raw) {
      return useDefaults ? getDefaultCollectionNames() : [];
    }

    const requested = Array.isArray(raw) ? raw : [raw];
    const validated: string[] = [];
    requested.forEach(name => {
      if (!getCollectionFromYaml(name)) {
        console.error(`Collection not found: ${name}`);
        closeDb();
        process.exit(1);
      }
      validated.push(name);
    });
    return validated;
  }
  // Keep only the results whose path lies inside one of the named collections.
  // With zero or one collection name the input array is returned untouched.
  // The path is taken from `filepath`, falling back to `file`.
  function filterByCollections<T extends { filepath?: string; file?: string }>(results: T[], collectionNames: string[]): T[] {
    if (collectionNames.length < 2) return results;

    const allowedPrefixes = collectionNames.map(name => `qmd://${name}/`);
    const belongs = (item: T): boolean => {
      const location = item.filepath || item.file || '';
      return allowedPrefixes.some(prefix => location.startsWith(prefix));
    };
    return results.filter(item => belongs(item));
  }
  /**
   * Parse the structured search query syntax.
   *
   * A query document is a set of non-blank lines, each starting with `lex:`,
   * `vec:` or `hyde:` (case-insensitive). Blank lines are ignored, but reported
   * line numbers still refer to positions in the original text.
   *
   * Returns null for a plain query: empty input, a single unprefixed line, or a
   * single `expand:` line. Returns one StructuredSubSearch per typed line otherwise.
   *
   * Throws when:
   *   - an `expand:` line appears alongside other lines, or carries no text;
   *   - a typed line carries no text, or still contains a line break character;
   *   - a multi-line document contains a line without a lex:/vec:/hyde: prefix.
   *
   * Examples:
   *   "CAP theorem"                  -> null (plain query, use expansion)
   *   "lex: CAP theorem"             -> [{ type: 'lex', query: 'CAP theorem', line: 1 }]
   *   "lex: CAP\nvec: consistency"   -> [{ type: 'lex', ... }, { type: 'vec', ... }]
   *   "CAP\nconsistency"             -> throws (unprefixed lines in a multi-line document)
   */
  function parseStructuredQuery(query: string): StructuredSubSearch[] | null {
    const typedPrefix = /^(lex|vec|hyde):\s*/i;
    const expandPrefix = /^expand:\s*/i;

    const entries = query
      .split('\n')
      .map((raw, idx) => ({ trimmed: raw.trim(), number: idx + 1 }))
      .filter(entry => entry.trimmed.length > 0);
    if (entries.length === 0) return null;

    const isSingleLine = entries.length === 1;
    const collected: StructuredSubSearch[] = [];

    for (const { trimmed, number } of entries) {
      if (expandPrefix.test(trimmed)) {
        if (!isSingleLine) {
          throw new Error(`Line ${number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
        }
        if (!trimmed.replace(expandPrefix, '').trim()) {
          throw new Error('expand: query must include text.');
        }
        return null; // treat as standalone expand query
      }

      const prefix = typedPrefix.exec(trimmed);
      if (!prefix) {
        // Single plain line -> implicit expand
        if (isSingleLine) return null;
        throw new Error(`Line ${number} is missing a lex:/vec:/hyde: prefix. Each line in a query document must start with one.`);
      }

      const type = prefix[1]!.toLowerCase() as 'lex' | 'vec' | 'hyde';
      const text = trimmed.slice(prefix[0].length).trim();
      if (!text) {
        throw new Error(`Line ${number} (${type}:) must include text.`);
      }
      if (/\r|\n/.test(text)) {
        throw new Error(`Line ${number} (${type}:) contains a newline. Keep each query on a single line.`);
      }
      collected.push({ type, query: text, line: number });
    }

    return collected.length > 0 ? collected : null;
  }
  /**
   * Run a full-text (FTS) search and print the results.
   *
   * The collection filter falls back to the default collections when none is given.
   * A single collection is passed to the FTS query directly; several collections are
   * applied as a post-filter. The database is closed before any output is printed,
   * and also when the query itself fails.
   *
   * @param query Search text passed to the FTS search.
   * @param opts Output and filtering options.
   */
  function search(query: string, opts: OutputOptions): void {
    const db = getDb();

    const resultsWithContext = (() => {
      try {
        // Validate collection filter (supports multiple -c flags)
        // Use default collections if none specified
        const collectionNames = resolveCollectionFilter(opts.collection, true);
        const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;

        // Use large limit for --all, otherwise fetch more than needed and let outputResults filter
        const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
        const results = filterByCollections(
          searchFTS(db, query, fetchLimit, singleCollection),
          collectionNames
        );

        // Add context to results
        return results.map(r => ({
          file: r.filepath,
          displayPath: r.displayPath,
          title: r.title,
          body: r.body || "",
          score: r.score,
          context: getContextForFile(db, r.filepath),
          hash: r.hash,
          docid: r.docid,
        }));
      } finally {
        // Release the database even if the query or a context lookup throws.
        closeDb();
      }
    })();

    if (resultsWithContext.length === 0) {
      printEmptySearchResults(opts.format);
      return;
    }
    outputResults(resultsWithContext, query, opts);
  }
  // Write the original query and its expansions to stderr as a small tree
  // (CLI progress feedback). Expansion text is flattened to one line and
  // truncated to 72 characters; the final row gets the closing branch glyph.
  function logExpansionTree(originalQuery: string, expanded: ExpandedQuery[]): void {
    const clip = (text: string): string => {
      const flat = text.replace(/\n/g, ' ');
      return flat.length > 72 ? flat.substring(0, 69) + '...' : flat;
    };

    const rows: string[] = [
      `${c.dim}├─ ${originalQuery}${c.reset}`,
      ...expanded.map(q => `${c.dim}├─ ${q.type}: ${clip(q.text)}${c.reset}`),
    ];

    // rows always holds at least the original query, so a last row exists.
    const lastIndex = rows.length - 1;
    rows[lastIndex] = rows[lastIndex]!.replace('├─', '└─');

    rows.forEach(row => {
      process.stderr.write(row + '\n');
    });
  }
  /**
   * Run a vector search inside an LLM session and print the results.
   *
   * The collection filter falls back to the default collections when none is given.
   * A single collection is passed to the query directly; several collections are
   * applied as a post-filter on the `qmd://<collection>/` prefix. The database is
   * closed once results are fetched, and also when fetching fails.
   *
   * @param query Search text.
   * @param opts Output and filtering options.
   * @param _model Unused; kept for interface compatibility.
   */
  async function vectorSearch(query: string, opts: OutputOptions, _model: string = DEFAULT_EMBED_MODEL): Promise<void> {
    const store = getStore();

    // Validate collection filter (supports multiple -c flags)
    // Use default collections if none specified
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;

    checkIndexHealth(store.db);

    await withLLMSession(async () => {
      const fetchResults = async () => {
        try {
          const found = await vectorSearchQuery(store, query, {
            collection: singleCollection,
            limit: opts.all ? 500 : (opts.limit || 10),
            minScore: opts.minScore || 0.3,
            hooks: {
              onExpand: (original, expanded) => {
                logExpansionTree(original, expanded);
                process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
              },
            },
          });

          // Post-filter for multi-collection
          if (collectionNames.length <= 1) return found;
          // Built once, not once per result.
          const prefixes = collectionNames.map(n => `qmd://${n}/`);
          return found.filter(r => prefixes.some(p => r.file.startsWith(p)));
        } finally {
          // Release the database even if the query throws.
          closeDb();
        }
      };

      const results = await fetchResults();

      if (results.length === 0) {
        printEmptySearchResults(opts.format);
        return;
      }

      outputResults(results.map(r => ({
        file: r.file,
        displayPath: r.displayPath,
        title: r.title,
        body: r.body,
        score: r.score,
        context: r.context,
        docid: r.docid,
      })), query, { ...opts, limit: results.length });
    }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
  }
  /**
   * Run the combined query pipeline inside an LLM session and print the results.
   *
   * If the query uses the structured syntax (lex:/vec:/hyde: lines) the sub-queries
   * are searched as given; otherwise the standard hybrid query with automatic
   * expansion is used. Progress for each stage is written to stderr. The database is
   * closed once results are fetched, and also when fetching fails.
   *
   * @param query Plain query or structured query document.
   * @param opts Output and filtering options.
   * @param _embedModel Unused; kept for interface compatibility.
   * @param _rerankModel Unused; kept for interface compatibility.
   * @throws Error from parseStructuredQuery when the structured syntax is malformed.
   */
  async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
    const store = getStore();

    // Validate collection filter (supports multiple -c flags)
    // Use default collections if none specified
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;

    checkIndexHealth(store.db);

    // Check for structured query syntax (lex:/vec:/hyde: prefixes)
    const structuredQueries = parseStructuredQuery(query);

    // Embed/rerank progress reporting, identical for both search paths.
    const stageHooks = {
      onEmbedStart: (count: number) => {
        process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
      },
      onEmbedDone: (ms: number) => {
        process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
      },
      onRerankStart: (chunkCount: number) => {
        process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
        progress.indeterminate();
      },
      onRerankDone: (ms: number) => {
        progress.clear();
        process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
      },
    };

    await withLLMSession(async () => {
      const fetchResults = async () => {
        try {
          let found;

          if (structuredQueries) {
            // Structured search — user provided their own query expansions
            const typeLabels = structuredQueries.map(s => s.type).join('+');
            process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);

            // Log each sub-query
            for (const s of structuredQueries) {
              let preview = s.query.replace(/\n/g, ' ');
              if (preview.length > 72) preview = preview.substring(0, 69) + '...';
              process.stderr.write(`${c.dim}├─ ${s.type}: ${preview}${c.reset}\n`);
            }
            process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`);

            found = await structuredSearch(store, structuredQueries, {
              collections: singleCollection ? [singleCollection] : undefined,
              limit: opts.all ? 500 : (opts.limit || 10),
              minScore: opts.minScore || 0,
              candidateLimit: opts.candidateLimit,
              hooks: { ...stageHooks },
            });
          } else {
            // Standard hybrid query with automatic expansion
            found = await hybridQuery(store, query, {
              collection: singleCollection,
              limit: opts.all ? 500 : (opts.limit || 10),
              minScore: opts.minScore || 0,
              candidateLimit: opts.candidateLimit,
              hooks: {
                onStrongSignal: (score) => {
                  process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
                },
                onExpandStart: () => {
                  process.stderr.write(`${c.dim}Expanding query...${c.reset}`);
                },
                onExpand: (original, expanded, ms) => {
                  process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
                  logExpansionTree(original, expanded);
                  process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
                },
                ...stageHooks,
              },
            });
          }

          // Post-filter for multi-collection
          if (collectionNames.length > 1) {
            // Built once, not once per result.
            const prefixes = collectionNames.map(n => `qmd://${n}/`);
            found = found.filter(r => prefixes.some(p => r.file.startsWith(p)));
          }
          return found;
        } finally {
          // Release the database even if the search throws.
          closeDb();
        }
      };

      const results = await fetchResults();

      if (results.length === 0) {
        printEmptySearchResults(opts.format);
        return;
      }

      // Use first lex/vec query for output context, or original query
      const displayQuery = structuredQueries
        ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
        : query;

      // Map to CLI output format — use bestChunk for snippet display
      outputResults(results.map(r => ({
        file: r.file,
        displayPath: r.displayPath,
        title: r.title,
        body: r.bestChunk,
        chunkPos: r.bestChunkPos,
        score: r.score,
        context: r.context,
        docid: r.docid,
      })), displayQuery, { ...opts, limit: results.length });
    }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
  }
  /**
   * Parse the process command line with `util.parseArgs`.
   *
   * Side effect: when `--index <name>` is given, the name is forwarded to
   * `setIndexName` and `setConfigIndexName` before anything else is derived.
   *
   * Parsing is non-strict, so unknown flags do not throw; they simply show up
   * in `values`.
   *
   * @returns An object with:
   *   - `command`: first positional (empty string when absent)
   *   - `args`:    remaining positionals
   *   - `query`:   remaining positionals joined with a single space
   *   - `opts`:    normalized output/search options
   *   - `values`:  the raw option values from `parseArgs`
   */
  function parseCLI() {
    const { values, positionals } = parseArgs({
      args: process.argv.slice(2), // Skip node and script path
      options: {
        // Global options
        index: {
          type: "string",
        },
        context: {
          type: "string",
        },
        help: { type: "boolean", short: "h" },
        version: { type: "boolean", short: "v" },
        skill: { type: "boolean" },
        // Search options
        n: { type: "string" },
        "min-score": { type: "string" },
        all: { type: "boolean" },
        full: { type: "boolean" },
        csv: { type: "boolean" },
        md: { type: "boolean" },
        xml: { type: "boolean" },
        files: { type: "boolean" },
        json: { type: "boolean" },
        collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
        // Collection options
        name: { type: "string" }, // collection name
        mask: { type: "string" }, // glob pattern
        // Embed options
        force: { type: "boolean", short: "f" },
        // Update options
        pull: { type: "boolean" }, // git pull before update
        refresh: { type: "boolean" },
        // Get options
        l: { type: "string" }, // max lines
        from: { type: "string" }, // start line
        "max-bytes": { type: "string" }, // max bytes for multi-get
        "line-numbers": { type: "boolean" }, // add line numbers to output
        // Query options
        "candidate-limit": { type: "string", short: "C" },
        // MCP HTTP transport options
        http: { type: "boolean" },
        daemon: { type: "boolean" },
        port: { type: "string" },
      },
      allowPositionals: true,
      strict: false, // Allow unknown options to pass through
    });

    // Select index name (default: "index")
    const indexName = values.index as string | undefined;
    if (indexName) {
      setIndexName(indexName);
      setConfigIndexName(indexName);
    }

    // Determine output format; when several format flags are given the first
    // match in this chain wins (csv > md > xml > files > json).
    let format: OutputFormat = "cli";
    if (values.csv) format = "csv";
    else if (values.md) format = "md";
    else if (values.xml) format = "xml";
    else if (values.files) format = "files";
    else if (values.json) format = "json";

    // Default limit: 20 for --files/--json, 5 otherwise
    // --all means return all results (use very large limit)
    const defaultLimit = (format === "files" || format === "json") ? 20 : 5;
    const isAll = !!values.all;

    // parseInt yields NaN for non-numeric input (e.g. `-C abc`). Treat that as
    // "not set" so callers never receive NaN, mirroring how -n and --min-score
    // fall back to their defaults on unparsable input.
    let candidateLimit: number | undefined;
    if (values["candidate-limit"]) {
      const parsed = parseInt(String(values["candidate-limit"]), 10);
      candidateLimit = Number.isNaN(parsed) ? undefined : parsed;
    }

    const opts: OutputOptions = {
      format,
      full: !!values.full,
      // Unparsable or zero -n falls back to the format-dependent default.
      limit: isAll ? 100000 : (values.n ? parseInt(String(values.n), 10) || defaultLimit : defaultLimit),
      minScore: values["min-score"] ? parseFloat(String(values["min-score"])) || 0 : 0,
      all: isAll,
      collection: values.collection as string[] | undefined,
      lineNumbers: !!values["line-numbers"],
      candidateLimit,
    };

    return {
      command: positionals[0] || "",
      args: positionals.slice(1),
      query: positionals.slice(1).join(" "),
      opts,
      values,
    };
  }
  /**
   * Print the packaged skill document to stdout.
   *
   * A header with the relative path and the resolved location is always
   * printed first. If the file does not exist, a hint is written to stderr and
   * nothing else is printed. Otherwise the file content is written, with a
   * trailing newline appended when the file lacks one.
   */
  function showSkill(): void {
    const relPath = pathJoin("skills", "qmd", "SKILL.md");
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    // The skill file lives one level above this module's directory.
    const fullPath = pathJoin(moduleDir, "..", relPath);

    console.log(`QMD Skill (${relPath})`);
    console.log(`Location: ${fullPath}`);
    console.log("");

    if (existsSync(fullPath)) {
      const text = readFileSync(fullPath, "utf-8");
      process.stdout.write(text.endsWith("\n") ? text : text + "\n");
      return;
    }

    console.error("SKILL.md not found. If you built from source, ensure skills/qmd/SKILL.md exists.");
  }
  /**
   * Print the top-level CLI help to stdout: usage, command overview, the
   * `qmd query` grammar with examples and constraints, option reference, and
   * finally the path of the active index as reported by `getDbPath()`.
   */
  function showHelp(): void {
    // Emit each entry on its own line, in order.
    const print = (lines: readonly string[]): void => {
      for (const text of lines) {
        console.log(text);
      }
    };

    const grammar = [
      `query = expand_query | query_document ;`,
      `expand_query = text | explicit_expand ;`,
      `explicit_expand= "expand:" text ;`,
      `query_document = { typed_line } ;`,
      `typed_line = type ":" text newline ;`,
      `type = "lex" | "vec" | "hyde" ;`,
      `text = quoted_phrase | plain_text ;`,
      `quoted_phrase = '"' { character } '"' ;`,
      `plain_text = { character } ;`,
      `newline = "\\n" ;`,
    ];

    print([
      "qmd — Quick Markdown Search",
      "",
      "Usage:",
      " qmd <command> [options]",
      "",
      "Primary commands:",
      " qmd query <query> - Hybrid search with auto expansion + reranking (recommended)",
      " qmd query 'lex:..\\nvec:...' - Structured query document (you provide lex/vec/hyde lines)",
      " qmd search <query> - Full-text BM25 keywords (no LLM)",
      " qmd vsearch <query> - Vector similarity only",
      " qmd get <file>[:line] [-l N] - Show a single document, optional line slice",
      " qmd multi-get <pattern> - Batch fetch via glob or comma-separated list",
      " qmd mcp - Start the MCP server (stdio transport for AI agents)",
      "",
      "Collections & context:",
      " qmd collection add/list/remove/rename/show - Manage indexed folders",
      " qmd context add/list/rm - Attach human-written summaries",
      " qmd ls [collection[/path]] - Inspect indexed files",
      "",
      "Maintenance:",
      " qmd status - View index + collection health",
      " qmd update [--pull] - Re-index collections (optionally git pull first)",
      " qmd embed [-f] - Generate/refresh vector embeddings",
      " qmd cleanup - Clear caches, vacuum DB",
      "",
      "Query syntax (qmd query):",
      " QMD queries are either a single expand query (no prefix) or a multi-line",
      " document where every line is typed with lex:, vec:, or hyde:. This grammar",
      " matches the docs in docs/SYNTAX.md and is enforced in the CLI.",
      "",
      " Grammar:",
    ]);

    // Grammar productions are printed with a one-space prefix.
    print(grammar.map((production) => ` ${production}`));

    print([
      "",
      " Examples:",
      " qmd query \"how does auth work\" # single-line → implicit expand",
      " qmd query $'lex: CAP theorem\\nvec: consistency' # typed query document",
      " qmd query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search",
      " qmd query $'hyde: Hypothetical answer text' # hyde-only document",
      "",
      " Constraints:",
      " - Standalone expand queries cannot mix with typed lines.",
      " - Query documents allow only lex:, vec:, or hyde: prefixes.",
      " - Each typed line must be single-line text with balanced quotes.",
      "",
      "AI agents & integrations:",
      " - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.",
      " - `qmd --skill` prints the packaged skills/qmd/SKILL.md (path + contents).",
      " - Advanced: `qmd mcp --http ...` and `qmd mcp --http --daemon` are optional for custom transports.",
      "",
      "Global options:",
      " --index <name> - Use a named index (default: index)",
      "",
      "Search options:",
      " -n <num> - Max results (default 5, or 20 for --files/--json)",
      " --all - Return all matches (pair with --min-score)",
      " --min-score <num> - Minimum similarity score",
      " --full - Output full document instead of snippet",
      " -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)",
      " --line-numbers - Include line numbers in output",
      " --files | --json | --csv | --md | --xml - Output format",
      " -c, --collection <name> - Filter by one or more collections",
      "",
      "Multi-get options:",
      " -l <num> - Maximum lines per file",
      " --max-bytes <num> - Skip files larger than N bytes (default 10240)",
      " --json/--csv/--md/--xml/--files - Same formats as search",
      "",
    ]);

    // Resolved last, after all static help text has been written.
    console.log(`Index: ${getDbPath()}`);
  }
  /**
   * Print `qmd <version>` to stdout, followed by the short git commit hash in
   * parentheses when one can be determined.
   *
   * The version is read from `package.json` one directory above this module.
   * The commit lookup is best-effort: any failure (git missing, not a
   * repository, ...) is ignored and only the package version is printed.
   *
   * @throws If `package.json` cannot be read or is not valid JSON.
   */
  async function showVersion(): Promise<void> {
    const scriptDir = dirname(fileURLToPath(import.meta.url));
    const pkgPath = resolve(scriptDir, "..", "package.json");
    const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));

    let commit = "";
    try {
      // Run git *in* scriptDir via `cwd` rather than interpolating the path
      // into the shell command: an install path containing spaces or shell
      // metacharacters would otherwise be split or interpreted by the shell.
      commit = execSync("git rev-parse --short HEAD", {
        cwd: scriptDir,
        encoding: "utf-8",
        stdio: ["pipe", "pipe", "pipe"], // keep git's stderr off the terminal
      }).trim();
    } catch {
      // Not a git repo or git not available
    }

    const versionStr = commit ? `${pkg.version} (${commit})` : pkg.version;
    console.log(`qmd ${versionStr}`);
  }
  // Main CLI - only run if this is the main module.
  // The checks run cheapest-first; realpathSync is only reached when the
  // plain comparisons fail (it resolves symlinks and touches the filesystem).
  const __filename = fileURLToPath(import.meta.url);
  const argv1 = process.argv[1];
  const isMain = (() => {
    if (argv1 === __filename) {
      return true;
    }
    if (argv1?.endsWith("/qmd.ts") || argv1?.endsWith("/qmd.js")) {
      return true;
    }
    if (argv1 == null) {
      return false;
    }
    return realpathSync(argv1) === __filename;
  })();
  // CLI entry point. Parses argv once, handles the global --version / --skill /
  // --help flags, then dispatches on the first positional (the command).
  // Every command except "mcp" falls through to the shared shutdown at the
  // bottom, which disposes the default llama.cpp instance and exits with 0.
  if (isMain) {
    const cli = parseCLI();

    if (cli.values.version) {
      await showVersion();
      process.exit(0);
    }
    if (cli.values.skill) {
      showSkill();
      process.exit(0);
    }
    // Help requested explicitly exits 0; help shown because no command was
    // given exits 1.
    if (!cli.command || cli.values.help) {
      showHelp();
      process.exit(cli.values.help ? 0 : 1);
    }

    switch (cli.command) {
      case "context": {
        const subcommand = cli.args[0];
        if (!subcommand) {
          console.error("Usage: qmd context <add|list|rm>");
          console.error("");
          console.error("Commands:");
          console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
          console.error(" qmd context add / \"text\" - Add global context to all collections");
          console.error(" qmd context list - List all contexts");
          console.error(" qmd context rm <path> - Remove context");
          process.exit(1);
        }
        switch (subcommand) {
          case "add": {
            if (cli.args.length < 2) {
              console.error("Usage: qmd context add [path] \"text\"");
              console.error("");
              console.error("Examples:");
              console.error(" qmd context add \"Context for current directory\"");
              console.error(" qmd context add . \"Context for current directory\"");
              console.error(" qmd context add /subfolder \"Context for subfolder\"");
              console.error(" qmd context add / \"Global context for all collections\"");
              console.error("");
              console.error(" Using virtual paths:");
              console.error(" qmd context add qmd://journals/ \"Context for entire journals collection\"");
              console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\"");
              process.exit(1);
            }
            let pathArg: string | undefined;
            let contextText: string;
            // Check if first arg looks like a path or if it's the context text
            const firstArg = cli.args[1] || '';
            const secondArg = cli.args[2];
            if (secondArg) {
              // Two args: path + context (any further positionals are joined into the text)
              pathArg = firstArg;
              contextText = cli.args.slice(2).join(" ");
            } else {
              // One arg: context only (use current directory)
              pathArg = undefined;
              contextText = firstArg;
            }
            await contextAdd(pathArg, contextText);
            break;
          }
          case "list": {
            contextList();
            break;
          }
          case "rm":
          case "remove": {
            if (cli.args.length < 2 || !cli.args[1]) {
              console.error("Usage: qmd context rm <path>");
              console.error("Examples:");
              console.error(" qmd context rm /");
              console.error(" qmd context rm qmd://journals/2024");
              process.exit(1);
            }
            contextRemove(cli.args[1]);
            break;
          }
          default:
            console.error(`Unknown subcommand: ${subcommand}`);
            console.error("Available: add, list, rm");
            process.exit(1);
        }
        break;
      }
      case "get": {
        if (!cli.args[0]) {
          console.error("Usage: qmd get <filepath>[:line] [--from <line>] [-l <lines>] [--line-numbers]");
          process.exit(1);
        }
        const fromLine = cli.values.from ? parseInt(cli.values.from as string, 10) : undefined;
        const maxLines = cli.values.l ? parseInt(cli.values.l as string, 10) : undefined;
        getDocument(cli.args[0], fromLine, maxLines, cli.opts.lineNumbers);
        break;
      }
      case "multi-get": {
        if (!cli.args[0]) {
          console.error("Usage: qmd multi-get <pattern> [-l <lines>] [--max-bytes <bytes>] [--json|--csv|--md|--xml|--files]");
          console.error(" pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list");
          process.exit(1);
        }
        const maxLinesMulti = cli.values.l ? parseInt(cli.values.l as string, 10) : undefined;
        const maxBytes = cli.values["max-bytes"] ? parseInt(cli.values["max-bytes"] as string, 10) : DEFAULT_MULTI_GET_MAX_BYTES;
        multiGet(cli.args[0], maxLinesMulti, maxBytes, cli.opts.format);
        break;
      }
      case "ls": {
        listFiles(cli.args[0]);
        break;
      }
      case "collection": {
        const subcommand = cli.args[0];
        switch (subcommand) {
          case "list": {
            collectionList();
            break;
          }
          case "add": {
            // Path defaults to the current directory; "." is mapped to getPwd()
            // directly, anything else is resolved and passed through getRealPath.
            const pwd = cli.args[1] || getPwd();
            const resolvedPwd = pwd === '.' ? getPwd() : getRealPath(resolve(pwd));
            const globPattern = cli.values.mask as string || DEFAULT_GLOB;
            const name = cli.values.name as string | undefined;
            await collectionAdd(resolvedPwd, globPattern, name);
            break;
          }
          case "remove":
          case "rm": {
            if (!cli.args[1]) {
              console.error("Usage: qmd collection remove <name>");
              console.error(" Use 'qmd collection list' to see available collections");
              process.exit(1);
            }
            collectionRemove(cli.args[1]);
            break;
          }
          case "rename":
          case "mv": {
            if (!cli.args[1] || !cli.args[2]) {
              console.error("Usage: qmd collection rename <old-name> <new-name>");
              console.error(" Use 'qmd collection list' to see available collections");
              process.exit(1);
            }
            collectionRename(cli.args[1], cli.args[2]);
            break;
          }
          case "set-update":
          case "update-cmd": {
            const name = cli.args[1];
            // Everything after the name is the command; no command clears the setting.
            const cmd = cli.args.slice(2).join(' ') || null;
            if (!name) {
              console.error("Usage: qmd collection update-cmd <name> [command]");
              console.error(" Set the command to run before indexing (e.g., 'git pull')");
              console.error(" Omit command to clear it");
              process.exit(1);
            }
            const { updateCollectionSettings, getCollection } = await import("./collections.js");
            const col = getCollection(name);
            if (!col) {
              console.error(`Collection not found: ${name}`);
              process.exit(1);
            }
            updateCollectionSettings(name, { update: cmd });
            if (cmd) {
              console.log(`✓ Set update command for '${name}': ${cmd}`);
            } else {
              console.log(`✓ Cleared update command for '${name}'`);
            }
            break;
          }
          case "include":
          case "exclude": {
            const name = cli.args[1];
            if (!name) {
              console.error(`Usage: qmd collection ${subcommand} <name>`);
              console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`);
              process.exit(1);
            }
            const { updateCollectionSettings, getCollection } = await import("./collections.js");
            const col = getCollection(name);
            if (!col) {
              console.error(`Collection not found: ${name}`);
              process.exit(1);
            }
            const include = subcommand === 'include';
            updateCollectionSettings(name, { includeByDefault: include });
            console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`);
            break;
          }
          case "show":
          case "info": {
            const name = cli.args[1];
            if (!name) {
              console.error("Usage: qmd collection show <name>");
              process.exit(1);
            }
            const { getCollection } = await import("./collections.js");
            const col = getCollection(name);
            if (!col) {
              console.error(`Collection not found: ${name}`);
              process.exit(1);
            }
            console.log(`Collection: ${name}`);
            console.log(` Path: ${col.path}`);
            console.log(` Pattern: ${col.pattern}`);
            // Only an explicit `false` counts as excluded.
            console.log(` Include: ${col.includeByDefault !== false ? 'yes (default)' : 'no'}`);
            if (col.update) {
              console.log(` Update: ${col.update}`);
            }
            if (col.context) {
              const ctxCount = Object.keys(col.context).length;
              console.log(` Contexts: ${ctxCount}`);
            }
            break;
          }
          case "help":
          case undefined: {
            console.log("Usage: qmd collection <command> [options]");
            console.log("");
            console.log("Commands:");
            console.log(" list List all collections");
            console.log(" add <path> [--name NAME] Add a collection");
            console.log(" remove <name> Remove a collection");
            console.log(" rename <old> <new> Rename a collection");
            console.log(" show <name> Show collection details");
            console.log(" update-cmd <name> [cmd] Set pre-update command (e.g., 'git pull')");
            console.log(" include <name> Include in default queries");
            console.log(" exclude <name> Exclude from default queries");
            console.log("");
            console.log("Examples:");
            console.log(" qmd collection add ~/notes --name notes");
            console.log(" qmd collection update-cmd brain 'git pull'");
            console.log(" qmd collection exclude archive");
            process.exit(0);
          }
          default:
            console.error(`Unknown subcommand: ${subcommand}`);
            console.error("Run 'qmd collection help' for usage");
            process.exit(1);
        }
        break;
      }
      case "status":
        await showStatus();
        break;
      case "update":
        await updateCollections();
        break;
      case "embed":
        await vectorIndex(DEFAULT_EMBED_MODEL, !!cli.values.force);
        break;
      case "pull": {
        const refresh = Boolean(cli.values.refresh);
        const models = [
          DEFAULT_EMBED_MODEL_URI,
          DEFAULT_GENERATE_MODEL_URI,
          DEFAULT_RERANK_MODEL_URI,
        ];
        console.log(`${c.bold}Pulling models${c.reset}`);
        const results = await pullModels(models, {
          refresh,
          cacheDir: DEFAULT_MODEL_CACHE_DIR,
        });
        for (const result of results) {
          const size = formatBytes(result.sizeBytes);
          const note = result.refreshed ? "refreshed" : "cached/checked";
          console.log(`- ${result.model} -> ${result.path} (${size}, ${note})`);
        }
        break;
      }
      case "search":
        if (!cli.query) {
          console.error("Usage: qmd search [options] <query>");
          process.exit(1);
        }
        search(cli.query, cli.opts);
        break;
      case "vsearch":
      case "vector-search": // undocumented alias
        if (!cli.query) {
          console.error("Usage: qmd vsearch [options] <query>");
          process.exit(1);
        }
        // Default min-score for vector search is 0.3
        if (!cli.values["min-score"]) {
          cli.opts.minScore = 0.3;
        }
        await vectorSearch(cli.query, cli.opts);
        break;
      case "query":
      case "deep-search": // undocumented alias
        if (!cli.query) {
          console.error("Usage: qmd query [options] <query>");
          process.exit(1);
        }
        await querySearch(cli.query, cli.opts);
        break;
      case "mcp": {
        const sub = cli.args[0]; // stop | status | undefined
        // Cache dir for PID/log files — same dir as the index
        const cacheDir = process.env.XDG_CACHE_HOME
          ? resolve(process.env.XDG_CACHE_HOME, "qmd")
          : resolve(homedir(), ".cache", "qmd");
        const pidPath = resolve(cacheDir, "mcp.pid");

        // Subcommands take priority over flags
        if (sub === "stop") {
          if (!existsSync(pidPath)) {
            console.log("Not running (no PID file).");
            process.exit(0);
          }
          const pid = parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
          try {
            // A corrupt PID file must never reach process.kill: PID 0 and
            // negative PIDs address whole process groups rather than one
            // process. Treat such content as a stale file instead.
            if (!Number.isInteger(pid) || pid <= 0) {
              throw new Error(`Invalid PID in ${pidPath}`);
            }
            process.kill(pid, 0); // alive?
            process.kill(pid, "SIGTERM");
            unlinkSync(pidPath);
            console.log(`Stopped QMD MCP server (PID ${pid}).`);
          } catch {
            unlinkSync(pidPath);
            console.log("Cleaned up stale PID file (server was not running).");
          }
          process.exit(0);
        }

        if (cli.values.http) {
          const port = Number(cli.values.port) || 8181;

          if (cli.values.daemon) {
            // Guard: check if already running
            if (existsSync(pidPath)) {
              const existingPid = parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
              // Only probe plausible PIDs; signal 0 to PID 0 would succeed and
              // wrongly report a running server. Anything else is a stale file.
              if (Number.isInteger(existingPid) && existingPid > 0) {
                try {
                  process.kill(existingPid, 0); // alive?
                  console.error(`Already running (PID ${existingPid}). Run 'qmd mcp stop' first.`);
                  process.exit(1);
                } catch {
                  // Stale PID file — continue
                }
              }
            }

            mkdirSync(cacheDir, { recursive: true });
            const logPath = resolve(cacheDir, "mcp.log");
            const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
            const selfPath = fileURLToPath(import.meta.url);
            // Running from TypeScript source needs the tsx ESM loader; the
            // compiled .js entry can be started by node directly.
            const spawnArgs = selfPath.endsWith(".ts")
              ? ["--import", pathJoin(dirname(selfPath), "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
              : [selfPath, "mcp", "--http", "--port", String(port)];
            const child = nodeSpawn(process.execPath, spawnArgs, {
              stdio: ["ignore", logFd, logFd],
              detached: true,
            });
            child.unref();
            closeSync(logFd); // parent's copy; child inherited the fd

            // When the process could not be spawned there is no PID; do not
            // record "undefined" in the PID file or claim the server started.
            if (child.pid === undefined) {
              console.error("Failed to start QMD MCP server daemon (could not spawn process).");
              process.exit(1);
            }

            writeFileSync(pidPath, String(child.pid));
            console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`);
            console.log(`Logs: ${logPath}`);
            process.exit(0);
          }

          // Foreground HTTP mode — remove top-level cursor handlers so the
          // async cleanup handlers in startMcpHttpServer actually run.
          process.removeAllListeners("SIGTERM");
          process.removeAllListeners("SIGINT");
          const { startMcpHttpServer } = await import("./mcp.js");
          try {
            await startMcpHttpServer(port);
          } catch (e: any) {
            if (e?.code === "EADDRINUSE") {
              console.error(`Port ${port} already in use. Try a different port with --port.`);
              process.exit(1);
            }
            throw e;
          }
        } else {
          // Default: stdio transport
          const { startMcpServer } = await import("./mcp.js");
          await startMcpServer();
        }
        break;
      }
      case "cleanup": {
        const db = getDb();

        // 1. Clear llm_cache
        const cacheCount = deleteLLMCache(db);
        console.log(`${c.green}✓${c.reset} Cleared ${cacheCount} cached API responses`);

        // 2. Remove orphaned vectors
        const orphanedVecs = cleanupOrphanedVectors(db);
        if (orphanedVecs > 0) {
          console.log(`${c.green}✓${c.reset} Removed ${orphanedVecs} orphaned embedding chunks`);
        } else {
          console.log(`${c.dim}No orphaned embeddings to remove${c.reset}`);
        }

        // 3. Remove inactive documents
        const inactiveDocs = deleteInactiveDocuments(db);
        if (inactiveDocs > 0) {
          console.log(`${c.green}✓${c.reset} Removed ${inactiveDocs} inactive document records`);
        }

        // 4. Vacuum to reclaim space
        vacuumDatabase(db);
        console.log(`${c.green}✓${c.reset} Database vacuumed`);
        closeDb();
        break;
      }
      default:
        console.error(`Unknown command: ${cli.command}`);
        console.error("Run 'qmd --help' for usage.");
        process.exit(1);
    }

    // The mcp command is excluded from the shared shutdown: it is not force-exited here.
    if (cli.command !== "mcp") {
      await disposeDefaultLlamaCpp();
      process.exit(0);
    }
  } // end if (main module)