| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567 |
- #!/usr/bin/env bun
- import { Database } from "bun:sqlite";
- import { Glob, $ } from "bun";
- import { parseArgs } from "util";
- import * as sqliteVec from "sqlite-vec";
- import {
- getDb,
- closeDb,
- getDbPath,
- getPwd,
- getRealPath,
- homedir,
- resolve,
- setCustomIndexName,
- searchFTS,
- searchVec,
- reciprocalRankFusion,
- extractSnippet,
- getContextForFile,
- getContextForPath,
- listCollections,
- removeCollection,
- renameCollection,
- findSimilarFiles,
- matchFilesByGlob,
- getHashesNeedingEmbedding,
- getHashesForEmbedding,
- clearAllEmbeddings,
- insertEmbedding,
- getDocument as storeGetDocument,
- getMultipleDocuments as storeMultiGetDocuments,
- getStatus,
- hashContent,
- extractTitle,
- formatDocForEmbedding,
- formatQueryForEmbedding,
- chunkDocument,
- chunkDocumentByTokens,
- ensureVecTable,
- clearCache,
- getCacheKey,
- getCachedResult,
- setCachedResult,
- getIndexHealth,
- parseVirtualPath,
- buildVirtualPath,
- isVirtualPath,
- resolveVirtualPath,
- toVirtualPath,
- insertContent,
- insertDocument,
- findActiveDocument,
- updateDocumentTitle,
- updateDocument,
- deactivateDocument,
- getActiveDocumentPaths,
- cleanupOrphanedContent,
- deleteLLMCache,
- deleteInactiveDocuments,
- cleanupOrphanedVectors,
- cleanupDuplicateCollections,
- vacuumDatabase,
- getCollectionsWithoutContext,
- getTopLevelPathsWithoutContext,
- handelize,
- DEFAULT_EMBED_MODEL,
- DEFAULT_QUERY_MODEL,
- DEFAULT_RERANK_MODEL,
- DEFAULT_GLOB,
- DEFAULT_MULTI_GET_MAX_BYTES,
- } from "./store.js";
- import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, type RerankDocument, type ExpandedQuery } from "./llm.js";
- import type { SearchResult, RankedResult } from "./store.js";
- import {
- formatSearchResults,
- formatDocuments,
- escapeXml,
- escapeCSV,
- type OutputFormat,
- } from "./formatter.js";
- import {
- getCollection as getCollectionFromYaml,
- listCollections as yamlListCollections,
- addContext as yamlAddContext,
- removeContext as yamlRemoveContext,
- setGlobalContext,
- listAllContexts,
- } from "./collections.js";
// ANSI styling for terminal output. Styling is disabled when the NO_COLOR
// environment variable is set to a non-empty value or stdout is not a TTY.
const useColor = !process.env.NO_COLOR && process.stdout.isTTY;

// Escape sequences keyed by style name; every value is "" when styling is disabled.
const c = (() => {
  // Build the SGR sequence for one numeric code.
  const sgr = (code: number): string => (useColor ? `\x1b[${code}m` : "");
  return {
    reset: sgr(0),
    dim: sgr(2),
    bold: sgr(1),
    cyan: sgr(36),
    yellow: sgr(33),
    green: sgr(32),
    magenta: sgr(35),
    blue: sgr(34),
  };
})();
// Terminal cursor visibility, controlled by escape sequences written to stderr.
const cursor = (() => {
  const HIDE_SEQUENCE = '\x1b[?25l';
  const SHOW_SEQUENCE = '\x1b[?25h';

  const emit = (sequence: string): void => {
    process.stderr.write(sequence);
  };

  return {
    hide: (): void => emit(HIDE_SEQUENCE),
    show: (): void => emit(SHOW_SEQUENCE),
  };
})();
// Make the cursor visible again before exiting when the process is interrupted
// or terminated. Exit codes: 130 for SIGINT, 143 for SIGTERM.
for (const [signal, exitCode] of [['SIGINT', 130], ['SIGTERM', 143]] as const) {
  process.on(signal, () => {
    cursor.show();
    process.exit(exitCode);
  });
}
// Terminal progress reporting through the OSC 9;4 escape sequence (written to stderr).
const progress = (() => {
  // Wrap a payload as ESC ] 9;4;<payload> BEL and send it.
  const emit = (payload: string): void => {
    process.stderr.write(`\x1b]9;4;${payload}\x07`);
  };

  return {
    // Report a percentage; the value is rounded to the nearest integer.
    set(percent: number): void {
      emit(`1;${Math.round(percent)}`);
    },
    // Remove the progress indicator.
    clear(): void {
      emit("0");
    },
    // Show progress without a known percentage.
    indeterminate(): void {
      emit("3");
    },
    // Switch the indicator to its error state.
    error(): void {
      emit("2");
    },
  };
})();
/**
 * Format a duration in seconds as a short human-readable ETA.
 *
 * The input is rounded to whole seconds once, before it is split into units,
 * so a fractional value can never yield an out-of-range component such as
 * "1m 60s" or "60s".
 *
 * @param seconds - Remaining time in seconds.
 * @returns "Ns" below one minute, "Nm Ns" below one hour, otherwise "Nh Nm".
 *          Non-finite input (NaN, ±Infinity) yields "--".
 */
function formatETA(seconds: number): string {
  // A stalled or not-yet-measurable rate produces NaN/Infinity; show a placeholder
  // instead of text like "Infinityh NaNm".
  if (!Number.isFinite(seconds)) return "--";

  const total = Math.round(seconds);
  if (total < 60) return `${total}s`;
  if (total < 3600) return `${Math.floor(total / 60)}m ${total % 60}s`;
  return `${Math.floor(total / 3600)}h ${Math.floor((total % 3600) / 60)}m`;
}
/**
 * Inspect the index via getIndexHealth() and print hints to stderr.
 *
 * - When documents still need embeddings: a warning if they make up at least
 *   10% of all documents, otherwise a dimmed tip.
 * - When the index was last updated 14 or more days ago: a dimmed tip to refresh.
 *
 * @param db - Open database handle passed through to getIndexHealth().
 */
function checkIndexHealth(db: Database): void {
  const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);

  if (needsEmbedding > 0) {
    // Guard the division: with a document count of 0 the ratio would be Infinity
    // and the message would read "(Infinity%)". Treat that case as 100%.
    const pct = totalDocs > 0 ? Math.round((needsEmbedding / totalDocs) * 100) : 100;
    if (pct >= 10) {
      process.stderr.write(`${c.yellow}Warning: ${needsEmbedding} documents (${pct}%) need embeddings. Run 'qmd embed' for better results.${c.reset}\n`);
    } else {
      process.stderr.write(`${c.dim}Tip: ${needsEmbedding} documents need embeddings. Run 'qmd embed' to index them.${c.reset}\n`);
    }
  }

  // Two weeks without an update counts as stale.
  if (daysStale !== null && daysStale >= 14) {
    process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'qmd update' to refresh.${c.reset}\n`);
  }
}
/**
 * Derive a short display path for a document that is not already present in
 * `existingPaths`.
 *
 * When `filepath` lies under `collectionPath`, it is expressed relative to the
 * collection with the collection's directory name as the first segment;
 * otherwise `filepath` itself is used. Candidates start with the last two
 * segments (parent folder + filename, or fewer if the path is shorter) and grow
 * by one leading segment at a time until one is not in `existingPaths`.
 *
 * @param filepath - Path of the document.
 * @param collectionPath - Root directory of the collection; one trailing "/" is ignored.
 * @param existingPaths - Display paths that are already taken.
 * @returns The shortest unused candidate, or `filepath` unchanged when every candidate is taken.
 */
function computeDisplayPath(
  filepath: string,
  collectionPath: string,
  existingPaths: Set<string>
): string {
  const root = collectionPath.replace(/\/$/, '');
  const rootName = root.split('/').pop() || '';

  // Inside the collection: "<collection dir name>/<path below the root>".
  const scoped = filepath.startsWith(root + '/')
    ? rootName + filepath.slice(root.length)
    : filepath;

  const segments = scoped.split('/').filter(Boolean);

  // Never show fewer than two segments when two are available.
  const tailLength = Math.min(2, segments.length);
  let start = segments.length - tailLength;
  while (start >= 0) {
    const candidate = segments.slice(start).join('/');
    if (!existingPaths.has(candidate)) return candidate;
    start -= 1;
  }

  // Every candidate is taken: fall back to the path as given.
  return filepath;
}
/**
 * Score documents against a query with the default llama.cpp reranker.
 *
 * Progress is reported on stderr: a status line, then an indeterminate progress
 * indicator for the duration of the model call. The indicator is cleared even
 * when the model call throws.
 *
 * @param query - Query text to rank against.
 * @param documents - Candidates; each text is truncated to its first 4000 characters.
 * @param _model - Not used by this implementation.
 * @param _db - Not used by this implementation.
 * @returns One `{ file, score }` entry per result returned by the reranker;
 *          an empty array when `documents` is empty.
 */
async function rerank(query: string, documents: { file: string; text: string }[], _model: string = DEFAULT_RERANK_MODEL, _db?: Database): Promise<{ file: string; score: number }[]> {
  if (documents.length === 0) return [];

  process.stderr.write(`Reranking ${documents.length} documents...\n`);
  progress.indeterminate();

  const rerankDocs: RerankDocument[] = documents.map((doc) => ({
    file: doc.file,
    text: doc.text.slice(0, 4000), // Truncate to context limit
  }));

  let result;
  try {
    const llm = getDefaultLlamaCpp();
    result = await llm.rerank(query, rerankDocs);
  } finally {
    // Always reset the indicator so a failed rerank does not leave the terminal
    // stuck in the indeterminate state.
    progress.clear();
  }
  process.stderr.write("\n");

  return result.results.map((r) => ({ file: r.file, score: r.score }));
}
/**
 * Describe how long ago `date` was, using the largest whole unit that fits.
 *
 * @param date - The moment to describe, compared against `Date.now()`.
 * @returns "Ns ago", "Nm ago", "Nh ago" or "Nd ago". A date in the future
 *          (for example through clock skew) is reported as "0s ago" rather than
 *          a negative age; an invalid Date yields "unknown".
 */
function formatTimeAgo(date: Date): string {
  const elapsedMs = Date.now() - date.getTime();
  // getTime() is NaN for an invalid Date; avoid printing "NaNd ago".
  if (Number.isNaN(elapsedMs)) return "unknown";

  const seconds = Math.max(0, Math.floor(elapsedMs / 1000));
  if (seconds < 60) return `${seconds}s ago`;

  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) return `${minutes}m ago`;

  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;

  return `${Math.floor(hours / 24)}d ago`;
}
/**
 * Format a byte count for display using 1024-based units.
 *
 * @param bytes - Size in bytes.
 * @returns The raw count with " B" below 1024; otherwise the value with one
 *          decimal place in KB, MB or GB (GB is the largest unit used).
 */
function formatBytes(bytes: number): string {
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;

  if (bytes < KB) return `${bytes} B`;

  const scaled = (divisor: number, suffix: string): string =>
    `${(bytes / divisor).toFixed(1)} ${suffix}`;

  if (bytes < MB) return scaled(KB, "KB");
  if (bytes < GB) return scaled(MB, "MB");
  return scaled(GB, "GB");
}
/**
 * Print an overview of the index to stdout: database path and file size,
 * document/vector counts, time of the most recent document modification, and
 * for each collection its glob pattern, file count and configured contexts,
 * followed by example commands. Closes the database when done.
 */
function showStatus(): void {
  const dbPath = getDbPath();
  const db = getDb();

  // Clean up any duplicate collections before reporting on them.
  cleanupDuplicateCollections(db);

  // Size of the index file; stays 0 if it cannot be read.
  let indexSize = 0;
  try {
    indexSize = Bun.file(dbPath).size;
  } catch {}

  const collections = listCollections(db);

  // Index-wide numbers.
  const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number };
  const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get() as { count: number };
  const needsEmbedding = getHashesNeedingEmbedding(db);
  const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };

  console.log(`${c.bold}QMD Status${c.reset}\n`);
  console.log(`Index: ${dbPath}`);
  console.log(`Size: ${formatBytes(indexSize)}\n`);

  console.log(`${c.bold}Documents${c.reset}`);
  console.log(` Total: ${totalDocs.count} files indexed`);
  console.log(` Vectors: ${vectorCount.count} embedded`);
  if (needsEmbedding > 0) {
    console.log(` ${c.yellow}Pending: ${needsEmbedding} need embedding${c.reset} (run 'qmd embed')`);
  }
  if (mostRecent.latest) {
    console.log(` Updated: ${formatTimeAgo(new Date(mostRecent.latest))}`);
  }

  // Bucket the configured contexts by the collection they belong to.
  const contextsByCollection = new Map<string, { path_prefix: string; context: string }[]>();
  for (const { collection, path, context } of listAllContexts()) {
    const bucket = contextsByCollection.get(collection) ?? [];
    bucket.push({ path_prefix: path, context });
    contextsByCollection.set(collection, bucket);
  }

  if (collections.length === 0) {
    console.log(`\n${c.dim}No collections. Run 'qmd collection add .' to index markdown files.${c.reset}`);
    closeDb();
    return;
  }

  console.log(`\n${c.bold}Collections${c.reset}`);
  for (const col of collections) {
    const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never";
    const contexts = contextsByCollection.get(col.name) ?? [];

    console.log(` ${c.cyan}${col.name}${c.reset} ${c.dim}(qmd://${col.name}/)${c.reset}`);
    console.log(` ${c.dim}Pattern:${c.reset} ${col.glob_pattern}`);
    console.log(` ${c.dim}Files:${c.reset} ${col.active_count} (updated ${lastMod})`);

    if (contexts.length === 0) continue;

    console.log(` ${c.dim}Contexts:${c.reset} ${contexts.length}`);
    for (const { path_prefix, context } of contexts) {
      // Both "" and "/" denote the collection root.
      const isRoot = path_prefix === '' || path_prefix === '/';
      const pathDisplay = isRoot ? '/' : `/${path_prefix}`;
      // Keep previews to 60 characters at most.
      const contextPreview = context.length > 60 ? `${context.substring(0, 57)}...` : context;
      console.log(` ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`);
    }
  }

  // Example commands, using the first collection's name.
  const exampleName = collections[0].name;
  console.log(`\n${c.bold}Examples${c.reset}`);
  console.log(` ${c.dim}# List files in a collection${c.reset}`);
  console.log(` qmd ls ${exampleName}`);
  console.log(` ${c.dim}# Get a document${c.reset}`);
  console.log(` qmd get qmd://${exampleName}/path/to/file.md`);
  console.log(` ${c.dim}# Search within a collection${c.reset}`);
  console.log(` qmd search "query" -c ${exampleName}`);

  closeDb();
}
/**
 * Re-index every known collection.
 *
 * For each collection, an optional `update` shell command from the YAML
 * configuration is run first (via bash, in the collection's directory) and its
 * output is echoed; then the collection's files are indexed with indexFiles().
 * A failing update command terminates the process with the command's exit code
 * (or 1 if it could not be run at all).
 */
async function updateCollections(): Promise<void> {
  const db = getDb();
  cleanupDuplicateCollections(db);

  // Clear the cache before re-indexing.
  clearCache(db);

  const collections = listCollections(db);
  if (collections.length === 0) {
    console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
    closeDb();
    return;
  }

  // The db is intentionally not closed here - indexFiles will reuse it and close at the end.
  console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);

  // Trim command output and indent each of its lines for display.
  const indentOutput = (text: string): string =>
    text.trim().split('\n').map(l => ` ${l}`).join('\n');

  for (let i = 0; i < collections.length; i++) {
    const col = collections[i];
    console.log(`${c.cyan}[${i + 1}/${collections.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`);

    // Run the custom update command if one is configured in YAML.
    const yamlCol = getCollectionFromYaml(col.name);
    if (yamlCol?.update) {
      console.log(`${c.dim} Running update command: ${yamlCol.update}${c.reset}`);
      try {
        const proc = Bun.spawn(["/usr/bin/env", "bash", "-c", yamlCol.update], {
          cwd: col.pwd,
          stdout: "pipe",
          stderr: "pipe",
        });

        // Drain both pipes while waiting for the exit status. Reading stdout to
        // EOF before touching stderr can stall: a child that fills the stderr
        // pipe blocks on write and never gets to close stdout.
        const [output, errorOutput, exitCode] = await Promise.all([
          new Response(proc.stdout).text(),
          new Response(proc.stderr).text(),
          proc.exited,
        ]);

        if (output.trim()) {
          console.log(indentOutput(output));
        }
        if (errorOutput.trim()) {
          console.log(indentOutput(errorOutput));
        }
        if (exitCode !== 0) {
          console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`);
          process.exit(exitCode);
        }
      } catch (err) {
        console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`);
        process.exit(1);
      }
    }

    await indexFiles(col.pwd, col.glob_pattern, col.name);
    console.log("");
  }

  console.log(`${c.green}✓ All collections updated.${c.reset}`);
}
/**
 * Find the collection (from the YAML configuration) that contains a filesystem path.
 *
 * The path is resolved with getRealPath() first. A collection contains the path
 * when the path equals the collection's root or lies below it. If several
 * collections qualify, the one with the longest root path is chosen (the
 * earliest listed one on a tie).
 *
 * @param db - Not used by this implementation; kept for existing callers.
 * @param fsPath - Filesystem path to look up.
 * @returns `{ collectionName, relativePath }`, where `relativePath` is the path
 *          below the collection root ("" for the root itself), or null when no
 *          collection contains the path.
 */
function detectCollectionFromPath(db: Database, fsPath: string): { collectionName: string; relativePath: string } | null {
  const realPath = getRealPath(fsPath);

  const containing = yamlListCollections().filter(
    ({ path }) => realPath === path || realPath.startsWith(path + '/')
  );
  if (containing.length === 0) return null;

  // Most specific root wins; the strict comparison keeps the first one on ties.
  const best = containing.reduce((longest, candidate) =>
    candidate.path.length > longest.path.length ? candidate : longest
  );

  return {
    collectionName: best.name,
    // Drop the root and the separator that follows it.
    relativePath: realPath === best.path ? '' : realPath.slice(best.path.length + 1),
  };
}
/**
 * Attach a context description to a target and report the result on stdout.
 *
 * Target forms:
 * - "/": the global context.
 * - "qmd://collection/path": a path inside the named collection.
 * - a filesystem path ("." / "./", "~/...", absolute or relative; defaults to
 *   the current directory): mapped to the collection that contains it.
 *
 * Exits the process with status 1 when the virtual path is invalid, the
 * collection does not exist, or the filesystem path is outside every collection.
 *
 * @param pathArg - Target as described above; undefined means the current directory.
 * @param contextText - Context description to store.
 */
async function contextAdd(pathArg: string | undefined, contextText: string): Promise<void> {
  const db = getDb();

  // Shared tail of every successful path: report what was stored, then close the database.
  const finish = (headline: string): void => {
    console.log(`${c.green}✓${c.reset} ${headline}`);
    console.log(`${c.dim}Context: ${contextText}${c.reset}`);
    closeDb();
  };

  // Turn the raw argument into an absolute filesystem path; virtual paths pass through.
  function expandTarget(raw: string): string {
    if (raw === '.' || raw === './') return getPwd();
    if (raw.startsWith('~/')) return homedir() + raw.slice(1);
    if (raw.startsWith('/') || raw.startsWith('qmd://')) return raw;
    return resolve(getPwd(), raw);
  }

  // "/" addresses the global context.
  if (pathArg === '/') {
    setGlobalContext(contextText);
    finish('Set global context');
    return;
  }

  const fsPath = expandTarget(pathArg || '.');

  // Virtual path: qmd://collection/path
  if (isVirtualPath(fsPath)) {
    const parsed = parseVirtualPath(fsPath);
    if (!parsed) {
      console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
      process.exit(1);
    }

    const { collectionName, path } = parsed;
    if (!getCollectionFromYaml(collectionName)) {
      console.error(`${c.yellow}Collection not found: ${collectionName}${c.reset}`);
      process.exit(1);
    }

    yamlAddContext(collectionName, path, contextText);
    const shown = path
      ? `qmd://${collectionName}/${path}`
      : `qmd://${collectionName}/ (collection root)`;
    finish(`Added context for: ${shown}`);
    return;
  }

  // Filesystem path: find the collection that contains it.
  const detected = detectCollectionFromPath(db, fsPath);
  if (!detected) {
    console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
    console.error(`${c.dim}Run 'qmd status' to see indexed collections${c.reset}`);
    process.exit(1);
  }

  const { collectionName, relativePath } = detected;
  yamlAddContext(collectionName, relativePath, contextText);
  const shown = relativePath
    ? `qmd://${collectionName}/${relativePath}`
    : `qmd://${collectionName}/`;
  finish(`Added context for: ${shown}`);
}
/**
 * Print every configured context to stdout, with a heading line each time the
 * collection changes from one entry to the next. Prints a hint instead when no
 * contexts are configured. Opens the database on entry and closes it on exit.
 */
function contextList(): void {
  getDb();

  const entries = listAllContexts();
  if (entries.length === 0) {
    console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`);
    closeDb();
    return;
  }

  console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);

  let currentCollection = '';
  for (const { collection, path, context } of entries) {
    // Emit the collection heading once per run of entries from that collection.
    if (collection !== currentCollection) {
      console.log(`${c.cyan}${collection}${c.reset}`);
      currentCollection = collection;
    }

    // An empty path denotes the collection root.
    console.log(path ? ` ${path}` : ' / (root)');
    console.log(` ${c.dim}${context}${c.reset}`);
  }

  closeDb();
}
/**
 * Remove a previously configured context and report the result on stdout.
 *
 * Target forms:
 * - "/": the global context.
 * - "qmd://collection/path": a path inside the named collection.
 * - a filesystem path ("." / "./", "~/...", absolute or relative): mapped to
 *   the collection that contains it.
 *
 * Exits the process with status 1 when the virtual path is invalid, the
 * collection does not exist, the filesystem path is outside every collection,
 * or no context is stored for the target.
 *
 * @param pathArg - Target as described above.
 */
function contextRemove(pathArg: string): void {
  // Print an error in yellow to stderr and terminate with status 1.
  function fail(message: string): never {
    console.error(`${c.yellow}${message}${c.reset}`);
    process.exit(1);
  }

  const reportRemoved = (target: string): void => {
    console.log(`${c.green}✓${c.reset} Removed context for: ${target}`);
  };

  // "/" addresses the global context.
  if (pathArg === '/') {
    setGlobalContext(undefined);
    console.log(`${c.green}✓${c.reset} Removed global context`);
    return;
  }

  // Virtual path: qmd://collection/path
  if (isVirtualPath(pathArg)) {
    const parsed = parseVirtualPath(pathArg);
    if (!parsed) {
      fail(`Invalid virtual path: ${pathArg}`);
    }

    const coll = getCollectionFromYaml(parsed.collectionName);
    if (!coll) {
      fail(`Collection not found: ${parsed.collectionName}`);
    }

    if (!yamlRemoveContext(coll.name, parsed.path)) {
      fail(`No context found for: ${pathArg}`);
    }

    reportRemoved(pathArg);
    return;
  }

  // Filesystem path: make it absolute, then find the containing collection.
  let fsPath: string;
  if (pathArg === '.' || pathArg === './') {
    fsPath = getPwd();
  } else if (pathArg.startsWith('~/')) {
    fsPath = homedir() + pathArg.slice(1);
  } else if (pathArg.startsWith('/')) {
    fsPath = pathArg;
  } else {
    fsPath = resolve(getPwd(), pathArg);
  }

  const db = getDb();
  const detected = detectCollectionFromPath(db, fsPath);
  closeDb();

  if (!detected) {
    fail(`Path is not in any indexed collection: ${fsPath}`);
  }

  const target = `qmd://${detected.collectionName}/${detected.relativePath}`;
  if (!yamlRemoveContext(detected.collectionName, detected.relativePath)) {
    fail(`No context found for: ${target}`);
  }

  reportRemoved(target);
}
/**
 * Report missing context configuration on stdout.
 *
 * Lists the collections that have no context at all and, for the remaining
 * collections, the top-level paths without context, each with a suggested
 * `qmd context add` command. Prints confirmation messages when nothing is
 * missing. Closes the database when done.
 */
function contextCheck(): void {
  const db = getDb();

  const uncovered = getCollectionsWithoutContext(db);
  const everyCollection = listCollections(db);

  if (uncovered.length === 0) {
    if (everyCollection.length > 0) {
      console.log(`\n${c.green}✓${c.reset} ${c.bold}All collections have context configured${c.reset}\n`);
    }
  } else {
    console.log(`\n${c.yellow}Collections without any context:${c.reset}\n`);
    for (const { name, doc_count } of uncovered) {
      console.log(`${c.cyan}${name}${c.reset} ${c.dim}(${doc_count} documents)${c.reset}`);
      console.log(` ${c.dim}Suggestion: qmd context add qmd://${name}/ "Description of ${name}"${c.reset}\n`);
    }
  }

  // Only collections that already have some context are checked path by path.
  const covered = everyCollection.filter(
    candidate => uncovered.every(missing => missing.id !== candidate.id)
  );

  let hasPathSuggestions = false;
  for (const coll of covered) {
    const missingPaths = getTopLevelPathsWithoutContext(db, coll.id);
    if (missingPaths.length === 0) continue;

    // The section heading is printed once, before the first collection with gaps.
    if (!hasPathSuggestions) {
      console.log(`${c.yellow}Top-level directories without context:${c.reset}\n`);
      hasPathSuggestions = true;
    }

    console.log(`${c.cyan}${coll.name}${c.reset}`);
    for (const path of missingPaths) {
      console.log(` ${path}`);
      console.log(` ${c.dim}Suggestion: qmd context add qmd://${coll.name}/${path} "Description of ${path}"${c.reset}`);
    }
    console.log('');
  }

  if (uncovered.length === 0 && !hasPathSuggestions) {
    console.log(`${c.dim}All collections and major paths have context configured.${c.reset}`);
    console.log(`${c.dim}Use 'qmd context list' to see all configured contexts.${c.reset}\n`);
  }

  closeDb();
}
- function getDocument(filename: string, fromLine?: number, maxLines?: number, lineNumbers?: boolean): void {
- const db = getDb();
- // Parse :linenum suffix from filename (e.g., "file.md:100")
- let inputPath = filename;
- const colonMatch = inputPath.match(/:(\d+)$/);
- if (colonMatch && !fromLine) {
- fromLine = parseInt(colonMatch[1], 10);
- inputPath = inputPath.slice(0, -colonMatch[0].length);
- }
- let doc: { collectionName: string; path: string; body: string } | null = null;
- let virtualPath: string;
- // Handle virtual paths (qmd://collection/path)
- if (isVirtualPath(inputPath)) {
- const parsed = parseVirtualPath(inputPath);
- if (!parsed) {
- console.error(`Invalid virtual path: ${inputPath}`);
- closeDb();
- process.exit(1);
- }
- // Try exact match on collection + path
- doc = db.prepare(`
- SELECT d.collection as collectionName, d.path, content.doc as body
- FROM documents d
- JOIN content ON content.hash = d.hash
- WHERE d.collection = ? AND d.path = ? AND d.active = 1
- `).get(parsed.collectionName, parsed.path) as typeof doc;
- if (!doc) {
- // Try fuzzy match by path ending
- doc = db.prepare(`
- SELECT d.collection as collectionName, d.path, content.doc as body
- FROM documents d
- JOIN content ON content.hash = d.hash
- WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
- LIMIT 1
- `).get(parsed.collectionName, `%${parsed.path}`) as typeof doc;
- }
- virtualPath = inputPath;
- } else {
- // Try to interpret as collection/path format first (before filesystem path)
- // If path is relative (no / or ~ prefix), check if first component is a collection name
- if (!inputPath.startsWith('/') && !inputPath.startsWith('~')) {
- const parts = inputPath.split('/');
- if (parts.length >= 2) {
- const possibleCollection = parts[0];
- const possiblePath = parts.slice(1).join('/');
- // Check if this collection exists
- const collExists = db.prepare(`
- SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1
- `).get(possibleCollection);
- if (collExists) {
- // Try exact match on collection + path
- doc = db.prepare(`
- SELECT d.collection as collectionName, d.path, content.doc as body
- FROM documents d
- JOIN content ON content.hash = d.hash
- WHERE d.collection = ? AND d.path = ? AND d.active = 1
- `).get(possibleCollection, possiblePath) as typeof doc;
- if (!doc) {
- // Try fuzzy match by path ending
- doc = db.prepare(`
- SELECT d.collection as collectionName, d.path, content.doc as body
- FROM documents d
- JOIN content ON content.hash = d.hash
- WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
- LIMIT 1
- `).get(possibleCollection, `%${possiblePath}`) as typeof doc;
- }
- if (doc) {
- virtualPath = buildVirtualPath(doc.collectionName, doc.path);
- // Skip the filesystem path handling below
- }
- }
- }
- }
- // If not found as collection/path, handle as filesystem paths
- if (!doc) {
- let fsPath = inputPath;
- // Expand ~ to home directory
- if (fsPath.startsWith('~/')) {
- fsPath = homedir() + fsPath.slice(1);
- } else if (!fsPath.startsWith('/')) {
- // Relative path - resolve from current directory
- fsPath = resolve(getPwd(), fsPath);
- }
- fsPath = getRealPath(fsPath);
- // Try to detect which collection contains this path
- const detected = detectCollectionFromPath(db, fsPath);
- if (detected) {
- // Found collection - query by collection name + relative path
- doc = db.prepare(`
- SELECT d.collection as collectionName, d.path, content.doc as body
- FROM documents d
- JOIN content ON content.hash = d.hash
- WHERE d.collection = ? AND d.path = ? AND d.active = 1
- `).get(detected.collectionName, detected.relativePath) as typeof doc;
- }
- // Fuzzy match by filename (last component of path)
- if (!doc) {
- const filename = inputPath.split('/').pop() || inputPath;
- doc = db.prepare(`
- SELECT d.collection as collectionName, d.path, content.doc as body
- FROM documents d
- JOIN content ON content.hash = d.hash
- WHERE d.path LIKE ? AND d.active = 1
- LIMIT 1
- `).get(`%${filename}`) as typeof doc;
- }
- if (doc) {
- virtualPath = buildVirtualPath(doc.collectionName, doc.path);
- } else {
- virtualPath = inputPath;
- }
- }
- }
- if (!doc) {
- console.error(`Document not found: ${filename}`);
- closeDb();
- process.exit(1);
- }
- // Get context for this file
- const context = getContextForPath(db, doc.collectionName, doc.path);
- let output = doc.body;
- const startLine = fromLine || 1;
- // Apply line filtering if specified
- if (fromLine !== undefined || maxLines !== undefined) {
- const lines = output.split('\n');
- const start = startLine - 1; // Convert to 0-indexed
- const end = maxLines !== undefined ? start + maxLines : lines.length;
- output = lines.slice(start, end).join('\n');
- }
- // Add line numbers if requested
- if (lineNumbers) {
- output = addLineNumbers(output, startLine);
- }
- // Output context header if exists
- if (context) {
- console.log(`Folder Context: ${context}\n---\n`);
- }
- console.log(output);
- closeDb();
- }
// Multi-get: fetch multiple documents by glob pattern or comma-separated list.
//
// `pattern` is treated as a comma-separated list of names when it contains a comma
// and none of the glob characters `*` / `?`; otherwise it is passed to
// matchFilesByGlob. Documents whose stored length exceeds `maxBytes` are reported as
// skipped rather than loaded, and `maxLines` keeps only the first N lines of each
// body. Results are printed to stdout as "json", "csv", "files", "md" or "xml"; any
// other `format` value uses the CLI layout. Exits with status 1 when a glob pattern
// matches nothing; names that cannot be resolved are reported on stderr and skipped.
function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT_MULTI_GET_MAX_BYTES, format: OutputFormat = "cli"): void {
  type FileRef = { filepath: string; displayPath: string; bodyLength: number; collection?: string; path?: string };
  type LookupRow = { virtual_path: string; body_length: number; collection: string; path: string };
  type Result = { file: string; displayPath: string; title: string; body: string; context: string | null; skipped: boolean; skipReason?: string };

  const db = getDb();

  // Every name lookup selects the same columns; only the WHERE condition differs.
  // `condition` is always one of the literals below, never user input.
  const lookup = (condition: string, ...args: string[]): LookupRow | null =>
    db.prepare(`
      SELECT
        'qmd://' || d.collection || '/' || d.path as virtual_path,
        LENGTH(content.doc) as body_length,
        d.collection,
        d.path
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE ${condition} AND d.active = 1
      LIMIT 1
    `).get(...args) as LookupRow | null;

  // Make `%`, `_` and `\` literal inside a LIKE pattern (paired with ESCAPE '\'),
  // so a name like "my_file.md" cannot match "myXfile.md".
  const escapeLike = (value: string): string => value.replace(/[\\%_]/g, '\\$&');

  // Check if it's a comma-separated list or a glob pattern
  const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
  let files: FileRef[];

  if (isCommaSeparated) {
    // Comma-separated list of files (can be virtual paths or relative paths)
    files = [];
    const names = pattern.split(',').map(s => s.trim()).filter(Boolean);
    for (const name of names) {
      let doc: LookupRow | null = null;
      if (isVirtualPath(name)) {
        // Virtual paths are resolved by exact collection + path only
        const parsed = parseVirtualPath(name);
        if (parsed) {
          doc = lookup("d.collection = ? AND d.path = ?", parsed.collectionName, parsed.path);
        }
      } else {
        // Exact path first, then fall back to a literal suffix match
        doc = lookup("d.path = ?", name)
          || lookup("d.path LIKE ? ESCAPE '\\'", `%${escapeLike(name)}`);
      }

      if (!doc) {
        console.error(`File not found: ${name}`);
        continue;
      }
      files.push({
        filepath: doc.virtual_path,
        displayPath: doc.virtual_path,
        bodyLength: doc.body_length,
        collection: doc.collection,
        path: doc.path
      });
    }
  } else {
    // Glob pattern - matchFilesByGlob returns virtual paths; collection/path are
    // derived from the virtual path in the loop below.
    files = matchFilesByGlob(db, pattern).map(f => ({
      ...f,
      collection: undefined,
      path: undefined
    }));
    if (files.length === 0) {
      console.error(`No files matched pattern: ${pattern}`);
      closeDb();
      process.exit(1);
    }
  }

  // Collect results for structured output
  const results: Result[] = [];
  for (const file of files) {
    // Parse virtual path to get collection info if not already available
    let collection = file.collection;
    let path = file.path;
    if (!collection || !path) {
      const parsed = parseVirtualPath(file.filepath);
      if (parsed) {
        collection = parsed.collectionName;
        path = parsed.path;
      }
    }

    // Get context using collection-scoped function
    const context = collection && path ? getContextForPath(db, collection, path) : null;

    // Oversized documents are listed but their content is never loaded
    if (file.bodyLength > maxBytes) {
      results.push({
        file: file.filepath,
        displayPath: file.displayPath,
        title: file.displayPath.split('/').pop() || file.displayPath,
        body: "",
        context,
        skipped: true,
        skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'qmd get ${file.displayPath}' to retrieve.`,
      });
      continue;
    }

    // Fetch document content using collection and path
    if (!collection || !path) continue;
    const doc = db.prepare(`
      SELECT content.doc as body, d.title
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `).get(collection, path) as { body: string; title: string } | null;
    if (!doc) continue;

    let body = doc.body;
    // Apply line limit if specified
    if (maxLines !== undefined) {
      const lines = body.split('\n');
      body = lines.slice(0, maxLines).join('\n');
      if (lines.length > maxLines) {
        body += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
      }
    }

    results.push({
      file: file.filepath,
      displayPath: file.displayPath,
      title: doc.title || file.displayPath.split('/').pop() || file.displayPath,
      body,
      context,
      skipped: false,
    });
  }

  // All database work is done; everything below only formats `results`.
  closeDb();

  switch (format) {
    case "json": {
      const output = results.map(r => ({
        file: r.displayPath,
        title: r.title,
        ...(r.context && { context: r.context }),
        ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
      }));
      console.log(JSON.stringify(output, null, 2));
      break;
    }

    case "csv": {
      // Quote a field when it contains a delimiter, a quote, or any line break
      // (including a bare carriage return), doubling embedded quotes.
      const escapeField = (val: string | null | undefined): string => {
        if (val === null || val === undefined) return "";
        const str = String(val);
        if (/[",\r\n]/.test(str)) {
          return `"${str.replace(/"/g, '""')}"`;
        }
        return str;
      };
      console.log("file,title,context,skipped,body");
      for (const r of results) {
        console.log([r.displayPath, r.title, r.context || "", r.skipped ? "true" : "false", r.skipped ? r.skipReason : r.body].map(escapeField).join(","));
      }
      break;
    }

    case "files": {
      for (const r of results) {
        const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
        const status = r.skipped ? "[SKIPPED]" : "";
        console.log(`${r.displayPath}${ctx}${status ? `,${status}` : ""}`);
      }
      break;
    }

    case "md": {
      for (const r of results) {
        console.log(`## ${r.displayPath}\n`);
        if (r.title && r.title !== r.displayPath) console.log(`**Title:** ${r.title}\n`);
        if (r.context) console.log(`**Context:** ${r.context}\n`);
        if (r.skipped) {
          console.log(`> ${r.skipReason}\n`);
        } else {
          console.log("```");
          console.log(r.body);
          console.log("```\n");
        }
      }
      break;
    }

    case "xml": {
      console.log('<?xml version="1.0" encoding="UTF-8"?>');
      console.log("<documents>");
      for (const r of results) {
        console.log(" <document>");
        console.log(` <file>${escapeXml(r.displayPath)}</file>`);
        console.log(` <title>${escapeXml(r.title)}</title>`);
        if (r.context) console.log(` <context>${escapeXml(r.context)}</context>`);
        if (r.skipped) {
          console.log(` <skipped>true</skipped>`);
          console.log(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
        } else {
          console.log(` <body>${escapeXml(r.body)}</body>`);
        }
        console.log(" </document>");
      }
      console.log("</documents>");
      break;
    }

    default: {
      // CLI format (default)
      for (const r of results) {
        console.log(`\n${'='.repeat(60)}`);
        console.log(`File: ${r.displayPath}`);
        console.log(`${'='.repeat(60)}\n`);
        if (r.skipped) {
          console.log(`[SKIPPED: ${r.skipReason}]`);
          continue;
        }
        if (r.context) {
          console.log(`Folder Context: ${r.context}\n---\n`);
        }
        console.log(r.body);
      }
    }
  }
}
// List files in virtual file tree.
//
// With no argument, prints every collection from the YAML config together with its
// count of active documents. With an argument - either `qmd://collection/path` or
// `collection[/path]` - prints the active documents of that collection whose path
// starts with the given prefix, one per line with size and modification time.
// Exits with status 1 for an unparseable virtual path or an unknown collection.
function listFiles(pathArg?: string): void {
  const db = getDb();

  if (!pathArg) {
    // No argument - list all collections
    const yamlCollections = yamlListCollections();
    if (yamlCollections.length === 0) {
      console.log("No collections found. Run 'qmd add .' to index files.");
      closeDb();
      return;
    }

    // Get file counts from database for each collection
    const collections = yamlCollections.map(coll => {
      const stats = db.prepare(`
        SELECT COUNT(*) as file_count
        FROM documents d
        WHERE d.collection = ? AND d.active = 1
      `).get(coll.name) as { file_count: number } | null;
      return {
        name: coll.name,
        file_count: stats?.file_count || 0
      };
    });

    console.log(`${c.bold}Collections:${c.reset}\n`);
    for (const coll of collections) {
      console.log(` ${c.dim}qmd://${c.reset}${c.cyan}${coll.name}/${c.reset} ${c.dim}(${coll.file_count} files)${c.reset}`);
    }
    closeDb();
    return;
  }

  // Parse the path argument
  let collectionName: string;
  let pathPrefix: string | null = null;
  if (pathArg.startsWith('qmd://')) {
    // Virtual path format: qmd://collection/path
    const parsed = parseVirtualPath(pathArg);
    if (!parsed) {
      console.error(`Invalid virtual path: ${pathArg}`);
      closeDb();
      process.exit(1);
    }
    collectionName = parsed.collectionName;
    pathPrefix = parsed.path;
  } else {
    // Just collection name or collection/path
    const parts = pathArg.split('/');
    collectionName = parts[0];
    if (parts.length > 1) {
      pathPrefix = parts.slice(1).join('/');
    }
  }

  // Get the collection
  const coll = getCollectionFromYaml(collectionName);
  if (!coll) {
    console.error(`Collection not found: ${collectionName}`);
    console.error(`Run 'qmd ls' to see available collections.`);
    closeDb();
    process.exit(1);
  }

  // One query serves both cases; the prefix filter is added only when a path was
  // given. `%`, `_` and `\` in the prefix are escaped so they match literally
  // instead of acting as LIKE wildcards.
  const params: string[] = [coll.name];
  let prefixFilter = "";
  if (pathPrefix) {
    prefixFilter = "AND d.path LIKE ? ESCAPE '\\'";
    params.push(`${pathPrefix.replace(/[\\%_]/g, '\\$&')}%`);
  }
  const query = `
    SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
    FROM documents d
    JOIN content ct ON d.hash = ct.hash
    WHERE d.collection = ? ${prefixFilter} AND d.active = 1
    ORDER BY d.path
  `;

  const files = db.prepare(query).all(...params) as { path: string; title: string; modified_at: string; size: number }[];
  if (files.length === 0) {
    if (pathPrefix) {
      console.log(`No files found under qmd://${collectionName}/${pathPrefix}`);
    } else {
      console.log(`No files found in collection: ${collectionName}`);
    }
    closeDb();
    return;
  }

  // Format each size once, and find the widest with a fold rather than spreading
  // the whole list into Math.max (which fails on very large collections).
  const sizeLabels = files.map(f => formatBytes(f.size));
  const maxSize = sizeLabels.reduce((widest, label) => Math.max(widest, label.length), 0);

  // Output in ls -l style
  files.forEach((file, i) => {
    const sizeStr = sizeLabels[i].padStart(maxSize);
    const timeStr = formatLsTime(new Date(file.modified_at));
    // Dim the qmd:// prefix, highlight the filename
    console.log(`${sizeStr} ${timeStr} ${c.dim}qmd://${collectionName}/${c.reset}${c.cyan}${file.path}${c.reset}`);
  });
  closeDb();
}
// Format date/time like ls -l: "Mon DD HH:MM" for recent dates, "Mon DD YYYY" once
// the date lies more than 180 days (6 x 30) in the past. All fields are taken in the
// local time zone; the day is space-padded, hours and minutes are zero-padded.
function formatLsTime(date: Date): string {
  const MONTH_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
  const HALF_YEAR_MS = 6 * 30 * 24 * 60 * 60 * 1000;

  const cutoff = Date.now() - HALF_YEAR_MS;
  const monthAndDay = `${MONTH_NAMES[date.getMonth()]} ${String(date.getDate()).padStart(2, ' ')}`;

  // Old entries show the year in place of the clock time
  if (date.getTime() < cutoff) {
    return `${monthAndDay} ${date.getFullYear()}`;
  }

  const twoDigits = (value: number): string => String(value).padStart(2, '0');
  return `${monthAndDay} ${twoDigits(date.getHours())}:${twoDigits(date.getMinutes())}`;
}
// Collection management commands

// Print every collection returned by listCollections with its glob pattern, active
// file count and time since the last update, or a hint when there are none.
function collectionList(): void {
  const db = getDb();
  const collections = listCollections(db);

  if (collections.length > 0) {
    console.log(`${c.bold}Collections (${collections.length}):${c.reset}\n`);
    for (const coll of collections) {
      const timeAgo = formatTimeAgo(new Date(coll.updated_at));
      console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}`);
      console.log(` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`);
      console.log(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
      console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
      console.log();
    }
  } else {
    console.log("No collections found. Run 'qmd add .' to create one.");
  }

  closeDb();
}
// Register a new collection for directory `pwd` and glob `globPattern`, then index it.
// When `name` is omitted (or empty) the last component of `pwd` is used, falling
// back to 'root'. Exits with status 1 if the name is already taken or if another
// collection already covers the same path + pattern.
async function collectionAdd(pwd: string, globPattern: string, name?: string): Promise<void> {
  // Derive a name from the directory when none was supplied
  const collectionName = name || pwd.split('/').filter(Boolean).pop() || 'root';

  // The name must be unique within the YAML config
  if (getCollectionFromYaml(collectionName)) {
    console.error(`${c.yellow}Collection '${collectionName}' already exists.${c.reset}`);
    console.error(`Use a different name with --name <name>`);
    process.exit(1);
  }

  // So must the (path, pattern) pair
  const duplicate = yamlListCollections().find(entry => entry.path === pwd && entry.pattern === globPattern);
  if (duplicate) {
    console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
    console.error(` Name: ${duplicate.name} (qmd://${duplicate.name}/)`);
    console.error(` Pattern: ${globPattern}`);
    console.error(`\nUse 'qmd update' to re-index it, or remove it first with 'qmd collection remove ${duplicate.name}'`);
    process.exit(1);
  }

  // Record the collection in the YAML config before indexing
  const collectionsModule = await import("./collections.js");
  collectionsModule.addCollection(collectionName, pwd, globPattern);

  // Create the collection and index files
  console.log(`Creating collection '${collectionName}'...`);
  await indexFiles(pwd, globPattern, collectionName);
  console.log(`${c.green}✓${c.reset} Collection '${collectionName}' created successfully`);
}
// Remove the collection `name` and report how many documents (and orphaned content
// hashes, if any) were deleted. Exits with status 1 when the collection is not
// present in the YAML config.
function collectionRemove(name: string): void {
  if (!getCollectionFromYaml(name)) {
    console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
    console.error(`Run 'qmd collection list' to see available collections.`);
    process.exit(1);
  }

  const db = getDb();
  const { deletedDocs, cleanedHashes } = removeCollection(db, name);
  closeDb();

  console.log(`${c.green}✓${c.reset} Removed collection '${name}'`);
  console.log(` Deleted ${deletedDocs} documents`);
  if (cleanedHashes > 0) {
    console.log(` Cleaned up ${cleanedHashes} orphaned content hashes`);
  }
}
// Rename collection `oldName` to `newName`. Exits with status 1 when the source
// collection is missing from the YAML config or the target name is already in use.
function collectionRename(oldName: string, newName: string): void {
  // The source must exist...
  if (!getCollectionFromYaml(oldName)) {
    console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
    console.error(`Run 'qmd collection list' to see available collections.`);
    process.exit(1);
  }

  // ...and the target must be free
  if (getCollectionFromYaml(newName)) {
    console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
    console.error(`Choose a different name or remove the existing collection first.`);
    process.exit(1);
  }

  const db = getDb();
  renameCollection(db, oldName, newName);
  closeDb();

  const oldPrefix = `${c.cyan}qmd://${oldName}/${c.reset}`;
  const newPrefix = `${c.cyan}qmd://${newName}/${c.reset}`;
  console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
  console.log(` Virtual paths updated: ${oldPrefix} → ${newPrefix}`);
}
// Index every file under `pwd` (default: current directory) that matches
// `globPattern` into the collection `collectionName`.
//
// For each matching file the content is hashed and compared with the active document
// stored for the same collection + path: new paths are inserted, changed content is
// updated, and a changed title alone updates just the title. Active documents whose
// path was not seen in this scan are deactivated, orphaned content is cleaned up, and
// a summary is printed. Files with any path component that starts with "." or that is
// listed in the exclude set are ignored.
//
// Throws if `collectionName` is missing.
async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, collectionName?: string): Promise<void> {
  // Collection name must be provided (from YAML). Checked before anything else so
  // an invalid call neither opens the database nor clears the cache.
  if (!collectionName) {
    throw new Error("Collection name is required. Collections must be defined in ~/.config/qmd/index.yml");
  }

  const db = getDb();
  const resolvedPwd = pwd || getPwd();
  const now = new Date().toISOString();
  const excludeDirs = new Set(["node_modules", ".git", ".cache", "vendor", "dist", "build"]);

  // Clear Ollama cache on index
  clearCache(db);

  console.log(`Collection: ${resolvedPwd} (${globPattern})`);
  progress.indeterminate();

  const glob = new Glob(globPattern);
  const files: string[] = [];
  for await (const file of glob.scan({ cwd: resolvedPwd, onlyFiles: true, followSymlinks: true })) {
    // Skip hidden components (.*) and the common build/vendor directories
    const shouldSkip = file.split("/").some(part => part.startsWith(".") || excludeDirs.has(part));
    if (!shouldSkip) {
      files.push(file);
    }
  }

  const total = files.length;
  if (total === 0) {
    progress.clear();
    console.log("No files found matching pattern.");
    closeDb();
    return;
  }

  let indexed = 0, updated = 0, unchanged = 0, processed = 0;
  const seenPaths = new Set<string>();
  const startTime = Date.now();

  for (const relativeFile of files) {
    const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
    const path = handelize(relativeFile); // Normalize path for token-friendliness
    seenPaths.add(path);

    const source = Bun.file(filepath);
    const content = await source.text();
    const hash = await hashContent(content);
    const title = extractTitle(content, relativeFile);

    // Check if document exists in this collection with this path
    const existing = findActiveDocument(db, collectionName, path);
    if (!existing) {
      // New document - insert content and document
      indexed++;
      insertContent(db, hash, content, now);
      const stat = await source.stat();
      insertDocument(db, collectionName, path, title, hash,
        stat ? new Date(stat.birthtime).toISOString() : now,
        stat ? new Date(stat.mtime).toISOString() : now);
    } else if (existing.hash !== hash) {
      // Content changed - insert new content hash and update document
      insertContent(db, hash, content, now);
      const stat = await source.stat();
      updateDocument(db, existing.id, title, hash,
        stat ? new Date(stat.mtime).toISOString() : now);
      updated++;
    } else if (existing.title !== title) {
      // Same content, but the extracted title differs from the stored one
      updateDocumentTitle(db, existing.id, title, now);
      updated++;
    } else {
      unchanged++;
    }

    processed++;
    progress.set((processed / total) * 100);
    const elapsed = (Date.now() - startTime) / 1000;
    const rate = processed / elapsed;
    const remaining = (total - processed) / rate;
    // The first couple of samples are too noisy for a useful estimate
    const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
    process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
  }

  // Deactivate documents in this collection that no longer exist
  const allActive = getActiveDocumentPaths(db, collectionName);
  let removed = 0;
  for (const path of allActive) {
    if (!seenPaths.has(path)) {
      deactivateDocument(db, collectionName, path);
      removed++;
    }
  }

  // Clean up orphaned content hashes (content not referenced by any document)
  const orphanedContent = cleanupOrphanedContent(db);

  // Check if vector index needs updating
  const needsEmbedding = getHashesNeedingEmbedding(db);

  progress.clear();
  console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
  if (orphanedContent > 0) {
    console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
  }
  if (needsEmbedding > 0) {
    console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
  }
  closeDb();
}
// Render a text progress bar of `width` cells: "█" for the completed share of
// `percent` (0-100) and "░" for the remainder.
//
// `percent` is clamped to the 0-100 range and NaN is treated as 0, so a slightly
// out-of-range value from the caller draws a full or empty bar instead of throwing
// a RangeError from String.prototype.repeat.
function renderProgressBar(percent: number, width: number = 30): string {
  const safePercent = Number.isNaN(percent) ? 0 : percent;
  const rawFilled = Math.round((safePercent / 100) * width);
  const filled = Math.max(0, Math.min(width, rawFilled));
  return "█".repeat(filled) + "░".repeat(width - filled);
}
// Compute and store embeddings for every content hash that still needs one.
//
// Steps: optionally clear all existing vectors (`force`), split each document into
// token-based chunks, embed the first chunk once to learn the vector dimension,
// then embed all chunks in batches of 32 - falling back to one-by-one embedding
// when a batch fails. Progress goes to stderr, the summary to stdout.
//
// The terminal cursor is hidden while embedding runs; it is restored (and the
// progress indicator cleared) even when embedding throws.
// Throws if the first chunk cannot be embedded.
async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean = false): Promise<void> {
  const db = getDb();
  const now = new Date().toISOString();

  // If force, clear all vectors
  if (force) {
    console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
    clearAllEmbeddings(db);
  }

  // Find unique hashes that need embedding (from active documents)
  const hashesToEmbed = getHashesForEmbedding(db);
  if (hashesToEmbed.length === 0) {
    console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
    closeDb();
    return;
  }

  // Prepare documents with chunks
  type ChunkItem = { hash: string; title: string; text: string; seq: number; pos: number; tokens: number; bytes: number; displayName: string };
  const allChunks: ChunkItem[] = [];
  let multiChunkDocs = 0;
  // One encoder is enough; it is stateless between encode() calls
  const encoder = new TextEncoder();

  // Chunk all documents using actual token counts
  process.stderr.write(`Chunking ${hashesToEmbed.length} documents by token count...\n`);
  for (const item of hashesToEmbed) {
    if (encoder.encode(item.body).length === 0) continue; // Skip empty

    const title = extractTitle(item.body, item.path);
    const displayName = item.path;
    const chunks = await chunkDocumentByTokens(item.body); // Uses actual tokenizer
    if (chunks.length > 1) multiChunkDocs++;

    chunks.forEach((piece, seq) => {
      allChunks.push({
        hash: item.hash,
        title,
        text: piece.text,
        seq,
        pos: piece.pos,
        tokens: piece.tokens,
        bytes: encoder.encode(piece.text).length,
        displayName,
      });
    });
  }

  if (allChunks.length === 0) {
    console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
    closeDb();
    return;
  }

  const totalBytes = allChunks.reduce((sum, chunk) => sum + chunk.bytes, 0);
  const totalChunks = allChunks.length;
  const totalDocs = hashesToEmbed.length;

  console.log(`${c.bold}Embedding ${totalDocs} documents${c.reset} ${c.dim}(${totalChunks} chunks, ${formatBytes(totalBytes)})${c.reset}`);
  if (multiChunkDocs > 0) {
    console.log(`${c.dim}${multiChunkDocs} documents split into multiple chunks${c.reset}`);
  }
  console.log(`${c.dim}Model: ${model}${c.reset}\n`);

  let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
  let startTime = Date.now();

  // Hide cursor during embedding; the finally block guarantees it comes back
  cursor.hide();
  try {
    // Get embedding dimensions from first chunk
    progress.indeterminate();
    const llm = getDefaultLlamaCpp();
    const firstText = formatDocForEmbedding(allChunks[0].text, allChunks[0].title);
    const firstResult = await llm.embed(firstText);
    if (!firstResult) {
      throw new Error("Failed to get embedding dimensions from first chunk");
    }
    ensureVecTable(db, firstResult.embedding.length);

    // Timing starts after the dimension probe so throughput reflects the batches
    startTime = Date.now();

    // Batch embedding for better throughput
    // Process in batches of 32 to balance memory usage and efficiency
    const BATCH_SIZE = 32;
    for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
      const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
      const batch = allChunks.slice(batchStart, batchEnd);

      // Format texts for embedding
      const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));

      try {
        // Batch embed all texts at once
        const embeddings = await llm.embedBatch(texts);

        // Insert each embedding
        for (let i = 0; i < batch.length; i++) {
          const chunk = batch[i];
          const embedding = embeddings[i];
          if (embedding) {
            insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
            chunksEmbedded++;
          } else {
            errors++;
            console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
          }
          bytesProcessed += chunk.bytes;
        }
      } catch (err) {
        // If batch fails, try individual embeddings as fallback
        for (const chunk of batch) {
          try {
            const text = formatDocForEmbedding(chunk.text, chunk.title);
            const result = await llm.embed(text);
            if (result) {
              insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
              chunksEmbedded++;
            } else {
              // Report the failure the same way the batch path does
              errors++;
              console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
            }
          } catch (innerErr) {
            errors++;
            console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}: ${innerErr}${c.reset}`);
          }
          bytesProcessed += chunk.bytes;
        }
      }

      const percent = (bytesProcessed / totalBytes) * 100;
      progress.set(percent);

      const elapsed = (Date.now() - startTime) / 1000;
      const bytesPerSec = bytesProcessed / elapsed;
      const remainingBytes = totalBytes - bytesProcessed;
      const etaSec = remainingBytes / bytesPerSec;

      const bar = renderProgressBar(percent);
      const percentStr = percent.toFixed(0).padStart(3);
      const throughput = `${formatBytes(bytesPerSec)}/s`;
      const eta = elapsed > 2 ? formatETA(etaSec) : "...";
      const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";

      process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
    }
  } finally {
    progress.clear();
    cursor.show();
  }

  const totalTimeSec = (Date.now() - startTime) / 1000;
  const avgThroughput = formatBytes(totalBytes / totalTimeSec);

  console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
  console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${chunksEmbedded}${c.reset} chunks from ${c.bold}${totalDocs}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset} ${c.dim}(${avgThroughput}/s)${c.reset}`);
  if (errors > 0) {
    console.log(`${c.yellow}⚠ ${errors} chunks failed${c.reset}`);
  }
  closeDb();
}
// Sanitize a term for FTS5: keep letters, combining marks, digits, underscores and
// apostrophes (for contractions like "don't"); drop everything else.
// Unicode property classes are used instead of ASCII-only \w so that non-English
// terms ("café", "日本語") survive instead of being truncated or emptied.
function sanitizeFTS5Term(term: string): string {
  return term.replace(/[^\p{L}\p{M}\p{N}_']/gu, '').trim();
}

// Build FTS5 query: phrase-aware with fallback to individual terms.
//
// Returns "" when no term of at least two characters remains after sanitizing, a
// single quoted term when exactly one remains, and otherwise
//   ("<whole phrase>") OR (NEAR("t1" "t2" ..., 10)) OR ("t1" OR "t2" ...)
// so that exact phrase matches rank highest, then close proximity, then any term.
function buildFTS5Query(query: string): string {
  // FTS5 string literals escape a double quote by doubling it
  const quote = (text: string): string => `"${text.replace(/"/g, '""')}"`;

  // Sanitize the full query for phrase matching (same character set as the
  // per-term sanitizer, plus whitespace)
  const sanitizedQuery = query.replace(/[^\p{L}\p{M}\p{N}_\s']/gu, '').trim();

  const terms = query
    .split(/\s+/)
    .map(sanitizeFTS5Term)
    .filter(term => term.length >= 2); // Skip single chars and empty

  if (terms.length === 0) return "";
  if (terms.length === 1) return quote(terms[0]);

  const phrase = quote(sanitizedQuery);
  const quotedTerms = terms.map(quote);

  // FTS5 NEAR syntax: NEAR(term1 term2, distance)
  const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
  const orTerms = quotedTerms.join(' OR ');

  // Exact phrase > proximity > any term
  return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
}
// Normalize BM25 score to 0-1 range using a sigmoid.
// SQLite reports BM25 as a negative number where lower means a better match, so the
// magnitude is used: a logistic curve centred on 5 with a scale of 3, which gives
// 0.5 at |score| = 5 and rises towards 1 as the magnitude grows.
function normalizeBM25(score: number): number {
  const magnitude = Math.abs(score);
  const exponent = (5 - magnitude) / 3;
  return 1 / (1 + Math.exp(exponent));
}
// Min-max rescale the scores of `results` into 0-1, returning new result objects.
// An empty list is returned as-is; when every score is equal the divisor falls back
// to 1, so all rescaled scores come out as 0.
function normalizeScores(results: SearchResult[]): SearchResult[] {
  if (results.length === 0) return results;

  const highest = results.reduce((acc, item) => Math.max(acc, item.score), -Infinity);
  const lowest = results.reduce((acc, item) => Math.min(acc, item.score), Infinity);
  const span = highest - lowest || 1;

  return results.map(item => {
    const rescaled = (item.score - lowest) / span;
    return { ...item, score: rescaled };
  });
}
// Reciprocal Rank Fusion: merges several ranked lists into one.
// Each appearance of a document contributes weight / (k + rank + 1), where rank is
// its 0-based position in that list; contributions are summed per `file`.
// k=60 is the conventional constant. Lists without an entry in `weights` use 1.0.
export type RankedResult = { file: string; displayPath: string; title: string; body: string; score: number };

function reciprocalRankFusion(
  resultLists: RankedResult[][],
  weights: number[] = [], // Weight per result list (default 1.0)
  k: number = 60
): RankedResult[] {
  type Fused = { score: number; displayPath: string; title: string; body: string; bestRank: number };
  const fusedByFile = new Map<string, Fused>();

  for (const [listIdx, list] of resultLists.entries()) {
    const weight = weights[listIdx] ?? 1.0;
    for (const [rank, doc] of list.entries()) {
      const contribution = weight / (k + rank + 1);
      const entry = fusedByFile.get(doc.file);
      if (!entry) {
        // First sighting: its display metadata is what the fused result keeps
        fusedByFile.set(doc.file, { score: contribution, displayPath: doc.displayPath, title: doc.title, body: doc.body, bestRank: rank });
        continue;
      }
      entry.score += contribution;
      entry.bestRank = Math.min(entry.bestRank, rank);
    }
  }

  // Bonus for the best rank achieved in any list (#1: +0.05, #2-3: +0.02), so an
  // exact match is not diluted by the other lists.
  const fused: RankedResult[] = [];
  for (const [file, entry] of fusedByFile) {
    const bonus = entry.bestRank === 0 ? 0.05 : entry.bestRank <= 2 ? 0.02 : 0;
    fused.push({ file, displayPath: entry.displayPath, title: entry.title, body: entry.body, score: entry.score + bonus });
  }
  return fused.sort((a, b) => b.score - a.score);
}
// Options shared by the result-printing commands.
type OutputOptions = {
  // Output format to render
  format: OutputFormat;
  // Full-output flag
  full: boolean;
  // Result-count limit
  limit: number;
  // Score threshold
  minScore: number;
  all?: boolean;
  // Filter by collection name (pwd suffix match)
  collection?: string;
  // Add line numbers to output
  lineNumbers?: boolean;
};
// Extract snippet with more context lines for CLI display.
//
// Picks the line containing the most query terms (case-insensitive substring match;
// the earliest line wins ties) and returns it with `contextLines` lines on either
// side. When `chunkPos` is given, the search starts 200 characters before that
// offset and the returned 1-based `line` is adjusted back to the full body. If no
// line contains any term, the first `contextLines * 2` lines of the searched area
// are returned with hasMatch = false.
function extractSnippetWithContext(body: string, query: string, contextLines = 3, chunkPos?: number): { line: number; snippet: string; hasMatch: boolean } {
  // Where the searched window begins inside `body` (0 = search everything)
  const windowStart = chunkPos && chunkPos > 0 ? Math.max(0, chunkPos - 200) : 0;
  const searchBody = body.slice(windowStart);
  // Number of line breaks that precede the window
  const lineOffset = windowStart > 0 ? body.slice(0, windowStart).split('\n').length - 1 : 0;

  const lines = searchBody.split('\n');
  const terms = query.toLowerCase().split(/\s+/).filter(Boolean);

  let bestIndex = 0;
  let bestHits = -1;
  lines.forEach((text, index) => {
    const haystack = text.toLowerCase();
    const hits = terms.filter(term => haystack.includes(term)).length;
    if (hits > bestHits) {
      bestHits = hits;
      bestIndex = index;
    }
  });

  // No query match found - return beginning of chunk area or file
  if (bestHits <= 0) {
    return {
      line: lineOffset + 1,
      snippet: lines.slice(0, contextLines * 2).join('\n').trim(),
      hasMatch: false,
    };
  }

  const from = Math.max(0, bestIndex - contextLines);
  const to = Math.min(lines.length, bestIndex + contextLines + 1);
  return {
    line: lineOffset + bestIndex + 1,
    snippet: lines.slice(from, to).join('\n').trim(),
    hasMatch: true,
  };
}
/**
 * Highlight query terms in text (skip short words < 3 chars).
 *
 * All terms are matched in a single case-insensitive pass. Replacing term by
 * term would let a later term match inside the ANSI escape codes inserted for
 * an earlier one, and would wrap duplicate or overlapping terms twice.
 *
 * @param text  Text to decorate.
 * @param query Whitespace-separated search terms.
 * @returns `text` with each match wrapped in bold yellow, or `text` unchanged
 *          when color is disabled or no term is long enough.
 */
function highlightTerms(text: string, query: string): string {
  if (!useColor) return text;

  const terms = [...new Set(query.toLowerCase().split(/\s+/).filter(t => t.length >= 3))];
  if (terms.length === 0) return text;

  // Longest first, so the alternation prefers "foobar" over its prefix "foo".
  const alternation = terms
    .sort((a, b) => b.length - a.length)
    .map(term => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');

  return text.replace(
    new RegExp(alternation, 'gi'),
    match => `${c.yellow}${c.bold}${match}${c.reset}`
  );
}
/**
 * Format a score as a whole-number percentage, colored by how high it is.
 *
 * @param score Score where 1.0 renders as 100%.
 * @returns The percentage padded to 3 characters plus `%`; green from 0.7,
 *          yellow from 0.4, dim below that. No color codes when color is off.
 */
function formatScore(score: number): string {
  const label = `${(score * 100).toFixed(0).padStart(3)}%`;
  if (!useColor) return label;

  const tint = score >= 0.7 ? c.green : score >= 0.4 ? c.yellow : c.dim;
  return `${tint}${label}${c.reset}`;
}
/**
 * Shorten directory path for display - relative to $HOME
 * (used for context paths, not documents).
 *
 * @param dirpath Absolute directory path.
 * @returns The path with the home directory replaced by `~`, or `dirpath`
 *          unchanged when it is not the home directory or inside it.
 */
function shortPath(dirpath: string): string {
  const home = homedir();
  if (!home || !dirpath.startsWith(home)) return dirpath;

  // Only shorten when the prefix ends on a path boundary; otherwise a sibling
  // such as "/home/bob2" would be rendered as "~2" for home "/home/bob".
  const rest = dirpath.slice(home.length);
  if (rest === '' || rest.startsWith('/') || rest.startsWith('\\')) {
    return '~' + rest;
  }
  return dirpath;
}
/**
 * Prefix every line of `text` with its line number and a colon.
 *
 * @param text      Text to number; split on `\n`.
 * @param startLine Number given to the first line (default 1).
 * @returns The numbered lines joined back with `\n`.
 */
function addLineNumbers(text: string, startLine: number = 1): string {
  const numbered: string[] = [];
  for (const [offset, row] of text.split('\n').entries()) {
    numbered.push(`${startLine + offset}: ${row}`);
  }
  return numbered.join('\n');
}
/**
 * Print search results to stdout in the format selected by `opts.format`
 * ("json", "files", "cli", "md", "xml"; anything else is rendered as CSV).
 *
 * Results scoring below `opts.minScore` are dropped and at most `opts.limit`
 * are printed. When nothing survives, a single notice line is printed instead.
 *
 * @param results Scored documents, expected in the order they should be shown.
 * @param query   The user's query, used for snippet extraction and highlighting.
 * @param opts    Output options (format, full body vs snippet, line numbers, ...).
 */
function outputResults(results: { file: string; displayPath: string; title: string; body: string; score: number; context?: string | null; chunkPos?: number; hash?: string; docid?: string }[], query: string, opts: OutputOptions): void {
  const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
  if (filtered.length === 0) {
    console.log("No results found above minimum score threshold.");
    return;
  }

  // Helper to create qmd:// URI from displayPath
  const toQmdPath = (displayPath: string) => `qmd://${displayPath}`;

  // Short document id: the explicit docid, else the first 6 characters of the hash.
  const shortDocid = (row: { docid?: string; hash?: string }): string | undefined =>
    row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);

  // Escape a value for use inside a double-quoted XML attribute.
  const escapeXmlAttr = (value: string): string =>
    value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');

  if (opts.format === "json") {
    // JSON output for LLM consumption
    const output = filtered.map(row => {
      const docid = shortDocid(row);
      let body = opts.full ? row.body : undefined;
      let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
      if (opts.lineNumbers) {
        if (body) body = addLineNumbers(body);
        if (snippet) snippet = addLineNumbers(snippet);
      }
      // Optional keys are only emitted when they have a value.
      return {
        ...(docid && { docid: `#${docid}` }),
        score: Math.round(row.score * 100) / 100,
        file: toQmdPath(row.displayPath),
        title: row.title,
        ...(row.context && { context: row.context }),
        ...(body && { body }),
        ...(snippet && { snippet }),
      };
    });
    console.log(JSON.stringify(output, null, 2));
  } else if (opts.format === "files") {
    // Simple docid,score,filepath,context output
    for (const row of filtered) {
      const docid = shortDocid(row) ?? "";
      const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
      console.log(`#${docid},${row.score.toFixed(2)},${toQmdPath(row.displayPath)}${ctx}`);
    }
  } else if (opts.format === "cli") {
    for (let i = 0; i < filtered.length; i++) {
      const row = filtered[i];
      const { line, snippet, hasMatch } = extractSnippetWithContext(row.body, query, 2, row.chunkPos);
      const docid = shortDocid(row);

      // Line 1: filepath with docid
      const path = toQmdPath(row.displayPath);
      const lineInfo = hasMatch ? `:${line}` : "";
      const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
      console.log(`${c.cyan}${path}${c.dim}${lineInfo}${c.reset}${docidStr}`);

      // Line 2: Title (if available)
      if (row.title) {
        console.log(`${c.bold}Title: ${row.title}${c.reset}`);
      }

      // Line 3: Context (if available)
      if (row.context) {
        console.log(`${c.dim}Context: ${row.context}${c.reset}`);
      }

      // Line 4: Score
      const score = formatScore(row.score);
      console.log(`Score: ${c.bold}${score}${c.reset}`);
      console.log();

      // Snippet with highlighting (no leading | chars for better word wrap)
      // NOTE(review): `line` is the best-matching line while the snippet can start
      // up to 2 lines earlier, so these numbers may be offset - confirm intent.
      const displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
      console.log(highlightTerms(displaySnippet, query));

      // Double empty line between results
      if (i < filtered.length - 1) console.log('\n');
    }
  } else if (opts.format === "md") {
    for (const row of filtered) {
      const heading = row.title || row.displayPath;
      const docid = shortDocid(row);
      let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
      if (opts.lineNumbers) {
        content = addLineNumbers(content);
      }
      const docidLine = docid ? `**docid:** \`#${docid}\`\n` : "";
      const contextLine = row.context ? `**context:** ${row.context}\n` : "";
      console.log(`---\n# ${heading}\n${docidLine}${contextLine}\n${content}\n`);
    }
  } else if (opts.format === "xml") {
    for (const row of filtered) {
      // Attribute values are escaped so quotes or markup in a title, context or
      // path cannot break out of the attribute. The content is emitted as-is.
      const titleAttr = row.title ? ` title="${escapeXmlAttr(row.title)}"` : "";
      const contextAttr = row.context ? ` context="${escapeXmlAttr(row.context)}"` : "";
      const docid = shortDocid(row) ?? "";
      let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
      if (opts.lineNumbers) {
        content = addLineNumbers(content);
      }
      console.log(`<file docid="#${escapeXmlAttr(docid)}" name="${escapeXmlAttr(toQmdPath(row.displayPath))}"${titleAttr}${contextAttr}>\n${content}\n</file>\n`);
    }
  } else {
    // CSV format
    console.log("docid,score,file,title,context,line,snippet");
    for (const row of filtered) {
      const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
      let content = opts.full ? row.body : snippet;
      if (opts.lineNumbers) {
        // A full body always starts at line 1; only a snippet starts at `line`.
        content = addLineNumbers(content, opts.full ? 1 : line);
      }
      const docid = shortDocid(row) ?? "";
      console.log(`#${docid},${row.score.toFixed(4)},${escapeCSV(toQmdPath(row.displayPath))},${escapeCSV(row.title)},${escapeCSV(row.context || "")},${line},${escapeCSV(content)}`);
    }
  }
}
/**
 * Run a full-text search and print the results.
 *
 * Exits the process with status 1 when `opts.collection` names a collection
 * that `getCollectionFromYaml` does not know.
 *
 * @param query Search query passed to `searchFTS`.
 * @param opts  Output options; `collection`, `all` and `limit` also shape the fetch.
 */
function search(query: string, opts: OutputOptions): void {
  const db = getDb();

  // Validate collection filter if specified
  if (opts.collection && !getCollectionFromYaml(opts.collection)) {
    console.error(`Collection not found: ${opts.collection}`);
    closeDb();
    process.exit(1);
  }
  const collectionName: string | undefined = opts.collection || undefined;

  // Use large limit for --all, otherwise fetch more than needed and let outputResults filter
  const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);

  // searchFTS accepts collection name as number parameter for legacy reasons (will be fixed in store.ts)
  const hits = searchFTS(db, query, fetchLimit, collectionName as any);

  // Attach each file's context while the database is still open.
  const hitsWithContext = hits.map(hit => ({
    ...hit,
    context: getContextForFile(db, hit.filepath),
  }));
  closeDb();

  if (hitsWithContext.length > 0) {
    outputResults(hitsWithContext, query, opts);
    return;
  }
  console.log("No results found.");
}
/**
 * Run a vector similarity search over the original query plus its expanded
 * variants, keep the best score per file, and print the results.
 *
 * Exits with status 1 for an unknown collection. Prints an error and returns
 * when the `vectors_vec` table does not exist.
 *
 * @param query User query.
 * @param opts  Output options; `collection`, `all` and `limit` also shape the fetch.
 * @param model Embedding model name handed to `searchVec`.
 */
async function vectorSearch(query: string, opts: OutputOptions, model: string = DEFAULT_EMBED_MODEL): Promise<void> {
  const db = getDb();

  // Validate collection filter if specified
  if (opts.collection && !getCollectionFromYaml(opts.collection)) {
    console.error(`Collection not found: ${opts.collection}`);
    closeDb();
    process.exit(1);
  }
  const collectionName: string | undefined = opts.collection || undefined;

  const vecTable = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
  if (!vecTable) {
    console.error("Vector index not found. Run 'qmd embed' first to create embeddings.");
    closeDb();
    return;
  }

  // Check index health and warn about issues
  checkIndexHealth(db);

  // Expand query using structured output (no lexical for vector-only search)
  const expanded = await expandQueryStructured(query, false);

  // Queries to embed: the original, then the vector rewrite and the hyde text when usable.
  const vectorQueries: string[] = [
    query,
    ...(expanded.vectorQuery && expanded.vectorQuery !== query ? [expanded.vectorQuery] : []),
    ...(expanded.hyde && expanded.hyde.length > 20 ? [expanded.hyde] : []),
  ];
  process.stderr.write(`${c.dim}Searching ${vectorQueries.length} vector queries...${c.reset}\n`);

  // Collect results from all query variations, keeping each file's highest score.
  const perQueryLimit = opts.all ? 500 : 20;
  const bestByFile = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; hash: string }>();
  for (const variant of vectorQueries) {
    const hits = await searchVec(db, variant, model, perQueryLimit, collectionName as any);
    for (const hit of hits) {
      const seen = bestByFile.get(hit.filepath);
      if (seen && !(hit.score > seen.score)) continue;
      bestByFile.set(hit.filepath, {
        file: hit.filepath,
        displayPath: hit.displayPath,
        title: hit.title,
        body: hit.body || "",
        score: hit.score,
        hash: hit.hash,
      });
    }
  }

  // Sort by max score and limit to requested count
  const ranked = [...bestByFile.values()].sort((a, b) => b.score - a.score);
  const results = ranked
    .slice(0, opts.limit)
    .map(entry => ({ ...entry, context: getContextForFile(db, entry.file) }));
  closeDb();

  if (results.length === 0) {
    console.log("No results found.");
    return;
  }

  // Already limited above, so only the score threshold still applies.
  outputResults(results, query, { ...opts, limit: results.length });
}
/**
 * Expand a query through the default LlamaCpp instance and log the variants
 * to stderr as a small tree.
 *
 * @param query          Original user query.
 * @param includeLexical Forwarded to the model's `expandQueryStructured`; also
 *                       selects the label shown next to the original query.
 * @returns The expansion returned by the model, unmodified.
 */
async function expandQueryStructured(query: string, includeLexical: boolean = true): Promise<ExpandedQuery> {
  process.stderr.write(`${c.dim}Expanding query...${c.reset}\n`);
  const expanded = await getDefaultLlamaCpp().expandQueryStructured(query, includeLexical);

  const singleLine = (text: string) => text.replace(/\n/g, ' ');

  // One tree entry per variant, starting with the original query.
  const originalLabel = includeLexical ? ' · (lexical+vector)' : ' · (vector)';
  const tree: string[] = [`${c.dim}├─ ${query}${originalLabel}${c.reset}`];

  if (expanded.lexicalQuery && expanded.lexicalQuery !== query) {
    tree.push(`${c.dim}├─ ${expanded.lexicalQuery} · (lexical)${c.reset}`);
  }
  if (expanded.vectorQuery && expanded.vectorQuery !== query) {
    tree.push(`${c.dim}├─ ${expanded.vectorQuery} · (vector)${c.reset}`);
  }
  if (expanded.hyde && expanded.hyde.length > 20) {
    // Truncate hyde to first ~60 chars for display
    const isLong = expanded.hyde.length > 60;
    const hydePreview = singleLine(expanded.hyde.substring(0, 60)) + (isLong ? '...' : '');
    tree.push(`${c.dim}├─ ${hydePreview} · (vector)${c.reset}`);
  }

  // The final entry closes the tree with └─ instead of ├─
  const lastIdx = tree.length - 1;
  if (lastIdx >= 0) {
    tree[lastIdx] = tree[lastIdx].replace('├─', '└─');
  }

  tree.forEach(entry => {
    process.stderr.write(entry + '\n');
  });
  return expanded;
}
/**
 * Legacy wrapper for backward compatibility: returns the original query
 * followed by the lexical and vector rewrites that differ from it.
 *
 * @param query  Original user query.
 * @param _model Unused; kept for the old signature.
 * @param _db    Unused; kept for the old signature.
 */
async function expandQuery(query: string, _model: string = DEFAULT_QUERY_MODEL, _db?: Database): Promise<string[]> {
  const { lexicalQuery, vectorQuery } = await expandQueryStructured(query, true);
  const rewrites = [lexicalQuery, vectorQuery].filter(
    (variant): variant is string => Boolean(variant) && variant !== query
  );
  return [query, ...rewrites];
}
/**
 * Combined search: expands the query, runs full-text and (when a vector index
 * exists) vector searches for every variant, fuses the ranked lists with
 * Reciprocal Rank Fusion, reranks the top candidates on their best chunk, and
 * prints the blended results.
 *
 * Exits with status 1 when `opts.collection` names an unknown collection.
 *
 * @param query       User query.
 * @param opts        Output options; `collection` also filters retrieval.
 * @param embedModel  Embedding model name handed to `searchVec`.
 * @param rerankModel Reranker model name handed to `rerank`.
 */
async function querySearch(query: string, opts: OutputOptions, embedModel: string = DEFAULT_EMBED_MODEL, rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
  const db = getDb();

  // Validate collection filter if specified
  let collectionName: string | undefined;
  if (opts.collection) {
    const coll = getCollectionFromYaml(opts.collection);
    if (!coll) {
      console.error(`Collection not found: ${opts.collection}`);
      closeDb();
      process.exit(1);
    }
    collectionName = opts.collection;
  }

  // Check index health and warn about issues
  checkIndexHealth(db);

  // Expand query using structured output
  const expanded = await expandQueryStructured(query, true);

  // Build query lists for each retrieval type
  const ftsQueries: string[] = [query];
  if (expanded.lexicalQuery && expanded.lexicalQuery !== query) {
    ftsQueries.push(expanded.lexicalQuery);
  }
  const vectorQueries: string[] = [query];
  if (expanded.vectorQuery && expanded.vectorQuery !== query) {
    vectorQueries.push(expanded.vectorQuery);
  }
  if (expanded.hyde && expanded.hyde.length > 20) {
    vectorQueries.push(expanded.hyde);
  }
  process.stderr.write(`${c.dim}Searching ${ftsQueries.length} lexical + ${vectorQueries.length} vector queries...${c.reset}\n`);

  // Collect ranked result lists for RRF fusion. The weight of each list is
  // recorded as the list is pushed: lists produced by the original query count
  // double, expansions count once. Deriving weights from list position would
  // give the lexical expansion the vector list's boost and would shift whenever
  // an empty list is skipped.
  const rankedLists: RankedResult[][] = [];
  const weights: number[] = [];
  const weightFor = (q: string) => (q === query ? 2.0 : 1.0);
  const hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();

  // Map to store hash by filepath for final results
  const hashMap = new Map<string, string>();

  // FTS searches with lexical queries
  for (const q of ftsQueries) {
    const ftsResults = searchFTS(db, q, 20, collectionName as any);
    if (ftsResults.length === 0) continue;
    for (const r of ftsResults) hashMap.set(r.filepath, r.hash);
    rankedLists.push(ftsResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
    weights.push(weightFor(q));
  }

  // Vector searches with semantic queries + hyde
  if (hasVectors) {
    for (const q of vectorQueries) {
      const vecResults = await searchVec(db, q, embedModel, 20, collectionName as any);
      if (vecResults.length === 0) continue;
      for (const r of vecResults) hashMap.set(r.filepath, r.hash);
      rankedLists.push(vecResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
      weights.push(weightFor(q));
    }
  }

  // Apply Reciprocal Rank Fusion to combine all ranked lists
  const fused = reciprocalRankFusion(rankedLists, weights);
  const candidates = fused.slice(0, 30); // Over-retrieve for reranking

  if (candidates.length === 0) {
    console.log("No results found.");
    closeDb();
    return;
  }

  // Rerank chunks, not full documents: for each candidate pick the chunk that
  // shares the most query terms (simple keyword heuristic; first chunk on ties).
  const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
  const chunksToRerank: { file: string; text: string; chunkIdx: number }[] = [];
  const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; bestChunkIdx: number }>();
  for (const cand of candidates) {
    const chunks = chunkDocument(cand.body);
    let bestIdx = 0;
    if (chunks.length > 1) {
      let bestScore = 0;
      for (let i = 0; i < chunks.length; i++) {
        const chunkLower = chunks[i].text.toLowerCase();
        const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
        if (score > bestScore) {
          bestScore = score;
          bestIdx = i;
        }
      }
    }
    chunksToRerank.push({ file: cand.file, text: chunks[bestIdx].text, chunkIdx: bestIdx });
    docChunkMap.set(cand.file, { chunks, bestChunkIdx: bestIdx });
  }

  // Rerank the focused chunks (with caching)
  const reranked = await rerank(
    query,
    chunksToRerank.map(chunk => ({ file: chunk.file, text: chunk.text })),
    rerankModel,
    db
  );

  // Blend RRF position score with reranker score using position-aware weights
  // Top retrieval results get more protection from reranker disagreement
  const candidateMap = new Map(candidates.map(cand => [cand.file, { displayPath: cand.displayPath, title: cand.title, body: cand.body }]));
  const rrfRankMap = new Map(candidates.map((cand, i) => [cand.file, i + 1])); // 1-indexed rank

  const finalResults = reranked.map(r => {
    const rrfRank = rrfRankMap.get(r.file) || 30;

    // Position-aware blending: top retrieval results preserved more
    // Rank 1-3: 75% RRF, 25% reranker (trust retrieval for exact matches)
    // Rank 4-10: 60% RRF, 40% reranker
    // Rank 11+: 40% RRF, 60% reranker (trust reranker for lower-ranked)
    let rrfWeight: number;
    if (rrfRank <= 3) {
      rrfWeight = 0.75;
    } else if (rrfRank <= 10) {
      rrfWeight = 0.60;
    } else {
      rrfWeight = 0.40;
    }
    const rrfScore = 1 / rrfRank; // Position-based: 1, 0.5, 0.33...
    const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * r.score;

    const candidate = candidateMap.get(r.file);
    const chunkInfo = docChunkMap.get(r.file);
    const chunkPos = chunkInfo ? chunkInfo.chunks[chunkInfo.bestChunkIdx].pos : 0;

    // Pass the whole document together with the best chunk's position. The
    // snippet helpers slice `body` at `chunkPos`, so handing them only the
    // chunk text with that offset would search the wrong (or an empty) region.
    // Assumes `pos` is the chunk's character offset in the document - TODO confirm.
    return {
      file: r.file,
      displayPath: candidate?.displayPath || "",
      title: candidate?.title || "",
      body: candidate?.body || "",
      chunkPos,
      score: blendedScore,
      context: getContextForFile(db, r.file),
      hash: hashMap.get(r.file) || "",
    };
  }).sort((a, b) => b.score - a.score);

  closeDb();
  outputResults(finalResults, query, opts);
}
/**
 * Parse the command line with util.parseArgs.
 *
 * Side effect: when `--index` is given, the custom index name is set in the store.
 *
 * @returns `command` (first positional, or ""), `args` (remaining positionals),
 *          `query` (those joined with spaces), the derived `opts`, and the raw
 *          parsed `values`.
 */
function parseCLI() {
  const { values, positionals } = parseArgs({
    args: Bun.argv.slice(2), // Skip bun and script path
    options: {
      // Global options
      index: { type: "string" },
      help: { type: "boolean", short: "h" },
      // Search options
      n: { type: "string" },
      "min-score": { type: "string" },
      all: { type: "boolean" },
      full: { type: "boolean" },
      csv: { type: "boolean" },
      md: { type: "boolean" },
      xml: { type: "boolean" },
      files: { type: "boolean" },
      json: { type: "boolean" },
      collection: { type: "string", short: "c" }, // Filter by collection
      // Collection options
      name: { type: "string" }, // collection name
      mask: { type: "string" }, // glob pattern
      // Embed options
      force: { type: "boolean", short: "f" },
      // Update options
      pull: { type: "boolean" }, // git pull before update
      // Get options
      l: { type: "string" }, // max lines
      from: { type: "string" }, // start line
      "max-bytes": { type: "string" }, // max bytes for multi-get
      "line-numbers": { type: "boolean" }, // add line numbers to output
    },
    allowPositionals: true,
    strict: false, // Allow unknown options to pass through
  });

  // Set global index name in store
  if (values.index) {
    setCustomIndexName(values.index);
  }

  // Determine output format: the first flag present wins, in this order.
  const formatFlags = ["csv", "md", "xml", "files", "json"] as const;
  const format: OutputFormat = formatFlags.find(flag => values[flag]) ?? "cli";

  // Default limit: 20 for --files/--json, 5 otherwise
  const defaultLimit = (format === "files" || format === "json") ? 20 : 5;
  const isAll = values.all || false;

  // --all means return all results (use very large limit)
  let limit = defaultLimit;
  if (isAll) {
    limit = 100000;
  } else if (values.n) {
    limit = parseInt(values.n, 10) || defaultLimit;
  }

  let minScore = 0;
  if (values["min-score"]) {
    minScore = parseFloat(values["min-score"]) || 0;
  }

  const opts: OutputOptions = {
    format,
    full: values.full || false,
    limit,
    minScore,
    all: isAll,
    collection: values.collection as string | undefined,
    lineNumbers: values["line-numbers"] || false,
  };

  const [command = "", ...args] = positionals;
  return {
    command: command || "",
    args,
    query: args.join(" "),
    opts,
    values,
  };
}
/**
 * Print the CLI usage summary (commands, options, models and the index path)
 * to stdout.
 */
function showHelp(): void {
  const helpLines: string[] = [
    "Usage:",
    " qmd collection add [path] --name <name> --mask <pattern> - Create/index collection",
    " qmd collection list - List all collections with details",
    " qmd collection remove <name> - Remove a collection by name",
    " qmd collection rename <old> <new> - Rename a collection",
    " qmd ls [collection[/path]] - List collections or files in a collection",
    " qmd context add [path] \"text\" - Add context for path (defaults to current dir)",
    " qmd context list - List all contexts",
    // `context check` is handled by the command dispatcher, so it is listed here too.
    " qmd context check - Check for missing contexts",
    " qmd context rm <path> - Remove context",
    " qmd get <file>[:line] [-l N] [--from N] - Get document (optionally from line, max N lines)",
    " qmd multi-get <pattern> [-l N] [--max-bytes N] - Get multiple docs by glob or comma-separated list",
    " qmd status - Show index status and collections",
    " qmd update [--pull] - Re-index all collections (--pull: git pull first)",
    " qmd embed [-f] - Create vector embeddings (800 tokens/chunk, 15% overlap)",
    " qmd cleanup - Remove cache and orphaned data, vacuum DB",
    " qmd search <query> - Full-text search (BM25)",
    " qmd vsearch <query> - Vector similarity search",
    " qmd query <query> - Combined search with query expansion + reranking",
    " qmd mcp - Start MCP server (for AI agent integration)",
    "",
    "Global options:",
    " --index <name> - Use custom index name (default: index)",
    "",
    "Search options:",
    // The parser defaults to 20 results for both --files and --json.
    " -n <num> - Number of results (default: 5, or 20 for --files/--json)",
    " --all - Return all matches (use with --min-score to filter)",
    " --min-score <num> - Minimum similarity score",
    " --full - Output full document instead of snippet",
    " --line-numbers - Add line numbers to output",
    " --files - Output docid,score,filepath,context (default: 20 results)",
    " --json - JSON output with snippets (default: 20 results)",
    " --csv - CSV output with snippets",
    " --md - Markdown output",
    " --xml - XML output",
    " -c, --collection <name> - Filter results to a specific collection",
    "",
    "Multi-get options:",
    " -l <num> - Maximum lines per file",
    " --max-bytes <num> - Skip files larger than N bytes (default: 10240)",
    " --json/--csv/--md/--xml/--files - Output format (same as search)",
    "",
    "Models (auto-downloaded from HuggingFace):",
    " Embedding: embeddinggemma-300M-Q8_0",
    " Reranking: qwen3-reranker-0.6b-q8_0",
    " Generation: Qwen3-0.6B-Q8_0",
    "",
    `Index: ${getDbPath()}`,
  ];
  for (const helpLine of helpLines) {
    console.log(helpLine);
  }
}
// Main CLI - only run if this is the main module.
// Parses the command line, dispatches to the matching command, and finally
// disposes the LlamaCpp instance. Usage errors print to stderr and exit with 1.
if (import.meta.main) {
  const cli = parseCLI();

  if (!cli.command || cli.values.help) {
    showHelp();
    process.exit(cli.values.help ? 0 : 1);
  }

  // Parse an optional integer option. A missing or non-numeric value yields
  // undefined, so commands never receive NaN.
  const parseIntOption = (raw: unknown): number | undefined => {
    if (!raw) return undefined;
    const parsed = parseInt(raw as string, 10);
    return Number.isNaN(parsed) ? undefined : parsed;
  };

  switch (cli.command) {
    case "context": {
      const subcommand = cli.args[0];
      if (!subcommand) {
        console.error("Usage: qmd context <add|list|check|rm>");
        console.error("");
        console.error("Commands:");
        console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
        console.error(" qmd context add / \"text\" - Add global context to all collections");
        console.error(" qmd context list - List all contexts");
        console.error(" qmd context check - Check for missing contexts");
        console.error(" qmd context rm <path> - Remove context");
        process.exit(1);
      }

      switch (subcommand) {
        case "add": {
          if (cli.args.length < 2) {
            console.error("Usage: qmd context add [path] \"text\"");
            console.error("");
            console.error("Examples:");
            console.error(" qmd context add \"Context for current directory\"");
            console.error(" qmd context add . \"Context for current directory\"");
            console.error(" qmd context add /subfolder \"Context for subfolder\"");
            console.error(" qmd context add / \"Global context for all collections\"");
            console.error("");
            console.error(" Using virtual paths:");
            console.error(" qmd context add qmd://journals/ \"Context for entire journals collection\"");
            console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\"");
            process.exit(1);
          }

          let pathArg: string | undefined;
          let contextText: string;

          // Check if first arg looks like a path or if it's the context text
          const firstArg = cli.args[1];
          const secondArg = cli.args[2];

          if (secondArg) {
            // Two args: path + context
            pathArg = firstArg;
            contextText = cli.args.slice(2).join(" ");
          } else {
            // One arg: context only (use current directory)
            pathArg = undefined;
            contextText = firstArg;
          }

          await contextAdd(pathArg, contextText);
          break;
        }

        case "list": {
          contextList();
          break;
        }

        case "check": {
          contextCheck();
          break;
        }

        case "rm":
        case "remove": {
          if (cli.args.length < 2) {
            console.error("Usage: qmd context rm <path>");
            console.error("Examples:");
            console.error(" qmd context rm /");
            console.error(" qmd context rm qmd://journals/2024");
            process.exit(1);
          }
          contextRemove(cli.args[1]);
          break;
        }

        default:
          console.error(`Unknown subcommand: ${subcommand}`);
          console.error("Available: add, list, check, rm");
          process.exit(1);
      }
      break;
    }

    // Legacy alias for backwards compatibility
    case "add-context": {
      console.error(`${c.yellow}Note: 'qmd add-context' is deprecated. Use 'qmd context add' instead.${c.reset}`);
      if (cli.args.length === 0) {
        console.error("Usage: qmd context add [path] \"text\"");
        process.exit(1);
      }

      let pathArg: string | undefined;
      let contextText: string;

      if (cli.args.length === 1) {
        pathArg = undefined;
        contextText = cli.args[0];
      } else {
        pathArg = cli.args[0];
        contextText = cli.args.slice(1).join(" ");
      }

      await contextAdd(pathArg, contextText);
      break;
    }

    case "get": {
      if (!cli.args[0]) {
        console.error("Usage: qmd get <filepath>[:line] [--from <line>] [-l <lines>] [--line-numbers]");
        process.exit(1);
      }
      const fromLine = parseIntOption(cli.values.from);
      const maxLines = parseIntOption(cli.values.l);
      getDocument(cli.args[0], fromLine, maxLines, cli.opts.lineNumbers);
      break;
    }

    case "multi-get": {
      if (!cli.args[0]) {
        console.error("Usage: qmd multi-get <pattern> [-l <lines>] [--max-bytes <bytes>] [--json|--csv|--md|--xml|--files]");
        console.error(" pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list");
        process.exit(1);
      }
      const maxLinesMulti = parseIntOption(cli.values.l);
      const maxBytes = parseIntOption(cli.values["max-bytes"]) ?? DEFAULT_MULTI_GET_MAX_BYTES;
      multiGet(cli.args[0], maxLinesMulti, maxBytes, cli.opts.format);
      break;
    }

    case "ls": {
      listFiles(cli.args[0]);
      break;
    }

    case "collection": {
      const subcommand = cli.args[0];
      if (!subcommand) {
        // Without this guard the default branch reports "Unknown subcommand: undefined".
        console.error("Usage: qmd collection <list|add|remove|rename>");
        process.exit(1);
      }

      switch (subcommand) {
        case "list": {
          collectionList();
          break;
        }

        case "add": {
          const pwd = cli.args[1] || getPwd();
          const resolvedPwd = pwd === '.' ? getPwd() : getRealPath(resolve(pwd));
          const globPattern = cli.values.mask as string || DEFAULT_GLOB;
          const name = cli.values.name as string | undefined;
          await collectionAdd(resolvedPwd, globPattern, name);
          break;
        }

        case "remove":
        case "rm": {
          if (!cli.args[1]) {
            console.error("Usage: qmd collection remove <name>");
            console.error(" Use 'qmd collection list' to see available collections");
            process.exit(1);
          }
          collectionRemove(cli.args[1]);
          break;
        }

        case "rename":
        case "mv": {
          if (!cli.args[1] || !cli.args[2]) {
            console.error("Usage: qmd collection rename <old-name> <new-name>");
            console.error(" Use 'qmd collection list' to see available collections");
            process.exit(1);
          }
          collectionRename(cli.args[1], cli.args[2]);
          break;
        }

        default:
          console.error(`Unknown subcommand: ${subcommand}`);
          console.error("Available: list, add, remove, rename");
          process.exit(1);
      }
      break;
    }

    case "status":
      showStatus();
      break;

    case "update":
      await updateCollections();
      break;

    case "embed":
      await vectorIndex(DEFAULT_EMBED_MODEL, cli.values.force || false);
      break;

    case "search":
      if (!cli.query) {
        console.error("Usage: qmd search [options] <query>");
        process.exit(1);
      }
      search(cli.query, cli.opts);
      break;

    case "vsearch":
      if (!cli.query) {
        console.error("Usage: qmd vsearch [options] <query>");
        process.exit(1);
      }
      // Default min-score for vector search is 0.3
      if (!cli.values["min-score"]) {
        cli.opts.minScore = 0.3;
      }
      await vectorSearch(cli.query, cli.opts);
      break;

    case "query":
      if (!cli.query) {
        console.error("Usage: qmd query [options] <query>");
        process.exit(1);
      }
      await querySearch(cli.query, cli.opts);
      break;

    case "mcp": {
      const { startMcpServer } = await import("./mcp.js");
      await startMcpServer();
      break;
    }

    case "cleanup": {
      const db = getDb();

      // 1. Clear llm_cache
      const cacheCount = deleteLLMCache(db);
      console.log(`${c.green}✓${c.reset} Cleared ${cacheCount} cached API responses`);

      // 2. Remove orphaned vectors
      const orphanedVecs = cleanupOrphanedVectors(db);
      if (orphanedVecs > 0) {
        console.log(`${c.green}✓${c.reset} Removed ${orphanedVecs} orphaned embedding chunks`);
      } else {
        console.log(`${c.dim}No orphaned embeddings to remove${c.reset}`);
      }

      // 3. Remove inactive documents
      const inactiveDocs = deleteInactiveDocuments(db);
      if (inactiveDocs > 0) {
        console.log(`${c.green}✓${c.reset} Removed ${inactiveDocs} inactive document records`);
      }

      // 4. Vacuum to reclaim space
      vacuumDatabase(db);
      console.log(`${c.green}✓${c.reset} Database vacuumed`);

      closeDb();
      break;
    }

    default:
      console.error(`Unknown command: ${cli.command}`);
      console.error("Run 'qmd --help' for usage.");
      process.exit(1);
  }

  // Cleanup LlamaCpp instance to prevent NAPI crash on exit
  await disposeDefaultLlamaCpp();
} // end if (import.meta.main)
|