#!/usr/bin/env node import { openDatabase } from "../db.js"; import fastGlob from "fast-glob"; import { execSync, spawn as nodeSpawn } from "child_process"; import { fileURLToPath } from "url"; import { dirname, join as pathJoin, relative as relativePath } from "path"; import { parseArgs } from "util"; import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs"; import { createInterface } from "readline/promises"; import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js"; import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js"; import { formatSearchResults, 
formatDocuments, escapeXml, escapeCSV, } from "./formatter.js"; import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, } from "../collections.js"; import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js"; import { createEmbeddingProvider, resolveProviderKind, ModelMismatchError, } from "../embedding/index.js"; // Enable production mode - allows using default database path // Tests must set INDEX_PATH or use createStore() with explicit path enableProductionMode(); // ============================================================================= // Store/DB lifecycle (no legacy singletons in store.ts) // ============================================================================= let store = null; let storeDbPathOverride; function getStore() { if (!store) { store = createStore(storeDbPathOverride); // Sync YAML config into SQLite store_collections so store.ts reads from DB try { const config = loadConfig(); syncConfigToDb(store.db, config); if (config.models) { setDefaultLlamaCpp(new LlamaCpp({ embedModel: config.models.embed, generateModel: config.models.generate, rerankModel: config.models.rerank, })); } } catch { // Config may not exist yet — that's fine, DB works without it } } return store; } function getDb() { return getStore().db; } /** Re-sync YAML config into SQLite after CLI mutations (add/remove/rename collection, context changes) */ function resyncConfig() { const s = getStore(); try { const config = loadConfig(); // Clear config hash to force re-sync s.db.prepare(`DELETE FROM store_config WHERE key = 'config_hash'`).run(); syncConfigToDb(s.db, config); } catch { // Config may not exist — that's fine } } function closeDb() { if (store) { 
store.close(); store = null; } } function getDbPath() { return store?.dbPath ?? storeDbPathOverride ?? getDefaultDbPath(); } function setIndexName(name) { let normalizedName = name; // Normalize relative paths to prevent malformed database paths if (name && name.includes('/')) { const { resolve } = require('path'); const { cwd } = require('process'); const absolutePath = resolve(cwd(), name); // Replace path separators with underscores to create a valid filename normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, ''); } storeDbPathOverride = normalizedName ? getDefaultDbPath(normalizedName) : undefined; // Reset open handle so next use opens the new index closeDb(); } function ensureVecTable(_db, dimensions) { // Store owns the DB; ignore `_db` and ensure vec table on the active store getStore().ensureVecTable(dimensions); } // Terminal colors (respects NO_COLOR env) const useColor = !process.env.NO_COLOR && process.stdout.isTTY; const c = { reset: useColor ? "\x1b[0m" : "", dim: useColor ? "\x1b[2m" : "", bold: useColor ? "\x1b[1m" : "", cyan: useColor ? "\x1b[36m" : "", yellow: useColor ? "\x1b[33m" : "", green: useColor ? "\x1b[32m" : "", magenta: useColor ? "\x1b[35m" : "", blue: useColor ? "\x1b[34m" : "", red: useColor ? 
"\x1b[31m" : "", }; // Terminal cursor control const cursor = { hide() { process.stderr.write('\x1b[?25l'); }, show() { process.stderr.write('\x1b[?25h'); }, }; // Ensure cursor is restored on exit process.on('SIGINT', () => { cursor.show(); process.exit(130); }); process.on('SIGTERM', () => { cursor.show(); process.exit(143); }); // Terminal progress bar using OSC 9;4 escape sequence (TTY only) const isTTY = process.stderr.isTTY; const progress = { set(percent) { if (isTTY) process.stderr.write(`\x1b]9;4;1;${Math.round(percent)}\x07`); }, clear() { if (isTTY) process.stderr.write(`\x1b]9;4;0\x07`); }, indeterminate() { if (isTTY) process.stderr.write(`\x1b]9;4;3\x07`); }, error() { if (isTTY) process.stderr.write(`\x1b]9;4;2\x07`); }, }; // Format seconds into human-readable ETA function formatETA(seconds) { if (seconds < 60) return `${Math.round(seconds)}s`; if (seconds < 3600) return `${Math.floor(seconds / 60)}m ${Math.round(seconds % 60)}s`; return `${Math.floor(seconds / 3600)}h ${Math.floor((seconds % 3600) / 60)}m`; } // Check index health and print warnings/tips function checkIndexHealth(db) { const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db); // Warn if many docs need embedding if (needsEmbedding > 0) { const pct = Math.round((needsEmbedding / totalDocs) * 100); if (pct >= 10) { process.stderr.write(`${c.yellow}Warning: ${needsEmbedding} documents (${pct}%) need embeddings. Run 'qmd embed' for better results.${c.reset}\n`); } else { process.stderr.write(`${c.dim}Tip: ${needsEmbedding} documents need embeddings. Run 'qmd embed' to index them.${c.reset}\n`); } } // Check if most recent document update is older than 2 weeks if (daysStale !== null && daysStale >= 14) { process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. 
Run 'qmd update' to refresh.${c.reset}\n`); } } // Compute unique display path for a document // Always include at least parent folder + filename, add more parent dirs until unique function computeDisplayPath(filepath, collectionPath, existingPaths) { // Get path relative to collection (include collection dir name) const collectionDir = collectionPath.replace(/\/$/, ''); const collectionName = collectionDir.split('/').pop() || ''; let relativePath; if (filepath.startsWith(collectionDir + '/')) { // filepath is under collection: use collection name + relative path relativePath = collectionName + filepath.slice(collectionDir.length); } else { // Fallback: just use the filepath relativePath = filepath; } const parts = relativePath.split('/').filter(p => p.length > 0); // Always include at least parent folder + filename (minimum 2 parts if available) // Then add more parent dirs until unique const minParts = Math.min(2, parts.length); for (let i = parts.length - minParts; i >= 0; i--) { const candidate = parts.slice(i).join('/'); if (!existingPaths.has(candidate)) { return candidate; } } // Absolute fallback: use full path (should be unique) return filepath; } function formatTimeAgo(date) { const seconds = Math.floor((Date.now() - date.getTime()) / 1000); if (seconds < 60) return `${seconds}s ago`; const minutes = Math.floor(seconds / 60); if (minutes < 60) return `${minutes}m ago`; const hours = Math.floor(minutes / 60); if (hours < 24) return `${hours}h ago`; const days = Math.floor(hours / 24); return `${days}d ago`; } function formatMs(ms) { if (ms < 1000) return `${ms}ms`; return `${(ms / 1000).toFixed(1)}s`; } function formatBytes(bytes) { if (bytes < 1024) return `${bytes} B`; if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`; } async function showStatus() { const dbPath = getDbPath(); const db = getDb(); // 
Collections are defined in YAML; no duplicate cleanup needed. // Collections are defined in YAML; no duplicate cleanup needed. // Index size let indexSize = 0; try { const stat = statSync(dbPath).size; indexSize = stat; } catch { } // Collections info (from YAML + database stats) const collections = listCollections(db); // Overall stats const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get(); const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get(); const needsEmbedding = getHashesNeedingEmbedding(db); // Most recent update across all collections const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get(); console.log(`${c.bold}QMD Status${c.reset}\n`); console.log(`Index: ${dbPath}`); console.log(`Size: ${formatBytes(indexSize)}`); // MCP daemon status (check PID file liveness) const mcpCacheDir = process.env.XDG_CACHE_HOME ? resolve(process.env.XDG_CACHE_HOME, "qmd") : resolve(homedir(), ".cache", "qmd"); const mcpPidPath = resolve(mcpCacheDir, "mcp.pid"); if (existsSync(mcpPidPath)) { const mcpPid = parseInt(readFileSync(mcpPidPath, "utf-8").trim()); try { process.kill(mcpPid, 0); console.log(`MCP: ${c.green}running${c.reset} (PID ${mcpPid})`); } catch { unlinkSync(mcpPidPath); // Stale PID file cleaned up silently } } console.log(""); console.log(`${c.bold}Documents${c.reset}`); console.log(` Total: ${totalDocs.count} files indexed`); console.log(` Vectors: ${vectorCount.count} embedded`); if (needsEmbedding > 0) { console.log(` ${c.yellow}Pending: ${needsEmbedding} need embedding${c.reset} (run 'qmd embed')`); } if (mostRecent.latest) { const lastUpdate = new Date(mostRecent.latest); console.log(` Updated: ${formatTimeAgo(lastUpdate)}`); } // Get all contexts grouped by collection (from YAML) const allContexts = listAllContexts(); const contextsByCollection = new Map(); for (const ctx of allContexts) { // Group contexts by collection name if 
(!contextsByCollection.has(ctx.collection)) { contextsByCollection.set(ctx.collection, []); } contextsByCollection.get(ctx.collection).push({ path_prefix: ctx.path, context: ctx.context }); } // AST chunking status try { const { getASTStatus } = await import("../ast.js"); const ast = await getASTStatus(); console.log(`\n${c.bold}AST Chunking${c.reset}`); if (ast.available) { const ok = ast.languages.filter(l => l.available).map(l => l.language); const fail = ast.languages.filter(l => !l.available); console.log(` Status: ${c.green}active${c.reset}`); console.log(` Languages: ${ok.join(", ")}`); if (fail.length > 0) { for (const f of fail) { console.log(` ${c.yellow}Unavailable: ${f.language} (${f.error})${c.reset}`); } } } else { console.log(` Status: ${c.yellow}unavailable${c.reset} (falling back to regex chunking)`); for (const l of ast.languages) { if (l.error) console.log(` ${c.dim}${l.language}: ${l.error}${c.reset}`); } } } catch { console.log(`\n${c.bold}AST Chunking${c.reset}`); console.log(` Status: ${c.dim}not available${c.reset}`); } if (collections.length > 0) { console.log(`\n${c.bold}Collections${c.reset}`); for (const col of collections) { const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never"; const contexts = contextsByCollection.get(col.name) || []; console.log(` ${c.cyan}${col.name}${c.reset} ${c.dim}(qmd://${col.name}/)${c.reset}`); console.log(` ${c.dim}Pattern:${c.reset} ${col.glob_pattern}`); console.log(` ${c.dim}Files:${c.reset} ${col.active_count} (updated ${lastMod})`); if (contexts.length > 0) { console.log(` ${c.dim}Contexts:${c.reset} ${contexts.length}`); for (const ctx of contexts) { // Handle both empty string and '/' as root context const pathDisplay = (ctx.path_prefix === '' || ctx.path_prefix === '/') ? '/' : `/${ctx.path_prefix}`; const contextPreview = ctx.context.length > 60 ? ctx.context.substring(0, 57) + '...' 
: ctx.context; console.log(` ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`); } } } // Show examples of virtual paths console.log(`\n${c.bold}Examples${c.reset}`); console.log(` ${c.dim}# List files in a collection${c.reset}`); if (collections.length > 0 && collections[0]) { console.log(` qmd ls ${collections[0].name}`); } console.log(` ${c.dim}# Get a document${c.reset}`); if (collections.length > 0 && collections[0]) { console.log(` qmd get qmd://${collections[0].name}/path/to/file.md`); } console.log(` ${c.dim}# Search within a collection${c.reset}`); if (collections.length > 0 && collections[0]) { console.log(` qmd search "query" -c ${collections[0].name}`); } } else { console.log(`\n${c.dim}No collections. Run 'qmd collection add .' to index markdown files.${c.reset}`); } // Models { // hf:org/repo/file.gguf → https://huggingface.co/org/repo const hfLink = (uri) => { const match = uri.match(/^hf:([^/]+\/[^/]+)\//); return match ? `https://huggingface.co/${match[1]}` : uri; }; console.log(`\n${c.bold}Models${c.reset}`); console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`); console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`); console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`); } // Device / GPU info try { const llm = getDefaultLlamaCpp(); const device = await llm.getDeviceInfo(); console.log(`\n${c.bold}Device${c.reset}`); if (device.gpu) { console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`); if (device.gpuDevices.length > 0) { // Deduplicate and count GPUs const counts = new Map(); for (const name of device.gpuDevices) { counts.set(name, (counts.get(name) || 0) + 1); } const deviceStr = Array.from(counts.entries()) .map(([name, count]) => count > 1 ? 
`${count}× ${name}` : name) .join(', '); console.log(` Devices: ${deviceStr}`); } if (device.vram) { console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`); } } else { console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`); console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`); } console.log(` CPU: ${device.cpuCores} math cores`); } catch { // Don't fail status if LLM init fails } // Tips section const tips = []; // Check for collections without context const collectionsWithoutContext = collections.filter(col => { const contexts = contextsByCollection.get(col.name) || []; return contexts.length === 0; }); if (collectionsWithoutContext.length > 0) { const names = collectionsWithoutContext.map(c => c.name).slice(0, 3).join(', '); const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : ''; tips.push(`Add context to collections for better search results: ${names}${more}`); tips.push(` ${c.dim}qmd context add qmd:/// "What this collection contains"${c.reset}`); tips.push(` ${c.dim}qmd context add qmd:///meeting-notes "Weekly team meeting notes"${c.reset}`); } // Check for collections without update commands const collectionsWithoutUpdate = collections.filter(col => { const yamlCol = getCollectionFromYaml(col.name); return !yamlCol?.update; }); if (collectionsWithoutUpdate.length > 0 && collections.length > 1) { const names = collectionsWithoutUpdate.map(c => c.name).slice(0, 3).join(', '); const more = collectionsWithoutUpdate.length > 3 ? 
` +${collectionsWithoutUpdate.length - 3} more` : ''; tips.push(`Add update commands to keep collections fresh: ${names}${more}`); tips.push(` ${c.dim}qmd collection update-cmd 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`); } if (tips.length > 0) { console.log(`\n${c.bold}Tips${c.reset}`); for (const tip of tips) { console.log(` ${tip}`); } } closeDb(); } async function updateCollections() { const db = getDb(); const storeInstance = getStore(); // Collections are defined in YAML; no duplicate cleanup needed. // Clear Ollama cache on update clearCache(db); const collections = listCollections(db); if (collections.length === 0) { console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`); closeDb(); return; } console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`); for (let i = 0; i < collections.length; i++) { const col = collections[i]; if (!col) continue; console.log(`${c.cyan}[${i + 1}/${collections.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`); // Execute custom update command if specified in YAML const yamlCol = getCollectionFromYaml(col.name); if (yamlCol?.update) { console.log(`${c.dim} Running update command: ${yamlCol.update}${c.reset}`); try { const proc = nodeSpawn("bash", ["-c", yamlCol.update], { cwd: col.pwd, stdio: ["ignore", "pipe", "pipe"], }); const [output, errorOutput, exitCode] = await new Promise((resolve, reject) => { let out = ""; let err = ""; proc.stdout?.on("data", (d) => { out += d.toString(); }); proc.stderr?.on("data", (d) => { err += d.toString(); }); proc.on("error", reject); proc.on("close", (code) => resolve([out, err, code ?? 
1])); }); if (output.trim()) { console.log(output.trim().split('\n').map(l => ` ${l}`).join('\n')); } if (errorOutput.trim()) { console.log(errorOutput.trim().split('\n').map(l => ` ${l}`).join('\n')); } if (exitCode !== 0) { console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`); process.exit(exitCode); } } catch (err) { console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`); process.exit(1); } } const startTime = Date.now(); console.log(`Collection: ${col.pwd} (${col.glob_pattern})`); progress.indeterminate(); const result = await reindexCollection(storeInstance, col.pwd, col.glob_pattern, col.name, { ignorePatterns: yamlCol?.ignore, onProgress: (info) => { progress.set((info.current / info.total) * 100); const elapsed = (Date.now() - startTime) / 1000; const rate = info.current / elapsed; const remaining = (info.total - info.current) / rate; const eta = info.current > 2 ? ` ETA: ${formatETA(remaining)}` : ""; if (isTTY) process.stderr.write(`\rIndexing: ${info.current}/${info.total}${eta} `); }, }); progress.clear(); console.log(`\nIndexed: ${result.indexed} new, ${result.updated} updated, ${result.unchanged} unchanged, ${result.removed} removed`); if (result.orphanedCleaned > 0) { console.log(`Cleaned up ${result.orphanedCleaned} orphaned content hash(es)`); } console.log(""); } // Check if any documents need embedding (show once at end) const needsEmbedding = getHashesNeedingEmbedding(db); closeDb(); console.log(`${c.green}✓ All collections updated.${c.reset}`); if (needsEmbedding > 0) { console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`); } } /** * Detect which collection (if any) contains the given filesystem path. * Returns { collectionId, collectionName, relativePath } or null if not in any collection. 
*/ function detectCollectionFromPath(db, fsPath) { const realPath = getRealPath(fsPath); // Find collections that this path is under from YAML const allCollections = yamlListCollections(); // Find longest matching path let bestMatch = null; for (const coll of allCollections) { if (realPath.startsWith(coll.path + '/') || realPath === coll.path) { if (!bestMatch || coll.path.length > bestMatch.path.length) { bestMatch = { name: coll.name, path: coll.path }; } } } if (!bestMatch) return null; // Calculate relative path let relativePath = realPath; if (relativePath.startsWith(bestMatch.path + '/')) { relativePath = relativePath.slice(bestMatch.path.length + 1); } else if (relativePath === bestMatch.path) { relativePath = ''; } return { collectionName: bestMatch.name, relativePath }; } async function contextAdd(pathArg, contextText) { const db = getDb(); // Handle "/" as global context (applies to all collections) if (pathArg === '/') { setGlobalContext(contextText); resyncConfig(); console.log(`${c.green}✓${c.reset} Set global context`); console.log(`${c.dim}Context: ${contextText}${c.reset}`); closeDb(); return; } // Resolve path - defaults to current directory if not provided let fsPath = pathArg || '.'; if (fsPath === '.' || fsPath === './') { fsPath = getPwd(); } else if (fsPath.startsWith('~/')) { fsPath = homedir() + fsPath.slice(1); } else if (!fsPath.startsWith('/') && !fsPath.startsWith('qmd://')) { fsPath = resolve(getPwd(), fsPath); } // Handle virtual paths (qmd://collection/path) if (isVirtualPath(fsPath)) { const parsed = parseVirtualPath(fsPath); if (!parsed) { console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`); process.exit(1); } const coll = getCollectionFromYaml(parsed.collectionName); if (!coll) { console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`); process.exit(1); } yamlAddContext(parsed.collectionName, parsed.path, contextText); resyncConfig(); const displayPath = parsed.path ? 
`qmd://${parsed.collectionName}/${parsed.path}` : `qmd://${parsed.collectionName}/ (collection root)`; console.log(`${c.green}✓${c.reset} Added context for: ${displayPath}`); console.log(`${c.dim}Context: ${contextText}${c.reset}`); closeDb(); return; } // Detect collection from filesystem path const detected = detectCollectionFromPath(db, fsPath); if (!detected) { console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`); console.error(`${c.dim}Run 'qmd status' to see indexed collections${c.reset}`); process.exit(1); } yamlAddContext(detected.collectionName, detected.relativePath, contextText); resyncConfig(); const displayPath = detected.relativePath ? `qmd://${detected.collectionName}/${detected.relativePath}` : `qmd://${detected.collectionName}/`; console.log(`${c.green}✓${c.reset} Added context for: ${displayPath}`); console.log(`${c.dim}Context: ${contextText}${c.reset}`); closeDb(); } function contextList() { const db = getDb(); const allContexts = listAllContexts(); if (allContexts.length === 0) { console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`); closeDb(); return; } console.log(`\n${c.bold}Configured Contexts${c.reset}\n`); let lastCollection = ''; for (const ctx of allContexts) { if (ctx.collection !== lastCollection) { console.log(`${c.cyan}${ctx.collection}${c.reset}`); lastCollection = ctx.collection; } const displayPath = ctx.path ? 
` ${ctx.path}` : ' / (root)'; console.log(`${displayPath}`); console.log(` ${c.dim}${ctx.context}${c.reset}`); } closeDb(); } function contextRemove(pathArg) { if (pathArg === '/') { // Remove global context setGlobalContext(undefined); // Resync so SQLite store_config is updated const s = getStore(); resyncConfig(); closeDb(); console.log(`${c.green}✓${c.reset} Removed global context`); return; } // Handle virtual paths if (isVirtualPath(pathArg)) { const parsed = parseVirtualPath(pathArg); if (!parsed) { console.error(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`); process.exit(1); } const coll = getCollectionFromYaml(parsed.collectionName); if (!coll) { console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`); process.exit(1); } const success = yamlRemoveContext(coll.name, parsed.path); if (!success) { console.error(`${c.yellow}No context found for: ${pathArg}${c.reset}`); process.exit(1); } console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`); return; } // Handle filesystem paths let fsPath = pathArg; if (fsPath === '.' 
|| fsPath === './') { fsPath = getPwd(); } else if (fsPath.startsWith('~/')) { fsPath = homedir() + fsPath.slice(1); } else if (!fsPath.startsWith('/')) { fsPath = resolve(getPwd(), fsPath); } const db = getDb(); const detected = detectCollectionFromPath(db, fsPath); closeDb(); if (!detected) { console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`); process.exit(1); } const success = yamlRemoveContext(detected.collectionName, detected.relativePath); if (!success) { console.error(`${c.yellow}No context found for: qmd://${detected.collectionName}/${detected.relativePath}${c.reset}`); process.exit(1); } console.log(`${c.green}✓${c.reset} Removed context for: qmd://${detected.collectionName}/${detected.relativePath}`); } function getDocument(filename, fromLine, maxLines, lineNumbers) { const db = getDb(); // Parse :linenum suffix from filename (e.g., "file.md:100") let inputPath = filename; const colonMatch = inputPath.match(/:(\d+)$/); if (colonMatch && !fromLine) { const matched = colonMatch[1]; if (matched) { fromLine = parseInt(matched, 10); inputPath = inputPath.slice(0, -colonMatch[0].length); } } // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.) if (isDocid(inputPath)) { const docidMatch = findDocumentByDocid(db, inputPath); if (docidMatch) { inputPath = docidMatch.filepath; } else { console.error(`Document not found: ${filename}`); closeDb(); process.exit(1); } } let doc = null; let virtualPath; // Handle virtual paths (qmd://collection/path) if (isVirtualPath(inputPath)) { const parsed = parseVirtualPath(inputPath); if (!parsed) { console.error(`Invalid virtual path: ${inputPath}`); closeDb(); process.exit(1); } // Try exact match on collection + path doc = db.prepare(` SELECT d.collection as collectionName, d.path, content.doc as body FROM documents d JOIN content ON content.hash = d.hash WHERE d.collection = ? AND d.path = ? 
AND d.active = 1 `).get(parsed.collectionName, parsed.path); if (!doc) { // Try fuzzy match by path ending doc = db.prepare(` SELECT d.collection as collectionName, d.path, content.doc as body FROM documents d JOIN content ON content.hash = d.hash WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1 LIMIT 1 `).get(parsed.collectionName, `%${parsed.path}`); } virtualPath = inputPath; } else { // Try to interpret as collection/path format first (before filesystem path) // If path is relative (no / or ~ prefix), check if first component is a collection name if (!inputPath.startsWith('/') && !inputPath.startsWith('~')) { const parts = inputPath.split('/'); if (parts.length >= 2) { const possibleCollection = parts[0]; const possiblePath = parts.slice(1).join('/'); // Check if this collection exists const collExists = possibleCollection ? db.prepare(` SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1 `).get(possibleCollection) : null; if (collExists) { // Try exact match on collection + path doc = db.prepare(` SELECT d.collection as collectionName, d.path, content.doc as body FROM documents d JOIN content ON content.hash = d.hash WHERE d.collection = ? AND d.path = ? AND d.active = 1 `).get(possibleCollection || "", possiblePath || ""); if (!doc) { // Try fuzzy match by path ending doc = db.prepare(` SELECT d.collection as collectionName, d.path, content.doc as body FROM documents d JOIN content ON content.hash = d.hash WHERE d.collection = ? AND d.path LIKE ? 
AND d.active = 1 LIMIT 1 `).get(possibleCollection || "", `%${possiblePath}`); } if (doc) { virtualPath = buildVirtualPath(doc.collectionName, doc.path); // Skip the filesystem path handling below } } } } // If not found as collection/path, handle as filesystem paths if (!doc) { let fsPath = inputPath; // Expand ~ to home directory if (fsPath.startsWith('~/')) { fsPath = homedir() + fsPath.slice(1); } else if (!fsPath.startsWith('/')) { // Relative path - resolve from current directory fsPath = resolve(getPwd(), fsPath); } fsPath = getRealPath(fsPath); // Try to detect which collection contains this path const detected = detectCollectionFromPath(db, fsPath); if (detected) { // Found collection - query by collection name + relative path doc = db.prepare(` SELECT d.collection as collectionName, d.path, content.doc as body FROM documents d JOIN content ON content.hash = d.hash WHERE d.collection = ? AND d.path = ? AND d.active = 1 `).get(detected.collectionName, detected.relativePath); } // Fuzzy match by filename (last component of path) if (!doc) { const filename = inputPath.split('/').pop() || inputPath; doc = db.prepare(` SELECT d.collection as collectionName, d.path, content.doc as body FROM documents d JOIN content ON content.hash = d.hash WHERE d.path LIKE ? AND d.active = 1 LIMIT 1 `).get(`%${filename}`); } if (doc) { virtualPath = buildVirtualPath(doc.collectionName, doc.path); } else { virtualPath = inputPath; } } } // Ensure doc is not null before proceeding if (!doc) { console.error(`Document not found: ${filename}`); closeDb(); process.exit(1); } // Get context for this file const context = getContextForPath(db, doc.collectionName, doc.path); let output = doc.body; const startLine = fromLine || 1; // Apply line filtering if specified if (fromLine !== undefined || maxLines !== undefined) { const lines = output.split('\n'); const start = startLine - 1; // Convert to 0-indexed const end = maxLines !== undefined ? 
/**
 * Multi-get: fetch several documents in one call and print them in `format`.
 *
 * `pattern` is treated as an explicit comma-separated list when it contains a
 * comma and none of the glob metacharacters `*`, `?` or `{`; otherwise it is
 * passed to matchFilesByGlob(). List entries may be virtual paths
 * (qmd://collection/path), exact document paths, or path suffixes.
 *
 * Documents whose reported body length exceeds `maxBytes` are listed as
 * skipped instead of being printed. When a glob matches nothing the process
 * exits with status 1.
 *
 * @param {string} pattern - Glob pattern or comma-separated list of files.
 * @param {number} [maxLines] - Keep only the first N lines of every body.
 * @param {number} [maxBytes] - Size limit above which a document is skipped.
 * @param {string} [format] - "json", "csv", "files", "md", "xml"; anything else prints the CLI layout.
 */
function multiGet(pattern, maxLines, maxBytes = DEFAULT_MULTI_GET_MAX_BYTES, format = "cli") {
  const db = getDb();
  // A comma without any glob metacharacter means "explicit list of files".
  const isCommaSeparated = pattern.includes(',') && !/[*?{]/.test(pattern);
  let files;
  if (isCommaSeparated) {
    const lookupSelect = `
      SELECT 'qmd://' || d.collection || '/' || d.path as virtual_path,
             LENGTH(content.doc) as body_length,
             d.collection,
             d.path
      FROM documents d
      JOIN content ON content.hash = d.hash`;
    // Prepared once and reused for every requested name.
    const findByCollectionAndPath = db.prepare(`${lookupSelect}
      WHERE d.collection = ? AND d.path = ? AND d.active = 1`);
    const findByExactPath = db.prepare(`${lookupSelect}
      WHERE d.path = ? AND d.active = 1
      LIMIT 1`);
    const findByPathSuffix = db.prepare(`${lookupSelect}
      WHERE d.path LIKE ? ESCAPE '\\' AND d.active = 1
      LIMIT 1`);
    // `%` and `_` are LIKE wildcards; escape them so the user-supplied name is
    // matched literally (e.g. "my_file.md" must not match "myXfile.md").
    const escapeLike = (text) => text.replace(/[\\%_]/g, '\\$&');

    const names = pattern.split(',').map(s => s.trim()).filter(Boolean);
    files = [];
    for (const name of names) {
      let doc = null;
      if (isVirtualPath(name)) {
        // Virtual path: exact match on collection + path.
        const parsed = parseVirtualPath(name);
        if (parsed) {
          doc = findByCollectionAndPath.get(parsed.collectionName, parsed.path);
        }
      } else {
        // Plain path: exact match first, then fall back to a suffix match.
        doc = findByExactPath.get(name) || findByPathSuffix.get(`%${escapeLike(name)}`);
      }
      if (doc) {
        files.push({
          filepath: doc.virtual_path,
          displayPath: doc.virtual_path,
          bodyLength: doc.body_length,
          collection: doc.collection,
          path: doc.path,
        });
      } else {
        console.error(`File not found: ${name}`);
      }
    }
  } else {
    // Glob pattern: collection/path are recovered later from the virtual path.
    files = matchFilesByGlob(db, pattern).map(f => ({
      ...f,
      collection: undefined,
      path: undefined,
    }));
    if (files.length === 0) {
      console.error(`No files matched pattern: ${pattern}`);
      closeDb();
      process.exit(1);
    }
  }

  // Collect results first so every output format renders the same data.
  const fetchBody = db.prepare(`
    SELECT content.doc as body, d.title
    FROM documents d
    JOIN content ON content.hash = d.hash
    WHERE d.collection = ? AND d.path = ? AND d.active = 1
  `);
  const results = [];
  for (const file of files) {
    let collection = file.collection;
    let path = file.path;
    if (!collection || !path) {
      const parsed = parseVirtualPath(file.filepath);
      if (parsed) {
        collection = parsed.collectionName;
        path = parsed.path;
      }
    }
    const context = collection && path ? getContextForPath(db, collection, path) : null;

    // Oversized documents are reported, not printed.
    if (file.bodyLength > maxBytes) {
      results.push({
        file: file.filepath,
        displayPath: file.displayPath,
        title: file.displayPath.split('/').pop() || file.displayPath,
        body: "",
        context,
        skipped: true,
        skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'qmd get ${file.displayPath}' to retrieve.`,
      });
      continue;
    }

    if (!collection || !path) continue;
    const doc = fetchBody.get(collection, path);
    if (!doc) continue;

    let body = doc.body;
    if (maxLines !== undefined) {
      const lines = body.split('\n');
      body = lines.slice(0, maxLines).join('\n');
      if (lines.length > maxLines) {
        body += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
      }
    }
    results.push({
      file: file.filepath,
      displayPath: file.displayPath,
      title: doc.title || file.displayPath.split('/').pop() || file.displayPath,
      body,
      context,
      skipped: false,
    });
  }
  closeDb();

  // Output based on format
  if (format === "json") {
    const output = results.map(r => ({
      file: r.displayPath,
      title: r.title,
      ...(r.context && { context: r.context }),
      ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
    }));
    console.log(JSON.stringify(output, null, 2));
  } else if (format === "csv") {
    // Quote a field only when it contains a delimiter, a quote or a newline.
    const escapeField = (val) => {
      if (val === null || val === undefined) return "";
      const str = String(val);
      if (str.includes(",") || str.includes('"') || str.includes("\n")) {
        return `"${str.replace(/"/g, '""')}"`;
      }
      return str;
    };
    console.log("file,title,context,skipped,body");
    for (const r of results) {
      console.log([r.displayPath, r.title, r.context, r.skipped ? "true" : "false", r.skipped ? r.skipReason : r.body].map(escapeField).join(","));
    }
  } else if (format === "files") {
    for (const r of results) {
      const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
      const status = r.skipped ? "[SKIPPED]" : "";
      console.log(`${r.displayPath}${ctx}${status ? `,${status}` : ""}`);
    }
  } else if (format === "md") {
    for (const r of results) {
      console.log(`## ${r.displayPath}\n`);
      if (r.title && r.title !== r.displayPath) console.log(`**Title:** ${r.title}\n`);
      if (r.context) console.log(`**Context:** ${r.context}\n`);
      if (r.skipped) {
        console.log(`> ${r.skipReason}\n`);
      } else {
        console.log("```");
        console.log(r.body);
        console.log("```\n");
      }
    }
  } else if (format === "xml") {
    // NOTE(review): these literals carry no element markup, only escaped
    // values — confirm against the intended XML layout before relying on it.
    console.log('');
    console.log("");
    for (const r of results) {
      console.log(" ");
      console.log(` ${escapeXml(r.displayPath)}`);
      console.log(` ${escapeXml(r.title)}`);
      if (r.context) console.log(` ${escapeXml(r.context)}`);
      if (r.skipped) {
        console.log(` true`);
        console.log(` ${escapeXml(r.skipReason || "")}`);
      } else {
        console.log(` ${escapeXml(r.body)}`);
      }
      console.log(" ");
    }
    console.log("");
  } else {
    // CLI format (default)
    for (const r of results) {
      console.log(`\n${'='.repeat(60)}`);
      console.log(`File: ${r.displayPath}`);
      console.log(`${'='.repeat(60)}\n`);
      if (r.skipped) {
        console.log(`[SKIPPED: ${r.skipReason}]`);
        continue;
      }
      if (r.context) {
        console.log(`Folder Context: ${r.context}\n---\n`);
      }
      console.log(r.body);
    }
  }
}
AND d.active = 1 `).get(coll.name); return { name: coll.name, file_count: stats?.file_count || 0 }; }); console.log(`${c.bold}Collections:${c.reset}\n`); for (const coll of collections) { console.log(` ${c.dim}qmd://${c.reset}${c.cyan}${coll.name}/${c.reset} ${c.dim}(${coll.file_count} files)${c.reset}`); } closeDb(); return; } // Parse the path argument let collectionName; let pathPrefix = null; if (pathArg.startsWith('qmd://')) { // Virtual path format: qmd://collection/path const parsed = parseVirtualPath(pathArg); if (!parsed) { console.error(`Invalid virtual path: ${pathArg}`); closeDb(); process.exit(1); } collectionName = parsed.collectionName; pathPrefix = parsed.path; } else { // Just collection name or collection/path const parts = pathArg.split('/'); collectionName = parts[0] || ''; if (parts.length > 1) { pathPrefix = parts.slice(1).join('/'); } } // Get the collection const coll = getCollectionFromYaml(collectionName); if (!coll) { console.error(`Collection not found: ${collectionName}`); console.error(`Run 'qmd ls' to see available collections.`); closeDb(); process.exit(1); } // List files in the collection with size and modification time let query; let params; if (pathPrefix) { // List files under a specific path query = ` SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size FROM documents d JOIN content ct ON d.hash = ct.hash WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1 ORDER BY d.path `; params = [coll.name, `${pathPrefix}%`]; } else { // List all files in the collection query = ` SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size FROM documents d JOIN content ct ON d.hash = ct.hash WHERE d.collection = ? 
/**
 * Format a timestamp the way `ls -l` does.
 *
 * Dates older than roughly six months (6 * 30 days) are shown with their
 * year; more recent ones are shown with a 24h `HH:MM` time instead.
 *
 * @param {Date} date - Timestamp to format (local time is used).
 * @returns {string} e.g. "Jan  5 2001" or "Mar 12 09:41".
 */
function formatLsTime(date) {
  const SIX_MONTHS_MS = 6 * 30 * 24 * 60 * 60 * 1000;
  const MONTH_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
  const cutoff = new Date(Date.now() - SIX_MONTHS_MS);
  const monthLabel = MONTH_NAMES[date.getMonth()];
  const dayLabel = String(date.getDate()).padStart(2, ' ');
  if (date < cutoff) {
    return `${monthLabel} ${dayLabel} ${date.getFullYear()}`;
  }
  const hh = String(date.getHours()).padStart(2, '0');
  const mm = String(date.getMinutes()).padStart(2, '0');
  return `${monthLabel} ${dayLabel} ${hh}:${mm}`;
}

// Collection management commands

/**
 * Print every collection known to the database together with its glob
 * pattern, optional ignore list, active file count and last update time.
 */
function collectionList() {
  const db = getDb();
  const collections = listCollections(db);
  if (collections.length === 0) {
    console.log("No collections found. Run 'qmd collection add .' to create one.");
    closeDb();
    return;
  }
  console.log(`${c.bold}Collections (${collections.length}):${c.reset}\n`);
  for (const coll of collections) {
    // Collections without a recorded modification time are shown as "now".
    const updatedAt = coll.last_modified ? new Date(coll.last_modified) : new Date();
    const timeAgo = formatTimeAgo(updatedAt);
    // The YAML entry carries includeByDefault and the ignore list.
    const yamlColl = getCollectionFromYaml(coll.name);
    const excludeTag = yamlColl?.includeByDefault === false ? ` ${c.yellow}[excluded]${c.reset}` : '';
    const detailLines = [
      `${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}${excludeTag}`,
      ` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`,
    ];
    if (yamlColl?.ignore?.length) {
      detailLines.push(` ${c.dim}Ignore:${c.reset} ${yamlColl.ignore.join(', ')}`);
    }
    detailLines.push(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
    detailLines.push(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
    for (const detail of detailLines) {
      console.log(detail);
    }
    console.log();
  }
  closeDb();
}

/**
 * Register a new collection in the YAML config, sync it to SQLite and index
 * its files. Exits with status 1 when the name is taken or when a collection
 * with the same path and pattern already exists.
 *
 * @param {string} pwd - Directory the collection is rooted at.
 * @param {string} globPattern - Glob selecting the files to index.
 * @param {string} [name] - Collection name; defaults to the last segment of `pwd`.
 */
async function collectionAdd(pwd, globPattern, name) {
  // Fall back to the directory's basename ("root" for "/").
  let collName = name;
  if (!collName) {
    const segments = pwd.split('/').filter(Boolean);
    collName = segments[segments.length - 1] || 'root';
  }

  // Reject a name that is already present in the YAML config.
  if (getCollectionFromYaml(collName)) {
    console.error(`${c.yellow}Collection '${collName}' already exists.${c.reset}`);
    console.error(`Use a different name with --name `);
    process.exit(1);
  }

  // Reject a second collection over the same path + pattern.
  const duplicate = yamlListCollections().find(entry => entry.path === pwd && entry.pattern === globPattern);
  if (duplicate) {
    console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
    console.error(` Name: ${duplicate.name} (qmd://${duplicate.name}/)`);
    console.error(` Pattern: ${globPattern}`);
    console.error(`\nUse 'qmd update' to re-index it, or remove it first with 'qmd collection remove ${duplicate.name}'`);
    process.exit(1);
  }

  // Add to YAML config + sync to SQLite
  const collectionsModule = await import("../collections.js");
  collectionsModule.addCollection(collName, pwd, globPattern);
  resyncConfig();

  // Create the collection and index files
  console.log(`Creating collection '${collName}'...`);
  const created = getCollectionFromYaml(collName);
  await indexFiles(pwd, globPattern, collName, false, created?.ignore);
  console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
}

/**
 * Remove a collection from the database and from the YAML config, then
 * report how many documents and orphaned content hashes were deleted.
 * Exits with status 1 when the collection is unknown.
 *
 * @param {string} name - Collection to remove.
 */
function collectionRemove(name) {
  if (!getCollectionFromYaml(name)) {
    console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
    console.error(`Run 'qmd collection list' to see available collections.`);
    process.exit(1);
  }
  const outcome = removeCollection(getDb(), name);
  // Also remove from YAML config
  yamlRemoveCollectionFn(name);
  closeDb();
  console.log(`${c.green}✓${c.reset} Removed collection '${name}'`);
  console.log(` Deleted ${outcome.deletedDocs} documents`);
  if (outcome.cleanedHashes > 0) {
    console.log(` Cleaned up ${outcome.cleanedHashes} orphaned content hashes`);
  }
}

/**
 * Rename a collection in the database and in the YAML config.
 * Exits with status 1 when `oldName` is unknown or `newName` is taken.
 *
 * @param {string} oldName - Existing collection name.
 * @param {string} newName - Name to rename it to.
 */
function collectionRename(oldName, newName) {
  if (!getCollectionFromYaml(oldName)) {
    console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
    console.error(`Run 'qmd collection list' to see available collections.`);
    process.exit(1);
  }
  if (getCollectionFromYaml(newName)) {
    console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
    console.error(`Choose a different name or remove the existing collection first.`);
    process.exit(1);
  }
  renameCollection(getDb(), oldName, newName);
  // Also rename in YAML config
  yamlRenameCollectionFn(oldName, newName);
  closeDb();
  console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
  console.log(` Virtual paths updated: ${c.cyan}qmd://${oldName}/${c.reset} → ${c.cyan}qmd://${newName}/${c.reset}`);
}
suppressEmbedNotice = false, ignorePatterns) { const db = getDb(); const resolvedPwd = pwd || getPwd(); const now = new Date().toISOString(); const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"]; // Clear Ollama cache on index clearCache(db); // Collection name must be provided (from YAML) if (!collectionName) { throw new Error("Collection name is required. Collections must be defined in ~/.config/qmd/index.yml"); } console.log(`Collection: ${resolvedPwd} (${globPattern})`); progress.indeterminate(); const allIgnore = [ ...excludeDirs.map(d => `**/${d}/**`), ...(ignorePatterns || []), ]; const allFiles = await fastGlob(globPattern, { cwd: resolvedPwd, onlyFiles: true, followSymbolicLinks: false, dot: false, ignore: allIgnore, }); // Filter hidden files/folders (dot: false handles top-level but not nested) const files = allFiles.filter(file => { const parts = file.split("/"); return !parts.some(part => part.startsWith(".")); }); const total = files.length; const hasNoFiles = total === 0; if (hasNoFiles) { progress.clear(); console.log("No files found matching pattern."); // Continue so the deactivation pass can mark previously indexed docs as inactive. } let indexed = 0, updated = 0, unchanged = 0, processed = 0; const seenPaths = new Set(); const startTime = Date.now(); for (const relativeFile of files) { const filepath = getRealPath(resolve(resolvedPwd, relativeFile)); const path = handelize(relativeFile); // Normalize path for token-friendliness seenPaths.add(path); let content; try { content = readFileSync(filepath, "utf-8"); } catch (err) { // Skip files that can't be read (e.g. 
iCloud evicted files returning EAGAIN) processed++; progress.set((processed / total) * 100); continue; } // Skip empty files - nothing useful to index if (!content.trim()) { processed++; continue; } const hash = await hashContent(content); const title = extractTitle(content, relativeFile); // Check if document exists in this collection with this path const existing = findActiveDocument(db, collectionName, path); if (existing) { if (existing.hash === hash) { // Hash unchanged, but check if title needs updating if (existing.title !== title) { updateDocumentTitle(db, existing.id, title, now); updated++; } else { unchanged++; } } else { // Content changed - insert new content hash and update document insertContent(db, hash, content, now); const stat = statSync(filepath); updateDocument(db, existing.id, title, hash, stat ? new Date(stat.mtime).toISOString() : now); updated++; } } else { // New document - insert content and document indexed++; insertContent(db, hash, content, now); const stat = statSync(filepath); insertDocument(db, collectionName, path, title, hash, stat ? new Date(stat.birthtime).toISOString() : now, stat ? new Date(stat.mtime).toISOString() : now); } processed++; progress.set((processed / total) * 100); const elapsed = (Date.now() - startTime) / 1000; const rate = processed / elapsed; const remaining = (total - processed) / rate; const eta = processed > 2 ? 
/**
 * Render a textual progress bar made of full (█) and empty (░) cells.
 *
 * The percentage is clamped to 0–100 (NaN counts as 0) so that a slightly
 * overshooting progress estimate cannot produce a negative repeat count.
 *
 * @param {number} percent - Completion percentage.
 * @param {number} [width=30] - Total number of cells in the bar.
 * @returns {string} The bar, exactly `width` characters long.
 */
function renderProgressBar(percent, width = 30) {
  const value = Number(percent);
  const clamped = Number.isNaN(value) ? 0 : Math.min(100, Math.max(0, value));
  const filled = Math.round((clamped / 100) * width);
  return "█".repeat(filled) + "░".repeat(width - filled);
}

/**
 * Parse an embed batch option (docs-per-batch / bytes-per-batch).
 *
 * @param {string} name - Flag name used in the error message.
 * @param {unknown} value - Raw CLI value; `undefined` means "not supplied".
 * @returns {number|undefined} The positive integer, or undefined when unset.
 * @throws {Error} When the value is not a positive integer.
 */
function parseEmbedBatchOption(name, value) {
  // Same contract as every other positive-integer flag.
  return parseOptionalPositiveInt(name, value);
}

/**
 * Validate the --chunk-strategy flag.
 *
 * @param {unknown} value - Raw CLI value; `undefined` means "not supplied".
 * @returns {"auto"|"regex"|"function"|undefined}
 * @throws {Error} When the value is not one of the accepted strategies.
 */
function parseChunkStrategy(value) {
  if (value === undefined) return undefined;
  const strategy = String(value);
  if (["auto", "regex", "function"].includes(strategy)) return strategy;
  throw new Error(`--chunk-strategy must be "auto", "regex", or "function" (got "${strategy}")`);
}

/**
 * Validate the --provider flag (case-insensitive).
 *
 * @param {unknown} value - Raw CLI value; `undefined` means "not supplied".
 * @returns {"local"|"openai"|undefined} The lower-cased provider kind.
 * @throws {Error} When the value is neither "local" nor "openai".
 */
function parseProviderKind(value) {
  if (value === undefined) return undefined;
  const kind = String(value).toLowerCase();
  if (kind === "local" || kind === "openai") return kind;
  throw new Error(`--provider must be "local" or "openai" (got "${kind}")`);
}

/**
 * Parse an optional flag that must be a positive integer.
 *
 * @param {string} name - Flag name used in the error message.
 * @param {unknown} value - Raw CLI value; `undefined` means "not supplied".
 * @returns {number|undefined} The positive integer, or undefined when unset.
 * @throws {Error} When the value is not an integer >= 1.
 */
function parseOptionalPositiveInt(name, value) {
  if (value === undefined) return undefined;
  const parsed = Number(value);
  if (!Number.isInteger(parsed) || parsed < 1) {
    throw new Error(`${name} must be a positive integer`);
  }
  return parsed;
}

/**
 * Translate `cli.values` into `CreateEmbeddingProviderOptions`. CLI flags
 * win over env vars (the factory itself reads env when these are unset).
 *
 * @param {Record<string, unknown>} values - Parsed CLI flag values.
 * @param {string} [providerCliKind] - Provider kind chosen on the command line.
 * @returns {{kind?: string, openai?: object, autoFallback?: true}} Only the
 *   keys the user actually supplied are present.
 * @throws {Error} When a numeric flag is not a positive integer.
 */
function buildProviderOpts(values, providerCliKind) {
  const candidates = {
    endpoint: optionalString(values["embed-endpoint"]),
    apiKey: optionalString(values["embed-api-key"]),
    modelId: optionalString(values["embed-model-id"]),
    upstreamModel: optionalString(values["embed-upstream-model"]),
    batchSize: parseOptionalPositiveInt("--embed-batch-size", values["embed-batch-size"]),
    timeoutMs: parseOptionalPositiveInt("--embed-timeout-ms", values["embed-timeout-ms"]),
  };
  // Only build the openai overrides object if the user supplied flags.
  const supplied = Object.fromEntries(
    Object.entries(candidates).filter(([, value]) => value !== undefined),
  );

  const opts = {};
  if (providerCliKind) opts.kind = providerCliKind;
  if (Object.keys(supplied).length > 0) opts.openai = supplied;
  // CLI flag for auto-fallback wrapping (only meaningful when kind === openai).
  if (values["embed-auto-fallback"] === true) opts.autoFallback = true;
  return opts;
}

/**
 * Normalize an optional CLI value to a non-empty string.
 *
 * @param {unknown} v - Raw value.
 * @returns {string|undefined} `String(v)`, or undefined for null, undefined and "".
 */
function optionalString(v) {
  if (v === undefined || v === null) return undefined;
  const s = String(v);
  return s === "" ? undefined : s;
}
DEFAULT_EMBED_MAX_BATCH_BYTES; console.log(`${c.dim}Batch: ${maxDocsPerBatch} docs / ${formatBytes(maxBatchBytes)}${c.reset}\n`); } cursor.hide(); progress.indeterminate(); const startTime = Date.now(); const result = await generateEmbeddings(storeInstance, { force, model, maxDocsPerBatch: batchOptions?.maxDocsPerBatch, maxBatchBytes: batchOptions?.maxBatchBytes, chunkStrategy: batchOptions?.chunkStrategy, embedProvider: batchOptions?.embedProvider, onProgress: (info) => { if (info.totalBytes === 0) return; const percent = (info.bytesProcessed / info.totalBytes) * 100; progress.set(percent); const elapsed = (Date.now() - startTime) / 1000; const bytesPerSec = info.bytesProcessed / elapsed; const remainingBytes = info.totalBytes - info.bytesProcessed; const etaSec = remainingBytes / bytesPerSec; const bar = renderProgressBar(percent); const percentStr = percent.toFixed(0).padStart(3); const throughput = `${formatBytes(bytesPerSec)}/s`; const eta = elapsed > 2 ? formatETA(etaSec) : "..."; const errStr = info.errors > 0 ? 
/**
 * Sanitize a term for FTS5: remove punctuation except apostrophes.
 *
 * Letters, digits and combining marks of any script are kept (not just
 * ASCII), so "café" stays "café" instead of being cut down to "caf".
 *
 * @param {string} term - A single whitespace-delimited word.
 * @returns {string} The word without punctuation (apostrophes survive, e.g. "don't").
 */
function sanitizeFTS5Term(term) {
  return term.replace(/[^\p{L}\p{N}\p{M}_']/gu, '').trim();
}

/**
 * Build an FTS5 query: phrase-aware with fallback to individual terms.
 *
 * Strategy: exact phrase OR proximity match OR individual terms. Terms
 * shorter than two characters are ignored.
 *
 * @param {string} query - Free-text user query.
 * @returns {string} FTS5 MATCH expression, or "" when no usable term remains.
 */
function buildFTS5Query(query) {
  // Sanitize the full query for phrase matching (whitespace is preserved).
  const sanitizedQuery = query.replace(/[^\p{L}\p{N}\p{M}_\s']/gu, '').trim();
  const terms = query
    .split(/\s+/)
    .map(sanitizeFTS5Term)
    .filter(term => term.length >= 2); // Skip single chars and empty
  if (terms.length === 0) return "";

  // Double any embedded quote so the value is a valid FTS5 string.
  const quote = (text) => `"${text.replace(/"/g, '""')}"`;
  if (terms.length === 1) return quote(terms[0]);

  const quotedTerms = terms.map(quote);
  const phrase = quote(sanitizedQuery);
  // FTS5 NEAR syntax: NEAR(term1 term2, distance)
  const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
  const orTerms = quotedTerms.join(' OR ');
  // Exact phrase > proximity > any term
  return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
}

/**
 * Normalize a BM25 score to the 0-1 range using a sigmoid.
 *
 * BM25 scores are negative in SQLite (lower = better); the magnitude is
 * mapped so that higher output means a better match.
 *
 * @param {number} score - Raw BM25 score.
 * @returns {number} Value in (0, 1); a magnitude of 5 maps to 0.5.
 */
function normalizeBM25(score) {
  const magnitude = Math.abs(score);
  return 1 / (1 + Math.exp(-(magnitude - 5) / 3));
}

/**
 * Highlight query terms in text (words shorter than 3 chars are skipped).
 *
 * All terms are matched in a single pass so that a later term can never
 * match inside the colour escape sequences inserted for an earlier one.
 *
 * @param {string} text - Text to decorate.
 * @param {string} query - Whitespace-separated search terms.
 * @returns {string} `text` unchanged when colours are off or nothing qualifies.
 */
function highlightTerms(text, query) {
  if (!useColor) return text;
  const terms = [...new Set(query.toLowerCase().split(/\s+/).filter(t => t.length >= 3))];
  if (terms.length === 0) return text;
  const alternation = terms
    .sort((a, b) => b.length - a.length) // prefer the longest term at a position
    .map(term => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  const matcher = new RegExp(alternation, 'gi');
  return text.replace(matcher, (hit) => `${c.yellow}${c.bold}${hit}${c.reset}`);
}

/**
 * Format a 0-1 score as a right-aligned percentage, coloured by strength
 * (green >= 0.7, yellow >= 0.4, dim otherwise) when colours are enabled.
 *
 * @param {number} score - Normalized score.
 * @returns {string} e.g. " 85%".
 */
function formatScore(score) {
  const pct = (score * 100).toFixed(0).padStart(3);
  if (!useColor) return `${pct}%`;
  if (score >= 0.7) return `${c.green}${pct}%${c.reset}`;
  if (score >= 0.4) return `${c.yellow}${pct}%${c.reset}`;
  return `${c.dim}${pct}%${c.reset}`;
}

/**
 * Format a number for --explain output with four decimals.
 *
 * @param {number} value
 * @returns {string}
 */
function formatExplainNumber(value) {
  return value.toFixed(4);
}

/**
 * Shorten a directory path for display - relative to $HOME (used for context
 * paths, not documents).
 *
 * The home prefix is only replaced on a path-segment boundary, so a sibling
 * such as "/home/bobby" is left alone when home is "/home/bob".
 *
 * @param {string} dirpath - Absolute directory path.
 * @returns {string} Path with the home directory replaced by "~" where applicable.
 */
function shortPath(dirpath) {
  const home = homedir().replace(/[\\/]+$/, '');
  if (!home) return dirpath;
  if (dirpath === home) return '~';
  if (dirpath.startsWith(home)) {
    const boundary = dirpath[home.length];
    if (boundary === '/' || boundary === '\\') {
      return '~' + dirpath.slice(home.length);
    }
  }
  return dirpath;
}

// Emit format-safe empty output for search commands.
/**
 * Emit format-safe empty output for search commands.
 *
 * Machine-readable formats get a syntactically valid empty payload (or
 * nothing at all); every other format gets a human-readable message.
 *
 * @param {string} format - Output format requested by the user.
 * @param {string} [reason="no_results"] - "min_score" selects the threshold message.
 */
function printEmptySearchResults(format, reason = "no_results") {
  switch (format) {
    case "json":
      console.log("[]");
      return;
    case "csv":
      console.log("docid,score,file,title,context,line,snippet");
      return;
    case "xml":
      console.log("");
      return;
    case "md":
    case "files":
      return;
    default:
      break;
  }
  if (reason === "min_score") {
    console.log("No results found above minimum score threshold.");
    return;
  }
  console.log("No results found.");
}

const DEFAULT_EDITOR_URI_TEMPLATE = "vscode://file/{path}:{line}:{col}";

/**
 * Percent-encode an absolute path for embedding in an editor URI.
 * `encodeURI` leaves `?` and `#` alone, so they are encoded explicitly.
 *
 * @param {string} absolutePath
 * @returns {string}
 */
function encodePathForEditorUri(absolutePath) {
  return encodeURI(absolutePath)
    .replace(/\?/g, "%3F")
    .replace(/#/g, "%23");
}

/**
 * Resolve the editor URI template: QMD_EDITOR_URI env var first, then the
 * config file (editor_uri, editor_uri_template, editorUri or "editor-uri"),
 * then the built-in default.
 *
 * @returns {string} Template containing {path}, {line}, {col}/{column} placeholders.
 */
function getEditorUriTemplate() {
  const fromEnv = process.env.QMD_EDITOR_URI?.trim();
  if (fromEnv) return fromEnv;
  try {
    const config = loadConfig();
    const dashed = typeof config["editor-uri"] === "string" ? config["editor-uri"] : undefined;
    const fromConfig = (config.editor_uri || config.editor_uri_template || config.editorUri || dashed)?.trim();
    if (fromConfig) return fromConfig;
  } catch {
    // Ignore config parsing issues and use default template.
  }
  return DEFAULT_EDITOR_URI_TEMPLATE;
}

/**
 * Fill an editor URI template with an encoded path and a 1-based position.
 *
 * Placeholders are substituted through a replacer function, so `$&`, `$'`
 * and similar sequences in a path are inserted literally rather than being
 * interpreted as replacement patterns.
 *
 * @param {string} template - Template with {path}, {line}, {col} or {column}.
 * @param {string} absolutePath - Absolute file path.
 * @param {number} line - Line number; anything below 1 or non-finite becomes 1.
 * @param {number} col - Column number; anything below 1 or non-finite becomes 1.
 * @returns {string} The expanded URI.
 */
export function buildEditorUri(template, absolutePath, line, col) {
  // Floor first, then enforce the 1-based minimum (0.5 must not become 0).
  const toPosition = (n) => (Number.isFinite(n) && n > 0 ? Math.max(1, Math.floor(n)) : 1);
  const column = String(toPosition(col));
  const substitutions = {
    path: encodePathForEditorUri(absolutePath),
    line: String(toPosition(line)),
    col: column,
    column,
  };
  return template.replace(/\{(path|line|col|column)\}/g, (_placeholder, key) => substitutions[key]);
}

/**
 * Wrap text in an OSC 8 terminal hyperlink.
 *
 * @param {string} text - Visible text.
 * @param {string} url - Link target.
 * @param {boolean} [isTTY] - Defaults to whether stdout is a TTY.
 * @returns {string} The plain text when not writing to a TTY.
 */
export function termLink(text, url, isTTY = !!process.stdout.isTTY) {
  if (!isTTY) return text;
  return `\x1b]8;;${url}\x07${text}\x1b]8;;\x07`;
}
`,"${row.context.replace(/"/g, '""')}"` : ""; console.log(`#${docid},${row.score.toFixed(2)},${toQmdPath(row.displayPath)}${ctx}`); } } else if (opts.format === "cli") { const editorUriTemplate = getEditorUriTemplate(); const linkDb = getDb(); for (let i = 0; i < filtered.length; i++) { const row = filtered[i]; if (!row) continue; const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent); const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined); // Line 1: filepath with docid const virtualPath = row.file.startsWith("qmd://") ? row.file : toQmdPath(row.displayPath); const parsed = parseVirtualPath(virtualPath); const absolutePath = resolveVirtualPath(linkDb, virtualPath); const legacyPath = toQmdPath(row.displayPath); const displayPath = parsed?.path || row.displayPath; // Only show :line if we actually found a term match in the snippet body (exclude header line). const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase(); const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t)); const lineInfo = hasMatch ? `:${line}` : ""; const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : ""; if (process.stdout.isTTY && absolutePath && parsed?.path) { const linkLine = hasMatch ? 
line : 1; const linkTarget = buildEditorUri(editorUriTemplate, absolutePath, linkLine, 1); const clickable = termLink(`${displayPath}${lineInfo}`, linkTarget); console.log(`${c.cyan}${clickable}${c.reset}${docidStr}`); } else { console.log(`${c.cyan}${legacyPath}${c.dim}${lineInfo}${c.reset}${docidStr}`); } // Line 2: Title (if available) if (row.title) { console.log(`${c.bold}Title: ${row.title}${c.reset}`); } // Line 3: Context (if available) if (row.context) { console.log(`${c.dim}Context: ${row.context}${c.reset}`); } // Line 4: Score const score = formatScore(row.score); console.log(`Score: ${c.bold}${score}${c.reset}`); if (opts.explain && row.explain) { const explain = row.explain; const ftsScores = explain.ftsScores.length > 0 ? explain.ftsScores.map(formatExplainNumber).join(", ") : "none"; const vecScores = explain.vectorScores.length > 0 ? explain.vectorScores.map(formatExplainNumber).join(", ") : "none"; const contribSummary = explain.rrf.contributions .slice() .sort((a, b) => b.rrfContribution - a.rrfContribution) .slice(0, 3) .map(c => `${c.source}/${c.queryType}#${c.rank}:${formatExplainNumber(c.rrfContribution)}`) .join(" | "); console.log(`${c.dim}Explain: fts=[${ftsScores}] vec=[${vecScores}]${c.reset}`); console.log(`${c.dim} RRF: total=${formatExplainNumber(explain.rrf.totalScore)} base=${formatExplainNumber(explain.rrf.baseScore)} bonus=${formatExplainNumber(explain.rrf.topRankBonus)} rank=${explain.rrf.rank}${c.reset}`); console.log(`${c.dim} Blend: ${Math.round(explain.rrf.weight * 100)}%*${formatExplainNumber(explain.rrf.positionScore)} + ${Math.round((1 - explain.rrf.weight) * 100)}%*${formatExplainNumber(explain.rerankScore)} = ${formatExplainNumber(explain.blendedScore)}${c.reset}`); if (contribSummary.length > 0) { console.log(`${c.dim} Top RRF contributions: ${contribSummary}${c.reset}`); } } console.log(); // Snippet with highlighting (diff-style header included) let displaySnippet = opts.lineNumbers ? 
addLineNumbers(snippet, line) : snippet; const highlighted = highlightTerms(displaySnippet, query); console.log(highlighted); // Double empty line between results if (i < filtered.length - 1) console.log('\n'); } } else if (opts.format === "md") { for (let i = 0; i < filtered.length; i++) { const row = filtered[i]; if (!row) continue; const heading = row.title || row.displayPath; const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined); let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet; if (opts.lineNumbers) { content = addLineNumbers(content); } const docidLine = docid ? `**docid:** \`#${docid}\`\n` : ""; const contextLine = row.context ? `**context:** ${row.context}\n` : ""; console.log(`---\n# ${heading}\n${docidLine}${contextLine}\n${content}\n`); } } else if (opts.format === "xml") { for (const row of filtered) { const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '"')}"` : ""; const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '"')}"` : ""; const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : ""); let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet; if (opts.lineNumbers) { content = addLineNumbers(content); } console.log(`\n${content}\n\n`); } } else { // CSV format console.log("docid,score,file,title,context,line,snippet"); for (const row of filtered) { const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent); let content = opts.full ? row.body : snippet; if (opts.lineNumbers) { content = addLineNumbers(content, line); } const docid = row.docid || (row.hash ? 
/**
 * Resolve the -c collection filter: supports single string, array, or undefined.
 *
 * @param {string|string[]|undefined} raw - Value(s) of the -c flag.
 * @param {boolean} [useDefaults=false] - Fall back to the default collections when no filter is given.
 * @returns {string[]} Validated collection names (the process exits with
 *   status 1 on an unknown collection).
 */
function resolveCollectionFilter(raw, useDefaults = false) {
  if (!raw) {
    return useDefaults ? getDefaultCollectionNames() : [];
  }
  const requested = Array.isArray(raw) ? raw : [raw];
  const validated = [];
  for (const name of requested) {
    if (!getCollectionFromYaml(name)) {
      console.error(`Collection not found: ${name}`);
      closeDb();
      process.exit(1);
    }
    validated.push(name);
  }
  return validated;
}

/**
 * Post-filter results to only include files from specified collections.
 *
 * With zero or one collection the results are returned untouched; the
 * filter only applies when several collections were requested.
 *
 * @param {Array<{filepath?: string, file?: string}>} results
 * @param {string[]} collectionNames
 * @returns {Array} Results whose virtual path starts with one of the collections.
 */
function filterByCollections(results, collectionNames) {
  if (collectionNames.length <= 1) return results;
  const prefixes = collectionNames.map(name => `qmd://${name}/`);
  return results.filter((result) => {
    const virtualPath = result.filepath || result.file || '';
    return prefixes.some(prefix => virtualPath.startsWith(prefix));
  });
}

/**
 * Parse a query document made of typed lines (lex:/vec:/hyde:) plus an
 * optional single intent: line.
 *
 * @param {string} query - Raw query text, possibly multi-line.
 * @returns {{searches: Array<{type: string, query: string, line: number}>, intent: (string|undefined)}|null}
 *   The structured query, or null when the input is empty, a single plain
 *   line, or a standalone expand: query.
 * @throws {Error} On a line without a known prefix, an empty typed line,
 *   expand: mixed with other lines, a repeated intent:, or intent: alone.
 */
function parseStructuredQuery(query) {
  // Line numbers refer to the original text, so number before dropping blanks.
  const rawLines = query
    .split('\n')
    .map((line, idx) => ({ raw: line, trimmed: line.trim(), number: idx + 1 }))
    .filter(line => line.trimmed.length > 0);
  if (rawLines.length === 0) return null;

  const prefixRe = /^(lex|vec|hyde):\s*/i;
  const expandRe = /^expand:\s*/i;
  const intentRe = /^intent:\s*/i;
  const typed = [];
  let intent;

  for (const line of rawLines) {
    if (expandRe.test(line.trimmed)) {
      if (rawLines.length > 1) {
        throw new Error(`Line ${line.number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
      }
      if (!line.trimmed.replace(expandRe, '').trim()) {
        throw new Error('expand: query must include text.');
      }
      return null; // treat as standalone expand query
    }

    if (intentRe.test(line.trimmed)) {
      if (intent !== undefined) {
        throw new Error(`Line ${line.number}: only one intent: line is allowed per query document.`);
      }
      const intentText = line.trimmed.replace(intentRe, '').trim();
      if (!intentText) {
        throw new Error(`Line ${line.number}: intent: must include text.`);
      }
      intent = intentText;
      continue;
    }

    const match = line.trimmed.match(prefixRe);
    if (match) {
      const type = match[1].toLowerCase();
      const text = line.trimmed.slice(match[0].length).trim();
      if (!text) {
        throw new Error(`Line ${line.number} (${type}:) must include text.`);
      }
      if (/\r|\n/.test(text)) {
        throw new Error(`Line ${line.number} (${type}:) contains a newline. Keep each query on a single line.`);
      }
      typed.push({ type, query: text, line: line.number });
      continue;
    }

    // Single plain line -> implicit expand
    if (rawLines.length === 1) return null;
    throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
  }

  // intent: alone is not a valid query — must have at least one search
  if (intent && typed.length === 0) {
    throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
  }
  return typed.length > 0 ? { searches: typed, intent } : null;
}

/**
 * Run a full-text search and print the results.
 *
 * The database is closed only after the results have been printed: the CLI
 * renderer asks for the database again to resolve editor links, so closing
 * earlier would leave a freshly reopened handle behind.
 *
 * @param {string} query - Search text.
 * @param {object} opts - CLI options (collection, all, limit, format, ...).
 */
function search(query, opts) {
  const db = getDb();
  // Validate collection filter (supports multiple -c flags);
  // use default collections if none specified.
  const collectionNames = resolveCollectionFilter(opts.collection, true);
  const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
  try {
    // Use large limit for --all, otherwise fetch more than needed and let outputResults filter
    const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
    const matches = filterByCollections(searchFTS(db, query, fetchLimit, singleCollection), collectionNames);
    // Add context to results
    const resultsWithContext = matches.map(r => ({
      file: r.filepath,
      displayPath: r.displayPath,
      title: r.title,
      body: r.body || "",
      score: r.score,
      context: getContextForFile(db, r.filepath),
      hash: r.hash,
      docid: r.docid,
    }));
    if (resultsWithContext.length === 0) {
      printEmptySearchResults(opts.format);
      return;
    }
    outputResults(resultsWithContext, query, opts);
  } finally {
    closeDb();
  }
}

/**
 * Log query expansion as a tree to stderr (CLI progress feedback).
 *
 * @param {string} originalQuery - The query as typed by the user.
 * @param {Array<{type: string, query: string}>} expanded - Generated sub-queries.
 */
function logExpansionTree(originalQuery, expanded) {
  const labels = [originalQuery];
  for (const q of expanded) {
    let preview = q.query.replace(/\n/g, ' ');
    if (preview.length > 72) preview = preview.substring(0, 69) + '...';
    labels.push(`${q.type}: ${preview}`);
  }
  labels.forEach((label, index) => {
    // The last entry closes the tree.
    const branch = index === labels.length - 1 ? '└─' : '├─';
    process.stderr.write(`${c.dim}${branch} ${label}${c.reset}` + '\n');
  });
}
500 : (opts.limit || 10), minScore: opts.minScore || 0.3, intent: opts.intent, hooks: { onExpand: (original, expanded) => { logExpansionTree(original, expanded); process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`); }, }, }); // Post-filter for multi-collection if (collectionNames.length > 1) { results = results.filter(r => { const prefixes = collectionNames.map(n => `qmd://${n}/`); return prefixes.some(p => r.file.startsWith(p)); }); } closeDb(); if (results.length === 0) { printEmptySearchResults(opts.format); return; } outputResults(results.map(r => ({ file: r.file, displayPath: r.displayPath, title: r.title, body: r.body, score: r.score, context: r.context, docid: r.docid, })), query, { ...opts, limit: results.length }); }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' }); } async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rerankModel = DEFAULT_RERANK_MODEL) { const store = getStore(); // Validate collection filter (supports multiple -c flags) // Use default collections if none specified const collectionNames = resolveCollectionFilter(opts.collection, true); const singleCollection = collectionNames.length === 1 ? 
collectionNames[0] : undefined; checkIndexHealth(store.db); // Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes) const parsed = parseStructuredQuery(query); // Intent can come from --intent flag or from intent: line in query document const intent = opts.intent || parsed?.intent; await withLLMSession(async () => { let results; if (parsed) { const structuredQueries = parsed.searches; // Structured search — user provided their own query expansions const typeLabels = structuredQueries.map(s => s.type).join('+'); process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`); if (intent) { process.stderr.write(`${c.dim}├─ intent: ${intent}${c.reset}\n`); } // Log each sub-query for (const s of structuredQueries) { let preview = s.query.replace(/\n/g, ' '); if (preview.length > 72) preview = preview.substring(0, 69) + '...'; process.stderr.write(`${c.dim}├─ ${s.type}: ${preview}${c.reset}\n`); } process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`); results = await structuredSearch(store, structuredQueries, { collections: singleCollection ? [singleCollection] : undefined, limit: opts.all ? 500 : (opts.limit || 10), minScore: opts.minScore || 0, candidateLimit: opts.candidateLimit, skipRerank: opts.skipRerank, explain: !!opts.explain, intent, chunkStrategy: opts.chunkStrategy, hooks: { onEmbedStart: (count) => { process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 
'query' : 'queries'}...${c.reset}`); }, onEmbedDone: (ms) => { process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`); }, onRerankStart: (chunkCount) => { process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`); progress.indeterminate(); }, onRerankDone: (ms) => { progress.clear(); process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`); }, }, }); } else { // Standard hybrid query with automatic expansion results = await hybridQuery(store, query, { collection: singleCollection, limit: opts.all ? 500 : (opts.limit || 10), minScore: opts.minScore || 0, candidateLimit: opts.candidateLimit, skipRerank: opts.skipRerank, explain: !!opts.explain, intent, chunkStrategy: opts.chunkStrategy, hooks: { onStrongSignal: (score) => { process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`); }, onExpandStart: () => { process.stderr.write(`${c.dim}Expanding query...${c.reset}`); }, onExpand: (original, expanded, ms) => { process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`); logExpansionTree(original, expanded); process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`); }, onEmbedStart: (count) => { process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 
'query' : 'queries'}...${c.reset}`); }, onEmbedDone: (ms) => { process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`); }, onRerankStart: (chunkCount) => { process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`); progress.indeterminate(); }, onRerankDone: (ms) => { progress.clear(); process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`); }, }, }); } // Post-filter for multi-collection if (collectionNames.length > 1) { results = results.filter(r => { const prefixes = collectionNames.map(n => `qmd://${n}/`); return prefixes.some(p => r.file.startsWith(p)); }); } closeDb(); if (results.length === 0) { printEmptySearchResults(opts.format); return; } // Use first lex/vec query for output context, or original query const structuredQueries = parsed?.searches; const displayQuery = structuredQueries ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query) : query; // Map to CLI output format — use bestChunk for snippet display outputResults(results.map(r => ({ file: r.file, displayPath: r.displayPath, title: r.title, body: r.bestChunk, chunkPos: r.bestChunkPos, score: r.score, context: r.context, docid: r.docid, explain: r.explain, })), displayQuery, { ...opts, limit: results.length }); }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' }); } // Parse CLI arguments using util.parseArgs function parseCLI() { const { values, positionals } = parseArgs({ args: process.argv.slice(2), // Skip node and script path options: { // Global options index: { type: "string", }, context: { type: "string", }, help: { type: "boolean", short: "h" }, version: { type: "boolean", short: "v" }, skill: { type: "boolean" }, global: { type: "boolean" }, yes: { type: "boolean" }, // Search options n: { type: "string" }, "min-score": { type: "string" }, all: { type: "boolean" }, full: { type: "boolean" }, csv: { type: "boolean" }, md: { type: "boolean" }, xml: { type: "boolean" }, files: { type: 
"boolean" }, json: { type: "boolean" }, explain: { type: "boolean" }, collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s) // Collection options name: { type: "string" }, // collection name mask: { type: "string" }, // glob pattern // Embed options force: { type: "boolean", short: "f" }, "max-docs-per-batch": { type: "string" }, "max-batch-mb": { type: "string" }, provider: { type: "string" }, // "local" | "openai" "embed-endpoint": { type: "string" }, // OpenAI-compatible endpoint URL "embed-api-key": { type: "string" }, // Bearer token "embed-model-id": { type: "string" }, // Stable model id (default: embeddinggemma) "embed-upstream-model": { type: "string" }, // Upstream model name in HTTP body "embed-batch-size": { type: "string" }, // Batch size for HTTP provider "embed-timeout-ms": { type: "string" }, // Per-request timeout "embed-auto-fallback": { type: "boolean" }, // Wrap openai in AutoFallback (local fallback) // Update options pull: { type: "boolean" }, // git pull before update refresh: { type: "boolean" }, // Get options l: { type: "string" }, // max lines from: { type: "string" }, // start line "max-bytes": { type: "string" }, // max bytes for multi-get "line-numbers": { type: "boolean" }, // add line numbers to output // Query options "candidate-limit": { type: "string", short: "C" }, "no-rerank": { type: "boolean", default: false }, intent: { type: "string" }, // Chunking options "chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files) // MCP HTTP transport options http: { type: "boolean" }, daemon: { type: "boolean" }, port: { type: "string" }, }, allowPositionals: true, strict: false, // Allow unknown options to pass through }); // Select index name (default: "index") const indexName = values.index; if (indexName) { setIndexName(indexName); setConfigIndexName(indexName); } // Determine output format let format = "cli"; if (values.csv) format = "csv"; else if (values.md) format = 
"md"; else if (values.xml) format = "xml"; else if (values.files) format = "files"; else if (values.json) format = "json"; // Default limit: 20 for --files/--json, 5 otherwise // --all means return all results (use very large limit) const defaultLimit = (format === "files" || format === "json") ? 20 : 5; const isAll = !!values.all; const opts = { format, full: !!values.full, limit: isAll ? 100000 : (values.n ? parseInt(String(values.n), 10) || defaultLimit : defaultLimit), minScore: values["min-score"] ? parseFloat(String(values["min-score"])) || 0 : 0, all: isAll, collection: values.collection, lineNumbers: !!values["line-numbers"], candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined, skipRerank: !!values["no-rerank"], explain: !!values.explain, intent: values.intent, chunkStrategy: parseChunkStrategy(values["chunk-strategy"]), }; return { command: positionals[0] || "", args: positionals.slice(1), query: positionals.slice(1).join(" "), opts, values, }; } function getSkillInstallDir(globalInstall) { return globalInstall ? resolve(homedir(), ".agents", "skills", "qmd") : resolve(getPwd(), ".agents", "skills", "qmd"); } function getClaudeSkillLinkPath(globalInstall) { return globalInstall ? resolve(homedir(), ".claude", "skills", "qmd") : resolve(getPwd(), ".claude", "skills", "qmd"); } function pathExists(path) { try { lstatSync(path); return true; } catch { return false; } } function removePath(path) { const stat = lstatSync(path); if (stat.isDirectory() && !stat.isSymbolicLink()) { rmSync(path, { recursive: true, force: true }); } else { unlinkSync(path); } } function showSkill() { console.log("QMD Skill (embedded)"); console.log(""); const content = getEmbeddedQmdSkillContent(); process.stdout.write(content.endsWith("\n") ? 
content : content + "\n"); } function writeEmbeddedSkill(targetDir, force) { if (pathExists(targetDir)) { if (!force) { throw new Error(`Skill already exists: ${targetDir} (use --force to replace it)`); } removePath(targetDir); } mkdirSync(targetDir, { recursive: true }); for (const file of getEmbeddedQmdSkillFiles()) { const destination = resolve(targetDir, file.relativePath); mkdirSync(dirname(destination), { recursive: true }); writeFileSync(destination, file.content, "utf-8"); } } function ensureClaudeSymlink(linkPath, targetDir, force) { const parentDir = dirname(linkPath); if (pathExists(parentDir)) { const resolvedTargetDir = realpathSync(dirname(targetDir)); const resolvedLinkParent = realpathSync(parentDir); // If .claude/skills already resolves to the same directory as .agents/skills, // the skill is already visible to Claude and creating qmd -> qmd would loop. if (resolvedTargetDir === resolvedLinkParent) { return false; } } const linkTarget = relativePath(parentDir, targetDir) || "."; mkdirSync(parentDir, { recursive: true }); if (pathExists(linkPath)) { const stat = lstatSync(linkPath); if (stat.isSymbolicLink() && readlinkSync(linkPath) === linkTarget) { return true; } if (!force) { throw new Error(`Claude skill path already exists: ${linkPath} (use --force to replace it)`); } removePath(linkPath); } symlinkSync(linkTarget, linkPath, "dir"); return true; } async function shouldCreateClaudeSymlink(linkPath, autoYes) { if (autoYes) { return true; } if (!process.stdin.isTTY || !process.stdout.isTTY) { console.log(`Tip: create a Claude symlink manually at ${linkPath}`); return false; } const rl = createInterface({ input: process.stdin, output: process.stdout, }); try { const answer = await rl.question(`Create a symlink in ${linkPath}? 
[y/N] `); const normalized = answer.trim().toLowerCase(); return normalized === "y" || normalized === "yes"; } finally { rl.close(); } } async function installSkill(globalInstall, force, autoYes) { const installDir = getSkillInstallDir(globalInstall); writeEmbeddedSkill(installDir, force); console.log(`✓ Installed QMD skill to ${installDir}`); const claudeLinkPath = getClaudeSkillLinkPath(globalInstall); if (!(await shouldCreateClaudeSymlink(claudeLinkPath, autoYes))) { return; } const linked = ensureClaudeSymlink(claudeLinkPath, installDir, force); if (linked) { console.log(`✓ Linked Claude skill at ${claudeLinkPath}`); } else { console.log(`✓ Claude already sees the skill via ${dirname(claudeLinkPath)}`); } } function showHelp() { console.log("qmd — Quick Markdown Search"); console.log(""); console.log("Usage:"); console.log(" qmd [options]"); console.log(""); console.log("Primary commands:"); console.log(" qmd query - Hybrid search with auto expansion + reranking (recommended)"); console.log(" qmd query 'lex:..\\nvec:...' 
- Structured query document (you provide lex/vec/hyde lines)"); console.log(" qmd search - Full-text BM25 keywords (no LLM)"); console.log(" qmd vsearch - Vector similarity only"); console.log(" qmd get [:line] [-l N] - Show a single document, optional line slice"); console.log(" qmd multi-get - Batch fetch via glob or comma-separated list"); console.log(" qmd skill show/install - Show or install the packaged QMD skill"); console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)"); console.log(" qmd bench - Run search quality benchmarks against a fixture file"); console.log(""); console.log("Collections & context:"); console.log(" qmd collection add/list/remove/rename/show - Manage indexed folders"); console.log(" qmd context add/list/rm - Attach human-written summaries"); console.log(" qmd ls [collection[/path]] - Inspect indexed files"); console.log(""); console.log("Maintenance:"); console.log(" qmd status - View index + collection health"); console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)"); console.log(" qmd embed [-f] - Generate/refresh vector embeddings"); console.log(" --max-docs-per-batch - Cap docs loaded into memory per embedding batch"); console.log(" --max-batch-mb - Cap UTF-8 MB loaded into memory per embedding batch"); console.log(" --provider {local,openai} - Embedding backend (default: local llama.cpp)"); console.log(" --embed-endpoint - OpenAI-compatible endpoint (or QMD_EMBED_ENDPOINT)"); console.log(" --embed-api-key - Bearer token (or QMD_EMBED_API_KEY)"); console.log(" --embed-model-id - Stable model id stored in DB (default: embeddinggemma)"); console.log(" --embed-upstream-model - Model name sent in HTTP body (default: same as model-id)"); console.log(" --embed-batch-size - Batch size for HTTP provider (default: 64)"); console.log(" --embed-timeout-ms - Per-request timeout in ms (default: 30000)"); console.log(" --embed-auto-fallback - Wrap openai provider in local fallback (or 
QMD_EMBED_AUTO_FALLBACK)"); console.log(" qmd cleanup - Clear caches, vacuum DB"); console.log(""); console.log("Query syntax (qmd query):"); console.log(" QMD queries are either a single expand query (no prefix) or a multi-line"); console.log(" document where every line is typed with lex:, vec:, or hyde:. This grammar"); console.log(" matches the docs in docs/SYNTAX.md and is enforced in the CLI."); console.log(""); const grammar = [ `query = expand_query | query_document ;`, `expand_query = text | explicit_expand ;`, `explicit_expand= "expand:" text ;`, `query_document = [ intent_line ] { typed_line } ;`, `intent_line = "intent:" text newline ;`, `typed_line = type ":" text newline ;`, `type = "lex" | "vec" | "hyde" ;`, `text = quoted_phrase | plain_text ;`, `quoted_phrase = '"' { character } '"' ;`, `plain_text = { character } ;`, `newline = "\\n" ;`, ]; console.log(" Grammar:"); for (const line of grammar) { console.log(` ${line}`); } console.log(""); console.log(" Examples:"); console.log(" qmd query \"how does auth work\" # single-line → implicit expand"); console.log(" qmd query $'lex: CAP theorem\\nvec: consistency' # typed query document"); console.log(" qmd query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search"); console.log(" qmd query $'hyde: Hypothetical answer text' # hyde-only document"); console.log(""); console.log(" Constraints:"); console.log(" - Standalone expand queries cannot mix with typed lines."); console.log(" - Query documents allow only lex:, vec:, or hyde: prefixes."); console.log(" - Each typed line must be single-line text with balanced quotes."); console.log(""); console.log("AI agents & integrations:"); console.log(" - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs."); console.log(" - `qmd skill install` installs the QMD skill into ./.agents/skills/qmd."); console.log(" - Use `qmd skill install --global` for ~/.agents/skills/qmd."); console.log(" - `qmd --skill` is kept as an alias for `qmd 
skill show`."); console.log(" - Advanced: `qmd mcp --http ...` and `qmd mcp --http --daemon` are optional for custom transports."); console.log(""); console.log("Global options:"); console.log(" --index - Use a named index (default: index)"); console.log(" QMD_EDITOR_URI - Editor link template for clickable TTY search output"); console.log(""); console.log("Search options:"); console.log(" -n - Max results (default 5, or 20 for --files/--json)"); console.log(" --all - Return all matches (pair with --min-score)"); console.log(" --min-score - Minimum similarity score"); console.log(" --full - Output full document instead of snippet"); console.log(" -C, --candidate-limit - Max candidates to rerank (default 40, lower = faster)"); console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)"); console.log(" --line-numbers - Include line numbers in output"); console.log(" --explain - Include retrieval score traces (query --json/CLI)"); console.log(" --files | --json | --csv | --md | --xml - Output format"); console.log(" -c, --collection - Filter by one or more collections"); console.log(""); console.log("Embed/query options:"); console.log(" --chunk-strategy - Chunking mode (default: regex; auto uses AST for code files)"); console.log(""); console.log("Multi-get options:"); console.log(" -l - Maximum lines per file"); console.log(" --max-bytes - Skip files larger than N bytes (default 10240)"); console.log(" --json/--csv/--md/--xml/--files - Same formats as search"); console.log(""); console.log(`Index: ${getDbPath()}`); } async function showVersion() { const scriptDir = dirname(fileURLToPath(import.meta.url)); const pkgPath = resolve(scriptDir, "..", "..", "package.json"); const pkg = JSON.parse(readFileSync(pkgPath, "utf-8")); let commit = ""; try { commit = execSync(`git -C ${scriptDir} rev-parse --short HEAD`, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim(); } catch { // Not a git repo or git not available } const 
versionStr = commit ? `${pkg.version} (${commit})` : pkg.version; console.log(`qmd ${versionStr}`); } // Main CLI - only run if this is the main module const __filename = fileURLToPath(import.meta.url); const argv1 = process.argv[1]; const isMain = argv1 === __filename || argv1?.endsWith("/qmd.ts") || argv1?.endsWith("/qmd.js") || (argv1 != null && realpathSync(argv1) === __filename); if (isMain) { const cli = parseCLI(); if (cli.values.version) { await showVersion(); process.exit(0); } if (cli.values.skill) { showSkill(); process.exit(0); } if (cli.values.help && cli.command === "skill") { console.log("Usage: qmd skill [options]"); console.log(""); console.log("Commands:"); console.log(" show Print the packaged QMD skill"); console.log(" install Install into ./.agents/skills/qmd"); console.log(""); console.log("Options:"); console.log(" --global Install into ~/.agents/skills/qmd"); console.log(" --yes Also create the .claude/skills/qmd symlink"); console.log(" -f, --force Replace existing install or symlink"); process.exit(0); } if (!cli.command || cli.values.help) { showHelp(); process.exit(cli.values.help ? 0 : 1); } switch (cli.command) { case "context": { const subcommand = cli.args[0]; if (!subcommand) { console.error("Usage: qmd context "); console.error(""); console.error("Commands:"); console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)"); console.error(" qmd context add / \"text\" - Add global context to all collections"); console.error(" qmd context list - List all contexts"); console.error(" qmd context rm - Remove context"); process.exit(1); } switch (subcommand) { case "add": { if (cli.args.length < 2) { console.error("Usage: qmd context add [path] \"text\""); console.error(""); console.error("Examples:"); console.error(" qmd context add \"Context for current directory\""); console.error(" qmd context add . 
\"Context for current directory\""); console.error(" qmd context add /subfolder \"Context for subfolder\""); console.error(" qmd context add / \"Global context for all collections\""); console.error(""); console.error(" Using virtual paths:"); console.error(" qmd context add qmd://journals/ \"Context for entire journals collection\""); console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\""); process.exit(1); } let pathArg; let contextText; // Check if first arg looks like a path or if it's the context text const firstArg = cli.args[1] || ''; const secondArg = cli.args[2]; if (secondArg) { // Two args: path + context pathArg = firstArg; contextText = cli.args.slice(2).join(" "); } else { // One arg: context only (use current directory) pathArg = undefined; contextText = firstArg; } await contextAdd(pathArg, contextText); break; } case "list": { contextList(); break; } case "rm": case "remove": { if (cli.args.length < 2 || !cli.args[1]) { console.error("Usage: qmd context rm "); console.error("Examples:"); console.error(" qmd context rm /"); console.error(" qmd context rm qmd://journals/2024"); process.exit(1); } contextRemove(cli.args[1]); break; } default: console.error(`Unknown subcommand: ${subcommand}`); console.error("Available: add, list, rm"); process.exit(1); } break; } case "get": { if (!cli.args[0]) { console.error("Usage: qmd get [:line] [--from ] [-l ] [--line-numbers]"); process.exit(1); } const fromLine = cli.values.from ? parseInt(cli.values.from, 10) : undefined; const maxLines = cli.values.l ? parseInt(cli.values.l, 10) : undefined; getDocument(cli.args[0], fromLine, maxLines, cli.opts.lineNumbers); break; } case "multi-get": { if (!cli.args[0]) { console.error("Usage: qmd multi-get [-l ] [--max-bytes ] [--json|--csv|--md|--xml|--files]"); console.error(" pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list"); process.exit(1); } const maxLinesMulti = cli.values.l ? 
parseInt(cli.values.l, 10) : undefined; const maxBytes = cli.values["max-bytes"] ? parseInt(cli.values["max-bytes"], 10) : DEFAULT_MULTI_GET_MAX_BYTES; multiGet(cli.args[0], maxLinesMulti, maxBytes, cli.opts.format); break; } case "ls": { listFiles(cli.args[0]); break; } case "collection": { const subcommand = cli.args[0]; switch (subcommand) { case "list": { collectionList(); break; } case "add": { const pwd = cli.args[1] || getPwd(); const resolvedPwd = pwd === '.' ? getPwd() : getRealPath(resolve(pwd)); const globPattern = cli.values.mask || DEFAULT_GLOB; const name = cli.values.name; await collectionAdd(resolvedPwd, globPattern, name); break; } case "remove": case "rm": { if (!cli.args[1]) { console.error("Usage: qmd collection remove "); console.error(" Use 'qmd collection list' to see available collections"); process.exit(1); } collectionRemove(cli.args[1]); break; } case "rename": case "mv": { if (!cli.args[1] || !cli.args[2]) { console.error("Usage: qmd collection rename "); console.error(" Use 'qmd collection list' to see available collections"); process.exit(1); } collectionRename(cli.args[1], cli.args[2]); break; } case "set-update": case "update-cmd": { const name = cli.args[1]; const cmd = cli.args.slice(2).join(' ') || null; if (!name) { console.error("Usage: qmd collection update-cmd [command]"); console.error(" Set the command to run before indexing (e.g., 'git pull')"); console.error(" Omit command to clear it"); process.exit(1); } const { updateCollectionSettings, getCollection } = await import("../collections.js"); const col = getCollection(name); if (!col) { console.error(`Collection not found: ${name}`); process.exit(1); } updateCollectionSettings(name, { update: cmd }); if (cmd) { console.log(`✓ Set update command for '${name}': ${cmd}`); } else { console.log(`✓ Cleared update command for '${name}'`); } break; } case "include": case "exclude": { const name = cli.args[1]; if (!name) { console.error(`Usage: qmd collection ${subcommand} `); 
console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`); process.exit(1); } const { updateCollectionSettings, getCollection } = await import("../collections.js"); const col = getCollection(name); if (!col) { console.error(`Collection not found: ${name}`); process.exit(1); } const include = subcommand === 'include'; updateCollectionSettings(name, { includeByDefault: include }); console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`); break; } case "show": case "info": { const name = cli.args[1]; if (!name) { console.error("Usage: qmd collection show "); process.exit(1); } const { getCollection } = await import("../collections.js"); const col = getCollection(name); if (!col) { console.error(`Collection not found: ${name}`); process.exit(1); } console.log(`Collection: ${name}`); console.log(` Path: ${col.path}`); console.log(` Pattern: ${col.pattern}`); console.log(` Include: ${col.includeByDefault !== false ? 
'yes (default)' : 'no'}`); if (col.update) { console.log(` Update: ${col.update}`); } if (col.context) { const ctxCount = Object.keys(col.context).length; console.log(` Contexts: ${ctxCount}`); } break; } case "help": case undefined: { console.log("Usage: qmd collection [options]"); console.log(""); console.log("Commands:"); console.log(" list List all collections"); console.log(" add [--name NAME] Add a collection"); console.log(" remove Remove a collection"); console.log(" rename Rename a collection"); console.log(" show Show collection details"); console.log(" update-cmd [cmd] Set pre-update command (e.g., 'git pull')"); console.log(" include Include in default queries"); console.log(" exclude Exclude from default queries"); console.log(""); console.log("Examples:"); console.log(" qmd collection add ~/notes --name notes"); console.log(" qmd collection update-cmd brain 'git pull'"); console.log(" qmd collection exclude archive"); process.exit(0); } default: console.error(`Unknown subcommand: ${subcommand}`); console.error("Run 'qmd collection help' for usage"); process.exit(1); } break; } case "status": await showStatus(); break; case "update": await updateCollections(); break; case "embed": try { const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]); const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]); const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]); // Build embedding provider from CLI flags + env + config file. // Backward compat: with no flags / env vars, the factory returns // a LocalLlamaCppProvider that delegates to the default LlamaCpp // singleton — identical to pre-patch behavior. 
const providerCliKind = parseProviderKind(cli.values["provider"]); const providerOpts = buildProviderOpts(cli.values, providerCliKind); const embedProvider = createEmbeddingProvider(providerOpts); await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, { maxDocsPerBatch, maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024, chunkStrategy: embedChunkStrategy, embedProvider, providerKind: embedProvider.kind, }); } catch (error) { if (error instanceof ModelMismatchError) { // Friendlier output for the migration-safety guard console.error(`${c.red}Model mismatch:${c.reset} ${error.message}`); } else { console.error(error instanceof Error ? error.message : String(error)); } process.exit(1); } break; case "pull": { const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh); const models = [ DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, ]; console.log(`${c.bold}Pulling models${c.reset}`); const results = await pullModels(models, { refresh, cacheDir: DEFAULT_MODEL_CACHE_DIR, }); for (const result of results) { const size = formatBytes(result.sizeBytes); const note = result.refreshed ? 
"refreshed" : "cached/checked"; console.log(`- ${result.model} -> ${result.path} (${size}, ${note})`); } break; } case "search": if (!cli.query) { console.error("Usage: qmd search [options] "); process.exit(1); } search(cli.query, cli.opts); break; case "vsearch": case "vector-search": // undocumented alias if (!cli.query) { console.error("Usage: qmd vsearch [options] "); process.exit(1); } // Default min-score for vector search is 0.3 if (!cli.values["min-score"]) { cli.opts.minScore = 0.3; } await vectorSearch(cli.query, cli.opts); break; case "query": case "deep-search": // undocumented alias if (!cli.query) { console.error("Usage: qmd query [options] "); process.exit(1); } await querySearch(cli.query, cli.opts); break; case "bench": { const fixturePath = cli.args[0]; if (!fixturePath) { console.error("Usage: qmd bench [--json] [-c collection]"); console.error(""); console.error("Run search quality benchmarks against a fixture file."); console.error("See src/bench/fixtures/example.json for the fixture format."); process.exit(1); } const { runBenchmark } = await import("../bench/bench.js"); const benchCollection = cli.opts.collection; await runBenchmark(fixturePath, { json: !!cli.opts.json, collection: Array.isArray(benchCollection) ? benchCollection[0] : benchCollection, }); break; } case "mcp": { const sub = cli.args[0]; // stop | status | undefined // Cache dir for PID/log files — same dir as the index const cacheDir = process.env.XDG_CACHE_HOME ? resolve(process.env.XDG_CACHE_HOME, "qmd") : resolve(homedir(), ".cache", "qmd"); const pidPath = resolve(cacheDir, "mcp.pid"); // Subcommands take priority over flags if (sub === "stop") { if (!existsSync(pidPath)) { console.log("Not running (no PID file)."); process.exit(0); } const pid = parseInt(readFileSync(pidPath, "utf-8").trim()); try { process.kill(pid, 0); // alive? 
process.kill(pid, "SIGTERM"); unlinkSync(pidPath); console.log(`Stopped QMD MCP server (PID ${pid}).`); } catch { unlinkSync(pidPath); console.log("Cleaned up stale PID file (server was not running)."); } process.exit(0); } if (cli.values.http) { const port = Number(cli.values.port) || 8181; if (cli.values.daemon) { // Guard: check if already running if (existsSync(pidPath)) { const existingPid = parseInt(readFileSync(pidPath, "utf-8").trim()); try { process.kill(existingPid, 0); // alive? console.error(`Already running (PID ${existingPid}). Run 'qmd mcp stop' first.`); process.exit(1); } catch { // Stale PID file — continue } } mkdirSync(cacheDir, { recursive: true }); const logPath = resolve(cacheDir, "mcp.log"); const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run const selfPath = fileURLToPath(import.meta.url); const spawnArgs = selfPath.endsWith(".ts") ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)] : [selfPath, "mcp", "--http", "--port", String(port)]; const child = nodeSpawn(process.execPath, spawnArgs, { stdio: ["ignore", logFd, logFd], detached: true, }); child.unref(); closeSync(logFd); // parent's copy; child inherited the fd writeFileSync(pidPath, String(child.pid)); console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`); console.log(`Logs: ${logPath}`); process.exit(0); } // Foreground HTTP mode — remove top-level cursor handlers so the // async cleanup handlers in startMcpHttpServer actually run. process.removeAllListeners("SIGTERM"); process.removeAllListeners("SIGINT"); const { startMcpHttpServer } = await import("../mcp/server.js"); try { await startMcpHttpServer(port); } catch (e) { if (e?.code === "EADDRINUSE") { console.error(`Port ${port} already in use. 
Try a different port with --port.`); process.exit(1); } throw e; } } else { // Default: stdio transport const { startMcpServer } = await import("../mcp/server.js"); await startMcpServer(); } break; } case "skill": { const subcommand = cli.args[0]; switch (subcommand) { case "show": { showSkill(); break; } case "install": { try { await installSkill(Boolean(cli.values.global), Boolean(cli.values.force), Boolean(cli.values.yes)); } catch (error) { console.error(error instanceof Error ? error.message : String(error)); process.exit(1); } break; } case "help": case undefined: { console.log("Usage: qmd skill [options]"); console.log(""); console.log("Commands:"); console.log(" show Print the packaged QMD skill"); console.log(" install Install into ./.agents/skills/qmd"); console.log(""); console.log("Options:"); console.log(" --global Install into ~/.agents/skills/qmd"); console.log(" --yes Also create the .claude/skills/qmd symlink"); console.log(" -f, --force Replace existing install or symlink"); process.exit(0); } default: console.error(`Unknown subcommand: ${subcommand}`); console.error("Run 'qmd skill help' for usage"); process.exit(1); } break; } case "cleanup": { const db = getDb(); // 1. Clear llm_cache const cacheCount = deleteLLMCache(db); console.log(`${c.green}✓${c.reset} Cleared ${cacheCount} cached API responses`); // 2. Remove orphaned vectors const orphanedVecs = cleanupOrphanedVectors(db); if (orphanedVecs > 0) { console.log(`${c.green}✓${c.reset} Removed ${orphanedVecs} orphaned embedding chunks`); } else { console.log(`${c.dim}No orphaned embeddings to remove${c.reset}`); } // 3. Remove inactive documents const inactiveDocs = deleteInactiveDocuments(db); if (inactiveDocs > 0) { console.log(`${c.green}✓${c.reset} Removed ${inactiveDocs} inactive document records`); } // 4. 
Vacuum to reclaim space vacuumDatabase(db); console.log(`${c.green}✓${c.reset} Database vacuumed`); closeDb(); break; } default: console.error(`Unknown command: ${cli.command}`); console.error("Run 'qmd --help' for usage."); process.exit(1); } if (cli.command !== "mcp") { await disposeDefaultLlamaCpp(); process.exit(0); } } // end if (main module)