/** * QMD Store - Core data access and retrieval functions * * This module provides all database operations, search functions, and document * retrieval for QMD. It returns raw data structures that can be formatted by * CLI or MCP consumers. * * Usage: * const store = createStore("/path/to/db.sqlite"); * // or use default path: * const store = createStore(); */ import { Database } from "bun:sqlite"; import { Glob } from "bun"; import * as sqliteVec from "sqlite-vec"; import { Ollama, getDefaultOllama, formatQueryForEmbedding, formatDocForEmbedding, type RerankDocument, } from "./llm"; // ============================================================================= // Configuration // ============================================================================= const HOME = Bun.env.HOME || "/tmp"; export const DEFAULT_EMBED_MODEL = "embeddinggemma"; export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0"; export const DEFAULT_QUERY_MODEL = "qwen3:0.6b"; export const DEFAULT_GLOB = "**/*.md"; export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB // Re-export OLLAMA_URL for backwards compatibility export const OLLAMA_URL = getDefaultOllama().getBaseUrl(); // Chunking: ~2000 tokens per chunk, ~3 bytes/token = 6KB const CHUNK_BYTE_SIZE = 6 * 1024; // ============================================================================= // Path utilities // ============================================================================= export function homedir(): string { return HOME; } export function resolve(...paths: string[]): string { let result = paths[0].startsWith('/') ? '' : Bun.env.PWD || process.cwd(); for (const p of paths) { if (p.startsWith('/')) { result = p; } else { result = result + '/' + p; } } const parts = result.split('/').filter(Boolean); const normalized: string[] = []; for (const part of parts) { if (part === '..') normalized.pop(); else if (part !== '.') normalized.push(part); } return '/' + normalized.join('/'); } export function getDefaultDbPath(indexName: string = "index"): string { // Allow override via INDEX_PATH for testing if (Bun.env.INDEX_PATH) { return Bun.env.INDEX_PATH; } const cacheDir = Bun.env.XDG_CACHE_HOME || resolve(homedir(), ".cache"); const qmdCacheDir = resolve(cacheDir, "qmd"); try { Bun.spawnSync(["mkdir", "-p", qmdCacheDir]); } catch {} return resolve(qmdCacheDir, `${indexName}.sqlite`); } export function getPwd(): string { return process.env.PWD || process.cwd(); } export function getRealPath(path: string): string { try { const result = Bun.spawnSync(["realpath", path]); if (result.success) { return result.stdout.toString().trim(); } } catch {} return resolve(path); } // ============================================================================= // Virtual Path Utilities (qmd://) // ============================================================================= export type VirtualPath = { collectionName: string; path: string; // relative path within collection }; /** * Parse a virtual path like "qmd://collection-name/path/to/file.md" * into its components. */ export function parseVirtualPath(virtualPath: string): VirtualPath | null { const match = virtualPath.match(/^qmd:\/\/([^\/]+)\/(.+)$/); if (!match) return null; return { collectionName: match[1], path: match[2], }; } /** * Build a virtual path from collection name and relative path. */ export function buildVirtualPath(collectionName: string, path: string): string { return `qmd://${collectionName}/${path}`; } /** * Check if a path is a virtual path (starts with qmd://). */ export function isVirtualPath(path: string): boolean { return path.startsWith('qmd://'); } /** * Resolve a virtual path to absolute filesystem path. */ export function resolveVirtualPath(db: Database, virtualPath: string): string | null { const parsed = parseVirtualPath(virtualPath); if (!parsed) return null; const coll = getCollectionByName(db, parsed.collectionName); if (!coll) return null; return resolve(coll.pwd, parsed.path); } /** * Convert an absolute filesystem path to a virtual path. * Returns null if the file is not in any indexed collection. */ export function toVirtualPath(db: Database, absolutePath: string): string | null { const doc = db.prepare(` SELECT c.name, d.path FROM documents d JOIN collections c ON c.id = d.collection_id WHERE c.pwd || '/' || d.path = ? AND d.active = 1 LIMIT 1 `).get(absolutePath) as { name: string; path: string } | null; if (!doc) return null; return buildVirtualPath(doc.name, doc.path); } // ============================================================================= // Database initialization // ============================================================================= // On macOS, use Homebrew's SQLite which supports extensions if (process.platform === "darwin") { const homebrewSqlitePath = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib"; try { if (Bun.file(homebrewSqlitePath).size > 0) { Database.setCustomSQLite(homebrewSqlitePath); } } catch {} } function initializeDatabase(db: Database): void { sqliteVec.load(db); db.exec("PRAGMA journal_mode = WAL"); db.exec("PRAGMA foreign_keys = ON"); // Check if we need to migrate from old schema const tables = db.prepare(`SELECT name FROM sqlite_master WHERE type='table'`).all() as { name: string }[]; const tableNames = tables.map(t => t.name); const needsMigration = tableNames.includes('documents') && !tableNames.includes('content'); if (needsMigration) { migrateToContentAddressable(db); return; // Migration will call initializeDatabase again } // Content-addressable storage - the source of truth for document content db.exec(` CREATE TABLE IF NOT EXISTS content ( hash TEXT PRIMARY KEY, doc TEXT NOT NULL, created_at TEXT NOT NULL ) `); // Collections table with name field db.exec(` CREATE TABLE IF NOT EXISTS collections ( id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL UNIQUE, pwd TEXT NOT NULL, glob_pattern TEXT NOT NULL, created_at TEXT NOT NULL, updated_at TEXT NOT NULL, UNIQUE(pwd, glob_pattern) ) `); // Documents table - file system layer mapping virtual paths to content hashes db.exec(` CREATE TABLE IF NOT EXISTS documents ( id INTEGER PRIMARY KEY AUTOINCREMENT, collection_id INTEGER NOT NULL, path TEXT NOT NULL, title TEXT NOT NULL, hash TEXT NOT NULL, created_at TEXT NOT NULL, modified_at TEXT NOT NULL, active INTEGER NOT NULL DEFAULT 1, FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE, FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE, UNIQUE(collection_id, path) ) `); db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection_id, active)`); db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`); db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active)`); // Path-based context (collection-scoped, hierarchical) db.exec(` CREATE TABLE IF NOT EXISTS path_contexts ( id INTEGER PRIMARY KEY AUTOINCREMENT, collection_id INTEGER NOT NULL, path_prefix TEXT NOT NULL, context TEXT NOT NULL, created_at TEXT NOT NULL, FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE, UNIQUE(collection_id, path_prefix) ) `); db.exec(`CREATE INDEX IF NOT EXISTS idx_path_contexts_collection ON path_contexts(collection_id, path_prefix)`); // Cache table for Ollama API calls db.exec(` CREATE TABLE IF NOT EXISTS ollama_cache ( hash TEXT PRIMARY KEY, result TEXT NOT NULL, created_at TEXT NOT NULL ) `); // Content vectors const cvInfo = db.prepare(`PRAGMA table_info(content_vectors)`).all() as { name: string }[]; const hasSeqColumn = cvInfo.some(col => col.name === 'seq'); if (cvInfo.length > 0 && !hasSeqColumn) { db.exec(`DROP TABLE IF EXISTS content_vectors`); db.exec(`DROP TABLE IF EXISTS vectors_vec`); } db.exec(` CREATE TABLE IF NOT EXISTS content_vectors ( hash TEXT NOT NULL, seq INTEGER NOT NULL DEFAULT 0, pos INTEGER NOT NULL DEFAULT 0, model TEXT NOT NULL, embedded_at TEXT NOT NULL, PRIMARY KEY (hash, seq) ) `); // FTS - index path and content (joined from content table) db.exec(` CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5( path, body, tokenize='porter unicode61' ) `); // Triggers to keep FTS in sync db.exec(` CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN INSERT INTO documents_fts(rowid, path, body) SELECT new.id, new.path, c.doc FROM content c WHERE c.hash = new.hash; END `); db.exec(` CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN DELETE FROM documents_fts WHERE rowid = old.id; END `); db.exec(` CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents BEGIN UPDATE documents_fts SET path = new.path, body = (SELECT doc FROM content WHERE hash = new.hash) WHERE rowid = new.id; END `); } function migrateToContentAddressable(db: Database): void { console.log("Migrating database to content-addressable schema..."); // Start transaction db.exec("BEGIN TRANSACTION"); try { // Rename old tables db.exec("ALTER TABLE documents RENAME TO documents_old"); db.exec("ALTER TABLE collections RENAME TO collections_old"); db.exec("ALTER TABLE path_contexts RENAME TO path_contexts_old"); db.exec("DROP TABLE IF EXISTS documents_fts"); db.exec("DROP TRIGGER IF EXISTS documents_ai"); db.exec("DROP TRIGGER IF EXISTS documents_ad"); db.exec("DROP TRIGGER IF EXISTS documents_au"); // Create new schema db.exec(` CREATE TABLE content ( hash TEXT PRIMARY KEY, doc TEXT NOT NULL, created_at TEXT NOT NULL ) `); db.exec(` CREATE TABLE collections ( id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL UNIQUE, pwd TEXT NOT NULL, glob_pattern TEXT NOT NULL, created_at TEXT NOT NULL, updated_at TEXT NOT NULL, UNIQUE(pwd, glob_pattern) ) `); db.exec(` CREATE TABLE documents ( id INTEGER PRIMARY KEY AUTOINCREMENT, collection_id INTEGER NOT NULL, path TEXT NOT NULL, title TEXT NOT NULL, hash TEXT NOT NULL, created_at TEXT NOT NULL, modified_at TEXT NOT NULL, active INTEGER NOT NULL DEFAULT 1, FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE, FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE, UNIQUE(collection_id, path) ) `); db.exec(` CREATE TABLE path_contexts ( id INTEGER PRIMARY KEY AUTOINCREMENT, collection_id INTEGER NOT NULL, path_prefix TEXT NOT NULL, context TEXT NOT NULL, created_at TEXT NOT NULL, FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE, UNIQUE(collection_id, path_prefix) ) `); // Migrate data: Extract unique content hashes console.log("Migrating content..."); db.exec(` INSERT INTO content (hash, doc, created_at) SELECT hash, body, MIN(created_at) as created_at FROM documents_old WHERE active = 1 GROUP BY hash `); // Migrate collections: generate names from pwd basename console.log("Migrating collections..."); // First insert with pwd as temporary name db.exec(` INSERT INTO collections (id, name, pwd, glob_pattern, created_at, updated_at) SELECT id, pwd as name, pwd, glob_pattern, created_at, created_at as updated_at FROM collections_old `); // Then update names to basenames using application logic const collections = db.prepare(`SELECT id, pwd FROM collections`).all() as { id: number; pwd: string }[]; for (const coll of collections) { const parts = coll.pwd.split('/').filter(Boolean); const name = parts[parts.length - 1] || 'root'; db.prepare(`UPDATE collections SET name = ? WHERE id = ?`).run(name, coll.id); } // Handle duplicate collection names by appending collection_id const duplicates = db.prepare(` SELECT name, COUNT(*) as cnt FROM collections GROUP BY name HAVING cnt > 1 `).all() as { name: string; cnt: number }[]; for (const dup of duplicates) { const rows = db.prepare(`SELECT id FROM collections WHERE name = ? ORDER BY id`).all(dup.name) as { id: number }[]; for (let i = 1; i < rows.length; i++) { db.prepare(`UPDATE collections SET name = ? WHERE id = ?`).run(`${dup.name}-${rows[i].id}`, rows[i].id); } } // Migrate documents: convert filepath to relative path within collection console.log("Migrating documents..."); const oldDocs = db.prepare(` SELECT d.id, d.collection_id, d.filepath, d.title, d.hash, d.created_at, d.modified_at, c.pwd FROM documents_old d JOIN collections c ON c.id = d.collection_id WHERE d.active = 1 `).all() as Array<{ id: number; collection_id: number; filepath: string; title: string; hash: string; created_at: string; modified_at: string; pwd: string; }>; const insertDoc = db.prepare(` INSERT INTO documents (collection_id, path, title, hash, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, 1) `); for (const doc of oldDocs) { // Convert absolute filepath to relative path within collection let path = doc.filepath; if (path.startsWith(doc.pwd + '/')) { path = path.slice(doc.pwd.length + 1); } else if (path.startsWith(doc.pwd)) { path = path.slice(doc.pwd.length); } // Remove leading slash if present path = path.replace(/^\/+/, ''); try { insertDoc.run(doc.collection_id, path, doc.title, doc.hash, doc.created_at, doc.modified_at); } catch (e) { console.warn(`Skipping duplicate path: ${path} in collection ${doc.collection_id}`); } } // Migrate path_contexts: associate with collections based on path prefix console.log("Migrating path contexts..."); const oldContexts = db.prepare(`SELECT * FROM path_contexts_old`).all() as Array<{ path_prefix: string; context: string; created_at: string; }>; const insertContext = db.prepare(` INSERT INTO path_contexts (collection_id, path_prefix, context, created_at) VALUES (?, ?, ?, ?) `); const allCollections = db.prepare(`SELECT id, pwd FROM collections`).all() as Array<{ id: number; pwd: string }>; for (const ctx of oldContexts) { // Find collection(s) that match this path prefix for (const coll of allCollections) { if (ctx.path_prefix.startsWith(coll.pwd)) { // Convert absolute path_prefix to relative within collection let relPath = ctx.path_prefix; if (relPath.startsWith(coll.pwd + '/')) { relPath = relPath.slice(coll.pwd.length + 1); } else if (relPath.startsWith(coll.pwd)) { relPath = relPath.slice(coll.pwd.length); } relPath = relPath.replace(/^\/+/, ''); try { insertContext.run(coll.id, relPath, ctx.context, ctx.created_at); } catch (e) { // Ignore duplicates } } } } // Drop old tables db.exec("DROP TABLE documents_old"); db.exec("DROP TABLE collections_old"); db.exec("DROP TABLE path_contexts_old"); // Recreate FTS and triggers db.exec(` CREATE VIRTUAL TABLE documents_fts USING fts5( path, body, tokenize='porter unicode61' ) `); db.exec(` CREATE TRIGGER documents_ai AFTER INSERT ON documents BEGIN INSERT INTO documents_fts(rowid, path, body) SELECT new.id, new.path, c.doc FROM content c WHERE c.hash = new.hash; END `); db.exec(` CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN DELETE FROM documents_fts WHERE rowid = old.id; END `); db.exec(` CREATE TRIGGER documents_au AFTER UPDATE ON documents BEGIN UPDATE documents_fts SET path = new.path, body = (SELECT doc FROM content WHERE hash = new.hash) WHERE rowid = new.id; END `); // Populate FTS from migrated data console.log("Rebuilding full-text search index..."); db.exec(` INSERT INTO documents_fts(rowid, path, body) SELECT d.id, d.path, c.doc FROM documents d JOIN content c ON c.hash = d.hash WHERE d.active = 1 `); // Create indexes db.exec(`CREATE INDEX idx_documents_collection ON documents(collection_id, active)`); db.exec(`CREATE INDEX idx_documents_hash ON documents(hash)`); db.exec(`CREATE INDEX idx_documents_path ON documents(path, active)`); db.exec(`CREATE INDEX idx_path_contexts_collection ON path_contexts(collection_id, path_prefix)`); db.exec("COMMIT"); console.log("Migration complete!"); } catch (e) { db.exec("ROLLBACK"); console.error("Migration failed:", e); throw e; } } function ensureVecTableInternal(db: Database, dimensions: number): void { const tableInfo = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get() as { sql: string } | null; if (tableInfo) { const match = tableInfo.sql.match(/float\[(\d+)\]/); const hasHashSeq = tableInfo.sql.includes('hash_seq'); if (match && parseInt(match[1]) === dimensions && hasHashSeq) return; db.exec("DROP TABLE IF EXISTS vectors_vec"); } db.exec(`CREATE VIRTUAL TABLE vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}])`); } // ============================================================================= // Store Factory // ============================================================================= export type Store = { db: Database; dbPath: string; close: () => void; ensureVecTable: (dimensions: number) => void; // Index health getHashesNeedingEmbedding: () => number; getIndexHealth: () => IndexHealthInfo; getStatus: () => IndexStatus; // Caching getCacheKey: typeof getCacheKey; getCachedResult: (cacheKey: string) => string | null; setCachedResult: (cacheKey: string, result: string) => void; clearCache: () => void; // Context getContextForFile: (filepath: string) => string | null; getContextForPath: (collectionId: number, path: string) => string | null; getCollectionIdByName: (name: string) => number | null; getCollectionByName: (name: string) => { id: number; name: string; pwd: string; glob_pattern: string } | null; // Virtual paths parseVirtualPath: typeof parseVirtualPath; buildVirtualPath: typeof buildVirtualPath; isVirtualPath: typeof isVirtualPath; resolveVirtualPath: (virtualPath: string) => string | null; toVirtualPath: (absolutePath: string) => string | null; // Search searchFTS: (query: string, limit?: number, collectionId?: number) => SearchResult[]; searchVec: (query: string, model: string, limit?: number, collectionId?: number) => Promise; // Query expansion & reranking expandQuery: (query: string, model?: string) => Promise; rerank: (query: string, documents: { file: string; text: string }[], model?: string) => Promise<{ file: string; score: number }[]>; // Document retrieval findDocument: (filename: string, options?: { includeBody?: boolean }) => DocumentResult | DocumentNotFound; getDocumentBody: (doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number) => string | null; findDocuments: (pattern: string, options?: { includeBody?: boolean; maxBytes?: number }) => { docs: MultiGetResult[]; errors: string[] }; // Legacy compatibility getDocument: (filename: string, fromLine?: number, maxLines?: number) => (DocumentResult & { body: string }) | DocumentNotFound; getMultipleDocuments: (pattern: string, maxLines?: number, maxBytes?: number) => { files: MultiGetFile[]; errors: string[] }; // Fuzzy matching findSimilarFiles: (query: string, maxDistance?: number, limit?: number) => string[]; matchFilesByGlob: (pattern: string) => { filepath: string; displayPath: string; bodyLength: number }[]; }; /** * Create a new store instance with the given database path. * If no path is provided, uses the default path (~/.cache/qmd/index.sqlite). * * @param dbPath - Path to the SQLite database file * @returns Store instance with all methods bound to the database */ export function createStore(dbPath?: string): Store { const resolvedPath = dbPath || getDefaultDbPath(); const db = new Database(resolvedPath); initializeDatabase(db); return { db, dbPath: resolvedPath, close: () => db.close(), ensureVecTable: (dimensions: number) => ensureVecTableInternal(db, dimensions), // Index health getHashesNeedingEmbedding: () => getHashesNeedingEmbedding(db), getIndexHealth: () => getIndexHealth(db), getStatus: () => getStatus(db), // Caching getCacheKey, getCachedResult: (cacheKey: string) => getCachedResult(db, cacheKey), setCachedResult: (cacheKey: string, result: string) => setCachedResult(db, cacheKey, result), clearCache: () => clearCache(db), // Context getContextForFile: (filepath: string) => getContextForFile(db, filepath), getContextForPath: (collectionId: number, path: string) => getContextForPath(db, collectionId, path), getCollectionIdByName: (name: string) => getCollectionIdByName(db, name), getCollectionByName: (name: string) => getCollectionByName(db, name), // Virtual paths parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath: (virtualPath: string) => resolveVirtualPath(db, virtualPath), toVirtualPath: (absolutePath: string) => toVirtualPath(db, absolutePath), // Search searchFTS: (query: string, limit?: number, collectionId?: number) => searchFTS(db, query, limit, collectionId), searchVec: (query: string, model: string, limit?: number, collectionId?: number) => searchVec(db, query, model, limit, collectionId), // Query expansion & reranking expandQuery: (query: string, model?: string) => expandQuery(query, model, db), rerank: (query: string, documents: { file: string; text: string }[], model?: string) => rerank(query, documents, model, db), // Document retrieval findDocument: (filename: string, options?: { includeBody?: boolean }) => findDocument(db, filename, options), getDocumentBody: (doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number) => getDocumentBody(db, doc, fromLine, maxLines), findDocuments: (pattern: string, options?: { includeBody?: boolean; maxBytes?: number }) => findDocuments(db, pattern, options), // Legacy compatibility getDocument: (filename: string, fromLine?: number, maxLines?: number) => getDocument(db, filename, fromLine, maxLines), getMultipleDocuments: (pattern: string, maxLines?: number, maxBytes?: number) => getMultipleDocuments(db, pattern, maxLines, maxBytes), // Fuzzy matching findSimilarFiles: (query: string, maxDistance?: number, limit?: number) => findSimilarFiles(db, query, maxDistance, limit), matchFilesByGlob: (pattern: string) => matchFilesByGlob(db, pattern), }; } // ============================================================================= // Legacy compatibility - will be removed // ============================================================================= let _legacyDb: Database | null = null; let _legacyDbPath: string | null = null; /** @deprecated Use createStore() instead */ export function setCustomIndexName(name: string | null): void { _legacyDbPath = name ? getDefaultDbPath(name) : null; _legacyDb = null; // Reset so next getDb() creates new connection } /** @deprecated Use createStore() instead */ export function getDbPath(): string { return _legacyDbPath || getDefaultDbPath(); } /** @deprecated Use createStore() instead */ export function getDb(): Database { if (!_legacyDb) { _legacyDb = new Database(getDbPath()); initializeDatabase(_legacyDb); } return _legacyDb; } /** @deprecated Use store.db.close() instead. Closes the legacy db and resets singleton. */ export function closeDb(): void { if (_legacyDb) { _legacyDb.close(); _legacyDb = null; } } /** @deprecated Use store.ensureVecTable() instead */ export function ensureVecTable(db: Database, dimensions: number): void { ensureVecTableInternal(db, dimensions); } // ============================================================================= // Core Document Type // ============================================================================= /** * Unified document result type with all metadata. * Body is optional - use getDocumentBody() to load it separately if needed. */ export type DocumentResult = { filepath: string; // Full filesystem path displayPath: string; // Short display path (e.g., "docs/readme.md") title: string; // Document title (from first heading or filename) context: string | null; // Folder context description if configured hash: string; // Content hash for caching/change detection collectionId: number; // Parent collection ID modifiedAt: string; // Last modification timestamp bodyLength: number; // Body length in bytes (useful before loading) body?: string; // Document body (optional, load with getDocumentBody) }; /** * Search result extends DocumentResult with score and source info */ export type SearchResult = DocumentResult & { score: number; // Relevance score (0-1) source: "fts" | "vec"; // Search source (full-text or vector) chunkPos?: number; // Character position of matching chunk (for vector search) }; /** * Ranked result for RRF fusion (simplified, used internally) */ export type RankedResult = { file: string; displayPath: string; title: string; body: string; score: number; }; /** * Error result when document is not found */ export type DocumentNotFound = { error: "not_found"; query: string; similarFiles: string[]; }; /** * Result from multi-get operations */ export type MultiGetResult = { doc: DocumentResult; skipped: false; } | { doc: Pick; skipped: true; skipReason: string; }; export type CollectionInfo = { id: number; path: string; pattern: string; documents: number; lastUpdated: string; }; export type IndexStatus = { totalDocuments: number; needsEmbedding: number; hasVectorIndex: boolean; collections: CollectionInfo[]; }; // ============================================================================= // Index health // ============================================================================= export function getHashesNeedingEmbedding(db: Database): number { const result = db.prepare(` SELECT COUNT(DISTINCT d.hash) as count FROM documents d LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0 WHERE d.active = 1 AND v.hash IS NULL `).get() as { count: number }; return result.count; } export type IndexHealthInfo = { needsEmbedding: number; totalDocs: number; daysStale: number | null; }; export function getIndexHealth(db: Database): IndexHealthInfo { const needsEmbedding = getHashesNeedingEmbedding(db); const totalDocs = (db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number }).count; const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null }; let daysStale: number | null = null; if (mostRecent?.latest) { const lastUpdate = new Date(mostRecent.latest); daysStale = Math.floor((Date.now() - lastUpdate.getTime()) / (24 * 60 * 60 * 1000)); } return { needsEmbedding, totalDocs, daysStale }; } // ============================================================================= // Caching // ============================================================================= export function getCacheKey(url: string, body: object): string { const hash = new Bun.CryptoHasher("sha256"); hash.update(url); hash.update(JSON.stringify(body)); return hash.digest("hex"); } export function getCachedResult(db: Database, cacheKey: string): string | null { const row = db.prepare(`SELECT result FROM ollama_cache WHERE hash = ?`).get(cacheKey) as { result: string } | null; return row?.result || null; } export function setCachedResult(db: Database, cacheKey: string, result: string): void { const now = new Date().toISOString(); db.prepare(`INSERT OR REPLACE INTO ollama_cache (hash, result, created_at) VALUES (?, ?, ?)`).run(cacheKey, result, now); if (Math.random() < 0.01) { db.exec(`DELETE FROM ollama_cache WHERE hash NOT IN (SELECT hash FROM ollama_cache ORDER BY created_at DESC LIMIT 1000)`); } } export function clearCache(db: Database): void { db.exec(`DELETE FROM ollama_cache`); } // ============================================================================= // Document helpers // ============================================================================= export async function hashContent(content: string): Promise { const hash = new Bun.CryptoHasher("sha256"); hash.update(content); return hash.digest("hex"); } export function extractTitle(content: string, filename: string): string { const match = content.match(/^##?\s+(.+)$/m); if (match) { const title = match[1].trim(); if (title === "📝 Notes" || title === "Notes") { const nextMatch = content.match(/^##\s+(.+)$/m); if (nextMatch) return nextMatch[1].trim(); } return title; } return filename.replace(/\.md$/, "").split("/").pop() || filename; } // Re-export from llm.ts for backwards compatibility export { formatQueryForEmbedding, formatDocForEmbedding }; export function chunkDocument(content: string, maxBytes: number = CHUNK_BYTE_SIZE): { text: string; pos: number }[] { const encoder = new TextEncoder(); const totalBytes = encoder.encode(content).length; if (totalBytes <= maxBytes) { return [{ text: content, pos: 0 }]; } const chunks: { text: string; pos: number }[] = []; let charPos = 0; while (charPos < content.length) { let endPos = charPos; let byteCount = 0; while (endPos < content.length && byteCount < maxBytes) { const charBytes = encoder.encode(content[endPos]).length; if (byteCount + charBytes > maxBytes) break; byteCount += charBytes; endPos++; } if (endPos < content.length && endPos > charPos) { const slice = content.slice(charPos, endPos); const paragraphBreak = slice.lastIndexOf('\n\n'); const sentenceEnd = Math.max( slice.lastIndexOf('. '), slice.lastIndexOf('.\n'), slice.lastIndexOf('? '), slice.lastIndexOf('?\n'), slice.lastIndexOf('! '), slice.lastIndexOf('!\n') ); const lineBreak = slice.lastIndexOf('\n'); const spaceBreak = slice.lastIndexOf(' '); let breakPoint = -1; if (paragraphBreak > slice.length * 0.5) { breakPoint = paragraphBreak + 2; } else if (sentenceEnd > slice.length * 0.5) { breakPoint = sentenceEnd + 2; } else if (lineBreak > slice.length * 0.3) { breakPoint = lineBreak + 1; } else if (spaceBreak > slice.length * 0.3) { breakPoint = spaceBreak + 1; } if (breakPoint > 0) { endPos = charPos + breakPoint; } } if (endPos <= charPos) { endPos = charPos + 1; } chunks.push({ text: content.slice(charPos, endPos), pos: charPos }); charPos = endPos; } return chunks; } // ============================================================================= // Fuzzy matching // ============================================================================= function levenshtein(a: string, b: string): number { const m = a.length, n = b.length; if (m === 0) return n; if (n === 0) return m; const dp: number[][] = Array.from({ length: m + 1 }, (_, i) => [i]); for (let j = 1; j <= n; j++) dp[0][j] = j; for (let i = 1; i <= m; i++) { for (let j = 1; j <= n; j++) { const cost = a[i - 1] === b[j - 1] ? 0 : 1; dp[i][j] = Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + cost); } } return dp[m][n]; } export function findSimilarFiles(db: Database, query: string, maxDistance: number = 3, limit: number = 5): string[] { const allFiles = db.prepare(`SELECT display_path FROM documents WHERE active = 1`).all() as { display_path: string }[]; const queryLower = query.toLowerCase(); const scored = allFiles .map(f => ({ path: f.display_path, dist: levenshtein(f.display_path.toLowerCase(), queryLower) })) .filter(f => f.dist <= maxDistance) .sort((a, b) => a.dist - b.dist) .slice(0, limit); return scored.map(f => f.path); } export function matchFilesByGlob(db: Database, pattern: string): { filepath: string; displayPath: string; bodyLength: number }[] { const allFiles = db.prepare(` SELECT 'qmd://' || c.name || '/' || d.path as virtual_path, LENGTH(content.doc) as body_length, d.collection_id, d.path FROM documents d JOIN collections c ON c.id = d.collection_id JOIN content ON content.hash = d.hash WHERE d.active = 1 `).all() as { virtual_path: string; body_length: number; collection_id: number; path: string }[]; const glob = new Glob(pattern); return allFiles .filter(f => glob.match(f.virtual_path) || glob.match(f.path)) .map(f => ({ filepath: f.virtual_path, // Use virtual path as filepath displayPath: f.virtual_path, bodyLength: f.body_length })); } // ============================================================================= // Context // ============================================================================= /** * Get context for a file path using hierarchical inheritance. * Contexts are collection-scoped and inherit from parent directories. * For example, context at "/talks" applies to "/talks/2024/keynote.md". * * @param db Database instance * @param collectionId Collection ID * @param path Relative path within the collection * @returns Context string or null if no context is defined */ export function getContextForPath(db: Database, collectionId: number, path: string): string | null { // Find the most specific (longest) matching path prefix for this collection const result = db.prepare(` SELECT context FROM path_contexts WHERE collection_id = ? AND (? LIKE path_prefix || '/%' OR ? = path_prefix OR path_prefix = '') ORDER BY LENGTH(path_prefix) DESC LIMIT 1 `).get(collectionId, path, path) as { context: string } | null; return result?.context || null; } /** * Legacy function for backward compatibility - resolves filepath to collection+path first */ export function getContextForFile(db: Database, filepath: string): string | null { // Try to find the document to get its collection_id and path const doc = db.prepare(` SELECT d.collection_id, d.path FROM documents d JOIN collections c ON c.id = d.collection_id WHERE c.pwd || '/' || d.path = ? AND d.active = 1 LIMIT 1 `).get(filepath) as { collection_id: number; path: string } | null; if (!doc) return null; return getContextForPath(db, doc.collection_id, doc.path); } /** * Get collection ID by its name (exact match). */ export function getCollectionIdByName(db: Database, name: string): number | null { const result = db.prepare(` SELECT id FROM collections WHERE name = ? LIMIT 1 `).get(name) as { id: number } | null; return result?.id || null; } /** * Get collection by name. */ export function getCollectionByName(db: Database, name: string): { id: number; name: string; pwd: string; glob_pattern: string } | null { const result = db.prepare(` SELECT id, name, pwd, glob_pattern FROM collections WHERE name = ? LIMIT 1 `).get(name) as { id: number; name: string; pwd: string; glob_pattern: string } | null; return result; } // ============================================================================= // FTS Search // ============================================================================= function sanitizeFTS5Term(term: string): string { return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase(); } function buildFTS5Query(query: string): string | null { const terms = query.split(/\s+/) .map(t => sanitizeFTS5Term(t)) .filter(t => t.length > 0); if (terms.length === 0) return null; if (terms.length === 1) return `"${terms[0]}"*`; return terms.map(t => `"${t}"*`).join(' AND '); } export function searchFTS(db: Database, query: string, limit: number = 20, collectionId?: number): SearchResult[] { const ftsQuery = buildFTS5Query(query); if (!ftsQuery) return []; let sql = ` SELECT 'qmd://' || c.name || '/' || d.path as filepath, 'qmd://' || c.name || '/' || d.path as display_path, d.title, content.doc as body, bm25(documents_fts, 10.0, 1.0) as score FROM documents_fts f JOIN documents d ON d.id = f.rowid JOIN collections c ON c.id = d.collection_id JOIN content ON content.hash = d.hash WHERE documents_fts MATCH ? AND d.active = 1 `; const params: (string | number)[] = [ftsQuery]; if (collectionId !== undefined) { sql += ` AND d.collection_id = ?`; params.push(collectionId); } sql += ` ORDER BY score LIMIT ?`; params.push(limit); const rows = db.prepare(sql).all(...params) as { filepath: string; display_path: string; title: string; body: string; score: number }[]; const maxScore = rows.length > 0 ? Math.max(...rows.map(r => Math.abs(r.score))) : 1; return rows.map(row => ({ file: row.filepath, displayPath: row.display_path, title: row.title, body: row.body, score: Math.abs(row.score) / maxScore, source: "fts" as const, })); } // ============================================================================= // Vector Search // ============================================================================= export async function searchVec(db: Database, query: string, model: string, limit: number = 20, collectionId?: number): Promise { const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get(); if (!tableExists) return []; const embedding = await getEmbedding(query, model, true); if (!embedding) return []; // sqlite-vec requires "k = ?" for KNN queries let sql = ` SELECT v.hash_seq, v.distance, 'qmd://' || c.name || '/' || d.path as filepath, 'qmd://' || c.name || '/' || d.path as display_path, d.title, content.doc as body, cv.pos FROM vectors_vec v JOIN content_vectors cv ON cv.hash || '_' || cv.seq = v.hash_seq JOIN documents d ON d.hash = cv.hash AND d.active = 1 JOIN collections c ON c.id = d.collection_id JOIN content ON content.hash = d.hash WHERE v.embedding MATCH ? AND k = ? `; if (collectionId !== undefined) { sql += ` AND d.collection_id = ${collectionId}`; } sql += ` ORDER BY v.distance`; const rows = db.prepare(sql).all(new Float32Array(embedding), limit * 3) as { hash_seq: string; distance: number; filepath: string; display_path: string; title: string; body: string; pos: number }[]; const seen = new Map(); for (const row of rows) { const existing = seen.get(row.filepath); if (!existing || row.distance < existing.bestDist) { seen.set(row.filepath, { row, bestDist: row.distance }); } } return Array.from(seen.values()) .sort((a, b) => a.bestDist - b.bestDist) .slice(0, limit) .map(({ row }) => ({ file: row.filepath, displayPath: row.display_path, title: row.title, body: row.body, score: 1 / (1 + row.distance), source: "vec" as const, chunkPos: row.pos, })); } // ============================================================================= // Embeddings // ============================================================================= async function getEmbedding(text: string, model: string, isQuery: boolean): Promise { const ollama = getDefaultOllama(); const result = await ollama.embed(text, { model, isQuery }); return result?.embedding || null; } // ============================================================================= // Query expansion // ============================================================================= export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database): Promise { // Check cache first const cacheKey = getCacheKey("expandQuery", { query, model }); const cached = getCachedResult(db, cacheKey); if (cached) { const lines = cached.split('\n').map(l => l.trim()).filter(l => l.length > 0); return [query, ...lines.slice(0, 2)]; } const ollama = getDefaultOllama(); const results = await ollama.expandQuery(query, model, 2); // Cache the expanded queries (excluding original) if (results.length > 1) { setCachedResult(db, cacheKey, results.slice(1).join('\n')); } return results; } // ============================================================================= // Reranking // ============================================================================= export async function rerank(query: string, documents: { file: string; text: string }[], model: string = DEFAULT_RERANK_MODEL, db: Database): Promise<{ file: string; score: number }[]> { const cachedResults: Map = new Map(); const uncachedDocs: RerankDocument[] = []; // Check cache for each document for (const doc of documents) { const cacheKey = getCacheKey("rerank", { query, file: doc.file, model }); const cached = getCachedResult(db, cacheKey); if (cached !== null) { cachedResults.set(doc.file, parseFloat(cached)); } else { uncachedDocs.push({ file: doc.file, text: doc.text }); } } // Rerank uncached documents using Ollama if (uncachedDocs.length > 0) { const ollama = getDefaultOllama(); const rerankResult = await ollama.rerank(query, uncachedDocs, { model }); // Cache results for (const result of rerankResult.results) { const cacheKey = getCacheKey("rerank", { query, file: result.file, model }); setCachedResult(db, cacheKey, result.score.toString()); cachedResults.set(result.file, result.score); } } // Return all results sorted by score return documents .map(doc => ({ file: doc.file, score: cachedResults.get(doc.file) || 0 })) .sort((a, b) => b.score - a.score); } // ============================================================================= // Reciprocal Rank Fusion // ============================================================================= export function reciprocalRankFusion( resultLists: RankedResult[][], weights: number[] = [], k: number = 60 ): RankedResult[] { const scores = new Map(); for (let listIdx = 0; listIdx < resultLists.length; listIdx++) { const list = resultLists[listIdx]; const weight = weights[listIdx] ?? 1.0; for (let rank = 0; rank < list.length; rank++) { const result = list[rank]; const rrfContribution = weight / (k + rank + 1); const existing = scores.get(result.file); if (existing) { existing.rrfScore += rrfContribution; existing.topRank = Math.min(existing.topRank, rank); } else { scores.set(result.file, { result, rrfScore: rrfContribution, topRank: rank, }); } } } // Top-rank bonus for (const entry of scores.values()) { if (entry.topRank === 0) { entry.rrfScore += 0.05; } else if (entry.topRank <= 2) { entry.rrfScore += 0.02; } } return Array.from(scores.values()) .sort((a, b) => b.rrfScore - a.rrfScore) .map(e => ({ ...e.result, score: e.rrfScore })); } // ============================================================================= // Document retrieval // ============================================================================= type DbDocRow = { filepath: string; display_path: string; title: string; hash: string; collection_id: number; modified_at: string; body_length: number; body?: string; }; /** * Find a document by filename/path (with fuzzy matching) * Returns document metadata without body by default */ export function findDocument(db: Database, filename: string, options: { includeBody?: boolean } = {}): DocumentResult | DocumentNotFound { let filepath = filename; const colonMatch = filepath.match(/:(\d+)$/); if (colonMatch) { filepath = filepath.slice(0, -colonMatch[0].length); } if (filepath.startsWith('~/')) { filepath = homedir() + filepath.slice(1); } const selectCols = options.includeBody ? `filepath, display_path, title, hash, collection_id, modified_at, LENGTH(body) as body_length, body` : `filepath, display_path, title, hash, collection_id, modified_at, LENGTH(body) as body_length`; // Try various match strategies let doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE filepath = ? AND active = 1`).get(filepath) as DbDocRow | null; if (!doc) { doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE display_path = ? AND active = 1`).get(filepath) as DbDocRow | null; } if (!doc) { doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE filepath LIKE ? AND active = 1 LIMIT 1`).get(`%${filepath}`) as DbDocRow | null; } if (!doc) { doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE display_path LIKE ? AND active = 1 LIMIT 1`).get(`%${filepath}`) as DbDocRow | null; } if (!doc) { const similar = findSimilarFiles(db, filepath, 5, 5); return { error: "not_found", query: filename, similarFiles: similar }; } const context = getContextForFile(db, doc.filepath); return { filepath: doc.filepath, displayPath: doc.display_path, title: doc.title, context, hash: doc.hash, collectionId: doc.collection_id, modifiedAt: doc.modified_at, bodyLength: doc.body_length, ...(options.includeBody && doc.body !== undefined && { body: doc.body }), }; } /** * Get the body content for a document * Optionally slice by line range */ export function getDocumentBody(db: Database, doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number): string | null { const filepath = 'filepath' in doc ? doc.filepath : doc.filepath; const row = db.prepare(`SELECT body FROM documents WHERE filepath = ? AND active = 1`).get(filepath) as { body: string } | null; if (!row) return null; let body = row.body; if (fromLine !== undefined || maxLines !== undefined) { const lines = body.split('\n'); const start = (fromLine || 1) - 1; const end = maxLines !== undefined ? start + maxLines : lines.length; body = lines.slice(start, end).join('\n'); } return body; } /** * Legacy function for backwards compatibility * Combines findDocument + getDocumentBody with line slicing */ export function getDocument(db: Database, filename: string, fromLine?: number, maxLines?: number): (DocumentResult & { body: string }) | DocumentNotFound { // Parse :line suffix let parsedFromLine = fromLine; let filepath = filename; const colonMatch = filepath.match(/:(\d+)$/); if (colonMatch && !parsedFromLine) { parsedFromLine = parseInt(colonMatch[1], 10); filepath = filepath.slice(0, -colonMatch[0].length); } const result = findDocument(db, filepath, { includeBody: true }); if ("error" in result) return result; let body = result.body || ""; if (parsedFromLine !== undefined || maxLines !== undefined) { const lines = body.split('\n'); const start = (parsedFromLine || 1) - 1; const end = maxLines !== undefined ? start + maxLines : lines.length; body = lines.slice(start, end).join('\n'); } return { ...result, body }; } /** * Find multiple documents by glob pattern or comma-separated list * Returns documents without body by default (use getDocumentBody to load) */ export function findDocuments( db: Database, pattern: string, options: { includeBody?: boolean; maxBytes?: number } = {} ): { docs: MultiGetResult[]; errors: string[] } { const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?'); const errors: string[] = []; const maxBytes = options.maxBytes ?? DEFAULT_MULTI_GET_MAX_BYTES; const selectCols = options.includeBody ? `filepath, display_path, title, hash, collection_id, modified_at, LENGTH(body) as body_length, body` : `filepath, display_path, title, hash, collection_id, modified_at, LENGTH(body) as body_length`; let fileRows: DbDocRow[]; if (isCommaSeparated) { const names = pattern.split(',').map(s => s.trim()).filter(Boolean); fileRows = []; for (const name of names) { let doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE display_path = ? AND active = 1`).get(name) as DbDocRow | null; if (!doc) { doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE display_path LIKE ? AND active = 1 LIMIT 1`).get(`%${name}`) as DbDocRow | null; } if (doc) { fileRows.push(doc); } else { const similar = findSimilarFiles(db, name, 5, 3); let msg = `File not found: ${name}`; if (similar.length > 0) { msg += ` (did you mean: ${similar.join(', ')}?)`; } errors.push(msg); } } } else { // Glob pattern match const matched = matchFilesByGlob(db, pattern); if (matched.length === 0) { errors.push(`No files matched pattern: ${pattern}`); return { docs: [], errors }; } const filepaths = matched.map(m => m.filepath); const placeholders = filepaths.map(() => '?').join(','); fileRows = db.prepare(`SELECT ${selectCols} FROM documents WHERE filepath IN (${placeholders}) AND active = 1`).all(...filepaths) as DbDocRow[]; } const results: MultiGetResult[] = []; for (const row of fileRows) { const context = getContextForFile(db, row.filepath); if (row.body_length > maxBytes) { results.push({ doc: { filepath: row.filepath, displayPath: row.display_path }, skipped: true, skipReason: `File too large (${Math.round(row.body_length / 1024)}KB > ${Math.round(maxBytes / 1024)}KB)`, }); continue; } results.push({ doc: { filepath: row.filepath, displayPath: row.display_path, title: row.title || row.display_path.split('/').pop() || row.display_path, context, hash: row.hash, collectionId: row.collection_id, modifiedAt: row.modified_at, bodyLength: row.body_length, ...(options.includeBody && row.body !== undefined && { body: row.body }), }, skipped: false, }); } return { docs: results, errors }; } /** * Legacy function for backwards compatibility */ export function getMultipleDocuments(db: Database, pattern: string, maxLines?: number, maxBytes: number = DEFAULT_MULTI_GET_MAX_BYTES): { files: MultiGetFile[]; errors: string[] } { const { docs, errors } = findDocuments(db, pattern, { includeBody: true, maxBytes }); const files: MultiGetFile[] = docs.map(result => { if (result.skipped) { return { filepath: result.doc.filepath, displayPath: result.doc.displayPath, title: "", body: "", context: null, skipped: true as const, skipReason: result.skipReason, }; } let body = result.doc.body || ""; if (maxLines !== undefined) { const lines = body.split('\n'); body = lines.slice(0, maxLines).join('\n'); if (lines.length > maxLines) { body += `\n\n[... truncated ${lines.length - maxLines} more lines]`; } } return { filepath: result.doc.filepath, displayPath: result.doc.displayPath, title: result.doc.title, body, context: result.doc.context, skipped: false as const, }; }); return { files, errors }; } // Keep the old MultiGetFile type for backwards compatibility export type MultiGetFile = { filepath: string; displayPath: string; title: string; body: string; context: string | null; skipped: false; } | { filepath: string; displayPath: string; title: string; body: string; context: string | null; skipped: true; skipReason: string; }; // ============================================================================= // Status // ============================================================================= export function getStatus(db: Database): IndexStatus { const collections = db.prepare(` SELECT c.id, c.pwd, c.glob_pattern, c.created_at, COUNT(d.id) as active_count, MAX(d.modified_at) as last_doc_update FROM collections c LEFT JOIN documents d ON d.collection_id = c.id AND d.active = 1 GROUP BY c.id ORDER BY last_doc_update DESC `).all() as { id: number; pwd: string; glob_pattern: string; created_at: string; active_count: number; last_doc_update: string | null }[]; const totalDocs = (db.prepare(`SELECT COUNT(*) as c FROM documents WHERE active = 1`).get() as { c: number }).c; const needsEmbedding = getHashesNeedingEmbedding(db); const hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get(); return { totalDocuments: totalDocs, needsEmbedding, hasVectorIndex: hasVectors, collections: collections.map(col => ({ id: col.id, path: col.pwd, pattern: col.glob_pattern, documents: col.active_count, lastUpdated: col.last_doc_update || col.created_at, })), }; } // ============================================================================= // Snippet extraction // ============================================================================= export type SnippetResult = { line: number; // 1-indexed line number of best match snippet: string; // The snippet text with diff-style header linesBefore: number; // Lines in document before snippet linesAfter: number; // Lines in document after snippet snippetLines: number; // Number of lines in snippet }; export function extractSnippet(body: string, query: string, maxLen = 500, chunkPos?: number): SnippetResult { const totalLines = body.split('\n').length; let searchBody = body; let lineOffset = 0; if (chunkPos && chunkPos > 0) { const contextStart = Math.max(0, chunkPos - 100); const contextEnd = Math.min(body.length, chunkPos + maxLen + 100); searchBody = body.slice(contextStart, contextEnd); if (contextStart > 0) { lineOffset = body.slice(0, contextStart).split('\n').length - 1; } } const lines = searchBody.split('\n'); const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 0); let bestLine = 0, bestScore = -1; for (let i = 0; i < lines.length; i++) { const lineLower = lines[i].toLowerCase(); let score = 0; for (const term of queryTerms) { if (lineLower.includes(term)) score++; } if (score > bestScore) { bestScore = score; bestLine = i; } } const start = Math.max(0, bestLine - 1); const end = Math.min(lines.length, bestLine + 3); const snippetLines = lines.slice(start, end); let snippetText = snippetLines.join('\n'); if (snippetText.length > maxLen) snippetText = snippetText.substring(0, maxLen - 3) + "..."; const absoluteStart = lineOffset + start + 1; // 1-indexed const snippetLineCount = snippetLines.length; const linesBefore = absoluteStart - 1; const linesAfter = totalLines - (absoluteStart + snippetLineCount - 1); // Format with diff-style header: @@ -start,count @@ (linesBefore before, linesAfter after) const header = `@@ -${absoluteStart},${snippetLineCount} @@ (${linesBefore} before, ${linesAfter} after)`; const snippet = `${header}\n${snippetText}`; return { line: lineOffset + bestLine + 1, snippet, linesBefore, linesAfter, snippetLines: snippetLineCount, }; }