|
|
@@ -65,7 +65,7 @@ import {
|
|
|
createStore,
|
|
|
getDefaultDbPath,
|
|
|
} from "./store.js";
|
|
|
-import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, type RerankDocument, type Queryable, type QueryType } from "./llm.js";
|
|
|
+import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, withLLMSession, type ILLMSession, type RerankDocument, type Queryable, type QueryType } from "./llm.js";
|
|
|
import type { SearchResult, RankedResult } from "./store.js";
|
|
|
import {
|
|
|
formatSearchResults,
|
|
|
@@ -231,20 +231,21 @@ function computeDisplayPath(
|
|
|
}
|
|
|
|
|
|
// Rerank documents using node-llama-cpp cross-encoder model
|
|
|
-async function rerank(query: string, documents: { file: string; text: string }[], _model: string = DEFAULT_RERANK_MODEL, _db?: Database): Promise<{ file: string; score: number }[]> {
|
|
|
+async function rerank(query: string, documents: { file: string; text: string }[], _model: string = DEFAULT_RERANK_MODEL, _db?: Database, session?: ILLMSession): Promise<{ file: string; score: number }[]> {
|
|
|
if (documents.length === 0) return [];
|
|
|
|
|
|
const total = documents.length;
|
|
|
process.stderr.write(`Reranking ${total} documents...\n`);
|
|
|
progress.indeterminate();
|
|
|
|
|
|
- const llm = getDefaultLlamaCpp();
|
|
|
const rerankDocs: RerankDocument[] = documents.map((doc) => ({
|
|
|
file: doc.file,
|
|
|
text: doc.text.slice(0, 4000), // Truncate to context limit
|
|
|
}));
|
|
|
|
|
|
- const result = await llm.rerank(query, rerankDocs);
|
|
|
+ const result = session
|
|
|
+ ? await session.rerank(query, rerankDocs)
|
|
|
+ : await getDefaultLlamaCpp().rerank(query, rerankDocs);
|
|
|
|
|
|
progress.clear();
|
|
|
process.stderr.write("\n");
|
|
|
@@ -1543,7 +1544,7 @@ async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean =
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- const totalBytes = allChunks.reduce((sum, c) => sum + c.bytes, 0);
|
|
|
+ const totalBytes = allChunks.reduce((sum, chk) => sum + chk.bytes, 0);
|
|
|
const totalChunks = allChunks.length;
|
|
|
const totalDocs = hashesToEmbed.length;
|
|
|
|
|
|
@@ -1556,99 +1557,103 @@ async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean =
|
|
|
// Hide cursor during embedding
|
|
|
cursor.hide();
|
|
|
|
|
|
- // Get embedding dimensions from first chunk
|
|
|
- progress.indeterminate();
|
|
|
- const llm = getDefaultLlamaCpp();
|
|
|
- const firstChunk = allChunks[0];
|
|
|
- if (!firstChunk) {
|
|
|
- throw new Error("No chunks available to embed");
|
|
|
- }
|
|
|
- const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
|
|
|
- const firstResult = await llm.embed(firstText);
|
|
|
- if (!firstResult) {
|
|
|
- throw new Error("Failed to get embedding dimensions from first chunk");
|
|
|
- }
|
|
|
- ensureVecTable(db, firstResult.embedding.length);
|
|
|
+ // Wrap all LLM embedding operations in a session for lifecycle management
|
|
|
+ // Use 30 minute timeout for large collections
|
|
|
+ await withLLMSession(async (session) => {
|
|
|
+ // Get embedding dimensions from first chunk
|
|
|
+ progress.indeterminate();
|
|
|
+ const firstChunk = allChunks[0];
|
|
|
+ if (!firstChunk) {
|
|
|
+ throw new Error("No chunks available to embed");
|
|
|
+ }
|
|
|
+ const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
|
|
|
+ const firstResult = await session.embed(firstText);
|
|
|
+ if (!firstResult) {
|
|
|
+ throw new Error("Failed to get embedding dimensions from first chunk");
|
|
|
+ }
|
|
|
+ ensureVecTable(db, firstResult.embedding.length);
|
|
|
|
|
|
- let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
|
|
|
- const startTime = Date.now();
|
|
|
+ let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
|
|
|
+ const startTime = Date.now();
|
|
|
|
|
|
- // Batch embedding for better throughput
|
|
|
- // Process in batches of 32 to balance memory usage and efficiency
|
|
|
- const BATCH_SIZE = 32;
|
|
|
+ // Batch embedding for better throughput
|
|
|
+ // Process in batches of 32 to balance memory usage and efficiency
|
|
|
+ const BATCH_SIZE = 32;
|
|
|
|
|
|
- for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
|
|
|
- const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
|
|
|
- const batch = allChunks.slice(batchStart, batchEnd);
|
|
|
+ for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
|
|
|
+ const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
|
|
|
+ const batch = allChunks.slice(batchStart, batchEnd);
|
|
|
|
|
|
- // Format texts for embedding
|
|
|
- const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));
|
|
|
+ // Format texts for embedding
|
|
|
+ const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));
|
|
|
|
|
|
- try {
|
|
|
- // Batch embed all texts at once
|
|
|
- const embeddings = await llm.embedBatch(texts);
|
|
|
+ try {
|
|
|
+ // Batch embed all texts at once
|
|
|
+ const embeddings = await session.embedBatch(texts);
|
|
|
|
|
|
- // Insert each embedding
|
|
|
- for (let i = 0; i < batch.length; i++) {
|
|
|
- const chunk = batch[i]!;
|
|
|
- const embedding = embeddings[i];
|
|
|
+ // Insert each embedding
|
|
|
+ for (let i = 0; i < batch.length; i++) {
|
|
|
+ const chunk = batch[i]!;
|
|
|
+ const embedding = embeddings[i];
|
|
|
|
|
|
- if (embedding) {
|
|
|
- insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
|
|
|
- chunksEmbedded++;
|
|
|
- } else {
|
|
|
- errors++;
|
|
|
- console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
|
|
|
- }
|
|
|
- bytesProcessed += chunk.bytes;
|
|
|
- }
|
|
|
- } catch (err) {
|
|
|
- // If batch fails, try individual embeddings as fallback
|
|
|
- for (const chunk of batch) {
|
|
|
- try {
|
|
|
- const text = formatDocForEmbedding(chunk.text, chunk.title);
|
|
|
- const result = await llm.embed(text);
|
|
|
- if (result) {
|
|
|
- insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
|
|
|
+ if (embedding) {
|
|
|
+ insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
|
|
|
chunksEmbedded++;
|
|
|
} else {
|
|
|
errors++;
|
|
|
+ console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
|
|
|
}
|
|
|
- } catch (innerErr) {
|
|
|
- errors++;
|
|
|
- console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}: ${innerErr}${c.reset}`);
|
|
|
+ bytesProcessed += chunk.bytes;
|
|
|
+ }
|
|
|
+ } catch (err) {
|
|
|
+ // If batch fails, try individual embeddings as fallback
|
|
|
+ for (const chunk of batch) {
|
|
|
+ try {
|
|
|
+ const text = formatDocForEmbedding(chunk.text, chunk.title);
|
|
|
+ const result = await session.embed(text);
|
|
|
+ if (result) {
|
|
|
+ insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
|
|
|
+ chunksEmbedded++;
|
|
|
+ } else {
|
|
|
+ errors++;
|
|
|
+ }
|
|
|
+ } catch (innerErr) {
|
|
|
+ errors++;
|
|
|
+ console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}: ${innerErr}${c.reset}`);
|
|
|
+ }
|
|
|
+ bytesProcessed += chunk.bytes;
|
|
|
}
|
|
|
- bytesProcessed += chunk.bytes;
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- const percent = (bytesProcessed / totalBytes) * 100;
|
|
|
- progress.set(percent);
|
|
|
+ const percent = (bytesProcessed / totalBytes) * 100;
|
|
|
+ progress.set(percent);
|
|
|
|
|
|
- const elapsed = (Date.now() - startTime) / 1000;
|
|
|
- const bytesPerSec = bytesProcessed / elapsed;
|
|
|
- const remainingBytes = totalBytes - bytesProcessed;
|
|
|
- const etaSec = remainingBytes / bytesPerSec;
|
|
|
+ const elapsed = (Date.now() - startTime) / 1000;
|
|
|
+ const bytesPerSec = bytesProcessed / elapsed;
|
|
|
+ const remainingBytes = totalBytes - bytesProcessed;
|
|
|
+ const etaSec = remainingBytes / bytesPerSec;
|
|
|
|
|
|
- const bar = renderProgressBar(percent);
|
|
|
- const percentStr = percent.toFixed(0).padStart(3);
|
|
|
- const throughput = `${formatBytes(bytesPerSec)}/s`;
|
|
|
- const eta = elapsed > 2 ? formatETA(etaSec) : "...";
|
|
|
- const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";
|
|
|
+ const bar = renderProgressBar(percent);
|
|
|
+ const percentStr = percent.toFixed(0).padStart(3);
|
|
|
+ const throughput = `${formatBytes(bytesPerSec)}/s`;
|
|
|
+ const eta = elapsed > 2 ? formatETA(etaSec) : "...";
|
|
|
+ const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";
|
|
|
|
|
|
- process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
|
|
|
- }
|
|
|
+ process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
|
|
|
+ }
|
|
|
|
|
|
- progress.clear();
|
|
|
- cursor.show();
|
|
|
- const totalTimeSec = (Date.now() - startTime) / 1000;
|
|
|
- const avgThroughput = formatBytes(totalBytes / totalTimeSec);
|
|
|
+ progress.clear();
|
|
|
+ cursor.show();
|
|
|
+ const totalTimeSec = (Date.now() - startTime) / 1000;
|
|
|
+ const avgThroughput = formatBytes(totalBytes / totalTimeSec);
|
|
|
+
|
|
|
+ console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
|
|
|
+ console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${chunksEmbedded}${c.reset} chunks from ${c.bold}${totalDocs}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset} ${c.dim}(${avgThroughput}/s)${c.reset}`);
|
|
|
+ if (errors > 0) {
|
|
|
+ console.log(`${c.yellow}⚠ ${errors} chunks failed${c.reset}`);
|
|
|
+ }
|
|
|
+ }, { maxDuration: 30 * 60 * 1000, name: 'embed-command' });
|
|
|
|
|
|
- console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
|
|
|
- console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${chunksEmbedded}${c.reset} chunks from ${c.bold}${totalDocs}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset} ${c.dim}(${avgThroughput}/s)${c.reset}`);
|
|
|
- if (errors > 0) {
|
|
|
- console.log(`${c.yellow}⚠ ${errors} chunks failed${c.reset}`);
|
|
|
- }
|
|
|
closeDb();
|
|
|
}
|
|
|
|
|
|
@@ -1975,60 +1980,64 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
|
|
|
// Check index health and warn about issues
|
|
|
checkIndexHealth(db);
|
|
|
|
|
|
- // Expand query using structured output (no lexical for vector-only search)
|
|
|
- const queryables = await expandQueryStructured(query, false, opts.context);
|
|
|
+ // Wrap LLM operations in a session for lifecycle management
|
|
|
+ await withLLMSession(async (session) => {
|
|
|
+ // Expand query using structured output (no lexical for vector-only search)
|
|
|
+ const queryables = await expandQueryStructured(query, false, opts.context, session);
|
|
|
|
|
|
- // Build list of queries for vector search: original, vec, and hyde
|
|
|
- const vectorQueries: string[] = [query];
|
|
|
- for (const q of queryables) {
|
|
|
- if (q.type === 'vec' || q.type === 'hyde') {
|
|
|
- if (q.text && q.text !== query) {
|
|
|
- vectorQueries.push(q.text);
|
|
|
+ // Build list of queries for vector search: original, vec, and hyde
|
|
|
+ const vectorQueries: string[] = [query];
|
|
|
+ for (const q of queryables) {
|
|
|
+ if (q.type === 'vec' || q.type === 'hyde') {
|
|
|
+ if (q.text && q.text !== query) {
|
|
|
+ vectorQueries.push(q.text);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- process.stderr.write(`${c.dim}Searching ${vectorQueries.length} vector queries...${c.reset}\n`);
|
|
|
+ process.stderr.write(`${c.dim}Searching ${vectorQueries.length} vector queries...${c.reset}\n`);
|
|
|
|
|
|
- // Collect results from all query variations
|
|
|
- const perQueryLimit = opts.all ? 500 : 20;
|
|
|
- const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; hash: string }>();
|
|
|
+ // Collect results from all query variations
|
|
|
+ const perQueryLimit = opts.all ? 500 : 20;
|
|
|
+ const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; hash: string }>();
|
|
|
|
|
|
- // IMPORTANT: Run vector searches sequentially, not with Promise.all.
|
|
|
- // node-llama-cpp's embedding context hangs when multiple concurrent embed() calls
|
|
|
- // are made. This is a known limitation of the LlamaEmbeddingContext.
|
|
|
- // See: https://github.com/tobi/qmd/pull/23
|
|
|
- for (const q of vectorQueries) {
|
|
|
- const vecResults = await searchVec(db, q, model, perQueryLimit, collectionName as any);
|
|
|
- for (const r of vecResults) {
|
|
|
- const existing = allResults.get(r.filepath);
|
|
|
- if (!existing || r.score > existing.score) {
|
|
|
- allResults.set(r.filepath, { file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score, hash: r.hash });
|
|
|
+ // IMPORTANT: Run vector searches sequentially, not with Promise.all.
|
|
|
+ // node-llama-cpp's embedding context hangs when multiple concurrent embed() calls
|
|
|
+ // are made. This is a known limitation of the LlamaEmbeddingContext.
|
|
|
+ // See: https://github.com/tobi/qmd/pull/23
|
|
|
+ for (const q of vectorQueries) {
|
|
|
+ const vecResults = await searchVec(db, q, model, perQueryLimit, collectionName as any, session);
|
|
|
+ for (const r of vecResults) {
|
|
|
+ const existing = allResults.get(r.filepath);
|
|
|
+ if (!existing || r.score > existing.score) {
|
|
|
+ allResults.set(r.filepath, { file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score, hash: r.hash });
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- // Sort by max score and limit to requested count
|
|
|
- const results = Array.from(allResults.values())
|
|
|
- .sort((a, b) => b.score - a.score)
|
|
|
- .slice(0, opts.limit)
|
|
|
- .map(r => ({ ...r, context: getContextForFile(db, r.file) }));
|
|
|
+ // Sort by max score and limit to requested count
|
|
|
+ const results = Array.from(allResults.values())
|
|
|
+ .sort((a, b) => b.score - a.score)
|
|
|
+ .slice(0, opts.limit)
|
|
|
+ .map(r => ({ ...r, context: getContextForFile(db, r.file) }));
|
|
|
|
|
|
- closeDb();
|
|
|
+ closeDb();
|
|
|
|
|
|
- if (results.length === 0) {
|
|
|
- console.log("No results found.");
|
|
|
- return;
|
|
|
- }
|
|
|
- outputResults(results, query, { ...opts, limit: results.length }); // Already limited
|
|
|
+ if (results.length === 0) {
|
|
|
+ console.log("No results found.");
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ outputResults(results, query, { ...opts, limit: results.length }); // Already limited
|
|
|
+ }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
|
|
|
}
|
|
|
|
|
|
// Expand query using structured output with GBNF grammar
|
|
|
-async function expandQueryStructured(query: string, includeLexical: boolean = true, context?: string): Promise<Queryable[]> {
|
|
|
+async function expandQueryStructured(query: string, includeLexical: boolean = true, context?: string, session?: ILLMSession): Promise<Queryable[]> {
|
|
|
process.stderr.write(`${c.dim}Expanding query...${c.reset}\n`);
|
|
|
|
|
|
- const llm = getDefaultLlamaCpp();
|
|
|
- const queryables = await llm.expandQuery(query, { includeLexical, context });
|
|
|
+ const queryables = session
|
|
|
+ ? await session.expandQuery(query, { includeLexical, context })
|
|
|
+ : await getDefaultLlamaCpp().expandQuery(query, { includeLexical, context });
|
|
|
|
|
|
// Log the expansion as a tree
|
|
|
const lines: string[] = [];
|
|
|
@@ -2060,8 +2069,8 @@ async function expandQueryStructured(query: string, includeLexical: boolean = tr
|
|
|
return queryables;
|
|
|
}
|
|
|
|
|
|
-async function expandQuery(query: string, _model: string = DEFAULT_QUERY_MODEL, _db?: Database): Promise<string[]> {
|
|
|
- const queryables = await expandQueryStructured(query, true);
|
|
|
+async function expandQuery(query: string, _model: string = DEFAULT_QUERY_MODEL, _db?: Database, session?: ILLMSession): Promise<string[]> {
|
|
|
+ const queryables = await expandQueryStructured(query, true, undefined, session);
|
|
|
const queries = new Set<string>([query]);
|
|
|
for (const q of queryables) {
|
|
|
queries.add(q.text);
|
|
|
@@ -2098,178 +2107,182 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin
|
|
|
const secondScore = initialFts[1]?.score ?? 0;
|
|
|
const hasStrongSignal = initialFts.length > 0 && topScore >= 0.85 && (topScore - secondScore) >= 0.15;
|
|
|
|
|
|
- let ftsQueries: string[] = [query];
|
|
|
- let vectorQueries: string[] = [query];
|
|
|
-
|
|
|
- if (hasStrongSignal) {
|
|
|
- // Strong BM25 signal - skip expensive LLM expansion
|
|
|
- process.stderr.write(`${c.dim}Strong BM25 signal (${topScore.toFixed(2)}) - skipping expansion${c.reset}\n`);
|
|
|
- // Still log the "expansion tree" in the same style as vsearch for consistency.
|
|
|
- {
|
|
|
- const lines: string[] = [];
|
|
|
- lines.push(`${c.dim}├─ ${query} · (lexical+vector)${c.reset}`);
|
|
|
- lines[lines.length - 1] = lines[lines.length - 1]!.replace('├─', '└─');
|
|
|
- for (const line of lines) process.stderr.write(line + '\n');
|
|
|
- }
|
|
|
- } else {
|
|
|
- // Weak signal - expand query for better recall
|
|
|
- const queryables = await expandQueryStructured(query, true, opts.context);
|
|
|
-
|
|
|
- for (const q of queryables) {
|
|
|
- if (q.type === 'lex') {
|
|
|
- if (q.text && q.text !== query) ftsQueries.push(q.text);
|
|
|
- } else if (q.type === 'vec' || q.type === 'hyde') {
|
|
|
- if (q.text && q.text !== query) vectorQueries.push(q.text);
|
|
|
+ // Wrap LLM operations in a session for lifecycle management
|
|
|
+ await withLLMSession(async (session) => {
|
|
|
+ let ftsQueries: string[] = [query];
|
|
|
+ let vectorQueries: string[] = [query];
|
|
|
+
|
|
|
+ if (hasStrongSignal) {
|
|
|
+ // Strong BM25 signal - skip expensive LLM expansion
|
|
|
+ process.stderr.write(`${c.dim}Strong BM25 signal (${topScore.toFixed(2)}) - skipping expansion${c.reset}\n`);
|
|
|
+ // Still log the "expansion tree" in the same style as vsearch for consistency.
|
|
|
+ {
|
|
|
+ const lines: string[] = [];
|
|
|
+ lines.push(`${c.dim}├─ ${query} · (lexical+vector)${c.reset}`);
|
|
|
+ lines[lines.length - 1] = lines[lines.length - 1]!.replace('├─', '└─');
|
|
|
+ for (const line of lines) process.stderr.write(line + '\n');
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ // Weak signal - expand query for better recall
|
|
|
+ const queryables = await expandQueryStructured(query, true, opts.context, session);
|
|
|
+
|
|
|
+ for (const q of queryables) {
|
|
|
+ if (q.type === 'lex') {
|
|
|
+ if (q.text && q.text !== query) ftsQueries.push(q.text);
|
|
|
+ } else if (q.type === 'vec' || q.type === 'hyde') {
|
|
|
+ if (q.text && q.text !== query) vectorQueries.push(q.text);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
-
|
|
|
- process.stderr.write(`${c.dim}Searching ${ftsQueries.length} lexical + ${vectorQueries.length} vector queries...${c.reset}\n`);
|
|
|
|
|
|
- // Collect ranked result lists for RRF fusion
|
|
|
- const rankedLists: RankedResult[][] = [];
|
|
|
+ process.stderr.write(`${c.dim}Searching ${ftsQueries.length} lexical + ${vectorQueries.length} vector queries...${c.reset}\n`);
|
|
|
|
|
|
- // Map to store hash by filepath for final results
|
|
|
- const hashMap = new Map<string, string>();
|
|
|
+ // Collect ranked result lists for RRF fusion
|
|
|
+ const rankedLists: RankedResult[][] = [];
|
|
|
|
|
|
- // Run all searches concurrently (FTS + Vector)
|
|
|
- const searchPromises: Promise<void>[] = [];
|
|
|
+ // Map to store hash by filepath for final results
|
|
|
+ const hashMap = new Map<string, string>();
|
|
|
|
|
|
- // FTS searches
|
|
|
- for (const q of ftsQueries) {
|
|
|
- if (!q) continue;
|
|
|
- searchPromises.push((async () => {
|
|
|
- const ftsResults = searchFTS(db, q, 20, (collectionName || "") as any);
|
|
|
- if (ftsResults.length > 0) {
|
|
|
- for (const r of ftsResults) {
|
|
|
- // Mutex for hashMap is not strictly needed as it's just adding values
|
|
|
- hashMap.set(r.filepath, r.hash);
|
|
|
- }
|
|
|
- rankedLists.push(ftsResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
|
|
|
- }
|
|
|
- })());
|
|
|
- }
|
|
|
+ // Run all searches concurrently (FTS + Vector)
|
|
|
+ const searchPromises: Promise<void>[] = [];
|
|
|
|
|
|
- // Vector searches
|
|
|
- if (hasVectors) {
|
|
|
- for (const q of vectorQueries) {
|
|
|
+ // FTS searches
|
|
|
+ for (const q of ftsQueries) {
|
|
|
if (!q) continue;
|
|
|
searchPromises.push((async () => {
|
|
|
- const vecResults = await searchVec(db, q, embedModel, 20, (collectionName || "") as any);
|
|
|
- if (vecResults.length > 0) {
|
|
|
- for (const r of vecResults) hashMap.set(r.filepath, r.hash);
|
|
|
- rankedLists.push(vecResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
|
|
|
+ const ftsResults = searchFTS(db, q, 20, (collectionName || "") as any);
|
|
|
+ if (ftsResults.length > 0) {
|
|
|
+ for (const r of ftsResults) {
|
|
|
+ // Mutex for hashMap is not strictly needed as it's just adding values
|
|
|
+ hashMap.set(r.filepath, r.hash);
|
|
|
+ }
|
|
|
+ rankedLists.push(ftsResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
|
|
|
}
|
|
|
})());
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- await Promise.all(searchPromises);
|
|
|
+ // Vector searches (session ensures contexts stay alive)
|
|
|
+ if (hasVectors) {
|
|
|
+ for (const q of vectorQueries) {
|
|
|
+ if (!q) continue;
|
|
|
+ searchPromises.push((async () => {
|
|
|
+ const vecResults = await searchVec(db, q, embedModel, 20, (collectionName || "") as any, session);
|
|
|
+ if (vecResults.length > 0) {
|
|
|
+ for (const r of vecResults) hashMap.set(r.filepath, r.hash);
|
|
|
+ rankedLists.push(vecResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
|
|
|
+ }
|
|
|
+ })());
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- // Apply Reciprocal Rank Fusion to combine all ranked lists
|
|
|
- // Give 2x weight to original query results (first 2 lists: FTS + vector)
|
|
|
- const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
|
|
|
- const fused = reciprocalRankFusion(rankedLists, weights);
|
|
|
- // Hard cap reranking for latency/cost. We rerank per-document (best chunk only).
|
|
|
- const RERANK_DOC_LIMIT = 40;
|
|
|
- const candidates = fused.slice(0, RERANK_DOC_LIMIT);
|
|
|
+ await Promise.all(searchPromises);
|
|
|
|
|
|
- if (candidates.length === 0) {
|
|
|
- console.log("No results found.");
|
|
|
- closeDb();
|
|
|
- return;
|
|
|
- }
|
|
|
+ // Apply Reciprocal Rank Fusion to combine all ranked lists
|
|
|
+ // Give 2x weight to original query results (first 2 lists: FTS + vector)
|
|
|
+ const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
|
|
|
+ const fused = reciprocalRankFusion(rankedLists, weights);
|
|
|
+ // Hard cap reranking for latency/cost. We rerank per-document (best chunk only).
|
|
|
+ const RERANK_DOC_LIMIT = 40;
|
|
|
+ const candidates = fused.slice(0, RERANK_DOC_LIMIT);
|
|
|
|
|
|
- // Rerank multiple chunks per document, then aggregate scores
|
|
|
- // This improves ranking for long documents where keyword-matched chunk isn't always best
|
|
|
- // We only rerank ONE chunk per document (best chunk by a simple keyword heuristic),
|
|
|
- // so we never rerank more than RERANK_DOC_LIMIT items.
|
|
|
- const chunksToRerank: { file: string; text: string; chunkIdx: number }[] = [];
|
|
|
- const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; bestIdx: number }>();
|
|
|
-
|
|
|
- const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
|
|
|
- for (const c of candidates) {
|
|
|
- const chunks = chunkDocument(c.body);
|
|
|
- if (chunks.length === 0) continue;
|
|
|
-
|
|
|
- // Choose best chunk by keyword matches; fall back to first chunk.
|
|
|
- let bestIdx = 0;
|
|
|
- let bestScore = -1;
|
|
|
- for (let i = 0; i < chunks.length; i++) {
|
|
|
- const chunkLower = chunks[i]!.text.toLowerCase();
|
|
|
- const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
|
|
|
- if (score > bestScore) {
|
|
|
- bestScore = score;
|
|
|
- bestIdx = i;
|
|
|
- }
|
|
|
+ if (candidates.length === 0) {
|
|
|
+ console.log("No results found.");
|
|
|
+ closeDb();
|
|
|
+ return;
|
|
|
}
|
|
|
|
|
|
- chunksToRerank.push({ file: c.file, text: chunks[bestIdx]!.text, chunkIdx: bestIdx });
|
|
|
- docChunkMap.set(c.file, { chunks, bestIdx });
|
|
|
- }
|
|
|
+ // Rerank multiple chunks per document, then aggregate scores
|
|
|
+ // This improves ranking for long documents where keyword-matched chunk isn't always best
|
|
|
+ // We only rerank ONE chunk per document (best chunk by a simple keyword heuristic),
|
|
|
+ // so we never rerank more than RERANK_DOC_LIMIT items.
|
|
|
+ const chunksToRerank: { file: string; text: string; chunkIdx: number }[] = [];
|
|
|
+ const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; bestIdx: number }>();
|
|
|
+
|
|
|
+ const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
|
|
|
+ for (const cand of candidates) {
|
|
|
+ const chunks = chunkDocument(cand.body);
|
|
|
+ if (chunks.length === 0) continue;
|
|
|
+
|
|
|
+ // Choose best chunk by keyword matches; fall back to first chunk.
|
|
|
+ let bestIdx = 0;
|
|
|
+ let bestScore = -1;
|
|
|
+ for (let i = 0; i < chunks.length; i++) {
|
|
|
+ const chunkLower = chunks[i]!.text.toLowerCase();
|
|
|
+ const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
|
|
|
+ if (score > bestScore) {
|
|
|
+ bestScore = score;
|
|
|
+ bestIdx = i;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- // Rerank selected chunks (with caching). One chunk per doc -> one rerank item per doc.
|
|
|
- const reranked = await rerank(
|
|
|
- query,
|
|
|
- chunksToRerank.map(c => ({ file: c.file, text: c.text })),
|
|
|
- rerankModel,
|
|
|
- db
|
|
|
- );
|
|
|
+ chunksToRerank.push({ file: cand.file, text: chunks[bestIdx]!.text, chunkIdx: bestIdx });
|
|
|
+ docChunkMap.set(cand.file, { chunks, bestIdx });
|
|
|
+ }
|
|
|
|
|
|
- const aggregatedScores = new Map<string, { score: number; bestChunkIdx: number }>();
|
|
|
- for (const r of reranked) {
|
|
|
- const chunkInfo = docChunkMap.get(r.file);
|
|
|
- aggregatedScores.set(r.file, { score: r.score, bestChunkIdx: chunkInfo?.bestIdx ?? 0 });
|
|
|
- }
|
|
|
-
|
|
|
- // Blend RRF position score with aggregated reranker score using position-aware weights
|
|
|
- // Top retrieval results get more protection from reranker disagreement
|
|
|
- const candidateMap = new Map(candidates.map(c => [c.file, { displayPath: c.displayPath, title: c.title, body: c.body }]));
|
|
|
- const rrfRankMap = new Map(candidates.map((c, i) => [c.file, i + 1])); // 1-indexed rank
|
|
|
-
|
|
|
- const finalResults = Array.from(aggregatedScores.entries()).map(([file, { score: rerankScore, bestChunkIdx }]) => {
|
|
|
- const rrfRank = rrfRankMap.get(file) || 30;
|
|
|
- // Position-aware blending: top retrieval results preserved more
|
|
|
- // Rank 1-3: 75% RRF, 25% reranker (trust retrieval for exact matches)
|
|
|
- // Rank 4-10: 60% RRF, 40% reranker
|
|
|
- // Rank 11+: 40% RRF, 60% reranker (trust reranker for lower-ranked)
|
|
|
- let rrfWeight: number;
|
|
|
- if (rrfRank <= 3) {
|
|
|
- rrfWeight = 0.75;
|
|
|
- } else if (rrfRank <= 10) {
|
|
|
- rrfWeight = 0.60;
|
|
|
- } else {
|
|
|
- rrfWeight = 0.40;
|
|
|
- }
|
|
|
- const rrfScore = 1 / rrfRank; // Position-based: 1, 0.5, 0.33...
|
|
|
- const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * rerankScore;
|
|
|
- const candidate = candidateMap.get(file);
|
|
|
- // Use the best-scoring chunk's text for the body (better for snippets)
|
|
|
- const chunkInfo = docChunkMap.get(file);
|
|
|
- const chunkBody = chunkInfo ? (chunkInfo.chunks[bestChunkIdx]?.text || chunkInfo.chunks[0]!.text) : candidate?.body || "";
|
|
|
- const chunkPos = chunkInfo ? (chunkInfo.chunks[bestChunkIdx]?.pos || 0) : 0;
|
|
|
- return {
|
|
|
- file,
|
|
|
- displayPath: candidate?.displayPath || "",
|
|
|
- title: candidate?.title || "",
|
|
|
- body: chunkBody,
|
|
|
- chunkPos,
|
|
|
- score: blendedScore,
|
|
|
- context: getContextForFile(db, file),
|
|
|
- hash: hashMap.get(file) || "",
|
|
|
- };
|
|
|
- }).sort((a, b) => b.score - a.score);
|
|
|
-
|
|
|
- // Deduplicate by file (safety net - shouldn't happen but prevents duplicate output)
|
|
|
- const seenFiles = new Set<string>();
|
|
|
- const dedupedResults = finalResults.filter(r => {
|
|
|
- if (seenFiles.has(r.file)) return false;
|
|
|
- seenFiles.add(r.file);
|
|
|
- return true;
|
|
|
- });
|
|
|
+ // Rerank selected chunks (with caching). One chunk per doc -> one rerank item per doc.
|
|
|
+ const reranked = await rerank(
|
|
|
+ query,
|
|
|
+ chunksToRerank.map(ch => ({ file: ch.file, text: ch.text })),
|
|
|
+ rerankModel,
|
|
|
+ db,
|
|
|
+ session
|
|
|
+ );
|
|
|
|
|
|
- closeDb();
|
|
|
- outputResults(dedupedResults, query, opts);
|
|
|
+ const aggregatedScores = new Map<string, { score: number; bestChunkIdx: number }>();
|
|
|
+ for (const r of reranked) {
|
|
|
+ const chunkInfo = docChunkMap.get(r.file);
|
|
|
+ aggregatedScores.set(r.file, { score: r.score, bestChunkIdx: chunkInfo?.bestIdx ?? 0 });
|
|
|
+ }
|
|
|
+
|
|
|
+ // Blend RRF position score with aggregated reranker score using position-aware weights
|
|
|
+ // Top retrieval results get more protection from reranker disagreement
|
|
|
+ const candidateMap = new Map(candidates.map(cand => [cand.file, { displayPath: cand.displayPath, title: cand.title, body: cand.body }]));
|
|
|
+ const rrfRankMap = new Map(candidates.map((cand, i) => [cand.file, i + 1])); // 1-indexed rank
|
|
|
+
|
|
|
+ const finalResults = Array.from(aggregatedScores.entries()).map(([file, { score: rerankScore, bestChunkIdx }]) => {
|
|
|
+ const rrfRank = rrfRankMap.get(file) || 30;
|
|
|
+ // Position-aware blending: top retrieval results preserved more
|
|
|
+ // Rank 1-3: 75% RRF, 25% reranker (trust retrieval for exact matches)
|
|
|
+ // Rank 4-10: 60% RRF, 40% reranker
|
|
|
+ // Rank 11+: 40% RRF, 60% reranker (trust reranker for lower-ranked)
|
|
|
+ let rrfWeight: number;
|
|
|
+ if (rrfRank <= 3) {
|
|
|
+ rrfWeight = 0.75;
|
|
|
+ } else if (rrfRank <= 10) {
|
|
|
+ rrfWeight = 0.60;
|
|
|
+ } else {
|
|
|
+ rrfWeight = 0.40;
|
|
|
+ }
|
|
|
+ const rrfScore = 1 / rrfRank; // Position-based: 1, 0.5, 0.33...
|
|
|
+ const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * rerankScore;
|
|
|
+ const candidate = candidateMap.get(file);
|
|
|
+ // Use the best-scoring chunk's text for the body (better for snippets)
|
|
|
+ const chunkInfo = docChunkMap.get(file);
|
|
|
+ const chunkBody = chunkInfo ? (chunkInfo.chunks[bestChunkIdx]?.text || chunkInfo.chunks[0]!.text) : candidate?.body || "";
|
|
|
+ const chunkPos = chunkInfo ? (chunkInfo.chunks[bestChunkIdx]?.pos || 0) : 0;
|
|
|
+ return {
|
|
|
+ file,
|
|
|
+ displayPath: candidate?.displayPath || "",
|
|
|
+ title: candidate?.title || "",
|
|
|
+ body: chunkBody,
|
|
|
+ chunkPos,
|
|
|
+ score: blendedScore,
|
|
|
+ context: getContextForFile(db, file),
|
|
|
+ hash: hashMap.get(file) || "",
|
|
|
+ };
|
|
|
+ }).sort((a, b) => b.score - a.score);
|
|
|
+
|
|
|
+ // Deduplicate by file (safety net - shouldn't happen but prevents duplicate output)
|
|
|
+ const seenFiles = new Set<string>();
|
|
|
+ const dedupedResults = finalResults.filter(r => {
|
|
|
+ if (seenFiles.has(r.file)) return false;
|
|
|
+ seenFiles.add(r.file);
|
|
|
+ return true;
|
|
|
+ });
|
|
|
+
|
|
|
+ closeDb();
|
|
|
+ outputResults(dedupedResults, query, opts);
|
|
|
+ }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
|
|
|
}
|
|
|
|
|
|
// Parse CLI arguments using util.parseArgs
|