|
@@ -1040,6 +1040,41 @@ export type RankedResult = {
|
|
|
score: number;
|
|
score: number;
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
|
|
+export type RRFContributionTrace = {
|
|
|
|
|
+ listIndex: number;
|
|
|
|
|
+ source: "fts" | "vec";
|
|
|
|
|
+ queryType: "original" | "lex" | "vec" | "hyde";
|
|
|
|
|
+ query: string;
|
|
|
|
|
+ rank: number; // 1-indexed rank within list
|
|
|
|
|
+ weight: number;
|
|
|
|
|
+ backendScore: number; // Backend-normalized score before fusion
|
|
|
|
|
+ rrfContribution: number; // weight / (k + rank)
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+export type RRFScoreTrace = {
|
|
|
|
|
+ contributions: RRFContributionTrace[];
|
|
|
|
|
+ baseScore: number; // Sum of reciprocal-rank contributions
|
|
|
|
|
+ topRank: number; // Best (lowest) rank seen across lists
|
|
|
|
|
+ topRankBonus: number; // +0.05 for rank 1, +0.02 for rank 2-3
|
|
|
|
|
+ totalScore: number; // baseScore + topRankBonus
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+export type HybridQueryExplain = {
|
|
|
|
|
+ ftsScores: number[];
|
|
|
|
|
+ vectorScores: number[];
|
|
|
|
|
+ rrf: {
|
|
|
|
|
+ rank: number; // Rank after RRF fusion (1-indexed)
|
|
|
|
|
+ positionScore: number; // 1 / rank used in position-aware blending
|
|
|
|
|
+ weight: number; // Position-aware RRF weight (0.75 / 0.60 / 0.40)
|
|
|
|
|
+ baseScore: number;
|
|
|
|
|
+ topRankBonus: number;
|
|
|
|
|
+ totalScore: number;
|
|
|
|
|
+ contributions: RRFContributionTrace[];
|
|
|
|
|
+ };
|
|
|
|
|
+ rerankScore: number;
|
|
|
|
|
+ blendedScore: number;
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
/**
|
|
/**
|
|
|
* Error result when document is not found
|
|
* Error result when document is not found
|
|
|
*/
|
|
*/
|
|
@@ -2430,6 +2465,72 @@ export function reciprocalRankFusion(
|
|
|
.map(e => ({ ...e.result, score: e.rrfScore }));
|
|
.map(e => ({ ...e.result, score: e.rrfScore }));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+/**
|
|
|
|
|
+ * Build per-document RRF contribution traces for explain/debug output.
|
|
|
|
|
+ */
|
|
|
|
|
+export function buildRrfTrace(
|
|
|
|
|
+ resultLists: RankedResult[][],
|
|
|
|
|
+ weights: number[] = [],
|
|
|
|
|
+ listMeta: RankedListMeta[] = [],
|
|
|
|
|
+ k: number = 60
|
|
|
|
|
+): Map<string, RRFScoreTrace> {
|
|
|
|
|
+ const traces = new Map<string, RRFScoreTrace>();
|
|
|
|
|
+
|
|
|
|
|
+ for (let listIdx = 0; listIdx < resultLists.length; listIdx++) {
|
|
|
|
|
+ const list = resultLists[listIdx];
|
|
|
|
|
+ if (!list) continue;
|
|
|
|
|
+ const weight = weights[listIdx] ?? 1.0;
|
|
|
|
|
+ const meta = listMeta[listIdx] ?? {
|
|
|
|
|
+ source: "fts",
|
|
|
|
|
+ queryType: "original",
|
|
|
|
|
+ query: "",
|
|
|
|
|
+ } as const;
|
|
|
|
|
+
|
|
|
|
|
+ for (let rank0 = 0; rank0 < list.length; rank0++) {
|
|
|
|
|
+ const result = list[rank0];
|
|
|
|
|
+ if (!result) continue;
|
|
|
|
|
+ const rank = rank0 + 1; // 1-indexed rank for explain output
|
|
|
|
|
+ const contribution = weight / (k + rank);
|
|
|
|
|
+ const existing = traces.get(result.file);
|
|
|
|
|
+
|
|
|
|
|
+ const detail: RRFContributionTrace = {
|
|
|
|
|
+ listIndex: listIdx,
|
|
|
|
|
+ source: meta.source,
|
|
|
|
|
+ queryType: meta.queryType,
|
|
|
|
|
+ query: meta.query,
|
|
|
|
|
+ rank,
|
|
|
|
|
+ weight,
|
|
|
|
|
+ backendScore: result.score,
|
|
|
|
|
+ rrfContribution: contribution,
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ if (existing) {
|
|
|
|
|
+ existing.baseScore += contribution;
|
|
|
|
|
+ existing.topRank = Math.min(existing.topRank, rank);
|
|
|
|
|
+ existing.contributions.push(detail);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ traces.set(result.file, {
|
|
|
|
|
+ contributions: [detail],
|
|
|
|
|
+ baseScore: contribution,
|
|
|
|
|
+ topRank: rank,
|
|
|
|
|
+ topRankBonus: 0,
|
|
|
|
|
+ totalScore: 0,
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ for (const trace of traces.values()) {
|
|
|
|
|
+ let bonus = 0;
|
|
|
|
|
+ if (trace.topRank === 1) bonus = 0.05;
|
|
|
|
|
+ else if (trace.topRank <= 3) bonus = 0.02;
|
|
|
|
|
+ trace.topRankBonus = bonus;
|
|
|
|
|
+ trace.totalScore = trace.baseScore + bonus;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return traces;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
// =============================================================================
|
|
// =============================================================================
|
|
|
// Document retrieval
|
|
// Document retrieval
|
|
|
// =============================================================================
|
|
// =============================================================================
|
|
@@ -2891,6 +2992,7 @@ export interface HybridQueryOptions {
|
|
|
limit?: number; // default 10
|
|
limit?: number; // default 10
|
|
|
minScore?: number; // default 0
|
|
minScore?: number; // default 0
|
|
|
candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT
|
|
candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT
|
|
|
|
|
+ explain?: boolean; // include backend/RRF/rerank score traces
|
|
|
hooks?: SearchHooks;
|
|
hooks?: SearchHooks;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -2904,8 +3006,15 @@ export interface HybridQueryResult {
|
|
|
score: number; // blended score (full precision)
|
|
score: number; // blended score (full precision)
|
|
|
context: string | null; // user-set context
|
|
context: string | null; // user-set context
|
|
|
docid: string; // content hash prefix (6 chars)
|
|
docid: string; // content hash prefix (6 chars)
|
|
|
|
|
+ explain?: HybridQueryExplain;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+export type RankedListMeta = {
|
|
|
|
|
+ source: "fts" | "vec";
|
|
|
|
|
+ queryType: "original" | "lex" | "vec" | "hyde";
|
|
|
|
|
+ query: string;
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
/**
|
|
/**
|
|
|
* Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
|
|
* Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
|
|
|
*
|
|
*
|
|
@@ -2928,9 +3037,11 @@ export async function hybridQuery(
|
|
|
const minScore = options?.minScore ?? 0;
|
|
const minScore = options?.minScore ?? 0;
|
|
|
const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
|
|
const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
|
|
|
const collection = options?.collection;
|
|
const collection = options?.collection;
|
|
|
|
|
+ const explain = options?.explain ?? false;
|
|
|
const hooks = options?.hooks;
|
|
const hooks = options?.hooks;
|
|
|
|
|
|
|
|
const rankedLists: RankedResult[][] = [];
|
|
const rankedLists: RankedResult[][] = [];
|
|
|
|
|
+ const rankedListMeta: RankedListMeta[] = [];
|
|
|
const docidMap = new Map<string, string>(); // filepath -> docid
|
|
const docidMap = new Map<string, string>(); // filepath -> docid
|
|
|
const hasVectors = !!store.db.prepare(
|
|
const hasVectors = !!store.db.prepare(
|
|
|
`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
|
|
`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
|
|
@@ -2963,6 +3074,7 @@ export async function hybridQuery(
|
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
|
})));
|
|
})));
|
|
|
|
|
+ rankedListMeta.push({ source: "fts", queryType: "original", query });
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Step 3: Route searches by query type
|
|
// Step 3: Route searches by query type
|
|
@@ -2981,18 +3093,19 @@ export async function hybridQuery(
|
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
|
})));
|
|
})));
|
|
|
|
|
+ rankedListMeta.push({ source: "fts", queryType: "lex", query: q.text });
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// 3b: Collect all texts that need vector search (original query + vec/hyde expansions)
|
|
// 3b: Collect all texts that need vector search (original query + vec/hyde expansions)
|
|
|
if (hasVectors) {
|
|
if (hasVectors) {
|
|
|
- const vecQueries: { text: string; isOriginal: boolean }[] = [
|
|
|
|
|
- { text: query, isOriginal: true },
|
|
|
|
|
|
|
+ const vecQueries: { text: string; queryType: "original" | "vec" | "hyde" }[] = [
|
|
|
|
|
+ { text: query, queryType: "original" },
|
|
|
];
|
|
];
|
|
|
for (const q of expanded) {
|
|
for (const q of expanded) {
|
|
|
if (q.type === 'vec' || q.type === 'hyde') {
|
|
if (q.type === 'vec' || q.type === 'hyde') {
|
|
|
- vecQueries.push({ text: q.text, isOriginal: false });
|
|
|
|
|
|
|
+ vecQueries.push({ text: q.text, queryType: q.type });
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -3019,6 +3132,11 @@ export async function hybridQuery(
|
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
|
})));
|
|
})));
|
|
|
|
|
+ rankedListMeta.push({
|
|
|
|
|
+ source: "vec",
|
|
|
|
|
+ queryType: vecQueries[i]!.queryType,
|
|
|
|
|
+ query: vecQueries[i]!.text,
|
|
|
|
|
+ });
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -3026,6 +3144,7 @@ export async function hybridQuery(
|
|
|
// Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
|
|
// Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
|
|
|
const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
|
|
const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
|
|
|
const fused = reciprocalRankFusion(rankedLists, weights);
|
|
const fused = reciprocalRankFusion(rankedLists, weights);
|
|
|
|
|
+ const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
|
|
|
const candidates = fused.slice(0, candidateLimit);
|
|
const candidates = fused.slice(0, candidateLimit);
|
|
|
|
|
|
|
|
if (candidates.length === 0) return [];
|
|
if (candidates.length === 0) return [];
|
|
@@ -3080,6 +3199,22 @@ export async function hybridQuery(
|
|
|
const bestIdx = chunkInfo?.bestIdx ?? 0;
|
|
const bestIdx = chunkInfo?.bestIdx ?? 0;
|
|
|
const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
|
|
const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
|
|
|
const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
|
|
const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
|
|
|
|
|
+ const trace = rrfTraceByFile?.get(r.file);
|
|
|
|
|
+ const explainData: HybridQueryExplain | undefined = explain ? {
|
|
|
|
|
+ ftsScores: trace?.contributions.filter(c => c.source === "fts").map(c => c.backendScore) ?? [],
|
|
|
|
|
+ vectorScores: trace?.contributions.filter(c => c.source === "vec").map(c => c.backendScore) ?? [],
|
|
|
|
|
+ rrf: {
|
|
|
|
|
+ rank: rrfRank,
|
|
|
|
|
+ positionScore: rrfScore,
|
|
|
|
|
+ weight: rrfWeight,
|
|
|
|
|
+ baseScore: trace?.baseScore ?? 0,
|
|
|
|
|
+ topRankBonus: trace?.topRankBonus ?? 0,
|
|
|
|
|
+ totalScore: trace?.totalScore ?? 0,
|
|
|
|
|
+ contributions: trace?.contributions ?? [],
|
|
|
|
|
+ },
|
|
|
|
|
+ rerankScore: r.score,
|
|
|
|
|
+ blendedScore,
|
|
|
|
|
+ } : undefined;
|
|
|
|
|
|
|
|
return {
|
|
return {
|
|
|
file: r.file,
|
|
file: r.file,
|
|
@@ -3091,6 +3226,7 @@ export async function hybridQuery(
|
|
|
score: blendedScore,
|
|
score: blendedScore,
|
|
|
context: store.getContextForFile(r.file),
|
|
context: store.getContextForFile(r.file),
|
|
|
docid: docidMap.get(r.file) || "",
|
|
docid: docidMap.get(r.file) || "",
|
|
|
|
|
+ ...(explainData ? { explain: explainData } : {}),
|
|
|
};
|
|
};
|
|
|
}).sort((a, b) => b.score - a.score);
|
|
}).sort((a, b) => b.score - a.score);
|
|
|
|
|
|
|
@@ -3201,6 +3337,7 @@ export interface StructuredSearchOptions {
|
|
|
limit?: number; // default 10
|
|
limit?: number; // default 10
|
|
|
minScore?: number; // default 0
|
|
minScore?: number; // default 0
|
|
|
candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT
|
|
candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT
|
|
|
|
|
+ explain?: boolean; // include backend/RRF/rerank score traces
|
|
|
/** Future: domain intent hint for routing/boosting */
|
|
/** Future: domain intent hint for routing/boosting */
|
|
|
intent?: string;
|
|
intent?: string;
|
|
|
hooks?: SearchHooks;
|
|
hooks?: SearchHooks;
|
|
@@ -3232,6 +3369,7 @@ export async function structuredSearch(
|
|
|
const limit = options?.limit ?? 10;
|
|
const limit = options?.limit ?? 10;
|
|
|
const minScore = options?.minScore ?? 0;
|
|
const minScore = options?.minScore ?? 0;
|
|
|
const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
|
|
const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
|
|
|
|
|
+ const explain = options?.explain ?? false;
|
|
|
const hooks = options?.hooks;
|
|
const hooks = options?.hooks;
|
|
|
|
|
|
|
|
const collections = options?.collections;
|
|
const collections = options?.collections;
|
|
@@ -3258,6 +3396,7 @@ export async function structuredSearch(
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
const rankedLists: RankedResult[][] = [];
|
|
const rankedLists: RankedResult[][] = [];
|
|
|
|
|
+ const rankedListMeta: RankedListMeta[] = [];
|
|
|
const docidMap = new Map<string, string>(); // filepath -> docid
|
|
const docidMap = new Map<string, string>(); // filepath -> docid
|
|
|
const hasVectors = !!store.db.prepare(
|
|
const hasVectors = !!store.db.prepare(
|
|
|
`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
|
|
`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
|
|
@@ -3277,6 +3416,11 @@ export async function structuredSearch(
|
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
|
})));
|
|
})));
|
|
|
|
|
+ rankedListMeta.push({
|
|
|
|
|
+ source: "fts",
|
|
|
|
|
+ queryType: "lex",
|
|
|
|
|
+ query: search.query,
|
|
|
|
|
+ });
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -3284,7 +3428,10 @@ export async function structuredSearch(
|
|
|
|
|
|
|
|
// Step 2: Batch embed and run vector searches for vec/hyde
|
|
// Step 2: Batch embed and run vector searches for vec/hyde
|
|
|
if (hasVectors) {
|
|
if (hasVectors) {
|
|
|
- const vecSearches = searches.filter(s => s.type === 'vec' || s.type === 'hyde');
|
|
|
|
|
|
|
+ const vecSearches = searches.filter(
|
|
|
|
|
+ (s): s is StructuredSubSearch & { type: 'vec' | 'hyde' } =>
|
|
|
|
|
+ s.type === 'vec' || s.type === 'hyde'
|
|
|
|
|
+ );
|
|
|
if (vecSearches.length > 0) {
|
|
if (vecSearches.length > 0) {
|
|
|
const llm = getDefaultLlamaCpp();
|
|
const llm = getDefaultLlamaCpp();
|
|
|
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
|
|
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
|
|
@@ -3308,6 +3455,11 @@ export async function structuredSearch(
|
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
file: r.filepath, displayPath: r.displayPath,
|
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
title: r.title, body: r.body || "", score: r.score,
|
|
|
})));
|
|
})));
|
|
|
|
|
+ rankedListMeta.push({
|
|
|
|
|
+ source: "vec",
|
|
|
|
|
+ queryType: vecSearches[i]!.type,
|
|
|
|
|
+ query: vecSearches[i]!.query,
|
|
|
|
|
+ });
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -3319,6 +3471,7 @@ export async function structuredSearch(
|
|
|
// Step 3: RRF fusion — first list gets 2x weight (assume caller ordered by importance)
|
|
// Step 3: RRF fusion — first list gets 2x weight (assume caller ordered by importance)
|
|
|
const weights = rankedLists.map((_, i) => i === 0 ? 2.0 : 1.0);
|
|
const weights = rankedLists.map((_, i) => i === 0 ? 2.0 : 1.0);
|
|
|
const fused = reciprocalRankFusion(rankedLists, weights);
|
|
const fused = reciprocalRankFusion(rankedLists, weights);
|
|
|
|
|
+ const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
|
|
|
const candidates = fused.slice(0, candidateLimit);
|
|
const candidates = fused.slice(0, candidateLimit);
|
|
|
|
|
|
|
|
if (candidates.length === 0) return [];
|
|
if (candidates.length === 0) return [];
|
|
@@ -3377,6 +3530,22 @@ export async function structuredSearch(
|
|
|
const bestIdx = chunkInfo?.bestIdx ?? 0;
|
|
const bestIdx = chunkInfo?.bestIdx ?? 0;
|
|
|
const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
|
|
const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
|
|
|
const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
|
|
const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
|
|
|
|
|
+ const trace = rrfTraceByFile?.get(r.file);
|
|
|
|
|
+ const explainData: HybridQueryExplain | undefined = explain ? {
|
|
|
|
|
+ ftsScores: trace?.contributions.filter(c => c.source === "fts").map(c => c.backendScore) ?? [],
|
|
|
|
|
+ vectorScores: trace?.contributions.filter(c => c.source === "vec").map(c => c.backendScore) ?? [],
|
|
|
|
|
+ rrf: {
|
|
|
|
|
+ rank: rrfRank,
|
|
|
|
|
+ positionScore: rrfScore,
|
|
|
|
|
+ weight: rrfWeight,
|
|
|
|
|
+ baseScore: trace?.baseScore ?? 0,
|
|
|
|
|
+ topRankBonus: trace?.topRankBonus ?? 0,
|
|
|
|
|
+ totalScore: trace?.totalScore ?? 0,
|
|
|
|
|
+ contributions: trace?.contributions ?? [],
|
|
|
|
|
+ },
|
|
|
|
|
+ rerankScore: r.score,
|
|
|
|
|
+ blendedScore,
|
|
|
|
|
+ } : undefined;
|
|
|
|
|
|
|
|
return {
|
|
return {
|
|
|
file: r.file,
|
|
file: r.file,
|
|
@@ -3388,6 +3557,7 @@ export async function structuredSearch(
|
|
|
score: blendedScore,
|
|
score: blendedScore,
|
|
|
context: store.getContextForFile(r.file),
|
|
context: store.getContextForFile(r.file),
|
|
|
docid: docidMap.get(r.file) || "",
|
|
docid: docidMap.get(r.file) || "",
|
|
|
|
|
+ ...(explainData ? { explain: explainData } : {}),
|
|
|
};
|
|
};
|
|
|
}).sort((a, b) => b.score - a.score);
|
|
}).sort((a, b) => b.score - a.score);
|
|
|
|
|
|