Procházet zdrojové kódy

Rerank multiple chunks per document with score aggregation

Instead of reranking only the single keyword-matched chunk per document, the search now:
- Select top 3 chunks per document (by keyword score)
- Rerank all selected chunks
- Aggregate scores using top-2 average (rewards consistency)
- Use best-scoring chunk for snippet display

This improves ranking for long documents, where the keyword-matched
chunk isn't always the one most relevant to the query.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Tobi Lutke před 5 měsíci
rodič
revize
fd24df81c9
2 změnil soubory, kde provedl 63 přidání a 33 odebrání
  1. 1 1
      .beads/issues.jsonl
  2. 62 32
      src/qmd.ts

+ 1 - 1
.beads/issues.jsonl

@@ -21,7 +21,7 @@
 {"id":"qmd-clr","title":"fix embed","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-12T16:14:55.292114-05:00","updated_at":"2025-12-12T16:31:27.661829-05:00","closed_at":"2025-12-12T16:31:27.661829-05:00"}
 {"id":"qmd-d00","title":"Add offline evaluation harness for tuning","description":"Create a small benchmark with ~100 labeled queries from real searches. Would enable tuning: expansion on/off threshold, candidate count (30 vs 100), blending weights, reranker threshold.","status":"open","priority":3,"issue_type":"feature","created_at":"2025-12-20T17:18:42.007265-05:00","updated_at":"2025-12-20T17:18:42.007265-05:00"}
 {"id":"qmd-deh","title":"Refactor database introduce qmd collection *","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-10T10:56:04.516137-05:00","updated_at":"2025-12-12T16:12:12.349428-05:00","closed_at":"2025-12-12T16:12:12.349428-05:00"}
-{"id":"qmd-df5","title":"Rerank multiple chunks per document with score aggregation","description":"Currently we only rerank 1 chunk per doc (selected by keyword heuristic). Should rerank top 2-3 chunks per document, then aggregate scores (max, softmax, or top-2 average). This improves ranking for long documents where the keyword-matched chunk isn't always the most relevant.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T17:18:41.592575-05:00","updated_at":"2025-12-20T17:18:41.592575-05:00"}
+{"id":"qmd-df5","title":"Rerank multiple chunks per document with score aggregation","description":"Currently we only rerank 1 chunk per doc (selected by keyword heuristic). Should rerank top 2-3 chunks per document, then aggregate scores (max, softmax, or top-2 average). This improves ranking for long documents where the keyword-matched chunk isn't always the most relevant.","status":"in_progress","priority":2,"issue_type":"feature","created_at":"2025-12-20T17:18:41.592575-05:00","updated_at":"2025-12-21T12:02:56.013748-05:00"}
 {"id":"qmd-dmi","title":"Implement 'qmd collection' commands","description":"Add explicit collection management:\n- qmd collection add . --name \u003cname\u003e --mask '**/*.md'\n- qmd collection list\n- qmd collection remove \u003cname\u003e\n\nThis gives users control over collection names and patterns.","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-12-12T15:29:53.810666-05:00","updated_at":"2025-12-12T16:02:08.079158-05:00","closed_at":"2025-12-12T16:02:08.079158-05:00","dependencies":[{"issue_id":"qmd-dmi","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.811294-05:00","created_by":"daemon"}]}
 {"id":"qmd-dt1","title":"Redesign context add command for better usability","description":"Current issues: \n1. Virtual path qmd://journals/ is rejected as invalid\n2. Syntax is confusing - sometimes path is first arg, sometimes second\n3. Need to support collection root context (qmd://name/)\n4. Should be intuitive: qmd context add \u003cwhere\u003e \u003cwhat\u003e\nDesign goals:\n- Support qmd://collection/ for collection root context\n- Support qmd://collection/path for path-specific context\n- Clear, consistent syntax\n- Good error messages","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-13T09:39:19.764114-05:00","updated_at":"2025-12-13T09:41:38.467861-05:00","closed_at":"2025-12-13T09:41:38.467861-05:00"}
 {"id":"qmd-e2c","title":"Implement 'qmd ls' command","description":"Add command to explore virtual file tree:\n- qmd ls → list all collections\n- qmd ls \u003ccollection\u003e → list files in collection\n- qmd ls \u003ccollection\u003e/\u003cpath\u003e → list files under path\nOutput: flat list of qmd:// paths","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-12-12T15:29:53.859804-05:00","updated_at":"2025-12-12T15:55:12.777701-05:00","closed_at":"2025-12-12T15:55:12.777701-05:00","dependencies":[{"issue_id":"qmd-e2c","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.860535-05:00","created_by":"daemon"}]}

+ 62 - 32
src/qmd.ts

@@ -2089,50 +2089,80 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin
     return;
   }
 
-  // Rerank chunks, not full documents
-  // For each candidate, extract the most relevant chunk to rerank
+  // Rerank multiple chunks per document, then aggregate scores
+  // This improves ranking for long documents where keyword-matched chunk isn't always best
+  const MAX_CHUNKS_PER_DOC = 3;
   const chunksToRerank: { file: string; text: string; chunkIdx: number }[] = [];
-  const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; bestChunkIdx: number }>();
+  const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; selectedIndices: number[] }>();
 
   for (const c of candidates) {
     const chunks = chunkDocument(c.body);
-    if (chunks.length === 1) {
-      // Small document - use entire body
-      chunksToRerank.push({ file: c.file, text: chunks[0].text, chunkIdx: 0 });
-      docChunkMap.set(c.file, { chunks, bestChunkIdx: 0 });
+    if (chunks.length <= MAX_CHUNKS_PER_DOC) {
+      // Small document - rerank all chunks
+      for (let i = 0; i < chunks.length; i++) {
+        chunksToRerank.push({ file: c.file, text: chunks[i].text, chunkIdx: i });
+      }
+      docChunkMap.set(c.file, { chunks, selectedIndices: chunks.map((_, i) => i) });
     } else {
-      // Find the chunk that best matches the query terms (simple keyword heuristic)
+      // Score all chunks by keyword match, select top MAX_CHUNKS_PER_DOC
       const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
-      let bestIdx = 0;
-      let bestScore = 0;
-      for (let i = 0; i < chunks.length; i++) {
-        const chunkLower = chunks[i].text.toLowerCase();
+      const scored = chunks.map((chunk, idx) => {
+        const chunkLower = chunk.text.toLowerCase();
         const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
-        if (score > bestScore) {
-          bestScore = score;
-          bestIdx = i;
-        }
+        return { idx, score };
+      });
+      scored.sort((a, b) => b.score - a.score);
+      const selectedIndices = scored.slice(0, MAX_CHUNKS_PER_DOC).map(s => s.idx);
+
+      for (const idx of selectedIndices) {
+        chunksToRerank.push({ file: c.file, text: chunks[idx].text, chunkIdx: idx });
       }
-      chunksToRerank.push({ file: c.file, text: chunks[bestIdx].text, chunkIdx: bestIdx });
-      docChunkMap.set(c.file, { chunks, bestChunkIdx: bestIdx });
+      docChunkMap.set(c.file, { chunks, selectedIndices });
     }
   }
 
-  // Rerank the focused chunks (with caching)
+  // Rerank all selected chunks (with caching)
+  // Use file:chunkIdx as unique identifier for reranker
   const reranked = await rerank(
     query,
-    chunksToRerank.map(c => ({ file: c.file, text: c.text })),
+    chunksToRerank.map(c => ({ file: `${c.file}:${c.chunkIdx}`, text: c.text })),
     rerankModel,
     db
   );
 
-  // Blend RRF position score with reranker score using position-aware weights
+  // Aggregate chunk scores back to document level using top-2 average
+  // (or max if only 1 chunk) - this balances best chunk with consistency
+  const docScores = new Map<string, { scores: number[]; bestChunkIdx: number }>();
+  for (const r of reranked) {
+    const [file, chunkIdxStr] = r.file.split(/:(\d+)$/);
+    const chunkIdx = parseInt(chunkIdxStr || "0");
+    const existing = docScores.get(file);
+    if (existing) {
+      existing.scores.push(r.score);
+      if (r.score > (existing.scores[0] || 0)) {
+        existing.bestChunkIdx = chunkIdx;
+      }
+    } else {
+      docScores.set(file, { scores: [r.score], bestChunkIdx: chunkIdx });
+    }
+  }
+
+  // Compute aggregated score: top-2 average (rewards consistency across chunks)
+  const aggregatedScores = new Map<string, { score: number; bestChunkIdx: number }>();
+  for (const [file, { scores, bestChunkIdx }] of docScores) {
+    scores.sort((a, b) => b - a);
+    const topScores = scores.slice(0, 2);
+    const avgScore = topScores.reduce((a, b) => a + b, 0) / topScores.length;
+    aggregatedScores.set(file, { score: avgScore, bestChunkIdx });
+  }
+
+  // Blend RRF position score with aggregated reranker score using position-aware weights
   // Top retrieval results get more protection from reranker disagreement
   const candidateMap = new Map(candidates.map(c => [c.file, { displayPath: c.displayPath, title: c.title, body: c.body }]));
   const rrfRankMap = new Map(candidates.map((c, i) => [c.file, i + 1])); // 1-indexed rank
 
-  const finalResults = reranked.map(r => {
-    const rrfRank = rrfRankMap.get(r.file) || 30;
+  const finalResults = Array.from(aggregatedScores.entries()).map(([file, { score: rerankScore, bestChunkIdx }]) => {
+    const rrfRank = rrfRankMap.get(file) || 30;
     // Position-aware blending: top retrieval results preserved more
     // Rank 1-3: 75% RRF, 25% reranker (trust retrieval for exact matches)
     // Rank 4-10: 60% RRF, 40% reranker
@@ -2146,21 +2176,21 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin
       rrfWeight = 0.40;
     }
     const rrfScore = 1 / rrfRank;  // Position-based: 1, 0.5, 0.33...
-    const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * r.score;
-    const candidate = candidateMap.get(r.file);
-    // Use the best chunk's text for the body (better for snippets)
-    const chunkInfo = docChunkMap.get(r.file);
-    const chunkBody = chunkInfo ? chunkInfo.chunks[chunkInfo.bestChunkIdx].text : candidate?.body || "";
-    const chunkPos = chunkInfo ? chunkInfo.chunks[chunkInfo.bestChunkIdx].pos : 0;
+    const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * rerankScore;
+    const candidate = candidateMap.get(file);
+    // Use the best-scoring chunk's text for the body (better for snippets)
+    const chunkInfo = docChunkMap.get(file);
+    const chunkBody = chunkInfo ? chunkInfo.chunks[bestChunkIdx]?.text || chunkInfo.chunks[0].text : candidate?.body || "";
+    const chunkPos = chunkInfo ? chunkInfo.chunks[bestChunkIdx]?.pos || 0 : 0;
     return {
-      file: r.file,
+      file,
       displayPath: candidate?.displayPath || "",
       title: candidate?.title || "",
       body: chunkBody,
       chunkPos,
       score: blendedScore,
-      context: getContextForFile(db, r.file),
-      hash: hashMap.get(r.file) || "",
+      context: getContextForFile(db, file),
+      hash: hashMap.get(file) || "",
     };
   }).sort((a, b) => b.score - a.score);