6 luni în urmă · c26e8ea3ba
--- a/src/eval.test.ts
+++ b/src/eval.test.ts
@@ -40,7 +40,7 @@ import { getDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
 
				 const evalQueries: {
			
 
				   query: string;
			
 
				   expectedDoc: string;
			
 
				-  difficulty: "easy" | "medium" | "hard";
			
 
				+  difficulty: "easy" | "medium" | "hard" | "fusion";
			
 
				 }[] = [
			
 
				   // EASY: Exact keyword matches
			
 
				   { query: "API versioning", expectedDoc: "api-design", difficulty: "easy" },
			
@@ -65,6 +65,15 @@ const evalQueries: {
 
				   { query: "F1 score precision recall", expectedDoc: "machine-learning", difficulty: "hard" },
			
 
				   { query: "quarterly team gathering travel", expectedDoc: "remote-work", difficulty: "hard" },
			
 
				   { query: "beta program 47 bugs", expectedDoc: "product-launch", difficulty: "hard" },
			
 
				+
			
 
				+  // FUSION: Multi-signal queries that need both lexical AND semantic matching
			
 
				+  // These should have weak individual scores but strong combined RRF scores
			
 
				+  { query: "how much runway before running out of money", expectedDoc: "fundraising", difficulty: "fusion" },
			
 
				+  { query: "datacenter replication sync strategy", expectedDoc: "distributed-systems", difficulty: "fusion" },
			
 
				+  { query: "splitting data for training and testing", expectedDoc: "machine-learning", difficulty: "fusion" },
			
 
				+  { query: "JSON response codes error messages", expectedDoc: "api-design", difficulty: "fusion" },
			
 
				+  { query: "video calls camera async messaging", expectedDoc: "remote-work", difficulty: "fusion" },
			
 
				+  { query: "CI/CD pipeline testing coverage", expectedDoc: "product-launch", difficulty: "fusion" },
			
 
				 ];
			
 
				 
			
 
				 // Helper to check if result matches expected doc
			
@@ -333,14 +342,49 @@ describe("Hybrid Search (RRF)", () => {
 
				     expect(hits / hardQueries.length).toBeGreaterThanOrEqual(threshold);
			
 
				   }, 60000);
			
 
				 
			
 
				+  test("fusion queries: ≥50% Hit@3 (RRF combines weak signals)", async () => {
			
 
				+    if (!hasVectors) return; // Fusion needs both BM25 and vector results; without vectors the test exits early without asserting anything
			
 
				+
			
 
				+    const fusionQueries = evalQueries.filter(q => q.difficulty === "fusion");
			
 
				+    let hybridHits = 0;
			
 
				+    let bm25Hits = 0;
			
 
				+    let vecHits = 0;
			
 
				+
			
 
				+    for (const { query, expectedDoc } of fusionQueries) {
			
 
				+      // Hybrid (RRF) search: a hit means the expected doc appears among the top 3 results
			
 
				+      const hybridResults = await hybridSearch(query);
			
 
				+      if (hybridResults.slice(0, 3).some(r => matchesExpected(r.file, expectedDoc))) hybridHits++;
			
 
				+
			
 
				+      // BM25 (full-text) baseline for comparison, scored on the same top-3 cutoff
			
 
				+      const bm25Results = searchFTS(db, query, 5);
			
 
				+      if (bm25Results.slice(0, 3).some(r => matchesExpected(r.filepath, expectedDoc))) bm25Hits++;
			
 
				+
			
 
				+      // Vector-search baseline for comparison, scored on the same top-3 cutoff
			
 
				+      const vecResults = await searchVec(db, query, DEFAULT_EMBED_MODEL, 5);
			
 
				+      if (vecResults.slice(0, 3).some(r => matchesExpected(r.filepath, expectedDoc))) vecHits++;
			
 
				+    }
			
 
				+
			
 
				+    const hybridRate = hybridHits / fusionQueries.length;
			
 
				+    const bm25Rate = bm25Hits / fusionQueries.length;
			
 
				+    const vecRate = vecHits / fusionQueries.length;
			
 
				+
			
 
				+    // The hybrid Hit@3 rate must reach at least 50% on these multi-signal queries
			
 
				+    expect(hybridRate).toBeGreaterThanOrEqual(0.5);
			
 
				+
			
 
				+    // The hybrid rate must be at least as high as the better of the BM25-only and vector-only rates
			
 
				+    expect(hybridRate).toBeGreaterThanOrEqual(Math.max(bm25Rate, vecRate));
			
 
				+  }, 60000);
			
 
				+
			
 
				   test("overall Hit@3 ≥60% with vectors, ≥40% without", async () => {
			
 
				+    // Score only the easy/medium/hard queries here; fusion queries are excluded because they have their own Hit@3 test
			
 
				+    const standardQueries = evalQueries.filter(q => q.difficulty !== "fusion");
			
 
				     let hits = 0;
			
 
				-    for (const { query, expectedDoc } of evalQueries) {
			
 
				+    for (const { query, expectedDoc } of standardQueries) {
			
 
				       const results = await hybridSearch(query);
			
 
				       if (results.slice(0, 3).some(r => matchesExpected(r.file, expectedDoc))) hits++;
			
 
				     }
			
 
				     const threshold = hasVectors ? 0.6 : 0.4;
			
 
				-    expect(hits / evalQueries.length).toBeGreaterThanOrEqual(threshold);
			
 
				+    expect(hits / standardQueries.length).toBeGreaterThanOrEqual(threshold);
			
 
				   }, 60000);
			
 
				 });