Procházet zdrojové kódy

Merge pull request #462 from goldsr09/fix/bm25-field-weights

Fix BM25 field weights to include all 3 FTS columns
Tobias Lütke před 1 měsícem
rodič
revize
08566ec316
Změněny 2 soubory: 29 přidání a 1 odebrání
  1. 1 1
      src/store.ts
  2. 28 0
      test/store.test.ts

+ 1 - 1
src/store.ts

@@ -2770,7 +2770,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
       d.title,
       content.doc as body,
       d.hash,
-      bm25(documents_fts, 10.0, 1.0) as bm25_score
+      bm25(documents_fts, 1.5, 4.0, 1.0) as bm25_score
     FROM documents_fts f
     JOIN documents d ON d.id = f.rowid
     JOIN content ON content.hash = d.hash

+ 28 - 0
test/store.test.ts

@@ -1203,6 +1203,34 @@ describe("FTS Search", () => {
     await cleanupTestDb(store);
   });
 
+  test("searchFTS title boost outweighs higher body frequency", async () => {
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+
+    // Document with "quantum" mentioned in a longer body but NOT in the title
+    await insertTestDocument(store.db, collectionName, {
+      name: "body-only",
+      title: "General Science Notes",
+      body: "This research paper discusses quantum mechanics and the quantum model of computation. The quantum approach offers improvements over classical methods.",
+      displayPath: "test/body-only.md",
+    });
+
+    // Document with "quantum" in the title only; its shorter body never mentions the term
+    await insertTestDocument(store.db, collectionName, {
+      name: "title-match",
+      title: "Quantum Computing Overview",
+      body: "An introduction to the fundamentals of this emerging computing paradigm.",
+      displayPath: "test/title-match.md",
+    });
+
+    const results = store.searchFTS("quantum", 10);
+    expect(results.length).toBe(2);
+    // Title-match doc should rank higher due to BM25 column weights boosting title
+    expect(results[0]!.displayPath).toBe(`${collectionName}/test/title-match.md`);
+
+    await cleanupTestDb(store);
+  });
+
   test("searchFTS respects limit parameter", async () => {
     const store = await createTestStore();
     const collectionName = await createTestCollection();