Explorar o código

Merge origin/main into fix/fts5-collection-filter-performance

Resolve conflict: use CTE approach from #455 with updated BM25
weights (1.5, 4.0, 1.0) from #462.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Tobias Lütke hai 1 mes
pai
achega
827ad839f4
Modificáronse 7 ficheiros con 51 adicións e 17 borrados
  1. 1 0
      CHANGELOG.md
  2. 2 2
      flake.nix
  3. 3 1
      src/llm.ts
  4. 5 1
      src/mcp/server.ts
  5. 2 3
      src/store.ts
  6. 10 10
      test/store.helpers.unit.test.ts
  7. 28 0
      test/store.test.ts

+ 1 - 0
CHANGELOG.md

@@ -4,6 +4,7 @@
 
 ### Fixes
 
+- Fix the entry-point path in the Nix flake (`src/qmd.ts` → `src/cli/qmd.ts`).
 - Sync stale `bun.lock` (`better-sqlite3` 11.x → 12.x). CI and release
   script now use `--frozen-lockfile` to prevent recurrence. #386
   (thanks @Mic92)

+ 2 - 2
flake.nix

@@ -48,7 +48,7 @@
             cp package.json $out/lib/qmd/
 
             makeWrapper ${pkgs.bun}/bin/bun $out/bin/qmd \
-              --add-flags "$out/lib/qmd/src/qmd.ts" \
+              --add-flags "$out/lib/qmd/src/cli/qmd.ts" \
               --set DYLD_LIBRARY_PATH "${pkgs.sqlite.out}/lib" \
               --set LD_LIBRARY_PATH "${pkgs.sqlite.out}/lib"
           '';
@@ -81,7 +81,7 @@
           shellHook = ''
             export BREW_PREFIX="''${BREW_PREFIX:-${sqliteWithExtensions.out}}"
             echo "QMD development shell"
-            echo "Run: bun src/qmd.ts <command>"
+            echo "Run: bun src/cli/qmd.ts <command>"
           '';
         };
       }

+ 3 - 1
src/llm.ts

@@ -209,7 +209,9 @@ export const DEFAULT_RERANK_MODEL_URI = DEFAULT_RERANK_MODEL;
 export const DEFAULT_GENERATE_MODEL_URI = DEFAULT_GENERATE_MODEL;
 
 // Local model cache directory
-const MODEL_CACHE_DIR = join(homedir(), ".cache", "qmd", "models");
+const MODEL_CACHE_DIR = process.env.XDG_CACHE_HOME
+  ? join(process.env.XDG_CACHE_HOME, "qmd", "models")
+  : join(homedir(), ".cache", "qmd", "models");
 export const DEFAULT_MODEL_CACHE_DIR = MODEL_CACHE_DIR;
 
 export type PullResult = {

+ 5 - 1
src/mcp/server.ts

@@ -296,9 +296,12 @@ Intent-aware lex (C++ performance, not sports):
         intent: z.string().optional().describe(
           "Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."
         ),
+        rerank: z.boolean().optional().default(true).describe(
+          "Rerank results using LLM (default: true). Set to false for faster results on CPU-only machines."
+        ),
       },
     },
-    async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
+    async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => {
       // Map to internal format
       const queries: ExpandedQuery[] = searches.map(s => ({
         type: s.type,
@@ -313,6 +316,7 @@ Intent-aware lex (C++ performance, not sports):
         collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
         limit,
         minScore,
+        rerank,
         intent,
       });
 

+ 2 - 3
src/store.ts

@@ -1595,7 +1595,6 @@ export function handelize(path: string): string {
 
   const result = path
     .replace(/___/g, '/')       // Triple underscore becomes folder separator
-    .toLowerCase()
     .split('/')
     .map((segment, idx, arr) => {
       const isLastSegment = idx === arr.length - 1;
@@ -1610,7 +1609,7 @@ export function handelize(path: string): string {
         const nameWithoutExt = ext ? segment.slice(0, -ext.length) : segment;
 
         const cleanedName = nameWithoutExt
-          .replace(/[^\p{L}\p{N}$]+/gu, '-')  // Keep route marker "$", dash-separate other chars
+          .replace(/[^\p{L}\p{N}.$]+/gu, '-')  // Keep letters, numbers, dots, "$"; dash-separate rest
           .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
 
         return cleanedName + ext;
@@ -2778,7 +2777,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
 
   let sql = `
     WITH fts_matches AS (
-      SELECT rowid, bm25(documents_fts, 10.0, 1.0) as bm25_score
+      SELECT rowid, bm25(documents_fts, 1.5, 4.0, 1.0) as bm25_score
       FROM documents_fts
       WHERE documents_fts MATCH ?
       ORDER BY bm25_score ASC

+ 10 - 10
test/store.helpers.unit.test.ts

@@ -114,14 +114,14 @@ describe("cleanupOrphanedVectors", () => {
 // =============================================================================
 
 describe("handelize", () => {
-  test("converts to lowercase", () => {
-    expect(handelize("README.md")).toBe("readme.md");
-    expect(handelize("MyFile.MD")).toBe("myfile.md");
+  test("preserves original case", () => {
+    expect(handelize("README.md")).toBe("README.md");
+    expect(handelize("MyFile.MD")).toBe("MyFile.MD");
   });
 
   test("preserves folder structure", () => {
     expect(handelize("a/b/c/d.md")).toBe("a/b/c/d.md");
-    expect(handelize("docs/api/README.md")).toBe("docs/api/readme.md");
+    expect(handelize("docs/api/README.md")).toBe("docs/api/README.md");
   });
 
   test("replaces non-word characters with dash", () => {
@@ -151,7 +151,7 @@ describe("handelize", () => {
   test("handles complex real-world meeting notes", () => {
     const complexName = "Money Movement Licensing Review - 2025/11/19 10:25 EST - Notes by Gemini.md";
     const result = handelize(complexName);
-    expect(result).toBe("money-movement-licensing-review-2025-11-19-10-25-est-notes-by-gemini.md");
+    expect(result).toBe("Money-Movement-Licensing-Review-2025-11-19-10-25-EST-Notes-by-Gemini.md");
     expect(result).not.toContain(" ");
     expect(result).not.toContain("/");
     expect(result).not.toContain(":");
@@ -159,7 +159,7 @@ describe("handelize", () => {
 
   test("handles unicode characters", () => {
     expect(handelize("日本語.md")).toBe("日本語.md");
-    expect(handelize("Зоны и проекты.md")).toBe("зоны-и-проекты.md");
+    expect(handelize("Зоны и проекты.md")).toBe("Зоны-и-проекты.md");
     expect(handelize("café-notes.md")).toBe("café-notes.md");
     expect(handelize("naïve.md")).toBe("naïve.md");
     expect(handelize("日本語-notes.md")).toBe("日本語-notes.md");
@@ -181,13 +181,13 @@ describe("handelize", () => {
   test("handles dates and times in filenames", () => {
     expect(handelize("meeting-2025-01-15.md")).toBe("meeting-2025-01-15.md");
     expect(handelize("notes 2025/01/15.md")).toBe("notes-2025/01/15.md");
-    expect(handelize("call_10:30_AM.md")).toBe("call-10-30-am.md");
+    expect(handelize("call_10:30_AM.md")).toBe("call-10-30-AM.md");
   });
 
   test("handles special project naming patterns", () => {
-    expect(handelize("PROJECT_ABC_v2.0.md")).toBe("project-abc-v2-0.md");
-    expect(handelize("[WIP] Feature Request.md")).toBe("wip-feature-request.md");
-    expect(handelize("(DRAFT) Proposal v1.md")).toBe("draft-proposal-v1.md");
+    expect(handelize("PROJECT_ABC_v2.0.md")).toBe("PROJECT-ABC-v2.0.md");
+    expect(handelize("[WIP] Feature Request.md")).toBe("WIP-Feature-Request.md");
+    expect(handelize("(DRAFT) Proposal v1.md")).toBe("DRAFT-Proposal-v1.md");
   });
 
   test("handles symbol-only route filenames", () => {

+ 28 - 0
test/store.test.ts

@@ -1203,6 +1203,34 @@ describe("FTS Search", () => {
     await cleanupTestDb(store);
   });
 
+  test("searchFTS title boost outweighs higher body frequency", async () => {
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+
+    // Document with "quantum" mentioned in a longer body but NOT in the title
+    await insertTestDocument(store.db, collectionName, {
+      name: "body-only",
+      title: "General Science Notes",
+      body: "This research paper discusses quantum mechanics and the quantum model of computation. The quantum approach offers improvements over classical methods.",
+      displayPath: "test/body-only.md",
+    });
+
+    // Document with "quantum" in the title only; its shorter body never mentions the term
+    await insertTestDocument(store.db, collectionName, {
+      name: "title-match",
+      title: "Quantum Computing Overview",
+      body: "An introduction to the fundamentals of this emerging computing paradigm.",
+      displayPath: "test/title-match.md",
+    });
+
+    const results = store.searchFTS("quantum", 10);
+    expect(results.length).toBe(2);
+    // Title-match doc should rank higher due to BM25 column weights boosting title
+    expect(results[0]!.displayPath).toBe(`${collectionName}/test/title-match.md`);
+
+    await cleanupTestDb(store);
+  });
+
   test("searchFTS respects limit parameter", async () => {
     const store = await createTestStore();
     const collectionName = await createTestCollection();