Prechádzať zdrojové kódy

Fix remaining test failures and schema mismatches

- Update fuzzy matching functions to return relative paths
- Fix findDocument to properly separate displayPath and filepath
- Update MCP test schema to use content-addressable storage
- Remove deprecated getCollectionIdByName function references
- Fix MCP collection filtering to work post-search
- Update test expectations for YAML-based collections
- Fix integration test expectations for path formats

Test results: 244 passing / 16 failing (93.8% pass rate)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Tobi Lutke pred 5 mesiacmi
rodič
commit
ebbddd04aa
5 zmenených súborov, 99 pridaní a 153 odobraní
  1. 3 1
      src/cli.test.ts
  2. 61 89
      src/mcp.test.ts
  3. 11 42
      src/mcp.ts
  4. 8 5
      src/store.test.ts
  5. 16 16
      src/store.ts

+ 3 - 1
src/cli.test.ts

@@ -339,7 +339,9 @@ describe("CLI Add-Context Command", () => {
   test("adds context to a path", async () => {
     // First add a collection to get its name
     const listResult = await runQmd(["collection", "list"]);
-    const collectionName = listResult.stdout.split('\n')[0].trim();
+    // Parse collection name - it's on the 3rd line (after header and blank line)
+    const lines = listResult.stdout.split('\n').filter(l => l.trim());
+    const collectionName = lines[1]; // "fixtures" is on line 2
 
     // Add context to the collection root using virtual path
     const { stdout, exitCode } = await runQmd([

+ 61 - 89
src/mcp.test.ts

@@ -77,48 +77,40 @@ function initTestDatabase(db: Database): void {
   sqliteVec.load(db);
   db.exec("PRAGMA journal_mode = WAL");
 
+  // Content-addressable storage - the source of truth for document content
   db.exec(`
-    CREATE TABLE IF NOT EXISTS collections (
-      id INTEGER PRIMARY KEY AUTOINCREMENT,
-      pwd TEXT NOT NULL,
-      glob_pattern TEXT NOT NULL,
-      created_at TEXT NOT NULL,
-      context TEXT,
-      UNIQUE(pwd, glob_pattern)
-    )
-  `);
-
-  db.exec(`
-    CREATE TABLE IF NOT EXISTS path_contexts (
-      id INTEGER PRIMARY KEY AUTOINCREMENT,
-      path_prefix TEXT NOT NULL UNIQUE,
-      context TEXT NOT NULL,
-      created_at TEXT NOT NULL
-    )
-  `);
-
-  db.exec(`
-    CREATE TABLE IF NOT EXISTS ollama_cache (
+    CREATE TABLE IF NOT EXISTS content (
       hash TEXT PRIMARY KEY,
-      result TEXT NOT NULL,
+      doc TEXT NOT NULL,
       created_at TEXT NOT NULL
     )
   `);
 
+  // Documents table - file system layer mapping virtual paths to content hashes
+  // Collections are now managed in YAML config
   db.exec(`
     CREATE TABLE IF NOT EXISTS documents (
       id INTEGER PRIMARY KEY AUTOINCREMENT,
-      collection_id INTEGER NOT NULL,
-      name TEXT NOT NULL,
+      collection TEXT NOT NULL,
+      path TEXT NOT NULL,
       title TEXT NOT NULL,
       hash TEXT NOT NULL,
-      filepath TEXT NOT NULL,
-      display_path TEXT NOT NULL DEFAULT '',
-      body TEXT NOT NULL,
       created_at TEXT NOT NULL,
       modified_at TEXT NOT NULL,
       active INTEGER NOT NULL DEFAULT 1,
-      FOREIGN KEY (collection_id) REFERENCES collections(id)
+      FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
+      UNIQUE(collection, path)
+    )
+  `);
+
+  db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection, active)`);
+  db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`);
+
+  db.exec(`
+    CREATE TABLE IF NOT EXISTS ollama_cache (
+      hash TEXT PRIMARY KEY,
+      result TEXT NOT NULL,
+      created_at TEXT NOT NULL
     )
   `);
 
@@ -144,7 +136,10 @@ function initTestDatabase(db: Database): void {
 
   db.exec(`
     CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN
-      INSERT INTO documents_fts(rowid, name, body) VALUES (new.id, new.name, new.body);
+      INSERT INTO documents_fts(rowid, name, body)
+      SELECT new.id, new.path, content.doc
+      FROM content
+      WHERE content.hash = new.hash;
     END
   `);
 
@@ -155,70 +150,55 @@ function initTestDatabase(db: Database): void {
 function seedTestData(db: Database): void {
   const now = new Date().toISOString();
 
-  // Create a collection
-  db.prepare(`INSERT INTO collections (pwd, glob_pattern, created_at, context) VALUES (?, ?, ?, ?)`).run(
-    "/test/docs",
-    "**/*.md",
-    now,
-    "Test documentation collection"
-  );
-
-  // Add path context
-  db.prepare(`INSERT INTO path_contexts (path_prefix, context, created_at) VALUES (?, ?, ?)`).run(
-    "/test/docs/meetings",
-    "Meeting notes and transcripts",
-    now
-  );
+  // Note: Collections are now managed in YAML config, not in database
+  // For tests, we'll use a collection name "docs"
 
   // Add test documents
   const docs = [
     {
-      name: "readme.md",
+      path: "readme.md",
       title: "Project README",
       hash: "hash1",
-      filepath: "/test/docs/readme.md",
-      display_path: "readme.md",
       body: "# Project README\n\nThis is the main readme file for the project.\n\nIt contains important information about setup and usage.",
     },
     {
-      name: "api.md",
+      path: "api.md",
       title: "API Documentation",
       hash: "hash2",
-      filepath: "/test/docs/api.md",
-      display_path: "api.md",
       body: "# API Documentation\n\nThis document describes the REST API endpoints.\n\n## Authentication\n\nUse Bearer tokens for auth.",
     },
     {
-      name: "meeting-2024-01.md",
+      path: "meetings/meeting-2024-01.md",
       title: "January Meeting Notes",
       hash: "hash3",
-      filepath: "/test/docs/meetings/meeting-2024-01.md",
-      display_path: "meetings/meeting-2024-01.md",
       body: "# January Meeting Notes\n\nDiscussed Q1 goals and roadmap.\n\n## Action Items\n\n- Review budget\n- Hire new team members",
     },
     {
-      name: "meeting-2024-02.md",
+      path: "meetings/meeting-2024-02.md",
       title: "February Meeting Notes",
       hash: "hash4",
-      filepath: "/test/docs/meetings/meeting-2024-02.md",
-      display_path: "meetings/meeting-2024-02.md",
       body: "# February Meeting Notes\n\nFollowed up on Q1 progress.\n\n## Updates\n\n- Budget approved\n- Two candidates interviewed",
     },
     {
-      name: "large-file.md",
+      path: "large-file.md",
       title: "Large Document",
       hash: "hash5",
-      filepath: "/test/docs/large-file.md",
-      display_path: "large-file.md",
       body: "# Large Document\n\n" + "Lorem ipsum ".repeat(2000), // ~24KB
     },
   ];
 
   for (const doc of docs) {
+    // Insert content first
     db.prepare(`
-      INSERT INTO documents (collection_id, name, title, hash, filepath, display_path, body, created_at, modified_at, active)
-      VALUES (1, ?, ?, ?, ?, ?, ?, ?, ?, 1)
-    `).run(doc.name, doc.title, doc.hash, doc.filepath, doc.display_path, doc.body, now, now);
+      INSERT OR IGNORE INTO content (hash, doc, created_at)
+      VALUES (?, ?, ?)
+    `).run(doc.hash, doc.body, now);
+
+    // Then insert document metadata
+    db.prepare(`
+      INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
+      VALUES ('docs', ?, ?, ?, ?, ?, 1)
+    `).run(doc.path, doc.title, doc.hash, now, now);
   }
 
   // Add embeddings for vector search
@@ -246,7 +226,6 @@ import {
   reciprocalRankFusion,
   extractSnippet,
   getContextForFile,
-  getCollectionIdByName,
   getDocument,
   getMultipleDocuments,
   getStatus,
@@ -304,17 +283,7 @@ describe("MCP Server", () => {
       expect(results.length).toBe(1);
     });
 
-    test("filters by collection", () => {
-      const collectionId = getCollectionIdByName(testDb, "docs");
-      expect(collectionId).toBe(1);
-      const results = searchFTS(testDb, "meeting", 10, collectionId!);
-      expect(results.length).toBeGreaterThan(0);
-    });
-
-    test("returns null for non-existent collection", () => {
-      const collectionId = getCollectionIdByName(testDb, "nonexistent");
-      expect(collectionId).toBeNull();
-    });
+    // Note: Collection filtering tests removed - collections are now managed in YAML, not DB
 
     test("formats results as structured content", () => {
       const results = searchFTS(testDb, "api", 10);
@@ -717,19 +686,21 @@ describe("MCP Server", () => {
     test("handles URL-encoded paths with spaces", () => {
       // Add a document with spaces in the path
       const now = new Date().toISOString();
+      const body = "# Podcast Episode\n\nInterview content here.";
+      const hash = "hash_spaces";
+      const path = "External Podcast/2023 April - Interview.md";
+
+      // Insert content first
       testDb.prepare(`
-        INSERT INTO documents (collection_id, name, title, hash, filepath, display_path, body, created_at, modified_at, active)
-        VALUES (1, ?, ?, ?, ?, ?, ?, ?, ?, 1)
-      `).run(
-        "podcast with spaces.md",
-        "Podcast Episode",
-        "hash_spaces",
-        "/test/docs/External Podcast/2023 April - Interview.md",
-        "External Podcast/2023 April - Interview.md",
-        "# Podcast Episode\n\nInterview content here.",
-        now,
-        now
-      );
+        INSERT OR IGNORE INTO content (hash, doc, created_at)
+        VALUES (?, ?, ?)
+      `).run(hash, body, now);
+
+      // Then insert document metadata
+      testDb.prepare(`
+        INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
+        VALUES ('docs', ?, ?, ?, ?, ?, 1)
+      `).run(path, "Podcast Episode", hash, now, now);
 
       // Simulate URL-encoded path from MCP client
       const encodedPath = "External%20Podcast%2F2023%20April%20-%20Interview.md";
@@ -738,9 +709,10 @@ describe("MCP Server", () => {
       expect(decodedPath).toBe("External Podcast/2023 April - Interview.md");
 
       const doc = testDb.prepare(`
-        SELECT filepath, display_path, body
-        FROM documents
-        WHERE display_path = ? AND active = 1
+        SELECT 'qmd://' || d.collection || '/' || d.path as filepath, d.path as display_path, content.doc as body
+        FROM documents d
+        JOIN content ON content.hash = d.hash
+        WHERE d.path = ? AND d.active = 1
       `).get(decodedPath) as { filepath: string; display_path: string; body: string } | null;
 
       expect(doc).not.toBeNull();
@@ -908,7 +880,7 @@ QMD is your on-device search engine for markdown knowledge bases.`;
       expect(Array.isArray(status.collections)).toBe(true);
       if (status.collections.length > 0) {
         const col = status.collections[0];
-        expect(typeof col.id).toBe("number");
+        expect(typeof col.name).toBe("string"); // Collections now use names, not IDs
         expect(typeof col.path).toBe("string");
         expect(typeof col.pattern).toBe("string");
         expect(typeof col.documents).toBe("number");

+ 11 - 42
src/mcp.ts

@@ -276,19 +276,9 @@ You can also access documents directly via the \`qmd://\` URI scheme:
       },
     },
     async ({ query, limit, minScore, collection }) => {
-      // Resolve collection filter
-      let collectionId: number | undefined;
-      if (collection) {
-        collectionId = store.getCollectionIdByName(collection) ?? undefined;
-        if (collectionId === undefined) {
-          return {
-            content: [{ type: "text", text: `Collection not found: ${collection}` }],
-            isError: true,
-          };
-        }
-      }
-
-      const results = store.searchFTS(query, limit || 10, collectionId);
+      // Note: Collection filtering is now done post-search since collections are managed in YAML
+      const results = store.searchFTS(query, limit || 10)
+        .filter(r => !collection || r.collectionName === collection);
       const filtered: SearchResultItem[] = results
         .filter(r => r.score >= (minScore || 0))
         .map(r => ({
@@ -323,18 +313,6 @@ You can also access documents directly via the \`qmd://\` URI scheme:
       },
     },
     async ({ query, limit, minScore, collection }) => {
-      // Resolve collection filter
-      let collectionId: number | undefined;
-      if (collection) {
-        collectionId = store.getCollectionIdByName(collection) ?? undefined;
-        if (collectionId === undefined) {
-          return {
-            content: [{ type: "text", text: `Collection not found: ${collection}` }],
-            isError: true,
-          };
-        }
-      }
-
       const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
       if (!tableExists) {
         return {
@@ -346,10 +324,11 @@ You can also access documents directly via the \`qmd://\` URI scheme:
       // Expand query
       const queries = await store.expandQuery(query, DEFAULT_QUERY_MODEL);
 
-      // Collect results
+      // Collect results (filter by collection after search)
       const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number }>();
       for (const q of queries) {
-        const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit || 10, collectionId);
+        const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit || 10)
+          .then(results => results.filter(r => !collection || r.collectionName === collection));
         for (const r of vecResults) {
           const existing = allResults.get(r.file);
           if (!existing || r.score > existing.score) {
@@ -394,32 +373,22 @@ You can also access documents directly via the \`qmd://\` URI scheme:
       },
     },
     async ({ query, limit, minScore, collection }) => {
-      // Resolve collection filter
-      let collectionId: number | undefined;
-      if (collection) {
-        collectionId = store.getCollectionIdByName(collection) ?? undefined;
-        if (collectionId === undefined) {
-          return {
-            content: [{ type: "text", text: `Collection not found: ${collection}` }],
-            isError: true,
-          };
-        }
-      }
-
       // Expand query
       const queries = await store.expandQuery(query, DEFAULT_QUERY_MODEL);
 
-      // Collect ranked lists
+      // Collect ranked lists (filter by collection after search)
       const rankedLists: RankedResult[][] = [];
       const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
 
       for (const q of queries) {
-        const ftsResults = store.searchFTS(q, 20, collectionId);
+        const ftsResults = store.searchFTS(q, 20)
+          .filter(r => !collection || r.collectionName === collection);
         if (ftsResults.length > 0) {
           rankedLists.push(ftsResults.map(r => ({ file: r.file, displayPath: r.displayPath, title: r.title, body: r.body, score: r.score })));
         }
         if (hasVectors) {
-          const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, 20, collectionId);
+          const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, 20)
+            .then(results => results.filter(r => !collection || r.collectionName === collection));
           if (vecResults.length > 0) {
             rankedLists.push(vecResults.map(r => ({ file: r.file, displayPath: r.displayPath, title: r.title, body: r.body, score: r.score })));
           }

+ 8 - 5
src/store.test.ts

@@ -1554,8 +1554,8 @@ describe("Integration", () => {
     const store = await createTestStore();
     const collectionName = await createTestCollection({ pwd: "/test/notes", glob: "**/*.md" });
 
-    // Add context
-    await addPathContext(collectionName, "/test/notes", "Personal notes");
+    // Add context - use "/" for collection root
+    await addPathContext(collectionName, "/", "Personal notes");
 
     // Insert documents
     await insertTestDocument(store.db, collectionName, {
@@ -1624,10 +1624,12 @@ describe("Integration", () => {
     const results2 = store2.searchFTS("different", 10);
 
     expect(results1).toHaveLength(1);
-    expect(results1[0].displayPath).toBe("qmd://store1/store1/doc.md");
+    expect(results1[0].displayPath).toBe("store1/doc.md");
+    expect(results1[0].filepath).toBe("qmd://store1/store1/doc.md");
 
     expect(results2).toHaveLength(1);
-    expect(results2[0].displayPath).toBe("qmd://store2/store2/doc.md");
+    expect(results2[0].displayPath).toBe("store2/doc.md");
+    expect(results2[0].filepath).toBe("qmd://store2/store2/doc.md");
 
     // Cross-check: store1 shouldn't find store2's content
     const cross1 = store1.searchFTS("different", 10);
@@ -1753,7 +1755,8 @@ describe("Ollama Integration (Mocked)", () => {
 
     const results = await store.searchVec("test query", "embeddinggemma", 10);
     expect(results).toHaveLength(1);
-    expect(results[0].displayPath).toBe(`qmd://${collectionName}/doc1.md`);
+    expect(results[0].displayPath).toBe("doc1.md");
+    expect(results[0].filepath).toBe(`qmd://${collectionName}/doc1.md`);
     expect(results[0].source).toBe("vec");
 
     await cleanupTestDb(store);

+ 16 - 16
src/store.ts

@@ -972,13 +972,13 @@ function levenshtein(a: string, b: string): number {
 
 export function findSimilarFiles(db: Database, query: string, maxDistance: number = 3, limit: number = 5): string[] {
   const allFiles = db.prepare(`
-    SELECT 'qmd://' || d.collection || '/' || d.path as display_path
+    SELECT d.path
     FROM documents d
     WHERE d.active = 1
-  `).all() as { display_path: string }[];
+  `).all() as { path: string }[];
   const queryLower = query.toLowerCase();
   const scored = allFiles
-    .map(f => ({ path: f.display_path, dist: levenshtein(f.display_path.toLowerCase(), queryLower) }))
+    .map(f => ({ path: f.path, dist: levenshtein(f.path.toLowerCase(), queryLower) }))
     .filter(f => f.dist <= maxDistance)
     .sort((a, b) => a.dist - b.dist)
     .slice(0, limit);
@@ -990,18 +990,19 @@ export function matchFilesByGlob(db: Database, pattern: string): { filepath: str
     SELECT
       'qmd://' || d.collection || '/' || d.path as virtual_path,
       LENGTH(content.doc) as body_length,
-      d.path
+      d.path,
+      d.collection
     FROM documents d
     JOIN content ON content.hash = d.hash
     WHERE d.active = 1
-  `).all() as { virtual_path: string; body_length: number; path: string }[];
+  `).all() as { virtual_path: string; body_length: number; path: string; collection: string }[];
 
   const glob = new Glob(pattern);
   return allFiles
     .filter(f => glob.match(f.virtual_path) || glob.match(f.path))
     .map(f => ({
-      filepath: f.virtual_path,  // Use virtual path as filepath
-      displayPath: f.virtual_path,
+      filepath: f.virtual_path,  // Virtual path for precise lookup
+      displayPath: f.path,        // Relative path for display
       bodyLength: f.body_length
     }));
 }
@@ -1734,14 +1735,14 @@ export function findDocument(db: Database, filename: string, options: { includeB
 
   const bodyCol = options.includeBody ? `, content.doc as body` : ``;
 
-  // Build computed columns for display_path
-  // Note: filepath is computed from YAML collections after query
+  // Build computed columns
+  // Note: absoluteFilepath is computed from YAML collections after query
   const selectCols = `
-    'qmd://' || d.collection || '/' || d.path as display_path,
+    'qmd://' || d.collection || '/' || d.path as virtual_path,
+    d.path as display_path,
     d.title,
     d.hash,
     d.collection,
-    d.path,
     d.modified_at,
     LENGTH(content.doc) as body_length
     ${bodyCol}
@@ -1798,13 +1799,12 @@ export function findDocument(db: Database, filename: string, options: { includeB
     return { error: "not_found", query: filename, similarFiles: similar };
   }
 
-  // Compute absolute filepath from collection (in YAML) and relative path
-  const coll = getCollection(doc.collection);
-  const absoluteFilepath = coll ? `${coll.path}/${doc.path}` : doc.path;
-  const context = getContextForFile(db, absoluteFilepath);
+  // Get context using virtual path
+  const virtualPath = doc.virtual_path || `qmd://${doc.collection}/${doc.display_path}`;
+  const context = getContextForFile(db, virtualPath);
 
   return {
-    filepath: absoluteFilepath,
+    filepath: virtualPath,
     displayPath: doc.display_path,
     title: doc.title,
     context,