Преглед на файлове

fix: migrate legacy lowercase paths on reindex

When qmd update runs against an index created before case-preservation,
documents may exist under lowercase paths (e.g. "skill.md" for a file
actually named "SKILL.md"). Add findOrMigrateLegacyDocument() that:

- Falls back to a lowercase lookup when the canonical path is not found
- Renames the document path in-place via UPDATE OR IGNORE
- Manually rebuilds the FTS entry (FTS5 INSERT OR REPLACE does not
  reliably update existing rows via triggers)
- Handles UNIQUE conflicts gracefully (returns null on conflict)

Embeddings are keyed by content hash, so the rename preserves all
existing vectors — no re-embedding required.

Both the CLI indexer and the library reindexer share the same helper,
eliminating the duplication that a previous review flagged.

Includes integration tests for: successful migration, already-lowercase
no-op, and the fast path taken when the canonical path already exists
alongside a legacy lowercase row.
Kim Junmo преди 1 месец
родител
ревизия
fee576bf98
променени са 4 файла, в които са добавени 131 реда и са изтрити 3 реда
  1. 5 0
      CHANGELOG.md
  2. 3 2
      src/cli/qmd.ts
  3. 54 1
      src/store.ts
  4. 69 0
      test/store.test.ts

+ 5 - 0
CHANGELOG.md

@@ -2,6 +2,11 @@
 
 ## [Unreleased]
 
+- Fix: preserve original filename case in `handelize()`. The previous
+  `.toLowerCase()` call made indexed paths unreachable on case-sensitive
+  filesystems (Linux). `qmd update` automatically migrates legacy
+  lowercase paths without re-embedding.
+
 ## [2.1.0] - 2026-04-05
 
 Code files now chunk at function and class boundaries via tree-sitter,

+ 3 - 2
src/cli/qmd.ts

@@ -45,6 +45,7 @@ import {
   insertContent,
   insertDocument,
   findActiveDocument,
+  findOrMigrateLegacyDocument,
   updateDocumentTitle,
   updateDocument,
   deactivateDocument,
@@ -1567,8 +1568,8 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
     const hash = await hashContent(content);
     const title = extractTitle(content, relativeFile);
 
-    // Check if document exists in this collection with this path
-    const existing = findActiveDocument(db, collectionName, path);
+    // Check if document exists (also migrates legacy lowercase paths)
+    const existing = findOrMigrateLegacyDocument(db, collectionName, path);
 
     if (existing) {
       if (existing.hash === hash) {

+ 54 - 1
src/store.ts

@@ -1135,6 +1135,7 @@ export type Store = {
   insertContent: (hash: string, content: string, createdAt: string) => void;
   insertDocument: (collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => void;
   findActiveDocument: (collectionName: string, path: string) => { id: number; hash: string; title: string } | null;
+  findOrMigrateLegacyDocument: (collectionName: string, path: string) => { id: number; hash: string; title: string } | null;
   updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => void;
   updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => void;
   deactivateDocument: (collectionName: string, path: string) => void;
@@ -1225,7 +1226,7 @@ export async function reindexCollection(
     const hash = await hashContent(content);
     const title = extractTitle(content, relativeFile);
 
-    const existing = findActiveDocument(db, collectionName, path);
+    const existing = findOrMigrateLegacyDocument(db, collectionName, path);
 
     if (existing) {
       if (existing.hash === hash) {
@@ -1648,6 +1649,7 @@ export function createStore(dbPath?: string): Store {
     insertContent: (hash: string, content: string, createdAt: string) => insertContent(db, hash, content, createdAt),
     insertDocument: (collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => insertDocument(db, collectionName, path, title, hash, createdAt, modifiedAt),
     findActiveDocument: (collectionName: string, path: string) => findActiveDocument(db, collectionName, path),
+    findOrMigrateLegacyDocument: (collectionName: string, path: string) => findOrMigrateLegacyDocument(db, collectionName, path),
     updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => updateDocumentTitle(db, documentId, title, modifiedAt),
     updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => updateDocument(db, documentId, title, hash, modifiedAt),
     deactivateDocument: (collectionName: string, path: string) => deactivateDocument(db, collectionName, path),
@@ -2102,6 +2104,57 @@ export function findActiveDocument(
   return row ?? null;
 }
 
+/**
+ * Find an active document by its case-preserved path, falling back to a
+ * legacy lowercase path and migrating that entry when it is found.
+ *
+ * Lookup order:
+ *  1. `path` exactly as given. A hit is returned as-is; nothing is migrated.
+ *  2. `path.toLowerCase()`, tried only when it differs from `path`. A hit is
+ *     renamed in place to `path` and its `documents_fts` row is rebuilt,
+ *     both inside a single transaction.
+ *
+ * Embeddings are keyed by content hash, so the rename is safe — no
+ * re-embedding required.
+ *
+ * @internal Used by reindexCollection and indexFiles during qmd update.
+ *
+ * @param db - Database handle the lookups and the migration run against.
+ * @param collectionName - Collection the document belongs to.
+ * @param path - Case-preserved path of the document within the collection.
+ * @returns The document's id, hash and title, or null when the document does
+ *   not exist under either path. Also returns null when the legacy row was
+ *   found but the rename changed no rows (for example because
+ *   `UPDATE OR IGNORE` skipped it on a constraint conflict); the legacy row
+ *   is left untouched in that case.
+ */
+export function findOrMigrateLegacyDocument(
+  db: Database,
+  collectionName: string,
+  path: string
+): { id: number; hash: string; title: string } | null {
+  const existing = findActiveDocument(db, collectionName, path);
+  if (existing) return existing;
+
+  const legacyPath = path.toLowerCase();
+  if (legacyPath === path) return null; // already lowercase: no distinct legacy path to try
+
+  const legacy = findActiveDocument(db, collectionName, legacyPath);
+  if (!legacy) return null;
+
+  // Run the rename and the FTS rebuild in one transaction so they apply together.
+  const migrate = db.transaction(() => {
+    // OR IGNORE: a UNIQUE conflict on the new path changes zero rows instead of
+    // throwing. (An active row at `path` was already ruled out by the first lookup.)
+    const result = db.prepare(
+      `UPDATE OR IGNORE documents SET path = ? WHERE id = ? AND active = 1`
+    ).run(path, legacy.id);
+
+    if (result.changes === 0) return false; // nothing renamed: skip the FTS rebuild
+
+    // FTS5 does not reliably update via the documents_au trigger's
+    // INSERT OR REPLACE, so delete and re-insert the FTS row from the renamed document.
+    db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(legacy.id);
+    db.prepare(`
+      INSERT INTO documents_fts(rowid, filepath, title, body)
+      SELECT id, collection || '/' || path, title,
+             (SELECT doc FROM content WHERE hash = documents.hash)
+      FROM documents WHERE id = ?
+    `).run(legacy.id);
+
+    return true;
+  });
+
+  if (!migrate()) return null;
+
+  return findActiveDocument(db, collectionName, path);
+}
+
 /**
  * Update the title and modified_at timestamp for a document.
  */

+ 69 - 0
test/store.test.ts

@@ -3014,6 +3014,75 @@ describe("Content-Addressable Storage", () => {
 
     await cleanupTestDb(store);
   });
+
+  test("findOrMigrateLegacyDocument renames lowercase path to case-preserved", async () => {
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+    const now = new Date().toISOString();
+
+    const content = "# My Skill";
+    const hash = await hashContent(content);
+    store.insertContent(hash, content, now);
+    // Simulate a legacy index: the path is stored in lowercase
+    store.insertDocument(collectionName, "skills/skill.md", "My Skill", hash, now, now);
+
+    // Migration: looking up the case-preserved path should rename the legacy row
+    const result = store.findOrMigrateLegacyDocument(collectionName, "skills/SKILL.md");
+    expect(result).not.toBeNull();
+    expect(result!.hash).toBe(hash);
+
+    // The old lowercase path should no longer be findable
+    expect(store.findActiveDocument(collectionName, "skills/skill.md")).toBeNull();
+    // The new case-preserved path should be active and keep the same content hash
+    const migrated = store.findActiveDocument(collectionName, "skills/SKILL.md");
+    expect(migrated).not.toBeNull();
+    expect(migrated!.hash).toBe(hash);
+
+    // FTS should reflect the new path (the entry is rebuilt manually during migration)
+    const ftsRow = store.db.prepare(
+      `SELECT filepath FROM documents_fts WHERE rowid = ?`
+    ).get(result!.id) as { filepath: string } | undefined;
+    expect(ftsRow).toBeDefined();
+    expect(ftsRow!.filepath).toContain("SKILL.md");
+
+    await cleanupTestDb(store);
+  });
+
+  test("findOrMigrateLegacyDocument returns null when path is already lowercase", async () => {
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+
+    // No document exists, and the path equals its own lowercase form, so there is no legacy path to fall back to
+    const result = store.findOrMigrateLegacyDocument(collectionName, "readme.md");
+    expect(result).toBeNull();
+
+    await cleanupTestDb(store);
+  });
+
+  test("findOrMigrateLegacyDocument returns existing doc when canonical path already present", async () => {
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+    const now = new Date().toISOString();
+
+    const content = "# Content";
+    const hash = await hashContent(content);
+    store.insertContent(hash, content, now);
+    // Both lowercase and case-preserved paths exist (edge case from prior partial migration)
+    store.insertDocument(collectionName, "readme.md", "Readme", hash, now, now);
+    store.insertDocument(collectionName, "README.md", "README", hash, now, now);
+
+    // Should return the canonical-path document directly (fast path).
+    // The legacy "readme.md" row is untouched: no rename is attempted, so the UPDATE OR IGNORE branch is not reached.
+    const result = store.findOrMigrateLegacyDocument(collectionName, "README.md");
+    expect(result).not.toBeNull();
+    expect(result!.hash).toBe(hash);
+
+    // Both rows are still active (the legacy row is neither migrated nor deactivated here)
+    expect(store.findActiveDocument(collectionName, "readme.md")).not.toBeNull();
+    expect(store.findActiveDocument(collectionName, "README.md")).not.toBeNull();
+
+    await cleanupTestDb(store);
+  });
 });
 
 // =============================================================================