Эх сурвалжийг харах

Make docid lookup more lenient with quotes support (#39)

- Add normalizeDocid() to strip quotes and # prefix
- Add isDocid() to detect docid patterns including quoted forms
- Update findDocumentByDocid, findDocument, getDocument to use new helpers
- All formats now work: #abc123, abc123, "#abc123", "abc123", '#abc123', 'abc123'
- Add 18 unit tests for normalizeDocid and isDocid

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Tobias Lütke 4 сар өмнө
parent
commit
3c7dfad1b6
3 өөрчлөгдсөн 146 нэмэгдсэн , 6 устгасан
  1. 3 2
      src/qmd.ts
  2. 105 0
      src/store.test.ts
  3. 38 4
      src/store.ts

+ 3 - 2
src/qmd.ts

@@ -19,6 +19,7 @@ import {
   renameCollection,
   findSimilarFiles,
   findDocumentByDocid,
+  isDocid,
   matchFilesByGlob,
   getHashesNeedingEmbedding,
   getHashesForEmbedding,
@@ -698,8 +699,8 @@ function getDocument(filename: string, fromLine?: number, maxLines?: number, lin
     }
   }
 
-  // Handle docid lookup (#hash or 6-char hex)
-  if (inputPath.startsWith('#') || /^[a-f0-9]{6}$/i.test(inputPath)) {
+  // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
+  if (isDocid(inputPath)) {
     const docidMatch = findDocumentByDocid(db, inputPath);
     if (docidMatch) {
       inputPath = docidMatch.filepath;

+ 105 - 0
src/store.test.ts

@@ -33,6 +33,8 @@ import {
   normalizeVirtualPath,
   isVirtualPath,
   parseVirtualPath,
+  normalizeDocid,
+  isDocid,
   type Store,
   type DocumentResult,
   type SearchResult,
@@ -2376,3 +2378,106 @@ describe("parseVirtualPath", () => {
     expect(parseVirtualPath("collection/path.md")).toBe(null);
   });
 });
+
+// =============================================================================
+// Docid Functions
+// =============================================================================
+
+describe("normalizeDocid", () => {
+  test("strips leading # from docid", () => {
+    expect(normalizeDocid("#abc123")).toBe("abc123");
+    expect(normalizeDocid("#def456")).toBe("def456");
+  });
+
+  test("returns bare hex unchanged", () => {
+    expect(normalizeDocid("abc123")).toBe("abc123");
+    expect(normalizeDocid("def456")).toBe("def456");
+  });
+
+  test("strips surrounding double quotes", () => {
+    expect(normalizeDocid('"#abc123"')).toBe("abc123");
+    expect(normalizeDocid('"abc123"')).toBe("abc123");
+  });
+
+  test("strips surrounding single quotes", () => {
+    expect(normalizeDocid("'#abc123'")).toBe("abc123");
+    expect(normalizeDocid("'abc123'")).toBe("abc123");
+  });
+
+  test("handles quoted docid without #", () => {
+    expect(normalizeDocid('"def456"')).toBe("def456");
+    expect(normalizeDocid("'def456'")).toBe("def456");
+  });
+
+  test("handles whitespace", () => {
+    expect(normalizeDocid("  #abc123  ")).toBe("abc123");
+    expect(normalizeDocid("  abc123  ")).toBe("abc123");
+  });
+
+  test("handles uppercase hex", () => {
+    expect(normalizeDocid("#ABC123")).toBe("ABC123");
+    expect(normalizeDocid('"ABC123"')).toBe("ABC123");
+  });
+
+  test("does not strip mismatched quotes", () => {
+    expect(normalizeDocid('"abc123\'')).toBe('"abc123\'');
+    expect(normalizeDocid("'abc123\"")).toBe("'abc123\"");
+  });
+});
+
+describe("isDocid", () => {
+  test("accepts #hash format", () => {
+    expect(isDocid("#abc123")).toBe(true);
+    expect(isDocid("#def456")).toBe(true);
+    expect(isDocid("#ABCDEF")).toBe(true);
+  });
+
+  test("accepts bare 6-char hex", () => {
+    expect(isDocid("abc123")).toBe(true);
+    expect(isDocid("def456")).toBe(true);
+    expect(isDocid("ABCDEF")).toBe(true);
+  });
+
+  test("accepts longer hex strings", () => {
+    expect(isDocid("abc123def456")).toBe(true);
+    expect(isDocid("#abc123def456")).toBe(true);
+  });
+
+  test("accepts double-quoted docids", () => {
+    expect(isDocid('"#abc123"')).toBe(true);
+    expect(isDocid('"abc123"')).toBe(true);
+  });
+
+  test("accepts single-quoted docids", () => {
+    expect(isDocid("'#abc123'")).toBe(true);
+    expect(isDocid("'abc123'")).toBe(true);
+  });
+
+  test("rejects non-hex strings", () => {
+    expect(isDocid("ghijkl")).toBe(false);
+    expect(isDocid("#ghijkl")).toBe(false);
+    expect(isDocid("abc12g")).toBe(false);
+  });
+
+  test("rejects strings shorter than 6 chars", () => {
+    expect(isDocid("abc12")).toBe(false);
+    expect(isDocid("#abc1")).toBe(false);
+    expect(isDocid("'abc'")).toBe(false);
+  });
+
+  test("rejects empty strings", () => {
+    expect(isDocid("")).toBe(false);
+    expect(isDocid("#")).toBe(false);
+    expect(isDocid('""')).toBe(false);
+  });
+
+  test("rejects file paths", () => {
+    expect(isDocid("/path/to/file.md")).toBe(false);
+    expect(isDocid("path/to/file.md")).toBe(false);
+    expect(isDocid("qmd://collection/file.md")).toBe(false);
+  });
+
+  test("rejects paths that look like hex with extensions", () => {
+    expect(isDocid("abc123.md")).toBe(false);
+  });
+});

+ 38 - 4
src/store.ts

@@ -1145,14 +1145,48 @@ function levenshtein(a: string, b: string): number {
   return dp[m]![n]!;
 }
 
+/**
+ * Normalize a docid input: trim whitespace, strip one pair of matching surrounding quotes, then one leading #.
+ * Handles: "#abc123", 'abc123', "abc123", #abc123, abc123
+ * Returns the stripped string as-is; it is NOT validated as hex here (isDocid does that check).
+ */
+export function normalizeDocid(docid: string): string {
+  let normalized = docid.trim();
+
+  // Strip one pair of matching surrounding quotes (single or double); mismatched quotes are left in place
+  if ((normalized.startsWith('"') && normalized.endsWith('"')) ||
+      (normalized.startsWith("'") && normalized.endsWith("'"))) {
+    normalized = normalized.slice(1, -1);
+  }
+
+  // Strip a single leading # if present
+  if (normalized.startsWith('#')) {
+    normalized = normalized.slice(1);
+  }
+
+  return normalized;
+}
+
+/**
+ * Check if a string looks like a docid reference.
+ * Accepts: #abc123, abc123, "#abc123", "abc123", '#abc123', 'abc123'
+ * Returns true if the normalized form is all hex digits (case-insensitive) and at least 6 chars long.
+ */
+export function isDocid(input: string): boolean {
+  const normalized = normalizeDocid(input);
+  // At least 6 characters, every one a hex digit (the /i flag admits uppercase A-F)
+  return normalized.length >= 6 && /^[a-f0-9]+$/i.test(normalized);
+}
+
 /**
  * Find a document by its short docid (first 6 characters of hash).
  * Returns the document's virtual path if found, null otherwise.
  * If multiple documents match the same short hash (collision), returns the first one.
+ *
+ * Accepts lenient input: #abc123, abc123, "#abc123", "abc123"
  */
 export function findDocumentByDocid(db: Database, docid: string): { filepath: string; hash: string } | null {
-  // Normalize: remove leading # if present
-  const shortHash = docid.startsWith('#') ? docid.slice(1) : docid;
+  const shortHash = normalizeDocid(docid);
 
   if (shortHash.length < 1) return null;
 
@@ -1962,8 +1996,8 @@ export function findDocument(db: Database, filename: string, options: { includeB
     filepath = filepath.slice(0, -colonMatch[0].length);
   }
 
-  // Check if this is a docid lookup (#hash or just 6-char hex)
-  if (filepath.startsWith('#') || /^[a-f0-9]{6}$/i.test(filepath)) {
+  // Check if this is a docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
+  if (isDocid(filepath)) {
     const docidMatch = findDocumentByDocid(db, filepath);
     if (docidMatch) {
       filepath = docidMatch.filepath;