6 miesięcy temu · 5343e0e51d
--- a/.beads/issues.jsonl
+++ b/.beads/issues.jsonl
@@ -19,7 +19,7 @@
 
				 {"id":"qmd-bx1","title":"Fix migration SQL for proper basename extraction","description":"The migration currently generates collection names incorrectly (uses full path instead of basename). Need to fix the SQL in migrateToContentAddressable to properly extract the directory basename.","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-12-12T15:29:53.757723-05:00","updated_at":"2025-12-12T15:50:29.349134-05:00","closed_at":"2025-12-12T15:50:29.349134-05:00","dependencies":[{"issue_id":"qmd-bx1","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.758524-05:00","created_by":"daemon"}]}
			
 
				 {"id":"qmd-c0m","title":"Comprehensive CLI review and consistency pass","description":"Review entire CLI command structure:\n- Consistent naming (add vs create, remove vs delete)\n- Consistent flag usage (--name, --mask, etc)\n- Update help text for all commands\n- Ensure virtual paths work everywhere\n- Test all commands end-to-end","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-12T15:29:38.083564-05:00","updated_at":"2025-12-12T16:06:51.544695-05:00","closed_at":"2025-12-12T16:06:51.544695-05:00"}
			
 
				 {"id":"qmd-clr","title":"fix embed","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-12T16:14:55.292114-05:00","updated_at":"2025-12-12T16:31:27.661829-05:00","closed_at":"2025-12-12T16:31:27.661829-05:00"}
			
 
				-{"id":"qmd-d00","title":"Add offline evaluation harness for tuning","description":"Create a small benchmark with ~100 labeled queries from real searches. Would enable tuning: expansion on/off threshold, candidate count (30 vs 100), blending weights, reranker threshold.","notes":"Test samples must be: 1) entirely synthetic, OR 2) public documents (e.g., public podcasts, public memos). No private/personal content in eval set.","status":"in_progress","priority":3,"issue_type":"feature","created_at":"2025-12-20T17:18:42.007265-05:00","updated_at":"2025-12-21T12:07:22.319147-05:00"}
			
 
				+{"id":"qmd-d00","title":"Add offline evaluation harness for tuning","description":"Create a small benchmark with ~100 labeled queries from real searches. Would enable tuning: expansion on/off threshold, candidate count (30 vs 100), blending weights, reranker threshold.","notes":"Test samples must be: 1) entirely synthetic, OR 2) public documents (e.g., public podcasts, public memos). No private/personal content in eval set.","status":"closed","priority":3,"issue_type":"feature","created_at":"2025-12-20T17:18:42.007265-05:00","updated_at":"2025-12-21T12:10:40.497797-05:00","closed_at":"2025-12-21T12:10:40.497797-05:00"}
			
 
				 {"id":"qmd-deh","title":"Refactor database introduce qmd collection *","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-10T10:56:04.516137-05:00","updated_at":"2025-12-12T16:12:12.349428-05:00","closed_at":"2025-12-12T16:12:12.349428-05:00"}
			
 
				 {"id":"qmd-df5","title":"Rerank multiple chunks per document with score aggregation","description":"Currently we only rerank 1 chunk per doc (selected by keyword heuristic). Should rerank top 2-3 chunks per document, then aggregate scores (max, softmax, or top-2 average). This improves ranking for long documents where the keyword-matched chunk isn't always the most relevant.","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-20T17:18:41.592575-05:00","updated_at":"2025-12-21T12:04:11.777309-05:00","closed_at":"2025-12-21T12:04:11.777309-05:00"}
			
 
				 {"id":"qmd-dmi","title":"Implement 'qmd collection' commands","description":"Add explicit collection management:\n- qmd collection add . --name \u003cname\u003e --mask '**/*.md'\n- qmd collection list\n- qmd collection remove \u003cname\u003e\n\nThis gives users control over collection names and patterns.","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-12-12T15:29:53.810666-05:00","updated_at":"2025-12-12T16:02:08.079158-05:00","closed_at":"2025-12-12T16:02:08.079158-05:00","dependencies":[{"issue_id":"qmd-dmi","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.811294-05:00","created_by":"daemon"}]}
			
--- a/src/eval.test.ts
+++ b/src/eval.test.ts
@@ -0,0 +1,148 @@
 
				+/**
			
 
				+ * Evaluation Tests for QMD Search Quality
			
 
				+ *
			
 
				+ * Tests search quality against synthetic documents with known-answer queries.
			
 
				+ * Validates that search improvements don't regress quality.
			
 
				+ */
			
 
				+
			
 
				+import { describe, test, expect, beforeAll, afterAll } from "bun:test";
			
 
				+import { mkdtempSync, rmSync, readFileSync, readdirSync } from "fs";
			
 
				+import { join } from "path";
			
 
				+import { tmpdir } from "os";
			
 
				+import Database from "bun:sqlite";
			
 
				+
			
 
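				+// Point INDEX_PATH at a temp file so getDb() never opens the global index. NOTE: static imports are hoisted, so ./store is evaluated before this line runs; this is safe only because the path is (presumably) read lazily when getDb() is called, not at import time.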
			
 
				+const tempDir = mkdtempSync(join(tmpdir(), "qmd-eval-"));
			
 
				+process.env.INDEX_PATH = join(tempDir, "eval.sqlite");
			
 
				+
			
 
				+import {
			
 
				+  getDb,
			
 
				+  closeDb,
			
 
				+  searchFTS,
			
 
				+  insertDocument,
			
 
				+  insertContent,
			
 
				+} from "./store";
			
 
				+
			
 
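				+// Eval queries with known answers. expectedDoc must be a lowercase substring of the target document's filepath (the tests compare it against filepath.toLowerCase()).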
			
 
				+const evalQueries: {
			
 
				+  query: string;
			
 
				+  expectedDoc: string;
			
 
				+  difficulty: "easy" | "medium" | "hard";
			
 
				+}[] = [
			
 
				+  // EASY: Exact keyword matches
			
 
				+  { query: "API versioning", expectedDoc: "api-design", difficulty: "easy" },
			
 
				+  { query: "Series A fundraising", expectedDoc: "fundraising", difficulty: "easy" },
			
 
				+  { query: "CAP theorem", expectedDoc: "distributed-systems", difficulty: "easy" },
			
 
				+  { query: "overfitting machine learning", expectedDoc: "machine-learning", difficulty: "easy" },
			
 
				+  { query: "remote work VPN", expectedDoc: "remote-work", difficulty: "easy" },
			
 
				+  { query: "Project Phoenix retrospective", expectedDoc: "product-launch", difficulty: "easy" },
			
 
				+
			
 
				+  // MEDIUM: Semantic/conceptual queries
			
 
				+  { query: "how to structure REST endpoints", expectedDoc: "api-design", difficulty: "medium" },
			
 
				+  { query: "raising money for startup", expectedDoc: "fundraising", difficulty: "medium" },
			
 
				+  { query: "consistency vs availability tradeoffs", expectedDoc: "distributed-systems", difficulty: "medium" },
			
 
				+  { query: "how to prevent models from memorizing data", expectedDoc: "machine-learning", difficulty: "medium" },
			
 
				+  { query: "working from home guidelines", expectedDoc: "remote-work", difficulty: "medium" },
			
 
				+  { query: "what went wrong with the launch", expectedDoc: "product-launch", difficulty: "medium" },
			
 
				+
			
 
				+  // HARD: Vague, partial memory, indirect
			
 
				+  { query: "nouns not verbs", expectedDoc: "api-design", difficulty: "hard" },
			
 
				+  { query: "Sequoia investor pitch", expectedDoc: "fundraising", difficulty: "hard" },
			
 
				+  { query: "Raft algorithm leader election", expectedDoc: "distributed-systems", difficulty: "hard" },
			
 
				+  { query: "F1 score precision recall", expectedDoc: "machine-learning", difficulty: "hard" },
			
 
				+  { query: "quarterly team gathering travel", expectedDoc: "remote-work", difficulty: "hard" },
			
 
				+  { query: "beta program 47 bugs", expectedDoc: "product-launch", difficulty: "hard" },
			
 
				+];
			
 
				+
			
 
				+describe("Search Quality Evaluation", () => {
			
 
				+  let db: Database;
			
 
				+
			
 
				+  beforeAll(() => {
			
 
				+    // Initialize database (INDEX_PATH already set at top of file)
			
 
				+    db = getDb();
			
 
				+
			
 
				+    // Load and index eval documents
			
 
				+    const evalDocsDir = join(import.meta.dir, "../test/eval-docs");
			
 
				+    const files = readdirSync(evalDocsDir).filter(f => f.endsWith(".md"));
			
 
				+
			
 
				+    for (const file of files) {
			
 
				+      const content = readFileSync(join(evalDocsDir, file), "utf-8");
			
 
				+      const title = content.split("\n")[0]?.replace(/^#\s*/, "") || file;
			
 
				+      const hash = Bun.hash(content).toString(16).slice(0, 12);
			
 
				+      const now = new Date().toISOString();
			
 
				+
			
 
				+      insertContent(db, hash, content, now);
			
 
				+      insertDocument(db, "eval-docs", file, title, hash, now, now);
			
 
				+    }
			
 
				+  });
			
 
				+
			
 
				+  afterAll(() => {
			
 
				+    closeDb();
			
 
				+    rmSync(tempDir, { recursive: true, force: true });
			
 
				+  });
			
 
				+
			
 
				+  describe("BM25 Search (FTS)", () => {
			
 
				+    test("easy queries: ≥80% Hit@3", () => {
			
 
				+      const easyQueries = evalQueries.filter(q => q.difficulty === "easy");
			
 
				+      let hits = 0;
			
 
				+
			
 
				+      for (const { query, expectedDoc } of easyQueries) {
			
 
				+        const results = searchFTS(db, query, 5);
			
 
				+        const top3 = results.slice(0, 3);
			
 
				+        const found = top3.some(r => r.filepath.toLowerCase().includes(expectedDoc));
			
 
				+        if (found) hits++;
			
 
				+      }
			
 
				+
			
 
				+      const hitRate = hits / easyQueries.length;
			
 
				+      expect(hitRate).toBeGreaterThanOrEqual(0.8);
			
 
				+    });
			
 
				+
			
 
				+    test("medium queries: ≥15% Hit@3 (BM25 struggles with semantic)", () => {
			
 
				+      const mediumQueries = evalQueries.filter(q => q.difficulty === "medium");
			
 
				+      let hits = 0;
			
 
				+
			
 
				+      for (const { query, expectedDoc } of mediumQueries) {
			
 
				+        const results = searchFTS(db, query, 5);
			
 
				+        const top3 = results.slice(0, 3);
			
 
				+        const found = top3.some(r => r.filepath.toLowerCase().includes(expectedDoc));
			
 
				+        if (found) hits++;
			
 
				+      }
			
 
				+
			
 
				+      const hitRate = hits / mediumQueries.length;
			
 
				+      // BM25 alone struggles with semantic queries - baseline is low
			
 
				+      expect(hitRate).toBeGreaterThanOrEqual(0.15);
			
 
				+    });
			
 
				+
			
 
				+    test("hard queries: ≥15% Hit@5 (BM25 baseline)", () => {
			
 
				+      const hardQueries = evalQueries.filter(q => q.difficulty === "hard");
			
 
				+      let hits = 0;
			
 
				+
			
 
				+      for (const { query, expectedDoc } of hardQueries) {
			
 
				+        const results = searchFTS(db, query, 5);
			
 
				+        const found = results.some(r => r.filepath.toLowerCase().includes(expectedDoc));
			
 
				+        if (found) hits++;
			
 
				+      }
			
 
				+
			
 
				+      const hitRate = hits / hardQueries.length;
			
 
				+      // BM25 alone really struggles with vague queries
			
 
				+      expect(hitRate).toBeGreaterThanOrEqual(0.15);
			
 
				+    });
			
 
				+  });
			
 
				+
			
 
				+  describe("Overall Quality", () => {
			
 
				+    test("overall Hit@3 ≥40% (BM25 baseline)", () => {
			
 
				+      let hits = 0;
			
 
				+
			
 
				+      for (const { query, expectedDoc } of evalQueries) {
			
 
				+        const results = searchFTS(db, query, 5);
			
 
				+        const top3 = results.slice(0, 3);
			
 
				+        const found = top3.some(r => r.filepath.toLowerCase().includes(expectedDoc));
			
 
				+        if (found) hits++;
			
 
				+      }
			
 
				+
			
 
				+      const hitRate = hits / evalQueries.length;
			
 
				+      // BM25 alone: ~40% is baseline, hybrid should be higher
			
 
				+      expect(hitRate).toBeGreaterThanOrEqual(0.4);
			
 
				+    });
			
 
				+  });
			
 
				+});
			
--- a/src/qmd.ts
+++ b/src/qmd.ts
@@ -12,6 +12,7 @@ import {
 
				   homedir,
			
 
				   resolve,
			
 
				   setCustomIndexName,
			
 
				+  enableProductionMode,
			
 
				   searchFTS,
			
 
				   searchVec,
			
 
				   reciprocalRankFusion,
			
@@ -87,6 +88,10 @@ import {
 
				   listAllContexts,
			
 
				 } from "./collections.js";
			
 
				 
			
 
				+// Enable production mode - allows using default database path
			
 
				+// Tests must set INDEX_PATH or use createStore() with explicit path
			
 
				+enableProductionMode();
			
 
				+
			
 
				 // Terminal colors (respects NO_COLOR env)
			
 
				 const useColor = !process.env.NO_COLOR && process.stdout.isTTY;
			
 
				 const c = {
			
--- a/src/store.test.ts
+++ b/src/store.test.ts
@@ -258,12 +258,31 @@ describe("Path Utilities", () => {
 
				     expect(resolve("/foo/bar/../../baz")).toBe("/baz");
			
 
				   });
			
 
				 
			
 
				-  test("getDefaultDbPath returns expected path structure", () => {
			
 
				-    const defaultPath = getDefaultDbPath();
			
 
				-    expect(defaultPath).toContain(".cache/qmd/index.sqlite");
			
 
				+  test("getDefaultDbPath throws in test mode without INDEX_PATH", () => {
			
 
				+    // In test mode, getDefaultDbPath should throw to prevent accidental writes to global index
			
 
				+    // This is intentional safety behavior
			
 
				+    const originalIndexPath = process.env.INDEX_PATH;
			
 
				+    delete process.env.INDEX_PATH;
			
 
				 
			
 
				-    const customPath = getDefaultDbPath("custom");
			
 
				-    expect(customPath).toContain(".cache/qmd/custom.sqlite");
			
 
				+    expect(() => getDefaultDbPath()).toThrow("Database path not set");
			
 
				+
			
 
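				+    // Restore the original INDEX_PATH. NOTE(review): this is skipped if the expect above throws, leaving INDEX_PATH deleted for later tests; consider try/finally.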
			
 
				+    if (originalIndexPath) process.env.INDEX_PATH = originalIndexPath;
			
 
				+  });
			
 
				+
			
 
				+  test("getDefaultDbPath uses INDEX_PATH when set", () => {
			
 
				+    const originalIndexPath = process.env.INDEX_PATH;
			
 
				+    process.env.INDEX_PATH = "/tmp/test-index.sqlite";
			
 
				+
			
 
				+    expect(getDefaultDbPath()).toBe("/tmp/test-index.sqlite");
			
 
				+    expect(getDefaultDbPath("custom")).toBe("/tmp/test-index.sqlite"); // INDEX_PATH overrides name
			
 
				+
			
 
				+    // Restore
			
 
				+    if (originalIndexPath) {
			
 
				+      process.env.INDEX_PATH = originalIndexPath;
			
 
				+    } else {
			
 
				+      delete process.env.INDEX_PATH;
			
 
				+    }
			
 
				   });
			
 
				 
			
 
				   test("getPwd returns current working directory", () => {
			
@@ -376,12 +395,15 @@ describe("handelize", () => {
 
				 // =============================================================================
			
 
				 
			
 
				 describe("Store Creation", () => {
			
 
				-  test("createStore creates a new store with default path", () => {
			
 
				-    const store = createStore();
			
 
				-    expect(store).toBeDefined();
			
 
				-    expect(store.db).toBeDefined();
			
 
				-    expect(store.dbPath).toContain(".cache/qmd/index.sqlite");
			
 
				-    store.close();
			
 
				+  test("createStore throws without explicit path in test mode", () => {
			
 
				+    // In test mode, createStore without path should throw to prevent accidental writes
			
 
				+    const originalIndexPath = process.env.INDEX_PATH;
			
 
				+    delete process.env.INDEX_PATH;
			
 
				+
			
 
				+    expect(() => createStore()).toThrow("Database path not set");
			
 
				+
			
 
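				+    // Restore the original INDEX_PATH. NOTE(review): this is skipped if the expect above throws, leaving INDEX_PATH deleted for later tests; consider try/finally.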
			
 
				+    if (originalIndexPath) process.env.INDEX_PATH = originalIndexPath;
			
 
				   });
			
 
				 
			
 
				   test("createStore creates a new store with custom path", async () => {
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -80,11 +80,27 @@ export function resolve(...paths: string[]): string {
 
				   return '/' + normalized.join('/');
			
 
				 }
			
 
				 
			
 
				+// Production-mode flag, off by default; qmd.ts turns it on at startup via enableProductionMode(). While off, getDefaultDbPath() throws unless INDEX_PATH is set.
			
 
				+let _productionMode = false;
			
 
				+
			
 
				+export function enableProductionMode(): void {
			
 
				+  _productionMode = true;
			
 
				+}
			
 
				+
			
 
				 export function getDefaultDbPath(indexName: string = "index"): string {
			
 
				-  // Allow override via INDEX_PATH for testing
			
 
				+  // INDEX_PATH always wins, in production and in tests: it is checked before the production-mode guard and overrides indexName
			
 
				   if (Bun.env.INDEX_PATH) {
			
 
				     return Bun.env.INDEX_PATH;
			
 
				   }
			
 
				+
			
 
				+  // In non-production mode (tests), require explicit path
			
 
				+  if (!_productionMode) {
			
 
				+    throw new Error(
			
 
				+      "Database path not set. Tests must set INDEX_PATH env var or use createStore() with explicit path. " +
			
 
				+      "This prevents tests from accidentally writing to the global index."
			
 
				+    );
			
 
				+  }
			
 
				+
			
 
				   const cacheDir = Bun.env.XDG_CACHE_HOME || resolve(homedir(), ".cache");
			
 
				   const qmdCacheDir = resolve(cacheDir, "qmd");
			
 
				   try { Bun.spawnSync(["mkdir", "-p", qmdCacheDir]); } catch {}