3 ماه پیش · b7a5a86a9b
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,10 @@
 
				 - `qmd status` now shows AST grammar availability.
			
 
				 - SDK: `chunkStrategy` option on `embed()` and `search()` methods.
			
 
				 - GitHub Actions workflow to build the Nix flake on Linux and macOS.
			
 
				+- `qmd bench <fixture.json>` command for search quality benchmarks.
			
 
				+  Measures precision@k, recall, MRR, and F1 across BM25, vector, hybrid,
			
 
				+  and full pipeline backends. Ships with an example fixture against
			
 
				+  the eval-docs test collection.
			
 
				 
			
 
				 ### Fixes
			
 
				 
			
--- a/src/bench/bench.ts
+++ b/src/bench/bench.ts
@@ -0,0 +1,241 @@
 
				+/**
			
 
				+ * QMD Benchmark Harness
			
 
				+ *
			
 
				+ * Runs queries from a fixture file against multiple search backends
			
 
				+ * and measures precision@k, recall, MRR, F1, and latency.
			
 
				+ *
			
 
				+ * Usage:
			
 
				+ *   qmd bench <fixture.json> [--json] [--collection <name>]
			
 
				+ *
			
 
				+ * Backends tested:
			
 
				+ *   - bm25: BM25 keyword search (searchLex)
			
 
				+ *   - vector: Vector similarity search (searchVector)
			
 
				+ *   - hybrid: BM25 + vector RRF fusion without reranking
			
 
				+ *   - full: Full hybrid pipeline with LLM reranking
			
 
				+ */
			
 
				+
			
 
				+import { readFileSync } from "node:fs";
			
 
				+import { resolve } from "node:path";
			
 
				+import {
			
 
				+  createStore,
			
 
				+  getDefaultDbPath,
			
 
				+  type QMDStore,
			
 
				+  type SearchResult,
			
 
				+  type HybridQueryResult,
			
 
				+} from "../index.js";
			
 
				+import { scoreResults } from "./score.js";
			
 
				+import type {
			
 
				+  BenchmarkFixture,
			
 
				+  BenchmarkQuery,
			
 
				+  BackendResult,
			
 
				+  QueryResult,
			
 
				+  BenchmarkResult,
			
 
				+} from "./types.js";
			
 
				+
			
 
				/** A named retrieval strategy that the harness can benchmark. */
				type Backend = {
				  /** Label used as the key in per-query results and in the summary. */
				  name: string;
				  /** Runs `query` and resolves to one file path per hit, in the order the store returned them. */
				  run: (store: QMDStore, query: string, limit: number, collection?: string) => Promise<string[]>;
				};

				/** Projects `searchLex` / `searchVector` hits onto their file paths. */
				const hitFilepaths = (hits: SearchResult[]): string[] => hits.map(hit => hit.filepath);

				/** Builds a backend on top of `store.search`; the variants differ only in the `rerank` flag. */
				function searchPipelineBackend(name: string, rerank: boolean): Backend {
				  return {
				    name,
				    run: async (store, query, limit, collection) => {
				      const hits = await store.search({ query, limit, collection, rerank });
				      return hits.map((hit: HybridQueryResult) => hit.file);
				    },
				  };
				}

				/** Every backend the harness knows about, in the order they are run and reported. */
				const BACKENDS: Backend[] = [
				  {
				    name: "bm25",
				    run: async (store, query, limit, collection) =>
				      hitFilepaths(await store.searchLex(query, { limit, collection })),
				  },
				  {
				    name: "vector",
				    run: async (store, query, limit, collection) =>
				      hitFilepaths(await store.searchVector(query, { limit, collection })),
				  },
				  searchPipelineBackend("hybrid", false),
				  searchPipelineBackend("full", true),
				];
			
 
				+
			
 
				/**
				 * Runs one fixture query against one backend and scores what comes back.
				 *
				 * At least 10 results are requested (more when the query's cutoff is larger),
				 * so recall and MRR can see hits ranked below the precision cutoff. Latency is
				 * wall-clock time around the backend call only; scoring is not included.
				 *
				 * A backend that throws is not fatal: the failure is reported on stderr and the
				 * query is scored as a complete miss, so the remaining backends still run.
				 *
				 * @param store - Open store handed to the backend.
				 * @param backend - Backend to exercise.
				 * @param query - Fixture entry supplying the query text, expected files and cutoff.
				 * @param collection - Optional collection name forwarded to the backend.
				 * @returns Scores, latency and up to 10 returned paths for inspection.
				 */
				async function runQuery(
				  store: QMDStore,
				  backend: Backend,
				  query: BenchmarkQuery,
				  collection?: string,
				): Promise<BackendResult> {
				  const limit = Math.max(query.expected_in_top_k, 10);
				  const start = Date.now();

				  let resultFiles: string[];
				  try {
				    resultFiles = await backend.run(store, query.query, limit, collection);
				  } catch (err: unknown) {
				    // Backend may not be available (e.g., no embeddings for vector search).
				    // Report it instead of swallowing it: an all-zero row alone cannot be told
				    // apart from a working backend that simply found nothing.
				    const reason = err instanceof Error ? err.message : String(err);
				    process.stderr.write(
				      `\n  warning: backend "${backend.name}" failed on query "${query.id}": ${reason}\n`,
				    );
				    return {
				      precision_at_k: 0,
				      recall: 0,
				      mrr: 0,
				      f1: 0,
				      hits_at_k: 0,
				      total_expected: query.expected_files.length,
				      latency_ms: Date.now() - start,
				      top_files: [],
				    };
				  }

				  // Stop the clock before scoring so only the backend call is timed.
				  const latency_ms = Date.now() - start;
				  const scores = scoreResults(resultFiles, query.expected_files, query.expected_in_top_k);

				  return {
				    ...scores,
				    total_expected: query.expected_files.length,
				    latency_ms,
				    top_files: resultFiles.slice(0, 10),
				  };
				}
			
 
				+
			
 
				/**
				 * Renders per-query, per-backend scores as a fixed-width text table.
				 *
				 * The header and every data row are built from the same column widths, so the
				 * numbers line up under their headings. Text columns are left-aligned and
				 * truncated to their width; numeric columns are right-aligned. A blank line
				 * follows each query's group of backend rows.
				 *
				 * @param results - One entry per fixture query.
				 * @returns The table as a single newline-joined string.
				 */
				function formatTable(results: QueryResult[]): string {
				  // Widths sum to 64; with the six single-space separators each line is 70 wide.
				  const width = { query: 25, backend: 8, precision: 5, recall: 6, mrr: 5, f1: 5, latency: 10 };
				  const text = (s: string, n: number) => s.slice(0, n).padEnd(n);
				  const score = (value: number, n: number) => value.toFixed(2).padStart(n);

				  const header = [
				    text("Query", width.query),
				    text("Backend", width.backend),
				    "P@k".padStart(width.precision),
				    "Recall".padStart(width.recall),
				    "MRR".padStart(width.mrr),
				    "F1".padStart(width.f1),
				    "ms".padStart(width.latency),
				  ].join(" ");

				  const lines: string[] = [header, "-".repeat(header.length)];

				  for (const r of results) {
				    for (const [backend, br] of Object.entries(r.backends)) {
				      lines.push(
				        [
				          text(r.id, width.query),
				          text(backend, width.backend),
				          score(br.precision_at_k, width.precision),
				          score(br.recall, width.recall),
				          score(br.mrr, width.mrr),
				          score(br.f1, width.f1),
				          `${Math.round(br.latency_ms)}ms`.padStart(width.latency),
				        ].join(" "),
				      );
				    }
				    lines.push("");
				  }

				  return lines.join("\n");
				}
			
 
				+
			
 
				/**
				 * Averages each backend's metrics over the queries that have an entry for it.
				 *
				 * Backends are reported in the order their names first appear in `results`.
				 * A backend with no usable entry in any query is left out of the summary.
				 *
				 * @param results - Per-query results produced by the benchmark run.
				 * @returns Mean precision, recall, MRR, F1 and latency keyed by backend name.
				 */
				function computeSummary(results: QueryResult[]): BenchmarkResult["summary"] {
				  const backendNames = new Set<string>(results.flatMap(entry => Object.keys(entry.backends)));
				  const mean = (values: number[]) => values.reduce((sum, value) => sum + value, 0) / values.length;

				  const summary: BenchmarkResult["summary"] = {};

				  backendNames.forEach(backendName => {
				    // Only queries that actually carry a result for this backend contribute.
				    const samples = results
				      .map(entry => entry.backends[backendName])
				      .filter((sample): sample is BackendResult => Boolean(sample));
				    if (samples.length === 0) return;

				    summary[backendName] = {
				      avg_precision: mean(samples.map(sample => sample.precision_at_k)),
				      avg_recall: mean(samples.map(sample => sample.recall)),
				      avg_mrr: mean(samples.map(sample => sample.mrr)),
				      avg_f1: mean(samples.map(sample => sample.f1)),
				      avg_latency_ms: mean(samples.map(sample => sample.latency_ms)),
				    };
				  });

				  return summary;
				}
			
 
				+
			
 
				/**
				 * Loads a benchmark fixture, runs every query against the selected backends,
				 * prints a report and returns the collected results.
				 *
				 * Progress lines go to stderr and are suppressed when `options.json` is set;
				 * the report itself (JSON, or a text table plus summary) goes to stdout.
				 *
				 * @param fixturePath - Path to the fixture JSON file.
				 * @param options - `json` prints the result as JSON instead of a table;
				 *   `collection` overrides the fixture's own `collection`; `backends` limits
				 *   the run to the named backends (names matching no backend are ignored).
				 * @returns Per-query results and per-backend averages.
				 * @throws If the file cannot be read or parsed, or has no `queries` array.
				 */
				export async function runBenchmark(
				  fixturePath: string,
				  options: { json?: boolean; collection?: string; backends?: string[] } = {},
				): Promise<BenchmarkResult> {
				  // Load fixture
				  const raw = readFileSync(resolve(fixturePath), "utf-8");
				  const fixture: BenchmarkFixture = JSON.parse(raw);

				  // `fixture?.` covers JSON that parses to null, which would otherwise fail
				  // with an unrelated TypeError instead of the message below.
				  if (!Array.isArray(fixture?.queries)) {
				    throw new Error("Invalid fixture: missing 'queries' array");
				  }

				  // Open store
				  const store = await createStore({ dbPath: getDefaultDbPath() });

				  // Filter backends if requested
				  const requested = options.backends;
				  const activeBackends = requested
				    ? BACKENDS.filter(b => requested.includes(b.name))
				    : BACKENDS;

				  const collection = options.collection ?? fixture.collection;

				  // Run queries
				  const results: QueryResult[] = [];
				  try {
				    for (const query of fixture.queries) {
				      const backends: Record<string, BackendResult> = {};

				      for (const backend of activeBackends) {
				        if (!options.json) {
				          process.stderr.write(`  ${query.id} / ${backend.name}...`);
				        }
				        const outcome = await runQuery(store, backend, query, collection);
				        backends[backend.name] = outcome;
				        if (!options.json) {
				          process.stderr.write(` ${Math.round(outcome.latency_ms)}ms\n`);
				        }
				      }

				      results.push({
				        id: query.id,
				        query: query.query,
				        type: query.type,
				        backends,
				      });
				    }
				  } finally {
				    // Release the store even when a query aborts the run.
				    await store.close();
				  }

				  const summary = computeSummary(results);
				  // Compact UTC stamp, YYYYMMDDTHHMMSS. "-" has to be stripped together with
				  // ":" and "."; otherwise the 15-character cut lands inside the time and
				  // drops the seconds.
				  const timestamp = new Date().toISOString().replace(/[-:.]/g, "").slice(0, 15);

				  const benchResult: BenchmarkResult = {
				    timestamp,
				    fixture: fixturePath,
				    results,
				    summary,
				  };

				  // Output
				  if (options.json) {
				    console.log(JSON.stringify(benchResult, null, 2));
				  } else {
				    console.log("\n" + formatTable(results));
				    console.log("Summary:");
				    console.log("-".repeat(70));
				    const pad = (s: string, n: number) => s.slice(0, n).padEnd(n);
				    const num = (n: number) => n.toFixed(3).padStart(6);
				    for (const [name, s] of Object.entries(summary)) {
				      console.log(
				        `  ${pad(name, 8)} P@k=${num(s.avg_precision)} Recall=${num(s.avg_recall)} MRR=${num(s.avg_mrr)} F1=${num(s.avg_f1)} Avg=${Math.round(s.avg_latency_ms)}ms`
				      );
				    }
				  }

				  return benchResult;
				}
			
--- a/src/bench/fixtures/example.json
+++ b/src/bench/fixtures/example.json
@@ -0,0 +1,87 @@
 
				+{
			
 
				+  "description": "Example benchmark fixture for QMD eval-docs. Tests exact keyword, semantic, and cross-domain retrieval across 6 documents.",
			
 
				+  "version": 1,
			
 
				+  "collection": "eval-docs",
			
 
				+  "queries": [
			
 
				+    {
			
 
				+      "id": "exact-api",
			
 
				+      "query": "API versioning",
			
 
				+      "type": "exact",
			
 
				+      "description": "Direct keyword match in API design document",
			
 
				+      "expected_files": ["api-design-principles.md"],
			
 
				+      "expected_in_top_k": 1
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "exact-fundraising",
			
 
				+      "query": "Series A fundraising",
			
 
				+      "type": "exact",
			
 
				+      "description": "Direct keyword match in fundraising memo",
			
 
				+      "expected_files": ["startup-fundraising-memo.md"],
			
 
				+      "expected_in_top_k": 1
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "exact-cap",
			
 
				+      "query": "CAP theorem",
			
 
				+      "type": "exact",
			
 
				+      "description": "Direct keyword match in distributed systems doc",
			
 
				+      "expected_files": ["distributed-systems-overview.md"],
			
 
				+      "expected_in_top_k": 1
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "semantic-rest",
			
 
				+      "query": "how to structure REST endpoints",
			
 
				+      "type": "semantic",
			
 
				+      "description": "Conceptual match — no exact keyword overlap with 'API design'",
			
 
				+      "expected_files": ["api-design-principles.md"],
			
 
				+      "expected_in_top_k": 3
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "semantic-fundraising",
			
 
				+      "query": "raising money for startup",
			
 
				+      "type": "semantic",
			
 
				+      "description": "Synonym match — 'raising money' should find 'fundraising'",
			
 
				+      "expected_files": ["startup-fundraising-memo.md"],
			
 
				+      "expected_in_top_k": 3
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "semantic-overfitting",
			
 
				+      "query": "how to prevent models from memorizing data",
			
 
				+      "type": "semantic",
			
 
				+      "description": "Conceptual match for overfitting in ML primer",
			
 
				+      "expected_files": ["machine-learning-primer.md"],
			
 
				+      "expected_in_top_k": 3
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "topical-launch",
			
 
				+      "query": "what went wrong with the product launch",
			
 
				+      "type": "topical",
			
 
				+      "description": "Should find the retrospective document",
			
 
				+      "expected_files": ["product-launch-retrospective.md"],
			
 
				+      "expected_in_top_k": 3
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "cross-domain-consistency",
			
 
				+      "query": "consistency vs availability tradeoffs",
			
 
				+      "type": "cross-domain",
			
 
				+      "description": "CAP theorem concept — specific detail in longer document",
			
 
				+      "expected_files": ["distributed-systems-overview.md"],
			
 
				+      "expected_in_top_k": 3
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "alias-remote",
			
 
				+      "query": "working from home guidelines",
			
 
				+      "type": "alias",
			
 
				+      "description": "Synonym match — 'working from home' should find 'remote work policy'",
			
 
				+      "expected_files": ["remote-work-policy.md"],
			
 
				+      "expected_in_top_k": 3
			
 
				+    },
			
 
				+    {
			
 
				+      "id": "hard-partial",
			
 
				+      "query": "nouns not verbs",
			
 
				+      "type": "semantic",
			
 
				+      "description": "Partial phrase recall — API design principle about resource naming",
			
 
				+      "expected_files": ["api-design-principles.md"],
			
 
				+      "expected_in_top_k": 5
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
--- a/src/bench/score.ts
+++ b/src/bench/score.ts
@@ -0,0 +1,76 @@
 
				+/**
			
 
				+ * Scoring functions for the QMD benchmark harness.
			
 
				+ *
			
 
				+ * Computes precision@k, recall, MRR, and F1 for search results
			
 
				+ * against ground-truth expected files.
			
 
				+ */
			
 
				+
			
 
				/**
				 * Reduces a path to a canonical form for comparison.
				 *
				 * For a `qmd://` URI, the scheme and the first segment after it (the
				 * collection) are dropped; if nothing follows the scheme but that one segment,
				 * the segment itself is kept. The result is lowercased and stripped of leading
				 * and trailing slashes.
				 *
				 * @param p - A file path or `qmd://collection/...` URI.
				 * @returns The normalized path.
				 */
				export function normalizePath(p: string): string {
				  let path = p;

				  if (path.startsWith("qmd://")) {
				    // qmd://collection/path/to/file → path/to/file
				    const afterScheme = path.substring("qmd://".length);
				    const firstSlash = afterScheme.indexOf("/");
				    if (firstSlash >= 0) {
				      path = afterScheme.substring(firstSlash + 1);
				    } else {
				      path = afterScheme;
				    }
				  }

				  const lowered = path.toLowerCase();
				  return lowered.replace(/^\/+|\/+$/g, "");
				}
			
 
				+
			
 
				/**
				 * Checks whether two paths refer to the same file.
				 *
				 * Both sides are normalized first. They match when they are equal, or when
				 * one is a trailing run of whole path segments of the other, so
				 * `full/path/docs/readme.md` matches `docs/readme.md`. The shorter path must
				 * start at a `/` boundary: `myreadme.md` does not match `readme.md`.
				 *
				 * @param result - Path reported by a search backend.
				 * @param expected - Path listed in the fixture.
				 * @returns `true` when the two paths identify the same file.
				 */
				export function pathsMatch(result: string, expected: string): boolean {
				  const nr = normalizePath(result);
				  const ne = normalizePath(expected);

				  // An empty path names no file; without this guard it would be a suffix of
				  // every other path.
				  if (nr === "" || ne === "") return false;
				  if (nr === ne) return true;

				  // Anchor the suffix on a separator so a match cannot begin mid-filename.
				  return nr.endsWith(`/${ne}`) || ne.endsWith(`/${nr}`);
				}
			
 
				+
			
 
				/**
				 * Scores a ranked list of result paths against the expected files.
				 *
				 * - `hits_at_k`: expected files matched within the first `topK` results.
				 * - `precision_at_k`: `hits_at_k / min(topK, expectedFiles.length)`, or 0 when
				 *   that denominator is not positive.
				 * - `recall`: share of expected files matched anywhere in `resultFiles`.
				 * - `mrr`: `1 / rank` of the first result that matches any expected file, or 0
				 *   when none does.
				 * - `f1`: harmonic mean of `precision_at_k` and `recall`, or 0 when both are 0.
				 *
				 * @param resultFiles - Paths returned by a backend, best first.
				 * @param expectedFiles - Ground-truth paths for the query.
				 * @param topK - Rank cutoff used for `hits_at_k` and `precision_at_k`.
				 */
				export function scoreResults(
				  resultFiles: string[],
				  expectedFiles: string[],
				  topK: number,
				): { precision_at_k: number; recall: number; mrr: number; f1: number; hits_at_k: number } {
				  // Number of expected files that have at least one match among `candidates`.
				  const countFound = (candidates: string[]): number =>
				    expectedFiles.filter(expected => candidates.some(candidate => pathsMatch(candidate, expected)))
				      .length;

				  const hitsAtK = countFound(resultFiles.slice(0, topK));
				  const totalHits = countFound(resultFiles);

				  // MRR: reciprocal rank of first relevant result
				  const firstRelevant = resultFiles.findIndex(candidate =>
				    expectedFiles.some(expected => pathsMatch(candidate, expected)),
				  );
				  const mrr = firstRelevant === -1 ? 0 : 1 / (firstRelevant + 1);

				  const denominator = Math.min(topK, expectedFiles.length);

				  let precision_at_k = 0;
				  if (denominator > 0) {
				    precision_at_k = hitsAtK / denominator;
				  }

				  let recall = 0;
				  if (expectedFiles.length > 0) {
				    recall = totalHits / expectedFiles.length;
				  }

				  let f1 = 0;
				  if (precision_at_k + recall > 0) {
				    f1 = 2 * (precision_at_k * recall) / (precision_at_k + recall);
				  }

				  return { precision_at_k, recall, mrr, f1, hits_at_k: hitsAtK };
				}
			
--- a/src/bench/types.ts
+++ b/src/bench/types.ts
@@ -0,0 +1,72 @@
 
				+/**
			
 
				+ * Types for the QMD benchmark harness.
			
 
				+ *
			
 
				+ * A benchmark fixture defines queries with expected results.
			
 
				+ * The harness runs each query through multiple search backends
			
 
				+ * and measures precision, recall, MRR, and latency.
			
 
				+ */
			
 
				+
			
 
				/** One test query in a benchmark fixture, together with its ground truth. */
				export interface BenchmarkQuery {
				  /** Unique identifier for the query; shown in progress output and the results table */
				  id: string;
				  /** The search query text */
				  query: string;
				  /** Query difficulty/type for grouping results */
				  type: "exact" | "semantic" | "topical" | "cross-domain" | "alias";
				  /** Human-readable description of what this tests */
				  description: string;
				  /** File paths (relative to collection) that should appear in results */
				  expected_files: string[];
				  /**
				   * Rank cutoff k: the expected files should appear within the first k
				   * results. The harness uses it as the k of precision@k and requests
				   * at least this many results from each backend.
				   */
				  expected_in_top_k: number;
				}
			
 
				+
			
 
				/** Top-level shape of a benchmark fixture JSON file. */
				export interface BenchmarkFixture {
				  /** What this benchmark covers, for human readers */
				  description: string;
				  /** Version number of the fixture format */
				  version: number;
				  /** Collection to search within; when omitted no collection filter comes from the fixture */
				  collection?: string;
				  /** Queries to run, each with its expected files */
				  queries: Array<BenchmarkQuery>;
				}
			
 
				+
			
 
				/** Scores and diagnostics for one query run against one backend. */
				export interface BackendResult {
				  /**
				   * `hits_at_k` divided by `min(k, total_expected)`, where k is the query's
				   * `expected_in_top_k`; 0 when that denominator is not positive. Note the
				   * denominator is capped by the number of expected files, so this is not
				   * the plain "relevant results / k" ratio.
				   */
				  precision_at_k: number;
				  /** Fraction of expected files found anywhere in results */
				  recall: number;
				  /** Reciprocal rank of first relevant result (1/rank, 0 if not found) */
				  mrr: number;
				  /** Harmonic mean of precision_at_k and recall (0 when both are 0) */
				  f1: number;
				  /** Number of expected files found in top-k */
				  hits_at_k: number;
				  /** Total expected files */
				  total_expected: number;
				  /** Wall-clock latency of the backend call in milliseconds */
				  latency_ms: number;
				  /** Up to the first 10 result file paths (for inspection); empty when the backend failed */
				  top_files: string[];
				}
			
 
				+
			
 
				/** Everything measured for a single fixture query. */
				export interface QueryResult {
				  /** Identifier copied from the fixture query */
				  id: string;
				  /** Query text copied from the fixture query */
				  query: string;
				  /** Query type copied from the fixture query */
				  type: string;
				  /** Result of each backend that was run, keyed by backend name */
				  backends: Record<string, BackendResult>;
				}
			
 
				+
			
 
				/** Metric averages for one backend across the queries it was run on. */
				export interface BackendSummary {
				  avg_precision: number;
				  avg_recall: number;
				  avg_mrr: number;
				  avg_f1: number;
				  avg_latency_ms: number;
				}

				/** Complete output of one benchmark run. */
				export interface BenchmarkResult {
				  /** Compact string derived from the ISO-8601 UTC time at which the run completed */
				  timestamp: string;
				  /** Fixture path exactly as it was passed to the harness */
				  fixture: string;
				  /** Per-query results, in fixture order */
				  results: QueryResult[];
				  /** Per-backend averages, keyed by backend name */
				  summary: Record<string, BackendSummary>;
				}
			
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -2606,6 +2606,7 @@ function showHelp(): void {
 
				   console.log("  qmd multi-get <pattern>       - Batch fetch via glob or comma-separated list");
			
 
				   console.log("  qmd skill show/install        - Show or install the packaged QMD skill");
			
 
				   console.log("  qmd mcp                       - Start the MCP server (stdio transport for AI agents)");
			
 
				+  console.log("  qmd bench <fixture.json>      - Run search quality benchmarks against a fixture file");
			
 
				   console.log("");
			
 
				   console.log("Collections & context:");
			
 
				   console.log("  qmd collection add/list/remove/rename/show   - Manage indexed folders");
			
@@ -3063,6 +3064,23 @@ if (isMain) {
 
				       await querySearch(cli.query, cli.opts);
			
 
				       break;
			
 
				 
			
 
				+    case "bench": {
			
 
				+      const fixturePath = cli.args[0];
			
 
				+      if (!fixturePath) {
			
 
				+        console.error("Usage: qmd bench <fixture.json> [--json] [-c collection]");
			
 
				+        console.error("");
			
 
				+        console.error("Run search quality benchmarks against a fixture file.");
			
 
				+        console.error("See src/bench/fixtures/example.json for the fixture format.");
			
 
				+        process.exit(1);
			
 
				+      }
			
 
				+      const { runBenchmark } = await import("../bench/bench.js");
			
 
				+      await runBenchmark(fixturePath, {
			
 
				+        json: !!cli.opts.json,
			
 
				+        collection: cli.opts.collection,
			
 
				+      });
			
 
				+      break;
			
 
				+    }
			
 
				+
			
 
				     case "mcp": {
			
 
				       const sub = cli.args[0]; // stop | status | undefined
			
 
				 
			
--- a/test/bench-score.test.ts
+++ b/test/bench-score.test.ts
@@ -0,0 +1,114 @@
 
				+/**
			
 
				+ * Tests for the benchmark scoring functions.
			
 
				+ */
			
 
				+
			
 
				+import { describe, test, expect } from "vitest";
			
 
				+import { normalizePath, pathsMatch, scoreResults } from "../src/bench/score.js";
			
 
				+
			
 
				// normalizePath: each row is [test name, input, expected normalized form].
				describe("normalizePath", () => {
				  const cases: Array<[string, string, string]> = [
				    [
				      "lowercases path",
				      "Resources/Concepts/Context Engineering.md",
				      "resources/concepts/context engineering.md",
				    ],
				    ["strips qmd:// prefix", "qmd://collection/docs/readme.md", "docs/readme.md"],
				    ["strips leading/trailing slashes", "/docs/readme.md/", "docs/readme.md"],
				    ["handles plain filename", "readme.md", "readme.md"],
				  ];

				  for (const [name, input, normalized] of cases) {
				    test(name, () => {
				      expect(normalizePath(input)).toBe(normalized);
				    });
				  }
				});
			
 
				+
			
 
				// pathsMatch: each row is [test name, result path, expected path, should match].
				describe("pathsMatch", () => {
				  const cases: Array<[string, string, string, boolean]> = [
				    ["exact match", "docs/readme.md", "docs/readme.md", true],
				    ["case-insensitive match", "Docs/README.md", "docs/readme.md", true],
				    ["suffix match (result is longer)", "/full/path/docs/readme.md", "docs/readme.md", true],
				    ["suffix match (expected is longer)", "readme.md", "docs/readme.md", true],
				    ["qmd:// prefix handled", "qmd://col/docs/readme.md", "docs/readme.md", true],
				    ["different files don't match", "docs/readme.md", "docs/other.md", false],
				  ];

				  for (const [name, result, expected, shouldMatch] of cases) {
				    test(name, () => {
				      expect(pathsMatch(result, expected)).toBe(shouldMatch);
				    });
				  }
				});
			
 
				+
			
 
				// scoreResults: end-to-end checks of each metric on small hand-built rankings.
				describe("scoreResults", () => {
				  test("perfect score: all expected in top-k", () => {
				    const ranked = ["a.md", "b.md", "c.md"];
				    const wanted = ["a.md", "b.md"];
				    const { precision_at_k, recall, mrr, f1, hits_at_k } = scoreResults(ranked, wanted, 2);
				    expect(precision_at_k).toBe(1);
				    expect(recall).toBe(1);
				    expect(mrr).toBe(1);
				    expect(f1).toBe(1);
				    expect(hits_at_k).toBe(2);
				  });

				  test("zero score: none found", () => {
				    const ranked = ["x.md", "y.md", "z.md"];
				    const wanted = ["a.md", "b.md"];
				    const { precision_at_k, recall, mrr, f1, hits_at_k } = scoreResults(ranked, wanted, 2);
				    expect(precision_at_k).toBe(0);
				    expect(recall).toBe(0);
				    expect(mrr).toBe(0);
				    expect(f1).toBe(0);
				    expect(hits_at_k).toBe(0);
				  });

				  test("partial: found outside top-k", () => {
				    const { precision_at_k, recall, mrr, hits_at_k } = scoreResults(
				      ["x.md", "y.md", "a.md"],
				      ["a.md"],
				      1,
				    );
				    expect(precision_at_k).toBe(0); // the only expected file is below the top-1 cutoff
				    expect(recall).toBe(1); // still present somewhere in the list
				    expect(mrr).toBeCloseTo(1 / 3); // first relevant result sits at rank 3
				    expect(hits_at_k).toBe(0);
				  });

				  test("MRR: first relevant at rank 2", () => {
				    const { mrr } = scoreResults(["x.md", "a.md", "b.md"], ["a.md", "b.md"], 3);
				    expect(mrr).toBeCloseTo(0.5); // 1/2
				  });

				  test("empty results", () => {
				    const { precision_at_k, recall, mrr } = scoreResults([], ["a.md"], 1);
				    expect(precision_at_k).toBe(0);
				    expect(recall).toBe(0);
				    expect(mrr).toBe(0);
				  });

				  test("empty expected", () => {
				    const { precision_at_k, recall } = scoreResults(["a.md"], [], 1);
				    expect(precision_at_k).toBe(0);
				    expect(recall).toBe(0);
				  });
				});