bench.js 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. /**
  2. * QMD Benchmark Harness
  3. *
  4. * Runs queries from a fixture file against multiple search backends
  5. * and measures precision@k, recall, MRR, F1, and latency.
  6. *
  7. * Usage:
  8. * qmd bench <fixture.json> [--json] [--collection <name>]
  9. *
  10. * Backends tested:
  11. * - bm25: BM25 keyword search (searchLex)
  12. * - vector: Vector similarity search (searchVector)
  13. * - hybrid: BM25 + vector RRF fusion without reranking
  14. * - full: Full hybrid pipeline with LLM reranking
  15. */
  16. import { readFileSync } from "node:fs";
  17. import { resolve } from "node:path";
  18. import { createStore, getDefaultDbPath, } from "../index.js";
  19. import { scoreResults } from "./score.js";
  20. const BACKENDS = [
  21. {
  22. name: "bm25",
  23. run: async (store, query, limit, collection) => {
  24. const results = await store.searchLex(query, { limit, collection });
  25. return results.map((r) => r.filepath);
  26. },
  27. },
  28. {
  29. name: "vector",
  30. run: async (store, query, limit, collection) => {
  31. const results = await store.searchVector(query, { limit, collection });
  32. return results.map((r) => r.filepath);
  33. },
  34. },
  35. {
  36. name: "hybrid",
  37. run: async (store, query, limit, collection) => {
  38. const results = await store.search({ query, limit, collection, rerank: false });
  39. return results.map((r) => r.file);
  40. },
  41. },
  42. {
  43. name: "full",
  44. run: async (store, query, limit, collection) => {
  45. const results = await store.search({ query, limit, collection, rerank: true });
  46. return results.map((r) => r.file);
  47. },
  48. },
  49. ];
  50. async function runQuery(store, backend, query, collection) {
  51. const limit = Math.max(query.expected_in_top_k, 10);
  52. const start = Date.now();
  53. let resultFiles;
  54. try {
  55. resultFiles = await backend.run(store, query.query, limit, collection);
  56. }
  57. catch (err) {
  58. // Backend may not be available (e.g., no embeddings for vector search)
  59. return {
  60. precision_at_k: 0,
  61. recall: 0,
  62. mrr: 0,
  63. f1: 0,
  64. hits_at_k: 0,
  65. total_expected: query.expected_files.length,
  66. latency_ms: Date.now() - start,
  67. top_files: [],
  68. };
  69. }
  70. const latency_ms = Date.now() - start;
  71. const scores = scoreResults(resultFiles, query.expected_files, query.expected_in_top_k);
  72. return {
  73. ...scores,
  74. total_expected: query.expected_files.length,
  75. latency_ms,
  76. top_files: resultFiles.slice(0, 10),
  77. };
  78. }
  79. function formatTable(results) {
  80. const lines = [];
  81. const pad = (s, n) => s.slice(0, n).padEnd(n);
  82. const num = (n) => n.toFixed(2).padStart(5);
  83. lines.push(`${pad("Query", 25)} ${pad("Backend", 8)} ${pad("P@k", 6)} ${pad("Recall", 7)} ${pad("MRR", 6)} ${pad("F1", 6)} ${pad("ms", 8)}`);
  84. lines.push("-".repeat(70));
  85. for (const r of results) {
  86. for (const [backend, br] of Object.entries(r.backends)) {
  87. lines.push(`${pad(r.id, 25)} ${pad(backend, 8)} ${num(br.precision_at_k)} ${num(br.recall)} ${num(br.mrr)} ${num(br.f1)} ${String(Math.round(br.latency_ms)).padStart(7)}ms`);
  88. }
  89. lines.push("");
  90. }
  91. return lines.join("\n");
  92. }
  93. function computeSummary(results) {
  94. const summary = {};
  95. // Collect all backend names
  96. const backendNames = new Set();
  97. for (const r of results) {
  98. for (const name of Object.keys(r.backends)) {
  99. backendNames.add(name);
  100. }
  101. }
  102. for (const name of backendNames) {
  103. let totalP = 0, totalR = 0, totalMrr = 0, totalF1 = 0, totalLat = 0, count = 0;
  104. for (const r of results) {
  105. const br = r.backends[name];
  106. if (!br)
  107. continue;
  108. totalP += br.precision_at_k;
  109. totalR += br.recall;
  110. totalMrr += br.mrr;
  111. totalF1 += br.f1;
  112. totalLat += br.latency_ms;
  113. count++;
  114. }
  115. if (count > 0) {
  116. summary[name] = {
  117. avg_precision: totalP / count,
  118. avg_recall: totalR / count,
  119. avg_mrr: totalMrr / count,
  120. avg_f1: totalF1 / count,
  121. avg_latency_ms: totalLat / count,
  122. };
  123. }
  124. }
  125. return summary;
  126. }
  127. export async function runBenchmark(fixturePath, options = {}) {
  128. // Load fixture
  129. const raw = readFileSync(resolve(fixturePath), "utf-8");
  130. const fixture = JSON.parse(raw);
  131. if (!fixture.queries || !Array.isArray(fixture.queries)) {
  132. throw new Error("Invalid fixture: missing 'queries' array");
  133. }
  134. // Open store
  135. const store = await createStore({ dbPath: getDefaultDbPath() });
  136. // Filter backends if requested
  137. const activeBackends = options.backends
  138. ? BACKENDS.filter(b => options.backends.includes(b.name))
  139. : BACKENDS;
  140. const collection = options.collection ?? fixture.collection;
  141. // Run queries
  142. const results = [];
  143. for (const query of fixture.queries) {
  144. const backends = {};
  145. for (const backend of activeBackends) {
  146. if (!options.json) {
  147. process.stderr.write(` ${query.id} / ${backend.name}...`);
  148. }
  149. backends[backend.name] = await runQuery(store, backend, query, collection);
  150. if (!options.json) {
  151. process.stderr.write(` ${Math.round(backends[backend.name].latency_ms)}ms\n`);
  152. }
  153. }
  154. results.push({
  155. id: query.id,
  156. query: query.query,
  157. type: query.type,
  158. backends,
  159. });
  160. }
  161. await store.close();
  162. const summary = computeSummary(results);
  163. const timestamp = new Date().toISOString().replace(/[:.]/g, "").slice(0, 15);
  164. const benchResult = {
  165. timestamp,
  166. fixture: fixturePath,
  167. results,
  168. summary,
  169. };
  170. // Output
  171. if (options.json) {
  172. console.log(JSON.stringify(benchResult, null, 2));
  173. }
  174. else {
  175. console.log("\n" + formatTable(results));
  176. console.log("Summary:");
  177. console.log("-".repeat(70));
  178. const pad = (s, n) => s.slice(0, n).padEnd(n);
  179. const num = (n) => n.toFixed(3).padStart(6);
  180. for (const [name, s] of Object.entries(summary)) {
  181. console.log(` ${pad(name, 8)} P@k=${num(s.avg_precision)} Recall=${num(s.avg_recall)} MRR=${num(s.avg_mrr)} F1=${num(s.avg_f1)} Avg=${Math.round(s.avg_latency_ms)}ms`);
  182. }
  183. }
  184. return benchResult;
  185. }