suby
/
qmd


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
							/**
 * QMD Benchmark Harness
 *
 * Runs queries from a fixture file against multiple search backends
 * and measures precision@k, recall, MRR, F1, and latency.
 *
 * Usage:
 *   qmd bench <fixture.json> [--json] [--collection <name>]
 *
 * Backends tested:
 *   - bm25: BM25 keyword search (searchLex)
 *   - vector: Vector similarity search (searchVector)
 *   - hybrid: BM25 + vector RRF fusion without reranking
 *   - full: Full hybrid pipeline with LLM reranking
 */
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { createStore, getDefaultDbPath, } from "../index.js";
import { scoreResults } from "./score.js";
/** Pulls the path out of a hit returned by searchLex / searchVector. */
const hitFilepath = ({ filepath }) => filepath;
/** Pulls the path out of a hit returned by store.search. */
const hitFile = ({ file }) => file;
/**
 * Builds a backend that goes through store.search, differing only in
 * whether reranking is requested.
 */
const pipelineBackend = (name, rerank) => ({
    name,
    run: async (store, query, limit, collection) => {
        const hits = await store.search({ query, limit, collection, rerank });
        return hits.map(hitFile);
    },
});
/**
 * Search backends exercised by the benchmark, in execution order.
 *
 * Each entry has a `name` and an async `run(store, query, limit, collection)`
 * that resolves to the list of result paths in the order the store
 * returned them.
 */
const BACKENDS = [
    {
        name: "bm25",
        run: async (store, query, limit, collection) => {
            const hits = await store.searchLex(query, { limit, collection });
            return hits.map(hitFilepath);
        },
    },
    {
        name: "vector",
        run: async (store, query, limit, collection) => {
            const hits = await store.searchVector(query, { limit, collection });
            return hits.map(hitFilepath);
        },
    },
    pipelineBackend("hybrid", false),
    pipelineBackend("full", true),
];
/**
 * Runs a single fixture query against one backend and scores the outcome.
 *
 * @param store - Open store handle, passed through to `backend.run`.
 * @param backend - Entry with an async `run(store, query, limit, collection)`.
 * @param query - Fixture query; reads `query`, `expected_files` and
 *   `expected_in_top_k`.
 * @param collection - Optional collection name forwarded to the backend.
 * @returns Scores plus `total_expected`, `latency_ms` and `top_files`
 *   (at most the first 10 result paths). When the backend throws, all
 *   scores are zero, `top_files` is empty and `error` carries the failure
 *   message so the row can be told apart from a genuine miss.
 */
async function runQuery(store, backend, query, collection) {
    // Always fetch at least 10 results. A missing or non-numeric
    // expected_in_top_k would turn Math.max into NaN, so fall back to 10.
    const requestedK = Number(query.expected_in_top_k);
    const limit = Number.isNaN(requestedK) ? 10 : Math.max(requestedK, 10);
    const start = Date.now();
    let resultFiles;
    try {
        resultFiles = await backend.run(store, query.query, limit, collection);
    }
    catch (err) {
        // Backend may not be available (e.g., no embeddings for vector search).
        // Keep the benchmark going, but record why this row is all zeros.
        return {
            precision_at_k: 0,
            recall: 0,
            mrr: 0,
            f1: 0,
            hits_at_k: 0,
            total_expected: query.expected_files.length,
            latency_ms: Date.now() - start,
            top_files: [],
            error: err instanceof Error ? err.message : String(err),
        };
    }
    // Latency covers only the backend call, not the scoring below.
    const latency_ms = Date.now() - start;
    const scores = scoreResults(resultFiles, query.expected_files, query.expected_in_top_k);
    return {
        ...scores,
        total_expected: query.expected_files.length,
        latency_ms,
        top_files: resultFiles.slice(0, 10),
    };
}
/**
 * Renders per-query, per-backend metrics as a fixed-width text table.
 *
 * @param results - Array of `{ id, backends }`, where `backends` maps a
 *   backend name to an object with `precision_at_k`, `recall`, `mrr`, `f1`
 *   and `latency_ms`.
 * @returns The table as one string: a header, a 70-character rule, then one
 *   row per backend with a blank line after each query.
 */
function formatTable(results) {
    // Coerce to string first: fixture ids may be numbers (or missing), and
    // calling .slice on those would throw after the whole benchmark has run.
    const pad = (s, n) => String(s ?? "").slice(0, n).padEnd(n);
    const num = (n) => n.toFixed(2).padStart(5);
    // NOTE(review): header cells are 6-8 wide while numeric cells are 5 wide,
    // so columns drift slightly; layout is kept unchanged on purpose.
    const lines = [
        `${pad("Query", 25)} ${pad("Backend", 8)} ${pad("P@k", 6)} ${pad("Recall", 7)} ${pad("MRR", 6)} ${pad("F1", 6)} ${pad("ms", 8)}`,
        "-".repeat(70),
    ];
    for (const r of results) {
        for (const [backend, br] of Object.entries(r.backends)) {
            lines.push(`${pad(r.id, 25)} ${pad(backend, 8)} ${num(br.precision_at_k)} ${num(br.recall)}  ${num(br.mrr)} ${num(br.f1)} ${String(Math.round(br.latency_ms)).padStart(7)}ms`);
        }
        // Blank separator line between queries.
        lines.push("");
    }
    return lines.join("\n");
}
/**
 * Averages every metric per backend across all query results.
 *
 * @param results - Array of `{ backends }`, where `backends` maps a backend
 *   name to its metrics for that query.
 * @returns Object keyed by backend name (first-seen order) holding
 *   `avg_precision`, `avg_recall`, `avg_mrr`, `avg_f1` and `avg_latency_ms`.
 *   A backend with no usable entries is omitted.
 */
function computeSummary(results) {
    // Distinct backend names, in the order they first appear.
    const backendNames = new Set(results.flatMap((entry) => Object.keys(entry.backends)));
    const summary = {};
    for (const backendName of backendNames) {
        // Queries that did not run this backend contribute nothing.
        const rows = results
            .map((entry) => entry.backends[backendName])
            .filter(Boolean);
        if (rows.length === 0) {
            continue;
        }
        const mean = (field) => rows.reduce((acc, row) => acc + row[field], 0) / rows.length;
        summary[backendName] = {
            avg_precision: mean("precision_at_k"),
            avg_recall: mean("recall"),
            avg_mrr: mean("mrr"),
            avg_f1: mean("f1"),
            avg_latency_ms: mean("latency_ms"),
        };
    }
    return summary;
}
/**
 * Loads a fixture file, runs every query against the selected backends,
 * prints the outcome and returns it.
 *
 * @param {string} fixturePath - Path to a JSON fixture with a `queries` array.
 * @param {object} [options]
 * @param {boolean} [options.json] - Print the result as JSON on stdout and
 *   suppress progress output; otherwise print a table and summary.
 * @param {string} [options.collection] - Collection to search; falls back to
 *   `fixture.collection`.
 * @param {string[]} [options.backends] - Backend names to run; all backends
 *   when omitted.
 * @returns {Promise<object>} `{ timestamp, fixture, results, summary }`.
 * @throws {Error} When the fixture has no `queries` array. File-read and
 *   JSON parse errors propagate unchanged.
 */
export async function runBenchmark(fixturePath, options = {}) {
    // Load fixture
    const raw = readFileSync(resolve(fixturePath), "utf-8");
    const fixture = JSON.parse(raw);
    // `fixture?.` so a fixture that parses to null reports the same error
    // instead of a TypeError.
    if (!Array.isArray(fixture?.queries)) {
        throw new Error("Invalid fixture: missing 'queries' array");
    }
    // Open store
    const store = await createStore({ dbPath: getDefaultDbPath() });
    // Filter backends if requested
    const activeBackends = options.backends
        ? BACKENDS.filter(b => options.backends.includes(b.name))
        : BACKENDS;
    const collection = options.collection ?? fixture.collection;
    // Run queries
    const results = [];
    try {
        for (const query of fixture.queries) {
            const backends = {};
            for (const backend of activeBackends) {
                // Progress goes to stderr so stdout stays clean for the report.
                if (!options.json) {
                    process.stderr.write(`  ${query.id} / ${backend.name}...`);
                }
                backends[backend.name] = await runQuery(store, backend, query, collection);
                if (!options.json) {
                    process.stderr.write(` ${Math.round(backends[backend.name].latency_ms)}ms\n`);
                }
            }
            results.push({
                id: query.id,
                query: query.query,
                type: query.type,
                backends,
            });
        }
    }
    finally {
        // Release the store even if a malformed query entry throws mid-run.
        await store.close();
    }
    const summary = computeSummary(results);
    // NOTE(review): '-' is not stripped, so the 15-character cut ends at the
    // minutes (e.g. "2024-01-15T1030") - confirm whether seconds were intended.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "").slice(0, 15);
    const benchResult = {
        timestamp,
        fixture: fixturePath,
        results,
        summary,
    };
    // Output
    if (options.json) {
        console.log(JSON.stringify(benchResult, null, 2));
    }
    else {
        console.log("\n" + formatTable(results));
        console.log("Summary:");
        console.log("-".repeat(70));
        const pad = (s, n) => s.slice(0, n).padEnd(n);
        const num = (n) => n.toFixed(3).padStart(6);
        for (const [name, s] of Object.entries(summary)) {
            console.log(`  ${pad(name, 8)} P@k=${num(s.avg_precision)} Recall=${num(s.avg_recall)} MRR=${num(s.avg_mrr)} F1=${num(s.avg_f1)} Avg=${Math.round(s.avg_latency_ms)}ms`);
        }
    }
    return benchResult;
}