// types.d.ts
/**
 * Types for the QMD benchmark harness.
 *
 * A benchmark fixture defines queries with expected results.
 * The harness runs each query through multiple search backends
 * and measures precision, recall, MRR, and latency.
 */
  8. export interface BenchmarkQuery {
  9. /** Unique identifier for the query */
  10. id: string;
  11. /** The search query text */
  12. query: string;
  13. /** Query difficulty/type for grouping results */
  14. type: "exact" | "semantic" | "topical" | "cross-domain" | "alias";
  15. /** Human-readable description of what this tests */
  16. description: string;
  17. /** File paths (relative to collection) that should appear in results */
  18. expected_files: string[];
  19. /** How many of expected_files should appear in top-k results */
  20. expected_in_top_k: number;
  21. }
  22. export interface BenchmarkFixture {
  23. /** Description of the benchmark */
  24. description: string;
  25. /** Fixture format version */
  26. version: number;
  27. /** Optional collection to search within */
  28. collection?: string;
  29. /** The test queries */
  30. queries: BenchmarkQuery[];
  31. }
  32. export interface BackendResult {
  33. /** Fraction of top-k results that are relevant */
  34. precision_at_k: number;
  35. /** Fraction of expected files found anywhere in results */
  36. recall: number;
  37. /** Reciprocal rank of first relevant result (1/rank, 0 if not found) */
  38. mrr: number;
  39. /** Harmonic mean of precision_at_k and recall */
  40. f1: number;
  41. /** Number of expected files found in top-k */
  42. hits_at_k: number;
  43. /** Total expected files */
  44. total_expected: number;
  45. /** Wall-clock latency in milliseconds */
  46. latency_ms: number;
  47. /** Top result file paths (for inspection) */
  48. top_files: string[];
  49. }
  50. export interface QueryResult {
  51. id: string;
  52. query: string;
  53. type: string;
  54. backends: Record<string, BackendResult>;
  55. }
  56. export interface BenchmarkResult {
  57. timestamp: string;
  58. fixture: string;
  59. results: QueryResult[];
  60. summary: Record<string, {
  61. avg_precision: number;
  62. avg_recall: number;
  63. avg_mrr: number;
  64. avg_f1: number;
  65. avg_latency_ms: number;
  66. }>;
  67. }