bench-score.test.ts 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. /**
  2. * Tests for the benchmark scoring functions.
  3. */
  4. import { describe, test, expect } from "vitest";
  5. import { normalizePath, pathsMatch, scoreResults } from "../src/bench/score.js";
  // normalizePath: every case pushes one raw path through the function and
  // checks the exact string that comes back.
  describe("normalizePath", () => {
    const cases = [
      {
        title: "lowercases path",
        input: "Resources/Concepts/Context Engineering.md",
        expected: "resources/concepts/context engineering.md",
      },
      {
        // The expected value drops both the scheme and the segment right after it.
        title: "strips qmd:// prefix",
        input: "qmd://collection/docs/readme.md",
        expected: "docs/readme.md",
      },
      {
        title: "strips leading/trailing slashes",
        input: "/docs/readme.md/",
        expected: "docs/readme.md",
      },
      {
        // A bare filename is expected to come back unchanged.
        title: "handles plain filename",
        input: "readme.md",
        expected: "readme.md",
      },
    ];

    // Register one test per table row, in table order.
    for (const { title, input, expected } of cases) {
      test(title, () => {
        expect(normalizePath(input)).toBe(expected);
      });
    }
  });
  // pathsMatch: every case compares a result path (first argument) against an
  // expected path (second argument) and checks the boolean verdict.
  describe("pathsMatch", () => {
    const cases = [
      {
        title: "exact match",
        result: "docs/readme.md",
        expected: "docs/readme.md",
        matches: true,
      },
      {
        title: "case-insensitive match",
        result: "Docs/README.md",
        expected: "docs/readme.md",
        matches: true,
      },
      {
        // The result carries extra leading directories.
        title: "suffix match (result is longer)",
        result: "/full/path/docs/readme.md",
        expected: "docs/readme.md",
        matches: true,
      },
      {
        // The expected path carries the extra leading directory instead.
        title: "suffix match (expected is longer)",
        result: "readme.md",
        expected: "docs/readme.md",
        matches: true,
      },
      {
        title: "qmd:// prefix handled",
        result: "qmd://col/docs/readme.md",
        expected: "docs/readme.md",
        matches: true,
      },
      {
        // Same directory, different filename: must not be treated as a match.
        title: "different files don't match",
        result: "docs/readme.md",
        expected: "docs/other.md",
        matches: false,
      },
    ];

    // Register one test per table row, in table order.
    for (const { title, result, expected, matches } of cases) {
      test(title, () => {
        expect(pathsMatch(result, expected)).toBe(matches);
      });
    }
  });
  // scoreResults: called as (returned paths, expected paths, k); the tests pin
  // the metric fields of the returned object for a handful of scenarios.
  describe("scoreResults", () => {
    test("perfect score: all expected in top-k", () => {
      const returned = ["a.md", "b.md", "c.md"];
      const wanted = ["a.md", "b.md"];

      const { precision_at_k, recall, mrr, f1, hits_at_k } = scoreResults(returned, wanted, 2);

      // Both expected files sit in the first two slots, so every metric is maxed.
      expect(precision_at_k).toBe(1);
      expect(recall).toBe(1);
      expect(mrr).toBe(1);
      expect(f1).toBe(1);
      expect(hits_at_k).toBe(2);
    });

    test("zero score: none found", () => {
      const returned = ["x.md", "y.md", "z.md"];
      const wanted = ["a.md", "b.md"];

      const { precision_at_k, recall, mrr, f1, hits_at_k } = scoreResults(returned, wanted, 2);

      // No overlap between returned and expected: every metric bottoms out.
      expect(precision_at_k).toBe(0);
      expect(recall).toBe(0);
      expect(mrr).toBe(0);
      expect(f1).toBe(0);
      expect(hits_at_k).toBe(0);
    });

    test("partial: found outside top-k", () => {
      const returned = ["x.md", "y.md", "a.md"];
      const wanted = ["a.md"];

      const { precision_at_k, recall, mrr, hits_at_k } = scoreResults(returned, wanted, 1);

      // With k = 1 the only hit (third position) falls outside the cutoff...
      expect(precision_at_k).toBe(0);
      // ...but it still counts toward recall, which looks at the whole list.
      expect(recall).toBe(1);
      // First relevant result is at rank 3, hence a reciprocal rank of 1/3.
      expect(mrr).toBeCloseTo(1 / 3);
      expect(hits_at_k).toBe(0);
    });

    test("MRR: first relevant at rank 2", () => {
      const returned = ["x.md", "a.md", "b.md"];
      const wanted = ["a.md", "b.md"];

      const { mrr } = scoreResults(returned, wanted, 3);

      // Only the first relevant hit matters here: rank 2 gives 1/2.
      expect(mrr).toBeCloseTo(0.5);
    });

    test("empty results", () => {
      const { precision_at_k, recall, mrr } = scoreResults([], ["a.md"], 1);

      expect(precision_at_k).toBe(0);
      expect(recall).toBe(0);
      expect(mrr).toBe(0);
    });

    test("empty expected", () => {
      const { precision_at_k, recall } = scoreResults(["a.md"], [], 1);

      // Nothing was expected, and both metrics are pinned to 0 for that case.
      expect(precision_at_k).toBe(0);
      expect(recall).toBe(0);
    });
  });