example.json 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. {
  2. "description": "Example benchmark fixture for QMD eval-docs. Tests exact keyword, semantic, topical, cross-domain, and alias retrieval across 6 documents.",
  3. "version": 1,
  4. "collection": "eval-docs",
  5. "queries": [
  6. {
  7. "id": "exact-api",
  8. "query": "API versioning",
  9. "type": "exact",
  10. "description": "Direct keyword match in API design document",
  11. "expected_files": ["api-design-principles.md"],
  12. "expected_in_top_k": 1
  13. },
  14. {
  15. "id": "exact-fundraising",
  16. "query": "Series A fundraising",
  17. "type": "exact",
  18. "description": "Direct keyword match in fundraising memo",
  19. "expected_files": ["startup-fundraising-memo.md"],
  20. "expected_in_top_k": 1
  21. },
  22. {
  23. "id": "exact-cap",
  24. "query": "CAP theorem",
  25. "type": "exact",
  26. "description": "Direct keyword match in distributed systems doc",
  27. "expected_files": ["distributed-systems-overview.md"],
  28. "expected_in_top_k": 1
  29. },
  30. {
  31. "id": "semantic-rest",
  32. "query": "how to structure REST endpoints",
  33. "type": "semantic",
  34. "description": "Conceptual match — no exact keyword overlap with 'API design'",
  35. "expected_files": ["api-design-principles.md"],
  36. "expected_in_top_k": 3
  37. },
  38. {
  39. "id": "semantic-fundraising",
  40. "query": "raising money for startup",
  41. "type": "semantic",
  42. "description": "Synonym match — 'raising money' should find 'fundraising'",
  43. "expected_files": ["startup-fundraising-memo.md"],
  44. "expected_in_top_k": 3
  45. },
  46. {
  47. "id": "semantic-overfitting",
  48. "query": "how to prevent models from memorizing data",
  49. "type": "semantic",
  50. "description": "Conceptual match for overfitting in ML primer",
  51. "expected_files": ["machine-learning-primer.md"],
  52. "expected_in_top_k": 3
  53. },
  54. {
  55. "id": "topical-launch",
  56. "query": "what went wrong with the product launch",
  57. "type": "topical",
  58. "description": "Should find the retrospective document",
  59. "expected_files": ["product-launch-retrospective.md"],
  60. "expected_in_top_k": 3
  61. },
  62. {
  63. "id": "cross-domain-consistency",
  64. "query": "consistency vs availability tradeoffs",
  65. "type": "cross-domain",
  66. "description": "CAP theorem concept — specific detail in longer document",
  67. "expected_files": ["distributed-systems-overview.md"],
  68. "expected_in_top_k": 3
  69. },
  70. {
  71. "id": "alias-remote",
  72. "query": "working from home guidelines",
  73. "type": "alias",
  74. "description": "Synonym match — 'working from home' should find 'remote work policy'",
  75. "expected_files": ["remote-work-policy.md"],
  76. "expected_in_top_k": 3
  77. },
  78. {
  79. "id": "hard-partial",
  80. "query": "nouns not verbs",
  81. "type": "semantic",
  82. "description": "Partial phrase recall — API design principle about resource naming",
  83. "expected_files": ["api-design-principles.md"],
  84. "expected_in_top_k": 5
  85. }
  86. ]
  87. }