structured-search.test.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. /**
  2. * structured-search.test.ts - Tests for structured search functionality
  3. *
  4. * Tests cover:
  5. * - CLI query parser (parseStructuredQuery)
  6. * - StructuredSubSearch type validation
  7. * - Basic structuredSearch function behavior
  8. *
  9. * Run with: bun test structured-search.test.ts
  10. */
  11. import { describe, test, expect, beforeAll, afterAll } from "vitest";
  12. import { mkdtemp, rm } from "node:fs/promises";
  13. import { tmpdir } from "node:os";
  14. import { join } from "node:path";
  15. import {
  16. createStore,
  17. structuredSearch,
  18. type StructuredSubSearch,
  19. type Store,
  20. } from "../src/store.js";
  21. import { disposeDefaultLlamaCpp } from "../src/llm.js";
  22. // =============================================================================
  23. // parseStructuredQuery Tests (CLI Parser)
  24. // =============================================================================
  25. /**
  26. * Parse structured search query syntax.
  27. * This is a copy of the function from qmd.ts for isolated testing.
  28. */
  29. function parseStructuredQuery(query: string): StructuredSubSearch[] | null {
  30. const lines = query.split('\n').map(l => l.trim()).filter(l => l.length > 0);
  31. if (lines.length === 0) return null;
  32. const prefixRe = /^(lex|vec|hyde):\s*/i;
  33. const searches: StructuredSubSearch[] = [];
  34. const plainLines: string[] = [];
  35. for (const line of lines) {
  36. const match = line.match(prefixRe);
  37. if (match) {
  38. const type = match[1]!.toLowerCase() as 'lex' | 'vec' | 'hyde';
  39. const text = line.slice(match[0].length).trim();
  40. if (text.length > 0) {
  41. searches.push({ type, query: text });
  42. }
  43. } else {
  44. plainLines.push(line);
  45. }
  46. }
  47. // All plain lines, no prefixes -> null (use normal expansion)
  48. if (searches.length === 0 && plainLines.length === 1) {
  49. return null;
  50. }
  51. // Multiple plain lines without prefixes -> ambiguous, error
  52. if (plainLines.length > 1) {
  53. throw new Error("Ambiguous query: multiple lines without lex:/vec:/hyde: prefix.");
  54. }
  55. // Mix of prefixed and one plain line -> treat plain as lex
  56. if (plainLines.length === 1) {
  57. searches.unshift({ type: 'lex', query: plainLines[0]! });
  58. }
  59. return searches.length > 0 ? searches : null;
  60. }
  61. describe("parseStructuredQuery", () => {
  62. describe("plain queries (returns null for normal expansion)", () => {
  63. test("single line without prefix", () => {
  64. expect(parseStructuredQuery("CAP theorem")).toBeNull();
  65. expect(parseStructuredQuery("distributed systems")).toBeNull();
  66. });
  67. test("empty queries", () => {
  68. expect(parseStructuredQuery("")).toBeNull();
  69. expect(parseStructuredQuery(" ")).toBeNull();
  70. expect(parseStructuredQuery("\n\n")).toBeNull();
  71. });
  72. });
  73. describe("single prefixed queries", () => {
  74. test("lex: prefix", () => {
  75. const result = parseStructuredQuery("lex: CAP theorem");
  76. expect(result).toEqual([{ type: "lex", query: "CAP theorem" }]);
  77. });
  78. test("vec: prefix", () => {
  79. const result = parseStructuredQuery("vec: what is the CAP theorem");
  80. expect(result).toEqual([{ type: "vec", query: "what is the CAP theorem" }]);
  81. });
  82. test("hyde: prefix", () => {
  83. const result = parseStructuredQuery("hyde: The CAP theorem states that...");
  84. expect(result).toEqual([{ type: "hyde", query: "The CAP theorem states that..." }]);
  85. });
  86. test("uppercase prefix", () => {
  87. expect(parseStructuredQuery("LEX: keywords")).toEqual([{ type: "lex", query: "keywords" }]);
  88. expect(parseStructuredQuery("VEC: question")).toEqual([{ type: "vec", query: "question" }]);
  89. expect(parseStructuredQuery("HYDE: passage")).toEqual([{ type: "hyde", query: "passage" }]);
  90. });
  91. test("mixed case prefix", () => {
  92. expect(parseStructuredQuery("Lex: test")).toEqual([{ type: "lex", query: "test" }]);
  93. expect(parseStructuredQuery("VeC: test")).toEqual([{ type: "vec", query: "test" }]);
  94. });
  95. });
  96. describe("multiple prefixed queries", () => {
  97. test("lex + vec", () => {
  98. const result = parseStructuredQuery("lex: keywords\nvec: natural language");
  99. expect(result).toEqual([
  100. { type: "lex", query: "keywords" },
  101. { type: "vec", query: "natural language" },
  102. ]);
  103. });
  104. test("all three types", () => {
  105. const result = parseStructuredQuery("lex: keywords\nvec: question\nhyde: hypothetical doc");
  106. expect(result).toEqual([
  107. { type: "lex", query: "keywords" },
  108. { type: "vec", query: "question" },
  109. { type: "hyde", query: "hypothetical doc" },
  110. ]);
  111. });
  112. test("duplicate types allowed", () => {
  113. const result = parseStructuredQuery("lex: term1\nlex: term2\nlex: term3");
  114. expect(result).toEqual([
  115. { type: "lex", query: "term1" },
  116. { type: "lex", query: "term2" },
  117. { type: "lex", query: "term3" },
  118. ]);
  119. });
  120. test("order preserved", () => {
  121. const result = parseStructuredQuery("hyde: passage\nvec: question\nlex: keywords");
  122. expect(result).toEqual([
  123. { type: "hyde", query: "passage" },
  124. { type: "vec", query: "question" },
  125. { type: "lex", query: "keywords" },
  126. ]);
  127. });
  128. });
  129. describe("mixed plain and prefixed", () => {
  130. test("single plain line with prefixed lines -> plain becomes lex first", () => {
  131. const result = parseStructuredQuery("plain keywords\nvec: semantic question");
  132. expect(result).toEqual([
  133. { type: "lex", query: "plain keywords" },
  134. { type: "vec", query: "semantic question" },
  135. ]);
  136. });
  137. test("plain line prepended before other prefixed", () => {
  138. const result = parseStructuredQuery("keywords\nhyde: passage\nvec: question");
  139. expect(result).toEqual([
  140. { type: "lex", query: "keywords" },
  141. { type: "hyde", query: "passage" },
  142. { type: "vec", query: "question" },
  143. ]);
  144. });
  145. });
  146. describe("error cases", () => {
  147. test("multiple plain lines throws", () => {
  148. expect(() => parseStructuredQuery("line one\nline two")).toThrow("Ambiguous query");
  149. });
  150. test("three plain lines throws", () => {
  151. expect(() => parseStructuredQuery("a\nb\nc")).toThrow("Ambiguous query");
  152. });
  153. });
  154. describe("whitespace handling", () => {
  155. test("empty lines ignored", () => {
  156. const result = parseStructuredQuery("lex: keywords\n\nvec: question\n");
  157. expect(result).toEqual([
  158. { type: "lex", query: "keywords" },
  159. { type: "vec", query: "question" },
  160. ]);
  161. });
  162. test("whitespace-only lines ignored", () => {
  163. const result = parseStructuredQuery("lex: keywords\n \nvec: question");
  164. expect(result).toEqual([
  165. { type: "lex", query: "keywords" },
  166. { type: "vec", query: "question" },
  167. ]);
  168. });
  169. test("leading/trailing whitespace trimmed from lines", () => {
  170. const result = parseStructuredQuery(" lex: keywords \n vec: question ");
  171. expect(result).toEqual([
  172. { type: "lex", query: "keywords" },
  173. { type: "vec", query: "question" },
  174. ]);
  175. });
  176. test("internal whitespace preserved in query", () => {
  177. const result = parseStructuredQuery("lex: multiple spaces ");
  178. expect(result).toEqual([{ type: "lex", query: "multiple spaces" }]);
  179. });
  180. test("empty prefix value skipped", () => {
  181. const result = parseStructuredQuery("lex: \nvec: actual query");
  182. expect(result).toEqual([{ type: "vec", query: "actual query" }]);
  183. });
  184. test("only empty prefix values returns null", () => {
  185. const result = parseStructuredQuery("lex: \nvec: \nhyde: ");
  186. expect(result).toBeNull();
  187. });
  188. });
  189. describe("edge cases", () => {
  190. test("colon in query text preserved", () => {
  191. const result = parseStructuredQuery("lex: time: 12:30 PM");
  192. expect(result).toEqual([{ type: "lex", query: "time: 12:30 PM" }]);
  193. });
  194. test("prefix-like text in query preserved", () => {
  195. const result = parseStructuredQuery("vec: what does lex: mean");
  196. expect(result).toEqual([{ type: "vec", query: "what does lex: mean" }]);
  197. });
  198. test("newline in hyde passage (as single line)", () => {
  199. // If user wants actual newlines in hyde, they need to escape or use multiline syntax
  200. const result = parseStructuredQuery("hyde: The answer is X. It means Y.");
  201. expect(result).toEqual([{ type: "hyde", query: "The answer is X. It means Y." }]);
  202. });
  203. });
  204. });
  205. // =============================================================================
  206. // StructuredSubSearch Type Tests
  207. // =============================================================================
  208. describe("StructuredSubSearch type", () => {
  209. test("accepts lex type", () => {
  210. const search: StructuredSubSearch = { type: "lex", query: "test" };
  211. expect(search.type).toBe("lex");
  212. expect(search.query).toBe("test");
  213. });
  214. test("accepts vec type", () => {
  215. const search: StructuredSubSearch = { type: "vec", query: "test" };
  216. expect(search.type).toBe("vec");
  217. expect(search.query).toBe("test");
  218. });
  219. test("accepts hyde type", () => {
  220. const search: StructuredSubSearch = { type: "hyde", query: "test" };
  221. expect(search.type).toBe("hyde");
  222. expect(search.query).toBe("test");
  223. });
  224. });
  225. // =============================================================================
  226. // structuredSearch Function Tests
  227. // =============================================================================
  228. describe("structuredSearch", () => {
  229. let testDir: string;
  230. let store: Store;
  231. beforeAll(async () => {
  232. testDir = await mkdtemp(join(tmpdir(), "qmd-structured-test-"));
  233. const testDbPath = join(testDir, "test.sqlite");
  234. const testConfigDir = await mkdtemp(join(testDir, "config-"));
  235. process.env.QMD_CONFIG_DIR = testConfigDir;
  236. store = createStore(testDbPath);
  237. });
  238. afterAll(async () => {
  239. store.close();
  240. await disposeDefaultLlamaCpp();
  241. if (testDir) {
  242. await rm(testDir, { recursive: true, force: true });
  243. }
  244. });
  245. test("returns empty array for empty searches", async () => {
  246. const results = await structuredSearch(store, []);
  247. expect(results).toEqual([]);
  248. });
  249. test("returns empty array when no documents match", async () => {
  250. const results = await structuredSearch(store, [
  251. { type: "lex", query: "nonexistent-term-xyz123" }
  252. ]);
  253. expect(results).toEqual([]);
  254. });
  255. test("accepts all search types without error", async () => {
  256. // These may return empty results but should not throw
  257. await expect(structuredSearch(store, [{ type: "lex", query: "test" }])).resolves.toBeDefined();
  258. // vec and hyde require embeddings, so just test lex
  259. });
  260. test("respects limit option", async () => {
  261. const results = await structuredSearch(store, [
  262. { type: "lex", query: "test" }
  263. ], { limit: 5 });
  264. expect(results.length).toBeLessThanOrEqual(5);
  265. });
  266. test("respects minScore option", async () => {
  267. const results = await structuredSearch(store, [
  268. { type: "lex", query: "test" }
  269. ], { minScore: 0.5 });
  270. for (const r of results) {
  271. expect(r.score).toBeGreaterThanOrEqual(0.5);
  272. }
  273. });
  274. });