// store.test.ts (60 KB)
  1. /**
  2. * store.test.ts - Comprehensive unit tests for the QMD store module
  3. *
  4. * Run with: bun test store.test.ts
  5. *
  6. * Ollama is mocked - tests will fail if any real Ollama calls are made.
  7. */
  8. import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, mock, spyOn } from "bun:test";
  9. import { Database } from "bun:sqlite";
  10. import { unlink, mkdtemp, rmdir } from "node:fs/promises";
  11. import { tmpdir } from "node:os";
  12. import { join } from "node:path";
  13. import {
  14. createStore,
  15. getDefaultDbPath,
  16. homedir,
  17. resolve,
  18. getPwd,
  19. getRealPath,
  20. hashContent,
  21. extractTitle,
  22. formatQueryForEmbedding,
  23. formatDocForEmbedding,
  24. chunkDocument,
  25. reciprocalRankFusion,
  26. extractSnippet,
  27. getCacheKey,
  28. OLLAMA_URL,
  29. type Store,
  30. type DocumentResult,
  31. type SearchResult,
  32. type RankedResult,
  33. } from "./store.js";
  34. // =============================================================================
  35. // Ollama Mocking
  36. // =============================================================================
  37. // Track original fetch
  38. const originalFetch = globalThis.fetch;
  39. // Mock responses for different Ollama endpoints
  /**
   * Canned Ollama responses, keyed by endpoint path (the request URL with
   * OLLAMA_URL removed). Each handler receives the parsed JSON request body.
   *
   * - "/api/embed":    a single random 768-dimension embedding vector.
   * - "/api/generate": "yes" plus logprobs when the prompt contains "yes",
   *                    "no" or "Judge" (treated as a rerank request);
   *                    otherwise two newline-separated query expansions.
   * - "/api/show":     a stub modelfile, so the model is reported as existing.
   */
  const mockOllamaResponses: Record<string, (body: unknown) => Response> = (() => {
    // Every mocked endpoint answers HTTP 200 with a JSON payload.
    const okJson = (payload: unknown): Response =>
      new Response(JSON.stringify(payload), {
        status: 200,
        headers: { "Content-Type": "application/json" },
      });

    // Substrings that mark a /api/generate prompt as a rerank request.
    const rerankMarkers = ["yes", "no", "Judge"];

    return {
      "/api/embed": (body: unknown) => {
        // Mock embedding (768 dimensions); the request body is not inspected.
        const embedding = Array.from({ length: 768 }, () => Math.random());
        return okJson({ embeddings: [embedding] });
      },

      "/api/generate": (body: unknown) => {
        const prompt = (body as { prompt?: string }).prompt;
        const isRerank = rerankMarkers.some((marker) => prompt?.includes(marker));

        if (isRerank) {
          // Rerank response
          return okJson({
            response: "yes",
            logprobs: [{ token: "yes", logprob: -0.1 }],
          });
        }

        // Query expansion response
        return okJson({
          response: "expanded query variation 1\nexpanded query variation 2",
        });
      },

      // Model exists
      "/api/show": () => okJson({ modelfile: "exists" }),
    };
  })();
  79. // Install mock fetch that intercepts Ollama calls
  /**
   * Replace globalThis.fetch with a stub that serves the canned responses in
   * mockOllamaResponses for URLs starting with OLLAMA_URL.
   *
   * Any other request throws, so a test that reaches the network (or an Ollama
   * endpoint without a mock) fails loudly instead of silently hitting a server.
   */
  function installOllamaMock(): void {
    globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
      // fetch accepts a string, a URL object, or a Request; normalize to a string.
      let url: string;
      if (typeof input === "string") {
        url = input;
      } else if (input instanceof URL) {
        url = input.href;
      } else {
        url = input.url;
      }

      // Non-Ollama URLs fail (we shouldn't be making other network calls in tests)
      if (!url.startsWith(OLLAMA_URL)) {
        throw new Error(`TEST ERROR: Unexpected network call to: ${url}`);
      }

      const path = url.replace(OLLAMA_URL, "");
      const mockHandler = mockOllamaResponses[path];

      // Unknown Ollama endpoint - fail the test
      if (!mockHandler) {
        throw new Error(`TEST ERROR: Unmocked Ollama endpoint called: ${path}`);
      }

      // Handlers take the parsed JSON body; a missing body becomes {}.
      const body = init?.body ? JSON.parse(init.body as string) : {};
      return mockHandler(body);
    };
  }
  98. // Restore original fetch
  /**
   * Undo installOllamaMock by putting back the fetch implementation that was
   * captured in originalFetch when this module was loaded.
   */
  function restoreOllamaMock(): void {
    globalThis.fetch = originalFetch;
  }
  102. // Install mock before all tests
  // Swap the fetch stub in before any test runs...
  beforeAll(installOllamaMock);

  // ...and put the real fetch back once every test has finished.
  afterAll(restoreOllamaMock);
  110. // =============================================================================
  111. // Test Utilities
  112. // =============================================================================
  // Scratch directory for this file's databases; assigned in a beforeAll hook.
  let testDir: string;
  // Path of the database most recently created by createTestStore().
  let testDbPath: string;

  /**
   * Open a store backed by a uniquely named SQLite file inside testDir.
   * The chosen path is also recorded in testDbPath so tests can refer to it.
   */
  async function createTestStore(): Promise<Store> {
    // Timestamp plus a random base-36 suffix keeps file names distinct.
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    testDbPath = join(testDir, `test-${uniqueSuffix}.sqlite`);
    return createStore(testDbPath);
  }
  /**
   * Close the given store and delete its database file.
   * Deletion is best-effort: any error from unlink is swallowed.
   */
  async function cleanupTestDb(store: Store): Promise<void> {
    store.close();

    try {
      await unlink(store.dbPath);
    } catch (_unlinkError) {
      // Ignore if file doesn't exist
    }
  }
  127. // Helper to insert a test document directly into the database
  /**
   * Insert a row straight into the `documents` table, bypassing the store API.
   *
   * Every field in `opts` is optional. A field that is omitted (undefined)
   * gets a default; a field that is passed explicitly is stored as given,
   * including an empty string, so tests can create e.g. an empty-body document.
   *
   * @param db           Open database handle to insert into.
   * @param collectionId Value for the row's collection_id column.
   * @param opts         Column overrides; `active` defaults to 1.
   * @returns The rowid of the inserted document.
   */
  function insertTestDocument(
    db: Database,
    collectionId: number,
    opts: {
      name?: string;
      title?: string;
      hash?: string;
      filepath?: string;
      displayPath?: string;
      body?: string;
      active?: number;
    }
  ): number {
    const now = new Date().toISOString();

    // `??` rather than `||`: only a missing value falls back to the default,
    // matching how `active` is handled (0 and "" are legitimate inputs).
    const name = opts.name ?? "test-doc";
    const title = opts.title ?? "Test Document";
    // Default hash is unique per call.
    const hash = opts.hash ?? `hash-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const filepath = opts.filepath ?? `/test/path/${name}.md`;
    const displayPath = opts.displayPath ?? `test/${name}.md`;
    const body = opts.body ?? "# Test Document\n\nThis is test content.";
    const active = opts.active ?? 1;

    // created_at and modified_at both receive the same timestamp.
    const result = db.prepare(`
      INSERT INTO documents (collection_id, name, title, hash, filepath, display_path, body, created_at, modified_at, active)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(collectionId, name, title, hash, filepath, displayPath, body, now, now, active);

    return Number(result.lastInsertRowid);
  }
  155. // Helper to create a test collection
  /**
   * Insert a row into the `collections` table and return its rowid.
   *
   * @param db   Open database handle to insert into.
   * @param pwd  Value for the pwd column (default "/test/collection").
   * @param glob Value for the glob_pattern column (default "**\/*.md").
   */
  function createTestCollection(db: Database, pwd: string = "/test/collection", glob: string = "**/*.md"): number {
    const createdAt = new Date().toISOString();
    const insertCollection = db.prepare(`
      INSERT INTO collections (pwd, glob_pattern, created_at)
      VALUES (?, ?, ?)
    `);
    const { lastInsertRowid } = insertCollection.run(pwd, glob, createdAt);
    return Number(lastInsertRowid);
  }
  164. // Helper to add path context
  /**
   * Insert a row into `path_contexts` for the given prefix, stamped with the
   * current time. Uses INSERT OR REPLACE, so a conflicting existing row is
   * replaced rather than causing an error.
   */
  function addPathContext(db: Database, pathPrefix: string, context: string): void {
    const createdAt = new Date().toISOString();
    const upsertContext = db.prepare(`
      INSERT OR REPLACE INTO path_contexts (path_prefix, context, created_at)
      VALUES (?, ?, ?)
    `);
    upsertContext.run(pathPrefix, context, createdAt);
  }
  172. // =============================================================================
  173. // Test Setup
  174. // =============================================================================
  // Create one scratch directory that holds every database created by this file.
  beforeAll(async () => {
    testDir = await mkdtemp(join(tmpdir(), "qmd-test-"));
  });

  // Remove the scratch directory and anything still inside it.
  afterAll(async () => {
    try {
      // `rm` is loaded here because the file's static import only brings in
      // unlink/mkdtemp/rmdir. A recursive, forced removal also copes with
      // nested directories and already-missing paths, which the previous
      // unlink-each-file-then-rmdir sequence did not.
      const { rm } = await import("node:fs/promises");
      await rm(testDir, { recursive: true, force: true });
    } catch {
      // Cleanup is best-effort; a leftover temp directory must not fail the run.
    }
  });
  191. // =============================================================================
  192. // Path Utilities Tests
  193. // =============================================================================
  describe("Path Utilities", () => {
    // homedir() is compared against $HOME, with "/tmp" as the fallback.
    test("homedir returns HOME environment variable", () => {
      expect(homedir()).toBe(Bun.env.HOME || "/tmp");
    });

    // A later absolute segment replaces everything before it.
    test("resolve handles absolute paths", () => {
      const fromSingle = resolve("/foo/bar");
      expect(fromSingle).toBe("/foo/bar");

      const fromPair = resolve("/foo", "/bar");
      expect(fromPair).toBe("/bar");
    });

    // Relative segments are anchored at $PWD (or process.cwd() when unset).
    test("resolve handles relative paths", () => {
      const base = Bun.env.PWD || process.cwd();

      const single = resolve("foo");
      expect(single).toBe(`${base}/foo`);

      const nested = resolve("foo", "bar");
      expect(nested).toBe(`${base}/foo/bar`);
    });

    test("resolve normalizes . and ..", () => {
      const cases: [string, string][] = [
        ["/foo/bar/./baz", "/foo/bar/baz"],
        ["/foo/bar/../baz", "/foo/baz"],
        ["/foo/bar/../../baz", "/baz"],
      ];
      for (const [input, expected] of cases) {
        expect(resolve(input)).toBe(expected);
      }
    });

    // With no argument the file is index.sqlite; otherwise <name>.sqlite.
    test("getDefaultDbPath returns expected path structure", () => {
      expect(getDefaultDbPath()).toContain(".cache/qmd/index.sqlite");
      expect(getDefaultDbPath("custom")).toContain(".cache/qmd/custom.sqlite");
    });

    test("getPwd returns current working directory", () => {
      const cwd = getPwd();
      expect(cwd).toBeTruthy();
      expect(typeof cwd).toBe("string");
    });

    test("getRealPath resolves symlinks", () => {
      const real = getRealPath("/tmp");
      expect(real).toBeTruthy();
      // On macOS, /tmp is a symlink to /private/tmp
      const accepted = ["/tmp", "/private/tmp"];
      expect(accepted.includes(real)).toBe(true);
    });
  });
  231. // =============================================================================
  232. // Store Creation Tests
  233. // =============================================================================
  describe("Store Creation", () => {
    // NOTE(review): this opens the real default database path, not a temp file.
    test("createStore creates a new store with default path", () => {
      const defaultStore = createStore();
      expect(defaultStore).toBeDefined();
      expect(defaultStore.db).toBeDefined();
      expect(defaultStore.dbPath).toContain(".cache/qmd/index.sqlite");
      defaultStore.close();
    });

    test("createStore creates a new store with custom path", async () => {
      const customStore = await createTestStore();
      // createTestStore records the path it used in testDbPath.
      expect(customStore.dbPath).toBe(testDbPath);
      expect(customStore.db).toBeInstanceOf(Database);
      await cleanupTestDb(customStore);
    });

    test("createStore initializes database schema", async () => {
      const freshStore = await createTestStore();

      // Check tables exist
      const rows = freshStore.db.prepare(`
        SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
      `).all() as { name: string }[];
      const tableNames = rows.map((row) => row.name);

      const requiredTables = [
        "collections",
        "documents",
        "documents_fts",
        "content_vectors",
        "path_contexts",
        "ollama_cache",
      ];
      for (const table of requiredTables) {
        expect(tableNames).toContain(table);
      }

      await cleanupTestDb(freshStore);
    });

    test("createStore sets WAL journal mode", async () => {
      const freshStore = await createTestStore();
      const pragmaRow = freshStore.db.prepare("PRAGMA journal_mode").get() as { journal_mode: string };
      expect(pragmaRow.journal_mode).toBe("wal");
      await cleanupTestDb(freshStore);
    });

    test("store.close closes the database connection", async () => {
      const closedStore = await createTestStore();
      closedStore.close();

      // Attempting to use db after close should throw
      const queryAfterClose = () => closedStore.db.prepare("SELECT 1").get();
      expect(queryAfterClose).toThrow();

      // The store is already closed, so only the file needs removing (best-effort).
      try {
        await unlink(testDbPath);
      } catch {}
    });
  });
  279. // =============================================================================
  280. // Document Hashing & Title Extraction Tests
  281. // =============================================================================
  describe("Document Helpers", () => {
    // Same input twice must give the same 64-character lowercase hex digest.
    test("hashContent produces consistent SHA256 hashes", async () => {
      const input = "Hello, World!";
      const first = await hashContent(input);
      const second = await hashContent(input);
      expect(first).toBe(second);
      expect(first).toMatch(/^[a-f0-9]{64}$/);
    });

    test("hashContent produces different hashes for different content", async () => {
      const helloHash = await hashContent("Hello");
      const worldHash = await hashContent("World");
      expect(helloHash).not.toBe(worldHash);
    });

    test("extractTitle extracts H1 heading", () => {
      const title = extractTitle("# My Title\n\nSome content here.", "file.md");
      expect(title).toBe("My Title");
    });

    test("extractTitle extracts H2 heading if no H1", () => {
      const title = extractTitle("## My Subtitle\n\nSome content here.", "file.md");
      expect(title).toBe("My Subtitle");
    });

    // With no heading, the expected title is the filename minus its extension.
    test("extractTitle falls back to filename", () => {
      const title = extractTitle("Just some plain text without headings.", "my-document.md");
      expect(title).toBe("my-document");
    });

    // A bare "Notes" H1 is passed over in favour of the next heading.
    test("extractTitle skips generic 'Notes' heading", () => {
      const title = extractTitle("# Notes\n\n## Actual Title\n\nContent", "file.md");
      expect(title).toBe("Actual Title");
    });

    test("extractTitle handles 📝 Notes heading", () => {
      const title = extractTitle("# 📝 Notes\n\n## Meeting Summary\n\nContent", "file.md");
      expect(title).toBe("Meeting Summary");
    });
  });
  316. // =============================================================================
  317. // Embedding Format Tests
  318. // =============================================================================
  describe("Embedding Formatting", () => {
    test("formatQueryForEmbedding adds search task prefix", () => {
      expect(formatQueryForEmbedding("how to deploy")).toBe(
        "task: search result | query: how to deploy"
      );
    });

    test("formatDocForEmbedding adds title and text prefix", () => {
      expect(formatDocForEmbedding("Some content", "My Title")).toBe(
        "title: My Title | text: Some content"
      );
    });

    // Without a title argument the expected title field is the literal "none".
    test("formatDocForEmbedding handles missing title", () => {
      expect(formatDocForEmbedding("Some content")).toBe(
        "title: none | text: Some content"
      );
    });
  });
  333. // =============================================================================
  334. // Document Chunking Tests
  335. // =============================================================================
  describe("Document Chunking", () => {
    // Content below the size limit must come back as one chunk at position 0.
    test("chunkDocument returns single chunk for small documents", () => {
      const content = "Small document content";
      const chunks = chunkDocument(content, 1000);
      expect(chunks).toHaveLength(1);
      expect(chunks[0].text).toBe(content);
      expect(chunks[0].pos).toBe(0);
    });

    test("chunkDocument splits large documents", () => {
      const content = "A".repeat(10000);
      const chunks = chunkDocument(content, 1000);
      expect(chunks.length).toBeGreaterThan(1);

      // Positions must be non-negative and strictly increasing.
      chunks.forEach((chunk, i) => {
        expect(chunk.pos).toBeGreaterThanOrEqual(0);
        if (i > 0) {
          expect(chunk.pos).toBeGreaterThan(chunks[i - 1].pos);
        }
      });
    });

    test("chunkDocument prefers paragraph breaks", () => {
      const content = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph.".repeat(50);
      const chunks = chunkDocument(content, 500);
      expect(chunks.length).toBeGreaterThan(1);

      const endsNearBreak = (text: string): boolean =>
        text.endsWith("\n\n") || text.endsWith(".") || text.endsWith("\n");

      // Deliberately lenient: not every chunk can end on a break, so only
      // require that at least one non-final chunk does. Previously this value
      // was computed and then discarded, so nothing about breaks was checked.
      const nonFinalChunks = chunks.slice(0, -1);
      expect(nonFinalChunks.some((chunk) => endsNearBreak(chunk.text))).toBe(true);
    });

    test("chunkDocument handles UTF-8 characters correctly", () => {
      const content = "こんにちは世界".repeat(500); // Japanese text
      const chunks = chunkDocument(content, 1000);

      const encoder = new TextEncoder();
      const decoder = new TextDecoder();

      // Should not split in the middle of a multi-byte character.
      // TextEncoder.encode never throws, so asserting "does not throw" proved
      // nothing. A chunk cut mid-character shows up as U+FFFD after decoding,
      // and a string with broken surrogates does not survive a round trip.
      for (const chunk of chunks) {
        expect(chunk.text).not.toContain("\uFFFD");
        expect(decoder.decode(encoder.encode(chunk.text))).toBe(chunk.text);
      }
    });
  });
  378. // =============================================================================
  379. // Caching Tests
  380. // =============================================================================
  describe("Caching", () => {
    // Identical (url, body) pairs must map to the same 64-character hex key.
    test("getCacheKey generates consistent keys", () => {
      const first = getCacheKey("http://example.com", { query: "test" });
      const second = getCacheKey("http://example.com", { query: "test" });
      expect(first).toBe(second);
      expect(first).toMatch(/^[a-f0-9]{64}$/);
    });

    test("getCacheKey generates different keys for different inputs", () => {
      const keyForTest1 = getCacheKey("http://example.com", { query: "test1" });
      const keyForTest2 = getCacheKey("http://example.com", { query: "test2" });
      expect(keyForTest1).not.toBe(keyForTest2);
    });

    // Full cycle: miss -> set -> hit -> clear -> miss.
    test("store cache operations work correctly", async () => {
      const cacheStore = await createTestStore();
      const cacheKey = "test-cache-key";
      const cachedValue = "cached result";

      // Initially empty
      expect(cacheStore.getCachedResult(cacheKey)).toBeNull();

      // Set cache
      cacheStore.setCachedResult(cacheKey, cachedValue);

      // Retrieve cache
      expect(cacheStore.getCachedResult(cacheKey)).toBe(cachedValue);

      // Clear cache
      cacheStore.clearCache();
      expect(cacheStore.getCachedResult(cacheKey)).toBeNull();

      await cleanupTestDb(cacheStore);
    });
  });
  409. // =============================================================================
  410. // Context Tests
  411. // =============================================================================
  describe("Path Context", () => {
    test("getContextForFile returns null when no context set", async () => {
      const ctxStore = await createTestStore();
      expect(ctxStore.getContextForFile("/some/random/path.md")).toBeNull();
      await cleanupTestDb(ctxStore);
    });

    test("getContextForFile returns matching context", async () => {
      const ctxStore = await createTestStore();
      addPathContext(ctxStore.db, "/test/docs", "Documentation files");

      const found = ctxStore.getContextForFile("/test/docs/readme.md");
      expect(found).toBe("Documentation files");

      await cleanupTestDb(ctxStore);
    });

    // With nested prefixes registered, each file should get its deepest match.
    test("getContextForFile returns most specific context", async () => {
      const ctxStore = await createTestStore();

      const registered: [string, string][] = [
        ["/test", "General test files"],
        ["/test/docs", "Documentation files"],
        ["/test/docs/api", "API documentation"],
      ];
      for (const [prefix, label] of registered) {
        addPathContext(ctxStore.db, prefix, label);
      }

      const lookups: [string, string][] = [
        ["/test/readme.md", "General test files"],
        ["/test/docs/guide.md", "Documentation files"],
        ["/test/docs/api/reference.md", "API documentation"],
      ];
      for (const [file, expectedLabel] of lookups) {
        expect(ctxStore.getContextForFile(file)).toBe(expectedLabel);
      }

      await cleanupTestDb(ctxStore);
    });
  });
  437. // =============================================================================
  438. // Collection Tests
  439. // =============================================================================
  describe("Collections", () => {
    // The collection is registered under a full pwd and looked up by "myapp".
    test("getCollectionIdByName finds collection by path suffix", async () => {
      const colStore = await createTestStore();
      const expectedId = createTestCollection(colStore.db, "/home/user/projects/myapp", "**/*.md");

      const lookedUp = colStore.getCollectionIdByName("myapp");
      expect(lookedUp).toBe(expectedId);

      await cleanupTestDb(colStore);
    });

    test("getCollectionIdByName returns null for non-existent collection", async () => {
      const colStore = await createTestStore();
      expect(colStore.getCollectionIdByName("nonexistent")).toBeNull();
      await cleanupTestDb(colStore);
    });
  });
  455. // =============================================================================
  456. // FTS Search Tests
  457. // =============================================================================
  describe("FTS Search", () => {
    test("searchFTS returns empty array for no matches", async () => {
      const ftsStore = await createTestStore();
      const colId = createTestCollection(ftsStore.db);
      insertTestDocument(ftsStore.db, colId, {
        name: "doc1",
        body: "The quick brown fox jumps over the lazy dog",
      });

      const hits = ftsStore.searchFTS("nonexistent-term-xyz", 10);
      expect(hits).toHaveLength(0);

      await cleanupTestDb(ftsStore);
    });

    test("searchFTS finds documents by keyword", async () => {
      const ftsStore = await createTestStore();
      const colId = createTestCollection(ftsStore.db);
      insertTestDocument(ftsStore.db, colId, {
        name: "doc1",
        title: "Fox Document",
        body: "The quick brown fox jumps over the lazy dog",
        displayPath: "test/doc1.md",
      });

      const hits = ftsStore.searchFTS("fox", 10);
      expect(hits.length).toBeGreaterThan(0);

      const [topHit] = hits;
      expect(topHit.displayPath).toBe("test/doc1.md");
      expect(topHit.source).toBe("fts");

      await cleanupTestDb(ftsStore);
    });

    test("searchFTS ranks title matches higher", async () => {
      const ftsStore = await createTestStore();
      const colId = createTestCollection(ftsStore.db);

      // Document with "fox" in body only
      const bodyOnlyMatch = {
        name: "body-match",
        title: "Some Other Title",
        body: "The fox is here in the body",
        displayPath: "test/body.md",
      };
      // Document with "fox" in title (via name field which is indexed)
      const titleMatch = {
        name: "fox",
        title: "Fox Title",
        body: "Different content without the animal",
        displayPath: "test/title.md",
      };
      insertTestDocument(ftsStore.db, colId, bodyOnlyMatch);
      insertTestDocument(ftsStore.db, colId, titleMatch);

      const hits = ftsStore.searchFTS("fox", 10);
      expect(hits.length).toBe(2);
      // Title/name match should rank higher due to BM25 weights
      expect(hits[0].displayPath).toBe("test/title.md");

      await cleanupTestDb(ftsStore);
    });

    test("searchFTS respects limit parameter", async () => {
      const ftsStore = await createTestStore();
      const colId = createTestCollection(ftsStore.db);

      // Insert 10 documents that all match the query
      const docNumbers = Array.from({ length: 10 }, (_, n) => n);
      for (const n of docNumbers) {
        insertTestDocument(ftsStore.db, colId, {
          name: `doc${n}`,
          body: "common keyword appears here",
          displayPath: `test/doc${n}.md`,
        });
      }

      const hits = ftsStore.searchFTS("common keyword", 3);
      expect(hits).toHaveLength(3);

      await cleanupTestDb(ftsStore);
    });

    test("searchFTS filters by collectionId", async () => {
      const ftsStore = await createTestStore();
      const firstCol = createTestCollection(ftsStore.db, "/path/one", "**/*.md");
      const secondCol = createTestCollection(ftsStore.db, "/path/two", "**/*.md");

      // Same body in both collections; only the display path differs.
      insertTestDocument(ftsStore.db, firstCol, {
        name: "doc1",
        body: "searchable content",
        displayPath: "one/doc1.md",
      });
      insertTestDocument(ftsStore.db, secondCol, {
        name: "doc2",
        body: "searchable content",
        displayPath: "two/doc2.md",
      });

      const unfiltered = ftsStore.searchFTS("searchable", 10);
      expect(unfiltered).toHaveLength(2);

      const onlyFirst = ftsStore.searchFTS("searchable", 10, firstCol);
      expect(onlyFirst).toHaveLength(1);
      expect(onlyFirst[0].displayPath).toBe("one/doc1.md");

      await cleanupTestDb(ftsStore);
    });

    test("searchFTS handles special characters in query", async () => {
      const ftsStore = await createTestStore();
      const colId = createTestCollection(ftsStore.db);
      insertTestDocument(ftsStore.db, colId, {
        name: "doc1",
        body: "Function with params: foo(bar, baz)",
        displayPath: "test/doc1.md",
      });

      // Should not throw on special characters
      const hits = ftsStore.searchFTS("foo(bar)", 10);
      // Results may vary based on FTS5 handling, so only the shape is checked
      expect(Array.isArray(hits)).toBe(true);

      await cleanupTestDb(ftsStore);
    });

    test("searchFTS ignores inactive documents", async () => {
      const ftsStore = await createTestStore();
      const colId = createTestCollection(ftsStore.db);

      // Two documents with identical bodies that differ only in the active flag.
      insertTestDocument(ftsStore.db, colId, {
        name: "active",
        body: "findme content",
        displayPath: "test/active.md",
        active: 1,
      });
      insertTestDocument(ftsStore.db, colId, {
        name: "inactive",
        body: "findme content",
        displayPath: "test/inactive.md",
        active: 0,
      });

      const hits = ftsStore.searchFTS("findme", 10);
      expect(hits).toHaveLength(1);
      expect(hits[0].displayPath).toBe("test/active.md");

      await cleanupTestDb(ftsStore);
    });
  });
  579. // =============================================================================
  580. // Document Retrieval Tests
  581. // =============================================================================
  582. describe("Document Retrieval", () => {
  // Single-document lookup via store.findDocument. Each test creates its own
  // store and releases it in a finally block so a failed assertion cannot leak it.
  describe("findDocument", () => {
    test("findDocument finds by exact filepath", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          title: "My Document",
          filepath: "/exact/path/mydoc.md",
          displayPath: "path/mydoc.md",
          body: "Document content here",
        });

        const result = store.findDocument("/exact/path/mydoc.md");
        expect("error" in result).toBe(false);
        if (!("error" in result)) {
          expect(result.title).toBe("My Document");
          expect(result.displayPath).toBe("path/mydoc.md");
          expect(result.body).toBeUndefined(); // body not included by default
        }
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocument finds by display_path", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/some/path/mydoc.md",
          displayPath: "docs/mydoc.md",
        });

        const result = store.findDocument("docs/mydoc.md");
        expect("error" in result).toBe(false);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocument finds by partial path match", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/very/long/path/to/mydoc.md",
          displayPath: "path/to/mydoc.md",
        });

        // Only the trailing file name is supplied.
        const result = store.findDocument("mydoc.md");
        expect("error" in result).toBe(false);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocument includes body when requested", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/path/mydoc.md",
          body: "The actual body content",
        });

        const result = store.findDocument("/path/mydoc.md", { includeBody: true });
        expect("error" in result).toBe(false);
        if (!("error" in result)) {
          expect(result.body).toBe("The actual body content");
        }
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocument returns error with suggestions for not found", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "similar",
          filepath: "/path/similar.md",
          displayPath: "similar.md",
        });

        const result = store.findDocument("simlar.md"); // typo - 1 char diff
        expect("error" in result).toBe(true);
        if ("error" in result) {
          expect(result.error).toBe("not_found");
          // Whether "similar.md" is actually suggested depends on the distance
          // calculation, so only the shape of the suggestion list is pinned here.
          // (The previous `length >= 0` check could never fail.)
          expect(Array.isArray(result.similarFiles)).toBe(true);
        }
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocument handles :line suffix", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/path/mydoc.md",
          displayPath: "mydoc.md",
        });

        const result = store.findDocument("mydoc.md:100");
        expect("error" in result).toBe(false);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocument expands ~ to home directory", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        const home = homedir();
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: `${home}/docs/mydoc.md`,
          displayPath: "docs/mydoc.md",
        });

        const result = store.findDocument("~/docs/mydoc.md");
        expect("error" in result).toBe(false);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocument includes context from path_contexts", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        addPathContext(store.db, "/path/docs", "Documentation");
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/path/docs/mydoc.md",
          displayPath: "docs/mydoc.md",
        });

        const result = store.findDocument("/path/docs/mydoc.md");
        expect("error" in result).toBe(false);
        if (!("error" in result)) {
          expect(result.context).toBe("Documentation");
        }
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  // Body retrieval via store.getDocumentBody, with and without a line range.
  // Stores are released in finally blocks so a failed assertion cannot leak them.
  describe("getDocumentBody", () => {
    test("getDocumentBody returns full body", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/path/mydoc.md",
          body: "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
        });

        const body = store.getDocumentBody({ filepath: "/path/mydoc.md" });
        expect(body).toBe("Line 1\nLine 2\nLine 3\nLine 4\nLine 5");
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getDocumentBody supports line range", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/path/mydoc.md",
          body: "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
        });

        // Arguments (2, 2) are expected to yield lines 2 and 3.
        const body = store.getDocumentBody({ filepath: "/path/mydoc.md" }, 2, 2);
        expect(body).toBe("Line 2\nLine 3");
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getDocumentBody returns null for non-existent document", async () => {
      const store = await createTestStore();
      try {
        const body = store.getDocumentBody({ filepath: "/nonexistent.md" });
        expect(body).toBeNull();
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  // Multi-document lookup via store.findDocuments: glob patterns, comma-separated
  // lists, not-found reporting, size limits and body inclusion.
  // Stores are released in finally blocks so a failed assertion cannot leak them.
  describe("findDocuments (multi-get)", () => {
    test("findDocuments finds by glob pattern", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "doc1",
          filepath: "/path/journals/2024-01.md",
          displayPath: "journals/2024-01.md",
        });
        insertTestDocument(store.db, collectionId, {
          name: "doc2",
          filepath: "/path/journals/2024-02.md",
          displayPath: "journals/2024-02.md",
        });
        insertTestDocument(store.db, collectionId, {
          name: "doc3",
          filepath: "/path/other/file.md",
          displayPath: "other/file.md",
        });

        const { docs, errors } = store.findDocuments("journals/2024-*.md");
        expect(errors).toHaveLength(0);
        expect(docs).toHaveLength(2);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocuments finds by comma-separated list", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "doc1",
          filepath: "/path/doc1.md",
          displayPath: "doc1.md",
        });
        insertTestDocument(store.db, collectionId, {
          name: "doc2",
          filepath: "/path/doc2.md",
          displayPath: "doc2.md",
        });

        const { docs, errors } = store.findDocuments("doc1.md, doc2.md");
        expect(errors).toHaveLength(0);
        expect(docs).toHaveLength(2);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocuments reports errors for not found files", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "doc1",
          filepath: "/path/doc1.md",
          displayPath: "doc1.md",
        });

        const { docs, errors } = store.findDocuments("doc1.md, nonexistent.md");
        expect(docs).toHaveLength(1);
        expect(errors).toHaveLength(1);
        expect(errors[0]).toContain("not found");
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocuments skips large files", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "large",
          filepath: "/path/large.md",
          displayPath: "large.md",
          body: "x".repeat(20000), // 20KB
        });

        // The body is twice the maxBytes limit, so the entry should be marked skipped.
        const { docs } = store.findDocuments("large.md", { maxBytes: 10000 });
        expect(docs).toHaveLength(1);
        expect(docs[0].skipped).toBe(true);
        if (docs[0].skipped) {
          expect(docs[0].skipReason).toContain("too large");
        }
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findDocuments includes body when requested", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "doc1",
          filepath: "/path/doc1.md",
          displayPath: "doc1.md",
          body: "The content",
        });

        const { docs } = store.findDocuments("doc1.md", { includeBody: true });
        expect(docs[0].skipped).toBe(false);
        if (!docs[0].skipped) {
          expect(docs[0].doc.body).toBe("The content");
        }
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  // Legacy store.getDocument entry point: returns the body and supports a line range.
  // Stores are released in finally blocks so a failed assertion cannot leak them.
  describe("Legacy getDocument", () => {
    test("getDocument returns document with body", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/path/mydoc.md",
          body: "Document body",
        });

        const result = store.getDocument("/path/mydoc.md");
        expect("error" in result).toBe(false);
        if (!("error" in result)) {
          expect(result.body).toBe("Document body");
        }
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getDocument supports line range from :line suffix", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "mydoc",
          filepath: "/path/mydoc.md",
          displayPath: "mydoc.md",
          body: "Line 1\nLine 2\nLine 3\nLine 4",
        });

        // ":2" suffix plus a third argument of 2 is expected to yield lines 2 and 3.
        const result = store.getDocument("mydoc.md:2", undefined, 2);
        expect("error" in result).toBe(false);
        if (!("error" in result)) {
          expect(result.body).toBe("Line 2\nLine 3");
        }
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  856. });
  857. // =============================================================================
  858. // Snippet Extraction Tests
  859. // =============================================================================
  // Tests for extractSnippet: match location, surrounding context, truncation,
  // the diff-style "@@ -line,count @@ (N before, M after)" header, and the
  // linesBefore / linesAfter / snippetLines counters.
  describe("Snippet Extraction", () => {
    test("extractSnippet finds query terms", () => {
      const body = "First line.\nSecond line with keyword.\nThird line.\nFourth line.";
      const { line, snippet } = extractSnippet(body, "keyword", 500);
      expect(line).toBe(2); // Line 2 contains "keyword"
      expect(snippet).toContain("keyword");
    });

    test("extractSnippet includes context lines", () => {
      const body = "Line 1\nLine 2\nLine 3 has keyword\nLine 4\nLine 5";
      const { snippet } = extractSnippet(body, "keyword", 500);
      expect(snippet).toContain("Line 2"); // Context before
      expect(snippet).toContain("Line 3 has keyword");
      expect(snippet).toContain("Line 4"); // Context after
    });

    test("extractSnippet respects maxLen for content", () => {
      const body = "A".repeat(1000);
      const result = extractSnippet(body, "query", 100);
      // Snippet includes header + content, content should be truncated
      expect(result.snippet).toContain("@@"); // Has diff header
      expect(result.snippet).toContain("..."); // Content was truncated
    });

    test("extractSnippet uses chunkPos hint", () => {
      const body = "First section...\n".repeat(50) + "Target keyword here\n" + "More content...".repeat(50);
      const chunkPos = body.indexOf("Target keyword");
      const { snippet } = extractSnippet(body, "Target", 200, chunkPos);
      expect(snippet).toContain("Target keyword");
    });

    test("extractSnippet returns beginning when no match", () => {
      const body = "First line\nSecond line\nThird line";
      const { line, snippet } = extractSnippet(body, "nonexistent", 500);
      expect(line).toBe(1);
      expect(snippet).toContain("First line");
    });

    test("extractSnippet includes diff-style header", () => {
      const body = "Line 1\nLine 2\nLine 3 has keyword\nLine 4\nLine 5";
      const { snippet, linesBefore, linesAfter, snippetLines } = extractSnippet(body, "keyword", 500);
      // Header should show line position and context info
      expect(snippet).toMatch(/^@@ -\d+,\d+ @@ \(\d+ before, \d+ after\)/);
      expect(linesBefore).toBe(1); // Line 1 comes before
      expect(linesAfter).toBe(0); // Snippet includes to end (lines 2-5)
      expect(snippetLines).toBe(4); // Lines 2, 3, 4, 5
    });

    test("extractSnippet calculates linesBefore and linesAfter correctly", () => {
      const body = "L1\nL2\nL3\nL4 match\nL5\nL6\nL7\nL8\nL9\nL10";
      const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "match", 500);
      expect(line).toBe(4); // "L4 match" is line 4
      expect(linesBefore).toBe(2); // L1, L2 before snippet (snippet starts at L3)
      expect(snippetLines).toBe(4); // L3, L4, L5, L6
      expect(linesAfter).toBe(4); // L7, L8, L9, L10 after snippet
    });

    test("extractSnippet header format matches diff style", () => {
      const body = "A\nB\nC keyword\nD\nE\nF\nG\nH";
      const { snippet } = extractSnippet(body, "keyword", 500);
      // Should start with @@ -line,count @@ (N before, M after)
      const headerMatch = snippet.match(/^@@ -(\d+),(\d+) @@ \((\d+) before, (\d+) after\)/);
      expect(headerMatch).not.toBeNull();
      const [, startLine, count, before, after] = headerMatch!;
      // Explicit radix 10: the captured groups are decimal digit strings.
      expect(Number.parseInt(startLine, 10)).toBe(2); // Snippet starts at line 2 (B)
      expect(Number.parseInt(count, 10)).toBe(4); // 4 lines: B, C keyword, D, E
      expect(Number.parseInt(before, 10)).toBe(1); // A is before
      expect(Number.parseInt(after, 10)).toBe(3); // F, G, H are after
    });

    test("extractSnippet at document start shows 0 before", () => {
      const body = "First line keyword\nSecond\nThird\nFourth\nFifth";
      const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "keyword", 500);
      expect(line).toBe(1); // Keyword on first line
      expect(linesBefore).toBe(0); // Nothing before
      expect(snippetLines).toBe(3); // First, Second, Third (bestLine-1 to bestLine+3, clamped)
      expect(linesAfter).toBe(2); // Fourth, Fifth
    });

    test("extractSnippet at document end shows 0 after", () => {
      const body = "First\nSecond\nThird\nFourth\nFifth keyword";
      const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "keyword", 500);
      expect(line).toBe(5); // Keyword on last line
      expect(linesBefore).toBe(3); // First, Second, Third before snippet
      expect(snippetLines).toBe(2); // Fourth, Fifth keyword (bestLine-1 to bestLine+3, clamped)
      expect(linesAfter).toBe(0); // Nothing after
    });

    test("extractSnippet with single line document", () => {
      const body = "Single line with keyword";
      const { linesBefore, linesAfter, snippetLines, snippet } = extractSnippet(body, "keyword", 500);
      expect(linesBefore).toBe(0);
      expect(linesAfter).toBe(0);
      expect(snippetLines).toBe(1);
      expect(snippet).toContain("@@ -1,1 @@ (0 before, 0 after)");
      expect(snippet).toContain("Single line with keyword");
    });

    test("extractSnippet with chunkPos adjusts line numbers correctly", () => {
      // 50 lines of padding, then keyword, then more content
      const padding = "Padding line\n".repeat(50);
      const body = padding + "Target keyword here\nMore content\nEven more";
      const chunkPos = padding.length; // Position of "Target keyword"
      const { line, linesBefore } = extractSnippet(body, "keyword", 200, chunkPos);
      expect(line).toBe(51); // "Target keyword" is line 51
      expect(linesBefore).toBeGreaterThan(40); // Many lines before
    });
  });
  957. // =============================================================================
  958. // Reciprocal Rank Fusion Tests
  959. // =============================================================================
  // Tests for reciprocalRankFusion: ordering, merging across lists, per-list
  // weights, the top-rank bonus, empty input and the k parameter.
  describe("Reciprocal Rank Fusion", () => {
    // Builds a minimal RankedResult; `file` is reused as displayPath and title.
    const makeResult = (file: string, score: number): RankedResult => ({
      file,
      displayPath: file,
      title: file,
      body: "body",
      score,
    });

    test("RRF combines single list correctly", () => {
      const list1 = [
        makeResult("doc1", 0.9),
        makeResult("doc2", 0.8),
        makeResult("doc3", 0.7),
      ];
      const fused = reciprocalRankFusion([list1]);
      // Order should be preserved
      expect(fused[0].file).toBe("doc1");
      expect(fused[1].file).toBe("doc2");
      expect(fused[2].file).toBe("doc3");
    });

    test("RRF merges documents from multiple lists", () => {
      const list1 = [makeResult("doc1", 0.9), makeResult("doc2", 0.8)];
      const list2 = [makeResult("doc2", 0.95), makeResult("doc3", 0.85)];
      const fused = reciprocalRankFusion([list1, list2]);
      // doc2 appears in both lists and must be merged into a single entry,
      // so four inputs collapse to three distinct files.
      expect(fused).toHaveLength(3);
      expect(fused.find(r => r.file === "doc2")).toBeDefined();
      expect(fused.find(r => r.file === "doc1")).toBeDefined();
      expect(fused.find(r => r.file === "doc3")).toBeDefined();
    });

    test("RRF respects weights", () => {
      const list1 = [makeResult("doc1", 0.9)];
      const list2 = [makeResult("doc2", 0.9)];
      // Give double weight to list1
      const fused = reciprocalRankFusion([list1, list2], [2.0, 1.0]);
      // doc1 should rank higher due to weight
      expect(fused[0].file).toBe("doc1");
    });

    test("RRF adds top-rank bonus", () => {
      // doc1 is #1 in list1, doc2 is #2 in list1
      const list1 = [makeResult("doc1", 0.9), makeResult("doc2", 0.8)];
      const list2 = [makeResult("doc3", 0.85)];
      const fused = reciprocalRankFusion([list1, list2]);
      // doc1 should get +0.05 bonus for being #1
      // doc2 should get +0.02 bonus for being #2-3
      const doc1 = fused.find(r => r.file === "doc1");
      const doc2 = fused.find(r => r.file === "doc2");
      expect(doc1!.score).toBeGreaterThan(doc2!.score);
    });

    test("RRF handles empty lists", () => {
      const fused = reciprocalRankFusion([[], []]);
      expect(fused).toHaveLength(0);
    });

    test("RRF uses k parameter correctly", () => {
      const list = [makeResult("doc1", 0.9)];
      // With different k values, scores should differ
      const fused60 = reciprocalRankFusion([list], [], 60);
      const fused30 = reciprocalRankFusion([list], [], 30);
      // Lower k = higher scores for top ranks
      expect(fused30[0].score).toBeGreaterThan(fused60[0].score);
    });
  });
  1021. // =============================================================================
  1022. // Index Status Tests
  1023. // =============================================================================
  // Tests for getStatus, getHashesNeedingEmbedding and getIndexHealth.
  // Stores are released in finally blocks so a failed assertion cannot leak them.
  describe("Index Status", () => {
    test("getStatus returns correct structure", async () => {
      const store = await createTestStore();
      try {
        const status = store.getStatus();
        expect(status).toHaveProperty("totalDocuments");
        expect(status).toHaveProperty("needsEmbedding");
        expect(status).toHaveProperty("hasVectorIndex");
        expect(status).toHaveProperty("collections");
        expect(Array.isArray(status.collections)).toBe(true);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getStatus counts documents correctly", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, { name: "doc1", active: 1 });
        insertTestDocument(store.db, collectionId, { name: "doc2", active: 1 });
        insertTestDocument(store.db, collectionId, { name: "doc3", active: 0 }); // inactive

        const status = store.getStatus();
        expect(status.totalDocuments).toBe(2); // Only active docs
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getStatus reports collection info", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db, "/test/path", "**/*.md");
        insertTestDocument(store.db, collectionId, { name: "doc1" });

        const status = store.getStatus();
        expect(status.collections).toHaveLength(1);
        expect(status.collections[0].path).toBe("/test/path");
        expect(status.collections[0].pattern).toBe("**/*.md");
        expect(status.collections[0].documents).toBe(1);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getHashesNeedingEmbedding counts correctly", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        // Add documents with different hashes
        insertTestDocument(store.db, collectionId, { name: "doc1", hash: "hash1" });
        insertTestDocument(store.db, collectionId, { name: "doc2", hash: "hash2" });
        insertTestDocument(store.db, collectionId, { name: "doc3", hash: "hash1" }); // same hash as doc1

        const needsEmbedding = store.getHashesNeedingEmbedding();
        expect(needsEmbedding).toBe(2); // hash1 and hash2
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getIndexHealth returns health info", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, { name: "doc1" });

        const health = store.getIndexHealth();
        expect(health).toHaveProperty("needsEmbedding");
        expect(health).toHaveProperty("totalDocs");
        expect(health).toHaveProperty("daysStale");
        expect(health.totalDocs).toBe(1);
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  1079. // =============================================================================
  1080. // Fuzzy Matching Tests
  1081. // =============================================================================
  // Tests for findSimilarFiles and matchFilesByGlob.
  // Stores are released in finally blocks so a failed assertion cannot leak them.
  describe("Fuzzy Matching", () => {
    test("findSimilarFiles finds similar paths", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "readme",
          displayPath: "docs/readme.md",
        });
        insertTestDocument(store.db, collectionId, {
          name: "readmi",
          displayPath: "docs/readmi.md", // typo
        });

        const similar = store.findSimilarFiles("docs/readme.md", 3, 5);
        expect(similar).toContain("docs/readme.md");
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("findSimilarFiles respects maxDistance", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "abc",
          displayPath: "abc.md",
        });
        insertTestDocument(store.db, collectionId, {
          name: "xyz",
          displayPath: "xyz.md", // very different
        });

        const similar = store.findSimilarFiles("abc.md", 1, 5); // max distance 1
        expect(similar).toContain("abc.md");
        expect(similar).not.toContain("xyz.md");
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("matchFilesByGlob matches patterns", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          filepath: "/p/journals/2024-01.md",
          displayPath: "journals/2024-01.md",
        });
        insertTestDocument(store.db, collectionId, {
          filepath: "/p/journals/2024-02.md",
          displayPath: "journals/2024-02.md",
        });
        insertTestDocument(store.db, collectionId, {
          filepath: "/p/docs/readme.md",
          displayPath: "docs/readme.md",
        });

        const matches = store.matchFilesByGlob("journals/*.md");
        expect(matches).toHaveLength(2);
        expect(matches.every(m => m.displayPath.startsWith("journals/"))).toBe(true);
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  1135. // =============================================================================
  1136. // Vector Table Tests
  1137. // =============================================================================
  // Tests for ensureVecTable, checked by inspecting sqlite_master for the
  // vectors_vec table. Stores are released in finally blocks so a failed
  // assertion cannot leak them.
  describe("Vector Table", () => {
    test("ensureVecTable creates vector table", async () => {
      const store = await createTestStore();
      try {
        // Initially no vector table
        let exists = store.db.prepare(`
          SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'
        `).get();
        expect(exists).toBeFalsy(); // null or undefined

        // Create vector table
        store.ensureVecTable(768);
        exists = store.db.prepare(`
          SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'
        `).get();
        expect(exists).toBeTruthy();
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("ensureVecTable recreates table if dimensions change", async () => {
      const store = await createTestStore();
      try {
        // Create with 768 dimensions
        store.ensureVecTable(768);

        // Check dimensions
        let tableInfo = store.db.prepare(`
          SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'
        `).get() as { sql: string };
        expect(tableInfo.sql).toContain("float[768]");

        // Recreate with different dimensions
        store.ensureVecTable(1024);
        tableInfo = store.db.prepare(`
          SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'
        `).get() as { sql: string };
        expect(tableInfo.sql).toContain("float[1024]");
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  1172. // =============================================================================
  1173. // Integration Tests
  1174. // =============================================================================
  // End-to-end tests that combine insertion, search, status and retrieval,
  // plus a check that two stores do not see each other's documents.
  // Stores are released in finally blocks so a failed assertion cannot leak them.
  describe("Integration", () => {
    test("full document lifecycle: create, search, retrieve", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db, "/test/notes", "**/*.md");

        // Add context
        addPathContext(store.db, "/test/notes", "Personal notes");

        // Insert documents
        insertTestDocument(store.db, collectionId, {
          name: "meeting",
          title: "Team Meeting Notes",
          filepath: "/test/notes/meeting.md",
          displayPath: "notes/meeting.md",
          body: "# Team Meeting Notes\n\nDiscussed project timeline and deliverables.",
        });
        insertTestDocument(store.db, collectionId, {
          name: "ideas",
          title: "Project Ideas",
          filepath: "/test/notes/ideas.md",
          displayPath: "notes/ideas.md",
          body: "# Project Ideas\n\nBrainstorming new features for the product.",
        });

        // Search
        const searchResults = store.searchFTS("project", 10);
        expect(searchResults.length).toBe(2);

        // Status
        const status = store.getStatus();
        expect(status.totalDocuments).toBe(2);
        expect(status.collections).toHaveLength(1);

        // Retrieve single document
        const doc = store.findDocument("notes/meeting.md", { includeBody: true });
        expect("error" in doc).toBe(false);
        if (!("error" in doc)) {
          expect(doc.title).toBe("Team Meeting Notes");
          expect(doc.context).toBe("Personal notes");
          expect(doc.body).toContain("Team Meeting");
        }

        // Multi-get
        const { docs, errors } = store.findDocuments("notes/*.md", { includeBody: true });
        expect(errors).toHaveLength(0);
        expect(docs).toHaveLength(2);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("multiple stores can operate independently", async () => {
      // Nested try/finally: store1 is released even if creating or using store2 fails.
      const store1 = await createTestStore();
      try {
        const store2 = await createTestStore();
        try {
          const col1 = createTestCollection(store1.db, "/store1", "**/*.md");
          const col2 = createTestCollection(store2.db, "/store2", "**/*.md");
          insertTestDocument(store1.db, col1, {
            name: "doc1",
            body: "unique content for store1",
            displayPath: "store1/doc.md",
          });
          insertTestDocument(store2.db, col2, {
            name: "doc2",
            body: "different content for store2",
            displayPath: "store2/doc.md",
          });

          // Each store should only see its own documents
          const results1 = store1.searchFTS("unique", 10);
          const results2 = store2.searchFTS("different", 10);
          expect(results1).toHaveLength(1);
          expect(results1[0].displayPath).toBe("store1/doc.md");
          expect(results2).toHaveLength(1);
          expect(results2[0].displayPath).toBe("store2/doc.md");

          // Cross-check: store1 shouldn't find store2's content
          const cross1 = store1.searchFTS("different", 10);
          const cross2 = store2.searchFTS("unique", 10);
          expect(cross1).toHaveLength(0);
          expect(cross2).toHaveLength(0);
        } finally {
          await cleanupTestDb(store2);
        }
      } finally {
        await cleanupTestDb(store1);
      }
    });
  });
  1248. // =============================================================================
  1249. // Legacy Compatibility Tests
  1250. // =============================================================================
  // Tests for the legacy store.getMultipleDocuments entry point: bodies are
  // returned, maxLines truncates, and oversized files are skipped.
  // Stores are released in finally blocks so a failed assertion cannot leak them.
  describe("Legacy Compatibility", () => {
    test("getMultipleDocuments returns files with body", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "doc1",
          filepath: "/path/doc1.md",
          displayPath: "doc1.md",
          body: "Content 1",
        });
        insertTestDocument(store.db, collectionId, {
          name: "doc2",
          filepath: "/path/doc2.md",
          displayPath: "doc2.md",
          body: "Content 2",
        });

        const { files, errors } = store.getMultipleDocuments("*.md");
        expect(errors).toHaveLength(0);
        expect(files).toHaveLength(2);
        expect(files[0].body).toBeTruthy();
        expect(files[1].body).toBeTruthy();
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getMultipleDocuments truncates with maxLines", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "doc1",
          filepath: "/path/doc1.md",
          displayPath: "doc1.md",
          body: "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
        });

        // maxLines = 2 keeps the first two lines and appends a truncation marker.
        const { files } = store.getMultipleDocuments("doc1.md", 2);
        expect(files).toHaveLength(1);
        expect(files[0].skipped).toBe(false);
        if (!files[0].skipped) {
          expect(files[0].body).toBe("Line 1\nLine 2\n\n[... truncated 3 more lines]");
        }
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("getMultipleDocuments skips large files", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "large",
          filepath: "/path/large.md",
          displayPath: "large.md",
          body: "x".repeat(15000),
        });

        // 15000-character body against a 10000 limit passed as the third argument.
        const { files } = store.getMultipleDocuments("large.md", undefined, 10000);
        expect(files).toHaveLength(1);
        expect(files[0].skipped).toBe(true);
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  1306. // =============================================================================
  1307. // Ollama Integration Tests (using mocked Ollama)
  1308. // =============================================================================
  // Covers the store methods exercised here under the "mocked Ollama" banner:
  // searchVec, expandQuery and rerank.
  //
  // Every test builds its own store via createTestStore() and awaits
  // cleanupTestDb(store) inside `finally`, so cleanup still runs when an
  // assertion throws.
  describe("Ollama Integration (Mocked)", () => {
    test("searchVec returns empty when no vector index", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "doc1",
          body: "Some content",
        });

        // ensureVecTable is never called in this test, so the search is
        // expected to yield nothing.
        const results = await store.searchVec("query", "embeddinggemma", 10);
        expect(results).toHaveLength(0);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("searchVec returns results when vector index exists", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        const hash = "testhash123";
        insertTestDocument(store.db, collectionId, {
          name: "doc1",
          hash,
          body: "Some content about testing",
          filepath: "/test/doc1.md",
          displayPath: "doc1.md",
        });

        // Create the vector table and register one 768-value vector for the
        // document's hash.
        store.ensureVecTable(768);
        // The values are arbitrary; the assertions below do not depend on
        // them.
        const embedding = Array.from({ length: 768 }, () => Math.random());
        store.db
          .prepare(`INSERT INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, 0, 0, 'test', ?)`)
          .run(hash, new Date().toISOString());
        // The hash_seq key is "<hash>_0", pairing with the seq 0 row inserted
        // above.
        store.db
          .prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`)
          .run(`${hash}_0`, new Float32Array(embedding));

        const results = await store.searchVec("test query", "embeddinggemma", 10);
        expect(results).toHaveLength(1);
        expect(results[0].displayPath).toBe("doc1.md");
        expect(results[0].source).toBe("vec");
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("expandQuery returns original plus expanded queries", async () => {
      const store = await createTestStore();
      try {
        const queries = await store.expandQuery("test query");
        // The original query must be present and must come first.
        expect(queries).toContain("test query");
        expect(queries[0]).toBe("test query");
        // Only a lower bound is pinned: however many variations the mock
        // adds, the original alone satisfies this.
        expect(queries.length).toBeGreaterThanOrEqual(1);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("expandQuery caches results", async () => {
      const store = await createTestStore();
      try {
        // First call
        const queries1 = await store.expandQuery("cached query test");
        // Second call with the identical query - should hit cache
        const queries2 = await store.expandQuery("cached query test");
        // Only the leading entry is compared between the two calls.
        expect(queries1[0]).toBe(queries2[0]);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("rerank scores documents", async () => {
      const store = await createTestStore();
      try {
        const docs = [
          { file: "doc1.md", text: "Relevant content about the topic" },
          { file: "doc2.md", text: "Other content" },
        ];
        const results = await store.rerank("topic", docs);
        // One result per input document.
        expect(results).toHaveLength(2);
        // Mock returns "yes" with high confidence
        expect(results[0].score).toBeGreaterThan(0);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("rerank caches results", async () => {
      const store = await createTestStore();
      try {
        const docs = [{ file: "doc1.md", text: "Content for caching test" }];
        // First call
        await store.rerank("cache test query", docs);
        // Second call with the same query and documents - should hit cache
        const results = await store.rerank("cache test query", docs);
        expect(results).toHaveLength(1);
      } finally {
        await cleanupTestDb(store);
      }
    });
  });
  1385. // =============================================================================
  1386. // Edge Cases & Error Handling
  1387. // =============================================================================
  // Boundary scenarios for the store: an empty database, a very large body,
  // non-ASCII content, paths containing spaces, and a batch of back-to-back
  // inserts.
  //
  // Every test builds its own store via createTestStore() and awaits
  // cleanupTestDb(store) inside `finally`, so cleanup still runs when an
  // assertion throws.
  describe("Edge Cases", () => {
    test("handles empty database gracefully", async () => {
      const store = await createTestStore();
      try {
        // Nothing has been inserted: search, status and lookup should all
        // report "empty" rather than throw.
        const searchResults = store.searchFTS("anything", 10);
        expect(searchResults).toHaveLength(0);

        const status = store.getStatus();
        expect(status.totalDocuments).toBe(0);
        expect(status.collections).toHaveLength(0);

        // A failed lookup is signalled by an `error` key on the result.
        const doc = store.findDocument("nonexistent.md");
        expect("error" in doc).toBe(true);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("handles very long document bodies", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        const longBody = "word ".repeat(100000); // 500,000 characters (~500KB)
        insertTestDocument(store.db, collectionId, {
          name: "long",
          body: longBody,
          displayPath: "long.md",
        });

        // The single large document should still come back from a search.
        const results = store.searchFTS("word", 10);
        expect(results).toHaveLength(1);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("handles unicode content correctly", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "unicode",
          title: "日本語タイトル",
          body: "# 日本語\n\n内容は日本語で書かれています。\n\nEmoji: 🎉🚀✨",
          displayPath: "unicode.md",
        });

        // Should be searchable
        const results = store.searchFTS("日本語", 10);
        expect(results.length).toBeGreaterThan(0);

        // Should retrieve correctly
        const doc = store.findDocument("unicode.md", { includeBody: true });
        expect("error" in doc).toBe(false);
        // The guard repeats the assertion above so `title` and `body` are
        // only read on a successful lookup.
        if (!("error" in doc)) {
          expect(doc.title).toBe("日本語タイトル");
          expect(doc.body).toContain("🎉");
        }
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("handles documents with special characters in paths", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);
        insertTestDocument(store.db, collectionId, {
          name: "special",
          filepath: "/path/file with spaces.md",
          displayPath: "file with spaces.md",
          body: "Content",
        });

        // Lookup uses the display path, spaces included.
        const doc = store.findDocument("file with spaces.md");
        expect("error" in doc).toBe(false);
      } finally {
        await cleanupTestDb(store);
      }
    });

    test("handles concurrent operations", async () => {
      const store = await createTestStore();
      try {
        const collectionId = createTestCollection(store.db);

        // Issue ten inserts in one batch. insertTestDocument runs eagerly
        // while the array is built and Promise.resolve only wraps its return
        // value. The helper is called without `await` elsewhere in this file,
        // so it appears to be synchronous; if so, this exercises rapid
        // back-to-back inserts rather than truly parallel ones.
        const inserts = Array.from({ length: 10 }, (_, i) =>
          Promise.resolve(insertTestDocument(store.db, collectionId, {
            name: `concurrent${i}`,
            body: `Content ${i} searchterm`,
            displayPath: `concurrent${i}.md`,
          }))
        );
        await Promise.all(inserts);

        // All should be searchable
        const results = store.searchFTS("searchterm", 20);
        expect(results).toHaveLength(10);
      } finally {
        await cleanupTestDb(store);
      }
    });
  });