store.test.ts 88 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483
  1. /**
  2. * store.test.ts - Comprehensive unit tests for the QMD store module
  3. *
  4. * Run with: bun test store.test.ts
  5. *
  6. * LLM operations use LlamaCpp with local GGUF models (node-llama-cpp).
  7. */
  8. import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, mock, spyOn } from "bun:test";
  9. import { Database } from "bun:sqlite";
  10. import { unlink, mkdtemp, rmdir, writeFile } from "node:fs/promises";
  11. import { tmpdir } from "node:os";
  12. import { join } from "node:path";
  13. import YAML from "yaml";
  14. import { disposeDefaultLlamaCpp } from "./llm.js";
  15. import {
  16. createStore,
  17. getDefaultDbPath,
  18. homedir,
  19. resolve,
  20. getPwd,
  21. getRealPath,
  22. hashContent,
  23. extractTitle,
  24. formatQueryForEmbedding,
  25. formatDocForEmbedding,
  26. chunkDocument,
  27. chunkDocumentByTokens,
  28. reciprocalRankFusion,
  29. extractSnippet,
  30. getCacheKey,
  31. handelize,
  32. normalizeVirtualPath,
  33. isVirtualPath,
  34. parseVirtualPath,
  35. normalizeDocid,
  36. isDocid,
  37. type Store,
  38. type DocumentResult,
  39. type SearchResult,
  40. type RankedResult,
  41. } from "./store.js";
  42. import type { CollectionConfig } from "./collections.js";
  43. // =============================================================================
  44. // LlamaCpp Setup
  45. // =============================================================================
  46. // Note: LlamaCpp uses node-llama-cpp for local GGUF model inference.
  47. // No HTTP mocking needed - tests use real LlamaCpp calls for integration tests.
  48. // =============================================================================
  49. // Test Utilities
  50. // =============================================================================
  51. let testDir: string;
  52. let testDbPath: string;
  53. let testConfigDir: string;
  54. async function createTestStore(): Promise<Store> {
  55. testDbPath = join(testDir, `test-${Date.now()}-${Math.random().toString(36).slice(2)}.sqlite`);
  56. // Set up test config directory
  57. const configPrefix = join(testDir, `config-${Date.now()}-${Math.random().toString(36).slice(2)}`);
  58. testConfigDir = await mkdtemp(configPrefix);
  59. // Set environment variable to use test config
  60. process.env.QMD_CONFIG_DIR = testConfigDir;
  61. // Create empty YAML config
  62. const emptyConfig: CollectionConfig = { collections: {} };
  63. await writeFile(
  64. join(testConfigDir, "index.yml"),
  65. YAML.stringify(emptyConfig)
  66. );
  67. return createStore(testDbPath);
  68. }
  69. async function cleanupTestDb(store: Store): Promise<void> {
  70. store.close();
  71. try {
  72. await unlink(store.dbPath);
  73. } catch {
  74. // Ignore if file doesn't exist
  75. }
  76. // Clean up test config directory
  77. try {
  78. const { readdir, unlink: unlinkFile, rmdir: rmdirAsync } = await import("node:fs/promises");
  79. const files = await readdir(testConfigDir);
  80. for (const file of files) {
  81. await unlinkFile(join(testConfigDir, file));
  82. }
  83. await rmdirAsync(testConfigDir);
  84. } catch {
  85. // Ignore cleanup errors
  86. }
  87. // Clear environment variable
  88. delete process.env.QMD_CONFIG_DIR;
  89. }
  90. // Helper to insert a test document directly into the database
  91. async function insertTestDocument(
  92. db: Database,
  93. collectionName: string,
  94. opts: {
  95. name?: string;
  96. title?: string;
  97. hash?: string;
  98. displayPath?: string;
  99. filepath?: string;
  100. body?: string;
  101. active?: number;
  102. }
  103. ): Promise<number> {
  104. const now = new Date().toISOString();
  105. const name = opts.name || "test-doc";
  106. const title = opts.title || "Test Document";
  107. // Use displayPath if provided, otherwise filepath's basename, otherwise default
  108. let path: string;
  109. if (opts.displayPath) {
  110. path = opts.displayPath;
  111. } else if (opts.filepath) {
  112. // Extract relative path from filepath by removing collection path
  113. // For tests, assume filepath is either relative or we want the whole path as the document path
  114. path = opts.filepath.startsWith('/') ? opts.filepath : opts.filepath;
  115. } else {
  116. path = `test/${name}.md`;
  117. }
  118. const body = opts.body || "# Test Document\n\nThis is test content.";
  119. const active = opts.active ?? 1;
  120. // Generate hash from body if not provided
  121. const hash = opts.hash || await hashContent(body);
  122. // Insert content (with OR IGNORE for deduplication)
  123. db.prepare(`
  124. INSERT OR IGNORE INTO content (hash, doc, created_at)
  125. VALUES (?, ?, ?)
  126. `).run(hash, body, now);
  127. // Insert document
  128. const result = db.prepare(`
  129. INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
  130. VALUES (?, ?, ?, ?, ?, ?, ?)
  131. `).run(collectionName, path, title, hash, now, now, active);
  132. return Number(result.lastInsertRowid);
  133. }
  134. // Helper to create a test collection in YAML config
  135. async function createTestCollection(
  136. options: { pwd?: string; glob?: string; name?: string } = {}
  137. ): Promise<string> {
  138. const pwd = options.pwd || "/test/collection";
  139. const glob = options.glob || "**/*.md";
  140. const name = options.name || pwd.split('/').filter(Boolean).pop() || 'test';
  141. // Read current config
  142. const configPath = join(testConfigDir, "index.yml");
  143. const { readFile } = await import("node:fs/promises");
  144. const content = await readFile(configPath, "utf-8");
  145. const config = YAML.parse(content) as CollectionConfig;
  146. // Add collection
  147. config.collections[name] = {
  148. path: pwd,
  149. pattern: glob,
  150. };
  151. // Write back
  152. await writeFile(configPath, YAML.stringify(config));
  153. return name;
  154. }
  155. // Helper to add path context in YAML config
  156. async function addPathContext(collectionName: string, pathPrefix: string, contextText: string): Promise<void> {
  157. // Read current config
  158. const configPath = join(testConfigDir, "index.yml");
  159. const { readFile } = await import("node:fs/promises");
  160. const content = await readFile(configPath, "utf-8");
  161. const config = YAML.parse(content) as CollectionConfig;
  162. // Add context to collection
  163. if (!config.collections[collectionName]) {
  164. throw new Error(`Collection ${collectionName} not found`);
  165. }
  166. if (!config.collections[collectionName].context) {
  167. config.collections[collectionName].context = {};
  168. }
  169. config.collections[collectionName].context![pathPrefix] = contextText;
  170. // Write back
  171. await writeFile(configPath, YAML.stringify(config));
  172. }
  173. // Helper to add global context in YAML config
  174. async function addGlobalContext(contextText: string): Promise<void> {
  175. const configPath = join(testConfigDir, "index.yml");
  176. const { readFile } = await import("node:fs/promises");
  177. const content = await readFile(configPath, "utf-8");
  178. const config = YAML.parse(content) as CollectionConfig;
  179. config.global_context = contextText;
  180. await writeFile(configPath, YAML.stringify(config));
  181. }
  182. // =============================================================================
  183. // Test Setup
  184. // =============================================================================
  185. beforeAll(async () => {
  186. testDir = await mkdtemp(join(tmpdir(), "qmd-test-"));
  187. });
  188. afterAll(async () => {
  189. // Ensure native resources are released to avoid ggml-metal asserts on process exit.
  190. await disposeDefaultLlamaCpp();
  191. try {
  192. // Clean up test directory
  193. const { readdir, unlink } = await import("node:fs/promises");
  194. const files = await readdir(testDir);
  195. for (const file of files) {
  196. await unlink(join(testDir, file));
  197. }
  198. await rmdir(testDir);
  199. } catch {
  200. // Ignore cleanup errors
  201. }
  202. });
  203. // =============================================================================
  204. // Path Utilities Tests
  205. // =============================================================================
  206. describe("Path Utilities", () => {
  207. test("homedir returns HOME environment variable", () => {
  208. const result = homedir();
  209. expect(result).toBe(Bun.env.HOME || "/tmp");
  210. });
  211. test("resolve handles absolute paths", () => {
  212. expect(resolve("/foo/bar")).toBe("/foo/bar");
  213. expect(resolve("/foo", "/bar")).toBe("/bar");
  214. });
  215. test("resolve handles relative paths", () => {
  216. const pwd = Bun.env.PWD || process.cwd();
  217. expect(resolve("foo")).toBe(`${pwd}/foo`);
  218. expect(resolve("foo", "bar")).toBe(`${pwd}/foo/bar`);
  219. });
  220. test("resolve normalizes . and ..", () => {
  221. expect(resolve("/foo/bar/./baz")).toBe("/foo/bar/baz");
  222. expect(resolve("/foo/bar/../baz")).toBe("/foo/baz");
  223. expect(resolve("/foo/bar/../../baz")).toBe("/baz");
  224. });
  225. test("getDefaultDbPath throws in test mode without INDEX_PATH", () => {
  226. // In test mode, getDefaultDbPath should throw to prevent accidental writes to global index
  227. // This is intentional safety behavior
  228. const originalIndexPath = process.env.INDEX_PATH;
  229. delete process.env.INDEX_PATH;
  230. expect(() => getDefaultDbPath()).toThrow("Database path not set");
  231. // Restore
  232. if (originalIndexPath) process.env.INDEX_PATH = originalIndexPath;
  233. });
  234. test("getDefaultDbPath uses INDEX_PATH when set", () => {
  235. const originalIndexPath = process.env.INDEX_PATH;
  236. process.env.INDEX_PATH = "/tmp/test-index.sqlite";
  237. expect(getDefaultDbPath()).toBe("/tmp/test-index.sqlite");
  238. expect(getDefaultDbPath("custom")).toBe("/tmp/test-index.sqlite"); // INDEX_PATH overrides name
  239. // Restore
  240. if (originalIndexPath) {
  241. process.env.INDEX_PATH = originalIndexPath;
  242. } else {
  243. delete process.env.INDEX_PATH;
  244. }
  245. });
  246. test("getPwd returns current working directory", () => {
  247. const pwd = getPwd();
  248. expect(pwd).toBeTruthy();
  249. expect(typeof pwd).toBe("string");
  250. });
  251. test("getRealPath resolves symlinks", () => {
  252. const result = getRealPath("/tmp");
  253. expect(result).toBeTruthy();
  254. // On macOS, /tmp is a symlink to /private/tmp
  255. expect(result === "/tmp" || result === "/private/tmp").toBe(true);
  256. });
  257. });
  258. // =============================================================================
  259. // Handelize Tests - path normalization for token-friendly filenames
  260. // =============================================================================
  261. describe("handelize", () => {
  262. test("converts to lowercase", () => {
  263. expect(handelize("README.md")).toBe("readme.md");
  264. expect(handelize("MyFile.MD")).toBe("myfile.md");
  265. });
  266. test("preserves folder structure", () => {
  267. expect(handelize("a/b/c/d.md")).toBe("a/b/c/d.md");
  268. expect(handelize("docs/api/README.md")).toBe("docs/api/readme.md");
  269. });
  270. test("replaces non-word characters with dash", () => {
  271. expect(handelize("hello world.md")).toBe("hello-world.md");
  272. expect(handelize("file (1).md")).toBe("file-1.md");
  273. expect(handelize("foo@bar#baz.md")).toBe("foo-bar-baz.md");
  274. });
  275. test("collapses multiple special chars into single dash", () => {
  276. expect(handelize("hello world.md")).toBe("hello-world.md");
  277. expect(handelize("foo---bar.md")).toBe("foo-bar.md");
  278. expect(handelize("a - b.md")).toBe("a-b.md");
  279. });
  280. test("removes leading and trailing dashes from segments", () => {
  281. expect(handelize("-hello-.md")).toBe("hello.md");
  282. expect(handelize("--test--.md")).toBe("test.md");
  283. expect(handelize("a/-b-/c.md")).toBe("a/b/c.md");
  284. });
  285. test("converts triple underscore to folder separator", () => {
  286. expect(handelize("foo___bar.md")).toBe("foo/bar.md");
  287. expect(handelize("notes___2025___january.md")).toBe("notes/2025/january.md");
  288. expect(handelize("a/b___c/d.md")).toBe("a/b/c/d.md");
  289. });
  290. test("handles complex real-world meeting notes", () => {
  291. // Example: "Money Movement Licensing Review - 2025/11/19 10:25 EST - Notes by Gemini.md"
  292. const complexName = "Money Movement Licensing Review - 2025/11/19 10:25 EST - Notes by Gemini.md";
  293. const result = handelize(complexName);
  294. expect(result).toBe("money-movement-licensing-review-2025-11-19-10-25-est-notes-by-gemini.md");
  295. expect(result).not.toContain(" ");
  296. expect(result).not.toContain("/");
  297. expect(result).not.toContain(":");
  298. });
  299. test("handles unicode characters", () => {
  300. // Pure unicode filenames are now supported (fixes GitHub issue #10)
  301. expect(handelize("日本語.md")).toBe("日本語.md");
  302. expect(handelize("Зоны и проекты.md")).toBe("зоны-и-проекты.md");
  303. // Mixed unicode/ascii preserves both
  304. expect(handelize("café-notes.md")).toBe("café-notes.md");
  305. expect(handelize("naïve.md")).toBe("naïve.md");
  306. expect(handelize("日本語-notes.md")).toBe("日本語-notes.md");
  307. });
  308. test("handles dates and times in filenames", () => {
  309. expect(handelize("meeting-2025-01-15.md")).toBe("meeting-2025-01-15.md");
  310. expect(handelize("notes 2025/01/15.md")).toBe("notes-2025/01/15.md");
  311. expect(handelize("call_10:30_AM.md")).toBe("call-10-30-am.md");
  312. });
  313. test("handles special project naming patterns", () => {
  314. expect(handelize("PROJECT_ABC_v2.0.md")).toBe("project-abc-v2-0.md");
  315. expect(handelize("[WIP] Feature Request.md")).toBe("wip-feature-request.md");
  316. expect(handelize("(DRAFT) Proposal v1.md")).toBe("draft-proposal-v1.md");
  317. });
  318. test("filters out empty segments", () => {
  319. expect(handelize("a//b/c.md")).toBe("a/b/c.md");
  320. expect(handelize("/a/b/")).toBe("a/b");
  321. expect(handelize("///test///")).toBe("test");
  322. });
  323. test("throws error for invalid inputs", () => {
  324. expect(() => handelize("")).toThrow("path cannot be empty");
  325. expect(() => handelize(" ")).toThrow("path cannot be empty");
  326. expect(() => handelize(".md")).toThrow("no valid filename content");
  327. expect(() => handelize("...")).toThrow("no valid filename content");
  328. expect(() => handelize("___")).toThrow("no valid filename content");
  329. });
  330. test("handles minimal valid inputs", () => {
  331. expect(handelize("a")).toBe("a");
  332. expect(handelize("1")).toBe("1");
  333. expect(handelize("a.md")).toBe("a.md");
  334. });
  335. });
  336. // =============================================================================
  337. // Store Creation Tests
  338. // =============================================================================
  339. describe("Store Creation", () => {
  340. test("createStore throws without explicit path in test mode", () => {
  341. // In test mode, createStore without path should throw to prevent accidental writes
  342. const originalIndexPath = process.env.INDEX_PATH;
  343. delete process.env.INDEX_PATH;
  344. expect(() => createStore()).toThrow("Database path not set");
  345. // Restore
  346. if (originalIndexPath) process.env.INDEX_PATH = originalIndexPath;
  347. });
  348. test("createStore creates a new store with custom path", async () => {
  349. const store = await createTestStore();
  350. expect(store.dbPath).toBe(testDbPath);
  351. expect(store.db).toBeInstanceOf(Database);
  352. await cleanupTestDb(store);
  353. });
  354. test("createStore initializes database schema", async () => {
  355. const store = await createTestStore();
  356. // Check tables exist
  357. const tables = store.db.prepare(`
  358. SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
  359. `).all() as { name: string }[];
  360. const tableNames = tables.map(t => t.name);
  361. expect(tableNames).toContain("documents");
  362. expect(tableNames).toContain("documents_fts");
  363. expect(tableNames).toContain("content_vectors");
  364. expect(tableNames).toContain("llm_cache");
  365. // Note: path_contexts table removed in favor of YAML-based context storage
  366. await cleanupTestDb(store);
  367. });
  368. test("createStore sets WAL journal mode", async () => {
  369. const store = await createTestStore();
  370. const result = store.db.prepare("PRAGMA journal_mode").get() as { journal_mode: string };
  371. expect(result.journal_mode).toBe("wal");
  372. await cleanupTestDb(store);
  373. });
  374. test("store.close closes the database connection", async () => {
  375. const store = await createTestStore();
  376. store.close();
  377. // Attempting to use db after close should throw
  378. expect(() => store.db.prepare("SELECT 1").get()).toThrow();
  379. try {
  380. await unlink(testDbPath);
  381. } catch {}
  382. });
  383. });
  384. // =============================================================================
  385. // Document Hashing & Title Extraction Tests
  386. // =============================================================================
  387. describe("Document Helpers", () => {
  388. test("hashContent produces consistent SHA256 hashes", async () => {
  389. const content = "Hello, World!";
  390. const hash1 = await hashContent(content);
  391. const hash2 = await hashContent(content);
  392. expect(hash1).toBe(hash2);
  393. expect(hash1).toMatch(/^[a-f0-9]{64}$/);
  394. });
  395. test("hashContent produces different hashes for different content", async () => {
  396. const hash1 = await hashContent("Hello");
  397. const hash2 = await hashContent("World");
  398. expect(hash1).not.toBe(hash2);
  399. });
  400. test("extractTitle extracts H1 heading", () => {
  401. const content = "# My Title\n\nSome content here.";
  402. expect(extractTitle(content, "file.md")).toBe("My Title");
  403. });
  404. test("extractTitle extracts H2 heading if no H1", () => {
  405. const content = "## My Subtitle\n\nSome content here.";
  406. expect(extractTitle(content, "file.md")).toBe("My Subtitle");
  407. });
  408. test("extractTitle falls back to filename", () => {
  409. const content = "Just some plain text without headings.";
  410. expect(extractTitle(content, "my-document.md")).toBe("my-document");
  411. });
  412. test("extractTitle skips generic 'Notes' heading", () => {
  413. const content = "# Notes\n\n## Actual Title\n\nContent";
  414. expect(extractTitle(content, "file.md")).toBe("Actual Title");
  415. });
  416. test("extractTitle handles 📝 Notes heading", () => {
  417. const content = "# 📝 Notes\n\n## Meeting Summary\n\nContent";
  418. expect(extractTitle(content, "file.md")).toBe("Meeting Summary");
  419. });
  420. });
  421. // =============================================================================
  422. // Embedding Format Tests
  423. // =============================================================================
  424. describe("Embedding Formatting", () => {
  425. test("formatQueryForEmbedding adds search task prefix", () => {
  426. const formatted = formatQueryForEmbedding("how to deploy");
  427. expect(formatted).toBe("task: search result | query: how to deploy");
  428. });
  429. test("formatDocForEmbedding adds title and text prefix", () => {
  430. const formatted = formatDocForEmbedding("Some content", "My Title");
  431. expect(formatted).toBe("title: My Title | text: Some content");
  432. });
  433. test("formatDocForEmbedding handles missing title", () => {
  434. const formatted = formatDocForEmbedding("Some content");
  435. expect(formatted).toBe("title: none | text: Some content");
  436. });
  437. });
  438. // =============================================================================
  439. // Document Chunking Tests
  440. // =============================================================================
  441. describe("Document Chunking", () => {
  442. test("chunkDocument returns single chunk for small documents", () => {
  443. const content = "Small document content";
  444. const chunks = chunkDocument(content, 1000, 0);
  445. expect(chunks).toHaveLength(1);
  446. expect(chunks[0]!.text).toBe(content);
  447. expect(chunks[0]!.pos).toBe(0);
  448. });
  449. test("chunkDocument splits large documents", () => {
  450. const content = "A".repeat(10000);
  451. const chunks = chunkDocument(content, 1000, 0);
  452. expect(chunks.length).toBeGreaterThan(1);
  453. // All chunks should have correct positions
  454. for (let i = 0; i < chunks.length; i++) {
  455. expect(chunks[i]!.pos).toBeGreaterThanOrEqual(0);
  456. if (i > 0) {
  457. expect(chunks[i]!.pos).toBeGreaterThan(chunks[i - 1]!.pos);
  458. }
  459. }
  460. });
  461. test("chunkDocument with overlap creates overlapping chunks", () => {
  462. const content = "A".repeat(3000);
  463. const chunks = chunkDocument(content, 1000, 150); // 15% overlap
  464. expect(chunks.length).toBeGreaterThan(1);
  465. // With overlap, positions should be closer together than without
  466. // Each new chunk starts 150 chars before where the previous one ended
  467. for (let i = 1; i < chunks.length; i++) {
  468. const prevEnd = chunks[i - 1]!.pos + chunks[i - 1]!.text.length;
  469. const currentStart = chunks[i]!.pos;
  470. // Current chunk should start before the previous chunk ended (overlap)
  471. expect(currentStart).toBeLessThan(prevEnd);
  472. // But should still make forward progress
  473. expect(currentStart).toBeGreaterThan(chunks[i - 1]!.pos);
  474. }
  475. });
  476. test("chunkDocument prefers paragraph breaks", () => {
  477. const content = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph.".repeat(50);
  478. const chunks = chunkDocument(content, 500, 0);
  479. // Chunks should end at paragraph breaks when possible
  480. for (const chunk of chunks.slice(0, -1)) {
  481. // Most chunks should end near a paragraph break
  482. const endsNearParagraph = chunk.text.endsWith("\n\n") ||
  483. chunk.text.endsWith(".") ||
  484. chunk.text.endsWith("\n");
  485. // This is a soft check - not all chunks can end at breaks
  486. }
  487. expect(chunks.length).toBeGreaterThan(1);
  488. });
  489. test("chunkDocument handles UTF-8 characters correctly", () => {
  490. const content = "こんにちは世界".repeat(500); // Japanese text
  491. const chunks = chunkDocument(content, 1000, 0);
  492. // Should not split in the middle of a multi-byte character
  493. for (const chunk of chunks) {
  494. expect(() => new TextEncoder().encode(chunk.text)).not.toThrow();
  495. }
  496. });
  test("chunkDocument with default params uses 800-token chunks", () => {
    // No sizes are passed, so the defaults apply:
    // CHUNK_SIZE_CHARS (3200 chars) with CHUNK_OVERLAP_CHARS (480 chars).
    const repeatedWords = "Word ".repeat(2000); // ~10000 chars
    const pieces = chunkDocument(repeatedWords);

    expect(pieces.length).toBeGreaterThan(1);

    // A non-final chunk should be close to, but never above, 3200 chars.
    const firstLength = pieces[0]!.text.length;
    expect(firstLength).toBeGreaterThan(2500);
    expect(firstLength).toBeLessThanOrEqual(3200);
  });
  506. });
  describe("Token-based Chunking", () => {
    test("chunkDocumentByTokens returns single chunk for small documents", async () => {
      // Far below the 800-token budget, so the text must come back untouched.
      const tinyDoc = "This is a small document.";
      const pieces = await chunkDocumentByTokens(tinyDoc, 800, 120);

      expect(pieces).toHaveLength(1);

      const only = pieces[0]!;
      expect(only.text).toBe(tinyDoc);
      expect(only.pos).toBe(0);
      expect(only.tokens).toBeGreaterThan(0);
      expect(only.tokens).toBeLessThan(800);
    });

    test("chunkDocumentByTokens splits large documents", async () => {
      // Long enough to be well above 800 tokens.
      const longDoc = "The quick brown fox jumps over the lazy dog. ".repeat(200);
      const pieces = await chunkDocumentByTokens(longDoc, 800, 120);

      expect(pieces.length).toBeGreaterThan(1);

      // Token counts stay within budget (a slight overage is tolerated).
      for (const { tokens } of pieces) {
        expect(tokens).toBeLessThanOrEqual(850); // Allow slight overage
        expect(tokens).toBeGreaterThan(0);
      }

      // Positions are non-negative and strictly increasing.
      pieces.forEach((piece, idx) => {
        expect(piece.pos).toBeGreaterThanOrEqual(0);
        if (idx > 0) {
          expect(piece.pos).toBeGreaterThan(pieces[idx - 1]!.pos);
        }
      });
    });

    test("chunkDocumentByTokens creates overlapping chunks", async () => {
      const wordy = "Word ".repeat(500); // ~500 tokens
      const pieces = await chunkDocumentByTokens(wordy, 200, 30); // 15% overlap

      expect(pieces.length).toBeGreaterThan(1);

      // Each chunk after the first must begin before its predecessor ends.
      pieces.slice(1).forEach((piece, offset) => {
        const previous = pieces[offset]!;
        const previousEnd = previous.pos + previous.text.length;
        expect(piece.pos).toBeLessThan(previousEnd);
      });
    });

    test("chunkDocumentByTokens returns actual token counts", async () => {
      const sentence = "Hello world, this is a test.";
      const pieces = await chunkDocumentByTokens(sentence);

      expect(pieces).toHaveLength(1);

      // A plausible count: non-zero and smaller than the character count
      // (English text has fewer tokens than characters).
      const tokenCount = pieces[0]!.tokens;
      expect(tokenCount).toBeGreaterThan(0);
      expect(tokenCount).toBeLessThan(sentence.length);
    });
  });
  556. // =============================================================================
  557. // Caching Tests
  558. // =============================================================================
  describe("Caching", () => {
    test("getCacheKey generates consistent keys", () => {
      const endpoint = "http://example.com";
      const firstKey = getCacheKey(endpoint, { query: "test" });
      const secondKey = getCacheKey(endpoint, { query: "test" });

      // Same inputs give the same key, formatted as 64 lowercase hex characters.
      expect(firstKey).toBe(secondKey);
      expect(firstKey).toMatch(/^[a-f0-9]{64}$/);
    });

    test("getCacheKey generates different keys for different inputs", () => {
      const endpoint = "http://example.com";
      const keyForTest1 = getCacheKey(endpoint, { query: "test1" });
      const keyForTest2 = getCacheKey(endpoint, { query: "test2" });

      expect(keyForTest1).not.toBe(keyForTest2);
    });

    test("store cache operations work correctly", async () => {
      const store = await createTestStore();
      const cacheKey = "test-cache-key";
      const payload = "cached result";

      // Miss before anything is stored.
      expect(store.getCachedResult(cacheKey)).toBeNull();

      // Hit after storing.
      store.setCachedResult(cacheKey, payload);
      expect(store.getCachedResult(cacheKey)).toBe(payload);

      // Miss again once the cache is cleared.
      store.clearCache();
      expect(store.getCachedResult(cacheKey)).toBeNull();

      await cleanupTestDb(store);
    });
  });
  587. // =============================================================================
  588. // Context Tests
  589. // =============================================================================
  describe("Path Context", () => {
    test("getContextForFile returns null when no context set", async () => {
      const store = await createTestStore();

      expect(store.getContextForFile("/some/random/path.md")).toBeNull();

      await cleanupTestDb(store);
    });

    test("getContextForFile returns matching context", async () => {
      const store = await createTestStore();
      const collectionName = await createTestCollection({ pwd: "/test/collection", glob: "**/*.md" });
      await addPathContext(collectionName, "/docs", "Documentation files");

      // The lookup needs an indexed document to resolve against.
      await insertTestDocument(store.db, collectionName, {
        name: "readme",
        displayPath: "docs/readme.md",
      });

      const resolved = store.getContextForFile("/test/collection/docs/readme.md");
      expect(resolved).toBe("Documentation files");

      await cleanupTestDb(store);
    });

    test("getContextForFile returns all matching contexts", async () => {
      const store = await createTestStore();
      const collectionName = await createTestCollection({ pwd: "/test/collection", glob: "**/*.md" });

      // Three nested context levels: collection root, /docs and /docs/api.
      const contextLevels: Array<[string, string]> = [
        ["/", "General test files"],
        ["/docs", "Documentation files"],
        ["/docs/api", "API documentation"],
      ];
      for (const [prefix, description] of contextLevels) {
        await addPathContext(collectionName, prefix, description);
      }

      // One document per level so that each lookup can be resolved.
      const documents = [
        { name: "readme", displayPath: "readme.md" },
        { name: "guide", displayPath: "docs/guide.md" },
        { name: "reference", displayPath: "docs/api/reference.md" },
      ];
      for (const document of documents) {
        await insertTestDocument(store.db, collectionName, document);
      }

      // Every matching context is returned, joined with \n\n.
      const expectations: Array<[string, string]> = [
        ["/test/collection/readme.md", "General test files"],
        ["/test/collection/docs/guide.md", "General test files\n\nDocumentation files"],
        [
          "/test/collection/docs/api/reference.md",
          "General test files\n\nDocumentation files\n\nAPI documentation",
        ],
      ];
      for (const [filePath, expectedContext] of expectations) {
        expect(store.getContextForFile(filePath)).toBe(expectedContext);
      }

      await cleanupTestDb(store);
    });
  });
  636. // =============================================================================
  637. // Collection Tests
  638. // =============================================================================
  describe("Collections", () => {
    test("collections are managed via YAML config", async () => {
      const store = await createTestStore();
      const projectDir = "/home/user/projects/myapp";

      // Collections live in the YAML config rather than in the database.
      const collectionName = await createTestCollection({ pwd: projectDir, glob: "**/*.md" });
      expect(collectionName).toBe("myapp");

      await cleanupTestDb(store);
    });
  });
  648. // =============================================================================
  649. // FTS Search Tests
  650. // =============================================================================
  describe("FTS Search", () => {
    test("searchFTS returns empty array for no matches", async () => {
      const store = await createTestStore();
      const collectionName = await createTestCollection();
      await insertTestDocument(store.db, collectionName, {
        name: "doc1",
        body: "The quick brown fox jumps over the lazy dog",
      });

      const hits = store.searchFTS("nonexistent-term-xyz", 10);
      expect(hits).toHaveLength(0);

      await cleanupTestDb(store);
    });

    test("searchFTS finds documents by keyword", async () => {
      const store = await createTestStore();
      const collectionName = await createTestCollection();
      await insertTestDocument(store.db, collectionName, {
        name: "doc1",
        title: "Fox Document",
        body: "The quick brown fox jumps over the lazy dog",
        displayPath: "test/doc1.md",
      });

      const hits = store.searchFTS("fox", 10);
      expect(hits.length).toBeGreaterThan(0);

      // Paths are reported relative to the collection; the source is tagged "fts".
      const topHit = hits[0]!;
      expect(topHit.displayPath).toBe(`${collectionName}/test/doc1.md`);
      expect(topHit.filepath).toBe(`qmd://${collectionName}/test/doc1.md`);
      expect(topHit.source).toBe("fts");

      await cleanupTestDb(store);
    });

    test("searchFTS ranks title matches higher", async () => {
      const store = await createTestStore();
      const collectionName = await createTestCollection();

      // "fox" appears only in the body of this document.
      await insertTestDocument(store.db, collectionName, {
        name: "body-match",
        title: "Some Other Title",
        body: "The fox is here in the body",
        displayPath: "test/body.md",
      });

      // "fox" appears in the name and title of this document as well as its body.
      await insertTestDocument(store.db, collectionName, {
        name: "fox",
        title: "Fox Title",
        body: "Different content without the animal fox",
        displayPath: "test/title.md",
      });

      const hits = store.searchFTS("fox", 10);

      // Both bodies mention "fox", so both documents are returned ...
      expect(hits.length).toBe(2);
      // ... and the title/name match is expected first (BM25 weights).
      expect(hits[0]!.displayPath).toBe(`${collectionName}/test/title.md`);

      await cleanupTestDb(store);
    });

    test("searchFTS respects limit parameter", async () => {
      const store = await createTestStore();
      const collectionName = await createTestCollection();

      // Ten documents that all match the query.
      for (let n = 0; n < 10; n++) {
        await insertTestDocument(store.db, collectionName, {
          name: `doc${n}`,
          body: "common keyword appears here",
          displayPath: `test/doc${n}.md`,
        });
      }

      const hits = store.searchFTS("common keyword", 3);
      expect(hits).toHaveLength(3);

      await cleanupTestDb(store);
    });

    test("searchFTS filters by collection name", async () => {
      const store = await createTestStore();
      const firstCollection = await createTestCollection({ pwd: "/path/one", glob: "**/*.md", name: "one" });
      const secondCollection = await createTestCollection({ pwd: "/path/two", glob: "**/*.md", name: "two" });

      await insertTestDocument(store.db, firstCollection, {
        name: "doc1",
        body: "searchable content",
        displayPath: "doc1.md",
      });
      await insertTestDocument(store.db, secondCollection, {
        name: "doc2",
        body: "searchable content",
        displayPath: "doc2.md",
      });

      // Unfiltered search sees both collections.
      const everything = store.searchFTS("searchable", 10);
      expect(everything).toHaveLength(2);

      // The third argument is typed as a number but is treated as the
      // collection name string, hence the cast.
      const onlyFirst = store.searchFTS("searchable", 10, firstCollection as unknown as number);
      expect(onlyFirst).toHaveLength(1);
      expect(onlyFirst[0]!.displayPath).toBe(`${firstCollection}/doc1.md`);

      await cleanupTestDb(store);
    });

    test("searchFTS handles special characters in query", async () => {
      const store = await createTestStore();
      const collectionName = await createTestCollection();
      await insertTestDocument(store.db, collectionName, {
        name: "doc1",
        body: "Function with params: foo(bar, baz)",
        displayPath: "test/doc1.md",
      });

      // Parentheses in the query must not make the search throw; the actual
      // matches may vary with FTS5 handling, so only the result type is checked.
      const hits = store.searchFTS("foo(bar)", 10);
      expect(Array.isArray(hits)).toBe(true);

      await cleanupTestDb(store);
    });

    test("searchFTS ignores inactive documents", async () => {
      const store = await createTestStore();
      const collectionName = await createTestCollection();

      // Two documents with identical bodies; only the first one is active.
      await insertTestDocument(store.db, collectionName, {
        name: "active",
        body: "findme content",
        displayPath: "test/active.md",
        active: 1,
      });
      await insertTestDocument(store.db, collectionName, {
        name: "inactive",
        body: "findme content",
        displayPath: "test/inactive.md",
        active: 0,
      });

      const hits = store.searchFTS("findme", 10);
      expect(hits).toHaveLength(1);

      const soleHit = hits[0]!;
      expect(soleHit.displayPath).toBe(`${collectionName}/test/active.md`);
      expect(soleHit.filepath).toBe(`qmd://${collectionName}/test/active.md`);

      await cleanupTestDb(store);
    });
  });
  776. // =============================================================================
  777. // Document Retrieval Tests
  778. // =============================================================================
  describe("Document Retrieval", () => {
    describe("findDocument", () => {
      test("findDocument finds by exact filepath", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection({ pwd: "/exact/path", glob: "**/*.md" });
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          title: "My Document",
          displayPath: "mydoc.md",
          body: "Document content here",
        });

        const lookup = store.findDocument("/exact/path/mydoc.md");

        expect("error" in lookup).toBe(false);
        if (!("error" in lookup)) {
          expect(lookup.title).toBe("My Document");
          expect(lookup.displayPath).toBe(`${collectionName}/mydoc.md`);
          expect(lookup.filepath).toBe(`qmd://${collectionName}/mydoc.md`);
          // The body is only returned when explicitly requested.
          expect(lookup.body).toBeUndefined();
        }

        await cleanupTestDb(store);
      });

      test("findDocument finds by display_path", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection({ pwd: "/some/path", glob: "**/*.md" });
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          displayPath: "docs/mydoc.md",
        });

        const lookup = store.findDocument("docs/mydoc.md");
        expect("error" in lookup).toBe(false);

        await cleanupTestDb(store);
      });

      test("findDocument finds by partial path match", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection({ pwd: "/very/long/path/to", glob: "**/*.md" });
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          displayPath: "mydoc.md",
        });

        const lookup = store.findDocument("mydoc.md");
        expect("error" in lookup).toBe(false);

        await cleanupTestDb(store);
      });

      test("findDocument includes body when requested", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection({ pwd: "/path", glob: "**/*.md" });
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          displayPath: "mydoc.md",
          body: "The actual body content",
        });

        const lookup = store.findDocument("/path/mydoc.md", { includeBody: true });

        expect("error" in lookup).toBe(false);
        if (!("error" in lookup)) {
          expect(lookup.body).toBe("The actual body content");
        }

        await cleanupTestDb(store);
      });

      test("findDocument returns error with suggestions for not found", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection();
        await insertTestDocument(store.db, collectionName, {
          name: "similar",
          filepath: "/path/similar.md",
          displayPath: "similar.md",
        });

        const lookup = store.findDocument("simlar.md"); // typo - 1 char diff

        expect("error" in lookup).toBe(true);
        if ("error" in lookup) {
          expect(lookup.error).toBe("not_found");
          // Whether the near-miss is actually suggested depends on the distance
          // calculation, so only the presence of a suggestions list is pinned.
          // (The previous `length >= 0` check could never fail.)
          expect(Array.isArray(lookup.similarFiles)).toBe(true);
        }

        await cleanupTestDb(store);
      });

      test("findDocument handles :line suffix", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection();
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          filepath: "/path/mydoc.md",
          displayPath: "mydoc.md",
        });

        // A trailing ":<line>" must not prevent the document from being found.
        const lookup = store.findDocument("mydoc.md:100");
        expect("error" in lookup).toBe(false);

        await cleanupTestDb(store);
      });

      test("findDocument expands ~ to home directory", async () => {
        const store = await createTestStore();
        const home = homedir();
        const collectionName = await createTestCollection({ pwd: home, name: "home" });
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          filepath: `${home}/docs/mydoc.md`,
          displayPath: "docs/mydoc.md",
        });

        const lookup = store.findDocument("~/docs/mydoc.md");
        expect("error" in lookup).toBe(false);

        await cleanupTestDb(store);
      });

      test("findDocument includes context from path_contexts", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection({ pwd: "/path" });
        await addPathContext(collectionName, "docs", "Documentation");
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          displayPath: "docs/mydoc.md",
        });

        const lookup = store.findDocument("/path/docs/mydoc.md");

        expect("error" in lookup).toBe(false);
        if (!("error" in lookup)) {
          expect(lookup.context).toBe("Documentation");
        }

        await cleanupTestDb(store);
      });

      test("findDocument includes hierarchical contexts (global + collection + path)", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection({ pwd: "/archive", name: "archive" });

        // Context is registered at every level: global, collection root,
        // and two nested path prefixes.
        await addGlobalContext("Global context for all documents");
        await addPathContext(collectionName, "/", "Archive collection context");
        await addPathContext(collectionName, "/podcasts", "Podcast episodes");
        await addPathContext(collectionName, "/podcasts/external", "External podcast interviews");

        // The document sits below the deepest registered prefix.
        await insertTestDocument(store.db, collectionName, {
          name: "interview",
          displayPath: "podcasts/external/2024-jan-interview.md",
        });

        const lookup = store.findDocument("/archive/podcasts/external/2024-jan-interview.md");

        expect("error" in lookup).toBe(false);
        if (!("error" in lookup)) {
          // All four contexts are expected, joined with double newlines.
          const expectedContext =
            "Global context for all documents\n\n" +
            "Archive collection context\n\n" +
            "Podcast episodes\n\n" +
            "External podcast interviews";
          expect(lookup.context).toBe(expectedContext);
        }

        await cleanupTestDb(store);
      });
    });

    describe("getDocumentBody", () => {
      test("getDocumentBody returns full body", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection({ pwd: "/path" });
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          displayPath: "mydoc.md",
          body: "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
        });

        const text = store.getDocumentBody({ filepath: "/path/mydoc.md" });
        expect(text).toBe("Line 1\nLine 2\nLine 3\nLine 4\nLine 5");

        await cleanupTestDb(store);
      });

      test("getDocumentBody supports line range", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection({ pwd: "/path" });
        await insertTestDocument(store.db, collectionName, {
          name: "mydoc",
          displayPath: "mydoc.md",
          body: "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
        });

        // Arguments (2, 2) are expected to yield lines 2 and 3.
        const text = store.getDocumentBody({ filepath: "/path/mydoc.md" }, 2, 2);
        expect(text).toBe("Line 2\nLine 3");

        await cleanupTestDb(store);
      });

      test("getDocumentBody returns null for non-existent document", async () => {
        const store = await createTestStore();

        const text = store.getDocumentBody({ filepath: "/nonexistent.md" });
        expect(text).toBeNull();

        await cleanupTestDb(store);
      });
    });

    describe("findDocuments (multi-get)", () => {
      test("findDocuments finds by glob pattern", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection();

        // Two journal entries that match the glob and one unrelated file.
        await insertTestDocument(store.db, collectionName, {
          name: "doc1",
          filepath: "/path/journals/2024-01.md",
          displayPath: "journals/2024-01.md",
        });
        await insertTestDocument(store.db, collectionName, {
          name: "doc2",
          filepath: "/path/journals/2024-02.md",
          displayPath: "journals/2024-02.md",
        });
        await insertTestDocument(store.db, collectionName, {
          name: "doc3",
          filepath: "/path/other/file.md",
          displayPath: "other/file.md",
        });

        const found = store.findDocuments("journals/2024-*.md");
        expect(found.errors).toHaveLength(0);
        expect(found.docs).toHaveLength(2);

        await cleanupTestDb(store);
      });

      test("findDocuments finds by comma-separated list", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection();
        await insertTestDocument(store.db, collectionName, {
          name: "doc1",
          filepath: "/path/doc1.md",
          displayPath: "doc1.md",
        });
        await insertTestDocument(store.db, collectionName, {
          name: "doc2",
          filepath: "/path/doc2.md",
          displayPath: "doc2.md",
        });

        const found = store.findDocuments("doc1.md, doc2.md");
        expect(found.errors).toHaveLength(0);
        expect(found.docs).toHaveLength(2);

        await cleanupTestDb(store);
      });

      test("findDocuments reports errors for not found files", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection();
        await insertTestDocument(store.db, collectionName, {
          name: "doc1",
          filepath: "/path/doc1.md",
          displayPath: "doc1.md",
        });

        // One entry of the list exists, the other does not.
        const found = store.findDocuments("doc1.md, nonexistent.md");
        expect(found.docs).toHaveLength(1);
        expect(found.errors).toHaveLength(1);
        expect(found.errors[0]).toContain("not found");

        await cleanupTestDb(store);
      });

      test("findDocuments skips large files", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection();
        await insertTestDocument(store.db, collectionName, {
          name: "large",
          filepath: "/path/large.md",
          displayPath: "large.md",
          body: "x".repeat(20000), // 20KB
        });

        // The body is twice the allowed size, so the entry is reported as skipped.
        const found = store.findDocuments("large.md", { maxBytes: 10000 });
        expect(found.docs).toHaveLength(1);

        const entry = found.docs[0]!;
        expect(entry.skipped).toBe(true);
        if (entry.skipped) {
          expect((entry as { skipped: true; skipReason: string }).skipReason).toContain("too large");
        }

        await cleanupTestDb(store);
      });

      test("findDocuments includes body when requested", async () => {
        const store = await createTestStore();
        const collectionName = await createTestCollection();
        await insertTestDocument(store.db, collectionName, {
          name: "doc1",
          filepath: "/path/doc1.md",
          displayPath: "doc1.md",
          body: "The content",
        });

        const found = store.findDocuments("doc1.md", { includeBody: true });

        const entry = found.docs[0]!;
        expect(entry.skipped).toBe(false);
        if (!entry.skipped) {
          expect((entry as { doc: { body: string }; skipped: false }).doc.body).toBe("The content");
        }

        await cleanupTestDb(store);
      });
    });
  });
  1046. // =============================================================================
  1047. // Snippet Extraction Tests
  1048. // =============================================================================
  describe("Snippet Extraction", () => {
    test("extractSnippet finds query terms", () => {
      const body = "First line.\nSecond line with keyword.\nThird line.\nFourth line.";
      const extracted = extractSnippet(body, "keyword", 500);

      expect(extracted.line).toBe(2); // Line 2 contains "keyword"
      expect(extracted.snippet).toContain("keyword");
    });

    test("extractSnippet includes context lines", () => {
      const body = "Line 1\nLine 2\nLine 3 has keyword\nLine 4\nLine 5";
      const { snippet } = extractSnippet(body, "keyword", 500);

      // The matching line plus one neighbour on each side.
      for (const expectedLine of ["Line 2", "Line 3 has keyword", "Line 4"]) {
        expect(snippet).toContain(expectedLine);
      }
    });

    test("extractSnippet respects maxLen for content", () => {
      const body = "A".repeat(1000);
      const { snippet } = extractSnippet(body, "query", 100);

      // The snippet is header + content; the content part gets truncated.
      expect(snippet).toContain("@@"); // Has diff header
      expect(snippet).toContain("..."); // Content was truncated
    });

    test("extractSnippet uses chunkPos hint", () => {
      const leading = "First section...\n".repeat(50);
      const trailing = "More content...".repeat(50);
      const body = leading + "Target keyword here\n" + trailing;
      const chunkPos = body.indexOf("Target keyword");

      const { snippet } = extractSnippet(body, "Target", 200, chunkPos);
      expect(snippet).toContain("Target keyword");
    });

    test("extractSnippet returns beginning when no match", () => {
      const body = "First line\nSecond line\nThird line";
      const extracted = extractSnippet(body, "nonexistent", 500);

      expect(extracted.line).toBe(1);
      expect(extracted.snippet).toContain("First line");
    });

    test("extractSnippet includes diff-style header", () => {
      const body = "Line 1\nLine 2\nLine 3 has keyword\nLine 4\nLine 5";
      const extracted = extractSnippet(body, "keyword", 500);

      // The header carries the line position and the surrounding line counts.
      expect(extracted.snippet).toMatch(/^@@ -\d+,\d+ @@ \(\d+ before, \d+ after\)/);
      expect(extracted.linesBefore).toBe(1); // Line 1 comes before
      expect(extracted.linesAfter).toBe(0); // Snippet includes to end (lines 2-5)
      expect(extracted.snippetLines).toBe(4); // Lines 2, 3, 4, 5
    });

    test("extractSnippet calculates linesBefore and linesAfter correctly", () => {
      const body = "L1\nL2\nL3\nL4 match\nL5\nL6\nL7\nL8\nL9\nL10";
      const extracted = extractSnippet(body, "match", 500);

      expect(extracted.line).toBe(4); // "L4 match" is line 4
      expect(extracted.linesBefore).toBe(2); // L1, L2 before snippet (snippet starts at L3)
      expect(extracted.snippetLines).toBe(4); // L3, L4, L5, L6
      expect(extracted.linesAfter).toBe(4); // L7, L8, L9, L10 after snippet
    });

    test("extractSnippet header format matches diff style", () => {
      const body = "A\nB\nC keyword\nD\nE\nF\nG\nH";
      const { snippet } = extractSnippet(body, "keyword", 500);

      // Expected shape: @@ -line,count @@ (N before, M after)
      const headerMatch = snippet.match(/^@@ -(\d+),(\d+) @@ \((\d+) before, (\d+) after\)/);
      expect(headerMatch).not.toBeNull();

      // Skip the full match and parse the four captured numbers.
      const [startLine, count, before, after] = headerMatch!
        .slice(1)
        .map((digits) => parseInt(digits));
      expect(startLine).toBe(2); // Snippet starts at line 2 (B)
      expect(count).toBe(4); // 4 lines: B, C keyword, D, E
      expect(before).toBe(1); // A is before
      expect(after).toBe(3); // F, G, H are after
    });

    test("extractSnippet at document start shows 0 before", () => {
      const body = "First line keyword\nSecond\nThird\nFourth\nFifth";
      const extracted = extractSnippet(body, "keyword", 500);

      expect(extracted.line).toBe(1); // Keyword on first line
      expect(extracted.linesBefore).toBe(0); // Nothing before
      expect(extracted.snippetLines).toBe(3); // First, Second, Third (bestLine-1 to bestLine+3, clamped)
      expect(extracted.linesAfter).toBe(2); // Fourth, Fifth
    });

    test("extractSnippet at document end shows 0 after", () => {
      const body = "First\nSecond\nThird\nFourth\nFifth keyword";
      const extracted = extractSnippet(body, "keyword", 500);

      expect(extracted.line).toBe(5); // Keyword on last line
      expect(extracted.linesBefore).toBe(3); // First, Second, Third before snippet
      expect(extracted.snippetLines).toBe(2); // Fourth, Fifth keyword (bestLine-1 to bestLine+3, clamped)
      expect(extracted.linesAfter).toBe(0); // Nothing after
    });

    test("extractSnippet with single line document", () => {
      const body = "Single line with keyword";
      const extracted = extractSnippet(body, "keyword", 500);

      expect(extracted.linesBefore).toBe(0);
      expect(extracted.linesAfter).toBe(0);
      expect(extracted.snippetLines).toBe(1);
      expect(extracted.snippet).toContain("@@ -1,1 @@ (0 before, 0 after)");
      expect(extracted.snippet).toContain("Single line with keyword");
    });

    test("extractSnippet with chunkPos adjusts line numbers correctly", () => {
      // 50 lines of padding, then the keyword line, then more content.
      const padding = "Padding line\n".repeat(50);
      const body = padding + "Target keyword here\nMore content\nEven more";
      const chunkPos = padding.length; // Position of "Target keyword"

      // Only `line` and `linesBefore` are checked here, so the unused
      // `linesAfter` field is no longer destructured.
      const { line, linesBefore } = extractSnippet(body, "keyword", 200, chunkPos);

      expect(line).toBe(51); // "Target keyword" is line 51
      expect(linesBefore).toBeGreaterThan(40); // Many lines before
    });
  });
  1146. // =============================================================================
  1147. // Reciprocal Rank Fusion Tests
  1148. // =============================================================================
  describe("Reciprocal Rank Fusion", () => {
    // Builds a minimal ranked result in which the file name doubles as the
    // display path and the title.
    const makeResult = (file: string, score: number): RankedResult => ({
      file,
      displayPath: file,
      title: file,
      body: "body",
      score,
    });

    test("RRF combines single list correctly", () => {
      const onlyList = [
        makeResult("doc1", 0.9),
        makeResult("doc2", 0.8),
        makeResult("doc3", 0.7),
      ];

      const fused = reciprocalRankFusion([onlyList]);

      // With a single input list the original order is preserved.
      expect(fused.slice(0, 3).map((entry) => entry.file)).toEqual(["doc1", "doc2", "doc3"]);
    });

    test("RRF merges documents from multiple lists", () => {
      const list1 = [makeResult("doc1", 0.9), makeResult("doc2", 0.8)];
      const list2 = [makeResult("doc2", 0.95), makeResult("doc3", 0.85)];

      const fused = reciprocalRankFusion([list1, list2]);

      const doc1 = fused.find((entry) => entry.file === "doc1");
      const doc2 = fused.find((entry) => entry.file === "doc2");
      const doc3 = fused.find((entry) => entry.file === "doc3");
      expect(doc2).toBeDefined();
      expect(doc1).toBeDefined();
      expect(doc3).toBeDefined();

      // doc2 occurs in both lists, so it must be merged into one entry ...
      expect(fused).toHaveLength(3);
      // ... and its combined score must exceed that of the documents that
      // occur in only one list.
      expect(doc2!.score).toBeGreaterThan(doc1!.score);
      expect(doc2!.score).toBeGreaterThan(doc3!.score);
    });

    test("RRF respects weights", () => {
      const list1 = [makeResult("doc1", 0.9)];
      const list2 = [makeResult("doc2", 0.9)];

      // list1 counts double, so its only document should come out on top.
      const fused = reciprocalRankFusion([list1, list2], [2.0, 1.0]);
      expect(fused[0]!.file).toBe("doc1");
    });

    test("RRF adds top-rank bonus", () => {
      // doc1 is #1 in list1, doc2 is #2 in list1.
      const list1 = [makeResult("doc1", 0.9), makeResult("doc2", 0.8)];
      const list2 = [makeResult("doc3", 0.85)];

      const fused = reciprocalRankFusion([list1, list2]);

      // Intended bonuses: +0.05 for a #1 rank, +0.02 for ranks #2-3.
      // Only the resulting order of doc1 and doc2 is asserted.
      const doc1 = fused.find((entry) => entry.file === "doc1");
      const doc2 = fused.find((entry) => entry.file === "doc2");
      expect(doc1!.score).toBeGreaterThan(doc2!.score);
    });

    test("RRF handles empty lists", () => {
      const fused = reciprocalRankFusion([[], []]);
      expect(fused).toHaveLength(0);
    });

    test("RRF uses k parameter correctly", () => {
      const list = [makeResult("doc1", 0.9)];

      // Same input, two different k values.
      const fused60 = reciprocalRankFusion([list], [], 60);
      const fused30 = reciprocalRankFusion([list], [], 30);

      // Lower k = higher scores for top ranks.
      expect(fused30[0]!.score).toBeGreaterThan(fused60[0]!.score);
    });
  });
  1210. // =============================================================================
  1211. // Index Status Tests
  1212. // =============================================================================
  // Tests for the store's status/health reporting: getStatus,
  // getHashesNeedingEmbedding and getIndexHealth.
  describe("Index Status", () => {
    type TestStore = Awaited<ReturnType<typeof createTestStore>>;

    // Runs `body` against a fresh test store and always cleans the store up,
    // even when an expectation inside `body` throws.
    const withStore = async (body: (store: TestStore) => void | Promise<void>): Promise<void> => {
      const store = await createTestStore();
      try {
        await body(store);
      } finally {
        await cleanupTestDb(store);
      }
    };

    test("getStatus returns correct structure", () =>
      withStore((store) => {
        const status = store.getStatus();

        for (const key of ["totalDocuments", "needsEmbedding", "hasVectorIndex", "collections"]) {
          expect(status).toHaveProperty(key);
        }
        expect(Array.isArray(status.collections)).toBe(true);
      }));

    test("getStatus counts documents correctly", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        await insertTestDocument(store.db, collectionName, { name: "doc1", active: 1 });
        await insertTestDocument(store.db, collectionName, { name: "doc2", active: 1 });
        await insertTestDocument(store.db, collectionName, { name: "doc3", active: 0 }); // inactive

        const status = store.getStatus();
        expect(status.totalDocuments).toBe(2); // Only active docs
      }));

    test("getStatus reports collection info", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection({ pwd: "/test/path", glob: "**/*.md" });
        await insertTestDocument(store.db, collectionName, { name: "doc1" });

        const status = store.getStatus();
        expect(status.collections.length).toBeGreaterThanOrEqual(1);

        const col = status.collections.find(c => c.name === collectionName);
        expect(col).toBeDefined();
        expect(col?.path).toBe("/test/path");
        expect(col?.pattern).toBe("**/*.md");
        expect(col?.documents).toBe(1);
      }));

    test("getHashesNeedingEmbedding counts correctly", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        // Add documents with different hashes
        await insertTestDocument(store.db, collectionName, { name: "doc1", hash: "hash1" });
        await insertTestDocument(store.db, collectionName, { name: "doc2", hash: "hash2" });
        await insertTestDocument(store.db, collectionName, { name: "doc3", hash: "hash1" }); // same hash as doc1

        const needsEmbedding = store.getHashesNeedingEmbedding();
        expect(needsEmbedding).toBe(2); // hash1 and hash2
      }));

    test("getIndexHealth returns health info", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();
        await insertTestDocument(store.db, collectionName, { name: "doc1" });

        const health = store.getIndexHealth();

        for (const key of ["needsEmbedding", "totalDocs", "daysStale"]) {
          expect(health).toHaveProperty(key);
        }
        expect(health.totalDocs).toBe(1);
      }));
  });
  1270. // =============================================================================
  1271. // Fuzzy Matching Tests
  1272. // =============================================================================
  // Tests for approximate path lookup (findSimilarFiles) and glob matching
  // (matchFilesByGlob).
  describe("Fuzzy Matching", () => {
    type TestStore = Awaited<ReturnType<typeof createTestStore>>;

    // Runs `body` against a fresh test store and always cleans the store up,
    // even when an expectation inside `body` throws.
    const withStore = async (body: (store: TestStore) => void | Promise<void>): Promise<void> => {
      const store = await createTestStore();
      try {
        await body(store);
      } finally {
        await cleanupTestDb(store);
      }
    };

    test("findSimilarFiles finds similar paths", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        await insertTestDocument(store.db, collectionName, {
          name: "readme",
          displayPath: "docs/readme.md",
        });
        await insertTestDocument(store.db, collectionName, {
          name: "readmi",
          displayPath: "docs/readmi.md", // typo
        });

        const similar = store.findSimilarFiles("docs/readme.md", 3, 5);
        expect(similar).toContain("docs/readme.md");
      }));

    test("findSimilarFiles respects maxDistance", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        await insertTestDocument(store.db, collectionName, {
          name: "abc",
          displayPath: "abc.md",
        });
        await insertTestDocument(store.db, collectionName, {
          name: "xyz",
          displayPath: "xyz.md", // very different
        });

        const similar = store.findSimilarFiles("abc.md", 1, 5); // max distance 1
        expect(similar).toContain("abc.md");
        expect(similar).not.toContain("xyz.md");
      }));

    test("matchFilesByGlob matches patterns", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        // Two journal entries that should match, one unrelated doc that should not.
        await insertTestDocument(store.db, collectionName, {
          filepath: "/p/journals/2024-01.md",
          displayPath: "journals/2024-01.md",
        });
        await insertTestDocument(store.db, collectionName, {
          filepath: "/p/journals/2024-02.md",
          displayPath: "journals/2024-02.md",
        });
        await insertTestDocument(store.db, collectionName, {
          filepath: "/p/docs/readme.md",
          displayPath: "docs/readme.md",
        });

        const matches = store.matchFilesByGlob("journals/*.md");
        expect(matches).toHaveLength(2);
        expect(matches.every(m => m.displayPath.startsWith("journals/"))).toBe(true);
      }));
  });
  1326. // =============================================================================
  1327. // Vector Table Tests
  1328. // =============================================================================
  // Tests for ensureVecTable: creating the vectors_vec table and recreating
  // it when the requested embedding dimension changes.
  describe("Vector Table", () => {
    type TestStore = Awaited<ReturnType<typeof createTestStore>>;

    // Runs `body` against a fresh test store and always cleans the store up,
    // even when an expectation inside `body` throws.
    const withStore = async (body: (store: TestStore) => void | Promise<void>): Promise<void> => {
      const store = await createTestStore();
      try {
        await body(store);
      } finally {
        await cleanupTestDb(store);
      }
    };

    // Looks up the sqlite_master row for the vectors_vec table; the result is
    // falsy (null or undefined) when the table does not exist.
    const vecTableRow = (store: TestStore) =>
      store.db.prepare(`
        SELECT name, sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'
      `).get() as { name: string; sql: string } | null | undefined;

    test("ensureVecTable creates vector table", () =>
      withStore((store) => {
        // Initially no vector table
        expect(vecTableRow(store)).toBeFalsy(); // null or undefined

        // Create vector table
        store.ensureVecTable(768);

        expect(vecTableRow(store)).toBeTruthy();
      }));

    test("ensureVecTable recreates table if dimensions change", () =>
      withStore((store) => {
        // Create with 768 dimensions
        store.ensureVecTable(768);

        // The table definition records the embedding dimension.
        expect(vecTableRow(store)!.sql).toContain("float[768]");

        // Recreate with different dimensions
        store.ensureVecTable(1024);

        expect(vecTableRow(store)!.sql).toContain("float[1024]");
      }));
  });
  1363. // =============================================================================
  1364. // Integration Tests
  1365. // =============================================================================
  // End-to-end tests that combine collection setup, document insertion,
  // full-text search and document retrieval.
  describe("Integration", () => {
    type TestStore = Awaited<ReturnType<typeof createTestStore>>;

    // Runs `body` against a fresh test store and always cleans the store up,
    // even when an expectation inside `body` throws.
    const withStore = async (body: (store: TestStore) => void | Promise<void>): Promise<void> => {
      const store = await createTestStore();
      try {
        await body(store);
      } finally {
        await cleanupTestDb(store);
      }
    };

    test("full document lifecycle: create, search, retrieve", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection({ pwd: "/test/notes", glob: "**/*.md" });

        // Add context - use "/" for collection root
        await addPathContext(collectionName, "/", "Personal notes");

        // Insert documents
        await insertTestDocument(store.db, collectionName, {
          name: "meeting",
          title: "Team Meeting Notes",
          filepath: "/test/notes/meeting.md",
          displayPath: "notes/meeting.md",
          body: "# Team Meeting Notes\n\nDiscussed project timeline and deliverables.",
        });
        await insertTestDocument(store.db, collectionName, {
          name: "ideas",
          title: "Project Ideas",
          filepath: "/test/notes/ideas.md",
          displayPath: "notes/ideas.md",
          body: "# Project Ideas\n\nBrainstorming new features for the product.",
        });

        // Search
        const searchResults = store.searchFTS("project", 10);
        expect(searchResults.length).toBe(2);

        // Status - SKIPPED: getStatus() has bug (queries non-existent collections table)
        // const status = store.getStatus();
        // expect(status.totalDocuments).toBe(2);
        // expect(status.collections).toHaveLength(1);

        // Retrieve single document
        const doc = store.findDocument("notes/meeting.md", { includeBody: true });
        expect("error" in doc).toBe(false);
        if (!("error" in doc)) {
          expect(doc.title).toBe("Team Meeting Notes");
          expect(doc.context).toBe("Personal notes");
          expect(doc.body).toContain("Team Meeting");
        }

        // Multi-get
        const { docs, errors } = store.findDocuments("notes/*.md", { includeBody: true });
        expect(errors).toHaveLength(0);
        expect(docs).toHaveLength(2);
      }));

    test("multiple stores can operate independently", () =>
      // Nesting guarantees both stores are cleaned up even if an expectation
      // fails or the second store cannot be created.
      withStore((store1) =>
        withStore(async (store2) => {
          const col1 = await createTestCollection({ pwd: "/store1", glob: "**/*.md", name: "store1" });
          const col2 = await createTestCollection({ pwd: "/store2", glob: "**/*.md", name: "store2" });

          await insertTestDocument(store1.db, col1, {
            name: "doc1",
            body: "unique content for store1",
            displayPath: "doc.md",
          });
          await insertTestDocument(store2.db, col2, {
            name: "doc2",
            body: "different content for store2",
            displayPath: "doc.md",
          });

          // Each store should only see its own documents
          const results1 = store1.searchFTS("unique", 10);
          const results2 = store2.searchFTS("different", 10);

          expect(results1).toHaveLength(1);
          expect(results1[0]!.displayPath).toBe("store1/doc.md");
          expect(results1[0]!.filepath).toBe("qmd://store1/doc.md");

          expect(results2).toHaveLength(1);
          expect(results2[0]!.displayPath).toBe("store2/doc.md");
          expect(results2[0]!.filepath).toBe("qmd://store2/doc.md");

          // Cross-check: store1 shouldn't find store2's content
          const cross1 = store1.searchFTS("different", 10);
          const cross2 = store2.searchFTS("unique", 10);

          expect(cross1).toHaveLength(0);
          expect(cross2).toHaveLength(0);
        })));
  });
  1441. // =============================================================================
  1442. // LlamaCpp Integration Tests (using real local models)
  1443. // =============================================================================
  // Tests for the model-backed store operations: vector search (searchVec),
  // query expansion (expandQuery) and reranking (rerank).
  describe("LlamaCpp Integration", () => {
    type TestStore = Awaited<ReturnType<typeof createTestStore>>;

    // Model-backed calls can be slow, so tests that invoke expandQuery or
    // rerank get a longer per-test timeout (milliseconds).
    const MODEL_TEST_TIMEOUT_MS = 30000;

    // Runs `body` against a fresh test store and always cleans the store up,
    // even when an expectation inside `body` throws.
    const withStore = async (body: (store: TestStore) => void | Promise<void>): Promise<void> => {
      const store = await createTestStore();
      try {
        await body(store);
      } finally {
        await cleanupTestDb(store);
      }
    };

    // Registers one random 768-dimensional embedding for `hash` (chunk seq 0)
    // in both content_vectors and vectors_vec. The vector table must already
    // exist (call store.ensureVecTable(768) first).
    const seedVector = (store: TestStore, hash: string): void => {
      const embedding = Array(768).fill(0).map(() => Math.random());
      store.db.prepare(`INSERT INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, 0, 0, 'test', ?)`).run(hash, new Date().toISOString());
      store.db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, new Float32Array(embedding));
    };

    test("searchVec returns empty when no vector index", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        await insertTestDocument(store.db, collectionName, {
          name: "doc1",
          body: "Some content",
        });

        // No vectors_vec table exists, should return empty
        const results = await store.searchVec("query", "embeddinggemma", 10);
        expect(results).toHaveLength(0);
      }));

    test("searchVec returns results when vector index exists", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();
        const hash = "testhash123";

        await insertTestDocument(store.db, collectionName, {
          name: "doc1",
          hash,
          body: "Some content about testing",
          filepath: "/test/doc1.md",
          displayPath: "doc1.md",
        });

        // Create vector table and insert a vector
        store.ensureVecTable(768);
        seedVector(store, hash);

        const results = await store.searchVec("test query", "embeddinggemma", 10);
        expect(results).toHaveLength(1);
        expect(results[0]!.displayPath).toBe(`${collectionName}/doc1.md`);
        expect(results[0]!.filepath).toBe(`qmd://${collectionName}/doc1.md`);
        expect(results[0]!.source).toBe("vec");
      }));

    test("searchVec filters by collection name", () =>
      withStore(async (store) => {
        const collection1 = await createTestCollection({ name: "coll1", pwd: "/test/coll1" });
        const collection2 = await createTestCollection({ name: "coll2", pwd: "/test/coll2" });
        const hash1 = "hash1abc";
        const hash2 = "hash2xyz";

        await insertTestDocument(store.db, collection1, {
          name: "doc1",
          hash: hash1,
          body: "Content in collection one",
        });
        await insertTestDocument(store.db, collection2, {
          name: "doc2",
          hash: hash2,
          body: "Content in collection two",
        });

        // Create vectors_vec table with correct dimensions (768 for embeddinggemma)
        store.ensureVecTable(768);
        seedVector(store, hash1);
        seedVector(store, hash2);

        // Search without filter - should return both
        const allResults = await store.searchVec("content", "embeddinggemma", 10);
        expect(allResults).toHaveLength(2);

        // Search with collection filter - should return only from collection1
        // NOTE(review): the cast suggests searchVec's declared parameter type is
        // numeric while a collection name is passed here - confirm the signature.
        const filtered = await store.searchVec("content", "embeddinggemma", 10, collection1 as unknown as number);
        expect(filtered).toHaveLength(1);
        expect(filtered[0]!.collectionName).toBe(collection1);
      }));

    // Regression test for https://github.com/tobi/qmd/pull/23
    // sqlite-vec virtual tables hang when combined with JOINs in the same query.
    // The fix uses a two-step approach: vector query first, then separate JOINs.
    test("searchVec uses two-step query to avoid sqlite-vec JOIN hang", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();
        const hash = "regression_test_hash";

        await insertTestDocument(store.db, collectionName, {
          name: "regression-doc",
          hash,
          body: "Test content for vector search regression",
          filepath: "/test/regression.md",
          displayPath: "regression.md",
        });

        // Create vector table and insert a test vector
        store.ensureVecTable(768);
        seedVector(store, hash);

        // This should complete quickly (not hang) due to the two-step fix
        // The old code with JOINs in the sqlite-vec query would hang indefinitely
        const startTime = Date.now();
        const results = await store.searchVec("test content", "embeddinggemma", 5);
        const elapsed = Date.now() - startTime;

        // If the query took more than 5 seconds, something is wrong
        // (the hang bug would cause it to never return at all)
        expect(elapsed).toBeLessThan(5000);
        expect(results.length).toBeGreaterThan(0);
      }));

    test("expandQuery returns original plus expanded queries", () =>
      withStore(async (store) => {
        const queries = await store.expandQuery("test query");

        expect(queries).toContain("test query");
        expect(queries[0]).toBe("test query");
        // LlamaCpp returns original + variations
        expect(queries.length).toBeGreaterThanOrEqual(1);
      }), MODEL_TEST_TIMEOUT_MS);

    test("expandQuery caches results", () =>
      withStore(async (store) => {
        // First call
        const queries1 = await store.expandQuery("cached query test");
        // Second call - should hit cache
        const queries2 = await store.expandQuery("cached query test");

        expect(queries1[0]).toBe(queries2[0]);
      }), MODEL_TEST_TIMEOUT_MS);

    test("rerank scores documents", () =>
      withStore(async (store) => {
        const docs = [
          { file: "doc1.md", text: "Relevant content about the topic" },
          { file: "doc2.md", text: "Other content" },
        ];

        const results = await store.rerank("topic", docs);

        expect(results).toHaveLength(2);
        // LlamaCpp reranker returns relevance scores
        expect(results[0]!.score).toBeGreaterThan(0);
      }), MODEL_TEST_TIMEOUT_MS);

    test("rerank caches results", () =>
      withStore(async (store) => {
        const docs = [{ file: "doc1.md", text: "Content for caching test" }];

        // First call
        await store.rerank("cache test query", docs);
        // Second call - should hit cache
        const results = await store.rerank("cache test query", docs);

        expect(results).toHaveLength(1);
      }), MODEL_TEST_TIMEOUT_MS);
  });
  1584. // =============================================================================
  1585. // Edge Cases & Error Handling
  1586. // =============================================================================
  // Tests for unusual inputs: an empty database, very large bodies, non-ASCII
  // content, paths containing spaces and concurrent inserts.
  describe("Edge Cases", () => {
    type TestStore = Awaited<ReturnType<typeof createTestStore>>;

    // Runs `body` against a fresh test store and always cleans the store up,
    // even when an expectation inside `body` throws.
    const withStore = async (body: (store: TestStore) => void | Promise<void>): Promise<void> => {
      const store = await createTestStore();
      try {
        await body(store);
      } finally {
        await cleanupTestDb(store);
      }
    };

    test("handles empty database gracefully", () =>
      withStore((store) => {
        const searchResults = store.searchFTS("anything", 10);
        expect(searchResults).toHaveLength(0);

        // SKIPPED: getStatus() has bug (queries non-existent collections table)
        // const status = store.getStatus();
        // expect(status.totalDocuments).toBe(0);
        // expect(status.collections).toHaveLength(0);

        const doc = store.findDocument("nonexistent.md");
        expect("error" in doc).toBe(true);
      }));

    test("handles very long document bodies", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();
        const longBody = "word ".repeat(100000); // 5 chars x 100000 = ~500KB

        await insertTestDocument(store.db, collectionName, {
          name: "long",
          body: longBody,
          displayPath: "long.md",
        });

        const results = store.searchFTS("word", 10);
        expect(results).toHaveLength(1);
      }));

    test("handles unicode content correctly", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        await insertTestDocument(store.db, collectionName, {
          name: "unicode",
          title: "日本語タイトル",
          body: "# 日本語\n\n内容は日本語で書かれています。\n\nEmoji: 🎉🚀✨",
          displayPath: "unicode.md",
        });

        // Should be searchable
        const results = store.searchFTS("日本語", 10);
        expect(results.length).toBeGreaterThan(0);

        // Should retrieve correctly
        const doc = store.findDocument("unicode.md", { includeBody: true });
        expect("error" in doc).toBe(false);
        if (!("error" in doc)) {
          expect(doc.title).toBe("日本語タイトル");
          expect(doc.body).toContain("🎉");
        }
      }));

    test("handles documents with special characters in paths", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        await insertTestDocument(store.db, collectionName, {
          name: "special",
          filepath: "/path/file with spaces.md",
          displayPath: "file with spaces.md",
          body: "Content",
        });

        const doc = store.findDocument("file with spaces.md");
        expect("error" in doc).toBe(false);
      }));

    test("handles concurrent operations", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        // Insert multiple documents concurrently
        const inserts = Array.from({ length: 10 }, (_, i) =>
          insertTestDocument(store.db, collectionName, {
            name: `concurrent${i}`,
            body: `Content ${i} searchterm`,
            displayPath: `concurrent${i}.md`,
          })
        );
        await Promise.all(inserts);

        // All should be searchable
        const results = store.searchFTS("searchterm", 20);
        expect(results).toHaveLength(10);
      }));
  });
  1665. // =============================================================================
  1666. // Content-Addressable Storage Tests
  1667. // =============================================================================
  // Tests that document bodies are stored once per content hash and shared
  // between documents and collections.
  describe("Content-Addressable Storage", () => {
    type TestStore = Awaited<ReturnType<typeof createTestStore>>;
    type DocumentId = Awaited<ReturnType<typeof insertTestDocument>>;

    // Runs `body` against a fresh test store and always cleans the store up,
    // even when an expectation inside `body` throws.
    const withStore = async (body: (store: TestStore) => void | Promise<void>): Promise<void> => {
      const store = await createTestStore();
      try {
        await body(store);
      } finally {
        await cleanupTestDb(store);
      }
    };

    // Reads the content hash recorded on the documents row with the given id.
    const documentHash = (store: TestStore, id: DocumentId): string =>
      (store.db.prepare(`SELECT hash FROM documents WHERE id = ?`).get(id) as { hash: string }).hash;

    // Returns the content row for `hash`, or a falsy value when none exists.
    const contentRow = (store: TestStore, hash: string) =>
      store.db.prepare(`SELECT hash FROM content WHERE hash = ?`).get(hash);

    // Counts the content rows stored under `hash`.
    const contentCountFor = (store: TestStore, hash: string): number =>
      (store.db.prepare(`SELECT COUNT(*) as count FROM content WHERE hash = ?`).get(hash) as { count: number }).count;

    test("same content gets same hash from multiple collections", () =>
      withStore(async (store) => {
        // Create two collections
        const collection1 = await createTestCollection({ pwd: "/path/collection1", name: "collection1" });
        const collection2 = await createTestCollection({ pwd: "/path/collection2", name: "collection2" });

        // Add same content to both collections
        const content = "# Same Content\n\nThis is the same content in two places.";
        const hash1 = await hashContent(content);

        const doc1 = await insertTestDocument(store.db, collection1, {
          name: "doc1",
          body: content,
          displayPath: "doc1.md",
        });
        const doc2 = await insertTestDocument(store.db, collection2, {
          name: "doc2",
          body: content,
          displayPath: "doc2.md",
        });

        // Both should have the same hash
        const storedHash1 = documentHash(store, doc1);
        const storedHash2 = documentHash(store, doc2);
        expect(storedHash1).toBe(storedHash2);
        expect(storedHash1).toBe(hash1);

        // There should only be one entry in the content table
        expect(contentCountFor(store, hash1)).toBe(1);
      }));

    test("removing one collection preserves content used by another", () =>
      withStore(async (store) => {
        // Create two collections
        const collection1 = await createTestCollection({ pwd: "/path/collection1", name: "collection1" });
        const collection2 = await createTestCollection({ pwd: "/path/collection2", name: "collection2" });

        // Add same content to both collections
        const sharedContent = "# Shared Content\n\nThis is shared.";
        const sharedHash = await hashContent(sharedContent);

        await insertTestDocument(store.db, collection1, {
          name: "shared1",
          body: sharedContent,
          displayPath: "shared1.md",
        });
        await insertTestDocument(store.db, collection2, {
          name: "shared2",
          body: sharedContent,
          displayPath: "shared2.md",
        });

        // Add unique content to collection1
        const uniqueContent = "# Unique Content\n\nThis is unique to collection1.";
        const uniqueHash = await hashContent(uniqueContent);

        await insertTestDocument(store.db, collection1, {
          name: "unique",
          body: uniqueContent,
          displayPath: "unique.md",
        });

        // Verify both hashes exist in content table
        expect(contentRow(store, sharedHash)).toBeTruthy();
        expect(contentRow(store, uniqueHash)).toBeTruthy();

        // Remove collection1 documents (collections are in YAML now)
        store.db.prepare(`DELETE FROM documents WHERE collection = ?`).run(collection1);

        // Clean up orphaned content (mimics what the CLI does)
        store.db.prepare(`
          DELETE FROM content
          WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
        `).run();

        // Shared content should still exist (used by collection2)
        expect(contentRow(store, sharedHash)).toBeTruthy();

        // Unique content should be removed (only used by collection1)
        expect(contentRow(store, uniqueHash)).toBeFalsy();
      }));

    test("deduplicates content across many collections", () =>
      withStore(async (store) => {
        const sharedContent = "# Common Header\n\nThis appears everywhere.";
        const sharedHash = await hashContent(sharedContent);

        // Create 5 collections with the same content
        for (let i = 0; i < 5; i++) {
          const collName = await createTestCollection({ pwd: `/path/collection${i}`, name: `collection${i}` });

          await insertTestDocument(store.db, collName, {
            name: `doc${i}`,
            body: sharedContent,
            displayPath: `doc${i}.md`,
          });
        }

        // Should have 5 documents
        const docCount = store.db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number };
        expect(docCount.count).toBe(5);

        // But only 1 content entry
        expect(contentCountFor(store, sharedHash)).toBe(1);

        // All documents should point to the same hash
        const hashes = store.db.prepare(`SELECT DISTINCT hash FROM documents WHERE active = 1`).all() as { hash: string }[];
        expect(hashes).toHaveLength(1);
        expect(hashes[0]!.hash).toBe(sharedHash);
      }));

    test("different content gets different hashes", () =>
      withStore(async (store) => {
        const collectionName = await createTestCollection();

        const content1 = "# Content One";
        const content2 = "# Content Two";
        const hash1 = await hashContent(content1);
        const hash2 = await hashContent(content2);

        // Hashes should be different
        expect(hash1).not.toBe(hash2);

        const doc1 = await insertTestDocument(store.db, collectionName, {
          name: "doc1",
          body: content1,
          displayPath: "doc1.md",
        });
        const doc2 = await insertTestDocument(store.db, collectionName, {
          name: "doc2",
          body: content2,
          displayPath: "doc2.md",
        });

        // Each document row should record the hash of its own content
        const storedHash1 = documentHash(store, doc1);
        const storedHash2 = documentHash(store, doc2);
        expect(storedHash1).toBe(hash1);
        expect(storedHash2).toBe(hash2);
        expect(storedHash1).not.toBe(storedHash2);

        // Should have 2 entries in content table
        const contentCount = store.db.prepare(`SELECT COUNT(*) as count FROM content`).get() as { count: number };
        expect(contentCount.count).toBe(2);
      }));
  });
  1801. // =============================================================================
  1802. // Virtual Path Normalization Tests
  1803. // =============================================================================
  1804. describe("normalizeVirtualPath", () => {
  // Asserts, pair by pair, that normalizeVirtualPath maps each input to the
  // expected output.
  const expectNormalized = (cases: [input: string, expected: string][]): void => {
    for (const [input, expected] of cases) {
      expect(normalizeVirtualPath(input)).toBe(expected);
    }
  };

  // Asserts that normalizeVirtualPath returns each given path unchanged.
  const expectUnchanged = (paths: string[]): void => {
    expectNormalized(paths.map((p): [string, string] => [p, p]));
  };

  test("already normalized qmd:// path passes through", () => {
    expectUnchanged([
      "qmd://collection/path.md",
      "qmd://journals/2025-01-01.md",
    ]);
  });

  test("handles //collection/path format (missing qmd: prefix)", () => {
    expectNormalized([
      ["//collection/path.md", "qmd://collection/path.md"],
      ["//journals/2025-01-01.md", "qmd://journals/2025-01-01.md"],
    ]);
  });

  test("handles qmd:// with extra slashes", () => {
    expectNormalized([
      ["qmd:////collection/path.md", "qmd://collection/path.md"],
      ["qmd:///journals/2025-01-01.md", "qmd://journals/2025-01-01.md"],
      ["qmd:///////archive/file.md", "qmd://archive/file.md"],
    ]);
  });

  test("handles collection root paths", () => {
    expectNormalized([
      ["qmd://collection/", "qmd://collection/"],
      ["qmd://collection", "qmd://collection"],
      ["//collection/", "qmd://collection/"],
    ]);
  });

  test("preserves bare collection/path format (not auto-converted)", () => {
    // Paths lacking both the qmd:// and the // prefix may be relative
    // filesystem paths, so they must come back untouched.
    expectUnchanged([
      "collection/path.md",
      "journals/2025-01-01.md",
    ]);
  });

  test("preserves absolute filesystem paths", () => {
    expectUnchanged([
      "/Users/test/file.md",
      "/absolute/path/file.md",
    ]);
  });

  test("preserves home-relative paths", () => {
    expectUnchanged(["~/Documents/file.md"]);
  });

  test("preserves docid format", () => {
    expectUnchanged(["#abc123", "#def456"]);
  });
  1840. test("handles whitespace trimming", () => {
  1841. expect(normalizeVirtualPath(" qmd://collection/path.md ")).toBe("qmd://collection/path.md");
  1842. expect(normalizeVirtualPath(" //collection/path.md ")).toBe("qmd://collection/path.md");
  1843. });
  1844. });
  /**
   * isVirtualPath: expected to be true only for inputs written with a
   * `qmd://` or `//` prefix, and false for bare relative paths, docids,
   * absolute paths, home-relative paths and slash-less names.
   */
  describe("isVirtualPath", () => {
    // Asserts isVirtualPath(candidate) === expected for each candidate, in order.
    const expectVirtual = (candidates: readonly string[], expected: boolean): void => {
      for (const candidate of candidates) {
        expect(isVirtualPath(candidate)).toBe(expected);
      }
    };

    test("recognizes qmd:// paths", () => {
      expectVirtual(
        ["qmd://collection/path.md", "qmd://journals/2025-01-01.md", "qmd://collection"],
        true,
      );
    });

    test("recognizes //collection/path format", () => {
      expectVirtual(["//collection/path.md", "//journals/2025-01-01.md"], true);
    });

    test("does not auto-recognize bare collection/path format", () => {
      // These could equally be relative filesystem paths, so they must not
      // be detected as virtual.
      expectVirtual(
        ["collection/path.md", "journals/2025-01-01.md", "archive/subfolder/file.md"],
        false,
      );
    });

    test("rejects docid format", () => {
      expectVirtual(["#abc123", "#def456"], false);
    });

    test("rejects absolute filesystem paths", () => {
      expectVirtual(["/Users/test/file.md", "/absolute/path/file.md"], false);
    });

    test("rejects home-relative paths", () => {
      expectVirtual(["~/Documents/file.md", "~/notes/journal.md"], false);
    });

    test("rejects paths without slashes", () => {
      expectVirtual(["file.md", "document"], false);
    });
  });
  /**
   * parseVirtualPath: virtual paths are expected to split into
   * `{ collectionName, path }` (with `path` empty for a collection root),
   * and non-virtual inputs are expected to yield `null`.
   */
  describe("parseVirtualPath", () => {
    // Asserts that `input` parses to exactly { collectionName, path }.
    const expectParsed = (input: string, collectionName: string, path: string): void => {
      expect(parseVirtualPath(input)).toEqual({ collectionName, path });
    };

    test("parses standard qmd:// paths", () => {
      expectParsed("qmd://collection/path.md", "collection", "path.md");
      expectParsed("qmd://journals/2025-01-01.md", "journals", "2025-01-01.md");
    });

    test("parses paths with nested directories", () => {
      expectParsed("qmd://archive/subfolder/file.md", "archive", "subfolder/file.md");
    });

    test("parses collection root paths", () => {
      expectParsed("qmd://collection/", "collection", "");
      expectParsed("qmd://collection", "collection", "");
    });

    test("parses //collection/path format (normalizes first)", () => {
      expectParsed("//collection/path.md", "collection", "path.md");
    });

    test("parses qmd:// with extra slashes (normalizes first)", () => {
      expectParsed("qmd:////collection/path.md", "collection", "path.md");
    });

    test("returns null for non-virtual paths", () => {
      const nonVirtual = [
        "/absolute/path.md",
        "~/home/path.md",
        "#docid",
        "file.md",
        // Bare collection/path is not recognized as virtual
        "collection/path.md",
      ];
      for (const input of nonVirtual) {
        expect(parseVirtualPath(input)).toBe(null);
      }
    });
  });
  1926. // =============================================================================
  1927. // Docid Functions
  1928. // =============================================================================
  /**
   * normalizeDocid: expected to trim whitespace, drop one matching pair of
   * surrounding quotes and a leading `#`, keep the hex letter case, and
   * leave inputs with mismatched quotes untouched.
   */
  describe("normalizeDocid", () => {
    // Asserts normalizeDocid(raw) === expected for each [raw, expected] pair, in order.
    const expectDocid = (pairs: ReadonlyArray<readonly [string, string]>): void => {
      for (const [raw, expected] of pairs) {
        expect(normalizeDocid(raw)).toBe(expected);
      }
    };

    test("strips leading # from docid", () => {
      expectDocid([
        ["#abc123", "abc123"],
        ["#def456", "def456"],
      ]);
    });

    test("returns bare hex unchanged", () => {
      expectDocid([
        ["abc123", "abc123"],
        ["def456", "def456"],
      ]);
    });

    test("strips surrounding double quotes", () => {
      expectDocid([
        ['"#abc123"', "abc123"],
        ['"abc123"', "abc123"],
      ]);
    });

    test("strips surrounding single quotes", () => {
      expectDocid([
        ["'#abc123'", "abc123"],
        ["'abc123'", "abc123"],
      ]);
    });

    test("handles quoted docid without #", () => {
      expectDocid([
        ['"def456"', "def456"],
        ["'def456'", "def456"],
      ]);
    });

    test("handles whitespace", () => {
      expectDocid([
        [" #abc123 ", "abc123"],
        [" abc123 ", "abc123"],
      ]);
    });

    test("handles uppercase hex", () => {
      expectDocid([
        ["#ABC123", "ABC123"],
        ['"ABC123"', "ABC123"],
      ]);
    });

    test("does not strip mismatched quotes", () => {
      // Opening and closing quote characters differ, so the input is returned as-is.
      expectDocid([
        ['"abc123\'', '"abc123\''],
        ["'abc123\"", "'abc123\""],
      ]);
    });
  });
  /**
   * isDocid: expected to accept hex strings of six or more characters,
   * optionally prefixed with `#` and/or wrapped in matching quotes, and to
   * reject non-hex text, too-short or empty values, and anything path-like.
   */
  describe("isDocid", () => {
    // Asserts isDocid(candidate) === expected for each candidate, in order.
    const expectDocid = (candidates: readonly string[], expected: boolean): void => {
      for (const candidate of candidates) {
        expect(isDocid(candidate)).toBe(expected);
      }
    };

    test("accepts #hash format", () => {
      expectDocid(["#abc123", "#def456", "#ABCDEF"], true);
    });

    test("accepts bare 6-char hex", () => {
      expectDocid(["abc123", "def456", "ABCDEF"], true);
    });

    test("accepts longer hex strings", () => {
      expectDocid(["abc123def456", "#abc123def456"], true);
    });

    test("accepts double-quoted docids", () => {
      expectDocid(['"#abc123"', '"abc123"'], true);
    });

    test("accepts single-quoted docids", () => {
      expectDocid(["'#abc123'", "'abc123'"], true);
    });

    test("rejects non-hex strings", () => {
      expectDocid(["ghijkl", "#ghijkl", "abc12g"], false);
    });

    test("rejects strings shorter than 6 chars", () => {
      expectDocid(["abc12", "#abc1", "'abc'"], false);
    });

    test("rejects empty strings", () => {
      expectDocid(["", "#", '""'], false);
    });

    test("rejects file paths", () => {
      expectDocid(["/path/to/file.md", "path/to/file.md", "qmd://collection/file.md"], false);
    });

    test("rejects paths that look like hex with extensions", () => {
      expectDocid(["abc123.md"], false);
    });
  });