mcp.test.ts 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889
  1. /**
  2. * MCP Server Tests
  3. *
  4. * Tests all MCP tools, resources, and prompts.
  5. * Uses mocked Ollama responses and a test database.
  6. */
  7. import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach } from "bun:test";
  8. import { Database } from "bun:sqlite";
  9. import * as sqliteVec from "sqlite-vec";
  10. import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
  11. import { z } from "zod";
  12. import { getDefaultLlamaCpp, disposeDefaultLlamaCpp } from "./llm";
  13. import { mkdtemp, writeFile, readdir, unlink, rmdir } from "node:fs/promises";
  14. import { join } from "node:path";
  15. import { tmpdir } from "node:os";
  16. import YAML from "yaml";
  17. import type { CollectionConfig } from "./collections";
  18. // =============================================================================
  19. // Test Database Setup
  20. // =============================================================================
  21. let testDb: Database;
  22. let testDbPath: string;
  23. let testConfigDir: string;
  // File-level teardown: dispose the shared LlamaCpp instance after all suites
  // have run, so native resources are released before the process exits
  // (this avoids ggml-metal asserts on process exit).
  afterAll(async function releaseLlamaResources() {
    await disposeDefaultLlamaCpp();
  });
  /**
   * Create the schema the MCP tests run against on the given SQLite handle.
   *
   * Loads the sqlite-vec extension, switches the journal mode to WAL, and then
   * creates (if they do not already exist) the content store, the documents
   * table with its two indexes, the LLM cache, the vector metadata table, the
   * FTS5 index with its insert trigger, and the vec0 virtual table.
   *
   * @param db Open database handle to initialise.
   */
  function initTestDatabase(db: Database): void {
    sqliteVec.load(db);

    // Executed strictly in this order; the trigger refers to tables created
    // earlier in the list.
    const schemaStatements: string[] = [
      "PRAGMA journal_mode = WAL",

      // Content-addressable storage - the source of truth for document content
      `
      CREATE TABLE IF NOT EXISTS content (
        hash TEXT PRIMARY KEY,
        doc TEXT NOT NULL,
        created_at TEXT NOT NULL
      )
      `,

      // Documents table - file system layer mapping virtual paths to content hashes.
      // Collections themselves live in the YAML config, not in the database.
      `
      CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        collection TEXT NOT NULL,
        path TEXT NOT NULL,
        title TEXT NOT NULL,
        hash TEXT NOT NULL,
        created_at TEXT NOT NULL,
        modified_at TEXT NOT NULL,
        active INTEGER NOT NULL DEFAULT 1,
        FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
        UNIQUE(collection, path)
      )
      `,
      `CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection, active)`,
      `CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`,

      // Cache table keyed by hash, storing a result string
      `
      CREATE TABLE IF NOT EXISTS llm_cache (
        hash TEXT PRIMARY KEY,
        result TEXT NOT NULL,
        created_at TEXT NOT NULL
      )
      `,

      // Per-chunk embedding metadata, keyed by (hash, seq)
      `
      CREATE TABLE IF NOT EXISTS content_vectors (
        hash TEXT NOT NULL,
        seq INTEGER NOT NULL DEFAULT 0,
        pos INTEGER NOT NULL DEFAULT 0,
        model TEXT NOT NULL,
        embedded_at TEXT NOT NULL,
        PRIMARY KEY (hash, seq)
      )
      `,

      // Full-text index over documents
      `
      CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
        name, body,
        content='documents',
        content_rowid='id',
        tokenize='porter unicode61'
      )
      `,

      // Populate the FTS index whenever a document row is inserted
      `
      CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN
        INSERT INTO documents_fts(rowid, name, body)
        SELECT new.id, new.path, content.doc
        FROM content
        WHERE content.hash = new.hash;
      END
      `,

      // Vector table
      `CREATE VIRTUAL TABLE IF NOT EXISTS vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[768] distance_metric=cosine)`,
    ];

    for (const statement of schemaStatements) {
      db.exec(statement);
    }
  }
  /**
   * Insert the fixture documents (and embeddings for most of them) into a
   * database whose schema has already been created.
   *
   * Five documents are written to the "docs" collection (collections themselves
   * are managed in the YAML config, not in the database). The first four also
   * get a row in `content_vectors` and `vectors_vec`; the large document is
   * deliberately left without an embedding.
   *
   * Each INSERT statement is prepared once and reused, and the fixture
   * embedding is deterministic so that seeding is reproducible between runs.
   *
   * @param db Open database handle to seed.
   */
  function seedTestData(db: Database): void {
    const now = new Date().toISOString();

    const docs = [
      {
        path: "readme.md",
        title: "Project README",
        hash: "hash1",
        body: "# Project README\n\nThis is the main readme file for the project.\n\nIt contains important information about setup and usage.",
      },
      {
        path: "api.md",
        title: "API Documentation",
        hash: "hash2",
        body: "# API Documentation\n\nThis document describes the REST API endpoints.\n\n## Authentication\n\nUse Bearer tokens for auth.",
      },
      {
        path: "meetings/meeting-2024-01.md",
        title: "January Meeting Notes",
        hash: "hash3",
        body: "# January Meeting Notes\n\nDiscussed Q1 goals and roadmap.\n\n## Action Items\n\n- Review budget\n- Hire new team members",
      },
      {
        path: "meetings/meeting-2024-02.md",
        title: "February Meeting Notes",
        hash: "hash4",
        body: "# February Meeting Notes\n\nFollowed up on Q1 progress.\n\n## Updates\n\n- Budget approved\n- Two candidates interviewed",
      },
      {
        path: "large-file.md",
        title: "Large Document",
        hash: "hash5",
        body: "# Large Document\n\n" + "Lorem ipsum ".repeat(2000), // ~24KB
      },
    ];

    // Prepare each statement once instead of re-preparing it on every iteration.
    const insertContent = db.prepare(`
      INSERT OR IGNORE INTO content (hash, doc, created_at)
      VALUES (?, ?, ?)
    `);
    const insertDocument = db.prepare(`
      INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
      VALUES ('docs', ?, ?, ?, ?, ?, 1)
    `);

    for (const doc of docs) {
      // Content must exist before the document row: the documents_ai trigger
      // copies the body from `content` when the document is inserted.
      insertContent.run(doc.hash, doc.body, now);
      insertDocument.run(doc.path, doc.title, doc.hash, now, now);
    }

    // One shared, deterministic, non-zero embedding for every embedded document.
    const embedding = new Float32Array(768);
    for (let i = 0; i < embedding.length; i++) {
      embedding[i] = (i + 1) / embedding.length;
    }

    const insertVectorMeta = db.prepare(
      `INSERT INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, 0, 0, 'embeddinggemma', ?)`,
    );
    const insertVector = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);

    // Skip the large file so at least one document still needs embedding.
    for (const doc of docs.slice(0, 4)) {
      insertVectorMeta.run(doc.hash, now);
      insertVector.run(`${doc.hash}_0`, embedding);
    }
  }
  150. // =============================================================================
  151. // MCP Server Test Helpers
  152. // =============================================================================
  153. // We need to create a testable version of the MCP handlers
  154. // Since McpServer uses internal routing, we'll test the handler functions directly
  155. import {
  156. searchFTS,
  157. searchVec,
  158. expandQuery,
  159. rerank,
  160. reciprocalRankFusion,
  161. extractSnippet,
  162. getContextForFile,
  163. findDocument,
  164. getDocumentBody,
  165. findDocuments,
  166. getStatus,
  167. DEFAULT_EMBED_MODEL,
  168. DEFAULT_QUERY_MODEL,
  169. DEFAULT_RERANK_MODEL,
  170. DEFAULT_MULTI_GET_MAX_BYTES,
  171. createStore,
  172. } from "./store";
  173. import type { RankedResult } from "./store";
  174. // Note: searchResultsToMcpCsv no longer used in MCP - using structuredContent instead
  175. // =============================================================================
  176. // Tests
  177. // =============================================================================
  178. describe("MCP Server", () => {
  // Suite setup: warm the shared LlamaCpp instance, write a temporary YAML
  // config containing the "docs" collection, and create + seed the test database.
  beforeAll(async () => {
    // LlamaCpp uses node-llama-cpp for local model inference (no HTTP mocking needed).
    // Use the shared singleton to avoid creating multiple instances with separate GPU resources.
    getDefaultLlamaCpp();

    // One suffix shared by the config directory and the database file so that
    // concurrent runs started in the same millisecond do not collide.
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;

    // Set up the test config directory and point QMD at it.
    const configPrefix = join(tmpdir(), `qmd-mcp-config-${uniqueSuffix}`);
    testConfigDir = await mkdtemp(configPrefix);
    process.env.QMD_CONFIG_DIR = testConfigDir;

    // YAML config with the single test collection and one path context.
    const testConfig: CollectionConfig = {
      collections: {
        docs: {
          path: "/test/docs",
          pattern: "**/*.md",
          context: {
            "/meetings": "Meeting notes and transcripts",
          },
        },
      },
    };
    await writeFile(join(testConfigDir, "index.yml"), YAML.stringify(testConfig));

    // Use the OS temp directory rather than a hard-coded /tmp so the suite is
    // not tied to platforms that have that path.
    testDbPath = join(tmpdir(), `qmd-mcp-test-${uniqueSuffix}.sqlite`);
    testDb = new Database(testDbPath);
    initTestDatabase(testDb);
    seedTestData(testDb);
  });
  // Suite teardown: close the database, then remove the database file and the
  // temporary config directory. All removals are best-effort so that a cleanup
  // failure never masks a real test failure.
  afterAll(async () => {
    // Guarded so teardown does not throw if setup failed before the database was opened.
    testDb?.close();

    // The database runs in WAL mode, so also try to remove the -wal / -shm
    // sidecar files in case they are still present after close.
    if (testDbPath) {
      for (const suffix of ["", "-wal", "-shm"]) {
        try {
          await unlink(`${testDbPath}${suffix}`);
        } catch {
          // File already gone (or never created) - nothing to clean up.
        }
      }
    }

    // Clean up the test config directory.
    try {
      const files = await readdir(testConfigDir);
      for (const file of files) {
        await unlink(join(testConfigDir, file));
      }
      await rmdir(testConfigDir);
    } catch {
      // Best-effort: the directory may not exist if setup failed early.
    }

    delete process.env.QMD_CONFIG_DIR;
  });
  220. // ===========================================================================
  221. // Tool: qmd_search (BM25)
  222. // ===========================================================================
  // Exercises the BM25 full-text search that backs the qmd_search tool.
  describe("qmd_search tool", () => {
    test("returns results for matching query", () => {
      const hits = searchFTS(testDb, "readme", 10);
      expect(hits.length).toBeGreaterThan(0);
      expect(hits[0]!.displayPath).toBe("docs/readme.md");
    });

    test("returns empty for non-matching query", () => {
      const hits = searchFTS(testDb, "xyznonexistent", 10);
      expect(hits.length).toBe(0);
    });

    test("respects limit parameter", () => {
      const hits = searchFTS(testDb, "meeting", 1);
      expect(hits.length).toBe(1);
    });

    // Note: Collection filtering tests removed - collections are now managed in YAML, not DB

    test("formats results as structured content", () => {
      // Shape each hit the way it is returned in structuredContent.
      const structured = searchFTS(testDb, "api", 10).map(hit => {
        const file = hit.displayPath;
        const title = hit.title;
        const score = Math.round(hit.score * 100) / 100;
        const context = getContextForFile(testDb, hit.filepath);
        const { snippet } = extractSnippet(hit.body || "", "api", 300, hit.chunkPos);
        return { file, title, score, context, snippet };
      });

      expect(structured.length).toBeGreaterThan(0);
      const firstEntry = structured[0];
      for (const key of ["file", "title", "score", "snippet"]) {
        expect(firstEntry).toHaveProperty(key);
      }
    });
  });
  255. // ===========================================================================
  256. // Tool: qmd_vsearch (Vector)
  257. // ===========================================================================
  // Exercises the vector search that backs the qmd_vsearch tool.
  describe("qmd_vsearch tool", () => {
    test("returns results for semantic query", async () => {
      const results = await searchVec(testDb, "project documentation", DEFAULT_EMBED_MODEL, 10);
      expect(results.length).toBeGreaterThan(0);
    });

    test("respects limit parameter", async () => {
      const results = await searchVec(testDb, "documentation", DEFAULT_EMBED_MODEL, 2);
      expect(results.length).toBeLessThanOrEqual(2);
    });

    test("returns empty when no vector table exists", async () => {
      const emptyDb = new Database(":memory:");
      try {
        initTestDatabase(emptyDb);
        emptyDb.exec("DROP TABLE IF EXISTS vectors_vec");
        const results = await searchVec(emptyDb, "test", DEFAULT_EMBED_MODEL, 10);
        expect(results.length).toBe(0);
      } finally {
        // Always release the in-memory database, even if the search throws or
        // the assertion fails.
        emptyDb.close();
      }
    });
  });
  276. // ===========================================================================
  277. // Tool: qmd_query (Hybrid)
  278. // ===========================================================================
  // Exercises the building blocks of the hybrid qmd_query tool: query
  // expansion, reciprocal rank fusion, and LLM reranking.
  describe("qmd_query tool", () => {
    test("expands query with variations", async () => {
      const expanded = await expandQuery("api documentation", DEFAULT_QUERY_MODEL, testDb);
      // Always returns at least the original query, may have more if generation succeeds
      expect(expanded.length).toBeGreaterThanOrEqual(1);
      expect(expanded[0]).toBe("api documentation");
    }, 30000); // 30s timeout for model loading

    test("performs RRF fusion on multiple result lists", () => {
      const firstList: RankedResult[] = [
        { file: "/a", displayPath: "a.md", title: "A", body: "body", score: 1 },
        { file: "/b", displayPath: "b.md", title: "B", body: "body", score: 0.8 },
      ];
      const secondList: RankedResult[] = [
        { file: "/b", displayPath: "b.md", title: "B", body: "body", score: 1 },
        { file: "/c", displayPath: "c.md", title: "C", body: "body", score: 0.9 },
      ];

      const merged = reciprocalRankFusion([firstList, secondList]);

      // Three distinct files across both lists; "/b" (present in both) must survive the merge.
      expect(merged.length).toBe(3);
      expect(merged.find(entry => entry.file === "/b")).toBeDefined();
    });

    test("reranks documents with LLM", async () => {
      const candidates = [
        { file: "/test/docs/readme.md", text: "Project readme" },
        { file: "/test/docs/api.md", text: "API documentation" },
      ];
      const ordered = await rerank("readme", candidates, DEFAULT_RERANK_MODEL, testDb);
      expect(ordered.length).toBe(2);
      expect(ordered[0]!.score).toBeGreaterThan(0);
    });

    test("full hybrid search pipeline", async () => {
      // Simulate the full qmd_query flow: expand -> FTS per variant -> fuse -> rerank.
      const query = "meeting notes";
      const variants = await expandQuery(query, DEFAULT_QUERY_MODEL, testDb);

      // One ranked list per query variant that produced any FTS hits.
      const rankedLists: RankedResult[][] = variants
        .map(variant => searchFTS(testDb, variant, 20))
        .filter(hits => hits.length > 0)
        .map(hits =>
          hits.map(hit => ({
            file: hit.filepath,
            displayPath: hit.displayPath,
            title: hit.title,
            body: hit.body || "",
            score: hit.score,
          })),
        );
      expect(rankedLists.length).toBeGreaterThan(0);

      const fused = reciprocalRankFusion(rankedLists);
      expect(fused.length).toBeGreaterThan(0);

      // Only the top ten fused candidates are sent to the reranker.
      const rerankInput = fused.slice(0, 10).map(candidate => ({ file: candidate.file, text: candidate.body }));
      const reranked = await rerank(query, rerankInput, DEFAULT_RERANK_MODEL, testDb);
      expect(reranked.length).toBeGreaterThan(0);
    });
  });
  340. // ===========================================================================
  341. // Tool: qmd_get (Get Document)
  342. // ===========================================================================
  // Exercises single-document lookup (findDocument / getDocumentBody) as used
  // by the qmd_get tool.
  describe("qmd_get tool", () => {
    test("retrieves document by display_path", () => {
      const found = findDocument(testDb, "readme.md", { includeBody: false });
      expect("error" in found).toBe(false);
      if ("error" in found) return;

      const text = getDocumentBody(testDb, found) ?? "";
      expect(found.displayPath).toBe("docs/readme.md");
      expect(text).toContain("Project README");
    });

    test("retrieves document by filepath", () => {
      const found = findDocument(testDb, "/test/docs/api.md", { includeBody: false });
      expect("error" in found).toBe(false);
      if ("error" in found) return;

      expect(found.title).toBe("API Documentation");
    });

    test("retrieves document by partial path", () => {
      const lookup = findDocument(testDb, "api.md", { includeBody: false });
      expect("error" in lookup).toBe(false);
    });

    test("returns not found for missing document", () => {
      const lookup = findDocument(testDb, "nonexistent.md", { includeBody: false });
      expect("error" in lookup).toBe(true);
      if (!("error" in lookup)) return;

      expect(lookup.error).toBe("not_found");
    });

    test("suggests similar files when not found", () => {
      const lookup = findDocument(testDb, "readm.md", { includeBody: false }); // typo
      expect("error" in lookup).toBe(true);
      if (!("error" in lookup)) return;

      // Only checks that a suggestion list is present; it may be empty.
      expect(lookup.similarFiles.length).toBeGreaterThanOrEqual(0);
    });

    test("supports line range with :line suffix", () => {
      const found = findDocument(testDb, "readme.md:2", { includeBody: false });
      expect("error" in found).toBe(false);
      if ("error" in found) return;

      const text = getDocumentBody(testDb, found, 2, 2) ?? "";
      expect(text.split("\n").length).toBeLessThanOrEqual(2);
    });

    test("supports fromLine parameter", () => {
      const found = findDocument(testDb, "readme.md", { includeBody: false });
      expect("error" in found).toBe(false);
      if ("error" in found) return;

      // Starting at line 3 must drop the heading on line 1.
      const text = getDocumentBody(testDb, found, 3) ?? "";
      expect(text).not.toContain("# Project README");
    });

    test("supports maxLines parameter", () => {
      const found = findDocument(testDb, "api.md", { includeBody: false });
      expect("error" in found).toBe(false);
      if ("error" in found) return;

      const text = getDocumentBody(testDb, found, 1, 3) ?? "";
      expect(text.split("\n").length).toBeLessThanOrEqual(3);
    });

    test("includes context for documents in context path", () => {
      const lookup = findDocument(testDb, "meetings/meeting-2024-01.md", { includeBody: false });
      expect("error" in lookup).toBe(false);
      if ("error" in lookup) return;

      expect(lookup.context).toBe("Meeting notes and transcripts");
    });
  });
  406. // ===========================================================================
  407. // Tool: qmd_multi_get (Multi Get)
  408. // ===========================================================================
  // Exercises multi-document lookup (findDocuments) as used by the
  // qmd_multi_get tool: glob patterns, comma lists, size limits and context.
  describe("qmd_multi_get tool", () => {
    test("retrieves multiple documents by glob pattern", () => {
      const outcome = findDocuments(testDb, "meetings/*.md", { includeBody: true });
      expect(outcome.errors.length).toBe(0);
      expect(outcome.docs.length).toBe(2);

      const displayPaths = outcome.docs.map(entry => entry.doc.displayPath);
      expect(displayPaths).toContain("docs/meetings/meeting-2024-01.md");
      expect(displayPaths).toContain("docs/meetings/meeting-2024-02.md");
    });

    test("retrieves documents by comma-separated list", () => {
      const outcome = findDocuments(testDb, "readme.md, api.md", { includeBody: true });
      expect(outcome.errors.length).toBe(0);
      expect(outcome.docs.length).toBe(2);
    });

    test("returns errors for missing files in comma list", () => {
      const outcome = findDocuments(testDb, "readme.md, nonexistent.md", { includeBody: true });
      expect(outcome.docs.length).toBe(1);
      expect(outcome.errors.length).toBe(1);
      expect(outcome.errors[0]).toContain("not found");
    });

    test("skips files larger than maxBytes", () => {
      const outcome = findDocuments(testDb, "*.md", { includeBody: true, maxBytes: 1000 }); // 1KB limit
      const large = outcome.docs.find(entry => entry.doc.displayPath === "docs/large-file.md");
      expect(large).toBeDefined();
      expect(large?.skipped).toBe(true);
      if (large?.skipped) {
        expect(large.skipReason).toContain("too large");
      }
    });

    test("respects maxLines parameter", () => {
      const outcome = findDocuments(testDb, "readme.md", { includeBody: true, maxBytes: DEFAULT_MULTI_GET_MAX_BYTES });
      expect(outcome.docs.length).toBe(1);

      const entry = outcome.docs[0]!;
      expect(entry.skipped).toBe(false);
      if (entry.skipped) return;
      if (!("body" in entry.doc)) {
        throw new Error("Expected body to be included in findDocuments result");
      }

      // Truncates the body to two lines locally and checks the resulting length.
      const firstLines = (entry.doc.body || "").split("\n").slice(0, 2);
      expect(firstLines.length).toBeLessThanOrEqual(2);
    });

    test("returns error for non-matching glob", () => {
      const outcome = findDocuments(testDb, "nonexistent/*.md", { includeBody: true });
      expect(outcome.docs.length).toBe(0);
      expect(outcome.errors.length).toBe(1);
      expect(outcome.errors[0]).toContain("No files matched");
    });

    test("includes context in results", () => {
      const outcome = findDocuments(testDb, "meetings/meeting-2024-01.md", { includeBody: true });
      expect(outcome.docs.length).toBe(1);

      const entry = outcome.docs[0]!;
      expect(entry.skipped).toBe(false);
      if (entry.skipped) return;
      if (!("context" in entry.doc)) {
        throw new Error("Expected context to be present on document result");
      }
      expect(entry.doc.context).toBe("Meeting notes and transcripts");
    });
  });
  466. // ===========================================================================
  467. // Tool: qmd_status
  468. // ===========================================================================
  // Exercises getStatus as used by the qmd_status tool, against the seeded fixture.
  describe("qmd_status tool", () => {
    test("returns index status", () => {
      const { totalDocuments, hasVectorIndex, collections } = getStatus(testDb);
      expect(totalDocuments).toBe(5);
      expect(hasVectorIndex).toBe(true);
      expect(collections.length).toBe(1);
      expect(collections[0]!.path).toBe("/test/docs");
    });

    test("shows documents needing embedding", () => {
      // large-file.md is seeded without embeddings, so exactly one document is pending.
      const { needsEmbedding } = getStatus(testDb);
      expect(needsEmbedding).toBe(1);
    });
  });
  483. // ===========================================================================
  484. // Resource: qmd://{path}
  485. // ===========================================================================
  // Exercises the SQL lookups and URL decoding behind the qmd://{path} resource.
  describe("qmd:// resource", () => {
    type ResourceRow = { filepath: string; display_path: string; body: string };

    // Shared SELECT used by every lookup below; only the WHERE clause differs.
    const resourceSelect = `
      SELECT 'qmd://' || d.collection || '/' || d.path as filepath, d.path as display_path, content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash`;

    // Look up an active document whose path equals `path` exactly.
    const readExact = (path: string) =>
      testDb.prepare(`${resourceSelect}
      WHERE d.path = ? AND d.active = 1
      `).get(path) as ResourceRow | null;

    // Look up the first active document whose path ends with `path`.
    const readBySuffix = (path: string) =>
      testDb.prepare(`${resourceSelect}
      WHERE d.path LIKE ? AND d.active = 1
      LIMIT 1
      `).get(`%${path}`) as ResourceRow | null;

    test("lists all documents", () => {
      const listing = testDb.prepare(`
        SELECT path as display_path, title
        FROM documents
        WHERE active = 1
        ORDER BY modified_at DESC
        LIMIT 1000
      `).all() as { display_path: string; title: string }[];

      expect(listing.length).toBe(5);
      expect(listing.map(row => row.display_path)).toContain("readme.md");
    });

    test("reads document by display_path", () => {
      const row = readExact("readme.md");
      expect(row).not.toBeNull();
      expect(row?.body).toContain("Project README");
    });

    test("reads document by URL-encoded path", () => {
      // Simulate URL encoding that MCP clients may send
      const row = readExact(decodeURIComponent("meetings%2Fmeeting-2024-01.md"));
      expect(row).not.toBeNull();
      expect(row?.display_path).toBe("meetings/meeting-2024-01.md");
    });

    test("reads document by suffix match", () => {
      const bareName = "meeting-2024-01.md"; // without meetings/ prefix
      // Exact match first; fall back to a suffix match only when that finds nothing.
      const row = readExact(bareName) ?? readBySuffix(bareName);
      expect(row).not.toBeNull();
      expect(row?.display_path).toBe("meetings/meeting-2024-01.md");
    });

    test("returns not found for missing document", () => {
      const row = readExact("nonexistent.md");
      expect(row).toBeNull();
    });

    test("includes context in document body", () => {
      const row = readExact("meetings/meeting-2024-01.md");
      expect(row).not.toBeNull();

      const context = getContextForFile(testDb, row!.filepath);
      expect(context).toBe("Meeting notes and transcripts");

      // Verify context would be prepended
      const rendered = context ? `<!-- Context: ${context} -->\n\n` + row!.body : row!.body;
      expect(rendered).toContain("<!-- Context: Meeting notes and transcripts -->");
    });

    test("handles URL-encoded special characters", () => {
      // Encoded form -> expected decoded form
      const samples: Array<[string, string]> = [
        ["readme.md", "readme.md"],
        ["meetings%2Fmeeting-2024-01.md", "meetings/meeting-2024-01.md"],
        ["api.md%3A10", "api.md:10"], // with line number
      ];
      samples.forEach(([encoded, decoded]) => {
        expect(decodeURIComponent(encoded)).toBe(decoded);
      });
    });

    test("handles double-encoded URLs", () => {
      // Some clients may double-encode
      const decodedOnce = decodeURIComponent("meetings%252Fmeeting-2024-01.md");
      expect(decodedOnce).toBe("meetings%2Fmeeting-2024-01.md");

      const decodedTwice = decodeURIComponent(decodedOnce);
      expect(decodedTwice).toBe("meetings/meeting-2024-01.md");
    });

    test("handles URL-encoded paths with spaces", () => {
      // Add a document with spaces in the path. The row is inserted into the
      // shared test database and is not removed afterwards.
      const timestamp = new Date().toISOString();
      const spacedPath = "External Podcast/2023 April - Interview.md";

      // Insert content first
      testDb.prepare(`
        INSERT OR IGNORE INTO content (hash, doc, created_at)
        VALUES (?, ?, ?)
      `).run("hash_spaces", "# Podcast Episode\n\nInterview content here.", timestamp);

      // Then insert document metadata
      testDb.prepare(`
        INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
        VALUES ('docs', ?, ?, ?, ?, ?, 1)
      `).run(spacedPath, "Podcast Episode", "hash_spaces", timestamp, timestamp);

      // Simulate URL-encoded path from MCP client
      const decodedPath = decodeURIComponent("External%20Podcast%2F2023%20April%20-%20Interview.md");
      expect(decodedPath).toBe("External Podcast/2023 April - Interview.md");

      const row = readExact(decodedPath);
      expect(row).not.toBeNull();
      expect(row?.display_path).toBe("External Podcast/2023 April - Interview.md");
      expect(row?.body).toContain("Podcast Episode");
    });
  });
  620. // ===========================================================================
  621. // Prompt: query
  622. // ===========================================================================
  // Sanity checks on the static text of the "query" prompt. The strings are
  // defined locally, so these tests only verify their structure.
  describe("query prompt", () => {
    test("returns usage guide", () => {
      // The prompt content is static, just verify the structure
      const promptContent = [
        "# QMD - Quick Markdown Search",
        "QMD is your on-device search engine for markdown knowledge bases.",
      ].join("\n");

      for (const expected of ["QMD", "search"]) {
        expect(promptContent).toContain(expected);
      }
    });

    test("describes all available tools", () => {
      // Verify each tool name is documented in the guide
      const promptGuide = [
        "",
        "### 1. qmd_search (Fast keyword search)",
        "### 2. qmd_vsearch (Semantic search)",
        "### 3. qmd_query (Hybrid search - highest quality)",
        "### 4. qmd_get (Retrieve document)",
        "### 5. qmd_multi_get (Retrieve multiple documents)",
        "### 6. qmd_status (Index info)",
        "",
      ].join("\n");

      const toolNames = ["qmd_search", "qmd_vsearch", "qmd_query", "qmd_get", "qmd_multi_get", "qmd_status"];
      toolNames.forEach(toolName => {
        expect(promptGuide).toContain(toolName);
      });
    });
  });
  654. // ===========================================================================
  655. // Edge Cases
  656. // ===========================================================================
  // Unusual search inputs and snippet extraction corner cases.
  describe("edge cases", () => {
    test("handles empty query", () => {
      const hits = searchFTS(testDb, "", 10);
      expect(hits.length).toBe(0);
    });

    // Queries that only need to complete without throwing and yield an array.
    const tolerantQueries: Array<[string, string]> = [
      ["handles special characters in query", "project's"],
      ["handles unicode in query", "文档"],
      ["handles very long query", "documentation ".repeat(100)],
      ["handles query with only stopwords", "the and or"],
    ];
    for (const [testName, query] of tolerantQueries) {
      test(testName, () => {
        const hits = searchFTS(testDb, query, 10);
        expect(Array.isArray(hits)).toBe(true);
      });
    }

    test("extracts snippet around matching text", () => {
      const text = "Line 1\nLine 2\nThis is the important line with the keyword\nLine 4\nLine 5";
      const extracted = extractSnippet(text, "keyword", 200);
      expect(extracted.snippet).toContain("keyword");
      // The match sits on the third line of the body.
      expect(extracted.line).toBe(3);
    });

    test("handles snippet extraction with chunkPos", () => {
      const text = "A".repeat(1000) + "KEYWORD" + "B".repeat(1000);
      const keywordOffset = 1000; // Position of KEYWORD
      const extracted = extractSnippet(text, "keyword", 200, keywordOffset);
      expect(extracted.snippet).toContain("KEYWORD");
    });
  });
  693. // ===========================================================================
  694. // MCP Spec Compliance
  695. // ===========================================================================
  // Shape checks for the payloads built from store results.
  describe("MCP spec compliance", () => {
    test("encodeQmdPath preserves slashes but encodes special chars", () => {
      // Mirrors the helper's behavior inline: encode each segment, keep "/".
      const rawPath = "External Podcast/2023 April - Interview.md";
      const encoded = rawPath
        .split("/")
        .map((segment) => encodeURIComponent(segment))
        .join("/");

      expect(encoded).toBe("External%20Podcast/2023%20April%20-%20Interview.md");
      expect(encoded).toContain("/"); // Slashes preserved
      expect(encoded).toContain("%20"); // Spaces encoded
    });

    test("search results have correct structure for structuredContent", () => {
      const hits = searchFTS(testDb, "readme", 5);
      const structured = hits.map((hit) => {
        const roundedScore = Math.round(hit.score * 100) / 100;
        const context = getContextForFile(testDb, hit.filepath);
        const { snippet } = extractSnippet(hit.body || "", "readme", 300, hit.chunkPos);
        return {
          file: hit.displayPath,
          title: hit.title,
          score: roundedScore,
          context,
          snippet,
        };
      });

      expect(structured.length).toBeGreaterThan(0);

      const first = structured[0]!;
      expect(typeof first.file).toBe("string");
      expect(typeof first.title).toBe("string");
      expect(typeof first.score).toBe("number");
      // Score is expected to lie within [0, 1].
      expect(first.score).toBeGreaterThanOrEqual(0);
      expect(first.score).toBeLessThanOrEqual(1);
      expect(typeof first.snippet).toBe("string");
    });

    test("error responses should include isError flag", () => {
      // Hand-built stand-in for the error payload shape.
      const failure = {
        content: [{ type: "text", text: "Collection not found: nonexistent" }],
        isError: true,
      };

      expect(failure.isError).toBe(true);
      expect(failure.content[0]!.type).toBe("text");
    });

    test("embedded resources include name and title", () => {
      // Build the resource object the way a document fetch would.
      const found = findDocument(testDb, "readme.md", { includeBody: false });
      expect("error" in found).toBe(false);
      if ("error" in found) return; // narrows the type for the lines below

      const text = getDocumentBody(testDb, found) ?? "";
      const embedded = {
        uri: `qmd://${found.displayPath}`,
        name: found.displayPath,
        title: found.title,
        mimeType: "text/markdown",
        text,
      };

      expect(embedded.name).toBe("docs/readme.md");
      expect(embedded.title).toBe("Project README");
      expect(embedded.mimeType).toBe("text/markdown");
    });

    test("status response includes structuredContent", () => {
      const report = getStatus(testDb);

      // Top-level fields of the status result.
      expect(typeof report.totalDocuments).toBe("number");
      expect(typeof report.needsEmbedding).toBe("number");
      expect(typeof report.hasVectorIndex).toBe("boolean");
      expect(Array.isArray(report.collections)).toBe(true);

      // Per-collection fields are only checked when a collection exists.
      if (report.collections.length === 0) return;

      const firstCollection = report.collections[0]!;
      expect(typeof firstCollection.name).toBe("string"); // Collections now use names, not IDs
      expect(typeof firstCollection.path).toBe("string");
      expect(typeof firstCollection.pattern).toBe("string");
      expect(typeof firstCollection.documents).toBe("number");
    });
  });
  765. });