mcp.ts 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608
  1. #!/usr/bin/env bun
  2. /**
  3. * QMD MCP Server - Model Context Protocol server for QMD
  4. *
  5. * Exposes QMD search and document retrieval as MCP tools and resources.
  6. * Documents are accessible via qmd:// URIs.
  7. *
  8. * Follows MCP spec 2025-06-18 for proper response types.
  9. */
  10. import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
  11. import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
  12. import { z } from "zod";
  13. import {
  14. createStore,
  15. reciprocalRankFusion,
  16. extractSnippet,
  17. DEFAULT_EMBED_MODEL,
  18. DEFAULT_QUERY_MODEL,
  19. DEFAULT_RERANK_MODEL,
  20. DEFAULT_MULTI_GET_MAX_BYTES,
  21. } from "./store.js";
  22. import type { RankedResult } from "./store.js";
  23. // =============================================================================
  24. // Types for structured content
  25. // =============================================================================
  /**
   * One search hit as returned in `structuredContent.results` by the
   * `search`, `vsearch` and `query` tools.
   */
  type SearchResultItem = {
    /** Short docid (#abc123) for quick reference. */
    docid: string;
    /** Display path of the matching document. */
    file: string;
    /** Document title. */
    title: string;
    /** Relevance score; the tools round it to two decimals before returning it. */
    score: number;
    /** Context string looked up for the file, or `null` when there is none. */
    context: string | null;
    /** Line-numbered excerpt of the document around the match. */
    snippet: string;
  };
  /**
   * Index status as reported by the `status` tool, both as text summary and
   * as `structuredContent`.
   */
  type StatusResult = {
    /** Total number of documents in the index. */
    totalDocuments: number;
    /** Number of documents reported as still needing embedding. */
    needsEmbedding: number;
    /** Whether a vector index is available. */
    hasVectorIndex: boolean;
    /** One entry per collection. */
    collections: Array<{
      id: number;
      /** Collection path; shown in the status summary. */
      path: string;
      // NOTE(review): presumably the glob pattern used to select files - confirm against the store.
      pattern: string;
      /** Document count; shown as "N docs" in the status summary. */
      documents: number;
      // NOTE(review): presumably a timestamp; its format is not visible in this file.
      lastUpdated: string;
    }>;
  };
  46. // =============================================================================
  47. // Helper functions
  48. // =============================================================================
  /**
   * Percent-encode a document path so it can be embedded in a `qmd://` URI.
   *
   * The path is split on `/` and every segment is passed through
   * `encodeURIComponent` individually, so the slashes themselves stay
   * literal and the resulting URI remains readable.
   *
   * @param path - Slash-separated document path.
   * @returns The path with each segment URI-encoded and slashes preserved.
   */
  function encodeQmdPath(path: string): string {
    const encodedSegments: string[] = [];
    for (const segment of path.split("/")) {
      encodedSegments.push(encodeURIComponent(segment));
    }
    return encodedSegments.join("/");
  }
  /**
   * Render search results as a short plain-text summary.
   *
   * With no results a single "No results found" sentence is returned.
   * Otherwise the output is a header line, an empty line, and then one row
   * per result with its docid, score as a whole-number percentage, file and
   * title.
   *
   * @param results - Hits to summarise, in display order.
   * @param query - The query string the results were produced for.
   * @returns The human-readable summary text.
   */
  function formatSearchSummary(results: SearchResultItem[], query: string): string {
    const count = results.length;
    if (count === 0) {
      return `No results found for "${query}"`;
    }

    const plural = count === 1 ? '' : 's';
    // The header keeps its trailing newline so that joining yields a blank separator line.
    const header = `Found ${count} result${plural} for "${query}":\n`;
    const rows = results.map(
      ({ docid, score, file, title }) => `${docid} ${Math.round(score * 100)}% ${file} - ${title}`
    );
    return [header, ...rows].join('\n');
  }
  /**
   * Prefix every line of `text` with its line number.
   *
   * Each line becomes `"{lineNum}: {content}"`. A single trailing newline is
   * treated as the terminator of the last line rather than as the start of
   * another (empty) line, so `"a\nb\n"` yields two numbered lines and keeps
   * its trailing newline. Empty text has no lines and is returned unchanged.
   *
   * @param text - Text to number; lines are separated by `\n`.
   * @param startLine - Number assigned to the first line (default 1).
   * @returns The text with every line prefixed by its number.
   */
  function addLineNumbers(text: string, startLine: number = 1): string {
    if (text === "") {
      return "";
    }

    // Strip one terminating newline so it does not produce a phantom numbered line.
    const endsWithNewline = text.endsWith("\n");
    const body = endsWithNewline ? text.slice(0, -1) : text;

    const numbered = body
      .split("\n")
      .map((line, index) => `${startLine + index}: ${line}`)
      .join("\n");

    return endsWithNewline ? `${numbered}\n` : numbered;
  }
  78. // =============================================================================
  79. // MCP Server
  80. // =============================================================================
  // ---------------------------------------------------------------------------
  // Private types, constants and helpers used by startMcpServer
  // ---------------------------------------------------------------------------

  /** Store handle as returned by `createStore()`; derived so no extra import is needed. */
  type QmdStore = ReturnType<typeof createStore>;

  /** Row shape selected by the document lookup queries of the `qmd://` resource. */
  type DocumentRow = { collection: string; path: string; title: string; body: string };

  /** A single content entry of the `multi_get` tool response. */
  type MultiGetContent =
    | { type: "text"; text: string }
    | { type: "resource"; resource: { uri: string; name: string; title?: string; mimeType: string; text: string } };

  /** Result count used when a search tool receives no usable `limit`. */
  const DEFAULT_RESULT_LIMIT = 10;

  /** Relevance floor applied by `vsearch` when `minScore` is omitted. */
  const DEFAULT_VSEARCH_MIN_SCORE = 0.3;

  /** Number of hits requested from each index per expanded query by the hybrid `query` tool. */
  const HYBRID_HITS_PER_QUERY = 20;

  /** Number of fused candidates handed to the reranker by the hybrid `query` tool. */
  const RERANK_CANDIDATE_LIMIT = 30;

  /** Third argument passed to `extractSnippet` (its unit is defined by the store module). */
  const SNIPPET_MAX_LENGTH = 300;

  /** Text of the "query" prompt: a usage guide for the tools registered below. */
  const QUERY_GUIDE_TEXT = `# QMD - Quick Markdown Search
QMD is your on-device search engine for markdown knowledge bases. Use it to find information across your notes, documents, and meeting transcripts.
## Available Tools
### 1. search (Fast keyword search)
Best for: Finding documents with specific keywords or phrases.
- Uses BM25 full-text search
- Fast, no LLM required
- Good for exact matches
- Use \`collection\` parameter to filter to a specific collection
### 2. vsearch (Semantic search)
Best for: Finding conceptually related content even without exact keyword matches.
- Uses vector embeddings
- Understands meaning and context
- Good for "how do I..." or conceptual queries
- Use \`collection\` parameter to filter to a specific collection
### 3. query (Hybrid search - highest quality)
Best for: Important searches where you want the best results.
- Combines keyword + semantic search
- Expands your query with variations
- Re-ranks results with LLM
- Slower but most accurate
- Use \`collection\` parameter to filter to a specific collection
### 4. get (Retrieve document)
Best for: Getting the full content of a single document you found.
- Use the file path from search results
- Supports line ranges: \`file.md:100\` or fromLine/maxLines parameters
- Suggests similar files if not found
### 5. multi_get (Retrieve multiple documents)
Best for: Getting content from multiple files at once.
- Use glob patterns: \`journals/2025-05*.md\`
- Or comma-separated: \`file1.md, file2.md\`
- Skips files over maxBytes (default 10KB) - use get for large files
### 6. status (Index info)
Shows collection info, document counts, and embedding status.
## Resources
You can also access documents directly via the \`qmd://\` URI scheme:
- Documents are not enumerated by \`resources/list\`; discover them with the search tools
- Read a document: \`resources/read\` with uri \`qmd://collection/path/to/file.md\`
## Search Strategy
1. **Start with search** for quick keyword lookups
2. **Use vsearch** when keywords aren't working or for conceptual queries
3. **Use query** for important searches or when you need high confidence
4. **Use get** to retrieve a single full document
5. **Use multi_get** to batch retrieve multiple related files
## Tips
- Use \`minScore: 0.5\` to filter low-relevance results
- Use \`collection: "notes"\` to search only in a specific collection
- Check the "Context" field - it describes what kind of content the file contains
- File paths are relative to their collection (e.g., \`pages/meeting.md\`)
- For glob patterns, match on display_path (e.g., \`journals/2025-*.md\`)`;

  /**
   * Normalise a caller-supplied result limit.
   *
   * @param limit - Requested maximum number of results, possibly omitted.
   * @returns The limit rounded down to a whole number, or
   *          `DEFAULT_RESULT_LIMIT` when it is missing, non-finite or not positive.
   */
  function resolveLimit(limit: number | undefined): number {
    const whole = Math.floor(limit ?? DEFAULT_RESULT_LIMIT);
    return Number.isFinite(whole) && whole > 0 ? whole : DEFAULT_RESULT_LIMIT;
  }

  /**
   * Escape `%`, `_` and `\` so a value is matched literally inside a SQL
   * `LIKE` pattern that declares `ESCAPE '\'`.
   */
  function escapeLikePattern(value: string): string {
    return value.replace(/[\\%_]/g, "\\$&");
  }

  /** Report whether the `vectors_vec` table exists in the store's database. */
  function hasVectorTable(store: QmdStore): boolean {
    return !!store.db
      .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`)
      .get();
  }

  /**
   * Work out which line number the first returned line of a `get` result has.
   *
   * An explicit positive `fromLine` wins; otherwise a trailing `:N` on the
   * file argument (the `pages/meeting.md:100` form advertised by the tool) is
   * used; otherwise numbering starts at 1.
   */
  function resolveStartLine(file: string, fromLine: number | undefined): number {
    if (fromLine !== undefined && fromLine > 0) {
      return fromLine;
    }
    // NOTE(review): assumes the store starts at N for a "file:N" argument, as the
    // tool description states - confirm against getDocument.
    const suffix = /:(\d+)$/.exec(file);
    const parsed = suffix ? Number.parseInt(suffix[1] ?? "", 10) : Number.NaN;
    return parsed > 0 ? parsed : 1;
  }

  /**
   * Register the `qmd://{+path}` resource: read-only access to a document by
   * its virtual path `collection/relative/path`.
   *
   * No list callback is provided; documents are discovered via the search tools.
   */
  function registerDocumentResource(server: McpServer, store: QmdStore): void {
    server.registerResource(
      "document",
      // `list: undefined` makes the absence of a listing callback explicit.
      new ResourceTemplate("qmd://{+path}", { list: undefined }),
      {
        title: "QMD Document",
        description: "A markdown document from your QMD knowledge base. Use search tools to discover documents.",
        mimeType: "text/markdown",
      },
      async (uri, { path }) => {
        // Template variables are typed as string | string[]; collapse to one string.
        const rawPath = Array.isArray(path) ? path.join(",") : path;

        // Decode URL-encoded path (MCP clients send encoded URIs). A malformed
        // escape sequence falls back to the raw text instead of throwing.
        let decodedPath: string;
        try {
          decodedPath = decodeURIComponent(rawPath);
        } catch {
          decodedPath = rawPath;
        }

        // Parse virtual path: collection/relative/path
        const [collection = "", ...rest] = decodedPath.split('/');
        const relativePath = rest.join('/');

        // Find document by collection and path, join with content table
        let doc = store.db.prepare(`
          SELECT d.collection, d.path, d.title, c.doc as body
          FROM documents d
          JOIN content c ON c.hash = d.hash
          WHERE d.collection = ? AND d.path = ? AND d.active = 1
        `).get(collection, relativePath) as DocumentRow | null;

        // Try a suffix match if the exact match fails. When the URI carries no
        // collection segment the whole path is used as the suffix; an empty
        // suffix is never queried because it would match every document.
        const suffix = relativePath || decodedPath;
        if (!doc && suffix) {
          doc = store.db.prepare(`
            SELECT d.collection, d.path, d.title, c.doc as body
            FROM documents d
            JOIN content c ON c.hash = d.hash
            WHERE d.path LIKE ? ESCAPE '\\' AND d.active = 1
            LIMIT 1
          `).get(`%${escapeLikePattern(suffix)}`) as DocumentRow | null;
        }

        if (!doc) {
          return { contents: [{ uri: uri.href, text: `Document not found: ${decodedPath}` }] };
        }

        // Construct virtual path for context lookup
        const virtualPath = `qmd://${doc.collection}/${doc.path}`;
        const context = store.getContextForFile(virtualPath);

        let text = addLineNumbers(doc.body); // Resources are always line-numbered
        if (context) {
          text = `<!-- Context: ${context} -->\n\n` + text;
        }

        return {
          contents: [{
            uri: uri.href,
            name: `${doc.collection}/${doc.path}`,
            title: doc.title || doc.path,
            mimeType: "text/markdown",
            text,
          }],
        };
      }
    );
  }

  /** Register the "query" prompt, which returns the usage guide as a single user message. */
  function registerQueryPrompt(server: McpServer): void {
    server.registerPrompt(
      "query",
      {
        title: "QMD Query Guide",
        description: "How to effectively search your knowledge base with QMD",
      },
      () => ({
        messages: [
          {
            role: "user",
            content: {
              type: "text",
              text: QUERY_GUIDE_TEXT,
            },
          },
        ],
      })
    );
  }

  /** Register the `search` tool: BM25 full-text search. */
  function registerSearchTool(server: McpServer, store: QmdStore): void {
    server.registerTool(
      "search",
      {
        title: "Search (BM25)",
        description: "Fast keyword-based full-text search using BM25. Best for finding documents with specific words or phrases.",
        inputSchema: {
          query: z.string().describe("Search query - keywords or phrases to find"),
          limit: z.number().optional().default(DEFAULT_RESULT_LIMIT).describe("Maximum number of results (default: 10)"),
          minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
          collection: z.string().optional().describe("Filter to a specific collection by name"),
        },
      },
      async ({ query, limit, minScore, collection }) => {
        const threshold = minScore ?? 0;

        // Collection filtering happens after the search, so a filtered call can
        // return fewer than `limit` hits.
        const filtered: SearchResultItem[] = store
          .searchFTS(query, resolveLimit(limit))
          .filter(r => !collection || r.collectionName === collection)
          .filter(r => r.score >= threshold)
          .map(r => {
            const { line, snippet } = extractSnippet(r.body ?? "", query, SNIPPET_MAX_LENGTH, r.chunkPos);
            return {
              docid: `#${r.docid}`,
              file: r.displayPath,
              title: r.title,
              score: Math.round(r.score * 100) / 100,
              context: store.getContextForFile(r.filepath),
              snippet: addLineNumbers(snippet, line),
            };
          });

        return {
          content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
          structuredContent: { results: filtered },
        };
      }
    );
  }

  /** Register the `vsearch` tool: vector similarity search over expanded queries. */
  function registerVectorSearchTool(server: McpServer, store: QmdStore): void {
    server.registerTool(
      "vsearch",
      {
        title: "Vector Search (Semantic)",
        description: "Semantic similarity search using vector embeddings. Finds conceptually related content even without exact keyword matches. Requires embeddings (run 'qmd embed' first).",
        inputSchema: {
          query: z.string().describe("Natural language query - describe what you're looking for"),
          limit: z.number().optional().default(DEFAULT_RESULT_LIMIT).describe("Maximum number of results (default: 10)"),
          minScore: z.number().optional().default(DEFAULT_VSEARCH_MIN_SCORE).describe("Minimum relevance score 0-1 (default: 0.3)"),
          collection: z.string().optional().describe("Filter to a specific collection by name"),
        },
      },
      async ({ query, limit, minScore, collection }) => {
        if (!hasVectorTable(store)) {
          return {
            content: [{ type: "text", text: "Vector index not found. Run 'qmd embed' first to create embeddings." }],
            isError: true,
          };
        }

        const maxResults = resolveLimit(limit);
        // `??` keeps an explicit minScore of 0 instead of replacing it with the default.
        const threshold = minScore ?? DEFAULT_VSEARCH_MIN_SCORE;

        // Expand query
        const queries = await store.expandQuery(query, DEFAULT_QUERY_MODEL);

        // Collect results across all query variants, keeping the best score per file
        const bestByFile = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; docid: string }>();
        for (const q of queries) {
          const hits = (await store.searchVec(q, DEFAULT_EMBED_MODEL, maxResults))
            .filter(r => !collection || r.collectionName === collection);
          for (const r of hits) {
            const existing = bestByFile.get(r.filepath);
            if (!existing || r.score > existing.score) {
              bestByFile.set(r.filepath, {
                file: r.filepath,
                displayPath: r.displayPath,
                title: r.title,
                body: r.body ?? "",
                score: r.score,
                docid: r.docid,
              });
            }
          }
        }

        const filtered: SearchResultItem[] = Array.from(bestByFile.values())
          .sort((a, b) => b.score - a.score)
          .slice(0, maxResults)
          .filter(r => r.score >= threshold)
          .map(r => {
            const { line, snippet } = extractSnippet(r.body, query, SNIPPET_MAX_LENGTH);
            return {
              docid: `#${r.docid}`,
              file: r.displayPath,
              title: r.title,
              score: Math.round(r.score * 100) / 100,
              context: store.getContextForFile(r.file),
              snippet: addLineNumbers(snippet, line),
            };
          });

        return {
          content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
          structuredContent: { results: filtered },
        };
      }
    );
  }

  /**
   * Register the `query` tool: query expansion, BM25 and (when available)
   * vector search per variant, reciprocal rank fusion, reranking, and a
   * blend of fusion rank and reranker score.
   */
  function registerHybridQueryTool(server: McpServer, store: QmdStore): void {
    server.registerTool(
      "query",
      {
        title: "Hybrid Query (Best Quality)",
        description: "Highest quality search combining BM25 + vector + query expansion + LLM reranking. Slower but most accurate. Use for important searches.",
        inputSchema: {
          query: z.string().describe("Natural language query - describe what you're looking for"),
          limit: z.number().optional().default(DEFAULT_RESULT_LIMIT).describe("Maximum number of results (default: 10)"),
          minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
          collection: z.string().optional().describe("Filter to a specific collection by name"),
        },
      },
      async ({ query, limit, minScore, collection }) => {
        const maxResults = resolveLimit(limit);
        const threshold = minScore ?? 0;

        // Expand query
        const queries = await store.expandQuery(query, DEFAULT_QUERY_MODEL);

        // Collect ranked lists (filter by collection after search)
        const rankedLists: RankedResult[][] = [];
        const docidMap = new Map<string, string>(); // filepath -> docid
        const hasVectors = hasVectorTable(store);

        for (const q of queries) {
          const ftsResults = store.searchFTS(q, HYBRID_HITS_PER_QUERY)
            .filter(r => !collection || r.collectionName === collection);
          if (ftsResults.length > 0) {
            for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
            rankedLists.push(ftsResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body ?? "", score: r.score })));
          }

          if (hasVectors) {
            const vecResults = (await store.searchVec(q, DEFAULT_EMBED_MODEL, HYBRID_HITS_PER_QUERY))
              .filter(r => !collection || r.collectionName === collection);
            if (vecResults.length > 0) {
              for (const r of vecResults) docidMap.set(r.filepath, r.docid);
              rankedLists.push(vecResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body ?? "", score: r.score })));
            }
          }
        }

        // RRF fusion: the first two collected lists count double.
        // NOTE(review): empty lists are skipped above, so "first two" is by position
        // in rankedLists, not by query - confirm this is the intended weighting.
        const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
        const fused = reciprocalRankFusion(rankedLists, weights);
        const candidates = fused.slice(0, RERANK_CANDIDATE_LIMIT);

        // Nothing matched: skip the reranker call and report an empty result.
        if (candidates.length === 0) {
          const none: SearchResultItem[] = [];
          return {
            content: [{ type: "text", text: formatSearchSummary(none, query) }],
            structuredContent: { results: none },
          };
        }

        // Rerank
        const reranked = await store.rerank(
          query,
          candidates.map(c => ({ file: c.file, text: c.body })),
          DEFAULT_RERANK_MODEL
        );

        // Blend scores: the better the fusion rank, the more it counts against
        // the reranker score.
        const candidateMap = new Map(candidates.map(c => [c.file, { displayPath: c.displayPath, title: c.title, body: c.body }]));
        const rrfRankMap = new Map(candidates.map((c, i) => [c.file, i + 1]));

        const filtered: SearchResultItem[] = reranked
          .map(r => {
            const rrfRank = rrfRankMap.get(r.file) ?? candidates.length;
            let rrfWeight: number;
            if (rrfRank <= 3) rrfWeight = 0.75;
            else if (rrfRank <= 10) rrfWeight = 0.60;
            else rrfWeight = 0.40;

            const rrfScore = 1 / rrfRank;
            const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * r.score;

            const candidate = candidateMap.get(r.file);
            const { line, snippet } = extractSnippet(candidate?.body ?? "", query, SNIPPET_MAX_LENGTH);
            return {
              docid: `#${docidMap.get(r.file) ?? ""}`,
              file: candidate?.displayPath ?? "",
              title: candidate?.title ?? "",
              score: Math.round(blendedScore * 100) / 100,
              context: store.getContextForFile(r.file),
              snippet: addLineNumbers(snippet, line),
            };
          })
          .filter(r => r.score >= threshold)
          .slice(0, maxResults);

        return {
          content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
          structuredContent: { results: filtered },
        };
      }
    );
  }

  /** Register the `get` tool: retrieve one document by path or docid. */
  function registerGetTool(server: McpServer, store: QmdStore): void {
    server.registerTool(
      "get",
      {
        title: "Get Document",
        description: "Retrieve the full content of a document by its file path or docid. Use paths or docids (#abc123) from search results. Suggests similar files if not found.",
        inputSchema: {
          file: z.string().describe("File path or docid from search results (e.g., 'pages/meeting.md', '#abc123', or 'pages/meeting.md:100' to start at line 100)"),
          fromLine: z.number().optional().describe("Start from this line number (1-indexed)"),
          maxLines: z.number().optional().describe("Maximum number of lines to return"),
          lineNumbers: z.boolean().optional().default(false).describe("Add line numbers to output (format: 'N: content')"),
        },
      },
      async ({ file, fromLine, maxLines, lineNumbers }) => {
        const result = store.getDocument(file, fromLine, maxLines);

        if ("error" in result) {
          let msg = `Document not found: ${file}`;
          if (result.similarFiles.length > 0) {
            msg += `\n\nDid you mean one of these?\n${result.similarFiles.map(s => ` - ${s}`).join('\n')}`;
          }
          return {
            content: [{ type: "text", text: msg }],
            isError: true,
          };
        }

        let text = result.body;
        if (lineNumbers) {
          // Number from the line the excerpt actually starts at.
          text = addLineNumbers(text, resolveStartLine(file, fromLine));
        }
        if (result.context) {
          text = `<!-- Context: ${result.context} -->\n\n` + text;
        }

        return {
          content: [{
            type: "resource",
            resource: {
              uri: `qmd://${encodeQmdPath(result.displayPath)}`,
              name: result.displayPath,
              title: result.title,
              mimeType: "text/markdown",
              text,
            },
          }],
        };
      }
    );
  }

  /** Register the `multi_get` tool: retrieve several documents by glob or comma-separated list. */
  function registerMultiGetTool(server: McpServer, store: QmdStore): void {
    server.registerTool(
      "multi_get",
      {
        title: "Multi-Get Documents",
        description: "Retrieve multiple documents by glob pattern (e.g., 'journals/2025-05*.md') or comma-separated list. Skips files larger than maxBytes.",
        inputSchema: {
          pattern: z.string().describe("Glob pattern or comma-separated list of file paths"),
          maxLines: z.number().optional().describe("Maximum lines per file"),
          maxBytes: z.number().optional().default(10240).describe("Skip files larger than this (default: 10240 = 10KB)"),
          lineNumbers: z.boolean().optional().default(false).describe("Add line numbers to output (format: 'N: content')"),
        },
      },
      async ({ pattern, maxLines, maxBytes, lineNumbers }) => {
        // A missing or non-positive maxBytes falls back to the store's default.
        const byteLimit = maxBytes !== undefined && maxBytes > 0 ? maxBytes : DEFAULT_MULTI_GET_MAX_BYTES;
        const { files, errors } = store.getMultipleDocuments(pattern, maxLines, byteLimit);

        if (files.length === 0 && errors.length === 0) {
          return {
            content: [{ type: "text", text: `No files matched pattern: ${pattern}` }],
            isError: true,
          };
        }

        const content: MultiGetContent[] = [];

        if (errors.length > 0) {
          content.push({ type: "text", text: `Errors:\n${errors.join('\n')}` });
        }

        for (const file of files) {
          if (file.skipped) {
            // Point at the tool under the name it is registered with ("get").
            content.push({
              type: "text",
              text: `[SKIPPED: ${file.displayPath} - ${file.skipReason}. Use 'get' with file="${file.displayPath}" to retrieve.]`,
            });
            continue;
          }

          let text = file.body;
          if (lineNumbers) {
            text = addLineNumbers(text);
          }
          if (file.context) {
            text = `<!-- Context: ${file.context} -->\n\n` + text;
          }

          content.push({
            type: "resource",
            resource: {
              uri: `qmd://${encodeQmdPath(file.displayPath)}`,
              name: file.displayPath,
              title: file.title,
              mimeType: "text/markdown",
              text,
            },
          });
        }

        return { content };
      }
    );
  }

  /** Register the `status` tool: index status as a text summary and as structured content. */
  function registerStatusTool(server: McpServer, store: QmdStore): void {
    server.registerTool(
      "status",
      {
        title: "Index Status",
        description: "Show the status of the QMD index: collections, document counts, and health information.",
        inputSchema: {},
      },
      async () => {
        const status: StatusResult = store.getStatus();

        const summary = [
          `QMD Index Status:`,
          ` Total documents: ${status.totalDocuments}`,
          ` Needs embedding: ${status.needsEmbedding}`,
          ` Vector index: ${status.hasVectorIndex ? 'yes' : 'no'}`,
          ` Collections: ${status.collections.length}`,
        ];
        for (const col of status.collections) {
          summary.push(` - ${col.path} (${col.documents} docs)`);
        }

        return {
          content: [{ type: "text", text: summary.join('\n') }],
          structuredContent: status,
        };
      }
    );
  }

  /**
   * Start the QMD MCP server on stdio.
   *
   * Creates the store once, registers the `qmd://` document resource, the
   * "query" prompt and the `search`, `vsearch`, `query`, `get`, `multi_get`
   * and `status` tools, then connects the server to a stdio transport.
   *
   * @returns A promise that resolves once the server is connected to the transport.
   */
  export async function startMcpServer(): Promise<void> {
    // Open database once at startup - keep it open for the lifetime of the server
    const store = createStore();

    const server = new McpServer({
      name: "qmd",
      version: "1.0.0",
    });

    registerDocumentResource(server, store);
    registerQueryPrompt(server);
    registerSearchTool(server, store);
    registerVectorSearchTool(server, store);
    registerHybridQueryTool(server, store);
    registerGetTool(server, store);
    registerMultiGetTool(server, store);
    registerStatusTool(server, store);

    // Connect via stdio
    const transport = new StdioServerTransport();
    await server.connect(transport);

    // Note: the store is never closed here; it stays open until the process exits
  }
  // Run if this is the main module. A startup failure is written to stderr
  // and reflected in the exit code so that whoever launched the server can
  // tell it did not come up.
  if (import.meta.main) {
    startMcpServer().catch((error: unknown) => {
      console.error(error);
      process.exitCode = 1;
    });
  }