formatter.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  /**
   * formatter.ts - Output formatting utilities for QMD
   *
   * Provides functions to format search results and documents into various output formats:
   * JSON, CSV, XML, Markdown, files list, and CLI (colored terminal output).
   */
  7. import { extractSnippet } from "./store.js";
  8. import type { SearchResult, MultiGetResult, DocumentResult } from "./store.js";
  // =============================================================================
  // Types
  // =============================================================================
  12. // Re-export store types for convenience
  13. export type { SearchResult, MultiGetResult, DocumentResult };
  /** Fields shared by every multi-get entry, whether or not it was skipped. */
  type MultiGetFileCommon = {
    filepath: string;
    displayPath: string;
    title: string;
    body: string;
    context?: string | null;
  };

  /**
   * Flattened type for formatter convenience (extracts info from MultiGetResult).
   *
   * Discriminated on `skipped`: only the `skipped: true` variant carries a
   * `skipReason`.
   */
  export type MultiGetFile =
    | (MultiGetFileCommon & { skipped: false })
    | (MultiGetFileCommon & { skipped: true; skipReason: string });
  /** Identifier of an output format accepted by the format* dispatch functions. */
  export type OutputFormat =
    | "cli"
    | "csv"
    | "md"
    | "xml"
    | "files"
    | "json";
  /** Optional switches understood by the search-result formatters. */
  export type FormatOptions = {
    /** Show full document content instead of snippet */
    full?: boolean;
    /** Query for snippet extraction and highlighting */
    query?: string;
    /** Enable terminal colors (default: false for non-CLI) */
    useColor?: boolean;
    /** Add line numbers to output */
    lineNumbers?: boolean;
  };
  // =============================================================================
  // Helper Functions
  // =============================================================================
  /**
   * Prefix every line of `text` with its line number.
   * Each line becomes: "{lineNum}: {content}"
   *
   * @param text The text to number; it is split on "\n"
   * @param startLine Number given to the first line (default: 1)
   * @returns The numbered lines joined back together with "\n"
   */
  export function addLineNumbers(text: string, startLine: number = 1): string {
    const pieces = text.split('\n');
    const numbered: string[] = [];
    for (let offset = 0; offset < pieces.length; offset++) {
      numbered.push(`${startLine + offset}: ${pieces[offset]}`);
    }
    return numbered.join('\n');
  }
  /**
   * Derive the short docid from a full hash.
   *
   * @param hash Full hash string
   * @returns The first 6 characters of `hash` (the whole string if it is shorter)
   */
  export function getDocid(hash: string): string {
    const docidLength = 6;
    return hash.substring(0, docidLength);
  }
  // =============================================================================
  // Escape Helpers
  // =============================================================================
  /**
   * Escape a single value for use as one CSV field.
   *
   * `null` and `undefined` become an empty field. Any other value is
   * stringified; if it contains a comma, a double quote, or a line break
   * (either "\n" or "\r") it is wrapped in double quotes with embedded quotes
   * doubled. Otherwise the stringified value is returned unchanged.
   *
   * @param value The cell value to escape
   * @returns The CSV-safe field text
   */
  export function escapeCSV(value: string | number | null | undefined): string {
    if (value === null || value === undefined) return "";
    const str = String(value);
    // A bare carriage return is also a record separator for many CSV readers,
    // so it must force quoting just like "\n".
    if (/[",\r\n]/.test(str)) {
      return `"${str.replace(/"/g, '""')}"`;
    }
    return str;
  }
  /**
   * Escape the five XML special characters so `str` can be placed inside
   * element content or a quoted attribute value.
   *
   * @param str Raw text
   * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by their entities
   */
  export function escapeXml(str: string): string {
    return str
      // Ampersand goes first so the entities produced below are not escaped again.
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&apos;");
  }
  // =============================================================================
  // Search Results Formatters
  // =============================================================================
  /**
   * Serialize search results as a pretty-printed (2-space) JSON array.
   *
   * Each entry carries `docid` (prefixed with "#"), `score` rounded to two
   * decimals, `file` and `title`. `context` is included only when truthy.
   * With `opts.full` the entry has `body`; otherwise it has `snippet`, taken
   * from `extractSnippet(body, query, 300, row.chunkPos)`. Empty body/snippet
   * values are omitted. `opts.lineNumbers` numbers whichever text is present.
   *
   * @param results Search results to serialize
   * @param opts Formatting options (`full`, `query`, `lineNumbers` are read)
   * @returns The JSON text
   */
  export function searchResultsToJson(
    results: SearchResult[],
    opts: FormatOptions = {}
  ): string {
    const query = opts.query || "";
    const entries = results.map(row => {
      const source = row.body || "";
      let body: string | undefined;
      let snippet: string | undefined;
      if (opts.full) {
        body = source;
      } else {
        snippet = extractSnippet(source, query, 300, row.chunkPos).snippet;
      }
      if (opts.lineNumbers) {
        if (body) body = addLineNumbers(body);
        if (snippet) snippet = addLineNumbers(snippet);
      }
      // Keys are inserted in the order they must appear in the output.
      const entry: Record<string, unknown> = {
        docid: `#${row.docid}`,
        score: Math.round(row.score * 100) / 100,
        file: row.displayPath,
        title: row.title,
      };
      if (row.context) entry.context = row.context;
      if (body) entry.body = body;
      if (snippet) entry.snippet = snippet;
      return entry;
    });
    return JSON.stringify(entries, null, 2);
  }
  /**
   * Serialize search results as CSV with the header
   * `docid,score,file,title,context,line,snippet`.
   *
   * The last column holds the full body when `opts.full` is set, otherwise the
   * snippet from `extractSnippet(body, query, 500, row.chunkPos)`. The `line`
   * column always comes from that same `extractSnippet` call. Score is written
   * with four decimals. `opts.lineNumbers` numbers non-empty content.
   *
   * @param results Search results to serialize
   * @param opts Formatting options (`full`, `query`, `lineNumbers` are read)
   * @returns Header plus one row per result, joined with "\n"
   */
  export function searchResultsToCsv(
    results: SearchResult[],
    opts: FormatOptions = {}
  ): string {
    const query = opts.query || "";
    const lines: string[] = ["docid,score,file,title,context,line,snippet"];
    for (const row of results) {
      const source = row.body || "";
      const extracted = extractSnippet(source, query, 500, row.chunkPos);
      let content = opts.full ? source : extracted.snippet;
      if (opts.lineNumbers && content) {
        content = addLineNumbers(content);
      }
      const cells = [
        `#${row.docid}`,
        row.score.toFixed(4),
        escapeCSV(row.displayPath),
        escapeCSV(row.title),
        escapeCSV(row.context || ""),
        extracted.line,
        escapeCSV(content),
      ];
      lines.push(cells.join(","));
    }
    return lines.join("\n");
  }
  /**
   * Serialize search results as a simple files list, one line per result:
   * `#docid,score,filepath` followed by `,"context"` when a context exists.
   *
   * Score is written with two decimals. The context is always wrapped in double
   * quotes, with embedded quotes doubled.
   *
   * @param results Search results to list
   * @returns The lines joined with "\n"
   */
  export function searchResultsToFiles(results: SearchResult[]): string {
    const lines: string[] = [];
    for (const row of results) {
      let line = `#${row.docid},${row.score.toFixed(2)},${row.displayPath}`;
      if (row.context) {
        line += `,"${row.context.replace(/"/g, '""')}"`;
      }
      lines.push(line);
    }
    return lines.join("\n");
  }
  /**
   * Serialize search results as Markdown, one section per result.
   *
   * A section starts with a `---` rule and a `#` heading (title, falling back
   * to the display path), then the docid, an optional context line, and the
   * content. Content is the full body when `opts.full` is set, otherwise the
   * snippet from `extractSnippet(body, query, 500, row.chunkPos)`.
   * `opts.lineNumbers` numbers the content.
   *
   * @param results Search results to serialize
   * @param opts Formatting options (`full`, `query`, `lineNumbers` are read)
   * @returns The sections joined with "\n"
   */
  export function searchResultsToMarkdown(
    results: SearchResult[],
    opts: FormatOptions = {}
  ): string {
    const query = opts.query || "";
    const sections: string[] = [];
    for (const row of results) {
      const source = row.body || "";
      let content: string = opts.full
        ? source
        : extractSnippet(source, query, 500, row.chunkPos).snippet;
      if (opts.lineNumbers) {
        content = addLineNumbers(content);
      }
      const parts = [
        "---",
        `# ${row.title || row.displayPath}`,
        "",
        `**docid:** \`#${row.docid}\``,
      ];
      if (row.context) {
        parts.push(`**context:** ${row.context}`);
      }
      // Blank separator, the content itself, and a trailing newline.
      parts.push("", `${content}`, "");
      sections.push(parts.join("\n"));
    }
    return sections.join("\n");
  }
  /**
   * Serialize search results as a sequence of `<file>` elements.
   *
   * Each element has `docid` and `name` attributes, plus `title` and `context`
   * attributes when those values are truthy. The element text is the full body
   * when `opts.full` is set, otherwise the snippet from
   * `extractSnippet(body, query, 500, row.chunkPos)`; `opts.lineNumbers`
   * numbers it. Attribute values (except docid) and the text go through
   * `escapeXml`.
   *
   * @param results Search results to serialize
   * @param opts Formatting options (`full`, `query`, `lineNumbers` are read)
   * @returns The elements separated by a blank line; no XML declaration or root
   */
  export function searchResultsToXml(
    results: SearchResult[],
    opts: FormatOptions = {}
  ): string {
    const query = opts.query || "";
    const elements: string[] = [];
    for (const row of results) {
      const source = row.body || "";
      let content = opts.full
        ? source
        : extractSnippet(source, query, 500, row.chunkPos).snippet;
      if (opts.lineNumbers) {
        content = addLineNumbers(content);
      }
      const attrs = [`docid="#${row.docid}"`, `name="${escapeXml(row.displayPath)}"`];
      if (row.title) attrs.push(`title="${escapeXml(row.title)}"`);
      if (row.context) attrs.push(`context="${escapeXml(row.context)}"`);
      elements.push(`<file ${attrs.join(" ")}>\n${escapeXml(content)}\n</file>`);
    }
    return elements.join("\n\n");
  }
  /**
   * Serialize pre-extracted search results for MCP as CSV with the header
   * `docid,file,title,score,context,snippet`.
   *
   * Every cell is passed through `escapeCSV`; the docid is prefixed with "#"
   * and a missing context becomes an empty cell.
   *
   * @param results Rows that already carry their snippet text
   * @returns Header plus one row per result, joined with "\n"
   */
  export function searchResultsToMcpCsv(
    results: { docid: string; file: string; title: string; score: number; context: string | null; snippet: string }[]
  ): string {
    const lines: string[] = ["docid,file,title,score,context,snippet"];
    for (const r of results) {
      const cells = [
        escapeCSV(`#${r.docid}`),
        escapeCSV(r.file),
        escapeCSV(r.title),
        escapeCSV(r.score),
        escapeCSV(r.context || ""),
        escapeCSV(r.snippet),
      ];
      lines.push(cells.join(","));
    }
    return lines.join("\n");
  }
  // =============================================================================
  // Document Formatters (for multi-get, using the flattened MultiGetFile type)
  // =============================================================================
  /**
   * Serialize multi-get documents as a pretty-printed (2-space) JSON array.
   *
   * Each entry has `file` and `title`, `context` only when truthy, and then
   * either `skipped: true` with `reason` (skipped documents) or `body`.
   *
   * @param results Documents to serialize
   * @returns The JSON text
   */
  export function documentsToJson(results: MultiGetFile[]): string {
    const entries = results.map(r => {
      // Keys are inserted in the order they must appear in the output.
      const entry: Record<string, unknown> = {
        file: r.displayPath,
        title: r.title,
      };
      if (r.context) entry.context = r.context;
      if (r.skipped) {
        entry.skipped = true;
        entry.reason = r.skipReason;
      } else {
        entry.body = r.body;
      }
      return entry;
    });
    return JSON.stringify(entries, null, 2);
  }
  /**
   * Serialize multi-get documents as CSV with the header
   * `file,title,context,skipped,body`.
   *
   * For skipped documents the `body` column holds the skip reason instead of
   * the body. Every cell is passed through `escapeCSV`.
   *
   * @param results Documents to serialize
   * @returns Header plus one row per document, joined with "\n"
   */
  export function documentsToCsv(results: MultiGetFile[]): string {
    const lines: string[] = ["file,title,context,skipped,body"];
    for (const r of results) {
      const lastCell = r.skipped ? (r.skipReason || "") : r.body;
      const cells = [
        escapeCSV(r.displayPath),
        escapeCSV(r.title),
        escapeCSV(r.context || ""),
        escapeCSV(r.skipped ? "true" : "false"),
        escapeCSV(lastCell),
      ];
      lines.push(cells.join(","));
    }
    return lines.join("\n");
  }
  /**
   * Serialize multi-get documents as a files list, one line per document:
   * the display path, then `,"context"` when a context exists, then
   * `,[SKIPPED]` for skipped documents.
   *
   * The context is always wrapped in double quotes, with embedded quotes doubled.
   *
   * @param results Documents to list
   * @returns The lines joined with "\n"
   */
  export function documentsToFiles(results: MultiGetFile[]): string {
    const lines: string[] = [];
    for (const r of results) {
      let line = `${r.displayPath}`;
      if (r.context) {
        line += `,"${r.context.replace(/"/g, '""')}"`;
      }
      if (r.skipped) {
        line += ",[SKIPPED]";
      }
      lines.push(line);
    }
    return lines.join("\n");
  }
  /**
   * Pick a code-fence delimiter that cannot be closed by anything inside `text`:
   * at least three backticks, and one more than the longest backtick run found.
   */
  function markdownFenceFor(text: string): string {
    const runs = String(text).match(/`+/g) || [];
    let longest = 0;
    for (const run of runs) {
      if (run.length > longest) longest = run.length;
    }
    return "`".repeat(Math.max(3, longest + 1));
  }

  /**
   * Serialize multi-get documents as Markdown, one `##` section per document.
   *
   * A section shows the display path as heading, the title when it is set and
   * differs from the display path, and the context when present. Skipped
   * documents then show the skip reason as a blockquote; other documents show
   * their body inside a fenced code block.
   *
   * The fence is normally three backticks. It is lengthened when the body
   * itself contains backtick runs (for example a Markdown document with its own
   * fenced code), so that an inner fence cannot terminate the outer block.
   *
   * @param results Documents to serialize
   * @returns The sections joined with "\n"
   */
  export function documentsToMarkdown(results: MultiGetFile[]): string {
    return results.map(r => {
      let md = `## ${r.displayPath}\n\n`;
      if (r.title && r.title !== r.displayPath) md += `**Title:** ${r.title}\n\n`;
      if (r.context) md += `**Context:** ${r.context}\n\n`;
      if (r.skipped) {
        md += `> ${r.skipReason}\n`;
      } else {
        const fence = markdownFenceFor(r.body);
        md += fence + "\n" + r.body + "\n" + fence + "\n";
      }
      return md;
    }).join("\n");
  }
  /**
   * Serialize multi-get documents as an XML document with a `<documents>` root.
   *
   * Each `<document>` holds `<file>` and `<title>`, `<context>` when present,
   * and then either `<skipped>true</skipped>` with `<reason>` (skipped
   * documents) or `<body>`. All text values go through `escapeXml`.
   *
   * @param results Documents to serialize
   * @returns XML text starting with the XML declaration
   */
  export function documentsToXml(results: MultiGetFile[]): string {
    const items: string[] = [];
    for (const r of results) {
      const lines = [
        " <document>",
        ` <file>${escapeXml(r.displayPath)}</file>`,
        ` <title>${escapeXml(r.title)}</title>`,
      ];
      if (r.context) {
        lines.push(` <context>${escapeXml(r.context)}</context>`);
      }
      if (r.skipped) {
        lines.push(` <skipped>true</skipped>`);
        lines.push(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
      } else {
        lines.push(` <body>${escapeXml(r.body)}</body>`);
      }
      lines.push(" </document>");
      items.push(lines.join("\n"));
    }
    const body = items.join("\n");
    return `<?xml version="1.0" encoding="UTF-8"?>\n<documents>\n${body}\n</documents>`;
  }
  // =============================================================================
  // Single Document Formatters
  // =============================================================================
  /**
   * Serialize a single DocumentResult as pretty-printed (2-space) JSON.
   *
   * Keys appear in the order `file`, `title`, `context`, `hash`, `modifiedAt`,
   * `bodyLength`, `body`. `context` is included only when truthy and `body`
   * only when it is not `undefined`.
   *
   * @param doc The document to serialize
   * @returns The JSON text
   */
  export function documentToJson(doc: DocumentResult): string {
    const payload: Record<string, unknown> = {
      file: doc.displayPath,
      title: doc.title,
    };
    if (doc.context) {
      payload.context = doc.context;
    }
    payload.hash = doc.hash;
    payload.modifiedAt = doc.modifiedAt;
    payload.bodyLength = doc.bodyLength;
    if (doc.body !== undefined) {
      payload.body = doc.body;
    }
    return JSON.stringify(payload, null, 2);
  }
  /**
   * Serialize a single DocumentResult as Markdown.
   *
   * Output is a `#` heading (title, falling back to the display path), an
   * optional context line, the file and modified lines, and, when `body` is
   * not `undefined`, a `---` rule followed by the body.
   *
   * @param doc The document to serialize
   * @returns The Markdown text
   */
  export function documentToMarkdown(doc: DocumentResult): string {
    const chunks: string[] = [`# ${doc.title || doc.displayPath}\n\n`];
    if (doc.context) {
      chunks.push(`**Context:** ${doc.context}\n\n`);
    }
    chunks.push(`**File:** ${doc.displayPath}\n`);
    chunks.push(`**Modified:** ${doc.modifiedAt}\n\n`);
    if (doc.body !== undefined) {
      chunks.push("---\n\n" + doc.body + "\n");
    }
    return chunks.join("");
  }
  /**
   * Serialize a single DocumentResult as an XML document with a `<document>` root.
   *
   * Emits `<file>`, `<title>`, `<context>` (only when truthy), `<hash>`,
   * `<modifiedAt>`, `<bodyLength>` and, when `body` is not `undefined`,
   * `<body>`. All values except `bodyLength` go through `escapeXml`.
   *
   * @param doc The document to serialize
   * @returns XML text starting with the XML declaration
   */
  export function documentToXml(doc: DocumentResult): string {
    const parts: string[] = [
      `<?xml version="1.0" encoding="UTF-8"?>\n<document>\n`,
      ` <file>${escapeXml(doc.displayPath)}</file>\n`,
      ` <title>${escapeXml(doc.title)}</title>\n`,
    ];
    if (doc.context) {
      parts.push(` <context>${escapeXml(doc.context)}</context>\n`);
    }
    parts.push(
      ` <hash>${escapeXml(doc.hash)}</hash>\n`,
      ` <modifiedAt>${escapeXml(doc.modifiedAt)}</modifiedAt>\n`,
      ` <bodyLength>${doc.bodyLength}</bodyLength>\n`
    );
    if (doc.body !== undefined) {
      parts.push(` <body>${escapeXml(doc.body)}</body>\n`);
    }
    parts.push(`</document>`);
    return parts.join("");
  }
  /**
   * Format a single document in the requested output format.
   *
   * "json" and "xml" use their dedicated formatters; every other format
   * (including "md" and "cli") is rendered as Markdown.
   *
   * @param doc The document to format
   * @param format Requested output format
   * @returns The formatted text
   */
  export function formatDocument(doc: DocumentResult, format: OutputFormat): string {
    if (format === "json") {
      return documentToJson(doc);
    }
    if (format === "xml") {
      return documentToXml(doc);
    }
    // "md", "cli" and anything else fall back to Markdown.
    return documentToMarkdown(doc);
  }
  // =============================================================================
  // Universal Format Function
  // =============================================================================
  /**
   * Format search results in the requested output format.
   *
   * "cli" is expected to be handled separately with colors; here it falls back
   * to the Markdown rendering. Any unrecognized format is rendered as JSON.
   *
   * @param results Search results to format
   * @param format Requested output format
   * @param opts Formatting options forwarded to the chosen formatter
   *             (the "files" formatter takes none)
   * @returns The formatted text
   */
  export function formatSearchResults(
    results: SearchResult[],
    format: OutputFormat,
    opts: FormatOptions = {}
  ): string {
    if (format === "csv") {
      return searchResultsToCsv(results, opts);
    }
    if (format === "files") {
      return searchResultsToFiles(results);
    }
    if (format === "md" || format === "cli") {
      // "cli" has no colored renderer in this module; Markdown is the plain-text fallback.
      return searchResultsToMarkdown(results, opts);
    }
    if (format === "xml") {
      return searchResultsToXml(results, opts);
    }
    // "json" and any unrecognized value.
    return searchResultsToJson(results, opts);
  }
  /**
   * Format multi-get documents in the requested output format.
   *
   * "cli" is expected to be handled separately with colors; here it falls back
   * to the Markdown rendering. Any unrecognized format is rendered as JSON.
   *
   * @param results Documents to format
   * @param format Requested output format
   * @returns The formatted text
   */
  export function formatDocuments(
    results: MultiGetFile[],
    format: OutputFormat
  ): string {
    if (format === "csv") {
      return documentsToCsv(results);
    }
    if (format === "files") {
      return documentsToFiles(results);
    }
    if (format === "md" || format === "cli") {
      // "cli" has no colored renderer in this module; Markdown is the plain-text fallback.
      return documentsToMarkdown(results);
    }
    if (format === "xml") {
      return documentsToXml(results);
    }
    // "json" and any unrecognized value.
    return documentsToJson(results);
  }