formatter.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. /**
  2. * formatter.ts - Output formatting utilities for QMD
  3. *
  4. * Provides methods to format search results and documents into various output formats:
  5. * JSON, CSV, XML, Markdown, files list, and CLI (colored terminal output).
  6. */
  7. import { extractSnippet } from "./store.js";
  8. import type { SearchResult, MultiGetFile, MultiGetResult, DocumentResult } from "./store.js";
  9. // =============================================================================
  10. // Types
  11. // =============================================================================
  12. // Re-export store types for convenience
  13. export type { SearchResult, MultiGetFile, MultiGetResult, DocumentResult };
  14. export type OutputFormat = "cli" | "csv" | "md" | "xml" | "files" | "json";
  15. export type FormatOptions = {
  16. full?: boolean; // Show full document content instead of snippet
  17. query?: string; // Query for snippet extraction and highlighting
  18. useColor?: boolean; // Enable terminal colors (default: false for non-CLI)
  19. lineNumbers?: boolean;// Add line numbers to output
  20. };
  21. // =============================================================================
  22. // Helper Functions
  23. // =============================================================================
  24. /**
  25. * Add line numbers to text content.
  26. * Each line becomes: "{lineNum}: {content}"
  27. * @param text The text to add line numbers to
  28. * @param startLine Optional starting line number (default: 1)
  29. */
  30. export function addLineNumbers(text: string, startLine: number = 1): string {
  31. const lines = text.split('\n');
  32. return lines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
  33. }
  34. /**
  35. * Extract short docid from a full hash (first 6 characters).
  36. */
  37. export function getDocid(hash: string): string {
  38. return hash.slice(0, 6);
  39. }
  40. // =============================================================================
  41. // Escape Helpers
  42. // =============================================================================
  43. export function escapeCSV(value: string | null | number): string {
  44. if (value === null || value === undefined) return "";
  45. const str = String(value);
  46. if (str.includes(",") || str.includes('"') || str.includes("\n")) {
  47. return `"${str.replace(/"/g, '""')}"`;
  48. }
  49. return str;
  50. }
  51. export function escapeXml(str: string): string {
  52. return str
  53. .replace(/&/g, "&")
  54. .replace(/</g, "&lt;")
  55. .replace(/>/g, "&gt;")
  56. .replace(/"/g, "&quot;")
  57. .replace(/'/g, "&apos;");
  58. }
  59. // =============================================================================
  60. // Search Results Formatters
  61. // =============================================================================
  62. /**
  63. * Format search results as JSON
  64. */
  65. export function searchResultsToJson(
  66. results: SearchResult[],
  67. opts: FormatOptions = {}
  68. ): string {
  69. const query = opts.query || "";
  70. const output = results.map(row => {
  71. const bodyStr = row.body || "";
  72. let body = opts.full ? bodyStr : undefined;
  73. let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos).snippet : undefined;
  74. if (opts.lineNumbers) {
  75. if (body) body = addLineNumbers(body);
  76. if (snippet) snippet = addLineNumbers(snippet);
  77. }
  78. return {
  79. docid: `#${row.docid}`,
  80. score: Math.round(row.score * 100) / 100,
  81. file: row.displayPath,
  82. title: row.title,
  83. ...(row.context && { context: row.context }),
  84. ...(body && { body }),
  85. ...(snippet && { snippet }),
  86. };
  87. });
  88. return JSON.stringify(output, null, 2);
  89. }
  90. /**
  91. * Format search results as CSV
  92. */
  93. export function searchResultsToCsv(
  94. results: SearchResult[],
  95. opts: FormatOptions = {}
  96. ): string {
  97. const query = opts.query || "";
  98. const header = "docid,score,file,title,context,line,snippet";
  99. const rows = results.map(row => {
  100. const bodyStr = row.body || "";
  101. const { line, snippet } = extractSnippet(bodyStr, query, 500, row.chunkPos);
  102. let content = opts.full ? bodyStr : snippet;
  103. if (opts.lineNumbers && content) {
  104. content = addLineNumbers(content);
  105. }
  106. return [
  107. `#${row.docid}`,
  108. row.score.toFixed(4),
  109. escapeCSV(row.displayPath),
  110. escapeCSV(row.title),
  111. escapeCSV(row.context || ""),
  112. line,
  113. escapeCSV(content),
  114. ].join(",");
  115. });
  116. return [header, ...rows].join("\n");
  117. }
  118. /**
  119. * Format search results as simple files list (docid,score,filepath,context)
  120. */
  121. export function searchResultsToFiles(results: SearchResult[]): string {
  122. return results.map(row => {
  123. const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
  124. return `#${row.docid},${row.score.toFixed(2)},${row.displayPath}${ctx}`;
  125. }).join("\n");
  126. }
  127. /**
  128. * Format search results as Markdown
  129. */
  130. export function searchResultsToMarkdown(
  131. results: SearchResult[],
  132. opts: FormatOptions = {}
  133. ): string {
  134. const query = opts.query || "";
  135. return results.map(row => {
  136. const heading = row.title || row.displayPath;
  137. const bodyStr = row.body || "";
  138. let content: string;
  139. if (opts.full) {
  140. content = bodyStr;
  141. } else {
  142. content = extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
  143. }
  144. if (opts.lineNumbers) {
  145. content = addLineNumbers(content);
  146. }
  147. return `---\n# ${heading}\n\n**docid:** \`#${row.docid}\`\n\n${content}\n`;
  148. }).join("\n");
  149. }
  150. /**
  151. * Format search results as XML
  152. */
  153. export function searchResultsToXml(
  154. results: SearchResult[],
  155. opts: FormatOptions = {}
  156. ): string {
  157. const query = opts.query || "";
  158. const items = results.map(row => {
  159. const titleAttr = row.title ? ` title="${escapeXml(row.title)}"` : "";
  160. const bodyStr = row.body || "";
  161. let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
  162. if (opts.lineNumbers) {
  163. content = addLineNumbers(content);
  164. }
  165. return `<file docid="#${row.docid}" name="${escapeXml(row.displayPath)}"${titleAttr}>\n${escapeXml(content)}\n</file>`;
  166. });
  167. return items.join("\n\n");
  168. }
  169. /**
  170. * Format search results for MCP (simpler CSV format with pre-extracted snippets)
  171. */
  172. export function searchResultsToMcpCsv(
  173. results: { docid: string; file: string; title: string; score: number; context: string | null; snippet: string }[]
  174. ): string {
  175. const header = "docid,file,title,score,context,snippet";
  176. const rows = results.map(r =>
  177. [`#${r.docid}`, r.file, r.title, r.score, r.context || "", r.snippet].map(escapeCSV).join(",")
  178. );
  179. return [header, ...rows].join("\n");
  180. }
  181. // =============================================================================
  182. // Document Formatters (for multi-get using MultiGetFile from store)
  183. // =============================================================================
  184. /**
  185. * Format documents as JSON
  186. */
  187. export function documentsToJson(results: MultiGetFile[]): string {
  188. const output = results.map(r => ({
  189. file: r.displayPath,
  190. title: r.title,
  191. ...(r.context && { context: r.context }),
  192. ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
  193. }));
  194. return JSON.stringify(output, null, 2);
  195. }
  196. /**
  197. * Format documents as CSV
  198. */
  199. export function documentsToCsv(results: MultiGetFile[]): string {
  200. const header = "file,title,context,skipped,body";
  201. const rows = results.map(r =>
  202. [
  203. r.displayPath,
  204. r.title,
  205. r.context || "",
  206. r.skipped ? "true" : "false",
  207. r.skipped ? (r.skipReason || "") : r.body
  208. ].map(escapeCSV).join(",")
  209. );
  210. return [header, ...rows].join("\n");
  211. }
  212. /**
  213. * Format documents as files list
  214. */
  215. export function documentsToFiles(results: MultiGetFile[]): string {
  216. return results.map(r => {
  217. const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
  218. const status = r.skipped ? ",[SKIPPED]" : "";
  219. return `${r.displayPath}${ctx}${status}`;
  220. }).join("\n");
  221. }
  222. /**
  223. * Format documents as Markdown
  224. */
  225. export function documentsToMarkdown(results: MultiGetFile[]): string {
  226. return results.map(r => {
  227. let md = `## ${r.displayPath}\n\n`;
  228. if (r.title && r.title !== r.displayPath) md += `**Title:** ${r.title}\n\n`;
  229. if (r.context) md += `**Context:** ${r.context}\n\n`;
  230. if (r.skipped) {
  231. md += `> ${r.skipReason}\n`;
  232. } else {
  233. md += "```\n" + r.body + "\n```\n";
  234. }
  235. return md;
  236. }).join("\n");
  237. }
  238. /**
  239. * Format documents as XML
  240. */
  241. export function documentsToXml(results: MultiGetFile[]): string {
  242. const items = results.map(r => {
  243. let xml = " <document>\n";
  244. xml += ` <file>${escapeXml(r.displayPath)}</file>\n`;
  245. xml += ` <title>${escapeXml(r.title)}</title>\n`;
  246. if (r.context) xml += ` <context>${escapeXml(r.context)}</context>\n`;
  247. if (r.skipped) {
  248. xml += ` <skipped>true</skipped>\n`;
  249. xml += ` <reason>${escapeXml(r.skipReason || "")}</reason>\n`;
  250. } else {
  251. xml += ` <body>${escapeXml(r.body)}</body>\n`;
  252. }
  253. xml += " </document>";
  254. return xml;
  255. });
  256. return `<?xml version="1.0" encoding="UTF-8"?>\n<documents>\n${items.join("\n")}\n</documents>`;
  257. }
  258. // =============================================================================
  259. // Single Document Formatters
  260. // =============================================================================
  261. /**
  262. * Format a single DocumentResult as JSON
  263. */
  264. export function documentToJson(doc: DocumentResult): string {
  265. return JSON.stringify({
  266. file: doc.displayPath,
  267. title: doc.title,
  268. ...(doc.context && { context: doc.context }),
  269. hash: doc.hash,
  270. modifiedAt: doc.modifiedAt,
  271. bodyLength: doc.bodyLength,
  272. ...(doc.body !== undefined && { body: doc.body }),
  273. }, null, 2);
  274. }
  275. /**
  276. * Format a single DocumentResult as Markdown
  277. */
  278. export function documentToMarkdown(doc: DocumentResult): string {
  279. let md = `# ${doc.title || doc.displayPath}\n\n`;
  280. if (doc.context) md += `**Context:** ${doc.context}\n\n`;
  281. md += `**File:** ${doc.displayPath}\n`;
  282. md += `**Modified:** ${doc.modifiedAt}\n\n`;
  283. if (doc.body !== undefined) {
  284. md += "---\n\n" + doc.body + "\n";
  285. }
  286. return md;
  287. }
  288. /**
  289. * Format a single DocumentResult as XML
  290. */
  291. export function documentToXml(doc: DocumentResult): string {
  292. let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<document>\n`;
  293. xml += ` <file>${escapeXml(doc.displayPath)}</file>\n`;
  294. xml += ` <title>${escapeXml(doc.title)}</title>\n`;
  295. if (doc.context) xml += ` <context>${escapeXml(doc.context)}</context>\n`;
  296. xml += ` <hash>${escapeXml(doc.hash)}</hash>\n`;
  297. xml += ` <modifiedAt>${escapeXml(doc.modifiedAt)}</modifiedAt>\n`;
  298. xml += ` <bodyLength>${doc.bodyLength}</bodyLength>\n`;
  299. if (doc.body !== undefined) {
  300. xml += ` <body>${escapeXml(doc.body)}</body>\n`;
  301. }
  302. xml += `</document>`;
  303. return xml;
  304. }
  305. /**
  306. * Format a single document to the specified format
  307. */
  308. export function formatDocument(doc: DocumentResult, format: OutputFormat): string {
  309. switch (format) {
  310. case "json":
  311. return documentToJson(doc);
  312. case "md":
  313. return documentToMarkdown(doc);
  314. case "xml":
  315. return documentToXml(doc);
  316. default:
  317. // Default to markdown for CLI and other formats
  318. return documentToMarkdown(doc);
  319. }
  320. }
  321. // =============================================================================
  322. // Universal Format Function
  323. // =============================================================================
  324. /**
  325. * Format search results to the specified output format
  326. */
  327. export function formatSearchResults(
  328. results: SearchResult[],
  329. format: OutputFormat,
  330. opts: FormatOptions = {}
  331. ): string {
  332. switch (format) {
  333. case "json":
  334. return searchResultsToJson(results, opts);
  335. case "csv":
  336. return searchResultsToCsv(results, opts);
  337. case "files":
  338. return searchResultsToFiles(results);
  339. case "md":
  340. return searchResultsToMarkdown(results, opts);
  341. case "xml":
  342. return searchResultsToXml(results, opts);
  343. case "cli":
  344. // CLI format should be handled separately with colors
  345. // Return a simple text version as fallback
  346. return searchResultsToMarkdown(results, opts);
  347. default:
  348. return searchResultsToJson(results, opts);
  349. }
  350. }
  351. /**
  352. * Format documents to the specified output format
  353. */
  354. export function formatDocuments(
  355. results: MultiGetFile[],
  356. format: OutputFormat
  357. ): string {
  358. switch (format) {
  359. case "json":
  360. return documentsToJson(results);
  361. case "csv":
  362. return documentsToCsv(results);
  363. case "files":
  364. return documentsToFiles(results);
  365. case "md":
  366. return documentsToMarkdown(results);
  367. case "xml":
  368. return documentsToXml(results);
  369. case "cli":
  370. // CLI format should be handled separately with colors
  371. return documentsToMarkdown(results);
  372. default:
  373. return documentsToJson(results);
  374. }
  375. }