formatter.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /**
  2. * formatter.ts - Output formatting utilities for QMD
  3. *
  4. * Provides methods to format search results and documents into various output formats:
  5. * JSON, CSV, XML, Markdown, files list, and CLI (colored terminal output).
  6. */
  7. import { extractSnippet } from "./store.js";
  8. import type { SearchResult, MultiGetResult, DocumentResult } from "./store.js";
  9. // =============================================================================
  10. // Types
  11. // =============================================================================
  12. // Re-export store types for convenience
  13. export type { SearchResult, MultiGetResult, DocumentResult };
  /**
   * Flattened multi-get entry consumed by the document formatters below.
   *
   * The shape is a discriminated union on `skipped`:
   * - `skipped: false` — a regular entry; no `skipReason` is present.
   * - `skipped: true`  — the entry additionally carries a `skipReason` string.
   *
   * All other fields are shared by both variants.
   */
  export type MultiGetFile = {
    filepath: string;
    displayPath: string;
    title: string;
    body: string;
    context?: string | null;
  } & (
    | { skipped: false }
    | { skipped: true; skipReason: string }
  );
  /** Names of the output formats accepted by the `format*` dispatch functions in this module. */
  export type OutputFormat =
    | "cli"
    | "csv"
    | "md"
    | "xml"
    | "files"
    | "json";
  /**
   * Optional knobs shared by the search-result formatters.
   * Every field may be omitted; the formatters default `opts` to `{}`.
   */
  export type FormatOptions = {
    /** Query for snippet extraction and highlighting. */
    query?: string;
    /** Domain intent for snippet extraction disambiguation. */
    intent?: string;
    /** Show full document content instead of snippet. */
    full?: boolean;
    /** Add line numbers to output. */
    lineNumbers?: boolean;
    /** Enable terminal colors (default: false for non-CLI). */
    useColor?: boolean;
  };
  39. // =============================================================================
  40. // Helper Functions
  41. // =============================================================================
  /**
   * Prefix each line of `text` with its line number.
   *
   * The text is split on "\n"; every resulting line is rewritten as
   * "{lineNum}: {content}" and the lines are joined with "\n" again.
   * An empty string counts as one (empty) line.
   *
   * @param text The text to add line numbers to
   * @param startLine Number given to the first line (default: 1)
   * @returns The numbered text
   */
  export function addLineNumbers(text: string, startLine: number = 1): string {
    const numbered: string[] = [];
    for (const [offset, content] of text.split('\n').entries()) {
      numbered.push(`${startLine + offset}: ${content}`);
    }
    return numbered.join('\n');
  }
  /**
   * Derive the short docid from a full hash.
   *
   * @param hash Full hash string
   * @returns The first 6 characters of `hash` (the whole string when it is shorter)
   */
  export function getDocid(hash: string): string {
    const DOCID_LENGTH = 6;
    return hash.substring(0, DOCID_LENGTH);
  }
  58. // =============================================================================
  59. // Escape Helpers
  60. // =============================================================================
  /**
   * Escape a single value for use as one CSV field.
   *
   * - `null` / `undefined` become the empty string.
   * - Numbers are stringified with `String()`.
   * - A field containing a comma, a double quote, a line feed or a carriage
   *   return is wrapped in double quotes, with embedded quotes doubled.
   *   (A bare "\r" must be quoted as well, otherwise CSV readers that treat
   *   CR as a line break split the record.)
   * - Any other value is returned unchanged.
   *
   * @param value The raw field value
   * @returns The CSV-safe field text
   */
  export function escapeCSV(value: string | number | null | undefined): string {
    if (value === null || value === undefined) return "";
    const str = String(value);
    const needsQuoting = /[",\r\n]/.test(str);
    if (!needsQuoting) return str;
    return `"${str.replace(/"/g, '""')}"`;
  }
  /**
   * Escape the five XML special characters so `str` can be placed in element
   * content or inside a double- or single-quoted attribute value.
   *
   * `&` is replaced with `&amp;`, `<` with `&lt;`, `>` with `&gt;`,
   * `"` with `&quot;` and `'` with `&apos;`. The replacement is done in a
   * single pass, so an ampersand produced by one replacement is never
   * escaped a second time.
   *
   * @param str Raw text
   * @returns Text with the XML special characters replaced by entity references
   */
  export function escapeXml(str: string): string {
    return str.replace(/[&<>"']/g, ch => {
      switch (ch) {
        case "&":
          return "&amp;";
        case "<":
          return "&lt;";
        case ">":
          return "&gt;";
        case '"':
          return "&quot;";
        default:
          // The character class only admits one remaining character: the apostrophe.
          return "&apos;";
      }
    });
  }
  77. // =============================================================================
  78. // Search Results Formatters
  79. // =============================================================================
  /**
   * Serialize search results as a pretty-printed (2-space) JSON array.
   *
   * Each entry has `docid` (prefixed with "#"), `score` (rounded to two
   * decimals), `file` (the display path) and `title`, followed by `context`
   * when the row has a truthy one, and then either:
   * - `body`    — when `opts.full` is set, or
   * - `snippet` — otherwise, taken from `extractSnippet(...)`.
   * An empty body/snippet is left out of the entry. With `opts.lineNumbers`,
   * the body/snippet is passed through `addLineNumbers` first.
   *
   * @param results Rows to serialize
   * @param opts Formatting options (`full`, `query`, `lineNumbers`, `intent` are read)
   * @returns JSON text
   */
  export function searchResultsToJson(
    results: SearchResult[],
    opts: FormatOptions = {}
  ): string {
    const query = opts.query || "";

    // Number the text only when requested and only when there is text to number.
    const withNumbers = (text: string | undefined): string | undefined =>
      opts.lineNumbers && text ? addLineNumbers(text) : text;

    const entries = results.map(row => {
      const source = row.body || "";
      const body = withNumbers(opts.full ? source : undefined);
      const snippet = withNumbers(
        opts.full
          ? undefined
          : extractSnippet(source, query, 300, row.chunkPos, undefined, opts.intent).snippet
      );

      // Keys are inserted in the order they should appear in the JSON output.
      const entry: Record<string, unknown> = {
        docid: `#${row.docid}`,
        score: Math.round(row.score * 100) / 100,
        file: row.displayPath,
        title: row.title,
      };
      if (row.context) entry.context = row.context;
      if (body) entry.body = body;
      if (snippet) entry.snippet = snippet;
      return entry;
    });

    return JSON.stringify(entries, null, 2);
  }
  /**
   * Serialize search results as CSV with the header
   * `docid,score,file,title,context,line,snippet`.
   *
   * `extractSnippet(...)` is called for every row: its `line` fills the `line`
   * column, and its `snippet` fills the last column unless `opts.full` is set,
   * in which case the whole body is used instead. With `opts.lineNumbers`, a
   * non-empty last column is passed through `addLineNumbers`. The score is
   * written with four decimals; path, title, context and content go through
   * `escapeCSV`.
   *
   * @param results Rows to serialize
   * @param opts Formatting options (`full`, `query`, `lineNumbers`, `intent` are read)
   * @returns CSV text, header first, records separated by "\n"
   */
  export function searchResultsToCsv(
    results: SearchResult[],
    opts: FormatOptions = {}
  ): string {
    const query = opts.query || "";
    const records: string[] = ["docid,score,file,title,context,line,snippet"];

    for (const row of results) {
      const source = row.body || "";
      const extracted = extractSnippet(source, query, 500, row.chunkPos, undefined, opts.intent);

      let content = opts.full ? source : extracted.snippet;
      if (opts.lineNumbers && content) {
        content = addLineNumbers(content);
      }

      const cells = [
        `#${row.docid}`,
        row.score.toFixed(4),
        escapeCSV(row.displayPath),
        escapeCSV(row.title),
        escapeCSV(row.context || ""),
        extracted.line,
        escapeCSV(content),
      ];
      records.push(cells.join(","));
    }

    return records.join("\n");
  }
  /**
   * Format search results as a simple files list, one row per result:
   * `docid,score,filepath[,context]`.
   *
   * The docid is prefixed with "#", the score is written with two decimals,
   * and the context (when truthy) is always double-quoted with embedded
   * quotes doubled. The display path is passed through `escapeCSV`, so a path
   * containing a comma, quote or line break is quoted instead of silently
   * shifting the columns of the row; ordinary paths are emitted unchanged.
   *
   * @param results Rows to list
   * @returns The rows joined with "\n"
   */
  export function searchResultsToFiles(results: SearchResult[]): string {
    return results.map(row => {
      const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
      const file = escapeCSV(row.displayPath);
      return `#${row.docid},${row.score.toFixed(2)},${file}${ctx}`;
    }).join("\n");
  }
  /**
   * Render search results as Markdown, one section per result.
   *
   * Each section consists of a `---` rule, a `# ` heading (the title, or the
   * display path when the title is falsy), a `**docid:**` line, an optional
   * `**context:**` line, a blank line and the content. The content is the
   * whole body when `opts.full` is set, otherwise the `snippet` returned by
   * `extractSnippet(...)`; with `opts.lineNumbers` it is passed through
   * `addLineNumbers`. Sections are joined with "\n".
   *
   * @param results Rows to render
   * @param opts Formatting options (`full`, `query`, `lineNumbers`, `intent` are read)
   * @returns Markdown text
   */
  export function searchResultsToMarkdown(
    results: SearchResult[],
    opts: FormatOptions = {}
  ): string {
    const query = opts.query || "";
    const sections: string[] = [];

    for (const row of results) {
      const source = row.body || "";
      const excerpt: string = opts.full
        ? source
        : extractSnippet(source, query, 500, row.chunkPos, undefined, opts.intent).snippet;
      const content = opts.lineNumbers ? addLineNumbers(excerpt) : excerpt;

      const heading = row.title || row.displayPath;
      const contextLine = row.context ? `**context:** ${row.context}\n` : "";

      sections.push(
        "---\n" +
          `# ${heading}\n\n` +
          `**docid:** \`#${row.docid}\`\n` +
          contextLine +
          `\n${content}\n`
      );
    }

    return sections.join("\n");
  }
  /**
   * Render search results as a sequence of `<file>` elements separated by a
   * blank line (there is no enclosing root element and no XML declaration).
   *
   * Each element carries `docid` (prefixed with "#") and `name` (the display
   * path) attributes, plus `title` and `context` attributes when those values
   * are truthy. The element content is the whole body when `opts.full` is
   * set, otherwise the `snippet` from `extractSnippet(...)`; with
   * `opts.lineNumbers` it is passed through `addLineNumbers`. Attribute values
   * (except docid) and content go through `escapeXml`.
   *
   * @param results Rows to render
   * @param opts Formatting options (`full`, `query`, `lineNumbers`, `intent` are read)
   * @returns XML fragment text
   */
  export function searchResultsToXml(
    results: SearchResult[],
    opts: FormatOptions = {}
  ): string {
    const query = opts.query || "";
    const elements: string[] = [];

    for (const row of results) {
      const source = row.body || "";
      const raw: string = opts.full
        ? source
        : extractSnippet(source, query, 500, row.chunkPos, undefined, opts.intent).snippet;
      const content = opts.lineNumbers ? addLineNumbers(raw) : raw;

      // Attributes in output order; optional ones are appended only when present.
      const attrs = [`docid="#${row.docid}"`, `name="${escapeXml(row.displayPath)}"`];
      if (row.title) attrs.push(`title="${escapeXml(row.title)}"`);
      if (row.context) attrs.push(`context="${escapeXml(row.context)}"`);

      elements.push(`<file ${attrs.join(" ")}>\n${escapeXml(content)}\n</file>`);
    }

    return elements.join("\n\n");
  }
  /**
   * Format search results for MCP as CSV with the header
   * `docid,file,title,score,context,snippet`.
   *
   * Unlike `searchResultsToCsv`, the rows already contain their snippet, so no
   * extraction happens here. The docid is prefixed with "#", a falsy context
   * becomes the empty string, and every cell goes through `escapeCSV`.
   *
   * @param results Rows with pre-extracted snippets
   * @returns CSV text, header first, records separated by "\n"
   */
  export function searchResultsToMcpCsv(
    results: { docid: string; file: string; title: string; score: number; context: string | null; snippet: string }[]
  ): string {
    const records: string[] = ["docid,file,title,score,context,snippet"];
    for (const r of results) {
      const cells = [`#${r.docid}`, r.file, r.title, r.score, r.context || "", r.snippet];
      records.push(cells.map(cell => escapeCSV(cell)).join(","));
    }
    return records.join("\n");
  }
  201. // =============================================================================
  202. // Document Formatters (for multi-get using MultiGetFile from store)
  203. // =============================================================================
  /**
   * Serialize multi-get documents as a pretty-printed (2-space) JSON array.
   *
   * Each entry has `file` (the display path) and `title`, then `context` when
   * it is truthy, and finally either `skipped: true` with `reason` (for
   * skipped entries) or `body` (for all others).
   *
   * @param results Documents to serialize
   * @returns JSON text
   */
  export function documentsToJson(results: MultiGetFile[]): string {
    const entries = results.map(r => {
      // Keys are inserted in the order they should appear in the JSON output.
      const entry: Record<string, unknown> = {
        file: r.displayPath,
        title: r.title,
      };
      if (r.context) entry.context = r.context;
      if (r.skipped) {
        entry.skipped = true;
        entry.reason = r.skipReason;
      } else {
        entry.body = r.body;
      }
      return entry;
    });
    return JSON.stringify(entries, null, 2);
  }
  /**
   * Serialize multi-get documents as CSV with the header
   * `file,title,context,skipped,body`.
   *
   * The `skipped` column is the literal text "true" or "false". For skipped
   * entries the last column holds the skip reason (empty when falsy) rather
   * than a body. Every cell goes through `escapeCSV`.
   *
   * @param results Documents to serialize
   * @returns CSV text, header first, records separated by "\n"
   */
  export function documentsToCsv(results: MultiGetFile[]): string {
    const records: string[] = ["file,title,context,skipped,body"];
    for (const r of results) {
      const lastCell = r.skipped ? (r.skipReason || "") : r.body;
      const cells = [
        r.displayPath,
        r.title,
        r.context || "",
        r.skipped ? "true" : "false",
        lastCell,
      ];
      records.push(cells.map(cell => escapeCSV(cell)).join(","));
    }
    return records.join("\n");
  }
  /**
   * Format multi-get documents as a files list, one row per document:
   * `filepath[,context][,[SKIPPED]]`.
   *
   * The context (when truthy) is always double-quoted with embedded quotes
   * doubled, and skipped entries get a trailing `,[SKIPPED]` marker. The
   * display path is passed through `escapeCSV`, so a path containing a comma,
   * quote or line break is quoted instead of being indistinguishable from the
   * following columns; ordinary paths are emitted unchanged.
   *
   * @param results Documents to list
   * @returns The rows joined with "\n"
   */
  export function documentsToFiles(results: MultiGetFile[]): string {
    return results.map(r => {
      const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
      const status = r.skipped ? ",[SKIPPED]" : "";
      return `${escapeCSV(r.displayPath)}${ctx}${status}`;
    }).join("\n");
  }
  /**
   * Render multi-get documents as Markdown, one `## ` section per document.
   *
   * A section contains the display path as heading, a `**Title:**` line when
   * the title is truthy and differs from the display path, a `**Context:**`
   * line when the context is truthy, and then either a blockquote with the
   * skip reason (skipped entries) or the body inside a fenced code block.
   *
   * The fence is three backticks unless the body itself contains a run of
   * three or more backticks (common when the document is Markdown with code
   * samples). In that case the fence is made one backtick longer than the
   * longest run in the body, so the body cannot terminate the block early.
   *
   * @param results Documents to render
   * @returns Markdown text, sections joined with "\n"
   */
  export function documentsToMarkdown(results: MultiGetFile[]): string {
    return results.map(r => {
      let md = `## ${r.displayPath}\n\n`;
      if (r.title && r.title !== r.displayPath) md += `**Title:** ${r.title}\n\n`;
      if (r.context) md += `**Context:** ${r.context}\n\n`;
      if (r.skipped) {
        md += `> ${r.skipReason}\n`;
      } else {
        // A closing fence must be at least as long as the opening one, so a
        // fence longer than every backtick run in the body is never matched.
        const backtickRuns: string[] = r.body.match(/`+/g) || [];
        const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
        const fence = "`".repeat(Math.max(3, longestRun + 1));
        md += `${fence}\n${r.body}\n${fence}\n`;
      }
      return md;
    }).join("\n");
  }
  /**
   * Render multi-get documents as an XML document: an XML declaration
   * followed by a `<documents>` root with one `<document>` child per entry.
   *
   * Each `<document>` holds `<file>` (the display path) and `<title>`, a
   * `<context>` element when the context is truthy, and then either
   * `<skipped>true</skipped>` plus `<reason>` (skipped entries; a falsy reason
   * becomes empty) or `<body>`. All text values go through `escapeXml`.
   *
   * @param results Documents to render
   * @returns XML text
   */
  export function documentsToXml(results: MultiGetFile[]): string {
    const items = results.map(r => {
      const lines: string[] = [
        " <document>",
        ` <file>${escapeXml(r.displayPath)}</file>`,
        ` <title>${escapeXml(r.title)}</title>`,
      ];
      if (r.context) {
        lines.push(` <context>${escapeXml(r.context)}</context>`);
      }
      if (r.skipped) {
        lines.push(` <skipped>true</skipped>`);
        lines.push(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
      } else {
        lines.push(` <body>${escapeXml(r.body)}</body>`);
      }
      lines.push(" </document>");
      return lines.join("\n");
    });
    return `<?xml version="1.0" encoding="UTF-8"?>\n<documents>\n${items.join("\n")}\n</documents>`;
  }
  278. // =============================================================================
  279. // Single Document Formatters
  280. // =============================================================================
  /**
   * Serialize a single DocumentResult as pretty-printed (2-space) JSON.
   *
   * The object has `file` (the display path) and `title`, then `context` when
   * it is truthy, then `hash`, `modifiedAt` and `bodyLength`, and finally
   * `body` whenever `doc.body` is not `undefined`.
   *
   * @param doc The document to serialize
   * @returns JSON text
   */
  export function documentToJson(doc: DocumentResult): string {
    // Keys are inserted in the order they should appear in the JSON output.
    const payload: Record<string, unknown> = {
      file: doc.displayPath,
      title: doc.title,
    };
    if (doc.context) payload.context = doc.context;
    payload.hash = doc.hash;
    payload.modifiedAt = doc.modifiedAt;
    payload.bodyLength = doc.bodyLength;
    if (doc.body !== undefined) payload.body = doc.body;
    return JSON.stringify(payload, null, 2);
  }
  /**
   * Render a single DocumentResult as Markdown.
   *
   * Output order: a `# ` heading (the title, or the display path when the
   * title is falsy), an optional `**Context:**` paragraph, the `**File:**`
   * and `**Modified:**` lines, and — whenever `doc.body` is not `undefined` —
   * a `---` rule followed by the body.
   *
   * @param doc The document to render
   * @returns Markdown text
   */
  export function documentToMarkdown(doc: DocumentResult): string {
    const pieces: string[] = [`# ${doc.title || doc.displayPath}\n\n`];
    if (doc.context) {
      pieces.push(`**Context:** ${doc.context}\n\n`);
    }
    pieces.push(`**File:** ${doc.displayPath}\n`);
    pieces.push(`**Modified:** ${doc.modifiedAt}\n\n`);
    if (doc.body !== undefined) {
      pieces.push("---\n\n" + doc.body + "\n");
    }
    return pieces.join("");
  }
  /**
   * Render a single DocumentResult as an XML document: an XML declaration
   * followed by one `<document>` element.
   *
   * Children, in order: `<file>` (the display path), `<title>`, `<context>`
   * (only when truthy), `<hash>`, `<modifiedAt>`, `<bodyLength>` and — whenever
   * `doc.body` is not `undefined` — `<body>`. All string values go through
   * `escapeXml`; `bodyLength` is interpolated as-is.
   *
   * @param doc The document to render
   * @returns XML text
   */
  export function documentToXml(doc: DocumentResult): string {
    const lines: string[] = [
      `<?xml version="1.0" encoding="UTF-8"?>`,
      `<document>`,
      ` <file>${escapeXml(doc.displayPath)}</file>`,
      ` <title>${escapeXml(doc.title)}</title>`,
    ];
    if (doc.context) {
      lines.push(` <context>${escapeXml(doc.context)}</context>`);
    }
    lines.push(` <hash>${escapeXml(doc.hash)}</hash>`);
    lines.push(` <modifiedAt>${escapeXml(doc.modifiedAt)}</modifiedAt>`);
    lines.push(` <bodyLength>${doc.bodyLength}</bodyLength>`);
    if (doc.body !== undefined) {
      lines.push(` <body>${escapeXml(doc.body)}</body>`);
    }
    lines.push(`</document>`);
    return lines.join("\n");
  }
  /**
   * Format a single document in the requested output format.
   *
   * Only "json" and "xml" have dedicated single-document renderers; every
   * other format (including "md", "cli", "csv" and "files") is rendered as
   * Markdown.
   *
   * @param doc The document to format
   * @param format Requested output format
   * @returns The formatted text
   */
  export function formatDocument(doc: DocumentResult, format: OutputFormat): string {
    if (format === "json") {
      return documentToJson(doc);
    }
    if (format === "xml") {
      return documentToXml(doc);
    }
    // "md" and everything without a dedicated renderer fall back to Markdown.
    return documentToMarkdown(doc);
  }
  341. // =============================================================================
  342. // Universal Format Function
  343. // =============================================================================
  /**
   * Format search results in the requested output format by delegating to
   * the matching `searchResultsTo*` function.
   *
   * "cli" has no colored renderer here — colored terminal output is expected
   * to be handled separately — so it falls back to the Markdown rendering.
   * Any value outside the known formats is rendered as JSON.
   *
   * @param results Rows to format
   * @param format Requested output format
   * @param opts Formatting options forwarded to the renderer (the "files"
   *             renderer takes none)
   * @returns The formatted text
   */
  export function formatSearchResults(
    results: SearchResult[],
    format: OutputFormat,
    opts: FormatOptions = {}
  ): string {
    switch (format) {
      case "csv":
        return searchResultsToCsv(results, opts);
      case "files":
        return searchResultsToFiles(results);
      case "xml":
        return searchResultsToXml(results, opts);
      case "md":
      case "cli":
        // "cli" shares the plain Markdown rendering as its text fallback.
        return searchResultsToMarkdown(results, opts);
      case "json":
      default:
        return searchResultsToJson(results, opts);
    }
  }
  /**
   * Format multi-get documents in the requested output format by delegating
   * to the matching `documentsTo*` function.
   *
   * "cli" has no colored renderer here — colored terminal output is expected
   * to be handled separately — so it falls back to the Markdown rendering.
   * Any value outside the known formats is rendered as JSON.
   *
   * @param results Documents to format
   * @param format Requested output format
   * @returns The formatted text
   */
  export function formatDocuments(
    results: MultiGetFile[],
    format: OutputFormat
  ): string {
    switch (format) {
      case "csv":
        return documentsToCsv(results);
      case "files":
        return documentsToFiles(results);
      case "xml":
        return documentsToXml(results);
      case "md":
      case "cli":
        // "cli" shares the plain Markdown rendering as its text fallback.
        return documentsToMarkdown(results);
      case "json":
      default:
        return documentsToJson(results);
    }
  }