/**
 * Integration tests for AST-aware chunking.
 *
 * Migrated from the standalone test-ast-chunking.mjs script into the
 * vitest suite. Covers the integration between AST break point extraction
 * and the chunking pipeline — areas not tested by the unit-level ast.test.ts.
 */
- import { describe, test, expect } from "vitest";
- import { getASTBreakPoints } from "../src/ast.js";
- import {
- chunkDocument,
- chunkDocumentAsync,
- chunkDocumentWithBreakPoints,
- mergeBreakPoints,
- scanBreakPoints,
- findCodeFences,
- } from "../src/store.js";
// ==========================================================================
// mergeBreakPoints
// ==========================================================================

describe("mergeBreakPoints", () => {
  test("merges regex and AST break points, higher score wins at same position", () => {
    // Positions 10 and 100 occur in both inputs; 50 is regex-only and 75 is AST-only.
    const fromRegex = [
      { pos: 10, score: 20, type: "blank" },
      { pos: 50, score: 1, type: "newline" },
      { pos: 100, score: 20, type: "blank" },
    ];
    const fromAst = [
      { pos: 10, score: 90, type: "ast:func" },
      { pos: 75, score: 100, type: "ast:class" },
      { pos: 100, score: 60, type: "ast:import" },
    ];

    const combined = mergeBreakPoints(fromRegex, fromAst);

    // Score of the merged break point at `pos`, or undefined when none exists there.
    const scoreAt = (pos: number) => combined.find((point) => point.pos === pos)?.score;

    expect(combined).toHaveLength(4);
    expect(scoreAt(10)).toBe(90); // both inputs: AST wins (90 > 20)
    expect(scoreAt(50)).toBe(1); // regex only
    expect(scoreAt(75)).toBe(100); // AST only
    expect(scoreAt(100)).toBe(60); // both inputs: AST wins (60 > 20)
  });

  test("result is sorted by position", () => {
    // Inputs are deliberately passed with the larger position first.
    const later = [{ pos: 100, score: 10, type: "a" }];
    const earlier = [{ pos: 5, score: 50, type: "b" }];

    const [first, second] = mergeBreakPoints(later, earlier);

    expect(first!.pos).toBeLessThan(second!.pos);
  });
});
// ==========================================================================
// AST vs Regex chunking comparison
// ==========================================================================

describe("AST vs Regex chunking", () => {
  /** Number of synthetic handler functions in the generated TypeScript fixture. */
  const HANDLER_COUNT = 30;

  /**
   * Source text of the i-th synthetic handler, from its `export` keyword
   * through its closing brace (no surrounding whitespace).
   */
  const handlerSource = (i: number): string =>
    `export function handler${i}(req: Request, res: Response): void {
  const startTime = Date.now();
  const userId = req.params.userId;
  const sessionToken = req.headers.authorization;
  if (!userId || !sessionToken) {
    res.status(400).json({ error: "Missing required parameters" });
    return;
  }
  console.log(\`Processing request ${i} for user \${userId}\`);
  const result = processBusinessLogic${i}(userId, sessionToken);
  const elapsed = Date.now() - startTime;
  res.json({ data: result, processingTimeMs: elapsed });
}`;

  // Large TS file: every handler is wrapped in a leading and trailing newline,
  // and the wrapped parts are joined with one more newline.
  const largeTS = Array.from({ length: HANDLER_COUNT }, (_, i) => `\n${handlerSource(i)}\n`).join("\n");

  /**
   * Counts the handlers whose own source text is covered by more than one chunk.
   *
   * A handler's span is exactly the extent of `handlerSource(i)` inside
   * `largeTS`. The whitespace between handlers belongs to no handler, so a
   * chunk boundary that falls between two handlers is not counted as a split.
   *
   * @param chunks Chunks with their text and absolute start offset in `largeTS`.
   * @returns The number of handlers overlapped by at least two chunks.
   * @throws Error if a handler cannot be located in the fixture.
   */
  function countSplitFunctions(chunks: { text: string; pos: number }[]): number {
    let splits = 0;
    for (let i = 0; i < HANDLER_COUNT; i++) {
      const source = handlerSource(i);
      const funcStart = largeTS.indexOf(source);
      if (funcStart === -1) {
        // Fail loudly rather than miscount if the fixture and this helper drift apart.
        throw new Error(`handler${i} not found in generated fixture`);
      }
      const funcEnd = funcStart + source.length;

      // Half-open interval overlap: [chunk.pos, chunkEnd) against [funcStart, funcEnd).
      const overlapping = chunks.filter((chunk) => {
        const chunkEnd = chunk.pos + chunk.text.length;
        return chunk.pos < funcEnd && chunkEnd > funcStart;
      });
      if (overlapping.length > 1) splits++;
    }
    return splits;
  }

  test("AST splits fewer functions across chunk boundaries than regex", async () => {
    const regexChunks = chunkDocument(largeTS);
    const astChunks = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "handlers.ts", "auto");

    const regexSplits = countSplitFunctions(regexChunks);
    const astSplits = countSplitFunctions(astChunks);

    expect(astSplits).toBeLessThanOrEqual(regexSplits);
  });

  test("markdown files produce identical chunks in auto vs regex mode", async () => {
    const sections: string[] = [];
    for (let i = 0; i < 15; i++) {
      sections.push(`# Section ${i}\n\n${"Lorem ipsum dolor sit amet. ".repeat(40)}\n`);
    }
    const largeMD = sections.join("\n");

    const mdRegex = chunkDocument(largeMD);
    const mdAst = await chunkDocumentAsync(largeMD, undefined, undefined, undefined, "readme.md", "auto");

    expect(mdAst).toHaveLength(mdRegex.length);
    for (let i = 0; i < mdRegex.length; i++) {
      expect(mdAst[i]?.text).toBe(mdRegex[i]?.text);
      expect(mdAst[i]?.pos).toBe(mdRegex[i]?.pos);
    }
  });

  test("regex strategy bypasses AST entirely", async () => {
    const regexOnly = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "handlers.ts", "regex");
    const syncRegex = chunkDocument(largeTS);

    expect(regexOnly).toHaveLength(syncRegex.length);
    for (let i = 0; i < syncRegex.length; i++) {
      expect(regexOnly[i]?.text).toBe(syncRegex[i]?.text);
    }
  });

  test("no filepath falls back to regex", async () => {
    const noPathChunks = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, undefined, "auto");
    const syncRegex = chunkDocument(largeTS);

    expect(noPathChunks).toHaveLength(syncRegex.length);
  });

  test("small file produces single chunk", async () => {
    const smallChunks = await chunkDocumentAsync("export const x = 1;", undefined, undefined, undefined, "s.ts", "auto");

    expect(smallChunks).toHaveLength(1);
  });
});
// ==========================================================================
// chunkDocumentWithBreakPoints equivalence
// ==========================================================================

describe("chunkDocumentWithBreakPoints equivalence", () => {
  test("produces identical output to chunkDocument for the same content", () => {
    // Two 5000-character runs separated by a single blank line.
    const content = "a".repeat(5000) + "\n\n" + "b".repeat(5000);

    const baseline = chunkDocument(content);
    const viaBreakPoints = chunkDocumentWithBreakPoints(
      content,
      scanBreakPoints(content),
      findCodeFences(content),
    );

    expect(viaBreakPoints).toHaveLength(baseline.length);
    baseline.forEach((expected, index) => {
      const actual = viaBreakPoints[index];
      expect(actual?.text).toBe(expected.text);
      expect(actual?.pos).toBe(expected.pos);
    });
  });
});
// ==========================================================================
// Score assertions not covered by ast.test.ts unit tests
// ==========================================================================

describe("AST break point scores", () => {
  /**
   * Extracts AST break points for `code` (language chosen from `filepath`)
   * and returns the score of the first point whose type equals `type`,
   * or undefined when no such point exists.
   */
  async function firstScoreOfType(code: string, filepath: string, type: string): Promise<number | undefined> {
    const points = await getASTBreakPoints(code, filepath);
    return points.find((point) => point.type === type)?.score;
  }

  test("TypeScript export (class) scores 90", async () => {
    const code = `export class Foo {}\nexport function bar() {}`;

    expect(await firstScoreOfType(code, "a.ts", "ast:export")).toBe(90);
  });

  test("Python class scores 100", async () => {
    const code = `class Foo:\n pass\n\ndef bar():\n pass`;

    expect(await firstScoreOfType(code, "a.py", "ast:class")).toBe(100);
  });

  test("Go type scores 80", async () => {
    const code = `package main\n\ntype Server struct {\n port int\n}\n\nfunc main() {}`;

    expect(await firstScoreOfType(code, "a.go", "ast:type")).toBe(80);
  });

  test("Rust enum scores 80", async () => {
    const code = `enum State {\n On,\n Off,\n}\n\nfn main() {}`;

    expect(await firstScoreOfType(code, "a.rs", "ast:enum")).toBe(80);
  });
});
// ==========================================================================
// Function-level chunk strategy (Phase 2)
// ==========================================================================

describe("chunkDocumentAsync with chunkStrategy='function'", () => {
  // Fixture: one import line followed by two functions and one class.
  const TS_CODE = `import { X } from "./x";
export function alpha(): number {
  const start = Date.now();
  return start;
}
export function beta(): number {
  return 42;
}
export class Gamma {
  constructor() {}
  run(): void {}
}
`;

  /** Chunks `content` with the "function" strategy, leaving the three middle arguments undefined. */
  const chunkByFunction = (content: string, filepath: string) =>
    chunkDocumentAsync(content, undefined, undefined, undefined, filepath, "function");

  test("produces one chunk per top-level code unit + import gap", async () => {
    const chunks = await chunkByFunction(TS_CODE, "x.ts");

    // alpha, beta and Gamma give a floor of 3 chunks. The leading import line
    // may add a separate gap chunk, so only the lower bound is asserted.
    expect(chunks.length).toBeGreaterThanOrEqual(3);
  });

  test("each function chunk contains exactly one function/class body", async () => {
    const chunks = await chunkByFunction(TS_CODE, "x.ts");

    // First chunk whose text contains `marker`, if any.
    const chunkContaining = (marker: string) => chunks.find((chunk) => chunk.text.includes(marker));

    const alphaChunk = chunkContaining("function alpha");
    const betaChunk = chunkContaining("function beta");
    const classChunk = chunkContaining("class Gamma");

    expect(alphaChunk).toBeDefined();
    expect(betaChunk).toBeDefined();
    expect(classChunk).toBeDefined();

    // A chunk must not run on into the next top-level unit.
    expect(alphaChunk!.text).not.toContain("function beta");
    expect(betaChunk!.text).not.toContain("class Gamma");
  });

  test("pos reflects absolute offset in original content", async () => {
    const chunks = await chunkByFunction(TS_CODE, "x.ts");

    for (const chunk of chunks) {
      expect(chunk.pos).toBeGreaterThanOrEqual(0);

      // Compare only the first 20 characters (or fewer for shorter chunks).
      const prefixLength = Math.min(20, chunk.text.length);
      const fromSource = TS_CODE.slice(chunk.pos, chunk.pos + prefixLength);
      const fromChunk = chunk.text.slice(0, prefixLength);
      expect(fromSource).toBe(fromChunk);
    }
  });

  test("markdown falls back to auto behavior when chunkStrategy='function'", async () => {
    // Markdown → detectLanguage returns null → getASTFunctionRanges returns []
    // → fall through to auto behavior → short markdown = 1 chunk.
    const md = "# Heading\n\nSome paragraph text.";

    const chunks = await chunkByFunction(md, "readme.md");

    expect(chunks).toHaveLength(1);
    expect(chunks[0]!.text).toBe(md);
  });

  test("code file with only bare statements falls back to auto (no ranges)", async () => {
    const bare = "const x = 1;\nconst y = 2;\n";

    const chunks = await chunkByFunction(bare, "bare.ts");

    expect(chunks).toHaveLength(1);
  });
});
|