3 maanden geleden · 4909aea28d
--- a/test-ast-chunking.mjs
+++ b/test-ast-chunking.mjs
@@ -1,823 +0,0 @@
 
				-#!/usr/bin/env npx tsx
			
 
				-/**
			
 
				- * Thorough integration test + real-collection performance report for
			
 
				- * AST-aware chunking.
			
 
				- *
			
 
				- * Usage:
			
 
				- *   npx tsx test-ast-chunking.mjs                  # synthetic tests only
			
 
				- *   npx tsx test-ast-chunking.mjs /path/to/code    # + scan a real directory
			
 
				- *   npx tsx test-ast-chunking.mjs ~/dev/myproject   # works with ~
			
 
				- *   npx tsx test-ast-chunking.mjs --help
			
 
				- *
			
 
				- * The real-collection scan walks the directory tree, finds supported code
			
 
				- * files (.ts/.js/.py/.go/.rs) and markdown (.md), chunks each file with
			
 
				- * both strategies, and prints a comparative performance report.
			
 
				- */
			
 
				-
			
 
				-import { readFileSync, readdirSync, statSync } from "node:fs";
			
 
				-import { join, relative, extname, resolve } from "node:path";
			
 
				-import { homedir } from "node:os";
			
 
				-import { detectLanguage, getASTBreakPoints } from "./src/ast.js";
			
 
				-import {
			
 
				-  chunkDocument,
			
 
				-  chunkDocumentAsync,
			
 
				-  chunkDocumentWithBreakPoints,
			
 
				-  mergeBreakPoints,
			
 
				-  scanBreakPoints,
			
 
				-  findCodeFences,
			
 
				-  CHUNK_SIZE_CHARS,
			
 
				-} from "./src/store.js";
			
 
				-
			
 
				-// ============================================================================
			
 
				-// Helpers
			
 
				-// ============================================================================
			
 
				-
			
 
				-let passed = 0;
			
 
				-let failed = 0;
			
 
				-
			
 
				-function section(title) {
			
 
				-  console.log(`\n${"=".repeat(70)}`);
			
 
				-  console.log(`  ${title}`);
			
 
				-  console.log("=".repeat(70));
			
 
				-}
			
 
				-
			
 
				-function check(label, condition, detail) {
			
 
				-  if (condition) {
			
 
				-    console.log(`  PASS  ${label}`);
			
 
				-    passed++;
			
 
				-  } else {
			
 
				-    console.log(`  FAIL  ${label}`);
			
 
				-    if (detail) console.log(`        ${detail}`);
			
 
				-    failed++;
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-function formatBytes(bytes) {
			
 
				-  if (bytes < 1024) return `${bytes} B`;
			
 
				-  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
			
 
				-  return `${(bytes / 1024 / 1024).toFixed(1)} MB`;
			
 
				-}
			
 
				-
			
 
				-function pct(n, d) {
			
 
				-  if (d === 0) return "N/A";
			
 
				-  return `${((n / d) * 100).toFixed(1)}%`;
			
 
				-}
			
 
				-
			
 
				-const SKIP_DIRS = new Set([
			
 
				-  "node_modules", ".git", ".cache", "vendor", "dist", "build",
			
 
				-  "__pycache__", ".tox", ".venv", "venv", ".mypy_cache", "target",
			
 
				-  ".next", ".nuxt", "coverage", ".turbo",
			
 
				-]);
			
 
				-
			
 
				-const CODE_EXTS = new Set([
			
 
				-  ".ts", ".tsx", ".js", ".jsx", ".mts", ".cts", ".mjs", ".cjs",
			
 
				-  ".py", ".go", ".rs",
			
 
				-]);
			
 
				-
			
 
				-const ALL_EXTS = new Set([...CODE_EXTS, ".md"]);
			
 
				-
			
 
				-function walkDir(dir, maxFiles = 5000) {
			
 
				-  const results = [];
			
 
				-  const queue = [dir];
			
 
				-  while (queue.length > 0 && results.length < maxFiles) {
			
 
				-    const current = queue.shift();
			
 
				-    let entries;
			
 
				-    try {
			
 
				-      entries = readdirSync(current, { withFileTypes: true });
			
 
				-    } catch {
			
 
				-      continue;
			
 
				-    }
			
 
				-    for (const entry of entries) {
			
 
				-      if (results.length >= maxFiles) break;
			
 
				-      if (entry.name.startsWith(".")) continue;
			
 
				-      const full = join(current, entry.name);
			
 
				-      if (entry.isDirectory()) {
			
 
				-        if (!SKIP_DIRS.has(entry.name)) queue.push(full);
			
 
				-      } else if (entry.isFile()) {
			
 
				-        const ext = extname(entry.name).toLowerCase();
			
 
				-        if (ALL_EXTS.has(ext)) results.push(full);
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  return results;
			
 
				-}
			
 
				-
			
 
				-// ============================================================================
			
 
				-// Parse CLI args
			
 
				-// ============================================================================
			
 
				-
			
 
				-const args = process.argv.slice(2);
			
 
				-let scanDir = null;
			
 
				-let skipSynthetic = false;
			
 
				-
			
 
				-for (const arg of args) {
			
 
				-  if (arg === "--help" || arg === "-h") {
			
 
				-    console.log(`Usage: npx tsx test-ast-chunking.mjs [options] [directory]
			
 
				-
			
 
				-Options:
			
 
				-  --help, -h          Show this help
			
 
				-  --scan-only         Skip synthetic tests, only scan directory
			
 
				-
			
 
				-Arguments:
			
 
				-  directory           Path to scan for a real-collection performance report.
			
 
				-                      Walks the tree for .ts/.tsx/.js/.jsx/.py/.go/.rs/.md files.
			
 
				-
			
 
				-Examples:
			
 
				-  npx tsx test-ast-chunking.mjs                    # synthetic tests only
			
 
				-  npx tsx test-ast-chunking.mjs ~/dev/myproject     # synthetic + real scan
			
 
				-  npx tsx test-ast-chunking.mjs --scan-only ~/dev   # real scan only
			
 
				-`);
			
 
				-    process.exit(0);
			
 
				-  }
			
 
				-  if (arg === "--scan-only") {
			
 
				-    skipSynthetic = true;
			
 
				-  } else if (!arg.startsWith("-")) {
			
 
				-    scanDir = arg.startsWith("~") ? arg.replace("~", homedir()) : resolve(arg);
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-// ============================================================================
			
 
				-// PART 1: Synthetic Tests
			
 
				-// ============================================================================
			
 
				-
			
 
				-if (!skipSynthetic) {
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 1. Language Detection
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("1. Language Detection");
			
 
				-
			
 
				-const langTests = [
			
 
				-  ["src/auth.ts", "typescript"],
			
 
				-  ["src/App.tsx", "tsx"],
			
 
				-  ["src/util.js", "javascript"],
			
 
				-  ["src/App.jsx", "tsx"],
			
 
				-  ["src/auth.mts", "typescript"],
			
 
				-  ["src/auth.cjs", "javascript"],
			
 
				-  ["src/auth.py", "python"],
			
 
				-  ["src/auth.go", "go"],
			
 
				-  ["src/auth.rs", "rust"],
			
 
				-  ["docs/README.md", null],
			
 
				-  ["data/file.csv", null],
			
 
				-  ["Makefile", null],
			
 
				-  ["qmd://myproject/src/auth.ts", "typescript"],
			
 
				-  ["qmd://docs/notes.md", null],
			
 
				-];
			
 
				-
			
 
				-for (const [path, expected] of langTests) {
			
 
				-  const result = detectLanguage(path);
			
 
				-  check(`detectLanguage("${path}") = ${result}`, result === expected,
			
 
				-    `expected ${expected}, got ${result}`);
			
 
				-}
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 2. AST Break Points - TypeScript
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("2. AST Break Points - TypeScript");
			
 
				-
			
 
				-const TS_SAMPLE = `import { Database } from './db';
			
 
				-import type { User } from './types';
			
 
				-
			
 
				-interface AuthConfig {
			
 
				-  secret: string;
			
 
				-  ttl: number;
			
 
				-}
			
 
				-
			
 
				-type UserId = string;
			
 
				-
			
 
				-export class AuthService {
			
 
				-  constructor(private db: Database) {}
			
 
				-
			
 
				-  async authenticate(user: User, token: string): Promise<boolean> {
			
 
				-    const session = await this.db.findSession(token);
			
 
				-    return session?.userId === user.id;
			
 
				-  }
			
 
				-
			
 
				-  validateToken(token: string): boolean {
			
 
				-    return token.length === 64;
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-export function hashPassword(password: string): string {
			
 
				-  return crypto.createHash('sha256').update(password).digest('hex');
			
 
				-}
			
 
				-
			
 
				-const helper = (x: number) => x * 2;
			
 
				-`;
			
 
				-
			
 
				-const tsPoints = await getASTBreakPoints(TS_SAMPLE, "auth.ts");
			
 
				-console.log(`\n  TypeScript break points (${tsPoints.length} total):`);
			
 
				-for (const p of tsPoints) {
			
 
				-  const snippet = TS_SAMPLE.slice(p.pos, p.pos + 40).replace(/\n/g, "\\n");
			
 
				-  console.log(`    pos=${String(p.pos).padStart(4)} score=${String(p.score).padStart(3)} type=${p.type.padEnd(15)} text="${snippet}..."`);
			
 
				-}
			
 
				-
			
 
				-check("Has import break points", tsPoints.some(p => p.type === "ast:import"));
			
 
				-check("Has interface break point", tsPoints.some(p => p.type === "ast:iface"));
			
 
				-check("Has type break point", tsPoints.some(p => p.type === "ast:type"));
			
 
				-check("Has export break point (class)", tsPoints.some(p => p.type === "ast:export"));
			
 
				-check("Has method break points", tsPoints.filter(p => p.type === "ast:method").length >= 2);
			
 
				-check("Import scores 60", tsPoints.find(p => p.type === "ast:import")?.score === 60);
			
 
				-check("Interface scores 100", tsPoints.find(p => p.type === "ast:iface")?.score === 100);
			
 
				-check("Method scores 90", tsPoints.find(p => p.type === "ast:method")?.score === 90);
			
 
				-check("Export scores 90", tsPoints.find(p => p.type === "ast:export")?.score === 90);
			
 
				-check("Break points sorted by position", tsPoints.every((p, i) => i === 0 || p.pos >= tsPoints[i-1].pos));
			
 
				-
			
 
				-const firstImport = tsPoints.find(p => p.type === "ast:import");
			
 
				-check("First import position is correct",
			
 
				-  TS_SAMPLE.slice(firstImport.pos, firstImport.pos + 6) === "import",
			
 
				-  `at pos ${firstImport.pos}: "${TS_SAMPLE.slice(firstImport.pos, firstImport.pos + 10)}"`);
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 3. AST Break Points - Python
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("3. AST Break Points - Python");
			
 
				-
			
 
				-const PY_SAMPLE = `import os
			
 
				-from typing import Optional, List
			
 
				-
			
 
				-class UserService:
			
 
				-    def __init__(self, db):
			
 
				-        self.db = db
			
 
				-
			
 
				-    async def find_user(self, user_id: str) -> Optional[dict]:
			
 
				-        return await self.db.find(user_id)
			
 
				-
			
 
				-    def validate(self, user: dict) -> bool:
			
 
				-        return "id" in user and "name" in user
			
 
				-
			
 
				-def create_user(name: str, email: str) -> dict:
			
 
				-    return {"name": name, "email": email}
			
 
				-
			
 
				-@login_required
			
 
				-def protected_endpoint():
			
 
				-    return "secret"
			
 
				-`;
			
 
				-
			
 
				-const pyPoints = await getASTBreakPoints(PY_SAMPLE, "service.py");
			
 
				-console.log(`\n  Python break points (${pyPoints.length} total):`);
			
 
				-for (const p of pyPoints) {
			
 
				-  const snippet = PY_SAMPLE.slice(p.pos, p.pos + 40).replace(/\n/g, "\\n");
			
 
				-  console.log(`    pos=${String(p.pos).padStart(4)} score=${String(p.score).padStart(3)} type=${p.type.padEnd(15)} text="${snippet}..."`);
			
 
				-}
			
 
				-
			
 
				-check("Has import break points", pyPoints.filter(p => p.type === "ast:import").length >= 2);
			
 
				-check("Has class break point", pyPoints.some(p => p.type === "ast:class"));
			
 
				-check("Has function break points (methods)", pyPoints.filter(p => p.type === "ast:func").length >= 3);
			
 
				-check("Has decorated definition", pyPoints.some(p => p.type === "ast:decorated"));
			
 
				-check("Class scores 100", pyPoints.find(p => p.type === "ast:class")?.score === 100);
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 4. AST Break Points - Go
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("4. AST Break Points - Go");
			
 
				-
			
 
				-const GO_SAMPLE = `package main
			
 
				-
			
 
				-import (
			
 
				-    "fmt"
			
 
				-    "net/http"
			
 
				-)
			
 
				-
			
 
				-type Server struct {
			
 
				-    port int
			
 
				-    db   *Database
			
 
				-}
			
 
				-
			
 
				-type Config interface {
			
 
				-    GetPort() int
			
 
				-}
			
 
				-
			
 
				-func NewServer(port int) *Server {
			
 
				-    return &Server{port: port}
			
 
				-}
			
 
				-
			
 
				-func (s *Server) Start() error {
			
 
				-    return http.ListenAndServe(fmt.Sprintf(":%d", s.port), nil)
			
 
				-}
			
 
				-
			
 
				-func (s *Server) Stop() {
			
 
				-    fmt.Println("stopping")
			
 
				-}
			
 
				-`;
			
 
				-
			
 
				-const goPoints = await getASTBreakPoints(GO_SAMPLE, "server.go");
			
 
				-console.log(`\n  Go break points (${goPoints.length} total):`);
			
 
				-for (const p of goPoints) {
			
 
				-  const snippet = GO_SAMPLE.slice(p.pos, p.pos + 40).replace(/\n/g, "\\n");
			
 
				-  console.log(`    pos=${String(p.pos).padStart(4)} score=${String(p.score).padStart(3)} type=${p.type.padEnd(15)} text="${snippet}..."`);
			
 
				-}
			
 
				-
			
 
				-check("Has import break point", goPoints.some(p => p.type === "ast:import"));
			
 
				-check("Has type break points", goPoints.filter(p => p.type === "ast:type").length >= 2);
			
 
				-check("Has function break point", goPoints.some(p => p.type === "ast:func"));
			
 
				-check("Has method break points", goPoints.filter(p => p.type === "ast:method").length >= 2);
			
 
				-check("Type scores 80", goPoints.find(p => p.type === "ast:type")?.score === 80);
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 5. AST Break Points - Rust
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("5. AST Break Points - Rust");
			
 
				-
			
 
				-const RS_SAMPLE = `use std::collections::HashMap;
			
 
				-use std::io;
			
 
				-
			
 
				-pub struct Config {
			
 
				-    port: u16,
			
 
				-    host: String,
			
 
				-}
			
 
				-
			
 
				-impl Config {
			
 
				-    pub fn new(port: u16, host: String) -> Self {
			
 
				-        Config { port, host }
			
 
				-    }
			
 
				-
			
 
				-    pub fn address(&self) -> String {
			
 
				-        format!("{}:{}", self.host, self.port)
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-pub trait Configurable {
			
 
				-    fn configure(&mut self, config: &Config);
			
 
				-}
			
 
				-
			
 
				-pub enum ServerState {
			
 
				-    Running,
			
 
				-    Stopped,
			
 
				-    Error(String),
			
 
				-}
			
 
				-
			
 
				-pub fn start_server(config: Config) -> io::Result<()> {
			
 
				-    Ok(())
			
 
				-}
			
 
				-`;
			
 
				-
			
 
				-const rsPoints = await getASTBreakPoints(RS_SAMPLE, "config.rs");
			
 
				-console.log(`\n  Rust break points (${rsPoints.length} total):`);
			
 
				-for (const p of rsPoints) {
			
 
				-  const snippet = RS_SAMPLE.slice(p.pos, p.pos + 40).replace(/\n/g, "\\n");
			
 
				-  console.log(`    pos=${String(p.pos).padStart(4)} score=${String(p.score).padStart(3)} type=${p.type.padEnd(15)} text="${snippet}..."`);
			
 
				-}
			
 
				-
			
 
				-check("Has use/import break points", rsPoints.filter(p => p.type === "ast:import").length >= 2);
			
 
				-check("Has struct break point", rsPoints.some(p => p.type === "ast:struct"));
			
 
				-check("Has impl break point", rsPoints.some(p => p.type === "ast:impl"));
			
 
				-check("Has trait break point", rsPoints.some(p => p.type === "ast:trait"));
			
 
				-check("Has enum break point", rsPoints.some(p => p.type === "ast:enum"));
			
 
				-check("Has function break point", rsPoints.some(p => p.type === "ast:func"));
			
 
				-check("Struct scores 100", rsPoints.find(p => p.type === "ast:struct")?.score === 100);
			
 
				-check("Impl scores 100", rsPoints.find(p => p.type === "ast:impl")?.score === 100);
			
 
				-check("Trait scores 100", rsPoints.find(p => p.type === "ast:trait")?.score === 100);
			
 
				-check("Enum scores 80", rsPoints.find(p => p.type === "ast:enum")?.score === 80);
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 6. Merge Break Points
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("6. mergeBreakPoints");
			
 
				-
			
 
				-const regexPoints = [
			
 
				-  { pos: 10, score: 20, type: "blank" },
			
 
				-  { pos: 50, score: 1, type: "newline" },
			
 
				-  { pos: 100, score: 20, type: "blank" },
			
 
				-];
			
 
				-const astPointsMerge = [
			
 
				-  { pos: 10, score: 90, type: "ast:func" },
			
 
				-  { pos: 75, score: 100, type: "ast:class" },
			
 
				-  { pos: 100, score: 60, type: "ast:import" },
			
 
				-];
			
 
				-
			
 
				-const merged = mergeBreakPoints(regexPoints, astPointsMerge);
			
 
				-console.log(`\n  Merged break points (${merged.length} total):`);
			
 
				-for (const p of merged) {
			
 
				-  console.log(`    pos=${String(p.pos).padStart(4)} score=${String(p.score).padStart(3)} type=${p.type}`);
			
 
				-}
			
 
				-
			
 
				-check("Merge has 4 unique positions", merged.length === 4);
			
 
				-check("pos 10: AST wins (90 > 20)", merged.find(p => p.pos === 10)?.score === 90);
			
 
				-check("pos 50: regex only (1)", merged.find(p => p.pos === 50)?.score === 1);
			
 
				-check("pos 75: AST only (100)", merged.find(p => p.pos === 75)?.score === 100);
			
 
				-check("pos 100: AST wins (60 > 20)", merged.find(p => p.pos === 100)?.score === 60);
			
 
				-check("Sorted by position", merged.every((p, i) => i === 0 || p.pos >= merged[i-1].pos));
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 7. AST vs Regex Chunking Comparison (Large Synthetic File)
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("7. AST vs Regex Chunking Comparison");
			
 
				-
			
 
				-const largeTSParts = [];
			
 
				-for (let i = 0; i < 30; i++) {
			
 
				-  largeTSParts.push(`
			
 
				-export function handler${i}(req: Request, res: Response): void {
			
 
				-  const startTime = Date.now();
			
 
				-  const userId = req.params.userId;
			
 
				-  const sessionToken = req.headers.authorization;
			
 
				-
			
 
				-  // Validate the incoming request parameters
			
 
				-  if (!userId || !sessionToken) {
			
 
				-    res.status(400).json({ error: "Missing required parameters" });
			
 
				-    return;
			
 
				-  }
			
 
				-
			
 
				-  // Process the request with detailed logging
			
 
				-  console.log(\`Processing request \${i} for user \${userId}\`);
			
 
				-  const result = processBusinessLogic${i}(userId, sessionToken);
			
 
				-
			
 
				-  // Return the response with timing info
			
 
				-  const elapsed = Date.now() - startTime;
			
 
				-  res.json({ data: result, processingTimeMs: elapsed });
			
 
				-}
			
 
				-`);
			
 
				-}
			
 
				-const largeTS = largeTSParts.join("\n");
			
 
				-
			
 
				-console.log(`\n  Large TS file: ${largeTS.length} chars, ${largeTSParts.length} functions`);
			
 
				-
			
 
				-const regexChunks = chunkDocument(largeTS);
			
 
				-const astChunks = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "handlers.ts", "auto");
			
 
				-
			
 
				-console.log(`  Regex chunks: ${regexChunks.length}`);
			
 
				-console.log(`  AST chunks:   ${astChunks.length}`);
			
 
				-
			
 
				-function countSplitFunctions(chunks, source) {
			
 
				-  let splits = 0;
			
 
				-  for (let i = 0; i < 30; i++) {
			
 
				-    const funcStart = source.indexOf(`function handler${i}(`);
			
 
				-    const nextFunc = source.indexOf(`function handler${i + 1}(`, funcStart + 1);
			
 
				-    const funcEnd = nextFunc > 0 ? nextFunc : source.length;
			
 
				-    const chunkIndices = new Set();
			
 
				-    for (let ci = 0; ci < chunks.length; ci++) {
			
 
				-      const chunkStart = chunks[ci].pos;
			
 
				-      const chunkEnd = chunkStart + chunks[ci].text.length;
			
 
				-      if (chunkStart < funcEnd && chunkEnd > funcStart) {
			
 
				-        chunkIndices.add(ci);
			
 
				-      }
			
 
				-    }
			
 
				-    if (chunkIndices.size > 1) splits++;
			
 
				-  }
			
 
				-  return splits;
			
 
				-}
			
 
				-
			
 
				-const regexSplits = countSplitFunctions(regexChunks, largeTS);
			
 
				-const astSplitsSynth = countSplitFunctions(astChunks, largeTS);
			
 
				-
			
 
				-console.log(`\n  Functions split across chunks:`);
			
 
				-console.log(`    Regex: ${regexSplits} / 30`);
			
 
				-console.log(`    AST:   ${astSplitsSynth} / 30`);
			
 
				-
			
 
				-check("AST splits fewer functions than regex", astSplitsSynth <= regexSplits,
			
 
				-  `AST split ${astSplitsSynth}, regex split ${regexSplits}`);
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 8. Markdown Files Unchanged
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("8. Markdown Files Unchanged in Auto Mode");
			
 
				-
			
 
				-const mdContent = [];
			
 
				-for (let i = 0; i < 15; i++) {
			
 
				-  mdContent.push(`# Section ${i}\n\n${"Lorem ipsum dolor sit amet. ".repeat(40)}\n`);
			
 
				-}
			
 
				-const largeMD = mdContent.join("\n");
			
 
				-
			
 
				-const mdRegex = chunkDocument(largeMD);
			
 
				-const mdAst = await chunkDocumentAsync(largeMD, undefined, undefined, undefined, "readme.md", "auto");
			
 
				-
			
 
				-check("Same number of chunks", mdRegex.length === mdAst.length,
			
 
				-  `regex=${mdRegex.length}, ast=${mdAst.length}`);
			
 
				-
			
 
				-let mdIdentical = true;
			
 
				-for (let i = 0; i < mdRegex.length; i++) {
			
 
				-  if (mdRegex[i]?.text !== mdAst[i]?.text || mdRegex[i]?.pos !== mdAst[i]?.pos) {
			
 
				-    mdIdentical = false;
			
 
				-    break;
			
 
				-  }
			
 
				-}
			
 
				-check("Chunk content is identical", mdIdentical);
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 9-11. Strategy bypass, no-filepath fallback, error handling
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("9. Regex Strategy Bypass");
			
 
				-const regexOnly = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "handlers.ts", "regex");
			
 
				-const syncRegex = chunkDocument(largeTS);
			
 
				-check("Same chunks as sync regex", regexOnly.length === syncRegex.length &&
			
 
				-  regexOnly.every((c, i) => c.text === syncRegex[i]?.text));
			
 
				-
			
 
				-section("10. No Filepath Falls Back to Regex");
			
 
				-const noPathChunks = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, undefined, "auto");
			
 
				-check("Same chunks as regex", noPathChunks.length === syncRegex.length);
			
 
				-
			
 
				-section("11. Error Handling & Edge Cases");
			
 
				-check("Empty file -> []", (await getASTBreakPoints("", "e.ts")).length === 0);
			
 
				-check("Broken syntax doesn't crash", Array.isArray(await getASTBreakPoints("function { %%", "x.ts")));
			
 
				-check("Unknown ext -> []", (await getASTBreakPoints("data", "f.csv")).length === 0);
			
 
				-check("Markdown -> []", (await getASTBreakPoints("# H", "r.md")).length === 0);
			
 
				-const smallChunks = await chunkDocumentAsync("export const x = 1;", undefined, undefined, undefined, "s.ts", "auto");
			
 
				-check("Small file -> 1 chunk", smallChunks.length === 1);
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 12. chunkDocumentWithBreakPoints Equivalence
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("12. chunkDocumentWithBreakPoints Equivalence");
			
 
				-const eqContent = "a".repeat(5000) + "\n\n" + "b".repeat(5000);
			
 
				-const eqOld = chunkDocument(eqContent);
			
 
				-const eqNew = chunkDocumentWithBreakPoints(eqContent, scanBreakPoints(eqContent), findCodeFences(eqContent));
			
 
				-check("Identical output", eqOld.length === eqNew.length &&
			
 
				-  eqOld.every((c, i) => c.text === eqNew[i]?.text && c.pos === eqNew[i]?.pos));
			
 
				-
			
 
				-// --------------------------------------------------------------------------
			
 
				-// 13. Synthetic performance
			
 
				-// --------------------------------------------------------------------------
			
 
				-section("13. Synthetic Performance");
			
 
				-
			
 
				-const t0 = performance.now();
			
 
				-for (let i = 0; i < 10; i++) await getASTBreakPoints(largeTS, "p.ts");
			
 
				-const astExtractMs = (performance.now() - t0) / 10;
			
 
				-
			
 
				-const t1 = performance.now();
			
 
				-for (let i = 0; i < 10; i++) scanBreakPoints(largeTS);
			
 
				-const regexExtractMs = (performance.now() - t1) / 10;
			
 
				-
			
 
				-const t2 = performance.now();
			
 
				-for (let i = 0; i < 10; i++) await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "p.ts", "auto");
			
 
				-const astFullMs = (performance.now() - t2) / 10;
			
 
				-
			
 
				-const t3 = performance.now();
			
 
				-for (let i = 0; i < 10; i++) chunkDocument(largeTS);
			
 
				-const regexFullMs = (performance.now() - t3) / 10;
			
 
				-
			
 
				-console.log(`\n  File size:                    ${formatBytes(largeTS.length)}`);
			
 
				-console.log(`  AST break point extraction:   ${astExtractMs.toFixed(1)}ms`);
			
 
				-console.log(`  Regex break point extraction: ${regexExtractMs.toFixed(1)}ms`);
			
 
				-console.log(`  Full AST chunking:            ${astFullMs.toFixed(1)}ms`);
			
 
				-console.log(`  Full regex chunking:          ${regexFullMs.toFixed(1)}ms`);
			
 
				-console.log(`  Overhead per file:            ${(astFullMs - regexFullMs).toFixed(1)}ms`);
			
 
				-
			
 
				-check("AST chunking < 50ms per file", astFullMs < 50, `was ${astFullMs.toFixed(1)}ms`);
			
 
				-
			
 
				-// End of synthetic tests
			
 
				-section("Synthetic Test Results");
			
 
				-console.log(`\n  ${passed} passed, ${failed} failed`);
			
 
				-
			
 
				-} // end if (!skipSynthetic)
			
 
				-
			
 
				-
			
 
				-// ============================================================================
			
 
				-// PART 2: Real Collection Scan
			
 
				-// ============================================================================
			
 
				-
			
 
				-if (scanDir) {
			
 
				-
			
 
				-section(`Real Collection Scan: ${scanDir}`);
			
 
				-
			
 
				-console.log(`\n  Discovering files...`);
			
 
				-const realFiles = walkDir(scanDir);
			
 
				-console.log(`  Found ${realFiles.length} files\n`);
			
 
				-
			
 
				-if (realFiles.length === 0) {
			
 
				-  console.log("  No supported files found. Supported: .ts .tsx .js .jsx .py .go .rs .md");
			
 
				-} else {
			
 
				-
			
 
				-  // Classify files
			
 
				-  const byLang = {};
			
 
				-  let totalBytes = 0;
			
 
				-  const fileEntries = [];
			
 
				-
			
 
				-  for (const filepath of realFiles) {
			
 
				-    let content;
			
 
				-    try {
			
 
				-      const stat = statSync(filepath);
			
 
				-      if (stat.size > 500_000) continue; // skip files > 500KB
			
 
				-      content = readFileSync(filepath, "utf-8");
			
 
				-    } catch {
			
 
				-      continue;
			
 
				-    }
			
 
				-    if (!content.trim()) continue;
			
 
				-
			
 
				-    const rel = relative(scanDir, filepath);
			
 
				-    const lang = detectLanguage(filepath);
			
 
				-    const langLabel = lang ?? "markdown";
			
 
				-
			
 
				-    byLang[langLabel] = (byLang[langLabel] || 0) + 1;
			
 
				-    totalBytes += content.length;
			
 
				-    fileEntries.push({ filepath, rel, lang, langLabel, content });
			
 
				-  }
			
 
				-
			
 
				-  // Print file distribution
			
 
				-  console.log("  File distribution:");
			
 
				-  for (const [lang, count] of Object.entries(byLang).sort((a, b) => b[1] - a[1])) {
			
 
				-    console.log(`    ${lang.padEnd(14)} ${count} files`);
			
 
				-  }
			
 
				-  console.log(`    ${"total".padEnd(14)} ${fileEntries.length} files (${formatBytes(totalBytes)})`);
			
 
				-
			
 
				-  // ---- Per-file analysis ----
			
 
				-
			
 
				-  // Accumulators
			
 
				-  const perLang = {};
			
 
				-  let totalRegexChunks = 0;
			
 
				-  let totalAstChunks = 0;
			
 
				-  let totalRegexMs = 0;
			
 
				-  let totalAstMs = 0;
			
 
				-  let filesWithDifference = 0;
			
 
				-  let multiChunkFiles = 0;
			
 
				-  const bigDiffs = [];  // files where AST made the biggest difference
			
 
				-
			
 
				-  console.log(`\n  Analyzing ${fileEntries.length} files...\n`);
			
 
				-
			
 
				-  for (const entry of fileEntries) {
			
 
				-    const { rel, lang, langLabel, content } = entry;
			
 
				-    const isCode = lang !== null;
			
 
				-
			
 
				-    // Regex chunking
			
 
				-    const rt0 = performance.now();
			
 
				-    const rChunks = chunkDocument(content);
			
 
				-    const rMs = performance.now() - rt0;
			
 
				-
			
 
				-    // AST chunking
			
 
				-    const at0 = performance.now();
			
 
				-    const aChunks = await chunkDocumentAsync(content, undefined, undefined, undefined, rel, "auto");
			
 
				-    const aMs = performance.now() - at0;
			
 
				-
			
 
				-    totalRegexChunks += rChunks.length;
			
 
				-    totalAstChunks += aChunks.length;
			
 
				-    totalRegexMs += rMs;
			
 
				-    totalAstMs += aMs;
			
 
				-
			
 
				-    if (rChunks.length > 1 || aChunks.length > 1) multiChunkFiles++;
			
 
				-
			
 
				-    const chunkDiff = aChunks.length - rChunks.length;
			
 
				-    const contentDiffers = rChunks.length !== aChunks.length ||
			
 
				-      rChunks.some((c, i) => c.text !== aChunks[i]?.text);
			
 
				-
			
 
				-    if (contentDiffers) filesWithDifference++;
			
 
				-
			
 
				-    // Per-language stats
			
 
				-    if (!perLang[langLabel]) {
			
 
				-      perLang[langLabel] = {
			
 
				-        files: 0, bytes: 0, regexChunks: 0, astChunks: 0,
			
 
				-        regexMs: 0, astMs: 0, astBreakpoints: 0, diffs: 0,
			
 
				-      };
			
 
				-    }
			
 
				-    const s = perLang[langLabel];
			
 
				-    s.files++;
			
 
				-    s.bytes += content.length;
			
 
				-    s.regexChunks += rChunks.length;
			
 
				-    s.astChunks += aChunks.length;
			
 
				-    s.regexMs += rMs;
			
 
				-    s.astMs += aMs;
			
 
				-    if (contentDiffers) s.diffs++;
			
 
				-
			
 
				-    // Count AST breakpoints for code files
			
 
				-    if (isCode) {
			
 
				-      const bp = await getASTBreakPoints(content, rel);
			
 
				-      s.astBreakpoints += bp.length;
			
 
				-    }
			
 
				-
			
 
				-    // Track big differences for the detailed report
			
 
				-    if (contentDiffers && isCode && (rChunks.length > 1 || aChunks.length > 1)) {
			
 
				-      bigDiffs.push({
			
 
				-        rel, lang: langLabel, bytes: content.length,
			
 
				-        regexN: rChunks.length, astN: aChunks.length,
			
 
				-        diff: chunkDiff, overheadMs: aMs - rMs,
			
 
				-      });
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // ---- Aggregate report ----
			
 
				-
			
 
				-  section("Per-Language Summary");
			
 
				-
			
 
				-  const langOrder = Object.entries(perLang).sort((a, b) => b[1].files - a[1].files);
			
 
				-  const colW = { lang: 14, files: 7, bytes: 10, rChunks: 9, aChunks: 9, bps: 6, diffs: 6, rMs: 9, aMs: 9 };
			
 
				-
			
 
				-  console.log(
			
 
				-    `\n  ${"Language".padEnd(colW.lang)}${"Files".padStart(colW.files)}${"Size".padStart(colW.bytes)}` +
			
 
				-    `${"Rx Chnk".padStart(colW.rChunks)}${"AST Chnk".padStart(colW.aChunks)}` +
			
 
				-    `${"BPs".padStart(colW.bps)}${"Diffs".padStart(colW.diffs)}` +
			
 
				-    `${"Rx ms".padStart(colW.rMs)}${"AST ms".padStart(colW.aMs)}`
			
 
				-  );
			
 
				-  console.log("  " + "-".repeat(Object.values(colW).reduce((a, b) => a + b, 0)));
			
 
				-
			
 
				-  for (const [lang, s] of langOrder) {
			
 
				-    console.log(
			
 
				-      `  ${lang.padEnd(colW.lang)}` +
			
 
				-      `${String(s.files).padStart(colW.files)}` +
			
 
				-      `${formatBytes(s.bytes).padStart(colW.bytes)}` +
			
 
				-      `${String(s.regexChunks).padStart(colW.rChunks)}` +
			
 
				-      `${String(s.astChunks).padStart(colW.aChunks)}` +
			
 
				-      `${String(s.astBreakpoints).padStart(colW.bps)}` +
			
 
				-      `${String(s.diffs).padStart(colW.diffs)}` +
			
 
				-      `${s.regexMs.toFixed(1).padStart(colW.rMs)}` +
			
 
				-      `${s.astMs.toFixed(1).padStart(colW.aMs)}`
			
 
				-    );
			
 
				-  }
			
 
				-
			
 
				-  console.log("  " + "-".repeat(Object.values(colW).reduce((a, b) => a + b, 0)));
			
 
				-  console.log(
			
 
				-    `  ${"TOTAL".padEnd(colW.lang)}` +
			
 
				-    `${String(fileEntries.length).padStart(colW.files)}` +
			
 
				-    `${formatBytes(totalBytes).padStart(colW.bytes)}` +
			
 
				-    `${String(totalRegexChunks).padStart(colW.rChunks)}` +
			
 
				-    `${String(totalAstChunks).padStart(colW.aChunks)}` +
			
 
				-    `${"".padStart(colW.bps)}` +
			
 
				-    `${String(filesWithDifference).padStart(colW.diffs)}` +
			
 
				-    `${totalRegexMs.toFixed(1).padStart(colW.rMs)}` +
			
 
				-    `${totalAstMs.toFixed(1).padStart(colW.aMs)}`
			
 
				-  );
			
 
				-
			
 
				-  // ---- Headline stats ----
			
 
				-
			
 
				-  section("Headline Stats");
			
 
				-
			
 
				-  const codeFiles = fileEntries.filter(e => e.lang !== null).length;
			
 
				-  const mdFiles = fileEntries.filter(e => e.lang === null).length;
			
 
				-  const avgOverheadMs = codeFiles > 0
			
 
				-    ? (langOrder.filter(([l]) => l !== "markdown").reduce((s, [, v]) => s + v.astMs - v.regexMs, 0)) / codeFiles
			
 
				-    : 0;
			
 
				-
			
 
				-  console.log(`
			
 
				-  Files scanned:         ${fileEntries.length} (${codeFiles} code, ${mdFiles} markdown)
			
 
				-  Multi-chunk files:     ${multiChunkFiles} (files large enough to produce >1 chunk)
			
 
				-  Files where AST differed: ${filesWithDifference} / ${fileEntries.length} (${pct(filesWithDifference, fileEntries.length)})
			
 
				-  Total chunks (regex):  ${totalRegexChunks}
			
 
				-  Total chunks (AST):    ${totalAstChunks}  (${totalAstChunks > totalRegexChunks ? "+" : ""}${totalAstChunks - totalRegexChunks})
			
 
				-  Total time (regex):    ${totalRegexMs.toFixed(1)}ms
			
 
				-  Total time (AST):      ${totalAstMs.toFixed(1)}ms  (+${(totalAstMs - totalRegexMs).toFixed(1)}ms overhead)
			
 
				-  Avg overhead per code file: ${avgOverheadMs.toFixed(2)}ms
			
 
				-  `);
			
 
				-
			
 
				-  // ---- Top differences ----
			
 
				-
			
 
				-  if (bigDiffs.length > 0) {
			
 
				-    section("Top Files Where AST Changed Chunking");
			
 
				-
			
 
				-    bigDiffs.sort((a, b) => Math.abs(b.diff) - Math.abs(a.diff));
			
 
				-    const topN = bigDiffs.slice(0, 20);
			
 
				-
			
 
				-    console.log(
			
 
				-      `\n  ${"File".padEnd(50)} ${"Lang".padEnd(12)} ${"Size".padStart(8)} ` +
			
 
				-      `${"Rx".padStart(4)} ${"AST".padStart(4)} ${"Diff".padStart(5)} ` +
			
 
				-      `${"OH ms".padStart(7)}`
			
 
				-    );
			
 
				-    console.log("  " + "-".repeat(94));
			
 
				-
			
 
				-    for (const d of topN) {
			
 
				-      const sign = d.diff > 0 ? "+" : "";
			
 
				-      console.log(
			
 
				-        `  ${d.rel.slice(0, 49).padEnd(50)} ${d.lang.padEnd(12)} ${formatBytes(d.bytes).padStart(8)} ` +
			
 
				-        `${String(d.regexN).padStart(4)} ${String(d.astN).padStart(4)} ${(sign + d.diff).padStart(5)} ` +
			
 
				-        `${d.overheadMs.toFixed(1).padStart(7)}`
			
 
				-      );
			
 
				-    }
			
 
				-
			
 
				-    if (bigDiffs.length > 20) {
			
 
				-      console.log(`\n  ... and ${bigDiffs.length - 20} more files with differences`);
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // ---- Markdown regression check ----
			
 
				-
			
 
				-  const mdEntries = fileEntries.filter(e => e.lang === null);
			
 
				-  if (mdEntries.length > 0) {
			
 
				-    section("Markdown Regression Check");
			
 
				-
			
 
				-    let mdRegressions = 0;
			
 
				-    for (const entry of mdEntries) {
			
 
				-      const rChunks = chunkDocument(entry.content);
			
 
				-      const aChunks = await chunkDocumentAsync(entry.content, undefined, undefined, undefined, entry.rel, "auto");
			
 
				-      const same = rChunks.length === aChunks.length &&
			
 
				-        rChunks.every((c, i) => c.text === aChunks[i]?.text);
			
 
				-      if (!same) {
			
 
				-        mdRegressions++;
			
 
				-        console.log(`  REGRESSION: ${entry.rel} (regex=${rChunks.length}, ast=${aChunks.length})`);
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    if (mdRegressions === 0) {
			
 
				-      console.log(`\n  All ${mdEntries.length} markdown files produce identical chunks. No regressions.`);
			
 
				-    } else {
			
 
				-      console.log(`\n  ${mdRegressions} / ${mdEntries.length} markdown files differ (unexpected!)`);
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-} // end if realFiles.length > 0
			
 
				-
			
 
				-} // end if scanDir
			
 
				-
			
 
				-// ============================================================================
			
 
				-// Final Summary
			
 
				-// ============================================================================
			
 
				-
			
 
				-console.log(`\n${"=".repeat(70)}`);
			
 
				-if (!skipSynthetic) {
			
 
				-  console.log(`  SYNTHETIC TESTS: ${passed} passed, ${failed} failed`);
			
 
				-}
			
 
				-if (scanDir) {
			
 
				-  console.log(`  COLLECTION SCAN: complete (see report above)`);
			
 
				-}
			
 
				-if (!scanDir && !skipSynthetic) {
			
 
				-  console.log(`\n  Tip: Run with a directory argument to scan real files:`);
			
 
				-  console.log(`    npx tsx test-ast-chunking.mjs ~/dev/my-project`);
			
 
				-}
			
 
				-console.log("=".repeat(70));
			
 
				-
			
 
				-if (failed > 0) process.exit(1);
			
--- a/test/ast-chunking.test.ts
+++ b/test/ast-chunking.test.ts
@@ -0,0 +1,199 @@
 
				+/**
			
 
				+ * Integration tests for AST-aware chunking.
			
 
				+ *
			
 
				+ * Migrated from the standalone test-ast-chunking.mjs script into the
			
 
				+ * vitest suite. Covers the integration between AST break point extraction
			
 
				+ * and the chunking pipeline — areas not tested by the unit-level ast.test.ts.
			
 
				+ */
			
 
				+
			
 
				+import { describe, test, expect } from "vitest";
			
 
				+import { getASTBreakPoints } from "../src/ast.js";
			
 
				+import {
			
 
				+  chunkDocument,
			
 
				+  chunkDocumentAsync,
			
 
				+  chunkDocumentWithBreakPoints,
			
 
				+  mergeBreakPoints,
			
 
				+  scanBreakPoints,
			
 
				+  findCodeFences,
			
 
				+} from "../src/store.js";
			
 
				+
			
 
				+// ==========================================================================
			
 
				+// mergeBreakPoints
			
 
				+// ==========================================================================
			
 
				+
			
 
				+describe("mergeBreakPoints", () => {
			
 
				+  test("merges regex and AST break points, higher score wins at same position", () => {
			
 
				+    const regexPoints = [
			
 
				+      { pos: 10, score: 20, type: "blank" },
			
 
				+      { pos: 50, score: 1, type: "newline" },
			
 
				+      { pos: 100, score: 20, type: "blank" },
			
 
				+    ];
			
 
				+    const astPoints = [
			
 
				+      { pos: 10, score: 90, type: "ast:func" },
			
 
				+      { pos: 75, score: 100, type: "ast:class" },
			
 
				+      { pos: 100, score: 60, type: "ast:import" },
			
 
				+    ];
			
 
				+
			
 
				+    const merged = mergeBreakPoints(regexPoints, astPoints);
			
 
				+
			
 
				+    expect(merged).toHaveLength(4);
			
 
				+    expect(merged.find(p => p.pos === 10)?.score).toBe(90);   // AST wins (90 > 20)
			
 
				+    expect(merged.find(p => p.pos === 50)?.score).toBe(1);    // regex only
			
 
				+    expect(merged.find(p => p.pos === 75)?.score).toBe(100);  // AST only
			
 
				+    expect(merged.find(p => p.pos === 100)?.score).toBe(60);  // AST wins (60 > 20)
			
 
				+  });
			
 
				+
			
 
				+  test("result is sorted by position", () => {
			
 
				+    const merged = mergeBreakPoints(
			
 
				+      [{ pos: 100, score: 10, type: "a" }],
			
 
				+      [{ pos: 5, score: 50, type: "b" }],
			
 
				+    );
			
 
				+    expect(merged[0]!.pos).toBeLessThan(merged[1]!.pos);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ==========================================================================
			
 
				+// AST vs Regex chunking comparison
			
 
				+// ==========================================================================
			
 
				+
			
 
				+describe("AST vs Regex chunking", () => {
			
 
				+  // Generate a large TS file with 30 functions
			
 
				+  const parts: string[] = [];
			
 
				+  for (let i = 0; i < 30; i++) {
			
 
				+    parts.push(`
			
 
				+export function handler${i}(req: Request, res: Response): void {
			
 
				+  const startTime = Date.now();
			
 
				+  const userId = req.params.userId;
			
 
				+  const sessionToken = req.headers.authorization;
			
 
				+
			
 
				+  if (!userId || !sessionToken) {
			
 
				+    res.status(400).json({ error: "Missing required parameters" });
			
 
				+    return;
			
 
				+  }
			
 
				+
			
 
				+  console.log(\`Processing request ${i} for user \${userId}\`);
			
 
				+  const result = processBusinessLogic${i}(userId, sessionToken);
			
 
				+
			
 
				+  const elapsed = Date.now() - startTime;
			
 
				+  res.json({ data: result, processingTimeMs: elapsed });
			
 
				+}
			
 
				+`);
			
 
				+  }
			
 
				+  const largeTS = parts.join("\n");
			
 
				+
			
 
				+  function countSplitFunctions(chunks: { text: string; pos: number }[]): number {
			
 
				+    let splits = 0;
			
 
				+    for (let i = 0; i < 30; i++) {
			
 
				+      const funcStart = largeTS.indexOf(`function handler${i}(`);
			
 
				+      const nextFunc = largeTS.indexOf(`function handler${i + 1}(`, funcStart + 1);
			
 
				+      const funcEnd = nextFunc > 0 ? nextFunc : largeTS.length;
			
 
				+      const chunkIndices = new Set<number>();
			
 
				+      for (let ci = 0; ci < chunks.length; ci++) {
			
 
				+        const chunkStart = chunks[ci]!.pos;
			
 
				+        const chunkEnd = chunkStart + chunks[ci]!.text.length;
			
 
				+        if (chunkStart < funcEnd && chunkEnd > funcStart) {
			
 
				+          chunkIndices.add(ci);
			
 
				+        }
			
 
				+      }
			
 
				+      if (chunkIndices.size > 1) splits++;
			
 
				+    }
			
 
				+    return splits;
			
 
				+  }
			
 
				+
			
 
				+  test("AST splits fewer functions across chunk boundaries than regex", async () => {
			
 
				+    const regexChunks = chunkDocument(largeTS);
			
 
				+    const astChunks = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "handlers.ts", "auto");
			
 
				+
			
 
				+    const regexSplits = countSplitFunctions(regexChunks);
			
 
				+    const astSplits = countSplitFunctions(astChunks);
			
 
				+
			
 
				+    expect(astSplits).toBeLessThanOrEqual(regexSplits);
			
 
				+  });
			
 
				+
			
 
				+  test("markdown files produce identical chunks in auto vs regex mode", async () => {
			
 
				+    const sections: string[] = [];
			
 
				+    for (let i = 0; i < 15; i++) {
			
 
				+      sections.push(`# Section ${i}\n\n${"Lorem ipsum dolor sit amet. ".repeat(40)}\n`);
			
 
				+    }
			
 
				+    const largeMD = sections.join("\n");
			
 
				+
			
 
				+    const mdRegex = chunkDocument(largeMD);
			
 
				+    const mdAst = await chunkDocumentAsync(largeMD, undefined, undefined, undefined, "readme.md", "auto");
			
 
				+
			
 
				+    expect(mdAst).toHaveLength(mdRegex.length);
			
 
				+    for (let i = 0; i < mdRegex.length; i++) {
			
 
				+      expect(mdAst[i]?.text).toBe(mdRegex[i]?.text);
			
 
				+      expect(mdAst[i]?.pos).toBe(mdRegex[i]?.pos);
			
 
				+    }
			
 
				+  });
			
 
				+
			
 
				+  test("regex strategy bypasses AST entirely", async () => {
			
 
				+    const regexOnly = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "handlers.ts", "regex");
			
 
				+    const syncRegex = chunkDocument(largeTS);
			
 
				+
			
 
				+    expect(regexOnly).toHaveLength(syncRegex.length);
			
 
				+    for (let i = 0; i < syncRegex.length; i++) {
			
 
				+      expect(regexOnly[i]?.text).toBe(syncRegex[i]?.text);
			
 
				+    }
			
 
				+  });
			
 
				+
			
 
				+  test("no filepath falls back to regex", async () => {
			
 
				+    const noPathChunks = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, undefined, "auto");
			
 
				+    const syncRegex = chunkDocument(largeTS);
			
 
				+    expect(noPathChunks).toHaveLength(syncRegex.length);
			
 
				+  });
			
 
				+
			
 
				+  test("small file produces single chunk", async () => {
			
 
				+    const smallChunks = await chunkDocumentAsync("export const x = 1;", undefined, undefined, undefined, "s.ts", "auto");
			
 
				+    expect(smallChunks).toHaveLength(1);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ==========================================================================
			
 
				+// chunkDocumentWithBreakPoints equivalence
			
 
				+// ==========================================================================
			
 
				+
			
 
				+describe("chunkDocumentWithBreakPoints equivalence", () => {
			
 
				+  test("produces identical output to chunkDocument for the same content", () => {
			
 
				+    const content = "a".repeat(5000) + "\n\n" + "b".repeat(5000);
			
 
				+    const old = chunkDocument(content);
			
 
				+    const withBP = chunkDocumentWithBreakPoints(content, scanBreakPoints(content), findCodeFences(content));
			
 
				+
			
 
				+    expect(withBP).toHaveLength(old.length);
			
 
				+    for (let i = 0; i < old.length; i++) {
			
 
				+      expect(withBP[i]?.text).toBe(old[i]?.text);
			
 
				+      expect(withBP[i]?.pos).toBe(old[i]?.pos);
			
 
				+    }
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ==========================================================================
			
 
				+// Score assertions not covered by ast.test.ts unit tests
			
 
				+// ==========================================================================
			
 
				+
			
 
				+describe("AST break point scores", () => {
			
 
				+  test("TypeScript export (class) scores 90", async () => {
			
 
				+    const code = `export class Foo {}\nexport function bar() {}`;
			
 
				+    const points = await getASTBreakPoints(code, "a.ts");
			
 
				+    const exportPoint = points.find(p => p.type === "ast:export");
			
 
				+    expect(exportPoint?.score).toBe(90);
			
 
				+  });
			
 
				+
			
 
				+  test("Python class scores 100", async () => {
			
 
				+    const code = `class Foo:\n    pass\n\ndef bar():\n    pass`;
			
 
				+    const points = await getASTBreakPoints(code, "a.py");
			
 
				+    expect(points.find(p => p.type === "ast:class")?.score).toBe(100);
			
 
				+  });
			
 
				+
			
 
				+  test("Go type scores 80", async () => {
			
 
				+    const code = `package main\n\ntype Server struct {\n    port int\n}\n\nfunc main() {}`;
			
 
				+    const points = await getASTBreakPoints(code, "a.go");
			
 
				+    expect(points.find(p => p.type === "ast:type")?.score).toBe(80);
			
 
				+  });
			
 
				+
			
 
				+  test("Rust enum scores 80", async () => {
			
 
				+    const code = `enum State {\n    On,\n    Off,\n}\n\nfn main() {}`;
			
 
				+    const points = await getASTBreakPoints(code, "a.rs");
			
 
				+    expect(points.find(p => p.type === "ast:enum")?.score).toBe(80);
			
 
				+  });
			
 
				+});