/**
 * ast.test.ts - Tests for AST-aware chunking support
 *
 * Tests language detection, AST break point extraction for each
 * supported language, and graceful fallback on errors.
 */

import { describe, test, expect } from "vitest";
import { detectLanguage, getASTBreakPoints, getASTFunctionRanges, extractSymbols } from "../src/ast.js";
import type { SupportedLanguage } from "../src/ast.js";

// =============================================================================
// Language Detection
// =============================================================================

// Checks the language reported for a path's extension, and that files the
// chunker does not handle (markdown, csv, yaml, extension-less) yield null.
describe("detectLanguage", () => {
  test("recognizes TypeScript extensions", () => {
    expect(detectLanguage("src/auth.ts")).toBe("typescript");
    expect(detectLanguage("src/auth.mts")).toBe("typescript");
    expect(detectLanguage("src/auth.cts")).toBe("typescript");
  });

  test("recognizes TSX extension", () => {
    expect(detectLanguage("src/App.tsx")).toBe("tsx");
  });

  test("recognizes JavaScript extensions", () => {
    expect(detectLanguage("src/util.js")).toBe("javascript");
    expect(detectLanguage("src/util.mjs")).toBe("javascript");
    expect(detectLanguage("src/util.cjs")).toBe("javascript");
  });

  test("recognizes JSX as tsx", () => {
    expect(detectLanguage("src/App.jsx")).toBe("tsx");
  });

  test("recognizes Python extension", () => {
    expect(detectLanguage("src/auth.py")).toBe("python");
  });

  test("recognizes Go extension", () => {
    expect(detectLanguage("src/auth.go")).toBe("go");
  });

  test("recognizes Rust extension", () => {
    expect(detectLanguage("src/auth.rs")).toBe("rust");
  });

  test("returns null for markdown", () => {
    expect(detectLanguage("docs/README.md")).toBeNull();
  });

  test("returns null for unknown extensions", () => {
    expect(detectLanguage("data/file.csv")).toBeNull();
    expect(detectLanguage("config.yaml")).toBeNull();
    expect(detectLanguage("Makefile")).toBeNull();
  });

  test("is case-insensitive for extensions", () => {
    expect(detectLanguage("src/Auth.TS")).toBe("typescript");
    expect(detectLanguage("src/Auth.PY")).toBe("python");
  });

  test("works with virtual qmd:// paths", () => {
    expect(detectLanguage("qmd://myproject/src/auth.ts")).toBe("typescript");
    expect(detectLanguage("qmd://docs/README.md")).toBeNull();
  });
});

// =============================================================================
// AST Break Points - TypeScript
// =============================================================================

// Checks break point types, ordering, scores and offsets for a TypeScript file.
describe("getASTBreakPoints - TypeScript", () => {
  // The fixture starts immediately after the opening backtick so that the
  // first `import` keyword is the first character of the string.
  // NOTE(review): `Promise` has no type argument here; the fixture is only fed
  // to the parser as text — confirm whether a type argument was lost upstream.
  const TS_SAMPLE = `import { Database } from './db';
import type { User } from './types';

interface AuthConfig {
  secret: string;
  ttl: number;
}

type UserId = string;

export class AuthService {
  constructor(private db: Database) {}

  async authenticate(user: User, token: string): Promise {
    const session = await this.db.findSession(token);
    return session?.userId === user.id;
  }

  validateToken(token: string): boolean {
    return token.length === 64;
  }
}

export function hashPassword(password: string): string {
  return crypto.createHash('sha256').update(password).digest('hex');
}
`;

  test("produces break points at function, class, and import boundaries", async () => {
    const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
    expect(points.length).toBeGreaterThan(0);

    // Should have import, interface, type, class (via export), method, and function break points
    const types = points.map(p => p.type);
    expect(types.some(t => t.includes("import"))).toBe(true);
    expect(types.some(t => t.includes("iface"))).toBe(true);
    expect(types.some(t => t.includes("type"))).toBe(true);
    expect(types.some(t => t.includes("export") || t.includes("class"))).toBe(true);
    expect(types.some(t => t.includes("method"))).toBe(true);
  });

  test("break points are sorted by position", async () => {
    const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
    for (let i = 1; i < points.length; i++) {
      expect(points[i]!.pos).toBeGreaterThanOrEqual(points[i - 1]!.pos);
    }
  });

  test("scores align with expected hierarchy", async () => {
    const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");

    // Class/interface should score 100
    const ifacePoint = points.find(p => p.type === "ast:iface");
    expect(ifacePoint?.score).toBe(100);

    // Function/method should score 90
    const methodPoint = points.find(p => p.type === "ast:method");
    expect(methodPoint?.score).toBe(90);

    // Import should score 60
    const importPoint = points.find(p => p.type === "ast:import");
    expect(importPoint?.score).toBe(60);
  });

  test("break point positions match actual content positions", async () => {
    const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");

    // First import should be at position 0
    const firstImport = points.find(p => p.type === "ast:import");
    expect(firstImport).toBeDefined();
    expect(TS_SAMPLE.slice(firstImport!.pos, firstImport!.pos + 6)).toBe("import");
  });
});

// =============================================================================
// AST Break Points - Python
// =============================================================================

// Checks break point types for a Python file, including methods nested in a
// class and a decorated function.
describe("getASTBreakPoints - Python", () => {
  // Indentation inside this literal is Python syntax: the three methods must
  // stay nested under the class and the decorator must sit on its own line.
  const PY_SAMPLE = `import os
from typing import Optional

class AuthService:
    def __init__(self, db):
        self.db = db

    async def authenticate(self, user, token):
        session = await self.db.find(token)
        return session.user_id == user.id

    def validate_token(self, token):
        return len(token) == 64

def hash_password(password: str) -> str:
    return hashlib.sha256(password.encode()).hexdigest()

@decorator
def decorated_func():
    pass
`;

  test("produces break points for class, function, import, and decorated definitions", async () => {
    const points = await getASTBreakPoints(PY_SAMPLE, "auth.py");
    const types = points.map(p => p.type);

    expect(types.some(t => t.includes("import"))).toBe(true);
    expect(types.some(t => t.includes("class"))).toBe(true);
    expect(types.some(t => t.includes("func"))).toBe(true);
    expect(types.some(t => t.includes("decorated"))).toBe(true);
  });

  test("captures method definitions inside classes", async () => {
    const points = await getASTBreakPoints(PY_SAMPLE, "auth.py");

    // Should capture __init__, authenticate, and validate_token as func
    const funcPoints = points.filter(p => p.type === "ast:func");
    expect(funcPoints.length).toBeGreaterThanOrEqual(3);
  });
});

// =============================================================================
// AST Break Points - Go
// =============================================================================

// Checks break point types and scores for a Go file with one receiver method
// and one plain function.
describe("getASTBreakPoints - Go", () => {
  const GO_SAMPLE = `package main

import "fmt"

type AuthService struct {
	db *Database
}

func (s *AuthService) Authenticate(user User) bool {
	return true
}

func HashPassword(password string) string {
	return "hash"
}
`;

  test("produces break points for type, function, method, and import", async () => {
    const points = await getASTBreakPoints(GO_SAMPLE, "auth.go");
    const types = points.map(p => p.type);

    expect(types.some(t => t.includes("import"))).toBe(true);
    expect(types.some(t => t.includes("type"))).toBe(true);
    expect(types.some(t => t.includes("method"))).toBe(true);
    expect(types.some(t => t.includes("func"))).toBe(true);
  });

  test("function and method both score 90", async () => {
    const points = await getASTBreakPoints(GO_SAMPLE, "auth.go");
    const funcPoint = points.find(p => p.type === "ast:func");
    const methodPoint = points.find(p => p.type === "ast:method");

    expect(funcPoint?.score).toBe(90);
    expect(methodPoint?.score).toBe(90);
  });
});

// =============================================================================
// AST Break Points - Rust
// =============================================================================

// Checks break point types and scores for a Rust file covering use, struct,
// impl, trait, enum and a free function.
describe("getASTBreakPoints - Rust", () => {
  const RS_SAMPLE = `use std::collections::HashMap;

struct AuthService {
    db: Database,
}

impl AuthService {
    fn authenticate(&self, user: &User) -> bool {
        true
    }
}

trait Authenticatable {
    fn validate(&self) -> bool;
}

enum Role {
    Admin,
    User,
}

fn hash_password(password: &str) -> String {
    String::new()
}
`;

  test("produces break points for struct, impl, trait, enum, function, and use", async () => {
    const points = await getASTBreakPoints(RS_SAMPLE, "auth.rs");
    const types = points.map(p => p.type);

    expect(types.some(t => t.includes("import"))).toBe(true); // use_declaration -> @import
    expect(types.some(t => t.includes("struct"))).toBe(true);
    expect(types.some(t => t.includes("impl"))).toBe(true);
    expect(types.some(t => t.includes("trait"))).toBe(true);
    expect(types.some(t => t.includes("enum"))).toBe(true);
    expect(types.some(t => t.includes("func"))).toBe(true);
  });

  test("struct, impl, and trait all score 100", async () => {
    const points = await getASTBreakPoints(RS_SAMPLE, "auth.rs");
    const structPoint = points.find(p => p.type === "ast:struct");
    const implPoint = points.find(p => p.type === "ast:impl");
    const traitPoint = points.find(p => p.type === "ast:trait");

    expect(structPoint?.score).toBe(100);
    expect(implPoint?.score).toBe(100);
    expect(traitPoint?.score).toBe(100);
  });
});

// =============================================================================
// Error Handling & Fallback
// =============================================================================

// Checks that unsupported, empty and malformed inputs resolve to an array
// rather than rejecting.
describe("getASTBreakPoints - error handling", () => {
  test("returns empty array for unsupported file types", async () => {
    const points = await getASTBreakPoints("# Hello World", "readme.md");
    expect(points).toEqual([]);
  });

  test("returns empty array for unknown extensions", async () => {
    const points = await getASTBreakPoints("data,here", "file.csv");
    expect(points).toEqual([]);
  });

  test("handles empty content gracefully", async () => {
    const points = await getASTBreakPoints("", "empty.ts");
    expect(points).toEqual([]);
  });

  test("handles syntactically invalid code gracefully", async () => {
    // Tree-sitter is error-tolerant, so this should still parse (with error nodes)
    // but should not crash
    const points = await getASTBreakPoints("function { broken syntax %%%", "broken.ts");

    // Should either return some partial break points or empty array — not throw
    expect(Array.isArray(points)).toBe(true);
  });
});

// =============================================================================
// Function-Level Range Extraction (Phase 2)
// =============================================================================

// Checks the ranges reported for a TypeScript file: count, ordering,
// non-overlap, and that a class is reported as a single unit.
describe("getASTFunctionRanges - TypeScript", () => {
  // NOTE(review): `Promise` has no type argument here; the fixture is only fed
  // to the parser as text — confirm whether a type argument was lost upstream.
  const TS_SAMPLE = `import { Database } from './db';

interface Config {
  secret: string;
}

type UserId = string;

export class Service {
  constructor(private db: Database) {}

  async fetch(id: UserId): Promise {
    return this.db.get(id);
  }

  parse(raw: string): string {
    return raw.trim();
  }
}

export function helper(x: string): string {
  return x.toUpperCase();
}

const arrow = (n: number): number => n + 1;
`;

  test("returns one range per top-level code unit", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");

    // interface, type, export class, export function, const arrow = 5 ranges
    // (the methods inside the class are absorbed by the class range)
    expect(ranges.length).toBeGreaterThanOrEqual(4);
  });

  test("ranges are sorted by startIndex", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
    for (let i = 1; i < ranges.length; i++) {
      expect(ranges[i]!.startIndex).toBeGreaterThanOrEqual(ranges[i - 1]!.startIndex);
    }
  });

  test("ranges do not overlap", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
    for (let i = 1; i < ranges.length; i++) {
      expect(ranges[i]!.startIndex).toBeGreaterThanOrEqual(ranges[i - 1]!.endIndex);
    }
  });

  test("each range slice is non-empty and starts at a recognizable token", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
    for (const r of ranges) {
      const slice = TS_SAMPLE.slice(r.startIndex, r.endIndex);
      expect(slice.length).toBeGreaterThan(0);
      expect(/^(export|class|interface|type|function|const)\b/.test(slice.trimStart())).toBe(true);
    }
  });

  test("export class range is captured as one unit (not split into methods)", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
    const classRange = ranges.find(r => {
      const slice = TS_SAMPLE.slice(r.startIndex, r.endIndex);
      return slice.includes("class Service") && slice.includes("parse(");
    });
    expect(classRange).toBeDefined();
  });
});

// Checks the ranges reported for a Python file, including that a decorated
// function's range begins at its decorator.
describe("getASTFunctionRanges - Python", () => {
  // Indentation inside this literal is Python syntax; the decorator must stay
  // on its own line directly above the function it decorates.
  const PY_SAMPLE = `import os

class Service:
    def __init__(self):
        self.x = 1

    def run(self):
        return self.x

@decorator
def decorated_func():
    return 42

def plain_func():
    return 1
`;

  test("captures class and function definitions (including decorated)", async () => {
    const ranges = await getASTFunctionRanges(PY_SAMPLE, "service.py");
    expect(ranges.length).toBeGreaterThanOrEqual(3);

    const types = ranges.map(r => r.type);
    expect(types.some(t => t === "ast:class")).toBe(true);
    expect(types.some(t => t === "ast:func" || t === "ast:decorated")).toBe(true);
  });

  test("decorated function range includes the decorator", async () => {
    const ranges = await getASTFunctionRanges(PY_SAMPLE, "service.py");
    const decorated = ranges.find(r => {
      const slice = PY_SAMPLE.slice(r.startIndex, r.endIndex);
      return slice.includes("decorated_func");
    });
    expect(decorated).toBeDefined();

    const slice = PY_SAMPLE.slice(decorated!.startIndex, decorated!.endIndex);
    expect(slice.trimStart().startsWith("@decorator")).toBe(true);
  });
});

// Checks that unsupported, empty and malformed inputs resolve to an array
// rather than rejecting, and that plain variable declarations yield no ranges.
describe("getASTFunctionRanges - error handling", () => {
  test("returns empty array for markdown", async () => {
    const ranges = await getASTFunctionRanges("# Hello", "README.md");
    expect(ranges).toEqual([]);
  });

  test("returns empty array for unknown extension", async () => {
    const ranges = await getASTFunctionRanges("noop", "notes.txt");
    expect(ranges).toEqual([]);
  });

  test("returns empty array for empty file", async () => {
    const ranges = await getASTFunctionRanges("", "empty.ts");
    expect(ranges).toEqual([]);
  });

  test("handles garbage input gracefully (non-throwing)", async () => {
    const ranges = await getASTFunctionRanges("function {{ broken !!", "broken.ts");
    expect(Array.isArray(ranges)).toBe(true);
  });

  test("returns empty array for content with no top-level units", async () => {
    const ranges = await getASTFunctionRanges("const x = 1;\nconst y = 2;\n", "vars.ts");
    // lexical_declaration only matches when value is arrow_function/function_expression
    expect(ranges).toEqual([]);
  });
});

// =============================================================================
// Symbol Extraction Stub (Phase 2)
// =============================================================================

// Pins the current stub behaviour: extractSymbols returns an empty array.
describe("extractSymbols", () => {
  test("returns empty array (Phase 2 stub)", () => {
    const symbols = extractSymbols("function foo() {}", "typescript", 0, 18);
    expect(symbols).toEqual([]);
  });
});