| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467 |
/**
 * ast.test.ts - Tests for AST-aware chunking support
 *
 * Covers language detection, AST break point extraction for each supported
 * language, function-level range extraction, the symbol extraction stub, and
 * graceful fallback on unsupported or malformed input.
 */
- import { describe, test, expect } from "vitest";
- import { detectLanguage, getASTBreakPoints, getASTFunctionRanges, extractSymbols } from "../src/ast.js";
- import type { SupportedLanguage } from "../src/ast.js";
// =============================================================================
// Language Detection
// =============================================================================

/**
 * Pins the path -> language id mapping of detectLanguage, including the
 * null result for paths that do not map to a language.
 */
describe("detectLanguage", () => {
  /** Asserts that every path in `paths` is detected as `expected`. */
  const expectDetected = (expected: string, paths: readonly string[]): void => {
    for (const filePath of paths) {
      expect(detectLanguage(filePath)).toBe(expected);
    }
  };

  /** Asserts that no path in `paths` maps to a language. */
  const expectUndetected = (paths: readonly string[]): void => {
    for (const filePath of paths) {
      expect(detectLanguage(filePath)).toBeNull();
    }
  };

  test("recognizes TypeScript extensions", () => {
    expectDetected("typescript", ["src/auth.ts", "src/auth.mts", "src/auth.cts"]);
  });

  test("recognizes TSX extension", () => {
    expectDetected("tsx", ["src/App.tsx"]);
  });

  test("recognizes JavaScript extensions", () => {
    expectDetected("javascript", ["src/util.js", "src/util.mjs", "src/util.cjs"]);
  });

  // .jsx is expected to resolve to the "tsx" id rather than "javascript".
  test("recognizes JSX as tsx", () => {
    expectDetected("tsx", ["src/App.jsx"]);
  });

  test("recognizes Python extension", () => {
    expectDetected("python", ["src/auth.py"]);
  });

  test("recognizes Go extension", () => {
    expectDetected("go", ["src/auth.go"]);
  });

  test("recognizes Rust extension", () => {
    expectDetected("rust", ["src/auth.rs"]);
  });

  test("returns null for markdown", () => {
    expectUndetected(["docs/README.md"]);
  });

  // Includes an extension-less file name ("Makefile").
  test("returns null for unknown extensions", () => {
    expectUndetected(["data/file.csv", "config.yaml", "Makefile"]);
  });

  test("is case-insensitive for extensions", () => {
    expectDetected("typescript", ["src/Auth.TS"]);
    expectDetected("python", ["src/Auth.PY"]);
  });

  test("works with virtual qmd:// paths", () => {
    expectDetected("typescript", ["qmd://myproject/src/auth.ts"]);
    expectUndetected(["qmd://docs/README.md"]);
  });
});
// =============================================================================
// AST Break Points - TypeScript
// =============================================================================

/**
 * Break point extraction for a TypeScript sample containing imports, an
 * interface, a type alias, an exported class with methods, and an exported
 * function.
 */
describe("getASTBreakPoints - TypeScript", () => {
  // Fixture is conventionally indented so it reads (and parses) like real source.
  const TS_SAMPLE = `import { Database } from './db';
import type { User } from './types';

interface AuthConfig {
  secret: string;
  ttl: number;
}

type UserId = string;

export class AuthService {
  constructor(private db: Database) {}

  async authenticate(user: User, token: string): Promise<boolean> {
    const session = await this.db.findSession(token);
    return session?.userId === user.id;
  }

  validateToken(token: string): boolean {
    return token.length === 64;
  }
}

export function hashPassword(password: string): string {
  return crypto.createHash('sha256').update(password).digest('hex');
}
`;

  test("produces break points at function, class, and import boundaries", async () => {
    const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
    expect(points.length).toBeGreaterThan(0);

    // Substring match: a fragment counts as present if any point type contains it.
    const types = points.map(p => p.type);
    const hasType = (fragment: string): boolean => types.some(t => t.includes(fragment));

    expect(hasType("import")).toBe(true);
    expect(hasType("iface")).toBe(true);
    expect(hasType("type")).toBe(true);
    // The class is exported, so either an export or a class point is accepted.
    expect(hasType("export") || hasType("class")).toBe(true);
    expect(hasType("method")).toBe(true);
    // NOTE(review): the exported function is not asserted separately here;
    // presumably it surfaces as an export point like the class - confirm.
  });

  test("break points are sorted by position", async () => {
    const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
    // Non-decreasing: equal positions are allowed.
    for (let i = 1; i < points.length; i++) {
      expect(points[i]!.pos).toBeGreaterThanOrEqual(points[i - 1]!.pos);
    }
  });

  test("scores align with expected hierarchy", async () => {
    const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");

    // Each point is asserted to exist first so a missing point fails with a
    // clear message instead of "expected undefined to be 100".

    // Class/interface should score 100
    const ifacePoint = points.find(p => p.type === "ast:iface");
    expect(ifacePoint).toBeDefined();
    expect(ifacePoint?.score).toBe(100);

    // Function/method should score 90
    const methodPoint = points.find(p => p.type === "ast:method");
    expect(methodPoint).toBeDefined();
    expect(methodPoint?.score).toBe(90);

    // Import should score 60
    const importPoint = points.find(p => p.type === "ast:import");
    expect(importPoint).toBeDefined();
    expect(importPoint?.score).toBe(60);
  });

  test("break point positions match actual content positions", async () => {
    const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");

    // The first import point found must sit exactly on an `import` keyword.
    const firstImport = points.find(p => p.type === "ast:import");
    expect(firstImport).toBeDefined();
    expect(TS_SAMPLE.slice(firstImport!.pos, firstImport!.pos + 6)).toBe("import");
  });
});
// =============================================================================
// AST Break Points - Python
// =============================================================================

/**
 * Break point extraction for a Python sample with imports, a class with
 * three methods, a top-level function, and a decorated function.
 */
describe("getASTBreakPoints - Python", () => {
  // Indentation is significant in Python: the methods must be nested under
  // the class for the sample to be valid and for the method test to be meaningful.
  const PY_SAMPLE = `import os
from typing import Optional

class AuthService:
    def __init__(self, db):
        self.db = db

    async def authenticate(self, user, token):
        session = await self.db.find(token)
        return session.user_id == user.id

    def validate_token(self, token):
        return len(token) == 64

def hash_password(password: str) -> str:
    return hashlib.sha256(password.encode()).hexdigest()

@decorator
def decorated_func():
    pass
`;

  test("produces break points for class, function, import, and decorated definitions", async () => {
    const points = await getASTBreakPoints(PY_SAMPLE, "auth.py");

    // Substring match: a fragment counts as present if any point type contains it.
    const types = points.map(p => p.type);
    const hasType = (fragment: string): boolean => types.some(t => t.includes(fragment));

    expect(hasType("import")).toBe(true);
    expect(hasType("class")).toBe(true);
    expect(hasType("func")).toBe(true);
    expect(hasType("decorated")).toBe(true);
  });

  test("captures method definitions inside classes", async () => {
    const points = await getASTBreakPoints(PY_SAMPLE, "auth.py");

    // Should capture __init__, authenticate, and validate_token as func.
    // NOTE(review): the top-level hash_password is presumably also reported as
    // "ast:func", so this lower bound is lenient - confirm before tightening.
    const funcPoints = points.filter(p => p.type === "ast:func");
    expect(funcPoints.length).toBeGreaterThanOrEqual(3);
  });
});
// =============================================================================
// AST Break Points - Go
// =============================================================================

/**
 * Break point extraction for a Go sample with an import, a struct type,
 * a method with a receiver, and a plain function.
 */
describe("getASTBreakPoints - Go", () => {
  // Fixture is conventionally indented so it reads like real Go source.
  const GO_SAMPLE = `package main

import "fmt"

type AuthService struct {
	db *Database
}

func (s *AuthService) Authenticate(user User) bool {
	return true
}

func HashPassword(password string) string {
	return "hash"
}
`;

  test("produces break points for type, function, method, and import", async () => {
    const points = await getASTBreakPoints(GO_SAMPLE, "auth.go");

    // Substring match: a fragment counts as present if any point type contains it.
    const types = points.map(p => p.type);
    const hasType = (fragment: string): boolean => types.some(t => t.includes(fragment));

    expect(hasType("import")).toBe(true);
    expect(hasType("type")).toBe(true);
    expect(hasType("method")).toBe(true);
    expect(hasType("func")).toBe(true);
  });

  test("function and method both score 90", async () => {
    const points = await getASTBreakPoints(GO_SAMPLE, "auth.go");
    const funcPoint = points.find(p => p.type === "ast:func");
    const methodPoint = points.find(p => p.type === "ast:method");

    // Assert existence first so a missing point fails with a clear message.
    expect(funcPoint).toBeDefined();
    expect(methodPoint).toBeDefined();
    expect(funcPoint?.score).toBe(90);
    expect(methodPoint?.score).toBe(90);
  });
});
// =============================================================================
// AST Break Points - Rust
// =============================================================================

/**
 * Break point extraction for a Rust sample with a use declaration, a
 * struct, an impl block, a trait, an enum, and a free function.
 */
describe("getASTBreakPoints - Rust", () => {
  // Fixture is conventionally indented so it reads like real Rust source.
  const RS_SAMPLE = `use std::collections::HashMap;

struct AuthService {
    db: Database,
}

impl AuthService {
    fn authenticate(&self, user: &User) -> bool {
        true
    }
}

trait Authenticatable {
    fn validate(&self) -> bool;
}

enum Role {
    Admin,
    User,
}

fn hash_password(password: &str) -> String {
    String::new()
}
`;

  test("produces break points for struct, impl, trait, enum, function, and use", async () => {
    const points = await getASTBreakPoints(RS_SAMPLE, "auth.rs");

    // Substring match: a fragment counts as present if any point type contains it.
    const types = points.map(p => p.type);
    const hasType = (fragment: string): boolean => types.some(t => t.includes(fragment));

    expect(hasType("import")).toBe(true); // use_declaration -> @import
    expect(hasType("struct")).toBe(true);
    expect(hasType("impl")).toBe(true);
    expect(hasType("trait")).toBe(true);
    expect(hasType("enum")).toBe(true);
    expect(hasType("func")).toBe(true);
  });

  test("struct, impl, and trait all score 100", async () => {
    const points = await getASTBreakPoints(RS_SAMPLE, "auth.rs");
    const structPoint = points.find(p => p.type === "ast:struct");
    const implPoint = points.find(p => p.type === "ast:impl");
    const traitPoint = points.find(p => p.type === "ast:trait");

    // Assert existence first so a missing point fails with a clear message.
    expect(structPoint).toBeDefined();
    expect(implPoint).toBeDefined();
    expect(traitPoint).toBeDefined();
    expect(structPoint?.score).toBe(100);
    expect(implPoint?.score).toBe(100);
    expect(traitPoint?.score).toBe(100);
  });
});
// =============================================================================
// Error Handling & Fallback
// =============================================================================

/**
 * getASTBreakPoints must resolve (never reject) for unsupported, empty,
 * or malformed input.
 */
describe("getASTBreakPoints - error handling", () => {
  test("returns empty array for unsupported file types", async () => {
    await expect(getASTBreakPoints("# Hello World", "readme.md")).resolves.toEqual([]);
  });

  test("returns empty array for unknown extensions", async () => {
    await expect(getASTBreakPoints("data,here", "file.csv")).resolves.toEqual([]);
  });

  test("handles empty content gracefully", async () => {
    await expect(getASTBreakPoints("", "empty.ts")).resolves.toEqual([]);
  });

  test("handles syntactically invalid code gracefully", async () => {
    // Tree-sitter is error-tolerant, so this should still parse (with error
    // nodes) rather than crash.
    const result = await getASTBreakPoints("function { broken syntax %%%", "broken.ts");

    // Partial break points or an empty array are both acceptable; throwing is not.
    expect(Array.isArray(result)).toBe(true);
  });
});
// =============================================================================
// Function-Level Range Extraction (Phase 2)
// =============================================================================

/**
 * Range extraction for a TypeScript sample with an interface, a type alias,
 * an exported class, an exported function, and a const arrow function.
 */
describe("getASTFunctionRanges - TypeScript", () => {
  // Fixture is conventionally indented so it reads like real source.
  const TS_SAMPLE = `import { Database } from './db';

interface Config {
  secret: string;
}

type UserId = string;

export class Service {
  constructor(private db: Database) {}

  async fetch(id: UserId): Promise<string> {
    return this.db.get(id);
  }

  parse(raw: string): string {
    return raw.trim();
  }
}

export function helper(x: string): string {
  return x.toUpperCase();
}

const arrow = (n: number): number => n + 1;
`;

  test("returns one range per top-level code unit", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");

    // Candidate units: interface, type, export class, export function, const arrow
    // (the methods inside the class are absorbed by the class range).
    // The bound is deliberately one below that count of five.
    expect(ranges.length).toBeGreaterThanOrEqual(4);
  });

  test("ranges are sorted by startIndex", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
    for (let i = 1; i < ranges.length; i++) {
      expect(ranges[i]!.startIndex).toBeGreaterThanOrEqual(ranges[i - 1]!.startIndex);
    }
  });

  test("ranges do not overlap", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
    // Each range must begin at or after the end of the previous one.
    for (let i = 1; i < ranges.length; i++) {
      expect(ranges[i]!.startIndex).toBeGreaterThanOrEqual(ranges[i - 1]!.endIndex);
    }
  });

  test("each range slice is non-empty and starts at a recognizable token", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
    // Keywords that may open a range in this sample.
    const leadingKeyword = /^(export|class|interface|type|function|const)\b/;

    for (const r of ranges) {
      const slice = TS_SAMPLE.slice(r.startIndex, r.endIndex);
      expect(slice.length).toBeGreaterThan(0);
      expect(leadingKeyword.test(slice.trimStart())).toBe(true);
    }
  });

  test("export class range is captured as one unit (not split into methods)", async () => {
    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");

    // A single range must contain both the class header and its last method.
    const classRange = ranges.find(r => {
      const slice = TS_SAMPLE.slice(r.startIndex, r.endIndex);
      return slice.includes("class Service") && slice.includes("parse(");
    });
    expect(classRange).toBeDefined();
  });
});
/**
 * Range extraction for a Python sample with a class, a decorated function,
 * and a plain function.
 */
describe("getASTFunctionRanges - Python", () => {
  // Indentation is significant in Python: the methods must be nested under
  // the class for the sample to be valid source.
  const PY_SAMPLE = `import os

class Service:
    def __init__(self):
        self.x = 1

    def run(self):
        return self.x

@decorator
def decorated_func():
    return 42

def plain_func():
    return 1
`;

  test("captures class and function definitions (including decorated)", async () => {
    const ranges = await getASTFunctionRanges(PY_SAMPLE, "service.py");
    expect(ranges.length).toBeGreaterThanOrEqual(3);

    const types = ranges.map(r => r.type);
    expect(types.some(t => t === "ast:class")).toBe(true);
    // Either a plain function range or a decorated one satisfies this check.
    expect(types.some(t => t === "ast:func" || t === "ast:decorated")).toBe(true);
  });

  test("decorated function range includes the decorator", async () => {
    const ranges = await getASTFunctionRanges(PY_SAMPLE, "service.py");

    // First range whose text mentions the decorated function's name.
    const decorated = ranges.find(r =>
      PY_SAMPLE.slice(r.startIndex, r.endIndex).includes("decorated_func"),
    );
    expect(decorated).toBeDefined();

    // The range must start at the decorator line, not at the `def`.
    const slice = PY_SAMPLE.slice(decorated!.startIndex, decorated!.endIndex);
    expect(slice.trimStart().startsWith("@decorator")).toBe(true);
  });
});
/**
 * getASTFunctionRanges must resolve (never reject) for unsupported, empty,
 * or malformed input, and for code with no qualifying top-level units.
 */
describe("getASTFunctionRanges - error handling", () => {
  test("returns empty array for markdown", async () => {
    await expect(getASTFunctionRanges("# Hello", "README.md")).resolves.toEqual([]);
  });

  test("returns empty array for unknown extension", async () => {
    await expect(getASTFunctionRanges("noop", "notes.txt")).resolves.toEqual([]);
  });

  test("returns empty array for empty file", async () => {
    await expect(getASTFunctionRanges("", "empty.ts")).resolves.toEqual([]);
  });

  test("handles garbage input gracefully (non-throwing)", async () => {
    const result = await getASTFunctionRanges("function {{ broken !!", "broken.ts");
    // Any array is acceptable; the call just must not throw.
    expect(Array.isArray(result)).toBe(true);
  });

  test("returns empty array for content with no top-level units", async () => {
    // lexical_declaration only matches when value is arrow_function/function_expression
    await expect(
      getASTFunctionRanges("const x = 1;\nconst y = 2;\n", "vars.ts"),
    ).resolves.toEqual([]);
  });
});
// =============================================================================
// Symbol Extraction Stub (Phase 2)
// =============================================================================

/** extractSymbols is expected to return an empty array while it is a stub. */
describe("extractSymbols", () => {
  test("returns empty array (Phase 2 stub)", () => {
    const source = "function foo() {}";
    // Derive the end offset from the snippet so the range cannot run past the
    // end of the content (assumes the end offset is exclusive - TODO confirm).
    const symbols = extractSymbols(source, "typescript", 0, source.length);
    expect(symbols).toEqual([]);
  });
});
|