ast.test.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  /**
   * ast.test.ts - Tests for AST-aware chunking support
   *
   * Tests language detection, AST break point extraction for each
   * supported language, and graceful fallback on errors.
   */
  7. import { describe, test, expect } from "vitest";
  8. import { detectLanguage, getASTBreakPoints, getASTFunctionRanges, extractSymbols } from "../src/ast.js";
  9. import type { SupportedLanguage } from "../src/ast.js";
  10. // =============================================================================
  11. // Language Detection
  12. // =============================================================================
  // Checks how detectLanguage maps file paths to a language identifier,
  // and that paths it does not recognize yield null.
  describe("detectLanguage", () => {
    // Asserts, in order, that every path in `paths` is detected as `expected`.
    const expectDetected = (paths: string[], expected: string | null): void => {
      for (const path of paths) {
        expect(detectLanguage(path)).toBe(expected);
      }
    };

    test("recognizes TypeScript extensions", () => {
      expectDetected(["src/auth.ts", "src/auth.mts", "src/auth.cts"], "typescript");
    });

    test("recognizes TSX extension", () => {
      expectDetected(["src/App.tsx"], "tsx");
    });

    test("recognizes JavaScript extensions", () => {
      expectDetected(["src/util.js", "src/util.mjs", "src/util.cjs"], "javascript");
    });

    test("recognizes JSX as tsx", () => {
      // .jsx is expected to be reported as "tsx", not "javascript".
      expectDetected(["src/App.jsx"], "tsx");
    });

    test("recognizes Python extension", () => {
      expectDetected(["src/auth.py"], "python");
    });

    test("recognizes Go extension", () => {
      expectDetected(["src/auth.go"], "go");
    });

    test("recognizes Rust extension", () => {
      expectDetected(["src/auth.rs"], "rust");
    });

    test("returns null for markdown", () => {
      expectDetected(["docs/README.md"], null);
    });

    test("returns null for unknown extensions", () => {
      // Includes a file name with no extension at all ("Makefile").
      expectDetected(["data/file.csv", "config.yaml", "Makefile"], null);
    });

    test("is case-insensitive for extensions", () => {
      expectDetected(["src/Auth.TS"], "typescript");
      expectDetected(["src/Auth.PY"], "python");
    });

    test("works with virtual qmd:// paths", () => {
      expectDetected(["qmd://myproject/src/auth.ts"], "typescript");
      expectDetected(["qmd://docs/README.md"], null);
    });
  });
  56. // =============================================================================
  57. // AST Break Points - TypeScript
  58. // =============================================================================
  // Break-point extraction over a TypeScript fixture: which point types are
  // reported, their ordering, their scores, and that offsets index into the source.
  describe("getASTBreakPoints - TypeScript", () => {
    // Fixture with two imports, an interface, a type alias, an exported class
    // (constructor + two methods) and an exported function.
    const TS_SAMPLE = `import { Database } from './db';
import type { User } from './types';
interface AuthConfig {
secret: string;
ttl: number;
}
type UserId = string;
export class AuthService {
constructor(private db: Database) {}
async authenticate(user: User, token: string): Promise<boolean> {
const session = await this.db.findSession(token);
return session?.userId === user.id;
}
validateToken(token: string): boolean {
return token.length === 64;
}
}
export function hashPassword(password: string): string {
return crypto.createHash('sha256').update(password).digest('hex');
}
`;

    test("produces break points at function, class, and import boundaries", async () => {
      const breakPoints = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
      expect(breakPoints.length).toBeGreaterThan(0);

      const kinds = breakPoints.map((bp) => bp.type);
      // True when at least one reported type contains any of the given fragments.
      const seen = (...fragments: string[]): boolean =>
        kinds.some((kind) => fragments.some((fragment) => kind.includes(fragment)));

      // Should have import, interface, type, class (via export), method, and function break points
      expect(seen("import")).toBe(true);
      expect(seen("iface")).toBe(true);
      expect(seen("type")).toBe(true);
      expect(seen("export", "class")).toBe(true);
      expect(seen("method")).toBe(true);
    });

    test("break points are sorted by position", async () => {
      const breakPoints = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
      breakPoints.forEach((current, idx) => {
        // The first point has no predecessor to compare against.
        if (idx === 0) return;
        expect(current.pos).toBeGreaterThanOrEqual(breakPoints[idx - 1]!.pos);
      });
    });

    test("scores align with expected hierarchy", async () => {
      const breakPoints = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
      // Interface scores 100, method scores 90, import scores 60.
      const expectedScores: Array<[string, number]> = [
        ["ast:iface", 100],
        ["ast:method", 90],
        ["ast:import", 60],
      ];
      for (const [kind, score] of expectedScores) {
        const match = breakPoints.find((bp) => bp.type === kind);
        expect(match?.score).toBe(score);
      }
    });

    test("break point positions match actual content positions", async () => {
      const breakPoints = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
      // The first import point must index the literal text "import" in the fixture.
      const firstImport = breakPoints.find((bp) => bp.type === "ast:import");
      expect(firstImport).toBeDefined();
      const start = firstImport!.pos;
      expect(TS_SAMPLE.slice(start, start + 6)).toBe("import");
    });
  });
  118. // =============================================================================
  119. // AST Break Points - Python
  120. // =============================================================================
  // Break-point extraction over a Python fixture: imports, a class with
  // methods, a module-level function, and a decorated function.
  describe("getASTBreakPoints - Python", () => {
    // Python is indentation-sensitive: the class body and function bodies must
    // be indented for the fixture to be valid Python and for the methods to be
    // nested inside AuthService, which the second test depends on.
    const PY_SAMPLE = `import os
from typing import Optional
class AuthService:
    def __init__(self, db):
        self.db = db
    async def authenticate(self, user, token):
        session = await self.db.find(token)
        return session.user_id == user.id
    def validate_token(self, token):
        return len(token) == 64
def hash_password(password: str) -> str:
    return hashlib.sha256(password.encode()).hexdigest()
@decorator
def decorated_func():
    pass
`;

    test("produces break points for class, function, import, and decorated definitions", async () => {
      const points = await getASTBreakPoints(PY_SAMPLE, "auth.py");
      const types = points.map((p) => p.type);
      // Each expected category must appear in at least one reported type string.
      for (const fragment of ["import", "class", "func", "decorated"]) {
        expect(types.some((t) => t.includes(fragment))).toBe(true);
      }
    });

    test("captures method definitions inside classes", async () => {
      const points = await getASTBreakPoints(PY_SAMPLE, "auth.py");
      // Should capture __init__, authenticate, and validate_token as func
      const funcPoints = points.filter((p) => p.type === "ast:func");
      expect(funcPoints.length).toBeGreaterThanOrEqual(3);
    });
  });
  153. // =============================================================================
  154. // AST Break Points - Go
  155. // =============================================================================
  // Break-point extraction over a Go fixture: import, struct type,
  // a method with a receiver, and a plain function.
  describe("getASTBreakPoints - Go", () => {
    const GO_SAMPLE = `package main
import "fmt"
type AuthService struct {
db *Database
}
func (s *AuthService) Authenticate(user User) bool {
return true
}
func HashPassword(password string) string {
return "hash"
}
`;

    test("produces break points for type, function, method, and import", async () => {
      const breakPoints = await getASTBreakPoints(GO_SAMPLE, "auth.go");
      const kinds = breakPoints.map((bp) => bp.type);
      // True when some reported type contains the fragment.
      const seen = (fragment: string): boolean => kinds.some((kind) => kind.includes(fragment));

      expect(seen("import")).toBe(true);
      expect(seen("type")).toBe(true);
      expect(seen("method")).toBe(true);
      expect(seen("func")).toBe(true);
    });

    test("function and method both score 90", async () => {
      const breakPoints = await getASTBreakPoints(GO_SAMPLE, "auth.go");
      // Look both points up first, then compare scores, func before method.
      const byType = (kind: string) => breakPoints.find((bp) => bp.type === kind);
      const funcPoint = byType("ast:func");
      const methodPoint = byType("ast:method");
      expect(funcPoint?.score).toBe(90);
      expect(methodPoint?.score).toBe(90);
    });
  });
  185. // =============================================================================
  186. // AST Break Points - Rust
  187. // =============================================================================
  // Break-point extraction over a Rust fixture: use declaration, struct,
  // impl block, trait, enum, and a free function.
  describe("getASTBreakPoints - Rust", () => {
    const RS_SAMPLE = `use std::collections::HashMap;
struct AuthService {
db: Database,
}
impl AuthService {
fn authenticate(&self, user: &User) -> bool {
true
}
}
trait Authenticatable {
fn validate(&self) -> bool;
}
enum Role {
Admin,
User,
}
fn hash_password(password: &str) -> String {
String::new()
}
`;

    test("produces break points for struct, impl, trait, enum, function, and use", async () => {
      const breakPoints = await getASTBreakPoints(RS_SAMPLE, "auth.rs");
      const kinds = breakPoints.map((bp) => bp.type);
      // "import" comes first: use_declaration -> @import
      const fragments = ["import", "struct", "impl", "trait", "enum", "func"];
      for (const fragment of fragments) {
        expect(kinds.some((kind) => kind.includes(fragment))).toBe(true);
      }
    });

    test("struct, impl, and trait all score 100", async () => {
      const breakPoints = await getASTBreakPoints(RS_SAMPLE, "auth.rs");
      // Resolve all three points before asserting.
      const [structPoint, implPoint, traitPoint] = ["ast:struct", "ast:impl", "ast:trait"].map(
        (kind) => breakPoints.find((bp) => bp.type === kind),
      );
      expect(structPoint?.score).toBe(100);
      expect(implPoint?.score).toBe(100);
      expect(traitPoint?.score).toBe(100);
    });
  });
  229. // =============================================================================
  230. // Error Handling & Fallback
  231. // =============================================================================
  // Fallback behavior of getASTBreakPoints: unsupported files and empty
  // input yield an empty array; malformed code must not throw.
  describe("getASTBreakPoints - error handling", () => {
    test("returns empty array for unsupported file types", async () => {
      expect(await getASTBreakPoints("# Hello World", "readme.md")).toEqual([]);
    });

    test("returns empty array for unknown extensions", async () => {
      expect(await getASTBreakPoints("data,here", "file.csv")).toEqual([]);
    });

    test("handles empty content gracefully", async () => {
      expect(await getASTBreakPoints("", "empty.ts")).toEqual([]);
    });

    test("handles syntactically invalid code gracefully", async () => {
      // Tree-sitter is error-tolerant, so this should still parse (with error nodes)
      // but should not crash. Either partial break points or an empty array is
      // acceptable; only the result being an array is checked.
      const result = await getASTBreakPoints("function { broken syntax %%%", "broken.ts");
      expect(Array.isArray(result)).toBe(true);
    });
  });
  253. // =============================================================================
  254. // Function-Level Range Extraction (Phase 2)
  255. // =============================================================================
  // Function-level range extraction over a TypeScript fixture: count,
  // ordering, non-overlap, and what text each range covers.
  describe("getASTFunctionRanges - TypeScript", () => {
    // Fixture with an import, an interface, a type alias, an exported class
    // (constructor + two methods), an exported function and an arrow-function const.
    const TS_SAMPLE = `import { Database } from './db';
interface Config {
secret: string;
}
type UserId = string;
export class Service {
constructor(private db: Database) {}
async fetch(id: UserId): Promise<string> {
return this.db.get(id);
}
parse(raw: string): string {
return raw.trim();
}
}
export function helper(x: string): string {
return x.toUpperCase();
}
const arrow = (n: number): number => n + 1;
`;

    // Source text covered by a range (end index exclusive, as with String.prototype.slice).
    const textOf = (range: { startIndex: number; endIndex: number }): string =>
      TS_SAMPLE.slice(range.startIndex, range.endIndex);

    test("returns one range per top-level code unit", async () => {
      const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
      // interface, type, export class, export function, const arrow = 5 ranges
      // (the methods inside the class are absorbed by the class range).
      // The assertion itself only requires a lower bound of 4.
      expect(ranges.length).toBeGreaterThanOrEqual(4);
    });

    test("ranges are sorted by startIndex", async () => {
      const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
      ranges.forEach((current, idx) => {
        if (idx === 0) return;
        expect(current.startIndex).toBeGreaterThanOrEqual(ranges[idx - 1]!.startIndex);
      });
    });

    test("ranges do not overlap", async () => {
      const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
      ranges.forEach((current, idx) => {
        if (idx === 0) return;
        // Each range must begin at or after the end of its predecessor.
        expect(current.startIndex).toBeGreaterThanOrEqual(ranges[idx - 1]!.endIndex);
      });
    });

    test("each range slice is non-empty and starts at a recognizable token", async () => {
      const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
      const leadingKeyword = /^(export|class|interface|type|function|const)\b/;
      for (const range of ranges) {
        const text = textOf(range);
        expect(text.length).toBeGreaterThan(0);
        expect(leadingKeyword.test(text.trimStart())).toBe(true);
      }
    });

    test("export class range is captured as one unit (not split into methods)", async () => {
      const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
      // One single range must contain both the class header and its last method.
      const classText = ranges
        .map(textOf)
        .find((text) => text.includes("class Service") && text.includes("parse("));
      expect(classText).toBeDefined();
    });
  });
  // Function-level range extraction over a Python fixture: a class, a
  // decorated function, and a plain function.
  describe("getASTFunctionRanges - Python", () => {
    // Python is indentation-sensitive: bodies must be indented so the fixture
    // is valid Python and the class, the decorated function and the plain
    // function are the three top-level units.
    const PY_SAMPLE = `import os
class Service:
    def __init__(self):
        self.x = 1
    def run(self):
        return self.x
@decorator
def decorated_func():
    return 42
def plain_func():
    return 1
`;

    test("captures class and function definitions (including decorated)", async () => {
      const ranges = await getASTFunctionRanges(PY_SAMPLE, "service.py");
      expect(ranges.length).toBeGreaterThanOrEqual(3);
      const types = ranges.map((r) => r.type);
      expect(types.includes("ast:class")).toBe(true);
      // A function may be reported either as a plain or as a decorated definition.
      expect(types.some((t) => t === "ast:func" || t === "ast:decorated")).toBe(true);
    });

    test("decorated function range includes the decorator", async () => {
      const ranges = await getASTFunctionRanges(PY_SAMPLE, "service.py");
      const decorated = ranges.find((r) =>
        PY_SAMPLE.slice(r.startIndex, r.endIndex).includes("decorated_func"),
      );
      expect(decorated).toBeDefined();
      // The range must start at the decorator line, not at the `def` line.
      const slice = PY_SAMPLE.slice(decorated!.startIndex, decorated!.endIndex);
      expect(slice.trimStart().startsWith("@decorator")).toBe(true);
    });
  });
  // Fallback behavior of getASTFunctionRanges: unsupported files, empty
  // input and input with no matching units yield []; garbage must not throw.
  describe("getASTFunctionRanges - error handling", () => {
    test("returns empty array for markdown", async () => {
      expect(await getASTFunctionRanges("# Hello", "README.md")).toEqual([]);
    });

    test("returns empty array for unknown extension", async () => {
      expect(await getASTFunctionRanges("noop", "notes.txt")).toEqual([]);
    });

    test("returns empty array for empty file", async () => {
      expect(await getASTFunctionRanges("", "empty.ts")).toEqual([]);
    });

    test("handles garbage input gracefully (non-throwing)", async () => {
      // Only the result being an array is checked; its contents are unconstrained.
      const result = await getASTFunctionRanges("function {{ broken !!", "broken.ts");
      expect(Array.isArray(result)).toBe(true);
    });

    test("returns empty array for content with no top-level units", async () => {
      // lexical_declaration only matches when value is arrow_function/function_expression
      const plainConsts = "const x = 1;\nconst y = 2;\n";
      expect(await getASTFunctionRanges(plainConsts, "vars.ts")).toEqual([]);
    });
  });
  365. // =============================================================================
  366. // Symbol Extraction Stub (Phase 2)
  367. // =============================================================================
  // extractSymbols is currently expected to return no symbols (Phase 2 stub).
  describe("extractSymbols", () => {
    test("returns empty array (Phase 2 stub)", () => {
      // NOTE(review): the snippet is 17 characters long while the last argument
      // is 18; presumably the two numbers delimit a range in the snippet —
      // confirm the intended end value once the stub is implemented.
      expect(extractSymbols("function foo() {}", "typescript", 0, 18)).toEqual([]);
    });
  });
  373. });