浏览代码

feat(ast): Phase 3 — tree-sitter grammars for Java + Kotlin (i-76v1j1ld)

Extends qmd's AST-aware chunking to JVM-family code: .java, .kt, .kts
files now produce function/class/method/import break points instead of
falling back to regex-only chunking.

Changes:
  * package.json — add two grammar deps to optionalDependencies:
      tree-sitter-java@0.23.5  (ships prebuilt tree-sitter-java.wasm)
      @tree-sitter-grammars/tree-sitter-kotlin@1.1.0
        (ships prebuilt tree-sitter-kotlin.wasm).
    Also added to pnpm.onlyBuiltDependencies for parity with the
    existing go/rust/typescript grammar entries.
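  * src/ast.ts — extend SupportedLanguage, EXTENSION_MAP (.java/.kt/
    .kts), GRAMMAR_MAP (java + kotlin package paths), and
    LANGUAGE_QUERIES (class/iface/enum/record/annotation-type/method/
    constructor/import for Java; class/object/function/type_alias/
    import for Kotlin). The src/ast.ts diff also carries the
    function-range helpers (FunctionRange, getASTFunctionRanges and
    their private helpers) from the Phase 2 work.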
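  * bun.lock — regenerated to include the two new packages + their
    transitive deps (node-addon-api, node-gyp-build, npm-check-updates);
    the hoisted node-addon-api moves from 8.5.0 to 8.7.0, with 8.5.0
    now nested under the existing grammar packages.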
  * dist/ast.js, dist/ast.d.ts — rebuilt from source for consumers
    that import the compiled output (per oivo branch policy of
    shipping prebuilt dist/).

Kotlin grammar naming note: the upstream `tree-sitter-kotlin` package
(fwcd, v0.3.8) does NOT ship a prebuilt .wasm — only src/parser.c.
Switched to `@tree-sitter-grammars/tree-sitter-kotlin@1.1.0`, which
DOES ship the wasm. Node names differ between the two: v1.1.0 uses
`import` (not `import_header`) and lacks `property_declaration` at
the query level — queries updated accordingly and confirmed working.

Swift deferred to follow-up i-f0dd5nge: no version of tree-sitter-swift
(0.1.4 → 0.7.1) ships a prebuilt .wasm; every one of them needs docker /
emscripten to run `tree-sitter build --wasm`, and the `code` machine
has neither. Follow-up tracks vendoring a built wasm into
assets/grammars/ and extending resolveGrammarPath with a local-first
fallback.

Verified:
  * `getASTStatus()` reports `java: available=true` and
    `kotlin: available=true` (both wasm load + query compile).
  * Java fixture (class + interface + enum + 3 methods + 2 imports)
    produces 8 break points with correct scores and positions.
  * Kotlin fixture (class + object + typealias + 3 funs + 2 imports)
    produces 8 break points.
  * `.kts` routes to kotlin grammar and produces 8 break points.
  * `workspace_typecheck({ component: "cli", onlyModified: true })`
    → 0 errors (consumer at cli/src/daemon/run.ts resolves
    `@oivo/qmd/bin/qmd` via createRequire; no public-API change).

Companion change on `code` machine (NOT in this commit — lives in
consumer config): ~/.config/qmd/index.yml adds oivo-research-jvm
collection at /srv/research, pattern **/*.{java,kt,kts} (204 files).

Rollback: `git revert` — removes the two deps + ast.ts additions +
regenerated dist. No new runtime requirements introduced (existing
optionalDependencies pattern).

Unblocks: polyglot AST chunking for /srv/research JVM content.
Parent: i-76v1j1ld (Phase 3 JVM grammars).
Sibling: i-bud0h8vu (Phase 2 — function-level chunking, independent).
Follow-up: i-f0dd5nge (Phase 3b — Swift wasm build/vendor).

Session-Id: d0f56a95
root 1 月之前
父节点
当前提交
89267c17d4
共有 5 个文件被更改，包括 247 次插入和 5 次删除
  1. 19 3
      bun.lock
  2. 1 1
      dist/ast.d.ts
  3. 22 0
      dist/ast.js
  4. 4 0
      package.json
  5. 201 1
      src/ast.ts

+ 19 - 3
bun.lock

@@ -21,12 +21,14 @@
         "vitest": "3.2.4",
       },
       "optionalDependencies": {
+        "@tree-sitter-grammars/tree-sitter-kotlin": "1.1.0",
         "sqlite-vec-darwin-arm64": "0.1.9",
         "sqlite-vec-darwin-x64": "0.1.9",
         "sqlite-vec-linux-arm64": "0.1.9",
         "sqlite-vec-linux-x64": "0.1.9",
         "sqlite-vec-windows-x64": "0.1.9",
         "tree-sitter-go": "0.23.4",
+        "tree-sitter-java": "0.23.5",
         "tree-sitter-python": "0.23.4",
         "tree-sitter-rust": "0.24.0",
         "tree-sitter-typescript": "0.23.2",
@@ -205,6 +207,8 @@
 
     "@tinyhttp/content-disposition": ["@tinyhttp/content-disposition@2.2.2", "", {}, "sha512-crXw1txzrS36huQOyQGYFvhTeLeG0Si1xu+/l6kXUVYpE0TjFjEZRqTbuadQLfKGZ0jaI+jJoRyqaWwxOSHW2g=="],
 
+    "@tree-sitter-grammars/tree-sitter-kotlin": ["@tree-sitter-grammars/tree-sitter-kotlin@1.1.0", "", { "dependencies": { "node-addon-api": "^8.3.0", "node-gyp-build": "^4.8.4", "npm-check-updates": "^17.1.13" }, "peerDependencies": { "tree-sitter": "^0.22.4" }, "optionalPeers": ["tree-sitter"] }, "sha512-vlVXaxEE8t2kpJgfZpa8XVvxcnKw9AYtRTgy7KWjsDmAsadk06RxAT80IXOgGQnmM9i/orQn1nD84gPNUHu6DQ=="],
+
     "@types/better-sqlite3": ["@types/better-sqlite3@7.6.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-NMv9ASNARoKksWtsq/SHakpYAYnhBrQgGD8zkLYk/jaK8jUGn08CfEdTRgYhMypUQAfzSP8W6gNLe0q19/t4VA=="],
 
     "@types/chai": ["@types/chai@5.2.3", "", { "dependencies": { "@types/deep-eql": "*", "assertion-error": "^2.0.1" } }, "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA=="],
@@ -509,7 +513,7 @@
 
     "node-abi": ["node-abi@3.87.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ=="],
 
-    "node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+    "node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="],
 
     "node-api-headers": ["node-api-headers@1.8.0", "", {}, "sha512-jfnmiKWjRAGbdD1yQS28bknFM1tbHC1oucyuMPjmkEs+kpiu76aRs40WlTmBmyEgzDM76ge1DQ7XJ3R5deiVjQ=="],
 
@@ -517,6 +521,8 @@
 
     "node-llama-cpp": ["node-llama-cpp@3.18.1", "", { "dependencies": { "@huggingface/jinja": "^0.5.6", "async-retry": "^1.3.3", "bytes": "^3.1.2", "chalk": "^5.6.2", "chmodrp": "^1.0.2", "cmake-js": "^8.0.0", "cross-spawn": "^7.0.6", "env-var": "^7.5.0", "filenamify": "^6.0.0", "fs-extra": "^11.3.4", "ignore": "^7.0.4", "ipull": "^3.9.5", "is-unicode-supported": "^2.1.0", "lifecycle-utils": "^3.1.1", "log-symbols": "^7.0.1", "nanoid": "^5.1.6", "node-addon-api": "^8.6.0", "ora": "^9.3.0", "pretty-ms": "^9.3.0", "proper-lockfile": "^4.1.2", "semver": "^7.7.1", "simple-git": "^3.33.0", "slice-ansi": "^8.0.0", "stdout-update": "^4.0.1", "strip-ansi": "^7.2.0", "validate-npm-package-name": "^7.0.2", "which": "^6.0.1", "yargs": "^17.7.2" }, "optionalDependencies": { "@node-llama-cpp/linux-arm64": "3.18.1", "@node-llama-cpp/linux-armv7l": "3.18.1", "@node-llama-cpp/linux-x64": "3.18.1", "@node-llama-cpp/linux-x64-cuda": "3.18.1", "@node-llama-cpp/linux-x64-cuda-ext": "3.18.1", "@node-llama-cpp/linux-x64-vulkan": "3.18.1", "@node-llama-cpp/mac-arm64-metal": "3.18.1", "@node-llama-cpp/mac-x64": "3.18.1", "@node-llama-cpp/win-arm64": "3.18.1", "@node-llama-cpp/win-x64": "3.18.1", "@node-llama-cpp/win-x64-cuda": "3.18.1", "@node-llama-cpp/win-x64-cuda-ext": "3.18.1", "@node-llama-cpp/win-x64-vulkan": "3.18.1" }, "peerDependencies": { "typescript": ">=5.0.0" }, "optionalPeers": ["typescript"], "bin": { "node-llama-cpp": "dist/cli/cli.js", "nlc": "dist/cli/cli.js" } }, "sha512-w0zfuy/IKS2fhrbed5SylZDXJHTVz4HnkwZ4UrFPgSNwJab3QIPwIl4lyCKHHy9flLrtxsAuV5kXfH3HZ6bb8w=="],
 
+    "npm-check-updates": ["npm-check-updates@17.1.18", "", { "bin": { "ncu": "build/cli.js", "npm-check-updates": "build/cli.js" } }, "sha512-bkUy2g4v1i+3FeUf5fXMLbxmV95eG4/sS7lYE32GrUeVgQRfQEk39gpskksFunyaxQgTIdrvYbnuNbO/pSUSqw=="],
+
     "object-assign": ["object-assign@4.1.1", "", {}, "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg=="],
 
     "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],
@@ -689,6 +695,8 @@
 
     "tree-sitter-go": ["tree-sitter-go@0.23.4", "", { "dependencies": { "node-addon-api": "^8.2.1", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.21.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-iQaHEs4yMa/hMo/ZCGqLfG61F0miinULU1fFh+GZreCRtKylFLtvn798ocCZjO2r/ungNZgAY1s1hPFyAwkc7w=="],
 
+    "tree-sitter-java": ["tree-sitter-java@0.23.5", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.21.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-Yju7oQ0Xx7GcUT01mUglPP+bYfvqjNCGdxqigTnew9nLGoII42PNVP3bHrYeMxswiCRM0yubWmN5qk+zsg0zMA=="],
+
     "tree-sitter-javascript": ["tree-sitter-javascript@0.23.1", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.21.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-/bnhbrTD9frUYHQTiYnPcxyHORIw157ERBa6dqzaKxvR/x3PC4Yzd+D1pZIMS6zNg2v3a8BZ0oK7jHqsQo9fWA=="],
 
     "tree-sitter-python": ["tree-sitter-python@0.23.4", "", { "dependencies": { "node-addon-api": "^8.2.1", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.21.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-MbmUAl7y5UCUWqHscHke7DdRDwQnVNMNKQYQc4Gq2p09j+fgPxaU8JVsuOI/0HD3BSEEe5k9j3xmdtIWbDtDgw=="],
@@ -773,8 +781,6 @@
 
     "micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="],
 
-    "node-llama-cpp/node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="],
-
     "ora/cli-spinners": ["cli-spinners@3.4.0", "", {}, "sha512-bXfOC4QcT1tKXGorxL3wbJm6XJPDqEnij2gQ2m7ESQuE+/z9YFIWnl/5RpTiKWbMq3EVKR4fRLJGn6DVfu0mpw=="],
 
     "postcss/nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
@@ -793,6 +799,16 @@
 
     "tinyglobby/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
 
+    "tree-sitter-go/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-javascript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-python/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-rust/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-typescript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
     "vite/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
 
     "vitest/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],

+ 1 - 1
dist/ast.d.ts

@@ -18,7 +18,7 @@
  * grammar packages entirely.
  */
 import type { BreakPoint } from "./store.js";
-export type SupportedLanguage = "typescript" | "tsx" | "javascript" | "python" | "go" | "rust";
+export type SupportedLanguage = "typescript" | "tsx" | "javascript" | "python" | "go" | "rust" | "java" | "kotlin";
 /**
  * Detect language from file path extension.
  * Returns null for unsupported or unknown extensions (including .md).

+ 22 - 0
dist/ast.js

@@ -31,6 +31,9 @@ const EXTENSION_MAP = {
     ".py": "python",
     ".go": "go",
     ".rs": "rust",
+    ".java": "java",
+    ".kt": "kotlin",
+    ".kts": "kotlin",
 };
 /**
  * Detect language from file path extension.
@@ -53,6 +56,8 @@ const GRAMMAR_MAP = {
     python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm" },
     go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm" },
     rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm" },
+    java: { pkg: "tree-sitter-java", wasm: "tree-sitter-java.wasm" },
+    kotlin: { pkg: "@tree-sitter-grammars/tree-sitter-kotlin", wasm: "tree-sitter-kotlin.wasm" },
 };
 // =============================================================================
 // Per-Language Query Definitions
@@ -122,6 +127,23 @@ const LANGUAGE_QUERIES = {
     (type_item) @type
     (mod_item) @mod
   `,
+    java: `
+    (class_declaration) @class
+    (interface_declaration) @iface
+    (enum_declaration) @enum
+    (record_declaration) @class
+    (annotation_type_declaration) @iface
+    (method_declaration) @method
+    (constructor_declaration) @method
+    (import_declaration) @import
+  `,
+    kotlin: `
+    (class_declaration) @class
+    (object_declaration) @class
+    (function_declaration) @func
+    (type_alias) @type
+    (import) @import
+  `,
 };
 /**
  * Score mapping from capture names to break point scores.

+ 4 - 0
package.json

@@ -58,12 +58,14 @@
     "zod": "4.2.1"
   },
   "optionalDependencies": {
+    "@tree-sitter-grammars/tree-sitter-kotlin": "1.1.0",
     "sqlite-vec-darwin-arm64": "0.1.9",
     "sqlite-vec-darwin-x64": "0.1.9",
     "sqlite-vec-linux-arm64": "0.1.9",
     "sqlite-vec-linux-x64": "0.1.9",
     "sqlite-vec-windows-x64": "0.1.9",
     "tree-sitter-go": "0.23.4",
+    "tree-sitter-java": "0.23.5",
     "tree-sitter-python": "0.23.4",
     "tree-sitter-rust": "0.24.0",
     "tree-sitter-typescript": "0.23.2"
@@ -75,10 +77,12 @@
   },
   "pnpm": {
     "onlyBuiltDependencies": [
+      "@tree-sitter-grammars/tree-sitter-kotlin",
       "better-sqlite3",
       "esbuild",
       "node-llama-cpp",
       "tree-sitter-go",
+      "tree-sitter-java",
       "tree-sitter-javascript",
       "tree-sitter-python",
       "tree-sitter-rust",

+ 201 - 1
src/ast.ts

@@ -31,7 +31,15 @@ type QueryType = import("web-tree-sitter").Query;
 // Language Detection
 // =============================================================================
 
-export type SupportedLanguage = "typescript" | "tsx" | "javascript" | "python" | "go" | "rust";
+export type SupportedLanguage =
+  | "typescript"
+  | "tsx"
+  | "javascript"
+  | "python"
+  | "go"
+  | "rust"
+  | "java"
+  | "kotlin";
 
 const EXTENSION_MAP: Record<string, SupportedLanguage> = {
   ".ts": "typescript",
@@ -45,6 +53,9 @@ const EXTENSION_MAP: Record<string, SupportedLanguage> = {
   ".py": "python",
   ".go": "go",
   ".rs": "rust",
+  ".java": "java",
+  ".kt": "kotlin",
+  ".kts": "kotlin",
 };
 
 /**
@@ -70,6 +81,8 @@ const GRAMMAR_MAP: Record<SupportedLanguage, { pkg: string; wasm: string }> = {
   python:     { pkg: "tree-sitter-python",     wasm: "tree-sitter-python.wasm" },
   go:         { pkg: "tree-sitter-go",         wasm: "tree-sitter-go.wasm" },
   rust:       { pkg: "tree-sitter-rust",        wasm: "tree-sitter-rust.wasm" },
+  java:       { pkg: "tree-sitter-java",        wasm: "tree-sitter-java.wasm" },
+  kotlin:     { pkg: "@tree-sitter-grammars/tree-sitter-kotlin", wasm: "tree-sitter-kotlin.wasm" },
 };
 
 // =============================================================================
@@ -141,6 +154,23 @@ const LANGUAGE_QUERIES: Record<SupportedLanguage, string> = {
     (type_item) @type
     (mod_item) @mod
   `,
+  java: `
+    (class_declaration) @class
+    (interface_declaration) @iface
+    (enum_declaration) @enum
+    (record_declaration) @class
+    (annotation_type_declaration) @iface
+    (method_declaration) @method
+    (constructor_declaration) @method
+    (import_declaration) @import
+  `,
+  kotlin: `
+    (class_declaration) @class
+    (object_declaration) @class
+    (function_declaration) @func
+    (type_alias) @type
+    (import) @import
+  `,
 };
 
 /**
@@ -362,6 +392,176 @@ export async function getASTStatus(): Promise<{
   };
 }
 
+// =============================================================================
+// Function-Level Range Extraction (Phase 2)
+// =============================================================================
+
+/**
+ * A byte-offset range covering a single top-level code unit
+ * (function, method, class, interface, struct, impl, trait, type...).
+ *
+ * Used by the `"function"` chunk strategy in store.ts to produce
+ * one chunk per range instead of character-window chunks.
+ *
+ * Instances are built by `getASTFunctionRanges` directly from
+ * tree-sitter captures: `startIndex` / `endIndex` are copied from the
+ * captured node, `type` is the capture name prefixed with `ast:`, and
+ * `name` is whatever `extractNodeName` could recover (possibly nothing).
+ *
+ * NOTE(review): the offsets are whatever web-tree-sitter reports for
+ * `node.startIndex` / `node.endIndex`. For string input these are
+ * believed to be UTF-16 code-unit indices into the parsed string rather
+ * than UTF-8 byte offsets — confirm before applying them to a Buffer.
+ */
+export interface FunctionRange {
+  startIndex: number;       // start offset of the captured node (inclusive)
+  endIndex: number;         // end offset of the captured node (exclusive)
+  type: string;             // `ast:`-prefixed capture name from the tree-sitter query (e.g. "ast:class")
+  name?: string;            // symbol name when extractable (best-effort, may be undefined)
+}
+
+/**
+ * Capture names that denote a "function-like unit" — a chunk worth of
+ * code that should stay together. Shared across all languages because
+ * capture-name semantics (@class, @func, @method, @iface, etc.) are
+ * normalized in `LANGUAGE_QUERIES`. Captures with names not in this
+ * set (e.g. `import`) are ignored — they belong to the inter-range
+ * gaps (char-chunked) instead.
+ *
+ * Language-agnostic by design so that `getASTFunctionRanges` works for
+ * any current-or-future `SupportedLanguage` without requiring a
+ * per-language table edit.
+ *
+ * `getASTFunctionRanges` consults this set with `.has(cap.name)` for
+ * every capture the language query yields. For the JVM queries this
+ * means: Java records and Kotlin objects arrive as `class`, Java
+ * annotation types as `iface`, Java constructors as `method`, and
+ * Kotlin type aliases as `type`.
+ */
+const FUNCTION_CAPTURE_NAMES: ReadonlySet<string> = new Set([
+  "export",       // TS/JS: export_statement wrapping a decl — preserves outer start
+  "class",        // TS/JS/Py/Java/Kotlin/etc. (also Java record_declaration, Kotlin object_declaration)
+  "iface",        // TS: interface_declaration; Java: interface / annotation_type_declaration
+  "func",         // function_declaration + arrow/function-expression lexical_declaration
+  "method",       // method_definition / method_declaration / constructor_declaration
+  "type",         // TS: type_alias_declaration; Go: type_declaration; Rust: type_item; Kotlin: type_alias
+  "enum",         // TS/Rust/Java: enum declarations
+  "decorated",    // Python: decorated_definition — preserves decorators
+  "struct",       // Rust
+  "impl",         // Rust
+  "trait",        // Rust
+  "mod",          // Rust
+]);
+
+/**
+ * Try to pull a human-readable name out of an AST node. Best-effort —
+ * returns `undefined` when the node shape doesn't expose a simple name
+ * child. Used for debugging / display and not for correctness.
+ *
+ * Lookup order:
+ *   1. the node's own `name` field, if it has non-empty text;
+ *   2. the `name` field of the first `variable_declarator` named child;
+ *   3. recursion into the first named child whose type is one of the
+ *      wrapped-declaration node types listed in the body.
+ *
+ * The optional-call syntax (`?.(`) and optional `namedChildren` access
+ * mean a node object missing those members yields `undefined` instead
+ * of throwing.
+ *
+ * @param node - The tree-sitter node to inspect.
+ * @returns The extracted name text, or `undefined` if none of the three
+ *   lookups produced non-empty text.
+ */
+function extractNodeName(node: import("web-tree-sitter").Node): string | undefined {
+  // Common shape: `(function_declaration name: (identifier))` etc.
+  const nameChild = node.childForFieldName?.("name");
+  if (nameChild && nameChild.text) return nameChild.text;
+
+  // TS lexical_declaration: `const foo = () => ...` — first declarator's identifier.
+  const declarator = node.namedChildren?.find(c => c?.type === "variable_declarator");
+  if (declarator) {
+    const id = declarator.childForFieldName?.("name");
+    if (id && id.text) return id.text;
+  }
+
+  // export_statement / decorated_definition — recurse into the wrapped decl.
+  const inner = node.namedChildren?.find(
+    c => c != null && (
+      c.type === "class_declaration" ||
+      c.type === "function_declaration" ||
+      c.type === "interface_declaration" ||
+      c.type === "type_alias_declaration" ||
+      c.type === "enum_declaration" ||
+      c.type === "lexical_declaration" ||
+      c.type === "function_definition" ||
+      c.type === "class_definition"
+    )
+  );
+  if (inner) return extractNodeName(inner);
+
+  return undefined;
+}
+
+/**
+ * Deduplicate overlapping ranges produced by the same AST pass.
+ *
+ * Tree-sitter emits multiple captures for the same region — e.g. an
+ * `export class Foo {}` matches both `export` and `class`. We want ONE
+ * range per region, preferring the outermost (earliest startIndex, largest
+ * endIndex). When two captures start at the same position we keep the
+ * one with the larger end (typically the wrapper — export/decorated).
+ *
+ * After this pass no two ranges overlap (strictly: for any a, b either
+ * a.endIndex <= b.startIndex or b.endIndex <= a.startIndex).
+ *
+ * Because any range starting inside an already-kept range is dropped,
+ * nested units (e.g. a method captured inside a captured class) do not
+ * appear in the result — only the enclosing range survives.
+ *
+ * The input array is not mutated: sorting happens on a shallow copy.
+ * An empty input is returned as-is (same array reference).
+ *
+ * @param ranges - Candidate ranges in any order.
+ * @returns The surviving ranges, sorted by ascending `startIndex`.
+ */
+function dedupeFunctionRanges(ranges: FunctionRange[]): FunctionRange[] {
+  if (ranges.length === 0) return ranges;
+  const sorted = [...ranges].sort((a, b) => {
+    if (a.startIndex !== b.startIndex) return a.startIndex - b.startIndex;
+    return b.endIndex - a.endIndex; // larger wrapper wins at same start
+  });
+
+  const result: FunctionRange[] = [];
+  for (const r of sorted) {
+    const last = result[result.length - 1];
+    if (last && r.startIndex < last.endIndex) {
+      // r is contained in or overlaps last — last is the outer/earlier range; drop r.
+      continue;
+    }
+    result.push(r);
+  }
+  return result;
+}
+
+/**
+ * Parse a source file and return offset ranges for every outermost
+ * code unit that should be its own chunk under the `"function"` chunk
+ * strategy.
+ *
+ * Every capture whose name is in `FUNCTION_CAPTURE_NAMES` becomes a
+ * candidate range; `dedupeFunctionRanges` then drops candidates nested
+ * inside (or overlapping) an earlier, larger one.
+ *
+ * Returns an empty array for unsupported languages, parse failures, or
+ * grammar loading failures. Never throws: any error is logged with
+ * `console.warn` and turned into an empty result. Reuses the
+ * parser/grammar/query caches already populated by `getASTBreakPoints`.
+ *
+ * The parser and the parsed tree are released in `finally` blocks, so
+ * they are freed even when query compilation, capture iteration, or
+ * name extraction throws.
+ *
+ * @param content - The file content to parse.
+ * @param filepath - The file path (used for language detection).
+ * @returns Array of non-overlapping FunctionRange objects, sorted by startIndex.
+ */
+export async function getASTFunctionRanges(
+  content: string,
+  filepath: string,
+): Promise<FunctionRange[]> {
+  const language = detectLanguage(filepath);
+  if (!language) return [];
+
+  try {
+    await ensureInit();
+
+    const grammar = await loadGrammar(language);
+    if (!grammar) return [];
+
+    const parser = new ParserClass!();
+    try {
+      parser.setLanguage(grammar);
+
+      const tree = parser.parse(content);
+      if (!tree) return [];
+
+      try {
+        const query = getQuery(language, grammar);
+
+        const ranges: FunctionRange[] = [];
+        for (const cap of query.captures(tree.rootNode)) {
+          if (!FUNCTION_CAPTURE_NAMES.has(cap.name)) continue;
+          // Copy plain values out of the node now; the node is unusable
+          // once the tree has been deleted.
+          ranges.push({
+            startIndex: cap.node.startIndex,
+            endIndex: cap.node.endIndex,
+            type: `ast:${cap.name}`,
+            name: extractNodeName(cap.node),
+          });
+        }
+
+        return dedupeFunctionRanges(ranges);
+      } finally {
+        // Always release the tree, including on the error path.
+        tree.delete();
+      }
+    } finally {
+      // Always release the parser, including on the error path.
+      parser.delete();
+    }
+  } catch (err) {
+    console.warn(`[qmd] AST function-range extraction failed for ${filepath}, returning empty: ${err instanceof Error ? err.message : err}`);
+    return [];
+  }
+}
+
 // =============================================================================
 // Symbol Extraction (Phase 2 Stub)
 // =============================================================================