2 месяца назад · c464952b1d
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -1654,8 +1654,8 @@ function parseEmbedBatchOption(name: string, value: unknown): number | undefined
 
															 function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
														
 
															   if (value === undefined) return undefined;
														
 
															   const s = String(value);
														
 
															-  if (s === "auto" || s === "regex") return s;
														
 
															-  throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
														
 
															+  if (s === "auto" || s === "regex" || s === "function") return s; // accept exactly the three ChunkStrategy literals
														
 
															+  throw new Error(`--chunk-strategy must be "auto", "regex", or "function" (got "${s}")`); // anything else is rejected, echoing the stringified input
														
 
															 }
														
 
															 async function vectorIndex(
														
--- a/src/collections.ts
+++ b/src/collections.ts
@@ -9,6 +9,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
 
															 import { join, dirname } from "path";
														
 
															 import { homedir } from "os";
														
 
															 import YAML from "yaml";
														
 
															+import type { ChunkStrategy } from "./store.js";
														
 
															 // ============================================================================
														
 
															 // Types
														
@@ -31,6 +32,21 @@ export interface Collection {
 
															   context?: ContextMap;      // Optional context definitions
														
 
															   update?: string;           // Optional bash command to run during qmd update
														
 
															   includeByDefault?: boolean; // Include in queries by default (default: true)
														
 
															+  /**
														
 
															+   * Chunking strategy for this collection (Phase 2 — i-bud0h8vu). When
														
 
															+   * unset, qmd falls back to the global CLI `--chunk-strategy` flag.
														
 
															+   *
														
 
															+   *   - "auto"     — char-based chunks with AST break points as hints.
														
 
															+   *   - "regex"    — char-based chunks without AST hints (legacy).
														
 
															+   *   - "function" — one chunk per AST function/class/method range for
														
 
															+   *                  supported code files. Opt-in per collection; files
														
 
															+   *                  with zero detected ranges fall back to "auto".
														
 
															+   *
														
 
															+   * Changing this value requires a per-collection force-reindex
														
 
															+   * (`qmd update --force <collection>`). The `content_hash`-keyed rows
														
 
															+   * replace in-place, so other collections are unaffected unless they
														
 
															+   * index byte-identical content, which shares the same hash-keyed rows.
														
 
															+   */
														
 
															+  chunkStrategy?: ChunkStrategy;
														
 
															 }
														
 
															 /**
														
--- a/src/store.ts
+++ b/src/store.ts
@@ -227,7 +227,7 @@ export function findBestCutoff(
 
															 // Chunk Strategy
														
 
															 // =============================================================================
														
 
															-export type ChunkStrategy = "auto" | "regex";
														
 
															+export type ChunkStrategy = "auto" | "regex" | "function";
														
 
															 /**
														
 
															  * Merge two sets of break points (e.g. regex + AST), keeping the highest
														
@@ -1298,6 +1298,7 @@ type PendingEmbeddingDoc = {
 
															   hash: string;
														
 
															   path: string;
														
 
															   bytes: number;
														
 
															+  collection: string;
														
 
															 };
														
 
															 type EmbeddingDoc = PendingEmbeddingDoc & {
														
@@ -1330,8 +1331,13 @@ function resolveEmbedOptions(options?: EmbedOptions): Required<Pick<EmbedOptions
 
															 }
														
 
															 function getPendingEmbeddingDocs(db: Database): PendingEmbeddingDoc[] {
														
 
															+  // `MIN(d.collection)` deterministically picks one collection per hash when
														
 
															+  // the same content is indexed in multiple collections (the smallest name
														
 
															+  // under the column's collation — byte order unless one is declared).
														
 
															+  // Vectors are stored per hash, so the winning collection's YAML
														
 
															+  // `chunkStrategy` decides how that shared content is chunked for every
														
 
															+  // collection containing it. See Phase 2 design notes (i-bud0h8vu).
														
 
															   return db.prepare(`
														
 
															-    SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
														
 
															+    SELECT d.hash, MIN(d.path) as path, MIN(d.collection) as collection, length(CAST(c.doc AS BLOB)) as bytes
														
 
															     FROM documents d
														
 
															     JOIN content c ON d.hash = c.hash
														
 
															     LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
														
@@ -1417,6 +1423,23 @@ export async function generateEmbeddings(
 
															   const totalDocs = docsToEmbed.length;
														
 
															   const startTime = Date.now();
														
 
															+  // Per-collection chunkStrategy lookup (Phase 2 — i-bud0h8vu). YAML
														
 
															+  // `chunkStrategy` on a collection wins over `options.chunkStrategy`
														
 
															+  // (global CLI flag); falls back to the global option, then to
														
 
															+  // chunkDocumentByTokens' own "regex" default when neither is set.
														
 
															+  // Opt-in per collection — collections without the field are untouched.
														
 
															+  const collectionStrategies = new Map<string, ChunkStrategy>();
														
 
															+  try {
														
 
															+    const { listCollections: listYamlCollections } = await import("./collections.js");
														
 
															+    for (const c of listYamlCollections()) {
														
 
															+      if (c.chunkStrategy) collectionStrategies.set(c.name, c.chunkStrategy);
														
 
															+    }
														
 
															+  } catch {
														
 
															+    // If YAML config is missing/unreadable, fall back silently to the
														
 
															+    // global strategy — no collection overrides. Keeps SDK/inline
														
 
															+    // callers that never touch ~/.config/qmd working.
														
 
															+  }
														
 
															+
														
 
															   // Use store's LlamaCpp or global singleton, wrapped in a session
														
 
															   const llm = getLlm(store);
														
 
															   const embedModelUri = llm.embedModelName;
														
@@ -1446,11 +1469,13 @@ export async function generateEmbeddings(
 
															         if (!doc.body.trim()) continue;
														
 
															         const title = extractTitle(doc.body, doc.path);
														
 
															+        const perCollectionStrategy = collectionStrategies.get(doc.collection);
														
 
															+        const chunkStrategy = perCollectionStrategy ?? options?.chunkStrategy;
														
 
															         const chunks = await chunkDocumentByTokens(
														
 
															           doc.body,
														
 
															           undefined, undefined, undefined,
														
 
															           doc.path,
														
 
															-          options?.chunkStrategy,
														
 
															+          chunkStrategy,
														
 
															           session.signal,
														
 
															         );
														
@@ -2171,8 +2196,12 @@ export function chunkDocument(
 
															  * break points for supported code files, merges with regex break points,
														
 
															  * and delegates to the shared chunk algorithm.
														
 
															  *
														
 
															- * Falls back to regex-only when strategy is "regex", filepath is absent,
														
 
															- * or language is unsupported.
														
 
															+ * Strategies:
														
 
															+ *   - "regex"    (default) — char-based chunking with regex break points only.
														
 
															+ *   - "auto"     — regex break points merged with AST break points (soft hints).
														
 
															+ *   - "function" — one chunk per AST function range (Phase 2); inter-range
														
 
															+ *                  gaps (imports, top-level code) are char-chunked using the
														
 
															+ *                  regex break points. Falls back to "auto" when zero ranges
														
 
															+ *                  are detected, and to regex-only when no filepath is given.
														
 
															  */
														
 
															 export async function chunkDocumentAsync(
														
 
															   content: string,
														
@@ -2185,6 +2214,29 @@ export async function chunkDocumentAsync(
 
															   const regexPoints = scanBreakPoints(content);
														
 
															   const codeFences = findCodeFences(content);
														
 
															+  // "function" strategy: delegate to the function-level chunker. If no
														
 
															+  // ranges are detected (markdown, unsupported lang, parse failure), fall
														
 
															+  // back to "auto" behavior (AST-break-point-assisted char chunking).
														
 
															+  if (chunkStrategy === "function" && filepath) {
														
 
															+    const { getASTFunctionRanges, getASTBreakPoints } = await import("./ast.js");
														
 
															+    const ranges = await getASTFunctionRanges(content, filepath);
														
 
															+    if (ranges.length > 0) {
														
 
															+      return chunkByFunctionRanges(
														
 
															+        content,
														
 
															+        ranges,
														
 
															+        regexPoints,
														
 
															+        codeFences,
														
 
															+        maxChars,
														
 
															+        overlapChars,
														
 
															+        windowChars,
														
 
															+      );
														
 
															+    }
														
 
															+    // Zero ranges — fall through to auto behavior so break points still help.
														
 
															+    const astPoints = await getASTBreakPoints(content, filepath);
														
 
															+    const merged = astPoints.length > 0 ? mergeBreakPoints(regexPoints, astPoints) : regexPoints;
														
 
															+    return chunkDocumentWithBreakPoints(content, merged, codeFences, maxChars, overlapChars, windowChars);
														
 
															+  }
														
 
															+
														
 
															   let breakPoints = regexPoints;
														
 
															   if (chunkStrategy === "auto" && filepath) {
														
 
															     const { getASTBreakPoints } = await import("./ast.js");
														
@@ -2197,6 +2249,99 @@ export async function chunkDocumentAsync(
 
															   return chunkDocumentWithBreakPoints(content, breakPoints, codeFences, maxChars, overlapChars, windowChars);
														
 
															 }
														
 
															+/**
															+ * Produce one chunk per AST function range, plus chunks for the gaps
															+ * between ranges (imports, top-level code). Any span longer than
															+ * `maxChars` is split further with the char-based algorithm, guided by
															+ * the regex break points and code fences that fall inside it, so a
															+ * single oversized chunk is never emitted.
															+ *
															+ * `ranges` is expected to be non-empty, sorted by `startIndex` and
															+ * non-overlapping. A range that starts before the previous one ended is
															+ * clamped to its still-uncovered part, so malformed input cannot emit
															+ * the same text twice or move the cursor backwards.
															+ *
															+ * @param content       Full document text.
															+ * @param ranges        Function/class ranges as offsets into `content`.
															+ * @param regexPoints   Break points positioned relative to `content`.
															+ * @param codeFences    Code-fence regions positioned relative to `content`.
															+ * @param maxChars      Largest span emitted without further splitting.
															+ * @param overlapChars  Forwarded unchanged to the char-based splitter.
															+ * @param windowChars   Forwarded unchanged to the char-based splitter.
															+ * @returns Chunks whose `pos` is the absolute offset of `text` in `content`.
															+ */
															+function chunkByFunctionRanges(
															+  content: string,
															+  ranges: import("./ast.js").FunctionRange[],
															+  regexPoints: BreakPoint[],
															+  codeFences: CodeFenceRegion[],
															+  maxChars: number,
															+  overlapChars: number,
															+  windowChars: number,
															+): { text: string; pos: number }[] {
															+  const out: { text: string; pos: number }[] = [];
															+
															+  // Emit content[start, end): whole when it fits, otherwise split by the
															+  // char-based algorithm. Break points and fences are restricted to the
															+  // span and rebased so the splitter operates on a standalone slice.
															+  const emitSpan = (start: number, end: number) => {
															+    const text = content.slice(start, end);
															+    if (text.length === 0) return;
															+    if (text.length <= maxChars) {
															+      out.push({ text, pos: start });
															+      return;
															+    }
															+    const subPoints = regexPoints
															+      .filter(p => p.pos >= start && p.pos < end)
															+      .map(p => ({ ...p, pos: p.pos - start }));
															+    const subFences = codeFences
															+      .filter(f => f.end > start && f.start < end)
															+      .map(f => ({
															+        start: Math.max(0, f.start - start),
															+        end: Math.max(0, Math.min(end, f.end) - start),
															+      }));
															+    const sub = chunkDocumentWithBreakPoints(text, subPoints, subFences, maxChars, overlapChars, windowChars);
															+    for (const c of sub) out.push({ text: c.text, pos: start + c.pos });
															+  };
															+
															+  // Gaps go through the same path, except that whitespace-only gaps are
															+  // dropped — they carry no embeddable signal.
															+  const emitGap = (start: number, end: number) => {
															+    if (start >= end) return;
															+    if (!content.slice(start, end).trim()) return;
															+    emitSpan(start, end);
															+  };
															+
															+  let cursor = 0;
															+  for (const range of ranges) {
															+    // Never start before text that has already been emitted.
															+    const start = Math.max(cursor, range.startIndex);
															+
															+    // Leading / inter-range gap (imports, top-level code).
															+    emitGap(cursor, start);
															+
															+    if (range.endIndex > start) emitSpan(start, range.endIndex);
															+
															+    // Only ever advance, even when a range ends before it starts.
															+    cursor = Math.max(start, range.endIndex);
															+  }
															+
															+  // Trailing gap after the last range.
															+  emitGap(cursor, content.length);
															+
															+  // Nothing was emitted (all ranges empty, all gaps whitespace-only):
															+  // keep the invariant that non-empty content yields at least one chunk.
															+  if (out.length === 0 && content.length > 0) {
															+    return [{ text: content, pos: 0 }];
															+  }
															+
															+  return out;
															+}
														
 
															+
														
 
															 /**
														
 
															  * Chunk a document by actual token count using the LLM tokenizer.
														
 
															  * More accurate than character-based chunking but requires async.
														
--- a/test/ast-chunking.test.ts
+++ b/test/ast-chunking.test.ts
@@ -197,3 +197,71 @@ describe("AST break point scores", () => {
 
															     expect(points.find(p => p.type === "ast:enum")?.score).toBe(80);
														
 
															   });
														
 
															 });
														
 
															+
														
 
															+// ==========================================================================
														
 
															+// Function-level chunk strategy (Phase 2)
														
 
															+// ==========================================================================
														
 
															+
														
 
															+describe("chunkDocumentAsync with chunkStrategy='function'", () => {
														
 
															+  const TS_CODE = `import { X } from "./x";
														
 
															+
														
 
															+export function alpha(): number {
														
 
															+  const start = Date.now();
														
 
															+  return start;
														
 
															+}
														
 
															+
														
 
															+export function beta(): number {
														
 
															+  return 42;
														
 
															+}
														
 
															+
														
 
															+export class Gamma {
														
 
															+  constructor() {}
														
 
															+  run(): void {}
														
 
															+}
														
 
															+`;
														
 
															+
														
 
															+  test("produces one chunk per top-level code unit + import gap", async () => {
														
 
															+    const chunks = await chunkDocumentAsync(TS_CODE, undefined, undefined, undefined, "x.ts", "function");
														
 
															+    // The fixture has an import gap plus alpha, beta and Gamma (4 units); the assertion below only requires 3 chunks.
														
 
															+    expect(chunks.length).toBeGreaterThanOrEqual(3);
														
 
															+  });
														
 
															+
														
 
															+  test("each function chunk contains exactly one function/class body", async () => {
														
 
															+    const chunks = await chunkDocumentAsync(TS_CODE, undefined, undefined, undefined, "x.ts", "function");
														
 
															+    const alphaChunk = chunks.find(c => c.text.includes("function alpha"));
														
 
															+    const betaChunk = chunks.find(c => c.text.includes("function beta"));
														
 
															+    const classChunk = chunks.find(c => c.text.includes("class Gamma"));
														
 
															+
														
 
															+    expect(alphaChunk).toBeDefined();
														
 
															+    expect(betaChunk).toBeDefined();
														
 
															+    expect(classChunk).toBeDefined();
														
 
															+
														
 
															+    expect(alphaChunk!.text.includes("function beta")).toBe(false);
														
 
															+    expect(betaChunk!.text.includes("class Gamma")).toBe(false);
														
 
															+  });
														
 
															+
														
 
															+  test("pos reflects absolute offset in original content", async () => {
														
 
															+    const chunks = await chunkDocumentAsync(TS_CODE, undefined, undefined, undefined, "x.ts", "function");
														
 
															+    for (const c of chunks) { // compares at most the first 20 chars of each chunk with TS_CODE at c.pos
														
 
															+      expect(c.pos).toBeGreaterThanOrEqual(0);
														
 
															+      const slice = TS_CODE.slice(c.pos, c.pos + Math.min(20, c.text.length));
														
 
															+      const head = c.text.slice(0, Math.min(20, c.text.length));
														
 
															+      expect(slice).toBe(head);
														
 
															+    }
														
 
															+  });
														
 
															+
														
 
															+  test("markdown falls back to auto behavior when chunkStrategy='function'", async () => {
														
 
															+    // Markdown → detectLanguage returns null → getASTFunctionRanges returns []
														
 
															+    // → fall through to auto behavior → short markdown = 1 chunk.
														
 
															+    const md = "# Heading\n\nSome paragraph text.";
														
 
															+    const chunks = await chunkDocumentAsync(md, undefined, undefined, undefined, "readme.md", "function");
														
 
															+    expect(chunks.length).toBe(1);
														
 
															+    expect(chunks[0]!.text).toBe(md);
														
 
															+  });
														
 
															+
														
 
															+  test("code file with only bare statements falls back to auto (no ranges)", async () => {
														
 
															+    const bare = "const x = 1;\nconst y = 2;\n";
														
 
															+    const chunks = await chunkDocumentAsync(bare, undefined, undefined, undefined, "bare.ts", "function");
														
 
															+    expect(chunks.length).toBe(1);
														
 
															+  });
														
 
															+});
														
--- a/test/ast.test.ts
+++ b/test/ast.test.ts
@@ -6,7 +6,7 @@
 
															  */
														
 
															 import { describe, test, expect } from "vitest";
														
 
															-import { detectLanguage, getASTBreakPoints, extractSymbols } from "../src/ast.js";
														
 
															+import { detectLanguage, getASTBreakPoints, getASTFunctionRanges, extractSymbols } from "../src/ast.js";
														
 
															 import type { SupportedLanguage } from "../src/ast.js";
														
 
															 // =============================================================================
														
@@ -317,6 +317,144 @@ describe("getASTBreakPoints - error handling", () => {
 
															   });
														
 
															 });
														
 
															+// =============================================================================
														
 
															+// Function-Level Range Extraction (Phase 2)
														
 
															+// =============================================================================
														
 
															+
														
 
															+describe("getASTFunctionRanges - TypeScript", () => {
														
 
															+  const TS_SAMPLE = `import { Database } from './db';
														
 
															+
														
 
															+interface Config {
														
 
															+  secret: string;
														
 
															+}
														
 
															+
														
 
															+type UserId = string;
														
 
															+
														
 
															+export class Service {
														
 
															+  constructor(private db: Database) {}
														
 
															+
														
 
															+  async fetch(id: UserId): Promise<string> {
														
 
															+    return this.db.get(id);
														
 
															+  }
														
 
															+
														
 
															+  parse(raw: string): string {
														
 
															+    return raw.trim();
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+export function helper(x: string): string {
														
 
															+  return x.toUpperCase();
														
 
															+}
														
 
															+
														
 
															+const arrow = (n: number): number => n + 1;
														
 
															+`;
														
 
															+
														
 
															+  test("returns one range per top-level code unit", async () => {
														
 
															+    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
														
 
															+    // Candidate units in the fixture: interface, type, export class, export function, const arrow (5);
														
 
															+    // class methods are expected to be absorbed by the class range. Only >= 4 is asserted below.
														
 
															+    expect(ranges.length).toBeGreaterThanOrEqual(4);
														
 
															+  });
														
 
															+
														
 
															+  test("ranges are sorted by startIndex", async () => {
														
 
															+    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
														
 
															+    for (let i = 1; i < ranges.length; i++) {
														
 
															+      expect(ranges[i]!.startIndex).toBeGreaterThanOrEqual(ranges[i - 1]!.startIndex);
														
 
															+    }
														
 
															+  });
														
 
															+
														
 
															+  test("ranges do not overlap", async () => {
														
 
															+    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
														
 
															+    for (let i = 1; i < ranges.length; i++) {
														
 
															+      expect(ranges[i]!.startIndex).toBeGreaterThanOrEqual(ranges[i - 1]!.endIndex);
														
 
															+    }
														
 
															+  });
														
 
															+
														
 
															+  test("each range slice is non-empty and starts at a recognizable token", async () => {
														
 
															+    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
														
 
															+    for (const r of ranges) {
														
 
															+      const slice = TS_SAMPLE.slice(r.startIndex, r.endIndex);
														
 
															+      expect(slice.length).toBeGreaterThan(0);
														
 
															+      expect(/^(export|class|interface|type|function|const)\b/.test(slice.trimStart())).toBe(true);
														
 
															+    }
														
 
															+  });
														
 
															+
														
 
															+  test("export class range is captured as one unit (not split into methods)", async () => {
														
 
															+    const ranges = await getASTFunctionRanges(TS_SAMPLE, "src/service.ts");
														
 
															+    const classRange = ranges.find(r => {
														
 
															+      const slice = TS_SAMPLE.slice(r.startIndex, r.endIndex);
														
 
															+      return slice.includes("class Service") && slice.includes("parse(");
														
 
															+    });
														
 
															+    expect(classRange).toBeDefined();
														
 
															+  });
														
 
															+});
														
 
															+
														
 
															+describe("getASTFunctionRanges - Python", () => {
														
 
															+  const PY_SAMPLE = `import os
														
 
															+
														
 
															+class Service:
														
 
															+    def __init__(self):
														
 
															+        self.x = 1
														
 
															+
														
 
															+    def run(self):
														
 
															+        return self.x
														
 
															+
														
 
															+@decorator
														
 
															+def decorated_func():
														
 
															+    return 42
														
 
															+
														
 
															+def plain_func():
														
 
															+    return 1
														
 
															+`;
														
 
															+
														
 
															+  test("captures class and function definitions (including decorated)", async () => {
														
 
															+    const ranges = await getASTFunctionRanges(PY_SAMPLE, "service.py");
														
 
															+    expect(ranges.length).toBeGreaterThanOrEqual(3); // fixture top level: class Service, decorated_func, plain_func
														
 
															+    const types = ranges.map(r => r.type);
														
 
															+    expect(types.some(t => t === "ast:class")).toBe(true);
														
 
															+    expect(types.some(t => t === "ast:func" || t === "ast:decorated")).toBe(true);
														
 
															+  });
														
 
															+
														
 
															+  test("decorated function range includes the decorator", async () => {
														
 
															+    const ranges = await getASTFunctionRanges(PY_SAMPLE, "service.py");
														
 
															+    const decorated = ranges.find(r => {
														
 
															+      const slice = PY_SAMPLE.slice(r.startIndex, r.endIndex);
														
 
															+      return slice.includes("decorated_func");
														
 
															+    });
														
 
															+    expect(decorated).toBeDefined();
														
 
															+    const slice = PY_SAMPLE.slice(decorated!.startIndex, decorated!.endIndex);
														
 
															+    expect(slice.trimStart().startsWith("@decorator")).toBe(true);
														
 
															+  });
														
 
															+});
														
 
															+
														
 
															+describe("getASTFunctionRanges - error handling", () => {
														
 
															+  test("returns empty array for markdown", async () => {
														
 
															+    const ranges = await getASTFunctionRanges("# Hello", "README.md");
														
 
															+    expect(ranges).toEqual([]);
														
 
															+  });
														
 
															+
														
 
															+  test("returns empty array for unknown extension", async () => {
														
 
															+    const ranges = await getASTFunctionRanges("noop", "notes.txt");
														
 
															+    expect(ranges).toEqual([]);
														
 
															+  });
														
 
															+
														
 
															+  test("returns empty array for empty file", async () => {
														
 
															+    const ranges = await getASTFunctionRanges("", "empty.ts");
														
 
															+    expect(ranges).toEqual([]);
														
 
															+  });
														
 
															+
														
 
															+  test("handles garbage input gracefully (non-throwing)", async () => {
														
 
															+    const ranges = await getASTFunctionRanges("function {{ broken !!", "broken.ts");
														
 
															+    expect(Array.isArray(ranges)).toBe(true); // only the return shape is checked; the contents are not asserted
														
 
															+  });
														
 
															+
														
 
															+  test("returns empty array for content with no top-level units", async () => {
														
 
															+    const ranges = await getASTFunctionRanges("const x = 1;\nconst y = 2;\n", "vars.ts");
														
 
															+    // lexical_declaration only matches when value is arrow_function/function_expression
														
 
															+    expect(ranges).toEqual([]);
														
 
															+  });
														
 
															+});
														
 
															+
														
 
															 // =============================================================================
														
 
															 // Symbol Extraction Stub (Phase 2)
														
 
															 // =============================================================================
														
--- a/test/collections-config.test.ts
+++ b/test/collections-config.test.ts
@@ -7,8 +7,16 @@
 
															 import { describe, test, expect, beforeEach, afterEach } from "vitest";
														
 
															 import { join } from "path";
														
 
															-import { homedir } from "os";
														
 
															-import { getConfigPath, setConfigIndexName } from "../src/collections.js";
														
 
															+import { homedir, tmpdir } from "os";
														
 
															+import { mkdtempSync, rmSync, readFileSync } from "fs";
														
 
															+import {
														
 
															+  getConfigPath,
														
 
															+  setConfigIndexName,
														
 
															+  setConfigSource,
														
 
															+  loadConfig,
														
 
															+  saveConfig,
														
 
															+} from "../src/collections.js";
														
 
															+import type { CollectionConfig } from "../src/collections.js";
														
 
															 // Save/restore env vars around each test
														
 
															 let savedEnv: Record<string, string | undefined>;
														
@@ -72,3 +80,75 @@ describe("getConfigDir via getConfigPath", () => {
 
															     expect(getConfigPath()).toBe(join("/xdg/config", "qmd", "myindex.yml"));
														
 
															   });
														
 
															 });
														
 
															+
														
 
															+// ============================================================================
														
 
															+// chunkStrategy schema round-trip (Phase 2 — i-bud0h8vu)
														
 
															+// ============================================================================
														
 
															+
														
 
															+describe("Collection.chunkStrategy YAML round-trip", () => {
														
 
															+  let tmpDir: string;
														
 
															+
														
 
															+  beforeEach(() => {
														
 
															+    tmpDir = mkdtempSync(join(tmpdir(), "qmd-chunkstrategy-"));
														
 
															+    process.env.QMD_CONFIG_DIR = tmpDir;
														
 
															+    setConfigIndexName("index");
														
 
															+  });
														
 
															+
														
 
															+  afterEach(() => {
														
 
															+    // Reset config source so we don't leak inline state
														
 
															+    setConfigSource();
														
 
															+    try {
														
 
															+      rmSync(tmpDir, { recursive: true, force: true });
														
 
															+    } catch {
														
 
															+      // best-effort cleanup: ignore errors from removing the temp dir
														
 
															+    }
														
 
															+  });
														
 
															+
														
 
															+  test("chunkStrategy field persists through save/load cycle", () => {
														
 
															+    const config: CollectionConfig = {
														
 
															+      collections: {
														
 
															+        "oivo-cli": {
														
 
															+          path: "/srv/cli/src",
														
 
															+          pattern: "**/*.ts",
														
 
															+          chunkStrategy: "function",
														
 
															+        },
														
 
															+        "oivo-docs": {
														
 
															+          path: "/srv/docs",
														
 
															+          pattern: "**/*.md",
														
 
															+          // no chunkStrategy — should remain unset after round-trip
														
 
															+        },
														
 
															+      },
														
 
															+    };
														
 
															+    saveConfig(config);
														
 
															+
														
 
															+    const loaded = loadConfig();
														
 
															+    expect(loaded.collections["oivo-cli"]?.chunkStrategy).toBe("function");
														
 
															+    expect(loaded.collections["oivo-docs"]?.chunkStrategy).toBeUndefined();
														
 
															+  });
														
 
															+
														
 
															+  test("chunkStrategy 'auto' and 'regex' round-trip", () => {
														
 
															+    const config: CollectionConfig = {
														
 
															+      collections: {
														
 
															+        a: { path: "/a", pattern: "*.ts", chunkStrategy: "auto" },
														
 
															+        b: { path: "/b", pattern: "*.ts", chunkStrategy: "regex" },
														
 
															+      },
														
 
															+    };
														
 
															+    saveConfig(config);
														
 
															+
														
 
															+    const loaded = loadConfig();
														
 
															+    expect(loaded.collections.a?.chunkStrategy).toBe("auto");
														
 
															+    expect(loaded.collections.b?.chunkStrategy).toBe("regex");
														
 
															+  });
														
 
															+
														
 
															+  test("omitted chunkStrategy does not appear in serialized YAML", () => {
														
 
															+    const config: CollectionConfig = {
														
 
															+      collections: {
														
 
															+        plain: { path: "/p", pattern: "*.md" },
														
 
															+      },
														
 
															+    };
														
 
															+    saveConfig(config);
														
 
															+
														
 
															+    const yaml = readFileSync(join(tmpDir, "index.yml"), "utf-8");
														
 
															+    expect(yaml).not.toContain("chunkStrategy");
														
 
															+  });
														
 
															+});