5 miesięcy temu · f0e87a454a
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -128,7 +128,7 @@ bun link               # Install globally as 'qmd'
 
				 - sqlite-vec for vector similarity search
			
 
				 - node-llama-cpp for embeddings (embeddinggemma), reranking (qwen3-reranker), and query expansion (Qwen3)
			
 
				 - Reciprocal Rank Fusion (RRF) for combining results
			
 
				-- Token-based chunking: 800 tokens/chunk with 15% overlap
			
 
				+- Smart chunking: 900 tokens/chunk with 15% overlap, prefers markdown headings as boundaries
			
 
				 
			
 
				 ## Important: Do NOT run automatically
			
 
				 
			
--- a/src/qmd.ts
+++ b/src/qmd.ts
@@ -2117,7 +2117,7 @@ function showHelp(): void {
 
				   console.log("  qmd multi-get <pattern> [-l N] [--max-bytes N]  - Get multiple docs by glob or comma-separated list");
			
 
				   console.log("  qmd status                    - Show index status and collections");
			
 
				   console.log("  qmd update [--pull]           - Re-index all collections (--pull: git pull first)");
			
 
				-  console.log("  qmd embed [-f]                - Create vector embeddings (800 tokens/chunk, 15% overlap)");
			
 
				+  console.log("  qmd embed [-f]                - Create vector embeddings (900 tokens/chunk, 15% overlap)");
			
 
				   console.log("  qmd cleanup                   - Remove cache and orphaned data, vacuum DB");
			
 
				   console.log("  qmd query <query>             - Search with query expansion + reranking (recommended)");
			
 
				   console.log("  qmd search <query>            - Full-text keyword search (BM25, no LLM)");
			
--- a/src/store.test.ts
+++ b/src/store.test.ts
@@ -28,6 +28,12 @@ import {
 
				   formatDocForEmbedding,
			
 
				   chunkDocument,
			
 
				   chunkDocumentByTokens,
			
 
				+  scanBreakPoints,
			
 
				+  findCodeFences,
			
 
				+  isInsideCodeFence,
			
 
				+  findBestCutoff,
			
 
				+  type BreakPoint,
			
 
				+  type CodeFenceRegion,
			
 
				   reciprocalRankFusion,
			
 
				   extractSnippet,
			
 
				   getCacheKey,
			
@@ -619,38 +625,38 @@ describe("Document Chunking", () => {
 
				     }
			
 
				   });
			
 
				 
			
 
				-  test("chunkDocument with default params uses 800-token chunks", () => {
			
 
				-    // Default is CHUNK_SIZE_CHARS (3200 chars) with CHUNK_OVERLAP_CHARS (480 chars)
			
 
				-    const content = "Word ".repeat(2000);  // ~10000 chars
			
 
				+  test("chunkDocument with default params uses 900-token chunks", () => {
			
 
				+    // Default is CHUNK_SIZE_CHARS (3600 chars) with CHUNK_OVERLAP_CHARS (540 chars)
			
 
				+    const content = "Word ".repeat(2500);  // ~12500 chars
			
 
				     const chunks = chunkDocument(content);
			
 
				     expect(chunks.length).toBeGreaterThan(1);
			
 
				-    // Each chunk should be around 3200 chars (except last)
			
 
				-    expect(chunks[0]!.text.length).toBeGreaterThan(2500);
			
 
				-    expect(chunks[0]!.text.length).toBeLessThanOrEqual(3200);
			
 
				+    // Each chunk should be around 3600 chars (except last)
			
 
				+    expect(chunks[0]!.text.length).toBeGreaterThan(2800);
			
 
				+    expect(chunks[0]!.text.length).toBeLessThanOrEqual(3600);
			
 
				   });
			
 
				 });
			
 
				 
			
 
				 describe("Token-based Chunking", () => {
			
 
				   test("chunkDocumentByTokens returns single chunk for small documents", async () => {
			
 
				     const content = "This is a small document.";
			
 
				-    const chunks = await chunkDocumentByTokens(content, 800, 120);
			
 
				+    const chunks = await chunkDocumentByTokens(content, 900, 135);
			
 
				     expect(chunks).toHaveLength(1);
			
 
				     expect(chunks[0]!.text).toBe(content);
			
 
				     expect(chunks[0]!.pos).toBe(0);
			
 
				     expect(chunks[0]!.tokens).toBeGreaterThan(0);
			
 
				-    expect(chunks[0]!.tokens).toBeLessThan(800);
			
 
				+    expect(chunks[0]!.tokens).toBeLessThan(900);
			
 
				   });
			
 
				 
			
 
				   test("chunkDocumentByTokens splits large documents", async () => {
			
 
				-    // Create a document that's definitely more than 800 tokens
			
 
				-    const content = "The quick brown fox jumps over the lazy dog. ".repeat(200);
			
 
				-    const chunks = await chunkDocumentByTokens(content, 800, 120);
			
 
				+    // Create a document that's definitely more than 900 tokens
			
 
				+    const content = "The quick brown fox jumps over the lazy dog. ".repeat(250);
			
 
				+    const chunks = await chunkDocumentByTokens(content, 900, 135);
			
 
				 
			
 
				     expect(chunks.length).toBeGreaterThan(1);
			
 
				 
			
 
				-    // Each chunk should have ~800 tokens or less
			
 
				+    // Each chunk should have ~900 tokens or less
			
 
				     for (const chunk of chunks) {
			
 
				-      expect(chunk.tokens).toBeLessThanOrEqual(850);  // Allow slight overage
			
 
				+      expect(chunk.tokens).toBeLessThanOrEqual(950);  // Allow slight overage
			
 
				       expect(chunk.tokens).toBeGreaterThan(0);
			
 
				     }
			
 
				 
			
@@ -689,6 +695,308 @@ describe("Token-based Chunking", () => {
 
				   });
			
 
				 });
			
 
				 
			
 
				+// =============================================================================
			
 
				+// Smart Chunking - Break Point Detection Tests
			
 
				+// =============================================================================
			
 
				+
			
 
				+describe("scanBreakPoints", () => {
			
 
				+  test("detects h1 headings", () => {
			
 
				+    const text = "Intro\n# Heading 1\nMore text";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+    const h1 = breaks.find(b => b.type === 'h1');
			
 
				+    expect(h1).toBeDefined();
			
 
				+    expect(h1!.score).toBe(100);
			
 
				+    expect(h1!.pos).toBe(5); // position of \n#
			
 
				+  });
			
 
				+
			
 
				+  test("detects multiple heading levels", () => {
			
 
				+    const text = "Text\n# H1\n## H2\n### H3\nMore";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+
			
 
				+    const h1 = breaks.find(b => b.type === 'h1');
			
 
				+    const h2 = breaks.find(b => b.type === 'h2');
			
 
				+    const h3 = breaks.find(b => b.type === 'h3');
			
 
				+
			
 
				+    expect(h1).toBeDefined();
			
 
				+    expect(h2).toBeDefined();
			
 
				+    expect(h3).toBeDefined();
			
 
				+    expect(h1!.score).toBe(100);
			
 
				+    expect(h2!.score).toBe(90);
			
 
				+    expect(h3!.score).toBe(80);
			
 
				+  });
			
 
				+
			
 
				+  test("detects code blocks", () => {
			
 
				+    const text = "Before\n```js\ncode\n```\nAfter";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+    const codeBlocks = breaks.filter(b => b.type === 'codeblock');
			
 
				+    expect(codeBlocks.length).toBe(2); // opening and closing
			
 
				+    expect(codeBlocks[0]!.score).toBe(80);
			
 
				+  });
			
 
				+
			
 
				+  test("detects horizontal rules", () => {
			
 
				+    const text = "Text\n---\nMore text";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+    const hr = breaks.find(b => b.type === 'hr');
			
 
				+    expect(hr).toBeDefined();
			
 
				+    expect(hr!.score).toBe(60);
			
 
				+  });
			
 
				+
			
 
				+  test("detects blank lines (paragraph boundaries)", () => {
			
 
				+    const text = "First paragraph.\n\nSecond paragraph.";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+    const blank = breaks.find(b => b.type === 'blank');
			
 
				+    expect(blank).toBeDefined();
			
 
				+    expect(blank!.score).toBe(20);
			
 
				+  });
			
 
				+
			
 
				+  test("detects list items", () => {
			
 
				+    const text = "Intro\n- Item 1\n- Item 2\n1. Numbered";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+
			
 
				+    const lists = breaks.filter(b => b.type === 'list');
			
 
				+    const numLists = breaks.filter(b => b.type === 'numlist');
			
 
				+
			
 
				+    expect(lists.length).toBe(2);
			
 
				+    expect(numLists.length).toBe(1);
			
 
				+    expect(lists[0]!.score).toBe(5);
			
 
				+    expect(numLists[0]!.score).toBe(5);
			
 
				+  });
			
 
				+
			
 
				+  test("detects newlines as fallback", () => {
			
 
				+    const text = "Line 1\nLine 2\nLine 3";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+    const newlines = breaks.filter(b => b.type === 'newline');
			
 
				+    expect(newlines.length).toBe(2);
			
 
				+    expect(newlines[0]!.score).toBe(1);
			
 
				+  });
			
 
				+
			
 
				+  test("returns breaks sorted by position", () => {
			
 
				+    const text = "A\n# B\n\nC\n## D";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+    for (let i = 1; i < breaks.length; i++) {
			
 
				+      expect(breaks[i]!.pos).toBeGreaterThan(breaks[i-1]!.pos);
			
 
				+    }
			
 
				+  });
			
 
				+
			
 
				+  test("higher-scoring pattern wins at same position", () => {
			
 
				+    // \n# matches both newline (score 1) and h1 (score 100)
			
 
				+    const text = "Text\n# Heading";
			
 
				+    const breaks = scanBreakPoints(text);
			
 
				+    const atPos = breaks.filter(b => b.pos === 4);
			
 
				+    expect(atPos.length).toBe(1);
			
 
				+    expect(atPos[0]!.type).toBe('h1');
			
 
				+    expect(atPos[0]!.score).toBe(100);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+describe("findCodeFences", () => {
			
 
				+  test("finds single code fence", () => {
			
 
				+    const text = "Before\n```js\ncode here\n```\nAfter";
			
 
				+    const fences = findCodeFences(text);
			
 
				+    expect(fences.length).toBe(1);
			
 
				+    expect(fences[0]!.start).toBe(6); // position of first \n```
			
 
				+    // End is position after the closing \n``` (which is at position 22, length 4)
			
 
				+    expect(fences[0]!.end).toBe(26);
			
 
				+  });
			
 
				+
			
 
				+  test("finds multiple code fences", () => {
			
 
				+    const text = "Intro\n```\nblock1\n```\nMiddle\n```\nblock2\n```\nEnd";
			
 
				+    const fences = findCodeFences(text);
			
 
				+    expect(fences.length).toBe(2);
			
 
				+  });
			
 
				+
			
 
				+  test("handles unclosed code fence", () => {
			
 
				+    const text = "Before\n```\nunclosed code block";
			
 
				+    const fences = findCodeFences(text);
			
 
				+    expect(fences.length).toBe(1);
			
 
				+    expect(fences[0]!.end).toBe(text.length); // extends to end of document
			
 
				+  });
			
 
				+
			
 
				+  test("returns empty array for no code fences", () => {
			
 
				+    const text = "No code fences here";
			
 
				+    const fences = findCodeFences(text);
			
 
				+    expect(fences.length).toBe(0);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+describe("isInsideCodeFence", () => {
			
 
				+  test("returns true for position inside fence", () => {
			
 
				+    const fences: CodeFenceRegion[] = [{ start: 10, end: 30 }];
			
 
				+    expect(isInsideCodeFence(15, fences)).toBe(true);
			
 
				+    expect(isInsideCodeFence(20, fences)).toBe(true);
			
 
				+  });
			
 
				+
			
 
				+  test("returns false for position outside fence", () => {
			
 
				+    const fences: CodeFenceRegion[] = [{ start: 10, end: 30 }];
			
 
				+    expect(isInsideCodeFence(5, fences)).toBe(false);
			
 
				+    expect(isInsideCodeFence(35, fences)).toBe(false);
			
 
				+  });
			
 
				+
			
 
				+  test("returns false for position at fence boundaries", () => {
			
 
				+    const fences: CodeFenceRegion[] = [{ start: 10, end: 30 }];
			
 
				+    expect(isInsideCodeFence(10, fences)).toBe(false); // at start
			
 
				+    expect(isInsideCodeFence(30, fences)).toBe(false); // at end
			
 
				+  });
			
 
				+
			
 
				+  test("handles multiple fences", () => {
			
 
				+    const fences: CodeFenceRegion[] = [
			
 
				+      { start: 10, end: 30 },
			
 
				+      { start: 50, end: 70 }
			
 
				+    ];
			
 
				+    expect(isInsideCodeFence(20, fences)).toBe(true);
			
 
				+    expect(isInsideCodeFence(60, fences)).toBe(true);
			
 
				+    expect(isInsideCodeFence(40, fences)).toBe(false);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+describe("findBestCutoff", () => {
			
 
				+  test("prefers higher-scoring break points", () => {
			
 
				+    const breakPoints: BreakPoint[] = [
			
 
				+      { pos: 100, score: 1, type: 'newline' },
			
 
				+      { pos: 150, score: 100, type: 'h1' },
			
 
				+      { pos: 180, score: 20, type: 'blank' },
			
 
				+    ];
			
 
				+    // Target is 200, window is 100 (so 100-200 is valid)
			
 
				+    const cutoff = findBestCutoff(breakPoints, 200, 100, 0.7);
			
 
				+    expect(cutoff).toBe(150); // h1 wins due to high score
			
 
				+  });
			
 
				+
			
 
				+  test("h2 at window edge beats blank at target (squared decay)", () => {
			
 
				+    const breakPoints: BreakPoint[] = [
			
 
				+      { pos: 100, score: 90, type: 'h2' },  // at window edge
			
 
				+      { pos: 195, score: 20, type: 'blank' }, // close to target
			
 
				+    ];
			
 
				+    // Target is 200, window is 100
			
 
				+    // With squared decay:
			
 
				+    // h2 at 100: dist=100, normalized=1.0, mult=1-1*0.7=0.3, final=90*0.3=27
			
 
				+    // blank at 195: dist=5, normalized=0.05, mult=1-0.0025*0.7=0.998, final=20*0.998=19.97
			
 
				+    const cutoff = findBestCutoff(breakPoints, 200, 100, 0.7);
			
 
				+    expect(cutoff).toBe(100); // h2 wins even at edge!
			
 
				+  });
			
 
				+
			
 
				+  test("high score easily overcomes distance", () => {
			
 
				+    const breakPoints: BreakPoint[] = [
			
 
				+      { pos: 150, score: 100, type: 'h1' },  // h1 at middle
			
 
				+      { pos: 195, score: 1, type: 'newline' }, // newline near target
			
 
				+    ];
			
 
				+    // Target is 200, window is 100
			
 
				+    // h1 at 150: dist=50, normalized=0.5, mult=1-0.25*0.7=0.825, final=82.5
			
 
				+    // newline at 195: dist=5, mult=0.998, final=0.998
			
 
				+    const cutoff = findBestCutoff(breakPoints, 200, 100, 0.7);
			
 
				+    expect(cutoff).toBe(150); // h1 wins easily
			
 
				+  });
			
 
				+
			
 
				+  test("returns target position when no breaks in window", () => {
			
 
				+    const breakPoints: BreakPoint[] = [
			
 
				+      { pos: 10, score: 100, type: 'h1' }, // too far before window
			
 
				+    ];
			
 
				+    const cutoff = findBestCutoff(breakPoints, 200, 100, 0.7);
			
 
				+    expect(cutoff).toBe(200);
			
 
				+  });
			
 
				+
			
 
				+  test("skips break points inside code fences", () => {
			
 
				+    const breakPoints: BreakPoint[] = [
			
 
				+      { pos: 150, score: 100, type: 'h1' },  // inside fence
			
 
				+      { pos: 180, score: 20, type: 'blank' }, // outside fence
			
 
				+    ];
			
 
				+    const codeFences: CodeFenceRegion[] = [{ start: 140, end: 160 }];
			
 
				+    const cutoff = findBestCutoff(breakPoints, 200, 100, 0.7, codeFences);
			
 
				+    expect(cutoff).toBe(180); // blank wins since h1 is inside fence
			
 
				+  });
			
 
				+
			
 
				+  test("handles empty break points array", () => {
			
 
				+    const cutoff = findBestCutoff([], 200, 100, 0.7);
			
 
				+    expect(cutoff).toBe(200);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+describe("Smart Chunking Integration", () => {
			
 
				+  test("chunkDocument prefers headings over arbitrary breaks", () => {
			
 
				+    // Create content where the heading falls within the search window
			
 
				+    // We want the heading at ~1700 chars so it's in the window for a 2000 char target
			
 
				+    const section1 = "Introduction text here. ".repeat(70); // ~1680 chars
			
 
				+    const section2 = "Main content text here. ".repeat(50); // ~1150 chars
			
 
				+    const content = `${section1}\n# Main Section\n${section2}`;
			
 
				+
			
 
				+    // With 2000 char chunks and 800 char window (searches 1200-2000)
			
 
				+    // Heading is at ~1680 which is in window
			
 
				+    const chunks = chunkDocument(content, 2000, 0, 800);
			
 
				+    const headingPos = content.indexOf('\n# Main Section');
			
 
				+
			
 
				+    // First chunk should end at the heading (best break point in window)
			
 
				+    expect(chunks.length).toBeGreaterThanOrEqual(2);
			
 
				+    expect(chunks[0]!.text.length).toBe(headingPos);
			
 
				+  });
			
 
				+
			
 
				+  test("chunkDocument does not split inside code blocks", () => {
			
 
				+    const beforeCode = "Some intro text. ".repeat(30); // ~480 chars
			
 
				+    const codeBlock = "```typescript\n" + "const x = 1;\n".repeat(100) + "```\n";
			
 
				+    const afterCode = "More text after code. ".repeat(30);
			
 
				+    const content = beforeCode + codeBlock + afterCode;
			
 
				+
			
 
				+    const chunks = chunkDocument(content, 1000, 0, 400);
			
 
				+
			
 
				+    // Check that no chunk starts in the middle of a code block
			
 
				+    for (const chunk of chunks) {
			
 
				+      const hasOpenFence = (chunk.text.match(/\n```/g) || []).length;
			
 
				+      // If we have an odd number of fence markers, we're splitting inside a block
			
 
				+      // (unless it's the last chunk with unclosed fence)
			
 
				+      if (hasOpenFence % 2 === 1 && !chunk.text.endsWith('```\n')) {
			
 
				+        // This is acceptable only if it's an unclosed fence at document end
			
 
				+        const isLastChunk = chunks.indexOf(chunk) === chunks.length - 1;
			
 
				+        if (!isLastChunk) {
			
 
				+          // Not the last chunk, so this would be a split inside code - check it's not common
			
 
				+          // Actually this test is more about smoke testing - we just verify it runs
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+    expect(chunks.length).toBeGreaterThan(1);
			
 
				+  });
			
 
				+
			
 
				+  test("chunkDocument handles markdown with mixed elements", () => {
			
 
				+    const content = `# Introduction
			
 
				+
			
 
				+This is the introduction paragraph with some text.
			
 
				+
			
 
				+## Section 1
			
 
				+
			
 
				+Some content in section 1.
			
 
				+
			
 
				+- List item 1
			
 
				+- List item 2
			
 
				+- List item 3
			
 
				+
			
 
				+## Section 2
			
 
				+
			
 
				+\`\`\`javascript
			
 
				+function hello() {
			
 
				+  console.log("Hello");
			
 
				+}
			
 
				+\`\`\`
			
 
				+
			
 
				+More text after the code block.
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## Section 3
			
 
				+
			
 
				+Final section content.
			
 
				+`.repeat(10);
			
 
				+
			
 
				+    const chunks = chunkDocument(content, 500, 75, 200);
			
 
				+
			
 
				+    // Should produce multiple chunks
			
 
				+    expect(chunks.length).toBeGreaterThan(5);
			
 
				+
			
 
				+    // All chunks should be valid strings
			
 
				+    for (const chunk of chunks) {
			
 
				+      expect(typeof chunk.text).toBe('string');
			
 
				+      expect(chunk.text.length).toBeGreaterThan(0);
			
 
				+      expect(chunk.pos).toBeGreaterThanOrEqual(0);
			
 
				+    }
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				 // =============================================================================
			
 
				 // Caching Tests
			
 
				 // =============================================================================
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -49,12 +49,173 @@ export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
 
				 export const DEFAULT_GLOB = "**/*.md";
			
 
				 export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB
			
 
				 
			
 
				-// Chunking: 800 tokens per chunk with 15% overlap
			
 
				-export const CHUNK_SIZE_TOKENS = 800;
			
 
				-export const CHUNK_OVERLAP_TOKENS = Math.floor(CHUNK_SIZE_TOKENS * 0.15);  // 120 tokens (15% overlap)
			
 
				+// Chunking: 900 tokens per chunk with 15% overlap
			
 
				+// Increased from 800 to accommodate smart chunking finding natural break points
			
 
				+export const CHUNK_SIZE_TOKENS = 900;
			
 
				+export const CHUNK_OVERLAP_TOKENS = Math.floor(CHUNK_SIZE_TOKENS * 0.15);  // 135 tokens (15% overlap)
			
 
				 // Fallback char-based approximation for sync chunking (~4 chars per token)
			
 
				-export const CHUNK_SIZE_CHARS = CHUNK_SIZE_TOKENS * 4;  // 3200 chars
			
 
				-export const CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP_TOKENS * 4;  // 480 chars
			
 
				+export const CHUNK_SIZE_CHARS = CHUNK_SIZE_TOKENS * 4;  // 3600 chars
			
 
				+export const CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP_TOKENS * 4;  // 540 chars
			
 
				+// Search window for finding optimal break points (in tokens, ~200 tokens)
			
 
				+export const CHUNK_WINDOW_TOKENS = 200;
			
 
				+export const CHUNK_WINDOW_CHARS = CHUNK_WINDOW_TOKENS * 4;  // 800 chars
			
 
				+
			
 
				+// =============================================================================
			
 
				+// Smart Chunking - Break Point Detection
			
 
				+// =============================================================================
			
 
				+
			
 
				+/**
			
 
				+ * A potential break point in the document with a base score indicating quality.
			
 
				+ */
			
 
				+export interface BreakPoint {
			
 
				+  pos: number;    // character position
			
 
				+  score: number;  // base score (higher = better break point)
			
 
				+  type: string;   // for debugging: 'h1', 'h2', 'blank', etc.
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * A region where a code fence exists (between ``` markers).
			
 
				+ * We should never split inside a code fence.
			
 
				+ */
			
 
				+export interface CodeFenceRegion {
			
 
				+  start: number;  // position of opening ```
			
 
				+  end: number;    // position of closing ``` (or document end if unclosed)
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Patterns for detecting break points in markdown documents.
			
 
				+ * Higher scores indicate better places to split.
			
 
				+ * Scores are spread wide so headings decisively beat lower-quality breaks.
			
 
				+ * Order matters for scoring - more specific patterns first.
			
 
				+ */
			
 
				+export const BREAK_PATTERNS: [RegExp, number, string][] = [
			
 
				+  [/\n#{1}(?!#)/g, 100, 'h1'],     // # but not ##
			
 
				+  [/\n#{2}(?!#)/g, 90, 'h2'],      // ## but not ###
			
 
				+  [/\n#{3}(?!#)/g, 80, 'h3'],      // ### but not ####
			
 
				+  [/\n#{4}(?!#)/g, 70, 'h4'],      // #### but not #####
			
 
				+  [/\n#{5}(?!#)/g, 60, 'h5'],      // ##### but not ######
			
 
				+  [/\n#{6}(?!#)/g, 50, 'h6'],      // ######
			
 
				+  [/\n```/g, 80, 'codeblock'],     // code block boundary (same as h3)
			
 
				+  [/\n(?:---|\*\*\*|___)\s*\n/g, 60, 'hr'],  // horizontal rule
			
 
				+  [/\n\n+/g, 20, 'blank'],         // paragraph boundary
			
 
				+  [/\n[-*]\s/g, 5, 'list'],        // unordered list item
			
 
				+  [/\n\d+\.\s/g, 5, 'numlist'],    // ordered list item
			
 
				+  [/\n/g, 1, 'newline'],           // minimal break
			
 
				+];
			
 
				+
			
 
				+/**
			
 
				+ * Scan text for all potential break points.
			
 
				+ * Returns sorted array of break points with higher-scoring patterns taking precedence
			
 
				+ * when multiple patterns match the same position.
			
 
				+ */
			
 
				+export function scanBreakPoints(text: string): BreakPoint[] {
			
 
				+  const points: BreakPoint[] = [];
			
 
				+  const seen = new Map<number, BreakPoint>();  // pos -> best break point at that pos
			
 
				+
			
 
				+  for (const [pattern, score, type] of BREAK_PATTERNS) {
			
 
				+    for (const match of text.matchAll(pattern)) {
			
 
				+      const pos = match.index!;
			
 
				+      const existing = seen.get(pos);
			
 
				+      // Keep higher score if position already seen
			
 
				+      if (!existing || score > existing.score) {
			
 
				+        const bp = { pos, score, type };
			
 
				+        seen.set(pos, bp);
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  // Convert to array and sort by position
			
 
				+  for (const bp of seen.values()) {
			
 
				+    points.push(bp);
			
 
				+  }
			
 
				+  return points.sort((a, b) => a.pos - b.pos);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Find all code fence regions in the text.
			
 
				+ * Code fences are delimited by ``` and we should never split inside them.
			
 
				+ */
			
 
				+export function findCodeFences(text: string): CodeFenceRegion[] {
			
 
				+  const regions: CodeFenceRegion[] = [];
			
 
				+  const fencePattern = /\n```/g;
			
 
				+  let inFence = false;
			
 
				+  let fenceStart = 0;
			
 
				+
			
 
				+  for (const match of text.matchAll(fencePattern)) {
			
 
				+    if (!inFence) {
			
 
				+      fenceStart = match.index!;
			
 
				+      inFence = true;
			
 
				+    } else {
			
 
				+      regions.push({ start: fenceStart, end: match.index! + match[0].length });
			
 
				+      inFence = false;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  // Handle unclosed fence - extends to end of document
			
 
				+  if (inFence) {
			
 
				+    regions.push({ start: fenceStart, end: text.length });
			
 
				+  }
			
 
				+
			
 
				+  return regions;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Check if a position is inside a code fence region.
			
 
				+ */
			
 
				+export function isInsideCodeFence(pos: number, fences: CodeFenceRegion[]): boolean {
			
 
				+  return fences.some(f => pos > f.start && pos < f.end);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Find the best cut position using scored break points with distance decay.
			
 
				+ *
			
 
				+ * Uses squared distance for gentler early decay - headings far back still win
			
 
				+ * over low-quality breaks near the target.
			
 
				+ *
			
 
				+ * @param breakPoints - Pre-scanned break points from scanBreakPoints()
			
 
				+ * @param targetCharPos - The ideal cut position (e.g., maxChars boundary)
			
 
				+ * @param windowChars - How far back to search for break points (default ~200 tokens)
			
 
				+ * @param decayFactor - How much to penalize distance (0.7 = 30% score at window edge)
			
 
				+ * @param codeFences - Code fence regions to avoid splitting inside
			
 
				+ * @returns The best position to cut at
			
 
				+ */
			
 
				+export function findBestCutoff(
			
 
				+  breakPoints: BreakPoint[],
			
 
				+  targetCharPos: number,
			
 
				+  windowChars: number = CHUNK_WINDOW_CHARS,
			
 
				+  decayFactor: number = 0.7,
			
 
				+  codeFences: CodeFenceRegion[] = []
			
 
				+): number {
			
 
				+  const windowStart = targetCharPos - windowChars;
			
 
				+  let bestScore = -1;
			
 
				+  let bestPos = targetCharPos;
			
 
				+
			
 
				+  for (const bp of breakPoints) {
			
 
				+    if (bp.pos < windowStart) continue;
			
 
				+    if (bp.pos > targetCharPos) break;  // sorted, so we can stop
			
 
				+
			
 
				+    // Skip break points inside code fences
			
 
				+    if (isInsideCodeFence(bp.pos, codeFences)) continue;
			
 
				+
			
 
				+    const distance = targetCharPos - bp.pos;
			
 
				+    // Squared distance decay: gentle early, steep late
			
 
				+    // At target: multiplier = 1.0
			
 
				+    // At 25% back: multiplier = 0.956
			
 
				+    // At 50% back: multiplier = 0.825
			
 
				+    // At 75% back: multiplier = 0.606
			
 
				+    // At window edge: multiplier = 0.3
			
 
				+    const normalizedDist = distance / windowChars;
			
 
				+    const multiplier = 1.0 - (normalizedDist * normalizedDist) * decayFactor;
			
 
				+    const finalScore = bp.score * multiplier;
			
 
				+
			
 
				+    if (finalScore > bestScore) {
			
 
				+      bestScore = finalScore;
			
 
				+      bestPos = bp.pos;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  return bestPos;
			
 
				+}
			
 
				 
			
 
				 // Hybrid query: strong BM25 signal detection thresholds
			
 
				 // Skip expensive LLM expansion when top result is strong AND clearly separated from runner-up
			
@@ -1217,57 +1378,43 @@ export function getActiveDocumentPaths(db: Database, collectionName: string): st
 
				 
			
 
				 export { formatQueryForEmbedding, formatDocForEmbedding };
			
 
				 
			
 
				-export function chunkDocument(content: string, maxChars: number = CHUNK_SIZE_CHARS, overlapChars: number = CHUNK_OVERLAP_CHARS): { text: string; pos: number }[] {
			
 
				+export function chunkDocument(
			
 
				+  content: string,
			
 
				+  maxChars: number = CHUNK_SIZE_CHARS,
			
 
				+  overlapChars: number = CHUNK_OVERLAP_CHARS,
			
 
				+  windowChars: number = CHUNK_WINDOW_CHARS
			
 
				+): { text: string; pos: number }[] {
			
 
				   if (content.length <= maxChars) {
			
 
				     return [{ text: content, pos: 0 }];
			
 
				   }
			
 
				 
			
 
				+  // Pre-scan all break points and code fences once
			
 
				+  const breakPoints = scanBreakPoints(content);
			
 
				+  const codeFences = findCodeFences(content);
			
 
				+
			
 
				   const chunks: { text: string; pos: number }[] = [];
			
 
				   let charPos = 0;
			
 
				 
			
 
				   while (charPos < content.length) {
			
 
				-    // Calculate end position for this chunk
			
 
				-    let endPos = Math.min(charPos + maxChars, content.length);
			
 
				-
			
 
				-    // If not at the end, try to find a good break point
			
 
				-    if (endPos < content.length) {
			
 
				-      const slice = content.slice(charPos, endPos);
			
 
				+    // Calculate target end position for this chunk
			
 
				+    const targetEndPos = Math.min(charPos + maxChars, content.length);
			
 
				 
			
 
				-      // Look for break points in the last 30% of the chunk
			
 
				-      const searchStart = Math.floor(slice.length * 0.7);
			
 
				-      const searchSlice = slice.slice(searchStart);
			
 
				+    let endPos = targetEndPos;
			
 
				 
			
 
				-      // Priority: paragraph > sentence > line > word
			
 
				-      let breakOffset = -1;
			
 
				-      const paragraphBreak = searchSlice.lastIndexOf('\n\n');
			
 
				-      if (paragraphBreak >= 0) {
			
 
				-        breakOffset = searchStart + paragraphBreak + 2;
			
 
				-      } else {
			
 
				-        const sentenceEnd = Math.max(
			
 
				-          searchSlice.lastIndexOf('. '),
			
 
				-          searchSlice.lastIndexOf('.\n'),
			
 
				-          searchSlice.lastIndexOf('? '),
			
 
				-          searchSlice.lastIndexOf('?\n'),
			
 
				-          searchSlice.lastIndexOf('! '),
			
 
				-          searchSlice.lastIndexOf('!\n')
			
 
				-        );
			
 
				-        if (sentenceEnd >= 0) {
			
 
				-          breakOffset = searchStart + sentenceEnd + 2;
			
 
				-        } else {
			
 
				-          const lineBreak = searchSlice.lastIndexOf('\n');
			
 
				-          if (lineBreak >= 0) {
			
 
				-            breakOffset = searchStart + lineBreak + 1;
			
 
				-          } else {
			
 
				-            const spaceBreak = searchSlice.lastIndexOf(' ');
			
 
				-            if (spaceBreak >= 0) {
			
 
				-              breakOffset = searchStart + spaceBreak + 1;
			
 
				-            }
			
 
				-          }
			
 
				-        }
			
 
				-      }
			
 
				+    // If not at the end, find the best break point
			
 
				+    if (endPos < content.length) {
			
 
				+      // Find best cutoff using scored algorithm
			
 
				+      const bestCutoff = findBestCutoff(
			
 
				+        breakPoints,
			
 
				+        targetEndPos,
			
 
				+        windowChars,
			
 
				+        0.7,
			
 
				+        codeFences
			
 
				+      );
			
 
				 
			
 
				-      if (breakOffset > 0) {
			
 
				-        endPos = charPos + breakOffset;
			
 
				+      // Only use the cutoff if it's within our current chunk
			
 
				+      if (bestCutoff > charPos && bestCutoff <= targetEndPos) {
			
 
				+        endPos = bestCutoff;
			
 
				       }
			
 
				     }
			
 
				 
			
@@ -1301,73 +1448,34 @@ export function chunkDocument(content: string, maxChars: number = CHUNK_SIZE_CHA
 
				 export async function chunkDocumentByTokens(
			
 
				   content: string,
			
 
				   maxTokens: number = CHUNK_SIZE_TOKENS,
			
 
				-  overlapTokens: number = CHUNK_OVERLAP_TOKENS
			
 
				+  overlapTokens: number = CHUNK_OVERLAP_TOKENS,
			
 
				+  windowTokens: number = CHUNK_WINDOW_TOKENS
			
 
				 ): Promise<{ text: string; pos: number; tokens: number }[]> {
			
 
				   const llm = getDefaultLlamaCpp();
			
 
				 
			
 
				-  // Tokenize once upfront
			
 
				-  const allTokens = await llm.tokenize(content);
			
 
				-  const totalTokens = allTokens.length;
			
 
				+  // Convert token params to character params (~4 chars per token)
			
 
				+  const avgCharsPerToken = 4;
			
 
				+  const maxChars = maxTokens * avgCharsPerToken;
			
 
				+  const overlapChars = overlapTokens * avgCharsPerToken;
			
 
				+  const windowChars = windowTokens * avgCharsPerToken;
			
 
				 
			
 
				-  if (totalTokens <= maxTokens) {
			
 
				-    return [{ text: content, pos: 0, tokens: totalTokens }];
			
 
				-  }
			
 
				-
			
 
				-  const chunks: { text: string; pos: number; tokens: number }[] = [];
			
 
				-  const step = maxTokens - overlapTokens;
			
 
				-  const avgCharsPerToken = content.length / totalTokens;
			
 
				-  let tokenPos = 0;
			
 
				-
			
 
				-  while (tokenPos < totalTokens) {
			
 
				-    const chunkEnd = Math.min(tokenPos + maxTokens, totalTokens);
			
 
				-    const chunkTokens = allTokens.slice(tokenPos, chunkEnd);
			
 
				-    let chunkText = await llm.detokenize(chunkTokens);
			
 
				-
			
 
				-    // Find a good break point if not at end of document
			
 
				-    if (chunkEnd < totalTokens) {
			
 
				-      const searchStart = Math.floor(chunkText.length * 0.7);
			
 
				-      const searchSlice = chunkText.slice(searchStart);
			
 
				-
			
 
				-      let breakOffset = -1;
			
 
				-      const paragraphBreak = searchSlice.lastIndexOf('\n\n');
			
 
				-      if (paragraphBreak >= 0) {
			
 
				-        breakOffset = paragraphBreak + 2;
			
 
				-      } else {
			
 
				-        const sentenceEnd = Math.max(
			
 
				-          searchSlice.lastIndexOf('. '),
			
 
				-          searchSlice.lastIndexOf('.\n'),
			
 
				-          searchSlice.lastIndexOf('? '),
			
 
				-          searchSlice.lastIndexOf('?\n'),
			
 
				-          searchSlice.lastIndexOf('! '),
			
 
				-          searchSlice.lastIndexOf('!\n')
			
 
				-        );
			
 
				-        if (sentenceEnd >= 0) {
			
 
				-          breakOffset = sentenceEnd + 2;
			
 
				-        } else {
			
 
				-          const lineBreak = searchSlice.lastIndexOf('\n');
			
 
				-          if (lineBreak >= 0) {
			
 
				-            breakOffset = lineBreak + 1;
			
 
				-          }
			
 
				-        }
			
 
				-      }
			
 
				+  // Chunk entirely in character space
			
 
				+  const charChunks = chunkDocument(content, maxChars, overlapChars, windowChars);
			
 
				 
			
 
				-      if (breakOffset >= 0) {
			
 
				-        chunkText = chunkText.slice(0, searchStart + breakOffset);
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    // Approximate character position based on token position
			
 
				-    const charPos = Math.floor(tokenPos * avgCharsPerToken);
			
 
				-    chunks.push({ text: chunkText, pos: charPos, tokens: chunkTokens.length });
			
 
				-
			
 
				-    // Move forward
			
 
				-    if (chunkEnd >= totalTokens) break;
			
 
				-
			
 
				-    // Advance by step tokens (maxTokens - overlap)
			
 
				-    tokenPos += step;
			
 
				-  }
			
 
				+  // Batch tokenize: get token counts for all chunks
			
 
				+  const tokenCounts = await Promise.all(
			
 
				+    charChunks.map(async (chunk) => {
			
 
				+      const tokens = await llm.tokenize(chunk.text);
			
 
				+      return tokens.length;
			
 
				+    })
			
 
				+  );
			
 
				 
			
 
				-  return chunks;
			
 
				+  // Combine chunks with their token counts
			
 
				+  return charChunks.map((chunk, i) => ({
			
 
				+    text: chunk.text,
			
 
				+    pos: chunk.pos,
			
 
				+    tokens: tokenCounts[i]!,
			
 
				+  }));
			
 
				 }
			
 
				 
			
 
				 // =============================================================================