/**
 * intent.test.ts - Tests for the intent feature
 *
 * Tests cover:
 * - extractIntentTerms: stop word filtering, punctuation, acronyms, edge cases
 * - extractSnippet with intent: disambiguation across multiple document sections
 * - parseStructuredQuery with intent: lines (parsing, validation, error cases)
 * - Chunk selection scoring with intent
 * - Strong-signal bypass when intent is present
 * - Intent constants
 *
 * Run with: npx vitest run test/intent.test.ts
 */

import { describe, test, expect } from "vitest";
import {
  extractSnippet,
  extractIntentTerms,
  INTENT_WEIGHT_SNIPPET,
  INTENT_WEIGHT_CHUNK,
  type ExpandedQuery,
} from "../src/store.js";

// =============================================================================
// parseStructuredQuery — duplicated from src/cli/qmd.ts for unit testing
// (qmd.ts doesn't export it since it's a CLI internal)
// =============================================================================

interface ParsedStructuredQuery {
  searches: ExpandedQuery[];
  intent?: string;
}

/**
 * Parses a multi-line "query document" into typed searches plus an optional
 * intent string.
 *
 * Blank lines are ignored, but reported line numbers refer to the original
 * (1-based) position in `query`.
 *
 * @param query - Raw query text; one `lex:` / `vec:` / `hyde:` / `intent:` /
 *   `expand:` entry per line, prefixes matched case-insensitively.
 * @returns The typed searches (in input order) and the intent if one was
 *   given, or `null` when the input is not a structured document: no
 *   non-blank lines, a lone `expand:` line with text, or a lone line without
 *   any recognised prefix.
 * @throws Error when `expand:` is combined with other lines or has no text,
 *   when more than one `intent:` line is present, when an `intent:` or typed
 *   line has no text, when a typed line contains a carriage return / newline,
 *   when a line in a multi-line document has no recognised prefix, or when
 *   `intent:` is the only content.
 */
function parseStructuredQuery(query: string): ParsedStructuredQuery | null {
  // Number the lines before dropping blanks so error messages point at the
  // line the user actually wrote.
  const rawLines = query
    .split('\n')
    .map((line, idx) => ({
      raw: line,
      trimmed: line.trim(),
      number: idx + 1,
    }))
    .filter(line => line.trimmed.length > 0);

  if (rawLines.length === 0) return null;

  const prefixRe = /^(lex|vec|hyde):\s*/i;
  const expandRe = /^expand:\s*/i;
  const intentRe = /^intent:\s*/i;

  const typed: ExpandedQuery[] = [];
  let intent: string | undefined;

  for (const line of rawLines) {
    if (expandRe.test(line.trimmed)) {
      // expand: is only valid as the sole non-blank line of the document.
      if (rawLines.length > 1) {
        throw new Error(`Line ${line.number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
      }
      const text = line.trimmed.replace(expandRe, '').trim();
      if (!text) {
        throw new Error('expand: query must include text.');
      }
      // A valid lone expand: line is not a structured document.
      return null;
    }

    if (intentRe.test(line.trimmed)) {
      if (intent !== undefined) {
        throw new Error(`Line ${line.number}: only one intent: line is allowed per query document.`);
      }
      // Only the leading "intent:" prefix is stripped, so later colons in the
      // text are preserved.
      const text = line.trimmed.replace(intentRe, '').trim();
      if (!text) {
        throw new Error(`Line ${line.number}: intent: must include text.`);
      }
      intent = text;
      continue;
    }

    const match = line.trimmed.match(prefixRe);
    if (match) {
      const type = match[1]!.toLowerCase() as 'lex' | 'vec' | 'hyde';
      const text = line.trimmed.slice(match[0].length).trim();
      if (!text) {
        throw new Error(`Line ${line.number} (${type}:) must include text.`);
      }
      // The input was split on '\n', so in practice this guards against a
      // carriage return left in the middle of the text.
      if (/\r|\n/.test(text)) {
        throw new Error(`Line ${line.number} (${type}:) contains a newline. Keep each query on a single line.`);
      }
      typed.push({ type, query: text, line: line.number });
      continue;
    }

    // A single unprefixed line is a plain query, not a malformed document.
    if (rawLines.length === 1) {
      return null;
    }

    throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
  }

  if (intent && typed.length === 0) {
    throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
  }

  return typed.length > 0 ? { searches: typed, intent } : null;
}

// =============================================================================
// extractIntentTerms
// =============================================================================

describe("extractIntentTerms", () => {
  test("filters stop words", () => {
    // "looking", "for", "notes", "about" are stop words
    expect(extractIntentTerms("looking for notes about latency optimization"))
      .toEqual(["latency", "optimization"]);
  });

  test("filters common function words", () => {
    // "what", "is", "the", "to", "find" are stop words; "best", "way" survive
    expect(extractIntentTerms("what is the best way to find"))
      .toEqual(["best", "way"]);
  });

  test("preserves domain terms", () => {
    expect(extractIntentTerms("web performance latency page load times"))
      .toEqual(["web", "performance", "latency", "page", "load", "times"]);
  });

  test("handles surrounding punctuation with Unicode awareness", () => {
    expect(extractIntentTerms("personal health, fitness, and endurance"))
      .toEqual(["personal", "health", "fitness", "endurance"]);
  });

  test("preserves internal hyphens", () => {
    expect(extractIntentTerms("self-hosted real-time (decision-making)"))
      .toEqual(["self-hosted", "real-time", "decision-making"]);
  });

  test("short domain terms survive (API, SQL, LLM)", () => {
    expect(extractIntentTerms("API design for LLM agents"))
      .toEqual(["api", "design", "llm", "agents"]);
  });

  test("returns empty for empty input", () => {
    expect(extractIntentTerms("")).toEqual([]);
    expect(extractIntentTerms(" ")).toEqual([]);
  });

  test("filters single-char terms", () => {
    const terms = extractIntentTerms("a b c web");
    expect(terms).toEqual(["web"]);
  });

  test("all stop words returns empty", () => {
    const terms = extractIntentTerms("the and or but in on at to for of with by");
    expect(terms).toEqual([]);
  });

  test("preserves 2-char domain terms (CI, CD, DB)", () => {
    const terms = extractIntentTerms("SQL CI CD DB");
    expect(terms).toContain("sql");
    expect(terms).toContain("ci");
    expect(terms).toContain("cd");
    expect(terms).toContain("db");
  });

  test("lowercases all terms", () => {
    const terms = extractIntentTerms("WebSocket HTTP REST");
    expect(terms).toContain("websocket");
    expect(terms).toContain("http");
    expect(terms).toContain("rest");
  });

  test("handles C++ style punctuation", () => {
    const terms = extractIntentTerms("C++, performance! optimization.");
    expect(terms).toContain("performance");
    expect(terms).toContain("optimization");
  });
});

// =============================================================================
// extractSnippet with intent — disambiguation
// =============================================================================

describe("extractSnippet with intent", () => {
  // Each section contains "performance" so the query score is tied (1.0 each).
  // Intent terms (INTENT_WEIGHT_SNIPPET) then break the tie toward the relevant section.
  const body = [
    "# Notes on Various Topics",
    "",
    "## Web Performance Section",
    "Web performance means optimizing page load times and Core Web Vitals.",
    "Reduce latency, improve rendering speed, and measure performance budgets.",
    "",
    "## Team Performance Section",
    "Team performance depends on trust, psychological safety, and feedback.",
    "Build culture where performance reviews drive growth not fear.",
    "",
    "## Health Performance Section",
    "Health performance comes from consistent exercise, sleep, and endurance.",
    "Track fitness metrics, optimize recovery, and monitor healthspan.",
  ].join("\n");

  test("without intent, anchors on query terms only", () => {
    const result = extractSnippet(body, "performance", 500);
    // "performance" appears in title and multiple sections — should anchor on first match
    expect(result.snippet).toContain("Performance");
  });

  test("with web-perf intent, prefers web performance section", () => {
    const result = extractSnippet(
      body,
      "performance",
      500,
      undefined,
      undefined,
      "Looking for notes about web performance, latency, and page load times"
    );
    expect(result.snippet).toMatch(/latency|page.*load|Core Web Vitals/i);
  });

  test("with health intent, prefers health section", () => {
    const result = extractSnippet(
      body,
      "performance",
      500,
      undefined,
      undefined,
      "Looking for notes about personal health, fitness, and endurance"
    );
    expect(result.snippet).toMatch(/health|fitness|endurance|exercise/i);
  });

  test("with team intent, prefers team section", () => {
    const result = extractSnippet(
      body,
      "performance",
      500,
      undefined,
      undefined,
      "Looking for notes about building high-performing teams and culture"
    );
    expect(result.snippet).toMatch(/team|culture|trust|feedback/i);
  });

  test("intent does not override strong query match", () => {
    // Query "Core Web Vitals" is very specific — intent shouldn't pull away from it
    const result = extractSnippet(
      body,
      "Core Web Vitals",
      500,
      undefined,
      undefined,
      "Looking for notes about health and fitness"
    );
    expect(result.snippet).toContain("Core Web Vitals");
  });

  test("absent intent produces same result as undefined", () => {
    const withoutIntent = extractSnippet(body, "performance", 500);
    const withUndefined = extractSnippet(body, "performance", 500, undefined, undefined, undefined);
    expect(withoutIntent.line).toBe(withUndefined.line);
    expect(withoutIntent.snippet).toBe(withUndefined.snippet);
  });

  test("intent with no matching terms falls back to query-only scoring", () => {
    const result = extractSnippet(
      body,
      "performance",
      500,
      undefined,
      undefined,
      "quantum computing and entanglement"
    );
    expect(result.snippet).toContain("Performance");
    expect(result.snippet.length).toBeGreaterThan(0);
  });

  test("intent works with chunk position", () => {
    const webPerfStart = body.indexOf("## Web Performance");
    const result = extractSnippet(
      body,
      "performance",
      500,
      webPerfStart,
      200,
      "web page load times"
    );
    expect(result.snippet).toMatch(/Web Performance|Core Web Vitals|Page load/i);
  });
});

// =============================================================================
// extractSnippet — intent weight verification
// =============================================================================

describe("extractSnippet intent weight behavior", () => {
  // Document where query term appears on every line but intent terms differ
  const body = [
    "performance metrics for team velocity",
    "performance metrics for web latency",
    "performance metrics for athletic endurance",
  ].join("\n");

  test("intent breaks tie when query matches all lines equally", () => {
    const noIntent = extractSnippet(body, "performance metrics", 500);
    // Without intent, first line wins (all equal score)
    expect(noIntent.line).toBe(1);

    const withIntent = extractSnippet(
      body,
      "performance metrics",
      500,
      undefined,
      undefined,
      "web latency and page speed"
    );
    // Intent terms "web", "latency" match line 2
    expect(withIntent.snippet).toContain("web latency");
  });
});

// =============================================================================
// Chunk selection scoring with intent
// =============================================================================

describe("intent keyword extraction logic", () => {
  /**
   * Mirrors the chunk selection scoring in hybridQuery, using the shared
   * extractIntentTerms helper and INTENT_WEIGHT_CHUNK constant.
   *
   * Each whitespace-separated query term longer than two characters adds 1 when
   * it occurs (case-insensitively, as a substring) in `text`; each extracted
   * intent term that occurs adds INTENT_WEIGHT_CHUNK.
   */
  function scoreChunk(text: string, query: string, intent?: string): number {
    const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
    const intentTerms = intent ? extractIntentTerms(intent) : [];
    const lower = text.toLowerCase();
    const qScore = queryTerms.reduce((acc, term) => acc + (lower.includes(term) ? 1 : 0), 0);
    const iScore = intentTerms.reduce(
      (acc, term) => acc + (lower.includes(term) ? INTENT_WEIGHT_CHUNK : 0),
      0
    );
    return qScore + iScore;
  }

  const chunks = [
    "Web performance: optimize page load times, reduce latency, improve rendering pipeline.",
    "Team performance: build trust, give feedback, set clear expectations for the group.",
    "Health performance: exercise regularly, sleep 8 hours, manage stress for endurance.",
  ];

  test("without intent, all chunks score equally on 'performance'", () => {
    const scores = chunks.map(c => scoreChunk(c, "performance"));
    // All contain "performance", so all score 1
    expect(scores[0]).toBe(scores[1]);
    expect(scores[1]).toBe(scores[2]);
  });

  test("with web intent, web chunk scores highest", () => {
    const intent = "looking for notes about page load times and latency optimization";
    const scores = chunks.map(c => scoreChunk(c, "performance", intent));
    expect(scores[0]).toBeGreaterThan(scores[1]!);
    expect(scores[0]).toBeGreaterThan(scores[2]!);
  });

  test("with health intent, health chunk scores highest", () => {
    const intent = "looking for notes about exercise, sleep, and endurance";
    const scores = chunks.map(c => scoreChunk(c, "performance", intent));
    expect(scores[2]).toBeGreaterThan(scores[0]!);
    expect(scores[2]).toBeGreaterThan(scores[1]!);
  });

  test("intent terms have lower weight than query terms (1.0)", () => {
    const intent = "looking for latency";
    // The test name promises an ordering: a query hit contributes 1 in
    // scoreChunk, so an intent hit must contribute strictly less.
    expect(INTENT_WEIGHT_CHUNK).toBeLessThan(1);
    // Chunk 0 has "performance" (query: 1.0) + "latency" (intent: INTENT_WEIGHT_CHUNK)
    const withBoth = scoreChunk(chunks[0]!, "performance", intent);
    const queryOnly = scoreChunk(chunks[0]!, "performance");
    expect(withBoth).toBe(queryOnly + INTENT_WEIGHT_CHUNK);
  });

  test("stop words are filtered, short domain terms survive", () => {
    const intent = "the art of web performance";
    // "the" (stop word), "art" (survives), "of" (stop word),
    // "web" (survives), "performance" (survives)
    // intent terms after filtering: ["art", "web", "performance"]
    // Chunk 0 has "web" + "performance" → 2 intent hits (no "art")
    // Chunks 1,2 have "performance" only → 1 intent hit
    const scores = chunks.map(c => scoreChunk(c, "test", intent));
    expect(scores[0]).toBe(INTENT_WEIGHT_CHUNK * 2); // "web" + "performance"
    expect(scores[1]).toBe(INTENT_WEIGHT_CHUNK); // "performance" only
    expect(scores[2]).toBe(INTENT_WEIGHT_CHUNK); // "performance" only
  });
});

// =============================================================================
// Strong-signal bypass with intent
// =============================================================================

describe("strong-signal bypass logic", () => {
  // Local copies of the thresholds this mirror uses.
  // NOTE(review): presumably these match the constants of the same name used by
  // hybridQuery — confirm against the implementation if those values change.
  const STRONG_SIGNAL_MIN_SCORE = 0.85;
  const STRONG_SIGNAL_MIN_GAP = 0.15;

  /**
   * Mirrors the logic in hybridQuery:
   *   const hasStrongSignal = !intent && topScore >= STRONG_SIGNAL_MIN_SCORE && gap >= STRONG_SIGNAL_MIN_GAP
   *
   * Any non-empty intent disables the bypass regardless of the scores.
   */
  function hasStrongSignal(topScore: number, secondScore: number, intent?: string): boolean {
    return (
      !intent &&
      topScore >= STRONG_SIGNAL_MIN_SCORE &&
      (topScore - secondScore) >= STRONG_SIGNAL_MIN_GAP
    );
  }

  test("strong signal detected without intent", () => {
    expect(hasStrongSignal(0.90, 0.70)).toBe(true);
  });

  test("strong signal bypassed when intent provided", () => {
    expect(hasStrongSignal(0.90, 0.70, "looking for health performance")).toBe(false);
  });

  test("weak signal not affected by intent", () => {
    expect(hasStrongSignal(0.50, 0.45)).toBe(false);
    expect(hasStrongSignal(0.50, 0.45, "some intent")).toBe(false);
  });

  test("close scores not strong even without intent", () => {
    expect(hasStrongSignal(0.90, 0.80)).toBe(false); // gap < 0.15
  });
});

// =============================================================================
// parseStructuredQuery with intent
// =============================================================================

describe("parseStructuredQuery with intent", () => {
  test("parses intent + lex query", () => {
    const result = parseStructuredQuery("intent: web performance\nlex: performance");
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web performance");
    expect(result!.searches).toHaveLength(1);
    expect(result!.searches[0]!.type).toBe("lex");
    expect(result!.searches[0]!.query).toBe("performance");
  });

  test("parses intent + multiple typed lines", () => {
    const result = parseStructuredQuery(
      "intent: web page load times\nlex: performance\nvec: how to improve performance"
    );
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web page load times");
    expect(result!.searches).toHaveLength(2);
    expect(result!.searches[0]!.type).toBe("lex");
    expect(result!.searches[1]!.type).toBe("vec");
  });

  test("intent can appear after typed lines", () => {
    const result = parseStructuredQuery(
      "lex: performance\nintent: web page load times\nvec: latency"
    );
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web page load times");
    expect(result!.searches).toHaveLength(2);
  });

  test("intent is case-insensitive prefix", () => {
    const result = parseStructuredQuery("Intent: web perf\nlex: performance");
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web perf");
  });

  test("no intent returns undefined", () => {
    const result = parseStructuredQuery("lex: performance\nvec: speed");
    expect(result).not.toBeNull();
    expect(result!.intent).toBeUndefined();
  });

  test("intent alone throws error", () => {
    expect(() => parseStructuredQuery("intent: web performance")).toThrow(
      /intent: cannot appear alone/
    );
  });

  test("multiple intent lines throw error", () => {
    expect(() =>
      parseStructuredQuery("intent: web perf\nintent: team health\nlex: performance")
    ).toThrow(/only one intent: line is allowed/);
  });

  test("empty intent text throws error", () => {
    expect(() =>
      parseStructuredQuery("intent:\nlex: performance")
    ).toThrow(/intent: must include text/);
  });

  test("intent with whitespace-only text throws error", () => {
    expect(() =>
      parseStructuredQuery("intent: \nlex: performance")
    ).toThrow(/intent: must include text/);
  });

  test("single plain line still returns null (expand mode)", () => {
    const result = parseStructuredQuery("how does auth work");
    expect(result).toBeNull();
  });

  test("expand: line still returns null", () => {
    const result = parseStructuredQuery("expand: auth stuff");
    expect(result).toBeNull();
  });

  test("intent with expand throws error (expand can't mix)", () => {
    expect(() =>
      parseStructuredQuery("intent: web\nexpand: performance")
    ).toThrow(/cannot mix expand/);
  });

  test("empty query returns null", () => {
    expect(parseStructuredQuery("")).toBeNull();
    expect(parseStructuredQuery(" \n \n ")).toBeNull();
  });

  test("intent with blank lines is fine", () => {
    const result = parseStructuredQuery(
      "intent: web perf\n\nlex: performance\n\nvec: speed"
    );
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web perf");
    expect(result!.searches).toHaveLength(2);
  });

  test("intent preserves full text including colons", () => {
    const result = parseStructuredQuery(
      "intent: web performance: LCP, FID, CLS\nlex: performance"
    );
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web performance: LCP, FID, CLS");
  });
});

// =============================================================================
// Constants exported
// =============================================================================

describe("intent constants", () => {
  test("INTENT_WEIGHT_SNIPPET is 0.3", () => {
    expect(INTENT_WEIGHT_SNIPPET).toBe(0.3);
  });

  test("INTENT_WEIGHT_CHUNK is 0.5", () => {
    expect(INTENT_WEIGHT_CHUNK).toBe(0.5);
  });
});