/**
 * embedding-vsearch.test.ts — Query-side EmbeddingProvider integration
 * (issue i-loazq6ze).
 *
 * Verifies that `searchVec`, `structuredSearch`, and `vectorSearchQuery`
 * route query encoding through the supplied `EmbeddingProvider` instead
 * of the local `node-llama-cpp` model when one is configured. Also covers
 * the AutoFallback path so a transient remote outage degrades to the
 * fallback provider instead of throwing.
 *
 * The store is a throwaway sqlite + sqlite-vec index created inside a
 * per-test temp directory; the provider is a stub that records calls and
 * returns deterministic vectors so we can verify routing without standing
 * up real services.
 */

import { describe, test, expect, beforeEach, afterEach } from "vitest";
import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import {
  createStore,
  searchVec,
  structuredSearch,
  vectorSearchQuery,
  type Store,
  type ExpandedQuery,
} from "../src/store.js";
import {
  AutoFallbackEmbeddingProvider,
  CircuitOpenError,
  type EmbeddingProvider,
  type ProviderEmbedding,
  type ProviderHealth,
} from "../src/embedding/index.js";

// ─────────────────────────── Stub providers ──────────────────────────────────

/**
 * Deterministic stub — returns a fixed embedding to match index vectors.
 *
 * Counts `embed` / `embedBatch` invocations and records every text it was
 * asked to encode so tests can assert on routing.
 */
class FixedProvider implements EmbeddingProvider {
  readonly kind = "openai" as const;
  embedCalls = 0;
  embedBatchCalls = 0;
  lastEmbedTexts: string[] = [];

  constructor(
    private readonly modelId: string,
    private readonly embedding: number[],
  ) {}

  getModelId(): string {
    return this.modelId;
  }

  getDimensions(): number | undefined {
    return this.embedding.length;
  }

  async healthcheck(): Promise<ProviderHealth> {
    return { ok: true, model: this.modelId, dimensions: this.embedding.length };
  }

  async embed(text: string): Promise<ProviderEmbedding> {
    this.embedCalls++;
    this.lastEmbedTexts.push(text);
    // Hand out a copy so callers cannot mutate the stub's canonical vector.
    return { embedding: this.embedding.slice(), model: this.modelId };
  }

  async embedBatch(texts: string[]): Promise<(ProviderEmbedding | null)[]> {
    this.embedBatchCalls++;
    this.lastEmbedTexts.push(...texts);
    return texts.map(() => ({ embedding: this.embedding.slice(), model: this.modelId }));
  }

  async dispose(): Promise<void> {}
}

/** Throws CircuitOpenError on every call — simulates "remote down". */
class CircuitOpenProvider implements EmbeddingProvider {
  readonly kind = "openai" as const;
  embedCalls = 0;
  embedBatchCalls = 0;

  constructor(private readonly modelId: string = "embeddinggemma") {}

  getModelId(): string {
    return this.modelId;
  }

  getDimensions(): number | undefined {
    return undefined;
  }

  async healthcheck(): Promise<ProviderHealth> {
    return { ok: false, model: this.modelId, detail: "circuit open" };
  }

  async embed(): Promise<ProviderEmbedding> {
    this.embedCalls++;
    throw new CircuitOpenError("remote down");
  }

  async embedBatch(): Promise<(ProviderEmbedding | null)[]> {
    this.embedBatchCalls++;
    throw new CircuitOpenError("remote down");
  }

  async dispose(): Promise<void> {}
}

/** Throws a generic error on every call — simulates total backend failure. */
class AlwaysFailProvider implements EmbeddingProvider {
  readonly kind = "openai" as const;

  constructor(private readonly modelId: string = "embeddinggemma") {}

  getModelId(): string {
    return this.modelId;
  }

  getDimensions(): number | undefined {
    return undefined;
  }

  async healthcheck(): Promise<ProviderHealth> {
    return { ok: false, model: this.modelId, detail: "always fail" };
  }

  async embed(): Promise<ProviderEmbedding> {
    throw new Error("backend unreachable");
  }

  async embedBatch(): Promise<(ProviderEmbedding | null)[]> {
    throw new Error("backend unreachable");
  }

  async dispose(): Promise<void> {}
}

// ─────────────────────────── Test helpers ────────────────────────────────────

/** Error suffix used when a provider was supplied and local llm must stay untouched. */
const PROVIDER_EXCLUSIVE = "accessed when embedProvider was supplied — DoD violation";

/**
 * Replaces `target.llm` with a Proxy whose `get` trap throws on any property
 * read, so a test fails loudly if the code under test reaches for the local
 * llama-cpp instance.
 *
 * @param target Store whose `llm` slot is overwritten.
 * @param reason Text appended after `store.llm.<prop> ` in the thrown message.
 */
function forbidLocalLlm(target: Store, reason: string): void {
  target.llm = new Proxy(
    {},
    {
      get(_target, prop) {
        throw new Error(`store.llm.${String(prop)} ${reason}`);
      },
    },
  ) as never;
}

/**
 * Wraps `primary` and `fallback` in an AutoFallbackEmbeddingProvider whose
 * `warn` hook is a no-op, keeping test output free of WARN noise.
 */
function wrapWithFallback(
  primary: EmbeddingProvider,
  fallback: EmbeddingProvider,
): AutoFallbackEmbeddingProvider {
  return new AutoFallbackEmbeddingProvider({
    primary,
    fallback,
    warn: () => {
      /* swallow noisy WARN in tests */
    },
  });
}

// ─────────────────────────── Test setup ──────────────────────────────────────

let workDir: string;
let store: Store;
/** INDEX_PATH as it was before the current test, restored in afterEach. */
let previousIndexPath: string | undefined;

// Fixed embedding used for both index vectors and query vectors so the
// stub provider's response will match the indexed vector exactly (cosine
// distance ≈ 0 → similarity ≈ 1).
const FIXED_VEC = [0.1, 0.2, 0.3, 0.4];
// Derived from FIXED_VEC so the vec table width can never drift from it.
const DIM = FIXED_VEC.length;

/** Documents seeded into every test store; both share FIXED_VEC as their vector. */
const SEED_DOCS = [
  {
    hash: "hashA",
    path: "alpha.md",
    title: "Alpha",
    body: "Alpha document body about query encoding via remote provider.",
  },
  {
    hash: "hashB",
    path: "beta.md",
    title: "Beta",
    body: "Beta document body about fallback chain semantics.",
  },
] as const;

beforeEach(() => {
  previousIndexPath = process.env.INDEX_PATH;
  workDir = mkdtempSync(join(tmpdir(), "qmd-vsearch-test-"));
  const indexPath = join(workDir, "index.sqlite");
  process.env.INDEX_PATH = indexPath;
  store = createStore(indexPath);

  const now = "2026-04-28T00:00:00Z";

  // Table order is kept as content → documents → content_vectors → vectors_vec.
  const insertContent = store.db.prepare(
    `INSERT INTO content (hash, doc, created_at) VALUES (?, ?, ?)`,
  );
  for (const doc of SEED_DOCS) {
    insertContent.run(doc.hash, doc.body, now);
  }

  const insertDocument = store.db.prepare(
    `INSERT INTO documents (hash, collection, path, title, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, ?)`,
  );
  for (const doc of SEED_DOCS) {
    insertDocument.run(doc.hash, "test", doc.path, doc.title, now, now, 1);
  }

  // Seed vectors_vec with the same fixed vector so stub provider's query
  // embedding lines up with the index entries.
  store.ensureVecTable(DIM);

  const insertContentVector = store.db.prepare(
    `INSERT INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, 0, 0, 'embeddinggemma', ?)`,
  );
  for (const doc of SEED_DOCS) {
    insertContentVector.run(doc.hash, now);
  }

  const insertVector = store.db.prepare(
    `INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`,
  );
  for (const doc of SEED_DOCS) {
    insertVector.run(`${doc.hash}_0`, new Float32Array(FIXED_VEC));
  }
});

afterEach(() => {
  try {
    store.close();
  } catch {
    /* best-effort cleanup — ignore */
  }

  // Put INDEX_PATH back the way we found it instead of unconditionally
  // deleting it, so this suite does not clobber an ambient setting.
  if (previousIndexPath === undefined) {
    delete process.env.INDEX_PATH;
  } else {
    process.env.INDEX_PATH = previousIndexPath;
  }

  rmSync(workDir, { recursive: true, force: true });
});

// ─────────────────────────── searchVec ──────────────────────────────────────

describe("searchVec with EmbeddingProvider", () => {
  test("encodes the query through the provider when supplied", async () => {
    const provider = new FixedProvider("embeddinggemma", FIXED_VEC);

    // Sanity: store.llm is not set; if searchVec touched local llama-cpp
    // it would fail (no model loaded). Provider routing must be exclusive.
    const results = await searchVec(
      store.db,
      "hello",
      "embeddinggemma",
      10,
      undefined,
      undefined,
      undefined,
      provider,
    );

    expect(provider.embedCalls).toBe(1);
    expect(provider.embedBatchCalls).toBe(0);
    expect(results.length).toBeGreaterThan(0);

    // Both alpha + beta share the same vector — both should be returned.
    const filepaths = results.map((r) => r.filepath).sort();
    expect(filepaths).toEqual(["qmd://test/alpha.md", "qmd://test/beta.md"]);
  });

  test("provider mode does not access the local llama-cpp instance", async () => {
    const provider = new FixedProvider("embeddinggemma", FIXED_VEC);

    // If anything touches `store.llm` while the provider is set, the proxy
    // throws — proves the provider path is truly exclusive (mirrors the
    // i-08ovbvtb regression guard in embedding-store-integration.test.ts).
    forbidLocalLlm(store, PROVIDER_EXCLUSIVE);

    const results = await searchVec(
      store.db,
      "hello",
      "embeddinggemma",
      10,
      undefined,
      undefined,
      undefined,
      provider,
    );

    expect(results.length).toBeGreaterThan(0);
  });

  test("survives transient primary failure via AutoFallback", async () => {
    const primary = new CircuitOpenProvider("embeddinggemma");
    const fallback = new FixedProvider("embeddinggemma", FIXED_VEC);
    const wrapped = wrapWithFallback(primary, fallback);

    const results = await searchVec(
      store.db,
      "fallback test",
      "embeddinggemma",
      10,
      undefined,
      undefined,
      undefined,
      wrapped,
    );

    expect(primary.embedCalls).toBe(1);
    expect(fallback.embedCalls).toBe(1);
    expect(results.length).toBeGreaterThan(0);
  });

  test("surfaces error when both primary AND fallback fail", async () => {
    const primary = new AlwaysFailProvider("embeddinggemma");
    const fallback = new AlwaysFailProvider("embeddinggemma");
    const wrapped = wrapWithFallback(primary, fallback);

    await expect(
      searchVec(
        store.db,
        "doomed",
        "embeddinggemma",
        10,
        undefined,
        undefined,
        undefined,
        wrapped,
      ),
    ).rejects.toThrow(/backend unreachable/);
  });
});

// ─────────────────────────── structuredSearch ───────────────────────────────

describe("structuredSearch with EmbeddingProvider", () => {
  test("uses provider.embedBatch for vec/hyde sub-queries", async () => {
    const provider = new FixedProvider("embeddinggemma", FIXED_VEC);

    // Deny access to the local llama-cpp — proves the provider path is exclusive.
    forbidLocalLlm(store, PROVIDER_EXCLUSIVE);

    const queries: ExpandedQuery[] = [
      { type: "vec", query: "what is the fallback chain about" },
      { type: "hyde", query: "Fallback chains route around primary failure transparently." },
    ];

    const results = await structuredSearch(store, queries, {
      skipRerank: true, // reranker uses local llm — skip in this isolation test
      embedProvider: provider,
    });

    // One batch call covering both vec/hyde queries.
    expect(provider.embedBatchCalls).toBe(1);
    expect(provider.lastEmbedTexts.length).toBe(2);
    expect(results.length).toBeGreaterThan(0);
  });

  test("AutoFallback covers structuredSearch query batch", async () => {
    const primary = new CircuitOpenProvider("embeddinggemma");
    const fallback = new FixedProvider("embeddinggemma", FIXED_VEC);
    const wrapped = wrapWithFallback(primary, fallback);

    const queries: ExpandedQuery[] = [{ type: "vec", query: "fallback test" }];

    const results = await structuredSearch(store, queries, {
      skipRerank: true,
      embedProvider: wrapped,
    });

    expect(primary.embedBatchCalls).toBe(1);
    expect(fallback.embedBatchCalls).toBe(1);
    expect(results.length).toBeGreaterThan(0);
  });

  test("structuredSearch degrades to empty results when both providers fail (batch path)", async () => {
    // AutoFallback.embedBatch is contract-bound to return nulls on total
    // failure (graceful degradation in batch mode — see autofallback.ts
    // onTotalFail). structuredSearch then has no embeddings to query
    // sqlite-vec with and returns []. This is the documented behavior;
    // searchVec (single-embed path) is the one that surfaces a thrown
    // error to the caller, see the "surfaces error when both primary AND
    // fallback fail" test in the searchVec suite.
    const primary = new AlwaysFailProvider("embeddinggemma");
    const fallback = new AlwaysFailProvider("embeddinggemma");
    const wrapped = wrapWithFallback(primary, fallback);

    const queries: ExpandedQuery[] = [{ type: "vec", query: "doomed" }];

    const results = await structuredSearch(store, queries, {
      skipRerank: true,
      embedProvider: wrapped,
    });

    expect(results).toEqual([]);
  });
});

// ─────────────────────────── vectorSearchQuery ──────────────────────────────

describe("vectorSearchQuery with EmbeddingProvider", () => {
  test("encodes original query via provider, no local llm access", async () => {
    const provider = new FixedProvider("embeddinggemma", FIXED_VEC);

    // Stub expandQuery to return no expansions — this isolates the
    // embedding path from the LLM-driven query expansion path.
    store.expandQuery = async () => [];
    forbidLocalLlm(store, PROVIDER_EXCLUSIVE);

    const results = await vectorSearchQuery(store, "vector search test", {
      limit: 5,
      minScore: 0,
      embedProvider: provider,
    });

    // vectorSearchQuery sequentializes — at minimum the original query
    // triggers one embed call via the provider.
    expect(provider.embedCalls).toBeGreaterThanOrEqual(1);
    expect(results.length).toBeGreaterThan(0);
  });

  test("AutoFallback rescues vectorSearchQuery from primary failure", async () => {
    const primary = new CircuitOpenProvider("embeddinggemma");
    const fallback = new FixedProvider("embeddinggemma", FIXED_VEC);
    const wrapped = wrapWithFallback(primary, fallback);

    store.expandQuery = async () => [];

    const results = await vectorSearchQuery(store, "fallback path", {
      minScore: 0,
      embedProvider: wrapped,
    });

    expect(primary.embedCalls).toBeGreaterThanOrEqual(1);
    expect(fallback.embedCalls).toBeGreaterThanOrEqual(1);
    expect(results.length).toBeGreaterThan(0);
  });
});

// ─────────────────────────── Backward compat ────────────────────────────────

describe("backward compat — no provider supplied", () => {
  test("searchVec without provider uses precomputed embedding path (no llm needed)", async () => {
    // When the caller passes `precomputedEmbedding`, searchVec must not
    // touch any embedding backend at all — neither local nor provider.
    // This is the cheapest backward-compat smoke test we can run without
    // loading node-llama-cpp.
    forbidLocalLlm(store, "accessed unexpectedly");

    const results = await searchVec(
      store.db,
      "hello",
      "embeddinggemma",
      10,
      undefined,
      undefined,
      FIXED_VEC, // precomputedEmbedding
    );

    expect(results.length).toBeGreaterThan(0);
  });
});