| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564 |
- /**
- * llm.test.ts - Unit tests for the LLM abstraction layer (node-llama-cpp)
- *
- * Run with: bun test src/llm.test.ts
- *
- * These tests require the actual models to be downloaded. Run the embed or
- * rerank functions first to trigger model downloads.
- */
- import { describe, test, expect, beforeAll, afterAll } from "vitest";
- import {
- LlamaCpp,
- getDefaultLlamaCpp,
- disposeDefaultLlamaCpp,
- withLLMSession,
- canUnloadLLM,
- SessionReleasedError,
- type RerankDocument,
- type ILLMSession,
- } from "../src/llm.js";
- // =============================================================================
- // Singleton Tests (no model loading required)
- // =============================================================================
/**
 * Singleton accessor checks. Nothing here loads a model: only the identity
 * and the class of the object handed back by getDefaultLlamaCpp() are examined.
 */
describe("Default LlamaCpp Singleton", () => {
  // The singleton is intentionally left in place (never reset) so that no
  // orphan instances are produced by this suite.
  test("getDefaultLlamaCpp returns same instance on subsequent calls", () => {
    const firstCall = getDefaultLlamaCpp();
    const secondCall = getDefaultLlamaCpp();

    // Both calls must hand back the very same object...
    expect(firstCall).toBe(secondCall);
    // ...and that object must be a LlamaCpp.
    expect(firstCall).toBeInstanceOf(LlamaCpp);
  });
});
- // =============================================================================
- // Model Existence Tests
- // =============================================================================
/**
 * modelExists() checks: an `hf:` URI is reported as existing, a local path
 * that is not on disk is reported as missing, and in both cases the `name`
 * field echoes the argument that was passed in.
 */
describe("LlamaCpp.modelExists", () => {
  test("returns exists:true for HuggingFace model URIs", async () => {
    const hfUri = "hf:org/repo/model.gguf";
    const { exists, name } = await getDefaultLlamaCpp().modelExists(hfUri);

    expect(exists).toBe(true);
    expect(name).toBe(hfUri);
  });

  test("returns exists:false for non-existent local paths", async () => {
    const missingPath = "/nonexistent/path/model.gguf";
    const { exists, name } = await getDefaultLlamaCpp().modelExists(missingPath);

    expect(exists).toBe(false);
    expect(name).toBe(missingPath);
  });
});
- // =============================================================================
- // Integration Tests (require actual models)
- // =============================================================================
- describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => {
// Every test in this suite shares the default singleton, which avoids
// creating multiple Metal contexts.
const llm = getDefaultLlamaCpp();

// Teardown: release native resources once the suite is done. Leaving them
// alive can trigger ggml-metal asserts on process exit. The returned promise
// is handed to the test runner so it waits for disposal to finish.
afterAll(() => disposeDefaultLlamaCpp());
/**
 * Single-text embedding checks: vector size, repeatability for identical
 * input, and separation between unrelated inputs.
 */
describe("embed", () => {
  test("returns embedding with correct dimensions", async () => {
    const embedded = await llm.embed("Hello world");
    expect(embedded).not.toBeNull();

    const vector = embedded!.embedding;
    expect(vector).toBeInstanceOf(Array);
    expect(vector.length).toBeGreaterThan(0);
    // embeddinggemma outputs 768 dimensions
    expect(vector.length).toBe(768);
  });

  test("returns consistent embeddings for same input", async () => {
    const firstRun = await llm.embed("test text");
    const secondRun = await llm.embed("test text");
    expect(firstRun).not.toBeNull();
    expect(secondRun).not.toBeNull();

    // Component-by-component comparison: the same text must embed to the
    // same vector (to 5 decimal digits).
    const baseline = firstRun!.embedding;
    const repeat = secondRun!.embedding;
    for (let idx = 0; idx < baseline.length; idx++) {
      expect(baseline[idx]).toBeCloseTo(repeat[idx]!, 5);
    }
  });

  test("returns different embeddings for different inputs", async () => {
    // Cosine similarity of two vectors, iterating over the length of `a`.
    const cosine = (a: number[], b: number[]): number => {
      let dot = 0;
      let sumSqA = 0;
      let sumSqB = 0;
      for (let idx = 0; idx < a.length; idx++) {
        const x = a[idx]!;
        const y = b[idx]!;
        dot += x * y;
        sumSqA += x ** 2;
        sumSqB += y ** 2;
      }
      return dot / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB));
    };

    const catsResult = await llm.embed("cats are great");
    const dbResult = await llm.embed("database optimization");
    expect(catsResult).not.toBeNull();
    expect(dbResult).not.toBeNull();

    // Unrelated texts should be meaningfully different, not merely non-identical.
    const similarity = cosine(catsResult!.embedding, dbResult!.embedding);
    expect(similarity).toBeLessThan(0.95);
  });
});
- describe("embedBatch", () => {
// Three inputs in, three non-null 768-dimensional embeddings out.
test("returns embeddings for multiple texts", async () => {
  const inputs = ["Hello world", "Test text", "Another document"];
  const embedded = await llm.embedBatch(inputs);

  expect(embedded).toHaveLength(3);
  for (const entry of embedded) {
    expect(entry).not.toBeNull();
    expect(entry!.embedding.length).toBe(768);
  }
});
// The batched path and the one-at-a-time path must produce the same vectors.
test("returns same results as individual embed calls", async () => {
  const inputs = ["cats are great", "dogs are awesome"];

  const viaBatch = await llm.embedBatch(inputs);
  const viaSingle = await Promise.all(inputs.map((text) => llm.embed(text)));

  for (let textIdx = 0; textIdx < inputs.length; textIdx++) {
    const batched = viaBatch[textIdx];
    const single = viaSingle[textIdx];
    expect(batched).not.toBeNull();
    expect(single).not.toBeNull();

    // Compare each component to 5 decimal digits.
    const batchedVector = batched!.embedding;
    for (let dim = 0; dim < batchedVector.length; dim++) {
      expect(batchedVector[dim]).toBeCloseTo(single!.embedding[dim]!, 5);
    }
  }
});
// An empty input list yields an empty result list.
test("handles empty array", async () => {
  const embedded = await llm.embedBatch([]);
  expect(embedded).toHaveLength(0);
});
// Coarse timing comparison between one embedBatch call and ten sequential
// embed calls over the same texts.
test("batch is faster than sequential", async () => {
  const texts = Array.from({ length: 10 }, (_, i) => `Document number ${i} with content`);

  // Batched run.
  const batchStart = Date.now();
  await llm.embedBatch(texts);
  const batchTime = Date.now() - batchStart;

  // Sequential run: one embed call at a time, in order.
  const seqStart = Date.now();
  for (const text of texts) {
    await llm.embed(text);
  }
  const seqTime = Date.now() - seqStart;

  console.log(`Batch: ${batchTime}ms, Sequential: ${seqTime}ms`);

  // Timing depends on the machine and its load, so the only requirement is
  // that batching is not drastically slower (at most 3x the sequential time).
  const allowedBatchTime = seqTime * 3;
  expect(batchTime).toBeLessThanOrEqual(allowedBatchTime);
});
/**
 * Regression test for a race in lazy embedding-context creation
 * (see https://github.com/tobi/qmd/pull/54).
 *
 * Without a promise guard, each concurrent embedBatch call on a fresh
 * instance sees embedContext === null and creates its own context, causing
 * resource leaks and potential "Context is disposed" errors. The test fires
 * five embedBatch calls in parallel at a brand-new LlamaCpp and counts how
 * often the model's createEmbeddingContext is invoked.
 *
 * The fresh instance is disposed in a `finally` block so its native resources
 * are released even when an assertion fails or an embedBatch call rejects.
 */
test("handles concurrent embedBatch calls on fresh instance without race condition", async () => {
  const freshLlm = new LlamaCpp({});

  try {
    let contextCreateCount = 0;

    // Instrument the model's createEmbeddingContext to count calls. The model
    // is only reachable through the private ensureEmbedModel(), so that method
    // is wrapped and the model is patched the first time it is returned.
    const originalEnsureEmbedModel = (freshLlm as any).ensureEmbedModel.bind(freshLlm);
    let modelInstrumented = false;
    (freshLlm as any).ensureEmbedModel = async function () {
      const model = await originalEnsureEmbedModel();
      if (!modelInstrumented) {
        modelInstrumented = true;
        const originalCreate = model.createEmbeddingContext.bind(model);
        model.createEmbeddingContext = async function (...args: any[]) {
          contextCreateCount++;
          return originalCreate(...args);
        };
      }
      return model;
    };

    const texts = Array(10).fill(null).map((_, i) => `Document ${i}`);

    // Call embedBatch 5 TIMES in parallel on the fresh instance, two texts each.
    const batches = await Promise.all([
      freshLlm.embedBatch(texts.slice(0, 2)),
      freshLlm.embedBatch(texts.slice(2, 4)),
      freshLlm.embedBatch(texts.slice(4, 6)),
      freshLlm.embedBatch(texts.slice(6, 8)),
      freshLlm.embedBatch(texts.slice(8, 10)),
    ]);

    // Every text must have produced a non-null embedding.
    const allResults = batches.flat();
    expect(allResults).toHaveLength(10);

    const successCount = allResults.filter(r => r !== null).length;
    expect(successCount).toBe(10);

    // THE KEY ASSERTION: contexts are created once (one set shared by all
    // callers), not once per concurrent embedBatch call. How many contexts a
    // single set contains depends on available VRAM (computeParallelism) and
    // is capped at 8, so the count must land in [1, 8].
    // NOTE(review): this bound catches duplication only when 5x the intended
    // count exceeds 8 (i.e. the intended count is 2 or more); with a single
    // intended context, five duplicates would still pass. Tighten the bound
    // if the intended count becomes observable from the test.
    console.log(`Context creation count: ${contextCreateCount} (expected: ≤ 8, not 5× duplicated)`);
    expect(contextCreateCount).toBeGreaterThanOrEqual(1);
    expect(contextCreateCount).toBeLessThanOrEqual(8);
  } finally {
    // Runs on success and on failure alike, so a failing assertion cannot
    // leak the fresh instance.
    await freshLlm.dispose();
  }
}, 60000);
- });
/**
 * Reranker checks: relevance ordering on small hand-written corpora, edge
 * cases (no documents, one document), file-path preservation, and the
 * [0, 1] score range.
 */
describe("rerank", () => {
  test("scores capital of France question correctly", async () => {
    const corpus: RerankDocument[] = [
      { file: "butterflies.txt", text: "Butterflies indeed fly through the garden." },
      { file: "france.txt", text: "The capital of France is Paris." },
      { file: "canada.txt", text: "The capital of Canada is Ottawa." },
    ];

    const { results: ranked } = await llm.rerank("What is the capital of France?", corpus);
    expect(ranked).toHaveLength(3);

    // Best match: the document that actually answers the question.
    expect(ranked[0]!.file).toBe("france.txt");
    expect(ranked[0]!.score).toBeGreaterThan(0.7);

    // Runner-up: also about a capital city, so somewhat relevant.
    expect(ranked[1]!.file).toBe("canada.txt");

    // Last: the unrelated document.
    expect(ranked[2]!.file).toBe("butterflies.txt");
    expect(ranked[2]!.score).toBeLessThan(0.6);
  });

  test("scores authentication query correctly", async () => {
    const corpus: RerankDocument[] = [
      { file: "weather.md", text: "The weather today is sunny with mild temperatures." },
      { file: "auth.md", text: "Authentication can be configured by setting the AUTH_SECRET environment variable." },
      { file: "pizza.md", text: "Our restaurant serves the best pizza in town." },
      { file: "jwt.md", text: "JWT authentication requires a secret key and expiration time." },
    ];

    const { results: ranked } = await llm.rerank("How do I configure authentication?", corpus);
    expect(ranked).toHaveLength(4);

    // The two authentication documents take the top two places, in either order.
    const topTwo = ranked.slice(0, 2).map((entry) => entry.file);
    expect(topTwo).toContain("auth.md");
    expect(topTwo).toContain("jwt.md");

    // The two off-topic documents take the remaining places, in either order.
    const bottomTwo = ranked.slice(2).map((entry) => entry.file);
    expect(bottomTwo).toContain("weather.md");
    expect(bottomTwo).toContain("pizza.md");
  });

  test("handles programming queries correctly", async () => {
    const corpus: RerankDocument[] = [
      { file: "cooking.md", text: "To make a good pasta, boil water and add salt." },
      { file: "errors.md", text: "Use try-catch blocks to handle JavaScript errors gracefully." },
      { file: "python.md", text: "Python uses try-except for exception handling." },
    ];

    const { results: ranked } = await llm.rerank("How do I handle errors in JavaScript?", corpus);

    // The JavaScript error-handling document leads.
    expect(ranked[0]!.file).toBe("errors.md");
    expect(ranked[0]!.score).toBeGreaterThan(0.7);

    // The Python document (same concept, different language) is not pinned
    // to a position; only the cooking document is required to come last.
    expect(ranked[2]!.file).toBe("cooking.md");
  });

  test("handles empty document list", async () => {
    const { results: ranked } = await llm.rerank("test query", []);
    expect(ranked).toHaveLength(0);
  });

  test("handles single document", async () => {
    const onlyDoc = { file: "doc.md", text: "content" };
    const { results: ranked } = await llm.rerank("test", [onlyDoc]);

    expect(ranked).toHaveLength(1);
    expect(ranked[0]!.file).toBe("doc.md");
  });

  test("preserves original file paths", async () => {
    const corpus: RerankDocument[] = [
      { file: "path/to/doc1.md", text: "content one" },
      { file: "another/path/doc2.md", text: "content two" },
    ];

    const { results: ranked } = await llm.rerank("query", corpus);

    // Sorted so the check does not depend on ranking order.
    const returnedFiles = ranked.map((entry) => entry.file).sort();
    expect(returnedFiles).toEqual(["another/path/doc2.md", "path/to/doc1.md"]);
  });

  test("returns scores between 0 and 1", async () => {
    const corpus: RerankDocument[] = [
      { file: "a.md", text: "The quick brown fox jumps over the lazy dog." },
      { file: "b.md", text: "Machine learning algorithms process data efficiently." },
      { file: "c.md", text: "React components use JSX syntax for rendering." },
    ];

    const { results: ranked } = await llm.rerank("Tell me about animals", corpus);

    for (const { score } of ranked) {
      expect(score).toBeGreaterThanOrEqual(0);
      expect(score).toBeLessThanOrEqual(1);
    }
  });

  test("batch reranks multiple documents efficiently", async () => {
    // Ten generated documents to exercise batch processing.
    const corpus: RerankDocument[] = Array.from({ length: 10 }, (_, i) => ({
      file: `doc${i}.md`,
      text: `Document number ${i} with some content about topic ${i % 3}`,
    }));

    const startedAt = Date.now();
    const { results: ranked } = await llm.rerank("topic 1", corpus);
    const elapsed = Date.now() - startedAt;

    expect(ranked).toHaveLength(10);

    // Every document comes back with a score inside [0, 1].
    for (const { score } of ranked) {
      expect(score).toBeGreaterThanOrEqual(0);
      expect(score).toBeLessThanOrEqual(1);
    }

    // Timing is logged for monitoring only; nothing is asserted on it.
    console.log(`Batch rerank of 10 docs took ${elapsed}ms`);
  });
});
/**
 * Query-expansion checks: every returned entry has a recognised type and
 * non-empty text, and lexical entries can be switched off.
 */
describe("expandQuery", () => {
  test("returns query expansions with correct types", async () => {
    const expansions = await llm.expandQuery("test query");

    // At least one entry (lex, vec, and/or hyde) must come back.
    expect(expansions.length).toBeGreaterThanOrEqual(1);

    const knownTypes = ["lex", "vec", "hyde"];
    for (const expansion of expansions) {
      expect(knownTypes).toContain(expansion.type);
      expect(expansion.text.length).toBeGreaterThan(0);
    }
  }, 30000); // 30s timeout for model loading

  test("can exclude lexical queries", async () => {
    const expansions = await llm.expandQuery("authentication setup", { includeLexical: false });

    // With includeLexical disabled, no 'lex' entry may remain.
    const lexicalOnly = expansions.filter((expansion) => expansion.type === "lex");
    expect(lexicalOnly).toHaveLength(0);
  });
});
- });
- // =============================================================================
- // Session Management Tests
- // =============================================================================
- describe.skipIf(!!process.env.CI)("LLM Session Management", () => {
- describe("withLLMSession", () => {
// A live session is valid, can embed text, and its callback's return value
// is what withLLMSession resolves to.
test("session provides access to LLM operations", async () => {
  const outcome = await withLLMSession(async (session) => {
    expect(session.isValid).toBe(true);

    const embedded = await session.embed("test text");
    expect(embedded).not.toBeNull();
    expect(embedded!.embedding.length).toBe(768);

    return "success";
  });

  expect(outcome).toBe("success");
});
// A session handle that escapes the callback must report isValid === false
// once withLLMSession has returned.
test("session is invalid after release", async () => {
  let escapedSession: ILLMSession | null = null;

  await withLLMSession(async (session) => {
    expect(session.isValid).toBe(true);
    escapedSession = session;
  });

  expect(escapedSession).not.toBeNull();
  expect(escapedSession!.isValid).toBe(false);
});
// canUnloadLLM() must stay false for the whole lifetime of a session and
// flip to true once the session has ended.
test("session prevents idle unload during operations", async () => {
  await withLLMSession(async (session) => {
    // Before any work: unloading is already blocked.
    expect(canUnloadLLM()).toBe(false);

    await session.embed("test");

    // After an operation: still blocked.
    expect(canUnloadLLM()).toBe(false);
  });

  // Session released: unloading is allowed again.
  expect(canUnloadLLM()).toBe(true);
});
// Nested sessions: unloading stays blocked until the outermost session ends,
// and releasing the inner session leaves the outer one valid.
test("nested sessions increment ref count", async () => {
  await withLLMSession(async (outer) => {
    expect(canUnloadLLM()).toBe(false);

    await withLLMSession(async (inner) => {
      expect(canUnloadLLM()).toBe(false);
      expect(inner.isValid).toBe(true);
      expect(outer.isValid).toBe(true);
    });

    // The inner session is gone; the outer one still holds the LLM.
    expect(canUnloadLLM()).toBe(false);
    expect(outer.isValid).toBe(true);
  });

  // No session left.
  expect(canUnloadLLM()).toBe(true);
});
// embedBatch through a session: three texts in, three non-null
// 768-dimensional embeddings out.
test("session embedBatch works correctly", async () => {
  await withLLMSession(async (session) => {
    const embedded = await session.embedBatch(["Hello world", "Test text", "Another document"]);

    expect(embedded).toHaveLength(3);
    for (const entry of embedded) {
      expect(entry).not.toBeNull();
      expect(entry!.embedding.length).toBe(768);
    }
  });
});
// rerank through a session: the document that answers the question must be
// first and must score strictly higher than the other one.
test("session rerank works correctly", async () => {
  await withLLMSession(async (session) => {
    const corpus: RerankDocument[] = [
      { file: "a.txt", text: "The capital of France is Paris." },
      { file: "b.txt", text: "Dogs are great pets." },
    ];

    const { results: ranked } = await session.rerank("What is the capital of France?", corpus);

    expect(ranked).toHaveLength(2);
    expect(ranked[0]!.file).toBe("a.txt");
    expect(ranked[0]!.score).toBeGreaterThan(ranked[1]!.score);
  });
});
// A session that outlives its maxDuration must end in SessionReleasedError.
test("max duration aborts session after timeout", async () => {
  const overrunningSession = withLLMSession(async (session) => {
    // Sleep for 150ms, i.e. well past the 50ms limit configured below.
    await new Promise(resolve => setTimeout(resolve, 150));
    // By now the session has been aborted, so this call is expected to throw.
    await session.embed("test");
  }, { maxDuration: 50 }); // 50ms max

  // Fails if the promise resolves, or if it rejects with anything other than
  // a SessionReleasedError.
  await expect(overrunningSession).rejects.toBeInstanceOf(SessionReleasedError);
}, 5000);
/**
 * An AbortSignal passed via `options.signal` must invalidate the session.
 *
 * The callback sleeps for 100ms, the controller is aborted after 20ms, and
 * the callback then checks `session.isValid`. Whatever withLLMSession rejects
 * with is tolerated as long as the callback observed the abort. If it did
 * not, the captured rejection is rethrown so the real failure (rather than a
 * bare "expected false to be true") appears in the test report.
 */
test("external abort signal propagates to session", async () => {
  const abortController = new AbortController();
  let sessionAborted = false;

  const promise = withLLMSession(async (session) => {
    // Wait a bit then check if aborted
    await new Promise(resolve => setTimeout(resolve, 100));
    if (!session.isValid) {
      sessionAborted = true;
      throw new SessionReleasedError("Session aborted");
    }
    return "should not reach";
  }, { signal: abortController.signal });

  // Abort after 20ms, while the callback is still sleeping.
  setTimeout(() => abortController.abort(), 20);

  // Capture the rejection instead of discarding it.
  let rejection: unknown;
  let rejected = false;
  try {
    await promise;
  } catch (err) {
    rejected = true;
    rejection = err;
  }

  // A rejection is expected once the abort has been observed. Without an
  // observed abort it signals an unrelated failure and must not be hidden.
  if (rejected && !sessionAborted) {
    throw rejection;
  }

  expect(sessionAborted).toBe(true);
}, 5000);
// Each session exposes an AbortSignal that has not fired while the session
// is running normally.
test("session provides abort signal for monitoring", async () => {
  await withLLMSession(async ({ signal }) => {
    expect(signal).toBeInstanceOf(AbortSignal);
    expect(signal.aborted).toBe(false);
  });
});
// withLLMSession resolves to whatever the callback returns.
test("returns value from callback", async () => {
  const outcome = await withLLMSession(async (session) => {
    await session.embed("test");
    return { status: "complete", count: 42 };
  });

  expect(outcome).toEqual({ status: "complete", count: 42 });
});
// An error thrown inside the callback must surface as a rejection of
// withLLMSession carrying the same message.
test("propagates errors from callback", async () => {
  const customError = new Error("Custom test error");

  const failingSession = withLLMSession(async () => {
    throw customError;
  });

  await expect(failingSession).rejects.toThrow("Custom test error");
});
- });
- });
|