llm.test.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  /**
   * llm.test.ts - Unit tests for the LLM abstraction layer (node-llama-cpp)
   *
   * Run with: bun test src/llm.test.ts
   *
   * The integration tests require the actual models to be downloaded (the
   * singleton tests need no model loading). Run the embed or rerank functions
   * first to trigger model downloads.
   */
  9. import { describe, test, expect, beforeAll, afterAll } from "bun:test";
  10. import {
  11. LlamaCpp,
  12. getDefaultLlamaCpp,
  13. setDefaultLlamaCpp,
  14. type RerankDocument,
  15. } from "./llm.js";
  // =============================================================================
  // Singleton Tests (no model loading required)
  // =============================================================================
  /**
   * Exercises the default-instance accessor pair, getDefaultLlamaCpp and
   * setDefaultLlamaCpp. The tests only compare instance identity and type;
   * none of them calls embed, rerank or expandQuery.
   */
  describe("Default LlamaCpp Singleton", () => {
    // Don't dispose - let process exit handle Metal cleanup naturally.
    // The default is still cleared once this suite finishes, so the
    // "custom-model" instance installed below cannot leak into later suites
    // when tests are filtered or run in a different order.
    afterAll(() => {
      setDefaultLlamaCpp(null);
    });

    test("getDefaultLlamaCpp creates instance on first call", () => {
      setDefaultLlamaCpp(null);
      expect(getDefaultLlamaCpp()).toBeInstanceOf(LlamaCpp);
    });

    test("getDefaultLlamaCpp returns same instance on subsequent calls", () => {
      setDefaultLlamaCpp(null);
      const first = getDefaultLlamaCpp();
      const second = getDefaultLlamaCpp();
      expect(first).toBe(second);
    });

    test("setDefaultLlamaCpp allows replacing the singleton", () => {
      const custom = new LlamaCpp({ embedModel: "custom-model" });
      setDefaultLlamaCpp(custom);
      expect(getDefaultLlamaCpp()).toBe(custom);
    });

    test("setDefaultLlamaCpp with null resets singleton", () => {
      const original = getDefaultLlamaCpp();
      setDefaultLlamaCpp(null);
      const replacement = getDefaultLlamaCpp();
      // A reset must hand back a usable, distinct instance.
      expect(replacement).toBeInstanceOf(LlamaCpp);
      expect(replacement).not.toBe(original);
    });
  });
  // =============================================================================
  // Model Existence Tests
  // =============================================================================
  /**
   * Checks LlamaCpp.modelExists for a HuggingFace-style URI and for a local
   * path that does not exist. In both cases the reported name is expected to
   * echo the identifier that was passed in.
   */
  describe("LlamaCpp.modelExists", () => {
    test("returns exists:true for HuggingFace model URIs", async () => {
      const uri = "hf:org/repo/model.gguf";
      const defaultLlm = getDefaultLlamaCpp();
      const { exists, name } = await defaultLlm.modelExists(uri);
      expect(exists).toBe(true);
      expect(name).toBe(uri);
    });

    test("returns exists:false for non-existent local paths", async () => {
      const missingPath = "/nonexistent/path/model.gguf";
      const defaultLlm = getDefaultLlamaCpp();
      const { exists, name } = await defaultLlm.modelExists(missingPath);
      expect(exists).toBe(false);
      expect(name).toBe(missingPath);
    });
  });
  // =============================================================================
  // Integration Tests (require actual models)
  // =============================================================================
  /**
   * End-to-end checks for embed, embedBatch, rerank and expandQuery.
   * Per the file header, these need the actual models to be downloaded.
   */
  describe("LlamaCpp Integration", () => {
    // 30s timeout for model loading (same value the expandQuery test always used).
    const MODEL_LOAD_TIMEOUT_MS = 30000;
    // embeddinggemma outputs 768 dimensions
    const EMBEDDING_DIMENSIONS = 768;

    // Use the singleton to avoid multiple Metal contexts. It is resolved in
    // beforeAll rather than in the describe body: the body runs while tests
    // are being collected, i.e. before the singleton suite swaps the default
    // out, so an instance captured there would no longer be the shared
    // default by the time these tests execute.
    let llm: LlamaCpp;
    beforeAll(() => {
      llm = getDefaultLlamaCpp();
    });

    /**
     * Narrows a nullable result in one place so the assertions that follow do
     * not need non-null assertions.
     *
     * @param value - result that may be null/undefined
     * @param what - name of the call, used in the failure message
     * @returns the value, guaranteed non-nullish
     * @throws Error when the value is null or undefined
     */
    function present<T>(value: T | null | undefined, what: string): T {
      if (value == null) {
        throw new Error(`${what} returned no result`);
      }
      return value;
    }

    /**
     * Asserts that two vectors have the same length and agree element-wise
     * under toBeCloseTo with 5 digits of precision.
     */
    function expectSameVector(actual: readonly number[], expected: readonly number[]): void {
      // Without this check a shorter/longer second vector could go unnoticed.
      expect(actual.length).toBe(expected.length);
      actual.forEach((value, i) => {
        expect(value).toBeCloseTo(expected[i] ?? Number.NaN, 5);
      });
    }

    /** Cosine similarity of two vectors, iterating over the length of `a`. */
    function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
      let dot = 0;
      let normA = 0;
      let normB = 0;
      a.forEach((x, i) => {
        const y = b[i] ?? 0;
        dot += x * y;
        normA += x ** 2;
        normB += y ** 2;
      });
      return dot / (Math.sqrt(normA) * Math.sqrt(normB));
    }

    describe("embed", () => {
      test("returns embedding with correct dimensions", async () => {
        const { embedding } = present(await llm.embed("Hello world"), "embed");
        expect(embedding).toBeInstanceOf(Array);
        expect(embedding.length).toBe(EMBEDDING_DIMENSIONS);
      });

      test("returns consistent embeddings for same input", async () => {
        const first = present(await llm.embed("test text"), "embed");
        const second = present(await llm.embed("test text"), "embed");
        // Embeddings should be identical for the same input
        expectSameVector(first.embedding, second.embedding);
      });

      test("returns different embeddings for different inputs", async () => {
        const cats = present(await llm.embed("cats are great"), "embed");
        const databases = present(await llm.embed("database optimization"), "embed");
        // Unrelated texts should be meaningfully different, not merely
        // non-identical, hence 0.95 rather than 1.0.
        const similarity = cosineSimilarity(cats.embedding, databases.embedding);
        expect(similarity).toBeLessThan(0.95);
      });
    });

    describe("embedBatch", () => {
      test("returns embeddings for multiple texts", async () => {
        const texts = ["Hello world", "Test text", "Another document"];
        const results = await llm.embedBatch(texts);
        expect(results).toHaveLength(3);
        for (const result of results) {
          expect(present(result, "embedBatch").embedding.length).toBe(EMBEDDING_DIMENSIONS);
        }
      });

      test("returns same results as individual embed calls", async () => {
        const texts = ["cats are great", "dogs are awesome"];
        const batchResults = await llm.embedBatch(texts);
        const individualResults = await Promise.all(texts.map((text) => llm.embed(text)));
        // Compare - should be identical
        expect(batchResults).toHaveLength(texts.length);
        texts.forEach((_, i) => {
          expectSameVector(
            present(batchResults[i], "embedBatch").embedding,
            present(individualResults[i], "embed").embedding,
          );
        });
      });

      test("handles empty array", async () => {
        const results = await llm.embedBatch([]);
        expect(results).toHaveLength(0);
      });

      test("batch is faster than sequential", async () => {
        const texts = Array.from({ length: 10 }, (_, i) => `Document number ${i} with content`);

        // The first call may load the model; warm up so that one-off cost is
        // not billed to the batch, which is measured first.
        await llm.embed("warm-up");

        // performance.now() is monotonic, unlike Date.now().
        const batchStart = performance.now();
        await llm.embedBatch(texts);
        const batchTime = performance.now() - batchStart;

        const seqStart = performance.now();
        for (const text of texts) {
          await llm.embed(text);
        }
        const seqTime = performance.now() - seqStart;

        console.log(`Batch: ${batchTime.toFixed(1)}ms, Sequential: ${seqTime.toFixed(1)}ms`);
        // Batch should be faster (or at least not much slower)
        expect(batchTime).toBeLessThan(seqTime * 1.5);
      }, MODEL_LOAD_TIMEOUT_MS);
    });

    describe("rerank", () => {
      test("scores capital of France question correctly", async () => {
        const query = "What is the capital of France?";
        const documents: RerankDocument[] = [
          { file: "butterflies.txt", text: "Butterflies indeed fly through the garden." },
          { file: "france.txt", text: "The capital of France is Paris." },
          { file: "canada.txt", text: "The capital of Canada is Ottawa." },
        ];
        const result = await llm.rerank(query, documents);
        expect(result.results).toHaveLength(3);
        // France first, Canada second (also about capitals), butterflies last.
        expect(result.results[0].file).toBe("france.txt");
        expect(result.results[0].score).toBeGreaterThan(0.7);
        expect(result.results[1].file).toBe("canada.txt");
        expect(result.results[2].file).toBe("butterflies.txt");
        expect(result.results[2].score).toBeLessThan(0.6);
      });

      test("scores authentication query correctly", async () => {
        const query = "How do I configure authentication?";
        const documents: RerankDocument[] = [
          { file: "weather.md", text: "The weather today is sunny with mild temperatures." },
          { file: "auth.md", text: "Authentication can be configured by setting the AUTH_SECRET environment variable." },
          { file: "pizza.md", text: "Our restaurant serves the best pizza in town." },
          { file: "jwt.md", text: "JWT authentication requires a secret key and expiration time." },
        ];
        const result = await llm.rerank(query, documents);
        expect(result.results).toHaveLength(4);
        const rankedFiles = result.results.map((r) => r.file);
        // Auth documents should score highest
        const topTwo = rankedFiles.slice(0, 2);
        expect(topTwo).toContain("auth.md");
        expect(topTwo).toContain("jwt.md");
        // Irrelevant documents should score lowest
        const bottomTwo = rankedFiles.slice(2);
        expect(bottomTwo).toContain("weather.md");
        expect(bottomTwo).toContain("pizza.md");
      });

      test("handles programming queries correctly", async () => {
        const query = "How do I handle errors in JavaScript?";
        const documents: RerankDocument[] = [
          { file: "cooking.md", text: "To make a good pasta, boil water and add salt." },
          { file: "errors.md", text: "Use try-catch blocks to handle JavaScript errors gracefully." },
          { file: "python.md", text: "Python uses try-except for exception handling." },
        ];
        const result = await llm.rerank(query, documents);
        // JavaScript errors doc should score highest
        expect(result.results[0].file).toBe("errors.md");
        expect(result.results[0].score).toBeGreaterThan(0.7);
        // Python doc might be somewhat relevant (same concept, different language),
        // so only the last place is pinned: cooking should be least relevant.
        expect(result.results[2].file).toBe("cooking.md");
      });

      test("handles empty document list", async () => {
        const result = await llm.rerank("test query", []);
        expect(result.results).toHaveLength(0);
      });

      test("handles single document", async () => {
        const result = await llm.rerank("test", [{ file: "doc.md", text: "content" }]);
        expect(result.results).toHaveLength(1);
        expect(result.results[0].file).toBe("doc.md");
      });

      test("preserves original file paths", async () => {
        const documents: RerankDocument[] = [
          { file: "path/to/doc1.md", text: "content one" },
          { file: "another/path/doc2.md", text: "content two" },
        ];
        const result = await llm.rerank("query", documents);
        // Sort so the check is independent of the ranking order.
        const files = result.results.map((r) => r.file).sort();
        expect(files).toEqual(["another/path/doc2.md", "path/to/doc1.md"]);
      });

      test("returns scores between 0 and 1", async () => {
        const documents: RerankDocument[] = [
          { file: "a.md", text: "The quick brown fox jumps over the lazy dog." },
          { file: "b.md", text: "Machine learning algorithms process data efficiently." },
          { file: "c.md", text: "React components use JSX syntax for rendering." },
        ];
        const result = await llm.rerank("Tell me about animals", documents);
        for (const doc of result.results) {
          expect(doc.score).toBeGreaterThanOrEqual(0);
          expect(doc.score).toBeLessThanOrEqual(1);
        }
      });

      test("batch reranks multiple documents efficiently", async () => {
        // Create 10 documents to verify batch processing works
        const documents: RerankDocument[] = Array.from({ length: 10 }, (_, i) => ({
          file: `doc${i}.md`,
          text: `Document number ${i} with some content about topic ${i % 3}`,
        }));
        const start = performance.now();
        const result = await llm.rerank("topic 1", documents);
        const elapsed = performance.now() - start;
        expect(result.results).toHaveLength(10);
        // Verify all documents are returned with valid scores
        for (const doc of result.results) {
          expect(doc.score).toBeGreaterThanOrEqual(0);
          expect(doc.score).toBeLessThanOrEqual(1);
        }
        // Timing is logged for monitoring only; nothing is asserted on it.
        console.log(`Batch rerank of 10 docs took ${elapsed.toFixed(0)}ms`);
      });
    });

    describe("expandQuery", () => {
      test("returns at least the original query", async () => {
        const result = await llm.expandQuery("test query");
        expect(result).toContain("test query");
        expect(result.length).toBeGreaterThanOrEqual(1);
      }, MODEL_LOAD_TIMEOUT_MS);

      // Same timeout as above so this test also passes when run on its own.
      test("returns original query first", async () => {
        const result = await llm.expandQuery("authentication setup");
        expect(result[0]).toBe("authentication setup");
      }, MODEL_LOAD_TIMEOUT_MS);
    });
  });