llm.test.ts 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724
  1. /**
  2. * llm.test.ts - Unit tests for the LLM abstraction layer (node-llama-cpp)
  3. *
  4. * Run with: bun test src/llm.test.ts
  5. *
  6. * These tests require the actual models to be downloaded. Run the embed or
  7. * rerank functions first to trigger model downloads.
  8. */
  9. import { describe, test, expect, beforeAll, afterAll, vi } from "vitest";
  10. import {
  11. LlamaCpp,
  12. getDefaultLlamaCpp,
  13. disposeDefaultLlamaCpp,
  14. withLLMSession,
  15. canUnloadLLM,
  16. SessionReleasedError,
  17. type RerankDocument,
  18. type ILLMSession,
  19. } from "../src/llm.js";
  20. // =============================================================================
  21. // Singleton Tests (no model loading required)
  22. // =============================================================================
  23. describe("Default LlamaCpp Singleton", () => {
  24. // Test singleton behavior without resetting to avoid orphan instances
  25. test("getDefaultLlamaCpp returns same instance on subsequent calls", () => {
  26. const llm1 = getDefaultLlamaCpp();
  27. const llm2 = getDefaultLlamaCpp();
  28. expect(llm1).toBe(llm2);
  29. expect(llm1).toBeInstanceOf(LlamaCpp);
  30. });
  31. });
  32. // =============================================================================
  33. // Model Existence Tests
  34. // =============================================================================
  35. describe("LlamaCpp.modelExists", () => {
  36. test("returns exists:true for HuggingFace model URIs", async () => {
  37. const llm = getDefaultLlamaCpp();
  38. const result = await llm.modelExists("hf:org/repo/model.gguf");
  39. expect(result.exists).toBe(true);
  40. expect(result.name).toBe("hf:org/repo/model.gguf");
  41. });
  42. test("returns exists:false for non-existent local paths", async () => {
  43. const llm = getDefaultLlamaCpp();
  44. const result = await llm.modelExists("/nonexistent/path/model.gguf");
  45. expect(result.exists).toBe(false);
  46. expect(result.name).toBe("/nonexistent/path/model.gguf");
  47. });
  48. });
  49. describe("LlamaCpp expand context size config", () => {
  50. const defaultExpandContextSize = 2048;
  51. test("uses default expand context size when no config or env is set", () => {
  52. const prev = process.env.QMD_EXPAND_CONTEXT_SIZE;
  53. delete process.env.QMD_EXPAND_CONTEXT_SIZE;
  54. try {
  55. const llm = new LlamaCpp({}) as any;
  56. expect(llm.expandContextSize).toBe(defaultExpandContextSize);
  57. } finally {
  58. if (prev === undefined) delete process.env.QMD_EXPAND_CONTEXT_SIZE;
  59. else process.env.QMD_EXPAND_CONTEXT_SIZE = prev;
  60. }
  61. });
  62. test("uses QMD_EXPAND_CONTEXT_SIZE when set to a positive integer", () => {
  63. const prev = process.env.QMD_EXPAND_CONTEXT_SIZE;
  64. process.env.QMD_EXPAND_CONTEXT_SIZE = "3072";
  65. try {
  66. const llm = new LlamaCpp({}) as any;
  67. expect(llm.expandContextSize).toBe(3072);
  68. } finally {
  69. if (prev === undefined) delete process.env.QMD_EXPAND_CONTEXT_SIZE;
  70. else process.env.QMD_EXPAND_CONTEXT_SIZE = prev;
  71. }
  72. });
  73. test("config value overrides QMD_EXPAND_CONTEXT_SIZE", () => {
  74. const prev = process.env.QMD_EXPAND_CONTEXT_SIZE;
  75. process.env.QMD_EXPAND_CONTEXT_SIZE = "4096";
  76. try {
  77. const llm = new LlamaCpp({ expandContextSize: 1536 }) as any;
  78. expect(llm.expandContextSize).toBe(1536);
  79. } finally {
  80. if (prev === undefined) delete process.env.QMD_EXPAND_CONTEXT_SIZE;
  81. else process.env.QMD_EXPAND_CONTEXT_SIZE = prev;
  82. }
  83. });
  84. test("falls back to default and warns when QMD_EXPAND_CONTEXT_SIZE is invalid", () => {
  85. const prev = process.env.QMD_EXPAND_CONTEXT_SIZE;
  86. process.env.QMD_EXPAND_CONTEXT_SIZE = "bad";
  87. const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
  88. try {
  89. const llm = new LlamaCpp({}) as any;
  90. expect(llm.expandContextSize).toBe(defaultExpandContextSize);
  91. expect(stderrSpy).toHaveBeenCalled();
  92. expect(String(stderrSpy.mock.calls[0]?.[0] || "")).toContain("QMD_EXPAND_CONTEXT_SIZE");
  93. } finally {
  94. stderrSpy.mockRestore();
  95. if (prev === undefined) delete process.env.QMD_EXPAND_CONTEXT_SIZE;
  96. else process.env.QMD_EXPAND_CONTEXT_SIZE = prev;
  97. }
  98. });
  99. test("throws when config expandContextSize is invalid", () => {
  100. expect(() => new LlamaCpp({ expandContextSize: 0 })).toThrow(
  101. "Invalid expandContextSize: 0. Must be a positive integer."
  102. );
  103. });
  104. });
  105. describe("LlamaCpp rerank deduping", () => {
  106. test("deduplicates identical document texts before scoring", async () => {
  107. const llm = new LlamaCpp({}) as any;
  108. llm._ciMode = false; // allow unit test even in CI (mocked, no real models)
  109. const rankAll = vi.fn(async (_query: string, docs: string[]) =>
  110. docs.map((doc) => doc === "shared chunk" ? 0.9 : 0.2)
  111. );
  112. llm.touchActivity = vi.fn();
  113. llm.ensureRerankContexts = vi.fn().mockResolvedValue([{ rankAll }]);
  114. llm.ensureRerankModel = vi.fn().mockResolvedValue({
  115. tokenize: (text: string) => Array.from(text),
  116. detokenize: (tokens: string[]) => tokens.join(""),
  117. });
  118. const result = await llm.rerank("query", [
  119. { file: "a.md", text: "shared chunk" },
  120. { file: "b.md", text: "shared chunk" },
  121. { file: "c.md", text: "different chunk" },
  122. ]);
  123. expect(rankAll).toHaveBeenCalledTimes(1);
  124. expect(rankAll).toHaveBeenCalledWith("query", ["shared chunk", "different chunk"]);
  125. expect(result.results).toHaveLength(3);
  126. const scoreByFile = new Map(result.results.map((item) => [item.file, item.score]));
  127. expect(scoreByFile.get("a.md")).toBe(0.9);
  128. expect(scoreByFile.get("b.md")).toBe(0.9);
  129. expect(scoreByFile.get("c.md")).toBe(0.2);
  130. });
  131. });
  132. // =============================================================================
  133. // Integration Tests (require actual models)
  134. // =============================================================================
  135. describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => {
  136. // Use the singleton to avoid multiple Metal contexts
  137. const llm = getDefaultLlamaCpp();
  138. afterAll(async () => {
  139. // Ensure native resources are released to avoid ggml-metal asserts on process exit.
  140. await disposeDefaultLlamaCpp();
  141. });
  142. describe("embed", () => {
  143. test("returns embedding with correct dimensions", async () => {
  144. const result = await llm.embed("Hello world");
  145. expect(result).not.toBeNull();
  146. expect(result!.embedding).toBeInstanceOf(Array);
  147. expect(result!.embedding.length).toBeGreaterThan(0);
  148. // embeddinggemma outputs 768 dimensions
  149. expect(result!.embedding.length).toBe(768);
  150. });
  151. test("returns consistent embeddings for same input", async () => {
  152. const result1 = await llm.embed("test text");
  153. const result2 = await llm.embed("test text");
  154. expect(result1).not.toBeNull();
  155. expect(result2).not.toBeNull();
  156. // Embeddings should be identical for the same input
  157. for (let i = 0; i < result1!.embedding.length; i++) {
  158. expect(result1!.embedding[i]).toBeCloseTo(result2!.embedding[i]!, 5);
  159. }
  160. });
  161. test("returns different embeddings for different inputs", async () => {
  162. const result1 = await llm.embed("cats are great");
  163. const result2 = await llm.embed("database optimization");
  164. expect(result1).not.toBeNull();
  165. expect(result2).not.toBeNull();
  166. // Calculate cosine similarity - should be less than 1.0 (not identical)
  167. let dotProduct = 0;
  168. let norm1 = 0;
  169. let norm2 = 0;
  170. for (let i = 0; i < result1!.embedding.length; i++) {
  171. const v1 = result1!.embedding[i]!;
  172. const v2 = result2!.embedding[i]!;
  173. dotProduct += v1 * v2;
  174. norm1 += v1 ** 2;
  175. norm2 += v2 ** 2;
  176. }
  177. const similarity = dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
  178. expect(similarity).toBeLessThan(0.95); // Should be meaningfully different
  179. });
  180. });
  181. describe("embedBatch", () => {
  182. test("returns embeddings for multiple texts", async () => {
  183. const texts = ["Hello world", "Test text", "Another document"];
  184. const results = await llm.embedBatch(texts);
  185. expect(results).toHaveLength(3);
  186. for (const result of results) {
  187. expect(result).not.toBeNull();
  188. expect(result!.embedding.length).toBe(768);
  189. }
  190. });
  191. test("returns same results as individual embed calls", async () => {
  192. const texts = ["cats are great", "dogs are awesome"];
  193. // Get batch embeddings
  194. const batchResults = await llm.embedBatch(texts);
  195. // Get individual embeddings
  196. const individualResults = await Promise.all(texts.map(t => llm.embed(t)));
  197. // Compare - should be identical
  198. for (let i = 0; i < texts.length; i++) {
  199. expect(batchResults[i]).not.toBeNull();
  200. expect(individualResults[i]).not.toBeNull();
  201. for (let j = 0; j < batchResults[i]!.embedding.length; j++) {
  202. expect(batchResults[i]!.embedding[j]).toBeCloseTo(individualResults[i]!.embedding[j]!, 5);
  203. }
  204. }
  205. });
  206. test("handles empty array", async () => {
  207. const results = await llm.embedBatch([]);
  208. expect(results).toHaveLength(0);
  209. });
  210. test("batch is faster than sequential", async () => {
  211. const texts = Array(10).fill(null).map((_, i) => `Document number ${i} with content`);
  212. // Time batch
  213. const batchStart = Date.now();
  214. await llm.embedBatch(texts);
  215. const batchTime = Date.now() - batchStart;
  216. // Time sequential
  217. const seqStart = Date.now();
  218. for (const text of texts) {
  219. await llm.embed(text);
  220. }
  221. const seqTime = Date.now() - seqStart;
  222. console.log(`Batch: ${batchTime}ms, Sequential: ${seqTime}ms`);
  223. // Performance is machine/load dependent. We only assert batch isn't drastically worse.
  224. expect(batchTime).toBeLessThanOrEqual(seqTime * 3);
  225. });
  226. test("handles concurrent embedBatch calls on fresh instance without race condition", async () => {
  227. // This test verifies the fix for a race condition where concurrent calls to
  228. // ensureEmbedContext() could create multiple contexts. Without the promise guard,
  229. // each concurrent embedBatch call sees embedContext === null and creates its own
  230. // context, causing resource leaks and potential "Context is disposed" errors.
  231. //
  232. // See: https://github.com/tobi/qmd/pull/54
  233. //
  234. // The fix uses a promise guard to ensure only one context creation runs at a time.
  235. // We verify this by instrumenting createEmbeddingContext to count invocations.
  236. const freshLlm = new LlamaCpp({});
  237. let contextCreateCount = 0;
  238. // Instrument the model's createEmbeddingContext to count calls
  239. const originalEnsureEmbedModel = (freshLlm as any).ensureEmbedModel.bind(freshLlm);
  240. let modelInstrumented = false;
  241. (freshLlm as any).ensureEmbedModel = async function() {
  242. const model = await originalEnsureEmbedModel();
  243. if (!modelInstrumented) {
  244. modelInstrumented = true;
  245. const originalCreate = model.createEmbeddingContext.bind(model);
  246. model.createEmbeddingContext = async function(...args: any[]) {
  247. contextCreateCount++;
  248. return originalCreate(...args);
  249. };
  250. }
  251. return model;
  252. };
  253. const texts = Array(10).fill(null).map((_, i) => `Document ${i}`);
  254. // Call embedBatch 5 TIMES in parallel on fresh instance.
  255. // Without the promise guard fix, this would create 5 contexts (one per call).
  256. // With the fix, only 1 context should be created.
  257. const batches = await Promise.all([
  258. freshLlm.embedBatch(texts.slice(0, 2)),
  259. freshLlm.embedBatch(texts.slice(2, 4)),
  260. freshLlm.embedBatch(texts.slice(4, 6)),
  261. freshLlm.embedBatch(texts.slice(6, 8)),
  262. freshLlm.embedBatch(texts.slice(8, 10)),
  263. ]);
  264. const allResults = batches.flat();
  265. expect(allResults).toHaveLength(10);
  266. const successCount = allResults.filter(r => r !== null).length;
  267. expect(successCount).toBe(10);
  268. // THE KEY ASSERTION: Contexts should be created once (by ensureEmbedContexts),
  269. // not duplicated per concurrent embedBatch call. The exact count depends on
  270. // available VRAM (computeParallelism), but should not be 5 (one per call).
  271. // Without the fix, contextCreateCount would be 5× the intended count (one set per concurrent call).
  272. // With the promise guard, contexts are created exactly once regardless of concurrent callers.
  273. // The count depends on VRAM (computeParallelism), but should be ≤ 8 (the cap).
  274. console.log(`Context creation count: ${contextCreateCount} (expected: ≤ 8, not 5× duplicated)`);
  275. expect(contextCreateCount).toBeGreaterThanOrEqual(1);
  276. expect(contextCreateCount).toBeLessThanOrEqual(8);
  277. await freshLlm.dispose();
  278. }, 60000);
  279. });
  280. describe("rerank", () => {
  281. test("scores capital of France question correctly", async () => {
  282. const query = "What is the capital of France?";
  283. const documents: RerankDocument[] = [
  284. { file: "butterflies.txt", text: "Butterflies indeed fly through the garden." },
  285. { file: "france.txt", text: "The capital of France is Paris." },
  286. { file: "canada.txt", text: "The capital of Canada is Ottawa." },
  287. ];
  288. const result = await llm.rerank(query, documents);
  289. expect(result.results).toHaveLength(3);
  290. // The France document should score highest
  291. expect(result.results[0]!.file).toBe("france.txt");
  292. expect(result.results[0]!.score).toBeGreaterThan(0.7);
  293. // Canada should be somewhat relevant (also about capitals)
  294. expect(result.results[1]!.file).toBe("canada.txt");
  295. // Butterflies should score lowest
  296. expect(result.results[2]!.file).toBe("butterflies.txt");
  297. expect(result.results[2]!.score).toBeLessThan(0.6);
  298. });
  299. test("scores authentication query correctly", async () => {
  300. const query = "How do I configure authentication?";
  301. const documents: RerankDocument[] = [
  302. { file: "weather.md", text: "The weather today is sunny with mild temperatures." },
  303. { file: "auth.md", text: "Authentication can be configured by setting the AUTH_SECRET environment variable." },
  304. { file: "pizza.md", text: "Our restaurant serves the best pizza in town." },
  305. { file: "jwt.md", text: "JWT authentication requires a secret key and expiration time." },
  306. ];
  307. const result = await llm.rerank(query, documents);
  308. expect(result.results).toHaveLength(4);
  309. // Auth documents should score highest
  310. const topTwo = result.results.slice(0, 2).map((r) => r.file);
  311. expect(topTwo).toContain("auth.md");
  312. expect(topTwo).toContain("jwt.md");
  313. // Irrelevant documents should score lowest
  314. const bottomTwo = result.results.slice(2).map((r) => r.file);
  315. expect(bottomTwo).toContain("weather.md");
  316. expect(bottomTwo).toContain("pizza.md");
  317. });
  318. test("handles programming queries correctly", async () => {
  319. const query = "How do I handle errors in JavaScript?";
  320. const documents: RerankDocument[] = [
  321. { file: "cooking.md", text: "To make a good pasta, boil water and add salt." },
  322. { file: "errors.md", text: "Use try-catch blocks to handle JavaScript errors gracefully." },
  323. { file: "python.md", text: "Python uses try-except for exception handling." },
  324. ];
  325. const result = await llm.rerank(query, documents);
  326. // JavaScript errors doc should score highest
  327. expect(result.results[0]!.file).toBe("errors.md");
  328. expect(result.results[0]!.score).toBeGreaterThan(0.7);
  329. // Python doc might be somewhat relevant (same concept, different language)
  330. // Cooking should be least relevant
  331. expect(result.results[2]!.file).toBe("cooking.md");
  332. });
  333. test("handles empty document list", async () => {
  334. const result = await llm.rerank("test query", []);
  335. expect(result.results).toHaveLength(0);
  336. });
  337. test("handles single document", async () => {
  338. const result = await llm.rerank("test", [{ file: "doc.md", text: "content" }]);
  339. expect(result.results).toHaveLength(1);
  340. expect(result.results[0]!.file).toBe("doc.md");
  341. });
  342. test("preserves original file paths", async () => {
  343. const documents: RerankDocument[] = [
  344. { file: "path/to/doc1.md", text: "content one" },
  345. { file: "another/path/doc2.md", text: "content two" },
  346. ];
  347. const result = await llm.rerank("query", documents);
  348. const files = result.results.map((r) => r.file).sort();
  349. expect(files).toEqual(["another/path/doc2.md", "path/to/doc1.md"]);
  350. });
  351. test("returns scores between 0 and 1", async () => {
  352. const documents: RerankDocument[] = [
  353. { file: "a.md", text: "The quick brown fox jumps over the lazy dog." },
  354. { file: "b.md", text: "Machine learning algorithms process data efficiently." },
  355. { file: "c.md", text: "React components use JSX syntax for rendering." },
  356. ];
  357. const result = await llm.rerank("Tell me about animals", documents);
  358. for (const doc of result.results) {
  359. expect(doc.score).toBeGreaterThanOrEqual(0);
  360. expect(doc.score).toBeLessThanOrEqual(1);
  361. }
  362. });
  363. test("batch reranks multiple documents efficiently", async () => {
  364. // Create 10 documents to verify batch processing works
  365. const documents: RerankDocument[] = Array(10)
  366. .fill(null)
  367. .map((_, i) => ({
  368. file: `doc${i}.md`,
  369. text: `Document number ${i} with some content about topic ${i % 3}`,
  370. }));
  371. const start = Date.now();
  372. const result = await llm.rerank("topic 1", documents);
  373. const elapsed = Date.now() - start;
  374. expect(result.results).toHaveLength(10);
  375. // Verify all documents are returned with valid scores
  376. for (const doc of result.results) {
  377. expect(doc.score).toBeGreaterThanOrEqual(0);
  378. expect(doc.score).toBeLessThanOrEqual(1);
  379. }
  380. // Log timing for monitoring batch performance
  381. console.log(`Batch rerank of 10 docs took ${elapsed}ms`);
  382. });
  383. test("uses fewer active rerank contexts for small batches", async () => {
  384. const freshLlm = new LlamaCpp({});
  385. const calls: number[] = [];
  386. const fakeModel = {
  387. tokenize: (text: string) => Array.from(text),
  388. detokenize: (tokens: string[]) => tokens.join(""),
  389. };
  390. const fakeContexts = Array.from({ length: 4 }, (_, idx) => ({
  391. rankAll: async (_query: string, docs: string[]) => {
  392. calls.push(idx);
  393. return docs.map(() => 0.5);
  394. },
  395. }));
  396. (freshLlm as any).ensureRerankModel = async () => fakeModel;
  397. (freshLlm as any).ensureRerankContexts = async () => fakeContexts;
  398. const documents: RerankDocument[] = Array.from({ length: 20 }, (_, i) => ({
  399. file: `doc${i}.md`,
  400. text: `Document number ${i}`,
  401. }));
  402. const result = await freshLlm.rerank("topic 1", documents);
  403. expect(result.results).toHaveLength(20);
  404. expect(calls).toEqual([0, 1]);
  405. });
  406. test("truncates and reranks document exceeding 2048 token context size", async () => {
  407. // The reranker context is created with contextSize=2048. Documents that
  408. // exceed the token budget (contextSize - template overhead - query tokens)
  409. // should be silently truncated rather than crashing.
  410. const paragraph = "The quick brown fox jumps over the lazy dog near the riverbank. " +
  411. "Authentication tokens must be validated on every request to ensure security. " +
  412. "Database queries should use prepared statements to prevent SQL injection attacks. " +
  413. "The deployment pipeline includes linting, testing, building, and publishing stages. ";
  414. // ~320 chars per paragraph, repeat 40 times = ~12800 chars ≈ 3200 tokens
  415. const longText = paragraph.repeat(40);
  416. const query = "How do I configure authentication?";
  417. const documents: RerankDocument[] = [
  418. { file: "short-relevant.md", text: "Authentication can be configured by setting AUTH_SECRET." },
  419. { file: "long-doc.md", text: longText },
  420. { file: "short-irrelevant.md", text: "The weather is sunny today." },
  421. ];
  422. console.log(`Long doc length: ${longText.length} chars (~${Math.round(longText.length / 4)} tokens)`);
  423. const result = await llm.rerank(query, documents);
  424. // Should return all 3 documents without crashing
  425. expect(result.results).toHaveLength(3);
  426. // All scores should be valid numbers in [0, 1]
  427. for (const doc of result.results) {
  428. expect(doc.score).toBeGreaterThanOrEqual(0);
  429. expect(doc.score).toBeLessThanOrEqual(1);
  430. expect(Number.isNaN(doc.score)).toBe(false);
  431. }
  432. // The short, directly relevant doc should still rank highest
  433. console.log("Rerank results for long doc test:");
  434. for (const doc of result.results) {
  435. console.log(` ${doc.file}: ${doc.score.toFixed(4)}`);
  436. }
  437. });
  438. });
  439. describe("expandQuery", () => {
  440. test("returns query expansions with correct types", async () => {
  441. const result = await llm.expandQuery("test query");
  442. // Result is Queryable[] containing lex, vec, and/or hyde entries
  443. expect(result.length).toBeGreaterThanOrEqual(1);
  444. // Each result should have a valid type
  445. for (const q of result) {
  446. expect(["lex", "vec", "hyde"]).toContain(q.type);
  447. expect(q.text.length).toBeGreaterThan(0);
  448. }
  449. }, 30000); // 30s timeout for model loading
  450. test("can exclude lexical queries", async () => {
  451. const result = await llm.expandQuery("authentication setup", { includeLexical: false });
  452. // Should not contain any 'lex' type entries
  453. const lexEntries = result.filter(q => q.type === "lex");
  454. expect(lexEntries).toHaveLength(0);
  455. });
  456. });
  457. });
  458. // =============================================================================
  459. // Session Management Tests
  460. // =============================================================================
  461. describe.skipIf(!!process.env.CI)("LLM Session Management", () => {
  462. describe("withLLMSession", () => {
  463. test("session provides access to LLM operations", async () => {
  464. const result = await withLLMSession(async (session) => {
  465. expect(session.isValid).toBe(true);
  466. const embedding = await session.embed("test text");
  467. expect(embedding).not.toBeNull();
  468. expect(embedding!.embedding.length).toBe(768);
  469. return "success";
  470. });
  471. expect(result).toBe("success");
  472. });
  473. test("session is invalid after release", async () => {
  474. let capturedSession: ILLMSession | null = null;
  475. await withLLMSession(async (session) => {
  476. capturedSession = session;
  477. expect(session.isValid).toBe(true);
  478. });
  479. // Session should be invalid after withLLMSession returns
  480. expect(capturedSession).not.toBeNull();
  481. expect(capturedSession!.isValid).toBe(false);
  482. });
  483. test("session prevents idle unload during operations", async () => {
  484. await withLLMSession(async (session) => {
  485. // While inside a session, canUnloadLLM should return false
  486. expect(canUnloadLLM()).toBe(false);
  487. // Perform an operation
  488. await session.embed("test");
  489. // Still should not be able to unload
  490. expect(canUnloadLLM()).toBe(false);
  491. });
  492. // After session ends, should be able to unload
  493. expect(canUnloadLLM()).toBe(true);
  494. });
  495. test("nested sessions increment ref count", async () => {
  496. await withLLMSession(async (outerSession) => {
  497. expect(canUnloadLLM()).toBe(false);
  498. await withLLMSession(async (innerSession) => {
  499. expect(canUnloadLLM()).toBe(false);
  500. expect(innerSession.isValid).toBe(true);
  501. expect(outerSession.isValid).toBe(true);
  502. });
  503. // Inner session released, but outer still active
  504. expect(canUnloadLLM()).toBe(false);
  505. expect(outerSession.isValid).toBe(true);
  506. });
  507. // All sessions released
  508. expect(canUnloadLLM()).toBe(true);
  509. });
  510. test("session embedBatch works correctly", async () => {
  511. await withLLMSession(async (session) => {
  512. const texts = ["Hello world", "Test text", "Another document"];
  513. const results = await session.embedBatch(texts);
  514. expect(results).toHaveLength(3);
  515. for (const result of results) {
  516. expect(result).not.toBeNull();
  517. expect(result!.embedding.length).toBe(768);
  518. }
  519. });
  520. });
  521. test("session rerank works correctly", async () => {
  522. await withLLMSession(async (session) => {
  523. const documents: RerankDocument[] = [
  524. { file: "a.txt", text: "The capital of France is Paris." },
  525. { file: "b.txt", text: "Dogs are great pets." },
  526. ];
  527. const result = await session.rerank("What is the capital of France?", documents);
  528. expect(result.results).toHaveLength(2);
  529. expect(result.results[0]!.file).toBe("a.txt");
  530. expect(result.results[0]!.score).toBeGreaterThan(result.results[1]!.score);
  531. });
  532. });
  533. test("max duration aborts session after timeout", async () => {
  534. let aborted = false;
  535. try {
  536. await withLLMSession(async (session) => {
  537. // Wait longer than max duration
  538. await new Promise(resolve => setTimeout(resolve, 150));
  539. // This operation should throw because session was aborted
  540. await session.embed("test");
  541. }, { maxDuration: 50 }); // 50ms max
  542. } catch (err) {
  543. if (err instanceof SessionReleasedError) {
  544. aborted = true;
  545. } else {
  546. throw err;
  547. }
  548. }
  549. expect(aborted).toBe(true);
  550. }, 5000);
  551. test("external abort signal propagates to session", async () => {
  552. const abortController = new AbortController();
  553. let sessionAborted = false;
  554. const promise = withLLMSession(async (session) => {
  555. // Wait a bit then check if aborted
  556. await new Promise(resolve => setTimeout(resolve, 100));
  557. if (!session.isValid) {
  558. sessionAborted = true;
  559. throw new SessionReleasedError("Session aborted");
  560. }
  561. return "should not reach";
  562. }, { signal: abortController.signal });
  563. // Abort after 20ms
  564. setTimeout(() => abortController.abort(), 20);
  565. try {
  566. await promise;
  567. } catch (err) {
  568. // Expected
  569. }
  570. expect(sessionAborted).toBe(true);
  571. }, 5000);
  572. test("session provides abort signal for monitoring", async () => {
  573. await withLLMSession(async (session) => {
  574. expect(session.signal).toBeInstanceOf(AbortSignal);
  575. expect(session.signal.aborted).toBe(false);
  576. });
  577. });
  578. test("returns value from callback", async () => {
  579. const result = await withLLMSession(async (session) => {
  580. await session.embed("test");
  581. return { status: "complete", count: 42 };
  582. });
  583. expect(result).toEqual({ status: "complete", count: 42 });
  584. });
  585. test("propagates errors from callback", async () => {
  586. const customError = new Error("Custom test error");
  587. await expect(
  588. withLLMSession(async () => {
  589. throw customError;
  590. })
  591. ).rejects.toThrow("Custom test error");
  592. });
  593. });
  594. });