5 mesi fa · bfb0eebc3e
--- a/src/llm.test.ts
+++ b/src/llm.test.ts
@@ -167,6 +167,63 @@ describe("LlamaCpp Integration", () => {
 
				       // Performance is machine/load dependent. We only assert batch isn't drastically worse.
			
 
				       expect(batchTime).toBeLessThanOrEqual(seqTime * 3);
			
 
				     });
			
 
				+
			
 
				+    test("handles concurrent embedBatch calls on fresh instance without race condition", async () => {
			
 
				+      // This test verifies the fix for a race condition where concurrent calls to
			
 
				+      // ensureEmbedContext() could create multiple contexts. Without the promise guard,
			
 
				+      // each concurrent embedBatch call sees embedContext === null and creates its own
			
 
				+      // context, causing resource leaks and potential "Context is disposed" errors.
			
 
				+      //
			
 
				+      // See: https://github.com/tobi/qmd/pull/54
			
 
				+      //
			
 
				+      // The fix uses a promise guard to ensure only one context creation runs at a time.
			
 
				+      // We verify this by instrumenting createEmbeddingContext to count invocations.
			
 
				+      
			
 
				+      const freshLlm = new LlamaCpp({});
			
 
				+      let contextCreateCount = 0;
			
 
				+      
			
 
				+      // Instrument the model's createEmbeddingContext to count calls
			
 
				+      const originalEnsureEmbedModel = (freshLlm as any).ensureEmbedModel.bind(freshLlm);
			
 
				+      let modelInstrumented = false;
			
 
				+      (freshLlm as any).ensureEmbedModel = async function() {
			
 
				+        const model = await originalEnsureEmbedModel();
			
 
				+        if (!modelInstrumented) {
			
 
				+          modelInstrumented = true;
			
 
				+          const originalCreate = model.createEmbeddingContext.bind(model);
			
 
				+          model.createEmbeddingContext = async function(...args: any[]) {
			
 
				+            contextCreateCount++;
			
 
				+            return originalCreate(...args);
			
 
				+          };
			
 
				+        }
			
 
				+        return model;
			
 
				+      };
			
 
				+      
			
 
				+      const texts = Array(10).fill(null).map((_, i) => `Document ${i}`);
			
 
				+
			
 
				+      // Call embedBatch 5 TIMES in parallel on fresh instance.
			
 
				+      // Without the promise guard fix, this would create 5 contexts (one per call).
			
 
				+      // With the fix, only 1 context should be created.
			
 
				+      const batches = await Promise.all([
			
 
				+        freshLlm.embedBatch(texts.slice(0, 2)),
			
 
				+        freshLlm.embedBatch(texts.slice(2, 4)),
			
 
				+        freshLlm.embedBatch(texts.slice(4, 6)),
			
 
				+        freshLlm.embedBatch(texts.slice(6, 8)),
			
 
				+        freshLlm.embedBatch(texts.slice(8, 10)),
			
 
				+      ]);
			
 
				+
			
 
				+      const allResults = batches.flat();
			
 
				+      expect(allResults).toHaveLength(10);
			
 
				+      
			
 
				+      const successCount = allResults.filter(r => r !== null).length;
			
 
				+      expect(successCount).toBe(10);
			
 
				+
			
 
				+      // THE KEY ASSERTION: Only 1 context should be created, not 5
			
 
				+      // Without the fix, contextCreateCount would be 5 (one per concurrent embedBatch call)
			
 
				+      console.log(`Context creation count: ${contextCreateCount} (expected: 1)`);
			
 
				+      expect(contextCreateCount).toBe(1);
			
 
				+      
			
 
				+      await freshLlm.dispose();
			
 
				+    }, 60000);
			
 
				   });
			
 
				 
			
 
				   describe("rerank", () => {
			
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -241,8 +241,9 @@ export class LlamaCpp implements LLM {
 
				   private rerankModelUri: string;
			
 
				   private modelCacheDir: string;
			
 
				 
			
 
				-  // Ensure we don't load the same model concurrently (which can allocate duplicate VRAM).
			
 
				+  // Ensure we don't load the same model/context concurrently (which can allocate duplicate VRAM).
			
 
				   private embedModelLoadPromise: Promise<LlamaModel> | null = null;
			
 
				+  private embedContextCreatePromise: Promise<LlamaEmbeddingContext> | null = null;
			
 
				   private generateModelLoadPromise: Promise<LlamaModel> | null = null;
			
 
				   private rerankModelLoadPromise: Promise<LlamaModel> | null = null;
			
 
				 
			
@@ -402,11 +403,28 @@ export class LlamaCpp implements LLM {
 
				 
			
 
				   /**
			
 
				    * Load embedding context (lazy). Context can be disposed and recreated without reloading the model.
			
 
				+   * Uses promise guard to prevent concurrent context creation race condition.
			
 
				    */
			
 
				   private async ensureEmbedContext(): Promise<LlamaEmbeddingContext> {
			
 
				     if (!this.embedContext) {
			
 
				-      const model = await this.ensureEmbedModel();
			
 
				-      this.embedContext = await model.createEmbeddingContext();
			
 
				+      // If context creation is already in progress, wait for it
			
 
				+      if (this.embedContextCreatePromise) {
			
 
				+        return await this.embedContextCreatePromise;
			
 
				+      }
			
 
				+
			
 
				+      // Start context creation and store promise so concurrent calls wait
			
 
				+      this.embedContextCreatePromise = (async () => {
			
 
				+        const model = await this.ensureEmbedModel();
			
 
				+        const context = await model.createEmbeddingContext();
			
 
				+        this.embedContext = context;
			
 
				+        return context;
			
 
				+      })();
			
 
				+
			
 
				+      try {
			
 
				+        await this.embedContextCreatePromise;
			
 
				+      } finally {
			
 
				+        this.embedContextCreatePromise = null;
			
 
				+      }
			
 
				     }
			
 
				     this.touchActivity();
			
 
				     return this.embedContext;
			
@@ -781,8 +799,9 @@ Final Output:`;
 
				     this.rerankModel = null;
			
 
				     this.llama = null;
			
 
				 
			
 
				-    // Clear any in-flight load promises
			
 
				+    // Clear any in-flight load/create promises
			
 
				     this.embedModelLoadPromise = null;
			
 
				+    this.embedContextCreatePromise = null;
			
 
				     this.generateModelLoadPromise = null;
			
 
				     this.rerankModelLoadPromise = null;
			
 
				   }