Преглед изворни кода

Simplify disposal: let llama cascade to children, remove test dispose calls

- dispose() now just calls llama.dispose() which cascades to models/contexts
  per node-llama-cpp lifecycle docs
- Remove disposeDefaultLlamaCpp calls from tests - they don't help with
  the Metal cleanup crash
- Use singleton getDefaultLlamaCpp() in llm tests for consistency

The Metal backend crash at process exit is a known llama.cpp issue:
https://github.com/ggml-org/llama.cpp/pull/17869

All tests pass - the abort happens after test completion.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Tobi Lutke пре 5 месеци
родитељ
комит
10c5ec016f
4 измењених фајлова са 18 додато и 43 уклоњено
  1. 1 2
      src/eval.test.ts
  2. 5 10
      src/llm.test.ts
  3. 11 29
      src/llm.ts
  4. 1 2
      src/mcp.test.ts

+ 1 - 2
src/eval.test.ts

@@ -34,7 +34,7 @@ import {
   DEFAULT_EMBED_MODEL,
   type RankedResult,
 } from "./store";
-import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
+import { getDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
 
 // Eval queries with expected documents
 const evalQueries: {
@@ -393,6 +393,5 @@ describe("Hybrid Search (RRF)", () => {
 // =============================================================================
 
 afterAll(() => {
-  // Don't dispose llama - let process exit handle Metal cleanup naturally
   rmSync(tempDir, { recursive: true, force: true });
 });

+ 5 - 10
src/llm.test.ts

@@ -12,7 +12,6 @@ import {
   LlamaCpp,
   getDefaultLlamaCpp,
   setDefaultLlamaCpp,
-  disposeDefaultLlamaCpp,
   type RerankDocument,
 } from "./llm.js";
 
@@ -59,7 +58,7 @@ describe("Default LlamaCpp Singleton", () => {
 
 describe("LlamaCpp.modelExists", () => {
   test("returns exists:true for HuggingFace model URIs", async () => {
-    const llm = new LlamaCpp();
+    const llm = getDefaultLlamaCpp();
     const result = await llm.modelExists("hf:org/repo/model.gguf");
 
     expect(result.exists).toBe(true);
@@ -67,7 +66,7 @@ describe("LlamaCpp.modelExists", () => {
   });
 
   test("returns exists:false for non-existent local paths", async () => {
-    const llm = new LlamaCpp();
+    const llm = getDefaultLlamaCpp();
     const result = await llm.modelExists("/nonexistent/path/model.gguf");
 
     expect(result.exists).toBe(false);
@@ -80,13 +79,8 @@ describe("LlamaCpp.modelExists", () => {
 // =============================================================================
 
 describe("LlamaCpp Integration", () => {
-  let llm: LlamaCpp;
-
-  beforeAll(() => {
-    llm = new LlamaCpp();
-  });
-
-  // Don't dispose - let process exit handle Metal cleanup naturally
+  // Use the singleton to avoid multiple Metal contexts
+  const llm = getDefaultLlamaCpp();
 
   describe("embed", () => {
     test("returns embedding with correct dimensions", async () => {
@@ -339,3 +333,4 @@ describe("LlamaCpp Integration", () => {
     });
   });
 });
+

+ 11 - 29
src/llm.ts

@@ -704,38 +704,20 @@ Generate the structured expansion:`;
       this.inactivityTimer = null;
     }
 
-    // Dispose in order: contexts -> models -> llama
-    // Contexts depend on models, models depend on llama
-    if (this.embedContext) {
-      await this.embedContext.dispose();
-      this.embedContext = null;
-    }
-    if (this.generateContext) {
-      await this.generateContext.dispose();
-      this.generateContext = null;
-    }
-    if (this.rerankContext) {
-      await this.rerankContext.dispose();
-      this.rerankContext = null;
-    }
-
-    if (this.embedModel) {
-      await this.embedModel.dispose();
-      this.embedModel = null;
-    }
-    if (this.generateModel) {
-      await this.generateModel.dispose();
-      this.generateModel = null;
-    }
-    if (this.rerankModel) {
-      await this.rerankModel.dispose();
-      this.rerankModel = null;
-    }
-
+    // Disposing llama cascades to models and contexts automatically
+    // See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
     if (this.llama) {
       await this.llama.dispose();
-      this.llama = null;
     }
+
+    // Clear references
+    this.embedContext = null;
+    this.generateContext = null;
+    this.rerankContext = null;
+    this.embedModel = null;
+    this.generateModel = null;
+    this.rerankModel = null;
+    this.llama = null;
   }
 }
 

+ 1 - 2
src/mcp.test.ts

@@ -10,7 +10,7 @@ import { Database } from "bun:sqlite";
 import * as sqliteVec from "sqlite-vec";
 import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
-import { setDefaultLlamaCpp, disposeDefaultLlamaCpp, LlamaCpp } from "./llm";
+import { setDefaultLlamaCpp, LlamaCpp } from "./llm";
 import { mkdtemp, writeFile, readdir, unlink, rmdir } from "node:fs/promises";
 import { join } from "node:path";
 import { tmpdir } from "node:os";
@@ -225,7 +225,6 @@ describe("MCP Server", () => {
   });
 
   afterAll(async () => {
-    // Don't dispose llama - let process exit handle Metal cleanup naturally
     testDb.close();
     try {
       require("fs").unlinkSync(testDbPath);