ソースを参照

Merge pull request #530 from kuishou68/fix-status-no-build-probe

Tobias Lütke 1 ヶ月 前
コミット
171e9e3e65
3 ファイル変更、38 行追加、8 行削除
  1. + 7 - 4
      src/cli/qmd.ts
  2. + 5 - 4
      src/llm.ts
  3. + 26 - 0
      test/llm.test.ts

+ 7 - 4
src/cli/qmd.ts

@@ -461,10 +461,10 @@ async function showStatus(): Promise<void> {
   }
 
   // Device / GPU info
+  console.log(`\n${c.bold}Device${c.reset}`);
   try {
     const llm = getDefaultLlamaCpp();
-    const device = await llm.getDeviceInfo();
-    console.log(`\n${c.bold}Device${c.reset}`);
+    const device = await llm.getDeviceInfo({ allowBuild: false });
     if (device.gpu) {
       console.log(`  GPU:      ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
       if (device.gpuDevices.length > 0) {
@@ -486,8 +486,11 @@ async function showStatus(): Promise<void> {
       console.log(`  ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
     }
     console.log(`  CPU:      ${device.cpuCores} math cores`);
-  } catch {
-    // Don't fail status if LLM init fails
+  } catch (error) {
+    console.log(`  Status:   ${c.dim}skipped${c.reset} (status probe does not build llama.cpp backends)`);
+    if (error instanceof Error && error.message) {
+      console.log(`  ${c.dim}${error.message}${c.reset}`);
+    }
   }
 
   // Tips section

+ 5 - 4
src/llm.ts

@@ -562,15 +562,16 @@ export class LlamaCpp implements LLM {
   /**
    * Initialize the llama instance (lazy)
    */
-  private async ensureLlama(): Promise<Llama> {
+  private async ensureLlama(allowBuild = true): Promise<Llama> {
     if (!this.llama) {
       const gpuMode = resolveLlamaGpuMode();
 
       const loadLlama = async (gpu: LlamaGpuMode) =>
         await getLlama({
-          build: "autoAttempt",
+          build: allowBuild ? "autoAttempt" : "never",
           logLevel: LlamaLogLevel.error,
           gpu,
+          skipDownload: !allowBuild,
         });
 
       let llama: Llama;
@@ -1254,14 +1255,14 @@ export class LlamaCpp implements LLM {
    * Get device/GPU info for status display.
    * Initializes llama if not already done.
    */
-  async getDeviceInfo(): Promise<{
+  async getDeviceInfo(options: { allowBuild?: boolean } = {}): Promise<{
     gpu: string | false;
     gpuOffloading: boolean;
     gpuDevices: string[];
     vram?: { total: number; used: number; free: number };
     cpuCores: number;
   }> {
-    const llama = await this.ensureLlama();
+    const llama = await this.ensureLlama(options.allowBuild ?? true);
     const gpuDevices = await llama.getGpuDeviceNames();
     let vram: { total: number; used: number; free: number } | undefined;
     if (llama.gpu) {

+ 26 - 0
test/llm.test.ts

@@ -226,6 +226,32 @@ describe("LlamaCpp rerank deduping", () => {
   });
 });
 
+describe("LlamaCpp.getDeviceInfo", () => {
+  test("can skip build attempts for status probes", async () => {
+    const llm = new LlamaCpp({}) as any;
+    const fakeLlama = {
+      gpu: "metal",
+      supportsGpuOffloading: true,
+      cpuMathCores: 8,
+      getGpuDeviceNames: vi.fn().mockResolvedValue(["Apple GPU"]),
+      getVramState: vi.fn().mockResolvedValue({ total: 1024, used: 256, free: 768 }),
+    };
+
+    llm.ensureLlama = vi.fn().mockResolvedValue(fakeLlama);
+
+    const device = await llm.getDeviceInfo({ allowBuild: false });
+
+    expect(llm.ensureLlama).toHaveBeenCalledWith(false);
+    expect(device).toEqual({
+      gpu: "metal",
+      gpuOffloading: true,
+      gpuDevices: ["Apple GPU"],
+      vram: { total: 1024, used: 256, free: 768 },
+      cpuCores: 8,
+    });
+  });
+});
+
 // =============================================================================
 // Integration Tests (require actual models)
 // =============================================================================