Răsfoiți Sursa

fix(embedding): retry + rich error context for first-chunk dimension probe (i-vm1lxwry)

Previously, when `qmd embed chat-archives` could not get embedding dimensions
from the first chunk (e.g. transient HTTP 500/503 from ai.mm.mk, malformed
JSON, timeout), `store.ts` threw a cryptic:

    Failed to get embedding dimensions from first chunk

with no information about provider, endpoint, or underlying cause.

Changes:

  * `EmbeddingProvider` interface: new optional `getLastError(): string |
    undefined` (source-compatible — existing 3rd-party impls keep working).
  * `OpenAIEmbeddingsProvider`:
      - tracks `lastError` on every swallowed per-chunk failure
      - clears it on a fully-successful sweep
      - new `formatErrorContext()` produces "endpoint=… status=N body=…"
      - new `getEndpoint()` exposes the configured base URL
  * `LocalLlamaCppProvider`: same `lastError` tracking + clearing for
    `llm.embed` / `llm.embedBatch` failures and aborts.
  * `AutoFallbackEmbeddingProvider.getLastError()`: combines primary +
    fallback last errors (`primary: … | fallback: …` when both failed).
  * `store.ts` first-chunk dimension probe: SINGLE retry on null result
    after a 250ms backoff (transient embedding-service issues), then
    throws a rich error including provider kind, endpoint, status code,
    body preview, and a hint to set `QMD_EMBED_DEBUG=1`.

Tests (3 new describes, 11 new tests, 93 passing total):

  * `embedding-openai.test.ts § getLastError (i-vm1lxwry)`
      - undefined before first call
      - captures HTTP status + endpoint
      - captures malformed-JSON message
      - cleared after successful sweep
      - getEndpoint() strips trailing slashes
  * `embedding-autofallback.test.ts § getLastError (i-vm1lxwry)`
      - undefined when both legs clean
      - returns primary error / fallback error / combined
  * `embedding-store-integration.test.ts § first-chunk dimension probe`
      - retry succeeds on second attempt
      - throws rich error after both attempts fail (provider=openai,
        endpoint, status, body preview)

Resolves: i-vm1lxwry

Co-Authored-By: Claude <noreply@anthropic.com>
Session-Id: 435c1d69
Claude 3 săptămâni în urmă
părinte
comite
e041f19285

+ 9 - 0
dist/embedding/autofallback.d.ts

@@ -65,6 +65,15 @@ export declare class AutoFallbackEmbeddingProvider implements EmbeddingProvider
      */
     getModelId(): string;
     getDimensions(): number | undefined;
+    /**
+     * Combined last-error from primary + fallback. Either, neither, or both legs
+     * may have a tracked error after `embed()`/`embedBatch()` runs:
+     *   - Both clean → undefined
+     *   - Primary failed, fallback rescued → returns primary error (most useful)
+     *   - Both failed → returns "primary: <msg> | fallback: <msg>"
+     *   - Only primary skipped (cooldown), fallback also failed → returns fallback error
+     */
+    getLastError(): string | undefined;
     /** Current routing state (mostly for tests + observability) */
     getRoutingState(): FallbackState;
     /** Reset failure-streak + cooldown (mostly for tests / admin) */

+ 16 - 0
dist/embedding/autofallback.js

@@ -68,6 +68,22 @@ export class AutoFallbackEmbeddingProvider {
     getDimensions() {
         return this.primary.getDimensions() ?? this.fallback.getDimensions();
     }
+    /**
+     * Combined last-error from primary + fallback. Either, neither, or both legs
+     * may have a tracked error after `embed()`/`embedBatch()` runs:
+     *   - Both clean → undefined
+     *   - Primary failed, fallback rescued → returns primary error (most useful)
+     *   - Both failed → returns "primary: <msg> | fallback: <msg>"
+     *   - Only primary skipped (cooldown), fallback also failed → returns fallback error
+     */
+    getLastError() {
+        const primaryErr = this.primary.getLastError?.();
+        const fallbackErr = this.fallback.getLastError?.();
+        if (primaryErr && fallbackErr) {
+            return `primary: ${primaryErr} | fallback: ${fallbackErr}`;
+        }
+        return primaryErr ?? fallbackErr;
+    }
     /** Current routing state (mostly for tests + observability) */
     getRoutingState() {
         if (this.fallbackUntil !== null && this.now() < this.fallbackUntil) {

+ 7 - 0
dist/embedding/local.d.ts

@@ -21,9 +21,16 @@ export declare class LocalLlamaCppProvider implements EmbeddingProvider {
     private readonly llm;
     private readonly modelId;
     private dimensions;
+    private lastError;
     constructor(config?: LocalLlamaCppProviderConfig);
     getModelId(): string;
     getDimensions(): number | undefined;
+    /**
+     * Most recent thrown error from `llm.embed` / `llm.embedBatch`. Returns
+     * `undefined` after a successful call or before the first call. See
+     * `EmbeddingProvider.getLastError`.
+     */
+    getLastError(): string | undefined;
     healthcheck(_signal?: AbortSignal): Promise<ProviderHealth>;
     embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>;
     embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;

+ 45 - 8
dist/embedding/local.js

@@ -11,6 +11,7 @@ export class LocalLlamaCppProvider {
     llm;
     modelId;
     dimensions = undefined;
+    lastError = undefined;
     constructor(config = {}) {
         this.llm = config.llm ?? getDefaultLlamaCpp();
         this.modelId = config.modelId ?? "embeddinggemma";
@@ -21,6 +22,14 @@ export class LocalLlamaCppProvider {
     getDimensions() {
         return this.dimensions;
     }
+    /**
+     * Most recent thrown error from `llm.embed` / `llm.embedBatch`. Returns
+     * `undefined` after a successful call or before the first call. See
+     * `EmbeddingProvider.getLastError`.
+     */
+    getLastError() {
+        return this.lastError;
+    }
     async healthcheck(_signal) {
         // For the local provider, "healthy" means the embed model loads.
         // We probe with a single embed call.
@@ -50,14 +59,26 @@ export class LocalLlamaCppProvider {
         }
     }
     async embed(text, options = {}) {
-        if (options.signal?.aborted)
+        if (options.signal?.aborted) {
+            this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
             return null;
-        const result = await this.llm.embed(text, { model: options.model ?? this.modelId });
-        if (!result)
+        }
+        let result;
+        try {
+            result = await this.llm.embed(text, { model: options.model ?? this.modelId });
+        }
+        catch (err) {
+            this.lastError = `provider=local error="${err instanceof Error ? err.message : String(err)}"`;
             return null;
+        }
+        if (!result) {
+            this.lastError = `provider=local error="llm.embed returned null/undefined"`;
+            return null;
+        }
         if (this.dimensions === undefined) {
             this.dimensions = result.embedding.length;
         }
+        this.lastError = undefined;
         return {
             embedding: result.embedding,
             model: this.modelId,
@@ -66,12 +87,21 @@ export class LocalLlamaCppProvider {
     async embedBatch(texts, options = {}) {
         if (texts.length === 0)
             return [];
-        if (options.signal?.aborted)
+        if (options.signal?.aborted) {
+            this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
             return texts.map(() => null);
-        const raw = await this.llm.embedBatch(texts, {
-            model: options.model ?? this.modelId,
-        });
-        return raw.map((r) => {
+        }
+        let raw;
+        try {
+            raw = await this.llm.embedBatch(texts, {
+                model: options.model ?? this.modelId,
+            });
+        }
+        catch (err) {
+            this.lastError = `provider=local error="${err instanceof Error ? err.message : String(err)}"`;
+            return texts.map(() => null);
+        }
+        const out = raw.map((r) => {
             if (!r)
                 return null;
             if (this.dimensions === undefined && r.embedding.length > 0) {
@@ -82,6 +112,13 @@ export class LocalLlamaCppProvider {
                 model: this.modelId,
             };
         });
+        if (out.every((r) => r !== null)) {
+            this.lastError = undefined;
+        }
+        else if (out.some((r) => r === null)) {
+            this.lastError = `provider=local error="llm.embedBatch returned null entries (${out.filter((r) => r === null).length}/${out.length})"`;
+        }
+        return out;
     }
     async dispose() {
         // We do NOT dispose the underlying LlamaCpp here because the singleton

+ 19 - 0
dist/embedding/openai.d.ts

@@ -162,14 +162,33 @@ export declare class OpenAIEmbeddingsProvider implements EmbeddingProvider {
     private readonly sleep;
     private readonly now;
     private dimensions;
+    private lastError;
     readonly breaker: CircuitBreaker;
     constructor(config: OpenAIProviderConfig);
     getModelId(): string;
     getDimensions(): number | undefined;
+    /**
+     * Most recent per-chunk failure message (HTTP status + body preview, malformed
+     * JSON, timeout, abort reason). Returns `undefined` after a successful call
+     * or before the first call. See `EmbeddingProvider.getLastError`.
+     */
+    getLastError(): string | undefined;
+    /** Endpoint URL configured at construction time — used by callers when
+     *  building error messages for failed first-chunk probes. */
+    getEndpoint(): string;
     healthcheck(signal?: AbortSignal): Promise<ProviderHealth>;
     embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>;
     embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
     dispose(): Promise<void>;
+    /**
+     * Format a request-failure context string for `lastError`. Includes endpoint
+     * + HTTP status + body preview when the error was an `HttpError`, otherwise
+     * falls back to the message of the underlying error (or the value itself
+     * when not an Error). Kept short — body preview is already capped at 1024
+     * chars by `HttpError`, but we trim further here for the dimension-probe
+     * thrown error which surfaces directly to users.
+     */
+    private formatErrorContext;
     private buildHeaders;
     /**
      * Single HTTP request with retry on 429/503. Returns embeddings indexed

+ 43 - 0
dist/embedding/openai.js

@@ -231,6 +231,7 @@ export class OpenAIEmbeddingsProvider {
     sleep;
     now;
     dimensions = undefined;
+    lastError = undefined;
     breaker;
     constructor(config) {
         if (!config.endpoint) {
@@ -261,6 +262,19 @@ export class OpenAIEmbeddingsProvider {
     getDimensions() {
         return this.dimensions;
     }
+    /**
+     * Most recent per-chunk failure message (HTTP status + body preview, malformed
+     * JSON, timeout, abort reason). Returns `undefined` after a successful call
+     * or before the first call. See `EmbeddingProvider.getLastError`.
+     */
+    getLastError() {
+        return this.lastError;
+    }
+    /** Endpoint URL configured at construction time — used by callers when
+     *  building error messages for failed first-chunk probes. */
+    getEndpoint() {
+        return this.endpoint;
+    }
     async healthcheck(signal) {
         // Try GET /health first (worker exposes it). Fall back to probe embed.
         try {
@@ -331,12 +345,14 @@ export class OpenAIEmbeddingsProvider {
         const chunks = chunkArray(texts, this.batchSize);
         const results = new Array(texts.length).fill(null);
         let cursor = 0;
+        let anySucceeded = false;
         for (const chunk of chunks) {
             const start = cursor;
             cursor += chunk.length;
             // Abort early if signal already fired
             if (options.signal?.aborted) {
                 // Leave remaining slots as null (caller treats as errors)
+                this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
                 return results;
             }
             // Fail-fast if breaker tripped mid-loop
@@ -352,6 +368,7 @@ export class OpenAIEmbeddingsProvider {
                             embedding,
                             model: this.modelId,
                         };
+                        anySucceeded = true;
                         // Record dimensions on first success
                         if (this.dimensions === undefined) {
                             this.dimensions = embedding.length;
@@ -365,6 +382,10 @@ export class OpenAIEmbeddingsProvider {
                 // CircuitOpenError must propagate so the caller can fall back
                 if (err instanceof CircuitOpenError)
                     throw err;
+                // Capture the underlying error so callers (e.g. the store dimension
+                // probe) can surface it instead of "Failed to get embedding
+                // dimensions from first chunk" with no context.
+                this.lastError = this.formatErrorContext(err);
                 // Other errors mark the chunk as null and continue with next chunk.
                 // (The store layer already handles per-text nulls as errors.)
                 if (process.env.QMD_EMBED_DEBUG) {
@@ -372,6 +393,10 @@ export class OpenAIEmbeddingsProvider {
                 }
             }
         }
+        // Clear lastError on a fully-successful sweep (every input got an embedding).
+        if (anySucceeded && results.every((r) => r !== null)) {
+            this.lastError = undefined;
+        }
         return results;
     }
     async dispose() {
@@ -380,6 +405,24 @@ export class OpenAIEmbeddingsProvider {
         this.breaker.reset();
     }
     // ────────────────────── Internals ──────────────────────
+    /**
+     * Format a request-failure context string for `lastError`. Includes endpoint
+     * + HTTP status + body preview when the error was an `HttpError`, otherwise
+     * falls back to the message of the underlying error (or the value itself
+     * when not an Error). Kept short — body preview is already capped at 1024
+     * chars by `HttpError`, but we trim further here for the dimension-probe
+     * thrown error which surfaces directly to users.
+     */
+    formatErrorContext(err) {
+        if (err instanceof HttpError) {
+            const preview = err.bodyPreview.replace(/\s+/g, " ").trim().slice(0, 240);
+            return `endpoint=${this.endpoint}/v1/embeddings status=${err.status}${preview ? ` body="${preview}"` : ""}`;
+        }
+        if (err instanceof Error) {
+            return `endpoint=${this.endpoint}/v1/embeddings error="${err.message}"`;
+        }
+        return `endpoint=${this.endpoint}/v1/embeddings error="${String(err)}"`;
+    }
     buildHeaders() {
         const headers = {
             "Content-Type": "application/json",

+ 17 - 0
dist/embedding/provider.d.ts

@@ -85,6 +85,23 @@ export interface EmbeddingProvider {
      * upstream limits (e.g. OpenAI provider chunks to 64).
      */
     embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
+    /**
+     * Optional: most recent error message from a swallowed per-chunk failure.
+     *
+     * Per-chunk errors are intentionally swallowed (slot becomes `null`) so a
+     * single bad text does not abort a 1000-doc embed run. Callers that need
+     * to surface a meaningful error (e.g. the dimension-probe call site in
+     * `store.ts` when even the first chunk fails) can read this field to
+     * include the underlying cause (HTTP status, malformed JSON, timeout,
+     * abort reason, …) in their own error message.
+     *
+     * Returns `undefined` when the most recent call succeeded or no call has
+     * happened yet. Implementations MUST clear it on success.
+     *
+     * Optional so 3rd-party `EmbeddingProvider` implementations remain source-
+     * compatible; callers must guard with `provider.getLastError?.()`.
+     */
+    getLastError?(): string | undefined;
     /** Release any held resources (HTTP keep-alive sockets, model handles, …) */
     dispose(): Promise<void>;
 }

+ 25 - 2
dist/store.js

@@ -1187,9 +1187,32 @@ export async function generateEmbeddings(store, options) {
             if (!vectorTableInitialized) {
                 const firstChunk = batchChunks[0];
                 const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, embedModelUri);
-                const firstResult = await embedOne(firstText, providerModel);
+                // Single retry on transient failure (issue i-vm1lxwry). The provider
+                // swallows per-chunk errors per its contract — `getLastError?.()`
+                // surfaces the actual cause (HTTP status / abort / parse error) so we
+                // can include it in the thrown message instead of the cryptic
+                // "Failed to get embedding dimensions from first chunk".
+                let firstResult = await embedOne(firstText, providerModel);
+                if (!firstResult && session.isValid) {
+                    const firstErr = provider?.getLastError?.();
+                    // Brief backoff before retry — embedding worker may be re-warming
+                    // a model or the GPU host may be transiently busy. 250ms is short
+                    // enough to be invisible on the happy path and long enough to
+                    // clear most "thundering-herd" race conditions.
+                    await new Promise((resolve) => setTimeout(resolve, 250));
+                    if (process.env.QMD_EMBED_DEBUG) {
+                        process.stderr.write(`qmd embed: first-chunk dimension probe failed, retrying once${firstErr ? ` (last error: ${firstErr})` : ""}\n`);
+                    }
+                    firstResult = await embedOne(firstText, providerModel);
+                }
                 if (!firstResult) {
-                    throw new Error("Failed to get embedding dimensions from first chunk");
+                    const lastErr = provider?.getLastError?.();
+                    const providerHint = provider ? `provider=${provider.kind}` : "provider=session";
+                    const errSuffix = lastErr ? ` — underlying: ${lastErr}` : "";
+                    const debugHint = process.env.QMD_EMBED_DEBUG
+                        ? ""
+                        : " (set QMD_EMBED_DEBUG=1 for per-chunk traces)";
+                    throw new Error(`Failed to get embedding dimensions from first chunk after retry [${providerHint}]${errSuffix}${debugHint}`);
                 }
                 store.ensureVecTable(firstResult.embedding.length);
                 vectorTableInitialized = true;

+ 17 - 0
src/embedding/autofallback.ts

@@ -107,6 +107,23 @@ export class AutoFallbackEmbeddingProvider implements EmbeddingProvider {
     return this.primary.getDimensions() ?? this.fallback.getDimensions();
   }
 
+  /**
+   * Combined last-error from primary + fallback. Either, neither, or both legs
+   * may have a tracked error after `embed()`/`embedBatch()` runs:
+   *   - Both clean → undefined
+   *   - Primary failed, fallback rescued → returns primary error (most useful)
+   *   - Both failed → returns "primary: <msg> | fallback: <msg>"
+   *   - Only primary skipped (cooldown), fallback also failed → returns fallback error
+   */
+  getLastError(): string | undefined {
+    const primaryErr = this.primary.getLastError?.();
+    const fallbackErr = this.fallback.getLastError?.();
+    if (primaryErr && fallbackErr) {
+      return `primary: ${primaryErr} | fallback: ${fallbackErr}`;
+    }
+    return primaryErr ?? fallbackErr;
+  }
+
   /** Current routing state (mostly for tests + observability) */
   getRoutingState(): FallbackState {
     if (this.fallbackUntil !== null && this.now() < this.fallbackUntil) {

+ 48 - 8
src/embedding/local.ts

@@ -34,6 +34,7 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
   private readonly llm: LlamaCpp;
   private readonly modelId: string;
   private dimensions: number | undefined = undefined;
+  private lastError: string | undefined = undefined;
 
   constructor(config: LocalLlamaCppProviderConfig = {}) {
     this.llm = config.llm ?? getDefaultLlamaCpp();
@@ -48,6 +49,15 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
     return this.dimensions;
   }
 
+  /**
+   * Most recent thrown error from `llm.embed` / `llm.embedBatch`. Returns
+   * `undefined` after a successful call or before the first call. See
+   * `EmbeddingProvider.getLastError`.
+   */
+  getLastError(): string | undefined {
+    return this.lastError;
+  }
+
   async healthcheck(_signal?: AbortSignal): Promise<ProviderHealth> {
     // For the local provider, "healthy" means the embed model loads.
     // We probe with a single embed call.
@@ -80,12 +90,25 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
     text: string,
     options: ProviderEmbedOptions = {},
   ): Promise<ProviderEmbedding | null> {
-    if (options.signal?.aborted) return null;
-    const result = await this.llm.embed(text, { model: options.model ?? this.modelId });
-    if (!result) return null;
+    if (options.signal?.aborted) {
+      this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
+      return null;
+    }
+    let result;
+    try {
+      result = await this.llm.embed(text, { model: options.model ?? this.modelId });
+    } catch (err) {
+      this.lastError = `provider=local error="${err instanceof Error ? err.message : String(err)}"`;
+      return null;
+    }
+    if (!result) {
+      this.lastError = `provider=local error="llm.embed returned null/undefined"`;
+      return null;
+    }
     if (this.dimensions === undefined) {
       this.dimensions = result.embedding.length;
     }
+    this.lastError = undefined;
     return {
       embedding: result.embedding,
       model: this.modelId,
@@ -97,13 +120,22 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
     options: ProviderEmbedOptions = {},
   ): Promise<(ProviderEmbedding | null)[]> {
     if (texts.length === 0) return [];
-    if (options.signal?.aborted) return texts.map(() => null);
+    if (options.signal?.aborted) {
+      this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
+      return texts.map(() => null);
+    }
 
-    const raw = await this.llm.embedBatch(texts, {
-      model: options.model ?? this.modelId,
-    });
+    let raw;
+    try {
+      raw = await this.llm.embedBatch(texts, {
+        model: options.model ?? this.modelId,
+      });
+    } catch (err) {
+      this.lastError = `provider=local error="${err instanceof Error ? err.message : String(err)}"`;
+      return texts.map(() => null);
+    }
 
-    return raw.map((r) => {
+    const out = raw.map((r) => {
       if (!r) return null;
       if (this.dimensions === undefined && r.embedding.length > 0) {
         this.dimensions = r.embedding.length;
@@ -113,6 +145,14 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
         model: this.modelId,
       };
     });
+
+    if (out.every((r) => r !== null)) {
+      this.lastError = undefined;
+    } else if (out.some((r) => r === null)) {
+      this.lastError = `provider=local error="llm.embedBatch returned null entries (${out.filter((r) => r === null).length}/${out.length})"`;
+    }
+
+    return out;
   }
 
   async dispose(): Promise<void> {

+ 47 - 0
src/embedding/openai.ts

@@ -323,6 +323,7 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
   private readonly now: () => number;
 
   private dimensions: number | undefined = undefined;
+  private lastError: string | undefined = undefined;
   readonly breaker: CircuitBreaker;
 
   constructor(config: OpenAIProviderConfig) {
@@ -360,6 +361,21 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
     return this.dimensions;
   }
 
+  /**
+   * Most recent per-chunk failure message (HTTP status + body preview, malformed
+   * JSON, timeout, abort reason). Returns `undefined` after a successful call
+   * or before the first call. See `EmbeddingProvider.getLastError`.
+   */
+  getLastError(): string | undefined {
+    return this.lastError;
+  }
+
+  /** Endpoint URL configured at construction time — used by callers when
+   *  building error messages for failed first-chunk probes. */
+  getEndpoint(): string {
+    return this.endpoint;
+  }
+
   async healthcheck(signal?: AbortSignal): Promise<ProviderHealth> {
     // Try GET /health first (worker exposes it). Fall back to probe embed.
     try {
@@ -437,6 +453,7 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
     const chunks = chunkArray(texts, this.batchSize);
     const results: (ProviderEmbedding | null)[] = new Array(texts.length).fill(null);
     let cursor = 0;
+    let anySucceeded = false;
 
     for (const chunk of chunks) {
       const start = cursor;
@@ -445,6 +462,7 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
       // Abort early if signal already fired
       if (options.signal?.aborted) {
         // Leave remaining slots as null (caller treats as errors)
+        this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
         return results;
       }
 
@@ -462,6 +480,7 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
               embedding,
               model: this.modelId,
             };
+            anySucceeded = true;
             // Record dimensions on first success
             if (this.dimensions === undefined) {
               this.dimensions = embedding.length;
@@ -473,6 +492,10 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
         this.breaker.recordFailure();
         // CircuitOpenError must propagate so the caller can fall back
         if (err instanceof CircuitOpenError) throw err;
+        // Capture the underlying error so callers (e.g. the store dimension
+        // probe) can surface it instead of "Failed to get embedding
+        // dimensions from first chunk" with no context.
+        this.lastError = this.formatErrorContext(err);
         // Other errors mark the chunk as null and continue with next chunk.
         // (The store layer already handles per-text nulls as errors.)
         if (process.env.QMD_EMBED_DEBUG) {
@@ -483,6 +506,11 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
       }
     }
 
+    // Clear lastError on a fully-successful sweep (every input got an embedding).
+    if (anySucceeded && results.every((r) => r !== null)) {
+      this.lastError = undefined;
+    }
+
     return results;
   }
 
@@ -494,6 +522,25 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
   // ────────────────────── Internals ──────────────────────
 
+  /**
+   * Format a request-failure context string for `lastError`. Includes endpoint
+   * + HTTP status + body preview when the error was an `HttpError`, otherwise
+   * falls back to the message of the underlying error (or the value itself
+   * when not an Error). Kept short — body preview is already capped at 1024
+   * chars by `HttpError`, but we trim further here for the dimension-probe
+   * thrown error which surfaces directly to users.
+   */
+  private formatErrorContext(err: unknown): string {
+    if (err instanceof HttpError) {
+      const preview = err.bodyPreview.replace(/\s+/g, " ").trim().slice(0, 240);
+      return `endpoint=${this.endpoint}/v1/embeddings status=${err.status}${preview ? ` body="${preview}"` : ""}`;
+    }
+    if (err instanceof Error) {
+      return `endpoint=${this.endpoint}/v1/embeddings error="${err.message}"`;
+    }
+    return `endpoint=${this.endpoint}/v1/embeddings error="${String(err)}"`;
+  }
+
   private buildHeaders(): Record<string, string> {
     const headers: Record<string, string> = {
       "Content-Type": "application/json",

+ 18 - 0
src/embedding/provider.ts

@@ -96,6 +96,24 @@ export interface EmbeddingProvider {
    */
   embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
 
+  /**
+   * Optional: most recent error message from a swallowed per-chunk failure.
+   *
+   * Per-chunk errors are intentionally swallowed (slot becomes `null`) so a
+   * single bad text does not abort a 1000-doc embed run. Callers that need
+   * to surface a meaningful error (e.g. the dimension-probe call site in
+   * `store.ts` when even the first chunk fails) can read this field to
+   * include the underlying cause (HTTP status, malformed JSON, timeout,
+   * abort reason, …) in their own error message.
+   *
+   * Returns `undefined` when the most recent call succeeded or no call has
+   * happened yet. Implementations MUST clear it on success.
+   *
+   * Optional so 3rd-party `EmbeddingProvider` implementations remain source-
+   * compatible; callers must guard with `provider.getLastError?.()`.
+   */
+  getLastError?(): string | undefined;
+
   /** Release any held resources (HTTP keep-alive sockets, model handles, …) */
   dispose(): Promise<void>;
 }

+ 29 - 2
src/store.ts

@@ -1625,9 +1625,36 @@ export async function generateEmbeddings(
       if (!vectorTableInitialized) {
         const firstChunk = batchChunks[0]!;
         const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, embedModelUri);
-        const firstResult = await embedOne(firstText, providerModel);
+        // Single retry on transient failure (issue i-vm1lxwry). The provider
+        // swallows per-chunk errors per its contract — `getLastError?.()`
+        // surfaces the actual cause (HTTP status / abort / parse error) so we
+        // can include it in the thrown message instead of the cryptic
+        // "Failed to get embedding dimensions from first chunk".
+        let firstResult = await embedOne(firstText, providerModel);
+        if (!firstResult && session.isValid) {
+          const firstErr = provider?.getLastError?.();
+          // Brief backoff before retry — embedding worker may be re-warming
+          // a model or the GPU host may be transiently busy. 250ms is short
+          // enough to be invisible on the happy path and long enough to
+          // clear most "thundering-herd" race conditions.
+          await new Promise((resolve) => setTimeout(resolve, 250));
+          if (process.env.QMD_EMBED_DEBUG) {
+            process.stderr.write(
+              `qmd embed: first-chunk dimension probe failed, retrying once${firstErr ? ` (last error: ${firstErr})` : ""}\n`,
+            );
+          }
+          firstResult = await embedOne(firstText, providerModel);
+        }
         if (!firstResult) {
-          throw new Error("Failed to get embedding dimensions from first chunk");
+          const lastErr = provider?.getLastError?.();
+          const providerHint = provider ? `provider=${provider.kind}` : "provider=session";
+          const errSuffix = lastErr ? ` — underlying: ${lastErr}` : "";
+          const debugHint = process.env.QMD_EMBED_DEBUG
+            ? ""
+            : " (set QMD_EMBED_DEBUG=1 for per-chunk traces)";
+          throw new Error(
+            `Failed to get embedding dimensions from first chunk after retry [${providerHint}]${errSuffix}${debugHint}`,
+          );
         }
         store.ensureVecTable(firstResult.embedding.length);
         vectorTableInitialized = true;

+ 41 - 0
test/embedding-autofallback.test.ts

@@ -32,6 +32,8 @@ class FakeProvider implements EmbeddingProvider {
   alwaysThrows: Error | null = null;
   /** Health response */
   healthResponse: ProviderHealth | null = null;
+  /** Stub for getLastError() return value */
+  lastErr: string | undefined = undefined;
 
   constructor(kind: ProviderKind, modelId: string, dim = 4) {
     this.kind = kind;
@@ -45,6 +47,9 @@ class FakeProvider implements EmbeddingProvider {
   getDimensions(): number | undefined {
     return this.dim;
   }
+  getLastError(): string | undefined {
+    return this.lastErr;
+  }
 
   async healthcheck(): Promise<ProviderHealth> {
     this.healthcheckCalls++;
@@ -343,6 +348,42 @@ describe("AutoFallbackEmbeddingProvider — healthcheck", () => {
   });
 });
 
+// ─────────────────────────── getLastError (i-vm1lxwry) ──────────────────────
+
+describe("AutoFallbackEmbeddingProvider — getLastError (i-vm1lxwry)", () => {
+  test("returns undefined when both legs are clean", () => {
+    const { af, primary, fallback } = buildAutoFallback();
+    primary.lastErr = undefined;
+    fallback.lastErr = undefined;
+    expect(af.getLastError()).toBeUndefined();
+  });
+
+  test("returns primary error when only primary has one", () => {
+    const { af, primary, fallback } = buildAutoFallback();
+    primary.lastErr = `endpoint=https://ai.mm.mk/v1/embeddings status=503 body="busy"`;
+    fallback.lastErr = undefined;
+    expect(af.getLastError()).toBe(primary.lastErr);
+  });
+
+  test("returns fallback error when only fallback has one", () => {
+    const { af, primary, fallback } = buildAutoFallback();
+    primary.lastErr = undefined;
+    fallback.lastErr = `provider=local error="model file not found"`;
+    expect(af.getLastError()).toBe(fallback.lastErr);
+  });
+
+  test("combines primary + fallback when both failed", () => {
+    const { af, primary, fallback } = buildAutoFallback();
+    primary.lastErr = `endpoint=https://ai.mm.mk/v1/embeddings status=503`;
+    fallback.lastErr = `provider=local error="OOM"`;
+    const combined = af.getLastError();
+    expect(combined).toContain("primary:");
+    expect(combined).toContain("fallback:");
+    expect(combined).toContain("status=503");
+    expect(combined).toContain("OOM");
+  });
+});
+
 // ─────────────────────────── dispose ─────────────────────────────────────────
 
 describe("AutoFallbackEmbeddingProvider — dispose", () => {

+ 80 - 0
test/embedding-openai.test.ts

@@ -696,6 +696,86 @@ describe("HttpError", () => {
   });
 });
 
+// ─────────────────────────── lastError tracking (i-vm1lxwry) ────────────────
+
+describe("OpenAIEmbeddingsProvider — getLastError (i-vm1lxwry)", () => {
+  test("returns undefined before first call", () => {
+    const { fetchImpl } = makeFetchSequence([]);
+    const p = new OpenAIEmbeddingsProvider({
+      endpoint: "https://ai.example.com",
+      fetchImpl,
+    });
+    expect(p.getLastError()).toBeUndefined();
+  });
+
+  test("captures HTTP status + endpoint after non-retryable failure", async () => {
+    const { fetchImpl } = makeFetchSequence([
+      () => mockResponse(500, "internal error: GPU OOM"),
+    ]);
+    const p = new OpenAIEmbeddingsProvider({
+      endpoint: "https://ai.example.com",
+      fetchImpl,
+      retryBackoffsMs: [],
+      sleep: async () => {},
+    });
+    const r = await p.embed("hello");
+    expect(r).toBeNull();
+    const lastErr = p.getLastError();
+    expect(lastErr).toBeDefined();
+    expect(lastErr).toContain("https://ai.example.com/v1/embeddings");
+    expect(lastErr).toContain("status=500");
+    expect(lastErr).toContain("internal error: GPU OOM");
+  });
+
+  test("captures malformed-JSON error message", async () => {
+    const { fetchImpl } = makeFetchSequence([
+      () => new Response("not json at all", { status: 200, headers: { "content-type": "application/json" } }),
+    ]);
+    const p = new OpenAIEmbeddingsProvider({
+      endpoint: "https://ai.example.com",
+      fetchImpl,
+      retryBackoffsMs: [],
+      sleep: async () => {},
+    });
+    const r = await p.embed("hello");
+    expect(r).toBeNull();
+    const lastErr = p.getLastError();
+    expect(lastErr).toBeDefined();
+    expect(lastErr).toContain("https://ai.example.com/v1/embeddings");
+    expect(lastErr).toMatch(/error="/);
+  });
+
+  test("clears lastError after a fully-successful sweep", async () => {
+    const { fetchImpl } = makeFetchSequence([
+      () => mockResponse(500, "fail"),
+      () => embeddingsResponse(["recovered"], 4),
+    ]);
+    const p = new OpenAIEmbeddingsProvider({
+      endpoint: "https://ai.example.com",
+      fetchImpl,
+      retryBackoffsMs: [],
+      sleep: async () => {},
+    });
+    // First call fails — lastError set
+    const r1 = await p.embed("first");
+    expect(r1).toBeNull();
+    expect(p.getLastError()).toBeDefined();
+    // Second call succeeds — lastError cleared
+    const r2 = await p.embed("recovered");
+    expect(r2).not.toBeNull();
+    expect(p.getLastError()).toBeUndefined();
+  });
+
+  test("getEndpoint() exposes configured endpoint (no trailing slash)", () => {
+    const { fetchImpl } = makeFetchSequence([]);
+    const p = new OpenAIEmbeddingsProvider({
+      endpoint: "https://ai.example.com//",
+      fetchImpl,
+    });
+    expect(p.getEndpoint()).toBe("https://ai.example.com");
+  });
+});
+
 // ─────────────────────────── dispose ─────────────────────────────────────────
 
 describe("OpenAIEmbeddingsProvider — dispose", () => {

+ 89 - 0
test/embedding-store-integration.test.ts

@@ -298,3 +298,92 @@ describe("generateEmbeddings with EmbeddingProvider", () => {
     expect(result.errors).toBe(0);
   });
 });
+
+// ─────── First-chunk dimension probe — retry + rich error (i-vm1lxwry) ───────
+
+/**
+ * Provider that controls per-call success/failure for the first N calls,
+ * exposing a `getLastError()` so the dimension-probe error path includes
+ * the upstream cause. Used to exercise the issue i-vm1lxwry behavior.
+ */
+class FlakyProvider implements EmbeddingProvider {
+  readonly kind = "openai" as const;
+  readonly modelId: string;
+  readonly dim: number;
+  // Behavior plan: on call N, return plan[N] (true=success, false=fail, "throw"=throw)
+  plan: Array<true | false | "throw">;
+  callIdx = 0;
+  private lastErr: string | undefined = undefined;
+  errorMessage = `endpoint=https://ai.mm.mk/v1/embeddings status=500 body="probe failure"`;
+
+  constructor(modelId: string, dim: number, plan: Array<true | false | "throw">) {
+    this.modelId = modelId;
+    this.dim = dim;
+    this.plan = plan;
+  }
+
+  getModelId(): string { return this.modelId; }
+  getDimensions(): number | undefined { return this.dim; }
+  getLastError(): string | undefined { return this.lastErr; }
+  async healthcheck(): Promise<ProviderHealth> {
+    return { ok: true, model: this.modelId, dimensions: this.dim };
+  }
+  async embed(text: string): Promise<ProviderEmbedding | null> {
+    return (await this.embedBatch([text]))[0] ?? null;
+  }
+  async embedBatch(texts: string[]): Promise<(ProviderEmbedding | null)[]> {
+    const decision = this.plan[this.callIdx] ?? this.plan[this.plan.length - 1] ?? false;
+    this.callIdx++;
+    if (decision === "throw") {
+      this.lastErr = this.errorMessage;
+      throw new Error(this.errorMessage);
+    }
+    if (decision === false) {
+      this.lastErr = this.errorMessage;
+      return texts.map(() => null);
+    }
+    this.lastErr = undefined;
+    return texts.map((t) => ({
+      embedding: Array.from({ length: this.dim }, (_, i) => (t.length + i) * 0.01),
+      model: this.modelId,
+    }));
+  }
+  async dispose(): Promise<void> {}
+}
+
+describe("first-chunk dimension probe — retry + rich error (i-vm1lxwry)", () => {
+  test("retries once on null first-chunk and proceeds on success", async () => {
+    // Plan: first call fails, second (retry) succeeds, all subsequent succeed
+    const provider = new FlakyProvider("embeddinggemma", 4, [false, true]);
+    const result = await generateEmbeddings(store, { embedProvider: provider });
+    expect(result.errors).toBe(0);
+    expect(result.docsProcessed).toBe(2);
+    expect(result.chunksEmbedded).toBeGreaterThan(0);
+    // We expect at least 2 calls: the failed first probe + the retry that succeeded.
+    expect(provider.callIdx).toBeGreaterThanOrEqual(2);
+  });
+
+  test("throws rich error including provider kind and underlying cause when both attempts fail", async () => {
+    // Plan: every call returns null
+    const provider = new FlakyProvider("embeddinggemma", 4, [false]);
+    await expect(
+      generateEmbeddings(store, { embedProvider: provider }),
+    ).rejects.toThrow(/Failed to get embedding dimensions from first chunk after retry/);
+    // Re-run to inspect the rejected error
+    const provider2 = new FlakyProvider("embeddinggemma", 4, [false]);
+    let caught: unknown = null;
+    try {
+      await generateEmbeddings(store, { embedProvider: provider2 });
+    } catch (e) {
+      caught = e;
+    }
+    expect(caught).toBeInstanceOf(Error);
+    const msg = (caught as Error).message;
+    expect(msg).toContain("provider=openai");
+    expect(msg).toContain("ai.mm.mk");
+    expect(msg).toContain("status=500");
+    expect(msg).toContain("probe failure");
+    // Both attempts (initial + retry) consumed → at least 2 calls.
+    expect(provider2.callIdx).toBeGreaterThanOrEqual(2);
+  });
+});