2 luni în urmă · e041f19285
--- a/dist/embedding/autofallback.d.ts
+++ b/dist/embedding/autofallback.d.ts
@@ -65,6 +65,15 @@ export declare class AutoFallbackEmbeddingProvider implements EmbeddingProvider
 
				      */
			
 
				     getModelId(): string;
			
 
				     getDimensions(): number | undefined;
			
 
				+    /**
			
 
				+     * Combined last-error from primary + fallback. Either, neither, or both legs
			
 
				+     * may have a tracked error after `embed()`/`embedBatch()` runs:
			
 
				+     *   - Both clean → undefined
			
 
				+     *   - Primary failed, fallback rescued → returns primary error (most useful)
			
 
				+     *   - Both failed → returns "primary: <msg> | fallback: <msg>"
			
 
				+     *   - Primary skipped (cooldown), fallback failed → fallback error, or the combined form if primary still holds an error
			
 
				+     */
			
 
				+    getLastError(): string | undefined;
			
 
				     /** Current routing state (mostly for tests + observability) */
			
 
				     getRoutingState(): FallbackState;
			
 
				     /** Reset failure-streak + cooldown (mostly for tests / admin) */
			
--- a/dist/embedding/autofallback.js
+++ b/dist/embedding/autofallback.js
@@ -68,6 +68,22 @@ export class AutoFallbackEmbeddingProvider {
 
				     getDimensions() {
			
 
				         return this.primary.getDimensions() ?? this.fallback.getDimensions();
			
 
				     }
			
 
				+    /**
			
 
				+     * Combined last-error from primary + fallback. Either, neither, or both legs
			
 
				+     * may have a tracked error after `embed()`/`embedBatch()` runs:
			
 
				+     *   - Both clean → undefined
			
 
				+     *   - Primary failed, fallback rescued → returns primary error (most useful)
			
 
				+     *   - Both failed → returns "primary: <msg> | fallback: <msg>"
			
 
				+     *   - Primary skipped (cooldown), fallback failed → fallback error, or the combined form if primary still holds an error
			
 
				+     */
			
 
				+    getLastError() {
			
 
				+        const primaryErr = this.primary.getLastError?.();
			
 
				+        const fallbackErr = this.fallback.getLastError?.();
			
 
				+        if (primaryErr && fallbackErr) {
			
 
				+            return `primary: ${primaryErr} | fallback: ${fallbackErr}`;
			
 
				+        }
			
 
				+        return primaryErr ?? fallbackErr;
			
 
				+    }
			
 
				     /** Current routing state (mostly for tests + observability) */
			
 
				     getRoutingState() {
			
 
				         if (this.fallbackUntil !== null && this.now() < this.fallbackUntil) {
			
--- a/dist/embedding/local.d.ts
+++ b/dist/embedding/local.d.ts
@@ -21,9 +21,16 @@ export declare class LocalLlamaCppProvider implements EmbeddingProvider {
 
				     private readonly llm;
			
 
				     private readonly modelId;
			
 
				     private dimensions;
			
 
				+    private lastError;
			
 
				     constructor(config?: LocalLlamaCppProviderConfig);
			
 
				     getModelId(): string;
			
 
				     getDimensions(): number | undefined;
			
 
				+    /**
			
 
				+     * Most recent failure (thrown `llm` error, null result, or caller abort). Returns
			
 
				+     * `undefined` after a successful call or before the first call. See
			
 
				+     * `EmbeddingProvider.getLastError`.
			
 
				+     */
			
 
				+    getLastError(): string | undefined;
			
 
				     healthcheck(_signal?: AbortSignal): Promise<ProviderHealth>;
			
 
				     embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>;
			
 
				     embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
			
--- a/dist/embedding/local.js
+++ b/dist/embedding/local.js
@@ -11,6 +11,7 @@ export class LocalLlamaCppProvider {
 
				     llm;
			
 
				     modelId;
			
 
				     dimensions = undefined;
			
 
				+    lastError = undefined;
			
 
				     constructor(config = {}) {
			
 
				         this.llm = config.llm ?? getDefaultLlamaCpp();
			
 
				         this.modelId = config.modelId ?? "embeddinggemma";
			
@@ -21,6 +22,14 @@ export class LocalLlamaCppProvider {
 
				     getDimensions() {
			
 
				         return this.dimensions;
			
 
				     }
			
 
				+    /**
			
 
				+     * Most recent failure (thrown `llm` error, null result, or caller abort). Returns
			
 
				+     * `undefined` after a successful call or before the first call. See
			
 
				+     * `EmbeddingProvider.getLastError`.
			
 
				+     */
			
 
				+    getLastError() {
			
 
				+        return this.lastError;
			
 
				+    }
			
 
				     async healthcheck(_signal) {
			
 
				         // For the local provider, "healthy" means the embed model loads.
			
 
				         // We probe with a single embed call.
			
@@ -50,14 +59,26 @@ export class LocalLlamaCppProvider {
 
				         }
			
 
				     }
			
 
				     async embed(text, options = {}) {
			
 
				-        if (options.signal?.aborted)
			
 
				+        if (options.signal?.aborted) {
			
 
				+            this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
			
 
				             return null;
			
 
				-        const result = await this.llm.embed(text, { model: options.model ?? this.modelId });
			
 
				-        if (!result)
			
 
				+        }
			
 
				+        let result;
			
 
				+        try {
			
 
				+            result = await this.llm.embed(text, { model: options.model ?? this.modelId });
			
 
				+        }
			
 
				+        catch (err) {
			
 
				+            this.lastError = `provider=local error="${err instanceof Error ? err.message : String(err)}"`;
			
 
				             return null;
			
 
				+        }
			
 
				+        if (!result) {
			
 
				+            this.lastError = `provider=local error="llm.embed returned null/undefined"`;
			
 
				+            return null;
			
 
				+        }
			
 
				         if (this.dimensions === undefined) {
			
 
				             this.dimensions = result.embedding.length;
			
 
				         }
			
 
				+        this.lastError = undefined;
			
 
				         return {
			
 
				             embedding: result.embedding,
			
 
				             model: this.modelId,
			
@@ -66,12 +87,21 @@ export class LocalLlamaCppProvider {
 
				     async embedBatch(texts, options = {}) {
			
 
				         if (texts.length === 0)
			
 
				             return [];
			
 
				-        if (options.signal?.aborted)
			
 
				+        if (options.signal?.aborted) {
			
 
				+            this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
			
 
				             return texts.map(() => null);
			
 
				-        const raw = await this.llm.embedBatch(texts, {
			
 
				-            model: options.model ?? this.modelId,
			
 
				-        });
			
 
				-        return raw.map((r) => {
			
 
				+        }
			
 
				+        let raw;
			
 
				+        try {
			
 
				+            raw = await this.llm.embedBatch(texts, {
			
 
				+                model: options.model ?? this.modelId,
			
 
				+            });
			
 
				+        }
			
 
				+        catch (err) {
			
 
				+            this.lastError = `provider=local error="${err instanceof Error ? err.message : String(err)}"`;
			
 
				+            return texts.map(() => null);
			
 
				+        }
			
 
				+        const out = raw.map((r) => {
			
 
				             if (!r)
			
 
				                 return null;
			
 
				             if (this.dimensions === undefined && r.embedding.length > 0) {
			
@@ -82,6 +112,13 @@ export class LocalLlamaCppProvider {
 
				                 model: this.modelId,
			
 
				             };
			
 
				         });
			
 
				+        if (out.every((r) => r !== null)) {
			
 
				+            this.lastError = undefined;
			
 
				+        }
			
 
				+        else if (out.some((r) => r === null)) {
			
 
				+            this.lastError = `provider=local error="llm.embedBatch returned null entries (${out.filter((r) => r === null).length}/${out.length})"`;
			
 
				+        }
			
 
				+        return out;
			
 
				     }
			
 
				     async dispose() {
			
 
				         // We do NOT dispose the underlying LlamaCpp here because the singleton
			
--- a/dist/embedding/openai.d.ts
+++ b/dist/embedding/openai.d.ts
@@ -162,14 +162,33 @@ export declare class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				     private readonly sleep;
			
 
				     private readonly now;
			
 
				     private dimensions;
			
 
				+    private lastError;
			
 
				     readonly breaker: CircuitBreaker;
			
 
				     constructor(config: OpenAIProviderConfig);
			
 
				     getModelId(): string;
			
 
				     getDimensions(): number | undefined;
			
 
				+    /**
			
 
				+     * Most recent per-chunk failure message (HTTP status + body preview, malformed
			
 
				+     * JSON, timeout, abort reason). Returns `undefined` after a fully successful call
			
 
				+     * or before the first call. See `EmbeddingProvider.getLastError`.
			
 
				+     */
			
 
				+    getLastError(): string | undefined;
			
 
				+    /** Endpoint URL configured at construction time — used by callers when
			
 
				+     *  building error messages for failed first-chunk probes. */
			
 
				+    getEndpoint(): string;
			
 
				     healthcheck(signal?: AbortSignal): Promise<ProviderHealth>;
			
 
				     embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>;
			
 
				     embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
			
 
				     dispose(): Promise<void>;
			
 
				+    /**
			
 
				+     * Format a request-failure context string for `lastError`. Includes endpoint
			
 
				+     * + HTTP status + body preview when the error was an `HttpError`, otherwise
			
 
				+     * falls back to the message of the underlying error (or the value itself
			
 
				+     * when not an Error). Kept short — body preview is already capped at 1024
			
 
				+     * chars by `HttpError`, but we trim further here (to 240 chars) for the dimension-probe
			
 
				+     * thrown error which surfaces directly to users.
			
 
				+     */
			
 
				+    private formatErrorContext;
			
 
				     private buildHeaders;
			
 
				     /**
			
 
				      * Single HTTP request with retry on 429/503. Returns embeddings indexed
			
--- a/dist/embedding/openai.js
+++ b/dist/embedding/openai.js
@@ -231,6 +231,7 @@ export class OpenAIEmbeddingsProvider {
 
				     sleep;
			
 
				     now;
			
 
				     dimensions = undefined;
			
 
				+    lastError = undefined;
			
 
				     breaker;
			
 
				     constructor(config) {
			
 
				         if (!config.endpoint) {
			
@@ -261,6 +262,19 @@ export class OpenAIEmbeddingsProvider {
 
				     getDimensions() {
			
 
				         return this.dimensions;
			
 
				     }
			
 
				+    /**
			
 
				+     * Most recent per-chunk failure message (HTTP status + body preview, malformed
			
 
				+     * JSON, timeout, abort reason). Returns `undefined` after a fully successful call
			
 
				+     * or before the first call. See `EmbeddingProvider.getLastError`.
			
 
				+     */
			
 
				+    getLastError() {
			
 
				+        return this.lastError;
			
 
				+    }
			
 
				+    /** Endpoint URL configured at construction time — used by callers when
			
 
				+     *  building error messages for failed first-chunk probes. */
			
 
				+    getEndpoint() {
			
 
				+        return this.endpoint;
			
 
				+    }
			
 
				     async healthcheck(signal) {
			
 
				         // Try GET /health first (worker exposes it). Fall back to probe embed.
			
 
				         try {
			
@@ -331,12 +345,14 @@ export class OpenAIEmbeddingsProvider {
 
				         const chunks = chunkArray(texts, this.batchSize);
			
 
				         const results = new Array(texts.length).fill(null);
			
 
				         let cursor = 0;
			
 
				+        let anySucceeded = false;
			
 
				         for (const chunk of chunks) {
			
 
				             const start = cursor;
			
 
				             cursor += chunk.length;
			
 
				             // Abort early if signal already fired
			
 
				             if (options.signal?.aborted) {
			
 
				                 // Leave remaining slots as null (caller treats as errors)
			
 
				+                this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
			
 
				                 return results;
			
 
				             }
			
 
				             // Fail-fast if breaker tripped mid-loop
			
@@ -352,6 +368,7 @@ export class OpenAIEmbeddingsProvider {
 
				                             embedding,
			
 
				                             model: this.modelId,
			
 
				                         };
			
 
				+                        anySucceeded = true;
			
 
				                         // Record dimensions on first success
			
 
				                         if (this.dimensions === undefined) {
			
 
				                             this.dimensions = embedding.length;
			
@@ -365,6 +382,10 @@ export class OpenAIEmbeddingsProvider {
 
				                 // CircuitOpenError must propagate so the caller can fall back
			
 
				                 if (err instanceof CircuitOpenError)
			
 
				                     throw err;
			
 
				+                // Capture the underlying error so callers (e.g. the store dimension
			
 
				+                // probe) can surface it instead of "Failed to get embedding
			
 
				+                // dimensions from first chunk" with no context.
			
 
				+                this.lastError = this.formatErrorContext(err);
			
 
				                 // Other errors mark the chunk as null and continue with next chunk.
			
 
				                 // (The store layer already handles per-text nulls as errors.)
			
 
				                 if (process.env.QMD_EMBED_DEBUG) {
			
@@ -372,6 +393,10 @@ export class OpenAIEmbeddingsProvider {
 
				                 }
			
 
				             }
			
 
				         }
			
 
				+        // Clear lastError on a fully-successful sweep (every input got an embedding).
			
 
				+        if (anySucceeded && results.every((r) => r !== null)) {
			
 
				+            this.lastError = undefined;
			
 
				+        }
			
 
				         return results;
			
 
				     }
			
 
				     async dispose() {
			
@@ -380,6 +405,24 @@ export class OpenAIEmbeddingsProvider {
 
				         this.breaker.reset();
			
 
				     }
			
 
				     // ────────────────────── Internals ──────────────────────
			
 
				+    /**
			
 
				+     * Format a request-failure context string for `lastError`. Includes endpoint
			
 
				+     * + HTTP status + body preview when the error was an `HttpError`, otherwise
			
 
				+     * falls back to the message of the underlying error (or the value itself
			
 
				+     * when not an Error). Kept short — body preview is already capped at 1024
			
 
				+     * chars by `HttpError`, but we trim further here (to 240 chars) for the dimension-probe
			
 
				+     * thrown error which surfaces directly to users.
			
 
				+     */
			
 
				+    formatErrorContext(err) {
			
 
				+        if (err instanceof HttpError) {
			
 
				+            const preview = err.bodyPreview.replace(/\s+/g, " ").trim().slice(0, 240);
			
 
				+            return `endpoint=${this.endpoint}/v1/embeddings status=${err.status}${preview ? ` body="${preview}"` : ""}`;
			
 
				+        }
			
 
				+        if (err instanceof Error) {
			
 
				+            return `endpoint=${this.endpoint}/v1/embeddings error="${err.message}"`;
			
 
				+        }
			
 
				+        return `endpoint=${this.endpoint}/v1/embeddings error="${String(err)}"`;
			
 
				+    }
			
 
				     buildHeaders() {
			
 
				         const headers = {
			
 
				             "Content-Type": "application/json",
			
--- a/dist/embedding/provider.d.ts
+++ b/dist/embedding/provider.d.ts
@@ -85,6 +85,23 @@ export interface EmbeddingProvider {
 
				      * upstream limits (e.g. OpenAI provider chunks to 64).
			
 
				      */
			
 
				     embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
			
 
				+    /**
			
 
				+     * Optional: most recent error message from a swallowed per-chunk failure.
			
 
				+     *
			
 
				+     * Per-chunk errors are intentionally swallowed (slot becomes `null`) so a
			
 
				+     * single bad text does not abort a 1000-doc embed run. Callers that need
			
 
				+     * to surface a meaningful error (e.g. the dimension-probe call site in
			
 
				+     * `store.ts` when even the first chunk fails) can read this field to
			
 
				+     * include the underlying cause (HTTP status, malformed JSON, timeout,
			
 
				+     * abort reason, …) in their own error message.
			
 
				+     *
			
 
				+     * Returns `undefined` when the most recent call succeeded or no call has
			
 
				+     * happened yet. Implementations MUST clear it on success.
			
 
				+     *
			
 
				+     * Optional so 3rd-party `EmbeddingProvider` implementations remain source-
			
 
				+     * compatible; callers must guard with `provider.getLastError?.()`.
			
 
				+     */
			
 
				+    getLastError?(): string | undefined;
			
 
				     /** Release any held resources (HTTP keep-alive sockets, model handles, …) */
			
 
				     dispose(): Promise<void>;
			
 
				 }
			
--- a/dist/store.js
+++ b/dist/store.js
@@ -1187,9 +1187,32 @@ export async function generateEmbeddings(store, options) {
 
				             if (!vectorTableInitialized) {
			
 
				                 const firstChunk = batchChunks[0];
			
 
				                 const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, embedModelUri);
			
 
				-                const firstResult = await embedOne(firstText, providerModel);
			
 
				+                // Single retry on transient failure (issue i-vm1lxwry). The provider
			
 
				+                // swallows per-chunk errors per its contract — `getLastError?.()`
			
 
				+                // surfaces the actual cause (HTTP status / abort / parse error) so we
			
 
				+                // can include it in the thrown message instead of the cryptic
			
 
				+                // "Failed to get embedding dimensions from first chunk".
			
 
				+                let firstResult = await embedOne(firstText, providerModel);
			
 
				+                if (!firstResult && session.isValid) {
			
 
				+                    const firstErr = provider?.getLastError?.();
			
 
				+                    // Brief backoff before retry — embedding worker may be re-warming
			
 
				+                    // a model or the GPU host may be transiently busy. 250ms is short
			
 
				+                    // enough to be invisible on the happy path and long enough to
			
 
				+                    // clear most "thundering-herd" race conditions.
			
 
				+                    await new Promise((resolve) => setTimeout(resolve, 250));
			
 
				+                    if (process.env.QMD_EMBED_DEBUG) {
			
 
				+                        process.stderr.write(`qmd embed: first-chunk dimension probe failed, retrying once${firstErr ? ` (last error: ${firstErr})` : ""}\n`);
			
 
				+                    }
			
 
				+                    firstResult = await embedOne(firstText, providerModel);
			
 
				+                }
			
 
				                 if (!firstResult) {
			
 
				-                    throw new Error("Failed to get embedding dimensions from first chunk");
			
 
				+                    const lastErr = provider?.getLastError?.();
			
 
				+                    const providerHint = provider ? `provider=${provider.kind}` : "provider=session";
			
 
				+                    const errSuffix = lastErr ? ` — underlying: ${lastErr}` : "";
			
 
				+                    const debugHint = process.env.QMD_EMBED_DEBUG
			
 
				+                        ? ""
			
 
				+                        : " (set QMD_EMBED_DEBUG=1 for per-chunk traces)";
			
 
				+                    throw new Error(`Failed to get embedding dimensions from first chunk after retry [${providerHint}]${errSuffix}${debugHint}`);
			
 
				                 }
			
 
				                 store.ensureVecTable(firstResult.embedding.length);
			
 
				                 vectorTableInitialized = true;
			
--- a/src/embedding/autofallback.ts
+++ b/src/embedding/autofallback.ts
@@ -107,6 +107,23 @@ export class AutoFallbackEmbeddingProvider implements EmbeddingProvider {
 
				     return this.primary.getDimensions() ?? this.fallback.getDimensions();
			
 
				   }
			
 
				 
			
 
				+  /**
			
 
				+   * Combined last-error from primary + fallback. Either, neither, or both legs
			
 
				+   * may have a tracked error after `embed()`/`embedBatch()` runs:
			
 
				+   *   - Both clean → undefined
			
 
				+   *   - Primary failed, fallback rescued → returns primary error (most useful)
			
 
				+   *   - Both failed → returns "primary: <msg> | fallback: <msg>"
			
 
				+   *   - Primary skipped (cooldown), fallback failed → fallback error, or the combined form if primary still holds an error
			
 
				+   */
			
 
				+  getLastError(): string | undefined {
			
 
				+    const primaryErr = this.primary.getLastError?.();
			
 
				+    const fallbackErr = this.fallback.getLastError?.();
			
 
				+    if (primaryErr && fallbackErr) {
			
 
				+      return `primary: ${primaryErr} | fallback: ${fallbackErr}`;
			
 
				+    }
			
 
				+    return primaryErr ?? fallbackErr;
			
 
				+  }
			
 
				+
			
 
				   /** Current routing state (mostly for tests + observability) */
			
 
				   getRoutingState(): FallbackState {
			
 
				     if (this.fallbackUntil !== null && this.now() < this.fallbackUntil) {
			
--- a/src/embedding/local.ts
+++ b/src/embedding/local.ts
@@ -34,6 +34,7 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
 
				   private readonly llm: LlamaCpp;
			
 
				   private readonly modelId: string;
			
 
				   private dimensions: number | undefined = undefined;
			
 
				+  private lastError: string | undefined = undefined;
			
 
				 
			
 
				   constructor(config: LocalLlamaCppProviderConfig = {}) {
			
 
				     this.llm = config.llm ?? getDefaultLlamaCpp();
			
@@ -48,6 +49,15 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
 
				     return this.dimensions;
			
 
				   }
			
 
				 
			
 
				+  /**
			
 
				+   * Most recent failure (thrown `llm` error, null result, or caller abort). Returns
			
 
				+   * `undefined` after a successful call or before the first call. See
			
 
				+   * `EmbeddingProvider.getLastError`.
			
 
				+   */
			
 
				+  getLastError(): string | undefined {
			
 
				+    return this.lastError;
			
 
				+  }
			
 
				+
			
 
				   async healthcheck(_signal?: AbortSignal): Promise<ProviderHealth> {
			
 
				     // For the local provider, "healthy" means the embed model loads.
			
 
				     // We probe with a single embed call.
			
@@ -80,12 +90,25 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
 
				     text: string,
			
 
				     options: ProviderEmbedOptions = {},
			
 
				   ): Promise<ProviderEmbedding | null> {
			
 
				-    if (options.signal?.aborted) return null;
			
 
				-    const result = await this.llm.embed(text, { model: options.model ?? this.modelId });
			
 
				-    if (!result) return null;
			
 
				+    if (options.signal?.aborted) {
			
 
				+      this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
			
 
				+      return null;
			
 
				+    }
			
 
				+    let result;
			
 
				+    try {
			
 
				+      result = await this.llm.embed(text, { model: options.model ?? this.modelId });
			
 
				+    } catch (err) {
			
 
				+      this.lastError = `provider=local error="${err instanceof Error ? err.message : String(err)}"`;
			
 
				+      return null;
			
 
				+    }
			
 
				+    if (!result) {
			
 
				+      this.lastError = `provider=local error="llm.embed returned null/undefined"`;
			
 
				+      return null;
			
 
				+    }
			
 
				     if (this.dimensions === undefined) {
			
 
				       this.dimensions = result.embedding.length;
			
 
				     }
			
 
				+    this.lastError = undefined;
			
 
				     return {
			
 
				       embedding: result.embedding,
			
 
				       model: this.modelId,
			
@@ -97,13 +120,22 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
 
				     options: ProviderEmbedOptions = {},
			
 
				   ): Promise<(ProviderEmbedding | null)[]> {
			
 
				     if (texts.length === 0) return [];
			
 
				-    if (options.signal?.aborted) return texts.map(() => null);
			
 
				+    if (options.signal?.aborted) {
			
 
				+      this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
			
 
				+      return texts.map(() => null);
			
 
				+    }
			
 
				 
			
 
				-    const raw = await this.llm.embedBatch(texts, {
			
 
				-      model: options.model ?? this.modelId,
			
 
				-    });
			
 
				+    let raw;
			
 
				+    try {
			
 
				+      raw = await this.llm.embedBatch(texts, {
			
 
				+        model: options.model ?? this.modelId,
			
 
				+      });
			
 
				+    } catch (err) {
			
 
				+      this.lastError = `provider=local error="${err instanceof Error ? err.message : String(err)}"`;
			
 
				+      return texts.map(() => null);
			
 
				+    }
			
 
				 
			
 
				-    return raw.map((r) => {
			
 
				+    const out = raw.map((r) => {
			
 
				       if (!r) return null;
			
 
				       if (this.dimensions === undefined && r.embedding.length > 0) {
			
 
				         this.dimensions = r.embedding.length;
			
@@ -113,6 +145,14 @@ export class LocalLlamaCppProvider implements EmbeddingProvider {
 
				         model: this.modelId,
			
 
				       };
			
 
				     });
			
 
				+
			
 
				+    if (out.every((r) => r !== null)) {
			
 
				+      this.lastError = undefined;
			
 
				+    } else if (out.some((r) => r === null)) {
			
 
				+      this.lastError = `provider=local error="llm.embedBatch returned null entries (${out.filter((r) => r === null).length}/${out.length})"`;
			
 
				+    }
			
 
				+
			
 
				+    return out;
			
 
				   }
			
 
				 
			
 
				   async dispose(): Promise<void> {
			
--- a/src/embedding/openai.ts
+++ b/src/embedding/openai.ts
@@ -323,6 +323,7 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				   private readonly now: () => number;
			
 
				 
			
 
				   private dimensions: number | undefined = undefined;
			
 
				+  private lastError: string | undefined = undefined;
			
 
				   readonly breaker: CircuitBreaker;
			
 
				 
			
 
				   constructor(config: OpenAIProviderConfig) {
			
@@ -360,6 +361,21 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				     return this.dimensions;
			
 
				   }
			
 
				 
			
 
				+  /**
			
 
				+   * Most recent per-chunk failure message (HTTP status + body preview, malformed
			
 
				+   * JSON, timeout, abort reason). Returns `undefined` after a fully successful call
			
 
				+   * or before the first call. See `EmbeddingProvider.getLastError`.
			
 
				+   */
			
 
				+  getLastError(): string | undefined {
			
 
				+    return this.lastError;
			
 
				+  }
			
 
				+
			
 
				+  /** Endpoint URL configured at construction time — used by callers when
			
 
				+   *  building error messages for failed first-chunk probes. */
			
 
				+  getEndpoint(): string {
			
 
				+    return this.endpoint;
			
 
				+  }
			
 
				+
			
 
				   async healthcheck(signal?: AbortSignal): Promise<ProviderHealth> {
			
 
				     // Try GET /health first (worker exposes it). Fall back to probe embed.
			
 
				     try {
			
@@ -437,6 +453,7 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				     const chunks = chunkArray(texts, this.batchSize);
			
 
				     const results: (ProviderEmbedding | null)[] = new Array(texts.length).fill(null);
			
 
				     let cursor = 0;
			
 
				+    let anySucceeded = false;
			
 
				 
			
 
				     for (const chunk of chunks) {
			
 
				       const start = cursor;
			
@@ -445,6 +462,7 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				       // Abort early if signal already fired
			
 
				       if (options.signal?.aborted) {
			
 
				         // Leave remaining slots as null (caller treats as errors)
			
 
				+        this.lastError = `aborted by caller${options.signal.reason ? `: ${String(options.signal.reason)}` : ""}`;
			
 
				         return results;
			
 
				       }
			
 
				 
			
@@ -462,6 +480,7 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				               embedding,
			
 
				               model: this.modelId,
			
 
				             };
			
 
				+            anySucceeded = true;
			
 
				             // Record dimensions on first success
			
 
				             if (this.dimensions === undefined) {
			
 
				               this.dimensions = embedding.length;
			
@@ -473,6 +492,10 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				         this.breaker.recordFailure();
			
 
				         // CircuitOpenError must propagate so the caller can fall back
			
 
				         if (err instanceof CircuitOpenError) throw err;
			
 
				+        // Capture the underlying error so callers (e.g. the store dimension
			
 
				+        // probe) can surface it instead of "Failed to get embedding
			
 
				+        // dimensions from first chunk" with no context.
			
 
				+        this.lastError = this.formatErrorContext(err);
			
 
				         // Other errors mark the chunk as null and continue with next chunk.
			
 
				         // (The store layer already handles per-text nulls as errors.)
			
 
				         if (process.env.QMD_EMBED_DEBUG) {
			
@@ -483,6 +506,11 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				       }
			
 
				     }
			
 
				 
			
 
				+    // Clear lastError on a fully-successful sweep (every input got an embedding).
			
 
				+    if (anySucceeded && results.every((r) => r !== null)) {
			
 
				+      this.lastError = undefined;
			
 
				+    }
			
 
				+
			
 
				     return results;
			
 
				   }
			
 
				 
			
@@ -494,6 +522,25 @@ export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
 
				 
			
 
				   // ────────────────────── Internals ──────────────────────
			
 
				 
			
 
				+  /**
			
 
				+   * Format a request-failure context string for `lastError`. Includes endpoint
			
 
				+   * + HTTP status + body preview when the error was an `HttpError`, otherwise
			
 
				+   * falls back to the message of the underlying error (or the value itself
			
 
				+   * when not an Error). Kept short — body preview is already capped at 1024
			
 
				+   * chars by `HttpError`, but we trim further here (to 240 chars) for the dimension-probe
			
 
				+   * thrown error which surfaces directly to users.
			
 
				+   */
			
 
				+  private formatErrorContext(err: unknown): string {
			
 
				+    if (err instanceof HttpError) {
			
 
				+      const preview = err.bodyPreview.replace(/\s+/g, " ").trim().slice(0, 240);
			
 
				+      return `endpoint=${this.endpoint}/v1/embeddings status=${err.status}${preview ? ` body="${preview}"` : ""}`;
			
 
				+    }
			
 
				+    if (err instanceof Error) {
			
 
				+      return `endpoint=${this.endpoint}/v1/embeddings error="${err.message}"`;
			
 
				+    }
			
 
				+    return `endpoint=${this.endpoint}/v1/embeddings error="${String(err)}"`;
			
 
				+  }
			
 
				+
			
 
				   private buildHeaders(): Record<string, string> {
			
 
				     const headers: Record<string, string> = {
			
 
				       "Content-Type": "application/json",
			
--- a/src/embedding/provider.ts
+++ b/src/embedding/provider.ts
@@ -96,6 +96,24 @@ export interface EmbeddingProvider {
 
				    */
			
 
				   embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
			
 
				 
			
 
				+  /**
			
 
				+   * Optional: most recent error message from a swallowed per-chunk failure.
			
 
				+   *
			
 
				+   * Per-chunk errors are intentionally swallowed (slot becomes `null`) so a
			
 
				+   * single bad text does not abort a 1000-doc embed run. Callers that need
			
 
				+   * to surface a meaningful error (e.g. the dimension-probe call site in
			
 
				+   * `store.ts` when even the first chunk fails) can read this field to
			
 
				+   * include the underlying cause (HTTP status, malformed JSON, timeout,
			
 
				+   * abort reason, …) in their own error message.
			
 
				+   *
			
 
				+   * Returns `undefined` when the most recent call succeeded or no call has
			
 
				+   * happened yet. Implementations MUST clear it on success.
			
 
				+   *
			
 
				+   * Optional so 3rd-party `EmbeddingProvider` implementations remain source-
			
 
				+   * compatible; callers must guard with `provider.getLastError?.()`.
			
 
				+   */
			
 
				+  getLastError?(): string | undefined;
			
 
				+
			
 
				   /** Release any held resources (HTTP keep-alive sockets, model handles, …) */
			
 
				   dispose(): Promise<void>;
			
 
				 }
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -1625,9 +1625,36 @@ export async function generateEmbeddings(
 
				       if (!vectorTableInitialized) {
			
 
				         const firstChunk = batchChunks[0]!;
			
 
				         const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, embedModelUri);
			
 
				-        const firstResult = await embedOne(firstText, providerModel);
			
 
				+        // Single retry on transient failure (issue i-vm1lxwry). The provider
			
 
				+        // swallows per-chunk errors per its contract — `getLastError?.()`
			
 
				+        // surfaces the actual cause (HTTP status / abort / parse error) so we
			
 
				+        // can include it in the thrown message instead of the cryptic
			
 
				+        // "Failed to get embedding dimensions from first chunk".
			
 
				+        let firstResult = await embedOne(firstText, providerModel);
			
 
				+        if (!firstResult && session.isValid) {
			
 
				+          const firstErr = provider?.getLastError?.();
			
 
				+          // Brief backoff before retry — embedding worker may be re-warming
			
 
				+          // a model or the GPU host may be transiently busy. 250ms is short
			
 
				+          // enough to be invisible on the happy path and long enough to
			
 
				+          // clear most "thundering-herd" race conditions.
			
 
				+          await new Promise((resolve) => setTimeout(resolve, 250));
			
 
				+          if (process.env.QMD_EMBED_DEBUG) {
			
 
				+            process.stderr.write(
			
 
				+              `qmd embed: first-chunk dimension probe failed, retrying once${firstErr ? ` (last error: ${firstErr})` : ""}\n`,
			
 
				+            );
			
 
				+          }
			
 
				+          firstResult = await embedOne(firstText, providerModel);
			
 
				+        }
			
 
				         if (!firstResult) {
			
 
				-          throw new Error("Failed to get embedding dimensions from first chunk");
			
 
				+          const lastErr = provider?.getLastError?.();
			
 
				+          const providerHint = provider ? `provider=${provider.kind}` : "provider=session";
			
 
				+          const errSuffix = lastErr ? ` — underlying: ${lastErr}` : "";
			
 
				+          const debugHint = process.env.QMD_EMBED_DEBUG
			
 
				+            ? ""
			
 
				+            : " (set QMD_EMBED_DEBUG=1 for per-chunk traces)";
			
 
				+          throw new Error(
			
 
				+            `Failed to get embedding dimensions from first chunk after retry [${providerHint}]${errSuffix}${debugHint}`,
			
 
				+          );
			
 
				         }
			
 
				         store.ensureVecTable(firstResult.embedding.length);
			
 
				         vectorTableInitialized = true;
			
--- a/test/embedding-autofallback.test.ts
+++ b/test/embedding-autofallback.test.ts
@@ -32,6 +32,8 @@ class FakeProvider implements EmbeddingProvider {
 
				   alwaysThrows: Error | null = null;
			
 
				   /** Health response */
			
 
				   healthResponse: ProviderHealth | null = null;
			
 
				+  /** Stub for getLastError() return value */
			
 
				+  lastErr: string | undefined = undefined;
			
 
				 
			
 
				   constructor(kind: ProviderKind, modelId: string, dim = 4) {
			
 
				     this.kind = kind;
			
@@ -45,6 +47,9 @@ class FakeProvider implements EmbeddingProvider {
 
				   getDimensions(): number | undefined {
			
 
				     return this.dim;
			
 
				   }
			
 
				+  getLastError(): string | undefined {
			
 
				+    return this.lastErr;
			
 
				+  }
			
 
				 
			
 
				   async healthcheck(): Promise<ProviderHealth> {
			
 
				     this.healthcheckCalls++;
			
@@ -343,6 +348,42 @@ describe("AutoFallbackEmbeddingProvider — healthcheck", () => {
 
				   });
			
 
				 });
			
 
				 
			
 
				+// ─────────────────────────── getLastError (i-vm1lxwry) ──────────────────────
			
 
				+
			
 
				+describe("AutoFallbackEmbeddingProvider — getLastError (i-vm1lxwry)", () => {
			
 
				+  test("returns undefined when both legs are clean", () => {
			
 
				+    const { af, primary, fallback } = buildAutoFallback();
			
 
				+    primary.lastErr = undefined;
			
 
				+    fallback.lastErr = undefined;
			
 
				+    expect(af.getLastError()).toBeUndefined();
			
 
				+  });
			
 
				+
			
 
				+  test("returns primary error when only primary has one", () => {
			
 
				+    const { af, primary, fallback } = buildAutoFallback();
			
 
				+    primary.lastErr = `endpoint=https://ai.mm.mk/v1/embeddings status=503 body="busy"`;
			
 
				+    fallback.lastErr = undefined;
			
 
				+    expect(af.getLastError()).toBe(primary.lastErr);
			
 
				+  });
			
 
				+
			
 
				+  test("returns fallback error when only fallback has one", () => {
			
 
				+    const { af, primary, fallback } = buildAutoFallback();
			
 
				+    primary.lastErr = undefined;
			
 
				+    fallback.lastErr = `provider=local error="model file not found"`;
			
 
				+    expect(af.getLastError()).toBe(fallback.lastErr);
			
 
				+  });
			
 
				+
			
 
				+  test("combines primary + fallback when both failed", () => {
			
 
				+    const { af, primary, fallback } = buildAutoFallback();
			
 
				+    primary.lastErr = `endpoint=https://ai.mm.mk/v1/embeddings status=503`;
			
 
				+    fallback.lastErr = `provider=local error="OOM"`;
			
 
				+    const combined = af.getLastError();
			
 
				+    expect(combined).toContain("primary:");
			
 
				+    expect(combined).toContain("fallback:");
			
 
				+    expect(combined).toContain("status=503");
			
 
				+    expect(combined).toContain("OOM");
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				 // ─────────────────────────── dispose ─────────────────────────────────────────
			
 
				 
			
 
				 describe("AutoFallbackEmbeddingProvider — dispose", () => {
			
--- a/test/embedding-openai.test.ts
+++ b/test/embedding-openai.test.ts
@@ -696,6 +696,86 @@ describe("HttpError", () => {
 
				   });
			
 
				 });
			
 
				 
			
 
				+// ─────────────────────────── lastError tracking (i-vm1lxwry) ────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — getLastError (i-vm1lxwry)", () => {
			
 
				+  test("returns undefined before first call", () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    expect(p.getLastError()).toBeUndefined();
			
 
				+  });
			
 
				+
			
 
				+  test("captures HTTP status + endpoint after non-retryable failure", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => mockResponse(500, "internal error: GPU OOM"),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    const r = await p.embed("hello");
			
 
				+    expect(r).toBeNull();
			
 
				+    const lastErr = p.getLastError();
			
 
				+    expect(lastErr).toBeDefined();
			
 
				+    expect(lastErr).toContain("https://ai.example.com/v1/embeddings");
			
 
				+    expect(lastErr).toContain("status=500");
			
 
				+    expect(lastErr).toContain("internal error: GPU OOM");
			
 
				+  });
			
 
				+
			
 
				+  test("captures malformed-JSON error message", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => new Response("not json at all", { status: 200, headers: { "content-type": "application/json" } }),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    const r = await p.embed("hello");
			
 
				+    expect(r).toBeNull();
			
 
				+    const lastErr = p.getLastError();
			
 
				+    expect(lastErr).toBeDefined();
			
 
				+    expect(lastErr).toContain("https://ai.example.com/v1/embeddings");
			
 
				+    expect(lastErr).toMatch(/error="/);
			
 
				+  });
			
 
				+
			
 
				+  test("clears lastError after a fully-successful sweep", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => mockResponse(500, "fail"),
			
 
				+      () => embeddingsResponse(["recovered"], 4),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    // First call fails — lastError set
			
 
				+    const r1 = await p.embed("first");
			
 
				+    expect(r1).toBeNull();
			
 
				+    expect(p.getLastError()).toBeDefined();
			
 
				+    // Second call succeeds — lastError cleared
			
 
				+    const r2 = await p.embed("recovered");
			
 
				+    expect(r2).not.toBeNull();
			
 
				+    expect(p.getLastError()).toBeUndefined();
			
 
				+  });
			
 
				+
			
 
				+  test("getEndpoint() exposes configured endpoint (no trailing slash)", () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com//",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    expect(p.getEndpoint()).toBe("https://ai.example.com");
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				 // ─────────────────────────── dispose ─────────────────────────────────────────
			
 
				 
			
 
				 describe("OpenAIEmbeddingsProvider — dispose", () => {
			
--- a/test/embedding-store-integration.test.ts
+++ b/test/embedding-store-integration.test.ts
@@ -298,3 +298,92 @@ describe("generateEmbeddings with EmbeddingProvider", () => {
 
				     expect(result.errors).toBe(0);
			
 
				   });
			
 
				 });
			
 
				+
			
 
				+// ─────── First-chunk dimension probe — retry + rich error (i-vm1lxwry) ───────
			
 
				+
			
 
				+/**
			
 
				+ * Provider that controls per-call success/failure for the first N calls,
			
 
				+ * exposing a `getLastError()` so the dimension-probe error path includes
			
 
				+ * the upstream cause. Used to exercise the issue i-vm1lxwry behavior.
			
 
				+ */
			
 
				+class FlakyProvider implements EmbeddingProvider {
			
 
				+  readonly kind = "openai" as const;
			
 
				+  readonly modelId: string;
			
 
				+  readonly dim: number;
			
 
				+  // Behavior plan: on call N, return plan[N] (true=success, false=fail, "throw"=throw)
			
 
				+  plan: Array<true | false | "throw">;
			
 
				+  callIdx = 0;
			
 
				+  private lastErr: string | undefined = undefined;
			
 
				+  errorMessage = `endpoint=https://ai.mm.mk/v1/embeddings status=500 body="probe failure"`;
			
 
				+
			
 
				+  constructor(modelId: string, dim: number, plan: Array<true | false | "throw">) {
			
 
				+    this.modelId = modelId;
			
 
				+    this.dim = dim;
			
 
				+    this.plan = plan;
			
 
				+  }
			
 
				+
			
 
				+  getModelId(): string { return this.modelId; }
			
 
				+  getDimensions(): number | undefined { return this.dim; }
			
 
				+  getLastError(): string | undefined { return this.lastErr; }
			
 
				+  async healthcheck(): Promise<ProviderHealth> {
			
 
				+    return { ok: true, model: this.modelId, dimensions: this.dim };
			
 
				+  }
			
 
				+  async embed(text: string): Promise<ProviderEmbedding | null> {
			
 
				+    return (await this.embedBatch([text]))[0] ?? null;
			
 
				+  }
			
 
				+  async embedBatch(texts: string[]): Promise<(ProviderEmbedding | null)[]> {
			
 
				+    const decision = this.plan[this.callIdx] ?? this.plan[this.plan.length - 1] ?? false;
			
 
				+    this.callIdx++;
			
 
				+    if (decision === "throw") {
			
 
				+      this.lastErr = this.errorMessage;
			
 
				+      throw new Error(this.errorMessage);
			
 
				+    }
			
 
				+    if (decision === false) {
			
 
				+      this.lastErr = this.errorMessage;
			
 
				+      return texts.map(() => null);
			
 
				+    }
			
 
				+    this.lastErr = undefined;
			
 
				+    return texts.map((t) => ({
			
 
				+      embedding: Array.from({ length: this.dim }, (_, i) => (t.length + i) * 0.01),
			
 
				+      model: this.modelId,
			
 
				+    }));
			
 
				+  }
			
 
				+  async dispose(): Promise<void> {}
			
 
				+}
			
 
				+
			
 
				+describe("first-chunk dimension probe — retry + rich error (i-vm1lxwry)", () => {
			
 
				+  test("retries once on null first-chunk and proceeds on success", async () => {
			
 
				+    // Plan: first call fails, second (retry) succeeds, all subsequent succeed
			
 
				+    const provider = new FlakyProvider("embeddinggemma", 4, [false, true]);
			
 
				+    const result = await generateEmbeddings(store, { embedProvider: provider });
			
 
				+    expect(result.errors).toBe(0);
			
 
				+    expect(result.docsProcessed).toBe(2);
			
 
				+    expect(result.chunksEmbedded).toBeGreaterThan(0);
			
 
				+    // We expect at least 2 calls: the failed first probe + the retry that succeeded.
			
 
				+    expect(provider.callIdx).toBeGreaterThanOrEqual(2);
			
 
				+  });
			
 
				+
			
 
				+  test("throws rich error including provider kind and underlying cause when both attempts fail", async () => {
			
 
				+    // Plan: every call returns null
			
 
				+    const provider = new FlakyProvider("embeddinggemma", 4, [false]);
			
 
				+    await expect(
			
 
				+      generateEmbeddings(store, { embedProvider: provider }),
			
 
				+    ).rejects.toThrow(/Failed to get embedding dimensions from first chunk after retry/);
			
 
				+    // Re-run to inspect the rejected error
			
 
				+    const provider2 = new FlakyProvider("embeddinggemma", 4, [false]);
			
 
				+    let caught: unknown = null;
			
 
				+    try {
			
 
				+      await generateEmbeddings(store, { embedProvider: provider2 });
			
 
				+    } catch (e) {
			
 
				+      caught = e;
			
 
				+    }
			
 
				+    expect(caught).toBeInstanceOf(Error);
			
 
				+    const msg = (caught as Error).message;
			
 
				+    expect(msg).toContain("provider=openai");
			
 
				+    expect(msg).toContain("ai.mm.mk");
			
 
				+    expect(msg).toContain("status=500");
			
 
				+    expect(msg).toContain("probe failure");
			
 
				+    // Both attempts (initial + retry) consumed → at least 2 calls.
			
 
				+    expect(provider2.callIdx).toBeGreaterThanOrEqual(2);
			
 
				+  });
			
 
				+});