2 місяців тому · 0463dd50dc
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -99,6 +99,14 @@ import {
 
				   loadConfig,
			
 
				 } from "../collections.js";
			
 
				 import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";
			
 
				+import {
			
 
				+  createEmbeddingProvider,
			
 
				+  resolveProviderKind,
			
 
				+  type EmbeddingProvider,
			
 
				+  type ProviderKind,
			
 
				+  type CreateEmbeddingProviderOptions,
			
 
				+  ModelMismatchError,
			
 
				+} from "../embedding/index.js";
			
 
				 
			
 
				 // Enable production mode - allows using default database path
			
 
				 // Tests must set INDEX_PATH or use createStore() with explicit path
			
@@ -1658,10 +1666,72 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
 
				   throw new Error(`--chunk-strategy must be "auto", "regex", or "function" (got "${s}")`);
			
 
				 }
			
 
				 
			
 
				+function parseProviderKind(value: unknown): ProviderKind | undefined {
			
 
				+  if (value === undefined) return undefined;
			
 
				+  const s = String(value).toLowerCase();
			
 
				+  if (s === "local" || s === "openai") return s;
			
 
				+  throw new Error(`--provider must be "local" or "openai" (got "${s}")`);
			
 
				+}
			
 
				+
			
 
				+function parseOptionalPositiveInt(name: string, value: unknown): number | undefined {
			
 
				+  if (value === undefined) return undefined;
			
 
				+  const parsed = Number(value);
			
 
				+  if (!Number.isInteger(parsed) || parsed < 1) {
			
 
				+    throw new Error(`${name} must be a positive integer`);
			
 
				+  }
			
 
				+  return parsed;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Translate `cli.values` into `CreateEmbeddingProviderOptions`. CLI flags
			
 
				+ * win over env vars (the factory itself reads env when these are unset).
			
 
				+ */
			
 
				+function buildProviderOpts(
			
 
				+  values: Record<string, unknown>,
			
 
				+  providerCliKind: ProviderKind | undefined,
			
 
				+): CreateEmbeddingProviderOptions {
			
 
				+  const endpoint = optionalString(values["embed-endpoint"]);
			
 
				+  const apiKey = optionalString(values["embed-api-key"]);
			
 
				+  const modelId = optionalString(values["embed-model-id"]);
			
 
				+  const upstreamModel = optionalString(values["embed-upstream-model"]);
			
 
				+  const batchSize = parseOptionalPositiveInt("--embed-batch-size", values["embed-batch-size"]);
			
 
				+  const timeoutMs = parseOptionalPositiveInt("--embed-timeout-ms", values["embed-timeout-ms"]);
			
 
				+
			
 
				+  // Only build the openai overrides object if the user supplied flags
			
 
				+  const openai =
			
 
				+    endpoint || apiKey || modelId || upstreamModel || batchSize !== undefined || timeoutMs !== undefined
			
 
				+      ? {
			
 
				+          ...(endpoint !== undefined ? { endpoint } : {}),
			
 
				+          ...(apiKey !== undefined ? { apiKey } : {}),
			
 
				+          ...(modelId !== undefined ? { modelId } : {}),
			
 
				+          ...(upstreamModel !== undefined ? { upstreamModel } : {}),
			
 
				+          ...(batchSize !== undefined ? { batchSize } : {}),
			
 
				+          ...(timeoutMs !== undefined ? { timeoutMs } : {}),
			
 
				+        }
			
 
				+      : undefined;
			
 
				+
			
 
				+  return {
			
 
				+    ...(providerCliKind ? { kind: providerCliKind } : {}),
			
 
				+    ...(openai ? { openai } : {}),
			
 
				+  };
			
 
				+}
			
 
				+
			
 
				+function optionalString(v: unknown): string | undefined {
			
 
				+  if (v === undefined || v === null) return undefined;
			
 
				+  const s = String(v);
			
 
				+  return s === "" ? undefined : s;
			
 
				+}
			
 
				+
			
 
				 async function vectorIndex(
			
 
				   model: string = DEFAULT_EMBED_MODEL_URI,
			
 
				   force: boolean = false,
			
 
				-  batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy },
			
 
				+  batchOptions?: {
			
 
				+    maxDocsPerBatch?: number;
			
 
				+    maxBatchBytes?: number;
			
 
				+    chunkStrategy?: ChunkStrategy;
			
 
				+    embedProvider?: EmbeddingProvider;
			
 
				+    providerKind?: ProviderKind;
			
 
				+  },
			
 
				 ): Promise<void> {
			
 
				   const storeInstance = getStore();
			
 
				   const db = storeInstance.db;
			
@@ -1679,6 +1749,13 @@ async function vectorIndex(
 
				   }
			
 
				 
			
 
				   console.log(`${c.dim}Model: ${model}${c.reset}\n`);
			
 
				+  if (batchOptions?.embedProvider) {
			
 
				+    const kind = batchOptions.embedProvider.kind;
			
 
				+    const providerModel = batchOptions.embedProvider.getModelId();
			
 
				+    console.log(`${c.dim}Provider: ${kind} (model id "${providerModel}")${c.reset}\n`);
			
 
				+  } else if (batchOptions?.providerKind) {
			
 
				+    console.log(`${c.dim}Provider: ${batchOptions.providerKind}${c.reset}\n`);
			
 
				+  }
			
 
				   if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
			
 
				     const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
			
 
				     const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
			
@@ -1695,6 +1772,7 @@ async function vectorIndex(
 
				     maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
			
 
				     maxBatchBytes: batchOptions?.maxBatchBytes,
			
 
				     chunkStrategy: batchOptions?.chunkStrategy,
			
 
				+    embedProvider: batchOptions?.embedProvider,
			
 
				     onProgress: (info) => {
			
 
				       if (info.totalBytes === 0) return;
			
 
				       const percent = (info.bytesProcessed / info.totalBytes) * 100;
			
@@ -2473,6 +2551,13 @@ function parseCLI() {
 
				       force: { type: "boolean", short: "f" },
			
 
				       "max-docs-per-batch": { type: "string" },
			
 
				       "max-batch-mb": { type: "string" },
			
 
				+      provider: { type: "string" },                  // "local" | "openai"
			
 
				+      "embed-endpoint": { type: "string" },          // OpenAI-compatible endpoint URL
			
 
				+      "embed-api-key": { type: "string" },           // Bearer token
			
 
				+      "embed-model-id": { type: "string" },          // Stable model id (default: embeddinggemma)
			
 
				+      "embed-upstream-model": { type: "string" },    // Upstream model name in HTTP body
			
 
				+      "embed-batch-size": { type: "string" },        // Batch size for HTTP provider
			
 
				+      "embed-timeout-ms": { type: "string" },        // Per-request timeout
			
 
				       // Update options
			
 
				       pull: { type: "boolean" },  // git pull before update
			
 
				       refresh: { type: "boolean" },
			
@@ -2694,6 +2779,13 @@ function showHelp(): void {
 
				   console.log("  qmd embed [-f]                - Generate/refresh vector embeddings");
			
 
				   console.log("    --max-docs-per-batch <n>    - Cap docs loaded into memory per embedding batch");
			
 
				   console.log("    --max-batch-mb <n>          - Cap UTF-8 MB loaded into memory per embedding batch");
			
 
				+  console.log("    --provider {local,openai}   - Embedding backend (default: local llama.cpp)");
			
 
				+  console.log("    --embed-endpoint <url>      - OpenAI-compatible endpoint (or QMD_EMBED_ENDPOINT)");
			
 
				+  console.log("    --embed-api-key <key>       - Bearer token (or QMD_EMBED_API_KEY)");
			
 
				+  console.log("    --embed-model-id <id>       - Stable model id stored in DB (default: embeddinggemma)");
			
 
				+  console.log("    --embed-upstream-model <m>  - Model name sent in HTTP body (default: same as model-id)");
			
 
				+  console.log("    --embed-batch-size <n>      - Batch size for HTTP provider (default: 64)");
			
 
				+  console.log("    --embed-timeout-ms <n>      - Per-request timeout in ms (default: 30000)");
			
 
				   console.log("  qmd cleanup                   - Clear caches, vacuum DB");
			
 
				   console.log("");
			
 
				   console.log("Query syntax (qmd query):");
			
@@ -3079,13 +3171,29 @@ if (isMain) {
 
				         const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
			
 
				         const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
			
 
				         const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
			
 
				+
			
 
				+        // Build embedding provider from CLI flags + env + config file.
			
 
				+        // Backward compat: with no flags / env vars, the factory returns
			
 
				+        // a LocalLlamaCppProvider that delegates to the default LlamaCpp
			
 
				+        // singleton — identical to pre-patch behavior.
			
 
				+        const providerCliKind = parseProviderKind(cli.values["provider"]);
			
 
				+        const providerOpts = buildProviderOpts(cli.values, providerCliKind);
			
 
				+        const embedProvider = createEmbeddingProvider(providerOpts);
			
 
				+
			
 
				         await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
			
 
				           maxDocsPerBatch,
			
 
				           maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
			
 
				           chunkStrategy: embedChunkStrategy,
			
 
				+          embedProvider,
			
 
				+          providerKind: embedProvider.kind,
			
 
				         });
			
 
				       } catch (error) {
			
 
				-        console.error(error instanceof Error ? error.message : String(error));
			
 
				+        if (error instanceof ModelMismatchError) {
			
 
				+          // Friendlier output for the migration-safety guard
			
 
				+          console.error(`${c.red}Model mismatch:${c.reset} ${error.message}`);
			
 
				+        } else {
			
 
				+          console.error(error instanceof Error ? error.message : String(error));
			
 
				+        }
			
 
				         process.exit(1);
			
 
				       }
			
 
				       break;
			
--- a/src/embedding/factory.ts
+++ b/src/embedding/factory.ts
@@ -0,0 +1,189 @@
 
				+/**
			
 
				+ * factory.ts - EmbeddingProvider factory with config precedence.
			
 
				+ *
			
 
				+ * Resolution order (first match wins):
			
 
				+ *   1. Explicit `kind` argument or `--provider` CLI flag → forces a kind
			
 
				+ *   2. `QMD_EMBED_ENDPOINT` env var present and non-empty → "openai"
			
 
				+ *   3. Config file (`~/.config/qmd/config.json`) `embedProvider.kind` → that kind
			
 
				+ *   4. Otherwise → "local" (legacy / backward-compat)
			
 
				+ *
			
 
				+ * Backward compat invariant: when neither `QMD_EMBED_ENDPOINT` nor
			
 
				+ * `~/.config/qmd/config.json` mentions a provider, callers get the same
			
 
				+ * `LocalLlamaCppProvider` they had before this change.
			
 
				+ */
			
 
				+
			
 
				+import { existsSync, readFileSync } from "node:fs";
			
 
				+import { homedir } from "node:os";
			
 
				+import { join } from "node:path";
			
 
				+
			
 
				+import { LocalLlamaCppProvider, type LocalLlamaCppProviderConfig } from "./local.js";
			
 
				+import {
			
 
				+  OpenAIEmbeddingsProvider,
			
 
				+  type OpenAIProviderConfig,
			
 
				+} from "./openai.js";
			
 
				+import type { EmbeddingProvider, ProviderKind } from "./provider.js";
			
 
				+
			
 
				+// ─────────────────────────── Config file ─────────────────────────────────────
			
 
				+
			
 
				+export type EmbedProviderConfigFile = {
			
 
				+  embedProvider?: {
			
 
				+    kind?: ProviderKind;
			
 
				+    endpoint?: string;
			
 
				+    apiKey?: string;
			
 
				+    modelId?: string;
			
 
				+    upstreamModel?: string;
			
 
				+    batchSize?: number;
			
 
				+    timeoutMs?: number;
			
 
				+  };
			
 
				+};
			
 
				+
			
 
				+export function defaultConfigPath(): string {
			
 
				+  const xdg = process.env.XDG_CONFIG_HOME;
			
 
				+  const base = xdg ? xdg : join(homedir(), ".config");
			
 
				+  return join(base, "qmd", "config.json");
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Load `~/.config/qmd/config.json` if present. Returns an empty object on
			
 
				+ * any read/parse error so we silently fall back to env/local.
			
 
				+ */
			
 
				+export function loadConfigFile(path: string = defaultConfigPath()): EmbedProviderConfigFile {
			
 
				+  if (!existsSync(path)) return {};
			
 
				+  try {
			
 
				+    const raw = readFileSync(path, "utf-8");
			
 
				+    const parsed = JSON.parse(raw);
			
 
				+    if (parsed && typeof parsed === "object") return parsed as EmbedProviderConfigFile;
			
 
				+  } catch {
			
 
				+    // Ignore — invalid JSON, missing read perm, etc.
			
 
				+  }
			
 
				+  return {};
			
 
				+}
			
 
				+
			
 
				+// ─────────────────────────── Factory options ────────────────────────────────
			
 
				+
			
 
				+export type CreateEmbeddingProviderOptions = {
			
 
				+  /** Force a specific provider kind. Overrides env + config. */
			
 
				+  kind?: ProviderKind;
			
 
				+  /** Override config file path (mostly for tests) */
			
 
				+  configPath?: string;
			
 
				+  /** Local-provider overrides */
			
 
				+  local?: LocalLlamaCppProviderConfig;
			
 
				+  /** OpenAI-provider overrides — merged on top of env/config */
			
 
				+  openai?: Partial<OpenAIProviderConfig>;
			
 
				+  /**
			
 
				+   * Custom env source (mostly for tests). Defaults to `process.env`.
			
 
				+   * Read keys: QMD_EMBED_PROVIDER, QMD_EMBED_ENDPOINT, QMD_EMBED_API_KEY,
			
 
				+   * QMD_EMBED_MODEL_ID, QMD_EMBED_UPSTREAM_MODEL, QMD_EMBED_BATCH_SIZE,
			
 
				+   * QMD_EMBED_TIMEOUT_MS.
			
 
				+   */
			
 
				+  env?: Record<string, string | undefined>;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * Resolve the provider kind without instantiating anything. Useful for
			
 
				+ * logging and tests.
			
 
				+ */
			
 
				+export function resolveProviderKind(opts: CreateEmbeddingProviderOptions = {}): ProviderKind {
			
 
				+  const env = opts.env ?? process.env;
			
 
				+  const cfg = loadConfigFile(opts.configPath);
			
 
				+
			
 
				+  // 1. Explicit kind argument
			
 
				+  if (opts.kind) return opts.kind;
			
 
				+
			
 
				+  // 2a. Explicit env override
			
 
				+  const envKind = env.QMD_EMBED_PROVIDER?.trim().toLowerCase();
			
 
				+  if (envKind === "local" || envKind === "openai") return envKind;
			
 
				+
			
 
				+  // 2b. Endpoint env present → openai
			
 
				+  if (env.QMD_EMBED_ENDPOINT && env.QMD_EMBED_ENDPOINT.trim() !== "") {
			
 
				+    return "openai";
			
 
				+  }
			
 
				+
			
 
				+  // 3. Config file
			
 
				+  if (cfg.embedProvider?.kind === "local" || cfg.embedProvider?.kind === "openai") {
			
 
				+    return cfg.embedProvider.kind;
			
 
				+  }
			
 
				+  if (cfg.embedProvider?.endpoint && cfg.embedProvider.endpoint.trim() !== "") {
			
 
				+    return "openai";
			
 
				+  }
			
 
				+
			
 
				+  // 4. Default
			
 
				+  return "local";
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Factory entry point — returns the appropriate `EmbeddingProvider`.
			
 
				+ * Throws if `openai` kind is requested but no endpoint is configured.
			
 
				+ */
			
 
				+export function createEmbeddingProvider(
			
 
				+  opts: CreateEmbeddingProviderOptions = {},
			
 
				+): EmbeddingProvider {
			
 
				+  const env = opts.env ?? process.env;
			
 
				+  const cfg = loadConfigFile(opts.configPath);
			
 
				+  const kind = resolveProviderKind(opts);
			
 
				+
			
 
				+  if (kind === "local") {
			
 
				+    return new LocalLlamaCppProvider(opts.local ?? {});
			
 
				+  }
			
 
				+
			
 
				+  // OpenAI
			
 
				+  const endpoint =
			
 
				+    opts.openai?.endpoint ??
			
 
				+    env.QMD_EMBED_ENDPOINT ??
			
 
				+    cfg.embedProvider?.endpoint;
			
 
				+  if (!endpoint || endpoint.trim() === "") {
			
 
				+    throw new Error(
			
 
				+      'createEmbeddingProvider: kind="openai" requires an endpoint. ' +
			
 
				+      "Set QMD_EMBED_ENDPOINT env var, or `embedProvider.endpoint` in " +
			
 
				+      "~/.config/qmd/config.json, or pass `openai.endpoint`.",
			
 
				+    );
			
 
				+  }
			
 
				+
			
 
				+  const apiKey =
			
 
				+    opts.openai?.apiKey ??
			
 
				+    env.QMD_EMBED_API_KEY ??
			
 
				+    cfg.embedProvider?.apiKey;
			
 
				+
			
 
				+  const modelId =
			
 
				+    opts.openai?.modelId ??
			
 
				+    env.QMD_EMBED_MODEL_ID ??
			
 
				+    cfg.embedProvider?.modelId ??
			
 
				+    "embeddinggemma";
			
 
				+
			
 
				+  const upstreamModel =
			
 
				+    opts.openai?.upstreamModel ??
			
 
				+    env.QMD_EMBED_UPSTREAM_MODEL ??
			
 
				+    cfg.embedProvider?.upstreamModel;
			
 
				+
			
 
				+  const batchSizeRaw =
			
 
				+    opts.openai?.batchSize ??
			
 
				+    parsePositiveInt(env.QMD_EMBED_BATCH_SIZE) ??
			
 
				+    cfg.embedProvider?.batchSize;
			
 
				+
			
 
				+  const timeoutMsRaw =
			
 
				+    opts.openai?.timeoutMs ??
			
 
				+    parsePositiveInt(env.QMD_EMBED_TIMEOUT_MS) ??
			
 
				+    cfg.embedProvider?.timeoutMs;
			
 
				+
			
 
				+  return new OpenAIEmbeddingsProvider({
			
 
				+    endpoint,
			
 
				+    apiKey,
			
 
				+    modelId,
			
 
				+    upstreamModel,
			
 
				+    batchSize: batchSizeRaw,
			
 
				+    timeoutMs: timeoutMsRaw,
			
 
				+    fetchImpl: opts.openai?.fetchImpl,
			
 
				+    retryBackoffsMs: opts.openai?.retryBackoffsMs,
			
 
				+    sleep: opts.openai?.sleep,
			
 
				+    now: opts.openai?.now,
			
 
				+  });
			
 
				+}
			
 
				+
			
 
				+// ─────────────────────────── Helpers ────────────────────────────────────────
			
 
				+
			
 
				+function parsePositiveInt(v: string | undefined): number | undefined {
			
 
				+  if (!v) return undefined;
			
 
				+  const parsed = Number.parseInt(v, 10);
			
 
				+  if (!Number.isFinite(parsed) || parsed <= 0) return undefined;
			
 
				+  return parsed;
			
 
				+}
			
--- a/src/embedding/index.ts
+++ b/src/embedding/index.ts
@@ -0,0 +1,41 @@
 
				+/**
			
 
				+ * embedding/index.ts - re-exports for the embedding provider abstraction.
			
 
				+ */
			
 
				+
			
 
				+export {
			
 
				+  type EmbeddingProvider,
			
 
				+  type ProviderKind,
			
 
				+  type ProviderEmbedding,
			
 
				+  type ProviderEmbedOptions,
			
 
				+  type ProviderHealth,
			
 
				+  ModelMismatchError,
			
 
				+  assertModelCompatible,
			
 
				+} from "./provider.js";
			
 
				+
			
 
				+export {
			
 
				+  LocalLlamaCppProvider,
			
 
				+  type LocalLlamaCppProviderConfig,
			
 
				+} from "./local.js";
			
 
				+
			
 
				+export {
			
 
				+  OpenAIEmbeddingsProvider,
			
 
				+  CircuitBreaker,
			
 
				+  CircuitOpenError,
			
 
				+  HttpError,
			
 
				+  isRetryableStatus,
			
 
				+  chunkArray,
			
 
				+  type OpenAIProviderConfig,
			
 
				+  type CircuitState,
			
 
				+  DEFAULT_BATCH_SIZE,
			
 
				+  DEFAULT_TIMEOUT_MS,
			
 
				+  RETRY_BACKOFFS_MS,
			
 
				+} from "./openai.js";
			
 
				+
			
 
				+export {
			
 
				+  createEmbeddingProvider,
			
 
				+  resolveProviderKind,
			
 
				+  loadConfigFile,
			
 
				+  defaultConfigPath,
			
 
				+  type CreateEmbeddingProviderOptions,
			
 
				+  type EmbedProviderConfigFile,
			
 
				+} from "./factory.js";
			
--- a/src/embedding/local.ts
+++ b/src/embedding/local.ts
@@ -0,0 +1,123 @@
 
				+/**
			
 
				+ * local.ts - Local llama.cpp adapter implementing EmbeddingProvider.
			
 
				+ *
			
 
				+ * Wraps an existing `LlamaCpp` instance so the legacy GGUF path looks like
			
 
				+ * any other EmbeddingProvider to upstream callers. Used as the default and
			
 
				+ * as the fallback target when `OpenAIEmbeddingsProvider` trips its breaker.
			
 
				+ */
			
 
				+
			
 
				+import {
			
 
				+  type LlamaCpp,
			
 
				+  getDefaultLlamaCpp,
			
 
				+} from "../llm.js";
			
 
				+import type {
			
 
				+  EmbeddingProvider,
			
 
				+  ProviderEmbedOptions,
			
 
				+  ProviderEmbedding,
			
 
				+  ProviderHealth,
			
 
				+  ProviderKind,
			
 
				+} from "./provider.js";
			
 
				+
			
 
				+export type LocalLlamaCppProviderConfig = {
			
 
				+  /** Pre-built LlamaCpp instance (optional — falls back to global singleton). */
			
 
				+  llm?: LlamaCpp;
			
 
				+  /**
			
 
				+   * Stable model id reported via `getModelId()`. Defaults to "embeddinggemma"
			
 
				+   * to match the value in `content_vectors.model` for existing qmd installs.
			
 
				+   */
			
 
				+  modelId?: string;
			
 
				+};
			
 
				+
			
 
				+export class LocalLlamaCppProvider implements EmbeddingProvider {
			
 
				+  readonly kind: ProviderKind = "local";
			
 
				+
			
 
				+  private readonly llm: LlamaCpp;
			
 
				+  private readonly modelId: string;
			
 
				+  private dimensions: number | undefined = undefined;
			
 
				+
			
 
				+  constructor(config: LocalLlamaCppProviderConfig = {}) {
			
 
				+    this.llm = config.llm ?? getDefaultLlamaCpp();
			
 
				+    this.modelId = config.modelId ?? "embeddinggemma";
			
 
				+  }
			
 
				+
			
 
				+  getModelId(): string {
			
 
				+    return this.modelId;
			
 
				+  }
			
 
				+
			
 
				+  getDimensions(): number | undefined {
			
 
				+    return this.dimensions;
			
 
				+  }
			
 
				+
			
 
				+  async healthcheck(_signal?: AbortSignal): Promise<ProviderHealth> {
			
 
				+    // For the local provider, "healthy" means the embed model loads.
			
 
				+    // We probe with a single embed call.
			
 
				+    try {
			
 
				+      const result = await this.llm.embed("healthcheck", { model: this.modelId });
			
 
				+      if (!result) {
			
 
				+        return {
			
 
				+          ok: false,
			
 
				+          model: this.modelId,
			
 
				+          detail: "embed probe returned null",
			
 
				+        };
			
 
				+      }
			
 
				+      this.dimensions = result.embedding.length;
			
 
				+      return {
			
 
				+        ok: true,
			
 
				+        model: this.modelId,
			
 
				+        dimensions: this.dimensions,
			
 
				+        detail: `local llama.cpp ready, ${this.dimensions}-d`,
			
 
				+      };
			
 
				+    } catch (err) {
			
 
				+      return {
			
 
				+        ok: false,
			
 
				+        model: this.modelId,
			
 
				+        detail: err instanceof Error ? err.message : String(err),
			
 
				+      };
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  async embed(
			
 
				+    text: string,
			
 
				+    options: ProviderEmbedOptions = {},
			
 
				+  ): Promise<ProviderEmbedding | null> {
			
 
				+    if (options.signal?.aborted) return null;
			
 
				+    const result = await this.llm.embed(text, { model: options.model ?? this.modelId });
			
 
				+    if (!result) return null;
			
 
				+    if (this.dimensions === undefined) {
			
 
				+      this.dimensions = result.embedding.length;
			
 
				+    }
			
 
				+    return {
			
 
				+      embedding: result.embedding,
			
 
				+      model: this.modelId,
			
 
				+    };
			
 
				+  }
			
 
				+
			
 
				+  async embedBatch(
			
 
				+    texts: string[],
			
 
				+    options: ProviderEmbedOptions = {},
			
 
				+  ): Promise<(ProviderEmbedding | null)[]> {
			
 
				+    if (texts.length === 0) return [];
			
 
				+    if (options.signal?.aborted) return texts.map(() => null);
			
 
				+
			
 
				+    const raw = await this.llm.embedBatch(texts, {
			
 
				+      model: options.model ?? this.modelId,
			
 
				+    });
			
 
				+
			
 
				+    return raw.map((r) => {
			
 
				+      if (!r) return null;
			
 
				+      if (this.dimensions === undefined && r.embedding.length > 0) {
			
 
				+        this.dimensions = r.embedding.length;
			
 
				+      }
			
 
				+      return {
			
 
				+        embedding: r.embedding,
			
 
				+        model: this.modelId,
			
 
				+      };
			
 
				+    });
			
 
				+  }
			
 
				+
			
 
				+  async dispose(): Promise<void> {
			
 
				+    // We do NOT dispose the underlying LlamaCpp here because the singleton
			
 
				+    // is shared with rerank/generate/expansion paths. Disposal is handled
			
 
				+    // by the existing `disposeDefaultLlamaCpp()` global hook.
			
 
				+  }
			
 
				+}
			
--- a/src/embedding/openai.ts
+++ b/src/embedding/openai.ts
@@ -0,0 +1,616 @@
 
				+/**
			
 
				+ * openai.ts - OpenAI-compatible HTTP embedding provider
			
 
				+ *
			
 
				+ * Talks to any endpoint that implements `POST /v1/embeddings` with the OpenAI
			
 
				+ * shape: request `{model, input: string|string[]}`, response
			
 
				+ * `{data: [{embedding: number[], index: number}, ...]}`.
			
 
				+ *
			
 
				+ * Used by qmd to delegate embeddings to a GPU worker (e.g. ai.mm.mk →
			
 
				+ * qmd-embed-worker on `models` LXC, RTX 4090) instead of running
			
 
				+ * node-llama-cpp locally.
			
 
				+ *
			
 
				+ * Features:
			
 
				+ *   - Batches input in groups of ≤64 (configurable via QMD_EMBED_BATCH_SIZE)
			
 
				+ *   - Retries 429 / 503 with exponential backoff (1s, 4s, 16s)
			
 
				+ *   - 4xx (non-429) → no retry, count as failure
			
 
				+ *   - Circuit breaker: >50% failures in a 60s window → OPEN for 5 min,
			
 
				+ *     callers can use this to fall back to a local provider
			
 
				+ *   - Per-call timeout via AbortSignal (default QMD_EMBED_TIMEOUT_MS=30000)
			
 
				+ *   - Healthcheck via `GET /health` if available, else a probe embed call
			
 
				+ */
			
 
				+
			
 
				+import type {
			
 
				+  EmbeddingProvider,
			
 
				+  ProviderEmbedOptions,
			
 
				+  ProviderEmbedding,
			
 
				+  ProviderHealth,
			
 
				+  ProviderKind,
			
 
				+} from "./provider.js";
			
 
				+
			
 
				+// ─────────────────────────── Configuration ───────────────────────────────────
			
 
				+
			
 
				+/**
			
 
				+ * Default batch size — most OpenAI-compatible embedding endpoints accept up to
			
 
				+ * 2048 inputs per call but for memory and latency we cap at 64.
			
 
				+ */
			
 
				+export const DEFAULT_BATCH_SIZE = 64;
			
 
				+
			
 
				+/**
			
 
				+ * Default per-request timeout (30 s). embeddinggemma-300M on RTX 4090 takes
			
 
				+ * <500ms per batch of 64 in practice; 30s is a safe upper bound.
			
 
				+ */
			
 
				+export const DEFAULT_TIMEOUT_MS = 30_000;
			
 
				+
			
 
				+/**
			
 
				+ * Retry backoff schedule (ms) for 429/503 responses. 3 attempts total
			
 
				+ * (initial + 2 retries) — aligns with issue spec "1s/4s/16s".
			
 
				+ */
			
 
				+export const RETRY_BACKOFFS_MS: readonly number[] = [1_000, 4_000, 16_000];
			
 
				+
			
 
				+/**
			
 
				+ * Circuit breaker — flips OPEN when error rate exceeds threshold within
			
 
				+ * window. While OPEN, every call fails fast so the caller can fall back.
			
 
				+ */
			
 
				+export const CIRCUIT_WINDOW_MS = 60_000;
			
 
				+export const CIRCUIT_OPEN_DURATION_MS = 5 * 60_000;
			
 
				+export const CIRCUIT_FAILURE_RATE_THRESHOLD = 0.5;
			
 
				+export const CIRCUIT_MIN_SAMPLES = 4;
			
 
				+
			
 
				+// ─────────────────────────── Types ───────────────────────────────────────────
			
 
				+
			
 
				+export type OpenAIProviderConfig = {
			
 
				+  /** Endpoint base URL — e.g. "https://ai.mm.mk" (no trailing slash) */
			
 
				+  endpoint: string;
			
 
				+  /** Optional bearer token sent as `Authorization: Bearer ...` */
			
 
				+  apiKey?: string;
			
 
				+  /**
			
 
				+   * Stable model identifier to report up via `getModelId()`.
			
 
				+   * Defaults to "embeddinggemma" to match qmd's existing DB rows.
			
 
				+   */
			
 
				+  modelId?: string;
			
 
				+  /**
			
 
				+   * Upstream model name sent in the HTTP request body. Often differs from
			
 
				+   * `modelId` (e.g. modelId="embeddinggemma" but upstream model="embeddinggemma:300m").
			
 
				+   */
			
 
				+  upstreamModel?: string;
			
 
				+  /** Batch size cap (default DEFAULT_BATCH_SIZE = 64) */
			
 
				+  batchSize?: number;
			
 
				+  /** Per-request timeout in ms (default DEFAULT_TIMEOUT_MS = 30_000) */
			
 
				+  timeoutMs?: number;
			
 
				+  /** Custom fetch (for testing). Defaults to global `fetch`. */
			
 
				+  fetchImpl?: typeof fetch;
			
 
				+  /** Custom retry schedule (for testing). Defaults to RETRY_BACKOFFS_MS. */
			
 
				+  retryBackoffsMs?: readonly number[];
			
 
				+  /** Custom sleep impl (for testing). Defaults to setTimeout. */
			
 
				+  sleep?: (ms: number) => Promise<void>;
			
 
				+  /** Custom clock (for testing). Defaults to Date.now. */
			
 
				+  now?: () => number;
			
 
				+};
			
 
				+
			
 
				+export type OpenAIEmbeddingsResponse = {
			
 
				+  object?: string;
			
 
				+  model?: string;
			
 
				+  data: Array<{
			
 
				+    object?: string;
			
 
				+    index: number;
			
 
				+    embedding: number[];
			
 
				+  }>;
			
 
				+  usage?: {
			
 
				+    prompt_tokens?: number;
			
 
				+    total_tokens?: number;
			
 
				+  };
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * Circuit breaker state — exported for tests
			
 
				+ */
			
 
				+export type CircuitState = "closed" | "open" | "half-open";
			
 
				+
			
 
				+// ─────────────────────────── Helpers ─────────────────────────────────────────
			
 
				+
			
 
				+function defaultSleep(ms: number): Promise<void> {
			
 
				+  return new Promise((resolve) => setTimeout(resolve, ms));
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Build the merged AbortSignal for a single HTTP attempt: combines an
			
 
				+ * external `userSignal` (from caller / withLLMSession) with a per-attempt
			
 
				+ * timeout signal. Returns the merged signal AND the timeout id so the
			
 
				+ * caller can `clearTimeout` after the attempt completes (avoids leaks).
			
 
				+ */
			
 
				+function buildAttemptSignal(
			
 
				+  userSignal: AbortSignal | undefined,
			
 
				+  timeoutMs: number,
			
 
				+): { signal: AbortSignal; cleanup: () => void } {
			
 
				+  const ctrl = new AbortController();
			
 
				+  const timeoutId = setTimeout(() => {
			
 
				+    ctrl.abort(new Error(`Request timed out after ${timeoutMs}ms`));
			
 
				+  }, timeoutMs);
			
 
				+  // Don't keep process alive just for this timer
			
 
				+  if (typeof timeoutId === "object" && timeoutId !== null && "unref" in timeoutId) {
			
 
				+    (timeoutId as { unref: () => void }).unref();
			
 
				+  }
			
 
				+
			
 
				+  const onUserAbort = () => ctrl.abort(userSignal?.reason);
			
 
				+  if (userSignal) {
			
 
				+    if (userSignal.aborted) {
			
 
				+      ctrl.abort(userSignal.reason);
			
 
				+    } else {
			
 
				+      userSignal.addEventListener("abort", onUserAbort, { once: true });
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  const cleanup = () => {
			
 
				+    clearTimeout(timeoutId);
			
 
				+    if (userSignal) userSignal.removeEventListener("abort", onUserAbort);
			
 
				+  };
			
 
				+
			
 
				+  return { signal: ctrl.signal, cleanup };
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Determine whether an HTTP status is retryable. 429 (Too Many Requests)
			
 
				+ * and 503 (Service Unavailable) are retried; 4xx (other than 429) are not.
			
 
				+ */
			
 
				+export function isRetryableStatus(status: number): boolean {
			
 
				+  return status === 429 || status === 503;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Chunk an array into pieces of ≤ size each. `size` MUST be ≥ 1.
			
 
				+ */
			
 
				+export function chunkArray<T>(items: T[], size: number): T[][] {
			
 
				+  if (size < 1) throw new Error(`chunkArray: size must be ≥ 1, got ${size}`);
			
 
				+  if (items.length <= size) return items.length === 0 ? [] : [items];
			
 
				+  const out: T[][] = [];
			
 
				+  for (let i = 0; i < items.length; i += size) {
			
 
				+    out.push(items.slice(i, i + size));
			
 
				+  }
			
 
				+  return out;
			
 
				+}
			
 
				+
			
 
				+// ─────────────────────────── Circuit Breaker ─────────────────────────────────
			
 
				+
			
 
				+/**
			
 
				+ * Sliding-window circuit breaker. Tracks the last N samples (min 4) over a
			
 
				+ * 60-second window; flips OPEN when failure rate exceeds 50%, then auto-
			
 
				+ * resets to HALF-OPEN after 5 minutes — at which point the next probe
			
 
				+ * decides whether to close (success) or re-open (failure).
			
 
				+ */
			
 
				+export class CircuitBreaker {
			
 
				+  private samples: { ts: number; ok: boolean }[] = [];
			
 
				+  private state: CircuitState = "closed";
			
 
				+  private openedAt: number | null = null;
			
 
				+  private readonly windowMs: number;
			
 
				+  private readonly openDurationMs: number;
			
 
				+  private readonly threshold: number;
			
 
				+  private readonly minSamples: number;
			
 
				+  private readonly now: () => number;
			
 
				+
			
 
				+  constructor(opts: {
			
 
				+    windowMs?: number;
			
 
				+    openDurationMs?: number;
			
 
				+    threshold?: number;
			
 
				+    minSamples?: number;
			
 
				+    now?: () => number;
			
 
				+  } = {}) {
			
 
				+    this.windowMs = opts.windowMs ?? CIRCUIT_WINDOW_MS;
			
 
				+    this.openDurationMs = opts.openDurationMs ?? CIRCUIT_OPEN_DURATION_MS;
			
 
				+    this.threshold = opts.threshold ?? CIRCUIT_FAILURE_RATE_THRESHOLD;
			
 
				+    this.minSamples = opts.minSamples ?? CIRCUIT_MIN_SAMPLES;
			
 
				+    this.now = opts.now ?? Date.now;
			
 
				+  }
			
 
				+
			
 
				+  getState(): CircuitState {
			
 
				+    this.tickAutoReset();
			
 
				+    return this.state;
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * Returns true when calls should be short-circuited (skip HTTP, fall back).
			
 
				+   * Side-effects: may transition OPEN → HALF-OPEN if the open window expired.
			
 
				+   */
			
 
				+  shouldFailFast(): boolean {
			
 
				+    return this.getState() === "open";
			
 
				+  }
			
 
				+
			
 
				+  /** Record a successful call. */
			
 
				+  recordSuccess(): void {
			
 
				+    // Honor the time-based OPEN→HALF-OPEN transition before deciding what
			
 
				+    // to do with this sample. Without this, a success that lands AFTER the
			
 
				+    // open window expired would still see state==="open" and never close
			
 
				+    // the breaker (a probe call could only flip it via getState()).
			
 
				+    this.tickAutoReset();
			
 
				+    this.pushSample(true);
			
 
				+    if (this.state === "half-open") {
			
 
				+      this.state = "closed";
			
 
				+      this.openedAt = null;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  /** Record a failed call. May trigger OPEN. */
			
 
				+  recordFailure(): void {
			
 
				+    // Same reasoning as recordSuccess — apply lazy auto-reset before
			
 
				+    // classifying the sample.
			
 
				+    this.tickAutoReset();
			
 
				+    this.pushSample(false);
			
 
				+    if (this.state === "half-open") {
			
 
				+      // Probe failed — re-open
			
 
				+      this.state = "open";
			
 
				+      this.openedAt = this.now();
			
 
				+      return;
			
 
				+    }
			
 
				+    if (this.state === "closed") this.evaluate();
			
 
				+  }
			
 
				+
			
 
				+  /** Force-reset the breaker (used by tests / admin) */
			
 
				+  reset(): void {
			
 
				+    this.samples = [];
			
 
				+    this.state = "closed";
			
 
				+    this.openedAt = null;
			
 
				+  }
			
 
				+
			
 
				+  private pushSample(ok: boolean): void {
			
 
				+    const ts = this.now();
			
 
				+    this.samples.push({ ts, ok });
			
 
				+    // Drop samples outside the window
			
 
				+    const cutoff = ts - this.windowMs;
			
 
				+    while (this.samples.length > 0 && this.samples[0]!.ts < cutoff) {
			
 
				+      this.samples.shift();
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  private evaluate(): void {
			
 
				+    if (this.samples.length < this.minSamples) return;
			
 
				+    const failures = this.samples.filter((s) => !s.ok).length;
			
 
				+    const rate = failures / this.samples.length;
			
 
				+    if (rate > this.threshold) {
			
 
				+      this.state = "open";
			
 
				+      this.openedAt = this.now();
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  private tickAutoReset(): void {
			
 
				+    if (this.state === "open" && this.openedAt !== null) {
			
 
				+      if (this.now() - this.openedAt >= this.openDurationMs) {
			
 
				+        this.state = "half-open";
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// ─────────────────────────── Errors ──────────────────────────────────────────
			
 
				+
			
 
				+/**
			
 
				+ * Raised when the circuit breaker is OPEN and a call is short-circuited.
			
 
				+ * Callers (e.g. fallback wrapper) can catch this to switch to local provider.
			
 
				+ */
			
 
				+export class CircuitOpenError extends Error {
			
 
				+  constructor(message = "OpenAIEmbeddingsProvider circuit is OPEN") {
			
 
				+    super(message);
			
 
				+    this.name = "CircuitOpenError";
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Persistent (non-retryable) HTTP error from upstream. Includes status code.
			
 
				+ */
			
 
				+export class HttpError extends Error {
			
 
				+  readonly status: number;
			
 
				+  readonly bodyPreview: string;
			
 
				+  constructor(status: number, bodyPreview: string) {
			
 
				+    super(`HTTP ${status}: ${bodyPreview.slice(0, 200)}`);
			
 
				+    this.name = "HttpError";
			
 
				+    this.status = status;
			
 
				+    this.bodyPreview = bodyPreview.slice(0, 1024);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// ─────────────────────────── Provider ────────────────────────────────────────
			
 
				+
			
 
				+export class OpenAIEmbeddingsProvider implements EmbeddingProvider {
			
 
				+  readonly kind: ProviderKind = "openai";
			
 
				+
			
 
				+  private readonly endpoint: string;
			
 
				+  private readonly apiKey?: string;
			
 
				+  private readonly modelId: string;
			
 
				+  private readonly upstreamModel: string;
			
 
				+  private readonly batchSize: number;
			
 
				+  private readonly timeoutMs: number;
			
 
				+  private readonly fetchImpl: typeof fetch;
			
 
				+  private readonly retryBackoffsMs: readonly number[];
			
 
				+  private readonly sleep: (ms: number) => Promise<void>;
			
 
				+  private readonly now: () => number;
			
 
				+
			
 
				+  private dimensions: number | undefined = undefined;
			
 
				+  readonly breaker: CircuitBreaker;
			
 
				+
			
 
				+  constructor(config: OpenAIProviderConfig) {
			
 
				+    if (!config.endpoint) {
			
 
				+      throw new Error("OpenAIEmbeddingsProvider: endpoint is required");
			
 
				+    }
			
 
				+    this.endpoint = config.endpoint.replace(/\/+$/, "");
			
 
				+    this.apiKey = config.apiKey;
			
 
				+    this.modelId = config.modelId ?? "embeddinggemma";
			
 
				+    this.upstreamModel = config.upstreamModel ?? this.modelId;
			
 
				+    this.batchSize = config.batchSize ?? DEFAULT_BATCH_SIZE;
			
 
				+    this.timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
			
 
				+    this.fetchImpl = config.fetchImpl ?? globalThis.fetch;
			
 
				+    this.retryBackoffsMs = config.retryBackoffsMs ?? RETRY_BACKOFFS_MS;
			
 
				+    this.sleep = config.sleep ?? defaultSleep;
			
 
				+    this.now = config.now ?? Date.now;
			
 
				+    this.breaker = new CircuitBreaker({ now: this.now });
			
 
				+
			
 
				+    if (!this.fetchImpl) {
			
 
				+      throw new Error(
			
 
				+        "OpenAIEmbeddingsProvider: global fetch is unavailable. " +
			
 
				+        "Provide a `fetchImpl` config option (Node ≥18 ships fetch by default).",
			
 
				+      );
			
 
				+    }
			
 
				+    if (this.batchSize < 1) {
			
 
				+      throw new Error(`OpenAIEmbeddingsProvider: batchSize must be ≥ 1, got ${this.batchSize}`);
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  getModelId(): string {
			
 
				+    return this.modelId;
			
 
				+  }
			
 
				+
			
 
				+  getDimensions(): number | undefined {
			
 
				+    return this.dimensions;
			
 
				+  }
			
 
				+
			
 
				+  async healthcheck(signal?: AbortSignal): Promise<ProviderHealth> {
			
 
				+    // Try GET /health first (worker exposes it). Fall back to probe embed.
			
 
				+    try {
			
 
				+      const { signal: attemptSig, cleanup } = buildAttemptSignal(signal, this.timeoutMs);
			
 
				+      try {
			
 
				+        const resp = await this.fetchImpl(`${this.endpoint}/health`, {
			
 
				+          method: "GET",
			
 
				+          headers: this.buildHeaders(),
			
 
				+          signal: attemptSig,
			
 
				+        });
			
 
				+        if (resp.ok) {
			
 
				+          return {
			
 
				+            ok: true,
			
 
				+            model: this.modelId,
			
 
				+            dimensions: this.dimensions,
			
 
				+            detail: `GET /health → ${resp.status}`,
			
 
				+          };
			
 
				+        }
			
 
				+        return {
			
 
				+          ok: false,
			
 
				+          model: this.modelId,
			
 
				+          detail: `GET /health → HTTP ${resp.status}`,
			
 
				+        };
			
 
				+      } finally {
			
 
				+        cleanup();
			
 
				+      }
			
 
				+    } catch (err) {
			
 
				+      // Endpoint may not implement /health — try a single embed probe instead.
			
 
				+      try {
			
 
				+        const probe = await this.embed("healthcheck", { signal });
			
 
				+        if (probe) {
			
 
				+          return {
			
 
				+            ok: true,
			
 
				+            model: this.modelId,
			
 
				+            dimensions: probe.embedding.length,
			
 
				+            detail: "embed probe ok",
			
 
				+          };
			
 
				+        }
			
 
				+        return {
			
 
				+          ok: false,
			
 
				+          model: this.modelId,
			
 
				+          detail: "embed probe returned null",
			
 
				+        };
			
 
				+      } catch (probeErr) {
			
 
				+        return {
			
 
				+          ok: false,
			
 
				+          model: this.modelId,
			
 
				+          detail:
			
 
				+            (err instanceof Error ? err.message : String(err)) +
			
 
				+            " | probe: " +
			
 
				+            (probeErr instanceof Error ? probeErr.message : String(probeErr)),
			
 
				+        };
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  async embed(
			
 
				+    text: string,
			
 
				+    options: ProviderEmbedOptions = {},
			
 
				+  ): Promise<ProviderEmbedding | null> {
			
 
				+    const batch = await this.embedBatch([text], options);
			
 
				+    return batch[0] ?? null;
			
 
				+  }
			
 
				+
			
 
				+  async embedBatch(
			
 
				+    texts: string[],
			
 
				+    options: ProviderEmbedOptions = {},
			
 
				+  ): Promise<(ProviderEmbedding | null)[]> {
			
 
				+    if (texts.length === 0) return [];
			
 
				+
			
 
				+    if (this.breaker.shouldFailFast()) {
			
 
				+      throw new CircuitOpenError();
			
 
				+    }
			
 
				+
			
 
				+    const chunks = chunkArray(texts, this.batchSize);
			
 
				+    const results: (ProviderEmbedding | null)[] = new Array(texts.length).fill(null);
			
 
				+    let cursor = 0;
			
 
				+
			
 
				+    for (const chunk of chunks) {
			
 
				+      const start = cursor;
			
 
				+      cursor += chunk.length;
			
 
				+
			
 
				+      // Abort early if signal already fired
			
 
				+      if (options.signal?.aborted) {
			
 
				+        // Leave remaining slots as null (caller treats as errors)
			
 
				+        return results;
			
 
				+      }
			
 
				+
			
 
				+      // Fail-fast if breaker tripped mid-loop
			
 
				+      if (this.breaker.shouldFailFast()) {
			
 
				+        throw new CircuitOpenError();
			
 
				+      }
			
 
				+
			
 
				+      try {
			
 
				+        const embeddings = await this.requestWithRetry(chunk, options);
			
 
				+        for (let i = 0; i < chunk.length; i++) {
			
 
				+          const embedding = embeddings[i];
			
 
				+          if (embedding) {
			
 
				+            results[start + i] = {
			
 
				+              embedding,
			
 
				+              model: this.modelId,
			
 
				+            };
			
 
				+            // Record dimensions on first success
			
 
				+            if (this.dimensions === undefined) {
			
 
				+              this.dimensions = embedding.length;
			
 
				+            }
			
 
				+          }
			
 
				+        }
			
 
				+        this.breaker.recordSuccess();
			
 
				+      } catch (err) {
			
 
				+        this.breaker.recordFailure();
			
 
				+        // CircuitOpenError must propagate so the caller can fall back
			
 
				+        if (err instanceof CircuitOpenError) throw err;
			
 
				+        // Other errors mark the chunk as null and continue with next chunk.
			
 
				+        // (The store layer already handles per-text nulls as errors.)
			
 
				+        if (process.env.QMD_EMBED_DEBUG) {
			
 
				+          process.stderr.write(
			
 
				+            `OpenAIEmbeddingsProvider: chunk failed (${err instanceof Error ? err.message : String(err)})\n`,
			
 
				+          );
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    return results;
			
 
				+  }
			
 
				+
			
 
				+  async dispose(): Promise<void> {
			
 
				+    // Nothing to release — fetch handles its own connection pooling.
			
 
				+    // Reset the breaker so a re-instantiation starts fresh.
			
 
				+    this.breaker.reset();
			
 
				+  }
			
 
				+
			
 
				+  // ────────────────────── Internals ──────────────────────
			
 
				+
			
 
				+  private buildHeaders(): Record<string, string> {
			
 
				+    const headers: Record<string, string> = {
			
 
				+      "Content-Type": "application/json",
			
 
				+      "Accept": "application/json",
			
 
				+    };
			
 
				+    if (this.apiKey) {
			
 
				+      headers["Authorization"] = `Bearer ${this.apiKey}`;
			
 
				+    }
			
 
				+    return headers;
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * Single HTTP request with retry on 429/503. Returns embeddings indexed
			
 
				+   * the same as `texts`. Throws on non-retryable failure or all attempts
			
 
				+   * exhausted.
			
 
				+   */
			
 
				+  private async requestWithRetry(
			
 
				+    texts: string[],
			
 
				+    options: ProviderEmbedOptions,
			
 
				+  ): Promise<number[][]> {
			
 
				+    let lastErr: unknown = null;
			
 
				+    const maxAttempts = this.retryBackoffsMs.length + 1;
			
 
				+
			
 
				+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
			
 
				+      // Honor user abort BEFORE issuing the call (avoids wasted network)
			
 
				+      if (options.signal?.aborted) {
			
 
				+        throw new Error("aborted by caller");
			
 
				+      }
			
 
				+
			
 
				+      try {
			
 
				+        return await this.requestOnce(texts, options);
			
 
				+      } catch (err) {
			
 
				+        lastErr = err;
			
 
				+        const retryable =
			
 
				+          err instanceof HttpError ? isRetryableStatus(err.status) : false;
			
 
				+        if (!retryable) throw err;
			
 
				+        if (attempt < this.retryBackoffsMs.length) {
			
 
				+          await this.sleep(this.retryBackoffsMs[attempt]!);
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    // Exhausted retries → throw the last error so caller marks the chunk null
			
 
				+    throw lastErr ?? new Error("requestWithRetry exhausted");
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * Issue one HTTP attempt to `POST /v1/embeddings`. Does NOT retry.
			
 
				+   */
			
 
				+  private async requestOnce(
			
 
				+    texts: string[],
			
 
				+    options: ProviderEmbedOptions,
			
 
				+  ): Promise<number[][]> {
			
 
				+    const { signal: attemptSig, cleanup } = buildAttemptSignal(options.signal, this.timeoutMs);
			
 
				+    try {
			
 
				+      const body = JSON.stringify({
			
 
				+        model: options.model ?? this.upstreamModel,
			
 
				+        input: texts,
			
 
				+      });
			
 
				+      const resp = await this.fetchImpl(`${this.endpoint}/v1/embeddings`, {
			
 
				+        method: "POST",
			
 
				+        headers: this.buildHeaders(),
			
 
				+        body,
			
 
				+        signal: attemptSig,
			
 
				+      });
			
 
				+
			
 
				+      if (!resp.ok) {
			
 
				+        const text = await resp.text().catch(() => "");
			
 
				+        throw new HttpError(resp.status, text);
			
 
				+      }
			
 
				+
			
 
				+      let parsed: OpenAIEmbeddingsResponse;
			
 
				+      try {
			
 
				+        parsed = (await resp.json()) as OpenAIEmbeddingsResponse;
			
 
				+      } catch (err) {
			
 
				+        throw new Error(
			
 
				+          `OpenAIEmbeddingsProvider: malformed JSON from ${this.endpoint}/v1/embeddings: ${err instanceof Error ? err.message : String(err)}`,
			
 
				+        );
			
 
				+      }
			
 
				+
			
 
				+      if (!parsed || !Array.isArray(parsed.data)) {
			
 
				+        throw new Error(
			
 
				+          `OpenAIEmbeddingsProvider: response missing "data" array (got ${typeof parsed})`,
			
 
				+        );
			
 
				+      }
			
 
				+
			
 
				+      // Sort by index to match input order (in case server returns out-of-order).
			
 
				+      const out: number[][] = new Array(texts.length);
			
 
				+      for (const item of parsed.data) {
			
 
				+        if (
			
 
				+          typeof item.index !== "number" ||
			
 
				+          item.index < 0 ||
			
 
				+          item.index >= texts.length
			
 
				+        ) {
			
 
				+          throw new Error(
			
 
				+            `OpenAIEmbeddingsProvider: data item index out of range (${item.index}, expected 0..${texts.length - 1})`,
			
 
				+          );
			
 
				+        }
			
 
				+        if (!Array.isArray(item.embedding)) {
			
 
				+          throw new Error(
			
 
				+            `OpenAIEmbeddingsProvider: data[${item.index}].embedding is not an array`,
			
 
				+          );
			
 
				+        }
			
 
				+        out[item.index] = item.embedding;
			
 
				+      }
			
 
				+
			
 
				+      // Sanity check — every slot must be filled
			
 
				+      for (let i = 0; i < texts.length; i++) {
			
 
				+        if (!out[i]) {
			
 
				+          throw new Error(
			
 
				+            `OpenAIEmbeddingsProvider: response missing embedding for index ${i}`,
			
 
				+          );
			
 
				+        }
			
 
				+      }
			
 
				+      return out;
			
 
				+    } finally {
			
 
				+      cleanup();
			
 
				+    }
			
 
				+  }
			
 
				+}
			
--- a/src/embedding/provider.ts
+++ b/src/embedding/provider.ts
@@ -0,0 +1,143 @@
 
				+/**
			
 
				+ * provider.ts - Embedding provider abstraction
			
 
				+ *
			
 
				+ * Defines the EmbeddingProvider interface that allows qmd to use either:
			
 
				+ *   - LocalLlamaCppProvider (legacy, GGUF via node-llama-cpp)
			
 
				+ *   - OpenAIEmbeddingsProvider (HTTP, OpenAI-compatible endpoint like ai.mm.mk)
			
 
				+ *
			
 
				+ * The factory in `./factory.ts` selects an implementation based on env vars,
			
 
				+ * a CLI flag, or `~/.config/qmd/config.json`.
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+ * Single embedding result
			
 
				+ */
			
 
				+export type ProviderEmbedding = {
			
 
				+  embedding: number[];
			
 
				+  /** Model identifier used to produce this embedding (matches content_vectors.model in DB) */
			
 
				+  model: string;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * Supported provider kinds
			
 
				+ */
			
 
				+export type ProviderKind = "local" | "openai";
			
 
				+
			
 
				+/**
			
 
				+ * Healthcheck result for provider startup verification
			
 
				+ */
			
 
				+export type ProviderHealth = {
			
 
				+  ok: boolean;
			
 
				+  /** Model identifier reported by the provider */
			
 
				+  model: string;
			
 
				+  /** Embedding dimensions (e.g. 768 for embeddinggemma-300M) */
			
 
				+  dimensions?: number;
			
 
				+  /** Detail message (error reason on failure, status on success) */
			
 
				+  detail?: string;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * Per-call options for provider embedding
			
 
				+ */
			
 
				+export type ProviderEmbedOptions = {
			
 
				+  /** Optional model id override (rare; usually provider has a fixed model) */
			
 
				+  model?: string;
			
 
				+  /** Abort signal for cancellation / timeout */
			
 
				+  signal?: AbortSignal;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * Provider interface — both LocalLlamaCppProvider and OpenAIEmbeddingsProvider implement this.
			
 
				+ *
			
 
				+ * Implementations MUST:
			
 
				+ *   - Return `null` (not throw) for individual texts that fail to embed;
			
 
				+ *     the caller will count it as an error and continue.
			
 
				+ *   - Honor `options.signal` for cancellation.
			
 
				+ *   - Be safe to call concurrently for `embedBatch`.
			
 
				+ */
			
 
				+export interface EmbeddingProvider {
			
 
				+  /** Provider kind tag — useful for logging and factory introspection */
			
 
				+  readonly kind: ProviderKind;
			
 
				+
			
 
				+  /**
			
 
				+   * Stable model identifier reported to the caller.
			
 
				+   *
			
 
				+   * MUST match what's stored in `content_vectors.model` for the existing
			
 
				+   * index — otherwise the model-id guard refuses to embed.
			
 
				+   */
			
 
				+  getModelId(): string;
			
 
				+
			
 
				+  /**
			
 
				+   * Embedding vector dimensions. May return `undefined` before the first call
			
 
				+   * (some providers probe lazily). Once known, MUST stay stable.
			
 
				+   */
			
 
				+  getDimensions(): number | undefined;
			
 
				+
			
 
				+  /**
			
 
				+   * Healthcheck — verifies the provider is reachable and the model is loaded.
			
 
				+   * Should NOT throw — return `{ ok: false, detail: ... }` on failure.
			
 
				+   *
			
 
				+   * For HTTP providers: ping `/health` endpoint.
			
 
				+   * For local provider: ensure model loads.
			
 
				+   */
			
 
				+  healthcheck(signal?: AbortSignal): Promise<ProviderHealth>;
			
 
				+
			
 
				+  /**
			
 
				+   * Embed a single text. Returns `null` on per-call failure.
			
 
				+   */
			
 
				+  embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>;
			
 
				+
			
 
				+  /**
			
 
				+   * Embed multiple texts in a batch (more efficient than calling `embed` N times).
			
 
				+   *
			
 
				+   * Output array length MUST equal input array length. Failed entries are `null`.
			
 
				+   * Implementations are responsible for chunking large batches per their
			
 
				+   * upstream limits (e.g. OpenAI provider chunks to 64).
			
 
				+   */
			
 
				+  embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
			
 
				+
			
 
				+  /** Release any held resources (HTTP keep-alive sockets, model handles, …) */
			
 
				+  dispose(): Promise<void>;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Error thrown when the provider's reported model id does not match the
			
 
				+ * model id baked into existing `content_vectors` rows. Forces user to
			
 
				+ * re-embed (`qmd embed -f`) or pin the matching model id.
			
 
				+ */
			
 
				+export class ModelMismatchError extends Error {
			
 
				+  readonly providerModel: string;
			
 
				+  readonly existingModels: string[];
			
 
				+
			
 
				+  constructor(providerModel: string, existingModels: string[]) {
			
 
				+    const list = existingModels.join(", ");
			
 
				+    super(
			
 
				+      `Embedding model mismatch: existing vectors use model(s) [${list}] ` +
			
 
				+      `but the configured provider reports "${providerModel}". ` +
			
 
				+      `Run \`qmd embed -f\` (or \`--rebuild\`) to re-embed everything with ` +
			
 
				+      `the new model, or set QMD_EMBED_MODEL_ID="${existingModels[0] ?? ""}" ` +
			
 
				+      `to keep the existing vectors.`
			
 
				+    );
			
 
				+    this.name = "ModelMismatchError";
			
 
				+    this.providerModel = providerModel;
			
 
				+    this.existingModels = existingModels;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Verify that the provider's model id is compatible with the existing
			
 
				+ * `content_vectors` entries. Pass-through (no-op) if the table is empty
			
 
				+ * (fresh DB) or if the model id appears in the distinct set.
			
 
				+ *
			
 
				+ * Caller passes `existingModels` (typically result of
			
 
				+ * `SELECT DISTINCT model FROM content_vectors`).
			
 
				+ */
			
 
				+export function assertModelCompatible(
			
 
				+  providerModel: string,
			
 
				+  existingModels: string[],
			
 
				+): void {
			
 
				+  // Empty DB — nothing to compare against, anything goes.
			
 
				+  if (existingModels.length === 0) return;
			
 
				+  if (existingModels.includes(providerModel)) return;
			
 
				+  throw new ModelMismatchError(providerModel, existingModels);
			
 
				+}
			
--- a/src/index.ts
+++ b/src/index.ts
@@ -119,6 +119,36 @@ export { getDefaultDbPath } from "./store.js";
 
				 // Re-export Maintenance class for CLI housekeeping operations
			
 
				 export { Maintenance } from "./maintenance.js";
			
 
				 
			
 
				+// Re-export embedding provider abstraction for SDK consumers (i-qkarfffa).
			
 
				+// `createEmbeddingProvider` honors QMD_EMBED_ENDPOINT / config-file / kind
			
 
				+// arg precedence; default fallback is the legacy LocalLlamaCppProvider so
			
 
				+// SDK code that doesn't pass `embedProvider` keeps the prior behavior.
			
 
				+export {
			
 
				+  createEmbeddingProvider,
			
 
				+  resolveProviderKind,
			
 
				+  LocalLlamaCppProvider,
			
 
				+  OpenAIEmbeddingsProvider,
			
 
				+  CircuitBreaker,
			
 
				+  CircuitOpenError,
			
 
				+  HttpError,
			
 
				+  ModelMismatchError,
			
 
				+  assertModelCompatible,
			
 
				+  type EmbeddingProvider,
			
 
				+  type ProviderKind,
			
 
				+  type ProviderEmbedding,
			
 
				+  type ProviderEmbedOptions,
			
 
				+  type ProviderHealth,
			
 
				+  type CreateEmbeddingProviderOptions,
			
 
				+  type OpenAIProviderConfig,
			
 
				+  type LocalLlamaCppProviderConfig,
			
 
				+  type EmbedProviderConfigFile,
			
 
				+  DEFAULT_BATCH_SIZE as DEFAULT_PROVIDER_BATCH_SIZE,
			
 
				+  DEFAULT_TIMEOUT_MS as DEFAULT_PROVIDER_TIMEOUT_MS,
			
 
				+  RETRY_BACKOFFS_MS as PROVIDER_RETRY_BACKOFFS_MS,
			
 
				+} from "./embedding/index.js";
			
 
				+
			
 
				+export { getDistinctEmbeddingModels } from "./store.js";
			
 
				+
			
 
				 /**
			
 
				  * Progress info emitted during update() for each file processed.
			
 
				  */
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -33,6 +33,10 @@ import type {
 
				   CollectionConfig,
			
 
				   ContextMap,
			
 
				 } from "./collections.js";
			
 
				+import {
			
 
				+  type EmbeddingProvider,
			
 
				+  assertModelCompatible,
			
 
				+} from "./embedding/provider.js";
			
 
				 
			
 
				 // =============================================================================
			
 
				 // Configuration
			
@@ -1292,6 +1296,16 @@ export type EmbedOptions = {
 
				   maxBatchBytes?: number;
			
 
				   chunkStrategy?: ChunkStrategy;
			
 
				   onProgress?: (info: EmbedProgress) => void;
			
 
				+  /**
			
 
				+   * Optional embedding provider. When supplied, embeddings are routed through
			
 
				+   * this provider (HTTP, GPU worker, etc.) instead of the local llama.cpp
			
 
				+   * session path. The provider's `getModelId()` is verified against existing
			
 
				+   * `content_vectors.model` rows; mismatch throws unless `force` is set.
			
 
				+   *
			
 
				+   * When omitted, behavior is identical to pre-patch: embeddings come from
			
 
				+   * the store's `LlamaCpp` (or the global singleton).
			
 
				+   */
			
 
				+  embedProvider?: EmbeddingProvider;
			
 
				 };
			
 
				 
			
 
				 type PendingEmbeddingDoc = {
			
@@ -1410,6 +1424,15 @@ export async function generateEmbeddings(
 
				   const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
			
 
				   const encoder = new TextEncoder();
			
 
				 
			
 
				+  // Migration safety: if an embedProvider is supplied, verify its model id
			
 
				+  // matches the existing content_vectors rows (unless we're about to clear
			
 
				+  // them via `force`). This must happen BEFORE we clear vectors so users
			
 
				+  // who pass `--force` aren't blocked.
			
 
				+  if (options?.embedProvider && !options.force) {
			
 
				+    const existing = getDistinctEmbeddingModels(db);
			
 
				+    assertModelCompatible(options.embedProvider.getModelId(), existing);
			
 
				+  }
			
 
				+
			
 
				   if (options?.force) {
			
 
				     clearAllEmbeddings(db);
			
 
				   }
			
@@ -1442,7 +1465,14 @@ export async function generateEmbeddings(
 
				 
			
 
				   // Use store's LlamaCpp or global singleton, wrapped in a session
			
 
				   const llm = getLlm(store);
			
 
				-  const embedModelUri = llm.embedModelName;
			
 
				+  const embedModelUri = options?.embedProvider?.getModelId() ?? llm.embedModelName;
			
 
				+
			
 
				+  // Provider routing — when an EmbeddingProvider is supplied, embed calls go
			
 
				+  // through it (HTTP, GPU worker, etc.). Otherwise, use the LLM session path.
			
 
				+  // The outer session is still created for its abort signal (chunking uses
			
 
				+  // `session.signal` for cooperative cancellation).
			
 
				+  const provider = options?.embedProvider;
			
 
				+  const providerModel = provider?.getModelId() ?? model;
			
 
				 
			
 
				   // Create a session manager for this llm instance
			
 
				   const result = await withLLMSessionForLlm(llm, async (session) => {
			
@@ -1454,6 +1484,30 @@ export async function generateEmbeddings(
 
				     const BATCH_SIZE = 32;
			
 
				     const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes);
			
 
				 
			
 
				+    // Embedding helpers — single point of provider/session selection.
			
 
				+    // Both return the same shape as ILLMSession.embed/embedBatch so the
			
 
				+    // rest of the loop is unchanged.
			
 
				+    const embedOne = async (
			
 
				+      text: string,
			
 
				+      modelArg: string,
			
 
				+    ): Promise<{ embedding: number[]; model: string } | null> => {
			
 
				+      if (provider) {
			
 
				+        const r = await provider.embed(text, { model: modelArg, signal: session.signal });
			
 
				+        return r ? { embedding: r.embedding, model: r.model } : null;
			
 
				+      }
			
 
				+      return session.embed(text, { model: modelArg });
			
 
				+    };
			
 
				+    const embedMany = async (
			
 
				+      texts: string[],
			
 
				+      modelArg: string,
			
 
				+    ): Promise<({ embedding: number[]; model: string } | null)[]> => {
			
 
				+      if (provider) {
			
 
				+        const r = await provider.embedBatch(texts, { model: modelArg, signal: session.signal });
			
 
				+        return r.map((x) => (x ? { embedding: x.embedding, model: x.model } : null));
			
 
				+      }
			
 
				+      return session.embedBatch(texts, { model: modelArg });
			
 
				+    };
			
 
				+
			
 
				     for (const batchMeta of batches) {
			
 
				       // Abort early if session has been invalidated
			
 
				       if (!session.isValid) {
			
@@ -1503,7 +1557,7 @@ export async function generateEmbeddings(
 
				       if (!vectorTableInitialized) {
			
 
				         const firstChunk = batchChunks[0]!;
			
 
				         const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, embedModelUri);
			
 
				-        const firstResult = await session.embed(firstText, { model });
			
 
				+        const firstResult = await embedOne(firstText, providerModel);
			
 
				         if (!firstResult) {
			
 
				           throw new Error("Failed to get embedding dimensions from first chunk");
			
 
				         }
			
@@ -1537,12 +1591,12 @@ export async function generateEmbeddings(
 
				         const texts = chunkBatch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title, embedModelUri));
			
 
				 
			
 
				         try {
			
 
				-          const embeddings = await session.embedBatch(texts, { model });
			
 
				+          const embeddings = await embedMany(texts, providerModel);
			
 
				           for (let i = 0; i < chunkBatch.length; i++) {
			
 
				             const chunk = chunkBatch[i]!;
			
 
				             const embedding = embeddings[i];
			
 
				             if (embedding) {
			
 
				-              insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
			
 
				+              insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), providerModel, now);
			
 
				               chunksEmbedded++;
			
 
				             } else {
			
 
				               errors++;
			
@@ -1559,9 +1613,9 @@ export async function generateEmbeddings(
 
				             for (const chunk of chunkBatch) {
			
 
				               try {
			
 
				                 const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
			
 
				-                const result = await session.embed(text, { model });
			
 
				+                const result = await embedOne(text, providerModel);
			
 
				                 if (result) {
			
 
				-                  insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
			
 
				+                  insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), providerModel, now);
			
 
				                   chunksEmbedded++;
			
 
				                 } else {
			
 
				                   errors++;
			
@@ -3272,6 +3326,23 @@ export function clearAllEmbeddings(db: Database): void {
 
				   db.exec(`DROP TABLE IF EXISTS vectors_vec`);
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * Get the distinct set of model identifiers present in `content_vectors`.
			
 
				+ *
			
 
				+ * Used by the embedding migration-safety guard: if a configured provider's
			
 
				+ * `getModelId()` does not appear in this list (and the table is non-empty),
			
 
				+ * we refuse to embed and ask the user to run `qmd embed -f` to rebuild.
			
 
				+ *
			
 
				+ * Returns `[]` when the table is empty (fresh DB) — in which case any
			
 
				+ * provider is allowed.
			
 
				+ */
			
 
				+export function getDistinctEmbeddingModels(db: Database): string[] {
			
 
				+  const rows = db.prepare(
			
 
				+    `SELECT DISTINCT model FROM content_vectors WHERE model IS NOT NULL`,
			
 
				+  ).all() as { model: string }[];
			
 
				+  return rows.map((r) => r.model).filter((m) => typeof m === "string" && m.length > 0);
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * Insert a single embedding into both content_vectors and vectors_vec tables.
			
 
				  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
			
--- a/test/embedding-factory.test.ts
+++ b/test/embedding-factory.test.ts
@@ -0,0 +1,263 @@
 
				+/**
			
 
				+ * embedding-factory.test.ts - Tests for createEmbeddingProvider factory.
			
 
				+ *
			
 
				+ * Verifies the resolution precedence:
			
 
				+ *   1. explicit `kind` argument
			
 
				+ *   2. QMD_EMBED_PROVIDER env
			
 
				+ *   3. QMD_EMBED_ENDPOINT env (forces openai)
			
 
				+ *   4. config file `embedProvider.kind` / `embedProvider.endpoint`
			
 
				+ *   5. fallback: local
			
 
				+ */
			
 
				+
			
 
				+import { describe, test, expect, beforeEach, afterEach } from "vitest";
			
 
				+import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
			
 
				+import { tmpdir } from "node:os";
			
 
				+import { join } from "node:path";
			
 
				+import {
			
 
				+  resolveProviderKind,
			
 
				+  createEmbeddingProvider,
			
 
				+  loadConfigFile,
			
 
				+} from "../src/embedding/factory.js";
			
 
				+import { OpenAIEmbeddingsProvider } from "../src/embedding/openai.js";
			
 
				+import { LocalLlamaCppProvider } from "../src/embedding/local.js";
			
 
				+
			
 
				+let workDir: string;
			
 
				+let configPath: string;
			
 
				+
			
 
				+beforeEach(() => {
			
 
				+  workDir = mkdtempSync(join(tmpdir(), "qmd-factory-test-"));
			
 
				+  mkdirSync(join(workDir, "qmd"), { recursive: true });
			
 
				+  configPath = join(workDir, "qmd", "config.json");
			
 
				+});
			
 
				+
			
 
				+afterEach(() => {
			
 
				+  rmSync(workDir, { recursive: true, force: true });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── Helpers ─────────────────────────────────────────
			
 
				+
			
 
				+function writeConfig(obj: Record<string, unknown>) {
			
 
				+  writeFileSync(configPath, JSON.stringify(obj));
			
 
				+}
			
 
				+
			
 
				+const EMPTY_ENV: Record<string, string | undefined> = {};
			
 
				+
			
 
				+// ─────────────────────────── resolveProviderKind ─────────────────────────────
			
 
				+
			
 
				+describe("resolveProviderKind", () => {
			
 
				+  test("explicit kind argument wins", () => {
			
 
				+    expect(
			
 
				+      resolveProviderKind({
			
 
				+        kind: "local",
			
 
				+        env: { QMD_EMBED_ENDPOINT: "https://x" },
			
 
				+        configPath,
			
 
				+      }),
			
 
				+    ).toBe("local");
			
 
				+    expect(
			
 
				+      resolveProviderKind({
			
 
				+        kind: "openai",
			
 
				+        env: EMPTY_ENV,
			
 
				+        configPath,
			
 
				+      }),
			
 
				+    ).toBe("openai");
			
 
				+  });
			
 
				+
			
 
				+  test("QMD_EMBED_PROVIDER env wins over QMD_EMBED_ENDPOINT", () => {
			
 
				+    expect(
			
 
				+      resolveProviderKind({
			
 
				+        env: { QMD_EMBED_PROVIDER: "local", QMD_EMBED_ENDPOINT: "https://x" },
			
 
				+        configPath,
			
 
				+      }),
			
 
				+    ).toBe("local");
			
 
				+  });
			
 
				+
			
 
				+  test("QMD_EMBED_ENDPOINT presence → openai", () => {
			
 
				+    expect(
			
 
				+      resolveProviderKind({
			
 
				+        env: { QMD_EMBED_ENDPOINT: "https://ai.example.com" },
			
 
				+        configPath,
			
 
				+      }),
			
 
				+    ).toBe("openai");
			
 
				+  });
			
 
				+
			
 
				+  test("QMD_EMBED_ENDPOINT empty string ignored", () => {
			
 
				+    expect(
			
 
				+      resolveProviderKind({
			
 
				+        env: { QMD_EMBED_ENDPOINT: "" },
			
 
				+        configPath,
			
 
				+      }),
			
 
				+    ).toBe("local");
			
 
				+  });
			
 
				+
			
 
				+  test("config file embedProvider.kind respected", () => {
			
 
				+    writeConfig({ embedProvider: { kind: "openai", endpoint: "https://ai.example.com" } });
			
 
				+    expect(resolveProviderKind({ env: EMPTY_ENV, configPath })).toBe("openai");
			
 
				+  });
			
 
				+
			
 
				+  test("config file embedProvider.endpoint alone → openai", () => {
			
 
				+    writeConfig({ embedProvider: { endpoint: "https://ai.example.com" } });
			
 
				+    expect(resolveProviderKind({ env: EMPTY_ENV, configPath })).toBe("openai");
			
 
				+  });
			
 
				+
			
 
				+  test("no signal anywhere → local fallback", () => {
			
 
				+    expect(resolveProviderKind({ env: EMPTY_ENV, configPath })).toBe("local");
			
 
				+  });
			
 
				+
			
 
				+  test("invalid env QMD_EMBED_PROVIDER is ignored", () => {
			
 
				+    expect(
			
 
				+      resolveProviderKind({
			
 
				+        env: { QMD_EMBED_PROVIDER: "garbage" },
			
 
				+        configPath,
			
 
				+      }),
			
 
				+    ).toBe("local");
			
 
				+  });
			
 
				+
			
 
				+  test("uppercase env QMD_EMBED_PROVIDER normalized", () => {
			
 
				+    expect(
			
 
				+      resolveProviderKind({
			
 
				+        env: { QMD_EMBED_PROVIDER: "OPENAI", QMD_EMBED_ENDPOINT: "https://x" },
			
 
				+        configPath,
			
 
				+      }),
			
 
				+    ).toBe("openai");
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── createEmbeddingProvider ─────────────────────────
			
 
				+
			
 
				+describe("createEmbeddingProvider", () => {
			
 
				+  test("openai kind w/ endpoint env → OpenAIEmbeddingsProvider", () => {
			
 
				+    const p = createEmbeddingProvider({
			
 
				+      env: { QMD_EMBED_ENDPOINT: "https://ai.example.com" },
			
 
				+      configPath,
			
 
				+    });
			
 
				+    expect(p).toBeInstanceOf(OpenAIEmbeddingsProvider);
			
 
				+    expect(p.kind).toBe("openai");
			
 
				+    expect(p.getModelId()).toBe("embeddinggemma");
			
 
				+  });
			
 
				+
			
 
				+  test("openai kind w/ explicit options merges over env", () => {
			
 
				+    const p = createEmbeddingProvider({
			
 
				+      env: { QMD_EMBED_ENDPOINT: "https://env.example.com", QMD_EMBED_API_KEY: "env-key" },
			
 
				+      configPath,
			
 
				+      openai: { endpoint: "https://override.example.com" },
			
 
				+    });
			
 
				+    // Cast to access internal properties for verification
			
 
				+    const inner = p as OpenAIEmbeddingsProvider & { endpoint: string; apiKey: string };
			
 
				+    expect(inner["endpoint"]).toBe("https://override.example.com");
			
 
				+    // apiKey should still come from env since we didn't override it
			
 
				+    expect(inner["apiKey"]).toBe("env-key");
			
 
				+  });
			
 
				+
			
 
				+  test("openai kind reads modelId from env", () => {
			
 
				+    const p = createEmbeddingProvider({
			
 
				+      env: {
			
 
				+        QMD_EMBED_ENDPOINT: "https://ai.example.com",
			
 
				+        QMD_EMBED_MODEL_ID: "custom-model",
			
 
				+      },
			
 
				+      configPath,
			
 
				+    });
			
 
				+    expect(p.getModelId()).toBe("custom-model");
			
 
				+  });
			
 
				+
			
 
				+  test("openai kind reads upstream model from env", () => {
			
 
				+    const p = createEmbeddingProvider({
			
 
				+      env: {
			
 
				+        QMD_EMBED_ENDPOINT: "https://ai.example.com",
			
 
				+        QMD_EMBED_UPSTREAM_MODEL: "embeddinggemma:300m",
			
 
				+      },
			
 
				+      configPath,
			
 
				+    }) as OpenAIEmbeddingsProvider & { upstreamModel: string };
			
 
				+    expect(p["upstreamModel"]).toBe("embeddinggemma:300m");
			
 
				+  });
			
 
				+
			
 
				+  test("openai kind reads batch size and timeout from env", () => {
			
 
				+    const p = createEmbeddingProvider({
			
 
				+      env: {
			
 
				+        QMD_EMBED_ENDPOINT: "https://ai.example.com",
			
 
				+        QMD_EMBED_BATCH_SIZE: "32",
			
 
				+        QMD_EMBED_TIMEOUT_MS: "5000",
			
 
				+      },
			
 
				+      configPath,
			
 
				+    }) as OpenAIEmbeddingsProvider & { batchSize: number; timeoutMs: number };
			
 
				+    expect(p["batchSize"]).toBe(32);
			
 
				+    expect(p["timeoutMs"]).toBe(5000);
			
 
				+  });
			
 
				+
			
 
				+  test("openai kind merges config file values", () => {
			
 
				+    writeConfig({
			
 
				+      embedProvider: {
			
 
				+        kind: "openai",
			
 
				+        endpoint: "https://config.example.com",
			
 
				+        apiKey: "config-key",
			
 
				+        modelId: "config-model",
			
 
				+        batchSize: 16,
			
 
				+      },
			
 
				+    });
			
 
				+    const p = createEmbeddingProvider({
			
 
				+      env: EMPTY_ENV,
			
 
				+      configPath,
			
 
				+    }) as OpenAIEmbeddingsProvider & {
			
 
				+      endpoint: string;
			
 
				+      apiKey: string;
			
 
				+      batchSize: number;
			
 
				+    };
			
 
				+    expect(p["endpoint"]).toBe("https://config.example.com");
			
 
				+    expect(p["apiKey"]).toBe("config-key");
			
 
				+    expect(p.getModelId()).toBe("config-model");
			
 
				+    expect(p["batchSize"]).toBe(16);
			
 
				+  });
			
 
				+
			
 
				+  test("env wins over config file", () => {
			
 
				+    writeConfig({
			
 
				+      embedProvider: {
			
 
				+        endpoint: "https://config.example.com",
			
 
				+      },
			
 
				+    });
			
 
				+    const p = createEmbeddingProvider({
			
 
				+      env: { QMD_EMBED_ENDPOINT: "https://env.example.com" },
			
 
				+      configPath,
			
 
				+    }) as OpenAIEmbeddingsProvider & { endpoint: string };
			
 
				+    expect(p["endpoint"]).toBe("https://env.example.com");
			
 
				+  });
			
 
				+
			
 
				+  test("openai kind without endpoint throws", () => {
			
 
				+    expect(() =>
			
 
				+      createEmbeddingProvider({ kind: "openai", env: EMPTY_ENV, configPath }),
			
 
				+    ).toThrow(/endpoint/);
			
 
				+  });
			
 
				+
			
 
				+  test("local kind explicitly requested → LocalLlamaCppProvider", () => {
			
 
				+    const p = createEmbeddingProvider({
			
 
				+      kind: "local",
			
 
				+      env: EMPTY_ENV,
			
 
				+      configPath,
			
 
				+    });
			
 
				+    expect(p).toBeInstanceOf(LocalLlamaCppProvider);
			
 
				+    expect(p.kind).toBe("local");
			
 
				+  });
			
 
				+
			
 
				+  test("default fallback → LocalLlamaCppProvider", () => {
			
 
				+    const p = createEmbeddingProvider({ env: EMPTY_ENV, configPath });
			
 
				+    expect(p).toBeInstanceOf(LocalLlamaCppProvider);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── loadConfigFile ──────────────────────────────────
			
 
				+
			
 
				+describe("loadConfigFile", () => {
			
 
				+  test("missing file → empty object", () => {
			
 
				+    expect(loadConfigFile(join(workDir, "missing.json"))).toEqual({});
			
 
				+  });
			
 
				+
			
 
				+  test("invalid JSON → empty object (no throw)", () => {
			
 
				+    writeFileSync(configPath, "not json");
			
 
				+    expect(loadConfigFile(configPath)).toEqual({});
			
 
				+  });
			
 
				+
			
 
				+  test("valid JSON parsed", () => {
			
 
				+    writeConfig({ embedProvider: { kind: "openai" } });
			
 
				+    expect(loadConfigFile(configPath)).toEqual({
			
 
				+      embedProvider: { kind: "openai" },
			
 
				+    });
			
 
				+  });
			
 
				+});
			
--- a/test/embedding-openai.test.ts
+++ b/test/embedding-openai.test.ts
@@ -0,0 +1,721 @@
 
				+/**
			
 
				+ * embedding-openai.test.ts - Tests for OpenAIEmbeddingsProvider (HTTP backend).
			
 
				+ *
			
 
				+ * Uses a mock fetch — no network required. Covers:
			
 
				+ *   - 200 happy path
			
 
				+ *   - 429 → retry → success
			
 
				+ *   - 503 persistent → exhausted retries → null
			
 
				+ *   - 4xx (non-429) → no retry, immediate failure
			
 
				+ *   - batch chunking (>64 items → multiple HTTP calls)
			
 
				+ *   - timeout / abort
			
 
				+ *   - malformed JSON / missing data array
			
 
				+ *   - circuit breaker open + half-open recovery
			
 
				+ *   - dimension probing
			
 
				+ *   - healthcheck endpoint
			
 
				+ */
			
 
				+
			
 
				+import { describe, test, expect, vi } from "vitest";
			
 
				+import {
			
 
				+  OpenAIEmbeddingsProvider,
			
 
				+  CircuitBreaker,
			
 
				+  CircuitOpenError,
			
 
				+  HttpError,
			
 
				+  isRetryableStatus,
			
 
				+  chunkArray,
			
 
				+  RETRY_BACKOFFS_MS,
			
 
				+} from "../src/embedding/openai.js";
			
 
				+
			
 
				+// ─────────────────────────── Helpers ─────────────────────────────────────────
			
 
				+
			
 
				+function mockResponse(status: number, body: unknown, opts?: { delayMs?: number }): Response {
			
 
				+  const text = typeof body === "string" ? body : JSON.stringify(body);
			
 
				+  const init: ResponseInit = {
			
 
				+    status,
			
 
				+    headers: { "content-type": "application/json" },
			
 
				+  };
			
 
				+  if (opts?.delayMs) {
			
 
				+    // Synchronous Response — test code awaits it directly, so delayMs would
			
 
				+    // need to be implemented in the fetch wrapper, not here.
			
 
				+  }
			
 
				+  return new Response(text, init);
			
 
				+}
			
 
				+
			
 
				+function makeFetchSequence(responses: Array<() => Promise<Response> | Response>): {
			
 
				+  fetchImpl: typeof fetch;
			
 
				+  calls: { url: string; init?: RequestInit }[];
			
 
				+} {
			
 
				+  const calls: { url: string; init?: RequestInit }[] = [];
			
 
				+  let i = 0;
			
 
				+  const fetchImpl = (async (input: RequestInfo | URL, init?: RequestInit) => {
			
 
				+    const url = typeof input === "string" ? input : input.toString();
			
 
				+    calls.push({ url, init });
			
 
				+    if (i >= responses.length) throw new Error(`Mock fetch exhausted at call ${i + 1}`);
			
 
				+    const r = responses[i++]!();
			
 
				+    return r instanceof Promise ? r : r;
			
 
				+  }) as typeof fetch;
			
 
				+  return { fetchImpl, calls };
			
 
				+}
			
 
				+
			
 
				+function fakeEmbedding(dim: number, seed = 0): number[] {
			
 
				+  return Array.from({ length: dim }, (_, i) => Math.sin(seed + i) * 0.5);
			
 
				+}
			
 
				+
			
 
				+function embeddingsResponse(texts: string[], dim = 4): Response {
			
 
				+  return mockResponse(200, {
			
 
				+    object: "list",
			
 
				+    model: "embeddinggemma:300m",
			
 
				+    data: texts.map((_, i) => ({
			
 
				+      object: "embedding",
			
 
				+      index: i,
			
 
				+      embedding: fakeEmbedding(dim, i * 7),
			
 
				+    })),
			
 
				+  });
			
 
				+}
			
 
				+
			
 
				+// ─────────────────────────── Pure helpers ────────────────────────────────────
			
 
				+
			
 
				+describe("isRetryableStatus", () => {
			
 
				+  test("429 retryable", () => expect(isRetryableStatus(429)).toBe(true));
			
 
				+  test("503 retryable", () => expect(isRetryableStatus(503)).toBe(true));
			
 
				+  test("400 NOT retryable", () => expect(isRetryableStatus(400)).toBe(false));
			
 
				+  test("401 NOT retryable", () => expect(isRetryableStatus(401)).toBe(false));
			
 
				+  test("404 NOT retryable", () => expect(isRetryableStatus(404)).toBe(false));
			
 
				+  test("500 NOT retryable", () => expect(isRetryableStatus(500)).toBe(false));
			
 
				+  test("502 NOT retryable", () => expect(isRetryableStatus(502)).toBe(false));
			
 
				+  test("200 NOT retryable", () => expect(isRetryableStatus(200)).toBe(false));
			
 
				+});
			
 
				+
			
 
				+describe("chunkArray", () => {
			
 
				+  test("empty input → empty output", () => {
			
 
				+    expect(chunkArray([], 5)).toEqual([]);
			
 
				+  });
			
 
				+  test("input ≤ size → single chunk", () => {
			
 
				+    expect(chunkArray([1, 2, 3], 5)).toEqual([[1, 2, 3]]);
			
 
				+  });
			
 
				+  test("input = size → single chunk", () => {
			
 
				+    expect(chunkArray([1, 2, 3, 4, 5], 5)).toEqual([[1, 2, 3, 4, 5]]);
			
 
				+  });
			
 
				+  test("input > size → multiple chunks", () => {
			
 
				+    expect(chunkArray([1, 2, 3, 4, 5, 6, 7], 3)).toEqual([
			
 
				+      [1, 2, 3],
			
 
				+      [4, 5, 6],
			
 
				+      [7],
			
 
				+    ]);
			
 
				+  });
			
 
				+  test("65 items at size 64 → 64 + 1", () => {
			
 
				+    const items = Array.from({ length: 65 }, (_, i) => i);
			
 
				+    const chunks = chunkArray(items, 64);
			
 
				+    expect(chunks.length).toBe(2);
			
 
				+    expect(chunks[0]!.length).toBe(64);
			
 
				+    expect(chunks[1]!.length).toBe(1);
			
 
				+  });
			
 
				+  test("size < 1 throws", () => {
			
 
				+    expect(() => chunkArray([1, 2, 3], 0)).toThrow();
			
 
				+    expect(() => chunkArray([1, 2, 3], -1)).toThrow();
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── Circuit Breaker ─────────────────────────────────
			
 
				+
			
 
				+describe("CircuitBreaker", () => {
			
 
				+  test("starts closed", () => {
			
 
				+    const cb = new CircuitBreaker();
			
 
				+    expect(cb.getState()).toBe("closed");
			
 
				+    expect(cb.shouldFailFast()).toBe(false);
			
 
				+  });
			
 
				+
			
 
				+  test("stays closed below minSamples even with all-failures", () => {
			
 
				+    const cb = new CircuitBreaker({ minSamples: 4, threshold: 0.5 });
			
 
				+    cb.recordFailure();
			
 
				+    cb.recordFailure();
			
 
				+    cb.recordFailure();
			
 
				+    expect(cb.getState()).toBe("closed");
			
 
				+  });
			
 
				+
			
 
				+  test("opens when failure rate exceeds threshold", () => {
			
 
				+    const cb = new CircuitBreaker({ minSamples: 4, threshold: 0.5 });
			
 
				+    cb.recordFailure();
			
 
				+    cb.recordFailure();
			
 
				+    cb.recordFailure();
			
 
				+    cb.recordFailure();
			
 
				+    expect(cb.getState()).toBe("open");
			
 
				+    expect(cb.shouldFailFast()).toBe(true);
			
 
				+  });
			
 
				+
			
 
				+  test("transitions OPEN → HALF-OPEN after openDurationMs", () => {
			
 
				+    let now = 1_000_000;
			
 
				+    const cb = new CircuitBreaker({
			
 
				+      minSamples: 4,
			
 
				+      threshold: 0.5,
			
 
				+      openDurationMs: 5000,
			
 
				+      now: () => now,
			
 
				+    });
			
 
				+    for (let i = 0; i < 4; i++) cb.recordFailure();
			
 
				+    expect(cb.getState()).toBe("open");
			
 
				+    now += 5001;
			
 
				+    expect(cb.getState()).toBe("half-open");
			
 
				+  });
			
 
				+
			
 
				+  test("HALF-OPEN + success → CLOSED", () => {
			
 
				+    let now = 1_000_000;
			
 
				+    const cb = new CircuitBreaker({
			
 
				+      minSamples: 4,
			
 
				+      threshold: 0.5,
			
 
				+      openDurationMs: 5000,
			
 
				+      now: () => now,
			
 
				+    });
			
 
				+    for (let i = 0; i < 4; i++) cb.recordFailure();
			
 
				+    now += 5001;
			
 
				+    cb.recordSuccess(); // half-open probe
			
 
				+    expect(cb.getState()).toBe("closed");
			
 
				+  });
			
 
				+
			
 
				+  test("HALF-OPEN + failure → re-OPEN", () => {
			
 
				+    let now = 1_000_000;
			
 
				+    const cb = new CircuitBreaker({
			
 
				+      minSamples: 4,
			
 
				+      threshold: 0.5,
			
 
				+      openDurationMs: 5000,
			
 
				+      now: () => now,
			
 
				+    });
			
 
				+    for (let i = 0; i < 4; i++) cb.recordFailure();
			
 
				+    now += 5001;
			
 
				+    expect(cb.getState()).toBe("half-open");
			
 
				+    cb.recordFailure();
			
 
				+    expect(cb.getState()).toBe("open");
			
 
				+  });
			
 
				+
			
 
				+  test("samples outside window are dropped", () => {
			
 
				+    let now = 1_000_000;
			
 
				+    const cb = new CircuitBreaker({
			
 
				+      minSamples: 4,
			
 
				+      threshold: 0.5,
			
 
				+      windowMs: 1000,
			
 
				+      now: () => now,
			
 
				+    });
			
 
				+    cb.recordFailure();
			
 
				+    cb.recordFailure();
			
 
				+    now += 1500; // window expired
			
 
				+    cb.recordSuccess();
			
 
				+    cb.recordSuccess();
			
 
				+    cb.recordSuccess();
			
 
				+    cb.recordSuccess();
			
 
				+    // Old failures should be discarded; rate = 0/4 < threshold
			
 
				+    expect(cb.getState()).toBe("closed");
			
 
				+  });
			
 
				+
			
 
				+  test("reset() clears state", () => {
			
 
				+    const cb = new CircuitBreaker({ minSamples: 2, threshold: 0.5 });
			
 
				+    cb.recordFailure();
			
 
				+    cb.recordFailure();
			
 
				+    expect(cb.getState()).toBe("open");
			
 
				+    cb.reset();
			
 
				+    expect(cb.getState()).toBe("closed");
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── HappyPath ───────────────────────────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — happy path", () => {
			
 
				+  test("single embed call → 200 success", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(["hello"], 4),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    const r = await p.embed("hello");
			
 
				+    expect(r).not.toBeNull();
			
 
				+    expect(r!.embedding.length).toBe(4);
			
 
				+    expect(r!.model).toBe("embeddinggemma");
			
 
				+    expect(p.getDimensions()).toBe(4);
			
 
				+    expect(calls.length).toBe(1);
			
 
				+    expect(calls[0]!.url).toBe("https://ai.example.com/v1/embeddings");
			
 
				+  });
			
 
				+
			
 
				+  test("strips trailing slashes from endpoint", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(["x"], 2),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com////",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    await p.embed("x");
			
 
				+    expect(calls[0]!.url).toBe("https://ai.example.com/v1/embeddings");
			
 
				+  });
			
 
				+
			
 
				+  test("batch of 3 → 1 HTTP call", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(["a", "b", "c"], 3),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    const result = await p.embedBatch(["a", "b", "c"]);
			
 
				+    expect(result.length).toBe(3);
			
 
				+    expect(result.every((r) => r !== null)).toBe(true);
			
 
				+    expect(calls.length).toBe(1);
			
 
				+  });
			
 
				+
			
 
				+  test("respects custom modelId / upstreamModel in request body", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(["x"], 2),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      modelId: "embeddinggemma",
			
 
				+      upstreamModel: "embeddinggemma:300m",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    const r = await p.embed("x");
			
 
				+    expect(r!.model).toBe("embeddinggemma");
			
 
				+    const body = JSON.parse(calls[0]!.init!.body as string);
			
 
				+    expect(body.model).toBe("embeddinggemma:300m");
			
 
				+  });
			
 
				+
			
 
				+  test("Authorization header set when apiKey provided", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(["x"], 2),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      apiKey: "sk-test-123",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    await p.embed("x");
			
 
				+    const headers = calls[0]!.init!.headers as Record<string, string>;
			
 
				+    expect(headers["Authorization"]).toBe("Bearer sk-test-123");
			
 
				+  });
			
 
				+
			
 
				+  test("Authorization header omitted when apiKey not provided", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(["x"], 2),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    await p.embed("x");
			
 
				+    const headers = calls[0]!.init!.headers as Record<string, string>;
			
 
				+    expect(headers["Authorization"]).toBeUndefined();
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── Batch chunking ──────────────────────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — batch chunking", () => {
			
 
				+  test("100 items at batchSize=64 → 2 HTTP calls (64 + 36)", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(Array.from({ length: 64 }, () => "x"), 4),
			
 
				+      () => embeddingsResponse(Array.from({ length: 36 }, () => "x"), 4),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      batchSize: 64,
			
 
				+    });
			
 
				+    const texts = Array.from({ length: 100 }, (_, i) => `text-${i}`);
			
 
				+    const result = await p.embedBatch(texts);
			
 
				+    expect(result.length).toBe(100);
			
 
				+    expect(result.every((r) => r !== null)).toBe(true);
			
 
				+    expect(calls.length).toBe(2);
			
 
				+
			
 
				+    const body0 = JSON.parse(calls[0]!.init!.body as string);
			
 
				+    const body1 = JSON.parse(calls[1]!.init!.body as string);
			
 
				+    expect(body0.input.length).toBe(64);
			
 
				+    expect(body1.input.length).toBe(36);
			
 
				+  });
			
 
				+
			
 
				+  test("custom batchSize=10 → multiple smaller calls", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(Array.from({ length: 10 }, () => "x"), 2),
			
 
				+      () => embeddingsResponse(Array.from({ length: 10 }, () => "x"), 2),
			
 
				+      () => embeddingsResponse(Array.from({ length: 5 }, () => "x"), 2),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      batchSize: 10,
			
 
				+    });
			
 
				+    const texts = Array.from({ length: 25 }, (_, i) => `t${i}`);
			
 
				+    const result = await p.embedBatch(texts);
			
 
				+    expect(result.length).toBe(25);
			
 
				+    expect(result.every((r) => r !== null)).toBe(true);
			
 
				+    expect(calls.length).toBe(3);
			
 
				+  });
			
 
				+
			
 
				+  test("empty input → no HTTP calls", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    const result = await p.embedBatch([]);
			
 
				+    expect(result).toEqual([]);
			
 
				+    expect(calls.length).toBe(0);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── Retry behavior ──────────────────────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — retry on 429/503", () => {
			
 
				+  test("429 → retry → success", async () => {
			
 
				+    const sleepCalls: number[] = [];
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => mockResponse(429, { error: "rate limit" }),
			
 
				+      () => embeddingsResponse(["x"], 4),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [10, 20, 40],
			
 
				+      sleep: async (ms) => {
			
 
				+        sleepCalls.push(ms);
			
 
				+      },
			
 
				+    });
			
 
				+    const r = await p.embed("x");
			
 
				+    expect(r).not.toBeNull();
			
 
				+    expect(calls.length).toBe(2);
			
 
				+    expect(sleepCalls).toEqual([10]);
			
 
				+  });
			
 
				+
			
 
				+  test("503 → retry → success", async () => {
			
 
				+    const sleepCalls: number[] = [];
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => mockResponse(503, { error: "service unavailable" }),
			
 
				+      () => embeddingsResponse(["x"], 4),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [5, 10, 20],
			
 
				+      sleep: async (ms) => {
			
 
				+        sleepCalls.push(ms);
			
 
				+      },
			
 
				+    });
			
 
				+    const r = await p.embed("x");
			
 
				+    expect(r).not.toBeNull();
			
 
				+    expect(calls.length).toBe(2);
			
 
				+    expect(sleepCalls).toEqual([5]);
			
 
				+  });
			
 
				+
			
 
				+  test("503 persistent → exhausted retries → null result", async () => {
			
 
				+    const sleepCalls: number[] = [];
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => mockResponse(503, "down"),
			
 
				+      () => mockResponse(503, "down"),
			
 
				+      () => mockResponse(503, "down"),
			
 
				+      () => mockResponse(503, "down"),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [1, 2, 4],
			
 
				+      sleep: async (ms) => {
			
 
				+        sleepCalls.push(ms);
			
 
				+      },
			
 
				+    });
			
 
				+    const r = await p.embed("x");
			
 
				+    expect(r).toBeNull();
			
 
				+    expect(calls.length).toBe(4); // initial + 3 retries
			
 
				+    expect(sleepCalls).toEqual([1, 2, 4]);
			
 
				+  });
			
 
				+
			
 
				+  test("default backoff schedule is 1s/4s/16s", () => {
			
 
				+    expect(RETRY_BACKOFFS_MS).toEqual([1000, 4000, 16000]);
			
 
				+  });
			
 
				+
			
 
				+  test("4xx (non-429) → immediate failure, no retry", async () => {
			
 
				+    const sleepCalls: number[] = [];
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => mockResponse(401, { error: "unauthorized" }),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [10, 20, 40],
			
 
				+      sleep: async (ms) => {
			
 
				+        sleepCalls.push(ms);
			
 
				+      },
			
 
				+    });
			
 
				+    const r = await p.embed("x");
			
 
				+    expect(r).toBeNull();
			
 
				+    expect(calls.length).toBe(1); // no retries
			
 
				+    expect(sleepCalls).toEqual([]);
			
 
				+  });
			
 
				+
			
 
				+  test("404 → immediate failure, no retry", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => mockResponse(404, "not found"),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [1],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    await p.embed("x");
			
 
				+    expect(calls.length).toBe(1);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── Malformed responses ─────────────────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — malformed responses", () => {
			
 
				+  test("malformed JSON → null result", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => new Response("not-json{}", { status: 200 }),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    const r = await p.embed("x");
			
 
				+    expect(r).toBeNull();
			
 
				+  });
			
 
				+
			
 
				+  test("missing data array → null result", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => mockResponse(200, { object: "list", model: "x" }),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    const r = await p.embed("x");
			
 
				+    expect(r).toBeNull();
			
 
				+  });
			
 
				+
			
 
				+  test("data item index out of range → null result", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () =>
			
 
				+        mockResponse(200, {
			
 
				+          object: "list",
			
 
				+          data: [
			
 
				+            { index: 5, embedding: [0.1, 0.2] }, // out of range for 1 input
			
 
				+          ],
			
 
				+        }),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    const r = await p.embed("x");
			
 
				+    expect(r).toBeNull();
			
 
				+  });
			
 
				+
			
 
				+  test("response handles out-of-order data array (sorts by index)", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () =>
			
 
				+        mockResponse(200, {
			
 
				+          object: "list",
			
 
				+          data: [
			
 
				+            { index: 1, embedding: [0.7, 0.8] },
			
 
				+            { index: 0, embedding: [0.1, 0.2] },
			
 
				+          ],
			
 
				+        }),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    const result = await p.embedBatch(["zero", "one"]);
			
 
				+    expect(result.length).toBe(2);
			
 
				+    expect(result[0]!.embedding).toEqual([0.1, 0.2]);
			
 
				+    expect(result[1]!.embedding).toEqual([0.7, 0.8]);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── Timeout / abort ─────────────────────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — timeout and abort", () => {
			
 
				+  test("user abort signal → null result + no further calls", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () => embeddingsResponse(["a", "b"], 2),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      batchSize: 1,
			
 
				+    });
			
 
				+    const ctrl = new AbortController();
			
 
				+    ctrl.abort(new Error("user cancelled"));
			
 
				+    const result = await p.embedBatch(["a", "b"], { signal: ctrl.signal });
			
 
				+    expect(result).toEqual([null, null]);
			
 
				+    expect(calls.length).toBe(0); // signal aborted before first call
			
 
				+  });
			
 
				+
			
 
				+  test("per-attempt timeout aborts a slow request", async () => {
			
 
				+    let aborted = false;
			
 
				+    const fetchImpl = (async (_url: any, init?: RequestInit) => {
			
 
				+      return await new Promise<Response>((_resolve, reject) => {
			
 
				+        const sig = init?.signal;
			
 
				+        sig?.addEventListener("abort", () => {
			
 
				+          aborted = true;
			
 
				+          reject(new DOMException("aborted", "AbortError"));
			
 
				+        });
			
 
				+      });
			
 
				+    }) as typeof fetch;
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      timeoutMs: 50,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    const r = await p.embed("hello");
			
 
				+    expect(r).toBeNull();
			
 
				+    expect(aborted).toBe(true);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── Circuit breaker integration ─────────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — circuit breaker integration", () => {
			
 
				+  test("repeated failures eventually trip breaker → CircuitOpenError", async () => {
			
 
				+    // 4 chunks of size 1 = 4 sample slots. All fail with 401 → 4 failures → breaker opens.
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"), // shouldn't be reached
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      batchSize: 1,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    // First call: 4 sub-chunks, all fail, breaker opens during 4th
			
 
				+    const result1 = await p.embedBatch(["a", "b", "c", "d"]);
			
 
				+    expect(result1.every((x) => x === null)).toBe(true);
			
 
				+    expect(p.breaker.getState()).toBe("open");
			
 
				+
			
 
				+    // Second call: breaker fails fast
			
 
				+    await expect(p.embedBatch(["e"])).rejects.toBeInstanceOf(CircuitOpenError);
			
 
				+  });
			
 
				+
			
 
				+  test("breaker recovers after openDuration → success closes it", async () => {
			
 
				+    let now = 1_000_000;
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => embeddingsResponse(["recovered"], 2),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      batchSize: 1,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+      now: () => now,
			
 
				+    });
			
 
				+    // Override breaker with a shorter open duration
			
 
				+    (p as any).breaker = new CircuitBreaker({
			
 
				+      minSamples: 4,
			
 
				+      threshold: 0.5,
			
 
				+      openDurationMs: 1000,
			
 
				+      now: () => now,
			
 
				+    });
			
 
				+    await p.embedBatch(["a", "b", "c", "d"]);
			
 
				+    expect((p as any).breaker.getState()).toBe("open");
			
 
				+    now += 1500;
			
 
				+    expect((p as any).breaker.getState()).toBe("half-open");
			
 
				+    const r = await p.embed("recovered");
			
 
				+    expect(r).not.toBeNull();
			
 
				+    expect((p as any).breaker.getState()).toBe("closed");
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── Healthcheck ─────────────────────────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — healthcheck", () => {
			
 
				+  test("healthcheck pings GET /health when available", async () => {
			
 
				+    const { fetchImpl, calls } = makeFetchSequence([
			
 
				+      () =>
			
 
				+        mockResponse(200, {
			
 
				+          status: "ok",
			
 
				+          model: "embeddinggemma:300m",
			
 
				+        }),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    const h = await p.healthcheck();
			
 
				+    expect(h.ok).toBe(true);
			
 
				+    expect(calls.length).toBe(1);
			
 
				+    expect(calls[0]!.url).toBe("https://ai.example.com/health");
			
 
				+    expect(calls[0]!.init!.method).toBe("GET");
			
 
				+  });
			
 
				+
			
 
				+  test("healthcheck failure → falls through to embed probe", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => mockResponse(404, "no /health"),
			
 
				+      // Then fall back to /v1/embeddings probe
			
 
				+      () => embeddingsResponse(["healthcheck"], 4),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+    });
			
 
				+    const h = await p.healthcheck();
			
 
				+    // 404 isn't an exception, it returns ok:false from the /health branch
			
 
				+    // The fallback probe is only triggered on actual exceptions.
			
 
				+    expect(h.ok).toBe(false);
			
 
				+    expect(h.detail).toContain("404");
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── HttpError ───────────────────────────────────────
			
 
				+
			
 
				+describe("HttpError", () => {
			
 
				+  test("preserves status and body preview", () => {
			
 
				+    const err = new HttpError(429, "rate limit exceeded");
			
 
				+    expect(err.status).toBe(429);
			
 
				+    expect(err.bodyPreview).toBe("rate limit exceeded");
			
 
				+    expect(err.message).toContain("HTTP 429");
			
 
				+  });
			
 
				+  test("truncates long bodies in message", () => {
			
 
				+    const longBody = "x".repeat(500);
			
 
				+    const err = new HttpError(500, longBody);
			
 
				+    expect(err.message.length).toBeLessThan(longBody.length + 200);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── dispose ─────────────────────────────────────────
			
 
				+
			
 
				+describe("OpenAIEmbeddingsProvider — dispose", () => {
			
 
				+  test("dispose resets the breaker", async () => {
			
 
				+    const { fetchImpl } = makeFetchSequence([
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+      () => mockResponse(401, "fail"),
			
 
				+    ]);
			
 
				+    const p = new OpenAIEmbeddingsProvider({
			
 
				+      endpoint: "https://ai.example.com",
			
 
				+      fetchImpl,
			
 
				+      batchSize: 1,
			
 
				+      retryBackoffsMs: [],
			
 
				+      sleep: async () => {},
			
 
				+    });
			
 
				+    await p.embedBatch(["a", "b", "c", "d"]);
			
 
				+    expect(p.breaker.getState()).toBe("open");
			
 
				+    await p.dispose();
			
 
				+    expect(p.breaker.getState()).toBe("closed");
			
 
				+  });
			
 
				+});
			
--- a/test/embedding-provider.test.ts
+++ b/test/embedding-provider.test.ts
@@ -0,0 +1,63 @@
 
				+/**
			
 
				+ * embedding-provider.test.ts - Tests for the EmbeddingProvider abstraction
			
 
				+ * (interface-level: model-id guard, ModelMismatchError).
			
 
				+ */
			
 
				+
			
 
				+import { describe, test, expect } from "vitest";
			
 
				+import {
			
 
				+  ModelMismatchError,
			
 
				+  assertModelCompatible,
			
 
				+} from "../src/embedding/provider.js";
			
 
				+
			
 
				+describe("assertModelCompatible", () => {
			
 
				+  test("empty existingModels → no throw (fresh DB)", () => {
			
 
				+    expect(() => assertModelCompatible("anything", [])).not.toThrow();
			
 
				+  });
			
 
				+
			
 
				+  test("matching model id → no throw", () => {
			
 
				+    expect(() =>
			
 
				+      assertModelCompatible("embeddinggemma", ["embeddinggemma"]),
			
 
				+    ).not.toThrow();
			
 
				+  });
			
 
				+
			
 
				+  test("matching one of several → no throw", () => {
			
 
				+    expect(() =>
			
 
				+      assertModelCompatible("embeddinggemma", ["embeddinggemma", "qwen3-embed"]),
			
 
				+    ).not.toThrow();
			
 
				+  });
			
 
				+
			
 
				+  test("mismatch throws ModelMismatchError", () => {
			
 
				+    expect(() =>
			
 
				+      assertModelCompatible("openai-text-embedding", ["embeddinggemma"]),
			
 
				+    ).toThrow(ModelMismatchError);
			
 
				+  });
			
 
				+
			
 
				+  test("error message lists existing models + provider model", () => {
			
 
				+    try {
			
 
				+      assertModelCompatible("provider-x", ["model-a", "model-b"]);
			
 
				+      throw new Error("should have thrown");
			
 
				+    } catch (err) {
			
 
				+      expect(err).toBeInstanceOf(ModelMismatchError);
			
 
				+      const e = err as ModelMismatchError;
			
 
				+      expect(e.providerModel).toBe("provider-x");
			
 
				+      expect(e.existingModels).toEqual(["model-a", "model-b"]);
			
 
				+      expect(e.message).toContain("provider-x");
			
 
				+      expect(e.message).toContain("model-a");
			
 
				+      expect(e.message).toContain("qmd embed -f");
			
 
				+      expect(e.message).toContain("QMD_EMBED_MODEL_ID");
			
 
				+    }
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+describe("ModelMismatchError", () => {
			
 
				+  test("name set correctly", () => {
			
 
				+    const err = new ModelMismatchError("a", ["b"]);
			
 
				+    expect(err.name).toBe("ModelMismatchError");
			
 
				+  });
			
 
				+
			
 
				+  test("instanceof Error", () => {
			
 
				+    const err = new ModelMismatchError("a", ["b"]);
			
 
				+    expect(err).toBeInstanceOf(Error);
			
 
				+    expect(err).toBeInstanceOf(ModelMismatchError);
			
 
				+  });
			
 
				+});
			
--- a/test/embedding-store-integration.test.ts
+++ b/test/embedding-store-integration.test.ts
@@ -0,0 +1,216 @@
 
				+/**
			
 
				+ * embedding-store-integration.test.ts - Tests for the
			
 
				+ * generateEmbeddings() / EmbeddingProvider integration in store.ts.
			
 
				+ *
			
 
				+ * Uses an in-memory SQLite + a stub EmbeddingProvider to avoid loading
			
 
				+ * node-llama-cpp models. Verifies:
			
 
				+ *   - Provider's embedBatch is called when options.embedProvider is set
			
 
				+ *   - Model-id guard throws ModelMismatchError on mismatch
			
 
				+ *   - Force re-embed bypasses the guard
			
 
				+ *   - getDistinctEmbeddingModels reads content_vectors correctly
			
 
				+ */
			
 
				+
			
 
				+import { describe, test, expect, beforeEach, afterEach } from "vitest";
			
 
				+import { mkdtempSync, rmSync } from "node:fs";
			
 
				+import { tmpdir } from "node:os";
			
 
				+import { join } from "node:path";
			
 
				+import {
			
 
				+  createStore,
			
 
				+  generateEmbeddings,
			
 
				+  getDistinctEmbeddingModels,
			
 
				+  insertEmbedding,
			
 
				+  type Store,
			
 
				+} from "../src/store.js";
			
 
				+import {
			
 
				+  ModelMismatchError,
			
 
				+  type EmbeddingProvider,
			
 
				+  type ProviderEmbedding,
			
 
				+  type ProviderHealth,
			
 
				+} from "../src/embedding/provider.js";
			
 
				+
			
 
				+// ─────────────────────────── Stub provider ───────────────────────────────────
			
 
				+
			
 
				+class StubProvider implements EmbeddingProvider {
			
 
				+  readonly kind = "openai" as const;
			
 
				+  readonly modelId: string;
			
 
				+  readonly dim: number;
			
 
				+  embedBatchCalls = 0;
			
 
				+  embedCalls = 0;
			
 
				+  totalTextsEmbedded = 0;
			
 
				+
			
 
				+  constructor(modelId: string, dim = 4) {
			
 
				+    this.modelId = modelId;
			
 
				+    this.dim = dim;
			
 
				+  }
			
 
				+
			
 
				+  getModelId(): string {
			
 
				+    return this.modelId;
			
 
				+  }
			
 
				+  getDimensions(): number | undefined {
			
 
				+    return this.dim;
			
 
				+  }
			
 
				+  async healthcheck(): Promise<ProviderHealth> {
			
 
				+    return { ok: true, model: this.modelId, dimensions: this.dim };
			
 
				+  }
			
 
				+  async embed(text: string): Promise<ProviderEmbedding | null> {
			
 
				+    this.embedCalls++;
			
 
				+    this.totalTextsEmbedded++;
			
 
				+    return { embedding: this.fakeEmbed(text), model: this.modelId };
			
 
				+  }
			
 
				+  async embedBatch(texts: string[]): Promise<(ProviderEmbedding | null)[]> {
			
 
				+    this.embedBatchCalls++;
			
 
				+    this.totalTextsEmbedded += texts.length;
			
 
				+    return texts.map((t) => ({ embedding: this.fakeEmbed(t), model: this.modelId }));
			
 
				+  }
			
 
				+  async dispose(): Promise<void> {}
			
 
				+
			
 
				+  private fakeEmbed(text: string): number[] {
			
 
				+    return Array.from({ length: this.dim }, (_, i) => (text.length + i) * 0.01);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// ─────────────────────────── Test setup ──────────────────────────────────────
			
 
				+
			
 
				+let workDir: string;
			
 
				+let store: Store;
			
 
				+
			
 
				+beforeEach(() => {
			
 
				+  workDir = mkdtempSync(join(tmpdir(), "qmd-store-int-test-"));
			
 
				+  process.env.INDEX_PATH = join(workDir, "index.sqlite");
			
 
				+  store = createStore(process.env.INDEX_PATH);
			
 
				+  // Insert content + documents with the bare-minimum schema. The content
			
 
				+  // body needs to be non-empty so chunkDocumentByTokens emits at least one
			
 
				+  // chunk per doc.
			
 
				+  const now = "2026-04-27T00:00:00Z";
			
 
				+  store.db
			
 
				+    .prepare(`INSERT INTO content (hash, doc, created_at) VALUES (?, ?, ?)`)
			
 
				+    .run("hash1", "Document one body content here that is long enough to chunk.", now);
			
 
				+  store.db
			
 
				+    .prepare(`INSERT INTO content (hash, doc, created_at) VALUES (?, ?, ?)`)
			
 
				+    .run("hash2", "Document two body content there with different words to chunk.", now);
			
 
				+  store.db
			
 
				+    .prepare(
			
 
				+      `INSERT INTO documents (hash, collection, path, title, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, ?)`,
			
 
				+    )
			
 
				+    .run("hash1", "test", "one.md", "One", now, now, 1);
			
 
				+  store.db
			
 
				+    .prepare(
			
 
				+      `INSERT INTO documents (hash, collection, path, title, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, ?)`,
			
 
				+    )
			
 
				+    .run("hash2", "test", "two.md", "Two", now, now, 1);
			
 
				+});
			
 
				+
			
 
				+afterEach(() => {
			
 
				+  try {
			
 
				+    store.close();
			
 
				+  } catch { /* ignore */ }
			
 
				+  delete process.env.INDEX_PATH;
			
 
				+  rmSync(workDir, { recursive: true, force: true });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── getDistinctEmbeddingModels ──────────────────────
			
 
				+
			
 
				+describe("getDistinctEmbeddingModels", () => {
			
 
				+  test("returns [] when content_vectors is empty", () => {
			
 
				+    expect(getDistinctEmbeddingModels(store.db)).toEqual([]);
			
 
				+  });
			
 
				+
			
 
				+  test("returns distinct model strings", () => {
			
 
				+    store.ensureVecTable(4);
			
 
				+    insertEmbedding(store.db, "hash1", 0, 0, new Float32Array([0.1, 0.2, 0.3, 0.4]), "embeddinggemma", "2026-04-27T00:00:00Z");
			
 
				+    insertEmbedding(store.db, "hash2", 0, 0, new Float32Array([0.5, 0.6, 0.7, 0.8]), "embeddinggemma", "2026-04-27T00:00:00Z");
			
 
				+    expect(getDistinctEmbeddingModels(store.db)).toEqual(["embeddinggemma"]);
			
 
				+  });
			
 
				+
			
 
				+  test("returns multiple distinct models when present", () => {
			
 
				+    store.ensureVecTable(4);
			
 
				+    insertEmbedding(store.db, "hash1", 0, 0, new Float32Array([0.1, 0.2, 0.3, 0.4]), "model-a", "2026-04-27T00:00:00Z");
			
 
				+    insertEmbedding(store.db, "hash2", 0, 0, new Float32Array([0.5, 0.6, 0.7, 0.8]), "model-b", "2026-04-27T00:00:00Z");
			
 
				+    const models = getDistinctEmbeddingModels(store.db).sort();
			
 
				+    expect(models).toEqual(["model-a", "model-b"]);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+// ─────────────────────────── generateEmbeddings + provider ───────────────────
			
 
				+
			
 
				+describe("generateEmbeddings with EmbeddingProvider", () => {
			
 
				+  test("uses provider.embedBatch when supplied", async () => {
			
 
				+    const provider = new StubProvider("embeddinggemma", 4);
			
 
				+    const result = await generateEmbeddings(store, {
			
 
				+      embedProvider: provider,
			
 
				+      // Use small batches to keep test fast
			
 
				+      maxDocsPerBatch: 64,
			
 
				+    });
			
 
				+    expect(result.docsProcessed).toBe(2);
			
 
				+    expect(result.chunksEmbedded).toBeGreaterThan(0);
			
 
				+    expect(result.errors).toBe(0);
			
 
				+    expect(provider.embedBatchCalls + provider.embedCalls).toBeGreaterThan(0);
			
 
				+    expect(provider.totalTextsEmbedded).toBeGreaterThan(0);
			
 
				+  });
			
 
				+
			
 
				+  test("model-id guard throws ModelMismatchError on mismatch", async () => {
			
 
				+    // Pre-populate content_vectors with a different model id
			
 
				+    store.ensureVecTable(4);
			
 
				+    insertEmbedding(
			
 
				+      store.db,
			
 
				+      "hash1",
			
 
				+      0,
			
 
				+      0,
			
 
				+      new Float32Array([0.1, 0.2, 0.3, 0.4]),
			
 
				+      "old-model",
			
 
				+      "2026-04-27T00:00:00Z",
			
 
				+    );
			
 
				+    const provider = new StubProvider("new-model", 4);
			
 
				+    await expect(
			
 
				+      generateEmbeddings(store, { embedProvider: provider }),
			
 
				+    ).rejects.toBeInstanceOf(ModelMismatchError);
			
 
				+  });
			
 
				+
			
 
				+  test("model-id matches → proceeds", async () => {
			
 
				+    store.ensureVecTable(4);
			
 
				+    insertEmbedding(
			
 
				+      store.db,
			
 
				+      "hash1",
			
 
				+      0,
			
 
				+      0,
			
 
				+      new Float32Array([0.1, 0.2, 0.3, 0.4]),
			
 
				+      "embeddinggemma",
			
 
				+      "2026-04-27T00:00:00Z",
			
 
				+    );
			
 
				+    const provider = new StubProvider("embeddinggemma", 4);
			
 
				+    const result = await generateEmbeddings(store, { embedProvider: provider });
			
 
				+    // Only hash2 needs embedding (hash1 already has one)
			
 
				+    expect(result.docsProcessed).toBeLessThanOrEqual(2);
			
 
				+    expect(result.errors).toBe(0);
			
 
				+  });
			
 
				+
			
 
				+  test("force=true bypasses model-id guard", async () => {
			
 
				+    store.ensureVecTable(4);
			
 
				+    insertEmbedding(
			
 
				+      store.db,
			
 
				+      "hash1",
			
 
				+      0,
			
 
				+      0,
			
 
				+      new Float32Array([0.1, 0.2, 0.3, 0.4]),
			
 
				+      "old-model",
			
 
				+      "2026-04-27T00:00:00Z",
			
 
				+    );
			
 
				+    const provider = new StubProvider("new-model", 4);
			
 
				+    // force=true wipes content_vectors first → guard sees empty → no throw
			
 
				+    const result = await generateEmbeddings(store, {
			
 
				+      embedProvider: provider,
			
 
				+      force: true,
			
 
				+    });
			
 
				+    expect(result.docsProcessed).toBe(2);
			
 
				+    expect(result.errors).toBe(0);
			
 
				+    // Now only "new-model" should be in the DB
			
 
				+    expect(getDistinctEmbeddingModels(store.db)).toEqual(["new-model"]);
			
 
				+  });
			
 
				+
			
 
				+  test("empty DB → no guard issue, anything goes", async () => {
			
 
				+    expect(getDistinctEmbeddingModels(store.db)).toEqual([]);
			
 
				+    const provider = new StubProvider("anything-id", 4);
			
 
				+    const result = await generateEmbeddings(store, { embedProvider: provider });
			
 
				+    expect(result.errors).toBe(0);
			
 
				+  });
			
 
				+});