openai.d.ts 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. /**
  2. * openai.ts - OpenAI-compatible HTTP embedding provider
  3. *
  4. * Talks to any endpoint that implements `POST /v1/embeddings` with the OpenAI
  5. * shape: request `{model, input: string|string[]}`, response
  6. * `{data: [{embedding: number[], index: number}, ...]}`.
  7. *
  8. * Used by qmd to delegate embeddings to a GPU worker (e.g. ai.mm.mk →
  9. * qmd-embed-worker on `models` LXC, RTX 4090) instead of running
  10. * node-llama-cpp locally.
  11. *
  12. * Features:
  13. * - Batches input in groups of ≤64 (configurable via QMD_EMBED_BATCH_SIZE)
  14. * - Retries 429 / 503 with exponential backoff (1s, 4s, 16s)
  15. * - 4xx (non-429) → no retry, count as failure
  16. * - Circuit breaker: >50% failures in a 60s window → OPEN for 5 min,
  17. * callers can use this to fall back to a local provider
  18. * - Per-call timeout via AbortSignal (default QMD_EMBED_TIMEOUT_MS=30000)
  19. * - Healthcheck via `GET /health` if available, else a probe embed call
  20. */
  21. import type { EmbeddingProvider, ProviderEmbedOptions, ProviderEmbedding, ProviderHealth, ProviderKind } from "./provider.js";
  /**
   * Number of inputs sent per embeddings request when no batch size is
   * configured.
   *
   * Most OpenAI-compatible embedding endpoints accept up to 2048 inputs per
   * call; the cap of 64 is a deliberate choice for memory and latency.
   */
  export declare const DEFAULT_BATCH_SIZE = 64;
  /**
   * Per-request timeout in milliseconds (30 s) applied when none is configured.
   *
   * In practice embeddinggemma-300M on an RTX 4090 needs under 500 ms for a
   * batch of 64, so 30 s is a safe upper bound.
   */
  export declare const DEFAULT_TIMEOUT_MS = 30000;
  /**
   * Backoff delays, in milliseconds, used when the upstream answers 429 or 503.
   *
   * Documented as 3 attempts total (the initial request plus 2 retries),
   * following the issue spec "1s/4s/16s".
   *
   * NOTE(review): a three-entry 1s/4s/16s schedule would normally imply three
   * retries (four attempts) — confirm the attempt count against the
   * implementation, which is not visible from this declaration.
   */
  export declare const RETRY_BACKOFFS_MS: ReadonlyArray<number>;
  /**
   * Circuit-breaker defaults. The breaker flips OPEN once the error rate within
   * the window exceeds the threshold; while OPEN every call fails fast so the
   * caller can fall back.
   */

  /** Length of the window, in milliseconds, over which the error rate is measured. */
  export declare const CIRCUIT_WINDOW_MS = 60000;

  /**
   * How long, in milliseconds, the breaker stays OPEN. The literal value is not
   * part of this declaration; the file header documents it as 5 minutes.
   */
  export declare const CIRCUIT_OPEN_DURATION_MS: number;

  /** Failure rate (0.5 = 50%) that must be exceeded for the breaker to open. */
  export declare const CIRCUIT_FAILURE_RATE_THRESHOLD = 0.5;

  /**
   * Minimum sample count for the breaker.
   * NOTE(review): presumably the number of samples required before the failure
   * rate is evaluated — confirm against the implementation.
   */
  export declare const CIRCUIT_MIN_SAMPLES = 4;
  /**
   * Options accepted by the `OpenAIEmbeddingsProvider` constructor.
   * Only `endpoint` is required; every other field is optional.
   */
  export type OpenAIProviderConfig = {
    /** Base URL of the endpoint, without a trailing slash — e.g. "https://ai.mm.mk". */
    endpoint: string;

    /** Bearer token; when set it is sent as `Authorization: Bearer ...`. */
    apiKey?: string;

    /**
     * Stable model identifier reported through `getModelId()`.
     * Falls back to "embeddinggemma" so it matches qmd's existing DB rows.
     */
    modelId?: string;

    /**
     * Model name placed in the HTTP request body. It frequently differs from
     * `modelId` (e.g. modelId="embeddinggemma" but upstream model="embeddinggemma:300m").
     */
    upstreamModel?: string;

    /** Upper bound on inputs per request (default DEFAULT_BATCH_SIZE = 64). */
    batchSize?: number;

    /** Timeout for each request, in ms (default DEFAULT_TIMEOUT_MS = 30_000). */
    timeoutMs?: number;

    /** Replacement for the global `fetch`; intended for tests. */
    fetchImpl?: typeof fetch;

    /** Replacement retry schedule; intended for tests. Defaults to RETRY_BACKOFFS_MS. */
    retryBackoffsMs?: ReadonlyArray<number>;

    /** Replacement sleep function; intended for tests. Defaults to setTimeout. */
    sleep?: (ms: number) => Promise<void>;

    /** Replacement clock; intended for tests. Defaults to Date.now. */
    now?: () => number;
  };
  /**
   * JSON body returned by an OpenAI-shaped `POST /v1/embeddings` call.
   * `data` is the only required field; each entry carries one embedding vector
   * together with its `index`.
   */
  export type OpenAIEmbeddingsResponse = {
    object?: string;
    model?: string;
    /** One entry per returned embedding. */
    data: {
      object?: string;
      // NOTE(review): presumably the position of the matching input in the
      // request — confirm against the endpoint's specification.
      index: number;
      embedding: Array<number>;
    }[];
    /** Optional token accounting reported by the server. */
    usage?: {
      prompt_tokens?: number;
      total_tokens?: number;
    };
  };
  /**
   * The states a circuit breaker can report. Exported so tests can refer to it.
   */
  export type CircuitState =
    | "closed"
    | "open"
    | "half-open";
  /**
   * Tells whether a response with the given HTTP status should be retried.
   *
   * 429 (Too Many Requests) and 503 (Service Unavailable) are retried; other
   * 4xx statuses are not.
   *
   * @param status HTTP status code of the response.
   * @returns `true` when the request should be attempted again.
   */
  export declare function isRetryableStatus(status: number): boolean;
  /**
   * Splits `items` into consecutive pieces holding at most `size` elements each.
   *
   * @param items Array to split.
   * @param size Maximum length of each piece; MUST be ≥ 1.
   * @returns The pieces, as an array of arrays.
   */
  export declare function chunkArray<T>(items: T[], size: number): Array<T[]>;
  /**
   * Circuit breaker driven by a sliding window of call outcomes.
   *
   * It considers the last N samples (min 4) inside a 60-second window and
   * flips OPEN once the failure rate exceeds 50%. After 5 minutes it resets
   * itself to HALF-OPEN; the next probe then either closes the breaker
   * (success) or re-opens it (failure).
   */
  export declare class CircuitBreaker {
    /**
     * @param opts Optional overrides for the window length, open duration,
     *   failure-rate threshold, minimum sample count and clock.
     */
    constructor(opts?: {
      windowMs?: number;
      openDurationMs?: number;
      threshold?: number;
      minSamples?: number;
      now?: () => number;
    });

    /** Reports the breaker's current state. */
    getState(): CircuitState;

    /**
     * Whether the caller should short-circuit (skip the HTTP call and fall
     * back). Not a pure query: it may move the breaker from OPEN to HALF-OPEN
     * when the open window has expired.
     */
    shouldFailFast(): boolean;

    /** Registers a call that succeeded. */
    recordSuccess(): void;

    /** Registers a call that failed; this may flip the breaker to OPEN. */
    recordFailure(): void;

    /** Forcibly resets the breaker (used by tests / admin). */
    reset(): void;

    // Internal state and configuration.
    private samples;
    private state;
    private openedAt;
    private readonly windowMs;
    private readonly openDurationMs;
    private readonly threshold;
    private readonly minSamples;
    private readonly now;

    // Internal helpers.
    private pushSample;
    private evaluate;
    private tickAutoReset;
  }
  /**
   * Error raised when a call is short-circuited because the circuit breaker is
   * OPEN. A caller such as a fallback wrapper can catch it and switch to the
   * local provider.
   */
  export declare class CircuitOpenError extends Error {
    /** @param message Optional error message. */
    constructor(message?: string);
  }
  /**
   * A persistent (non-retryable) HTTP error returned by the upstream service,
   * carrying the status code.
   */
  export declare class HttpError extends Error {
    /** HTTP status code of the failed response. */
    readonly status: number;

    // NOTE(review): presumably a shortened copy of the response body — confirm
    // the truncation rule in the implementation.
    readonly bodyPreview: string;

    /**
     * @param status HTTP status code of the failed response.
     * @param bodyPreview Text stored in {@link HttpError.bodyPreview}.
     */
    constructor(status: number, bodyPreview: string);
  }
  /**
   * Embedding provider backed by an OpenAI-compatible HTTP endpoint
   * (`POST /v1/embeddings`).
   */
  export declare class OpenAIEmbeddingsProvider implements EmbeddingProvider {
    readonly kind: ProviderKind;

    /**
     * Circuit breaker owned by this provider and exposed publicly.
     * NOTE(review): presumably consulted before each upstream call — confirm
     * in the implementation.
     */
    readonly breaker: CircuitBreaker;

    // Configuration and internal state.
    private readonly endpoint;
    private readonly apiKey?;
    private readonly modelId;
    private readonly upstreamModel;
    private readonly batchSize;
    private readonly timeoutMs;
    private readonly fetchImpl;
    private readonly retryBackoffsMs;
    private readonly sleep;
    private readonly now;
    private dimensions;

    /** @param config Endpoint and tuning options; see {@link OpenAIProviderConfig}. */
    constructor(config: OpenAIProviderConfig);

    /** Returns the stable model identifier (see `OpenAIProviderConfig.modelId`). */
    getModelId(): string;

    /**
     * Returns the embedding dimensionality, or `undefined`.
     * NOTE(review): presumably `undefined` until a response has been seen —
     * confirm in the implementation.
     */
    getDimensions(): number | undefined;

    /**
     * Checks whether the upstream is usable. Per the file header this uses
     * `GET /health` if available, else a probe embed call.
     *
     * @param signal Optional abort signal for the check.
     */
    healthcheck(signal?: AbortSignal): Promise<ProviderHealth>;

    /**
     * Embeds one text.
     * NOTE(review): the conditions under which `null` is returned are not
     * visible from this declaration.
     */
    embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>;

    /**
     * Embeds several texts and resolves to an array whose entries may be `null`.
     * NOTE(review): presumably one entry per input, in input order — confirm.
     */
    embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<Array<ProviderEmbedding | null>>;

    /**
     * Disposes the provider.
     * NOTE(review): what is released is not visible from this declaration.
     */
    dispose(): Promise<void>;

    private buildHeaders;

    /**
     * One logical HTTP request, retried on 429/503. The embeddings it yields
     * are indexed the same way as `texts`. Throws when a failure is not
     * retryable or when every attempt has been used up.
     */
    private requestWithRetry;

    /**
     * Performs a single `POST /v1/embeddings` attempt. Never retries.
     */
    private requestOnce;
  }