/**
 * llm.ts - LLM abstraction layer for QMD
 *
 * Provides a clean interface for LLM operations with an Ollama implementation.
 * All raw fetch calls to LLM APIs should go through this module.
 */

// =============================================================================
// Types
// =============================================================================

/**
 * Token with log probability
 */
export type TokenLogProb = {
  token: string;
  logprob: number;
};

/**
 * Embedding result
 */
export type EmbeddingResult = {
  embedding: number[];
  model: string;
};

/**
 * Generation result with optional logprobs
 */
export type GenerateResult = {
  text: string;
  model: string;
  logprobs?: TokenLogProb[];
  done: boolean;
};

/**
 * Rerank result for a single document
 */
export type RerankDocumentResult = {
  file: string;
  relevant: boolean;
  confidence: number;
  score: number;
  rawToken: string;
  logprob: number;
};

/**
 * Batch rerank result
 */
export type RerankResult = {
  results: RerankDocumentResult[];
  model: string;
};

/**
 * Model info
 */
export type ModelInfo = {
  name: string;
  exists: boolean;
  size?: number;
  modifiedAt?: string;
};

/**
 * Options for embedding
 */
export type EmbedOptions = {
  model: string;
  isQuery?: boolean;
  title?: string;
};

/**
 * Options for text generation
 */
export type GenerateOptions = {
  model: string;
  maxTokens?: number;
  temperature?: number;
  logprobs?: boolean;
  raw?: boolean;
  stop?: string[];
};

/**
 * Options for reranking
 */
export type RerankOptions = {
  model: string;
  batchSize?: number;
};

/**
 * Document to rerank
 */
export type RerankDocument = {
  file: string;
  text: string;
  title?: string;
};

// =============================================================================
// LLM Interface
// =============================================================================

/**
 * Abstract LLM interface - implement this for different backends
 */
export interface LLM {
  /**
   * Get embeddings for text.
   * @returns the embedding, or `null` when the backend call fails
   */
  embed(text: string, options: EmbedOptions): Promise<EmbeddingResult | null>;

  /**
   * Generate text completion.
   * @returns the completion, or `null` when the backend call fails
   */
  generate(prompt: string, options: GenerateOptions): Promise<GenerateResult | null>;

  /**
   * Check if a model exists
   */
  modelExists(model: string): Promise<ModelInfo>;

  /**
   * Pull a model (download if not available).
   * @returns `true` when the pull completed, `false` otherwise
   */
  pullModel(model: string, onProgress?: (progress: number) => void): Promise<boolean>;

  // ==========================================================================
  // High-level abstractions
  // ==========================================================================

  /**
   * Expand a search query into multiple variations.
   * @returns the original query followed by the generated variations
   */
  expandQuery(query: string, model: string, numVariations?: number): Promise<string[]>;

  /**
   * Rerank documents by relevance to a query.
   * Returns list of documents with relevance scores and boolean judgments
   */
  rerank(query: string, documents: RerankDocument[], options: RerankOptions): Promise<RerankResult>;

  /**
   * Quick relevance check - returns just boolean judgments with logprobs.
   * More efficient than full rerank when you just need yes/no
   */
  rerankerLogprobsCheck(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions
  ): Promise<RerankDocumentResult[]>;
}

// =============================================================================
// Ollama Implementation
// =============================================================================

export type OllamaConfig = {
  baseUrl?: string;
  defaultEmbedModel?: string;
  defaultGenerateModel?: string;
  defaultRerankModel?: string;
};

const DEFAULT_OLLAMA_URL = "http://localhost:11434";
const DEFAULT_EMBED_MODEL = "embeddinggemma";
const DEFAULT_GENERATE_MODEL = "qwen3:0.6b";
const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";

/** Token budget used when the caller does not set `maxTokens`. */
const DEFAULT_MAX_TOKENS = 150;
/** Number of documents reranked concurrently when `batchSize` is absent or invalid. */
const DEFAULT_RERANK_BATCH_SIZE = 5;
/** Documents longer than this are truncated (with a trailing "...") before reranking. */
const MAX_RERANK_DOC_CHARS = 4000;

/**
 * Format text for embedding query
 */
export function formatQueryForEmbedding(query: string): string {
  return `task: search result | query: ${query}`;
}

/**
 * Format text for embedding document.
 * A missing or empty title is rendered as "none".
 */
export function formatDocForEmbedding(text: string, title?: string): string {
  return `title: ${title || "none"} | text: ${text}`;
}

/**
 * Ollama LLM implementation.
 *
 * Every method is best-effort: HTTP errors, network failures and malformed
 * responses are reported through the return value (`null`, `false`, or
 * `exists: false`) rather than by throwing.
 */
export class Ollama implements LLM {
  private readonly baseUrl: string;
  private readonly defaultEmbedModel: string;
  private readonly defaultGenerateModel: string;
  private readonly defaultRerankModel: string;

  /**
   * @param config optional overrides; the base URL falls back to the
   *   `OLLAMA_URL` environment variable and then to the local default.
   */
  constructor(config: OllamaConfig = {}) {
    // `||` (not `??`) is deliberate throughout: an empty string means "use the default".
    this.baseUrl = config.baseUrl || process.env.OLLAMA_URL || DEFAULT_OLLAMA_URL;
    this.defaultEmbedModel = config.defaultEmbedModel || DEFAULT_EMBED_MODEL;
    this.defaultGenerateModel = config.defaultGenerateModel || DEFAULT_GENERATE_MODEL;
    this.defaultRerankModel = config.defaultRerankModel || DEFAULT_RERANK_MODEL;
  }

  /**
   * Get the base URL for this Ollama instance
   */
  getBaseUrl(): string {
    return this.baseUrl;
  }

  // ==========================================================================
  // Core API methods
  // ==========================================================================

  /**
   * Embed `text`, formatted as a query or as a document depending on
   * `options.isQuery`.
   * @returns the first embedding of the response, or `null` on any failure
   */
  async embed(text: string, options: EmbedOptions): Promise<EmbeddingResult | null> {
    const model = options.model || this.defaultEmbedModel;
    const formatted = options.isQuery
      ? formatQueryForEmbedding(text)
      : formatDocForEmbedding(text, options.title);

    const data = (await this.postJson("/api/embed", { model, input: formatted })) as {
      embeddings?: number[][];
    } | null;

    const embedding = data?.embeddings?.[0];
    if (!embedding) {
      return null;
    }
    return { embedding, model };
  }

  /**
   * Run a non-streaming completion.
   * @returns the generated text (plus per-token logprobs when the backend
   *   supplies them), or `null` on any failure
   */
  async generate(prompt: string, options: GenerateOptions): Promise<GenerateResult | null> {
    const model = options.model || this.defaultGenerateModel;

    const modelOptions: Record<string, unknown> = {
      num_predict: options.maxTokens ?? DEFAULT_MAX_TOKENS,
      temperature: options.temperature ?? 0,
    };
    if (options.stop) {
      modelOptions.stop = options.stop;
    }

    const requestBody: Record<string, unknown> = {
      model,
      prompt,
      stream: false,
      options: modelOptions,
    };
    if (options.logprobs) {
      requestBody.logprobs = true;
    }
    if (options.raw) {
      requestBody.raw = true;
    }

    const data = (await this.postJson("/api/generate", requestBody)) as {
      response?: string;
      done?: boolean;
      logprobs?: { tokens?: string[]; token_logprobs?: number[] };
    } | null;
    if (data === null) {
      return null;
    }

    // Pair tokens with their logprobs; only as many entries as both arrays provide.
    const tokens = data.logprobs?.tokens;
    const tokenLogprobs = data.logprobs?.token_logprobs;
    let logprobs: TokenLogProb[] | undefined;
    if (tokens && tokenLogprobs) {
      const pairCount = Math.min(tokens.length, tokenLogprobs.length);
      logprobs = Array.from({ length: pairCount }, (_, i) => ({
        token: tokens[i] ?? "",
        logprob: tokenLogprobs[i] ?? 0,
      }));
    }

    return {
      text: data.response || "",
      model,
      logprobs,
      done: data.done ?? true,
    };
  }

  /**
   * Ask the backend to describe `model`.
   * @returns `exists: true` with whatever size/modification data the response
   *   carries, or `exists: false` on any failure
   */
  async modelExists(model: string): Promise<ModelInfo> {
    const data = (await this.postJson("/api/show", { name: model })) as {
      size?: number;
      modified_at?: string;
    } | null;

    if (data === null) {
      return { name: model, exists: false };
    }
    return {
      name: model,
      exists: true,
      size: data.size,
      modifiedAt: data.modified_at,
    };
  }

  /**
   * Pull a model with a single non-streaming request.
   * `onProgress` is invoked once, with 100, after the request completes.
   * @returns `true` on success, `false` on any failure
   */
  async pullModel(model: string, onProgress?: (progress: number) => void): Promise<boolean> {
    // For non-streaming, we just wait for completion
    const data = await this.postJson("/api/pull", { name: model, stream: false });
    if (data === null) {
      return false;
    }
    try {
      onProgress?.(100);
      return true;
    } catch {
      // A throwing callback is reported as failure so this method never throws.
      return false;
    }
  }

  // ==========================================================================
  // High-level abstractions
  // ==========================================================================

  /**
   * Ask the model for alternative phrasings of `query`.
   * @param model model name; empty/omitted selects the default generate model
   * @param numVariations how many alternatives to request (default 2)
   * @returns `[query, ...variations]`; just `[query]` when generation fails or
   *   fewer than one variation is requested
   */
  async expandQuery(query: string, model?: string, numVariations: number = 2): Promise<string[]> {
    // A negative count would make slice() drop lines from the end instead.
    const wanted = Math.max(0, Math.floor(numVariations));
    if (!(wanted >= 1)) {
      return [query];
    }

    const useModel = model || this.defaultGenerateModel;

    // Built with explicit "\n" joins so the line structure the model is asked
    // to mirror ("one per line") cannot be lost to whitespace mangling.
    const prompt = [
      `You are a search query expander. Given a search query, generate ${wanted} alternative queries that would help find relevant documents.`,
      "",
      "Rules:",
      '- Use synonyms and related terminology (e.g., "craft" → "craftsmanship", "quality", "excellence")',
      '- Rephrase to capture different angles (e.g., "engineering culture" → "technical excellence", "developer practices")',
      '- Keep proper nouns and named concepts exactly as written (e.g., "Build a Business", "Stripe", "Shopify")',
      "- Each variation should be 3-8 words, natural search terms",
      '- Do NOT just append words like "search" or "find" or "documents"',
      "",
      `Query: "${query}"`,
      "",
      `Output exactly ${wanted} variations, one per line, no numbering or bullets:`,
    ].join("\n");

    const result = await this.generate(prompt, {
      model: useModel,
      maxTokens: DEFAULT_MAX_TOKENS,
      temperature: 0,
    });
    if (!result) {
      return [query];
    }

    // Parse response - filter out thinking blocks and clean up.
    const cleanText = result.text
      .replace(/<think>[\s\S]*?<\/think>/g, "")
      // A block cut off by the token limit has no closing tag: drop it to the end.
      .replace(/<think>[\s\S]*$/, "")
      .trim();

    const lines = cleanText
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => line.length > 2 && line.length < 100 && !line.startsWith("<"));

    return [query, ...lines.slice(0, wanted)];
  }

  /**
   * Judge every document and return the judgments sorted by descending score.
   */
  async rerank(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions
  ): Promise<RerankResult> {
    const results = await this.rerankerLogprobsCheck(query, documents, options);
    return {
      // `results` is a fresh array owned by this call, so in-place sort is safe.
      results: results.sort((a, b) => b.score - a.score),
      model: options.model || this.defaultRerankModel,
    };
  }

  /**
   * Judge every document, in input order, running `batchSize` requests
   * concurrently per batch.
   */
  async rerankerLogprobsCheck(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions
  ): Promise<RerankDocumentResult[]> {
    const model = options.model || this.defaultRerankModel;

    // Anything below 1 (0, negative, NaN) would stall or never advance the loop.
    const requested = Math.floor(options.batchSize ?? DEFAULT_RERANK_BATCH_SIZE);
    const batchSize = requested >= 1 ? requested : DEFAULT_RERANK_BATCH_SIZE;

    const results: RerankDocumentResult[] = [];

    // Process in batches
    for (let start = 0; start < documents.length; start += batchSize) {
      const batch = documents.slice(start, start + batchSize);
      const batchResults = await Promise.all(
        batch.map((doc) => this.rerankSingle(query, doc, model))
      );
      results.push(...batchResults);
    }

    return results;
  }

  /**
   * Rerank a single document - internal helper.
   * A failed generation yields a non-relevant result with score 0.
   */
  private async rerankSingle(
    query: string,
    doc: RerankDocument,
    model: string
  ): Promise<RerankDocumentResult> {
    const systemPrompt = `Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".`;
    const instruct = `Given a search query, determine if the following document is relevant to the query. Consider both direct matches and related concepts.`;

    // Fall back to the file's base name (without ".md"), then to the raw path.
    const docTitle = doc.title || doc.file.split("/").pop()?.replace(/\.md$/, "") || doc.file;
    const docPreview =
      doc.text.length > MAX_RERANK_DOC_CHARS
        ? doc.text.substring(0, MAX_RERANK_DOC_CHARS) + "..."
        : doc.text;

    // Qwen3-reranker prompt format with empty think tags.
    // NOTE(review): <Instruct>/<Query>/<Document> and the empty think block
    // follow the published Qwen3-Reranker template; the "<Document Title>"
    // label is a reconstruction - confirm against the intended prompt.
    const prompt = [
      "<|im_start|>system",
      `${systemPrompt}<|im_end|>`,
      "<|im_start|>user",
      `<Instruct>: ${instruct}`,
      `<Query>: ${query}`,
      `<Document Title>: ${docTitle}`,
      `<Document>: ${docPreview}<|im_end|>`,
      "<|im_start|>assistant",
      "<think>",
      "",
      "</think>",
      "",
      "",
    ].join("\n");

    const result = await this.generate(prompt, {
      model,
      maxTokens: 1,
      temperature: 0,
      logprobs: true,
      raw: true,
    });

    if (!result) {
      return {
        file: doc.file,
        relevant: false,
        confidence: 0,
        score: 0,
        rawToken: "",
        logprob: 0,
      };
    }

    return this.parseRerankResponse(doc.file, result);
  }

  /**
   * Parse rerank response into structured result.
   *
   * Confidence is exp(logprob) of the first token. When no logprobs are
   * present the logprob defaults to 0, i.e. confidence 1.
   */
  private parseRerankResponse(file: string, result: GenerateResult): RerankDocumentResult {
    const token = result.text.toLowerCase().trim();
    const firstToken = result.logprobs?.[0];
    const logprob = firstToken?.logprob ?? 0;
    const confidence = Math.exp(logprob);

    let relevant: boolean;
    let score: number;

    if (token.startsWith("yes")) {
      relevant = true;
      // Score: 0.5 base + up to 0.5 from confidence
      score = 0.5 + 0.5 * confidence;
    } else if (token.startsWith("no")) {
      relevant = false;
      // Score: up to 0.5 based on uncertainty (1 - confidence)
      score = 0.5 * (1 - confidence);
    } else {
      // Unknown token - neutral score
      relevant = false;
      score = 0.3;
    }

    return {
      file,
      relevant,
      confidence,
      score,
      rawToken: firstToken?.token ?? token,
      logprob,
    };
  }

  // ==========================================================================
  // Internal helpers
  // ==========================================================================

  /** Join the base URL and an API path, tolerating a trailing "/" on the base. */
  private endpoint(path: string): string {
    return `${this.baseUrl.replace(/\/+$/, "")}${path}`;
  }

  /**
   * POST a JSON payload and return the parsed JSON response.
   * @returns the decoded body, or `null` on a non-2xx status, a network
   *   error, or an undecodable body
   */
  private async postJson(path: string, payload: unknown): Promise<unknown> {
    try {
      const response = await fetch(this.endpoint(path), {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(payload),
      });
      if (!response.ok) {
        return null;
      }
      return await response.json();
    } catch {
      // Deliberate best-effort: callers translate null into their failure value.
      return null;
    }
  }
}

// =============================================================================
// Singleton for default Ollama instance
// =============================================================================

let defaultOllama: Ollama | null = null;

/**
 * Get the default Ollama instance (creates one if needed)
 */
export function getDefaultOllama(): Ollama {
  if (!defaultOllama) {
    defaultOllama = new Ollama();
  }
  return defaultOllama;
}

/**
 * Set a custom default Ollama instance (useful for testing).
 * Passing `null` clears it so the next `getDefaultOllama()` creates a new one.
 */
export function setDefaultOllama(ollama: Ollama | null): void {
  defaultOllama = ollama;
}