/**
 * llm.ts - LLM abstraction layer for QMD
 *
 * Provides a clean interface for LLM operations with an Ollama implementation.
 * All raw fetch calls to LLM APIs should go through this module.
 */
// =============================================================================
// Types
// =============================================================================

/**
 * A single generated token paired with its log probability.
 */
export type TokenLogProb = {
  /** Text of the token as reported by the backend. */
  token: string;
  /** Log probability of the token; `Math.exp(logprob)` is used as a confidence. */
  logprob: number;
};
/**
 * Result of embedding one piece of text.
 */
export type EmbeddingResult = {
  /** The embedding vector. */
  embedding: number[];
  /** Name of the model the embedding was requested from. */
  model: string;
};
/**
 * Result of a text generation request, with optional per-token logprobs.
 */
export type GenerateResult = {
  /** Generated text; an empty string when the backend returned none. */
  text: string;
  /** Name of the model the generation was requested from. */
  model: string;
  /** Per-token log probabilities; present only when the backend returned them. */
  logprobs?: TokenLogProb[];
  /** Completion flag from the backend; treated as `true` when the backend omits it. */
  done: boolean;
};
/**
 * Relevance judgment for a single document.
 *
 * Scoring as produced by the Ollama implementation in this file:
 * - answer starts with "yes": `score = 0.5 + 0.5 * confidence`
 * - answer starts with "no":  `score = 0.5 * (1 - confidence)`
 * - any other answer:         `score = 0.3`, `relevant = false`
 * - generation failed:        `score = 0`, `confidence = 0`, `rawToken = ""`
 */
export type RerankDocumentResult = {
  /** `file` of the document this judgment belongs to. */
  file: string;
  /** `true` only when the model's answer starts with "yes". */
  relevant: boolean;
  /** `Math.exp(logprob)` of the first generated token. */
  confidence: number;
  /** Combined ranking score; higher means more relevant. */
  score: number;
  /** First token as reported in the logprobs, else the lower-cased, trimmed answer text. */
  rawToken: string;
  /** Log probability of the first generated token; 0 when none was returned. */
  logprob: number;
};
/**
 * Result of reranking a batch of documents.
 */
export type RerankResult = {
  /** One judgment per input document; `Ollama.rerank` orders them by descending score. */
  results: RerankDocumentResult[];
  /** Name of the model used for reranking. */
  model: string;
};
/**
 * Availability information about a model.
 */
export type ModelInfo = {
  /** Model name that was looked up. */
  name: string;
  /** Whether the backend reported the model as available. */
  exists: boolean;
  /** `size` field from the backend response, when provided. */
  size?: number;
  /** `modified_at` field from the backend response, when provided. */
  modifiedAt?: string;
};
/**
 * Options for embedding.
 */
export type EmbedOptions = {
  /** Model name; an empty string makes `Ollama.embed` use its default embed model. */
  model: string;
  /** When true the text is formatted as a search query, otherwise as a document. */
  isQuery?: boolean;
  /** Document title used in the document format; "none" is used when absent. */
  title?: string;
};
/**
 * Options for text generation.
 */
export type GenerateOptions = {
  /** Model name; an empty string makes `Ollama.generate` use its default model. */
  model: string;
  /** Maximum number of tokens to generate (sent as `num_predict`); defaults to 150. */
  maxTokens?: number;
  /** Sampling temperature; defaults to 0. */
  temperature?: number;
  /** Ask the backend to return per-token log probabilities. */
  logprobs?: boolean;
  /** Send the request with `raw: true`. */
  raw?: boolean;
  /** Stop sequences forwarded to the backend. */
  stop?: string[];
};
/**
 * Options for reranking.
 */
export type RerankOptions = {
  /** Model name; an empty string makes the Ollama implementation use its default rerank model. */
  model: string;
  /** Number of documents judged concurrently per batch; defaults to 5. */
  batchSize?: number;
};
/**
 * Document to rerank.
 */
export type RerankDocument = {
  /** Identifier of the document; also the source of a fallback title. */
  file: string;
  /** Document body; the Ollama reranker only sends the first 4000 characters. */
  text: string;
  /** Optional title; when absent the file name without a `.md` suffix is used. */
  title?: string;
};
// =============================================================================
// LLM Interface
// =============================================================================

/**
 * Abstract LLM interface - implement this for different backends.
 */
export interface LLM {
  /**
   * Get an embedding for `text`.
   *
   * @returns The embedding, or `null` when none could be produced.
   */
  embed(text: string, options: EmbedOptions): Promise<EmbeddingResult | null>;

  /**
   * Generate a text completion for `prompt`.
   *
   * @returns The completion, or `null` when none could be produced.
   */
  generate(prompt: string, options: GenerateOptions): Promise<GenerateResult | null>;

  /**
   * Check if a model exists.
   */
  modelExists(model: string): Promise<ModelInfo>;

  /**
   * Pull a model (download if not available).
   *
   * @param onProgress Optional callback receiving a progress number.
   * @returns `true` when the pull succeeded.
   */
  pullModel(model: string, onProgress?: (progress: number) => void): Promise<boolean>;

  // ==========================================================================
  // High-level abstractions
  // ==========================================================================

  /**
   * Expand a search query into multiple variations.
   */
  expandQuery(query: string, model: string, numVariations?: number): Promise<string[]>;

  /**
   * Rerank documents by relevance to a query.
   * Returns the documents with relevance scores and boolean judgments.
   */
  rerank(query: string, documents: RerankDocument[], options: RerankOptions): Promise<RerankResult>;

  /**
   * Quick relevance check - returns the per-document judgments with logprobs,
   * without the wrapping `RerankResult`.
   */
  rerankerLogprobsCheck(query: string, documents: RerankDocument[], options: RerankOptions): Promise<RerankDocumentResult[]>;
}
// =============================================================================
// Ollama Implementation
// =============================================================================

/**
 * Construction options for the `Ollama` client. Every field is optional.
 */
export type OllamaConfig = {
  /** Base URL of the Ollama server; falls back to `OLLAMA_URL`, then the built-in default. */
  baseUrl?: string;
  /** Model used by `embed` when the call does not name one. */
  defaultEmbedModel?: string;
  /** Model used by `generate` and `expandQuery` when the call does not name one. */
  defaultGenerateModel?: string;
  /** Model used by the rerank methods when the call does not name one. */
  defaultRerankModel?: string;
};
/** Base URL used when neither the config nor `OLLAMA_URL` provides one. */
const DEFAULT_OLLAMA_URL = "http://localhost:11434";
/** Embedding model used when none is configured. */
const DEFAULT_EMBED_MODEL = "embeddinggemma";
/** Generation model used when none is configured. */
const DEFAULT_GENERATE_MODEL = "qwen3:0.6b";
/** Reranking model used when none is configured. */
const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
/**
 * Format a search query for the embedding model.
 *
 * @param query Raw query text; inserted verbatim.
 * @returns The query wrapped in the `task: ... | query: ...` prompt format.
 */
export function formatQueryForEmbedding(query: string): string {
  return `task: search result | query: ${query}`;
}
/**
 * Format a document for the embedding model.
 *
 * @param text Document body; inserted verbatim.
 * @param title Optional title; a missing or empty title is rendered as "none".
 * @returns The document wrapped in the `title: ... | text: ...` prompt format.
 */
export function formatDocForEmbedding(text: string, title?: string): string {
  return `title: ${title || "none"} | text: ${text}`;
}
/**
 * Ollama-backed implementation of the `LLM` interface.
 *
 * Every request goes through `fetch` against the configured base URL. Network
 * errors, non-OK HTTP statuses and unusable payloads are never thrown; they
 * surface as `null`, `false` or `{ exists: false }` depending on the method.
 */
export class Ollama implements LLM {
  private readonly baseUrl: string;
  private readonly defaultEmbedModel: string;
  private readonly defaultGenerateModel: string;
  private readonly defaultRerankModel: string;

  /**
   * @param config Optional overrides. A missing or empty `baseUrl` falls back
   *   to the `OLLAMA_URL` environment variable and then to the built-in
   *   default; missing or empty model names fall back to the built-in defaults.
   */
  constructor(config: OllamaConfig = {}) {
    const configuredUrl = config.baseUrl || process.env.OLLAMA_URL || DEFAULT_OLLAMA_URL;
    // Endpoints are appended as `${baseUrl}/api/...`, so a trailing slash in the
    // configured URL would otherwise yield `//api/...` request paths.
    this.baseUrl = configuredUrl.replace(/\/+$/, "");
    this.defaultEmbedModel = config.defaultEmbedModel || DEFAULT_EMBED_MODEL;
    this.defaultGenerateModel = config.defaultGenerateModel || DEFAULT_GENERATE_MODEL;
    this.defaultRerankModel = config.defaultRerankModel || DEFAULT_RERANK_MODEL;
  }

  /**
   * Get the base URL for this Ollama instance (without trailing slashes).
   */
  getBaseUrl(): string {
    return this.baseUrl;
  }

  // ==========================================================================
  // Core API methods
  // ==========================================================================

  /**
   * Embed `text` via `POST /api/embed`.
   *
   * The text is wrapped with the query format when `options.isQuery` is set,
   * otherwise with the document format (using `options.title`).
   *
   * @returns The first embedding of the response, or `null` when the request
   *   fails, the status is not OK, or the response carries no embedding.
   */
  async embed(text: string, options: EmbedOptions): Promise<EmbeddingResult | null> {
    const model = options.model || this.defaultEmbedModel;
    const formatted = options.isQuery
      ? formatQueryForEmbedding(text)
      : formatDocForEmbedding(text, options.title);

    try {
      const response = await fetch(`${this.baseUrl}/api/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ model, input: formatted }),
      });
      if (!response.ok) {
        return null;
      }

      const data = (await response.json()) as { embeddings?: number[][] };
      const first = data.embeddings?.[0];
      if (!first) {
        return null;
      }
      return { embedding: first, model };
    } catch {
      // Best-effort contract: callers only distinguish "got a result" from "did not".
      return null;
    }
  }

  /**
   * Generate a non-streaming completion via `POST /api/generate`.
   *
   * `maxTokens` defaults to 150 and `temperature` to 0. `logprobs`, `raw` and
   * `stop` are only sent when set.
   *
   * @returns The completion, or `null` when the request fails or the status is
   *   not OK.
   */
  async generate(prompt: string, options: GenerateOptions): Promise<GenerateResult | null> {
    const model = options.model || this.defaultGenerateModel;

    const modelOptions: Record<string, unknown> = {
      num_predict: options.maxTokens ?? 150,
      temperature: options.temperature ?? 0,
    };
    if (options.stop) {
      modelOptions.stop = options.stop;
    }

    const requestBody: Record<string, unknown> = {
      model,
      prompt,
      stream: false,
      options: modelOptions,
    };
    if (options.logprobs) {
      requestBody.logprobs = true;
    }
    if (options.raw) {
      requestBody.raw = true;
    }

    try {
      const response = await fetch(`${this.baseUrl}/api/generate`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(requestBody),
      });
      if (!response.ok) {
        return null;
      }

      const data = (await response.json()) as {
        response?: string;
        done?: boolean;
        logprobs?: unknown;
      };

      return {
        text: data.response || "",
        model,
        logprobs: Ollama.parseLogprobs(data.logprobs),
        done: data.done ?? true,
      };
    } catch {
      return null;
    }
  }

  /**
   * Normalize the `logprobs` field of a generate response.
   *
   * Two shapes are accepted:
   * - an array of `{ token, logprob }` entries (the shape described in
   *   Ollama's API documentation; NOTE(review): confirm against the server
   *   version in use), and
   * - parallel arrays `{ tokens, token_logprobs }`.
   *
   * @returns The parsed entries, or `undefined` when `raw` has neither shape.
   */
  private static parseLogprobs(raw: unknown): TokenLogProb[] | undefined {
    if (Array.isArray(raw)) {
      const parsed: TokenLogProb[] = [];
      for (const entry of raw as Array<{ token?: unknown; logprob?: unknown } | null>) {
        if (!entry || typeof entry.token !== "string" || typeof entry.logprob !== "number") {
          // Stop at the first malformed entry so indices stay aligned with the output tokens.
          break;
        }
        parsed.push({ token: entry.token, logprob: entry.logprob });
      }
      return parsed;
    }

    if (raw && typeof raw === "object") {
      const { tokens, token_logprobs: values } = raw as {
        tokens?: string[];
        token_logprobs?: number[];
      };
      if (tokens && values) {
        // Pair only as many entries as both arrays provide.
        const count = Math.min(tokens.length, values.length);
        return tokens.slice(0, count).map((token, i) => ({ token, logprob: values[i] ?? 0 }));
      }
    }

    return undefined;
  }

  /**
   * Look up a model via `POST /api/show`.
   *
   * @returns `exists: true` with the response's `size` / `modified_at` when
   *   the status is OK; `exists: false` on any failure.
   */
  async modelExists(model: string): Promise<ModelInfo> {
    try {
      const response = await fetch(`${this.baseUrl}/api/show`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ name: model }),
      });
      if (!response.ok) {
        return { name: model, exists: false };
      }

      const data = (await response.json()) as {
        size?: number;
        modified_at?: string;
      };
      return {
        name: model,
        exists: true,
        size: data.size,
        modifiedAt: data.modified_at,
      };
    } catch {
      return { name: model, exists: false };
    }
  }

  /**
   * Pull a model via a non-streaming `POST /api/pull`.
   *
   * Because the request is non-streaming, `onProgress` is invoked exactly once
   * with `100`, after the pull completed.
   *
   * @returns `true` on success, `false` when the request fails or the status
   *   is not OK.
   */
  async pullModel(model: string, onProgress?: (progress: number) => void): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/api/pull`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ name: model, stream: false }),
      });
      if (!response.ok) {
        return false;
      }

      // For non-streaming, we just wait for completion.
      await response.json();
      onProgress?.(100);
      return true;
    } catch {
      return false;
    }
  }

  // ==========================================================================
  // High-level abstractions
  // ==========================================================================

  /**
   * Expand a search query into alternative phrasings.
   *
   * @param query Original query; always the first element of the result.
   * @param model Model to use; defaults to the configured generate model.
   * @param numVariations Maximum number of variations to append (default 2).
   *   Fractions are rounded down; negative or NaN values count as 0.
   * @returns `[query, ...variations]`; just `[query]` when generation fails or
   *   no variations are requested.
   */
  async expandQuery(query: string, model?: string, numVariations: number = 2): Promise<string[]> {
    const useModel = model || this.defaultGenerateModel;

    // `slice(0, n)` with a negative n would drop items from the end instead of
    // limiting the count, so normalize to a non-negative integer first.
    const count = Math.max(0, Math.floor(numVariations) || 0);
    if (count === 0) {
      return [query];
    }

    const prompt = [
      `You are a search query expander. Given a search query, generate ${count} alternative queries that would help find relevant documents.`,
      `Rules:`,
      `- Use synonyms and related terminology (e.g., "craft" → "craftsmanship", "quality", "excellence")`,
      `- Rephrase to capture different angles (e.g., "engineering culture" → "technical excellence", "developer practices")`,
      `- Keep proper nouns and named concepts exactly as written (e.g., "Build a Business", "Stripe", "Shopify")`,
      `- Each variation should be 3-8 words, natural search terms`,
      `- Do NOT just append words like "search" or "find" or "documents"`,
      `Query: "${query}"`,
      `Output exactly ${count} variations, one per line, no numbering or bullets:`,
    ].join("\n");

    const result = await this.generate(prompt, {
      model: useModel,
      maxTokens: 150,
      temperature: 0,
    });
    if (!result) {
      return [query];
    }

    const cleanText = result.text
      // Drop complete thinking blocks...
      .replace(/<think>[\s\S]*?<\/think>/g, "")
      // ...and a thinking block that was cut off by the token limit before
      // its closing tag, which would otherwise leak reasoning text as "variations".
      .replace(/<think>[\s\S]*$/, "")
      .trim();

    const lines = cleanText
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => line.length > 2 && line.length < 100 && !line.startsWith("<"));

    return [query, ...lines.slice(0, count)];
  }

  /**
   * Rerank documents by relevance to `query`.
   *
   * @returns All per-document judgments ordered by descending `score`, plus
   *   the model name used.
   */
  async rerank(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions
  ): Promise<RerankResult> {
    // The array is freshly built by rerankerLogprobsCheck, so sorting in place is safe.
    const results = await this.rerankerLogprobsCheck(query, documents, options);
    return {
      results: results.sort((a, b) => b.score - a.score),
      model: options.model || this.defaultRerankModel,
    };
  }

  /**
   * Judge every document against `query`, in input order.
   *
   * Documents are processed in sequential batches; the documents inside a
   * batch are judged concurrently.
   *
   * @param options `batchSize` defaults to 5; values that are not at least 1
   *   after rounding down (including negative numbers and NaN) also fall back
   *   to 5.
   */
  async rerankerLogprobsCheck(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions
  ): Promise<RerankDocumentResult[]> {
    const model = options.model || this.defaultRerankModel;

    // A negative step would never reach the end of the list (infinite loop),
    // so anything below 1 is replaced by the default.
    const requestedSize = Math.floor(options.batchSize ?? 0);
    const batchSize = requestedSize >= 1 ? requestedSize : 5;

    const results: RerankDocumentResult[] = [];
    for (let start = 0; start < documents.length; start += batchSize) {
      const batch = documents.slice(start, start + batchSize);
      const batchResults = await Promise.all(
        batch.map((doc) => this.rerankSingle(query, doc, model))
      );
      results.push(...batchResults);
    }
    return results;
  }

  /**
   * Rerank a single document - internal helper.
   *
   * Sends a raw, single-token generation request with logprobs enabled and
   * turns the answer into a judgment. A failed generation yields a
   * non-relevant result with score 0.
   */
  private async rerankSingle(
    query: string,
    doc: RerankDocument,
    model: string
  ): Promise<RerankDocumentResult> {
    const systemPrompt = `Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".`;
    const instruct = `Given a search query, determine if the following document is relevant to the query. Consider both direct matches and related concepts.`;

    // Title fallback: last path segment without a `.md` suffix, else the path itself.
    const docTitle = doc.title || doc.file.split("/").pop()?.replace(/\.md$/, "") || doc.file;
    // Only the first 4000 characters of the document are sent.
    const docPreview = doc.text.length > 4000 ? doc.text.substring(0, 4000) + "..." : doc.text;

    // Qwen3-reranker prompt format with empty think tags.
    // NOTE(review): the published Qwen3-reranker template appears to use blank
    // lines inside and after the think tags; confirm whether blank lines are
    // intended here.
    const prompt = [
      `<|im_start|>system`,
      `${systemPrompt}<|im_end|>`,
      `<|im_start|>user`,
      `<Instruct>: ${instruct}`,
      `<Query>: ${query}`,
      `<Document Title>: ${docTitle}`,
      `<Document>: ${docPreview}<|im_end|>`,
      `<|im_start|>assistant`,
      `<think>`,
      `</think>`,
      ``,
    ].join("\n");

    const result = await this.generate(prompt, {
      model,
      maxTokens: 1,
      temperature: 0,
      logprobs: true,
      raw: true,
    });

    if (!result) {
      return {
        file: doc.file,
        relevant: false,
        confidence: 0,
        score: 0,
        rawToken: "",
        logprob: 0,
      };
    }
    return this.parseRerankResponse(doc.file, result);
  }

  /**
   * Parse rerank response into structured result.
   *
   * The first token's logprob (0 when absent) is converted to a confidence
   * with `Math.exp`. "yes" maps to scores in [0.5, 1], "no" to scores in
   * [0, 0.5], and any other answer to a fixed 0.3.
   */
  private parseRerankResponse(file: string, result: GenerateResult): RerankDocumentResult {
    const token = result.text.toLowerCase().trim();
    const firstLogprob = result.logprobs?.[0];
    const logprob = firstLogprob?.logprob ?? 0;
    const confidence = Math.exp(logprob);

    let relevant: boolean;
    let score: number;
    if (token.startsWith("yes")) {
      relevant = true;
      // Score: 0.5 base + up to 0.5 from confidence.
      score = 0.5 + 0.5 * confidence;
    } else if (token.startsWith("no")) {
      relevant = false;
      // Score: up to 0.5 based on uncertainty (1 - confidence).
      score = 0.5 * (1 - confidence);
    } else {
      // Unknown token - neutral score.
      relevant = false;
      score = 0.3;
    }

    return {
      file,
      relevant,
      confidence,
      score,
      rawToken: firstLogprob?.token ?? token,
      logprob,
    };
  }
}
// =============================================================================
// Singleton for default Ollama instance
// =============================================================================

/** Shared default instance; `null` until first requested or after being reset. */
let defaultOllama: Ollama | null = null;

/**
 * Get the default Ollama instance, creating one with default configuration
 * on first use.
 */
export function getDefaultOllama(): Ollama {
  if (!defaultOllama) {
    defaultOllama = new Ollama();
  }
  return defaultOllama;
}
/**
 * Set a custom default Ollama instance (useful for testing).
 *
 * @param ollama Instance to use as the default, or `null` to clear it so the
 *   next `getDefaultOllama()` call creates a fresh one.
 */
export function setDefaultOllama(ollama: Ollama | null): void {
  defaultOllama = ollama;
}
|