| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824 |
- /**
- * llm.ts - LLM abstraction layer for QMD using node-llama-cpp
- *
- * Provides embeddings, text generation, and reranking using local GGUF models.
- */
- import {
- getLlama,
- resolveModelFile,
- LlamaChatSession,
- LlamaLogLevel,
- type Llama,
- type LlamaModel,
- type LlamaEmbeddingContext,
- type Token as LlamaToken,
- } from "node-llama-cpp";
- import { homedir } from "os";
- import { join } from "path";
- import { existsSync, mkdirSync } from "fs";
- // =============================================================================
- // Embedding Formatting Functions
- // =============================================================================
/**
 * Build the embedding input for a search query.
 *
 * Wraps the query in the nomic-style task prefix used with embeddinggemma.
 *
 * @param query - Raw search query text.
 * @returns The string `task: search result | query: <query>`.
 */
export function formatQueryForEmbedding(query: string): string {
  const taskPrefix = "task: search result | query: ";
  return `${taskPrefix}${query}`;
}
/**
 * Build the embedding input for a document.
 *
 * Produces the nomic-style `title: ... | text: ...` layout. A missing or empty
 * title is rendered as the literal `none`.
 *
 * @param text - Document body to embed.
 * @param title - Optional document title.
 * @returns The string `title: <title|none> | text: <text>`.
 */
export function formatDocForEmbedding(text: string, title?: string): string {
  const titleField = title ? title : "none";
  return [`title: ${titleField}`, `text: ${text}`].join(" | ");
}
- // =============================================================================
- // Types
- // =============================================================================
/**
 * A generated token paired with its log probability.
 */
export type TokenLogProb = {
  /** Token text. */
  token: string;
  /** Log probability reported for the token. */
  logprob: number;
};

/**
 * Result of embedding a single text.
 */
export type EmbeddingResult = {
  /** Embedding vector. */
  embedding: number[];
  /** Identifier of the model that produced the vector. */
  model: string;
};

/**
 * Result of a text generation call.
 */
export type GenerateResult = {
  /** Generated text. */
  text: string;
  /** Identifier of the model that produced the text. */
  model: string;
  /** Per-token log probabilities, when the backend supplies them. */
  logprobs?: TokenLogProb[];
  /** Completion flag for the generation. */
  done: boolean;
};

/**
 * Rerank score for one document.
 */
export type RerankDocumentResult = {
  /** File the document came from. */
  file: string;
  /** Relevance score. */
  score: number;
  /** Position of the document in the input list. */
  index: number;
};

/**
 * Result of reranking a batch of documents.
 */
export type RerankResult = {
  /** One entry per ranked document. */
  results: RerankDocumentResult[];
  /** Identifier of the model that produced the scores. */
  model: string;
};

/**
 * Availability information for a model.
 */
export type ModelInfo = {
  /** Model name or URI as it was queried. */
  name: string;
  /** Whether the model is considered available. */
  exists: boolean;
  /** Local path of the model, when known. */
  path?: string;
};
/**
 * Options accepted by embedding calls.
 */
export type EmbedOptions = {
  /** Model override. */
  model?: string;
  /** Marks the text as a query rather than a document. */
  isQuery?: boolean;
  /** Title associated with the text. */
  title?: string;
};

/**
 * Options accepted by text generation calls.
 */
export type GenerateOptions = {
  /** Model override. */
  model?: string;
  /** Upper bound on the number of generated tokens. */
  maxTokens?: number;
  /** Sampling temperature. */
  temperature?: number;
};

/**
 * Options accepted by rerank calls.
 */
export type RerankOptions = {
  /** Model override. */
  model?: string;
};
/**
 * Query kinds, one per search backend.
 */
export type QueryType = "lex" | "vec" | "hyde";

/**
 * One query string together with the backend kind it targets.
 */
export type Queryable = {
  /** Backend kind the text is meant for. */
  type: QueryType;
  /** Query text. */
  text: string;
};

/**
 * A document handed to the reranker.
 */
export type RerankDocument = {
  /** File the document came from. */
  file: string;
  /** Text that is scored against the query. */
  text: string;
  /** Optional document title. */
  title?: string;
};
- // =============================================================================
- // Model Configuration
- // =============================================================================
// Default models, written as HuggingFace URIs for node-llama-cpp.
// URI shape: hf:<user>/<repo>/<file>
const DEFAULT_EMBED_MODEL =
  "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
const DEFAULT_RERANK_MODEL =
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
// Alternative generation model, currently not in use:
// const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
const DEFAULT_GENERATE_MODEL =
  "hf:ggml-org/Qwen3-1.7B-GGUF/Qwen3-1.7b-q8_0.gguf";

// Directory under the user's home where model files are cached.
const MODEL_CACHE_DIR = join(homedir(), ".cache", "qmd", "models");
- // =============================================================================
- // LLM Interface
- // =============================================================================
/**
 * Backend-agnostic LLM contract. Implement it once per backend.
 */
export interface LLM {
  /**
   * Compute an embedding for a text.
   *
   * @returns The embedding result, or `null`.
   */
  embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;

  /**
   * Produce a text completion for a prompt.
   *
   * @returns The generation result, or `null`.
   */
  generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null>;

  /**
   * Report whether a model exists / is available.
   */
  modelExists(model: string): Promise<ModelInfo>;

  /**
   * Expand a search query into several variations aimed at different backends.
   *
   * @returns The variations as a list of Queryable objects.
   */
  expandQuery(
    query: string,
    options?: { context?: string; includeLexical?: boolean }
  ): Promise<Queryable[]>;

  /**
   * Score documents by relevance to a query (higher score = more relevant).
   *
   * @returns The documents with their relevance scores.
   */
  rerank(
    query: string,
    documents: RerankDocument[],
    options?: RerankOptions
  ): Promise<RerankResult>;

  /**
   * Release the resources held by the implementation.
   */
  dispose(): Promise<void>;
}
- // =============================================================================
- // node-llama-cpp Implementation
- // =============================================================================
/**
 * Construction options for the node-llama-cpp backed LLM.
 */
export type LlamaCppConfig = {
  /** Embedding model to use instead of the default. */
  embedModel?: string;
  /** Generation model to use instead of the default. */
  generateModel?: string;
  /** Rerank model to use instead of the default. */
  rerankModel?: string;
  /** Model cache directory to use instead of the default. */
  modelCacheDir?: string;
  /**
   * Milliseconds of inactivity after which contexts are unloaded
   * (default: 2 minutes; 0 disables unloading).
   *
   * Following the node-llama-cpp lifecycle guidance, models are kept loaded and only
   * contexts are disposed when idle, because contexts (and their sequences) are the
   * heavy per-session objects.
   * @see https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
   */
  inactivityTimeoutMs?: number;
  /**
   * Also dispose models when the inactivity timeout fires (default: false).
   *
   * Leaving models loaded avoids repeated VRAM thrash; enable this only when
   * aggressive memory reclaim is required.
   */
  disposeModelsOnInactivity?: boolean;
};
// Default inactivity timeout: 2 minutes
const DEFAULT_INACTIVITY_TIMEOUT_MS = 2 * 60 * 1000;

// Longest time dispose() waits for llama.dispose(), which can hang indefinitely.
const LLAMA_DISPOSE_TIMEOUT_MS = 1000;

/** Ranking context type, derived from what `LlamaModel.createRankingContext()` resolves to. */
type LlamaRerankContext = Awaited<ReturnType<LlamaModel["createRankingContext"]>>;

/**
 * LLM implementation using node-llama-cpp.
 *
 * The llama runtime, the three models (embed / generate / rerank) and the embed and
 * rerank contexts are all created lazily on first use. Generation contexts are created
 * per call and disposed when the call ends. After a period without activity the cached
 * contexts (and optionally the models) are unloaded; they are recreated on the next call.
 */
export class LlamaCpp implements LLM {
  private llama: Llama | null = null;
  private embedModel: LlamaModel | null = null;
  private embedContext: LlamaEmbeddingContext | null = null;
  private generateModel: LlamaModel | null = null;
  private rerankModel: LlamaModel | null = null;
  private rerankContext: LlamaRerankContext | null = null;

  private readonly embedModelUri: string;
  private readonly generateModelUri: string;
  private readonly rerankModelUri: string;
  private readonly modelCacheDir: string;

  // In-flight lazy initialisations, keyed by resource name. Sharing the pending promise
  // ensures the same runtime/model/context is never created twice concurrently (which
  // can allocate duplicate VRAM and leak the instance that loses the race).
  private readonly pendingLoads = new Map<string, Promise<unknown>>();

  // Inactivity timer for auto-unloading idle resources
  private inactivityTimer: ReturnType<typeof setTimeout> | null = null;
  private readonly inactivityTimeoutMs: number;
  private readonly disposeModelsOnInactivity: boolean;

  // Number of public operations currently running; resources are never unloaded while > 0.
  private activeOperations = 0;

  // Track disposal state to prevent double-dispose
  private disposed = false;

  /**
   * @param config - Optional overrides for model URIs, cache directory and idle-unload policy.
   */
  constructor(config: LlamaCppConfig = {}) {
    this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
    this.generateModelUri = config.generateModel || DEFAULT_GENERATE_MODEL;
    this.rerankModelUri = config.rerankModel || DEFAULT_RERANK_MODEL;
    this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
    this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
    this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
  }

  // ==========================================================================
  // Lifecycle helpers
  // ==========================================================================

  /**
   * Restart the inactivity countdown.
   *
   * Any pending timer is cancelled. A new one is armed only when the timeout is enabled
   * and there is something the timer could actually unload.
   */
  private touchActivity(): void {
    if (this.inactivityTimer) {
      clearTimeout(this.inactivityTimer);
      this.inactivityTimer = null;
    }
    if (this.inactivityTimeoutMs <= 0 || !this.hasUnloadableResources()) {
      return;
    }
    this.inactivityTimer = setTimeout(() => {
      this.inactivityTimer = null;
      this.unloadIdleResources().catch((err: unknown) => {
        console.error("Error unloading idle resources:", err);
      });
    }, this.inactivityTimeoutMs);
    // Don't keep process alive just for this timer
    this.inactivityTimer.unref();
  }

  /**
   * Check if any contexts are currently loaded (and therefore worth unloading on inactivity).
   */
  private hasLoadedContexts(): boolean {
    return this.embedContext !== null || this.rerankContext !== null;
  }

  /**
   * Check if the inactivity timer would have anything to release: a cached context, or,
   * when `disposeModelsOnInactivity` is set, any loaded model.
   */
  private hasUnloadableResources(): boolean {
    if (this.hasLoadedContexts()) {
      return true;
    }
    return (
      this.disposeModelsOnInactivity &&
      (this.embedModel !== null || this.generateModel !== null || this.rerankModel !== null)
    );
  }

  /**
   * Run a public operation while marking the instance as busy.
   *
   * While at least one operation is running, `unloadIdleResources` is a no-op, so a call
   * that outlasts the inactivity timeout does not lose its context mid-flight. The
   * countdown restarts once the last running operation finishes.
   */
  private async runTracked<T>(operation: () => Promise<T>): Promise<T> {
    this.activeOperations += 1;
    try {
      return await operation();
    } finally {
      this.activeOperations -= 1;
      if (this.activeOperations === 0) {
        this.touchActivity();
      }
    }
  }

  /**
   * Run `load` unless a load registered under the same key is already in flight, in
   * which case the pending promise is shared. The entry is removed once the load settles,
   * so a failed load can be retried by a later call.
   */
  private async singleFlight<T>(key: string, load: () => Promise<T>): Promise<T> {
    const inFlight = this.pendingLoads.get(key);
    if (inFlight) {
      // Each key is only ever used with one result type, so the stored promise matches T.
      return (await inFlight) as T;
    }
    const started = load();
    this.pendingLoads.set(key, started);
    try {
      return await started;
    } finally {
      if (this.pendingLoads.get(key) === started) {
        this.pendingLoads.delete(key);
      }
    }
  }

  /**
   * Unload idle resources but keep the instance alive for future use.
   *
   * By default, this disposes contexts (and their dependent sequences), while keeping models
   * loaded. This matches the intended lifecycle: model → context → sequence, where contexts
   * are per-session. Models are disposed as well when `disposeModelsOnInactivity` is set.
   *
   * Does nothing when the instance has been disposed or while an operation is running.
   * Every resource gets a dispose attempt; if any attempt fails, the first error is rethrown
   * after all attempts have been made.
   */
  async unloadIdleResources(): Promise<void> {
    if (this.disposed || this.activeOperations > 0) {
      return;
    }
    if (this.inactivityTimer) {
      clearTimeout(this.inactivityTimer);
      this.inactivityTimer = null;
    }

    // Detach references before awaiting any disposal, so that a concurrent caller never
    // picks up a resource that is being torn down and no reference to a half-disposed
    // resource survives a failure. Contexts come first, then (optionally) models.
    const doomed: Array<{ dispose(): Promise<void> } | null> = [
      this.embedContext,
      this.rerankContext,
    ];
    this.embedContext = null;
    this.rerankContext = null;
    if (this.disposeModelsOnInactivity) {
      doomed.push(this.embedModel, this.generateModel, this.rerankModel);
      this.embedModel = null;
      this.generateModel = null;
      this.rerankModel = null;
    }

    let failed = false;
    let firstError: unknown;
    for (const resource of doomed) {
      if (!resource) continue;
      try {
        await resource.dispose();
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
      }
    }
    if (failed) {
      throw firstError;
    }
    // Note: We keep llama instance alive - it's lightweight
  }

  /**
   * Ensure model cache directory exists
   */
  private ensureModelCacheDir(): void {
    // With `recursive: true` this is a no-op when the directory already exists.
    mkdirSync(this.modelCacheDir, { recursive: true });
  }

  /**
   * Initialize the llama instance (lazy)
   */
  private async ensureLlama(): Promise<Llama> {
    if (this.llama) {
      return this.llama;
    }
    return await this.singleFlight("llama", async () => {
      const llama = await getLlama({ logLevel: LlamaLogLevel.error });
      this.llama = llama;
      return llama;
    });
  }

  /**
   * Resolve a model URI to a local path, downloading if needed
   */
  private async resolveModel(modelUri: string): Promise<string> {
    this.ensureModelCacheDir();
    // resolveModelFile handles HF URIs and downloads to the cache dir
    return await resolveModelFile(modelUri, this.modelCacheDir);
  }

  /**
   * Resolve `modelUri` and load it through the shared llama instance.
   */
  private async loadModel(modelUri: string): Promise<LlamaModel> {
    const llama = await this.ensureLlama();
    const modelPath = await this.resolveModel(modelUri);
    return await llama.loadModel({ modelPath });
  }

  /**
   * Load embedding model (lazy)
   */
  private async ensureEmbedModel(): Promise<LlamaModel> {
    if (this.embedModel) {
      return this.embedModel;
    }
    return await this.singleFlight("embedModel", async () => {
      const model = await this.loadModel(this.embedModelUri);
      this.embedModel = model;
      return model;
    });
  }

  /**
   * Load embedding context (lazy). Context can be disposed and recreated without reloading the model.
   */
  private async ensureEmbedContext(): Promise<LlamaEmbeddingContext> {
    const context =
      this.embedContext ??
      (await this.singleFlight("embedContext", async () => {
        const model = await this.ensureEmbedModel();
        const created = await model.createEmbeddingContext();
        this.embedContext = created;
        return created;
      }));
    this.touchActivity();
    return context;
  }

  /**
   * Load generation model (lazy) - context is created fresh per call
   */
  private async ensureGenerateModel(): Promise<LlamaModel> {
    const model =
      this.generateModel ??
      (await this.singleFlight("generateModel", async () => {
        const loaded = await this.loadModel(this.generateModelUri);
        this.generateModel = loaded;
        return loaded;
      }));
    this.touchActivity();
    return model;
  }

  /**
   * Load rerank model (lazy)
   */
  private async ensureRerankModel(): Promise<LlamaModel> {
    if (this.rerankModel) {
      return this.rerankModel;
    }
    return await this.singleFlight("rerankModel", async () => {
      const model = await this.loadModel(this.rerankModelUri);
      this.rerankModel = model;
      return model;
    });
  }

  /**
   * Load rerank context (lazy). Context can be disposed and recreated without reloading the model.
   */
  private async ensureRerankContext(): Promise<LlamaRerankContext> {
    const context =
      this.rerankContext ??
      (await this.singleFlight("rerankContext", async () => {
        const model = await this.ensureRerankModel();
        const created = await model.createRankingContext();
        this.rerankContext = created;
        return created;
      }));
    this.touchActivity();
    return context;
  }

  // ==========================================================================
  // Tokenization
  // ==========================================================================

  /**
   * Tokenize text using the embedding model's tokenizer.
   *
   * Only the model is loaded for this; no embedding context is created.
   *
   * @returns Tokenizer tokens (opaque type from node-llama-cpp).
   */
  async tokenize(text: string): Promise<readonly LlamaToken[]> {
    const model = await this.ensureEmbedModel();
    this.touchActivity();
    return model.tokenize(text);
  }

  /**
   * Count tokens in text using the embedding model's tokenizer
   */
  async countTokens(text: string): Promise<number> {
    const tokens = await this.tokenize(text);
    return tokens.length;
  }

  /**
   * Detokenize tokens back to text using the embedding model's tokenizer.
   */
  async detokenize(tokens: readonly LlamaToken[]): Promise<string> {
    const model = await this.ensureEmbedModel();
    this.touchActivity();
    return model.detokenize(tokens);
  }

  // ==========================================================================
  // Core API methods
  // ==========================================================================

  /**
   * Package a raw embedding vector as an EmbeddingResult tagged with the embed model URI.
   */
  private toEmbeddingResult(vector: readonly number[]): EmbeddingResult {
    return {
      embedding: Array.from(vector),
      model: this.embedModelUri,
    };
  }

  /**
   * Embed a single text.
   *
   * `options` is accepted for interface compatibility; this implementation does not read
   * it, and `text` is embedded exactly as given.
   *
   * @returns The embedding, or `null` if anything fails (the error is logged).
   */
  async embed(text: string, options: EmbedOptions = {}): Promise<EmbeddingResult | null> {
    try {
      return await this.runTracked(async () => {
        const context = await this.ensureEmbedContext();
        const embedding = await context.getEmbeddingFor(text);
        return this.toEmbeddingResult(embedding.vector);
      });
    } catch (error) {
      console.error("Embedding error:", error);
      return null;
    }
  }

  /**
   * Batch embed multiple texts.
   *
   * All texts are submitted in parallel against one shared context.
   *
   * @returns One entry per input text, in input order; an entry is `null` when that text
   * failed to embed. If the context cannot be obtained at all, every entry is `null`.
   */
  async embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]> {
    if (texts.length === 0) return [];
    try {
      return await this.runTracked(async () => {
        const context = await this.ensureEmbedContext();
        return await Promise.all(
          texts.map(async (text) => {
            try {
              const embedding = await context.getEmbeddingFor(text);
              return this.toEmbeddingResult(embedding.vector);
            } catch (err) {
              console.error("Embedding error for text:", err);
              return null;
            }
          })
        );
      });
    } catch (error) {
      console.error("Batch embedding error:", error);
      return texts.map(() => null);
    }
  }

  /**
   * Generate a completion for `prompt` in a fresh chat session.
   *
   * @param options - `maxTokens` defaults to 150 and `temperature` to 0.
   * @returns The generated text with `done: true`.
   * @throws Whatever model loading, context creation or prompting throws; errors are
   * not converted to `null` here.
   */
  async generate(prompt: string, options: GenerateOptions = {}): Promise<GenerateResult | null> {
    return await this.runTracked(async () => {
      const model = await this.ensureGenerateModel();
      // Create fresh context -> sequence -> session for each call
      const context = await model.createContext();
      try {
        const session = new LlamaChatSession({ contextSequence: context.getSequence() });
        let text = "";
        await session.prompt(prompt, {
          maxTokens: options.maxTokens ?? 150,
          temperature: options.temperature ?? 0,
          onTextChunk: (chunk: string) => {
            text += chunk;
          },
        });
        return {
          text,
          model: this.generateModelUri,
          done: true,
        };
      } finally {
        // Dispose context (which disposes dependent sequences/sessions per lifecycle rules)
        await context.dispose();
      }
    });
  }

  /**
   * Report whether a model is available.
   *
   * URIs starting with `hf:` are assumed to exist and are not checked. Anything else is
   * treated as a local path and checked on disk.
   */
  async modelExists(modelUri: string): Promise<ModelInfo> {
    if (modelUri.startsWith("hf:")) {
      return { name: modelUri, exists: true };
    }
    const exists = existsSync(modelUri);
    return {
      name: modelUri,
      exists,
      path: exists ? modelUri : undefined,
    };
  }

  // ==========================================================================
  // High-level abstractions
  // ==========================================================================

  /**
   * Build the instruction prompt sent to the generation model for query expansion.
   */
  private buildExpansionPrompt(
    query: string,
    context: string | undefined,
    includeLexical: boolean
  ): string {
    return `You are a search query optimization expert. Your task is to improve retrieval by rewriting queries and generating hypothetical documents.
Original Query: ${query}
${context ? `Additional Context, ONLY USE IF RELEVANT:\n\n<context>${context}</context>` : ""}
## Step 1: Query Analysis
Identify entities, search intent, and missing context.
## Step 2: Generate Hypothetical Document
Write a focused sentence passage that would answer the query. Include specific terminology and domain vocabulary.
## Step 3: Query Rewrites
Generate 2-3 alternative search queries that resolve ambiguities. Use terminology from the hypothetical document.
## Step 4: Final Retrieval Text
Output exactly 1-3 'lex' lines, 1-3 'vec' lines, and MAX ONE 'hyde' line.
<format>
lex: {single search term}
vec: {single vector query}
hyde: {complete hypothetical document passage from Step 2 on a SINGLE LINE}
</format>
<example>
Example (FOR FORMAT ONLY - DO NOT COPY THIS CONTENT):
lex: example keyword 1
lex: example keyword 2
vec: example semantic query
hyde: This is an example of a hypothetical document passage that would answer the example query. It contains multiple sentences and relevant vocabulary.
</example>
<rules>
- DO NOT repeat the same line.
- Each 'lex:' line MUST be a different keyword variation based on the ORIGINAL QUERY.
- Each 'vec:' line MUST be a different semantic variation based on the ORIGINAL QUERY.
- The 'hyde:' line MUST be the full sentence passage from Step 2, but all on one line.
- DO NOT use the example content above.
${!includeLexical ? "- Do NOT output any 'lex:' lines" : ""}
</rules>
Final Output:`;
  }

  /**
   * Parse `type: text` lines produced by the model into Queryable entries.
   *
   * Lines without a colon, with an unknown type, or with empty text are skipped, as are
   * `lex` lines when `includeLexical` is false.
   */
  private parseExpansion(raw: string, includeLexical: boolean): Queryable[] {
    const queryables: Queryable[] = [];
    for (const line of raw.trim().split("\n")) {
      const colonIdx = line.indexOf(":");
      if (colonIdx === -1) continue;
      const type = line.slice(0, colonIdx).trim();
      if (type !== "lex" && type !== "vec" && type !== "hyde") continue;
      if (type === "lex" && !includeLexical) continue;
      const text = line.slice(colonIdx + 1).trim();
      if (text.length === 0) continue;
      queryables.push({ type, text });
    }
    return queryables;
  }

  /**
   * Expansion used when the model output is unusable: the original query as a `vec`
   * entry, preceded by a `lex` entry when lexical queries are wanted.
   */
  private fallbackExpansion(query: string, includeLexical: boolean): Queryable[] {
    const fallback: Queryable[] = [{ type: "vec", text: query }];
    if (includeLexical) fallback.unshift({ type: "lex", text: query });
    return fallback;
  }

  /**
   * Expand a search query into `lex` / `vec` / `hyde` variations using the generation model.
   *
   * Output is constrained by a grammar to `type: content` lines. If prompting fails, or
   * the output yields no usable line, the original query is returned as the fallback.
   *
   * @param options - `context` is extra text offered to the model; `includeLexical`
   * (default true) controls whether `lex` entries are returned.
   */
  async expandQuery(query: string, options: { context?: string, includeLexical?: boolean } = {}): Promise<Queryable[]> {
    const includeLexical = options.includeLexical ?? true;
    return await this.runTracked(async () => {
      const llama = await this.ensureLlama();
      const model = await this.ensureGenerateModel();
      const grammar = await llama.createGrammar({
        grammar: `
          root ::= line+
          line ::= type ": " content "\\n"
          type ::= "lex" | "vec" | "hyde"
          content ::= [^\\n]+
        `
      });
      const prompt = this.buildExpansionPrompt(query, options.context, includeLexical);

      // Create fresh context for each call
      const genContext = await model.createContext();
      try {
        const session = new LlamaChatSession({ contextSequence: genContext.getSequence() });
        const result = await session.prompt(prompt, {
          grammar,
          maxTokens: 1000,
          temperature: 1,
        });
        const queryables = this.parseExpansion(result, includeLexical);
        // An empty expansion would leave the caller with nothing to search for.
        return queryables.length > 0 ? queryables : this.fallbackExpansion(query, includeLexical);
      } catch (error) {
        console.error("Structured query expansion failed:", error);
        // Fallback to original query
        return this.fallbackExpansion(query, includeLexical);
      } finally {
        await genContext.dispose();
      }
    });
  }

  /**
   * Rerank documents by relevance to a query.
   *
   * `options` is accepted for interface compatibility; this implementation does not read it.
   *
   * @returns One result per input document, in the order produced by the ranking context;
   * `index` is the document's position in `documents`. An empty input returns an empty
   * result without loading the model.
   * @throws If the ranking context returns a document text that was not part of the input.
   */
  async rerank(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions = {}
  ): Promise<RerankResult> {
    if (documents.length === 0) {
      return { results: [], model: this.rerankModelUri };
    }
    return await this.runTracked(async () => {
      const context = await this.ensureRerankContext();

      // The ranking API hands back document text only, so remember where each text came
      // from. Several documents may share identical text; a FIFO queue per text gives each
      // ranked item its own origin instead of collapsing them onto one entry.
      const origins = new Map<string, Array<{ file: string; index: number }>>();
      documents.forEach((doc, index) => {
        const origin = { file: doc.file, index };
        const queue = origins.get(doc.text);
        if (queue) {
          queue.push(origin);
        } else {
          origins.set(doc.text, [origin]);
        }
      });

      // Use the proper ranking API - returns [{document: string, score: number}] sorted by score
      const ranked = await context.rankAndSort(
        query,
        documents.map((doc) => doc.text)
      );

      const results: RerankDocumentResult[] = [];
      for (const item of ranked) {
        const origin = origins.get(item.document)?.shift();
        if (!origin) {
          throw new Error("Reranker returned a document that was not part of the input");
        }
        results.push({ file: origin.file, score: item.score, index: origin.index });
      }
      return {
        results,
        model: this.rerankModelUri,
      };
    });
  }

  /**
   * Release the llama instance and everything loaded through it.
   *
   * Safe to call more than once; only the first call does any work. All references are
   * cleared before the llama instance is disposed, so they are gone even if disposal
   * fails or times out.
   */
  async dispose(): Promise<void> {
    // Prevent double-dispose
    if (this.disposed) {
      return;
    }
    this.disposed = true;

    if (this.inactivityTimer) {
      clearTimeout(this.inactivityTimer);
      this.inactivityTimer = null;
    }

    const llama = this.llama;
    this.embedContext = null;
    this.rerankContext = null;
    this.embedModel = null;
    this.generateModel = null;
    this.rerankModel = null;
    this.llama = null;
    this.pendingLoads.clear();

    if (!llama) {
      return;
    }

    // Disposing llama cascades to models and contexts automatically
    // See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
    // Note: llama.dispose() can hang indefinitely, so we use a timeout
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<void>((resolve) => {
      timeoutHandle = setTimeout(resolve, LLAMA_DISPOSE_TIMEOUT_MS);
    });
    try {
      await Promise.race([llama.dispose(), timeoutPromise]);
    } finally {
      // Don't leave the timeout pending once the race is decided.
      clearTimeout(timeoutHandle);
    }
  }
}
- // =============================================================================
- // Singleton for default LlamaCpp instance
- // =============================================================================
// Shared default instance; stays null until first requested.
let defaultLlamaCpp: LlamaCpp | null = null;

/**
 * Return the default LlamaCpp instance, constructing it with default settings the
 * first time it is needed.
 */
export function getDefaultLlamaCpp(): LlamaCpp {
  if (defaultLlamaCpp) {
    return defaultLlamaCpp;
  }
  const created = new LlamaCpp();
  defaultLlamaCpp = created;
  return created;
}
/**
 * Replace the default LlamaCpp instance (useful for testing).
 *
 * @param llm - Instance to install as the default, or `null` to clear it. The
 * previously installed instance is not disposed here.
 */
export function setDefaultLlamaCpp(llm: LlamaCpp | null): void {
  defaultLlamaCpp = llm;
}
/**
 * Dispose the default LlamaCpp instance if it exists.
 * Call this before process exit to prevent NAPI crashes.
 *
 * The default is cleared before disposal starts. A failing `dispose()` therefore never
 * leaves an already-disposed instance installed as the default, and an instance installed
 * while disposal is in progress is not overwritten afterwards.
 *
 * @throws Whatever the instance's `dispose()` throws.
 */
export async function disposeDefaultLlamaCpp(): Promise<void> {
  const instance = defaultLlamaCpp;
  if (!instance) {
    return;
  }
  defaultLlamaCpp = null;
  await instance.dispose();
}
|