|
|
@@ -155,7 +155,7 @@ export type LLMSessionOptions = {
|
|
|
*/
|
|
|
export interface ILLMSession {
|
|
|
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
|
|
|
- embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
|
|
|
+ embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
|
|
|
expandQuery(query: string, options?: { context?: string; includeLexical?: boolean }): Promise<Queryable[]>;
|
|
|
rerank(query: string, documents: RerankDocument[], options?: RerankOptions): Promise<RerankResult>;
|
|
|
/** Whether this session is still valid (not released or aborted) */
|
|
|
@@ -880,7 +880,7 @@ export class LlamaCpp implements LLM {
|
|
|
|
|
|
return {
|
|
|
embedding: Array.from(embedding.vector),
|
|
|
- model: this.embedModelUri,
|
|
|
+ model: options.model ?? this.embedModelUri,
|
|
|
};
|
|
|
} catch (error) {
|
|
|
console.error("Embedding error:", error);
|
|
|
@@ -892,7 +892,7 @@ export class LlamaCpp implements LLM {
|
|
|
* Batch embed multiple texts efficiently
|
|
|
* Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
|
|
|
*/
|
|
|
- async embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]> {
|
|
|
+ async embedBatch(texts: string[], options: EmbedOptions = {}): Promise<(EmbeddingResult | null)[]> {
|
|
|
if (this._ciMode) throw new Error("LLM operations are disabled in CI (set CI=true)");
|
|
|
// Ping activity at start to keep models alive during this operation
|
|
|
this.touchActivity();
|
|
|
@@ -915,7 +915,7 @@ export class LlamaCpp implements LLM {
|
|
|
}
|
|
|
const embedding = await context.getEmbeddingFor(safeText);
|
|
|
this.touchActivity();
|
|
|
- embeddings.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
|
|
|
+ embeddings.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
|
|
|
} catch (err) {
|
|
|
console.error("Embedding error for text:", err);
|
|
|
embeddings.push(null);
|
|
|
@@ -942,7 +942,7 @@ export class LlamaCpp implements LLM {
|
|
|
}
|
|
|
const embedding = await ctx.getEmbeddingFor(safeText);
|
|
|
this.touchActivity();
|
|
|
- results.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
|
|
|
+ results.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
|
|
|
} catch (err) {
|
|
|
console.error("Embedding error for text:", err);
|
|
|
results.push(null);
|
|
|
@@ -1431,8 +1431,8 @@ class LLMSession implements ILLMSession {
|
|
|
return this.withOperation(() => this.manager.getLlamaCpp().embed(text, options));
|
|
|
}
|
|
|
|
|
|
- async embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]> {
|
|
|
- return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts));
|
|
|
+ async embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]> {
|
|
|
+ return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts, options));
|
|
|
}
|
|
|
|
|
|
async expandQuery(
|