|
@@ -665,6 +665,7 @@ export class LlamaCpp implements LLM {
|
|
|
for (let i = 0; i < n; i++) {
|
|
for (let i = 0; i < n; i++) {
|
|
|
try {
|
|
try {
|
|
|
this.embedContexts.push(await model.createEmbeddingContext({
|
|
this.embedContexts.push(await model.createEmbeddingContext({
|
|
|
|
|
+ contextSize: LlamaCpp.EMBED_CONTEXT_SIZE,
|
|
|
...(threads > 0 ? { threads } : {}),
|
|
...(threads > 0 ? { threads } : {}),
|
|
|
}));
|
|
}));
|
|
|
} catch {
|
|
} catch {
|
|
@@ -769,6 +770,11 @@ export class LlamaCpp implements LLM {
|
|
|
const v = parseInt(process.env.QMD_RERANK_CONTEXT_SIZE ?? "", 10);
|
|
const v = parseInt(process.env.QMD_RERANK_CONTEXT_SIZE ?? "", 10);
|
|
|
return Number.isFinite(v) && v > 0 ? v : 4096;
|
|
return Number.isFinite(v) && v > 0 ? v : 4096;
|
|
|
})();
|
|
})();
|
|
|
|
|
+
|
|
|
|
|
+ private static readonly EMBED_CONTEXT_SIZE: number = (() => {
|
|
|
|
|
+ const v = parseInt(process.env.QMD_EMBED_CONTEXT_SIZE ?? "", 10);
|
|
|
|
|
+ return Number.isFinite(v) && v > 0 ? v : 2048;
|
|
|
|
|
+ })();
|
|
|
private async ensureRerankContexts(): Promise<Awaited<ReturnType<LlamaModel["createRankingContext"]>>[]> {
|
|
private async ensureRerankContexts(): Promise<Awaited<ReturnType<LlamaModel["createRankingContext"]>>[]> {
|
|
|
if (this.rerankContexts.length === 0) {
|
|
if (this.rerankContexts.length === 0) {
|
|
|
const model = await this.ensureRerankModel();
|
|
const model = await this.ensureRerankModel();
|