diff --git a/sample.config.toml b/sample.config.toml
index f6c6943..dddcc03 100644
--- a/sample.config.toml
+++ b/sample.config.toml
@@ -1,6 +1,7 @@
 [GENERAL]
 PORT = 3001 # Port to run the server on
 SIMILARITY_MEASURE = "cosine" # "cosine" or "dot"
+KEEP_ALIVE = "5m" # How long to keep Ollama models loaded in memory (use "-1m" instead of -1 to keep a model loaded indefinitely)
 
 [API_KEYS]
 OPENAI = "" # OpenAI API key - sk-1234567890abcdef1234567890abcdef
diff --git a/src/config.ts b/src/config.ts
index bb69335..8624e7f 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -8,6 +8,7 @@ interface Config {
   GENERAL: {
     PORT: number;
     SIMILARITY_MEASURE: string;
+    KEEP_ALIVE: string;
   };
   API_KEYS: {
     OPENAI: string;
@@ -34,6 +35,8 @@ export const getPort = () => loadConfig().GENERAL.PORT;
 export const getSimilarityMeasure = () =>
   loadConfig().GENERAL.SIMILARITY_MEASURE;
 
+export const getKeepAlive = () => loadConfig().GENERAL.KEEP_ALIVE;
+
 export const getOpenaiApiKey = () => loadConfig().API_KEYS.OPENAI;
 
 export const getGroqApiKey = () => loadConfig().API_KEYS.GROQ;
diff --git a/src/lib/providers/ollama.ts b/src/lib/providers/ollama.ts
index ed68bfa..8c36c98 100644
--- a/src/lib/providers/ollama.ts
+++ b/src/lib/providers/ollama.ts
@@ -1,11 +1,12 @@
 import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama';
-import { getOllamaApiEndpoint } from '../../config';
+import { getKeepAlive, getOllamaApiEndpoint } from '../../config';
 import logger from '../../utils/logger';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
 
 export const loadOllamaChatModels = async () => {
   const ollamaEndpoint = getOllamaApiEndpoint();
-
+  const keepAlive = getKeepAlive();
+
   if (!ollamaEndpoint) return {};
 
   try {
@@ -24,6 +25,7 @@ export const loadOllamaChatModels = async () => {
           baseUrl: ollamaEndpoint,
           model: model.model,
           temperature: 0.7,
+          keepAlive: keepAlive
         }),
       };
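
For reference, a minimal sketch (not part of the patch) of how the new setting flows end to end. Only getKeepAlive, getOllamaApiEndpoint, and the ChatOllama keepAlive option come from the change above; the import path and model name are illustrative. Ollama keeps a model resident for the given duration after each request, so "5m" unloads it five minutes after the last call, while a negative duration such as "-1m" keeps it loaded indefinitely.

import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { getKeepAlive, getOllamaApiEndpoint } from './src/config';

// Build a chat model the same way loadOllamaChatModels does, passing the
// configured keep-alive duration through to the Ollama server.
const chat = new ChatOllama({
  baseUrl: getOllamaApiEndpoint(), // e.g. http://localhost:11434
  model: 'llama3',                 // hypothetical model name
  temperature: 0.7,
  keepAlive: getKeepAlive(),       // "5m" from sample.config.toml above
});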