diff --git a/sample.config.toml b/sample.config.toml
index f6c6943..dddcc03 100644
--- a/sample.config.toml
+++ b/sample.config.toml
@@ -1,6 +1,7 @@
 [GENERAL]
 PORT = 3001 # Port to run the server on
 SIMILARITY_MEASURE = "cosine" # "cosine" or "dot"
+KEEP_ALIVE = "5m" # How long to keep Ollama models loaded in memory (use "-1m" instead of -1 to keep a model loaded indefinitely)
 
 [API_KEYS]
 OPENAI = "" # OpenAI API key - sk-1234567890abcdef1234567890abcdef
diff --git a/src/config.ts b/src/config.ts
index bb69335..8624e7f 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -8,6 +8,7 @@ interface Config {
   GENERAL: {
     PORT: number;
     SIMILARITY_MEASURE: string;
+    KEEP_ALIVE: string;
   };
   API_KEYS: {
     OPENAI: string;
@@ -34,6 +35,8 @@ export const getPort = () => loadConfig().GENERAL.PORT;
 export const getSimilarityMeasure = () =>
   loadConfig().GENERAL.SIMILARITY_MEASURE;
 
+export const getKeepAlive = () => loadConfig().GENERAL.KEEP_ALIVE;
+
 export const getOpenaiApiKey = () => loadConfig().API_KEYS.OPENAI;
 
 export const getGroqApiKey = () => loadConfig().API_KEYS.GROQ;
diff --git a/src/lib/providers/ollama.ts b/src/lib/providers/ollama.ts
index ed68bfa..8c36c98 100644
--- a/src/lib/providers/ollama.ts
+++ b/src/lib/providers/ollama.ts
@@ -1,11 +1,12 @@
 import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama';
-import { getOllamaApiEndpoint } from '../../config';
+import { getKeepAlive, getOllamaApiEndpoint } from '../../config';
 import logger from '../../utils/logger';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
 
 export const loadOllamaChatModels = async () => {
   const ollamaEndpoint = getOllamaApiEndpoint();
-
+  const keepAlive = getKeepAlive();
+
   if (!ollamaEndpoint) return {};
 
   try {
@@ -24,6 +25,7 @@ export const loadOllamaChatModels = async () => {
           baseUrl: ollamaEndpoint,
           model: model.model,
           temperature: 0.7,
+          keepAlive: keepAlive
         }),
       };
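
For reference, a minimal sketch (not part of the patch) of how the new setting flows end to end. Only getKeepAlive, getOllamaApiEndpoint, and the ChatOllama keepAlive option come from the change above; the import path and model name are illustrative. Ollama keeps a model resident for the given duration after each request, so "5m" unloads it five minutes after the last call, while a negative duration such as "-1m" keeps it loaded indefinitely.

import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { getKeepAlive, getOllamaApiEndpoint } from './src/config';

// Build a chat model the same way loadOllamaChatModels does, passing the
// configured keep-alive duration through to the Ollama server.
const chat = new ChatOllama({
  baseUrl: getOllamaApiEndpoint(), // e.g. http://localhost:11434
  model: 'llama3',                 // hypothetical model name
  temperature: 0.7,
  keepAlive: getKeepAlive(),       // "5m" from sample.config.toml above
});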