agenticSeek/server/sources/ollama_handler.py
import time

import ollama

from .generator import GeneratorLLM


class OllamaLLM(GeneratorLLM):

    def __init__(self):
        """
        Handle generation using Ollama.
        """
        super().__init__()

    def generate(self, history):
        self.logger.info(f"Using {self.model} for generation with Ollama")
        # Serve a cached response if this prompt has been answered before.
        if self.cache.is_cached(history[-1]['content']):
            self.state.current_buffer = self.cache.get_cached_response(history[-1]['content'])
            self.state.is_generating = False
            return
        try:
            with self.state.lock:
                self.state.is_generating = True
                self.state.last_complete_sentence = ""
                self.state.current_buffer = ""
            stream = ollama.chat(
                model=self.model,
                messages=history,
                stream=True,
            )
            # Append each streamed chunk to the shared buffer.
            for chunk in stream:
                content = chunk['message']['content']
                if '\n' in content:
                    self.logger.info(content)
                with self.state.lock:
                    self.state.current_buffer += content
        except Exception as e:
            if "404" in str(e):
                # Model is missing locally: pull it so the next attempt can succeed.
                self.logger.info(f"Downloading {self.model}...")
                ollama.pull(self.model)
            if "refused" in str(e).lower():
                raise Exception("Ollama connection failed. Is the server running?") from e
            raise e
        finally:
            self.logger.info("Generation complete")
            with self.state.lock:
                self.state.is_generating = False
                self.cache.add_message_pair(history[-1]['content'], self.state.current_buffer)


if __name__ == "__main__":
    generator = OllamaLLM()
    history = [
        {
            "role": "user",
            "content": "Hello, how are you?"
        }
    ]
    generator.set_model("deepseek-r1:1.5b")
    generator.start(history)
    while True:
        print(generator.get_status())
        time.sleep(1)