from .generator import GeneratorLLM
import ollama


class OllamaLLM(GeneratorLLM):
    def __init__(self):
        """
        Handle generation using Ollama.
        """
        super().__init__()

    def generate(self, history):
        self.logger.info(f"Using {self.model} for generation with Ollama")
        try:
            # Mark generation as started and reset the shared buffers.
            with self.state.lock:
                self.state.is_generating = True
                self.state.last_complete_sentence = ""
                self.state.current_buffer = ""

            stream = ollama.chat(
                model=self.model,
                messages=history,
                stream=True,
            )
            for chunk in stream:
                content = chunk['message']['content']
                print(content, end='', flush=True)

                # Accumulate streamed tokens under the lock so readers always
                # see a consistent buffer.
                with self.state.lock:
                    self.state.current_buffer += content
        except ollama.ResponseError as e:
            if e.status_code == 404:
                # Model is not available locally: pull it so a retry can succeed.
                self.logger.info(f"Downloading {self.model}...")
                ollama.pull(self.model)
            with self.state.lock:
                self.state.is_generating = False
            print(f"Error: {e}")
        except Exception as e:
            if "refused" in str(e).lower():
                raise Exception("Ollama connection failed. Is the server running?") from e
            raise
        finally:
            with self.state.lock:
                self.state.is_generating = False
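

# --- Usage sketch (illustrative only, not part of the original module) ---
# A minimal, hedged example of driving OllamaLLM. It assumes the GeneratorLLM
# base class in .generator provides the `model`, `state` (with `lock`,
# `is_generating`, `current_buffer`) and `logger` attributes used above, and
# that a local Ollama server is reachable on its default port. Because of the
# relative import, run it as a module, e.g. `python -m <package>.<this_module>`.
if __name__ == "__main__":
    llm = OllamaLLM()
    llm.model = "llama3"  # assumed settable attribute; any locally pulled model tag works
    history = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Explain token streaming in one sentence."},
    ]
    llm.generate(history)  # streams tokens to stdout and into state.current_buffer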