fix: server problem with llamacpp

This commit is contained in:
martin legrand 2025-03-29 13:31:13 +01:00
parent 4676b817e9
commit 44e0508ae5


@@ -9,13 +9,15 @@ class LlamacppLLM(GeneratorLLM):
         Handle generation using llama.cpp
         """
         super().__init__()
-        self.llm = Llama.from_pretrained(
-            repo_id=self.model,
-            filename="*q8_0.gguf",
-            verbose=True
-        )
+        self.llm = None

     def generate(self, history):
+        if self.llm is None:
+            self.llm = Llama.from_pretrained(
+                repo_id=self.model,
+                filename="*q8_0.gguf",
+                verbose=True
+            )
         self.logger.info(f"Using {self.model} for generation with Llama.cpp")
         self.llm.create_chat_completion(
             messages = history
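
For context, below is a minimal, self-contained sketch of the lazy-loading pattern this commit moves to: the model is no longer loaded in the constructor, only on the first call to generate. It uses the public llama-cpp-python API (Llama.from_pretrained and create_chat_completion); the LazyLlama wrapper name and the example repo id are illustrative assumptions, not code from this repository.

from llama_cpp import Llama  # pip install llama-cpp-python huggingface-hub

class LazyLlama:
    """Illustrative wrapper: defer model download/load until the first generation call."""

    def __init__(self, model: str):
        self.model = model  # Hugging Face repo id containing GGUF weights
        self.llm = None     # nothing is downloaded or loaded yet

    def generate(self, history: list) -> str:
        # The first call pays the one-time download/load cost; later calls reuse the instance.
        if self.llm is None:
            self.llm = Llama.from_pretrained(
                repo_id=self.model,
                filename="*q8_0.gguf",  # select the q8_0 quantization file from the repo
                verbose=True,
            )
        result = self.llm.create_chat_completion(messages=history)
        return result["choices"][0]["message"]["content"]

# Example usage (the repo id is a placeholder; any GGUF repo with a *q8_0.gguf file works):
llm = LazyLlama("Qwen/Qwen2.5-7B-Instruct-GGUF")
print(llm.generate([{"role": "user", "content": "Say hello in one sentence."}]))

Deferring the load keeps server startup fast and avoids fetching weights when another provider is selected; only the first generate call blocks on loading the model.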