diff --git a/server/sources/llamacpp.py b/server/sources/llamacpp.py
index 49e4ceb..3e17445 100644
--- a/server/sources/llamacpp.py
+++ b/server/sources/llamacpp.py
@@ -20,6 +20,17 @@ class LlamacppLLM(GeneratorLLM):
             )
             return
         self.logger.info(f"Using {self.model} for generation with Llama.cpp")
-        self.llm.create_chat_completion(
-            messages = history
-        )
\ No newline at end of file
+        try:
+            with self.state.lock:
+                self.state.is_generating = True
+                self.state.last_complete_sentence = ""
+                self.state.current_buffer = ""
+            output = self.llm.create_chat_completion(
+                messages = history
+            )
+            self.state.current_buffer = output
+        except Exception as e:
+            self.logger.error(f"Error: {e}")
+        finally:
+            with self.state.lock:
+                self.state.is_generating = False
\ No newline at end of file
diff --git a/server/sources/ollama.py b/server/sources/ollama.py
index 0740ead..259061d 100644
--- a/server/sources/ollama.py
+++ b/server/sources/ollama.py
@@ -18,7 +18,6 @@ class OllamaLLM(GeneratorLLM):
                 self.state.last_complete_sentence = ""
                 self.state.current_buffer = ""
 
-            self.logger.info("Starting generation...")
             stream = ollama.chat(
                 model=self.model,
                 messages=history,
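One detail worth noting for the llamacpp.py change: a non-streaming create_chat_completion call in llama-cpp-python returns an OpenAI-style completion dict rather than plain text, so current_buffer ends up holding the whole response object. A minimal sketch of pulling the assistant text out of that dict, assuming the standard response shape (the helper name is illustrative and not part of the patch):

    # Illustrative only: shows the response shape of a non-streaming
    # create_chat_completion result; not part of the patch.
    def extract_reply(output: dict) -> str:
        # The result follows the OpenAI chat-completion schema, so the
        # generated text sits under choices[0]["message"]["content"].
        return output["choices"][0]["message"]["content"]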