From f42a31578ebd17e1501255e981d6806e9f11390d Mon Sep 17 00:00:00 2001
From: martin legrand
Date: Sat, 29 Mar 2025 14:30:28 +0100
Subject: [PATCH] fix : response timeout

---
 server/sources/llamacpp.py | 17 ++++++++++++++---
 server/sources/ollama.py   |  1 -
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/server/sources/llamacpp.py b/server/sources/llamacpp.py
index 49e4ceb..3e17445 100644
--- a/server/sources/llamacpp.py
+++ b/server/sources/llamacpp.py
@@ -20,6 +20,17 @@ class LlamacppLLM(GeneratorLLM):
             )
             return
         self.logger.info(f"Using {self.model} for generation with Llama.cpp")
-        self.llm.create_chat_completion(
-            messages = history
-        )
\ No newline at end of file
+        try:
+            with self.state.lock:
+                self.state.is_generating = True
+                self.state.last_complete_sentence = ""
+                self.state.current_buffer = ""
+            output = self.llm.create_chat_completion(
+                messages = history
+            )
+            self.state.current_buffer = output
+        except Exception as e:
+            self.logger.error(f"Error: {e}")
+        finally:
+            with self.state.lock:
+                self.state.is_generating = False
\ No newline at end of file
diff --git a/server/sources/ollama.py b/server/sources/ollama.py
index 0740ead..259061d 100644
--- a/server/sources/ollama.py
+++ b/server/sources/ollama.py
@@ -18,7 +18,6 @@ class OllamaLLM(GeneratorLLM):
                 self.state.last_complete_sentence = ""
                 self.state.current_buffer = ""
 
-            self.logger.info("Starting generation...")
             stream = ollama.chat(
                 model=self.model,
                 messages=history,
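
The llamacpp.py hunk wraps the blocking create_chat_completion() call in the same lock-guarded state bookkeeping the Ollama backend already uses: set is_generating and clear the buffers under the lock, run the call, store the result, and always reset is_generating in a finally block so a failed generation cannot leave a client polling forever. Below is a minimal, self-contained sketch of that pattern, not the repository's code: GenerationState and fake_chat_completion are illustrative stand-ins, and pulling the text out via output["choices"][0]["message"]["content"] assumes llama-cpp-python's OpenAI-style response dict (the patch itself stores the raw dict in current_buffer).

import threading

class GenerationState:
    """Illustrative stand-in for the shared state a GeneratorLLM subclass would use."""
    def __init__(self):
        self.lock = threading.Lock()
        self.is_generating = False
        self.last_complete_sentence = ""
        self.current_buffer = ""

def fake_chat_completion(messages):
    # Stand-in for llm.create_chat_completion(); returns an OpenAI-style dict.
    return {"choices": [{"message": {"content": "hello from the model"}}]}

def generate(state, history):
    try:
        with state.lock:
            state.is_generating = True
            state.last_complete_sentence = ""
            state.current_buffer = ""
        output = fake_chat_completion(history)  # blocking, non-streaming call
        with state.lock:
            # The patch stores the raw response dict; a text-oriented consumer
            # would usually want just the generated message content.
            state.current_buffer = output["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"Error: {e}")
    finally:
        with state.lock:
            # Always clear the flag, even on failure, so clients polling the
            # server never wait on a generation that already died.
            state.is_generating = False

if __name__ == "__main__":
    state = GenerationState()
    generate(state, [{"role": "user", "content": "hi"}])
    print(state.current_buffer)  # -> "hello from the model"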