From 7553d9dbb620bc332eb9137c433be603f204da86 Mon Sep 17 00:00:00 2001
From: martin legrand
Date: Wed, 9 Apr 2025 19:03:04 +0200
Subject: [PATCH] refactor: memory clear_section function logic

---
 server/app.py                   |  4 +-
 server/sources/vllm_handler.py  | 74 ---------------------------------
 sources/agents/browser_agent.py |  2 +-
 sources/browser.py              |  2 +-
 sources/memory.py               | 26 ++++++++----
 5 files changed, 21 insertions(+), 87 deletions(-)
 delete mode 100644 server/sources/vllm_handler.py

diff --git a/server/app.py b/server/app.py
index 13817b9..946390a 100644
--- a/server/app.py
+++ b/server/app.py
@@ -6,7 +6,6 @@
 from flask import Flask, jsonify, request
 from sources.llamacpp_handler import LlamacppLLM
 from sources.ollama_handler import OllamaLLM
-from sources.vllm_handler import Vllm
 
 parser = argparse.ArgumentParser(description='AgenticSeek server script')
 parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama], [vllm] or [llamacpp]', required=True)
@@ -15,12 +14,11 @@
 args = parser.parse_args()
 
 app = Flask(__name__)
-assert args.provider in ["ollama", "llamacpp", "vllm"], f"Provider {args.provider} does not exists. see --help for more information"
+assert args.provider in ["ollama", "llamacpp"], f"Provider {args.provider} does not exist. See --help for more information"
 
 handler_map = {
     "ollama": OllamaLLM(),
     "llamacpp": LlamacppLLM(),
-    "vllm": Vllm()
 }
 
 generator = handler_map[args.provider]
diff --git a/server/sources/vllm_handler.py b/server/sources/vllm_handler.py
deleted file mode 100644
index e9cea14..0000000
--- a/server/sources/vllm_handler.py
+++ /dev/null
@@ -1,74 +0,0 @@
-from .generator import GeneratorLLM
-from vllm import LLM, SamplingParams
-import logging
-from typing import List, Dict
-
-class Vllm(GeneratorLLM):
-    def __init__(self):
-        """
-        Handle generation using vLLM.
-        """
-        super().__init__()
-        self.logger = logging.getLogger(__name__)
-        self.llm = None
-
-    def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str:
-        """
-        Convert OpenAI-format history to a single prompt string for vLLM.
-        """
-        prompt = ""
-        for message in history:
-            role = message["role"]
-            content = message["content"]
-            if role == "system":
-                prompt += f"System: {content}\n"
-            elif role == "user":
-                prompt += f"User: {content}\n"
-            elif role == "assistant":
-                prompt += f"Assistant: {content}\n"
-        prompt += "Assistant: "
-        return prompt
-
-    def generate(self, history: List[Dict[str, str]]):
-        """
-        Generate response using vLLM from OpenAI-format message history.
-
-        Args:
-            history: List of dictionaries in OpenAI format [{"role": "user", "content": "..."}, ...]
-        """
-        self.logger.info(f"Using {self.model} for generation with vLLM")
-        if self.llm is None:
-            self.llm = LLM(model=self.model)
-
-        try:
-            with self.state.lock:
-                self.state.is_generating = True
-                self.state.last_complete_sentence = ""
-                self.state.current_buffer = ""
-
-            prompt = self.convert_history_to_prompt(history)
-
-            sampling_params = SamplingParams(
-                temperature=0.7,
-                max_tokens=512,
-                gpu_memory_utilization=0.5,
-                stream=True # Enable streaming
-            )
-            outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False)
-            for output in outputs:
-                content = output.outputs[0].text
-                with self.state.lock:
-                    if '.' in content:
-                        self.logger.info(self.state.current_buffer)
-                    self.state.current_buffer += content
-            with self.state.lock:
-                self.logger.info(f"Final output: {self.state.current_buffer}")
-
-        except Exception as e:
-            self.logger.error(f"Error during generation: {str(e)}")
-            raise e
-
-        finally:
-            self.logger.info("Generation complete")
-            with self.state.lock:
-                self.state.is_generating = False
\ No newline at end of file
diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py
index 77064eb..d330c3d 100644
--- a/sources/agents/browser_agent.py
+++ b/sources/agents/browser_agent.py
@@ -357,7 +357,7 @@ class BrowserAgent(Agent):
         mem_last_idx = self.memory.push('user', prompt)
         answer, reasoning = self.llm_request()
         pretty_print(answer, color="output")
-        self.memory.clear_section(mem_begin_idx, mem_last_idx)
+        self.memory.clear_section(mem_begin_idx+1, mem_last_idx-1)
         return answer, reasoning
 
 if __name__ == "__main__":
diff --git a/sources/browser.py b/sources/browser.py
index c3fc2c7..17ab23e 100644
--- a/sources/browser.py
+++ b/sources/browser.py
@@ -58,7 +58,7 @@ def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome:
     chrome_options.add_argument("--disable-gpu")
     chrome_options.add_argument("--disable-webgl")
     #ua = UserAgent()
-    #user_agent = ua.random # NOTE sometime return wrong user agent, investigate
+    #user_agent = ua.random # NOTE sometimes returns a bad user agent that causes a crash, investigate
    #chrome_options.add_argument(f'user-agent={user_agent}')
     user_data_dir = tempfile.mkdtemp()
     chrome_options.add_argument(f"--user-data-dir={user_data_dir}")
diff --git a/sources/memory.py b/sources/memory.py
index ac6ff9b..bc2c796 100644
--- a/sources/memory.py
+++ b/sources/memory.py
@@ -106,13 +106,23 @@ class Memory():
         return curr_idx-1
 
     def clear(self) -> None:
+        """Clear all memory except the system prompt."""
         self.logger.info("Memory clear performed.")
-        self.memory = []
+        self.memory = self.memory[:1]
 
     def clear_section(self, start: int, end: int) -> None:
-        """Clear a section of the memory."""
-        self.logger.info(f"Memory section {start} to {end} cleared.")
+        """
+        Clear a section of the memory. The system message at index 0 is never cleared.
+        Args:
+            start (int): Starting bound of the section to clear.
+            end (int): Ending bound of the section to clear.
+        """
+        self.logger.info(f"Clearing memory section {start} to {end}.")
+        start = max(0, start) + 1
+        end = min(end, len(self.memory)-1) + 2
+        self.logger.info(f"Memory before: {self.memory}")
         self.memory = self.memory[:start] + self.memory[end:]
+        self.logger.info(f"Memory after: {self.memory}")
 
     def get(self) -> list:
         return self.memory
@@ -172,7 +182,10 @@ if __name__ == "__main__":
     sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
     memory = Memory("You are a helpful assistant.", recover_last_session=False, memory_compression=True)
-    
+
+    memory.push('user', "hello")
+    memory.push('assistant', "how can i help you?")
+    memory.push('user', "why do i get this cuda error?")
     sample_text = """
 The error you're encountering:
 cuda.cu:52:10: fatal error: helper_functions.h: No such file or directory
@@ -190,11 +203,8 @@ Use #include "helper_functions.h" for local files.
 Use the -I flag to specify the directory containing helper_functions.h.
 Ensure the file exists in the specified location.
""" - - memory.push('user', "hello") - memory.push('assistant', "how can i help you?") - memory.push('user', "why do i get this cuda error?") memory.push('assistant', sample_text) + print("\n---\nmemory before:", memory.get()) memory.compress() print("\n---\nmemory after:", memory.get())