From 8cfb2d12461c18396d4c70ec8815f4d6ebbcc937 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 19:38:06 +0200 Subject: [PATCH 1/9] feat : vllm in server --- server/app.py | 9 ++++- server/sources/vllm_handler.py | 70 +++++++++++++++++++++++++++++++++ sources/agents/browser_agent.py | 7 ++-- sources/llm_provider.py | 4 +- sources/router.py | 2 + 5 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 server/sources/vllm_handler.py diff --git a/server/app.py b/server/app.py index 85e0f04..b46a314 100644 --- a/server/app.py +++ b/server/app.py @@ -6,6 +6,7 @@ from flask import Flask, jsonify, request from sources.llamacpp_handler import LlamacppLLM from sources.ollama_handler import OllamaLLM +from sources.vllm_handler import Vllm parser = argparse.ArgumentParser(description='AgenticSeek server script') parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama] or [llamacpp]', required=True) @@ -16,7 +17,13 @@ app = Flask(__name__) assert args.provider in ["ollama", "llamacpp"], f"Provider {args.provider} does not exists. see --help for more information" -generator = OllamaLLM() if args.provider == "ollama" else LlamacppLLM() +handler_map = { + "ollama": OllamaLLM(), + "llamacpp": LlamacppLLM(), + "vllm": Vllm() +} + +generator = handler_map[args.provider] @app.route('/generate', methods=['POST']) def start_generation(): diff --git a/server/sources/vllm_handler.py b/server/sources/vllm_handler.py new file mode 100644 index 0000000..2f9b329 --- /dev/null +++ b/server/sources/vllm_handler.py @@ -0,0 +1,70 @@ +from vllm import LLM, SamplingParams +import logging +from typing import List, Dict + +class Vllm(GeneratorLLM): + def __init__(self): + """ + Handle generation using vLLM. + """ + super().__init__() + self.logger = logging.getLogger(__name__) + self.llm = LLM(model=self.model) + + def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str: + """ + Convert OpenAI-format history to a single prompt string for vLLM. + """ + prompt = "" + for message in history: + role = message["role"] + content = message["content"] + if role == "system": + prompt += f"System: {content}\n" + elif role == "user": + prompt += f"User: {content}\n" + elif role == "assistant": + prompt += f"Assistant: {content}\n" + prompt += "Assistant: " + return prompt + + def generate(self, history: List[Dict[str, str]]): + """ + Generate response using vLLM from OpenAI-format message history. + + Args: + history: List of dictionaries in OpenAI format [{"role": "user", "content": "..."}, ...] + """ + self.logger.info(f"Using {self.model} for generation with vLLM") + + try: + with self.state.lock: + self.state.is_generating = True + self.state.last_complete_sentence = "" + self.state.current_buffer = "" + + prompt = self.convert_history_to_prompt(history) + + sampling_params = SamplingParams( + temperature=0.7, + max_tokens=512, + stream=True # Enable streaming + ) + outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False) + for output in outputs: + content = output.outputs[0].text + with self.state.lock: + if '.' 
in content: + self.logger.info(self.state.current_buffer) + self.state.current_buffer += content + with self.state.lock: + self.logger.info(f"Final output: {self.state.current_buffer}") + + except Exception as e: + self.logger.error(f"Error during generation: {str(e)}") + raise e + + finally: + self.logger.info("Generation complete") + with self.state.lock: + self.state.is_generating = False \ No newline at end of file diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 0e5a687..77064eb 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -112,7 +112,7 @@ class BrowserAgent(Agent): 1. **Decide if the page answers the user’s query:** - If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page. - - If it does and you completed user request, say {Action.REQUEST_EXIT}. + - If it does and no futher step would help with user request, say {Action.REQUEST_EXIT}. - If it doesn’t, say: Error: then go back or navigate to another link. 2. **Navigate to a link by either: ** - Saying I will navigate to (write down the full URL) www.example.com/cats @@ -145,7 +145,7 @@ class BrowserAgent(Agent): Action: {Action.GO_BACK.value} Example 3 (query answer found, enought notes taken): - Note: I found on that ...... + Note: I found on website www.example.com that ...... Given this answer the user query I should exit the web browser. Action: {Action.REQUEST_EXIT.value} @@ -161,8 +161,9 @@ class BrowserAgent(Agent): You previously took these notes: {notes} Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action. - You might {Action.REQUEST_EXIT.value} if no more link are useful. If you conduct research do not exit until you have several notes. + Do not ever ask the user to conduct a task, do not ever exit expecting user intervention. + You should be Investigative, Curious and Skeptical. """ def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]: diff --git a/sources/llm_provider.py b/sources/llm_provider.py index bffd8e6..e0b0314 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -87,8 +87,8 @@ class Provider: except AttributeError as e: raise NotImplementedError(f"{str(e)}\nIs {self.provider_name} implemented ?") except Exception as e: - if "RemoteDisconnected" in str(e): - return f"{self.server_ip} seem offline. RemoteDisconnected error." + if "refused" in str(e): + return f"Server {self.server_ip} seem offline. Unable to answer." 
raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e return thought diff --git a/sources/router.py b/sources/router.py index cab9e8b..008bec7 100644 --- a/sources/router.py +++ b/sources/router.py @@ -121,7 +121,9 @@ class AgentRouter: ("Write a Python function to merge two sorted lists", "LOW"), ("Organize my desktop files by extension and then write a script to list them", "HIGH"), ("Create a bash script to monitor disk space and alert via text file", "LOW"), + ("can you find vitess repo, clone it and install by following the readme", "HIGH"), ("Search X for posts about AI ethics and summarize them", "LOW"), + ("Can you follow the readme and install the project", "HIGH"), ("Find the latest research on renewable energy and build a web app to display it", "HIGH"), ("Write a C program to sort an array of integers", "LOW"), ("Create a Node.js server that queries a public API for traffic data and displays it", "HIGH"), From 82cf54706b5d702b8b81252f67a181a023ae553c Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 19:40:02 +0200 Subject: [PATCH 2/9] feat : vllm in server fix --- server/sources/vllm_handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server/sources/vllm_handler.py b/server/sources/vllm_handler.py index 2f9b329..c3479a3 100644 --- a/server/sources/vllm_handler.py +++ b/server/sources/vllm_handler.py @@ -1,3 +1,4 @@ +from .generator import GeneratorLLM from vllm import LLM, SamplingParams import logging from typing import List, Dict From 3c66eb646e31aa26d7898aa8fe471549d8fe8449 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 19:48:18 +0200 Subject: [PATCH 3/9] fix : vllm Nonetype --- server/app.py | 4 ++-- server/sources/vllm_handler.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/server/app.py b/server/app.py index b46a314..13817b9 100644 --- a/server/app.py +++ b/server/app.py @@ -9,13 +9,13 @@ from sources.ollama_handler import OllamaLLM from sources.vllm_handler import Vllm parser = argparse.ArgumentParser(description='AgenticSeek server script') -parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama] or [llamacpp]', required=True) +parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama], [vllm] or [llamacpp]', required=True) parser.add_argument('--port', type=int, help='port to use', required=True) args = parser.parse_args() app = Flask(__name__) -assert args.provider in ["ollama", "llamacpp"], f"Provider {args.provider} does not exists. see --help for more information" +assert args.provider in ["ollama", "llamacpp", "vllm"], f"Provider {args.provider} does not exists. see --help for more information" handler_map = { "ollama": OllamaLLM(), diff --git a/server/sources/vllm_handler.py b/server/sources/vllm_handler.py index c3479a3..6fd122b 100644 --- a/server/sources/vllm_handler.py +++ b/server/sources/vllm_handler.py @@ -10,7 +10,7 @@ class Vllm(GeneratorLLM): """ super().__init__() self.logger = logging.getLogger(__name__) - self.llm = LLM(model=self.model) + self.llm = None def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str: """ @@ -37,6 +37,8 @@ class Vllm(GeneratorLLM): history: List of dictionaries in OpenAI format [{"role": "user", "content": "..."}, ...] 
""" self.logger.info(f"Using {self.model} for generation with vLLM") + if self.llm is None: + self.llm = LLM(model=self.model) try: with self.state.lock: From 9a1b2b93f682882807174b29584b370800c86201 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 20:31:07 +0200 Subject: [PATCH 4/9] fix : SessionNotCreatedException caused by data dir conflict --- sources/browser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sources/browser.py b/sources/browser.py index d2bb30c..c3fc2c7 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -17,6 +17,7 @@ import time import random import os import shutil +import tempfile import markdownify import sys import re @@ -59,6 +60,8 @@ def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome: #ua = UserAgent() #user_agent = ua.random # NOTE sometime return wrong user agent, investigate #chrome_options.add_argument(f'user-agent={user_agent}') + user_data_dir = tempfile.mkdtemp() + chrome_options.add_argument(f"--user-data-dir={user_data_dir}") chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("--autoplay-policy=user-gesture-required") From ed2a9cc20435d8f67de0b7a27f28e02f2641d16d Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 20:58:32 +0200 Subject: [PATCH 5/9] server gpu percentage for vllm --- server/sources/vllm_handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server/sources/vllm_handler.py b/server/sources/vllm_handler.py index 6fd122b..e9cea14 100644 --- a/server/sources/vllm_handler.py +++ b/server/sources/vllm_handler.py @@ -51,6 +51,7 @@ class Vllm(GeneratorLLM): sampling_params = SamplingParams( temperature=0.7, max_tokens=512, + gpu_memory_utilization=0.5, stream=True # Enable streaming ) outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False) From 7553d9dbb620bc332eb9137c433be603f204da86 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Wed, 9 Apr 2025 19:03:04 +0200 Subject: [PATCH 6/9] refactor : memory clear section function logic --- server/app.py | 4 +- server/sources/vllm_handler.py | 74 --------------------------------- sources/agents/browser_agent.py | 2 +- sources/browser.py | 2 +- sources/memory.py | 26 ++++++++---- 5 files changed, 21 insertions(+), 87 deletions(-) delete mode 100644 server/sources/vllm_handler.py diff --git a/server/app.py b/server/app.py index 13817b9..946390a 100644 --- a/server/app.py +++ b/server/app.py @@ -6,7 +6,6 @@ from flask import Flask, jsonify, request from sources.llamacpp_handler import LlamacppLLM from sources.ollama_handler import OllamaLLM -from sources.vllm_handler import Vllm parser = argparse.ArgumentParser(description='AgenticSeek server script') parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama], [vllm] or [llamacpp]', required=True) @@ -15,12 +14,11 @@ args = parser.parse_args() app = Flask(__name__) -assert args.provider in ["ollama", "llamacpp", "vllm"], f"Provider {args.provider} does not exists. see --help for more information" +assert args.provider in ["ollama", "llamacpp"], f"Provider {args.provider} does not exists. 
see --help for more information" handler_map = { "ollama": OllamaLLM(), "llamacpp": LlamacppLLM(), - "vllm": Vllm() } generator = handler_map[args.provider] diff --git a/server/sources/vllm_handler.py b/server/sources/vllm_handler.py deleted file mode 100644 index e9cea14..0000000 --- a/server/sources/vllm_handler.py +++ /dev/null @@ -1,74 +0,0 @@ -from .generator import GeneratorLLM -from vllm import LLM, SamplingParams -import logging -from typing import List, Dict - -class Vllm(GeneratorLLM): - def __init__(self): - """ - Handle generation using vLLM. - """ - super().__init__() - self.logger = logging.getLogger(__name__) - self.llm = None - - def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str: - """ - Convert OpenAI-format history to a single prompt string for vLLM. - """ - prompt = "" - for message in history: - role = message["role"] - content = message["content"] - if role == "system": - prompt += f"System: {content}\n" - elif role == "user": - prompt += f"User: {content}\n" - elif role == "assistant": - prompt += f"Assistant: {content}\n" - prompt += "Assistant: " - return prompt - - def generate(self, history: List[Dict[str, str]]): - """ - Generate response using vLLM from OpenAI-format message history. - - Args: - history: List of dictionaries in OpenAI format [{"role": "user", "content": "..."}, ...] - """ - self.logger.info(f"Using {self.model} for generation with vLLM") - if self.llm is None: - self.llm = LLM(model=self.model) - - try: - with self.state.lock: - self.state.is_generating = True - self.state.last_complete_sentence = "" - self.state.current_buffer = "" - - prompt = self.convert_history_to_prompt(history) - - sampling_params = SamplingParams( - temperature=0.7, - max_tokens=512, - gpu_memory_utilization=0.5, - stream=True # Enable streaming - ) - outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False) - for output in outputs: - content = output.outputs[0].text - with self.state.lock: - if '.' 
in content: - self.logger.info(self.state.current_buffer) - self.state.current_buffer += content - with self.state.lock: - self.logger.info(f"Final output: {self.state.current_buffer}") - - except Exception as e: - self.logger.error(f"Error during generation: {str(e)}") - raise e - - finally: - self.logger.info("Generation complete") - with self.state.lock: - self.state.is_generating = False \ No newline at end of file diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 77064eb..d330c3d 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -357,7 +357,7 @@ class BrowserAgent(Agent): mem_last_idx = self.memory.push('user', prompt) answer, reasoning = self.llm_request() pretty_print(answer, color="output") - self.memory.clear_section(mem_begin_idx, mem_last_idx) + self.memory.clear_section(mem_begin_idx+1, mem_last_idx-1) return answer, reasoning if __name__ == "__main__": diff --git a/sources/browser.py b/sources/browser.py index c3fc2c7..17ab23e 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -58,7 +58,7 @@ def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome: chrome_options.add_argument("--disable-gpu") chrome_options.add_argument("--disable-webgl") #ua = UserAgent() - #user_agent = ua.random # NOTE sometime return wrong user agent, investigate + #user_agent = ua.random # NOTE sometime return bad ua crash, investigate #chrome_options.add_argument(f'user-agent={user_agent}') user_data_dir = tempfile.mkdtemp() chrome_options.add_argument(f"--user-data-dir={user_data_dir}") diff --git a/sources/memory.py b/sources/memory.py index ac6ff9b..bc2c796 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -106,13 +106,23 @@ class Memory(): return curr_idx-1 def clear(self) -> None: + """Clear all memory except system prompt""" self.logger.info("Memory clear performed.") - self.memory = [] + self.memory = self.memory[:1] def clear_section(self, start: int, end: int) -> None: - """Clear a section of the memory.""" - self.logger.info(f"Memory section {start} to {end} cleared.") + """ + Clear a section of the memory. Ignore system message index. + Args: + start (int): Starting bound of the section to clear. + end (int): Ending bound of the section to clear. + """ + self.logger.info(f"Clearing memory section {start} to {end}.") + start = max(0, start) + 1 + end = min(end, len(self.memory)-1) + 2 + self.logger.info(f"Memory before: {self.memory}") self.memory = self.memory[:start] + self.memory[end:] + self.logger.info(f"Memory after: {self.memory}") def get(self) -> list: return self.memory @@ -172,7 +182,10 @@ if __name__ == "__main__": sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) memory = Memory("You are a helpful assistant.", recover_last_session=False, memory_compression=True) - + + memory.push('user', "hello") + memory.push('assistant', "how can i help you?") + memory.push('user', "why do i get this cuda error?") sample_text = """ The error you're encountering: cuda.cu:52:10: fatal error: helper_functions.h: No such file or directory @@ -190,11 +203,8 @@ Use #include "helper_functions.h" for local files. Use the -I flag to specify the directory containing helper_functions.h. Ensure the file exists in the specified location. 
""" - - memory.push('user', "hello") - memory.push('assistant', "how can i help you?") - memory.push('user', "why do i get this cuda error?") memory.push('assistant', sample_text) + print("\n---\nmemory before:", memory.get()) memory.compress() print("\n---\nmemory after:", memory.get()) From 369850b86dff88b2bcc37bd86f3ce1da9380c014 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 10 Apr 2025 16:35:07 +0200 Subject: [PATCH 7/9] fix : major issue with file reading tool, feat : prompt improvement for browser agent --- prompts/base/coder_agent.txt | 3 +- prompts/base/file_agent.txt | 13 +++-- prompts/base/planner_agent.txt | 1 + prompts/jarvis/coder_agent.txt | 3 +- prompts/jarvis/file_agent.txt | 12 ++-- prompts/jarvis/planner_agent.txt | 1 + sources/agents/browser_agent.py | 16 +++--- sources/browser.py | 19 ++++++- sources/router.py | 97 ++++++++++++++++++++++---------- sources/tools/fileFinder.py | 37 ++++++------ sources/tools/tools.py | 15 +++++ 11 files changed, 147 insertions(+), 70 deletions(-) diff --git a/prompts/base/coder_agent.txt b/prompts/base/coder_agent.txt index 51ac111..13dd55b 100644 --- a/prompts/base/coder_agent.txt +++ b/prompts/base/coder_agent.txt @@ -46,9 +46,8 @@ Some rules: - Always provide a short sentence above the code for what it does, even for a hello world. - Be efficient, no need to explain your code, unless asked. - You do not ever need to use bash to execute code. -- Do not ever use user input, input are not supported by the system. - Do not ever tell user how to run it. user know it. -- For simple explanation you don't need to code. +- In python do not use if __name__ == "__main__" - If using gui, make sure echap or exit button close the program - No lazyness, write and rewrite full code every time - If query is unclear say REQUEST_CLARIFICATION \ No newline at end of file diff --git a/prompts/base/file_agent.txt b/prompts/base/file_agent.txt index eb3a746..2e802b7 100644 --- a/prompts/base/file_agent.txt +++ b/prompts/base/file_agent.txt @@ -25,14 +25,15 @@ The file_finder tool is used to locate files on the user’s system. It is a sep To use the file_finder tool, use this syntax: ```file_finder -toto.py +name=toto.py ``` This will return the path of the file toto.py and other informations. Find file and read file: -```file_finder:read -toto.py +```file_finder +action=read +name=toto.py ``` This will return the content of the file toto.py. @@ -44,13 +45,15 @@ rules: - Do not ever use editor such as vim or nano. - Make sure to always cd your work folder before executing commands, like cd && - only use file name with file_finder, not path +- remember to use :read when reading file. Example Interaction User: "I need to find the file config.txt and read its contents." Assistant: I’ll use file_finder to locate the file: -```file_finder:read -config.txt +```file_finder +action=read +name=config.txt ``` diff --git a/prompts/base/planner_agent.txt b/prompts/base/planner_agent.txt index 89928f1..b32e6d7 100644 --- a/prompts/base/planner_agent.txt +++ b/prompts/base/planner_agent.txt @@ -72,6 +72,7 @@ Rules: - Think about how the main.py will import the class from other coding agents. - Coding agent should use a class based approach. - One coding agent should work on one file at a time. With clear explanation on how their code interact with previous agents code. +- The file agent can only conduct one action at the time. successive file agent could be needed. - Tell agent to execute without question. - Only use web agent for finding necessary informations. 
- If a task might require user email (eg: api services), do not write plan instead ask for user email. diff --git a/prompts/jarvis/coder_agent.txt b/prompts/jarvis/coder_agent.txt index 5a441bc..c862261 100644 --- a/prompts/jarvis/coder_agent.txt +++ b/prompts/jarvis/coder_agent.txt @@ -45,9 +45,8 @@ Some rules: - Always provide a short sentence above the code for what it does, even for a hello world. - Be efficient, no need to explain your code, unless asked. - You do not ever need to use bash to execute code. -- Do not ever use user input, input are not supported by the system. - Do not ever tell user how to run it. user know it. -- For simple explanation you don't need to code. +- In python do not use if __name__ == "__main__" - If using gui, make sure echap close the program - No lazyness, write and rewrite full code every time - If query is unclear say REQUEST_CLARIFICATION diff --git a/prompts/jarvis/file_agent.txt b/prompts/jarvis/file_agent.txt index e6373d4..34a0301 100644 --- a/prompts/jarvis/file_agent.txt +++ b/prompts/jarvis/file_agent.txt @@ -35,14 +35,15 @@ The file_finder tool is used to locate files on the user’s system. It is a sep To use the file_finder tool, use this syntax: ```file_finder -toto.py +name=toto.py ``` This will return the path of the file toto.py and other informations. Find file and read file: -```file_finder:read -toto.py +```file_finder +action=read +name=toto.py ``` This will return the content of the file toto.py. @@ -60,8 +61,9 @@ User: "I need to find the file config.txt and read its contents." Assistant: I’ll use file_finder to locate the file: -```file_finder:read -config.txt +```file_finder +action=read +name=config.txt ``` Personality: diff --git a/prompts/jarvis/planner_agent.txt b/prompts/jarvis/planner_agent.txt index 89928f1..b32e6d7 100644 --- a/prompts/jarvis/planner_agent.txt +++ b/prompts/jarvis/planner_agent.txt @@ -72,6 +72,7 @@ Rules: - Think about how the main.py will import the class from other coding agents. - Coding agent should use a class based approach. - One coding agent should work on one file at a time. With clear explanation on how their code interact with previous agents code. +- The file agent can only conduct one action at the time. successive file agent could be needed. - Tell agent to execute without question. - Only use web agent for finding necessary informations. - If a task might require user email (eg: api services), do not write plan instead ask for user email. diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index d330c3d..604fed6 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -112,7 +112,6 @@ class BrowserAgent(Agent): 1. **Decide if the page answers the user’s query:** - If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page. - - If it does and no futher step would help with user request, say {Action.REQUEST_EXIT}. - If it doesn’t, say: Error: then go back or navigate to another link. 2. **Navigate to a link by either: ** - Saying I will navigate to (write down the full URL) www.example.com/cats @@ -122,6 +121,12 @@ class BrowserAgent(Agent): - Use Login if username/password specified by user. For quick task create account, remember password in a note. - You can fill a form using [form_name](value). Don't {Action.GO_BACK.value} when filling form. - If a form is irrelevant or you lack informations (eg: don't know user email) leave it empty. + 4. 
**Decide if you completed the task** + - Check your notes. Do they fully answer the question? Did you verify with multiple pages? + - Are you sure it’s correct? + - If yes to all, say {Action.REQUEST_EXIT}. + - If no, or a page lacks info, go to another link. + - Never stop or ask the user for help. **Rules:** - Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer. @@ -144,9 +149,9 @@ class BrowserAgent(Agent): Error: x.com does not discuss anything related to the user’s query and no navigation link are usefull. Action: {Action.GO_BACK.value} - Example 3 (query answer found, enought notes taken): - Note: I found on website www.example.com that ...... - Given this answer the user query I should exit the web browser. + Example 3 (clear definitive query answer found or enought notes taken): + Note: I took 10 notes so far with enought finding to answer user question. + Therefore I should exit the web browser. Action: {Action.REQUEST_EXIT.value} Example 4 (loging form visible): @@ -161,9 +166,6 @@ class BrowserAgent(Agent): You previously took these notes: {notes} Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action. - If you conduct research do not exit until you have several notes. - Do not ever ask the user to conduct a task, do not ever exit expecting user intervention. - You should be Investigative, Curious and Skeptical. """ def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]: diff --git a/sources/browser.py b/sources/browser.py index 17ab23e..98faa80 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -67,7 +67,9 @@ def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome: chrome_options.add_argument("--autoplay-policy=user-gesture-required") chrome_options.add_argument("--mute-audio") chrome_options.add_argument("--disable-notifications") - chrome_options.add_argument('--window-size=1080,560') + resolutions = [(1920, 1080), (1366, 768), (1440, 900)] + width, height = random.choice(resolutions) + chrome_options.add_argument(f'--window-size={width},{height}') if not stealth_mode: # crx file can't be installed in stealth mode crx_path = "./crx/nopecha.crx" @@ -85,6 +87,15 @@ def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome: service = Service(chromedriver_path) if stealth_mode: driver = uc.Chrome(service=service, options=chrome_options) + driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { + "source": """ + Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => Math.floor(Math.random() * 8) + 2 }); + Object.defineProperty(navigator, 'deviceMemory', { get: () => Math.floor(Math.random() * 8) + 2 }); + """ + }) + chrome_version = driver.capabilities['browserVersion'] + user_agent = f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{chrome_version} Safari/537.36" + chrome_options.add_argument(f'user-agent={user_agent}') stealth(driver, languages=["en-US", "en"], vendor="Google Inc.", @@ -125,6 +136,7 @@ class Browser: def go_to(self, url:str) -> bool: """Navigate to a specified URL.""" + time.sleep(random.uniform(0.4, 2.5)) # more human behavior try: initial_handles = self.driver.window_handles self.driver.get(url) @@ -465,9 +477,10 @@ if __name__ == "__main__": #browser.go_to("https://practicetestautomation.com/practice-test-login/") time.sleep(10) print("AntiCaptcha / Form Test") - browser.go_to("https://www.google.com/recaptcha/api2/demo") + 
#browser.go_to("https://www.google.com/recaptcha/api2/demo") + browser.go_to("https://auth.leboncoin.fr/login") inputs = browser.get_form_inputs() - #inputs = ['[input1](Martin)', f'[input2](Test)', '[input3](test@gmail.com)'] + inputs = ['[input1](Martin)', f'[input2](Test)', '[input3](test@gmail.com)'] browser.fill_form_inputs(inputs) browser.find_and_click_submission() time.sleep(10) diff --git a/sources/router.py b/sources/router.py index 008bec7..b16b616 100644 --- a/sources/router.py +++ b/sources/router.py @@ -1,6 +1,7 @@ import os import sys import torch +import random from typing import List, Tuple, Type, Dict from transformers import pipeline @@ -70,12 +71,30 @@ class AgentRouter: Use the build in add_examples method of the Adaptive_classifier. """ few_shots = [ - ("can you find api and build a python web app with it ?", "HIGH"), - ("can you lookup for api that track flight and build a web flight tracking app", "HIGH"), + ("hi", "LOW"), + ("How it's going ?", "LOW"), + ("What’s the weather like today?", "LOW"), + ("Can you find a file named ‘notes.txt’ in my Documents folder?", "LOW"), + ("Write a Python script to generate a random password", "LOW"), + ("Debug this JavaScript code that’s not running properly", "LOW"), + ("Search the web for the cheapest laptop under $500", "LOW"), + ("Locate a file called ‘report_2024.pdf’ on my drive", "LOW"), + ("Check if a folder named ‘Backups’ exists on my system", "LOW"), + ("Can you find ‘family_vacation.mp4’ in my Videos folder?", "LOW"), + ("Search my drive for a file named ‘todo_list.xlsx’", "LOW"), + ("Write a Python function to check if a string is a palindrome", "LOW"), + ("Can you search the web for startups in Berlin?", "LOW"), + ("Find recent articles on blockchain technology online", "LOW"), + ("Check if ‘Personal_Projects’ folder exists on my desktop", "LOW"), + ("Create a bash script to list all running processes", "LOW"), + ("Debug this Python script that’s crashing on line 10", "LOW"), + ("Browse the web to find out who invented Python", "LOW"), + ("Locate a file named ‘shopping_list.txt’ on my system", "LOW"), + ("Search the web for tips on staying productive", "LOW"), + ("Find ‘sales_pitch.pptx’ in my Downloads folder", "LOW"), ("can you find a file called resume.docx on my drive?", "LOW"), ("can you write a python script to check if the device on my network is connected to the internet", "LOW"), ("can you debug this Java code? 
It’s not working.", "LOW"), - ("can you browse the web and find me a 4090 for cheap?", "LOW"), ("can you find the old_project.zip file somewhere on my drive?", "LOW"), ("can you locate the backup folder I created last month on my system?", "LOW"), ("could you check if the presentation.pdf file exists in my downloads?", "LOW"), @@ -91,52 +110,71 @@ class AgentRouter: ("find the file ‘important_notes.txt’", "LOW"), ("search the web for the best ways to learn a new language", "LOW"), ("locate the file ‘presentation.pptx’ in my Documents folder", "LOW"), - ("Make a 3d game in javascript using three.js", "HIGH"), - ("Create a whole web app in python using the flask framework that query news API", "HIGH"), - ("Find the latest research papers on AI and build a web app that display them", "HIGH"), - ("Create a bash script that monitor the CPU usage and send an email if it's too high", "HIGH"), + ("Make a 3d game in javascript using three.js", "LOW"), + ("Find the latest research papers on AI and build save in a file", "HIGH"), ("Make a web server in go that serve a simple html page", "LOW"), - ("Make a web server in go that query a weather API and display the weather", "HIGH"), - ("Make a web search for latest news on the stock market and display them", "HIGH"), - ("Search the web for latest ai papers", "LOW"), - ("Write a Python script to calculate the factorial of a number", "LOW"), - ("Can you find a weather API and build a Python app to display current weather", "HIGH"), ("Search the web for the cheapest 4K monitor and provide a link", "LOW"), - ("Create a Python web app using Flask to track cryptocurrency prices from an API", "HIGH"), ("Write a JavaScript function to reverse a string", "LOW"), ("Can you locate a file called ‘budget_2025.xlsx’ on my system?", "LOW"), ("Search the web for recent articles on space exploration", "LOW"), - ("Find a public API for movie data and build a web app to display movie ratings", "HIGH"), - ("Write a bash script to list all files in a directory", "LOW"), ("when is the exam period for master student in france?", "LOW"), ("Check if a folder named ‘Photos_2024’ exists on my desktop", "LOW"), + ("Can you look up some nice knitting patterns on that web thingy?", "LOW"), + ("Goodness, check if my ‘Photos_Grandkids’ folder is still on the desktop", "LOW"), ("Create a Python script to rename all files in a folder based on their creation date", "LOW"), - ("Search the web for tutorials on machine learning and build a simple ML model in Python", "HIGH"), - ("Debug this Python code that’s throwing an error", "LOW"), ("Can you find a file named ‘meeting_notes.txt’ in my Downloads folder?", "LOW"), - ("Create a JavaScript game using Phaser.js with multiple levels", "HIGH"), ("Write a Go program to check if a port is open on a network", "LOW"), ("Search the web for the latest electric car reviews", "LOW"), - ("Find a public API for book data and create a Flask app to list bestsellers", "HIGH"), ("Write a Python function to merge two sorted lists", "LOW"), - ("Organize my desktop files by extension and then write a script to list them", "HIGH"), ("Create a bash script to monitor disk space and alert via text file", "LOW"), - ("can you find vitess repo, clone it and install by following the readme", "HIGH"), + ("What’s out there on the web about cheap travel spots?", "LOW"), ("Search X for posts about AI ethics and summarize them", "LOW"), - ("Can you follow the readme and install the project", "HIGH"), - ("Find the latest research on renewable energy and build a web app to 
display it", "HIGH"), - ("Write a C program to sort an array of integers", "LOW"), - ("Create a Node.js server that queries a public API for traffic data and displays it", "HIGH"), ("Check if a file named ‘project_proposal.pdf’ exists in my Documents", "LOW"), ("Search the web for tips on improving coding skills", "LOW"), ("Write a Python script to count words in a text file", "LOW"), ("Search the web for restaurant", "LOW"), - ("Find a public API for sports scores and build a web app to show live updates", "HIGH"), ("Create a simple HTML page with CSS styling", "LOW"), - ("hi", "LOW"), - ("Bonjour", "LOW"), - ("What's up ?", "LOW"), + ("Use file.txt and then use it to ...", "HIGH"), + ("Yo, what’s good? Find my ‘mixtape.mp3’ real quick", "LOW"), + ("Can you follow the readme and install the project", "HIGH"), + ("Man, write me a dope Python script to flex some random numbers", "LOW"), + ("Search the web for peer-reviewed articles on gene editing", "LOW"), + ("Locate ‘meeting_notes.docx’ in Downloads, I’m late for this call", "LOW"), + ("Write a Python script to list all .pdf files in my Documents", "LOW"), + ("Write a Python thing to sort my .jpg files by date", "LOW"), + ("Find ‘gallery_list.pdf’, then build a web app to show my pics", "HIGH"), + ("Find ‘budget_2025.xlsx’, analyze it, and make a chart for my boss", "HIGH"), + ("Retrieve the latest publications on CRISPR and develop a web application to display them", "HIGH"), + ("Bro dig up a music API and build me a tight app for the hottest tracks", "HIGH"), + ("Find a public API for sports scores and build a web app to show live updates", "HIGH"), + ("Find a public API for book data and create a Flask app to list bestsellers", "HIGH"), + ("Organize my desktop files by extension and then write a script to list them", "HIGH"), + ("Find the latest research on renewable energy and build a web app to display it", "HIGH"), + ("can you find vitess repo, clone it and install by following the readme", "HIGH"), + ("Create a JavaScript game using Phaser.js with multiple levels", "HIGH"), + ("Use my research_note.txt file, double check the informations on the web", "HIGH"), + ("Make a web server in go that query a flight API and display them in a app", "HIGH"), + ("can you lookup for api that track flight and build a web flight tracking app", "HIGH"), + ("Find the file toto.pdf then use its content to reply to Jojo on superforum.com", "HIGH"), + ("Create a whole web app in python using the flask framework that query news API", "HIGH"), + ("Create a bash script that monitor the CPU usage and send an email if it's too high", "HIGH"), + ("Make a web search for latest news on the stock market and display them with python", "HIGH"), + ("Find my resume file, apply to job that might fit online", "HIGH"), + ("Can you find a weather API and build a Python app to display current weather", "HIGH"), + ("Create a Python web app using Flask to track cryptocurrency prices from an API", "HIGH"), + ("Search the web for tutorials on machine learning and build a simple ML model in Python", "HIGH"), + ("Find a public API for movie data and build a web app to display movie ratings", "HIGH"), + ("Create a Node.js server that queries a public API for traffic data and displays it", "HIGH"), + ("can you find api and build a python web app with it ?", "HIGH"), + ("Find a public API for recipe data and build a web app to display recipes", "HIGH"), + ("Search the web for recent space mission updates and build a Flask app", "HIGH"), + ("Create a Python script to scrape a 
website and save data to a database", "HIGH"), + ("Find a public API for fitness tracking and build a web app to show stats", "HIGH"), + ("Search the web for tutorials on web development and build a sample site", "HIGH"), + ("Create a Node.js app to query a public API for event listings and display them", "HIGH"), + ("Find a file named ‘budget.xlsx’, analyze its data, and generate a chart", "HIGH"), ] + random.shuffle(few_shots) texts = [text for text, _ in few_shots] labels = [label for _, label in few_shots] self.complexity_classifier.add_examples(texts, labels) @@ -287,6 +325,7 @@ class AgentRouter: ("hi", "talk"), ("hello", "talk"), ] + random.shuffle(few_shots) texts = [text for text, _ in few_shots] labels = [label for _, label in few_shots] self.talk_classifier.add_examples(texts, labels) diff --git a/sources/tools/fileFinder.py b/sources/tools/fileFinder.py index 000d29b..29e1494 100644 --- a/sources/tools/fileFinder.py +++ b/sources/tools/fileFinder.py @@ -62,11 +62,13 @@ class FileFinder(Tools): file_path = None excluded_files = [".pyc", ".o", ".so", ".a", ".lib", ".dll", ".dylib", ".so", ".git"] for root, dirs, files in os.walk(directory_path): - for file in files: - if any(excluded_file in file for excluded_file in excluded_files): + for f in files: + if f is None: continue - if filename.strip() in file.strip(): - file_path = os.path.join(root, file) + if any(excluded_file in f for excluded_file in excluded_files): + continue + if filename.strip() in f.strip(): + file_path = os.path.join(root, f) return file_path return None @@ -82,27 +84,25 @@ class FileFinder(Tools): if not blocks or not isinstance(blocks, list): return "Error: No valid filenames provided" - results = [] + output = "" for block in blocks: - filename = block.split(":")[0] + filename = self.get_parameter_value(block, "name") + action = self.get_parameter_value(block, "action") + if filename is None: + output = "Error: No filename provided\n" + return output + if action is None: + action = "info" file_path = self.recursive_search(self.work_dir, filename) if file_path is None: - results.append({"filename": filename, "error": "File not found"}) + output = f"File: {filename} - not found\n" continue - if len(block.split(":")) > 1: - action = block.split(":")[1] - else: - action = "info" result = self.get_file_info(file_path) - results.append(result) - - output = "" - for result in results: if "error" in result: output += f"File: {result['filename']} - {result['error']}\n" else: if action == "read": - output += result['read'] + output += "Content:\n" + result['read'] + "\n" else: output += (f"File: {result['filename']}, " f"found at {result['path']}, " @@ -144,7 +144,10 @@ class FileFinder(Tools): if __name__ == "__main__": tool = FileFinder() - result = tool.execute(["toto.txt"], False) + result = tool.execute([""" +action=read +name=tools.py +"""], False) print("Execution result:") print(result) print("\nFailure check:", tool.execution_failure_check(result)) diff --git a/sources/tools/tools.py b/sources/tools/tools.py index 198211f..27a2069 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -127,6 +127,21 @@ class Tools(): with open(os.path.join(directory, save_path_file), 'w') as f: f.write(block) + def get_parameter_value(self, block: str, parameter_name: str) -> str: + """ + Get a parameter name. 
+ Args: + block (str): The block of text to search for the parameter + parameter_name (str): The name of the parameter to retrieve + Returns: + str: The value of the parameter + """ + for param_line in block.split('\n'): + if parameter_name in param_line: + param_value = param_line.split('=')[1].strip() + return param_value + return None + def found_executable_blocks(self): """ Check if executable blocks were found. From 4d5a532b231b869ef3266e784b2480a975552a83 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 10 Apr 2025 16:39:50 +0200 Subject: [PATCH 8/9] mistalke in prompt --- prompts/base/file_agent.txt | 1 - sources/router.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/prompts/base/file_agent.txt b/prompts/base/file_agent.txt index 2e802b7..0da3c78 100644 --- a/prompts/base/file_agent.txt +++ b/prompts/base/file_agent.txt @@ -45,7 +45,6 @@ rules: - Do not ever use editor such as vim or nano. - Make sure to always cd your work folder before executing commands, like cd && - only use file name with file_finder, not path -- remember to use :read when reading file. Example Interaction User: "I need to find the file config.txt and read its contents." diff --git a/sources/router.py b/sources/router.py index b16b616..242fdf9 100644 --- a/sources/router.py +++ b/sources/router.py @@ -356,7 +356,7 @@ class AgentRouter: llm_router, confidence_llm_router = result_llm_router[0], result_llm_router[1] final_score_bart = confidence_bart / (confidence_bart + confidence_llm_router) final_score_llm = confidence_llm_router / (confidence_bart + confidence_llm_router) - self.logger.info(f"Routing Vote: BART: {bart} ({final_score_bart}) LLM-router: {llm_router} ({final_score_llm})") + self.logger.info(f"Routing Vote for text {text}: BART: {bart} ({final_score_bart}) LLM-router: {llm_router} ({final_score_llm})") if log_confidence: pretty_print(f"Agent choice -> BART: {bart} ({final_score_bart}) LLM-router: {llm_router} ({final_score_llm})") return bart if final_score_bart > final_score_llm else llm_router From e66f535dd356f84f1421cf86f9258268303aaf6b Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 10 Apr 2025 16:43:05 +0200 Subject: [PATCH 9/9] feat : more router few shots --- sources/router.py | 1 + tests/test_memory.py | 93 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 tests/test_memory.py diff --git a/sources/router.py b/sources/router.py index 242fdf9..5ad5a34 100644 --- a/sources/router.py +++ b/sources/router.py @@ -142,6 +142,7 @@ class AgentRouter: ("Locate ‘meeting_notes.docx’ in Downloads, I’m late for this call", "LOW"), ("Write a Python script to list all .pdf files in my Documents", "LOW"), ("Write a Python thing to sort my .jpg files by date", "LOW"), + ("make a snake game please", "LOW"), ("Find ‘gallery_list.pdf’, then build a web app to show my pics", "HIGH"), ("Find ‘budget_2025.xlsx’, analyze it, and make a chart for my boss", "HIGH"), ("Retrieve the latest publications on CRISPR and develop a web application to display them", "HIGH"), diff --git a/tests/test_memory.py b/tests/test_memory.py new file mode 100644 index 0000000..eb64d44 --- /dev/null +++ b/tests/test_memory.py @@ -0,0 +1,93 @@ +import unittest +import os +import sys +import json +import datetime + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) # Add project root to Python path +from sources.memory import Memory + +class TestMemory(unittest.TestCase): + def setUp(self): + self.system_prompt = "Test 
system prompt" + self.memory = Memory( + system_prompt=self.system_prompt, + recover_last_session=False, + memory_compression=False + ) + + def tearDown(self): + if os.path.exists("conversations"): + for root, dirs, files in os.walk("conversations", topdown=False): + for name in files: + os.remove(os.path.join(root, name)) + for name in dirs: + os.rmdir(os.path.join(root, name)) + os.rmdir("conversations") + + def test_initialization(self): + self.assertEqual(len(self.memory.memory), 1) + self.assertEqual(self.memory.memory[0]['role'], 'system') + self.assertEqual(self.memory.memory[0]['content'], self.system_prompt) + self.assertIsNotNone(self.memory.session_id) + self.assertIsInstance(self.memory.session_time, datetime.datetime) + + def test_get_filename(self): + filename = self.memory.get_filename() + self.assertTrue(filename.startswith("memory_")) + self.assertTrue(filename.endswith(".txt")) + self.assertIn(self.memory.session_time.strftime('%Y-%m-%d'), filename) + + def test_save_memory(self): + self.memory.save_memory() + save_path = os.path.join(self.memory.conversation_folder, "casual_agent") + self.assertTrue(os.path.exists(save_path)) + filename = self.memory.get_filename() + self.assertTrue(os.path.exists(os.path.join(save_path, filename))) + + def test_push(self): + index = self.memory.push("user", "Hello") + self.assertEqual(index, 0) + self.assertEqual(len(self.memory.memory), 2) + self.assertEqual(self.memory.memory[1]['role'], "user") + self.assertEqual(self.memory.memory[1]['content'], "Hello") + + def test_clear(self): + self.memory.push("user", "Hello") + self.memory.clear() + self.assertEqual(len(self.memory.memory), 1) # doesn't clear sys message + + def test_clear_section(self): + self.memory.clear() + mem_begin_idx = self.memory.push("user", "Hi i want you to make...") + self.memory.push("assistant", "") + self.memory.push("user", "sys feedback: error") + self.memory.push("assistant", "") + mem_end_idx = self.memory.push("user", "according to search...") + self.memory.clear_section(mem_begin_idx+1, mem_end_idx-1) + self.assertEqual(len(self.memory.memory), 3) # 3 msg with sys msg + self.assertEqual(self.memory.memory[0]['role'], "system") + + def test_get(self): + self.memory.push("user", "Hello") + memory_content = self.memory.get() + self.assertEqual(len(memory_content), 2) + + def test_reset(self): + self.memory.push("user", "Hello") + new_memory = [{"role": "system", "content": "New prompt"}] + self.memory.reset(new_memory) + self.assertEqual(self.memory.memory, new_memory) + + def test_save_and_load_memory(self): + self.memory.push("user", "Hello") + self.memory.push("assistant", "Hi") + self.memory.save_memory() + + new_memory = Memory(self.system_prompt, recover_last_session=True) + new_memory.load_memory() + self.assertEqual(len(new_memory.memory), 3) # System + messages + self.assertEqual(new_memory.memory[1]['content'], "Hello") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file