From 60795111b015a9a362c9a39599ca1e2c03c962cd Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 13:35:21 +0200 Subject: [PATCH] feat: action enum for browser agent --- prompts/base/planner_agent.txt | 1 + sources/agents/browser_agent.py | 11 ++++++----- sources/router.py | 1 + sources/tools/BashInterpreter.py | 2 +- sources/tools/tools.py | 4 ++++ 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/prompts/base/planner_agent.txt b/prompts/base/planner_agent.txt index 1cf230b..89928f1 100644 --- a/prompts/base/planner_agent.txt +++ b/prompts/base/planner_agent.txt @@ -74,4 +74,5 @@ Rules: - One coding agent should work on one file at a time. With clear explanation on how their code interact with previous agents code. - Tell agent to execute without question. - Only use web agent for finding necessary informations. +- If a task might require user email (eg: api services), do not write plan instead ask for user email. - Do not search for tutorial. \ No newline at end of file diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 308332d..0e5a687 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -115,12 +115,13 @@ class BrowserAgent(Agent): - If it does and you completed user request, say {Action.REQUEST_EXIT}. - If it doesn’t, say: Error: then go back or navigate to another link. 2. **Navigate to a link by either: ** - - Saying I will navigate to : (write down the full URL, e.g., www.example.com/cats). + - Saying I will navigate to (write down the full URL) www.example.com/cats - Going back: If no link seems helpful, say: {Action.GO_BACK.value}. 3. **Fill forms on the page:** - - Fill form only on relevant page with given informations. You might use form to conduct search on a page. + - Fill form only when relevant. + - Use Login if username/password specified by user. For quick task create account, remember password in a note. - You can fill a form using [form_name](value). 
Don't {Action.GO_BACK.value} when filling form. - - If a form is irrelevant or you lack informations leave it empty. + - If a form is irrelevant or you lack informations (eg: don't know user email) leave it empty. **Rules:** - Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer. @@ -161,7 +162,7 @@ class BrowserAgent(Agent): {notes} Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action. You might {Action.REQUEST_EXIT.value} if no more link are useful. - Do not navigate to AI tools or search engine. Only navigate to tool if asked. + If you conduct research do not exit until you have several notes. """ def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]: @@ -329,7 +330,6 @@ class BrowserAgent(Agent): links = self.parse_answer(answer) link = self.select_link(links) - self.search_history.append(link) if Action.REQUEST_EXIT.value in answer: pretty_print(f"Agent requested exit.", color="status") @@ -345,6 +345,7 @@ class BrowserAgent(Agent): animate_thinking(f"Navigating to {link}", color="status") if speech_module: speech_module.speak(f"Navigating to {link}") self.browser.go_to(link) + self.search_history.append(link) self.current_page = link page_text = self.browser.get_text() self.navigable_links = self.browser.get_navigable() diff --git a/sources/router.py b/sources/router.py index 6b0f011..cab9e8b 100644 --- a/sources/router.py +++ b/sources/router.py @@ -265,6 +265,7 @@ class AgentRouter: ("Check if ‘photo_backup.zip’ exists on my drive", "files"), ("Write a Python script to rename files with a timestamp", "code"), ("What’s your favorite thing about space?", "talk"), + ("search for GPU with at least 24gb vram", "web"), ("Browse the web for the latest fitness trends", "web"), ("Move all .docx files to a ‘Work’ folder", "files"), ("I would like to make a new project called 'new_project'", "files"), diff --git 
a/sources/tools/BashInterpreter.py b/sources/tools/BashInterpreter.py index 6e9f224..584b017 100644 --- a/sources/tools/BashInterpreter.py +++ b/sources/tools/BashInterpreter.py @@ -44,7 +44,7 @@ class BashInterpreter(Tools): command = command.replace('\n', '') if self.safe_mode and is_unsafe(commands): return "Unsafe command detected, execution aborted." - if self.language_bash_attempt(command): + if self.language_bash_attempt(command) and not self.allow_language_exec_bash: continue try: process = subprocess.Popen( diff --git a/sources/tools/tools.py b/sources/tools/tools.py index dac890a..198211f 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -37,10 +37,14 @@ class Tools(): self.work_dir = self.create_work_dir() self.excutable_blocks_found = False self.safe_mode = True + self.allow_language_exec_bash = False def get_work_dir(self): return self.work_dir + def set_allow_language_exec_bash(self, value: bool) -> None: + self.allow_language_exec_bash = value + def check_config_dir_validity(self): """Check if the config directory is valid.""" path = self.config['MAIN']['work_dir']