feat: action enum for browser agent

This commit is contained in:
martin legrand 2025-04-08 13:35:21 +02:00
parent 469551c2b5
commit 60795111b0
5 changed files with 13 additions and 6 deletions

View File

@ -74,4 +74,5 @@ Rules:
- One coding agent should work on one file at a time. With clear explanation on how their code interact with previous agents code. - One coding agent should work on one file at a time. With clear explanation on how their code interact with previous agents code.
- Tell agent to execute without question. - Tell agent to execute without question.
- Only use web agent for finding necessary informations. - Only use web agent for finding necessary informations.
- If a task might require the user's email (e.g., API services), do not write a plan; instead, ask for the user's email.
- Do not search for tutorial. - Do not search for tutorial.

View File

@ -115,12 +115,13 @@ class BrowserAgent(Agent):
- If it does and you completed user request, say {Action.REQUEST_EXIT}. - If it does and you completed user request, say {Action.REQUEST_EXIT}.
- If it doesnt, say: Error: <why page don't help> then go back or navigate to another link. - If it doesnt, say: Error: <why page don't help> then go back or navigate to another link.
2. **Navigate to a link by either: ** 2. **Navigate to a link by either: **
- Saying I will navigate to <url>: (write down the full URL, e.g., www.example.com/cats). - Saying I will navigate to (write down the full URL) www.example.com/cats
- Going back: If no link seems helpful, say: {Action.GO_BACK.value}. - Going back: If no link seems helpful, say: {Action.GO_BACK.value}.
3. **Fill forms on the page:** 3. **Fill forms on the page:**
- Fill form only on relevant page with given informations. You might use form to conduct search on a page. - Fill form only when relevant.
- Use Login if username/password specified by user. For quick task create account, remember password in a note.
- You can fill a form using [form_name](value). Don't {Action.GO_BACK.value} when filling form. - You can fill a form using [form_name](value). Don't {Action.GO_BACK.value} when filling form.
- If a form is irrelevant or you lack informations leave it empty. - If a form is irrelevant or you lack informations (eg: don't know user email) leave it empty.
**Rules:** **Rules:**
- Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer. - Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer.
@ -161,7 +162,7 @@ class BrowserAgent(Agent):
{notes} {notes}
Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action. Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action.
You might {Action.REQUEST_EXIT.value} if no more link are useful. You might {Action.REQUEST_EXIT.value} if no more link are useful.
Do not navigate to AI tools or search engine. Only navigate to tool if asked. If you conduct research do not exit until you have several notes.
""" """
def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]: def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
@ -329,7 +330,6 @@ class BrowserAgent(Agent):
links = self.parse_answer(answer) links = self.parse_answer(answer)
link = self.select_link(links) link = self.select_link(links)
self.search_history.append(link)
if Action.REQUEST_EXIT.value in answer: if Action.REQUEST_EXIT.value in answer:
pretty_print(f"Agent requested exit.", color="status") pretty_print(f"Agent requested exit.", color="status")
@ -345,6 +345,7 @@ class BrowserAgent(Agent):
animate_thinking(f"Navigating to {link}", color="status") animate_thinking(f"Navigating to {link}", color="status")
if speech_module: speech_module.speak(f"Navigating to {link}") if speech_module: speech_module.speak(f"Navigating to {link}")
self.browser.go_to(link) self.browser.go_to(link)
self.search_history.append(link)
self.current_page = link self.current_page = link
page_text = self.browser.get_text() page_text = self.browser.get_text()
self.navigable_links = self.browser.get_navigable() self.navigable_links = self.browser.get_navigable()

View File

@ -265,6 +265,7 @@ class AgentRouter:
("Check if photo_backup.zip exists on my drive", "files"), ("Check if photo_backup.zip exists on my drive", "files"),
("Write a Python script to rename files with a timestamp", "code"), ("Write a Python script to rename files with a timestamp", "code"),
("Whats your favorite thing about space?", "talk"), ("Whats your favorite thing about space?", "talk"),
("search for GPU with at least 24gb vram", "web"),
("Browse the web for the latest fitness trends", "web"), ("Browse the web for the latest fitness trends", "web"),
("Move all .docx files to a Work folder", "files"), ("Move all .docx files to a Work folder", "files"),
("I would like to make a new project called 'new_project'", "files"), ("I would like to make a new project called 'new_project'", "files"),

View File

@ -44,7 +44,7 @@ class BashInterpreter(Tools):
command = command.replace('\n', '') command = command.replace('\n', '')
if self.safe_mode and is_unsafe(commands): if self.safe_mode and is_unsafe(commands):
return "Unsafe command detected, execution aborted." return "Unsafe command detected, execution aborted."
if self.language_bash_attempt(command): if self.language_bash_attempt(command) and allow_language_exec_bash == False:
continue continue
try: try:
process = subprocess.Popen( process = subprocess.Popen(

View File

@ -37,10 +37,14 @@ class Tools():
self.work_dir = self.create_work_dir() self.work_dir = self.create_work_dir()
self.excutable_blocks_found = False self.excutable_blocks_found = False
self.safe_mode = True self.safe_mode = True
self.allow_language_exec_bash = False
def get_work_dir(self): def get_work_dir(self):
return self.work_dir return self.work_dir
def set_allow_language_exec_bash(self, value: bool) -> None:
    """Set the ``allow_language_exec_bash`` flag on this tool instance.

    The original signature omitted ``self`` while the body assigned to
    ``self.allow_language_exec_bash``, so the method could not be called
    on an instance without raising.

    Args:
        value: New value stored in ``self.allow_language_exec_bash``.
    """
    # NOTE(review): presumably this flag controls whether the bash tool may
    # run commands that invoke another language interpreter - confirm
    # against the code that reads it.
    self.allow_language_exec_bash = value
def check_config_dir_validity(self): def check_config_dir_validity(self):
"""Check if the config directory is valid.""" """Check if the config directory is valid."""
path = self.config['MAIN']['work_dir'] path = self.config['MAIN']['work_dir']