feat : improve browser agent code structure

This commit is contained in:
martin legrand 2025-04-08 10:34:39 +02:00
parent d3e95712fd
commit 6eafeb15a4

View File

@ -1,13 +1,22 @@
import re import re
import time import time
from datetime import date
from typing import List, Tuple, Type, Dict
from enum import Enum
from sources.utility import pretty_print, animate_thinking from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent from sources.agents.agent import Agent
from sources.tools.searxSearch import searxSearch from sources.tools.searxSearch import searxSearch
from sources.browser import Browser from sources.browser import Browser
from datetime import date from sources.logger import Logger
from typing import List, Tuple, Type, Dict
class Action(Enum):
    """Keywords the browser agent exchanges with the LLM.

    Each member's value is the literal token that is written into the
    prompts and later searched for in the model's answer (for example
    ``Action.REQUEST_EXIT.value in answer``).
    """

    REQUEST_EXIT = "REQUEST_EXIT"
    FORM_FILLED = "FORM_FILLED"
    GO_BACK = "GO_BACK"
    NAVIGATE = "NAVIGATE"
    SEARCH = "SEARCH"

    def __str__(self) -> str:
        """Return the bare keyword instead of ``Action.<NAME>``.

        Prompts are built with f-strings; without this override a member
        interpolated without ``.value`` would be rendered as
        ``Action.REQUEST_EXIT`` and leak the class name into the text sent
        to the model. Overriding ``__str__`` makes ``f"{Action.X}"`` and
        ``f"{Action.X.value}"`` produce the same text.
        """
        return self.value
class BrowserAgent(Agent): class BrowserAgent(Agent):
def __init__(self, name, prompt_path, provider, verbose=False, browser=None): def __init__(self, name, prompt_path, provider, verbose=False, browser=None):
""" """
@ -27,8 +36,10 @@ class BrowserAgent(Agent):
self.current_page = "" self.current_page = ""
self.search_history = [] self.search_history = []
self.navigable_links = [] self.navigable_links = []
self.last_action = Action.NAVIGATE.value
self.notes = [] self.notes = []
self.date = self.get_today_date() self.date = self.get_today_date()
self.logger = Logger("browser_agent.log")
def get_today_date(self) -> str: def get_today_date(self) -> str:
"""Get the date""" """Get the date"""
@ -101,14 +112,14 @@ class BrowserAgent(Agent):
1. **Decide if the page answers the users query:** 1. **Decide if the page answers the users query:**
- If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page. - If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page.
- If it does and you completed user request, say REQUEST_EXIT. - If it does and you completed user request, say {Action.REQUEST_EXIT}.
- If it doesnt, say: Error: <why page don't help> then go back or navigate to another link. - If it doesnt, say: Error: <why page don't help> then go back or navigate to another link.
2. **Navigate to a link by either: ** 2. **Navigate to a link by either: **
- Saying I will navigate to <url>: (write down the full URL, e.g., www.example.com/cats). - Saying I will navigate to <url>: (write down the full URL, e.g., www.example.com/cats).
- Going back: If no link seems helpful, say: GO_BACK. - Going back: If no link seems helpful, say: {Action.GO_BACK.value}.
3. **Fill forms on the page:** 3. **Fill forms on the page:**
- Fill form only on relevant page with given informations. You might use form to conduct search on a page. - Fill form only on relevant page with given informations. You might use form to conduct search on a page.
- You can fill a form using [form_name](value). Don't GO_BACK when filling form. - You can fill a form using [form_name](value). Don't {Action.GO_BACK.value} when filling form.
- If a form is irrelevant or you lack informations leave it empty. - If a form is irrelevant or you lack informations leave it empty.
**Rules:** **Rules:**
@ -121,7 +132,7 @@ class BrowserAgent(Agent):
Example 1 (useful page, no need go futher): Example 1 (useful page, no need go futher):
Note: According to karpathy site (<link>) LeCun net is ...<expand on page content>..." Note: According to karpathy site (<link>) LeCun net is ...<expand on page content>..."
No link seem useful to provide futher information. No link seem useful to provide futher information.
Action: GO_BACK Action: {Action.GO_BACK.value}
Example 2 (not useful, see useful link on page): Example 2 (not useful, see useful link on page):
Error: reddit.com/welcome does not discuss anything related to the users query. Error: reddit.com/welcome does not discuss anything related to the users query.
@ -130,12 +141,12 @@ class BrowserAgent(Agent):
Example 3 (not useful, no related links): Example 3 (not useful, no related links):
Error: x.com does not discuss anything related to the users query and no navigation link are usefull. Error: x.com does not discuss anything related to the users query and no navigation link are usefull.
Action: GO_BACK Action: {Action.GO_BACK.value}
Example 3 (query answer found, enought notes taken): Example 3 (query answer found, enought notes taken):
Note: I found on <link> that ...<expand on information found>... Note: I found on <link> that ...<expand on information found>...
Given this answer the user query I should exit the web browser. Given this answer the user query I should exit the web browser.
Action: REQUEST_EXIT Action: {Action.REQUEST_EXIT.value}
Example 4 (loging form visible): Example 4 (loging form visible):
@ -149,7 +160,7 @@ class BrowserAgent(Agent):
You previously took these notes: You previously took these notes:
{notes} {notes}
Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action. Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action.
You might REQUEST_EXIT if no more link are useful. You might {Action.REQUEST_EXIT.value} if no more link are useful.
Do not navigate to AI tools or search engine. Only navigate to tool if asked. Do not navigate to AI tools or search engine. Only navigate to tool if asked.
""" """
@ -245,7 +256,7 @@ class BrowserAgent(Agent):
You: "search: Recent space missions news, {self.date}" You: "search: Recent space missions news, {self.date}"
Do not explain, do not write anything beside the search query. Do not explain, do not write anything beside the search query.
Except if query does not make any sense for a web search then explain why and say REQUEST_EXIT Except if query does not make any sense for a web search then explain why and say {Action.REQUEST_EXIT.value}
Do not try to answer query. you can only formulate search term or exit. Do not try to answer query. you can only formulate search term or exit.
""" """
@ -258,10 +269,10 @@ class BrowserAgent(Agent):
{page_text} {page_text}
The user asked: {user_prompt} The user asked: {user_prompt}
Does the page answer the users query now? Does the page answer the users query now?
If it does, take notes of the useful information, write down result and say FORM_FILLED. If it does, take notes of the useful information, write down result and say {Action.FORM_FILLED.value}.
If you were previously on a login form, no need to explain. If you were previously on a login form, no need to explain.
If it does and you completed user request, say REQUEST_EXIT If it does and you completed user request, say {Action.REQUEST_EXIT.value}
if it doesnt, say: Error: This page does not answer the users query then GO_BACK. if it doesnt, say: Error: Attempt to fill form didn't work {Action.GO_BACK.value}.
""" """
def show_search_results(self, search_result: List[str]): def show_search_results(self, search_result: List[str]):
@ -286,7 +297,7 @@ class BrowserAgent(Agent):
animate_thinking(f"Thinking...", color="status") animate_thinking(f"Thinking...", color="status")
mem_begin_idx = self.memory.push('user', self.search_prompt(user_prompt)) mem_begin_idx = self.memory.push('user', self.search_prompt(user_prompt))
ai_prompt, _ = self.llm_request() ai_prompt, _ = self.llm_request()
if "REQUEST_EXIT" in ai_prompt: if Action.REQUEST_EXIT.value in ai_prompt:
pretty_print(f"Web agent requested exit.\n{reasoning}\n\n{ai_prompt}", color="failure") pretty_print(f"Web agent requested exit.\n{reasoning}\n\n{ai_prompt}", color="failure")
return ai_prompt, "" return ai_prompt, ""
animate_thinking(f"Searching...", color="status") animate_thinking(f"Searching...", color="status")
@ -295,7 +306,8 @@ class BrowserAgent(Agent):
self.show_search_results(search_result) self.show_search_results(search_result)
prompt = self.make_newsearch_prompt(user_prompt, search_result) prompt = self.make_newsearch_prompt(user_prompt, search_result)
unvisited = [None] unvisited = [None]
while not complete: while not complete and len(unvisited) > 0:
answer, reasoning = self.llm_decide(prompt, show_reasoning = False) answer, reasoning = self.llm_decide(prompt, show_reasoning = False)
pretty_print(''*32, color="status") pretty_print(''*32, color="status")
@ -308,27 +320,23 @@ class BrowserAgent(Agent):
answer = self.handle_update_prompt(user_prompt, page_text) answer = self.handle_update_prompt(user_prompt, page_text)
answer, reasoning = self.llm_decide(prompt) answer, reasoning = self.llm_decide(prompt)
links = self.parse_answer(answer) if Action.FORM_FILLED.value in answer:
link = self.select_link(links)
self.search_history.append(link)
if "REQUEST_EXIT" in answer:
pretty_print(f"Agent requested exit.", color="status")
complete = True
break
if len(unvisited) == 0:
pretty_print(f"Visited all links.", color="status")
break
if "FORM_FILLED" in answer:
pretty_print(f"Filled form. Handling page update.", color="status") pretty_print(f"Filled form. Handling page update.", color="status")
page_text = self.browser.get_text() page_text = self.browser.get_text()
self.navigable_links = self.browser.get_navigable() self.navigable_links = self.browser.get_navigable()
prompt = self.make_navigation_prompt(user_prompt, page_text) prompt = self.make_navigation_prompt(user_prompt, page_text)
continue continue
if link == None or "GO_BACK" in answer: links = self.parse_answer(answer)
link = self.select_link(links)
self.search_history.append(link)
if Action.REQUEST_EXIT.value in answer:
pretty_print(f"Agent requested exit.", color="status")
complete = True
break
if link == None or Action.GO_BACK.value in answer:
pretty_print(f"Going back to results. Still {len(unvisited)}", color="status") pretty_print(f"Going back to results. Still {len(unvisited)}", color="status")
unvisited = self.select_unvisited(search_result) unvisited = self.select_unvisited(search_result)
prompt = self.make_newsearch_prompt(user_prompt, unvisited) prompt = self.make_newsearch_prompt(user_prompt, unvisited)