feat : enhance browser agent + add tests

This commit is contained in:
martin legrand 2025-04-06 12:17:38 +02:00
parent 6fb9ce67c0
commit 42f9485a39
4 changed files with 96 additions and 22 deletions

View File

@ -54,7 +54,7 @@ class BrowserAgent(Agent):
links_clean = []
for link in links:
link = link.strip()
if link[-1] == '.':
if not (link[-1].isalpha() or link[-1].isdigit()):
links_clean.append(link[:-1])
else:
links_clean.append(link)
@ -71,7 +71,7 @@ class BrowserAgent(Agent):
Your goal is to find accurate and complete information to satisfy the users request.
User request: {user_prompt}
To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to <link>"
Do not explain your choice.
Do not explain your choice.
"""
def make_navigation_prompt(self, user_prompt: str, page_text: str) -> str:
@ -120,22 +120,27 @@ class BrowserAgent(Agent):
Example 1 (useful page, no need go futher):
Note: According to karpathy site (<link>) LeCun net is ...<expand on page content>..."
No link seem useful to provide futher information. GO_BACK
No link seem useful to provide futher information.
Action: GO_BACK
Example 2 (not useful, see useful link on page):
Error: reddit.com/welcome does not discuss anything related to the users query.
There is a link that could lead to the information, I want to navigate to http://reddit.com/r/locallama
There is a link that could lead to the information.
Action: navigate to http://reddit.com/r/locallama
Example 3 (not useful, no related links):
Error: x.com does not discuss anything related to the users query and no navigation link are usefull.
GO_BACK
Action: GO_BACK
Example 3 (query answer found, enought notes taken):
Note: I found on <link> that ...<expand on information found>...
Given this answer the user query I should exit the web browser. REQUEST_EXIT
Given this answer the user query I should exit the web browser.
Action: REQUEST_EXIT
Example 4 (loging form visible):
Note: I am on the login page, I will type the given username and password.
Action:
[username_field](David)
[password_field](edgerunners77)
@ -143,7 +148,7 @@ class BrowserAgent(Agent):
{user_prompt}
You previously took these notes:
{notes}
Do not Step-by-Step explanation. Instead write simple explanation sentence following by your notes and actions.
Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action.
"""
def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
@ -190,7 +195,7 @@ class BrowserAgent(Agent):
buffer = []
links = []
for line in lines:
if "exit" in line:
if line == '' or 'action:' in line.lower():
saving = False
if "note" in line.lower():
saving = True
@ -198,7 +203,7 @@ class BrowserAgent(Agent):
buffer.append(line)
else:
links.extend(self.extract_links(line))
self.notes.append('. '.join(buffer))
self.notes.append('. '.join(buffer).strip())
return links
def select_link(self, links: List[str]) -> str | None:
@ -293,6 +298,7 @@ class BrowserAgent(Agent):
extracted_form = self.extract_form(answer)
if len(extracted_form) > 0:
pretty_print(f"Filling inputs form...", color="status")
self.browser.fill_form_inputs(extracted_form)
self.browser.find_and_click_submission()
page_text = self.browser.get_text()
@ -303,22 +309,25 @@ class BrowserAgent(Agent):
link = self.select_link(links)
if "REQUEST_EXIT" in answer:
pretty_print(f"Agent requested exit.", color="status")
complete = True
break
if len(unvisited) == 0:
pretty_print(f"Visited all links.", color="status")
break
if "FORM_FILLED" in answer:
pretty_print(f"Filled form. Handling page update.", color="status")
page_text = self.browser.get_text()
self.navigable_links = self.browser.get_navigable()
prompt = self.make_navigation_prompt(user_prompt, page_text)
continue
if link == None or "GO_BACK" in answer:
pretty_print(f"Going back to results. Still {len(unvisited)}", color="status")
unvisited = self.select_unvisited(search_result)
prompt = self.make_newsearch_prompt(user_prompt, unvisited)
pretty_print(f"Going back to results. Still {len(unvisited)}", color="warning")
continue
animate_thinking(f"Navigating to {link}", color="status")
@ -329,8 +338,8 @@ class BrowserAgent(Agent):
page_text = self.browser.get_text()
self.navigable_links = self.browser.get_navigable()
prompt = self.make_navigation_prompt(user_prompt, page_text)
pretty_print(f"Current page: {self.current_page}", color="warning")
pretty_print("Exited navigation, starting to summarize finding...", color="status")
prompt = self.conclude_prompt(user_prompt)
mem_last_idx = self.memory.push('user', prompt)
answer, reasoning = self.llm_request()

View File

@ -54,7 +54,7 @@ class searxSearch(Tools):
status = self.link_valid(link)
statuses.append(status)
return statuses
def execute(self, blocks: list, safety: bool = False) -> str:
"""Executes a search query against a SearxNG instance using POST and extracts URLs and titles."""
if not blocks:

View File

@ -2,11 +2,13 @@
"""
define a generic tool class, any tool can be used by the agent.
A tool can be used by deepseek like so:
A tool can be used by an LLM like so:
```<tool name>
<code or query to execute>
```
we call these "blocks".
For example:
```python
print("Hello world")
@ -40,9 +42,7 @@ class Tools():
return self.current_dir
def check_config_dir_validity(self):
"""
Check if the config directory is valid.
"""
"""Check if the config directory is valid."""
path = self.config['MAIN']['work_dir']
if path == "":
print("WARNING: Work directory not set in config.ini")
@ -56,15 +56,11 @@ class Tools():
return True
def config_exists(self):
"""
Check if the config file exists.
"""
"""Check if the config file exists."""
return os.path.exists('./config.ini')
def create_work_dir(self):
"""
Create the work directory if it does not exist.
"""
"""Create the work directory if it does not exist."""
default_path = os.path.dirname(os.getcwd())
if self.config_exists():
self.config.read('./config.ini')

View File

@ -0,0 +1,69 @@
import unittest
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) # Add project root to Python path
from sources.agents.browser_agent import BrowserAgent
class TestBrowserAgentParsing(unittest.TestCase):
    """Unit tests for the text-parsing helpers exposed by ``BrowserAgent``.

    The tests exercise ``extract_links``, ``extract_form``, ``clean_links``
    and ``parse_answer`` on small hand-written inputs; no browser or LLM
    provider is involved (the agent is built with ``provider=None``).
    """

    def setUp(self):
        """Build a fresh ``BrowserAgent`` before every test."""
        # Resolve the prompt file relative to this test file rather than the
        # current working directory, so the suite behaves the same whether it
        # is launched from the project root or from the tests directory.
        prompt_path = os.path.abspath(
            os.path.join(
                os.path.dirname(__file__),
                "..", "prompts", "base", "browser_agent.txt",
            )
        )
        self.agent = BrowserAgent(
            name="TestAgent",
            prompt_path=prompt_path,
            provider=None,
        )

    def test_extract_links(self):
        """Links are extracted from free text without trailing punctuation."""
        test_text = (
            "\n"
            "Check this out: https://example.com, and www.google.com!\n"
            "Also try https://test.org/about?page=1.\n"
        )
        expected = [
            "https://example.com",
            "www.google.com",
            "https://test.org/about?page=1",
        ]
        result = self.agent.extract_links(test_text)
        self.assertEqual(result, expected)

    def test_extract_form(self):
        """Only complete ``[field](value)`` pairs are reported as form inputs."""
        test_text = (
            "\n"
            "Fill this: [username](john) and [password](secret123)\n"
            "Not a form: [random]text\n"
        )
        expected = ["[username](john)", "[password](secret123)"]
        result = self.agent.extract_form(test_text)
        self.assertEqual(result, expected)

    def test_clean_links(self):
        """A single trailing punctuation character is removed from each link."""
        test_links = [
            "https://example.com.",
            "www.test.com,",
            "https://clean.org!",
            "https://good.com",  # already clean: must be returned unchanged
        ]
        expected = [
            "https://example.com",
            "www.test.com",
            "https://clean.org",
            "https://good.com",
        ]
        result = self.agent.clean_links(test_links)
        self.assertEqual(result, expected)

    def test_parse_answer(self):
        """The ``Note:`` line of an answer is recorded in ``agent.notes``."""
        test_text = (
            "\n"
            "Here's some info\n"
            "Note: This is important. We are doing test it's very cool.\n"
            "action:\n"
            "i wanna navigate to https://test.com\n"
        )
        self.agent.parse_answer(test_text)
        # Only the note line is expected in the stored note; the text before
        # it and the "action:" section after it must not leak into it.
        self.assertEqual(
            self.agent.notes[0],
            "Note: This is important. We are doing test it's very cool.",
        )
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()