Feat : browser agent v1.0

This commit is contained in:
martin legrand 2025-03-11 15:22:42 +01:00
parent e9e2e3ecf2
commit 9f9907cedb
6 changed files with 96 additions and 10 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 898 KiB

View File

@ -1,6 +1,21 @@
You are a browser agent that can search the web for information.
You are an internet AI that can browse the web for information.
In fact, you are embedded in a browser driven by Selenium.
You can use the following tools:
If you need to conduct a web search, you can use the following tool:
- web_search: to search the web for information
TODODODODODOTO
This is how you can use the web_search tool:
```web_search
<query>
```
This will provide you with a list of links that you can navigate to.
You can navigate to a specific link by typing the link. For example, if you say:
"I want to navigate to https://www.google.com"
You will navigate to https://www.google.com
Any link that you type will be opened in a new tab.
If you want to exit the browser, you can say:
"REQUEST_EXIT"
Only exit the browser if you are done browsing.

View File

@ -1,3 +1,5 @@
import re
import time
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
@ -16,8 +18,73 @@ class BrowserAgent(Agent):
self.browser = Browser()
self.browser.goTo("https://github.com/")
def make_init_prompt(self, user_prompt: str, search_result: str):
    """Build the opening prompt of a browsing session.

    The prompt shows the model the web-search output together with the
    user's request and asks it to pick a link to open.

    Args:
        user_prompt: The user's original query.
        search_result: Text produced by the web search.

    Returns:
        The formatted prompt string.
    """
    init_prompt = f"""
Based on the search result:
{search_result}
Start browsing and find the information the user want.
User: {user_prompt}
You must choose a link to navigate to. Say i want to navigate to a <link>.
"""
    return init_prompt
def extract_links(self, search_result: str):
    """Return every http(s) URL found in *search_result*, in order.

    A URL is taken to run from its scheme up to the next whitespace
    character, so trailing punctuation is included in the match.
    """
    url_matches = re.finditer(r'https?://[^\s]+', search_result)
    return [match.group(0) for match in url_matches]
def make_navigation_prompt(self, user_prompt: str, page_text: str, navigable_links: list):
    """Build the prompt shown to the model after a page has been opened.

    Args:
        user_prompt: The query the model is trying to answer.
        page_text: Text content of the current page.
        navigable_links: Sequence of mappings, each providing a 'text'
            and a 'url' entry for one link on the page.

    Returns:
        The formatted prompt, listing the page text and the numbered links.
    """
    numbered_links = []
    for position, entry in enumerate(navigable_links):
        numbered_links.append(f"[{position}] {entry['text']} - {entry['url']}")
    format_links = "\n".join(numbered_links)
    return f"""
\nYou are browsing the web. Not the user, you are the browser.
Page content:
{page_text}
Navigable links:
{format_links}
Remember, you must seek the information the user want.
The user query was : {user_prompt}
You must choose a link to navigate to.
If you have an answer and want to exit the browser, please say "REQUEST_EXIT".
"""
def clean_links(self, links: list):
    """Strip sentence punctuation that got glued to the end of extracted URLs.

    URLs pulled out of free text often carry the punctuation that followed
    them in the sentence (a full stop, a comma, a closing quote, ...).
    Such trailing characters are removed so the URL can be opened as-is.
    Closing brackets are left alone because they can legitimately end a URL.

    Args:
        links: URL strings as extracted from text.

    Returns:
        A new list with the cleaned URLs, in the original order. Entries
        that are empty once cleaned are dropped.
    """
    trailing_punctuation = ".,;:!?\"'"
    stripped = (link.rstrip(trailing_punctuation) for link in links)
    # An empty string is not a navigable URL, so it is filtered out
    # (indexing link[-1] on one would otherwise raise IndexError).
    return [link for link in stripped if link]
def process(self, prompt, speech_module) -> tuple:
    """Run a browsing session for *prompt* (currently disabled).

    The intended flow is: run a web search, ask the model to pick a link,
    open it, feed the page text and its links back to the model, and repeat
    until the model answers with "REQUEST_EXIT".

    Args:
        prompt: The user's query.
        speech_module: Object whose ``speak`` method announces navigation.

    Returns:
        A ``(answer, reasoning)`` pair from the last model request.

    Raises:
        NotImplementedError: Always, until the agent is switched on.
    """
    # Deliberate gate: the loop below is kept ready but is not reachable yet.
    raise NotImplementedError("Browser agent is not implemented yet")
    animate_thinking("Searching...", color="status")
    search_result = self.tools["web_search"].execute([prompt], False)
    user_prompt = self.make_init_prompt(prompt, search_result)
    prompt = user_prompt
    while True:
        animate_thinking("Thinking...", color="status")
        # NOTE(review): the initial prompt is pushed on every turn rather than
        # the current navigation prompt - confirm this is intended.
        self.memory.push('user', user_prompt)
        answer, reasoning = self.llm_request(prompt)
        if "REQUEST_EXIT" in answer:
            break
        links_clean = self.clean_links(self.extract_links(answer))
        if not links_clean:
            prompt = "Please choose a link to navigate to."
            continue
        # Navigate to the cleaned URL, not the raw match that may still
        # carry trailing punctuation.
        target = links_clean[0]
        animate_thinking(f"Navigating to {target}", color="status")
        speech_module.speak(f"Navigating to {target}")
        self.browser.goTo(target)
        # Only the first 2048 characters and 15 links are passed on,
        # which keeps the prompt small.
        page_text = self.browser.getText()[:2048]
        navigable_links = self.browser.getNavigable()[:15]
        prompt = self.make_navigation_prompt(user_prompt, page_text, navigable_links)
    self.browser.close()
    return answer, reasoning
if __name__ == "__main__":
    # Manual check: instantiate a Browser when this module is run directly.
    browser = Browser()

View File

@ -11,7 +11,7 @@ import markdownify
import logging
class Browser:
def __init__(self, headless=True):
def __init__(self, headless=False):
"""Initialize the browser with optional headless mode."""
try:
chrome_options = Options()
@ -38,6 +38,10 @@ class Browser:
except WebDriverException as e:
self.logger.error(f"Error navigating to {url}: {str(e)}")
return False
def is_sentence(self, text):
"""Check if the text is a sentence."""
return len(text.split(" ")) > 5 and '.' in text
def getText(self):
"""Get page text and convert it to README (Markdown) format."""
@ -51,7 +55,7 @@ class Browser:
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk))
markdown_text = markdownify.markdownify(text, heading_style="ATX")
@ -142,7 +146,7 @@ if __name__ == "__main__":
browser = Browser(headless=False)
try:
browser.goTo("https://github.com/geohot")
browser.goTo("https://karpathy.github.io/")
text = browser.getText()
print("Page Text in Markdown:")
print(text)

View File

@ -100,7 +100,7 @@ class Interaction:
return
if self.current_agent != agent:
self.current_agent = agent
# get history from previous agent
# get history from previous agent, good ?
self.current_agent.memory.push('user', self.last_query)
self.last_answer, _ = agent.process(self.last_query, self.speech)

View File

@ -77,11 +77,11 @@ class Tools():
return dir_path
@abstractmethod
def execute(self, blocks:str, safety:bool) -> str:
def execute(self, blocks:[str], safety:bool) -> str:
"""
Abstract method that must be implemented by child classes to execute the tool's functionality.
Args:
blocks (str): The code or query blocks to execute
blocks (List[str]): The codes or queries blocks to execute
safety (bool): Whenever human intervention is required
Returns:
str: The output/result from executing the tool