Merge branch 'browser' into dev

This commit is contained in:
martin legrand 2025-03-15 14:17:22 +01:00
commit 40cecbcccb
7 changed files with 211 additions and 104 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

View File

@ -1,21 +1,9 @@
You are an internet AI that can browse the web for information.
In fact you are embedded in a browser with selenium.
If you need to conduct a web search, you can use the following tool:
- web_search: to search the web for information
This is how you can use the web_search tool:
```web_search
<query>
```
This will provide you with a list of links that you can navigate to.
You can navigate to a specific link by typing the link. For example, if you say:
"I want to navigate to https://www.google.com"
You will navigate to https://www.google.com
Any link that you type will be opened in a new tab.
If you want to exit the browser, you can say:
"REQUEST_EXIT"
Only exit the browser if you are done browsing.
You are a web browsing AI, your goal is to explore the internet to find information.
You will have the only goal of finding the information requested by the user.
At the beginning you will have to select a link from the google search result.
You will choose a link by simply typing it.
This will automatically make you browse to the link.
Once on a webpage you will see the page content and be given further navigation options.
You can type a link to navigate further on the page, go back to the search result or exit.
At each interaction step the browser will remind you of your options.

View File

@ -34,7 +34,7 @@ class Agent():
name: str,
prompt_path:str,
provider,
recover_last_session=False) -> None:
recover_last_session=True) -> None:
self.agent_name = name
self.role = None
self.current_directory = os.getcwd()

View File

@ -8,7 +8,7 @@ from sources.browser import Browser
class BrowserAgent(Agent):
def __init__(self, model, name, prompt_path, provider):
"""
The casual agent is a special for casual talk to the user without specific tasks.
The Browser agent is an agent that navigate the web autonomously in search of answer
"""
super().__init__(model, name, prompt_path, provider)
self.tools = {
@ -16,86 +16,146 @@ class BrowserAgent(Agent):
}
self.role = "deep research and web search"
self.browser = Browser()
self.browser.goTo("https://github.com/")
self.browser.go_to("https://github.com/")
self.search_history = []
self.navigable_links = []
self.notes = []
def make_init_prompt(self, user_prompt: str, search_result: str) -> str:
    """Build the initial browsing prompt handed to the LLM.

    Args:
        user_prompt: The original request from the user.
        search_result: Raw web-search results used to ground the first
            navigation choice.

    Returns:
        A prompt string instructing the model to pick a link to navigate to.
    """
    # Grammar fixed in the instruction text ("user wants", capitalized
    # directive) so the LLM receives a clean, unambiguous instruction.
    return f"""
    Based on the search result:
    {search_result}
    Start browsing and find the information the user wants.
    User: {user_prompt}
    You must choose a link to navigate to. Say: I want to navigate to <link>.
    """
def extract_links(self, search_result: str) -> list:
    """Extract HTTP/HTTPS and www. URLs from free-form text.

    Trailing sentence punctuation (".,!?;:") is stripped from each match so
    links embedded in prose stay navigable, then the list is passed through
    clean_links for the final trailing-dot sweep.

    Args:
        search_result: Any text that may contain URLs (LLM answer or
            search output).

    Returns:
        A list of cleaned URL strings (possibly empty).
    """
    # Match both absolute URLs and bare www. hosts; \S+ grabs up to the
    # next whitespace, which is why trailing punctuation must be stripped.
    pattern = r'(https?://\S+|www\.\S+)'
    matches = re.findall(pattern, search_result)
    trailing_punct = ".,!?;:"
    cleaned_links = [link.rstrip(trailing_punct) for link in matches]
    return self.clean_links(cleaned_links)
def clean_links(self, links: list) -> list:
    """Strip whitespace and a single trailing '.' from each link.

    Bug fix: the original indexed ``link[-1]`` unconditionally, which raised
    IndexError on an empty (or whitespace-only) entry; such entries are now
    dropped instead of crashing.

    Args:
        links: Candidate URL strings.

    Returns:
        The cleaned, non-empty links in their original order.
    """
    cleaned = []
    for link in links:
        link = link.strip()
        if not link:
            continue  # skip empties instead of crashing on link[-1]
        cleaned.append(link[:-1] if link.endswith('.') else link)
    return cleaned
def get_unvisited_links(self):
    """Return a newline-joined, index-prefixed list of links not yet visited."""
    unvisited = (
        f"[{idx}] {url}"
        for idx, url in enumerate(self.navigable_links)
        if url not in self.search_history
    )
    return "\n".join(unvisited)
def make_newsearch_prompt(self, user_prompt: str, search_result: dict):
    """Build a prompt asking the LLM to pick one link from fresh search results."""
    listing = self.stringify_search_results(search_result)
    return f"""
    Based on the search result:
    {listing}
    Your goal is to find accurate and complete information to satisfy the users request.
    User request: {user_prompt}
    To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to <link>."
    Do not explain your choice.
    """
def process(self, prompt, speech_module) -> str:
def make_navigation_prompt(self, user_prompt: str, page_text: str):
    """Build the per-page navigation prompt for the LLM.

    Bug fixes vs. the original:
    - ``get_unvisited_links()`` returns a (possibly empty) string, never
      None, so the old ``is not None`` check could never trigger the
      fallback; emptiness is now tested instead.
    - The computed ``remaining_links_text`` fallback was never interpolated
      into the prompt (the raw variable was used); it is now.
    - Typos in the instruction text ("go go back", "exemple", missing "or")
      are fixed so the LLM receives clean directives.
    """
    remaining_links = self.get_unvisited_links()
    # Falsy ("") means every known link was visited already.
    remaining_links_text = remaining_links if remaining_links else "No links remaining, proceed with a new search."
    return f"""
    \nYou are currently browsing the web. Not the user, you are the browser.
    Page content:
    {page_text}
    You can navigate to these links:
    {remaining_links_text}
    If no link seems appropriate, please say "GO_BACK".
    Remember, you seek the information the user wants.
    The user query was : {user_prompt}
    You must choose a link (write it down) to navigate to, or go back.
    For example you can say: i want to go to www.events.org/events
    Always end with a sentence that summarizes useful information if any, for example:
    Summary: According to https://karpathy.github.io/ LeCun net is the earliest real-world application of a neural net
    Another example:
    Summary: the BBC website does not provide useful informations.
    Do not explain your choice, be short, concise.
    """
def llm_decide(self, prompt):
    """Push the prompt to memory, query the LLM, and pretty-print its answer.

    Returns the (answer, reasoning) pair exactly as produced by llm_request.
    """
    animate_thinking("Thinking...", color="status")
    self.memory.push('user', prompt)
    answer, reasoning = self.llm_request(prompt)
    separator = "-" * 100
    pretty_print(separator)
    pretty_print(answer, color="output")
    pretty_print(separator)
    return answer, reasoning
def select_unvisited(self, search_result):
    """Return the subset of search results whose links were not visited yet.

    Args:
        search_result: List of result dicts carrying a "link" key.

    Returns:
        A new list preserving order, excluding results already in
        self.search_history.
    """
    # Idiomatic comprehension instead of the original append loop.
    return [res for res in search_result if res["link"] not in self.search_history]
def jsonify_search_results(self, results_string):
    """Parse raw web_search output into a list of result dicts.

    Result blocks are separated by blank lines; within a block, lines
    prefixed "Title:", "Snippet:" or "Link:" populate the corresponding
    dict keys. Blocks with no recognized line are dropped.
    """
    field_map = (("Title:", "title"), ("Snippet:", "snippet"), ("Link:", "link"))
    parsed_results = []
    for block in results_string.split("\n\n"):
        if not block.strip():
            continue
        entry = {}
        for line in block.split("\n"):
            for prefix, key in field_map:
                if line.startswith(prefix):
                    entry[key] = line.replace(prefix, "").strip()
                    break
        if entry:
            parsed_results.append(entry)
    return parsed_results
def stringify_search_results(self, results_arr):
    """Render parsed results as blank-line-separated "Link: <url>" entries."""
    rendered = [f"Link: {entry['link']}" for entry in results_arr]
    return '\n\n'.join(rendered)
def save_notes(self, text):
    """Collect summary lines from an LLM answer into self.notes.

    Bug fix: the navigation prompt instructs the model to write lines
    beginning "Summary:", but the original only matched lowercase
    "summary:"; the match is now case-insensitive.

    Args:
        text: The full LLM answer, possibly multi-line.
    """
    for line in text.split('\n'):
        if "summary:" in line.lower():
            self.notes.append(line)
def process(self, user_prompt, speech_module) -> str:
    """Drive an autonomous browsing session to answer user_prompt.

    Searches the web, lets the LLM iteratively pick links (or go back /
    re-search), collects summary notes, and returns the final
    (answer, reasoning) pair.

    Side effects: navigates self.browser, appends to self.search_history
    and self.notes, and speaks status updates via speech_module.

    Bug fix: the final print referenced a bare `notes` name (NameError);
    it now prints self.notes.
    """
    complete = False
    animate_thinking(f"Searching...", color="status")
    search_result_raw = self.tools["web_search"].execute([user_prompt], False)
    search_result = self.jsonify_search_results(search_result_raw)
    search_result = search_result[:10]  # until further improvement
    prompt = self.make_newsearch_prompt(user_prompt, search_result)
    unvisited = [None]
    while not complete:
        answer, reasoning = self.llm_decide(prompt)
        self.save_notes(answer)
        if "REQUEST_EXIT" in answer:
            complete = True
            break
        links = self.extract_links(answer)
        if len(links) == 0 or "GO_BACK" in answer:
            # No usable link chosen: fall back to the remaining results.
            unvisited = self.select_unvisited(search_result)
            prompt = self.make_newsearch_prompt(user_prompt, unvisited)
            pretty_print(f"Going back to results. Still {len(unvisited)}", color="warning")
            links = []
            continue
        if len(unvisited) == 0:
            break  # nothing left to explore
        animate_thinking(f"Navigating to {links[0]}", color="status")
        speech_module.speak(f"Navigating to {links[0]}")
        self.browser.go_to(links[0])
        self.search_history.append(links[0])
        page_text = self.browser.get_text()
        self.navigable_links = self.browser.get_navigable()
        prompt = self.make_navigation_prompt(user_prompt, page_text)
    speech_module.speak(answer)
    self.browser.close()
    print("Final notes:", self.notes)
    return answer, reasoning
if __name__ == "__main__":

View File

@ -18,7 +18,7 @@ class CasualAgent(Agent):
"file_finder": FileFinder(),
"bash": BashInterpreter()
}
self.role = "talking, advices, events and philosophical"
self.role = "casual talking"
def process(self, prompt, speech_module) -> str:
complete = False

View File

@ -35,7 +35,7 @@ class Browser:
except Exception as e:
raise Exception(f"Failed to initialize browser: {str(e)}")
def goTo(self, url):
def go_to(self, url):
"""Navigate to a specified URL."""
try:
self.driver.get(url)
@ -47,12 +47,19 @@ class Browser:
return False
def is_sentence(self, text):
    """Check whether text is a meaningful sentence or carries an HTTP error code.

    Error codes (404, 403, 5xx) are deliberately treated as "sentences" so
    the AI gets to see failure pages instead of having them filtered out.

    Returns:
        True for error-code text or for text with at least 5 words that
        contains letters and is either long enough or terminally punctuated.
    """
    text = text.strip()
    error_codes = ("404", "403", "500", "502", "503")
    if any(code in text for code in error_codes):
        return True
    words = text.split()
    word_count = len(words)
    has_punctuation = text.endswith(('.', '!', '?'))
    is_long_enough = word_count > 5
    has_letters = any(word.isalpha() for word in words)
    # NOTE: (has_punctuation or is_long_enough) means exactly 5 words
    # require terminal punctuation; kept as merged from the branch.
    return word_count >= 5 and (has_punctuation or is_long_enough) and has_letters
def getText(self):
def get_text(self):
"""Get page text and convert it to README (Markdown) format."""
try:
soup = BeautifulSoup(self.driver.page_source, 'html.parser')
@ -72,8 +79,24 @@ class Browser:
except Exception as e:
self.logger.error(f"Error getting text: {str(e)}")
return None
def clean_url(self, url):
    """Strip the fragment and non-essential query parameters from a URL.

    Keeps only search-related parameters (_skw=, q=, s=). Scanning stops at
    the first tracking-style parameter (leading '_', hash=, itmmeta=), after
    which any remaining parameters are discarded.
    """
    without_fragment = url.split('#')[0]
    base_url, _, query = without_fragment.partition('?')
    if query:
        essential = []
        for param in query.split('&'):
            if param.startswith(('_skw=', 'q=', 's=')):
                essential.append(param)
            elif param.startswith('_') or param.startswith('hash=') or param.startswith('itmmeta='):
                break
        if essential:
            return f"{base_url}?{'&'.join(essential)}"
    return base_url
def getNavigable(self):
def get_navigable(self):
"""Get all navigable links on the current page."""
try:
links = []
@ -89,12 +112,12 @@ class Browser:
})
self.logger.info(f"Found {len(links)} navigable links")
return links
return [self.clean_url(link['url']) for link in links if link['is_displayed'] == True and len(link) < 256]
except Exception as e:
self.logger.error(f"Error getting navigable links: {str(e)}")
return []
def clickElement(self, xpath):
def click_element(self, xpath):
"""Click an element specified by xpath."""
try:
element = self.wait.until(
@ -107,15 +130,15 @@ class Browser:
self.logger.error(f"Element not found or not clickable: {xpath}")
return False
def get_current_url(self):
    """Return the URL of the page currently loaded in the Selenium driver.

    (Diff residue reconciled: the camelCase getCurrentUrl alias from the
    pre-merge branch is dropped in favor of the snake_case name.)
    """
    return self.driver.current_url
def get_page_title(self):
    """Return the <title> of the current page from the Selenium driver.

    (Diff residue reconciled: the camelCase getPageTitle alias from the
    pre-merge branch is dropped in favor of the snake_case name.)
    """
    return self.driver.title
def scrollToBottom(self):
def scroll_bottom(self):
"""Scroll to the bottom of the page."""
try:
self.driver.execute_script(
@ -127,7 +150,7 @@ class Browser:
self.logger.error(f"Error scrolling: {str(e)}")
return False
def takeScreenshot(self, filename):
def screenshot(self, filename):
"""Take a screenshot of the current page."""
try:
self.driver.save_screenshot(filename)
@ -155,16 +178,11 @@ if __name__ == "__main__":
browser = Browser(headless=False)
try:
browser.goTo("https://karpathy.github.io/")
text = browser.getText()
browser.go_to("https://karpathy.github.io/")
text = browser.get_text()
print("Page Text in Markdown:")
print(text)
links = browser.getNavigable()
print("\nNavigable Links:")
for link in links[:50]:
print(f"Text: {link['text']}, URL: {link['url']}")
browser.takeScreenshot("example.png")
links = browser.get_navigable()
print("\nNavigable Links:", links)
finally:
browser.close()

View File

@ -22,6 +22,42 @@ class webSearch(Tools):
super().__init__()
self.tag = "web_search"
self.api_key = api_key or os.getenv("SERPAPI_KEY") # Requires a SerpApi key
self.paywall_keywords = [
"subscribe", "login to continue", "access denied", "restricted content", "404", "this page is not working"
]
def link_valid(self, link):
    """Probe a URL with a GET request and report a human-readable status.

    Returns "Status: OK", "Status: Possible Paywall" (paywall keyword seen
    in the first 1000 chars), a specific message for 404/403, a generic
    "Status: <code> <reason>" otherwise, or an "Error: ..." string when the
    request itself fails. Non-http(s) links are rejected outright.
    """
    if not link.startswith("http"):
        return "Status: Invalid URL"
    user_agent = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
    try:
        response = requests.get(link, headers=user_agent, timeout=5)
        status = response.status_code
        if status == 200:
            preview = response.text[:1000].lower()
            if any(keyword in preview for keyword in self.paywall_keywords):
                return "Status: Possible Paywall"
            return "Status: OK"
        if status == 404:
            return "Status: 404 Not Found"
        if status == 403:
            return "Status: 403 Forbidden"
        return f"Status: {status} {response.reason}"
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
def check_all_links(self, links):
    """Check every link sequentially and return their status strings.

    Fixes vs. original: the enumerate() index was never used; the progress
    message typo ("scrawling") is corrected.

    TODO(perf): make this concurrent (threads or asyncio) — sequential
    HTTP probing is the slow path here.

    Args:
        links: URLs to validate via link_valid.

    Returns:
        A list of status strings, one per link, in input order.
    """
    print("Workers started, crawling the web...")
    return [self.link_valid(link) for link in links]
def execute(self, blocks: str, safety: bool = True) -> str:
if self.api_key is None:
@ -37,7 +73,7 @@ class webSearch(Tools):
params = {
"q": query,
"api_key": self.api_key,
"num": 100,
"num": 50,
"output": "json"
}
response = requests.get(url, params=params)
@ -46,11 +82,16 @@ class webSearch(Tools):
data = response.json()
results = []
if "organic_results" in data and len(data["organic_results"]) > 0:
for result in data["organic_results"][:50]:
organic_results = data["organic_results"][:50]
links = [result.get("link", "No link available") for result in organic_results]
statuses = self.check_all_links(links)
for result, status in zip(organic_results, statuses):
if not "OK" in status:
continue
title = result.get("title", "No title")
snippet = result.get("snippet", "No snippet available")
link = result.get("link", "No link available")
results.append(f"Title: {title}\nSnippet: {snippet}\nLink: {link}")
results.append(f"Title:{title}\nSnippet:{snippet}\nLink:{link}")
return "\n\n".join(results)
else:
return "No results found for the query."
@ -73,5 +114,5 @@ if __name__ == "__main__":
search_tool = webSearch(api_key=os.getenv("SERPAPI_KEY"))
query = "when did covid start"
result = search_tool.execute([query], safety=True)
feedback = search_tool.interpreter_feedback(result)
print(feedback)
output = search_tool.interpreter_feedback(result)
print(output)