Feat : browser agent v1.0

2025-07-24 02:10:10 +00:00 · 2025-03-11 15:22:42 +01:00 · 2025-03-11 15:22:42 +01:00 · 9f9907cedb
commit 9f9907cedb
parent e9e2e3ecf2
6 changed files with 96 additions and 10 deletions
--- a/media/exemples/advanced_web.png
+++ b/media/exemples/advanced_web.png
--- a/prompts/browser_agent.txt
+++ b/prompts/browser_agent.txt
@ -1,6 +1,21 @@
-You are a browser agent that can search the web for information.
+You are an internet AI that can browse the web for information.
 In fact you are embedded in a browser with selenium.
-You can use the following tools:
+If you need to conduct a web search, you can use the following tool:
 - web_search: to search the web for information
-TODODODODODOTO
+This is how you can use the web_search tool:
 ```web_search
 <query>
 ```
 This will provide you with a list of links that you can navigate to.
 You can navigate to a specific link by typing the link. For example, if you say:
 "I want to navigate to https://www.google.com"
 You will navigate to https://www.google.com
 Any link that you type will be opened in a new tab.
 If you want to exit the browser, you can say:
 "REQUEST_EXIT"
 Only exit the browser if you are done browsing.
--- a/sources/agents/browser_agent.py
+++ b/sources/agents/browser_agent.py
@ -1,3 +1,5 @@
 import re
 import time
 from sources.utility import pretty_print, animate_thinking
 from sources.agents.agent import Agent
@ -16,8 +18,73 @@ class BrowserAgent(Agent):
        self.browser = Browser()
        self.browser.goTo("https://github.com/")
    def make_init_prompt(self, user_prompt: str, search_result: str):
        """
        Build the first prompt of a browsing session.

        Args:
            user_prompt (str): The user's query.
            search_result (str): Text of the search results to show the model.
        Returns:
            str: A prompt embedding the search results and the query, asking the model to pick a link.
        """
        # The template text is sent to the model as-is, so it is kept verbatim.
        init_prompt = f"""
        Based on the search result:
        {search_result}
        Start browsing and find the information the user want.
        User: {user_prompt}
        You must choose a link to navigate to. Say i want to navigate to a <link>.
        """
        return init_prompt
    def extract_links(self, search_result: str):
        """
        Collect every http(s) URL that appears in the given text.

        Args:
            search_result (str): Text to scan.
        Returns:
            list: The matched URLs in order of appearance. Each match runs up to the next
            whitespace character, so trailing punctuation is kept.
        """
        url_pattern = re.compile(r'https?://[^\s]+')
        return [found.group(0) for found in url_pattern.finditer(search_result)]
    def make_navigation_prompt(self, user_prompt: str, page_text: str, navigable_links: list):
        """
        Build the prompt shown to the model after a page has been loaded.

        Args:
            user_prompt (str): The query the model must keep answering.
            page_text (str): Text content of the current page.
            navigable_links (list): Dict entries, each read through its 'text' and 'url' keys.
        Returns:
            str: A prompt with the page content, a numbered list of links and the exit instruction.
        """
        # One "[index] text - url" line per link, numbered from 0.
        numbered_links = []
        for index, entry in enumerate(navigable_links):
            numbered_links.append(f"[{index}] {entry['text']} - {entry['url']}")
        link_listing = "\n".join(numbered_links)
        return f"""
        \nYou are browsing the web. Not the user, you are the browser.
        Page content:
        {page_text}
        Navigable links:
        {link_listing}
        Remember, you must seek the information the user want.
        The user query was : {user_prompt}
        You must choose a link to navigate to.
        If you have an answer and want to exit the browser, please say "REQUEST_EXIT".
        """
    def clean_links(self, links: list):
        """
        Remove one trailing period from each link.

        A URL taken from running text may carry the final '.' of its sentence;
        only a single trailing '.' is dropped, everything else is left untouched.

        Args:
            links (list): Link strings to clean.
        Returns:
            list: A new list with the cleaned links, in the same order.
        """
        # endswith() is safe on an empty string, whereas indexing with [-1] raises IndexError.
        return [link[:-1] if link.endswith('.') else link for link in links]
    def process(self, prompt, speech_module) -> tuple:
        """
        Search the web for the prompt, then let the model browse link by link until it asks to exit.

        Args:
            prompt (str): The user query, passed to the web_search tool.
            speech_module: Object whose speak() method is called to announce each navigation.
        Returns:
            tuple: (answer, reasoning) as returned by the last model request.
        """
        animate_thinking("Searching...", color="status")
        search_result = self.tools["web_search"].execute([prompt], False)
        user_prompt = self.make_init_prompt(prompt, search_result)
        prompt = user_prompt
        try:
            while True:
                animate_thinking("Thinking...", color="status")
                self.memory.push('user', user_prompt)
                answer, reasoning = self.llm_request(prompt)
                if "REQUEST_EXIT" in answer:
                    break
                links_clean = self.clean_links(self.extract_links(answer))
                if not links_clean:
                    # The model named no URL: ask again instead of navigating.
                    prompt = "Please choose a link to navigate to."
                    continue
                # Navigate to the cleaned link so a trailing '.' picked up from the
                # model's sentence does not end up in the requested URL.
                target = links_clean[0]
                animate_thinking(f"Navigating to {target}", color="status")
                speech_module.speak(f"Navigating to {target}")
                self.browser.goTo(target)
                # Only the first 2048 characters and the first 15 links go into the next prompt.
                page_text = self.browser.getText()[:2048]
                navigable_links = self.browser.getNavigable()[:15]
                prompt = self.make_navigation_prompt(user_prompt, page_text, navigable_links)
        finally:
            # Close the browser even when a request or a navigation raises.
            self.browser.close()
        return answer, reasoning
 if __name__ == "__main__":
    browser = Browser()
--- a/sources/browser.py
+++ b/sources/browser.py
@ -11,7 +11,7 @@ import markdownify
 import logging
 class Browser:
-    def __init__(self, headless=True):
+    def __init__(self, headless=False):
        """Initialize the browser with optional headless mode."""
        try:
            chrome_options = Options()
@ -39,6 +39,10 @@ class Browser:
            self.logger.error(f"Error navigating to {url}: {str(e)}")
            return False
    def is_sentence(self, text):
        """Tell whether the text looks like a sentence: more than five space-separated pieces and a period."""
        # Splitting on a single space means consecutive spaces produce empty pieces that also count.
        pieces = text.split(" ")
        if len(pieces) <= 5:
            return False
        return '.' in text
    def getText(self):
        """Get page text and convert it to README (Markdown) format."""
        try:
@ -51,7 +55,7 @@ class Browser:
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
-            text = "\n".join(chunk for chunk in chunks if chunk)
+            text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk))
            markdown_text = markdownify.markdownify(text, heading_style="ATX")
@ -142,7 +146,7 @@ if __name__ == "__main__":
    browser = Browser(headless=False)
    try:
-        browser.goTo("https://github.com/geohot")
+        browser.goTo("https://karpathy.github.io/")
        text = browser.getText()
        print("Page Text in Markdown:")
        print(text)
--- a/sources/interaction.py
+++ b/sources/interaction.py
@ -100,7 +100,7 @@ class Interaction:
            return
        if self.current_agent != agent:
            self.current_agent = agent
-            # get history from previous agent
+            # TODO: confirm that pushing the last query is the right way to carry over history from the previous agent
            self.current_agent.memory.push('user', self.last_query)
        self.last_answer, _ = agent.process(self.last_query, self.speech)
--- a/sources/tools/tools.py
+++ b/sources/tools/tools.py
@ -77,11 +77,11 @@ class Tools():
        return dir_path
    @abstractmethod
-    def execute(self, blocks:str, safety:bool) -> str:
+    def execute(self, blocks:[str], safety:bool) -> str:
        """
        Abstract method that must be implemented by child classes to execute the tool's functionality.
        Args:
-            blocks (str): The code or query blocks to execute
+            blocks (List[str]): The code or query blocks to execute
            safety (bool): Whether human intervention is required
        Returns:
            str: The output/result from executing the tool