Merge branch 'browser' into dev

This commit is contained in:
martin legrand 2025-03-15 14:17:22 +01:00
commit 40cecbcccb
7 changed files with 211 additions and 104 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

View File

@ -1,21 +1,9 @@
You are an internet AI that can browse the web for information.
In fact you are embedded in a browser with selenium.
If you need to conduct a web search, you can use the following tool:
- web_search: to search the web for information
This is how you can use the web_search tool:
```web_search
<query>
```
This will provide you with a list of links that you can navigate to.
You can navigate to a specific link by typing the link. For example, if you say:
"I want to navigate to https://www.google.com"
You will navigate to https://www.google.com
Any link that you type will be opened in a new tab.
If you want to exit the browser, you can say:
"REQUEST_EXIT"
Only exit the browser if you are done browsing.
You are a web browsing AI, your goal is to explore the internet to find information.
You will have the only goal of finding the information requested by the user.
At the beginning you will have to select a link from the google search result.
You will choose a link by simply typing it.
This will automatically make you browse to the link.
Once on a webpage you will see the page content and be given further navigation options.
You can type a link to navigate further on the page, go back to the search result or exit.
At each interaction step the browser will remind you of your options.

View File

@ -34,7 +34,7 @@ class Agent():
name: str,
prompt_path:str,
provider,
recover_last_session=False) -> None:
recover_last_session=True) -> None:
self.agent_name = name
self.role = None
self.current_directory = os.getcwd()

View File

@ -8,7 +8,7 @@ from sources.browser import Browser
class BrowserAgent(Agent):
def __init__(self, model, name, prompt_path, provider):
"""
The casual agent is a special for casual talk to the user without specific tasks.
The Browser agent is an agent that navigate the web autonomously in search of answer
"""
super().__init__(model, name, prompt_path, provider)
self.tools = {
@ -16,86 +16,146 @@ class BrowserAgent(Agent):
}
self.role = "deep research and web search"
self.browser = Browser()
self.browser.goTo("https://github.com/")
self.browser.go_to("https://github.com/")
self.search_history = []
self.navigable_links = []
self.notes = []
def make_init_prompt(self, user_prompt: str, search_result: str) -> str:
    """Build the initial browsing prompt handed to the LLM.

    Args:
        user_prompt: The original request from the user.
        search_result: Raw web-search results used to ground the first
            navigation choice.

    Returns:
        A prompt string instructing the model to pick a link to navigate to.
    """
    # Grammar fixed in the instruction text ("user wants", capitalized
    # directive) so the LLM receives a clean, unambiguous instruction.
    return f"""
    Based on the search result:
    {search_result}
    Start browsing and find the information the user wants.
    User: {user_prompt}
    You must choose a link to navigate to. Say: I want to navigate to <link>.
    """
def extract_links(self, search_result: str) -> list:
    """Extract HTTP/HTTPS and www. URLs from free-form text.

    Trailing sentence punctuation (".,!?;:") is stripped from each match so
    links embedded in prose stay navigable, then the list is passed through
    clean_links for the final trailing-dot sweep.

    Args:
        search_result: Any text that may contain URLs (LLM answer or
            search output).

    Returns:
        A list of cleaned URL strings (possibly empty).
    """
    # Match both absolute URLs and bare www. hosts; \S+ grabs up to the
    # next whitespace, which is why trailing punctuation must be stripped.
    pattern = r'(https?://\S+|www\.\S+)'
    matches = re.findall(pattern, search_result)
    trailing_punct = ".,!?;:"
    cleaned_links = [link.rstrip(trailing_punct) for link in matches]
    return self.clean_links(cleaned_links)
def clean_links(self, links: list) -> list:
    """Strip whitespace and a single trailing '.' from each link.

    Bug fix: the original indexed ``link[-1]`` unconditionally, which raised
    IndexError on an empty (or whitespace-only) entry; such entries are now
    dropped instead of crashing.

    Args:
        links: Candidate URL strings.

    Returns:
        The cleaned, non-empty links in their original order.
    """
    cleaned = []
    for link in links:
        link = link.strip()
        if not link:
            continue  # skip empties instead of crashing on link[-1]
        cleaned.append(link[:-1] if link.endswith('.') else link)
    return cleaned
def get_unvisited_links(self):
    """Return a newline-joined, index-prefixed list of links not yet visited."""
    unvisited = (
        f"[{idx}] {url}"
        for idx, url in enumerate(self.navigable_links)
        if url not in self.search_history
    )
    return "\n".join(unvisited)
def make_newsearch_prompt(self, user_prompt: str, search_result: dict):
    """Build a prompt asking the LLM to pick one link from fresh search results."""
    listing = self.stringify_search_results(search_result)
    return f"""
    Based on the search result:
    {listing}
    Your goal is to find accurate and complete information to satisfy the users request.
    User request: {user_prompt}
    To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to <link>."
    Do not explain your choice.
    """
def process(self, prompt, speech_module) -> str:
def make_navigation_prompt(self, user_prompt: str, page_text: str):
    """Build the per-page navigation prompt for the LLM.

    Bug fixes vs. the original:
    - ``get_unvisited_links()`` returns a (possibly empty) string, never
      None, so the old ``is not None`` check could never trigger the
      fallback; emptiness is now tested instead.
    - The computed ``remaining_links_text`` fallback was never interpolated
      into the prompt (the raw variable was used); it is now.
    - Typos in the instruction text ("go go back", "exemple", missing "or")
      are fixed so the LLM receives clean directives.
    """
    remaining_links = self.get_unvisited_links()
    # Falsy ("") means every known link was visited already.
    remaining_links_text = remaining_links if remaining_links else "No links remaining, proceed with a new search."
    return f"""
    \nYou are currently browsing the web. Not the user, you are the browser.
    Page content:
    {page_text}
    You can navigate to these links:
    {remaining_links_text}
    If no link seems appropriate, please say "GO_BACK".
    Remember, you seek the information the user wants.
    The user query was : {user_prompt}
    You must choose a link (write it down) to navigate to, or go back.
    For example you can say: i want to go to www.events.org/events
    Always end with a sentence that summarizes useful information if any, for example:
    Summary: According to https://karpathy.github.io/ LeCun net is the earliest real-world application of a neural net
    Another example:
    Summary: the BBC website does not provide useful informations.
    Do not explain your choice, be short, concise.
    """
def llm_decide(self, prompt):
    """Push the prompt to memory, query the LLM, and pretty-print its answer.

    Returns the (answer, reasoning) pair exactly as produced by llm_request.
    """
    animate_thinking("Thinking...", color="status")
    self.memory.push('user', prompt)
    answer, reasoning = self.llm_request(prompt)
    separator = "-" * 100
    pretty_print(separator)
    pretty_print(answer, color="output")
    pretty_print(separator)
    return answer, reasoning
def select_unvisited(self, search_result):
    """Return the subset of search results whose links were not visited yet.

    Args:
        search_result: List of result dicts carrying a "link" key.

    Returns:
        A new list preserving order, excluding results already in
        self.search_history.
    """
    # Idiomatic comprehension instead of the original append loop.
    return [res for res in search_result if res["link"] not in self.search_history]
def jsonify_search_results(self, results_string):
    """Parse raw web_search output into a list of result dicts.

    Result blocks are separated by blank lines; within a block, lines
    prefixed "Title:", "Snippet:" or "Link:" populate the corresponding
    dict keys. Blocks with no recognized line are dropped.
    """
    field_map = (("Title:", "title"), ("Snippet:", "snippet"), ("Link:", "link"))
    parsed_results = []
    for block in results_string.split("\n\n"):
        if not block.strip():
            continue
        entry = {}
        for line in block.split("\n"):
            for prefix, key in field_map:
                if line.startswith(prefix):
                    entry[key] = line.replace(prefix, "").strip()
                    break
        if entry:
            parsed_results.append(entry)
    return parsed_results
def stringify_search_results(self, results_arr):
    """Render parsed results as blank-line-separated "Link: <url>" entries."""
    rendered = [f"Link: {entry['link']}" for entry in results_arr]
    return '\n\n'.join(rendered)
def save_notes(self, text):
    """Collect summary lines from an LLM answer into self.notes.

    Bug fix: the navigation prompt instructs the model to write lines
    beginning "Summary:", but the original only matched lowercase
    "summary:"; the match is now case-insensitive.

    Args:
        text: The full LLM answer, possibly multi-line.
    """
    for line in text.split('\n'):
        if "summary:" in line.lower():
            self.notes.append(line)
def process(self, user_prompt, speech_module) -> str:
    """Drive an autonomous browsing session to answer user_prompt.

    Searches the web, lets the LLM iteratively pick links (or go back /
    re-search), collects summary notes, and returns the final
    (answer, reasoning) pair.

    Side effects: navigates self.browser, appends to self.search_history
    and self.notes, and speaks status updates via speech_module.

    Bug fix: the final print referenced a bare `notes` name (NameError);
    it now prints self.notes.
    """
    complete = False
    animate_thinking(f"Searching...", color="status")
    search_result_raw = self.tools["web_search"].execute([user_prompt], False)
    search_result = self.jsonify_search_results(search_result_raw)
    search_result = search_result[:10]  # until further improvement
    prompt = self.make_newsearch_prompt(user_prompt, search_result)
    unvisited = [None]
    while not complete:
        answer, reasoning = self.llm_decide(prompt)
        self.save_notes(answer)
        if "REQUEST_EXIT" in answer:
            complete = True
            break
        links = self.extract_links(answer)
        if len(links) == 0 or "GO_BACK" in answer:
            # No usable link chosen: fall back to the remaining results.
            unvisited = self.select_unvisited(search_result)
            prompt = self.make_newsearch_prompt(user_prompt, unvisited)
            pretty_print(f"Going back to results. Still {len(unvisited)}", color="warning")
            links = []
            continue
        if len(unvisited) == 0:
            break  # nothing left to explore
        animate_thinking(f"Navigating to {links[0]}", color="status")
        speech_module.speak(f"Navigating to {links[0]}")
        self.browser.go_to(links[0])
        self.search_history.append(links[0])
        page_text = self.browser.get_text()
        self.navigable_links = self.browser.get_navigable()
        prompt = self.make_navigation_prompt(user_prompt, page_text)
    speech_module.speak(answer)
    self.browser.close()
    print("Final notes:", self.notes)
    return answer, reasoning
if __name__ == "__main__":

View File

@ -18,7 +18,7 @@ class CasualAgent(Agent):
"file_finder": FileFinder(),
"bash": BashInterpreter()
}
self.role = "talking, advices, events and philosophical"
self.role = "casual talking"
def process(self, prompt, speech_module) -> str:
complete = False

View File

@ -35,7 +35,7 @@ class Browser:
except Exception as e:
raise Exception(f"Failed to initialize browser: {str(e)}")
def goTo(self, url):
def go_to(self, url):
"""Navigate to a specified URL."""
try:
self.driver.get(url)
@ -47,12 +47,19 @@ class Browser:
return False
def is_sentence(self, text):
    """Check whether text is a meaningful sentence or carries an HTTP error code.

    Error codes (404, 403, 5xx) are deliberately treated as "sentences" so
    the AI gets to see failure pages instead of having them filtered out.

    Returns:
        True for error-code text or for text with at least 5 words that
        contains letters and is either long enough or terminally punctuated.
    """
    text = text.strip()
    error_codes = ("404", "403", "500", "502", "503")
    if any(code in text for code in error_codes):
        return True
    words = text.split()
    word_count = len(words)
    has_punctuation = text.endswith(('.', '!', '?'))
    is_long_enough = word_count > 5
    has_letters = any(word.isalpha() for word in words)
    # NOTE: (has_punctuation or is_long_enough) means exactly 5 words
    # require terminal punctuation; kept as merged from the branch.
    return word_count >= 5 and (has_punctuation or is_long_enough) and has_letters
def getText(self):
def get_text(self):
"""Get page text and convert it to README (Markdown) format."""
try:
soup = BeautifulSoup(self.driver.page_source, 'html.parser')
@ -72,8 +79,24 @@ class Browser:
except Exception as e:
self.logger.error(f"Error getting text: {str(e)}")
return None
def clean_url(self, url):
    """Strip the fragment and non-essential query parameters from a URL.

    Keeps only search-related parameters (_skw=, q=, s=). Scanning stops at
    the first tracking-style parameter (leading '_', hash=, itmmeta=), after
    which any remaining parameters are discarded.
    """
    without_fragment = url.split('#')[0]
    base_url, _, query = without_fragment.partition('?')
    if query:
        essential = []
        for param in query.split('&'):
            if param.startswith(('_skw=', 'q=', 's=')):
                essential.append(param)
            elif param.startswith('_') or param.startswith('hash=') or param.startswith('itmmeta='):
                break
        if essential:
            return f"{base_url}?{'&'.join(essential)}"
    return base_url
def getNavigable(self):
def get_navigable(self):
"""Get all navigable links on the current page."""
try:
links = []
@ -89,12 +112,12 @@ class Browser:
})
self.logger.info(f"Found {len(links)} navigable links")
return links
return [self.clean_url(link['url']) for link in links if link['is_displayed'] == True and len(link) < 256]
except Exception as e:
self.logger.error(f"Error getting navigable links: {str(e)}")
return []
def clickElement(self, xpath):
def click_element(self, xpath):
"""Click an element specified by xpath."""
try:
element = self.wait.until(
@ -107,15 +130,15 @@ class Browser:
self.logger.error(f"Element not found or not clickable: {xpath}")
return False
def get_current_url(self):
    """Return the URL of the page currently loaded in the Selenium driver.

    (Diff residue reconciled: the camelCase getCurrentUrl alias from the
    pre-merge branch is dropped in favor of the snake_case name.)
    """
    return self.driver.current_url
def get_page_title(self):
    """Return the <title> of the current page from the Selenium driver.

    (Diff residue reconciled: the camelCase getPageTitle alias from the
    pre-merge branch is dropped in favor of the snake_case name.)
    """
    return self.driver.title
def scrollToBottom(self):
def scroll_bottom(self):
"""Scroll to the bottom of the page."""
try:
self.driver.execute_script(
@ -127,7 +150,7 @@ class Browser:
self.logger.error(f"Error scrolling: {str(e)}")
return False
def takeScreenshot(self, filename):
def screenshot(self, filename):
"""Take a screenshot of the current page."""
try:
self.driver.save_screenshot(filename)
@ -155,16 +178,11 @@ if __name__ == "__main__":
browser = Browser(headless=False)
try:
browser.goTo("https://karpathy.github.io/")
text = browser.getText()
browser.go_to("https://karpathy.github.io/")
text = browser.get_text()
print("Page Text in Markdown:")
print(text)
links = browser.getNavigable()
print("\nNavigable Links:")
for link in links[:50]:
print(f"Text: {link['text']}, URL: {link['url']}")
browser.takeScreenshot("example.png")
links = browser.get_navigable()
print("\nNavigable Links:", links)
finally:
browser.close()

View File

@ -22,6 +22,42 @@ class webSearch(Tools):
super().__init__()
self.tag = "web_search"
self.api_key = api_key or os.getenv("SERPAPI_KEY") # Requires a SerpApi key
self.paywall_keywords = [
"subscribe", "login to continue", "access denied", "restricted content", "404", "this page is not working"
]
def link_valid(self, link):
    """Probe a URL with a GET request and report a human-readable status.

    Returns "Status: OK", "Status: Possible Paywall" (paywall keyword seen
    in the first 1000 chars), a specific message for 404/403, a generic
    "Status: <code> <reason>" otherwise, or an "Error: ..." string when the
    request itself fails. Non-http(s) links are rejected outright.
    """
    if not link.startswith("http"):
        return "Status: Invalid URL"
    user_agent = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
    try:
        response = requests.get(link, headers=user_agent, timeout=5)
        status = response.status_code
        if status == 200:
            preview = response.text[:1000].lower()
            if any(keyword in preview for keyword in self.paywall_keywords):
                return "Status: Possible Paywall"
            return "Status: OK"
        if status == 404:
            return "Status: 404 Not Found"
        if status == 403:
            return "Status: 403 Forbidden"
        return f"Status: {status} {response.reason}"
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
def check_all_links(self, links):
    """Check every link sequentially and return their status strings.

    Fixes vs. original: the enumerate() index was never used; the progress
    message typo ("scrawling") is corrected.

    TODO(perf): make this concurrent (threads or asyncio) — sequential
    HTTP probing is the slow path here.

    Args:
        links: URLs to validate via link_valid.

    Returns:
        A list of status strings, one per link, in input order.
    """
    print("Workers started, crawling the web...")
    return [self.link_valid(link) for link in links]
def execute(self, blocks: str, safety: bool = True) -> str:
if self.api_key is None:
@ -37,7 +73,7 @@ class webSearch(Tools):
params = {
"q": query,
"api_key": self.api_key,
"num": 100,
"num": 50,
"output": "json"
}
response = requests.get(url, params=params)
@ -46,11 +82,16 @@ class webSearch(Tools):
data = response.json()
results = []
if "organic_results" in data and len(data["organic_results"]) > 0:
for result in data["organic_results"][:50]:
organic_results = data["organic_results"][:50]
links = [result.get("link", "No link available") for result in organic_results]
statuses = self.check_all_links(links)
for result, status in zip(organic_results, statuses):
if not "OK" in status:
continue
title = result.get("title", "No title")
snippet = result.get("snippet", "No snippet available")
link = result.get("link", "No link available")
results.append(f"Title: {title}\nSnippet: {snippet}\nLink: {link}")
results.append(f"Title:{title}\nSnippet:{snippet}\nLink:{link}")
return "\n\n".join(results)
else:
return "No results found for the query."
@ -73,5 +114,5 @@ if __name__ == "__main__":
search_tool = webSearch(api_key=os.getenv("SERPAPI_KEY"))
query = "when did covid start"
result = search_tool.execute([query], safety=True)
feedback = search_tool.interpreter_feedback(result)
print(feedback)
output = search_tool.interpreter_feedback(result)
print(output)