Feat : web navigation improvement, better prompting

2025-06-06 19:15:28 +00:00 · 2025-03-18 16:52:09 +01:00 · 2025-03-18 16:52:09 +01:00 · bdbb590dc4
commit bdbb590dc4
parent 292623ab52
4 changed files with 61 additions and 43 deletions
--- a/prompts/file_agent.txt
+++ b/prompts/file_agent.txt
@ -44,7 +44,7 @@ User: "I need to find the file config.txt and read its contents."
 Assistant: I’ll use file_finder to locate the file:
-```file_finder
+```file_finder:read
 config.txt
 ```
--- a/sources/agents/browser_agent.py
+++ b/sources/agents/browser_agent.py
@ -15,9 +15,8 @@ class BrowserAgent(Agent):
        self.tools = {
            "web_search": searxSearch(),
        }
-        self.role = "deep research and web search"
+        self.role = "web search, internet, google"
        self.browser = Browser()
        self.browser.go_to("https://github.com/")
        self.search_history = []
        self.navigable_links = []
        self.notes = []
@ -50,7 +49,7 @@ class BrowserAgent(Agent):
        {search_choice}
        Your goal is to find accurate and complete information to satisfy the user’s request.
        User request: {user_prompt}
-        To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to <link>."
+        To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to <link>"
        Do not explain your choice.
        """
@ -58,27 +57,44 @@ class BrowserAgent(Agent):
        remaining_links = self.get_unvisited_links() 
        remaining_links_text = remaining_links if remaining_links is not None else "No links remaining, proceed with a new search." 
        return f"""
-        \nYou are currently browsing the web. Not the user, you are the browser.
+        You are a web browser.
-
+        You are currently on this webpage:
        Page content:
        {page_text}
-        You can navigate to these links:
+        You can navigate to these navigation links:
        {remaining_links}
-        You must choose a link (write it down) to navigate to, or go back.
+        Your task:
-        For exemple you can say: i want to go to www.wikipedia.org/cats
+        1. Decide if the current page answers the user’s query: {user_prompt}
          - If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
          - If it does and you are 100% certain that it provide a definive answer, say REQUEST_EXIT
          - If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
        2. Navigate by either: 
          - Navigate to a navigation links (write the full URL, e.g., www.example.com/cats).
          - If no link seems helpful, say: GO_BACK.
-        Follow up with a summary of the page content (of the current page, not of the link), for example:
+        Recap of note taking:
-        Summary: According to https://karpathy.github.io/ LeCun net is the earliest real-world application of a neural net"
+        If useful -> Note: [Briefly summarize the key information that answers the user’s query.]
-        The summary should include any useful finding that are useful in answering user query.
+        Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer.
-        If a website does not have usefull information say Error, for exemple:
+        If not useful -> Error: [Explain why the page doesn’t help.]
        Error: This forum does not discus anything that can answer the user query
        Be short, concise, direct.
-        If no link seem appropriate, please say "GO_BACK".
+        Example 1 (useful page, no need of going futher):
-        Remember, you seek the information the user want.
+        Note: According to karpathy site (https://karpathy.github.io/) LeCun net is the earliest real-world application of a neural net"
-        The user query was : {user_prompt}
+        No link seem useful to provide futher information. GO_BACK
        Example 2 (not useful, but related link):
        Error: This forum reddit.com/welcome does not discuss anything related to the user’s query.
        There is a link that could lead to the information, I want to navigate to http://reddit.com/r/locallama
        Example 3 (not useful, no related links):
        Error: x.com does not discuss anything related to the user’s query and no navigation link are usefull
        GO_BACK
        Example 3 (not useful, no related links):
        Note: I found on github.com that the creator of agenticSeek is fosowl. 
        Given this information, given this I should exit the web browser. REQUEST_EXIT
        Remember, the user asked: {user_prompt}
        """
    def llm_decide(self, prompt):
@ -122,11 +138,11 @@ class BrowserAgent(Agent):
    def save_notes(self, text):
        lines = text.split('\n')
        for line in lines:
-            if "summary" in line.lower():
+            if "note" in line.lower():
                self.notes.append(line)
    def conclude_prompt(self, user_query):
-        annotated_notes = [f"{i+1}: {note.lower().replace('summary:', '')}" for i, note in enumerate(self.notes)]
+        annotated_notes = [f"{i+1}: {note.lower().replace('note:', '')}" for i, note in enumerate(self.notes)]
        search_note = '\n'.join(annotated_notes)
        print("AI research notes:\n", search_note)
        return f"""
@ -174,7 +190,6 @@ class BrowserAgent(Agent):
        self.memory.push('user', prompt)
        answer, reasoning = self.llm_request(prompt)
        pretty_print(answer, color="output")
        speech_module.speak(answer)
        return answer, reasoning
 if __name__ == "__main__":
--- a/sources/agents/file_agent.py
+++ b/sources/agents/file_agent.py
@ -14,7 +14,7 @@ class FileAgent(Agent):
            "file_finder": FileFinder(),
            "bash": BashInterpreter()
        }
-        self.role = "files operations"
+        self.role = "find, read, write, edit files"
    def process(self, prompt, speech_module) -> str:
        complete = False
--- a/sources/browser.py
+++ b/sources/browser.py
@ -13,6 +13,7 @@ import markdownify
 import logging
 import sys
 import re
 from urllib.parse import urlparse
 class Browser:
    def __init__(self, headless=False, anticaptcha_install=False):
@ -58,7 +59,8 @@ class Browser:
                os.path.join(os.environ.get("LOCALAPPDATA", ""), "Google\\Chrome\\Application\\chrome.exe")  # User install
            ]
        elif sys.platform.startswith("darwin"):  # macOS
-            paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", "/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"]
+            paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
                     "/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"]
        else:  # Linux
            paths = ["/usr/bin/google-chrome", "/usr/bin/chromium-browser", "/usr/bin/chromium"]
@ -79,19 +81,6 @@ class Browser:
            self.logger.error(f"Error navigating to {url}: {str(e)}")
            return False
    def is_sentence(self, text):
        """Decide whether a text chunk is worth keeping as page content.

        A chunk is kept when it contains one of the HTTP error codes listed
        below, or when it has at least five whitespace-separated words,
        either ends with sentence punctuation or has more than five words,
        and contains at least one purely alphabetic word.

        Args:
            text: Raw chunk of page text; surrounding whitespace is ignored.

        Returns:
            bool: True if the chunk should be kept, False otherwise.
        """
        stripped = text.strip()
        # Error codes are always kept, even in very short chunks.
        for code in ("404", "403", "500", "502", "503"):
            if code in stripped:
                return True
        tokens = stripped.split()
        token_total = len(tokens)
        if token_total < 5:
            return False
        # Exactly five words only count when closed by punctuation.
        ends_like_sentence = stripped.endswith(('.', '!', '?'))
        if not ends_like_sentence and token_total <= 5:
            return False
        return any(token.isalpha() for token in tokens)
    def is_sentence(self, text):
        """Check if the text qualifies as a meaningful sentence or contains important error codes."""
        text = text.strip()
@ -118,10 +107,8 @@ class Browser:
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk))
-            
+            #markdown_text = markdownify.markdownify(text, heading_style="ATX")
-            markdown_text = markdownify.markdownify(text, heading_style="ATX")
+            return "[Start of page]\n" + text + "\n[End of page]"
            return markdown_text
        except Exception as e:
            self.logger.error(f"Error getting text: {str(e)}")
            return None
@ -143,6 +130,22 @@ class Browser:
                return f"{base_url}?{'&'.join(essential_params)}"
        return base_url
    def is_link_valid(self, url):
        """Check if a URL is a valid link (page, not related to icon or metadata).

        Args:
            url: Candidate link as a string. ``None`` or an empty string is
                rejected.

        Returns:
            bool: False when the URL is missing, is longer than 64 characters,
            lacks a scheme or a host, has a path ending in ``/<digits>``, or
            has a path ending in an image or metadata file extension.
            True otherwise.
        """
        # Reject missing values before calling len() so None cannot raise.
        if not url or len(url) > 64:
            return False
        parsed_url = urlparse(url)
        if not parsed_url.scheme or not parsed_url.netloc:
            return False
        if re.search(r'/\d+$', parsed_url.path):
            return False
        image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp')
        metadata_extensions = ('.ico', '.xml', '.json', '.rss', '.atom')
        # Test the path rather than the whole URL, so a query string or
        # fragment (e.g. "/logo.png?v=2") cannot hide the file extension.
        if parsed_url.path.lower().endswith(image_extensions + metadata_extensions):
            return False
        return True
    def get_navigable(self):
        """Get all navigable links on the current page."""
        try:
@ -159,7 +162,7 @@ class Browser:
                    })
            self.logger.info(f"Found {len(links)} navigable links")
-            return [self.clean_url(link['url']) for link in links if (link['is_displayed'] == True and len(link['url']) < 64)]
+            return [self.clean_url(link['url']) for link in links if (link['is_displayed'] == True and self.is_link_valid(link['url']))]
        except Exception as e:
            self.logger.error(f"Error getting navigable links: {str(e)}")
            return []
@ -225,7 +228,7 @@ if __name__ == "__main__":
    browser = Browser(headless=False)
    try:
-        browser.go_to("https://www.seoul.co.kr/")
+        browser.go_to("https://github.com/Fosowl/agenticSeek")
        text = browser.get_text()
        print("Page Text in Markdown:")
        print(text)