diff --git a/prompts/file_agent.txt b/prompts/file_agent.txt index 82f2931..2fab37e 100644 --- a/prompts/file_agent.txt +++ b/prompts/file_agent.txt @@ -44,7 +44,7 @@ User: "I need to find the file config.txt and read its contents." Assistant: I’ll use file_finder to locate the file: -```file_finder +```file_finder:read config.txt ``` diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 758a4dc..e226a3d 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -15,9 +15,8 @@ class BrowserAgent(Agent): self.tools = { "web_search": searxSearch(), } - self.role = "deep research and web search" + self.role = "web search, internet, google" self.browser = Browser() - self.browser.go_to("https://github.com/") self.search_history = [] self.navigable_links = [] self.notes = [] @@ -50,7 +49,7 @@ class BrowserAgent(Agent): {search_choice} Your goal is to find accurate and complete information to satisfy the user’s request. User request: {user_prompt} - To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to ." + To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to " Do not explain your choice. """ @@ -58,27 +57,44 @@ class BrowserAgent(Agent): remaining_links = self.get_unvisited_links() remaining_links_text = remaining_links if remaining_links is not None else "No links remaining, proceed with a new search." return f""" - \nYou are currently browsing the web. Not the user, you are the browser. - - Page content: + You are a web browser. + You are currently on this webpage: {page_text} - You can navigate to these links: + You can navigate to these navigation links: {remaining_links} - You must choose a link (write it down) to navigate to, or go back. - For exemple you can say: i want to go to www.wikipedia.org/cats + Your task: + 1. Decide if the current page answers the user’s query: {user_prompt} + - If it does, take notes of the useful information, write down source, link or reference, then move to a new page. + - If it does and you are 100% certain that it provide a definive answer, say REQUEST_EXIT + - If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link. + 2. Navigate by either: + - Navigate to a navigation links (write the full URL, e.g., www.example.com/cats). + - If no link seems helpful, say: GO_BACK. + + Recap of note taking: + If useful -> Note: [Briefly summarize the key information that answers the user’s query.] + Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer. + If not useful -> Error: [Explain why the page doesn’t help.] + + Example 1 (useful page, no need of going futher): + Note: According to karpathy site (https://karpathy.github.io/) LeCun net is the earliest real-world application of a neural net" + No link seem useful to provide futher information. GO_BACK - Follow up with a summary of the page content (of the current page, not of the link), for example: - Summary: According to https://karpathy.github.io/ LeCun net is the earliest real-world application of a neural net" - The summary should include any useful finding that are useful in answering user query. - If a website does not have usefull information say Error, for exemple: - Error: This forum does not discus anything that can answer the user query - Be short, concise, direct. + Example 2 (not useful, but related link): + Error: This forum reddit.com/welcome does not discuss anything related to the user’s query. + There is a link that could lead to the information, I want to navigate to http://reddit.com/r/locallama - If no link seem appropriate, please say "GO_BACK". - Remember, you seek the information the user want. - The user query was : {user_prompt} + Example 3 (not useful, no related links): + Error: x.com does not discuss anything related to the user’s query and no navigation link are usefull + GO_BACK + + Example 3 (not useful, no related links): + Note: I found on github.com that the creator of agenticSeek is fosowl. + Given this information, given this I should exit the web browser. REQUEST_EXIT + + Remember, the user asked: {user_prompt} """ def llm_decide(self, prompt): @@ -122,11 +138,11 @@ class BrowserAgent(Agent): def save_notes(self, text): lines = text.split('\n') for line in lines: - if "summary" in line.lower(): + if "note" in line.lower(): self.notes.append(line) def conclude_prompt(self, user_query): - annotated_notes = [f"{i+1}: {note.lower().replace('summary:', '')}" for i, note in enumerate(self.notes)] + annotated_notes = [f"{i+1}: {note.lower().replace('note:', '')}" for i, note in enumerate(self.notes)] search_note = '\n'.join(annotated_notes) print("AI research notes:\n", search_note) return f""" @@ -174,7 +190,6 @@ class BrowserAgent(Agent): self.memory.push('user', prompt) answer, reasoning = self.llm_request(prompt) pretty_print(answer, color="output") - speech_module.speak(answer) return answer, reasoning if __name__ == "__main__": diff --git a/sources/agents/file_agent.py b/sources/agents/file_agent.py index cc16adb..3f3c0d1 100644 --- a/sources/agents/file_agent.py +++ b/sources/agents/file_agent.py @@ -14,7 +14,7 @@ class FileAgent(Agent): "file_finder": FileFinder(), "bash": BashInterpreter() } - self.role = "files operations" + self.role = "find, read, write, edit files" def process(self, prompt, speech_module) -> str: complete = False diff --git a/sources/browser.py b/sources/browser.py index cd100dd..917498b 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -13,6 +13,7 @@ import markdownify import logging import sys import re +from urllib.parse import urlparse class Browser: def __init__(self, headless=False, anticaptcha_install=False): @@ -58,7 +59,8 @@ class Browser: os.path.join(os.environ.get("LOCALAPPDATA", ""), "Google\\Chrome\\Application\\chrome.exe") # User install ] elif sys.platform.startswith("darwin"): # macOS - paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", "/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"] + paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"] else: # Linux paths = ["/usr/bin/google-chrome", "/usr/bin/chromium-browser", "/usr/bin/chromium"] @@ -79,19 +81,6 @@ class Browser: self.logger.error(f"Error navigating to {url}: {str(e)}") return False - def is_sentence(self, text): - """Check if the text qualifies as a meaningful sentence or contains important error codes.""" - text = text.strip() - error_codes = ["404", "403", "500", "502", "503"] - if any(code in text for code in error_codes): - return True - words = text.split() - word_count = len(words) - has_punctuation = text.endswith(('.', '!', '?')) - is_long_enough = word_count > 5 - has_letters = any(word.isalpha() for word in words) - return (word_count >= 5 and (has_punctuation or is_long_enough) and has_letters) - def is_sentence(self, text): """Check if the text qualifies as a meaningful sentence or contains important error codes.""" text = text.strip() @@ -118,10 +107,8 @@ class Browser: lines = (line.strip() for line in text.splitlines()) chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk)) - - markdown_text = markdownify.markdownify(text, heading_style="ATX") - - return markdown_text + #markdown_text = markdownify.markdownify(text, heading_style="ATX") + return "[Start of page]\n" + text + "\n[End of page]" except Exception as e: self.logger.error(f"Error getting text: {str(e)}") return None @@ -142,6 +129,22 @@ class Browser: if essential_params: return f"{base_url}?{'&'.join(essential_params)}" return base_url + + def is_link_valid(self, url): + """Check if a URL is a valid link (page, not related to icon or metadata).""" + if len(url) > 64: + return False + parsed_url = urlparse(url) + if not parsed_url.scheme or not parsed_url.netloc: + return False + if re.search(r'/\d+$', parsed_url.path): + return False + image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'] + metadata_extensions = ['.ico', '.xml', '.json', '.rss', '.atom'] + for ext in image_extensions + metadata_extensions: + if url.lower().endswith(ext): + return False + return True def get_navigable(self): """Get all navigable links on the current page.""" @@ -159,7 +162,7 @@ class Browser: }) self.logger.info(f"Found {len(links)} navigable links") - return [self.clean_url(link['url']) for link in links if (link['is_displayed'] == True and len(link['url']) < 64)] + return [self.clean_url(link['url']) for link in links if (link['is_displayed'] == True and self.is_link_valid(link['url']))] except Exception as e: self.logger.error(f"Error getting navigable links: {str(e)}") return [] @@ -225,7 +228,7 @@ if __name__ == "__main__": browser = Browser(headless=False) try: - browser.go_to("https://www.seoul.co.kr/") + browser.go_to("https://github.com/Fosowl/agenticSeek") text = browser.get_text() print("Page Text in Markdown:") print(text)