diff --git a/media/exemples/search_startup.png b/media/exemples/search_startup.png new file mode 100644 index 0000000..180317f Binary files /dev/null and b/media/exemples/search_startup.png differ diff --git a/prompts/browser_agent.txt b/prompts/browser_agent.txt index 52846f4..77c6a9e 100644 --- a/prompts/browser_agent.txt +++ b/prompts/browser_agent.txt @@ -1,21 +1,9 @@ -You are an internet ai that can browse the web for information. -In fact you are embedded in a browser with selenium. -If you need to conduct a web search, you can use the following tool: -- web_search: to search the web for information - -This is how you can use the web_search tool: -```web_search - -``` - -This will provide you with a list of links that you can navigate to. -You can navigate to a specific link by typing the link. For example, If you say: -"I want to navigate to https://www.google.com" - -You will navigate to https://www.google.com -Any link that you type will be opened in a new tab. - -If you want to exit the browser, you can say: -"REQUEST_EXIT" -Only exit the browser if you are done browsing. \ No newline at end of file +You are a web browsing AI, your goal is to explore the internet to find information. +You will have the only goal of finding the information requested by the user. +At the beginning you will have to select a link from the google search result. +You will choose a link by simply typing it. +This will automatically make you browse to the link. +Once on a webpage you will see the page content and be given further navigation options. +You can type a link to navigate further on the page, go back to the search result or exit. +At each interaction step the browser will remind you of your options. 
diff --git a/sources/agents/agent.py b/sources/agents/agent.py index 37205c3..af2559a 100644 --- a/sources/agents/agent.py +++ b/sources/agents/agent.py @@ -34,7 +34,7 @@ class Agent(): name: str, prompt_path:str, provider, - recover_last_session=False) -> None: + recover_last_session=True) -> None: self.agent_name = name self.role = None self.current_directory = os.getcwd() diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 47246dd..ebd50ba 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -8,7 +8,7 @@ from sources.browser import Browser class BrowserAgent(Agent): def __init__(self, model, name, prompt_path, provider): """ - The casual agent is a special for casual talk to the user without specific tasks. + The Browser agent is an agent that navigates the web autonomously in search of answers. """ super().__init__(model, name, prompt_path, provider) self.tools = { @@ -16,86 +16,146 @@ class BrowserAgent(Agent): } self.role = "deep research and web search" self.browser = Browser() - self.browser.goTo("https://github.com/") + self.browser.go_to("https://github.com/") self.search_history = [] + self.navigable_links = [] + self.notes = [] - def make_init_prompt(self, user_prompt: str, search_result: str): - return f""" - Based on the search result: - {search_result} - Start browsing and find the information the user want. - User: {user_prompt} - You must choose a link to navigate to. Say i want to navigate to a . - """ - + def extract_links(self, search_result: str): - return re.findall(r'https?://[^\s]+', search_result) - - def make_navigation_prompt(self, user_prompt: str, page_text: str, navigable_links: list): - remaining_links = "\n".join([f"[{i}] {link}" for i, link in enumerate(navigable_links) if link not in self.search_history]) - return f""" - \nYou are browsing the web. Not the user, you are the browser. 
- - Page content: - {page_text} - - Navigable links: - {remaining_links} - - - You must choose a link to navigate to or do a new search. - Remember, you seek the information the user want. - The user query was : {user_prompt} - If you want to do a new search, use the "web_search" tool. - Exemple: - ```web_search - weather in tokyo - ``` - If you have an answer and want to exit the browser, please say "REQUEST_EXIT". - If you don't choose a link or do a new search I will cut my fucking arm off. - """ - + pattern = r'(https?://\S+|www\.\S+)' + matches = re.findall(pattern, search_result) + trailing_punct = ".,!?;:" + cleaned_links = [link.rstrip(trailing_punct) for link in matches] + return self.clean_links(cleaned_links) + def clean_links(self, links: list): links_clean = [] for link in links: + link = link.strip() if link[-1] == '.': links_clean.append(link[:-1]) else: links_clean.append(link) return links_clean + + def get_unvisited_links(self): + return "\n".join([f"[{i}] {link}" for i, link in enumerate(self.navigable_links) if link not in self.search_history]) + + def make_newsearch_prompt(self, user_prompt: str, search_result: dict): + search_choice = self.stringify_search_results(search_result) + return f""" + Based on the search result: + {search_choice} + Your goal is to find accurate and complete information to satisfy the user’s request. + User request: {user_prompt} + To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to <link>." + Do not explain your choice. + """ - def process(self, prompt, speech_module) -> str: + def make_navigation_prompt(self, user_prompt: str, page_text: str): + remaining_links = self.get_unvisited_links() + remaining_links_text = remaining_links if remaining_links else "No links remaining, proceed with a new search." + return f""" + \nYou are currently browsing the web. Not the user, you are the browser. 
+ + Page content: + {page_text} + + You can navigate to these links: + {remaining_links_text} + + If no link seems appropriate, please say "GO_BACK". + Remember, you seek the information the user wants. + The user query was : {user_prompt} + You must choose a link (write it down) to navigate to, or go back. + For example you can say: i want to go to www.events.org/events + Always end with a sentence that summarizes useful information, if any, for example: + Summary: According to https://karpathy.github.io/ LeCun net is the earliest real-world application of a neural net + Another example: + Summary: the BBC website does not provide useful information. + Do not explain your choice, be short, concise. + """ + + def llm_decide(self, prompt): + animate_thinking("Thinking...", color="status") + self.memory.push('user', prompt) + answer, reasoning = self.llm_request(prompt) + pretty_print("-"*100) + pretty_print(answer, color="output") + pretty_print("-"*100) + return answer, reasoning + + def select_unvisited(self, search_result): + results_unvisited = [] + for res in search_result: + if res["link"] not in self.search_history: + results_unvisited.append(res) + return results_unvisited + + def jsonify_search_results(self, results_string): + result_blocks = results_string.split("\n\n") + parsed_results = [] + for block in result_blocks: + if not block.strip(): + continue + lines = block.split("\n") + result_dict = {} + for line in lines: + if line.startswith("Title:"): + result_dict["title"] = line.replace("Title:", "").strip() + elif line.startswith("Snippet:"): + result_dict["snippet"] = line.replace("Snippet:", "").strip() + elif line.startswith("Link:"): + result_dict["link"] = line.replace("Link:", "").strip() + if result_dict: + parsed_results.append(result_dict) + return parsed_results + + def stringify_search_results(self, results_arr): + return '\n\n'.join([f"Link: {res['link']}" for res in results_arr]) + + def save_notes(self, text): + lines = text.split('\n') + for 
line in lines: + if "summary:" in line.lower(): + self.notes.append(line) + + def process(self, user_prompt, speech_module) -> str: complete = False animate_thinking(f"Searching...", color="status") - search_result = self.tools["web_search"].execute([prompt], False) - user_prompt = self.make_init_prompt(prompt, search_result) - prompt = user_prompt + search_result_raw = self.tools["web_search"].execute([user_prompt], False) + search_result = self.jsonify_search_results(search_result_raw) + search_result = search_result[:10] # until further improvement + prompt = self.make_newsearch_prompt(user_prompt, search_result) + unvisited = [None] while not complete: - animate_thinking("Thinking...", color="status") - self.memory.push('user', user_prompt) - answer, reasoning = self.llm_request(prompt) - pretty_print("-"*100) - pretty_print(answer, color="output") - pretty_print("-"*100) + answer, reasoning = self.llm_decide(prompt) + self.save_notes(answer) if "REQUEST_EXIT" in answer: complete = True break links = self.extract_links(answer) - links_clean = self.clean_links(links) - if len(links_clean) == 0: - prompt = f"Please choose a link to navigate to or do a new search. Links found:\n{links_clean}" - pretty_print("No links found, doing a new search.", color="warning") + if len(links) == 0 or "GO_BACK" in answer: + unvisited = self.select_unvisited(search_result) + prompt = self.make_newsearch_prompt(user_prompt, unvisited) + pretty_print(f"Going back to results. 
Still {len(unvisited)}", color="warning") + links = [] continue + if len(unvisited) == 0: + break animate_thinking(f"Navigating to {links[0]}", color="status") speech_module.speak(f"Navigating to {links[0]}") - self.browser.goTo(links[0]) + self.browser.go_to(links[0]) self.search_history.append(links[0]) - page_text = self.browser.getText()[:2048] - navigable_links = self.browser.getNavigable()[:15] - prompt = self.make_navigation_prompt(user_prompt, page_text, navigable_links) + page_text = self.browser.get_text() + self.navigable_links = self.browser.get_navigable() + prompt = self.make_navigation_prompt(user_prompt, page_text) + speech_module.speak(answer) self.browser.close() + print("Final notes:", self.notes) return answer, reasoning if __name__ == "__main__": diff --git a/sources/agents/casual_agent.py b/sources/agents/casual_agent.py index 314c3b8..d99eda9 100644 --- a/sources/agents/casual_agent.py +++ b/sources/agents/casual_agent.py @@ -18,7 +18,7 @@ class CasualAgent(Agent): "file_finder": FileFinder(), "bash": BashInterpreter() } - self.role = "talking, advices, events and philosophical" + self.role = "casual talking" def process(self, prompt, speech_module) -> str: complete = False diff --git a/sources/browser.py b/sources/browser.py index 5484c4b..06c0c6c 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -35,7 +35,7 @@ class Browser: except Exception as e: raise Exception(f"Failed to initialize browser: {str(e)}") - def goTo(self, url): + def go_to(self, url): """Navigate to a specified URL.""" try: self.driver.get(url) @@ -47,12 +47,19 @@ class Browser: return False def is_sentence(self, text): - """Check if the text is a sentence.""" - if "404" in text: - return True # we want the ai to see the error - return len(text.split(" ")) > 5 and '.' 
in text + """Check if the text qualifies as a meaningful sentence or contains important error codes.""" + text = text.strip() + error_codes = ["404", "403", "500", "502", "503"] + if any(code in text for code in error_codes): + return True + words = text.split() + word_count = len(words) + has_punctuation = text.endswith(('.', '!', '?')) + is_long_enough = word_count > 5 + has_letters = any(word.isalpha() for word in words) + return (word_count >= 5 and (has_punctuation or is_long_enough) and has_letters) - def getText(self): + def get_text(self): """Get page text and convert it to README (Markdown) format.""" try: soup = BeautifulSoup(self.driver.page_source, 'html.parser') @@ -72,8 +79,24 @@ class Browser: except Exception as e: self.logger.error(f"Error getting text: {str(e)}") return None + + def clean_url(self, url): + clean = url.split('#')[0] + parts = clean.split('?', 1) + base_url = parts[0] + if len(parts) > 1: + query = parts[1] + essential_params = [] + for param in query.split('&'): + if param.startswith('_skw=') or param.startswith('q=') or param.startswith('s='): + essential_params.append(param) + elif param.startswith('_') or param.startswith('hash=') or param.startswith('itmmeta='): + break + if essential_params: + return f"{base_url}?{'&'.join(essential_params)}" + return base_url - def getNavigable(self): + def get_navigable(self): """Get all navigable links on the current page.""" try: links = [] @@ -89,12 +112,12 @@ class Browser: }) self.logger.info(f"Found {len(links)} navigable links") - return links + return [self.clean_url(link['url']) for link in links if link['is_displayed'] and len(link['url']) < 256] except Exception as e: self.logger.error(f"Error getting navigable links: {str(e)}") return [] - def clickElement(self, xpath): + def click_element(self, xpath): """Click an element specified by xpath.""" try: element = self.wait.until( @@ -107,15 +130,15 @@ class Browser: self.logger.error(f"Element not found or not clickable: {xpath}") 
return False - def getCurrentUrl(self): + def get_current_url(self): """Get the current URL of the page.""" return self.driver.current_url - def getPageTitle(self): + def get_page_title(self): """Get the title of the current page.""" return self.driver.title - def scrollToBottom(self): + def scroll_bottom(self): """Scroll to the bottom of the page.""" try: self.driver.execute_script( @@ -127,7 +150,7 @@ class Browser: self.logger.error(f"Error scrolling: {str(e)}") return False - def takeScreenshot(self, filename): + def screenshot(self, filename): """Take a screenshot of the current page.""" try: self.driver.save_screenshot(filename) @@ -155,16 +178,11 @@ if __name__ == "__main__": browser = Browser(headless=False) try: - browser.goTo("https://karpathy.github.io/") - text = browser.getText() + browser.go_to("https://karpathy.github.io/") + text = browser.get_text() print("Page Text in Markdown:") print(text) - links = browser.getNavigable() - print("\nNavigable Links:") - for link in links[:50]: - print(f"Text: {link['text']}, URL: {link['url']}") - - browser.takeScreenshot("example.png") - + links = browser.get_navigable() + print("\nNavigable Links:", links) finally: browser.close() \ No newline at end of file diff --git a/sources/tools/webSearch.py b/sources/tools/webSearch.py index 4544d1f..07bb075 100644 --- a/sources/tools/webSearch.py +++ b/sources/tools/webSearch.py @@ -22,6 +22,42 @@ class webSearch(Tools): super().__init__() self.tag = "web_search" self.api_key = api_key or os.getenv("SERPAPI_KEY") # Requires a SerpApi key + self.paywall_keywords = [ + "subscribe", "login to continue", "access denied", "restricted content", "404", "this page is not working" + ] + + def link_valid(self, link): + """check if a link is valid.""" + if not link.startswith("http"): + return "Status: Invalid URL" + + headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"} + try: + response = requests.get(link, headers=headers, timeout=5) + 
status = response.status_code + if status == 200: + content = response.text[:1000].lower() + if any(keyword in content for keyword in self.paywall_keywords): + return "Status: Possible Paywall" + return "Status: OK" + elif status == 404: + return "Status: 404 Not Found" + elif status == 403: + return "Status: 403 Forbidden" + else: + return f"Status: {status} {response.reason}" + except requests.exceptions.RequestException as e: + return f"Error: {str(e)}" + + def check_all_links(self, links): + """Check all links, one by one.""" + # TODO: make this asynchronous + statuses = [] + print("Workers started, crawling the web...") + for i, link in enumerate(links): + status = self.link_valid(link) + statuses.append(status) + return statuses def execute(self, blocks: str, safety: bool = True) -> str: if self.api_key is None: @@ -37,7 +73,7 @@ class webSearch(Tools): params = { "q": query, "api_key": self.api_key, - "num": 100, + "num": 50, "output": "json" } response = requests.get(url, params=params) @@ -46,11 +82,16 @@ class webSearch(Tools): data = response.json() results = [] if "organic_results" in data and len(data["organic_results"]) > 0: - for result in data["organic_results"][:50]: + organic_results = data["organic_results"][:50] + links = [result.get("link", "No link available") for result in organic_results] + statuses = self.check_all_links(links) + for result, status in zip(organic_results, statuses): + if "OK" not in status: + continue title = result.get("title", "No title") snippet = result.get("snippet", "No snippet available") link = result.get("link", "No link available") - results.append(f"Title: {title}\nSnippet: {snippet}\nLink: {link}") + results.append(f"Title:{title}\nSnippet:{snippet}\nLink:{link}") return "\n\n".join(results) else: return "No results found for the query. 
@@ -73,5 +114,5 @@ if __name__ == "__main__": search_tool = webSearch(api_key=os.getenv("SERPAPI_KEY")) query = "when did covid start" result = search_tool.execute([query], safety=True) - feedback = search_tool.interpreter_feedback(result) - print(feedback) \ No newline at end of file + output = search_tool.interpreter_feedback(result) + print(output) \ No newline at end of file