agenticSeek/sources/agents/browser_agent.py

import re
import time
from datetime import date
from typing import List, Tuple, Type, Dict
from enum import Enum
import asyncio

from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
from sources.tools.searxSearch import searxSearch
from sources.browser import Browser
from sources.logger import Logger
from sources.memory import Memory

class Action(Enum):
    REQUEST_EXIT = "REQUEST_EXIT"
    FORM_FILLED = "FORM_FILLED"
    GO_BACK = "GO_BACK"
    NAVIGATE = "NAVIGATE"
    SEARCH = "SEARCH"

class BrowserAgent(Agent):
    def __init__(self, name, prompt_path, provider, verbose=False, browser=None):
        """
        The Browser agent is an agent that navigate the web autonomously in search of answer
        """
        super().__init__(name, prompt_path, provider, verbose, browser)
        self.tools = {
            "web_search": searxSearch(),
        }
        self.role = "web"
        self.type = "browser_agent"
        self.browser = browser
        self.current_page = ""
        self.search_history = []
        self.navigable_links = []
        self.last_action = Action.NAVIGATE.value
        self.notes = []
        self.date = self.get_today_date()
        self.logger = Logger("browser_agent.log")
        self.memory = Memory(self.load_prompt(prompt_path),
                        recover_last_session=False, # session recovery in handled by the interaction class
                        memory_compression=False,
                        model_provider=provider.get_model_name())

    def get_today_date(self) -> str:
        """Get the date"""
        date_time = date.today()
        return date_time.strftime("%B %d, %Y")

    def extract_links(self, search_result: str) -> List[str]:
        """Extract all links from a sentence."""
        pattern = r'(https?://\S+|www\.\S+)'
        matches = re.findall(pattern, search_result)
        trailing_punct = ".,!?;:)"
        cleaned_links = [link.rstrip(trailing_punct) for link in matches]
        self.logger.info(f"Extracted links: {cleaned_links}")
        return self.clean_links(cleaned_links)

    def extract_form(self, text: str) -> List[str]:
        """Extract form written by the LLM in format [input_name](value)"""
        inputs = []
        matches = re.findall(r"\[\w+\]\([^)]+\)", text)
        return matches

    def clean_links(self, links: List[str]) -> List[str]:
        """Ensure no '.' at the end of link"""
        links_clean = []
        for link in links:
            link = link.strip()
            if not (link[-1].isalpha() or link[-1].isdigit()):
                links_clean.append(link[:-1])
            else:
                links_clean.append(link)
        return links_clean

    def get_unvisited_links(self) -> List[str]:
        return "\n".join([f"[{i}] {link}" for i, link in enumerate(self.navigable_links) if link not in self.search_history])

    def make_newsearch_prompt(self, user_prompt: str, search_result: dict) -> str:
        search_choice = self.stringify_search_results(search_result)
        self.logger.info(f"Search results: {search_choice}")
        return f"""
        Based on the search result:
        {search_choice}
        Your goal is to find accurate and complete information to satisfy the user’s request.
        User request: {user_prompt}
        To proceed, choose a relevant link from the search results. Announce your choice by saying: "I will navigate to <link>"
        Do not explain your choice.
        """

    def make_navigation_prompt(self, user_prompt: str, page_text: str) -> str:
        remaining_links = self.get_unvisited_links()
        remaining_links_text = remaining_links if remaining_links is not None else "No links remaining, do a new search."
        inputs_form = self.browser.get_form_inputs()
        inputs_form_text = '\n'.join(inputs_form)
        notes = '\n'.join(self.notes)
        self.logger.info(f"Making navigation prompt with page text: {page_text[:100]}...\nremaining links: {remaining_links_text}")
        self.logger.info(f"Inputs form: {inputs_form_text}")
        self.logger.info(f"Notes: {notes}")

        return f"""
        You are navigating the web.

        **Current Context**

        Webpage ({self.current_page}) content:
        {page_text}

        Allowed Navigation Links:
        {remaining_links_text}

        Inputs forms:
        {inputs_form_text}

        End of webpage ({self.current_page}.

        # Instruction

        1. **Evaluate if the page is relevant for user’s query and document finding:**
          - If the page is relevant, extract and summarize key information in concise notes (Note: <your note>)
          - If page not relevant, state: "Error: <specific reason the page does not address the query>" and either return to the previous page or navigate to a new link.
          - Notes should be factual, useful summaries of relevant content, they should always include specific names or link. Written as: "On <website URL>, <key fact 1>. <Key fact 2>. <Additional insight>." Avoid phrases like "the page provides" or "I found that."
        2. **Navigate to a link by either: **
          - Saying I will navigate to (write down the full URL) www.example.com/cats
          - Going back: If no link seems helpful, say: {Action.GO_BACK.value}.
        3. **Fill forms on the page:**
          - Fill form only when relevant.
          - Use Login if username/password specified by user. For quick task create account, remember password in a note.
          - You can fill a form using [form_name](value). Don't {Action.GO_BACK.value} when filling form.
          - If a form is irrelevant or you lack informations (eg: don't know user email) leave it empty.
        4. **Decide if you completed the task**
          - Check your notes. Do they fully answer the question? Did you verify with multiple pages?
          - Are you sure it’s correct?
          - If yes to all, say {Action.REQUEST_EXIT}.
          - If no, or a page lacks info, go to another link.
          - Never stop or ask the user for help.

        **Rules:**
        - Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer.
        - Put note in a single paragraph.
        - When you exit, explain why.

        # Example:

        Example 1 (useful page, no need go futher):
        Note: According to karpathy site LeCun net is ...
        No link seem useful to provide futher information.
        Action: {Action.GO_BACK.value}

        Example 2 (not useful, see useful link on page):
        Error: reddit.com/welcome does not discuss anything related to the user’s query.
        There is a link that could lead to the information.
        Action: navigate to http://reddit.com/r/locallama

        Example 3 (not useful, no related links):
        Error: x.com does not discuss anything related to the user’s query and no navigation link are usefull.
        Action: {Action.GO_BACK.value}

        Example 3 (clear definitive query answer found or enought notes taken):
        I took 10 notes so far with enought finding to answer user question.
        Therefore I should exit the web browser.
        Action: {Action.REQUEST_EXIT.value}

        Example 4 (loging form visible):

        Note: I am on the login page, I will type the given username and password.
        Action:
        [username_field](David)
        [password_field](edgerunners77)

        Remember, user asked:
        {user_prompt}
        You previously took these notes:
        {notes}
        Do not Step-by-Step explanation. Write comprehensive Notes or Error as a long paragraph followed by your action.
        You must always take notes.
        """

    async def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
        animate_thinking("Thinking...", color="status")
        self.memory.push('user', prompt)
        answer, reasoning = await self.llm_request()
        if show_reasoning:
            pretty_print(reasoning, color="failure")
        pretty_print(answer, color="output")
        return answer, reasoning

    def select_unvisited(self, search_result: List[str]) -> List[str]:
        results_unvisited = []
        for res in search_result:
            if res["link"] not in self.search_history:
                results_unvisited.append(res)
        self.logger.info(f"Unvisited links: {results_unvisited}")
        return results_unvisited

    def jsonify_search_results(self, results_string: str) -> List[str]:
        result_blocks = results_string.split("\n\n")
        parsed_results = []
        for block in result_blocks:
            if not block.strip():
                continue
            lines = block.split("\n")
            result_dict = {}
            for line in lines:
                if line.startswith("Title:"):
                    result_dict["title"] = line.replace("Title:", "").strip()
                elif line.startswith("Snippet:"):
                    result_dict["snippet"] = line.replace("Snippet:", "").strip()
                elif line.startswith("Link:"):
                    result_dict["link"] = line.replace("Link:", "").strip()
            if result_dict:
                parsed_results.append(result_dict)
        return parsed_results

    def stringify_search_results(self, results_arr: List[str]) -> str:
        return '\n\n'.join([f"Link: {res['link']}\nPreview: {res['snippet']}" for res in results_arr])

    def parse_answer(self, text):
        lines = text.split('\n')
        saving = False
        buffer = []
        links = []
        for line in lines:
            if line == '' or 'action:' in line.lower():
                saving = False
            if "note" in line.lower():
                saving = True
            if saving:
                buffer.append(line.replace("notes:", ''))
            else:
                links.extend(self.extract_links(line))
        self.notes.append('. '.join(buffer).strip())
        return links

    def select_link(self, links: List[str]) -> str | None:
        for lk in links:
            if lk == self.current_page:
                self.logger.info(f"Already visited {lk}. Skipping.")
                continue
            self.logger.info(f"Selected link: {lk}")
            return lk
        self.logger.warning("No link selected.")
        return None

    def get_page_text(self, compression = False) -> str:
        """Get the text content of the current page."""
        page_text = self.browser.get_text()
        if compression:
            #page_text = self.memory.compress_text_to_max_ctx(page_text)
            page_text = self.memory.trim_text_to_max_ctx(page_text)
        return page_text

    def conclude_prompt(self, user_query: str) -> str:
        annotated_notes = [f"{i+1}: {note.lower()}" for i, note in enumerate(self.notes)]
        search_note = '\n'.join(annotated_notes)
        pretty_print(f"AI notes:\n{search_note}", color="success")
        return f"""
        Following a human request:
        {user_query}
        A web browsing AI made the following finding across different pages:
        {search_note}

        Expand on the finding or step that lead to success, and provide a conclusion that answer the request. Include link when possible.
        Do not give advices or try to answer the human. Just structure the AI finding in a structured and clear way.
        You should answer in the same language as the user.
        """

    def search_prompt(self, user_prompt: str) -> str:
        return f"""
        Current date: {self.date}
        Make a efficient search engine query to help users with their request:
        {user_prompt}
        Example:
        User: "go to twitter, login with username toto and password pass79 to my twitter and say hello everyone "
        You: search: Twitter login page.

        User: "I need info on the best laptops for AI this year."
        You: "search: best laptops 2025 to run Machine Learning model, reviews"

        User: "Search for recent news about space missions."
        You: "search: Recent space missions news, {self.date}"

        Do not explain, do not write anything beside the search query.
        Except if query does not make any sense for a web search then explain why and say {Action.REQUEST_EXIT.value}
        Do not try to answer query. you can only formulate search term or exit.
        """

    def handle_update_prompt(self, user_prompt: str, page_text: str, fill_success: bool) -> str:
        prompt = f"""
        You are a web browser.
        You just filled a form on the page.
        Now you should see the result of the form submission on the page:
        Page text:
        {page_text}
        The user asked: {user_prompt}
        Does the page answer the user’s query now? Are you still on a login page or did you get redirected?
        If it does, take notes of the useful information, write down result and say {Action.FORM_FILLED.value}.
        if it doesn’t, say: Error: Attempt to fill form didn't work {Action.GO_BACK.value}.
        If you were previously on a login form, no need to take notes.
        """
        if not fill_success:
            prompt += f"""
            According to browser feedback, the form was not filled correctly. Is that so? you might consider other strategies.
            """
        return prompt

    def show_search_results(self, search_result: List[str]):
        pretty_print("\nSearch results:", color="output")
        for res in search_result:
            pretty_print(f"Title: {res['title']} - ", color="info", no_newline=True)
            pretty_print(f"Link: {res['link']}", color="status")

    def stuck_prompt(self, user_prompt: str, unvisited: List[str]) -> str:
        """
        Prompt for when the agent repeat itself, can happen when fail to extract a link.
        """
        prompt = self.make_newsearch_prompt(user_prompt, unvisited)
        prompt += f"""
        You previously said:
        {self.last_answer}
        You must consider other options. Choose other link.
        """
        return prompt

    async def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]:
        """
        Process the user prompt to conduct an autonomous web search.
        Start with a google search with searxng using web_search tool.
        Then enter a navigation logic to find the answer or conduct required actions.
        Args:
          user_prompt: The user's input query
          speech_module: Optional speech output module
        Returns:
            tuple containing the final answer and reasoning
        """
        complete = False

        animate_thinking(f"Thinking...", color="status")
        mem_begin_idx = self.memory.push('user', self.search_prompt(user_prompt))
        ai_prompt, reasoning = await self.llm_request()
        if Action.REQUEST_EXIT.value in ai_prompt:
            pretty_print(f"Web agent requested exit.\n{reasoning}\n\n{ai_prompt}", color="failure")
            return ai_prompt, ""
        animate_thinking(f"Searching...", color="status")
        self.status_message = "Searching..."
        search_result_raw = self.tools["web_search"].execute([ai_prompt], False)
        search_result = self.jsonify_search_results(search_result_raw)[:16]
        self.show_search_results(search_result)
        prompt = self.make_newsearch_prompt(user_prompt, search_result)
        unvisited = [None]
        while not complete and len(unvisited) > 0:

            self.memory.clear()
            unvisited = self.select_unvisited(search_result)
            answer, reasoning = await self.llm_decide(prompt, show_reasoning = False)
            if self.last_answer == answer:
                prompt = self.stuck_prompt(user_prompt, unvisited)
                continue
            self.last_answer = answer
            pretty_print('▂'*32, color="status")

            extracted_form = self.extract_form(answer)
            if len(extracted_form) > 0:
                self.status_message = "Filling web form..."
                pretty_print(f"Filling inputs form...", color="status")
                fill_success = self.browser.fill_form(extracted_form)
                page_text = self.get_page_text()
                answer = self.handle_update_prompt(user_prompt, page_text, fill_success)
                answer, reasoning = await self.llm_decide(prompt)

            if Action.FORM_FILLED.value in answer:
                pretty_print(f"Filled form. Handling page update.", color="status")
                page_text = self.get_page_text()
                self.navigable_links = self.browser.get_navigable()
                prompt = self.make_navigation_prompt(user_prompt, page_text)
                continue

            links = self.parse_answer(answer)
            link = self.select_link(links)
            if link == self.current_page:
                pretty_print(f"Already visited {link}. Search callback.", color="status")
                prompt = self.make_newsearch_prompt(user_prompt, unvisited)
                self.search_history.append(link)
                continue

            if Action.REQUEST_EXIT.value in answer:
                self.status_message = "Exiting web browser..."
                pretty_print(f"Agent requested exit.", color="status")
                complete = True
                break

            if (link == None and len(extracted_form) < 3) or Action.GO_BACK.value in answer or link in self.search_history:
                pretty_print(f"Going back to results. Still {len(unvisited)}", color="status")
                self.status_message = "Going back to search results..."
                prompt = self.make_newsearch_prompt(user_prompt, unvisited)
                self.search_history.append(link)
                self.current_page = link
                continue

            animate_thinking(f"Navigating to {link}", color="status")
            if speech_module: speech_module.speak(f"Navigating to {link}")
            nav_ok = self.browser.go_to(link)
            self.search_history.append(link)
            if not nav_ok:
                pretty_print(f"Failed to navigate to {link}.", color="failure")
                prompt = self.make_newsearch_prompt(user_prompt, unvisited)
                continue
            self.current_page = link
            page_text = self.get_page_text()
            self.navigable_links = self.browser.get_navigable()
            prompt = self.make_navigation_prompt(user_prompt, page_text)
            self.status_message = "Navigating..."
            self.browser.screenshot()

        pretty_print("Exited navigation, starting to summarize finding...", color="status")
        prompt = self.conclude_prompt(user_prompt)
        mem_last_idx = self.memory.push('user', prompt)
        self.status_message = "Summarizing findings..."
        answer, reasoning = await self.llm_request()
        pretty_print(answer, color="output")
        self.status_message = "Ready"
        self.last_answer = answer
        return answer, reasoning

if __name__ == "__main__":
    pass