feat: improve handling of numerical values in web page text

2025-07-24 10:20:13 +00:00 · 2025-04-04 11:56:11 +02:00 · 2025-04-04 11:56:11 +02:00 · ff9c1576b6
commit ff9c1576b6
parent 4f7e30b498
2 changed files with 16 additions and 11 deletions
--- a/sources/agents/browser_agent.py
+++ b/sources/agents/browser_agent.py
@ -90,7 +90,7 @@ class BrowserAgent(Agent):
        {remaining_links_text}
        Your task:
-        1. Decide if the current page answers the user’s query: {user_prompt}
+        1. Decide if the current page answers the user’s query:
          - If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
          - If it does and you completed user request, say REQUEST_EXIT
          - If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
@ -120,7 +120,7 @@ class BrowserAgent(Agent):
        GO_BACK
        Example 3 (query answer found):
-        Note: I found on github.com that agenticSeek is Fosowl.
+        Note: I found on github.com that agenticSeek is made by Fosowl.
        Given this information, given this I should exit the web browser. REQUEST_EXIT
        Example 4 (loging form visible):
@ -131,7 +131,8 @@ class BrowserAgent(Agent):
        You see the following inputs forms:
        {inputs_form_text}
-        Remember, the user asked: {user_prompt}
+        Remember, the user asked:
        {user_prompt}
        So far you took these notes:
        {notes}
        You are currently on page : {self.current_page}
--- a/sources/browser.py
+++ b/sources/browser.py
@ -22,7 +22,10 @@ import logging
 import sys
 import re
-from sources.utility import pretty_print, animate_thinking
+if __name__ == "__main__":
    from utility import pretty_print, animate_thinking
 else:
    from sources.utility import pretty_print, animate_thinking
 logging.basicConfig(filename='browser.log', level=logging.ERROR, 
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
@ -155,13 +158,12 @@ class Browser:
        """Check if the text qualifies as a meaningful sentence or contains important error codes."""
        text = text.strip()
-        error_codes = ["404", "403", "500", "502", "503"]
+        if any(c.isdigit() for c in text):
        if any(code in text for code in error_codes):
            return True
        words = re.findall(r'\w+', text, re.UNICODE)
        word_count = len(words)
        has_punctuation = any(text.endswith(p) for p in ['.', '，', ',', '!', '?', '。', '！', '？', '।', '۔'])
-        is_long_enough = word_count > 5
+        is_long_enough = word_count > 4
        return (word_count >= 5 and (has_punctuation or is_long_enough))
    def get_text(self) -> str | None:
@ -173,9 +175,8 @@ class Browser:
                element.decompose()
            text = soup.get_text()
-            lines = (line.strip() for line in text.splitlines())
+            lines = (f"{line.strip()}\n" for line in text.splitlines())
-            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+            text = "\n".join(chunk for chunk in lines if chunk and self.is_sentence(chunk))
            text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk))
            text = text[:4096]
            #markdown_text = markdownify.markdownify(text, heading_style="ATX")
            return "[Start of page]\n" + text + "\n[End of page]"
@ -448,6 +449,9 @@ if __name__ == "__main__":
    browser = Browser(driver, anticaptcha_manual_install=True)
    time.sleep(10)
    #browser.go_to("https://coinmarketcap.com/")
    #txt = browser.get_text()
    #print(txt)
    print("AntiCaptcha / Form Test")
    browser.go_to("https://www.google.com/recaptcha/api2/demo")
    #browser.go_to("https://practicetestautomation.com/practice-test-login/")
@ -456,4 +460,4 @@ if __name__ == "__main__":
    inputs = ['[input1](Martin)', f'[input2](Test)', '[input3](test@gmail.com)']
    browser.fill_form_inputs(inputs)
    browser.find_and_click_submission()
-    time.sleep(30)
+    time.sleep(10)