mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-06 11:05:26 +00:00
feat : better numerical value handling on webpage
This commit is contained in:
parent
4f7e30b498
commit
ff9c1576b6
@ -90,7 +90,7 @@ class BrowserAgent(Agent):
|
||||
{remaining_links_text}
|
||||
|
||||
Your task:
|
||||
1. Decide if the current page answers the user’s query: {user_prompt}
|
||||
1. Decide if the current page answers the user’s query:
|
||||
- If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
|
||||
- If it does and you completed user request, say REQUEST_EXIT
|
||||
- If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
|
||||
@ -120,7 +120,7 @@ class BrowserAgent(Agent):
|
||||
GO_BACK
|
||||
|
||||
Example 3 (query answer found):
|
||||
Note: I found on github.com that agenticSeek is Fosowl.
|
||||
Note: I found on github.com that agenticSeek is made by Fosowl.
|
||||
Given this information, given this I should exit the web browser. REQUEST_EXIT
|
||||
|
||||
Example 4 (loging form visible):
|
||||
@ -131,7 +131,8 @@ class BrowserAgent(Agent):
|
||||
You see the following inputs forms:
|
||||
{inputs_form_text}
|
||||
|
||||
Remember, the user asked: {user_prompt}
|
||||
Remember, the user asked:
|
||||
{user_prompt}
|
||||
So far you took these notes:
|
||||
{notes}
|
||||
You are currently on page : {self.current_page}
|
||||
|
@ -22,7 +22,10 @@ import logging
|
||||
import sys
|
||||
import re
|
||||
|
||||
from sources.utility import pretty_print, animate_thinking
|
||||
if __name__ == "__main__":
|
||||
from utility import pretty_print, animate_thinking
|
||||
else:
|
||||
from sources.utility import pretty_print, animate_thinking
|
||||
|
||||
logging.basicConfig(filename='browser.log', level=logging.ERROR,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
@ -155,13 +158,12 @@ class Browser:
|
||||
"""Check if the text qualifies as a meaningful sentence or contains important error codes."""
|
||||
text = text.strip()
|
||||
|
||||
error_codes = ["404", "403", "500", "502", "503"]
|
||||
if any(code in text for code in error_codes):
|
||||
if any(c.isdigit() for c in text):
|
||||
return True
|
||||
words = re.findall(r'\w+', text, re.UNICODE)
|
||||
word_count = len(words)
|
||||
has_punctuation = any(text.endswith(p) for p in ['.', ',', ',', '!', '?', '。', '!', '?', '।', '۔'])
|
||||
is_long_enough = word_count > 5
|
||||
is_long_enough = word_count > 4
|
||||
return (word_count >= 5 and (has_punctuation or is_long_enough))
|
||||
|
||||
def get_text(self) -> str | None:
|
||||
@ -173,9 +175,8 @@ class Browser:
|
||||
element.decompose()
|
||||
|
||||
text = soup.get_text()
|
||||
lines = (line.strip() for line in text.splitlines())
|
||||
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
||||
text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk))
|
||||
lines = (f"{line.strip()}\n" for line in text.splitlines())
|
||||
text = "\n".join(chunk for chunk in lines if chunk and self.is_sentence(chunk))
|
||||
text = text[:4096]
|
||||
#markdown_text = markdownify.markdownify(text, heading_style="ATX")
|
||||
return "[Start of page]\n" + text + "\n[End of page]"
|
||||
@ -448,6 +449,9 @@ if __name__ == "__main__":
|
||||
browser = Browser(driver, anticaptcha_manual_install=True)
|
||||
time.sleep(10)
|
||||
|
||||
#browser.go_to("https://coinmarketcap.com/")
|
||||
#txt = browser.get_text()
|
||||
#print(txt)
|
||||
print("AntiCaptcha / Form Test")
|
||||
browser.go_to("https://www.google.com/recaptcha/api2/demo")
|
||||
#browser.go_to("https://practicetestautomation.com/practice-test-login/")
|
||||
@ -456,4 +460,4 @@ if __name__ == "__main__":
|
||||
inputs = ['[input1](Martin)', f'[input2](Test)', '[input3](test@gmail.com)']
|
||||
browser.fill_form_inputs(inputs)
|
||||
browser.find_and_click_submission()
|
||||
time.sleep(30)
|
||||
time.sleep(10)
|
||||
|
Loading…
x
Reference in New Issue
Block a user