Mirror of https://github.com/tcsenpai/agenticSeek.git (synced 2025-06-06 19:15:28 +00:00)
feat : better numerical value handling on webpage
This commit is contained in:
parent 4f7e30b498
commit ff9c1576b6
@ -90,7 +90,7 @@ class BrowserAgent(Agent):
|
|||||||
{remaining_links_text}
|
{remaining_links_text}
|
||||||
|
|
||||||
Your task:
|
Your task:
|
||||||
1. Decide if the current page answers the user’s query: {user_prompt}
|
1. Decide if the current page answers the user’s query:
|
||||||
- If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
|
- If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
|
||||||
- If it does and you completed user request, say REQUEST_EXIT
|
- If it does and you completed user request, say REQUEST_EXIT
|
||||||
- If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
|
- If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
|
||||||
@ -120,7 +120,7 @@ class BrowserAgent(Agent):
|
|||||||
GO_BACK
|
GO_BACK
|
||||||
|
|
||||||
Example 3 (query answer found):
|
Example 3 (query answer found):
|
||||||
Note: I found on github.com that agenticSeek is Fosowl.
|
Note: I found on github.com that agenticSeek is made by Fosowl.
|
||||||
Given this information, given this I should exit the web browser. REQUEST_EXIT
|
Given this information, given this I should exit the web browser. REQUEST_EXIT
|
||||||
|
|
||||||
Example 4 (loging form visible):
|
Example 4 (loging form visible):
|
||||||
@ -131,7 +131,8 @@ class BrowserAgent(Agent):
|
|||||||
You see the following inputs forms:
|
You see the following inputs forms:
|
||||||
{inputs_form_text}
|
{inputs_form_text}
|
||||||
|
|
||||||
Remember, the user asked: {user_prompt}
|
Remember, the user asked:
|
||||||
|
{user_prompt}
|
||||||
So far you took these notes:
|
So far you took these notes:
|
||||||
{notes}
|
{notes}
|
||||||
You are currently on page : {self.current_page}
|
You are currently on page : {self.current_page}
|
||||||
|
@ -22,7 +22,10 @@ import logging
|
|||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from sources.utility import pretty_print, animate_thinking
|
if __name__ == "__main__":
|
||||||
|
from utility import pretty_print, animate_thinking
|
||||||
|
else:
|
||||||
|
from sources.utility import pretty_print, animate_thinking
|
||||||
|
|
||||||
logging.basicConfig(filename='browser.log', level=logging.ERROR,
|
logging.basicConfig(filename='browser.log', level=logging.ERROR,
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||||
@ -155,13 +158,12 @@ class Browser:
|
|||||||
"""Check if the text qualifies as a meaningful sentence or contains important error codes."""
|
"""Check if the text qualifies as a meaningful sentence or contains important error codes."""
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
|
|
||||||
error_codes = ["404", "403", "500", "502", "503"]
|
if any(c.isdigit() for c in text):
|
||||||
if any(code in text for code in error_codes):
|
|
||||||
return True
|
return True
|
||||||
words = re.findall(r'\w+', text, re.UNICODE)
|
words = re.findall(r'\w+', text, re.UNICODE)
|
||||||
word_count = len(words)
|
word_count = len(words)
|
||||||
has_punctuation = any(text.endswith(p) for p in ['.', ',', ',', '!', '?', '。', '!', '?', '।', '۔'])
|
has_punctuation = any(text.endswith(p) for p in ['.', ',', ',', '!', '?', '。', '!', '?', '।', '۔'])
|
||||||
is_long_enough = word_count > 5
|
is_long_enough = word_count > 4
|
||||||
return (word_count >= 5 and (has_punctuation or is_long_enough))
|
return (word_count >= 5 and (has_punctuation or is_long_enough))
|
||||||
|
|
||||||
def get_text(self) -> str | None:
|
def get_text(self) -> str | None:
|
||||||
@ -173,9 +175,8 @@ class Browser:
|
|||||||
element.decompose()
|
element.decompose()
|
||||||
|
|
||||||
text = soup.get_text()
|
text = soup.get_text()
|
||||||
lines = (line.strip() for line in text.splitlines())
|
lines = (f"{line.strip()}\n" for line in text.splitlines())
|
||||||
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
text = "\n".join(chunk for chunk in lines if chunk and self.is_sentence(chunk))
|
||||||
text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk))
|
|
||||||
text = text[:4096]
|
text = text[:4096]
|
||||||
#markdown_text = markdownify.markdownify(text, heading_style="ATX")
|
#markdown_text = markdownify.markdownify(text, heading_style="ATX")
|
||||||
return "[Start of page]\n" + text + "\n[End of page]"
|
return "[Start of page]\n" + text + "\n[End of page]"
|
||||||
@ -448,6 +449,9 @@ if __name__ == "__main__":
|
|||||||
browser = Browser(driver, anticaptcha_manual_install=True)
|
browser = Browser(driver, anticaptcha_manual_install=True)
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
|
#browser.go_to("https://coinmarketcap.com/")
|
||||||
|
#txt = browser.get_text()
|
||||||
|
#print(txt)
|
||||||
print("AntiCaptcha / Form Test")
|
print("AntiCaptcha / Form Test")
|
||||||
browser.go_to("https://www.google.com/recaptcha/api2/demo")
|
browser.go_to("https://www.google.com/recaptcha/api2/demo")
|
||||||
#browser.go_to("https://practicetestautomation.com/practice-test-login/")
|
#browser.go_to("https://practicetestautomation.com/practice-test-login/")
|
||||||
@ -456,4 +460,4 @@ if __name__ == "__main__":
|
|||||||
inputs = ['[input1](Martin)', f'[input2](Test)', '[input3](test@gmail.com)']
|
inputs = ['[input1](Martin)', f'[input2](Test)', '[input3](test@gmail.com)']
|
||||||
browser.fill_form_inputs(inputs)
|
browser.fill_form_inputs(inputs)
|
||||||
browser.find_and_click_submission()
|
browser.find_and_click_submission()
|
||||||
time.sleep(30)
|
time.sleep(10)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user