mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-10 12:57:14 +00:00
fix : browser not handling properly web form
This commit is contained in:
parent
a3e95abfde
commit
b3efd09fb3
@ -6,7 +6,8 @@ from sources.agents.agent import Agent
|
|||||||
from sources.tools.searxSearch import searxSearch
|
from sources.tools.searxSearch import searxSearch
|
||||||
from sources.browser import Browser
|
from sources.browser import Browser
|
||||||
from datetime import date
|
from datetime import date
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple, Type, Dict, Tuple
|
||||||
|
|
||||||
|
|
||||||
class BrowserAgent(Agent):
|
class BrowserAgent(Agent):
|
||||||
def __init__(self, name, prompt_path, provider, verbose=False, browser=None):
|
def __init__(self, name, prompt_path, provider, verbose=False, browser=None):
|
||||||
@ -92,7 +93,7 @@ class BrowserAgent(Agent):
|
|||||||
Your task:
|
Your task:
|
||||||
1. Decide if the current page answers the user’s query: {user_prompt}
|
1. Decide if the current page answers the user’s query: {user_prompt}
|
||||||
- If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
|
- If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
|
||||||
- If it does and you are 100% certain that it provide a definive answer, say REQUEST_EXIT
|
- If it does and you completed use request, say REQUEST_EXIT
|
||||||
- If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
|
- If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
|
||||||
2. Navigate by either:
|
2. Navigate by either:
|
||||||
- Navigate to a navigation links (write the full URL, e.g., www.example.com/cats).
|
- Navigate to a navigation links (write the full URL, e.g., www.example.com/cats).
|
||||||
@ -100,7 +101,7 @@ class BrowserAgent(Agent):
|
|||||||
3. Fill forms on the page:
|
3. Fill forms on the page:
|
||||||
- If user give you informations that help you fill form, fill it.
|
- If user give you informations that help you fill form, fill it.
|
||||||
- If you don't know how to fill a form, leave it empty.
|
- If you don't know how to fill a form, leave it empty.
|
||||||
- You can fill a form using [form_name](value).
|
- You can fill a form using [form_name](value). Do not go back when you fill a form.
|
||||||
|
|
||||||
Recap of note taking:
|
Recap of note taking:
|
||||||
If useful -> Note: [Briefly summarize the key information or task you conducted.]
|
If useful -> Note: [Briefly summarize the key information or task you conducted.]
|
||||||
@ -125,8 +126,8 @@ class BrowserAgent(Agent):
|
|||||||
|
|
||||||
Example 4 (loging form visible):
|
Example 4 (loging form visible):
|
||||||
Note: I am on the login page, I should now type the given username and password.
|
Note: I am on the login page, I should now type the given username and password.
|
||||||
[form_name_1](David)
|
[username_field](David)
|
||||||
[form_name_2](edgerunners_2077)
|
[password_field](edgerunners77)
|
||||||
|
|
||||||
You see the following inputs forms:
|
You see the following inputs forms:
|
||||||
{inputs_form_text}
|
{inputs_form_text}
|
||||||
@ -143,8 +144,9 @@ class BrowserAgent(Agent):
|
|||||||
animate_thinking("Thinking...", color="status")
|
animate_thinking("Thinking...", color="status")
|
||||||
self.memory.push('user', prompt)
|
self.memory.push('user', prompt)
|
||||||
answer, reasoning = self.llm_request()
|
answer, reasoning = self.llm_request()
|
||||||
|
output = f"Answer: {answer}" if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}"
|
||||||
pretty_print("-"*100)
|
pretty_print("-"*100)
|
||||||
pretty_print(answer, color="output")
|
pretty_print(output, color="output")
|
||||||
pretty_print("-"*100)
|
pretty_print("-"*100)
|
||||||
return answer, reasoning
|
return answer, reasoning
|
||||||
|
|
||||||
@ -175,7 +177,7 @@ class BrowserAgent(Agent):
|
|||||||
return parsed_results
|
return parsed_results
|
||||||
|
|
||||||
def stringify_search_results(self, results_arr: List[str]) -> str:
|
def stringify_search_results(self, results_arr: List[str]) -> str:
|
||||||
return '\n\n'.join([f"Link: {res['link']}" for res in results_arr])
|
return '\n\n'.join([f"Link: {res['link']}\nPreview: {res['snippet']}" for res in results_arr])
|
||||||
|
|
||||||
def save_notes(self, text):
|
def save_notes(self, text):
|
||||||
lines = text.split('\n')
|
lines = text.split('\n')
|
||||||
@ -215,18 +217,49 @@ class BrowserAgent(Agent):
|
|||||||
If the query does not make any sense for a web search explain why and say REQUEST_EXIT
|
If the query does not make any sense for a web search explain why and say REQUEST_EXIT
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def process(self, user_prompt, speech_module) -> str:
|
def handle_update_prompt(self, user_prompt: str, page_text: str) -> str:
    """
    Build the prompt text used after a form has been filled.

    The prompt embeds the current page text and the user's request, and asks
    whether the page now answers the query, replying with FORM_FILLED,
    REQUEST_EXIT or GO_BACK.

    Args:
        user_prompt: The user's original query, inserted verbatim.
        page_text: Text of the page after the form submission, inserted verbatim.
    Returns:
        The formatted prompt string.
    """
    update_prompt = f"""
You are a web browser.
You just filled a form on the page.
Now you should see the result of the form submission on the page:
Page text:
{page_text}
The user asked: {user_prompt}
Does the page answer the user’s query now?
If it does, take notes of the useful information, write down result and say FORM_FILLED.
If you were previously on a login form, no need to explain.
If it does and you completed user request, say REQUEST_EXIT
if it doesn’t, say: Error: This page does not answer the user’s query then GO_BACK.
"""
    return update_prompt
|
||||||
|
|
||||||
|
def show_search_results(self, search_result: List[str]):
    """
    Print a header followed by one "Title - Link" line per search result.

    Args:
        search_result: Result entries; each entry is subscripted with the
            'title' and 'link' keys, so despite the List[str] annotation the
            entries are presumably dict-like (verify against the caller).
    """
    pretty_print("\nSearch results:", color="output")
    for entry in search_result:
        title, link = entry['title'], entry['link']
        pretty_print(f"Title: {title} - Link: {link}", color="output")
|
||||||
|
|
||||||
|
def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Process the user prompt to conduct an autonomous web search.
|
||||||
|
Start with a google search with searxng using web_search tool.
|
||||||
|
Then enter a navigation logic to find the answer or conduct required actions.
|
||||||
|
Args:
|
||||||
|
user_prompt: The user's input query
|
||||||
|
speech_module: Optional speech output module
|
||||||
|
Returns:
|
||||||
|
tuple containing the final answer and reasoning
|
||||||
|
"""
|
||||||
complete = False
|
complete = False
|
||||||
|
|
||||||
animate_thinking(f"Thinking...", color="status")
|
animate_thinking(f"Thinking...", color="status")
|
||||||
self.memory.push('user', self.search_prompt(user_prompt))
|
self.memory.push('user', self.search_prompt(user_prompt))
|
||||||
ai_prompt, _ = self.llm_request()
|
ai_prompt, _ = self.llm_request()
|
||||||
if "REQUEST_EXIT" in ai_prompt:
|
if "REQUEST_EXIT" in ai_prompt:
|
||||||
# request make no sense, maybe wrong agent was allocated?
|
pretty_print(f"{reasoning}\n{ai_prompt}", color="output")
|
||||||
return ai_prompt, ""
|
return ai_prompt, ""
|
||||||
animate_thinking(f"Searching...", color="status")
|
animate_thinking(f"Searching...", color="status")
|
||||||
search_result_raw = self.tools["web_search"].execute([ai_prompt], False)
|
search_result_raw = self.tools["web_search"].execute([ai_prompt], False)
|
||||||
search_result = self.jsonify_search_results(search_result_raw)[:12] # until futher improvement
|
search_result = self.jsonify_search_results(search_result_raw)[:12] # until futher improvement
|
||||||
|
self.show_search_results(search_result)
|
||||||
prompt = self.make_newsearch_prompt(user_prompt, search_result)
|
prompt = self.make_newsearch_prompt(user_prompt, search_result)
|
||||||
unvisited = [None]
|
unvisited = [None]
|
||||||
while not complete:
|
while not complete:
|
||||||
@ -236,7 +269,10 @@ class BrowserAgent(Agent):
|
|||||||
extracted_form = self.extract_form(answer)
|
extracted_form = self.extract_form(answer)
|
||||||
if len(extracted_form) > 0:
|
if len(extracted_form) > 0:
|
||||||
self.browser.fill_form_inputs(extracted_form)
|
self.browser.fill_form_inputs(extracted_form)
|
||||||
self.browser.find_and_click_submit()
|
self.browser.find_and_click_submission()
|
||||||
|
page_text = self.browser.get_text()
|
||||||
|
answer = self.handle_update_prompt(user_prompt, page_text)
|
||||||
|
answer, reasoning = self.llm_decide(prompt)
|
||||||
|
|
||||||
if "REQUEST_EXIT" in answer:
|
if "REQUEST_EXIT" in answer:
|
||||||
complete = True
|
complete = True
|
||||||
@ -246,6 +282,12 @@ class BrowserAgent(Agent):
|
|||||||
if len(unvisited) == 0:
|
if len(unvisited) == 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if "FORM_FILLED" in answer:
|
||||||
|
page_text = self.browser.get_text()
|
||||||
|
self.navigable_links = self.browser.get_navigable()
|
||||||
|
prompt = self.make_navigation_prompt(user_prompt, page_text)
|
||||||
|
continue
|
||||||
|
|
||||||
if len(links) == 0 or "GO_BACK" in answer:
|
if len(links) == 0 or "GO_BACK" in answer:
|
||||||
unvisited = self.select_unvisited(search_result)
|
unvisited = self.select_unvisited(search_result)
|
||||||
prompt = self.make_newsearch_prompt(user_prompt, unvisited)
|
prompt = self.make_newsearch_prompt(user_prompt, unvisited)
|
||||||
|
@ -6,10 +6,9 @@ from selenium.webdriver.support.ui import WebDriverWait
|
|||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
from selenium.common.exceptions import TimeoutException, WebDriverException
|
from selenium.common.exceptions import TimeoutException, WebDriverException
|
||||||
from selenium.webdriver.common.action_chains import ActionChains
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
from selenium.webdriver.chrome.options import Options
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple, Type, Dict, Tuple
|
||||||
from fake_useragent import UserAgent
|
from fake_useragent import UserAgent
|
||||||
from selenium_stealth import stealth
|
from selenium_stealth import stealth
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
@ -126,13 +125,26 @@ class Browser:
|
|||||||
try:
|
try:
|
||||||
initial_handles = self.driver.window_handles
|
initial_handles = self.driver.window_handles
|
||||||
self.driver.get(url)
|
self.driver.get(url)
|
||||||
time.sleep(1)
|
wait = WebDriverWait(self.driver, timeout=30)
|
||||||
|
wait.until(
|
||||||
|
lambda driver: (
|
||||||
|
driver.execute_script("return document.readyState") == "complete" and
|
||||||
|
not any(keyword in driver.page_source.lower() for keyword in ["checking your browser", "verifying", "captcha"])
|
||||||
|
),
|
||||||
|
message="stuck on 'checking browser' or verification screen"
|
||||||
|
)
|
||||||
self.apply_web_safety()
|
self.apply_web_safety()
|
||||||
self.logger.info(f"Navigated to: {url}")
|
self.logger.info(f"Navigated to: {url}")
|
||||||
return True
|
return True
|
||||||
|
except TimeoutException as e:
|
||||||
|
self.logger.error(f"Timeout waiting for {url} to load: {str(e)}")
|
||||||
|
return False
|
||||||
except WebDriverException as e:
|
except WebDriverException as e:
|
||||||
self.logger.error(f"Error navigating to {url}: {str(e)}")
|
self.logger.error(f"Error navigating to {url}: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Fatal error with go_to method on {url}:\n{str(e)}")
|
||||||
|
raise e
|
||||||
|
|
||||||
def is_sentence(self, text:str) -> bool:
|
def is_sentence(self, text:str) -> bool:
|
||||||
"""Check if the text qualifies as a meaningful sentence or contains important error codes."""
|
"""Check if the text qualifies as a meaningful sentence or contains important error codes."""
|
||||||
@ -199,7 +211,7 @@ class Browser:
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_navigable(self) -> [str]:
|
def get_navigable(self) -> List[str]:
|
||||||
"""Get all navigable links on the current page."""
|
"""Get all navigable links on the current page."""
|
||||||
try:
|
try:
|
||||||
links = []
|
links = []
|
||||||
@ -301,13 +313,55 @@ class Browser:
|
|||||||
result.sort(key=lambda x: len(x[0]))
|
result.sort(key=lambda x: len(x[0]))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def find_and_click_submit(self, btn_type:str = 'login') -> None:
|
"""
|
||||||
|
def find_and_click_submission(self, btn_type:str = 'login') -> None:
|
||||||
buttons = self.get_buttons_xpath()
|
buttons = self.get_buttons_xpath()
|
||||||
if len(buttons) == 0:
|
if len(buttons) == 0:
|
||||||
self.logger.warning(f"No visible buttons found")
|
self.logger.warning(f"No visible buttons found")
|
||||||
for button in buttons:
|
for button in buttons:
|
||||||
if button[0] == btn_type:
|
if button[0] == btn_type:
|
||||||
self.click_element(button[1])
|
self.click_element(button[1])
|
||||||
|
"""
|
||||||
|
|
||||||
|
def find_and_click_submission(self, timeout: int = 10) -> bool:
    """
    Try to click a form-submission button.

    The labels "login", "submit" and "register" are tried in that order via
    find_and_click_btn; the search stops at the first label that succeeds.

    Args:
        timeout: Forwarded unchanged to find_and_click_btn for every label.
    Returns:
        True if any label led to a successful click, False otherwise
        (a warning is logged in that case).
    """
    candidate_labels = ("login", "submit", "register")
    # any() short-circuits, so later labels are not tried after a success.
    clicked = any(self.find_and_click_btn(label, timeout) for label in candidate_labels)
    if not clicked:
        self.logger.warning("No submission button found")
    return clicked
|
||||||
|
|
||||||
|
def find_and_click_btn(self, btn_type: str = 'login', timeout: int = 10) -> bool:
    """
    Find and click a button whose text contains the given type.

    Only the first button whose text matches is attempted: its outcome
    (clicked, click failed, or timed out) is returned without trying any
    further matching buttons.

    Args:
        btn_type: Text to look for (e.g., 'login', 'submit'); matched
            case-insensitively as a substring of the button text.
        timeout: Maximum time (in seconds) to wait for the button to become clickable.
    Returns:
        bool: True if the button was found and clicked, False otherwise.
    """
    buttons = self.get_buttons_xpath()
    if not buttons:
        self.logger.warning("No visible buttons found")
        return False

    wanted = btn_type.lower()
    for button_text, xpath in buttons:
        if wanted not in button_text.lower():
            continue
        try:
            # Block until the element is clickable; the located element itself
            # is not needed because click_element works from the XPath.
            WebDriverWait(self.driver, timeout).until(
                EC.element_to_be_clickable((By.XPATH, xpath)),
                message=f"Button with XPath '{xpath}' not clickable within {timeout} seconds"
            )
            return bool(self.click_element(xpath))
        except TimeoutException:
            self.logger.warning(f"Timeout waiting for '{button_text}' button at XPath: {xpath}")
            return False
    self.logger.warning(f"No button matching '{btn_type}' found")
    return False
|
||||||
|
|
||||||
def find_input_xpath_by_name(self, inputs, name: str) -> str | None:
|
def find_input_xpath_by_name(self, inputs, name: str) -> str | None:
|
||||||
for field in inputs:
|
for field in inputs:
|
||||||
@ -315,8 +369,11 @@ class Browser:
|
|||||||
return field["xpath"]
|
return field["xpath"]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def fill_form_inputs(self, input_list:[str]) -> bool:
|
def fill_form_inputs(self, input_list: List[str]) -> bool:
|
||||||
"""Fill form inputs based on a list of [name](value) strings."""
|
"""Fill form inputs based on a list of [name](value) strings."""
|
||||||
|
if not isinstance(input_list, list):
|
||||||
|
self.logger.error("input_list must be a list")
|
||||||
|
return False
|
||||||
inputs = self.find_all_inputs()
|
inputs = self.find_all_inputs()
|
||||||
try:
|
try:
|
||||||
for input_str in input_list:
|
for input_str in input_list:
|
||||||
@ -389,7 +446,7 @@ if __name__ == "__main__":
|
|||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
driver = create_driver()
|
driver = create_driver()
|
||||||
browser = Browser(driver)
|
browser = Browser(driver, anticaptcha_manual_install=True)
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
print("AntiCaptcha Test")
|
print("AntiCaptcha Test")
|
||||||
@ -400,4 +457,5 @@ if __name__ == "__main__":
|
|||||||
inputs = browser.get_form_inputs()
|
inputs = browser.get_form_inputs()
|
||||||
inputs = ['[username](student)', f'[password](Password123)', '[appOtp]()', '[backupOtp]()']
|
inputs = ['[username](student)', f'[password](Password123)', '[appOtp]()', '[backupOtp]()']
|
||||||
browser.fill_form_inputs(inputs)
|
browser.fill_form_inputs(inputs)
|
||||||
browser.find_and_click_submit()
|
browser.find_and_click_submission()
|
||||||
|
time.sleep(30)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user