mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-06 11:05:26 +00:00
fix : browser not handling properly web form
This commit is contained in:
parent
a3e95abfde
commit
b3efd09fb3
@ -6,7 +6,8 @@ from sources.agents.agent import Agent
|
||||
from sources.tools.searxSearch import searxSearch
|
||||
from sources.browser import Browser
|
||||
from datetime import date
|
||||
from typing import List, Tuple
|
||||
from typing import List, Tuple, Type, Dict, Tuple
|
||||
|
||||
|
||||
class BrowserAgent(Agent):
|
||||
def __init__(self, name, prompt_path, provider, verbose=False, browser=None):
|
||||
@ -92,7 +93,7 @@ class BrowserAgent(Agent):
|
||||
Your task:
|
||||
1. Decide if the current page answers the user’s query: {user_prompt}
|
||||
- If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
|
||||
- If it does and you are 100% certain that it provide a definive answer, say REQUEST_EXIT
|
||||
- If it does and you completed use request, say REQUEST_EXIT
|
||||
- If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
|
||||
2. Navigate by either:
|
||||
- Navigate to a navigation links (write the full URL, e.g., www.example.com/cats).
|
||||
@ -100,7 +101,7 @@ class BrowserAgent(Agent):
|
||||
3. Fill forms on the page:
|
||||
- If user give you informations that help you fill form, fill it.
|
||||
- If you don't know how to fill a form, leave it empty.
|
||||
- You can fill a form using [form_name](value).
|
||||
- You can fill a form using [form_name](value). Do not go back when you fill a form.
|
||||
|
||||
Recap of note taking:
|
||||
If useful -> Note: [Briefly summarize the key information or task you conducted.]
|
||||
@ -125,8 +126,8 @@ class BrowserAgent(Agent):
|
||||
|
||||
Example 4 (loging form visible):
|
||||
Note: I am on the login page, I should now type the given username and password.
|
||||
[form_name_1](David)
|
||||
[form_name_2](edgerunners_2077)
|
||||
[username_field](David)
|
||||
[password_field](edgerunners77)
|
||||
|
||||
You see the following inputs forms:
|
||||
{inputs_form_text}
|
||||
@ -143,8 +144,9 @@ class BrowserAgent(Agent):
|
||||
animate_thinking("Thinking...", color="status")
|
||||
self.memory.push('user', prompt)
|
||||
answer, reasoning = self.llm_request()
|
||||
output = f"Answer: {answer}" if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}"
|
||||
pretty_print("-"*100)
|
||||
pretty_print(answer, color="output")
|
||||
pretty_print(output, color="output")
|
||||
pretty_print("-"*100)
|
||||
return answer, reasoning
|
||||
|
||||
@ -175,7 +177,7 @@ class BrowserAgent(Agent):
|
||||
return parsed_results
|
||||
|
||||
def stringify_search_results(self, results_arr: List[str]) -> str:
    """
    Render search results as plain text for inclusion in a prompt.

    Each result becomes a "Link: ..." line followed by a "Preview: ..." line,
    and consecutive results are separated by one blank line.

    Args:
        results_arr: Search results. Each item is indexed by the keys
            'link' and 'snippet', so items are mapping-like despite the
            List[str] annotation.
    Returns:
        The joined text, or an empty string when results_arr is empty.
    Raises:
        KeyError: If a result lacks the 'link' or 'snippet' key.
    """
    # Only the snippet-including form is kept: a preceding link-only return
    # would make this one unreachable and drop the previews.
    return '\n\n'.join(
        f"Link: {res['link']}\nPreview: {res['snippet']}" for res in results_arr
    )
|
||||
|
||||
def save_notes(self, text):
|
||||
lines = text.split('\n')
|
||||
@ -214,19 +216,50 @@ class BrowserAgent(Agent):
|
||||
Do not explain, do not write anything beside the search query.
|
||||
If the query does not make any sense for a web search explain why and say REQUEST_EXIT
|
||||
"""
|
||||
|
||||
def handle_update_prompt(self, user_prompt: str, page_text: str) -> str:
    """
    Build the follow-up prompt shown to the model after a form was filled.

    The prompt embeds the current page text and the user's request, and asks
    the model to answer with FORM_FILLED, REQUEST_EXIT or GO_BACK.

    Args:
        user_prompt: The user's original request.
        page_text: Text of the page as it appears after the form submission.
    Returns:
        The formatted prompt string.
    """
    update_prompt = f"""
You are a web browser.
You just filled a form on the page.
Now you should see the result of the form submission on the page:
Page text:
{page_text}
The user asked: {user_prompt}
Does the page answer the user’s query now?
If it does, take notes of the useful information, write down result and say FORM_FILLED.
If you were previously on a login form, no need to explain.
If it does and you completed user request, say REQUEST_EXIT
if it doesn’t, say: Error: This page does not answer the user’s query then GO_BACK.
"""
    return update_prompt
|
||||
|
||||
def show_search_results(self, search_result: List[str]):
    """
    Print a header followed by one "Title - Link" line per search result.

    Args:
        search_result: Search results; each item is indexed by the keys
            'title' and 'link'.
    """
    pretty_print("\nSearch results:", color="output")
    for entry in search_result:
        title = entry['title']
        link = entry['link']
        pretty_print(f"Title: {title} - Link: {link}", color="output")
|
||||
|
||||
def process(self, user_prompt, speech_module) -> str:
|
||||
def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]:
|
||||
"""
|
||||
Process the user prompt to conduct an autonomous web search.
|
||||
Start with a google search with searxng using web_search tool.
|
||||
Then enter a navigation logic to find the answer or conduct required actions.
|
||||
Args:
|
||||
user_prompt: The user's input query
|
||||
speech_module: Optional speech output module
|
||||
Returns:
|
||||
tuple containing the final answer and reasoning
|
||||
"""
|
||||
complete = False
|
||||
|
||||
animate_thinking(f"Thinking...", color="status")
|
||||
self.memory.push('user', self.search_prompt(user_prompt))
|
||||
ai_prompt, _ = self.llm_request()
|
||||
if "REQUEST_EXIT" in ai_prompt:
|
||||
# request make no sense, maybe wrong agent was allocated?
|
||||
pretty_print(f"{reasoning}\n{ai_prompt}", color="output")
|
||||
return ai_prompt, ""
|
||||
animate_thinking(f"Searching...", color="status")
|
||||
search_result_raw = self.tools["web_search"].execute([ai_prompt], False)
|
||||
search_result = self.jsonify_search_results(search_result_raw)[:12] # until futher improvement
|
||||
self.show_search_results(search_result)
|
||||
prompt = self.make_newsearch_prompt(user_prompt, search_result)
|
||||
unvisited = [None]
|
||||
while not complete:
|
||||
@ -236,7 +269,10 @@ class BrowserAgent(Agent):
|
||||
extracted_form = self.extract_form(answer)
|
||||
if len(extracted_form) > 0:
|
||||
self.browser.fill_form_inputs(extracted_form)
|
||||
self.browser.find_and_click_submit()
|
||||
self.browser.find_and_click_submission()
|
||||
page_text = self.browser.get_text()
|
||||
answer = self.handle_update_prompt(user_prompt, page_text)
|
||||
answer, reasoning = self.llm_decide(prompt)
|
||||
|
||||
if "REQUEST_EXIT" in answer:
|
||||
complete = True
|
||||
@ -246,6 +282,12 @@ class BrowserAgent(Agent):
|
||||
if len(unvisited) == 0:
|
||||
break
|
||||
|
||||
if "FORM_FILLED" in answer:
|
||||
page_text = self.browser.get_text()
|
||||
self.navigable_links = self.browser.get_navigable()
|
||||
prompt = self.make_navigation_prompt(user_prompt, page_text)
|
||||
continue
|
||||
|
||||
if len(links) == 0 or "GO_BACK" in answer:
|
||||
unvisited = self.select_unvisited(search_result)
|
||||
prompt = self.make_newsearch_prompt(user_prompt, unvisited)
|
||||
|
@ -6,10 +6,9 @@ from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.common.exceptions import TimeoutException, WebDriverException
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse
|
||||
from typing import List, Tuple
|
||||
from typing import List, Tuple, Type, Dict, Tuple
|
||||
from fake_useragent import UserAgent
|
||||
from selenium_stealth import stealth
|
||||
import undetected_chromedriver as uc
|
||||
@ -126,13 +125,26 @@ class Browser:
|
||||
try:
|
||||
initial_handles = self.driver.window_handles
|
||||
self.driver.get(url)
|
||||
time.sleep(1)
|
||||
wait = WebDriverWait(self.driver, timeout=30)
|
||||
wait.until(
|
||||
lambda driver: (
|
||||
driver.execute_script("return document.readyState") == "complete" and
|
||||
not any(keyword in driver.page_source.lower() for keyword in ["checking your browser", "verifying", "captcha"])
|
||||
),
|
||||
message="stuck on 'checking browser' or verification screen"
|
||||
)
|
||||
self.apply_web_safety()
|
||||
self.logger.info(f"Navigated to: {url}")
|
||||
return True
|
||||
except TimeoutException as e:
|
||||
self.logger.error(f"Timeout waiting for {url} to load: {str(e)}")
|
||||
return False
|
||||
except WebDriverException as e:
|
||||
self.logger.error(f"Error navigating to {url}: {str(e)}")
|
||||
return False
|
||||
except Exception as e:
|
||||
self.logger.error(f"Fatal error with go_to method on {url}:\n{str(e)}")
|
||||
raise e
|
||||
|
||||
def is_sentence(self, text:str) -> bool:
|
||||
"""Check if the text qualifies as a meaningful sentence or contains important error codes."""
|
||||
@ -199,7 +211,7 @@ class Browser:
|
||||
return False
|
||||
return True
|
||||
|
||||
def get_navigable(self) -> [str]:
|
||||
def get_navigable(self) -> List[str]:
|
||||
"""Get all navigable links on the current page."""
|
||||
try:
|
||||
links = []
|
||||
@ -301,13 +313,55 @@ class Browser:
|
||||
result.sort(key=lambda x: len(x[0]))
|
||||
return result
|
||||
|
||||
def find_and_click_submit(self, btn_type:str = 'login') -> None:
|
||||
"""
|
||||
def find_and_click_submission(self, btn_type:str = 'login') -> None:
|
||||
buttons = self.get_buttons_xpath()
|
||||
if len(buttons) == 0:
|
||||
self.logger.warning(f"No visible buttons found")
|
||||
for button in buttons:
|
||||
if button[0] == btn_type:
|
||||
self.click_element(button[1])
|
||||
"""
|
||||
|
||||
def find_and_click_submission(self, timeout: int = 10) -> bool:
    """
    Click the first submission-style button found on the page.

    Tries the labels "login", "submit" and "register" in that order through
    find_and_click_btn and stops at the first one that is clicked.

    Args:
        timeout: Seconds forwarded to find_and_click_btn for each label.
    Returns:
        True if a button was clicked, False if every label failed (a warning
        is logged in that case).
    """
    candidate_labels = ("login", "submit", "register")
    # any() short-circuits, so no further label is tried after a success.
    clicked = any(
        self.find_and_click_btn(label, timeout) for label in candidate_labels
    )
    if not clicked:
        self.logger.warning("No submission button found")
    return clicked
|
||||
|
||||
def find_and_click_btn(self, btn_type: str = 'login', timeout: int = 10) -> bool:
    """
    Find and click a button whose text contains the given keyword.

    Every (text, xpath) pair returned by get_buttons_xpath() whose text
    contains btn_type (case-insensitive) is tried in order. A candidate that
    never becomes clickable, or whose click fails, is skipped so that a later
    matching button can still be used.

    Args:
        btn_type: Keyword to look for in the button text (e.g., 'login', 'submit').
        timeout: Maximum time (in seconds) to wait for each candidate button
            to become clickable.
    Returns:
        bool: True if a matching button was clicked, False otherwise.
    """
    buttons = self.get_buttons_xpath()
    if not buttons:
        self.logger.warning("No visible buttons found")
        return False

    wanted = btn_type.lower()
    matched = False
    for button_text, xpath in buttons:
        if wanted not in button_text.lower():
            continue
        matched = True
        try:
            # Only the wait can raise TimeoutException; keep the try minimal.
            WebDriverWait(self.driver, timeout).until(
                EC.element_to_be_clickable((By.XPATH, xpath)),
                message=f"Button with XPath '{xpath}' not clickable within {timeout} seconds"
            )
        except TimeoutException:
            self.logger.warning(f"Timeout waiting for '{button_text}' button at XPath: {xpath}")
            continue  # try the next matching button instead of giving up
        if self.click_element(xpath):
            return True
        self.logger.warning(f"Click failed for '{button_text}' button at XPath: {xpath}")

    if not matched:
        self.logger.warning(f"No button matching '{btn_type}' found")
    return False
|
||||
|
||||
def find_input_xpath_by_name(self, inputs, name: str) -> str | None:
|
||||
for field in inputs:
|
||||
@ -315,8 +369,11 @@ class Browser:
|
||||
return field["xpath"]
|
||||
return None
|
||||
|
||||
def fill_form_inputs(self, input_list:[str]) -> bool:
|
||||
def fill_form_inputs(self, input_list: List[str]) -> bool:
|
||||
"""Fill form inputs based on a list of [name](value) strings."""
|
||||
if not isinstance(input_list, list):
|
||||
self.logger.error("input_list must be a list")
|
||||
return False
|
||||
inputs = self.find_all_inputs()
|
||||
try:
|
||||
for input_str in input_list:
|
||||
@ -389,7 +446,7 @@ if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
driver = create_driver()
|
||||
browser = Browser(driver)
|
||||
browser = Browser(driver, anticaptcha_manual_install=True)
|
||||
time.sleep(10)
|
||||
|
||||
print("AntiCaptcha Test")
|
||||
@ -400,4 +457,5 @@ if __name__ == "__main__":
|
||||
inputs = browser.get_form_inputs()
|
||||
inputs = ['[username](student)', f'[password](Password123)', '[appOtp]()', '[backupOtp]()']
|
||||
browser.fill_form_inputs(inputs)
|
||||
browser.find_and_click_submit()
|
||||
browser.find_and_click_submission()
|
||||
time.sleep(30)
|
||||
|
Loading…
x
Reference in New Issue
Block a user