mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-03 01:30:11 +00:00
soft enforced chromedriver path for python-env usage
This commit is contained in:
parent
365a552726
commit
d6ec56b5df
@ -27,53 +27,84 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from sources.utility import pretty_print, animate_thinking
|
||||
from sources.logger import Logger
|
||||
|
||||
|
||||
def get_chrome_path() -> str:
|
||||
"""Get the path to the Chrome executable."""
|
||||
if sys.platform.startswith("win"):
|
||||
paths = [
|
||||
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
|
||||
"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
|
||||
os.path.join(os.environ.get("LOCALAPPDATA", ""), "Google\\Chrome\\Application\\chrome.exe") # User install
|
||||
os.path.join(
|
||||
os.environ.get("LOCALAPPDATA", ""),
|
||||
"Google\\Chrome\\Application\\chrome.exe",
|
||||
), # User install
|
||||
]
|
||||
elif sys.platform.startswith("darwin"): # macOS
|
||||
paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"]
|
||||
paths = [
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta",
|
||||
]
|
||||
else: # Linux
|
||||
paths = ["/usr/bin/google-chrome", "/usr/bin/chromium-browser", "/usr/bin/chromium", "/opt/chrome/chrome", "/usr/local/bin/chrome"]
|
||||
paths = [
|
||||
"/usr/bin/google-chrome",
|
||||
"/usr/bin/chromium-browser",
|
||||
"/usr/bin/chromium",
|
||||
"/opt/chrome/chrome",
|
||||
"/usr/local/bin/chrome",
|
||||
]
|
||||
|
||||
for path in paths:
|
||||
if os.path.exists(path) and os.access(path, os.X_OK): # Check if executable
|
||||
return path
|
||||
print("Looking for Google Chrome in these locations failed:")
|
||||
print('\n'.join(paths))
|
||||
print("\n".join(paths))
|
||||
chrome_path_env = os.environ.get("CHROME_EXECUTABLE_PATH")
|
||||
if chrome_path_env and os.path.exists(chrome_path_env) and os.access(chrome_path_env, os.X_OK):
|
||||
if (
|
||||
chrome_path_env
|
||||
and os.path.exists(chrome_path_env)
|
||||
and os.access(chrome_path_env, os.X_OK)
|
||||
):
|
||||
return chrome_path_env
|
||||
path = input("Google Chrome not found. Please enter the path to the Chrome executable: ")
|
||||
path = input(
|
||||
"Google Chrome not found. Please enter the path to the Chrome executable: "
|
||||
)
|
||||
if os.path.exists(path) and os.access(path, os.X_OK):
|
||||
os.environ["CHROME_EXECUTABLE_PATH"] = path
|
||||
print(f"Chrome path saved to environment variable CHROME_EXECUTABLE_PATH")
|
||||
return path
|
||||
return None
|
||||
|
||||
|
||||
def get_random_user_agent() -> str:
|
||||
"""Get a random user agent string with associated vendor."""
|
||||
user_agents = [
|
||||
{"ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36", "vendor": "Google Inc."},
|
||||
{"ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_6_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15", "vendor": "Apple Inc."},
|
||||
{"ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "vendor": ""},
|
||||
{
|
||||
"ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36",
|
||||
"vendor": "Google Inc.",
|
||||
},
|
||||
{
|
||||
"ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_6_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
|
||||
"vendor": "Apple Inc.",
|
||||
},
|
||||
{
|
||||
"ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0",
|
||||
"vendor": "",
|
||||
},
|
||||
]
|
||||
return random.choice(user_agents)
|
||||
|
||||
def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx") -> webdriver.Chrome:
|
||||
|
||||
def create_driver(
|
||||
headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx"
|
||||
) -> webdriver.Chrome:
|
||||
"""Create a Chrome WebDriver with specified options."""
|
||||
chrome_options = Options()
|
||||
chrome_path = get_chrome_path()
|
||||
|
||||
|
||||
if not chrome_path:
|
||||
raise FileNotFoundError("Google Chrome not found. Please install it.")
|
||||
chrome_options.binary_location = chrome_path
|
||||
|
||||
|
||||
if headless:
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument("--disable-gpu")
|
||||
@ -90,31 +121,40 @@ def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx
|
||||
chrome_options.add_argument(f'user-agent={user_agent["ua"]}')
|
||||
resolutions = [(1920, 1080), (1366, 768), (1440, 900)]
|
||||
width, height = random.choice(resolutions)
|
||||
chrome_options.add_argument(f'--window-size={width},{height}')
|
||||
chrome_options.add_argument(f"--window-size={width},{height}")
|
||||
if not stealth_mode:
|
||||
# crx file can't be installed in stealth mode
|
||||
if not os.path.exists(crx_path):
|
||||
pretty_print(f"Anti-captcha CRX not found at {crx_path}.", color="failure")
|
||||
else:
|
||||
chrome_options.add_extension(crx_path)
|
||||
|
||||
chromedriver_path = shutil.which("chromedriver")
|
||||
if not chromedriver_path:
|
||||
|
||||
chromedriver_path = "/usr/bin/chromedriver"
|
||||
if not os.path.exists(chromedriver_path):
|
||||
chromedriver_path = chromedriver_autoinstaller.install()
|
||||
|
||||
|
||||
if not chromedriver_path:
|
||||
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
|
||||
|
||||
raise FileNotFoundError(
|
||||
"ChromeDriver not found. Please install it or add it to your PATH."
|
||||
)
|
||||
|
||||
service = Service(chromedriver_path)
|
||||
if stealth_mode:
|
||||
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
|
||||
driver = uc.Chrome(service=service, options=chrome_options)
|
||||
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
||||
chrome_version = driver.capabilities['browserVersion']
|
||||
stealth(driver,
|
||||
driver.execute_script(
|
||||
"Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
|
||||
)
|
||||
chrome_version = driver.capabilities["browserVersion"]
|
||||
stealth(
|
||||
driver,
|
||||
languages=["en-US", "en"],
|
||||
vendor=user_agent["vendor"],
|
||||
platform="Win64" if "Windows" in user_agent["ua"] else "MacIntel" if "Macintosh" in user_agent["ua"] else "Linux x86_64",
|
||||
platform=(
|
||||
"Win64"
|
||||
if "Windows" in user_agent["ua"]
|
||||
else "MacIntel" if "Macintosh" in user_agent["ua"] else "Linux x86_64"
|
||||
),
|
||||
webgl_vendor="Intel Inc.",
|
||||
renderer="Intel Iris OpenGL Engine",
|
||||
fix_hairline=True,
|
||||
@ -127,13 +167,16 @@ def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx
|
||||
}
|
||||
chrome_options.add_experimental_option("prefs", security_prefs)
|
||||
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||
chrome_options.add_experimental_option('useAutomationExtension', False)
|
||||
chrome_options.add_experimental_option("useAutomationExtension", False)
|
||||
return webdriver.Chrome(service=service, options=chrome_options)
|
||||
|
||||
|
||||
class Browser:
|
||||
def __init__(self, driver, anticaptcha_manual_install=False):
|
||||
"""Initialize the browser with optional AntiCaptcha installation."""
|
||||
self.js_scripts_folder = "./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/"
|
||||
self.js_scripts_folder = (
|
||||
"./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/"
|
||||
)
|
||||
self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related"
|
||||
self.logger = Logger("browser.log")
|
||||
self.screenshot_folder = os.path.join(os.getcwd(), ".screenshots")
|
||||
@ -146,23 +189,26 @@ class Browser:
|
||||
self.setup_tabs()
|
||||
if anticaptcha_manual_install:
|
||||
self.load_anticatpcha_manually()
|
||||
|
||||
|
||||
def setup_tabs(self):
|
||||
self.tabs = self.driver.window_handles
|
||||
self.driver.get("https://www.google.com")
|
||||
self.screenshot()
|
||||
|
||||
|
||||
def switch_control_tab(self):
|
||||
self.logger.log("Switching to control tab.")
|
||||
self.driver.switch_to.window(self.tabs[0])
|
||||
|
||||
|
||||
def load_anticatpcha_manually(self):
|
||||
pretty_print("You might want to install the AntiCaptcha extension for captchas.", color="warning")
|
||||
pretty_print(
|
||||
"You might want to install the AntiCaptcha extension for captchas.",
|
||||
color="warning",
|
||||
)
|
||||
self.driver.get(self.anticaptcha)
|
||||
|
||||
def go_to(self, url:str) -> bool:
|
||||
def go_to(self, url: str) -> bool:
|
||||
"""Navigate to a specified URL."""
|
||||
time.sleep(random.uniform(0.4, 2.5)) # more human behavior
|
||||
time.sleep(random.uniform(0.4, 2.5)) # more human behavior
|
||||
try:
|
||||
initial_handles = self.driver.window_handles
|
||||
self.driver.get(url)
|
||||
@ -170,12 +216,17 @@ class Browser:
|
||||
wait = WebDriverWait(self.driver, timeout=10)
|
||||
wait.until(
|
||||
lambda driver: (
|
||||
not any(keyword in driver.page_source.lower() for keyword in ["checking your browser", "captcha"])
|
||||
not any(
|
||||
keyword in driver.page_source.lower()
|
||||
for keyword in ["checking your browser", "captcha"]
|
||||
)
|
||||
),
|
||||
message="stuck on 'checking browser' or verification screen"
|
||||
message="stuck on 'checking browser' or verification screen",
|
||||
)
|
||||
except TimeoutException:
|
||||
self.logger.warning("Timeout while waiting for page to bypass 'checking your browser'")
|
||||
self.logger.warning(
|
||||
"Timeout while waiting for page to bypass 'checking your browser'"
|
||||
)
|
||||
self.apply_web_safety()
|
||||
self.logger.log(f"Navigated to: {url}")
|
||||
return True
|
||||
@ -189,30 +240,33 @@ class Browser:
|
||||
self.logger.error(f"Fatal error with go_to method on {url}:\n{str(e)}")
|
||||
raise e
|
||||
|
||||
def is_sentence(self, text:str) -> bool:
|
||||
def is_sentence(self, text: str) -> bool:
|
||||
"""Check if the text qualifies as a meaningful sentence or contains important error codes."""
|
||||
text = text.strip()
|
||||
|
||||
if any(c.isdigit() for c in text):
|
||||
return True
|
||||
words = re.findall(r'\w+', text, re.UNICODE)
|
||||
words = re.findall(r"\w+", text, re.UNICODE)
|
||||
word_count = len(words)
|
||||
has_punctuation = any(text.endswith(p) for p in ['.', ',', ',', '!', '?', '。', '!', '?', '।', '۔'])
|
||||
has_punctuation = any(
|
||||
text.endswith(p)
|
||||
for p in [".", ",", ",", "!", "?", "。", "!", "?", "।", "۔"]
|
||||
)
|
||||
is_long_enough = word_count > 4
|
||||
return (word_count >= 5 and (has_punctuation or is_long_enough))
|
||||
return word_count >= 5 and (has_punctuation or is_long_enough)
|
||||
|
||||
def get_text(self) -> str | None:
|
||||
"""Get page text as formatted Markdown"""
|
||||
try:
|
||||
soup = BeautifulSoup(self.driver.page_source, 'html.parser')
|
||||
for element in soup(['script', 'style', 'noscript', 'meta', 'link']):
|
||||
soup = BeautifulSoup(self.driver.page_source, "html.parser")
|
||||
for element in soup(["script", "style", "noscript", "meta", "link"]):
|
||||
element.decompose()
|
||||
markdown_converter = markdownify.MarkdownConverter(
|
||||
heading_style="ATX",
|
||||
strip=['a'],
|
||||
strip=["a"],
|
||||
autolinks=False,
|
||||
bullets='•',
|
||||
strong_em_symbol='*',
|
||||
bullets="•",
|
||||
strong_em_symbol="*",
|
||||
default_title=False,
|
||||
)
|
||||
markdown_text = markdown_converter.convert(str(soup.body))
|
||||
@ -220,35 +274,43 @@ class Browser:
|
||||
for line in markdown_text.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped and self.is_sentence(stripped):
|
||||
cleaned = ' '.join(stripped.split())
|
||||
cleaned = " ".join(stripped.split())
|
||||
lines.append(cleaned)
|
||||
result = "[Start of page]\n\n" + "\n\n".join(lines) + "\n\n[End of page]"
|
||||
result = re.sub(r'!\[(.*?)\]\(.*?\)', r'[IMAGE: \1]', result)
|
||||
result = re.sub(r"!\[(.*?)\]\(.*?\)", r"[IMAGE: \1]", result)
|
||||
self.logger.info(f"Extracted text: {result[:100]}...")
|
||||
self.logger.info(f"Extracted text length: {len(result)}")
|
||||
return result[:8192]
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error getting text: {str(e)}")
|
||||
return None
|
||||
|
||||
def clean_url(self, url:str) -> str:
|
||||
|
||||
def clean_url(self, url: str) -> str:
|
||||
"""Clean URL to keep only the part needed for navigation to the page"""
|
||||
clean = url.split('#')[0]
|
||||
parts = clean.split('?', 1)
|
||||
clean = url.split("#")[0]
|
||||
parts = clean.split("?", 1)
|
||||
base_url = parts[0]
|
||||
if len(parts) > 1:
|
||||
query = parts[1]
|
||||
essential_params = []
|
||||
for param in query.split('&'):
|
||||
if param.startswith('_skw=') or param.startswith('q=') or param.startswith('s='):
|
||||
for param in query.split("&"):
|
||||
if (
|
||||
param.startswith("_skw=")
|
||||
or param.startswith("q=")
|
||||
or param.startswith("s=")
|
||||
):
|
||||
essential_params.append(param)
|
||||
elif param.startswith('_') or param.startswith('hash=') or param.startswith('itmmeta='):
|
||||
elif (
|
||||
param.startswith("_")
|
||||
or param.startswith("hash=")
|
||||
or param.startswith("itmmeta=")
|
||||
):
|
||||
break
|
||||
if essential_params:
|
||||
return f"{base_url}?{'&'.join(essential_params)}"
|
||||
return base_url
|
||||
|
||||
def is_link_valid(self, url:str) -> bool:
|
||||
|
||||
def is_link_valid(self, url: str) -> bool:
|
||||
"""Check if a URL is a valid link (page, not related to icon or metadata)."""
|
||||
if len(url) > 72:
|
||||
self.logger.warning(f"URL too long: {url}")
|
||||
@ -257,10 +319,10 @@ class Browser:
|
||||
if not parsed_url.scheme or not parsed_url.netloc:
|
||||
self.logger.warning(f"Invalid URL: {url}")
|
||||
return False
|
||||
if re.search(r'/\d+$', parsed_url.path):
|
||||
if re.search(r"/\d+$", parsed_url.path):
|
||||
return False
|
||||
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']
|
||||
metadata_extensions = ['.ico', '.xml', '.json', '.rss', '.atom']
|
||||
image_extensions = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"]
|
||||
metadata_extensions = [".ico", ".xml", ".json", ".rss", ".atom"]
|
||||
for ext in image_extensions + metadata_extensions:
|
||||
if url.lower().endswith(ext):
|
||||
return False
|
||||
@ -271,18 +333,24 @@ class Browser:
|
||||
try:
|
||||
links = []
|
||||
elements = self.driver.find_elements(By.TAG_NAME, "a")
|
||||
|
||||
|
||||
for element in elements:
|
||||
href = element.get_attribute("href")
|
||||
if href and href.startswith(("http", "https")):
|
||||
links.append({
|
||||
"url": href,
|
||||
"text": element.text.strip(),
|
||||
"is_displayed": element.is_displayed()
|
||||
})
|
||||
|
||||
links.append(
|
||||
{
|
||||
"url": href,
|
||||
"text": element.text.strip(),
|
||||
"is_displayed": element.is_displayed(),
|
||||
}
|
||||
)
|
||||
|
||||
self.logger.info(f"Found {len(links)} navigable links")
|
||||
return [self.clean_url(link['url']) for link in links if (link['is_displayed'] == True and self.is_link_valid(link['url']))]
|
||||
return [
|
||||
self.clean_url(link["url"])
|
||||
for link in links
|
||||
if (link["is_displayed"] == True and self.is_link_valid(link["url"]))
|
||||
]
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error getting navigable links: {str(e)}")
|
||||
return []
|
||||
@ -297,7 +365,10 @@ class Browser:
|
||||
return False
|
||||
try:
|
||||
self.logger.error(f"Scrolling to element for click_element.")
|
||||
self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'smooth'});", element)
|
||||
self.driver.execute_script(
|
||||
"arguments[0].scrollIntoView({block: 'center', behavior: 'smooth'});",
|
||||
element,
|
||||
)
|
||||
time.sleep(0.1)
|
||||
element.click()
|
||||
self.logger.info(f"Clicked element at {xpath}")
|
||||
@ -311,13 +382,13 @@ class Browser:
|
||||
except Exception as e:
|
||||
self.logger.error(f"Unexpected error clicking element at {xpath}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def load_js(self, file_name: str) -> str:
|
||||
"""Load javascript from script folder to inject to page."""
|
||||
path = os.path.join(self.js_scripts_folder, file_name)
|
||||
self.logger.info(f"Loading js at {path}")
|
||||
try:
|
||||
with open(path, 'r') as f:
|
||||
with open(path, "r") as f:
|
||||
return f.read()
|
||||
except FileNotFoundError as e:
|
||||
raise Exception(f"Could not find: {path}") from e
|
||||
@ -349,17 +420,22 @@ class Browser:
|
||||
form_strings = []
|
||||
for element in input_elements:
|
||||
input_type = element.get("type") or "text"
|
||||
if input_type in ["hidden", "submit", "button", "image"] or not element["displayed"]:
|
||||
if (
|
||||
input_type in ["hidden", "submit", "button", "image"]
|
||||
or not element["displayed"]
|
||||
):
|
||||
continue
|
||||
input_name = element.get("text") or element.get("id") or input_type
|
||||
if input_type == "checkbox" or input_type == "radio":
|
||||
try:
|
||||
checked_status = "checked" if element.is_selected() else "unchecked"
|
||||
checked_status = (
|
||||
"checked" if element.is_selected() else "unchecked"
|
||||
)
|
||||
except Exception as e:
|
||||
continue
|
||||
form_strings.append(f"[{input_name}]({checked_status})")
|
||||
else:
|
||||
form_strings.append(f"[{input_name}]("")")
|
||||
form_strings.append(f"[{input_name}](" ")")
|
||||
return form_strings
|
||||
|
||||
except Exception as e:
|
||||
@ -369,13 +445,18 @@ class Browser:
|
||||
"""
|
||||
Find buttons and return their type and xpath.
|
||||
"""
|
||||
buttons = self.driver.find_elements(By.TAG_NAME, "button") + \
|
||||
self.driver.find_elements(By.XPATH, "//input[@type='submit']")
|
||||
buttons = self.driver.find_elements(
|
||||
By.TAG_NAME, "button"
|
||||
) + self.driver.find_elements(By.XPATH, "//input[@type='submit']")
|
||||
result = []
|
||||
for i, button in enumerate(buttons):
|
||||
if not button.is_displayed() or not button.is_enabled():
|
||||
continue
|
||||
text = (button.text or button.get_attribute("value") or "").lower().replace(' ', '')
|
||||
text = (
|
||||
(button.text or button.get_attribute("value") or "")
|
||||
.lower()
|
||||
.replace(" ", "")
|
||||
)
|
||||
xpath = f"(//button | //input[@type='submit'])[{i + 1}]"
|
||||
result.append((text, xpath))
|
||||
result.sort(key=lambda x: len(x[0]))
|
||||
@ -389,8 +470,8 @@ class Browser:
|
||||
self.logger.info("Waiting for submission outcome...")
|
||||
wait = WebDriverWait(self.driver, timeout)
|
||||
wait.until(
|
||||
lambda driver: driver.current_url != self.driver.current_url or
|
||||
driver.find_elements(By.XPATH, "//*[contains(text(), 'success')]")
|
||||
lambda driver: driver.current_url != self.driver.current_url
|
||||
or driver.find_elements(By.XPATH, "//*[contains(text(), 'success')]")
|
||||
)
|
||||
self.logger.info("Detected submission outcome")
|
||||
return True
|
||||
@ -398,7 +479,7 @@ class Browser:
|
||||
self.logger.warning("No submission outcome detected")
|
||||
return False
|
||||
|
||||
def find_and_click_btn(self, btn_type: str = 'login', timeout: int = 5) -> bool:
|
||||
def find_and_click_btn(self, btn_type: str = "login", timeout: int = 5) -> bool:
|
||||
"""Find and click a submit button matching the specified type."""
|
||||
buttons = self.get_buttons_xpath()
|
||||
if not buttons:
|
||||
@ -406,24 +487,35 @@ class Browser:
|
||||
return False
|
||||
|
||||
for button_text, xpath in buttons:
|
||||
if btn_type.lower() in button_text.lower() or btn_type.lower() in xpath.lower():
|
||||
if (
|
||||
btn_type.lower() in button_text.lower()
|
||||
or btn_type.lower() in xpath.lower()
|
||||
):
|
||||
try:
|
||||
wait = WebDriverWait(self.driver, timeout)
|
||||
element = wait.until(
|
||||
EC.element_to_be_clickable((By.XPATH, xpath)),
|
||||
message=f"Button with XPath '{xpath}' not clickable within {timeout} seconds"
|
||||
message=f"Button with XPath '{xpath}' not clickable within {timeout} seconds",
|
||||
)
|
||||
if self.click_element(xpath):
|
||||
self.logger.info(f"Clicked button '{button_text}' at XPath: {xpath}")
|
||||
self.logger.info(
|
||||
f"Clicked button '{button_text}' at XPath: {xpath}"
|
||||
)
|
||||
return True
|
||||
else:
|
||||
self.logger.warning(f"Button '{button_text}' at XPath: {xpath} not clickable")
|
||||
self.logger.warning(
|
||||
f"Button '{button_text}' at XPath: {xpath} not clickable"
|
||||
)
|
||||
return False
|
||||
except TimeoutException:
|
||||
self.logger.warning(f"Timeout waiting for '{button_text}' button at XPath: {xpath}")
|
||||
self.logger.warning(
|
||||
f"Timeout waiting for '{button_text}' button at XPath: {xpath}"
|
||||
)
|
||||
return False
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error clicking button '{button_text}' at XPath: {xpath} - {str(e)}")
|
||||
self.logger.error(
|
||||
f"Error clicking button '{button_text}' at XPath: {xpath} - {str(e)}"
|
||||
)
|
||||
return False
|
||||
self.logger.warning(f"No button matching '{btn_type}' found")
|
||||
return False
|
||||
@ -434,7 +526,9 @@ class Browser:
|
||||
Returns True if successful, False if any issues occur.
|
||||
"""
|
||||
try:
|
||||
checkboxes = self.driver.find_elements(By.XPATH, "//input[@type='checkbox']")
|
||||
checkboxes = self.driver.find_elements(
|
||||
By.XPATH, "//input[@type='checkbox']"
|
||||
)
|
||||
if not checkboxes:
|
||||
self.logger.info("No checkboxes found on the page")
|
||||
return True
|
||||
@ -445,19 +539,24 @@ class Browser:
|
||||
EC.element_to_be_clickable(checkbox)
|
||||
)
|
||||
self.driver.execute_script(
|
||||
"arguments[0].scrollIntoView({block: 'center', inline: 'center'});", checkbox
|
||||
"arguments[0].scrollIntoView({block: 'center', inline: 'center'});",
|
||||
checkbox,
|
||||
)
|
||||
if not checkbox.is_selected():
|
||||
try:
|
||||
checkbox.click()
|
||||
self.logger.info(f"Ticked checkbox {index}")
|
||||
except ElementClickInterceptedException:
|
||||
self.driver.execute_script("arguments[0].click();", checkbox)
|
||||
self.driver.execute_script(
|
||||
"arguments[0].click();", checkbox
|
||||
)
|
||||
self.logger.warning(f"Click checkbox {index} intercepted")
|
||||
else:
|
||||
self.logger.info(f"Checkbox {index} already ticked")
|
||||
except TimeoutException:
|
||||
self.logger.warning(f"Timeout waiting for checkbox {index} to be clickable")
|
||||
self.logger.warning(
|
||||
f"Timeout waiting for checkbox {index} to be clickable"
|
||||
)
|
||||
continue
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error ticking checkbox {index}: {str(e)}")
|
||||
@ -468,16 +567,28 @@ class Browser:
|
||||
return False
|
||||
|
||||
def find_and_click_submission(self, timeout: int = 10) -> bool:
|
||||
possible_submissions = ["login", "submit", "register", "continue", "apply",
|
||||
"ok", "confirm", "proceed", "accept",
|
||||
"done", "finish", "start", "calculate"]
|
||||
possible_submissions = [
|
||||
"login",
|
||||
"submit",
|
||||
"register",
|
||||
"continue",
|
||||
"apply",
|
||||
"ok",
|
||||
"confirm",
|
||||
"proceed",
|
||||
"accept",
|
||||
"done",
|
||||
"finish",
|
||||
"start",
|
||||
"calculate",
|
||||
]
|
||||
for submission in possible_submissions:
|
||||
if self.find_and_click_btn(submission, timeout):
|
||||
self.logger.info(f"Clicked on submission button: {submission}")
|
||||
return True
|
||||
self.logger.warning("No submission button found")
|
||||
return False
|
||||
|
||||
|
||||
def find_input_xpath_by_name(self, inputs, name: str) -> str | None:
|
||||
for field in inputs:
|
||||
if name in field["text"]:
|
||||
@ -492,7 +603,7 @@ class Browser:
|
||||
inputs = self.find_all_inputs()
|
||||
try:
|
||||
for input_str in input_list:
|
||||
match = re.match(r'\[(.*?)\]\((.*?)\)', input_str)
|
||||
match = re.match(r"\[(.*?)\]\((.*?)\)", input_str)
|
||||
if not match:
|
||||
self.logger.warning(f"Invalid format for input: {input_str}")
|
||||
continue
|
||||
@ -509,11 +620,17 @@ class Browser:
|
||||
EC.element_to_be_clickable((By.XPATH, xpath))
|
||||
)
|
||||
except TimeoutException:
|
||||
self.logger.error(f"Timeout waiting for element '{name}' to be clickable")
|
||||
self.logger.error(
|
||||
f"Timeout waiting for element '{name}' to be clickable"
|
||||
)
|
||||
continue
|
||||
self.driver.execute_script("arguments[0].scrollIntoView(true);", element)
|
||||
self.driver.execute_script(
|
||||
"arguments[0].scrollIntoView(true);", element
|
||||
)
|
||||
if not element.is_displayed() or not element.is_enabled():
|
||||
self.logger.warning(f"Element '{name}' is not interactable (not displayed or disabled)")
|
||||
self.logger.warning(
|
||||
f"Element '{name}' is not interactable (not displayed or disabled)"
|
||||
)
|
||||
continue
|
||||
input_type = (element.get_attribute("type") or "text").lower()
|
||||
if input_type in ["checkbox", "radio"]:
|
||||
@ -531,7 +648,7 @@ class Browser:
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error filling form inputs: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def fill_form(self, input_list: List[str]) -> bool:
|
||||
"""Fill form inputs based on a list of [name](value) and submit."""
|
||||
if not isinstance(input_list, list):
|
||||
@ -571,11 +688,11 @@ class Browser:
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error scrolling: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def get_screenshot(self) -> str:
|
||||
return self.screenshot_folder + "/updated_screen.png"
|
||||
|
||||
def screenshot(self, filename:str = 'updated_screen.png') -> bool:
|
||||
def screenshot(self, filename: str = "updated_screen.png") -> bool:
|
||||
"""Take a screenshot of the current page."""
|
||||
self.logger.info("Taking screenshot...")
|
||||
time.sleep(0.1)
|
||||
@ -598,18 +715,19 @@ class Browser:
|
||||
script = self.load_js("inject_safety_script.js")
|
||||
input_elements = self.driver.execute_script(script)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
driver = create_driver(headless=False, stealth_mode=True)
|
||||
browser = Browser(driver, anticaptcha_manual_install=True)
|
||||
|
||||
|
||||
input("press enter to continue")
|
||||
print("AntiCaptcha / Form Test")
|
||||
#browser.go_to("https://www.browserscan.net/bot-detection")
|
||||
#txt = browser.get_text()
|
||||
#browser.go_to("https://www.google.com/recaptcha/api2/demo")
|
||||
# browser.go_to("https://www.browserscan.net/bot-detection")
|
||||
# txt = browser.get_text()
|
||||
# browser.go_to("https://www.google.com/recaptcha/api2/demo")
|
||||
browser.go_to("https://home.openweathermap.org/users/sign_up")
|
||||
inputs_visible = browser.get_form_inputs()
|
||||
print("inputs:", inputs_visible)
|
||||
#inputs_fill = ['[q](checked)', '[q](checked)', '[user[username]](mlg)', '[user[email]](mlg.fcu@gmail.com)', '[user[password]](placeholder_P@ssw0rd123)', '[user[password_confirmation]](placeholder_P@ssw0rd123)']
|
||||
#browser.fill_form(inputs_fill)
|
||||
# inputs_fill = ['[q](checked)', '[q](checked)', '[user[username]](mlg)', '[user[email]](mlg.fcu@gmail.com)', '[user[password]](placeholder_P@ssw0rd123)', '[user[password_confirmation]](placeholder_P@ssw0rd123)']
|
||||
# browser.fill_form(inputs_fill)
|
||||
input("press enter to exit")
|
||||
|
Loading…
x
Reference in New Issue
Block a user