mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-04 18:20:09 +00:00
feat : stealth browser with selenium_stealth for captcha
This commit is contained in:
parent
69f276955a
commit
3dbef96cf0
@ -7,12 +7,14 @@ echo "Detecting operating system..."
|
|||||||
|
|
||||||
OS_TYPE=$(uname -s)
|
OS_TYPE=$(uname -s)
|
||||||
|
|
||||||
|
|
||||||
case "$OS_TYPE" in
|
case "$OS_TYPE" in
|
||||||
"Linux"*)
|
"Linux"*)
|
||||||
echo "Detected Linux OS"
|
echo "Detected Linux OS"
|
||||||
if [ -f "$SCRIPTS_DIR/linux_install.sh" ]; then
|
if [ -f "$SCRIPTS_DIR/linux_install.sh" ]; then
|
||||||
echo "Running Linux installation script..."
|
echo "Running Linux installation script..."
|
||||||
bash "$SCRIPTS_DIR/linux_install.sh"
|
bash "$SCRIPTS_DIR/linux_install.sh"
|
||||||
|
bash -c "wget https://github.com/Fosowl/fosowl.github.io/raw/refs/heads/main/usefull/anticaptcha.crx"
|
||||||
bash -c "cd $LLM_ROUTER_DIR && ./dl_safetensors.sh"
|
bash -c "cd $LLM_ROUTER_DIR && ./dl_safetensors.sh"
|
||||||
else
|
else
|
||||||
echo "Error: $SCRIPTS_DIR/linux_install.sh not found!"
|
echo "Error: $SCRIPTS_DIR/linux_install.sh not found!"
|
||||||
@ -24,6 +26,7 @@ case "$OS_TYPE" in
|
|||||||
if [ -f "$SCRIPTS_DIR/macos_install.sh" ]; then
|
if [ -f "$SCRIPTS_DIR/macos_install.sh" ]; then
|
||||||
echo "Running macOS installation script..."
|
echo "Running macOS installation script..."
|
||||||
bash "$SCRIPTS_DIR/macos_install.sh"
|
bash "$SCRIPTS_DIR/macos_install.sh"
|
||||||
|
bash -c "wget https://github.com/Fosowl/fosowl.github.io/raw/refs/heads/main/usefull/anticaptcha.crx"
|
||||||
bash -c "cd $LLM_ROUTER_DIR && ./dl_safetensors.sh"
|
bash -c "cd $LLM_ROUTER_DIR && ./dl_safetensors.sh"
|
||||||
else
|
else
|
||||||
echo "Error: $SCRIPTS_DIR/macos_install.sh not found!"
|
echo "Error: $SCRIPTS_DIR/macos_install.sh not found!"
|
||||||
|
@ -29,6 +29,9 @@ distro>=1.7.0,<2
|
|||||||
jiter>=0.4.0,<1
|
jiter>=0.4.0,<1
|
||||||
sniffio
|
sniffio
|
||||||
tqdm>4
|
tqdm>4
|
||||||
|
fake_useragent>=2.1.0
|
||||||
|
selenium_stealth>=1.0.6
|
||||||
|
undetected-chromedriver>=3.5.5
|
||||||
# for api provider
|
# for api provider
|
||||||
openai
|
openai
|
||||||
# if use chinese
|
# if use chinese
|
||||||
|
@ -5,6 +5,9 @@ echo "Starting installation for macOS..."
|
|||||||
# Install Python dependencies from requirements.txt
|
# Install Python dependencies from requirements.txt
|
||||||
pip3 install -r requirements.txt
|
pip3 install -r requirements.txt
|
||||||
|
|
||||||
|
# make sure wget installed
|
||||||
|
brew install wget
|
||||||
|
|
||||||
# Install chromedriver using Homebrew
|
# Install chromedriver using Homebrew
|
||||||
brew install --cask chromedriver
|
brew install --cask chromedriver
|
||||||
|
|
||||||
|
3
setup.py
3
setup.py
@ -41,6 +41,9 @@ setup(
|
|||||||
"anyio>=3.5.0,<5",
|
"anyio>=3.5.0,<5",
|
||||||
"distro>=1.7.0,<2",
|
"distro>=1.7.0,<2",
|
||||||
"jiter>=0.4.0,<1",
|
"jiter>=0.4.0,<1",
|
||||||
|
"fake_useragent>=2.1.0",
|
||||||
|
"selenium_stealth>=1.0.6",
|
||||||
|
"undetected-chromedriver>=3.5.5",
|
||||||
"sniffio",
|
"sniffio",
|
||||||
"tqdm>4"
|
"tqdm>4"
|
||||||
],
|
],
|
||||||
|
@ -7,17 +7,21 @@ from selenium.webdriver.support import expected_conditions as EC
|
|||||||
from selenium.common.exceptions import TimeoutException, WebDriverException
|
from selenium.common.exceptions import TimeoutException, WebDriverException
|
||||||
from selenium.webdriver.common.action_chains import ActionChains
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import urlparse
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
from fake_useragent import UserAgent
|
||||||
|
from selenium_stealth import stealth
|
||||||
|
import undetected_chromedriver as uc
|
||||||
import chromedriver_autoinstaller
|
import chromedriver_autoinstaller
|
||||||
import time
|
import time
|
||||||
|
import random
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import markdownify
|
import markdownify
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
from sources.utility import pretty_print, animate_thinking
|
from sources.utility import pretty_print, animate_thinking
|
||||||
|
|
||||||
@ -39,7 +43,8 @@ def get_chrome_path() -> str:
|
|||||||
return path
|
return path
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def create_driver(headless=False):
|
def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome:
|
||||||
|
"""Create a Chrome WebDriver with specified options."""
|
||||||
chrome_options = Options()
|
chrome_options = Options()
|
||||||
chrome_path = get_chrome_path()
|
chrome_path = get_chrome_path()
|
||||||
|
|
||||||
@ -51,20 +56,19 @@ def create_driver(headless=False):
|
|||||||
chrome_options.add_argument("--headless")
|
chrome_options.add_argument("--headless")
|
||||||
chrome_options.add_argument("--disable-gpu")
|
chrome_options.add_argument("--disable-gpu")
|
||||||
chrome_options.add_argument("--disable-webgl")
|
chrome_options.add_argument("--disable-webgl")
|
||||||
|
#ua = UserAgent()
|
||||||
|
#user_agent = ua.random # NOTE sometime return wrong user agent, investigate
|
||||||
|
#chrome_options.add_argument(f'user-agent={user_agent}')
|
||||||
chrome_options.add_argument("--no-sandbox")
|
chrome_options.add_argument("--no-sandbox")
|
||||||
chrome_options.add_argument("--disable-dev-shm-usage")
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||||
chrome_options.add_argument("--autoplay-policy=user-gesture-required")
|
chrome_options.add_argument("--autoplay-policy=user-gesture-required")
|
||||||
chrome_options.add_argument("--mute-audio")
|
chrome_options.add_argument("--mute-audio")
|
||||||
chrome_options.add_argument("--disable-notifications")
|
chrome_options.add_argument("--disable-notifications")
|
||||||
chrome_options.add_argument('--window-size=1080,560')
|
chrome_options.add_argument('--window-size=1080,560')
|
||||||
security_prefs = {
|
try:
|
||||||
"profile.default_content_setting_values.media_stream": 2,
|
chrome_options.add_extension("./anticaptcha.crx")
|
||||||
"profile.default_content_setting_values.geolocation": 2,
|
except Exception as e:
|
||||||
"safebrowsing.enabled": True,
|
print(f"Failed to load AntiCaptcha extension: {str(e)}")
|
||||||
}
|
|
||||||
chrome_options.add_experimental_option("prefs", security_prefs)
|
|
||||||
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
|
||||||
chrome_options.add_experimental_option('useAutomationExtension', False)
|
|
||||||
|
|
||||||
chromedriver_path = shutil.which("chromedriver")
|
chromedriver_path = shutil.which("chromedriver")
|
||||||
if not chromedriver_path:
|
if not chromedriver_path:
|
||||||
@ -74,10 +78,29 @@ def create_driver(headless=False):
|
|||||||
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
|
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
|
||||||
|
|
||||||
service = Service(chromedriver_path)
|
service = Service(chromedriver_path)
|
||||||
|
if stealth_mode:
|
||||||
|
driver = uc.Chrome(service=service, options=chrome_options)
|
||||||
|
stealth(driver,
|
||||||
|
languages=["en-US", "en"],
|
||||||
|
vendor="Google Inc.",
|
||||||
|
platform="Win32",
|
||||||
|
webgl_vendor="Intel Inc.",
|
||||||
|
renderer="Intel Iris OpenGL Engine",
|
||||||
|
fix_hairline=True,
|
||||||
|
)
|
||||||
|
return driver
|
||||||
|
security_prefs = {
|
||||||
|
"profile.default_content_setting_values.media_stream": 2,
|
||||||
|
"profile.default_content_setting_values.geolocation": 2,
|
||||||
|
"safebrowsing.enabled": True,
|
||||||
|
}
|
||||||
|
chrome_options.add_experimental_option("prefs", security_prefs)
|
||||||
|
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||||
|
chrome_options.add_experimental_option('useAutomationExtension', False)
|
||||||
return webdriver.Chrome(service=service, options=chrome_options)
|
return webdriver.Chrome(service=service, options=chrome_options)
|
||||||
|
|
||||||
class Browser:
|
class Browser:
|
||||||
def __init__(self, driver, anticaptcha_install=True):
|
def __init__(self, driver, anticaptcha_manual_install=False):
|
||||||
"""Initialize the browser with optional AntiCaptcha installation."""
|
"""Initialize the browser with optional AntiCaptcha installation."""
|
||||||
self.js_scripts_folder = "./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/"
|
self.js_scripts_folder = "./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/"
|
||||||
self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related"
|
self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related"
|
||||||
@ -88,10 +111,11 @@ class Browser:
|
|||||||
self.logger.info("Browser initialized successfully")
|
self.logger.info("Browser initialized successfully")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception(f"Failed to initialize browser: {str(e)}")
|
raise Exception(f"Failed to initialize browser: {str(e)}")
|
||||||
if anticaptcha_install:
|
self.driver.get("https://www.google.com")
|
||||||
self.load_anticatpcha()
|
if anticaptcha_manual_install:
|
||||||
|
self.load_anticatpcha_manually()
|
||||||
|
|
||||||
def load_anticatpcha(self):
|
def load_anticatpcha_manually(self):
|
||||||
print("You might want to install the AntiCaptcha extension for captchas.")
|
print("You might want to install the AntiCaptcha extension for captchas.")
|
||||||
self.driver.get(self.anticaptcha)
|
self.driver.get(self.anticaptcha)
|
||||||
|
|
||||||
@ -129,11 +153,11 @@ class Browser:
|
|||||||
for element in soup(['script', 'style']):
|
for element in soup(['script', 'style']):
|
||||||
element.decompose()
|
element.decompose()
|
||||||
|
|
||||||
text = soup.get_text()
|
|
||||||
|
|
||||||
lines = (line.strip() for line in text.splitlines())
|
lines = (line.strip() for line in text.splitlines())
|
||||||
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
||||||
text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk))
|
text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk))
|
||||||
|
text = text[:4096]
|
||||||
#markdown_text = markdownify.markdownify(text, heading_style="ATX")
|
#markdown_text = markdownify.markdownify(text, heading_style="ATX")
|
||||||
return "[Start of page]\n" + text + "\n[End of page]"
|
return "[Start of page]\n" + text + "\n[End of page]"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user