diff --git a/install.sh b/install.sh index 93b8cf9..d70706e 100755 --- a/install.sh +++ b/install.sh @@ -7,12 +7,14 @@ echo "Detecting operating system..." OS_TYPE=$(uname -s) + case "$OS_TYPE" in "Linux"*) echo "Detected Linux OS" if [ -f "$SCRIPTS_DIR/linux_install.sh" ]; then echo "Running Linux installation script..." bash "$SCRIPTS_DIR/linux_install.sh" + bash -c "wget https://github.com/Fosowl/fosowl.github.io/raw/refs/heads/main/usefull/anticaptcha.crx" bash -c "cd $LLM_ROUTER_DIR && ./dl_safetensors.sh" else echo "Error: $SCRIPTS_DIR/linux_install.sh not found!" @@ -24,6 +26,7 @@ case "$OS_TYPE" in if [ -f "$SCRIPTS_DIR/macos_install.sh" ]; then echo "Running macOS installation script..." bash "$SCRIPTS_DIR/macos_install.sh" + bash -c "wget https://github.com/Fosowl/fosowl.github.io/raw/refs/heads/main/usefull/anticaptcha.crx" bash -c "cd $LLM_ROUTER_DIR && ./dl_safetensors.sh" else echo "Error: $SCRIPTS_DIR/macos_install.sh not found!" diff --git a/requirements.txt b/requirements.txt index 1a9f46b..8586850 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,9 @@ distro>=1.7.0,<2 jiter>=0.4.0,<1 sniffio tqdm>4 +fake_useragent>=2.1.0 +selenium_stealth>=1.0.6 +undetected-chromedriver>=3.5.5 # for api provider openai # if use chinese diff --git a/scripts/macos_install.sh b/scripts/macos_install.sh index 8a4bd5c..5e648c2 100644 --- a/scripts/macos_install.sh +++ b/scripts/macos_install.sh @@ -5,6 +5,9 @@ echo "Starting installation for macOS..." # Install Python dependencies from requirements.txt pip3 install -r requirements.txt +# make sure wget installed +brew install wget + # Install chromedriver using Homebrew brew install --cask chromedriver diff --git a/setup.py b/setup.py index 8f22c71..4af1bf3 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,9 @@ setup( "anyio>=3.5.0,<5", "distro>=1.7.0,<2", "jiter>=0.4.0,<1", + "fake_useragent>=2.1.0", + "selenium_stealth>=1.0.6", + "undetected-chromedriver>=3.5.5", "sniffio", "tqdm>4" ], diff --git a/sources/browser.py b/sources/browser.py index c3d8fbf..95e86ab 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -7,17 +7,21 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.chrome.options import Options +from bs4 import BeautifulSoup +from urllib.parse import urlparse from typing import List, Tuple +from fake_useragent import UserAgent +from selenium_stealth import stealth +import undetected_chromedriver as uc import chromedriver_autoinstaller import time +import random import os import shutil -from bs4 import BeautifulSoup import markdownify import logging import sys import re -from urllib.parse import urlparse from sources.utility import pretty_print, animate_thinking @@ -39,7 +43,8 @@ def get_chrome_path() -> str: return path return None -def create_driver(headless=False): +def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome: + """Create a Chrome WebDriver with specified options.""" chrome_options = Options() chrome_path = get_chrome_path() @@ -51,20 +56,19 @@ def create_driver(headless=False): chrome_options.add_argument("--headless") chrome_options.add_argument("--disable-gpu") chrome_options.add_argument("--disable-webgl") + #ua = UserAgent() + #user_agent = ua.random # NOTE sometime return wrong user agent, investigate + #chrome_options.add_argument(f'user-agent={user_agent}') chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("--autoplay-policy=user-gesture-required") chrome_options.add_argument("--mute-audio") chrome_options.add_argument("--disable-notifications") chrome_options.add_argument('--window-size=1080,560') - security_prefs = { - "profile.default_content_setting_values.media_stream": 2, - "profile.default_content_setting_values.geolocation": 2, - "safebrowsing.enabled": True, - } - chrome_options.add_experimental_option("prefs", security_prefs) - chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"]) - chrome_options.add_experimental_option('useAutomationExtension', False) + try: + chrome_options.add_extension("./anticaptcha.crx") + except Exception as e: + print(f"Failed to load AntiCaptcha extension: {str(e)}") chromedriver_path = shutil.which("chromedriver") if not chromedriver_path: @@ -74,10 +78,29 @@ def create_driver(headless=False): raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.") service = Service(chromedriver_path) + if stealth_mode: + driver = uc.Chrome(service=service, options=chrome_options) + stealth(driver, + languages=["en-US", "en"], + vendor="Google Inc.", + platform="Win32", + webgl_vendor="Intel Inc.", + renderer="Intel Iris OpenGL Engine", + fix_hairline=True, + ) + return driver + security_prefs = { + "profile.default_content_setting_values.media_stream": 2, + "profile.default_content_setting_values.geolocation": 2, + "safebrowsing.enabled": True, + } + chrome_options.add_experimental_option("prefs", security_prefs) + chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"]) + chrome_options.add_experimental_option('useAutomationExtension', False) return webdriver.Chrome(service=service, options=chrome_options) class Browser: - def __init__(self, driver, anticaptcha_install=True): + def __init__(self, driver, anticaptcha_manual_install=False): """Initialize the browser with optional AntiCaptcha installation.""" self.js_scripts_folder = "./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/" self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related" @@ -88,10 +111,11 @@ class Browser: self.logger.info("Browser initialized successfully") except Exception as e: raise Exception(f"Failed to initialize browser: {str(e)}") - if anticaptcha_install: - self.load_anticatpcha() + self.driver.get("https://www.google.com") + if anticaptcha_manual_install: + self.load_anticatpcha_manually() - def load_anticatpcha(self): + def load_anticatpcha_manually(self): print("You might want to install the AntiCaptcha extension for captchas.") self.driver.get(self.anticaptcha) @@ -129,11 +153,11 @@ class Browser: for element in soup(['script', 'style']): element.decompose() - text = soup.get_text() lines = (line.strip() for line in text.splitlines()) chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) text = "\n".join(chunk for chunk in chunks if chunk and self.is_sentence(chunk)) + text = text[:4096] #markdown_text = markdownify.markdownify(text, heading_style="ATX") return "[Start of page]\n" + text + "\n[End of page]" except Exception as e: