diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b5149c..8535429 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,58 @@ # Changelog -## v3.3.5 (2023/09/15) +## v3.3.16 (2024/02/28) + +* Fix of the subprocess.STARTUPINFO() call. Thanks @ceconelo +* Add FreeBSD support. Thanks @Asthowen +* Use headless configuration properly. Thanks @hashworks + +## v3.3.15 (2024/02/20) + +* Fix looping challenges + +## v3.3.14-hotfix2 (2024/02/17) + +* Hotfix 2 - bad Chromium build, instances failed to terminate + +## v3.3.14-hotfix (2024/02/17) + +* Hotfix for Linux build - some Chrome files no longer exist + +## v3.3.14 (2024/02/17) + +* Update Chrome downloads. Thanks @opemvbs + +## v3.3.13 (2024/01/07) + +* Fix too many open files error + +## v3.3.12 (2023/12/15) + +* Fix looping challenges and invalid cookies + +## v3.3.11 (2023/12/11) + +* Update UC 3.5.4 & Selenium 4.15.2. Thanks @txtsd + +## v3.3.10 (2023/11/14) + +* Add LANG ENV - resolves issues with YGGtorrent + +## v3.3.9 (2023/11/13) + +* Fix for Docker build, capture TypeError + +## v3.3.8 (2023/11/13) + +* Fix headless=true for Chrome 117+. Thanks @NabiKAZ +* Support running Chrome 119 from source. Thanks @koleg and @Chris7X +* Fix "OSError: [WinError 6] The handle is invalid" on exit. Thanks @enesgorkemgenc + +## v3.3.7 (2023/11/05) + +* Bump to rebuild. Thanks @JoachimDorchies + +## v3.3.6 (2023/09/15) * Update checkbox selector, again diff --git a/Dockerfile b/Dockerfile index 5c0b670..fde140c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,17 +62,17 @@ ENTRYPOINT ["/usr/bin/dumb-init", "--"] CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"] # Local build -# docker build -t ngosang/flaresolverr:3.3.6 . -# docker run -p 8191:8191 ngosang/flaresolverr:3.3.6 +# docker build -t ngosang/flaresolverr:3.3.16 . +# docker run -p 8191:8191 ngosang/flaresolverr:3.3.16 # Multi-arch build # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes # docker buildx create --use -# docker buildx build -t ngosang/flaresolverr:3.3.6 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 . +# docker buildx build -t ngosang/flaresolverr:3.3.16 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 . # add --push to publish in DockerHub # Test multi-arch build # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes # docker buildx create --use -# docker buildx build -t ngosang/flaresolverr:3.3.6 --platform linux/arm/v7 --load . -# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.3.6 +# docker buildx build -t ngosang/flaresolverr:3.3.16 --platform linux/arm/v7 --load . +# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.3.16 diff --git a/README.md b/README.md index ccb3c2f..b7fba4b 100644 --- a/README.md +++ b/README.md @@ -83,23 +83,56 @@ This is the recommended way for Windows users. * Run `pip install -r requirements.txt` command to install FlareSolverr dependencies. * Run `python src/flaresolverr.py` command to start FlareSolverr. +### From source code (FreeBSD/TrueNAS CORE) + +* Run `pkg install chromium python39 py39-pip xorg-vfbserver` command to install the required dependencies. +* Clone this repository and open a shell in that path. +* Run `python3.9 -m pip install -r requirements.txt` command to install FlareSolverr dependencies. +* Run `python3.9 src/flaresolverr.py` command to start FlareSolverr. + ### Systemd service We provide an example Systemd unit file `flaresolverr.service` as reference. You have to modify the file to suit your needs: paths, user and environment variables. ## Usage -Example request: +Example Bash request: ```bash curl -L -X POST 'http://localhost:8191/v1' \ -H 'Content-Type: application/json' \ --data-raw '{ "cmd": "request.get", - "url":"http://www.google.com/", + "url": "http://www.google.com/", "maxTimeout": 60000 }' ``` +Example Python request: +```py +import requests + +url = "http://localhost:8191/v1" +headers = {"Content-Type": "application/json"} +data = { + "cmd": "request.get", + "url": "http://www.google.com/", + "maxTimeout": 60000 +} +response = requests.post(url, headers=headers, json=data) +print(response.text) +``` + +Example PowerShell request: +```ps1 +$body = @{ + cmd = "request.get" + url = "http://www.google.com/" + maxTimeout = 60000 +} | ConvertTo-Json + +irm -UseBasicParsing 'http://localhost:8191/v1' -Headers @{"Content-Type"="application/json"} -Method Post -Body $body +``` + ### Commands #### + `sessions.create` @@ -239,6 +272,7 @@ Also you can set xpathWaitTimeout parameter to control how much browser will wai | LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. | | CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. | | TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. | +| LANG | none | Language used in the web browser. Example: `LANG=en_GB`. | | HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. | | BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. | | TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. | @@ -249,8 +283,8 @@ Also you can set xpathWaitTimeout parameter to control how much browser will wai Environment variables are set differently depending on the operating system. Some examples: * Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command. -* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell. -* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell. +* Linux: Run `export LOG_LEVEL=debug` and then run `flaresolverr` in the same shell. +* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then run `flaresolverr.exe` in the same shell. ## Prometheus exporter diff --git a/package.json b/package.json index bac0518..7d08a19 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "flaresolverr", - "version": "3.3.6", + "version": "3.3.16", "description": "Proxy server to bypass Cloudflare protection", "author": "Diego Heras (ngosang / ngosang@hotmail.es)", "license": "MIT" diff --git a/requirements.txt b/requirements.txt index af8845a..fc002d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ bottle==0.12.25 waitress==2.1.2 -selenium==4.11.2 +selenium==4.15.2 func-timeout==4.3.5 prometheus-client==0.17.1 # required by undetected_chromedriver requests==2.31.0 certifi==2023.7.22 websockets==11.0.3 -# only required for linux -xvfbwrapper==0.2.9 +# only required for linux and macos +xvfbwrapper==0.2.9; platform_system != "Windows" # only required for windows -pefile==2023.2.7 +pefile==2023.2.7; platform_system == "Windows" diff --git a/src/build_package.py b/src/build_package.py index db85a30..5ff4956 100644 --- a/src/build_package.py +++ b/src/build_package.py @@ -25,7 +25,7 @@ def clean_files(): def download_chromium(): # https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/ - revision = "1140001" if os.name == 'nt' else '1140000' + revision = "1260008" if os.name == 'nt' else '1260015' arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64' dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux' dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome') @@ -59,8 +59,7 @@ def download_chromium(): # Give executable permissions for *nix # file * | grep executable | cut -d: -f1 print("Giving executable permissions...") - execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'nacl_helper', - 'nacl_helper_bootstrap', 'nacl_irt_x86_64.nexe', 'xdg-mime', 'xdg-settings'] + execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'xdg-mime', 'xdg-settings'] for exec_file in execs: exec_path = os.path.join(chrome_path, exec_file) os.chmod(exec_path, 0o755) diff --git a/src/flaresolverr_service.py b/src/flaresolverr_service.py index e57fbda..4dced6a 100644 --- a/src/flaresolverr_service.py +++ b/src/flaresolverr_service.py @@ -289,18 +289,35 @@ def click_verify(driver: WebDriver): time.sleep(2) +def get_correct_window(driver: WebDriver) -> WebDriver: + if len(driver.window_handles) > 1: + for window_handle in driver.window_handles: + driver.switch_to.window(window_handle) + current_url = driver.current_url + if not current_url.startswith("devtools://devtools"): + return driver + return driver + + +def access_page(driver: WebDriver, url: str) -> None: + driver.get(url) + driver.start_session() + driver.start_session() # required to bypass Cloudflare + + def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT: res = ChallengeResolutionT({}) res.status = STATUS_OK res.message = "" + # navigate to the page logging.debug(f'Navigating to... {req.url}') if method == 'POST': _post_request(req, driver) else: - driver.get(req.url) - driver.start_session() # required to bypass Cloudflare + access_page(driver, req.url) + driver = get_correct_window(driver) # set cookies if required if req.cookies is not None and len(req.cookies) > 0: @@ -312,8 +329,8 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge if method == 'POST': _post_request(req, driver) else: - driver.get(req.url) - driver.start_session() # required to bypass Cloudflare + access_page(driver, req.url) + driver = get_correct_window(driver) # wait for the page if utils.get_config_log_html(): @@ -439,4 +456,5 @@ def _post_request(req: V1RequestBase, driver: WebDriver): """ driver.get("data:text/html;charset=utf-8," + html_content) + driver.start_session() driver.start_session() # required to bypass Cloudflare diff --git a/src/undetected_chromedriver/__init__.py b/src/undetected_chromedriver/__init__.py index 4d2e85a..1382758 100644 --- a/src/undetected_chromedriver/__init__.py +++ b/src/undetected_chromedriver/__init__.py @@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) from __future__ import annotations -__version__ = "3.5.3" +__version__ = "3.5.5" import json import logging @@ -396,7 +396,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): if no_sandbox: options.arguments.extend(["--no-sandbox", "--test-type"]) - if headless or options.headless: + if headless or getattr(options, 'headless', None): #workaround until a better checking is found try: v_main = int(self.patcher.version_main) if self.patcher.version_main else 108 @@ -451,8 +451,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): options.binary_location, *options.arguments ) else: - startupinfo = subprocess.STARTUPINFO() + startupinfo = None if os.name == 'nt' and windows_headless: + # STARTUPINFO() is Windows only + startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW browser = subprocess.Popen( [options.binary_location, *options.arguments], @@ -464,11 +466,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): ) self.browser_pid = browser.pid - # Fix for Chrome 115 - # https://github.com/seleniumbase/SeleniumBase/pull/1967 + service = selenium.webdriver.chromium.service.ChromiumService( - executable_path=self.patcher.executable_path, - service_args=["--disable-build-check"] + self.patcher.executable_path ) super(Chrome, self).__init__( @@ -493,7 +493,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): else: self._web_element_cls = WebElement - if options.headless: + if headless or getattr(options, 'headless', None): self._configure_headless() def _configure_headless(self): @@ -813,7 +813,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): else: logger.debug("successfully removed %s" % self.user_data_dir) break - time.sleep(0.1) + + try: + time.sleep(0.1) + except OSError: + pass # dereference patcher, so patcher can start cleaning up as well. # this must come last, otherwise it will throw 'in use' errors diff --git a/src/undetected_chromedriver/dprocess.py b/src/undetected_chromedriver/dprocess.py index 0fb93a8..6d053fa 100644 --- a/src/undetected_chromedriver/dprocess.py +++ b/src/undetected_chromedriver/dprocess.py @@ -41,6 +41,7 @@ def start_detached(executable, *args): # close pipes writer.close() reader.close() + process.close() return pid diff --git a/src/undetected_chromedriver/patcher.py b/src/undetected_chromedriver/patcher.py index 5776d19..a6eeab2 100644 --- a/src/undetected_chromedriver/patcher.py +++ b/src/undetected_chromedriver/patcher.py @@ -21,7 +21,7 @@ from multiprocessing import Lock logger = logging.getLogger(__name__) -IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2")) +IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2", "freebsd")) class Patcher(object): @@ -62,7 +62,17 @@ class Patcher(object): prefix = "undetected" self.user_multi_procs = user_multi_procs - self.is_old_chromedriver = version_main and version_main <= 114 + try: + # Try to convert version_main into an integer + version_main_int = int(version_main) + # check if version_main_int is less than or equal to e.g 114 + self.is_old_chromedriver = version_main and version_main_int <= 114 + except (ValueError,TypeError): + # If the conversion fails, print an error message + print("version_main cannot be converted to an integer") + # Set self.is_old_chromedriver to False if the conversion fails + self.is_old_chromedriver = False + # Needs to be called before self.exe_name is accessed self._set_platform_name() @@ -70,9 +80,14 @@ class Patcher(object): os.makedirs(self.data_path, exist_ok=True) if not executable_path: - self.executable_path = os.path.join( - self.data_path, "_".join([prefix, self.exe_name]) - ) + if sys.platform.startswith("freebsd"): + self.executable_path = os.path.join( + self.data_path, self.exe_name + ) + else: + self.executable_path = os.path.join( + self.data_path, "_".join([prefix, self.exe_name]) + ) if not IS_POSIX: if executable_path: @@ -117,6 +132,9 @@ class Patcher(object): else: self.platform_name = "mac-x64" self.exe_name %= "" + if self.platform.startswith("freebsd"): + self.platform_name = "freebsd" + self.exe_name %= "" def auto(self, executable_path=None, force=False, version_main=None, _=None): """ @@ -156,26 +174,56 @@ class Patcher(object): if force is True: self.force = force - try: - os.unlink(self.executable_path) - except PermissionError: - if self.force: - self.force_kill_instances(self.executable_path) - return self.auto(force=not self.force) - try: - if self.is_binary_patched(): - # assumes already running AND patched - return True - except PermissionError: - pass - # return False - except FileNotFoundError: - pass - release = self.fetch_release_number() - self.version_main = release.version[0] - self.version_full = release - self.unzip_package(self.fetch_package()) + if self.platform_name == "freebsd": + chromedriver_path = shutil.which("chromedriver") + + if not os.path.isfile(chromedriver_path) or not os.access(chromedriver_path, os.X_OK): + logging.error("Chromedriver not installed!") + return + + version_path = os.path.join(os.path.dirname(self.executable_path), "version.txt") + + process = os.popen(f'"{chromedriver_path}" --version') + chromedriver_version = process.read().split(' ')[1].split(' ')[0] + process.close() + + current_version = None + if os.path.isfile(version_path) or os.access(version_path, os.X_OK): + with open(version_path, 'r') as f: + current_version = f.read() + + if current_version != chromedriver_version: + logging.info("Copying chromedriver executable...") + shutil.copy(chromedriver_path, self.executable_path) + os.chmod(self.executable_path, 0o755) + + with open(version_path, 'w') as f: + f.write(chromedriver_version) + + logging.info("Chromedriver executable copied!") + else: + try: + os.unlink(self.executable_path) + except PermissionError: + if self.force: + self.force_kill_instances(self.executable_path) + return self.auto(force=not self.force) + try: + if self.is_binary_patched(): + # assumes already running AND patched + return True + except PermissionError: + pass + # return False + except FileNotFoundError: + pass + + release = self.fetch_release_number() + self.version_main = release.version[0] + self.version_full = release + self.unzip_package(self.fetch_package()) + return self.patch() def driver_binary_in_use(self, path: str = None) -> bool: @@ -280,7 +328,7 @@ class Patcher(object): download_url = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, zip_name) else: zip_name = zip_name.replace("_", "-", 1) - download_url = "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/%s/%s/%s" + download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s" download_url %= (self.version_full.vstring, self.platform_name, zip_name) logger.debug("downloading from %s" % download_url) diff --git a/src/utils.py b/src/utils.py index 79cf6f6..9d02d58 100644 --- a/src/utils.py +++ b/src/utils.py @@ -5,6 +5,7 @@ import re import shutil import urllib.parse import tempfile +import sys from selenium.webdriver.chrome.webdriver import WebDriver import undetected_chromedriver as uc @@ -113,7 +114,7 @@ def create_proxy_extension(proxy: dict) -> str: def get_webdriver(proxy: dict = None) -> WebDriver: - global PATCHED_DRIVER_PATH + global PATCHED_DRIVER_PATH, USER_AGENT logging.debug('Launching web browser...') # undetected_chromedriver @@ -136,6 +137,14 @@ def get_webdriver(proxy: dict = None) -> WebDriver: # https://peter.sh/experiments/chromium-command-line-switches/#use-gl options.add_argument('--use-gl=swiftshader') + language = os.environ.get('LANG', None) + if language is not None: + options.add_argument('--lang=%s' % language) + + # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910 + if USER_AGENT is not None: + options.add_argument('--user-agent=%s' % USER_AGENT) + proxy_extension_dir = None if proxy and all(key in proxy for key in ['url', 'username', 'password']): proxy_extension_dir = create_proxy_extension(proxy) @@ -145,7 +154,7 @@ def get_webdriver(proxy: dict = None) -> WebDriver: logging.debug("Using webdriver proxy: %s", proxy_url) options.add_argument('--proxy-server=%s' % proxy_url) - # note: headless mode is detected (options.headless = True) + # note: headless mode is detected (headless = True) # we launch the browser in head-full mode with the window hidden windows_headless = False if get_config_headless(): @@ -153,6 +162,10 @@ def get_webdriver(proxy: dict = None) -> WebDriver: windows_headless = True else: start_xvfb_display() + # For normal headless mode: + # options.add_argument('--headless') + + options.add_argument("--auto-open-devtools-for-tabs") # if we are inside the Docker container, we avoid downloading the driver driver_exe_path = None @@ -162,10 +175,6 @@ def get_webdriver(proxy: dict = None) -> WebDriver: driver_exe_path = "/app/chromedriver" else: version_main = get_chrome_major_version() - # Fix for Chrome 115 - # https://github.com/seleniumbase/SeleniumBase/pull/1967 - if int(version_main) > 114: - version_main = 114 if PATCHED_DRIVER_PATH is not None: driver_exe_path = PATCHED_DRIVER_PATH @@ -174,9 +183,12 @@ def get_webdriver(proxy: dict = None) -> WebDriver: # downloads and patches the chromedriver # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time - driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path, - driver_executable_path=driver_exe_path, version_main=version_main, - windows_headless=windows_headless, headless=windows_headless) + try: + driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path, + driver_executable_path=driver_exe_path, version_main=version_main, + windows_headless=windows_headless, headless=get_config_headless()) + except Exception as e: + logging.error("Error starting Chrome: %s" % e) # save the patched driver to avoid re-downloads if driver_exe_path is None: @@ -295,6 +307,8 @@ def get_user_agent(driver=None) -> str: if driver is None: driver = get_webdriver() USER_AGENT = driver.execute_script("return navigator.userAgent") + # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910 + USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE) return USER_AGENT except Exception as e: raise Exception("Error getting browser User-Agent. " + str(e))