From 7728f2ab317ea4b1a9a417b65465e130eb3f337f Mon Sep 17 00:00:00 2001 From: Maksim Kurnosenko <12829428+DX37@users.noreply.github.com> Date: Tue, 18 Jul 2023 00:46:43 +0700 Subject: [PATCH] Update undetected_chromedriver to v3.5.0 (#803) Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com> --- src/undetected_chromedriver/__init__.py | 110 +++++++++++++++++------- src/undetected_chromedriver/patcher.py | 105 ++++++++++++++++++---- 2 files changed, 166 insertions(+), 49 deletions(-) diff --git a/src/undetected_chromedriver/__init__.py b/src/undetected_chromedriver/__init__.py index 29ef76a..5150cba 100644 --- a/src/undetected_chromedriver/__init__.py +++ b/src/undetected_chromedriver/__init__.py @@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) from __future__ import annotations -__version__ = "3.4.6" +__version__ = "3.5.0" import json import logging @@ -33,7 +33,7 @@ from weakref import finalize import selenium.webdriver.chrome.service import selenium.webdriver.chrome.webdriver from selenium.webdriver.common.by import By -import selenium.webdriver.common.service +import selenium.webdriver.chromium.service import selenium.webdriver.remote.command import selenium.webdriver.remote.webdriver @@ -109,11 +109,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): browser_executable_path=None, port=0, enable_cdp_events=False, - service_args=None, - service_creationflags=None, + # service_args=None, + # service_creationflags=None, desired_capabilities=None, advanced_elements=False, - service_log_path=None, + # service_log_path=None, keep_alive=True, log_level=0, headless=False, @@ -122,8 +122,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): suppress_welcome=True, use_subprocess=False, debug=False, - no_sandbox=True, + no_sandbox=True, windows_headless=False, + user_multi_procs: bool = False, **kw, ): """ @@ -235,6 +236,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): 
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar this option has a default of True since many people seem to run this as root (....) , and chrome does not start when running as root without using --no-sandbox flag. + + user_multi_procs: + set to true when you are using multithreads/multiprocessing + ensures not all processes are trying to modify a binary which is in use by another. + for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER. + this requirement can be easily satisfied, by just running this program "normal" and close/kill it. + + """ finalize(self, self._ensure_close, self) @@ -243,8 +252,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): executable_path=driver_executable_path, force=patcher_force_close, version_main=version_main, + user_multi_procs=user_multi_procs, ) + # self.patcher.auto(user_multiprocess = user_multi_num_procs) self.patcher.auto() + # self.patcher = patcher if not options: options = ChromeOptions() @@ -372,10 +384,16 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): options.arguments.extend(["--no-sandbox", "--test-type"]) if headless or options.headless: - v_main = int(self.patcher.version_main) if self.patcher.version_main else 108 - if v_main < 108: - options.add_argument("--headless=chrome") - elif v_main >= 108: + #workaround until a better checking is found + try: + v_main = int(self.patcher.version_main) if self.patcher.version_main else 108 + if v_main < 108: + options.add_argument("--headless=chrome") + elif v_main >= 108: + options.add_argument("--headless=new") + except: + logger.warning("could not detect version_main." 
+ "therefore, we are assuming it is chrome 108 or higher") options.add_argument("--headless=new") options.add_argument("--window-size=1920,1080") @@ -419,40 +437,29 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): self.browser_pid = start_detached( options.binary_location, *options.arguments ) - else: - startupinfo = subprocess.STARTUPINFO() - if os.name == 'nt' and windows_headless: + else: + startupinfo = subprocess.STARTUPINFO() + if os.name == 'nt' and windows_headless: startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW browser = subprocess.Popen( [options.binary_location, *options.arguments], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - close_fds=IS_POSIX, + close_fds=IS_POSIX, startupinfo=startupinfo ) self.browser_pid = browser.pid - if service_creationflags: - service = selenium.webdriver.common.service.Service( - self.patcher.executable_path, port, service_args, service_log_path - ) - for attr_name in ("creationflags", "creation_flags"): - if hasattr(service, attr_name): - setattr(service, attr_name, service_creationflags) - break - else: - service = None + + service = selenium.webdriver.chromium.service.ChromiumService( + self.patcher.executable_path + ) super(Chrome, self).__init__( - executable_path=self.patcher.executable_path, - port=port, + service=service, options=options, - service_args=service_args, - desired_capabilities=desired_capabilities, - service_log_path=service_log_path, keep_alive=keep_alive, - service=service, # needed or the service will be re-created ) self.reactor = None @@ -708,13 +715,48 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): if not capabilities: capabilities = self.options.to_capabilities() super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session( - capabilities, browser_profile + capabilities ) # super(Chrome, self).start_session(capabilities, browser_profile) + def find_elements_recursive(self, by, value): + """ + find elements in all frames 
+ this is a generator function, which is needed + since if it would return a list of elements, they + will be stale on arrival. + using generator, when the element is returned we are in the correct frame + to use it directly + Args: + by: By + value: str + Returns: Generator[webelement.WebElement] + """ + def search_frame(f=None): + if not f: + # ensure we are on main content frame + self.switch_to.default_content() + else: + self.switch_to.frame(f) + for elem in self.find_elements(by, value): + yield elem + # switch back to main content, otherwise we will get StaleElementReferenceException + self.switch_to.default_content() + + # search root frame + for elem in search_frame(): + yield elem + # get iframes + frames = self.find_elements('css selector', 'iframe') + + # search per frame + for f in frames: + for elem in search_frame(f): + yield elem + def quit(self): try: - self.service.process.kill() + self.service.process.kill() self.service.process.wait(5) logger.debug("webdriver process ended") except (AttributeError, RuntimeError, OSError): @@ -728,7 +770,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): os.kill(self.browser_pid, 15) logger.debug("gracefully closed browser") except Exception as e: # noqa - logger.debug(e, exc_info=True) + pass # Force kill Chrome process in Windows # https://github.com/FlareSolverr/FlareSolverr/issues/772 if os.name == 'nt': @@ -856,5 +898,7 @@ def find_chrome_executable(): ): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: + logger.debug('checking if %s exists and is executable' % candidate) if os.path.exists(candidate) and os.access(candidate, os.X_OK): + logger.debug('found! 
using %s' % candidate) return os.path.normpath(candidate) diff --git a/src/undetected_chromedriver/patcher.py b/src/undetected_chromedriver/patcher.py index 24da802..d083dc3 100644 --- a/src/undetected_chromedriver/patcher.py +++ b/src/undetected_chromedriver/patcher.py @@ -5,15 +5,17 @@ from distutils.version import LooseVersion import io import logging import os +import pathlib import random import re +import shutil import string import sys import time from urllib.request import urlopen from urllib.request import urlretrieve import zipfile - +from multiprocessing import Lock logger = logging.getLogger(__name__) @@ -21,6 +23,7 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2")) class Patcher(object): + lock = Lock() url_repo = "https://chromedriver.storage.googleapis.com" zip_name = "chromedriver_%s.zip" exe_name = "chromedriver%s" @@ -48,7 +51,13 @@ class Patcher(object): d = "~/.undetected_chromedriver" data_path = os.path.abspath(os.path.expanduser(d)) - def __init__(self, executable_path=None, force=False, version_main: int = 0): + def __init__( + self, + executable_path=None, + force=False, + version_main: int = 0, + user_multi_procs=False, + ): """ Args: executable_path: None = automatic @@ -61,6 +70,7 @@ class Patcher(object): self.force = force self._custom_exe_path = False prefix = "undetected" + self.user_multi_procs = user_multi_procs if not os.path.exists(self.data_path): os.makedirs(self.data_path, exist_ok=True) @@ -78,17 +88,41 @@ class Patcher(object): self.zip_path = os.path.join(self.data_path, prefix) if not executable_path: - self.executable_path = os.path.abspath( - os.path.join(".", self.executable_path) - ) + if not self.user_multi_procs: + self.executable_path = os.path.abspath( + os.path.join(".", self.executable_path) + ) if executable_path: self._custom_exe_path = True self.executable_path = executable_path + self.version_main = version_main self.version_full = None - def auto(self, executable_path=None, 
force=False, version_main=None): + def auto(self, executable_path=None, force=False, version_main=None, _=None): + """ + + Args: + executable_path: + force: + version_main: + + Returns: + + """ + # if self.user_multi_procs and \ + # self.user_multi_procs != -1: + # # -1 being a skip value used later in this block + # + p = pathlib.Path(self.data_path) + with Lock(): + files = list(p.rglob("*chromedriver*?")) + for file in files: + if self.is_binary_patched(file): + self.executable_path = str(file) + return True + if executable_path: self.executable_path = executable_path self._custom_exe_path = True @@ -127,6 +161,49 @@ class Patcher(object): self.unzip_package(self.fetch_package()) return self.patch() + def driver_binary_in_use(self, path: str = None) -> bool: + """ + naive test to check if a found chromedriver binary is + currently in use + + Args: + path: a string or PathLike object to the binary to check. + if not specified, we check use this object's executable_path + """ + if not path: + path = self.executable_path + p = pathlib.Path(path) + + if not p.exists(): + raise OSError("file does not exist: %s" % p) + try: + with open(p, mode="a+b") as fs: + exc = [] + try: + + fs.seek(0, 0) + except PermissionError as e: + exc.append(e) # since some systems apprently allow seeking + # we conduct another test + try: + fs.readline() + except PermissionError as e: + exc.append(e) + + if exc: + + return True + return False + # ok safe to assume this is in use + except Exception as e: + # logger.exception("whoops ", e) + pass + + def cleanup_unused_files(self): + p = pathlib.Path(self.data_path) + items = list(p.glob("*undetected*")) + print(items) + def patch(self): self.patch_exe() return self.is_binary_patched() @@ -255,21 +332,17 @@ class Patcher(object): else: timeout = 3 # stop trying after this many seconds t = time.monotonic() - while True: - now = time.monotonic() - if now - t > timeout: - # we don't want to wait until the end of time - logger.debug( - "could 
not unlink %s in time (%d seconds)" - % (self.executable_path, timeout) - ) - break + now = lambda: time.monotonic() + while now() - t < timeout: + # keep retrying until `timeout` seconds have elapsed; we don't want to wait until the end of time try: + if self.user_multi_procs: + break os.unlink(self.executable_path) logger.debug("successfully unlinked %s" % self.executable_path) break except (OSError, RuntimeError, PermissionError): - time.sleep(0.1) + time.sleep(0.01) continue except FileNotFoundError: break