Update undetected_chromedriver to v3.5.0 (#803)

Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
This commit is contained in:
Maksim Kurnosenko 2023-07-18 00:46:43 +07:00 committed by GitHub
parent c920bea4ca
commit 7728f2ab31
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 166 additions and 49 deletions

View File

@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
from __future__ import annotations
__version__ = "3.4.6"
__version__ = "3.5.0"
import json
import logging
@ -33,7 +33,7 @@ from weakref import finalize
import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.common.service
import selenium.webdriver.chromium.service
import selenium.webdriver.remote.command
import selenium.webdriver.remote.webdriver
@ -109,11 +109,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
browser_executable_path=None,
port=0,
enable_cdp_events=False,
service_args=None,
service_creationflags=None,
# service_args=None,
# service_creationflags=None,
desired_capabilities=None,
advanced_elements=False,
service_log_path=None,
# service_log_path=None,
keep_alive=True,
log_level=0,
headless=False,
@ -124,6 +124,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
debug=False,
no_sandbox=True,
windows_headless=False,
user_multi_procs: bool = False,
**kw,
):
"""
@ -235,6 +236,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
when running as root without using --no-sandbox flag.
user_multi_procs:
set to true when you are using multithreading/multiprocessing.
ensures that not every process tries to modify a binary which is in use by another.
for this to work, YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
this requirement can easily be satisfied by running this program once "normally" and then closing/killing it.
"""
finalize(self, self._ensure_close, self)
@ -243,8 +252,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
executable_path=driver_executable_path,
force=patcher_force_close,
version_main=version_main,
user_multi_procs=user_multi_procs,
)
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
self.patcher.auto()
# self.patcher = patcher
if not options:
options = ChromeOptions()
@ -372,10 +384,16 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options.arguments.extend(["--no-sandbox", "--test-type"])
if headless or options.headless:
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
if v_main < 108:
options.add_argument("--headless=chrome")
elif v_main >= 108:
#workaround until a better checking is found
try:
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
if v_main < 108:
options.add_argument("--headless=chrome")
elif v_main >= 108:
options.add_argument("--headless=new")
except:
logger.warning("could not detect version_main."
"therefore, we are assuming it is chrome 108 or higher")
options.add_argument("--headless=new")
options.add_argument("--window-size=1920,1080")
@ -433,26 +451,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
)
self.browser_pid = browser.pid
if service_creationflags:
service = selenium.webdriver.common.service.Service(
self.patcher.executable_path, port, service_args, service_log_path
)
for attr_name in ("creationflags", "creation_flags"):
if hasattr(service, attr_name):
setattr(service, attr_name, service_creationflags)
break
else:
service = None
service = selenium.webdriver.chromium.service.ChromiumService(
self.patcher.executable_path
)
super(Chrome, self).__init__(
executable_path=self.patcher.executable_path,
port=port,
service=service,
options=options,
service_args=service_args,
desired_capabilities=desired_capabilities,
service_log_path=service_log_path,
keep_alive=keep_alive,
service=service, # needed or the service will be re-created
)
self.reactor = None
@ -708,10 +715,45 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if not capabilities:
capabilities = self.options.to_capabilities()
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
capabilities, browser_profile
capabilities
)
# super(Chrome, self).start_session(capabilities, browser_profile)
def find_elements_recursive(self, by, value):
    """
    Yield matching elements from the main content and from each of its iframes.

    This is a generator on purpose: if a list of elements were returned,
    they would be stale on arrival. By yielding, each element is handed out
    while the driver is still switched to the frame it belongs to, so it can
    be used directly.

    The iframes are looked up once, from the main content, after the main
    content itself has been searched.

    Args:
        by: By
        value: str

    Returns: Generator[webelement.WebElement]
    """

    def matches_in(frame=None):
        if frame:
            self.switch_to.frame(frame)
        else:
            # no frame given: make sure we are on the main content
            self.switch_to.default_content()
        yield from self.find_elements(by, value)
        # go back to the main content, otherwise we will get
        # StaleElementReferenceException
        self.switch_to.default_content()

    # main content first
    yield from matches_in()

    # then every iframe found in the main content
    for frame in self.find_elements('css selector', 'iframe'):
        yield from matches_in(frame)
def quit(self):
try:
self.service.process.kill()
@ -728,7 +770,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
os.kill(self.browser_pid, 15)
logger.debug("gracefully closed browser")
except Exception as e: # noqa
logger.debug(e, exc_info=True)
pass
# Force kill Chrome process in Windows
# https://github.com/FlareSolverr/FlareSolverr/issues/772
if os.name == 'nt':
@ -856,5 +898,7 @@ def find_chrome_executable():
):
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates:
logger.debug('checking if %s exists and is executable' % candidate)
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
logger.debug('found! using %s' % candidate)
return os.path.normpath(candidate)

View File

@ -5,15 +5,17 @@ from distutils.version import LooseVersion
import io
import logging
import os
import pathlib
import random
import re
import shutil
import string
import sys
import time
from urllib.request import urlopen
from urllib.request import urlretrieve
import zipfile
from multiprocessing import Lock
logger = logging.getLogger(__name__)
@ -21,6 +23,7 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
class Patcher(object):
lock = Lock()
url_repo = "https://chromedriver.storage.googleapis.com"
zip_name = "chromedriver_%s.zip"
exe_name = "chromedriver%s"
@ -48,7 +51,13 @@ class Patcher(object):
d = "~/.undetected_chromedriver"
data_path = os.path.abspath(os.path.expanduser(d))
def __init__(self, executable_path=None, force=False, version_main: int = 0):
def __init__(
self,
executable_path=None,
force=False,
version_main: int = 0,
user_multi_procs=False,
):
"""
Args:
executable_path: None = automatic
@ -61,6 +70,7 @@ class Patcher(object):
self.force = force
self._custom_exe_path = False
prefix = "undetected"
self.user_multi_procs = user_multi_procs
if not os.path.exists(self.data_path):
os.makedirs(self.data_path, exist_ok=True)
@ -78,17 +88,41 @@ class Patcher(object):
self.zip_path = os.path.join(self.data_path, prefix)
if not executable_path:
self.executable_path = os.path.abspath(
os.path.join(".", self.executable_path)
)
if not self.user_multi_procs:
self.executable_path = os.path.abspath(
os.path.join(".", self.executable_path)
)
if executable_path:
self._custom_exe_path = True
self.executable_path = executable_path
self.version_main = version_main
self.version_full = None
def auto(self, executable_path=None, force=False, version_main=None):
def auto(self, executable_path=None, force=False, version_main=None, _=None):
"""
Args:
executable_path:
force:
version_main:
Returns:
"""
# if self.user_multi_procs and \
# self.user_multi_procs != -1:
# # -1 being a skip value used later in this block
#
p = pathlib.Path(self.data_path)
with Lock():
files = list(p.rglob("*chromedriver*?"))
for file in files:
if self.is_binary_patched(file):
self.executable_path = str(file)
return True
if executable_path:
self.executable_path = executable_path
self._custom_exe_path = True
@ -127,6 +161,49 @@ class Patcher(object):
self.unzip_package(self.fetch_package())
return self.patch()
def driver_binary_in_use(self, path: str = None) -> bool:
    """
    Naive test to check if a found chromedriver binary is currently in use.

    The file is opened in append+binary mode, then seeked and read. A failure
    at any of those steps is taken to mean the file is in use.

    Args:
        path: a string or PathLike object to the binary to check.
              if not specified, this object's executable_path is checked.

    Returns:
        True if the file could not be opened, seeked or read,
        False otherwise.

    Raises:
        OSError: if the file does not exist.
    """
    if not path:
        path = self.executable_path
    p = pathlib.Path(path)

    if not p.exists():
        raise OSError("file does not exist: %s" % p)
    try:
        with open(p, mode="a+b") as fs:
            # since some systems apparently allow seeking,
            # we conduct another test by reading as well
            fs.seek(0, 0)
            fs.readline()
    except OSError:
        # covers PermissionError as well as a failing open() itself;
        # ok safe to assume this is in use
        return True
    return False
def cleanup_unused_files(self):
    """
    Print the entries directly inside data_path whose name contains "undetected".

    NOTE: despite its name, this method does not remove anything; it only
    prints the list of matching paths.
    """
    matches = [entry for entry in pathlib.Path(self.data_path).glob("*undetected*")]
    print(matches)
def patch(self):
    """
    Run patch_exe() and report the result of is_binary_patched().

    Returns:
        whatever is_binary_patched() returns after patching
    """
    self.patch_exe()
    patched = self.is_binary_patched()
    return patched
@ -255,21 +332,17 @@ class Patcher(object):
else:
timeout = 3 # stop trying after this many seconds
t = time.monotonic()
while True:
now = time.monotonic()
if now - t > timeout:
# we don't want to wait until the end of time
logger.debug(
"could not unlink %s in time (%d seconds)"
% (self.executable_path, timeout)
)
break
now = lambda: time.monotonic()
while now() - t > timeout:
# we don't want to wait until the end of time
try:
if self.user_multi_procs:
break
os.unlink(self.executable_path)
logger.debug("successfully unlinked %s" % self.executable_path)
break
except (OSError, RuntimeError, PermissionError):
time.sleep(0.1)
time.sleep(0.01)
continue
except FileNotFoundError:
break