mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-06-08 12:35:30 +00:00
Update undetected_chromedriver to v3.5.0 (#803)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
This commit is contained in:
parent
c920bea4ca
commit
7728f2ab31
@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
__version__ = "3.4.6"
|
__version__ = "3.5.0"
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@ -33,7 +33,7 @@ from weakref import finalize
|
|||||||
import selenium.webdriver.chrome.service
|
import selenium.webdriver.chrome.service
|
||||||
import selenium.webdriver.chrome.webdriver
|
import selenium.webdriver.chrome.webdriver
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
import selenium.webdriver.common.service
|
import selenium.webdriver.chromium.service
|
||||||
import selenium.webdriver.remote.command
|
import selenium.webdriver.remote.command
|
||||||
import selenium.webdriver.remote.webdriver
|
import selenium.webdriver.remote.webdriver
|
||||||
|
|
||||||
@ -109,11 +109,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
browser_executable_path=None,
|
browser_executable_path=None,
|
||||||
port=0,
|
port=0,
|
||||||
enable_cdp_events=False,
|
enable_cdp_events=False,
|
||||||
service_args=None,
|
# service_args=None,
|
||||||
service_creationflags=None,
|
# service_creationflags=None,
|
||||||
desired_capabilities=None,
|
desired_capabilities=None,
|
||||||
advanced_elements=False,
|
advanced_elements=False,
|
||||||
service_log_path=None,
|
# service_log_path=None,
|
||||||
keep_alive=True,
|
keep_alive=True,
|
||||||
log_level=0,
|
log_level=0,
|
||||||
headless=False,
|
headless=False,
|
||||||
@ -122,8 +122,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
suppress_welcome=True,
|
suppress_welcome=True,
|
||||||
use_subprocess=False,
|
use_subprocess=False,
|
||||||
debug=False,
|
debug=False,
|
||||||
no_sandbox=True,
|
no_sandbox=True,
|
||||||
windows_headless=False,
|
windows_headless=False,
|
||||||
|
user_multi_procs: bool = False,
|
||||||
**kw,
|
**kw,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@ -235,6 +236,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
||||||
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
|
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
|
||||||
when running as root without using --no-sandbox flag.
|
when running as root without using --no-sandbox flag.
|
||||||
|
|
||||||
|
user_multi_procs:
|
||||||
|
set to true when you are using multithreads/multiprocessing
|
||||||
|
ensures not all processes are trying to modify a binary which is in use by another.
|
||||||
|
for this to work, YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
|
||||||
|
this requirement can be easily satisfied by running this program normally once and then closing/killing it.
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
finalize(self, self._ensure_close, self)
|
finalize(self, self._ensure_close, self)
|
||||||
@ -243,8 +252,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
executable_path=driver_executable_path,
|
executable_path=driver_executable_path,
|
||||||
force=patcher_force_close,
|
force=patcher_force_close,
|
||||||
version_main=version_main,
|
version_main=version_main,
|
||||||
|
user_multi_procs=user_multi_procs,
|
||||||
)
|
)
|
||||||
|
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
|
||||||
self.patcher.auto()
|
self.patcher.auto()
|
||||||
|
|
||||||
# self.patcher = patcher
|
# self.patcher = patcher
|
||||||
if not options:
|
if not options:
|
||||||
options = ChromeOptions()
|
options = ChromeOptions()
|
||||||
@ -372,10 +384,16 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
options.arguments.extend(["--no-sandbox", "--test-type"])
|
options.arguments.extend(["--no-sandbox", "--test-type"])
|
||||||
|
|
||||||
if headless or options.headless:
|
if headless or options.headless:
|
||||||
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
|
# workaround until a better check is found
|
||||||
if v_main < 108:
|
try:
|
||||||
options.add_argument("--headless=chrome")
|
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
|
||||||
elif v_main >= 108:
|
if v_main < 108:
|
||||||
|
options.add_argument("--headless=chrome")
|
||||||
|
elif v_main >= 108:
|
||||||
|
options.add_argument("--headless=new")
|
||||||
|
except:
|
||||||
|
logger.warning("could not detect version_main."
|
||||||
|
"therefore, we are assuming it is chrome 108 or higher")
|
||||||
options.add_argument("--headless=new")
|
options.add_argument("--headless=new")
|
||||||
|
|
||||||
options.add_argument("--window-size=1920,1080")
|
options.add_argument("--window-size=1920,1080")
|
||||||
@ -419,40 +437,29 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
self.browser_pid = start_detached(
|
self.browser_pid = start_detached(
|
||||||
options.binary_location, *options.arguments
|
options.binary_location, *options.arguments
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
startupinfo = subprocess.STARTUPINFO()
|
startupinfo = subprocess.STARTUPINFO()
|
||||||
if os.name == 'nt' and windows_headless:
|
if os.name == 'nt' and windows_headless:
|
||||||
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
||||||
browser = subprocess.Popen(
|
browser = subprocess.Popen(
|
||||||
[options.binary_location, *options.arguments],
|
[options.binary_location, *options.arguments],
|
||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
close_fds=IS_POSIX,
|
close_fds=IS_POSIX,
|
||||||
startupinfo=startupinfo
|
startupinfo=startupinfo
|
||||||
)
|
)
|
||||||
self.browser_pid = browser.pid
|
self.browser_pid = browser.pid
|
||||||
|
|
||||||
if service_creationflags:
|
|
||||||
service = selenium.webdriver.common.service.Service(
|
service = selenium.webdriver.chromium.service.ChromiumService(
|
||||||
self.patcher.executable_path, port, service_args, service_log_path
|
self.patcher.executable_path
|
||||||
)
|
)
|
||||||
for attr_name in ("creationflags", "creation_flags"):
|
|
||||||
if hasattr(service, attr_name):
|
|
||||||
setattr(service, attr_name, service_creationflags)
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
service = None
|
|
||||||
|
|
||||||
super(Chrome, self).__init__(
|
super(Chrome, self).__init__(
|
||||||
executable_path=self.patcher.executable_path,
|
service=service,
|
||||||
port=port,
|
|
||||||
options=options,
|
options=options,
|
||||||
service_args=service_args,
|
|
||||||
desired_capabilities=desired_capabilities,
|
|
||||||
service_log_path=service_log_path,
|
|
||||||
keep_alive=keep_alive,
|
keep_alive=keep_alive,
|
||||||
service=service, # needed or the service will be re-created
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.reactor = None
|
self.reactor = None
|
||||||
@ -708,13 +715,48 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
if not capabilities:
|
if not capabilities:
|
||||||
capabilities = self.options.to_capabilities()
|
capabilities = self.options.to_capabilities()
|
||||||
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
|
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
|
||||||
capabilities, browser_profile
|
capabilities
|
||||||
)
|
)
|
||||||
# super(Chrome, self).start_session(capabilities, browser_profile)
|
# super(Chrome, self).start_session(capabilities, browser_profile)
|
||||||
|
|
||||||
|
def find_elements_recursive(self, by, value):
|
||||||
|
"""
|
||||||
|
find elements in all frames
|
||||||
|
this is a generator function, which is needed
|
||||||
|
since if it would return a list of elements, they
|
||||||
|
will be stale on arrival.
|
||||||
|
using generator, when the element is returned we are in the correct frame
|
||||||
|
to use it directly
|
||||||
|
Args:
|
||||||
|
by: By
|
||||||
|
value: str
|
||||||
|
Returns: Generator[webelement.WebElement]
|
||||||
|
"""
|
||||||
|
def search_frame(f=None):
|
||||||
|
if not f:
|
||||||
|
# ensure we are on main content frame
|
||||||
|
self.switch_to.default_content()
|
||||||
|
else:
|
||||||
|
self.switch_to.frame(f)
|
||||||
|
for elem in self.find_elements(by, value):
|
||||||
|
yield elem
|
||||||
|
# switch back to main content, otherwise we will get StaleElementReferenceException
|
||||||
|
self.switch_to.default_content()
|
||||||
|
|
||||||
|
# search root frame
|
||||||
|
for elem in search_frame():
|
||||||
|
yield elem
|
||||||
|
# get iframes
|
||||||
|
frames = self.find_elements('css selector', 'iframe')
|
||||||
|
|
||||||
|
# search per frame
|
||||||
|
for f in frames:
|
||||||
|
for elem in search_frame(f):
|
||||||
|
yield elem
|
||||||
|
|
||||||
def quit(self):
|
def quit(self):
|
||||||
try:
|
try:
|
||||||
self.service.process.kill()
|
self.service.process.kill()
|
||||||
self.service.process.wait(5)
|
self.service.process.wait(5)
|
||||||
logger.debug("webdriver process ended")
|
logger.debug("webdriver process ended")
|
||||||
except (AttributeError, RuntimeError, OSError):
|
except (AttributeError, RuntimeError, OSError):
|
||||||
@ -728,7 +770,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
os.kill(self.browser_pid, 15)
|
os.kill(self.browser_pid, 15)
|
||||||
logger.debug("gracefully closed browser")
|
logger.debug("gracefully closed browser")
|
||||||
except Exception as e: # noqa
|
except Exception as e: # noqa
|
||||||
logger.debug(e, exc_info=True)
|
pass
|
||||||
# Force kill Chrome process in Windows
|
# Force kill Chrome process in Windows
|
||||||
# https://github.com/FlareSolverr/FlareSolverr/issues/772
|
# https://github.com/FlareSolverr/FlareSolverr/issues/772
|
||||||
if os.name == 'nt':
|
if os.name == 'nt':
|
||||||
@ -856,5 +898,7 @@ def find_chrome_executable():
|
|||||||
):
|
):
|
||||||
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
|
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
|
||||||
for candidate in candidates:
|
for candidate in candidates:
|
||||||
|
logger.debug('checking if %s exists and is executable' % candidate)
|
||||||
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
|
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
|
||||||
|
logger.debug('found! using %s' % candidate)
|
||||||
return os.path.normpath(candidate)
|
return os.path.normpath(candidate)
|
||||||
|
@ -5,15 +5,17 @@ from distutils.version import LooseVersion
|
|||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import pathlib
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import string
|
import string
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
from urllib.request import urlretrieve
|
from urllib.request import urlretrieve
|
||||||
import zipfile
|
import zipfile
|
||||||
|
from multiprocessing import Lock
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -21,6 +23,7 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
|
|||||||
|
|
||||||
|
|
||||||
class Patcher(object):
|
class Patcher(object):
|
||||||
|
lock = Lock()
|
||||||
url_repo = "https://chromedriver.storage.googleapis.com"
|
url_repo = "https://chromedriver.storage.googleapis.com"
|
||||||
zip_name = "chromedriver_%s.zip"
|
zip_name = "chromedriver_%s.zip"
|
||||||
exe_name = "chromedriver%s"
|
exe_name = "chromedriver%s"
|
||||||
@ -48,7 +51,13 @@ class Patcher(object):
|
|||||||
d = "~/.undetected_chromedriver"
|
d = "~/.undetected_chromedriver"
|
||||||
data_path = os.path.abspath(os.path.expanduser(d))
|
data_path = os.path.abspath(os.path.expanduser(d))
|
||||||
|
|
||||||
def __init__(self, executable_path=None, force=False, version_main: int = 0):
|
def __init__(
|
||||||
|
self,
|
||||||
|
executable_path=None,
|
||||||
|
force=False,
|
||||||
|
version_main: int = 0,
|
||||||
|
user_multi_procs=False,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
executable_path: None = automatic
|
executable_path: None = automatic
|
||||||
@ -61,6 +70,7 @@ class Patcher(object):
|
|||||||
self.force = force
|
self.force = force
|
||||||
self._custom_exe_path = False
|
self._custom_exe_path = False
|
||||||
prefix = "undetected"
|
prefix = "undetected"
|
||||||
|
self.user_multi_procs = user_multi_procs
|
||||||
|
|
||||||
if not os.path.exists(self.data_path):
|
if not os.path.exists(self.data_path):
|
||||||
os.makedirs(self.data_path, exist_ok=True)
|
os.makedirs(self.data_path, exist_ok=True)
|
||||||
@ -78,17 +88,41 @@ class Patcher(object):
|
|||||||
self.zip_path = os.path.join(self.data_path, prefix)
|
self.zip_path = os.path.join(self.data_path, prefix)
|
||||||
|
|
||||||
if not executable_path:
|
if not executable_path:
|
||||||
self.executable_path = os.path.abspath(
|
if not self.user_multi_procs:
|
||||||
os.path.join(".", self.executable_path)
|
self.executable_path = os.path.abspath(
|
||||||
)
|
os.path.join(".", self.executable_path)
|
||||||
|
)
|
||||||
|
|
||||||
if executable_path:
|
if executable_path:
|
||||||
self._custom_exe_path = True
|
self._custom_exe_path = True
|
||||||
self.executable_path = executable_path
|
self.executable_path = executable_path
|
||||||
|
|
||||||
self.version_main = version_main
|
self.version_main = version_main
|
||||||
self.version_full = None
|
self.version_full = None
|
||||||
|
|
||||||
def auto(self, executable_path=None, force=False, version_main=None):
|
def auto(self, executable_path=None, force=False, version_main=None, _=None):
|
||||||
|
"""
|
||||||
|
|
||||||
|
Args:
|
||||||
|
executable_path:
|
||||||
|
force:
|
||||||
|
version_main:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
# if self.user_multi_procs and \
|
||||||
|
# self.user_multi_procs != -1:
|
||||||
|
# # -1 being a skip value used later in this block
|
||||||
|
#
|
||||||
|
p = pathlib.Path(self.data_path)
|
||||||
|
with Lock():
|
||||||
|
files = list(p.rglob("*chromedriver*?"))
|
||||||
|
for file in files:
|
||||||
|
if self.is_binary_patched(file):
|
||||||
|
self.executable_path = str(file)
|
||||||
|
return True
|
||||||
|
|
||||||
if executable_path:
|
if executable_path:
|
||||||
self.executable_path = executable_path
|
self.executable_path = executable_path
|
||||||
self._custom_exe_path = True
|
self._custom_exe_path = True
|
||||||
@ -127,6 +161,49 @@ class Patcher(object):
|
|||||||
self.unzip_package(self.fetch_package())
|
self.unzip_package(self.fetch_package())
|
||||||
return self.patch()
|
return self.patch()
|
||||||
|
|
||||||
|
def driver_binary_in_use(self, path: str = None) -> bool:
|
||||||
|
"""
|
||||||
|
naive test to check if a found chromedriver binary is
|
||||||
|
currently in use
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: a string or PathLike object to the binary to check.
|
||||||
|
if not specified, we check this object's executable_path
|
||||||
|
"""
|
||||||
|
if not path:
|
||||||
|
path = self.executable_path
|
||||||
|
p = pathlib.Path(path)
|
||||||
|
|
||||||
|
if not p.exists():
|
||||||
|
raise OSError("file does not exist: %s" % p)
|
||||||
|
try:
|
||||||
|
with open(p, mode="a+b") as fs:
|
||||||
|
exc = []
|
||||||
|
try:
|
||||||
|
|
||||||
|
fs.seek(0, 0)
|
||||||
|
except PermissionError as e:
|
||||||
|
exc.append(e) # since some systems apparently allow seeking
|
||||||
|
# we conduct another test
|
||||||
|
try:
|
||||||
|
fs.readline()
|
||||||
|
except PermissionError as e:
|
||||||
|
exc.append(e)
|
||||||
|
|
||||||
|
if exc:
|
||||||
|
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
# ok safe to assume this is in use
|
||||||
|
except Exception as e:
|
||||||
|
# logger.exception("whoops ", e)
|
||||||
|
pass
|
||||||
|
|
||||||
|
def cleanup_unused_files(self):
|
||||||
|
p = pathlib.Path(self.data_path)
|
||||||
|
items = list(p.glob("*undetected*"))
|
||||||
|
print(items)
|
||||||
|
|
||||||
def patch(self):
|
def patch(self):
|
||||||
self.patch_exe()
|
self.patch_exe()
|
||||||
return self.is_binary_patched()
|
return self.is_binary_patched()
|
||||||
@ -255,21 +332,17 @@ class Patcher(object):
|
|||||||
else:
|
else:
|
||||||
timeout = 3 # stop trying after this many seconds
|
timeout = 3 # stop trying after this many seconds
|
||||||
t = time.monotonic()
|
t = time.monotonic()
|
||||||
while True:
|
now = lambda: time.monotonic()
|
||||||
now = time.monotonic()
|
while now() - t > timeout:
|
||||||
if now - t > timeout:
|
# we don't want to wait until the end of time
|
||||||
# we don't want to wait until the end of time
|
|
||||||
logger.debug(
|
|
||||||
"could not unlink %s in time (%d seconds)"
|
|
||||||
% (self.executable_path, timeout)
|
|
||||||
)
|
|
||||||
break
|
|
||||||
try:
|
try:
|
||||||
|
if self.user_multi_procs:
|
||||||
|
break
|
||||||
os.unlink(self.executable_path)
|
os.unlink(self.executable_path)
|
||||||
logger.debug("successfully unlinked %s" % self.executable_path)
|
logger.debug("successfully unlinked %s" % self.executable_path)
|
||||||
break
|
break
|
||||||
except (OSError, RuntimeError, PermissionError):
|
except (OSError, RuntimeError, PermissionError):
|
||||||
time.sleep(0.1)
|
time.sleep(0.01)
|
||||||
continue
|
continue
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user