Update undetected-chromedriver

This commit is contained in:
ngosang 2023-01-06 17:18:54 +01:00
parent 7d84f1b663
commit e163019f28
10 changed files with 1925 additions and 1729 deletions

View File

@ -4,6 +4,7 @@
* Kill Chromium processes properly to avoid defunct/zombie processes
* Include procps (ps), curl and vim packages in the Docker image
* Update undetected-chromedriver
## v3.0.0 (2023/01/04)

View File

@ -1,9 +1,9 @@
bottle==0.12.23
waitress==2.1.2
selenium==4.4.3
selenium==4.7.2
func-timeout==4.3.5
# required by undetected_chromedriver
requests==2.28.1
websockets==10.3
websockets==10.4
# only required for linux
xvfbwrapper==0.2.9

View File

@ -1,7 +1,4 @@
#!/usr/bin/env python3
from __future__ import annotations
import subprocess
"""
@ -17,33 +14,38 @@ Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
"""
from __future__ import annotations
__version__ = "3.1.5r4"
__version__ = "3.2.1"
import json
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time
import inspect
import threading
from weakref import finalize
import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.common.service
import selenium.webdriver.remote.command
import selenium.webdriver.remote.webdriver
from .cdp import CDP
from .dprocess import start_detached
from .options import ChromeOptions
from .patcher import IS_POSIX
from .patcher import Patcher
from .reactor import Reactor
from .dprocess import start_detached
from .webelement import UCWebElement
from .webelement import WebElement
__all__ = (
"Chrome",
@ -108,6 +110,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
port=0,
enable_cdp_events=False,
service_args=None,
service_creationflags=None,
desired_capabilities=None,
advanced_elements=False,
service_log_path=None,
@ -117,10 +120,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
version_main=None,
patcher_force_close=False,
suppress_welcome=True,
use_subprocess=False,
use_subprocess=True,
debug=False,
windows_headless=False,
**kw
no_sandbox=True,
**kw,
):
"""
Creates a new instance of the chrome driver.
@ -147,7 +150,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
If not specified, make sure the executable's folder is in $PATH
port: int, optional, default: 0
port you would like the service to run, if left as 0, a free port will be found.
port to be used by the chromedriver executable, this is NOT the debugger port.
leave it at 0 unless you know what you are doing.
the default value of 0 automatically picks an available port.
enable_cdp_events: bool, default: False
:: currently for chrome only
@ -207,11 +212,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
now, in case you are nag-fetishist, or a diagnostics data feeder to google, you can set this to False.
Note: if you don't handle the nag screen in time, the browser loses it's connection and throws an Exception.
use_subprocess: bool, optional , default: False,
use_subprocess: bool, optional , default: True,
False (the default) makes sure Chrome will get it's own process (so no subprocess of chromedriver.exe or python
This fixes a LOT of issues, like multithreaded run, but mst importantly. shutting corectly after
program exits or using .quit()
you should be knowing what you're doing, and know how python works.
unfortunately, there is always an edge case in which one would like to write an single script with the only contents being:
--start script--
@ -224,7 +230,13 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of times.
! setting it to True comes with NO support when being detected. !
no_sandbox: bool, optional, default=True
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
when running as root without using --no-sandbox flag.
"""
finalize(self, self._ensure_close, self)
self.debug = debug
patcher = Patcher(
executable_path=driver_executable_path,
@ -236,7 +248,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if not options:
options = ChromeOptions()
try:
if hasattr(options, "_session") and options._session is not None:
# prevent reuse of options,
@ -248,11 +259,17 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options._session = self
debug_port = selenium.webdriver.common.service.utils.free_port()
debug_host = "127.0.0.1"
if not options.debugger_address:
debug_port = (
port
if port != 0
else selenium.webdriver.common.service.utils.free_port()
)
debug_host = "127.0.0.1"
options.debugger_address = "%s:%d" % (debug_host, debug_port)
else:
debug_host, debug_port = options.debugger_address.split(":")
debug_port = int(debug_port)
if enable_cdp_events:
options.set_capability(
@ -263,13 +280,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options.add_argument("--remote-debugging-port=%s" % debug_port)
if user_data_dir:
options.add_argument('--user-data-dir=%s' % user_data_dir)
options.add_argument("--user-data-dir=%s" % user_data_dir)
language, keep_user_data_dir = None, bool(user_data_dir)
# see if a custom user profile is specified in options
for arg in options.arguments:
if "lang" in arg:
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
try:
@ -294,7 +310,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
)
if not user_data_dir:
# backward compatiblity
# check if an old uc.ChromeOptions is used, and extract the user data dir
@ -347,6 +362,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if suppress_welcome:
options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
if no_sandbox:
options.arguments.extend(["--no-sandbox", "--test-type"])
if headless or options.headless:
options.headless = True
options.add_argument("--window-size=1920,1080")
@ -360,7 +377,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
)
if hasattr(options, 'handle_prefs'):
if hasattr(options, "handle_prefs"):
options.handle_prefs(user_data_dir)
# fix exit_type flag to prevent tab-restore nag
@ -376,6 +393,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
config["profile"]["exit_type"] = None
fs.seek(0, 0)
json.dump(config, fs)
fs.truncate() # the file might be shorter
logger.debug("fixed exit_type flag")
except Exception as e:
logger.debug("did not find a bad exit_type flag ")
@ -385,24 +403,31 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if not desired_capabilities:
desired_capabilities = options.to_capabilities()
if not use_subprocess and not windows_headless:
if not use_subprocess:
self.browser_pid = start_detached(
options.binary_location, *options.arguments
)
else:
startupinfo = subprocess.STARTUPINFO()
if os.name == 'nt' and windows_headless:
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
browser = subprocess.Popen(
[options.binary_location, *options.arguments],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
close_fds=IS_POSIX,
startupinfo=startupinfo
)
self.browser_pid = browser.pid
if service_creationflags:
service = selenium.webdriver.common.service.Service(
patcher.executable_path, port, service_args, service_log_path
)
for attr_name in ("creationflags", "creation_flags"):
if hasattr(service, attr_name):
setattr(service, attr_name, service_creationflags)
break
else:
service = None
super(Chrome, self).__init__(
executable_path=patcher.executable_path,
port=port,
@ -411,6 +436,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
desired_capabilities=desired_capabilities,
service_log_path=service_log_path,
keep_alive=keep_alive,
service=service, # needed or the service will be re-created
)
self.reactor = None
@ -425,35 +451,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
self.reactor = reactor
if advanced_elements:
from .webelement import WebElement
self._web_element_cls = UCWebElement
else:
self._web_element_cls = WebElement
if options.headless:
self._configure_headless()
def __getattribute__(self, item):
if not super().__getattribute__("debug"):
return super().__getattribute__(item)
else:
import inspect
original = super().__getattribute__(item)
if inspect.ismethod(original) and not inspect.isclass(original):
def newfunc(*args, **kwargs):
logger.debug(
"calling %s with args %s and kwargs %s\n"
% (original.__qualname__, args, kwargs)
)
return original(*args, **kwargs)
return newfunc
return original
def _configure_headless(self):
orig_get = self.get
logger.info("setting properties for headless")
@ -494,18 +499,107 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
"Page.addScriptToEvaluateOnNewDocument",
{
"source": """
Object.defineProperty(navigator, 'maxTouchPoints', {
get: () => 1
})"""
Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1});
Object.defineProperty(navigator.connection, 'rtt', {get: () => 100});
// https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js
window.chrome = {
app: {
isInstalled: false,
InstallState: {
DISABLED: 'disabled',
INSTALLED: 'installed',
NOT_INSTALLED: 'not_installed'
},
RunningState: {
CANNOT_RUN: 'cannot_run',
READY_TO_RUN: 'ready_to_run',
RUNNING: 'running'
}
},
runtime: {
OnInstalledReason: {
CHROME_UPDATE: 'chrome_update',
INSTALL: 'install',
SHARED_MODULE_UPDATE: 'shared_module_update',
UPDATE: 'update'
},
OnRestartRequiredReason: {
APP_UPDATE: 'app_update',
OS_UPDATE: 'os_update',
PERIODIC: 'periodic'
},
PlatformArch: {
ARM: 'arm',
ARM64: 'arm64',
MIPS: 'mips',
MIPS64: 'mips64',
X86_32: 'x86-32',
X86_64: 'x86-64'
},
PlatformNaclArch: {
ARM: 'arm',
MIPS: 'mips',
MIPS64: 'mips64',
X86_32: 'x86-32',
X86_64: 'x86-64'
},
PlatformOs: {
ANDROID: 'android',
CROS: 'cros',
LINUX: 'linux',
MAC: 'mac',
OPENBSD: 'openbsd',
WIN: 'win'
},
RequestUpdateCheckStatus: {
NO_UPDATE: 'no_update',
THROTTLED: 'throttled',
UPDATE_AVAILABLE: 'update_available'
}
}
}
// https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js
if (!window.Notification) {
window.Notification = {
permission: 'denied'
}
}
const originalQuery = window.navigator.permissions.query
window.navigator.permissions.__proto__.query = parameters =>
parameters.name === 'notifications'
? Promise.resolve({ state: window.Notification.permission })
: originalQuery(parameters)
const oldCall = Function.prototype.call
function call() {
return oldCall.apply(this, arguments)
}
Function.prototype.call = call
const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString')
const oldToString = Function.prototype.toString
function functionToString() {
if (this === window.navigator.permissions.query) {
return 'function query() { [native code] }'
}
if (this === functionToString) {
return nativeToStringFunctionString
}
return oldCall.call(oldToString, this)
}
// eslint-disable-next-line
Function.prototype.toString = functionToString
"""
},
)
return orig_get(*args, **kwargs)
self.get = get_wrapped
def __dir__(self):
return object.__dir__(self)
def _get_cdc_props(self):
return self.execute_script(
"""
@ -553,6 +647,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if self.reactor and isinstance(self.reactor, Reactor):
self.reactor.handlers.clear()
def window_new(self):
self.execute(
selenium.webdriver.remote.command.Command.NEW_WINDOW, {"type": "window"}
)
def tab_new(self, url: str):
"""
this opens a url in a new tab.
@ -597,25 +696,21 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# super(Chrome, self).start_session(capabilities, browser_profile)
def quit(self):
logger.debug("closing webdriver")
if hasattr(self, "service") and getattr(self.service, "process", None):
self.service.process.terminate()
self.service.process.wait(5)
try:
if self.reactor and isinstance(self.reactor, Reactor):
logger.debug("shutting down reactor")
self.reactor.event.set()
except Exception: # noqa
self.service.process.kill()
logger.debug("webdriver process ended")
except (AttributeError, RuntimeError, OSError):
pass
try:
self.reactor.event.set()
logger.debug("shutting down reactor")
except AttributeError:
pass
try:
logger.debug("killing browser")
os.kill(self.browser_pid, 15)
except TimeoutError as e:
logger.debug("gracefully closed browser")
except Exception as e: # noqa
logger.debug(e, exc_info=True)
except Exception: # noqa
pass
if (
hasattr(self, "keep_user_data_dir")
and hasattr(self, "user_data_dir")
@ -623,7 +718,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
):
for _ in range(5):
try:
shutil.rmtree(self.user_data_dir, ignore_errors=False)
except FileNotFoundError:
pass
@ -641,13 +735,24 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# this must come last, otherwise it will throw 'in use' errors
self.patcher = None
def __del__(self):
try:
super().quit()
# self.service.process.kill()
except: # noqa
pass
self.quit()
def __getattribute__(self, item):
if not super().__getattribute__("debug"):
return super().__getattribute__(item)
else:
import inspect
original = super().__getattribute__(item)
if inspect.ismethod(original) and not inspect.isclass(original):
def newfunc(*args, **kwargs):
logger.debug(
"calling %s with args %s and kwargs %s\n"
% (original.__qualname__, args, kwargs)
)
return original(*args, **kwargs)
return newfunc
return original
def __enter__(self):
return self
@ -661,6 +766,27 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
def __hash__(self):
return hash(self.options.debugger_address)
def __dir__(self):
return object.__dir__(self)
def __del__(self):
try:
self.service.process.kill()
except: # noqa
pass
self.quit()
@classmethod
def _ensure_close(cls, self):
# needs to be a classmethod so finalize can find the reference
logger.info("ensuring close")
if (
hasattr(self, "service")
and hasattr(self.service, "process")
and hasattr(self.service.process, "kill")
):
self.service.process.kill()
def find_chrome_executable():
"""
@ -692,14 +818,16 @@ def find_chrome_executable():
)
else:
for item in map(
os.environ.get, ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA")
os.environ.get,
("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA", "PROGRAMW6432"),
):
for subitem in (
"Google/Chrome/Application",
"Google/Chrome Beta/Application",
"Google/Chrome Canary/Application",
):
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
if item is not None:
for subitem in (
"Google/Chrome/Application",
"Google/Chrome Beta/Application",
"Google/Chrome Canary/Application",
):
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates:
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
return os.path.normpath(candidate)

View File

@ -17,6 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
"""
from distutils.version import LooseVersion
import io
import logging
import os
@ -24,11 +25,13 @@ import random
import re
import string
import sys
from urllib.request import urlopen
from urllib.request import urlretrieve
import zipfile
from distutils.version import LooseVersion
from urllib.request import urlopen, urlretrieve
from selenium.webdriver import Chrome as _Chrome, ChromeOptions as _ChromeOptions
from selenium.webdriver import Chrome as _Chrome
from selenium.webdriver import ChromeOptions as _ChromeOptions
TARGET_VERSION = 0
logger = logging.getLogger("uc")

View File

@ -3,11 +3,11 @@
import json
import logging
from collections.abc import Mapping, Sequence
import requests
import websockets
log = logging.getLogger(__name__)

View File

@ -1,17 +1,16 @@
import asyncio
import logging
import time
import traceback
from collections.abc import Mapping
from collections.abc import Sequence
from functools import wraps
import logging
import threading
import time
import traceback
from typing import Any
from typing import Awaitable
from typing import Callable
from typing import List
from typing import Optional
from contextlib import ExitStack
import threading
from functools import wraps, partial
class Structure(dict):

View File

@ -1,13 +1,13 @@
import atexit
import logging
import multiprocessing
import os
import platform
import sys
import signal
from subprocess import PIPE
from subprocess import Popen
import atexit
import traceback
import logging
import signal
import sys
CREATE_NEW_PROCESS_GROUP = 0x00000200
DETACHED_PROCESS = 0x00000008
@ -27,14 +27,12 @@ def start_detached(executable, *args):
reader, writer = multiprocessing.Pipe(False)
# do not keep reference
process = multiprocessing.Process(
multiprocessing.Process(
target=_start_detached,
args=(executable, *args),
kwargs={"writer": writer},
daemon=True,
)
process.start()
process.join()
).start()
# receive pid from pipe
pid = reader.recv()
REGISTERED.append(pid)

View File

@ -39,6 +39,20 @@ class ChromeOptions(_ChromiumOptions):
value = ChromeOptions._undot_key(rest, value)
return {key: value}
@staticmethod
def _merge_nested(a, b):
"""
merges b into a
leaf values in a are overwritten with values from b
"""
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
ChromeOptions._merge_nested(a[key], b[key])
continue
a[key] = b[key]
return a
def handle_prefs(self, user_data_dir):
prefs = self.experimental_options.get("prefs")
if prefs:
@ -50,12 +64,14 @@ class ChromeOptions(_ChromiumOptions):
# undot prefs dict keys
undot_prefs = {}
for key, value in prefs.items():
undot_prefs.update(self._undot_key(key, value))
undot_prefs = self._merge_nested(
undot_prefs, self._undot_key(key, value)
)
prefs_file = os.path.join(default_path, "Preferences")
if os.path.exists(prefs_file):
with open(prefs_file, encoding="latin1", mode="r") as f:
undot_prefs.update(json.load(f))
undot_prefs = self._merge_nested(json.load(f), undot_prefs)
with open(prefs_file, encoding="latin1", mode="w") as f:
json.dump(undot_prefs, f)

View File

@ -1,23 +1,24 @@
#!/usr/bin/env python3
# this module is part of undetected_chromedriver
from distutils.version import LooseVersion
import io
import logging
import os
import random
import re
import secrets
import string
import sys
import time
from urllib.request import urlopen
from urllib.request import urlretrieve
import zipfile
from distutils.version import LooseVersion
from urllib.request import urlopen, urlretrieve
import secrets
logger = logging.getLogger(__name__)
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux"))
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
class Patcher(object):
@ -29,7 +30,7 @@ class Patcher(object):
if platform.endswith("win32"):
zip_name %= "win32"
exe_name %= ".exe"
if platform.endswith("linux"):
if platform.endswith(("linux", "linux2")):
zip_name %= "linux64"
exe_name %= ""
if platform.endswith("darwin"):
@ -38,7 +39,9 @@ class Patcher(object):
if platform.endswith("win32"):
d = "~/appdata/roaming/undetected_chromedriver"
elif platform.startswith("linux"):
elif "LAMBDA_TASK_ROOT" in os.environ:
d = "/tmp/undetected_chromedriver"
elif platform.startswith(("linux","linux2")):
d = "~/.local/share/undetected_chromedriver"
elif platform.endswith("darwin"):
d = "~/Library/Application Support/undetected_chromedriver"

View File

@ -1,7 +1,29 @@
from selenium.webdriver.common.by import By
import selenium.webdriver.remote.webelement
from typing import List
class WebElement(selenium.webdriver.remote.webelement.WebElement):
def click_safe(self):
super().click()
self._parent.reconnect(0.1)
def children(
self, tag=None, recursive=False
) -> List[selenium.webdriver.remote.webelement.WebElement]:
"""
returns direct child elements of current element
:param tag: str, if supplied, returns <tag> nodes only
"""
script = "return [... arguments[0].children]"
if tag:
script += ".filter( node => node.tagName === '%s')" % tag.upper()
if recursive:
return list(_recursive_children(self, tag))
return list(self._parent.execute_script(script, self))
class UCWebElement(WebElement):
"""
Custom WebElement class which makes it easier to view elements when
working in an interactive environment.
@ -14,9 +36,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
"""
def __init__(self, parent, id_):
super().__init__(parent, id_)
self._attrs = None
@property
def attrs(self):
if not hasattr(self, "_attrs"):
if not self._attrs:
self._attrs = self._parent.execute_script(
"""
var items = {};
@ -35,3 +61,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
if strattrs:
strattrs = " " + strattrs
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
def _recursive_children(element, tag: str = None, _results=None):
"""
returns all children of <element> recursively
:param element: `WebElement` object.
find children below this <element>
:param tag: str = None.
if provided, return only <tag> elements. example: 'a', or 'img'
:param _results: do not use!
"""
results = _results or set()
for element in element.children():
if tag:
if element.tag_name == tag:
results.add(element)
else:
results.add(element)
results |= _recursive_children(element, tag, results)
return results