From 99708041b5c9bf197bf8b89bcd8005f5216055b5 Mon Sep 17 00:00:00 2001 From: AlexFozor Date: Wed, 31 Jul 2024 16:28:05 +0200 Subject: [PATCH] proxy fix, user data folder fix --- README.md | 2 +- src/flaresolverr.py | 3 +++ src/flaresolverr_service.py | 6 +++++- src/sessions.py | 6 +++++- src/utils.py | 36 +++++++++++++++++++++++++++++++----- 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8f97158..61004d2 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,7 @@ session. When you no longer need to use a session you should make sure to close | maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. | | cookies | Optional. Will be used by the headless browser. Eg: `"cookies": [{"name": "cookie1", "value": "value1"}, {"name": "cookie2", "value": "value2"}]`. | | returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. | -| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) | +| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) | > **Warning** > If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge. diff --git a/src/flaresolverr.py b/src/flaresolverr.py index 3596fe1..cbf29f7 100644 --- a/src/flaresolverr.py +++ b/src/flaresolverr.py @@ -114,6 +114,9 @@ if __name__ == "__main__": prometheus_plugin.setup() app.install(prometheus_plugin.prometheus_plugin) + webdriver_data = utils.get_webdriver_data_path() + utils.remove_all_subfolders(webdriver_data) + # start webserver # default server 'wsgiref' does not support concurrent requests # https://github.com/FlareSolverr/FlareSolverr/issues/680 diff --git a/src/flaresolverr_service.py b/src/flaresolverr_service.py index fea0b91..3fda457 100644 --- a/src/flaresolverr_service.py +++ b/src/flaresolverr_service.py @@ -218,6 +218,7 @@ def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase: def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: timeout = int(req.maxTimeout) / 1000 driver = None + user_data_path = None try: if req.session: session_id = req.session @@ -232,7 +233,8 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: driver = session.driver else: - driver = utils.get_webdriver(req.proxy) + user_data_path = utils.get_user_data_path() + driver = utils.get_webdriver(req.proxy, user_data_path) logging.debug('New instance of webdriver has been created to perform the request') return func_timeout(timeout, _evil_logic, (req, driver, method)) except FunctionTimedOut: @@ -245,6 +247,8 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: driver.close() driver.quit() logging.debug('A used instance of webdriver has been destroyed') + if user_data_path: + utils.remove_user_data(user_data_path) def click_verify(driver: ChromiumPage) -> DataPacket: try: diff --git a/src/sessions.py b/src/sessions.py index 7fb39d3..6b7881a 100644 --- a/src/sessions.py +++ b/src/sessions.py @@ -45,7 +45,7 @@ class SessionsStorage: if self.exists(session_id): return self.sessions[session_id], False - driver = utils.get_webdriver(proxy) + driver = utils.get_webdriver(proxy, utils.get_user_data_path(session_id)) created_at = datetime.now() session = Session(session_id, driver, created_at) @@ -69,6 +69,10 @@ class SessionsStorage: if utils.PLATFORM_VERSION == "nt": session.driver.close() session.driver.quit() + + session_data = utils.get_user_data_path(session_id) + utils.remove_user_data(session_data) + return True def get(self, session_id: str, ttl: Optional[timedelta] = None) -> Tuple[Session, bool]: diff --git a/src/utils.py b/src/utils.py index 4b4e879..69db334 100644 --- a/src/utils.py +++ b/src/utils.py @@ -7,6 +7,10 @@ import urllib.parse import tempfile import sys +from uuid import uuid1 +from pathlib import Path +from tempfile import gettempdir + from DrissionPage import ChromiumPage, ChromiumOptions # from DrissionPage.common import Settings @@ -120,8 +124,25 @@ def create_proxy_extension(proxy: dict) -> str: return proxy_extension_dir +def get_webdriver_data_path() -> Path: + return Path(gettempdir()) / 'FlareSolverr' -def get_webdriver(proxy: dict = None) -> ChromiumPage: +def get_user_data_path(user_name: str = None) -> Path: + user_name = user_name or str(uuid1()) + return get_webdriver_data_path() / user_name + +def remove_user_data(user_data_path: Path): + if user_data_path.exists(): + shutil.rmtree(user_data_path) + +def remove_all_subfolders(parent_folder: str): + for item in os.listdir(parent_folder): + item_path = os.path.join(parent_folder, item) + + if os.path.isdir(item_path): + shutil.rmtree(item_path) + +def get_webdriver(proxy: dict = None, user_data_path: str = None) -> ChromiumPage: global CHROME_EXE_PATH, USER_AGENT logging.debug('Launching web browser...') @@ -156,7 +177,7 @@ def get_webdriver(proxy: dict = None) -> ChromiumPage: proxy_extension_dir = None if proxy and all(key in proxy for key in ['url', 'username', 'password']): proxy_extension_dir = create_proxy_extension(proxy) - options.set_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir)) + options.add_extension(proxy_extension_dir) elif proxy and 'url' in proxy: proxy_url = proxy['url'] logging.debug("Using webdriver proxy: %s", proxy_url) @@ -171,15 +192,17 @@ def get_webdriver(proxy: dict = None) -> ChromiumPage: options.headless(windows_headless) options.set_argument("--auto-open-devtools-for-tabs") + if user_data_path: + options.set_user_data_path(user_data_path) + if CHROME_EXE_PATH is not None: options.set_paths(browser_path=CHROME_EXE_PATH) + driver = ChromiumPage(addr_or_opts=options) # clean up proxy extension directory if proxy_extension_dir is not None: shutil.rmtree(proxy_extension_dir) - driver = ChromiumPage(addr_or_opts=options) - return driver @@ -319,8 +342,9 @@ def get_user_agent(driver=None) -> str: return USER_AGENT try: + user_path = get_user_data_path("DefaultUserAgent") if driver is None: - driver = get_webdriver() + driver = get_webdriver(user_data_path=user_path) USER_AGENT = driver.user_agent # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910 USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE) @@ -332,6 +356,8 @@ def get_user_agent(driver=None) -> str: if PLATFORM_VERSION == "nt": driver.close() driver.quit() + if user_path: + remove_user_data(user_path) def start_xvfb_display():