From 1505595591fc5ddd8b1e83c4aa66916c2fe53967 Mon Sep 17 00:00:00 2001 From: ngosang Date: Fri, 23 Sep 2022 02:17:50 +0200 Subject: [PATCH] Rewrite FlareSolverr from scratch in Python + Selenium --- requirements.txt | 5 + src/bottle_plugins/__init__.py | 0 src/bottle_plugins/error_plugin.py | 22 ++ src/bottle_plugins/logger_plugin.py | 23 ++ src/dtos.py | 83 +++++++ src/flaresolverr.py | 91 +++++++ src/flaresolverr_service.py | 247 +++++++++++++++++++ src/tests.py | 360 ++++++++++++++++++++++++++++ src/utils.py | 97 ++++++++ test-requirements.txt | 1 + 10 files changed, 929 insertions(+) create mode 100644 requirements.txt create mode 100644 src/bottle_plugins/__init__.py create mode 100644 src/bottle_plugins/error_plugin.py create mode 100644 src/bottle_plugins/logger_plugin.py create mode 100644 src/dtos.py create mode 100644 src/flaresolverr.py create mode 100644 src/flaresolverr_service.py create mode 100644 src/tests.py create mode 100644 src/utils.py create mode 100644 test-requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..03e048c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +bottle==0.12.23 +waitress==2.1.2 +selenium==4.4.3 +undetected-chromedriver==3.1.5.post4 +func-timeout==4.3.5 diff --git a/src/bottle_plugins/__init__.py b/src/bottle_plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/bottle_plugins/error_plugin.py b/src/bottle_plugins/error_plugin.py new file mode 100644 index 0000000..4d99508 --- /dev/null +++ b/src/bottle_plugins/error_plugin.py @@ -0,0 +1,22 @@ +from bottle import response +import logging + + +def error_plugin(callback): + """ + Bottle plugin to handle exceptions + https://stackoverflow.com/a/32764250 + """ + + def wrapper(*args, **kwargs): + try: + actual_response = callback(*args, **kwargs) + except Exception as e: + logging.error(str(e)) + actual_response = { + "error": str(e) + } + response.status = 500 + return actual_response + + return wrapper diff --git a/src/bottle_plugins/logger_plugin.py b/src/bottle_plugins/logger_plugin.py new file mode 100644 index 0000000..9005754 --- /dev/null +++ b/src/bottle_plugins/logger_plugin.py @@ -0,0 +1,23 @@ +from bottle import request, response +import logging + + +def logger_plugin(callback): + """ + Bottle plugin to use logging module + http://bottlepy.org/docs/dev/plugindev.html + + Wrap a Bottle request so that a log line is emitted after it's handled. + (This decorator can be extended to take the desired logger as a param.) + """ + + def wrapper(*args, **kwargs): + actual_response = callback(*args, **kwargs) + if not request.url.endswith("/health"): + logging.info('%s %s %s %s' % (request.remote_addr, + request.method, + request.url, + response.status)) + return actual_response + + return wrapper diff --git a/src/dtos.py b/src/dtos.py new file mode 100644 index 0000000..87fd18c --- /dev/null +++ b/src/dtos.py @@ -0,0 +1,83 @@ + +STATUS_OK = "ok" +STATUS_ERROR = "error" + + +class ChallengeResolutionResultT: + url: str = None + status: int = None + headers: list = None + response: str = None + cookies: list = None + userAgent: str = None + + def __init__(self, _dict): + self.__dict__.update(_dict) + + +class ChallengeResolutionT: + status: str = None + message: str = None + result: ChallengeResolutionResultT = None + + def __init__(self, _dict): + self.__dict__.update(_dict) + if self.result is not None: + self.result = ChallengeResolutionResultT(self.result) + + +class V1RequestBase(object): + # V1RequestBase + cmd: str = None + cookies: list = None + maxTimeout: int = None + proxy: dict = None + session: str = None + headers: list = None # deprecated v2.0.0, not used + userAgent: str = None # deprecated v2.0.0, not used + + # V1Request + url: str = None + postData: str = None + returnOnlyCookies: bool = None + download: bool = None # deprecated v2.0.0, not used + returnRawHtml: bool = None # deprecated v2.0.0, not used + + def __init__(self, _dict): + self.__dict__.update(_dict) + + +class V1ResponseBase(object): + # V1ResponseBase + status: str = None + message: str = None + startTimestamp: int = None + endTimestamp: int = None + version: str = None + + # V1ResponseSolution + solution: ChallengeResolutionResultT = None + + # hidden vars + __error_500__: bool = False + + def __init__(self, _dict): + self.__dict__.update(_dict) + if self.solution is not None: + self.solution = ChallengeResolutionResultT(self.solution) + + +class IndexResponse(object): + msg: str = None + version: str = None + userAgent: str = None + + def __init__(self, _dict): + self.__dict__.update(_dict) + + +class HealthResponse(object): + status: str = None + + def __init__(self, _dict): + self.__dict__.update(_dict) diff --git a/src/flaresolverr.py b/src/flaresolverr.py new file mode 100644 index 0000000..b8775d4 --- /dev/null +++ b/src/flaresolverr.py @@ -0,0 +1,91 @@ +import json +import logging +import os +import sys + +from bottle import run, response, Bottle, request + +from bottle_plugins.error_plugin import error_plugin +from bottle_plugins.logger_plugin import logger_plugin +from dtos import IndexResponse, V1RequestBase +import flaresolverr_service +import utils + + +class JSONErrorBottle(Bottle): + """ + Handle 404 errors + """ + def default_error_handler(self, res): + response.content_type = 'application/json' + return json.dumps(dict(error=res.body, status_code=res.status_code)) + + +app = JSONErrorBottle() + +# plugin order is important +app.install(logger_plugin) +app.install(error_plugin) + + +@app.route('/') +def index(): + """ + Show welcome message + """ + res = flaresolverr_service.index_endpoint() + return utils.object_to_dict(res) + + +@app.route('/health') +def health(): + """ + Healthcheck endpoint. + This endpoint is special because it doesn't print traces + """ + res = flaresolverr_service.health_endpoint() + return utils.object_to_dict(res) + + +@app.post('/v1') +def controller_v1(): + """ + Controller v1 + """ + req = V1RequestBase(request.json) + res = flaresolverr_service.controller_v1_endpoint(req) + if res.__error_500__: + response.status = 500 + return utils.object_to_dict(res) + + +if __name__ == "__main__": + # validate configuration + log_level = os.environ.get('LOG_LEVEL', 'info').upper() + log_html = utils.get_config_log_html() + server_host = os.environ.get('HOST', '0.0.0.0') + server_port = int(os.environ.get('PORT', 8191)) + + # configure logger + logging.basicConfig( + format='%(asctime)s %(levelname)-8s ReqId %(thread)s %(message)s', + level=log_level, + datefmt='%Y-%m-%d %H:%M:%S', + handlers=[ + logging.StreamHandler(sys.stdout) + ] + ) + # disable warning traces from urllib3 + logging.getLogger('urllib3').setLevel(logging.ERROR) + logging.getLogger('selenium.webdriver.remote.remote_connection').setLevel(logging.WARNING) + logging.getLogger('undetected_chromedriver').setLevel(logging.WARNING) + + logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}') + logging.debug('Debug log enabled') + + # test browser installation + flaresolverr_service.test_browser_installation() + + # start webserver + # default server 'wsgiref' does not support concurrent requests + run(app, host=server_host, port=server_port, quiet=True, server='waitress') diff --git a/src/flaresolverr_service.py b/src/flaresolverr_service.py new file mode 100644 index 0000000..b3a0d2a --- /dev/null +++ b/src/flaresolverr_service.py @@ -0,0 +1,247 @@ +import logging +import time +from urllib.parse import unquote + +from func_timeout import func_timeout, FunctionTimedOut +from selenium.common import TimeoutException +from selenium.webdriver.chrome.webdriver import WebDriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.support.expected_conditions import presence_of_element_located, staleness_of + +from dtos import V1RequestBase, V1ResponseBase, ChallengeResolutionT, ChallengeResolutionResultT, IndexResponse, \ + HealthResponse, STATUS_OK, STATUS_ERROR +import utils + + +CHALLENGE_SELECTORS = [ + # Cloudflare + '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#trk_jschal_js', + # DDoS-GUARD + '#link-ddg', + # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands + 'td.info #js_info' +] +SHORT_TIMEOUT = 5 + + +def test_browser_installation(): + logging.info("Testing web browser installation...") + user_agent = utils.get_user_agent() + logging.info("FlareSolverr User-Agent: " + user_agent) + logging.info("Test successful") + + +def index_endpoint() -> IndexResponse: + res = IndexResponse({}) + res.msg = "FlareSolverr is ready!" + res.version = utils.get_flaresolverr_version() + res.userAgent = utils.get_user_agent() + return res + + +def health_endpoint() -> HealthResponse: + res = HealthResponse({}) + res.status = STATUS_OK + return res + + +def controller_v1_endpoint(req: V1RequestBase) -> V1ResponseBase: + start_ts = int(time.time() * 1000) + logging.info(f"Incoming request => POST /v1 body: {utils.object_to_dict(req)}") + res: V1ResponseBase + try: + res = _controller_v1_handler(req) + except Exception as e: + res = V1ResponseBase({}) + res.__error_500__ = True + res.status = STATUS_ERROR + res.message = "Error: " + str(e) + logging.error(res.message) + + res.startTimestamp = start_ts + res.endTimestamp = int(time.time() * 1000) + res.version = utils.get_flaresolverr_version() + logging.debug(f"Response => POST /v1 body: {utils.object_to_dict(res.solution)}") + logging.info(f"Response in {(res.endTimestamp - res.startTimestamp) / 1000} s") + return res + + +def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase: + # do some validations + if req.cmd is None: + raise Exception("Request parameter 'cmd' is mandatory.") + if req.headers is not None: + logging.warning("Request parameter 'headers' was removed in FlareSolverr v2.") + if req.userAgent is not None: + logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.") + + # set default values + if req.maxTimeout is None or req.maxTimeout < 1: + req.maxTimeout = 60000 + + # execute the command + res: V1ResponseBase + if req.cmd == 'sessions.create': + raise Exception("Not implemented yet.") + elif req.cmd == 'sessions.list': + raise Exception("Not implemented yet.") + elif req.cmd == 'sessions.destroy': + raise Exception("Not implemented yet.") + elif req.cmd == 'request.get': + res = _cmd_request_get(req) + elif req.cmd == 'request.post': + res = _cmd_request_post(req) + else: + raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.") + + return res + + +def _cmd_request_get(req: V1RequestBase) -> V1ResponseBase: + # do some validations + if req.url is None: + raise Exception("Request parameter 'url' is mandatory in 'request.get' command.") + if req.postData is not None: + raise Exception("Cannot use 'postBody' when sending a GET request.") + if req.returnRawHtml is not None: + logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.") + if req.download is not None: + logging.warning("Request parameter 'download' was removed in FlareSolverr v2.") + + challenge_res = _resolve_challenge(req, 'GET') + res = V1ResponseBase({}) + res.status = challenge_res.status + res.message = challenge_res.message + res.solution = challenge_res.result + return res + + +def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase: + # do some validations + if req.postData is None: + raise Exception("Request parameter 'postData' is mandatory in 'request.post' command.") + if req.returnRawHtml is not None: + logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.") + if req.download is not None: + logging.warning("Request parameter 'download' was removed in FlareSolverr v2.") + + challenge_res = _resolve_challenge(req, 'POST') + res = V1ResponseBase({}) + res.status = challenge_res.status + res.message = challenge_res.message + res.solution = challenge_res.result + return res + + +def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: + timeout = req.maxTimeout / 1000 + driver = None + try: + driver = utils.get_webdriver() + return func_timeout(timeout, _evil_logic, (req, driver, method)) + except FunctionTimedOut: + raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.') + except Exception as e: + raise Exception('Error solving the challenge. ' + str(e)) + finally: + if driver is not None: + driver.quit() + + +def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT: + res = ChallengeResolutionT({}) + res.status = STATUS_OK + res.message = "" + + # navigate to the page + logging.debug(f'Navigating to... {req.url}') + if method == 'POST': + _post_request(req, driver) + else: + driver.get(req.url) + if utils.get_config_log_html(): + logging.debug(f"Response HTML:\n{driver.page_source}") + + # find challenge selectors + html_element = driver.find_element(By.TAG_NAME, "html") + challenge_found = False + for selector in CHALLENGE_SELECTORS: + found_elements = driver.find_elements(By.CSS_SELECTOR, selector) + if len(found_elements) > 0: + challenge_found = True + logging.info("Challenge detected. Selector found: " + selector) + break + + if challenge_found: + while True: + try: + # then wait until all the selectors disappear + for selector in CHALLENGE_SELECTORS: + logging.debug("Waiting for selector: " + selector) + WebDriverWait(driver, SHORT_TIMEOUT).until_not( + presence_of_element_located((By.CSS_SELECTOR, selector))) + + # all elements not found + break + + except TimeoutException: + logging.debug("Timeout waiting for selector") + # update the html (cloudflare reloads the page every 5 s) + html_element = driver.find_element(By.TAG_NAME, "html") + + # waits until cloudflare redirection ends + logging.debug("Waiting for redirect") + # noinspection PyBroadException + try: + WebDriverWait(driver, SHORT_TIMEOUT).until(staleness_of(html_element)) + except Exception: + logging.debug("Timeout waiting for redirect") + + logging.info("Challenge solved!") + else: + logging.info("Challenge not detected!") + + challenge_res = ChallengeResolutionResultT({}) + challenge_res.url = driver.current_url + challenge_res.status = 200 # todo: fix, selenium not provides this info + challenge_res.cookies = driver.get_cookies() + + if not req.returnOnlyCookies: + challenge_res.headers = {} # todo: fix, selenium not provides this info + challenge_res.response = driver.page_source + challenge_res.userAgent = utils.get_user_agent(driver) + + res.result = challenge_res + return res + + +def _post_request(req: V1RequestBase, driver: WebDriver): + post_form = f'
' + query_string = req.postData if req.postData[0] != '?' else req.postData[1:] + pairs = query_string.split('&') + for pair in pairs: + parts = pair.split('=') + # noinspection PyBroadException + try: + name = unquote(parts[0]) + except Exception: + name = parts[0] + if name == 'submit': + continue + # noinspection PyBroadException + try: + value = unquote(parts[1]) + except Exception: + value = parts[1] + post_form += f'
' + post_form += '
' + html_content = f""" + + + + {post_form} + + + """ + driver.get("data:text/html;charset=utf-8," + html_content) diff --git a/src/tests.py b/src/tests.py new file mode 100644 index 0000000..071e4dc --- /dev/null +++ b/src/tests.py @@ -0,0 +1,360 @@ +import unittest +from datetime import datetime, timezone + +from webtest import TestApp + +from dtos import IndexResponse, HealthResponse, V1ResponseBase, STATUS_OK, STATUS_ERROR +import flaresolverr +import utils + + +def _find_obj_by_key(key: str, value: str, _list: list) -> dict | None: + for obj in _list: + if obj[key] == value: + return obj + return None + + +class TestFlareSolverr(unittest.TestCase): + + proxy_url = "http://127.0.0.1:8888" + proxy_socks_url = "socks5://127.0.0.1:1080" + google_url = "https://www.google.com" + post_url = "https://ptsv2.com/t/qv4j3-1634496523" + cloudflare_url = "https://nowsecure.nl" + cloudflare_url_2 = "https://idope.se/torrent-list/harry/" + ddos_guard_url = "https://anidex.info/" + custom_cloudflare_url = "https://www.muziekfabriek.org" + + app = TestApp(flaresolverr.app) + + def test_wrong_endpoint(self): + res = self.app.get('/wrong', status=404) + self.assertEqual(res.status_code, 404) + + body = res.json + self.assertEqual("Not found: '/wrong'", body['error']) + self.assertEqual(404, body['status_code']) + + def test_index_endpoint(self): + res = self.app.get('/') + self.assertEqual(res.status_code, 200) + + body = IndexResponse(res.json) + self.assertEqual("FlareSolverr is ready!", body.msg) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + self.assertIn("Chrome/", body.userAgent) + + def test_health_endpoint(self): + res = self.app.get('/health') + self.assertEqual(res.status_code, 200) + + body = HealthResponse(res.json) + self.assertEqual(STATUS_OK, body.status) + + def test_v1_endpoint_wrong_cmd(self): + res = self.app.post_json('/v1', { + "cmd": "request.bad", + "url": self.google_url + }, status=500) + self.assertEqual(res.status_code, 500) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_ERROR, body.status) + self.assertEqual("Error: Request parameter 'cmd' = 'request.bad' is invalid.", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + def test_v1_endpoint_request_get_no_cloudflare(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.google_url + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.google_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("Google", solution.response) + self.assertGreater(len(solution.cookies), 0) + self.assertIn("Chrome/", solution.userAgent) + + def test_v1_endpoint_request_get_cloudflare_js_1(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.cloudflare_url + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.cloudflare_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("nowSecure", solution.response) + self.assertGreater(len(solution.cookies), 0) + self.assertIn("Chrome/", solution.userAgent) + + cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies) + self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found") + self.assertGreater(len(cf_cookie["value"]), 30) + + def test_v1_endpoint_request_get_cloudflare_js_2(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.cloudflare_url_2 + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.cloudflare_url_2, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("harry - idope torrent search", solution.response) + self.assertGreater(len(solution.cookies), 0) + self.assertIn("Chrome/", solution.userAgent) + + cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies) + self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found") + self.assertGreater(len(cf_cookie["value"]), 30) + + def test_v1_endpoint_request_get_ddos_guard_js(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.ddos_guard_url + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.ddos_guard_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("AniDex", solution.response) + self.assertGreater(len(solution.cookies), 0) + self.assertIn("Chrome/", solution.userAgent) + + cf_cookie = _find_obj_by_key("name", "__ddg1_", solution.cookies) + self.assertIsNotNone(cf_cookie, "DDOS-Guard cookie not found") + self.assertGreater(len(cf_cookie["value"]), 10) + + def test_v1_endpoint_request_get_custom_cloudflare_js(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.custom_cloudflare_url + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.custom_cloudflare_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("MuziekFabriek : Aanmelden", solution.response) + self.assertGreater(len(solution.cookies), 0) + self.assertIn("Chrome/", solution.userAgent) + + cf_cookie = _find_obj_by_key("name", "ct_anti_ddos_key", solution.cookies) + self.assertIsNotNone(cf_cookie, "Custom Cloudflare cookie not found") + self.assertGreater(len(cf_cookie["value"]), 10) + + # todo: test Cmd 'request.get' should return fail with Cloudflare CAPTCHA + # todo: test Cmd 'request.get' should return fail with Cloudflare Blocked + # todo: test Cmd 'request.get' should return OK with 'cookies' param + + def test_v1_endpoint_request_get_returnOnlyCookies_param(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.google_url, + "returnOnlyCookies": True + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.google_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIsNone(solution.headers) + self.assertIsNone(solution.response) + self.assertGreater(len(solution.cookies), 0) + self.assertIsNone(solution.userAgent) + + # todo: test Cmd 'request.get' should return OK with HTTP 'proxy' param + # todo: test Cmd 'request.get' should return OK with HTTP 'proxy' param with credentials + # todo: test Cmd 'request.get' should return OK with SOCKSv5 'proxy' param + # todo: test Cmd 'request.get' should fail with wrong 'proxy' param + + def test_v1_endpoint_request_get_fail_timeout(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.google_url, + "maxTimeout": 10 + }, status=500) + self.assertEqual(res.status_code, 500) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_ERROR, body.status) + self.assertEqual("Error: Error solving the challenge. Timeout after 0.01 seconds.", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + def test_v1_endpoint_request_get_fail_bad_domain(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": "https://www.google.combad" + }, status=500) + self.assertEqual(res.status_code, 500) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_ERROR, body.status) + self.assertIn("Message: unknown error: net::ERR_NAME_NOT_RESOLVED", body.message) + + def test_v1_endpoint_request_get_deprecated_param(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.google_url, + "userAgent": "Test User-Agent" # was removed in v2, not used + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + + def test_v1_endpoint_request_post_no_cloudflare(self): + res = self.app.post_json('/v1', { + "cmd": "request.post", + "url": self.post_url + '/post', + "postData": "param1=value1¶m2=value2" + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.post_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("I hope you have a lovely day!", solution.response) + self.assertEqual(len(solution.cookies), 0) + self.assertIn("Chrome/", solution.userAgent) + + # check that we sent the post data + res2 = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.post_url + }) + self.assertEqual(res2.status_code, 200) + + body2 = V1ResponseBase(res2.json) + self.assertEqual(STATUS_OK, body2.status) + date_hour = datetime.now(timezone.utc).isoformat().split(':')[0].replace('T', ' ') + self.assertIn(date_hour, body2.solution.response) + + def test_v1_endpoint_request_post_cloudflare(self): + res = self.app.post_json('/v1', { + "cmd": "request.post", + "url": self.cloudflare_url, + "postData": "param1=value1¶m2=value2" + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.cloudflare_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("405 Not Allowed", solution.response) + self.assertGreater(len(solution.cookies), 0) + self.assertIn("Chrome/", solution.userAgent) + + cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies) + self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found") + self.assertGreater(len(cf_cookie["value"]), 30) + + def test_v1_endpoint_request_post_fail_no_post_data(self): + res = self.app.post_json('/v1', { + "cmd": "request.post", + "url": self.google_url + }, status=500) + self.assertEqual(res.status_code, 500) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_ERROR, body.status) + self.assertIn("Request parameter 'postData' is mandatory in 'request.post' command", body.message) + + def test_v1_endpoint_request_post_deprecated_param(self): + res = self.app.post_json('/v1', { + "cmd": "request.post", + "url": self.google_url, + "postData": "param1=value1¶m2=value2", + "userAgent": "Test User-Agent" # was removed in v2, not used + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + + # todo: test Cmd 'sessions.create' should return OK + # todo: test Cmd 'sessions.create' should return OK with session + # todo: test Cmd 'sessions.list' should return OK + # todo: test Cmd 'sessions.destroy' should return OK + # todo: test Cmd 'sessions.destroy' should fail + # todo: test Cmd 'request.get' should use session + + +if __name__ == '__main__': + unittest.main() diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..6dddc67 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,97 @@ +import json +import logging +import os + +from selenium.webdriver.chrome.webdriver import WebDriver +import undetected_chromedriver as uc + +FLARESOLVERR_VERSION = None +CHROME_MAJOR_VERSION = None +USER_AGENT = None + + +def get_config_log_html() -> bool: + return os.environ.get('LOG_HTML', 'false').lower() == 'true' + + +def get_flaresolverr_version() -> str: + global FLARESOLVERR_VERSION + if FLARESOLVERR_VERSION is not None: + return FLARESOLVERR_VERSION + + package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json') + with open(package_path) as f: + FLARESOLVERR_VERSION = json.loads(f.read())['version'] + return FLARESOLVERR_VERSION + + +def get_webdriver() -> WebDriver: + logging.debug('Launching web browser...') + + # undetected_chromedriver + options = uc.ChromeOptions() + options.add_argument('--no-sandbox') + options.add_argument('--window-size=1920,1080') + # todo: this param shows a warning in chrome headfull + options.add_argument('--disable-setuid-sandbox') + options.add_argument('--disable-dev-shm-usage') + # note: headless mode is detected + # options.headless = True + + # if we are inside the Docker container, we avoid downloading the driver + driver_exe_path = None + version_main = None + if os.path.exists("/app/chromedriver"): + driver_exe_path = "/app/chromedriver" + else: + version_main = get_chrome_major_version() + + # downloads and patches the chromedriver + # todo: if we don't set driver_executable_path it downloads, patches, and deletes the driver each time + driver = uc.Chrome(options=options, driver_executable_path=driver_exe_path, version_main=version_main) + + # selenium vanilla + # options = webdriver.ChromeOptions() + # options.add_argument('--no-sandbox') + # options.add_argument('--window-size=1920,1080') + # options.add_argument('--disable-setuid-sandbox') + # options.add_argument('--disable-dev-shm-usage') + # driver = webdriver.Chrome(options=options) + + return driver + + +def get_chrome_major_version() -> str: + global CHROME_MAJOR_VERSION + if CHROME_MAJOR_VERSION is not None: + return CHROME_MAJOR_VERSION + + chrome_path = uc.find_chrome_executable() + # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n' + # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n' + process = os.popen(f"{chrome_path} --version") + complete_version = process.read() + process.close() + CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1] + return CHROME_MAJOR_VERSION + + +def get_user_agent(driver=None) -> str: + global USER_AGENT + if USER_AGENT is not None: + return USER_AGENT + + try: + if driver is None: + driver = get_webdriver() + USER_AGENT = driver.execute_script("return navigator.userAgent") + return USER_AGENT + except Exception as e: + raise Exception("Error getting browser User-Agent. " + str(e)) + finally: + if driver is not None: + driver.quit() + + +def object_to_dict(_object): + return json.loads(json.dumps(_object, default=lambda o: o.__dict__)) diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 0000000..aeb254e --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1 @@ +WebTest==3.0.0