def error_plugin(callback):
    """
    Bottle plugin to handle exceptions
    https://stackoverflow.com/a/32764250

    Wraps a route callback so that any exception it raises is logged and
    converted into a JSON-serializable error body with HTTP status 500.

    :param callback: the route callback to protect
    :return: the wrapping function; it returns whatever ``callback`` returns,
             or ``{"error": "<message>"}`` when ``callback`` raised
    """
    # local import so the module-level import block does not need to change
    import functools

    # keep the callback's name and docstring visible on the wrapper
    @functools.wraps(callback)
    def wrapper(*args, **kwargs):
        try:
            return callback(*args, **kwargs)
        except Exception as e:
            # logging.exception also records the traceback, not only the message
            logging.exception(str(e))
            response.status = 500
            return {
                "error": str(e)
            }

    return wrapper
STATUS_OK = "ok"
STATUS_ERROR = "error"


class ChallengeResolutionResultT:
    """
    Data returned for a loaded page (final url, cookies, page source, ...).

    Every key of the dict given to the constructor becomes an attribute;
    attributes that are not provided keep the class-level default ``None``.
    """
    url: str = None
    status: int = None
    headers: list = None
    response: str = None
    cookies: list = None
    userAgent: str = None

    def __init__(self, _dict):
        # a None payload means "no fields" instead of raising TypeError
        self.__dict__.update(_dict or {})


class ChallengeResolutionT:
    """
    Status, message and result of a challenge resolution.

    ``result`` may be given as a plain dict (it is wrapped into a
    ``ChallengeResolutionResultT``) or as an already-built object.
    """
    status: str = None
    message: str = None
    result: ChallengeResolutionResultT = None

    def __init__(self, _dict):
        self.__dict__.update(_dict or {})
        # only wrap raw dicts; wrapping an existing object again would fail
        if self.result is not None and not isinstance(self.result, ChallengeResolutionResultT):
            self.result = ChallengeResolutionResultT(self.result)


class V1RequestBase(object):
    """
    Body of a ``POST /v1`` request.

    Built from the decoded JSON document; a missing document (``None``) gives
    a request whose fields are all ``None``.
    """
    # V1RequestBase
    cmd: str = None
    cookies: list = None
    maxTimeout: int = None
    proxy: dict = None
    session: str = None
    headers: list = None  # deprecated v2.0.0, not used
    userAgent: str = None  # deprecated v2.0.0, not used

    # V1Request
    url: str = None
    postData: str = None
    returnOnlyCookies: bool = None
    download: bool = None  # deprecated v2.0.0, not used
    returnRawHtml: bool = None  # deprecated v2.0.0, not used

    def __init__(self, _dict):
        # a None payload means "no fields" instead of raising TypeError
        self.__dict__.update(_dict or {})


class V1ResponseBase(object):
    """
    Body of a ``POST /v1`` response.

    ``solution`` may be given as a plain dict (it is wrapped into a
    ``ChallengeResolutionResultT``) or as an already-built object.
    """
    # V1ResponseBase
    status: str = None
    message: str = None
    startTimestamp: int = None
    endTimestamp: int = None
    version: str = None

    # V1ResponseSolution
    solution: ChallengeResolutionResultT = None

    # hidden vars
    # the name ends with two underscores, so it is not name-mangled
    __error_500__: bool = False

    def __init__(self, _dict):
        self.__dict__.update(_dict or {})
        # only wrap raw dicts; wrapping an existing object again would fail
        if self.solution is not None and not isinstance(self.solution, ChallengeResolutionResultT):
            self.solution = ChallengeResolutionResultT(self.solution)
class JSONErrorBottle(Bottle):
    """
    Bottle application that renders its default error pages
    (404 errors among them) as JSON documents
    """
    def default_error_handler(self, res):
        # mark the reply as JSON before building the body
        response.content_type = 'application/json'
        payload = {
            "error": res.body,
            "status_code": res.status_code,
        }
        return json.dumps(payload)
@app.post('/v1')
def controller_v1():
    """
    Controller v1

    Builds a V1RequestBase from the JSON body, delegates to the service layer
    and returns the response as a dict. The HTTP status is set to 500 when
    the service flagged the response as an error.
    """
    body = request.json
    # The body can be absent or not a JSON object. Fall back to an empty
    # request so the service reports its usual validation error instead of
    # a TypeError raised while building the DTO.
    if not isinstance(body, dict):
        body = {}
    req = V1RequestBase(body)
    res = flaresolverr_service.controller_v1_endpoint(req)
    if res.__error_500__:
        response.status = 500
    return utils.object_to_dict(res)
def controller_v1_endpoint(req: V1RequestBase) -> V1ResponseBase:
    """
    Run a /v1 command and always return a response object.

    Any exception raised by the command handler is turned into an error
    response (status STATUS_ERROR, message prefixed with "Error: ", the
    __error_500__ flag set). Start and end timestamps in milliseconds and
    the FlareSolverr version are attached to every response.
    """
    started_ms = int(time.time() * 1000)
    request_dump = utils.object_to_dict(req)
    logging.info(f"Incoming request => POST /v1 body: {request_dump}")

    try:
        res = _controller_v1_handler(req)
    except Exception as e:
        res = V1ResponseBase({})
        res.__error_500__ = True
        res.status = STATUS_ERROR
        res.message = "Error: " + str(e)
        logging.error(res.message)

    finished_ms = int(time.time() * 1000)
    res.startTimestamp = started_ms
    res.endTimestamp = finished_ms
    res.version = utils.get_flaresolverr_version()

    solution_dump = utils.object_to_dict(res.solution)
    logging.debug(f"Response => POST /v1 body: {solution_dump}")
    elapsed_s = (finished_ms - started_ms) / 1000
    logging.info(f"Response in {elapsed_s} s")
    return res
def _cmd_request_get(req: V1RequestBase) -> V1ResponseBase:
    """
    Handle the 'request.get' command.

    :param req: the request; 'url' is mandatory and 'postData' must be unset
    :return: a response carrying the status, message and solution produced
             by the challenge resolution
    :raises Exception: when 'url' is missing or 'postData' is present
    """
    # do some validations
    if req.url is None:
        raise Exception("Request parameter 'url' is mandatory in 'request.get' command.")
    if req.postData is not None:
        # the message names the actual request field ('postData')
        raise Exception("Cannot use 'postData' when sending a GET request.")
    if req.returnRawHtml is not None:
        logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
    if req.download is not None:
        logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")

    challenge_res = _resolve_challenge(req, 'GET')
    res = V1ResponseBase({})
    res.status = challenge_res.status
    res.message = challenge_res.message
    res.solution = challenge_res.result
    return res
def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
    """
    Load the requested page in a fresh web driver, bounded by req.maxTimeout.

    :param req: the request; maxTimeout is expressed in milliseconds
    :param method: 'POST' to submit a form, anything else performs a GET
    :return: the challenge resolution produced by _evil_logic
    :raises Exception: on timeout or on any error while loading the page
    """
    timeout = req.maxTimeout / 1000
    driver = None
    try:
        driver = utils.get_webdriver()
        return func_timeout(timeout, _evil_logic, (req, driver, method))
    except FunctionTimedOut:
        raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.')
    except Exception as e:
        # chain the original exception so its traceback is not lost
        raise Exception('Error solving the challenge. ' + str(e)) from e
    finally:
        # always release the browser, also on timeout or error
        if driver is not None:
            driver.quit()


def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
    """
    Navigate to the page, wait for a detected challenge to go away and
    collect the result (url, cookies and, unless only cookies were
    requested, headers, page source and user agent).

    :param req: the request (url, returnOnlyCookies, ...)
    :param driver: the web driver used to load the page
    :param method: 'POST' to submit a form, anything else performs a GET
    :return: a resolution with status STATUS_OK and the collected result
    """
    res = ChallengeResolutionT({})
    res.status = STATUS_OK
    res.message = ""

    # navigate to the page
    logging.debug(f'Navigating to... {req.url}')
    if method == 'POST':
        _post_request(req, driver)
    else:
        driver.get(req.url)
    if utils.get_config_log_html():
        logging.debug(f"Response HTML:\n{driver.page_source}")

    # find challenge selectors
    html_element = driver.find_element(By.TAG_NAME, "html")
    challenge_found = False
    for selector in CHALLENGE_SELECTORS:
        found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
        if found_elements:
            challenge_found = True
            logging.info("Challenge detected. Selector found: " + selector)
            break

    if challenge_found:
        # there is no exit condition besides success: the caller bounds
        # this function with func_timeout
        while True:
            try:
                # then wait until all the selectors disappear
                for selector in CHALLENGE_SELECTORS:
                    logging.debug("Waiting for selector: " + selector)
                    WebDriverWait(driver, SHORT_TIMEOUT).until_not(
                        presence_of_element_located((By.CSS_SELECTOR, selector)))

                # all elements not found
                break

            except TimeoutException:
                logging.debug("Timeout waiting for selector")
                # update the html (cloudflare reloads the page every 5 s)
                html_element = driver.find_element(By.TAG_NAME, "html")

        # waits until cloudflare redirection ends
        logging.debug("Waiting for redirect")
        # noinspection PyBroadException
        try:
            WebDriverWait(driver, SHORT_TIMEOUT).until(staleness_of(html_element))
        except Exception:
            # best effort: the page may simply not redirect
            logging.debug("Timeout waiting for redirect")

        logging.info("Challenge solved!")
    else:
        logging.info("Challenge not detected!")

    challenge_res = ChallengeResolutionResultT({})
    challenge_res.url = driver.current_url
    challenge_res.status = 200  # todo: fix, selenium not provides this info
    challenge_res.cookies = driver.get_cookies()

    if not req.returnOnlyCookies:
        challenge_res.headers = {}  # todo: fix, selenium not provides this info
        challenge_res.response = driver.page_source
        challenge_res.userAgent = utils.get_user_agent(driver)

    res.result = challenge_res
    return res
def _find_obj_by_key(key: str, value: str, _list: list) -> dict | None:
    """
    Return the first dict of ``_list`` whose ``key`` entry equals ``value``.

    Entries that do not contain ``key`` are skipped instead of raising
    KeyError, so "not found" is always reported as ``None``.

    :param key: the dictionary key to compare
    :param value: the value that entry must have
    :param _list: the dicts to search, in order
    :return: the first matching dict, or None when there is no match
    """
    return next((obj for obj in _list if key in obj and obj[key] == value), None)
body = V1ResponseBase(res.json) + self.assertEqual(STATUS_ERROR, body.status) + self.assertEqual("Error: Request parameter 'cmd' = 'request.bad' is invalid.", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + def test_v1_endpoint_request_get_no_cloudflare(self): + res = self.app.post_json('/v1', { + "cmd": "request.get", + "url": self.google_url + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.google_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("