From 49fd1aacfc55548983bf1eed3b533a00a4b27eb0 Mon Sep 17 00:00:00 2001 From: Artemiy Ryabinkov Date: Mon, 20 Mar 2023 15:25:48 +0000 Subject: [PATCH] Sessions with auto-creation on fetch request and TTL (#736) * Add support for sessions * Add tests for sessions * Missing return type * Don't re-create an existing session * Return success if the session doesn't exist on destroy * Create session if necessary on get request * Add session TTL to the request.get method When fetching a webpage with a predefined session id, FlareSolverr uses the existing WebDriver instance. This lets the user avoid managing cookies explicitly and rely on WebDriver to maintain the session. However, if the session was created a long time ago, Cloudflare might stop accepting the requests, so we want to recreate the session from time to time. From the user's perspective, the easiest way of doing this is to define the expected session duration. These changes add an option to define a time-to-live (TTL) for the session, and FlareSolverr takes care of rotating the sessions. 
* Update message for session destroy in tests --------- Co-authored-by: Michel Roux --- src/dtos.py | 1 + src/flaresolverr_service.py | 85 ++++++++++++++++++++++++++++---- src/sessions.py | 82 +++++++++++++++++++++++++++++++ src/tests.py | 96 ++++++++++++++++++++++++++++++++----- 4 files changed, 243 insertions(+), 21 deletions(-) create mode 100644 src/sessions.py diff --git a/src/dtos.py b/src/dtos.py index 87fd18c..bf4414b 100644 --- a/src/dtos.py +++ b/src/dtos.py @@ -33,6 +33,7 @@ class V1RequestBase(object): maxTimeout: int = None proxy: dict = None session: str = None + session_ttl_minutes: int = None headers: list = None # deprecated v2.0.0, not used userAgent: str = None # deprecated v2.0.0, not used diff --git a/src/flaresolverr_service.py b/src/flaresolverr_service.py index a9f9862..c34ff02 100644 --- a/src/flaresolverr_service.py +++ b/src/flaresolverr_service.py @@ -2,19 +2,25 @@ import logging import platform import sys import time +from datetime import timedelta from urllib.parse import unquote +from uuid import uuid1 -from func_timeout import func_timeout, FunctionTimedOut +from func_timeout import FunctionTimedOut, func_timeout from selenium.common import TimeoutException from selenium.webdriver.chrome.webdriver import WebDriver +from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.by import By +from selenium.webdriver.support.expected_conditions import ( + presence_of_element_located, staleness_of, title_is) from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support.expected_conditions import presence_of_element_located, staleness_of, title_is -from dtos import V1RequestBase, V1ResponseBase, ChallengeResolutionT, ChallengeResolutionResultT, IndexResponse, \ - HealthResponse, STATUS_OK, STATUS_ERROR import utils +from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT, + ChallengeResolutionT, 
HealthResponse, IndexResponse, + V1RequestBase, V1ResponseBase) +from sessions import SessionsStorage ACCESS_DENIED_TITLES = [ # Cloudflare @@ -44,6 +50,7 @@ CHALLENGE_SELECTORS = [ ] SHORT_TIMEOUT = 10 +SESSIONS_STORAGE = SessionsStorage() def test_browser_installation(): logging.info("Testing web browser installation...") @@ -120,11 +127,11 @@ def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase: # execute the command res: V1ResponseBase if req.cmd == 'sessions.create': - raise Exception("Not implemented yet.") + res = _cmd_sessions_create(req) elif req.cmd == 'sessions.list': - raise Exception("Not implemented yet.") + res = _cmd_sessions_list(req) elif req.cmd == 'sessions.destroy': - raise Exception("Not implemented yet.") + res = _cmd_sessions_destroy(req) elif req.cmd == 'request.get': res = _cmd_request_get(req) elif req.cmd == 'request.post': @@ -171,19 +178,79 @@ def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase: return res +def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase: + logging.debug("Creating new session...") + + session, fresh = SESSIONS_STORAGE.create() + session_id = session.session_id + + if not fresh: + return V1ResponseBase({ + "status": STATUS_OK, + "message": "Session already exists.", + "session": session_id + }) + + return V1ResponseBase({ + "status": STATUS_OK, + "message": "Session created successfully.", + "session": session_id + }) + + +def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase: + session_ids = SESSIONS_STORAGE.session_ids() + + return V1ResponseBase({ + "status": STATUS_OK, + "message": "", + "sessions": session_ids + }) + + +def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase: + session_id = req.session + existed = SESSIONS_STORAGE.destroy(session_id) + + if not existed: + return V1ResponseBase({ + "status": STATUS_OK, + "message": "The session doesn't exists." + }) + + return V1ResponseBase({ + "status": STATUS_OK, + "message": "The session has been removed." 
+ }) + + def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: timeout = req.maxTimeout / 1000 driver = None try: - driver = utils.get_webdriver() + if req.session: + session_id = req.session + ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None + session, fresh = SESSIONS_STORAGE.get(session_id, ttl) + + if fresh: + logging.debug(f"new session created to perform the request (session_id={session_id})") + else: + logging.debug(f"existing session is used to perform the request (session_id={session_id}, lifetime={str(session.lifetime())}, ttl={str(ttl)})") + + driver = session.driver + else: + driver = utils.get_webdriver() + logging.debug('New instance of webdriver has been created to perform the request') return func_timeout(timeout, _evil_logic, (req, driver, method)) except FunctionTimedOut: raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.') except Exception as e: raise Exception('Error solving the challenge. 
' + str(e)) finally: - if driver is not None: + if not req.session and driver is not None: driver.quit() + logging.debug('A used instance of webdriver has been destroyed') def click_verify(driver: WebDriver): diff --git a/src/sessions.py b/src/sessions.py new file mode 100644 index 0000000..1fb434d --- /dev/null +++ b/src/sessions.py @@ -0,0 +1,82 @@ +import logging +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import Optional, Tuple +from uuid import uuid1 + +from selenium.webdriver.chrome.webdriver import WebDriver + +import utils + + +@dataclass +class Session: + session_id: str + driver: WebDriver + created_at: datetime + + def lifetime(self) -> timedelta: + return datetime.now() - self.created_at + +class SessionsStorage: + """SessionsStorage creates, stores and process all the sessions""" + + def __init__(self): + self.sessions = {} + + def create(self, session_id: Optional[str] = None, force_new: Optional[bool] = False) -> Tuple[Session, bool]: + """create creates new instance of WebDriver if neccessary, + assign defined (or newly generated) session_id to the instance + and returns the session object. If a new session has been created + second argument is set to True. + + Note: The function is idemponent, so in case if session_id + already exists in the storage a new instance of WebDriver won't be created + and existing session will be returned. Second argument defines if + new session has been created (True) or an existing one was used (False). 
+ """ + session_id = session_id or str(uuid1()) + + if force_new: + self.destroy(session_id) + + if self.exists(session_id): + return self.sessions[session_id], False + + driver = utils.get_webdriver() + created_at = datetime.now() + session = Session(session_id, driver, created_at) + + self.sessions[session_id] = session + + return session, True + + def exists(self, session_id: str) -> bool: + return session_id in self.sessions + + def destroy(self, session_id: str) -> bool: + """destroy closes the driver instance and removes session from the storage. + The function is noop if session_id doesn't exist. + The function returns True if session was found and destroyed, + and False if session_id wasn't found. + """ + if not self.exists(session_id): + return False + + session = self.sessions.pop(session_id) + session.driver.quit() + return True + + def get(self, session_id: str, ttl: Optional[timedelta] = None) -> Tuple[Session, bool]: + session, fresh = self.create(session_id) + + if ttl != None and not fresh and session.lifetime() > ttl: + logging.debug(f'session\'s lifetime has expired, so the session is recreated (session_id={session_id})') + session, fresh = self.create(session_id, force_new = True) + + return session, fresh + + + def session_ids(self) -> list[str]: + return list(self.sessions.keys()) + diff --git a/src/tests.py b/src/tests.py index ed03ca0..bda43a0 100644 --- a/src/tests.py +++ b/src/tests.py @@ -64,7 +64,7 @@ class TestFlareSolverr(unittest.TestCase): self.assertEqual("Error: Request parameter 'cmd' = 'request.bad' is invalid.", body.message) self.assertGreater(body.startTimestamp, 10000) self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) - self.assertEqual(utils.get_flaresolverr_version(), body.version) + self.assertEqual(utils.get_flaresolverr_version(), body.version) def test_v1_endpoint_request_get_no_cloudflare(self): res = self.app.post_json('/v1', { @@ -78,7 +78,7 @@ class TestFlareSolverr(unittest.TestCase): 
self.assertEqual("Challenge not detected!", body.message) self.assertGreater(body.startTimestamp, 10000) self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) - self.assertEqual(utils.get_flaresolverr_version(), body.version) + self.assertEqual(utils.get_flaresolverr_version(), body.version) solution = body.solution self.assertIn(self.google_url, solution.url) @@ -100,7 +100,7 @@ class TestFlareSolverr(unittest.TestCase): self.assertEqual("Challenge solved!", body.message) self.assertGreater(body.startTimestamp, 10000) self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) - self.assertEqual(utils.get_flaresolverr_version(), body.version) + self.assertEqual(utils.get_flaresolverr_version(), body.version) solution = body.solution self.assertIn(self.cloudflare_url, solution.url) @@ -126,7 +126,7 @@ class TestFlareSolverr(unittest.TestCase): self.assertEqual("Challenge solved!", body.message) self.assertGreater(body.startTimestamp, 10000) self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) - self.assertEqual(utils.get_flaresolverr_version(), body.version) + self.assertEqual(utils.get_flaresolverr_version(), body.version) solution = body.solution self.assertIn(self.cloudflare_url_2, solution.url) @@ -152,7 +152,7 @@ class TestFlareSolverr(unittest.TestCase): self.assertEqual("Challenge solved!", body.message) self.assertGreater(body.startTimestamp, 10000) self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) - self.assertEqual(utils.get_flaresolverr_version(), body.version) + self.assertEqual(utils.get_flaresolverr_version(), body.version) solution = body.solution self.assertIn(self.ddos_guard_url, solution.url) @@ -178,7 +178,7 @@ class TestFlareSolverr(unittest.TestCase): self.assertEqual("Challenge solved!", body.message) self.assertGreater(body.startTimestamp, 10000) self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) - self.assertEqual(utils.get_flaresolverr_version(), body.version) + 
self.assertEqual(utils.get_flaresolverr_version(), body.version) solution = body.solution self.assertIn(self.custom_cloudflare_url, solution.url) @@ -351,13 +351,85 @@ class TestFlareSolverr(unittest.TestCase): self.assertEqual(STATUS_OK, body.status) self.assertEqual("Challenge not detected!", body.message) - # todo: test Cmd 'sessions.create' should return OK - # todo: test Cmd 'sessions.create' should return OK with session - # todo: test Cmd 'sessions.list' should return OK - # todo: test Cmd 'sessions.destroy' should return OK - # todo: test Cmd 'sessions.destroy' should fail - # todo: test Cmd 'request.get' should use session + def test_v1_endpoint_sessions_create_without_session(self): + res = self.app.post_json('/v1', { + "cmd": "sessions.create" + }) + self.assertEqual(res.status_code, 200) + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("Session created successfully.", body.message) + self.assertIsNotNone(body.session) + + def test_v1_endpoint_sessions_create_with_session(self): + res = self.app.post_json('/v1', { + "cmd": "sessions.create", + "session": "test_create_session" + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("Session created successfully.", body.message) + self.assertEqual(body.session, "test_create_session") + + def test_v1_endpoint_sessions_list(self): + self.app.post_json('/v1', { + "cmd": "sessions.create", + "session": "test_list_sessions" + }) + res = self.app.post_json('/v1', { + "cmd": "sessions.list" + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("", body.message) + self.assertGreaterEqual(len(body.sessions), 1) + self.assertIn("test_list_sessions", body.sessions) + + def test_v1_endpoint_sessions_destroy_existing_session(self): + self.app.post_json('/v1', { + "cmd": "sessions.create", + 
"session": "test_destroy_sessions" + }) + res = self.app.post_json('/v1', { + "cmd": "sessions.destroy", + "session": "test_destroy_sessions" + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("The session has been removed.", body.message) + + def test_v1_endpoint_sessions_destroy_non_existing_session(self): + res = self.app.post_json('/v1', { + "cmd": "sessions.destroy", + "session": "non_existing_session_name" + }, status=500) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_ERROR, body.status) + self.assertEqual("The session doesn't exists.", body.message) + + def test_v1_endpoint_request_get_with_session(self): + self.app.post_json('/v1', { + "cmd": "sessions.create", + "session": "test_request_sessions" + }) + res = self.app.post_json('/v1', { + "cmd": "request.get", + "session": "test_request_sessions", + "url": self.google_url + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) if __name__ == '__main__': unittest.main()