From 30ccf18e855aba1b014b72303dbd66874d739411 Mon Sep 17 00:00:00 2001 From: ngosang Date: Mon, 20 Mar 2023 17:06:16 +0100 Subject: [PATCH] Several fixes in Sessions --- README.md | 17 +++++++++-------- src/dtos.py | 2 ++ src/flaresolverr_service.py | 18 +++++++----------- src/sessions.py | 13 ++++++------- src/tests.py | 5 +++-- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 04bf364..da301d5 100644 --- a/README.md +++ b/README.md @@ -137,14 +137,15 @@ session. When you no longer need to use a session you should make sure to close #### + `request.get` -| Parameter | Notes | -|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | Mandatory | -| session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. | -| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. | -| cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. | -| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. | -| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) | +| Parameter | Notes | +|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | Mandatory | +| session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. | +| session_ttl_minutes | Optional. FlareSolverr will automatically rotate expired sessions based on the TTL provided in minutes. | +| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. | +| cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. | +| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. | +| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) | :warning: If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge. diff --git a/src/dtos.py b/src/dtos.py index bf4414b..1e9aace 100644 --- a/src/dtos.py +++ b/src/dtos.py @@ -52,6 +52,8 @@ class V1ResponseBase(object): # V1ResponseBase status: str = None message: str = None + session: str = None + sessions: list[str] = None startTimestamp: int = None endTimestamp: int = None version: str = None diff --git a/src/flaresolverr_service.py b/src/flaresolverr_service.py index 3014ff0..dd5306f 100644 --- a/src/flaresolverr_service.py +++ b/src/flaresolverr_service.py @@ -4,12 +4,10 @@ import sys import time from datetime import timedelta from urllib.parse import unquote -from uuid import uuid1 from func_timeout import FunctionTimedOut, func_timeout from selenium.common import TimeoutException from selenium.webdriver.chrome.webdriver import WebDriver -from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.by import By from selenium.webdriver.support.expected_conditions import ( presence_of_element_located, staleness_of, title_is) @@ -49,9 +47,9 @@ CHALLENGE_SELECTORS = [ 'div.vc div.text-box h2' ] SHORT_TIMEOUT = 10 - SESSIONS_STORAGE = SessionsStorage() + def test_browser_installation(): logging.info("Testing web browser installation...") logging.info("Platform: " + platform.platform()) @@ -181,7 +179,7 @@ def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase: def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase: logging.debug("Creating new session...") - session, fresh = SESSIONS_STORAGE.create() + session, fresh = SESSIONS_STORAGE.create(session_id=req.session) session_id = session.session_id if not fresh: @@ -213,10 +211,7 @@ def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase: existed = SESSIONS_STORAGE.destroy(session_id) if not existed: - return V1ResponseBase({ - "status": STATUS_OK, - "message": "The session doesn't exists." - }) + raise Exception("The session doesn't exist.") return V1ResponseBase({ "status": STATUS_OK, @@ -236,7 +231,8 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: if fresh: logging.debug(f"new session created to perform the request (session_id={session_id})") else: - logging.debug(f"existing session is used to perform the request (session_id={session_id}, lifetime={str(session.lifetime())}, ttl={str(ttl)})") + logging.debug(f"existing session is used to perform the request (session_id={session_id}, " + f"lifetime={str(session.lifetime())}, ttl={str(ttl)})") driver = session.driver else: @@ -268,9 +264,8 @@ def click_verify(driver: WebDriver): actions.click(checkbox) actions.perform() logging.debug("Cloudflare verify checkbox found and clicked") - except Exception as e: + except Exception: logging.debug("Cloudflare verify checkbox not found on the page") - # print(e) finally: driver.switch_to.default_content() @@ -292,6 +287,7 @@ def click_verify(driver: WebDriver): time.sleep(2) + def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT: res = ChallengeResolutionT({}) res.status = STATUS_OK diff --git a/src/sessions.py b/src/sessions.py index 1fb434d..4a2f2e5 100644 --- a/src/sessions.py +++ b/src/sessions.py @@ -17,7 +17,8 @@ class Session: def lifetime(self) -> timedelta: return datetime.now() - self.created_at - + + class SessionsStorage: """SessionsStorage creates, stores and process all the sessions""" @@ -25,12 +26,12 @@ class SessionsStorage: self.sessions = {} def create(self, session_id: Optional[str] = None, force_new: Optional[bool] = False) -> Tuple[Session, bool]: - """create creates new instance of WebDriver if neccessary, + """create creates new instance of WebDriver if necessary, assign defined (or newly generated) session_id to the instance and returns the session object. If a new session has been created second argument is set to True. - Note: The function is idemponent, so in case if session_id + Note: The function is idempotent, so in case if session_id already exists in the storage a new instance of WebDriver won't be created and existing session will be returned. Second argument defines if new session has been created (True) or an existing one was used (False). @@ -70,13 +71,11 @@ class SessionsStorage: def get(self, session_id: str, ttl: Optional[timedelta] = None) -> Tuple[Session, bool]: session, fresh = self.create(session_id) - if ttl != None and not fresh and session.lifetime() > ttl: + if ttl is not None and not fresh and session.lifetime() > ttl: logging.debug(f'session\'s lifetime has expired, so the session is recreated (session_id={session_id})') - session, fresh = self.create(session_id, force_new = True) + session, fresh = self.create(session_id, force_new=True) return session, fresh - def session_ids(self) -> list[str]: return list(self.sessions.keys()) - diff --git a/src/tests.py b/src/tests.py index be67489..0f6d974 100644 --- a/src/tests.py +++ b/src/tests.py @@ -437,11 +437,11 @@ class TestFlareSolverr(unittest.TestCase): "cmd": "sessions.destroy", "session": "non_existing_session_name" }, status=500) - self.assertEqual(res.status_code, 200) + self.assertEqual(res.status_code, 500) body = V1ResponseBase(res.json) self.assertEqual(STATUS_ERROR, body.status) - self.assertEqual("The session doesn't exists.", body.message) + self.assertEqual("Error: The session doesn't exist.", body.message) def test_v1_endpoint_request_get_with_session(self): self.app.post_json('/v1', { @@ -458,5 +458,6 @@ class TestFlareSolverr(unittest.TestCase): body = V1ResponseBase(res.json) self.assertEqual(STATUS_OK, body.status) + if __name__ == '__main__': unittest.main()