From e3b4200d94521c243056b6fbefa7fe63540bb476 Mon Sep 17 00:00:00 2001
From: ngosang
Date: Sun, 8 Jan 2023 20:27:58 +0100
Subject: [PATCH] Detect Cloudflare blocked access

---
 CHANGELOG.md                |  4 ++++
 src/flaresolverr_service.py | 23 +++++++++++++++++------
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8422f5e..9a3aaf5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## v3.0.2 (2023/01/08)
+
+* Detect Cloudflare blocked access
+
 ## v3.0.1 (2023/01/06)
 
 * Kill Chromium processes properly to avoid defunct/zombie processes
diff --git a/src/flaresolverr_service.py b/src/flaresolverr_service.py
index cb225a2..2acc17c 100644
--- a/src/flaresolverr_service.py
+++ b/src/flaresolverr_service.py
@@ -13,13 +13,19 @@ from dtos import V1RequestBase, V1ResponseBase, ChallengeResolutionT, ChallengeR
     HealthResponse, STATUS_OK, STATUS_ERROR
 import utils
 
+ACCESS_DENIED_TITLES = [
+    # Cloudflare
+    'Access denied',
+    # Cloudflare http://bitturk.net/ Firefox
+    'Attention Required! | Cloudflare'
+]
 ACCESS_DENIED_SELECTORS = [
     # Cloudflare
-    'div.cf-error-title span.cf-code-label span'
+    'div.cf-error-title span.cf-code-label span',
     # Cloudflare http://bitturk.net/ Firefox
     '#cf-error-details div.cf-error-overview h1'
 ]
-CHALLENGE_TITLE = [
+CHALLENGE_TITLES = [
     # Cloudflare
     'Just a moment...',
     # DDoS-GUARD
@@ -174,7 +180,13 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
 
     # wait for the page
     html_element = driver.find_element(By.TAG_NAME, "html")
+    page_title = driver.title
 
+    # find access denied titles
+    for title in ACCESS_DENIED_TITLES:
+        if title == page_title:
+            raise Exception('Cloudflare has blocked this request. '
+                            'Probably your IP is banned for this site, check in your web browser.')
     # find access denied selectors
     for selector in ACCESS_DENIED_SELECTORS:
         found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
@@ -184,8 +196,7 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
 
     # find challenge by title
     challenge_found = False
-    page_title = driver.title
-    for title in CHALLENGE_TITLE:
+    for title in CHALLENGE_TITLES:
         if title == page_title:
             challenge_found = True
             logging.info("Challenge detected. Title found: " + title)
@@ -202,8 +213,8 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
     if challenge_found:
         while True:
             try:
-                # wait until the title change
-                for title in CHALLENGE_TITLE:
+                # wait until the title changes
+                for title in CHALLENGE_TITLES:
                     logging.debug("Waiting for title: " + title)
                     WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title))
 