From 1841e33587ec2d41331f6401818959e2ce569755 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
<161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 23 Jun 2025 22:13:34 +0000
Subject: [PATCH 1/3] feat: Implement JSON POST support in FlareSolverr via
WebDriver
This commit introduces functionality to send POST requests with
an 'application/json' content type using a Selenium WebDriver.
The core logic involves:
1. Serializing a Python dictionary to a JSON string.
2. Escaping single quotes in the JSON string to allow safe embedding
into a JavaScript single-quoted string literal.
3. Dynamically generating an HTML5 document with embedded JavaScript.
4. The JavaScript uses the `fetch` API to make the POST request with
the correct 'Content-Type': 'application/json' header and the
JSON payload.
5. The HTML page (and thus the script) is loaded into the WebDriver
using a data URI.
6. Status messages (success/error) are displayed in a `div` on the
generated page and also logged to the browser console.
7. The function waits for the operation to complete and returns
the status message from the page.
A demonstration example using `https://httpbin.org/post` is included
to showcase the functionality, including handling of successful requests,
network errors, and HTTP error responses.
This approach is designed for scenarios where direct Python HTTP requests
might be insufficient due to browser-specific behaviors or JavaScript
dependencies on target sites.
---
flaresolverr_post_json.py | 266 ++++++++++++++++++++++++++++++++++++++
1 file changed, 266 insertions(+)
create mode 100644 flaresolverr_post_json.py
diff --git a/flaresolverr_post_json.py b/flaresolverr_post_json.py
new file mode 100644
index 0000000..033d21e
--- /dev/null
+++ b/flaresolverr_post_json.py
@@ -0,0 +1,266 @@
+import json
+from urllib.parse import quote, unquote # unquote is not used in the final solution but often useful
+from html import escape # escape is not used in the final solution but often useful
+
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options as ChromeOptions
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.by import By
+import time
+
+# Step 1: Define V1RequestBase class
+class V1RequestBase:
+ """
+ A simple class to mimic the structure of a request object,
+ holding the target URL and JSON data for the POST request.
+ """
+ def __init__(self, url: str, json_data: dict):
+ """
+ Initializes the V1RequestBase object.
+
+ Args:
+ url: The target URL for the POST request.
+ json_data: A Python dictionary containing the data to be sent as JSON.
+ """
+ self.url = url
+ self.json_data = json_data
+
+def solve_post_json(driver: webdriver.Chrome, req: V1RequestBase) -> str:
+ """
+ Simulates an HTTP POST request with Content-Type: application/json
+ by generating an HTML page with JavaScript, loading it into the WebDriver,
+ and using the browser's fetch API.
+
+ Args:
+ driver: An instance of Selenium WebDriver (e.g., Chrome).
+ req: An object with 'url' and 'json_data' attributes.
+ req.url: The target URL for the POST request.
+ req.json_data: A Python dictionary for the JSON payload.
+
+ Returns:
+ A string indicating the outcome of the request, usually the content
+ of the 'status-message' div from the generated HTML page.
+ """
+ try:
+ # Step 2a: JSON Payload Preparation
+ # Convert Python dictionary to JSON string.
+ json_payload_str_raw = json.dumps(req.json_data)
+
+ # Escape single quotes in the JSON string to safely embed it in JavaScript.
+ # JSON strings use double quotes, so we only need to worry about single quotes
+ # if they appear *within* string values in the JSON.
+ # The critical part is escaping for JavaScript single-quoted string literals.
+ json_payload_str_escaped = json_payload_str_raw.replace("'", "\\'")
+
+ # Step 2b: HTML Document Generation
+ # Construct a minimal HTML5 document with embedded JavaScript.
+ html_content = f"""
+
+
+
+
+
+ POST Request Executor
+
+
+ Executing POST Request...
+ Request initiated...
+
+
+
+
+"""
+ # Step 2c: WebDriver Execution
+ # URL-encode the HTML content for the data URI.
+ # `safe=''` ensures that characters like '/', '?', '&', '=', ':' are also encoded.
+ # However, for data URIs, common characters in HTML are generally fine.
+ # The primary concern is characters that would break the URI structure itself.
+ # Using `quote` without `safe=''` is usually sufficient for `data:text/html`.
+ # Let's be more specific with `safe` if issues arise, but default should be fine.
+ encoded_html_content = quote(html_content)
+ data_uri = f"data:text/html,{encoded_html_content}"
+
+ # Load the data URI into the WebDriver.
+ driver.get(data_uri)
+
+ # Wait for the JavaScript to update the status message.
+ # This indicates that the fetch operation has likely completed (or failed).
+ # We wait for "Success!", "Error:", or "HTTP error!" to appear in the div.
+ # Increased timeout for potentially slow network requests.
+ wait = WebDriverWait(driver, 30) # 30 seconds timeout
+ try:
+ status_element = wait.until(
+ EC.presence_of_element_located((By.ID, "status-message"))
+ )
+
+ # Wait until text contains one of the terminal keywords
+ wait.until(
+ lambda d: "Success!" in status_element.text or \
+ "Error:" in status_element.text or \
+ "HTTP error!" in status_element.text or \
+ "Request initiated..." not in status_element.text # fallback if it never changes from initial
+ )
+
+ # A small explicit sleep to allow final JS updates to the DOM if any race condition.
+ time.sleep(1)
+
+ final_status_text = status_element.text
+ except Exception as e:
+ print(f"Timeout or error waiting for status message: {e}")
+ # Try to get current status message anyway or logs
+ try:
+ final_status_text = driver.find_element(By.ID, "status-message").text
+ except:
+ final_status_text = "Error: Could not retrieve final status message."
+
+ # Retrieve browser console logs for debugging (optional but good for complex cases)
+ try:
+ browser_logs = driver.get_log('browser')
+ if browser_logs:
+ print("Browser Console Logs:")
+ for entry in browser_logs:
+ print(f" [{entry['level']}] {entry['message']}")
+ except Exception as e:
+ print(f"Could not retrieve browser logs: {e} (This might be normal if not supported by driver/config)")
+
+ return final_status_text
+
+ except json.JSONDecodeError as e:
+ error_msg = f"JSON Encoding Error: {e}"
+ print(error_msg)
+ return error_msg
+ except Exception as e:
+ # Catch any other exceptions during the process.
+ error_msg = f"An unexpected error occurred in solve_post_json: {e}"
+ print(error_msg)
+ # It might be useful to also get the current page source for debugging
+ # print("Current Page Source on Error:\n", driver.page_source)
+ return error_msg
+
+if __name__ == '__main__':
+ # Placeholder for example usage, will be filled in later
+ print("V1RequestBase class defined.")
+ # Basic test of V1RequestBase
+ # req_obj = V1RequestBase("http://example.com", {"key": "value"}) # Keep for basic class test if needed
+ # print(f"Request URL: {req_obj.url}, Data: {req_obj.json_data}")
+
+ print("\nsolve_post_json function defined. Running demonstration example...")
+
+ # Step 3: Create Demonstration Example
+ # Setup headless Chrome WebDriver
+ chrome_options = ChromeOptions()
+ chrome_options.add_argument("--headless")
+ chrome_options.add_argument("--no-sandbox") # Necessary for running in some environments (e.g. Docker)
+ chrome_options.add_argument("--disable-dev-shm-usage") # Overcome limited resource problems
+ # Enable browser logging
+ chrome_options.set_capability('goog:loggingPrefs', {'browser': 'ALL'})
+
+
+ driver = None # Initialize driver to None for finally block
+ try:
+ driver = webdriver.Chrome(options=chrome_options)
+
+ # Create a dummy V1RequestBase instance
+ # Using httpbin.org/post which echoes back the POST request data.
+ # This is a great service for testing HTTP requests.
+ request_data = {
+ "name": "FlareSolverr Test",
+ "type": "JSON_POST",
+ "message": "Hello from Selenium!",
+ "details": {
+ "version": 1.0,
+ "nested_key": "value with 'single quote' and \"double quote\""
+ },
+ "items": [1, "two", {"three": 3.0}]
+ }
+ # Test with a URL that might fail to see error handling
+ # req_instance = V1RequestBase(url="https://nonexistent-domain-for-testing123.com/post", json_data=request_data)
+
+ req_instance = V1RequestBase(url="https://httpbin.org/post", json_data=request_data)
+
+ # Call the function
+ print(f"\nSending POST request to: {req_instance.url}")
+ print(f"With JSON data: {req_instance.json_data}")
+
+ result_message = solve_post_json(driver, req_instance)
+
+ # Print the result
+ print("\n--- Result from solve_post_json ---")
+ print(result_message)
+ print("--- End of Result ---")
+
+ # Example of a failing request (e.g. network error)
+ print("\n--- Testing a failing request (network error) ---")
+ failing_req_instance = V1RequestBase(url="https://domain.invalid/post", json_data={"error_test": True})
+ failing_result_message = solve_post_json(driver, failing_req_instance)
+ print("\n--- Result from failing solve_post_json ---")
+ print(failing_result_message)
+ print("--- End of Failing Result ---")
+
+ # Example of a request to a URL that returns a 404
+ print("\n--- Testing a request that results in HTTP 404 ---")
+ notfound_req_instance = V1RequestBase(url="https://httpbin.org/status/404", json_data={"status_test": 404})
+ notfound_result_message = solve_post_json(driver, notfound_req_instance)
+ print("\n--- Result from 404 solve_post_json ---")
+ print(notfound_result_message)
+ print("--- End of 404 Result ---")
+
+
+ except Exception as e:
+ print(f"An error occurred during the demonstration: {e}")
+ finally:
+ if driver:
+ # Quit the WebDriver session.
+ driver.quit()
+ print("\nDemonstration finished and WebDriver closed.")
From 48383a459de1c5e92c1d17c1532154fd889dcc51 Mon Sep 17 00:00:00 2001
From: Wyatt Henke
Date: Mon, 23 Jun 2025 19:30:17 -0400
Subject: [PATCH 2/3] Add support for handling JSON POST requests with embedded
fetch logic in Selenium.
---
flaresolverr_post_json.py | 266 ------------------------------------
src/flaresolverr_service.py | 164 ++++++++++++++++++++++
2 files changed, 164 insertions(+), 266 deletions(-)
delete mode 100644 flaresolverr_post_json.py
diff --git a/flaresolverr_post_json.py b/flaresolverr_post_json.py
deleted file mode 100644
index 033d21e..0000000
--- a/flaresolverr_post_json.py
+++ /dev/null
@@ -1,266 +0,0 @@
-import json
-from urllib.parse import quote, unquote # unquote is not used in the final solution but often useful
-from html import escape # escape is not used in the final solution but often useful
-
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options as ChromeOptions
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.common.by import By
-import time
-
-# Step 1: Define V1RequestBase class
-class V1RequestBase:
- """
- A simple class to mimic the structure of a request object,
- holding the target URL and JSON data for the POST request.
- """
- def __init__(self, url: str, json_data: dict):
- """
- Initializes the V1RequestBase object.
-
- Args:
- url: The target URL for the POST request.
- json_data: A Python dictionary containing the data to be sent as JSON.
- """
- self.url = url
- self.json_data = json_data
-
-def solve_post_json(driver: webdriver.Chrome, req: V1RequestBase) -> str:
- """
- Simulates an HTTP POST request with Content-Type: application/json
- by generating an HTML page with JavaScript, loading it into the WebDriver,
- and using the browser's fetch API.
-
- Args:
- driver: An instance of Selenium WebDriver (e.g., Chrome).
- req: An object with 'url' and 'json_data' attributes.
- req.url: The target URL for the POST request.
- req.json_data: A Python dictionary for the JSON payload.
-
- Returns:
- A string indicating the outcome of the request, usually the content
- of the 'status-message' div from the generated HTML page.
- """
- try:
- # Step 2a: JSON Payload Preparation
- # Convert Python dictionary to JSON string.
- json_payload_str_raw = json.dumps(req.json_data)
-
- # Escape single quotes in the JSON string to safely embed it in JavaScript.
- # JSON strings use double quotes, so we only need to worry about single quotes
- # if they appear *within* string values in the JSON.
- # The critical part is escaping for JavaScript single-quoted string literals.
- json_payload_str_escaped = json_payload_str_raw.replace("'", "\\'")
-
- # Step 2b: HTML Document Generation
- # Construct a minimal HTML5 document with embedded JavaScript.
- html_content = f"""
-
-
-
-
-
- POST Request Executor
-
-
- Executing POST Request...
- Request initiated...
-
-
-
-
-"""
- # Step 2c: WebDriver Execution
- # URL-encode the HTML content for the data URI.
- # `safe=''` ensures that characters like '/', '?', '&', '=', ':' are also encoded.
- # However, for data URIs, common characters in HTML are generally fine.
- # The primary concern is characters that would break the URI structure itself.
- # Using `quote` without `safe=''` is usually sufficient for `data:text/html`.
- # Let's be more specific with `safe` if issues arise, but default should be fine.
- encoded_html_content = quote(html_content)
- data_uri = f"data:text/html,{encoded_html_content}"
-
- # Load the data URI into the WebDriver.
- driver.get(data_uri)
-
- # Wait for the JavaScript to update the status message.
- # This indicates that the fetch operation has likely completed (or failed).
- # We wait for "Success!", "Error:", or "HTTP error!" to appear in the div.
- # Increased timeout for potentially slow network requests.
- wait = WebDriverWait(driver, 30) # 30 seconds timeout
- try:
- status_element = wait.until(
- EC.presence_of_element_located((By.ID, "status-message"))
- )
-
- # Wait until text contains one of the terminal keywords
- wait.until(
- lambda d: "Success!" in status_element.text or \
- "Error:" in status_element.text or \
- "HTTP error!" in status_element.text or \
- "Request initiated..." not in status_element.text # fallback if it never changes from initial
- )
-
- # A small explicit sleep to allow final JS updates to the DOM if any race condition.
- time.sleep(1)
-
- final_status_text = status_element.text
- except Exception as e:
- print(f"Timeout or error waiting for status message: {e}")
- # Try to get current status message anyway or logs
- try:
- final_status_text = driver.find_element(By.ID, "status-message").text
- except:
- final_status_text = "Error: Could not retrieve final status message."
-
- # Retrieve browser console logs for debugging (optional but good for complex cases)
- try:
- browser_logs = driver.get_log('browser')
- if browser_logs:
- print("Browser Console Logs:")
- for entry in browser_logs:
- print(f" [{entry['level']}] {entry['message']}")
- except Exception as e:
- print(f"Could not retrieve browser logs: {e} (This might be normal if not supported by driver/config)")
-
- return final_status_text
-
- except json.JSONDecodeError as e:
- error_msg = f"JSON Encoding Error: {e}"
- print(error_msg)
- return error_msg
- except Exception as e:
- # Catch any other exceptions during the process.
- error_msg = f"An unexpected error occurred in solve_post_json: {e}"
- print(error_msg)
- # It might be useful to also get the current page source for debugging
- # print("Current Page Source on Error:\n", driver.page_source)
- return error_msg
-
-if __name__ == '__main__':
- # Placeholder for example usage, will be filled in later
- print("V1RequestBase class defined.")
- # Basic test of V1RequestBase
- # req_obj = V1RequestBase("http://example.com", {"key": "value"}) # Keep for basic class test if needed
- # print(f"Request URL: {req_obj.url}, Data: {req_obj.json_data}")
-
- print("\nsolve_post_json function defined. Running demonstration example...")
-
- # Step 3: Create Demonstration Example
- # Setup headless Chrome WebDriver
- chrome_options = ChromeOptions()
- chrome_options.add_argument("--headless")
- chrome_options.add_argument("--no-sandbox") # Necessary for running in some environments (e.g. Docker)
- chrome_options.add_argument("--disable-dev-shm-usage") # Overcome limited resource problems
- # Enable browser logging
- chrome_options.set_capability('goog:loggingPrefs', {'browser': 'ALL'})
-
-
- driver = None # Initialize driver to None for finally block
- try:
- driver = webdriver.Chrome(options=chrome_options)
-
- # Create a dummy V1RequestBase instance
- # Using httpbin.org/post which echoes back the POST request data.
- # This is a great service for testing HTTP requests.
- request_data = {
- "name": "FlareSolverr Test",
- "type": "JSON_POST",
- "message": "Hello from Selenium!",
- "details": {
- "version": 1.0,
- "nested_key": "value with 'single quote' and \"double quote\""
- },
- "items": [1, "two", {"three": 3.0}]
- }
- # Test with a URL that might fail to see error handling
- # req_instance = V1RequestBase(url="https://nonexistent-domain-for-testing123.com/post", json_data=request_data)
-
- req_instance = V1RequestBase(url="https://httpbin.org/post", json_data=request_data)
-
- # Call the function
- print(f"\nSending POST request to: {req_instance.url}")
- print(f"With JSON data: {req_instance.json_data}")
-
- result_message = solve_post_json(driver, req_instance)
-
- # Print the result
- print("\n--- Result from solve_post_json ---")
- print(result_message)
- print("--- End of Result ---")
-
- # Example of a failing request (e.g. network error)
- print("\n--- Testing a failing request (network error) ---")
- failing_req_instance = V1RequestBase(url="https://domain.invalid/post", json_data={"error_test": True})
- failing_result_message = solve_post_json(driver, failing_req_instance)
- print("\n--- Result from failing solve_post_json ---")
- print(failing_result_message)
- print("--- End of Failing Result ---")
-
- # Example of a request to a URL that returns a 404
- print("\n--- Testing a request that results in HTTP 404 ---")
- notfound_req_instance = V1RequestBase(url="https://httpbin.org/status/404", json_data={"status_test": 404})
- notfound_result_message = solve_post_json(driver, notfound_req_instance)
- print("\n--- Result from 404 solve_post_json ---")
- print(notfound_result_message)
- print("--- End of 404 Result ---")
-
-
- except Exception as e:
- print(f"An error occurred during the demonstration: {e}")
- finally:
- if driver:
- # Quit the WebDriver session.
- driver.quit()
- print("\nDemonstration finished and WebDriver closed.")
diff --git a/src/flaresolverr_service.py b/src/flaresolverr_service.py
index a469bea..4fad0c7 100644
--- a/src/flaresolverr_service.py
+++ b/src/flaresolverr_service.py
@@ -2,6 +2,7 @@ import logging
import platform
import sys
import time
+import json
from datetime import timedelta
from html import escape
from urllib.parse import unquote, quote
@@ -15,6 +16,7 @@ from selenium.webdriver.support.expected_conditions import (
presence_of_element_located, staleness_of, title_is)
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
import utils
from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT,
@@ -136,6 +138,8 @@ def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
res = _cmd_request_get(req)
elif req.cmd == 'request.post':
res = _cmd_request_post(req)
+ elif req.cmd == 'request.postJSON':
+ res = _cmd_request_postJSON(req)
else:
raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.")
@@ -178,6 +182,23 @@ def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase:
return res
+def _cmd_request_postJSON(req: V1RequestBase) -> V1ResponseBase:
+    """Handle 'request.postJSON': require postData, warn on removed params, resolve the challenge as 'POSTJSON' and wrap the result."""
+    if req.postData is None:
+        raise Exception("Request parameter 'postData' is mandatory in 'request.postJSON' command.")
+    if req.returnRawHtml is not None:
+        logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
+    if req.download is not None:
+        logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")
+
+    challenge_res = _resolve_challenge(req, 'POSTJSON')
+    res = V1ResponseBase({})
+    res.status = challenge_res.status
+    res.message = challenge_res.message
+    res.solution = challenge_res.result
+    return res
+
+
def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase:
logging.debug("Creating new session...")
@@ -292,6 +313,8 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
logging.debug(f'Navigating to... {req.url}')
if method == 'POST':
_post_request(req, driver)
+ elif method == 'POSTJSON':
+ _post_request_json(req, driver)
else:
driver.get(req.url)
@@ -425,3 +448,144 @@ def _post_request(req: V1RequestBase, driver: WebDriver):