Add support for handling JSON POST requests with embedded fetch logic in Selenium.

2025-07-23 10:30:08 +00:00 · 2025-06-23 19:30:17 -04:00 · 2025-06-23 19:30:17 -04:00 · 48383a459d
commit 48383a459d
parent 1841e33587
2 changed files with 164 additions and 266 deletions
--- a/flaresolverr_post_json.py
+++ b/flaresolverr_post_json.py
@ -1,266 +0,0 @@
-import json
-from urllib.parse import quote, unquote # unquote is not used in the final solution but often useful
-from html import escape # escape is not used in the final solution but often useful
-
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options as ChromeOptions
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.common.by import By
-import time
-
-# Step 1: Define V1RequestBase class
-class V1RequestBase:
-    """
-    A simple class to mimic the structure of a request object,
-    holding the target URL and JSON data for the POST request.
-    """
-    def __init__(self, url: str, json_data: dict):
-        """
-        Initializes the V1RequestBase object.
-
-        Args:
-            url: The target URL for the POST request.
-            json_data: A Python dictionary containing the data to be sent as JSON.
-        """
-        self.url = url
-        self.json_data = json_data
-
-def solve_post_json(driver: webdriver.Chrome, req: V1RequestBase) -> str:
-    """
-    Simulates an HTTP POST request with Content-Type: application/json
-    by generating an HTML page with JavaScript, loading it into the WebDriver,
-    and using the browser's fetch API.
-
-    Args:
-        driver: An instance of Selenium WebDriver (e.g., Chrome).
-        req: An object with 'url' and 'json_data' attributes.
-             req.url: The target URL for the POST request.
-             req.json_data: A Python dictionary for the JSON payload.
-
-    Returns:
-        A string indicating the outcome of the request, usually the content
-        of the 'status-message' div from the generated HTML page.
-    """
-    try:
-        # Step 2a: JSON Payload Preparation
-        # Convert Python dictionary to JSON string.
-        json_payload_str_raw = json.dumps(req.json_data)
-
-        # Escape single quotes in the JSON string to safely embed it in JavaScript.
-        # JSON strings use double quotes, so we only need to worry about single quotes
-        # if they appear *within* string values in the JSON.
-        # The critical part is escaping for JavaScript single-quoted string literals.
-        json_payload_str_escaped = json_payload_str_raw.replace("'", "\\'")
-
-        # Step 2b: HTML Document Generation
-        # Construct a minimal HTML5 document with embedded JavaScript.
-        html_content = f"""
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>POST Request Executor</title>
-</head>
-<body>
-    <h1>Executing POST Request...</h1>
-    <div id="status-message">Request initiated...</div>
-
-    <script>
-        // JavaScript implementation
-        (async function() {{
-            const url = '{req.url}';
-            // The JSON payload string is directly embedded here.
-            // Note: It's wrapped in single quotes in the JS.
-            const jsonPayloadStr = '{json_payload_str_escaped}';
-            const statusDiv = document.getElementById('status-message');
-
-            console.log('Request URL:', url);
-            console.log('JSON Payload (raw string for JS):', jsonPayloadStr);
-
-            try {{
-                // Parse the JSON string to ensure it's valid before sending
-                // This also correctly unescapes characters for the actual HTTP body
-                const jsonData = JSON.parse(jsonPayloadStr);
-                console.log('JSON Payload (parsed JS object):', jsonData);
-
-                statusDiv.textContent = 'Sending request to ' + url + '...';
-
-                // fetch API Call
-                const response = await fetch(url, {{
-                    method: 'POST',
-                    headers: {{
-                        'Content-Type': 'application/json',
-                        // Add any other headers if necessary, e.g., 'Accept': 'application/json'
-                    }},
-                    body: JSON.stringify(jsonData) // Send the parsed and re-stringified JSON object
-                }});
-
-                statusDiv.textContent = 'Waiting for response... Status: ' + response.status;
-                const responseText = await response.text(); // Get response as text
-
-                if (!response.ok) {{
-                    // If response is not ok, throw an error to be caught by the catch block
-                    throw new Error(`HTTP error! Status: ${{response.status}} - ${{response.statusText}}. Response: ${{responseText.substring(0, 500)}}`);
-                }}
-
-                // Success Display
-                // Displaying a snippet of the response.
-                const responseSnippet = responseText.substring(0, 200);
-                statusDiv.textContent = `Success! Status: ${{response.status}}. Response Snippet: ${{responseSnippet}}`;
-                console.log('Full Response:', responseText);
-
-            }} catch (error) {{
-                // Error Handling
-                console.error('Fetch Error:', error);
-                statusDiv.textContent = 'Error: ' + error.message;
-            }}
-        }})();
-    </script>
-</body>
-</html>
-"""
-        # Step 2c: WebDriver Execution
-        # URL-encode the HTML content for the data URI.
-        # `safe=''` ensures that characters like '/', '?', '&', '=', ':' are also encoded.
-        # However, for data URIs, common characters in HTML are generally fine.
-        # The primary concern is characters that would break the URI structure itself.
-        # Using `quote` without `safe=''` is usually sufficient for `data:text/html`.
-        # Let's be more specific with `safe` if issues arise, but default should be fine.
-        encoded_html_content = quote(html_content)
-        data_uri = f"data:text/html,{encoded_html_content}"
-
-        # Load the data URI into the WebDriver.
-        driver.get(data_uri)
-
-        # Wait for the JavaScript to update the status message.
-        # This indicates that the fetch operation has likely completed (or failed).
-        # We wait for "Success!", "Error:", or "HTTP error!" to appear in the div.
-        # Increased timeout for potentially slow network requests.
-        wait = WebDriverWait(driver, 30) # 30 seconds timeout
-        try:
-            status_element = wait.until(
-                EC.presence_of_element_located((By.ID, "status-message"))
-            )
-
-            # Wait until text contains one of the terminal keywords
-            wait.until(
-                lambda d: "Success!" in status_element.text or \
-                          "Error:" in status_element.text or \
-                          "HTTP error!" in status_element.text or \
-                          "Request initiated..." not in status_element.text # fallback if it never changes from initial
-            )
-
-            # A small explicit sleep to allow final JS updates to the DOM if any race condition.
-            time.sleep(1)
-
-            final_status_text = status_element.text
-        except Exception as e:
-            print(f"Timeout or error waiting for status message: {e}")
-            # Try to get current status message anyway or logs
-            try:
-                final_status_text = driver.find_element(By.ID, "status-message").text
-            except:
-                final_status_text = "Error: Could not retrieve final status message."
-
-        # Retrieve browser console logs for debugging (optional but good for complex cases)
-        try:
-            browser_logs = driver.get_log('browser')
-            if browser_logs:
-                print("Browser Console Logs:")
-                for entry in browser_logs:
-                    print(f"  [{entry['level']}] {entry['message']}")
-        except Exception as e:
-            print(f"Could not retrieve browser logs: {e} (This might be normal if not supported by driver/config)")
-
-        return final_status_text
-
-    except json.JSONDecodeError as e:
-        error_msg = f"JSON Encoding Error: {e}"
-        print(error_msg)
-        return error_msg
-    except Exception as e:
-        # Catch any other exceptions during the process.
-        error_msg = f"An unexpected error occurred in solve_post_json: {e}"
-        print(error_msg)
-        # It might be useful to also get the current page source for debugging
-        # print("Current Page Source on Error:\n", driver.page_source)
-        return error_msg
-
-if __name__ == '__main__':
-    # Placeholder for example usage, will be filled in later
-    print("V1RequestBase class defined.")
-    # Basic test of V1RequestBase
-    # req_obj = V1RequestBase("http://example.com", {"key": "value"}) # Keep for basic class test if needed
-    # print(f"Request URL: {req_obj.url}, Data: {req_obj.json_data}")
-
-    print("\nsolve_post_json function defined. Running demonstration example...")
-
-    # Step 3: Create Demonstration Example
-    # Setup headless Chrome WebDriver
-    chrome_options = ChromeOptions()
-    chrome_options.add_argument("--headless")
-    chrome_options.add_argument("--no-sandbox") # Necessary for running in some environments (e.g. Docker)
-    chrome_options.add_argument("--disable-dev-shm-usage") # Overcome limited resource problems
-    # Enable browser logging
-    chrome_options.set_capability('goog:loggingPrefs', {'browser': 'ALL'})
-
-
-    driver = None  # Initialize driver to None for finally block
-    try:
-        driver = webdriver.Chrome(options=chrome_options)
-
-        # Create a dummy V1RequestBase instance
-        # Using httpbin.org/post which echoes back the POST request data.
-        # This is a great service for testing HTTP requests.
-        request_data = {
-            "name": "FlareSolverr Test",
-            "type": "JSON_POST",
-            "message": "Hello from Selenium!",
-            "details": {
-                "version": 1.0,
-                "nested_key": "value with 'single quote' and \"double quote\""
-            },
-            "items": [1, "two", {"three": 3.0}]
-        }
-        # Test with a URL that might fail to see error handling
-        # req_instance = V1RequestBase(url="https://nonexistent-domain-for-testing123.com/post", json_data=request_data)
-
-        req_instance = V1RequestBase(url="https://httpbin.org/post", json_data=request_data)
-
-        # Call the function
-        print(f"\nSending POST request to: {req_instance.url}")
-        print(f"With JSON data: {req_instance.json_data}")
-
-        result_message = solve_post_json(driver, req_instance)
-
-        # Print the result
-        print("\n--- Result from solve_post_json ---")
-        print(result_message)
-        print("--- End of Result ---")
-
-        # Example of a failing request (e.g. network error)
-        print("\n--- Testing a failing request (network error) ---")
-        failing_req_instance = V1RequestBase(url="https://domain.invalid/post", json_data={"error_test": True})
-        failing_result_message = solve_post_json(driver, failing_req_instance)
-        print("\n--- Result from failing solve_post_json ---")
-        print(failing_result_message)
-        print("--- End of Failing Result ---")
-
-        # Example of a request to a URL that returns a 404
-        print("\n--- Testing a request that results in HTTP 404 ---")
-        notfound_req_instance = V1RequestBase(url="https://httpbin.org/status/404", json_data={"status_test": 404})
-        notfound_result_message = solve_post_json(driver, notfound_req_instance)
-        print("\n--- Result from 404 solve_post_json ---")
-        print(notfound_result_message)
-        print("--- End of 404 Result ---")
-
-
-    except Exception as e:
-        print(f"An error occurred during the demonstration: {e}")
-    finally:
-        if driver:
-            # Quit the WebDriver session.
-            driver.quit()
-        print("\nDemonstration finished and WebDriver closed.")
--- a/src/flaresolverr_service.py
+++ b/src/flaresolverr_service.py
@ -2,6 +2,7 @@ import logging
 import platform
 import sys
 import time
+import json
 from datetime import timedelta
 from html import escape
 from urllib.parse import unquote, quote
@ -15,6 +16,7 @@ from selenium.webdriver.support.expected_conditions import (
    presence_of_element_located, staleness_of, title_is)
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC

 import utils
 from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT,
@ -136,6 +138,8 @@ def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
        res = _cmd_request_get(req)
    elif req.cmd == 'request.post':
        res = _cmd_request_post(req)
+    elif req.cmd == 'request.postJSON':
+        res = _cmd_request_postJSON(req)
    else:
        raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.")

@ -178,6 +182,23 @@ def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase:
    return res


+def _cmd_request_postJSON(req: V1RequestBase) -> V1ResponseBase:
+    """Handle the 'request.postJSON' command; raises Exception when 'postData' is missing."""
+    if req.postData is None:
+        raise Exception("Request parameter 'postData' is mandatory in 'request.postJSON' command.")
+    if req.returnRawHtml is not None:
+        logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
+    if req.download is not None:
+        logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")
+    # 'POSTJSON' is the method tag; _evil_logic has a matching branch calling _post_request_json.
+    challenge_res = _resolve_challenge(req, 'POSTJSON')
+    res = V1ResponseBase({})
+    res.status = challenge_res.status
+    res.message = challenge_res.message
+    res.solution = challenge_res.result
+    return res
+
+
 def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase:
    logging.debug("Creating new session...")

@ -292,6 +313,8 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
    logging.debug(f'Navigating to... {req.url}')
    if method == 'POST':
        _post_request(req, driver)
+    elif method == 'POSTJSON':
+        _post_request_json(req, driver)
    else:
        driver.get(req.url)

@ -425,3 +448,144 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
        </body>
        </html>"""
    driver.get("data:text/html;charset=utf-8,{html_content}".format(html_content=html_content))
+
+def _post_request_json(req: V1RequestBase, driver: WebDriver):
+    """POST req.postData to req.url as application/json via fetch() inside the browser.
+
+    A generated HTML page is loaded through a data: URI. Its inline script runs the
+    fetch and reports progress and the outcome in the #status-message element.
+
+    Args:
+        req: Request providing url and postData. postData may be a JSON document
+            given as a string, or any object that json.dumps() can serialize.
+        driver: WebDriver that loads the generated page.
+
+    Returns:
+        The final status text ("Success! ..." or "Error: ..."), or an error description
+        when serializing, loading or waiting fails. Such failures are logged and
+        returned as text rather than raised.
+
+    NOTE(review): a data: page has an opaque origin, so the fetch is presumably
+    cross-origin (CORS applies, target-site cookies are not sent) - confirm.
+    """
+    post_data = req.postData
+    try:
+        # A string is taken as ready-made JSON text; anything else is serialized here.
+        json_text = post_data if isinstance(post_data, str) else json.dumps(post_data)
+    except (TypeError, ValueError) as e:
+        # json.dumps() raises these for unserializable or circular payloads.
+        error_msg = f"JSON Encoding Error: {e}"
+        logging.error(error_msg)
+        return error_msg
+
+    try:
+        # json.dumps() output is a valid JavaScript string literal. "</" is rewritten to
+        # "<\/" (the same string to JS) so a value cannot close the inline <script> element.
+        js_url = json.dumps(req.url).replace("</", "<\\/")
+        js_payload = json.dumps(json_text).replace("</", "<\\/")
+
+        html_content = f"""
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>POST Request Executor</title>
+    </head>
+    <body>
+        <h1>Executing POST Request...</h1>
+        <div id="status-message">Request initiated...</div>
+
+        <script>
+            (async function() {{
+                // Both values arrive as JSON-encoded string literals generated by Python.
+                const url = {js_url};
+                const jsonPayloadStr = {js_payload};
+                const statusDiv = document.getElementById('status-message');
+
+                console.log('Request URL:', url);
+                console.log('JSON Payload (raw string for JS):', jsonPayloadStr);
+
+                try {{
+                    // Parsing validates the payload; invalid JSON lands in the catch block.
+                    const jsonData = JSON.parse(jsonPayloadStr);
+                    console.log('JSON Payload (parsed JS object):', jsonData);
+
+                    statusDiv.textContent = 'Sending request to ' + url + '...';
+
+                    // fetch API Call
+                    const response = await fetch(url, {{
+                        method: 'POST',
+                        headers: {{
+                            'Content-Type': 'application/json',
+                        }},
+                        body: JSON.stringify(jsonData)
+                    }});
+
+                    statusDiv.textContent = 'Waiting for response... Status: ' + response.status;
+                    const responseText = await response.text();
+
+                    if (!response.ok) {{
+                        // Routed to the catch block, which prefixes the message with "Error: ".
+                        throw new Error(`HTTP error! Status: ${{response.status}} - ${{response.statusText}}. Response: ${{responseText.substring(0, 500)}}`);
+                    }}
+
+                    // Only a 200-character snippet of the response is shown on the page.
+                    const responseSnippet = responseText.substring(0, 200);
+                    statusDiv.textContent = `Success! Status: ${{response.status}}. Response Snippet: ${{responseSnippet}}`;
+                    console.log('Full Response:', responseText);
+
+                }} catch (error) {{
+                    console.error('Fetch Error:', error);
+                    statusDiv.textContent = 'Error: ' + error.message;
+                }}
+            }})();
+        </script>
+    </body>
+    </html>
+    """
+        # Percent-encode the document so it can be carried inside a data: URI.
+        encoded_html_content = quote(html_content)
+        # charset is declared explicitly, as in the data: URI built by _post_request.
+        data_uri = f"data:text/html;charset=utf-8,{encoded_html_content}"
+        driver.get(data_uri)
+
+        # Wait for the page script to reach a terminal state, or give up after 30 s.
+        wait = WebDriverWait(driver, 30)
+        try:
+            status_element = wait.until(
+                EC.presence_of_element_located((By.ID, "status-message"))
+            )
+
+            def _fetch_finished(_driver):
+                # Terminal texts start with "Success!" or "Error:" (HTTP failures show up
+                # as "Error: HTTP error! ..."). Intermediate texts such as "Sending
+                # request to ..." must not end the wait, hence the prefix check.
+                status_text = status_element.text
+                return status_text.startswith(("Success!", "Error:"))
+
+            wait.until(_fetch_finished)
+            final_status_text = status_element.text
+        except Exception as e:
+            logging.warning(f"Timeout or error waiting for status message: {e}")
+            # Best effort: report whatever the page currently shows.
+            try:
+                final_status_text = driver.find_element(By.ID, "status-message").text
+            except Exception:
+                final_status_text = "Error: Could not retrieve final status message."
+
+        # Console output of the page helps debugging; not every driver/config exposes it.
+        try:
+            browser_logs = driver.get_log('browser')
+            for entry in browser_logs:
+                logging.debug(f"Browser console [{entry['level']}] {entry['message']}")
+        except Exception as e:
+            logging.debug(f"Could not retrieve browser logs: {e} (This might be normal if not supported by driver/config)")
+
+        return final_status_text
+
+    except Exception as e:
+        # Boundary handler: the visible caller (_evil_logic) discards the return value, so log it.
+        error_msg = f"An unexpected error occurred in _post_request_json: {e}"
+        logging.error(error_msg)
+        return error_msg