mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-06-08 20:45:27 +00:00
download: support download and return images
Find all '<img>' elements on the page and return their binary data as base64-encoded strings. Signed-off-by: acg5159 <acg5159@gmail.com>
This commit is contained in:
parent
ce5369dd41
commit
622ea9a1af
@ -2,9 +2,12 @@ import logging
|
|||||||
import platform
|
import platform
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
import base64
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from html import escape
|
from html import escape
|
||||||
from urllib.parse import unquote, quote
|
from urllib.parse import unquote, quote, urljoin, urlsplit
|
||||||
|
|
||||||
from func_timeout import FunctionTimedOut, func_timeout
|
from func_timeout import FunctionTimedOut, func_timeout
|
||||||
from selenium.common import TimeoutException
|
from selenium.common import TimeoutException
|
||||||
@ -149,8 +152,6 @@ def _cmd_request_get(req: V1RequestBase) -> V1ResponseBase:
|
|||||||
raise Exception("Cannot use 'postBody' when sending a GET request.")
|
raise Exception("Cannot use 'postBody' when sending a GET request.")
|
||||||
if req.returnRawHtml is not None:
|
if req.returnRawHtml is not None:
|
||||||
logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
|
logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
|
||||||
if req.download is not None:
|
|
||||||
logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")
|
|
||||||
|
|
||||||
challenge_res = _resolve_challenge(req, 'GET')
|
challenge_res = _resolve_challenge(req, 'GET')
|
||||||
res = V1ResponseBase({})
|
res = V1ResponseBase({})
|
||||||
@ -418,6 +419,44 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
|||||||
challenge_res.headers = {} # todo: fix, selenium not provides this info
|
challenge_res.headers = {} # todo: fix, selenium not provides this info
|
||||||
challenge_res.response = driver.page_source
|
challenge_res.response = driver.page_source
|
||||||
|
|
||||||
|
if req.download:
|
||||||
|
# todo: support XPATH
|
||||||
|
img_elements = driver.find_elements(By.TAG_NAME, "img")
|
||||||
|
|
||||||
|
resp = []
|
||||||
|
for img in img_elements:
|
||||||
|
try:
|
||||||
|
# todo: support other attribute
|
||||||
|
src = img.get_attribute("src")
|
||||||
|
if src:
|
||||||
|
img_url = urljoin(driver.current_url, src)
|
||||||
|
response = requests.get(img_url, timeout=10)
|
||||||
|
if response.status_code == 200:
|
||||||
|
logging.info(f"Found {img_url}")
|
||||||
|
|
||||||
|
filename = os.path.basename(urlsplit(img_url).path)
|
||||||
|
mime_type = response.headers.get("Content-Type", "image/jpeg")
|
||||||
|
b64_data = base64.b64encode(response.content).decode('utf-8')
|
||||||
|
|
||||||
|
r = {
|
||||||
|
"url": f"{img_url}",
|
||||||
|
"filename": f"{filename}",
|
||||||
|
"mime_type": f"{mime_type}",
|
||||||
|
"encoded_data": f"{b64_data}"
|
||||||
|
}
|
||||||
|
resp.append(r)
|
||||||
|
else:
|
||||||
|
logging.error(f"Invalid Repsonse Code ({response.status_code}): {img_url}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error: {e}")
|
||||||
|
if not resp:
|
||||||
|
logging.error("No Images Found!")
|
||||||
|
res.message = "No Images Found!"
|
||||||
|
else:
|
||||||
|
logging.info(f"Found {len(resp)} Images!")
|
||||||
|
res.message = f"Found {len(resp)} Images!"
|
||||||
|
challenge_res.download = resp
|
||||||
|
|
||||||
res.result = challenge_res
|
res.result = challenge_res
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user