FlareSolverr/src/utils.py
2023-01-06 18:05:23 +01:00

177 lines
5.9 KiB
Python

import json
import logging
import os
import re
import shutil
from selenium.webdriver.chrome.webdriver import WebDriver
import undetected_chromedriver as uc
FLARESOLVERR_VERSION = None
CHROME_MAJOR_VERSION = None
USER_AGENT = None
XVFB_DISPLAY = None
PATCHED_DRIVER_PATH = None
def get_config_log_html() -> bool:
return os.environ.get('LOG_HTML', 'false').lower() == 'true'
def get_config_headless() -> bool:
return os.environ.get('HEADLESS', 'true').lower() == 'true'
def get_flaresolverr_version() -> str:
global FLARESOLVERR_VERSION
if FLARESOLVERR_VERSION is not None:
return FLARESOLVERR_VERSION
package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json')
with open(package_path) as f:
FLARESOLVERR_VERSION = json.loads(f.read())['version']
return FLARESOLVERR_VERSION
def get_webdriver() -> WebDriver:
global PATCHED_DRIVER_PATH
logging.debug('Launching web browser...')
# undetected_chromedriver
options = uc.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--window-size=1920,1080')
# todo: this param shows a warning in chrome head-full
options.add_argument('--disable-setuid-sandbox')
options.add_argument('--disable-dev-shm-usage')
# this option removes the zygote sandbox (it seems that the resolution is a bit faster)
options.add_argument('--no-zygote')
# note: headless mode is detected (options.headless = True)
# we launch the browser in head-full mode with the window hidden
windows_headless = False
if get_config_headless():
if os.name == 'nt':
windows_headless = True
else:
start_xvfb_display()
# if we are inside the Docker container, we avoid downloading the driver
driver_exe_path = None
version_main = None
if os.path.exists("/app/chromedriver"):
# running inside Docker
driver_exe_path = "/app/chromedriver"
else:
version_main = get_chrome_major_version()
if PATCHED_DRIVER_PATH is not None:
driver_exe_path = PATCHED_DRIVER_PATH
# downloads and patches the chromedriver
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
driver = uc.Chrome(options=options, driver_executable_path=driver_exe_path, version_main=version_main,
windows_headless=windows_headless)
# save the patched driver to avoid re-downloads
if driver_exe_path is None:
PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
# selenium vanilla
# options = webdriver.ChromeOptions()
# options.add_argument('--no-sandbox')
# options.add_argument('--window-size=1920,1080')
# options.add_argument('--disable-setuid-sandbox')
# options.add_argument('--disable-dev-shm-usage')
# driver = webdriver.Chrome(options=options)
return driver
def get_chrome_major_version() -> str:
global CHROME_MAJOR_VERSION
if CHROME_MAJOR_VERSION is not None:
return CHROME_MAJOR_VERSION
if os.name == 'nt':
try:
stream = os.popen(
'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"')
output = stream.read()
# Example: '104.0.5112.79'
complete_version = extract_version_registry(output)
except Exception:
# Example: '104.0.5112.79'
complete_version = extract_version_folder()
else:
chrome_path = uc.find_chrome_executable()
process = os.popen(f'"{chrome_path}" --version')
# Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
# Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
complete_version = process.read()
process.close()
CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
logging.info(f"Chrome major version: {CHROME_MAJOR_VERSION}")
return CHROME_MAJOR_VERSION
def extract_version_registry(output) -> str:
try:
google_version = ''
for letter in output[output.rindex('DisplayVersion REG_SZ') + 24:]:
if letter != '\n':
google_version += letter
else:
break
return google_version.strip()
except TypeError:
return ''
def extract_version_folder() -> str:
# Check if the Chrome folder exists in the x32 or x64 Program Files folders.
for i in range(2):
path = 'C:\\Program Files' + (' (x86)' if i else '') + '\\Google\\Chrome\\Application'
if os.path.isdir(path):
paths = [f.path for f in os.scandir(path) if f.is_dir()]
for path in paths:
filename = os.path.basename(path)
pattern = '\d+\.\d+\.\d+\.\d+'
match = re.search(pattern, filename)
if match and match.group():
# Found a Chrome version.
return match.group(0)
return ''
def get_user_agent(driver=None) -> str:
global USER_AGENT
if USER_AGENT is not None:
return USER_AGENT
try:
if driver is None:
driver = get_webdriver()
USER_AGENT = driver.execute_script("return navigator.userAgent")
return USER_AGENT
except Exception as e:
raise Exception("Error getting browser User-Agent. " + str(e))
finally:
if driver is not None:
driver.quit()
def start_xvfb_display():
global XVFB_DISPLAY
if XVFB_DISPLAY is None:
from xvfbwrapper import Xvfb
XVFB_DISPLAY = Xvfb()
XVFB_DISPLAY.start()
def object_to_dict(_object):
json_dict = json.loads(json.dumps(_object, default=lambda o: o.__dict__))
# remove hidden fields
return {k: v for k, v in json_dict.items() if not k.startswith('__')}