Lovi d447cf53b7
Test httpx (#149)
* Migrate to httpx

* Revert "Migrate to httpx"

This reverts commit fdd2823865824eca5e8cb8806dbddba4bcb37280.

* Migrate httpx

* minor fixes (#146)

* Update headers

* Update config

* 1v. Add retry

* v1 Finish Guardaserie

* Need to fix client.

* Remove retry

* v2 Add comment guardaserie.

* Add domain ...

* Finish add ddl ...

* Fix use of proxy.

* Fix cookie error.

* Update cookie.

* Dynamic import.

---------

Co-authored-by: Francesco Grazioso <40018163+FrancescoGrazioso@users.noreply.github.com>
2024-06-14 17:07:51 +02:00

123 lines
3.4 KiB
Python

# 26.05.24
import sys
import logging
# External libraries
import httpx
from bs4 import BeautifulSoup
# Internal utilities
from Src.Util.headers import get_headers
from Src.Util.os import run_node_script
class VideoSource:
def __init__(self) -> None:
"""
Initializes the VideoSource object with default values.
Attributes:
headers (dict): An empty dictionary to store HTTP headers.
"""
self.headers = {'user-agent': get_headers()}
def setup(self, url: str) -> None:
"""
Sets up the video source with the provided URL.
Args:
url (str): The URL of the video source.
"""
self.url = url
def make_request(self, url: str) -> str:
"""
Make an HTTP GET request to the provided URL.
Args:
url (str): The URL to make the request to.
Returns:
str: The response content if successful, None otherwise.
"""
try:
response = httpx.get(url, headers=self.headers, follow_redirects=True)
response.raise_for_status()
return response.text
except Exception as e:
logging.error(f"Request failed: {e}")
return None
def parse_html(self, html_content: str) -> BeautifulSoup:
"""
Parse the provided HTML content using BeautifulSoup.
Args:
html_content (str): The HTML content to parse.
Returns:
BeautifulSoup: Parsed HTML content if successful, None otherwise.
"""
try:
soup = BeautifulSoup(html_content, "html.parser")
return soup
except Exception as e:
logging.error(f"Failed to parse HTML content: {e}")
return None
def get_result_node_js(self, soup):
"""
Prepares and runs a Node.js script from the provided BeautifulSoup object to retrieve the video URL.
Args:
soup (BeautifulSoup): A BeautifulSoup object representing the parsed HTML content.
Returns:
str: The output from the Node.js script, or None if the script cannot be found or executed.
"""
for script in soup.find_all("script"):
if "eval" in str(script):
new_script = str(script.text).replace("eval", "var a = ")
new_script = new_script.replace(")))", ")));console.log(a);")
return run_node_script(new_script)
return None
def get_playlist(self) -> str:
"""
Download a video from the provided URL.
Returns:
str: The URL of the downloaded video if successful, None otherwise.
"""
try:
html_content = self.make_request(self.url)
if not html_content:
logging.error("Failed to fetch HTML content.")
return None
soup = self.parse_html(html_content)
if not soup:
logging.error("Failed to parse HTML content.")
return None
result = self.get_result_node_js(soup)
if not result:
logging.error("No video URL found in script.")
return None
master_playlist = str(result).split(":")[3].split('"}')[0]
return f"https:{master_playlist}"
except Exception as e:
logging.error(f"An error occurred: {e}")
return None