Fix output path.

Lovi 2024-11-03 18:32:21 +01:00
parent 6bfb38a2e6
commit 1b0d14fcdb
9 changed files with 120 additions and 448 deletions

View File

@@ -1,165 +0,0 @@
# 05.07.24
import re
import logging
# External libraries
import httpx
import jsbeautifier
from bs4 import BeautifulSoup
# Internal utilities
from Src.Util.headers import get_headers
from Src.Util._jsonConfig import config_manager
# Variable
max_timeout = config_manager.get_int("REQUESTS", "timeout")
class VideoSource:
def __init__(self, url: str):
"""
Sets up the video source with the provided URL.
Parameters:
- url (str): The URL of the video.
"""
self.url = url
self.redirect_url = None
self.maxstream_url = None
self.m3u8_url = None
self.headers = {'user-agent': get_headers()}
def get_redirect_url(self):
"""
Sends a request to the initial URL and extracts the redirect URL.
"""
try:
# Send a GET request to the initial URL
response = httpx.get(
url=self.url,
headers=self.headers,
follow_redirects=True,
timeout=max_timeout
)
response.raise_for_status()
# Extract the redirect URL from the HTML
soup = BeautifulSoup(response.text, "html.parser")
self.redirect_url = soup.find("div", id="iframen1").get("data-src")
logging.info(f"Redirect URL: {self.redirect_url}")
return self.redirect_url
except httpx.RequestError as e:
logging.error(f"Error during the initial request: {e}")
raise
except AttributeError as e:
logging.error(f"Error parsing HTML: {e}")
raise
def get_maxstream_url(self):
"""
Sends a request to the redirect URL and extracts the Maxstream URL.
"""
try:
# Send a GET request to the redirect URL
response = httpx.get(
url=self.redirect_url,
headers=self.headers,
follow_redirects=True,
timeout=max_timeout
)
response.raise_for_status()
# Extract the Maxstream URL from the HTML
soup = BeautifulSoup(response.text, "html.parser")
maxstream_url = soup.find("a")
if maxstream_url is None:
# If no anchor tag is found, try the alternative method
logging.warning("Anchor tag not found. Trying the alternative method.")
headers = {
'origin': 'https://stayonline.pro',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 OPR/111.0.0.0',
'x-requested-with': 'XMLHttpRequest',
}
# Make request to the stayonline API
data = {'id': self.redirect_url.split("/")[-2], 'ref': ''}
response = httpx.post('https://stayonline.pro/ajax/linkEmbedView.php', headers=headers, data=data, timeout=max_timeout)
response.raise_for_status()
uprot_url = response.json()['data']['value']
# Retry getting the maxstream URL
response = httpx.get(uprot_url, headers=self.headers, follow_redirects=True, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
maxstream_url = soup.find("a").get("href")
else:
maxstream_url = maxstream_url.get("href")
self.maxstream_url = maxstream_url
logging.info(f"Maxstream URL: {self.maxstream_url}")
return self.maxstream_url
except httpx.RequestError as e:
logging.error(f"Error during the request to the redirect URL: {e}")
raise
except AttributeError as e:
logging.error(f"Error parsing HTML: {e}")
raise
def get_m3u8_url(self):
"""
Sends a request to the Maxstream URL and extracts the .m3u8 file URL.
"""
try:
# Send a GET request to the Maxstream URL
response = httpx.get(
url=self.maxstream_url,
headers=self.headers,
follow_redirects=True,
timeout=max_timeout
)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
# Iterate over all script tags in the HTML
for script in soup.find_all("script"):
if "eval(function(p,a,c,k,e,d)" in script.text:
# Unpack the obfuscated script with jsbeautifier
data_js = jsbeautifier.beautify(script.text)
# Extract the .m3u8 URL from the script's output
match = re.search(r'sources:\s*\[\{\s*src:\s*"([^"]+)"', data_js)
if match:
self.m3u8_url = match.group(1)
logging.info(f"M3U8 URL: {self.m3u8_url}")
break
return self.m3u8_url
except Exception as e:
logging.error(f"Error extracting the M3U8 URL: {e}")
raise
def get_playlist(self):
"""
Executes the entire flow to obtain the final .m3u8 file URL.
"""
self.get_redirect_url()
self.get_maxstream_url()
return self.get_m3u8_url()
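
For context, the class removed above exposes a three-step chain (redirect URL -> Maxstream URL -> .m3u8 URL) wrapped by get_playlist(). A minimal usage sketch, with a placeholder URL standing in for a real video page:

# Hypothetical usage of the removed VideoSource class
video_source = VideoSource("https://example.org/watch/some-film")  # placeholder URL
master_playlist = video_source.get_playlist()  # runs get_redirect_url -> get_maxstream_url -> get_m3u8_url
print(master_playlist)                         # final .m3u8 URL, or None if no packed script matched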

View File

@@ -1,40 +0,0 @@
# 09.06.24
# Internal utilities
from Src.Util.console import console, msg
# Logic class
from .site import title_search, run_get_select_title
from .film import download_film
# Variable
indice = 9
_use_for = "film"
_deprecate = True
def search():
"""
Main function of the application for film and series.
"""
# Make a request to the site to get content matching the search string
string_to_search = msg.ask("\n[purple]Insert a word to search across the site").strip()
len_database = title_search(string_to_search)
if len_database > 0:
# Select title from list
select_title = run_get_select_title()
# !!! TODO: add media type handling, this does not work for series
download_film(select_title)
else:
console.print(f"\n[red]Nothing matching was found for[white]: [purple]{string_to_search}")
# Retry
search()

View File

@@ -1,15 +0,0 @@
# 03.07.24
import os
# Internal utilities
from Src.Util._jsonConfig import config_manager
SITE_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
ROOT_PATH = config_manager.get('DEFAULT', 'root_path')
DOMAIN_NOW = config_manager.get_dict('SITE', SITE_NAME)['domain']
MOVIE_FOLDER = "Movie"
SERIES_FOLDER = "Serie"
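
A brief illustration of how these constants resolve, assuming the file sits in a per-site package such as Src/Api/<site>/costant.py (the exact layout is an assumption, not stated in this diff):

# If __file__ were ".../Src/Api/altadefinizione/costant.py" (hypothetical path),
# os.path.dirname(os.path.abspath(__file__)) -> ".../Src/Api/altadefinizione"
# os.path.basename(...)                      -> "altadefinizione" == SITE_NAME
# ROOT_PATH comes from config.json ("Video" by default), so downloads end up
# under Video/<SITE_NAME>/Movie or Video/<SITE_NAME>/Serie.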

View File

@@ -1,60 +0,0 @@
# 03.07.24
import os
import time
# Internal utilities
from Src.Util.console import console, msg
from Src.Util.os import remove_special_characters
from Src.Util.message import start_message
from Src.Util.call_stack import get_call_stack
from Src.Lib.Downloader import HLS_Downloader
from ..Template import execute_search
# Logic class
from ..Template.Class.SearchType import MediaItem
from .Player.maxstream import VideoSource
# Config
from .costant import ROOT_PATH, SITE_NAME, MOVIE_FOLDER
def download_film(select_title: MediaItem):
"""
Downloads a film using the provided media item.
Parameters:
- select_title (MediaItem): The media item to be downloaded. This should be an instance of the MediaItem class, containing attributes like `name` and `url`.
"""
# Start message and display film information
start_message()
console.print(f"[yellow]Download: [red]{select_title.name} \n")
# Set up the API manager
video_source = VideoSource(select_title.url)
# Define output path
mp4_name = remove_special_characters(select_title.name) + ".mp4"
mp4_path = os.path.join(ROOT_PATH, SITE_NAME, MOVIE_FOLDER, remove_special_characters(select_title.name))
# Get m3u8 master playlist
master_playlist = video_source.get_playlist()
# Download the film using the m3u8 playlist and the output filename
r_proc = HLS_Downloader(m3u8_playlist = master_playlist, output_filename = os.path.join(mp4_path, mp4_name)).start()
if r_proc == 404:
time.sleep(2)
# Re-invoke the search function
if msg.ask("[green]Do you want to continue [white]([red]y[white])[green] or return at home[white]([red]n[white]) ", choices=['y', 'n'], default='y', show_choices=True) == "n":
frames = get_call_stack()
execute_search(frames[-4])
if r_proc is not None:
console.print("[green]Result: ")
console.print(r_proc)
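
Since the commit message is "Fix output path", here is how the removed version of download_film assembled that path, using illustrative values only (the SITE_NAME and title below are assumptions):

# With ROOT_PATH = "Video", SITE_NAME = "altadefinizione" (hypothetical) and
# select_title.name = "Some Film", the code above produced:
#   mp4_name        = "Some Film.mp4"
#   mp4_path        = os.path.join("Video", "altadefinizione", "Movie", "Some Film")
#   output_filename = "Video/altadefinizione/Movie/Some Film/Some Film.mp4"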

View File

@@ -1,80 +0,0 @@
# 03.07.24
# External libraries
import httpx
from bs4 import BeautifulSoup
from unidecode import unidecode
# Internal utilities
from Src.Util.console import console
from Src.Util._jsonConfig import config_manager
from Src.Util.headers import get_headers
from Src.Util.table import TVShowManager
from ..Template import search_domain, get_select_title
# Logic class
from ..Template.Class.SearchType import MediaManager
# Variable
from .costant import SITE_NAME
media_search_manager = MediaManager()
table_show_manager = TVShowManager()
def title_search(word_to_search: str) -> int:
"""
Search for titles based on a search query.
Parameters:
- word_to_search (str): The title to search for.
Returns:
- int: The number of titles found.
"""
# Find a new domain if the previous one does not work
max_timeout = config_manager.get_int("REQUESTS", "timeout")
domain_to_use, _ = search_domain(SITE_NAME, f"https://{SITE_NAME}")
# Send request to search for titles
try:
response = httpx.get(
url=f"https://{SITE_NAME}.{domain_to_use}/?s={unidecode(word_to_search)}",
headers={'user-agent': get_headers()},
follow_redirects=True,
timeout=max_timeout
)
response.raise_for_status()
except Exception as e:
console.print(f"Site: {SITE_NAME}, request search error: {e}")
return 0  # Without a response there is nothing to parse
# Parse the HTML and iterate over the result cards
soup = BeautifulSoup(response.text, "html.parser")
for div_title in soup.find_all("div", class_ = "card"):
url = div_title.find("h3").find("a").get("href")
title = div_title.find("h3").find("a").get_text(strip=True)
desc = div_title.find("p").find("strong").text
title_info = {
'name': title,
'desc': desc,
'url': url
}
media_search_manager.add_media(title_info)
# Return the number of titles found
return media_search_manager.get_length()
def run_get_select_title():
"""
Display a selection of titles and prompt the user to choose one.
"""
return get_select_title(table_show_manager, media_search_manager)
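
A short note on the unidecode call in the query URL above: it transliterates accented input to plain ASCII before it is placed in the query string. A minimal sketch with a made-up query:

from unidecode import unidecode

word_to_search = "Pokémon"        # hypothetical user input
print(unidecode(word_to_search))  # -> "Pokemon"
# so the request becomes https://<SITE_NAME>.<domain>/?s=Pokemon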

View File

@@ -77,6 +77,7 @@ class M3U8_Segments:
# Sync
self.queue = PriorityQueue()
self.stop_event = threading.Event()
self.downloaded_segments = set()
def __get_key__(self, m3u8_parser: M3U8_Parser) -> bytes:
"""
@@ -176,13 +177,16 @@ class M3U8_Segments:
if self.is_index_url:
# Send a GET request to retrieve the index M3U8 file
response = httpx.get(self.url, headers=headers_index)
response = httpx.get(
self.url,
headers=headers_index,
timeout=max_timeout
)
response.raise_for_status()
# Save the M3U8 file to the temporary folder
if response.status_code == 200:
path_m3u8_file = os.path.join(self.tmp_folder, "playlist.m3u8")
open(path_m3u8_file, "w+").write(response.text)
path_m3u8_file = os.path.join(self.tmp_folder, "playlist.m3u8")
open(path_m3u8_file, "w+").write(response.text)
# Parse the text from the M3U8 index file
self.parse_data(response.text)
@@ -204,115 +208,122 @@ class M3U8_Segments:
- backoff_factor (float): The backoff factor for exponential backoff (default is 1.5 seconds).
"""
need_verify = REQUEST_VERIFY
min_segment_size = 100 # Minimum acceptable size for a TS segment in bytes
for attempt in range(retries):
try:
start_time = time.time()
# Make request to get content
if THERE_IS_PROXY_LIST:
# Get proxy from list
proxy = self.valid_proxy[index % len(self.valid_proxy)]
logging.info(f"Use proxy: {proxy}")
with httpx.Client(proxies=proxy, verify=need_verify) as client:
if 'key_base_url' in self.__dict__:
response = client.get(
url=ts_url,
headers=random_headers(self.key_base_url),
timeout=max_timeout,
follow_redirects=True
)
response = client.get(ts_url, headers=random_headers(self.key_base_url), timeout=max_timeout, follow_redirects=True)
else:
response = client.get(
url=ts_url,
headers={'user-agent': get_headers()},
timeout=max_timeout,
follow_redirects=True
)
response = client.get(ts_url, headers={'user-agent': get_headers()}, timeout=max_timeout, follow_redirects=True)
else:
with httpx.Client(verify=need_verify) as client_2:
if 'key_base_url' in self.__dict__:
response = client_2.get(
url=ts_url,
headers=random_headers(self.key_base_url),
timeout=max_timeout,
follow_redirects=True
)
response = client_2.get(ts_url, headers=random_headers(self.key_base_url), timeout=max_timeout, follow_redirects=True)
else:
response = client_2.get(
url=ts_url,
headers={'user-agent': get_headers()},
timeout=max_timeout,
follow_redirects=True
)
response = client_2.get(ts_url, headers={'user-agent': get_headers()}, timeout=max_timeout, follow_redirects=True)
# Get response content
response.raise_for_status() # Raise exception for HTTP errors
duration = time.time() - start_time
# Validate response and content
response.raise_for_status()
segment_content = response.content
content_size = len(segment_content)
# Update bar
response_size = int(response.headers.get('Content-Length', 0)) or len(segment_content)
# Check if segment is too small (possibly corrupted or empty)
if content_size < min_segment_size:
raise httpx.RequestError(f"Segment {index} too small ({content_size} bytes)")
# Update progress bar with custom Class
self.class_ts_estimator.update_progress_bar(response_size, duration, progress_bar)
duration = time.time() - start_time
# Decrypt the segment content if decryption is needed
# Decrypt if needed and verify decrypted content
if self.decryption is not None:
segment_content = self.decryption.decrypt(segment_content)
try:
segment_content = self.decryption.decrypt(segment_content)
if len(segment_content) < min_segment_size:
raise Exception(f"Decrypted segment {index} too small ({len(segment_content)} bytes)")
except Exception as e:
logging.error(f"Decryption failed for segment {index}: {str(e)}")
raise
# Add the segment to the queue
# Update progress and queue
self.class_ts_estimator.update_progress_bar(content_size, duration, progress_bar)
self.queue.put((index, segment_content))
self.downloaded_segments.add(index) # Track successfully downloaded segments
progress_bar.update(1)
# Break out of the loop on success
return
except (httpx.RequestError, httpx.HTTPStatusError) as e:
console.print(f"\nAttempt {attempt + 1} failed for '{ts_url}' with error: {e}")
except Exception as e:
#logging.error(f"Attempt {attempt + 1} failed for segment {index} - '{ts_url}': {e}")
if attempt + 1 == retries:
console.print(f"\nFailed after {retries} attempts. Skipping '{ts_url}'")
#logging.error(f"Final retry failed for segment {index}")
self.queue.put((index, None)) # Marker for failed segment
progress_bar.update(1)
break
# Exponential backoff before retrying
sleep_time = backoff_factor * (2 ** attempt)
console.print(f"Retrying in {sleep_time} seconds...")
logging.info(f"Retrying segment {index} in {sleep_time} seconds...")
time.sleep(sleep_time)
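# Worked example of the backoff schedule above, assuming the default backoff_factor = 1.5:
#   attempt 0 -> 1.5 * 2**0 = 1.5 s
#   attempt 1 -> 1.5 * 2**1 = 3.0 s
#   attempt 2 -> 1.5 * 2**2 = 6.0 s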
def write_segments_to_file(self):
"""
Writes downloaded segments to a file in the correct order.
Writes segments to file with additional verification.
"""
with open(self.tmp_file_path, 'wb') as f:
expected_index = 0
buffer = {}
total_written = 0
segments_written = set()
while not self.stop_event.is_set() or not self.queue.empty():
try:
index, segment_content = self.queue.get(timeout=1)
# Handle failed segments
if segment_content is None:
if index == expected_index:
expected_index += 1
continue
# Write segment if it's the next expected one
if index == expected_index:
f.write(segment_content)
total_written += len(segment_content)
segments_written.add(index)
f.flush()
expected_index += 1
# Write any buffered segments in order
# Write any buffered segments that are now in order
while expected_index in buffer:
f.write(buffer.pop(expected_index))
f.flush()
next_segment = buffer.pop(expected_index)
if next_segment is not None:
f.write(next_segment)
total_written += len(next_segment)
segments_written.add(expected_index)
f.flush()
expected_index += 1
else:
buffer[index] = segment_content
except queue.Empty:
if self.stop_event.is_set():
break
continue
except Exception as e:
logging.error(f"Error writing segment {index}: {str(e)}")
continue
# Final verification
if total_written == 0:
raise Exception("No data written to file")
def download_streams(self, add_desc):
"""
Downloads all TS segments in parallel and writes them to a file.
@@ -375,37 +386,63 @@ class M3U8_Segments:
mininterval=0.05
)
# Start a separate thread to write segments to the file
# Start writer thread
writer_thread = threading.Thread(target=self.write_segments_to_file)
writer_thread.daemon = True
writer_thread.start()
# If proxies are available, set max_workers to the number of proxies
# else set max_workers to TQDM_MAX_WORKER
# Configure workers and delay
max_workers = len(self.valid_proxy) if THERE_IS_PROXY_LIST else TQDM_MAX_WORKER
delay = max(PROXY_START_MIN, min(PROXY_START_MAX, 1 / (len(self.valid_proxy) + 1))) if THERE_IS_PROXY_LIST else TQDM_DELAY_WORKER
# If proxies are available, set the delay dynamically
# else set the delay to TQDM_DELAY_WORKER
if THERE_IS_PROXY_LIST:
num_proxies = len(self.valid_proxy)
self.working_proxy_list = self.valid_proxy
if num_proxies > 0:
# Calculate the delay based on the number of proxies
# the delay should stay between 0.5 and 1
delay = max(PROXY_START_MIN, min(PROXY_START_MAX, 1 / (num_proxies + 1)))
else:
delay = TQDM_DELAY_WORKER
else:
delay = TQDM_DELAY_WORKER
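# Worked example of the clamp above, assuming PROXY_START_MIN = 0.5 and PROXY_START_MAX = 1:
#   3 proxies -> 1 / (3 + 1) = 0.25 -> raised to 0.5
#   1 proxy   -> 1 / (1 + 1) = 0.50 -> stays at 0.5
#   0 proxies -> falls back to TQDM_DELAY_WORKER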
# Start all workers
# Download segments with completion verification
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = []
for index, segment_url in enumerate(self.segments):
time.sleep(delay)
executor.submit(self.make_requests_stream, segment_url, index, progress_bar)
futures.append(executor.submit(self.make_requests_stream, segment_url, index, progress_bar))
# Wait for all tasks to complete
executor.shutdown(wait=True)
# Wait for all futures to complete
for future in futures:
try:
future.result()
except Exception as e:
logging.error(f"Error in download thread: {str(e)}")
# Verify completion and retry missing segments
total_segments = len(self.segments)
completed_segments = len(self.downloaded_segments)
if completed_segments < total_segments:
missing_segments = set(range(total_segments)) - self.downloaded_segments
logging.warning(f"Missing segments: {sorted(missing_segments)}")
# Retry missing segments
for index in missing_segments:
if self.stop_event.is_set():
break
try:
self.make_requests_stream(self.segments[index], index, progress_bar)
except Exception as e:
logging.error(f"Failed to retry segment {index}: {str(e)}")
# Clean up
self.stop_event.set()
writer_thread.join()
writer_thread.join(timeout=30)
progress_bar.close()
# Final verification
final_completion = (len(self.downloaded_segments) / total_segments) * 100
if final_completion < 99.9: # Less than 99.9% complete
missing = set(range(total_segments)) - self.downloaded_segments
raise Exception(f"Download incomplete ({final_completion:.1f}%). Missing segments: {sorted(missing)}")
# Verify output file
if not os.path.exists(self.tmp_file_path):
raise Exception("Output file missing")
file_size = os.path.getsize(self.tmp_file_path)
if file_size == 0:
raise Exception("Output file is empty")
logging.info(f"Download completed. File size: {file_size} bytes")
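
The new control flow above (submit every segment, collect the futures, retry whatever is still missing, then enforce a completion threshold) is a generic pattern. A self-contained sketch of that pattern, with fetch() standing in for make_requests_stream and all names chosen for illustration:

from concurrent.futures import ThreadPoolExecutor

def download_all(urls, fetch, max_workers=4, min_completion=0.999):
    if not urls:
        return
    done = set()

    def task(index, url):
        fetch(url)              # may raise; failed indices simply never reach `done`
        done.add(index)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(task, i, u) for i, u in enumerate(urls)]
        for future in futures:
            try:
                future.result() # surface worker exceptions without aborting the batch
            except Exception:
                pass

    # One retry pass over the segments that are still missing
    for index in sorted(set(range(len(urls))) - done):
        try:
            task(index, urls[index])
        except Exception:
            pass

    if len(done) / len(urls) < min_completion:
        missing = sorted(set(range(len(urls))) - done)
        raise RuntimeError(f"Download incomplete, missing indices: {missing}")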

View File

@@ -61,5 +61,4 @@ def update():
console.print(f"[red]{__title__} has been downloaded [yellow]{total_download_count} [red]times, but only [yellow]{percentual_stars}% [red]of users have starred it.\n\
[cyan]Help the repository grow today by leaving a [yellow]star [cyan]and [yellow]sharing [cyan]it with others online!")
console.print("\n")
time.sleep(4)
time.sleep(3)

View File

@@ -7,7 +7,7 @@
"clean_console": true,
"root_path": "Video",
"map_episode_name": "%(tv_name)_S%(season)E%(episode)_%(episode_name)",
"special_chars_to_remove": "!@#$%^&*()[]{}<>|`~'\";:,.?=+\u00e2\u20ac\u00a6",
"special_chars_to_remove": "!@#$%^&*()[]{}<>|`~'\";:,?=+\u00e2\u20ac\u00a6",
"config_qbit_tor": {
"host": "192.168.1.58",
"port": "8080",
@@ -18,7 +18,7 @@
"show_trending": false
},
"REQUESTS": {
"timeout": 15,
"timeout": 20,
"max_retry": 3,
"verify_ssl": true,
"user-agent": "",
@@ -59,8 +59,8 @@
},
"SITE": {
"streamingcommunity": {
"video_workers": 6,
"audio_workers": 6,
"video_workers": 7,
"audio_workers": 7,
"domain": "computer"
},
"altadefinizione": {

run.py
View File

@@ -136,10 +136,6 @@ def initialize():
def main():
# Create the root folder if it does not exist
folder_root_path = config_manager.get("DEFAULT", "root_path")
create_folder(folder_name=folder_root_path)
# Load search functions
search_functions = load_search_functions()