From 1b0d14fcdbc2bd00736270159cadca94e34e5ebf Mon Sep 17 00:00:00 2001 From: Lovi <62809003+Lovi-0@users.noreply.github.com> Date: Sun, 3 Nov 2024 18:32:21 +0100 Subject: [PATCH] Fix output path. --- Src/Api/cb01/Player/maxstream.py | 165 ------------------------ Src/Api/cb01/__init__.py | 40 ------ Src/Api/cb01/costant.py | 15 --- Src/Api/cb01/film.py | 60 --------- Src/Api/cb01/site.py | 80 ------------ Src/Lib/Downloader/HLS/segments.py | 193 +++++++++++++++++------------ Src/Upload/update.py | 3 +- config.json | 8 +- run.py | 4 - 9 files changed, 120 insertions(+), 448 deletions(-) delete mode 100644 Src/Api/cb01/Player/maxstream.py delete mode 100644 Src/Api/cb01/__init__.py delete mode 100644 Src/Api/cb01/costant.py delete mode 100644 Src/Api/cb01/film.py delete mode 100644 Src/Api/cb01/site.py diff --git a/Src/Api/cb01/Player/maxstream.py b/Src/Api/cb01/Player/maxstream.py deleted file mode 100644 index 3b58fb9..0000000 --- a/Src/Api/cb01/Player/maxstream.py +++ /dev/null @@ -1,165 +0,0 @@ -# 05.07.24 - -import re -import logging - - -# External libraries -import httpx -import jsbeautifier -from bs4 import BeautifulSoup - - -# Internal utilities -from Src.Util.headers import get_headers -from Src.Util._jsonConfig import config_manager - - -# Variable -max_timeout = config_manager.get_int("REQUESTS", "timeout") - -class VideoSource: - def __init__(self, url: str): - """ - Sets up the video source with the provided URL. - - Parameters: - - url (str): The URL of the video. - """ - self.url = url - self.redirect_url = None - self.maxstream_url = None - self.m3u8_url = None - self.headers = {'user-agent': get_headers()} - - def get_redirect_url(self): - """ - Sends a request to the initial URL and extracts the redirect URL. - """ - try: - - # Send a GET request to the initial URL - response = httpx.get( - url=self.url, - headers=self.headers, - follow_redirects=True, - timeout=max_timeout - ) - response.raise_for_status() - - # Extract the redirect URL from the HTML - soup = BeautifulSoup(response.text, "html.parser") - self.redirect_url = soup.find("div", id="iframen1").get("data-src") - logging.info(f"Redirect URL: {self.redirect_url}") - - return self.redirect_url - - except httpx.RequestError as e: - logging.error(f"Error during the initial request: {e}") - raise - - except AttributeError as e: - logging.error(f"Error parsing HTML: {e}") - raise - - def get_maxstream_url(self): - """ - Sends a request to the redirect URL and extracts the Maxstream URL. - """ - try: - - # Send a GET request to the redirect URL - response = httpx.get( - url=self.redirect_url, - headers=self.headers, - follow_redirects=True, - timeout=max_timeout - ) - response.raise_for_status() - - # Extract the Maxstream URL from the HTML - soup = BeautifulSoup(response.text, "html.parser") - maxstream_url = soup.find("a") - - if maxstream_url is None: - - # If no anchor tag is found, try the alternative method - logging.warning("Anchor tag not found. 
Trying the alternative method.") - headers = { - 'origin': 'https://stayonline.pro', - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 OPR/111.0.0.0', - 'x-requested-with': 'XMLHttpRequest', - } - - # Make request to stayonline api - data = {'id': self.redirect_url.split("/")[-2], 'ref': ''} - response = httpx.post('https://stayonline.pro/ajax/linkEmbedView.php', headers=headers, data=data, timeout=max_timeout) - response.raise_for_status() - uprot_url = response.json()['data']['value'] - - # Retry getting maxtstream url - response = httpx.get(uprot_url, headers=self.headers, follow_redirects=True, timeout=10) - response.raise_for_status() - soup = BeautifulSoup(response.text, "html.parser") - maxstream_url = soup.find("a").get("href") - - else: - maxstream_url = maxstream_url.get("href") - - self.maxstream_url = maxstream_url - logging.info(f"Maxstream URL: {self.maxstream_url}") - - return self.maxstream_url - - except httpx.RequestError as e: - logging.error(f"Error during the request to the redirect URL: {e}") - raise - - except AttributeError as e: - logging.error(f"Error parsing HTML: {e}") - raise - - def get_m3u8_url(self): - """ - Sends a request to the Maxstream URL and extracts the .m3u8 file URL. - """ - try: - - # Send a GET request to the Maxstream URL - response = httpx.get( - url=self.maxstream_url, - headers=self.headers, - follow_redirects=True, - timeout=max_timeout - ) - response.raise_for_status() - soup = BeautifulSoup(response.text, "html.parser") - - # Iterate over all script tags in the HTML - for script in soup.find_all("script"): - if "eval(function(p,a,c,k,e,d)" in script.text: - - # Execute the script using - data_js = jsbeautifier.beautify(script.text) - - # Extract the .m3u8 URL from the script's output - match = re.search(r'sources:\s*\[\{\s*src:\s*"([^"]+)"', data_js) - - if match: - self.m3u8_url = match.group(1) - logging.info(f"M3U8 URL: {self.m3u8_url}") - break - - return self.m3u8_url - - except Exception as e: - logging.error(f"Error executing the Node.js script: {e}") - raise - - def get_playlist(self): - """ - Executes the entire flow to obtain the final .m3u8 file URL. - """ - self.get_redirect_url() - self.get_maxstream_url() - return self.get_m3u8_url() diff --git a/Src/Api/cb01/__init__.py b/Src/Api/cb01/__init__.py deleted file mode 100644 index 64c20b1..0000000 --- a/Src/Api/cb01/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -# 09.06.24 - -# Internal utilities -from Src.Util.console import console, msg - - -# Logic class -from .site import title_search, run_get_select_title -from .film import download_film - - -# Variable -indice = 9 -_use_for = "film" -_deprecate = True - - -def search(): - """ - Main function of the application for film and series. - """ - - # Make request to site to get content that corrsisponde to that string - string_to_search = msg.ask("\n[purple]Insert word to search in all site").strip() - len_database = title_search(string_to_search) - - if len_database > 0: - - # Select title from list - select_title = run_get_select_title() - - # !!! 
ADD TYPE DONT WORK FOR SERIE - download_film(select_title) - - - else: - console.print(f"\n[red]Nothing matching was found for[white]: [purple]{string_to_search}") - - # Retry - search() diff --git a/Src/Api/cb01/costant.py b/Src/Api/cb01/costant.py deleted file mode 100644 index ba6ce6e..0000000 --- a/Src/Api/cb01/costant.py +++ /dev/null @@ -1,15 +0,0 @@ -# 03.07.24 - -import os - - -# Internal utilities -from Src.Util._jsonConfig import config_manager - - -SITE_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) -ROOT_PATH = config_manager.get('DEFAULT', 'root_path') -DOMAIN_NOW = config_manager.get_dict('SITE', SITE_NAME)['domain'] - -MOVIE_FOLDER = "Movie" -SERIES_FOLDER = "Serie" diff --git a/Src/Api/cb01/film.py b/Src/Api/cb01/film.py deleted file mode 100644 index b0ee589..0000000 --- a/Src/Api/cb01/film.py +++ /dev/null @@ -1,60 +0,0 @@ -# 03.07.24 - -import os -import time - - -# Internal utilities -from Src.Util.console import console, msg -from Src.Util.os import remove_special_characters -from Src.Util.message import start_message -from Src.Util.call_stack import get_call_stack -from Src.Lib.Downloader import HLS_Downloader -from ..Template import execute_search - - -# Logic class -from ..Template.Class.SearchType import MediaItem -from .Player.maxstream import VideoSource - - -# Config -from .costant import ROOT_PATH, SITE_NAME, MOVIE_FOLDER - - -def download_film(select_title: MediaItem): - """ - Downloads a film using the provided obj. - - Parameters: - - select_title (MediaItem): The media item to be downloaded. This should be an instance of the MediaItem class, containing attributes like `name` and `url`. - """ - - # Start message and display film information - start_message() - console.print(f"[yellow]Download: [red]{select_title.name} \n") - - # Setup api manger - video_source = VideoSource(select_title.url) - - # Define output path - mp4_name = remove_special_characters(select_title.name) +".mp4" - mp4_path = os.path.join(ROOT_PATH, SITE_NAME, MOVIE_FOLDER, remove_special_characters(select_title.name)) - - # Get m3u8 master playlist - master_playlist = video_source.get_playlist() - - # Download the film using the m3u8 playlist, and output filename - r_proc = HLS_Downloader(m3u8_playlist = master_playlist, output_filename = os.path.join(mp4_path, mp4_name)).start() - - if r_proc == 404: - time.sleep(2) - - # Re call search function - if msg.ask("[green]Do you want to continue [white]([red]y[white])[green] or return at home[white]([red]n[white]) ", choices=['y', 'n'], default='y', show_choices=True) == "n": - frames = get_call_stack() - execute_search(frames[-4]) - - if r_proc != None: - console.print("[green]Result: ") - console.print(r_proc) diff --git a/Src/Api/cb01/site.py b/Src/Api/cb01/site.py deleted file mode 100644 index f19af88..0000000 --- a/Src/Api/cb01/site.py +++ /dev/null @@ -1,80 +0,0 @@ -# 03.07.24 - -# External libraries -import httpx -from bs4 import BeautifulSoup -from unidecode import unidecode - - -# Internal utilities -from Src.Util.console import console -from Src.Util._jsonConfig import config_manager -from Src.Util.headers import get_headers -from Src.Util.table import TVShowManager -from ..Template import search_domain, get_select_title - - -# Logic class -from ..Template.Class.SearchType import MediaManager - - -# Variable -from .costant import SITE_NAME -media_search_manager = MediaManager() -table_show_manager = TVShowManager() - - -def title_search(word_to_search: str) -> int: - """ - Search for titles based on a search 
query. - - Parameters: - - title_search (str): The title to search for. - - Returns: - - int: The number of titles found. - """ - - # Find new domain if prev dont work - max_timeout = config_manager.get_int("REQUESTS", "timeout") - domain_to_use, _ = search_domain(SITE_NAME, f"https://{SITE_NAME}") - - # Send request to search for titles - try: - response = httpx.get( - url=f"https://{SITE_NAME}.{domain_to_use}/?s={unidecode(word_to_search)}", - headers={'user-agent': get_headers()}, - follow_redirects=True, - timeout=max_timeout - ) - response.raise_for_status() - - except Exception as e: - console.print(f"Site: {SITE_NAME}, request search error: {e}") - - # Create soup and find table - soup = BeautifulSoup(response.text, "html.parser") - - for div_title in soup.find_all("div", class_ = "card"): - - url = div_title.find("h3").find("a").get("href") - title = div_title.find("h3").find("a").get_text(strip=True) - desc = div_title.find("p").find("strong").text - - title_info = { - 'name': title, - 'desc': desc, - 'url': url - } - - media_search_manager.add_media(title_info) - - # Return the number of titles found - return media_search_manager.get_length() - - -def run_get_select_title(): - """ - Display a selection of titles and prompt the user to choose one. - """ - return get_select_title(table_show_manager, media_search_manager) diff --git a/Src/Lib/Downloader/HLS/segments.py b/Src/Lib/Downloader/HLS/segments.py index b940433..e1653f7 100644 --- a/Src/Lib/Downloader/HLS/segments.py +++ b/Src/Lib/Downloader/HLS/segments.py @@ -77,6 +77,7 @@ class M3U8_Segments: # Sync self.queue = PriorityQueue() self.stop_event = threading.Event() + self.downloaded_segments = set() def __get_key__(self, m3u8_parser: M3U8_Parser) -> bytes: """ @@ -176,13 +177,16 @@ class M3U8_Segments: if self.is_index_url: # Send a GET request to retrieve the index M3U8 file - response = httpx.get(self.url, headers=headers_index) + response = httpx.get( + self.url, + headers=headers_index, + timeout=max_timeout + ) response.raise_for_status() # Save the M3U8 file to the temporary folder - if response.status_code == 200: - path_m3u8_file = os.path.join(self.tmp_folder, "playlist.m3u8") - open(path_m3u8_file, "w+").write(response.text) + path_m3u8_file = os.path.join(self.tmp_folder, "playlist.m3u8") + open(path_m3u8_file, "w+").write(response.text) # Parse the text from the M3U8 index file self.parse_data(response.text) @@ -204,115 +208,122 @@ class M3U8_Segments: - backoff_factor (float): The backoff factor for exponential backoff (default is 1.5 seconds). 
""" need_verify = REQUEST_VERIFY + min_segment_size = 100 # Minimum acceptable size for a TS segment in bytes for attempt in range(retries): try: start_time = time.time() - + # Make request to get content if THERE_IS_PROXY_LIST: - - # Get proxy from list proxy = self.valid_proxy[index % len(self.valid_proxy)] logging.info(f"Use proxy: {proxy}") with httpx.Client(proxies=proxy, verify=need_verify) as client: if 'key_base_url' in self.__dict__: - response = client.get( - url=ts_url, - headers=random_headers(self.key_base_url), - timeout=max_timeout, - follow_redirects=True - ) - + response = client.get(ts_url, headers=random_headers(self.key_base_url), timeout=max_timeout, follow_redirects=True) else: - response = client.get( - url=ts_url, - headers={'user-agent': get_headers()}, - timeout=max_timeout, - follow_redirects=True - ) - + response = client.get(ts_url, headers={'user-agent': get_headers()}, timeout=max_timeout, follow_redirects=True) else: with httpx.Client(verify=need_verify) as client_2: if 'key_base_url' in self.__dict__: - response = client_2.get( - url=ts_url, - headers=random_headers(self.key_base_url), - timeout=max_timeout, - follow_redirects=True - ) - + response = client_2.get(ts_url, headers=random_headers(self.key_base_url), timeout=max_timeout, follow_redirects=True) else: - response = client_2.get( - url=ts_url, - headers={'user-agent': get_headers()}, - timeout=max_timeout, - follow_redirects=True - ) + response = client_2.get(ts_url, headers={'user-agent': get_headers()}, timeout=max_timeout, follow_redirects=True) - # Get response content - response.raise_for_status() # Raise exception for HTTP errors - duration = time.time() - start_time + # Validate response and content + response.raise_for_status() segment_content = response.content + content_size = len(segment_content) - # Update bar - response_size = int(response.headers.get('Content-Length', 0)) or len(segment_content) + # Check if segment is too small (possibly corrupted or empty) + if content_size < min_segment_size: + raise httpx.RequestError(f"Segment {index} too small ({content_size} bytes)") - # Update progress bar with custom Class - self.class_ts_estimator.update_progress_bar(response_size, duration, progress_bar) + duration = time.time() - start_time - # Decrypt the segment content if decryption is needed + # Decrypt if needed and verify decrypted content if self.decryption is not None: - segment_content = self.decryption.decrypt(segment_content) + try: + segment_content = self.decryption.decrypt(segment_content) + if len(segment_content) < min_segment_size: + raise Exception(f"Decrypted segment {index} too small ({len(segment_content)} bytes)") + except Exception as e: + logging.error(f"Decryption failed for segment {index}: {str(e)}") + raise - # Add the segment to the queue + # Update progress and queue + self.class_ts_estimator.update_progress_bar(content_size, duration, progress_bar) self.queue.put((index, segment_content)) + self.downloaded_segments.add(index) # Track successfully downloaded segments progress_bar.update(1) - - # Break out of the loop on success return - except (httpx.RequestError, httpx.HTTPStatusError) as e: - console.print(f"\nAttempt {attempt + 1} failed for '{ts_url}' with error: {e}") + except Exception as e: + #logging.error(f"Attempt {attempt + 1} failed for segment {index} - '{ts_url}': {e}") if attempt + 1 == retries: - console.print(f"\nFailed after {retries} attempts. 
Skipping '{ts_url}'") + #logging.error(f"Final retry failed for segment {index}") + self.queue.put((index, None)) # Marker for failed segment + progress_bar.update(1) break - # Exponential backoff before retrying sleep_time = backoff_factor * (2 ** attempt) - console.print(f"Retrying in {sleep_time} seconds...") + logging.info(f"Retrying segment {index} in {sleep_time} seconds...") time.sleep(sleep_time) def write_segments_to_file(self): """ - Writes downloaded segments to a file in the correct order. + Writes segments to file with additional verification. """ with open(self.tmp_file_path, 'wb') as f: expected_index = 0 buffer = {} + total_written = 0 + segments_written = set() while not self.stop_event.is_set() or not self.queue.empty(): try: index, segment_content = self.queue.get(timeout=1) + # Handle failed segments + if segment_content is None: + if index == expected_index: + expected_index += 1 + continue + + # Write segment if it's the next expected one if index == expected_index: f.write(segment_content) + total_written += len(segment_content) + segments_written.add(index) f.flush() expected_index += 1 - # Write any buffered segments in order + # Write any buffered segments that are now in order while expected_index in buffer: - f.write(buffer.pop(expected_index)) - f.flush() + next_segment = buffer.pop(expected_index) + if next_segment is not None: + f.write(next_segment) + total_written += len(next_segment) + segments_written.add(expected_index) + f.flush() expected_index += 1 else: buffer[index] = segment_content except queue.Empty: + if self.stop_event.is_set(): + break + continue + except Exception as e: + logging.error(f"Error writing segment {index}: {str(e)}") continue + # Final verification + if total_written == 0: + raise Exception("No data written to file") + def download_streams(self, add_desc): """ Downloads all TS segments in parallel and writes them to a file. 
@@ -375,37 +386,63 @@ class M3U8_Segments: mininterval=0.05 ) - # Start a separate thread to write segments to the file + # Start writer thread writer_thread = threading.Thread(target=self.write_segments_to_file) + writer_thread.daemon = True writer_thread.start() - # Ff proxy avaiable set max_workers to number of proxy - # else set max_workers to TQDM_MAX_WORKER + # Configure workers and delay max_workers = len(self.valid_proxy) if THERE_IS_PROXY_LIST else TQDM_MAX_WORKER + delay = max(PROXY_START_MIN, min(PROXY_START_MAX, 1 / (len(self.valid_proxy) + 1))) if THERE_IS_PROXY_LIST else TQDM_DELAY_WORKER - # if proxy avaiable set timeout to variable time - # else set timeout to TDQM_DELAY_WORKER - if THERE_IS_PROXY_LIST: - num_proxies = len(self.valid_proxy) - self.working_proxy_list = self.valid_proxy - - if num_proxies > 0: - # calculate delay based on number of proxies - # dalay should be between 0.5 and 1 - delay = max(PROXY_START_MIN, min(PROXY_START_MAX, 1 / (num_proxies + 1))) - else: - delay = TQDM_DELAY_WORKER - else: - delay = TQDM_DELAY_WORKER - - # Start all workers + # Download segments with completion verification with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = [] for index, segment_url in enumerate(self.segments): time.sleep(delay) - executor.submit(self.make_requests_stream, segment_url, index, progress_bar) + futures.append(executor.submit(self.make_requests_stream, segment_url, index, progress_bar)) - # Wait for all tasks to complete - executor.shutdown(wait=True) + # Wait for all futures to complete + for future in futures: + try: + future.result() + except Exception as e: + logging.error(f"Error in download thread: {str(e)}") + + # Verify completion and retry missing segments + total_segments = len(self.segments) + completed_segments = len(self.downloaded_segments) + + if completed_segments < total_segments: + missing_segments = set(range(total_segments)) - self.downloaded_segments + logging.warning(f"Missing segments: {sorted(missing_segments)}") + + # Retry missing segments + for index in missing_segments: + if self.stop_event.is_set(): + break + try: + self.make_requests_stream(self.segments[index], index, progress_bar) + except Exception as e: + logging.error(f"Failed to retry segment {index}: {str(e)}") + + # Clean up self.stop_event.set() - writer_thread.join() + writer_thread.join(timeout=30) progress_bar.close() + + # Final verification + final_completion = (len(self.downloaded_segments) / total_segments) * 100 + if final_completion < 99.9: # Less than 99.9% complete + missing = set(range(total_segments)) - self.downloaded_segments + raise Exception(f"Download incomplete ({final_completion:.1f}%). Missing segments: {sorted(missing)}") + + # Verify output file + if not os.path.exists(self.tmp_file_path): + raise Exception("Output file missing") + + file_size = os.path.getsize(self.tmp_file_path) + if file_size == 0: + raise Exception("Output file is empty") + + logging.info(f"Download completed. 
File size: {file_size} bytes") diff --git a/Src/Upload/update.py b/Src/Upload/update.py index 9f20270..107beb4 100644 --- a/Src/Upload/update.py +++ b/Src/Upload/update.py @@ -61,5 +61,4 @@ def update(): console.print(f"[red]{__title__} has been downloaded [yellow]{total_download_count} [red]times, but only [yellow]{percentual_stars}% [red]of users have starred it.\n\ [cyan]Help the repository grow today by leaving a [yellow]star [cyan]and [yellow]sharing [cyan]it with others online!") - console.print("\n") - time.sleep(4) + time.sleep(3) diff --git a/config.json b/config.json index 2f395ca..e0390d6 100644 --- a/config.json +++ b/config.json @@ -7,7 +7,7 @@ "clean_console": true, "root_path": "Video", "map_episode_name": "%(tv_name)_S%(season)E%(episode)_%(episode_name)", - "special_chars_to_remove": "!@#$%^&*()[]{}<>|`~'\";:,.?=+\u00e2\u20ac\u00a6", + "special_chars_to_remove": "!@#$%^&*()[]{}<>|`~'\";:,?=+\u00e2\u20ac\u00a6", "config_qbit_tor": { "host": "192.168.1.58", "port": "8080", @@ -18,7 +18,7 @@ "show_trending": false }, "REQUESTS": { - "timeout": 15, + "timeout": 20, "max_retry": 3, "verify_ssl": true, "user-agent": "", @@ -59,8 +59,8 @@ }, "SITE": { "streamingcommunity": { - "video_workers": 6, - "audio_workers": 6, + "video_workers": 7, + "audio_workers": 7, "domain": "computer" }, "altadefinizione": { diff --git a/run.py b/run.py index e01a1b9..9bba080 100644 --- a/run.py +++ b/run.py @@ -136,10 +136,6 @@ def initialize(): def main(): - # Create folder root path if not exist - folder_root_path = config_manager.get("DEFAULT", "root_path") - create_folder(folder_name=folder_root_path) - # Load search functions search_functions = load_search_functions()