Fix output path.

Lovi 2024-11-03 18:32:21 +01:00
parent 6bfb38a2e6
commit 1b0d14fcdb
9 changed files with 120 additions and 448 deletions

View File

@@ -1,165 +0,0 @@
# 05.07.24
import re
import logging
# External libraries
import httpx
import jsbeautifier
from bs4 import BeautifulSoup
# Internal utilities
from Src.Util.headers import get_headers
from Src.Util._jsonConfig import config_manager
# Variable
max_timeout = config_manager.get_int("REQUESTS", "timeout")
class VideoSource:
def __init__(self, url: str):
"""
Sets up the video source with the provided URL.
Parameters:
- url (str): The URL of the video.
"""
self.url = url
self.redirect_url = None
self.maxstream_url = None
self.m3u8_url = None
self.headers = {'user-agent': get_headers()}
def get_redirect_url(self):
"""
Sends a request to the initial URL and extracts the redirect URL.
"""
try:
# Send a GET request to the initial URL
response = httpx.get(
url=self.url,
headers=self.headers,
follow_redirects=True,
timeout=max_timeout
)
response.raise_for_status()
# Extract the redirect URL from the HTML
soup = BeautifulSoup(response.text, "html.parser")
self.redirect_url = soup.find("div", id="iframen1").get("data-src")
logging.info(f"Redirect URL: {self.redirect_url}")
return self.redirect_url
except httpx.RequestError as e:
logging.error(f"Error during the initial request: {e}")
raise
except AttributeError as e:
logging.error(f"Error parsing HTML: {e}")
raise
def get_maxstream_url(self):
"""
Sends a request to the redirect URL and extracts the Maxstream URL.
"""
try:
# Send a GET request to the redirect URL
response = httpx.get(
url=self.redirect_url,
headers=self.headers,
follow_redirects=True,
timeout=max_timeout
)
response.raise_for_status()
# Extract the Maxstream URL from the HTML
soup = BeautifulSoup(response.text, "html.parser")
maxstream_url = soup.find("a")
if maxstream_url is None:
# If no anchor tag is found, try the alternative method
logging.warning("Anchor tag not found. Trying the alternative method.")
headers = {
'origin': 'https://stayonline.pro',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 OPR/111.0.0.0',
'x-requested-with': 'XMLHttpRequest',
}
# Make request to the stayonline API
data = {'id': self.redirect_url.split("/")[-2], 'ref': ''}
response = httpx.post('https://stayonline.pro/ajax/linkEmbedView.php', headers=headers, data=data, timeout=max_timeout)
response.raise_for_status()
uprot_url = response.json()['data']['value']
# Retry getting the maxstream URL
response = httpx.get(uprot_url, headers=self.headers, follow_redirects=True, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
maxstream_url = soup.find("a").get("href")
else:
maxstream_url = maxstream_url.get("href")
self.maxstream_url = maxstream_url
logging.info(f"Maxstream URL: {self.maxstream_url}")
return self.maxstream_url
except httpx.RequestError as e:
logging.error(f"Error during the request to the redirect URL: {e}")
raise
except AttributeError as e:
logging.error(f"Error parsing HTML: {e}")
raise
def get_m3u8_url(self):
"""
Sends a request to the Maxstream URL and extracts the .m3u8 file URL.
"""
try:
# Send a GET request to the Maxstream URL
response = httpx.get(
url=self.maxstream_url,
headers=self.headers,
follow_redirects=True,
timeout=max_timeout
)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
# Iterate over all script tags in the HTML
for script in soup.find_all("script"):
if "eval(function(p,a,c,k,e,d)" in script.text:
# Unpack the obfuscated script with jsbeautifier
data_js = jsbeautifier.beautify(script.text)
# Extract the .m3u8 URL from the script's output
match = re.search(r'sources:\s*\[\{\s*src:\s*"([^"]+)"', data_js)
if match:
self.m3u8_url = match.group(1)
logging.info(f"M3U8 URL: {self.m3u8_url}")
break
return self.m3u8_url
except Exception as e:
logging.error(f"Error extracting the M3U8 URL: {e}")
raise
def get_playlist(self):
"""
Executes the entire flow to obtain the final .m3u8 file URL.
"""
self.get_redirect_url()
self.get_maxstream_url()
return self.get_m3u8_url()
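
For context, the class removed above exposes a three-step chain (redirect URL -> Maxstream URL -> .m3u8 URL) wrapped by get_playlist(). A minimal usage sketch, with a placeholder URL standing in for a real video page:

# Hypothetical usage of the removed VideoSource class
video_source = VideoSource("https://example.org/watch/some-film")  # placeholder URL
master_playlist = video_source.get_playlist()  # runs get_redirect_url -> get_maxstream_url -> get_m3u8_url
print(master_playlist)                         # final .m3u8 URL, or None if no packed script matched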

View File

@@ -1,40 +0,0 @@
# 09.06.24
# Internal utilities
from Src.Util.console import console, msg
# Logic class
from .site import title_search, run_get_select_title
from .film import download_film
# Variable
indice = 9
_use_for = "film"
_deprecate = True
def search():
"""
Main function of the application for film and series.
"""
# Make a request to the site to get content matching the search string
string_to_search = msg.ask("\n[purple]Insert a word to search across the site").strip()
len_database = title_search(string_to_search)
if len_database > 0:
# Select title from list
select_title = run_get_select_title()
# !!! TODO: add media type handling, this does not work for series
download_film(select_title)
else:
console.print(f"\n[red]Nothing matching was found for[white]: [purple]{string_to_search}")
# Retry
search()

View File

@@ -1,15 +0,0 @@
# 03.07.24
import os
# Internal utilities
from Src.Util._jsonConfig import config_manager
SITE_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
ROOT_PATH = config_manager.get('DEFAULT', 'root_path')
DOMAIN_NOW = config_manager.get_dict('SITE', SITE_NAME)['domain']
MOVIE_FOLDER = "Movie"
SERIES_FOLDER = "Serie"
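
A brief illustration of how these constants resolve, assuming the file sits in a per-site package such as Src/Api/<site>/costant.py (the exact layout is an assumption, not stated in this diff):

# If __file__ were ".../Src/Api/altadefinizione/costant.py" (hypothetical path),
# os.path.dirname(os.path.abspath(__file__)) -> ".../Src/Api/altadefinizione"
# os.path.basename(...)                      -> "altadefinizione" == SITE_NAME
# ROOT_PATH comes from config.json ("Video" by default), so downloads end up
# under Video/<SITE_NAME>/Movie or Video/<SITE_NAME>/Serie.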

View File

@@ -1,60 +0,0 @@
# 03.07.24
import os
import time
# Internal utilities
from Src.Util.console import console, msg
from Src.Util.os import remove_special_characters
from Src.Util.message import start_message
from Src.Util.call_stack import get_call_stack
from Src.Lib.Downloader import HLS_Downloader
from ..Template import execute_search
# Logic class
from ..Template.Class.SearchType import MediaItem
from .Player.maxstream import VideoSource
# Config
from .costant import ROOT_PATH, SITE_NAME, MOVIE_FOLDER
def download_film(select_title: MediaItem):
"""
Downloads a film using the provided media item.
Parameters:
- select_title (MediaItem): The media item to be downloaded. This should be an instance of the MediaItem class, containing attributes like `name` and `url`.
"""
# Start message and display film information
start_message()
console.print(f"[yellow]Download: [red]{select_title.name} \n")
# Set up the API manager
video_source = VideoSource(select_title.url)
# Define output path
mp4_name = remove_special_characters(select_title.name) + ".mp4"
mp4_path = os.path.join(ROOT_PATH, SITE_NAME, MOVIE_FOLDER, remove_special_characters(select_title.name))
# Get m3u8 master playlist
master_playlist = video_source.get_playlist()
# Download the film using the m3u8 playlist and the output filename
r_proc = HLS_Downloader(m3u8_playlist = master_playlist, output_filename = os.path.join(mp4_path, mp4_name)).start()
if r_proc == 404:
time.sleep(2)
# Re-invoke the search function
if msg.ask("[green]Do you want to continue [white]([red]y[white])[green] or return at home[white]([red]n[white]) ", choices=['y', 'n'], default='y', show_choices=True) == "n":
frames = get_call_stack()
execute_search(frames[-4])
if r_proc is not None:
console.print("[green]Result: ")
console.print(r_proc)
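
Since the commit message is "Fix output path", here is how the removed version of download_film assembled that path, using illustrative values only (the SITE_NAME and title below are assumptions):

# With ROOT_PATH = "Video", SITE_NAME = "altadefinizione" (hypothetical) and
# select_title.name = "Some Film", the code above produced:
#   mp4_name        = "Some Film.mp4"
#   mp4_path        = os.path.join("Video", "altadefinizione", "Movie", "Some Film")
#   output_filename = "Video/altadefinizione/Movie/Some Film/Some Film.mp4"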

View File

@@ -1,80 +0,0 @@
# 03.07.24
# External libraries
import httpx
from bs4 import BeautifulSoup
from unidecode import unidecode
# Internal utilities
from Src.Util.console import console
from Src.Util._jsonConfig import config_manager
from Src.Util.headers import get_headers
from Src.Util.table import TVShowManager
from ..Template import search_domain, get_select_title
# Logic class
from ..Template.Class.SearchType import MediaManager
# Variable
from .costant import SITE_NAME
media_search_manager = MediaManager()
table_show_manager = TVShowManager()
def title_search(word_to_search: str) -> int:
"""
Search for titles based on a search query.
Parameters:
- word_to_search (str): The title to search for.
Returns:
- int: The number of titles found.
"""
# Find a new domain if the previous one does not work
max_timeout = config_manager.get_int("REQUESTS", "timeout")
domain_to_use, _ = search_domain(SITE_NAME, f"https://{SITE_NAME}")
# Send request to search for titles
try:
response = httpx.get(
url=f"https://{SITE_NAME}.{domain_to_use}/?s={unidecode(word_to_search)}",
headers={'user-agent': get_headers()},
follow_redirects=True,
timeout=max_timeout
)
response.raise_for_status()
except Exception as e:
console.print(f"Site: {SITE_NAME}, request search error: {e}")
return 0  # Without a response there is nothing to parse
# Parse the HTML and iterate over the result cards
soup = BeautifulSoup(response.text, "html.parser")
for div_title in soup.find_all("div", class_ = "card"):
url = div_title.find("h3").find("a").get("href")
title = div_title.find("h3").find("a").get_text(strip=True)
desc = div_title.find("p").find("strong").text
title_info = {
'name': title,
'desc': desc,
'url': url
}
media_search_manager.add_media(title_info)
# Return the number of titles found
return media_search_manager.get_length()
def run_get_select_title():
"""
Display a selection of titles and prompt the user to choose one.
"""
return get_select_title(table_show_manager, media_search_manager)
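
A short note on the unidecode call in the query URL above: it transliterates accented input to plain ASCII before it is placed in the query string. A minimal sketch with a made-up query:

from unidecode import unidecode

word_to_search = "Pokémon"        # hypothetical user input
print(unidecode(word_to_search))  # -> "Pokemon"
# so the request becomes https://<SITE_NAME>.<domain>/?s=Pokemon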

View File

@@ -77,6 +77,7 @@ class M3U8_Segments:
# Sync
self.queue = PriorityQueue()
self.stop_event = threading.Event()
self.downloaded_segments = set()
def __get_key__(self, m3u8_parser: M3U8_Parser) -> bytes:
"""
@@ -176,13 +177,16 @@ class M3U8_Segments:
if self.is_index_url:
# Send a GET request to retrieve the index M3U8 file
response = httpx.get(self.url, headers=headers_index)
response = httpx.get(
self.url,
headers=headers_index,
timeout=max_timeout
)
response.raise_for_status()
# Save the M3U8 file to the temporary folder
if response.status_code == 200:
path_m3u8_file = os.path.join(self.tmp_folder, "playlist.m3u8")
open(path_m3u8_file, "w+").write(response.text)
path_m3u8_file = os.path.join(self.tmp_folder, "playlist.m3u8")
open(path_m3u8_file, "w+").write(response.text)
# Parse the text from the M3U8 index file
self.parse_data(response.text)
@@ -204,115 +208,122 @@ class M3U8_Segments:
- backoff_factor (float): The backoff factor for exponential backoff (default is 1.5 seconds).
"""
need_verify = REQUEST_VERIFY
min_segment_size = 100 # Minimum acceptable size for a TS segment in bytes
for attempt in range(retries):
try:
start_time = time.time()
# Make request to get content
if THERE_IS_PROXY_LIST:
# Get proxy from list
proxy = self.valid_proxy[index % len(self.valid_proxy)]
logging.info(f"Use proxy: {proxy}")
with httpx.Client(proxies=proxy, verify=need_verify) as client:
if 'key_base_url' in self.__dict__:
response = client.get(
url=ts_url,
headers=random_headers(self.key_base_url),
timeout=max_timeout,
follow_redirects=True
)
response = client.get(ts_url, headers=random_headers(self.key_base_url), timeout=max_timeout, follow_redirects=True)
else:
response = client.get(
url=ts_url,
headers={'user-agent': get_headers()},
timeout=max_timeout,
follow_redirects=True
)
response = client.get(ts_url, headers={'user-agent': get_headers()}, timeout=max_timeout, follow_redirects=True)
else:
with httpx.Client(verify=need_verify) as client_2:
if 'key_base_url' in self.__dict__:
response = client_2.get(
url=ts_url,
headers=random_headers(self.key_base_url),
timeout=max_timeout,
follow_redirects=True
)
response = client_2.get(ts_url, headers=random_headers(self.key_base_url), timeout=max_timeout, follow_redirects=True)
else:
response = client_2.get(
url=ts_url,
headers={'user-agent': get_headers()},
timeout=max_timeout,
follow_redirects=True
)
response = client_2.get(ts_url, headers={'user-agent': get_headers()}, timeout=max_timeout, follow_redirects=True)
# Get response content
response.raise_for_status() # Raise exception for HTTP errors
duration = time.time() - start_time
# Validate response and content
response.raise_for_status()
segment_content = response.content
content_size = len(segment_content)
# Update bar
response_size = int(response.headers.get('Content-Length', 0)) or len(segment_content)
# Check if segment is too small (possibly corrupted or empty)
if content_size < min_segment_size:
raise httpx.RequestError(f"Segment {index} too small ({content_size} bytes)")
# Update progress bar with custom Class
self.class_ts_estimator.update_progress_bar(response_size, duration, progress_bar)
duration = time.time() - start_time
# Decrypt the segment content if decryption is needed
# Decrypt if needed and verify decrypted content
if self.decryption is not None:
segment_content = self.decryption.decrypt(segment_content)
try:
segment_content = self.decryption.decrypt(segment_content)
if len(segment_content) < min_segment_size:
raise Exception(f"Decrypted segment {index} too small ({len(segment_content)} bytes)")
except Exception as e:
logging.error(f"Decryption failed for segment {index}: {str(e)}")
raise
# Add the segment to the queue
# Update progress and queue
self.class_ts_estimator.update_progress_bar(content_size, duration, progress_bar)
self.queue.put((index, segment_content))
self.downloaded_segments.add(index) # Track successfully downloaded segments
progress_bar.update(1)
# Break out of the loop on success
return
except (httpx.RequestError, httpx.HTTPStatusError) as e:
console.print(f"\nAttempt {attempt + 1} failed for '{ts_url}' with error: {e}")
except Exception as e:
#logging.error(f"Attempt {attempt + 1} failed for segment {index} - '{ts_url}': {e}")
if attempt + 1 == retries:
console.print(f"\nFailed after {retries} attempts. Skipping '{ts_url}'")
#logging.error(f"Final retry failed for segment {index}")
self.queue.put((index, None)) # Marker for failed segment
progress_bar.update(1)
break
# Exponential backoff before retrying
sleep_time = backoff_factor * (2 ** attempt)
console.print(f"Retrying in {sleep_time} seconds...")
logging.info(f"Retrying segment {index} in {sleep_time} seconds...")
time.sleep(sleep_time)
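# Worked example of the backoff schedule above, assuming the default backoff_factor = 1.5:
#   attempt 0 -> 1.5 * 2**0 = 1.5 s
#   attempt 1 -> 1.5 * 2**1 = 3.0 s
#   attempt 2 -> 1.5 * 2**2 = 6.0 s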
def write_segments_to_file(self):
"""
Writes downloaded segments to a file in the correct order.
Writes segments to file with additional verification.
"""
with open(self.tmp_file_path, 'wb') as f:
expected_index = 0
buffer = {}
total_written = 0
segments_written = set()
while not self.stop_event.is_set() or not self.queue.empty():
try:
index, segment_content = self.queue.get(timeout=1)
# Handle failed segments
if segment_content is None:
if index == expected_index:
expected_index += 1
continue
# Write segment if it's the next expected one
if index == expected_index:
f.write(segment_content)
total_written += len(segment_content)
segments_written.add(index)
f.flush()
expected_index += 1
# Write any buffered segments in order
# Write any buffered segments that are now in order
while expected_index in buffer:
f.write(buffer.pop(expected_index))
f.flush()
next_segment = buffer.pop(expected_index)
if next_segment is not None:
f.write(next_segment)
total_written += len(next_segment)
segments_written.add(expected_index)
f.flush()
expected_index += 1
else:
buffer[index] = segment_content
except queue.Empty:
if self.stop_event.is_set():
break
continue
except Exception as e:
logging.error(f"Error writing segment {index}: {str(e)}")
continue
# Final verification
if total_written == 0:
raise Exception("No data written to file")
def download_streams(self, add_desc):
"""
Downloads all TS segments in parallel and writes them to a file.
@@ -375,37 +386,63 @@ class M3U8_Segments:
mininterval=0.05
)
# Start a separate thread to write segments to the file
# Start writer thread
writer_thread = threading.Thread(target=self.write_segments_to_file)
writer_thread.daemon = True
writer_thread.start()
# If proxies are available, set max_workers to the number of proxies
# else set max_workers to TQDM_MAX_WORKER
# Configure workers and delay
max_workers = len(self.valid_proxy) if THERE_IS_PROXY_LIST else TQDM_MAX_WORKER
delay = max(PROXY_START_MIN, min(PROXY_START_MAX, 1 / (len(self.valid_proxy) + 1))) if THERE_IS_PROXY_LIST else TQDM_DELAY_WORKER
# If proxies are available, set the delay dynamically
# else set the delay to TQDM_DELAY_WORKER
if THERE_IS_PROXY_LIST:
num_proxies = len(self.valid_proxy)
self.working_proxy_list = self.valid_proxy
if num_proxies > 0:
# Calculate the delay based on the number of proxies
# the delay should stay between 0.5 and 1
delay = max(PROXY_START_MIN, min(PROXY_START_MAX, 1 / (num_proxies + 1)))
else:
delay = TQDM_DELAY_WORKER
else:
delay = TQDM_DELAY_WORKER
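# Worked example of the clamp above, assuming PROXY_START_MIN = 0.5 and PROXY_START_MAX = 1:
#   3 proxies -> 1 / (3 + 1) = 0.25 -> raised to 0.5
#   1 proxy   -> 1 / (1 + 1) = 0.50 -> stays at 0.5
#   0 proxies -> falls back to TQDM_DELAY_WORKER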
# Start all workers
# Download segments with completion verification
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = []
for index, segment_url in enumerate(self.segments):
time.sleep(delay)
executor.submit(self.make_requests_stream, segment_url, index, progress_bar)
futures.append(executor.submit(self.make_requests_stream, segment_url, index, progress_bar))
# Wait for all tasks to complete
executor.shutdown(wait=True)
# Wait for all futures to complete
for future in futures:
try:
future.result()
except Exception as e:
logging.error(f"Error in download thread: {str(e)}")
# Verify completion and retry missing segments
total_segments = len(self.segments)
completed_segments = len(self.downloaded_segments)
if completed_segments < total_segments:
missing_segments = set(range(total_segments)) - self.downloaded_segments
logging.warning(f"Missing segments: {sorted(missing_segments)}")
# Retry missing segments
for index in missing_segments:
if self.stop_event.is_set():
break
try:
self.make_requests_stream(self.segments[index], index, progress_bar)
except Exception as e:
logging.error(f"Failed to retry segment {index}: {str(e)}")
# Clean up
self.stop_event.set()
writer_thread.join()
writer_thread.join(timeout=30)
progress_bar.close()
# Final verification
final_completion = (len(self.downloaded_segments) / total_segments) * 100
if final_completion < 99.9: # Less than 99.9% complete
missing = set(range(total_segments)) - self.downloaded_segments
raise Exception(f"Download incomplete ({final_completion:.1f}%). Missing segments: {sorted(missing)}")
# Verify output file
if not os.path.exists(self.tmp_file_path):
raise Exception("Output file missing")
file_size = os.path.getsize(self.tmp_file_path)
if file_size == 0:
raise Exception("Output file is empty")
logging.info(f"Download completed. File size: {file_size} bytes")
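
The new control flow above (submit every segment, collect the futures, retry whatever is still missing, then enforce a completion threshold) is a generic pattern. A self-contained sketch of that pattern, with fetch() standing in for make_requests_stream and all names chosen for illustration:

from concurrent.futures import ThreadPoolExecutor

def download_all(urls, fetch, max_workers=4, min_completion=0.999):
    if not urls:
        return
    done = set()

    def task(index, url):
        fetch(url)              # may raise; failed indices simply never reach `done`
        done.add(index)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(task, i, u) for i, u in enumerate(urls)]
        for future in futures:
            try:
                future.result() # surface worker exceptions without aborting the batch
            except Exception:
                pass

    # One retry pass over the segments that are still missing
    for index in sorted(set(range(len(urls))) - done):
        try:
            task(index, urls[index])
        except Exception:
            pass

    if len(done) / len(urls) < min_completion:
        missing = sorted(set(range(len(urls))) - done)
        raise RuntimeError(f"Download incomplete, missing indices: {missing}")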

View File

@@ -61,5 +61,4 @@ def update():
console.print(f"[red]{__title__} has been downloaded [yellow]{total_download_count} [red]times, but only [yellow]{percentual_stars}% [red]of users have starred it.\n\
[cyan]Help the repository grow today by leaving a [yellow]star [cyan]and [yellow]sharing [cyan]it with others online!")
console.print("\n")
time.sleep(4)
time.sleep(3)

View File

@@ -7,7 +7,7 @@
"clean_console": true,
"root_path": "Video",
"map_episode_name": "%(tv_name)_S%(season)E%(episode)_%(episode_name)",
"special_chars_to_remove": "!@#$%^&*()[]{}<>|`~'\";:,.?=+\u00e2\u20ac\u00a6",
"special_chars_to_remove": "!@#$%^&*()[]{}<>|`~'\";:,?=+\u00e2\u20ac\u00a6",
"config_qbit_tor": {
"host": "192.168.1.58",
"port": "8080",
@@ -18,7 +18,7 @@
"show_trending": false
},
"REQUESTS": {
"timeout": 15,
"timeout": 20,
"max_retry": 3,
"verify_ssl": true,
"user-agent": "",
@@ -59,8 +59,8 @@
},
"SITE": {
"streamingcommunity": {
"video_workers": 6,
"audio_workers": 6,
"video_workers": 7,
"audio_workers": 7,
"domain": "computer"
},
"altadefinizione": {

run.py
View File

@@ -136,10 +136,6 @@ def initialize():
def main():
# Create the root folder if it does not exist
folder_root_path = config_manager.get("DEFAULT", "root_path")
create_folder(folder_name=folder_root_path)
# Load search functions
search_functions = load_search_functions()