From d076073a08c32185fb836189b4866ec34f296acb Mon Sep 17 00:00:00 2001
From: Lovi <62809003+Lovi-0@users.noreply.github.com>
Date: Sat, 28 Dec 2024 15:03:03 +0100
Subject: [PATCH] Update domain

---
 .../ilcorsaronero/util/ilCorsarScraper.py     | 20 +++--
 .../Api/Site/streamingcommunity/film.py       |  2 +-
 .../Api/Template/Util/get_domain.py           | 23 ++---
 Test/call_updateDomain.py                     | 90 +++++++++++++++++++
 config.json                                   |  2 +-
 5 files changed, 118 insertions(+), 19 deletions(-)
 create mode 100644 Test/call_updateDomain.py

diff --git a/StreamingCommunity/Api/Site/ilcorsaronero/util/ilCorsarScraper.py b/StreamingCommunity/Api/Site/ilcorsaronero/util/ilCorsarScraper.py
index 90804fa..4d8049c 100644
--- a/StreamingCommunity/Api/Site/ilcorsaronero/util/ilCorsarScraper.py
+++ b/StreamingCommunity/Api/Site/ilcorsaronero/util/ilCorsarScraper.py
@@ -25,13 +25,21 @@ class IlCorsaroNeroScraper:
         self.base_url = base_url
         self.max_page = max_page
         self.headers = {
-            'User-Agent': get_headers(),
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
-            'Accept-Language': 'en-US,en;q=0.5',
-            'Connection': 'keep-alive',
-            'Upgrade-Insecure-Requests': '1'
+            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+            'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
+            'cache-control': 'max-age=0',
+            'priority': 'u=0, i',
+            'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"Windows"',
+            'sec-fetch-dest': 'document',
+            'sec-fetch-mode': 'navigate',
+            'sec-fetch-site': 'same-origin',
+            'sec-fetch-user': '?1',
+            'upgrade-insecure-requests': '1',
+            'user-agent': get_headers()
         }
-        
+
     async def fetch_url(self, url: str) -> Optional[str]:
         """
         Fetch the HTML content of a given URL.
diff --git a/StreamingCommunity/Api/Site/streamingcommunity/film.py b/StreamingCommunity/Api/Site/streamingcommunity/film.py
index 54625c0..2f1fe05 100644
--- a/StreamingCommunity/Api/Site/streamingcommunity/film.py
+++ b/StreamingCommunity/Api/Site/streamingcommunity/film.py
@@ -39,7 +39,7 @@ def download_film(select_title: MediaItem) -> str:
 
     # Start message and display film information
     start_message()
-    console.print(f"[yellow]Download: [red]{select_title.slug} \n")
+    console.print(f"[yellow]Download: [red]{select_title.name} \n")
 
     # Init class
     video_source = VideoSource(SITE_NAME, False)
diff --git a/StreamingCommunity/Api/Template/Util/get_domain.py b/StreamingCommunity/Api/Template/Util/get_domain.py
index 205e306..aa1abb4 100644
--- a/StreamingCommunity/Api/Template/Util/get_domain.py
+++ b/StreamingCommunity/Api/Template/Util/get_domain.py
@@ -76,17 +76,19 @@ def get_final_redirect_url(initial_url, max_timeout):
         console.print(f"\n[cyan]Test url[white]: [red]{initial_url}, [cyan]error[white]: [red]{e}")
         return None
 
-def search_domain(site_name: str, base_url: str):
+def search_domain(site_name: str, base_url: str, get_first: bool = False):
     """
     Search for a valid domain for the given site name and base URL.
 
     Parameters:
         - site_name (str): The name of the site to search the domain for.
        - base_url (str): The base URL to construct complete URLs.
+        - get_first (bool): If True, automatically update to the first valid match without user confirmation.
 
     Returns:
         tuple: The found domain and the complete URL.
     """
+    # Extract config domain
     max_timeout = config_manager.get_int("REQUESTS", "timeout")
     domain = str(config_manager.get_dict("SITE", site_name)['domain'])
 
@@ -107,10 +109,10 @@ def search_domain(site_name: str, base_url: str):
 
     except Exception as e:
         query = base_url.split("/")[-1]
-        
+
         # Perform a Google search with multiple results
         search_results = list(search(query, num_results=10, lang="it"))
-        console.print(f"\nGoogle search results: {search_results}")
+        #console.print(f"\nGoogle search results: {search_results}")
 
         def normalize_for_comparison(url):
             """Normalize URL by removing protocol, www, and trailing slashes"""
@@ -121,15 +123,15 @@ def search_domain(site_name: str, base_url: str):
 
         # Normalize the base_url we're looking for
         target_url = normalize_for_comparison(base_url)
-        
+
         # Iterate through search results
         for first_url in search_results:
             console.print(f"[green]Checking url[white]: [red]{first_url}")
-            
+
             # Get just the domain part of the search result
             parsed_result = urlparse(first_url)
             result_domain = normalize_for_comparison(parsed_result.netloc)
-            
+
             # Compare with our target URL (without the protocol part)
             if result_domain.startswith(target_url.split("/")[-1]):
                 try:
@@ -143,21 +145,20 @@ def search_domain(site_name: str, base_url: str):
 
                     new_domain_extract = extract_domain(str(final_url))
 
-                    if msg.ask(f"\n[cyan]Do you want to auto update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain_extract}'.", choices=["y", "n"], default="y").lower() == "y":
-                        
+                    if get_first or msg.ask(f"\n[cyan]Do you want to auto update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain_extract}'.", choices=["y", "n"], default="y").lower() == "y":
                         # Update domain in config.json
                         config_manager.config['SITE'][site_name]['domain'] = new_domain_extract
                         config_manager.write_config()
 
                         return new_domain_extract, f"{base_url}.{new_domain_extract}"
-                    
+
                 except Exception as redirect_error:
                     console.print(f"[red]Error following redirect for {first_url}: {redirect_error}")
                     continue
 
-        # If no matching URL is found
+        # If no matching URL is found return base domain
         console.print("[bold red]No valid URL found matching the base URL.[/bold red]")
-        raise Exception("No matching domain found")
+        return domain, f"{base_url}.{domain}"
 
     # Handle successful initial domain check
     parsed_url = urlparse(str(response_follow.url))
diff --git a/Test/call_updateDomain.py b/Test/call_updateDomain.py
new file mode 100644
index 0000000..0f03936
--- /dev/null
+++ b/Test/call_updateDomain.py
@@ -0,0 +1,90 @@
+# 12.11.24
+
+# Fix import
+import os
+import sys
+src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+sys.path.append(src_path)
+
+
+# Other
+import glob
+import logging
+import importlib
+from rich.console import Console
+
+
+# Util
+from StreamingCommunity.Api.Template.Util import search_domain
+
+
+# Variable
+console = Console()
+
+
+def load_site_names():
+    modules = []
+    site_names = {}
+
+    # Traverse the Api directory
+    api_dir = os.path.join(os.path.dirname(__file__), '..', 'StreamingCommunity', 'Api', 'Site')
+    init_files = glob.glob(os.path.join(api_dir, '*', '__init__.py'))
+
+    # Retrieve modules and their indices
+    for init_file in init_files:
+
+        # Get folder name as module name
+        module_name = os.path.basename(os.path.dirname(init_file))
+        logging.info(f"Load module name: {module_name}")
+
+        try:
+            # Dynamically import the module
+            mod = importlib.import_module(f'StreamingCommunity.Api.Site.{module_name}')
+
+            # Get 'indice' from the module
+            indice = getattr(mod, 'indice', 0)
+            is_deprecate = bool(getattr(mod, '_deprecate', True))
+            use_for = getattr(mod, '_useFor', 'other')
+
+            if not is_deprecate:
+                modules.append((module_name, indice, use_for))
+
+        except Exception as e:
+            console.print(f"[red]Failed to import module {module_name}: {str(e)}")
+
+    # Sort modules by 'indice'
+    modules.sort(key=lambda x: x[1])
+
+    # Load SITE_NAME from each module in the sorted order
+    for module_name, _, use_for in modules:
+
+        # Construct a unique alias for the module
+        module_alias = f'{module_name}_site_name'
+        logging.info(f"Module alias: {module_alias}")
+
+        try:
+            # Dynamically import the module
+            mod = importlib.import_module(f'StreamingCommunity.Api.Site.{module_name}')
+
+            # Get the SITE_NAME variable from the module
+            site_name = getattr(mod, 'SITE_NAME', None)
+
+            if site_name:
+                # Add the SITE_NAME to the dictionary
+                site_names[module_alias] = (site_name, use_for)
+
+        except Exception as e:
+            console.print(f"[red]Failed to load SITE_NAME from module {module_name}: {str(e)}")
+
+    return site_names
+
+if __name__ == "__main__":
+    site_names = load_site_names()
+    for alias, (site_name, use_for) in site_names.items():
+        print(f"Alias: {alias}, SITE_NAME: {site_name}, Use for: {use_for}")
+
+        if site_name == "animeunity":
+            domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://www.{site_name}", get_first=True)
+
+        else:
+            domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://{site_name}", get_first=True)
\ No newline at end of file
diff --git a/config.json b/config.json
index 1d66c03..1b6b4d8 100644
--- a/config.json
+++ b/config.json
@@ -79,7 +79,7 @@
             "domain": "to"
         },
         "cb01new": {
-            "domain": "icu"
+            "domain": "stream"
         },
         "1337xx": {
             "domain": "to"
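
A minimal usage sketch of the updated search_domain helper, assuming the repository package is importable from the repo root; the "streamingcommunity" site key is only an illustrative example, not a value taken from this patch.

    # Usage sketch (assumption: dependencies installed, run from the repository root).
    from StreamingCommunity.Api.Template.Util import search_domain

    # Interactive behaviour (unchanged): prompts before rewriting config.json.
    domain, full_url = search_domain(
        site_name="streamingcommunity",        # example site key from config.json
        base_url="https://streamingcommunity"  # base URL without the TLD
    )

    # New unattended behaviour: get_first=True accepts the first matching search
    # result and updates config.json without prompting; when no match is found,
    # the function now returns the domain already stored in config.json instead
    # of raising an exception.
    domain, full_url = search_domain(
        site_name="streamingcommunity",
        base_url="https://streamingcommunity",
        get_first=True
    )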