From e0f76fe93c86656e8d0a66f9fd276ce1e20297da Mon Sep 17 00:00:00 2001 From: Lovi <62809003+Lovi-0@users.noreply.github.com> Date: Mon, 6 Jan 2025 15:02:58 +0100 Subject: [PATCH] Update get_domain --- .github/workflows/python-script.yml | 2 +- README.md | 2 +- .../ilcorsaronero/util/ilCorsarScraper.py | 2 +- .../Api/Template/Util/get_domain.py | 79 ++++++++++++------- .../Lib/Downloader/TOR/downloader.py | 6 +- Test/call_updateDomain.py | 27 ++++--- config.json | 6 +- 7 files changed, 74 insertions(+), 50 deletions(-) diff --git a/.github/workflows/python-script.yml b/.github/workflows/python-script.yml index d2f3c37..c844bfa 100644 --- a/.github/workflows/python-script.yml +++ b/.github/workflows/python-script.yml @@ -2,7 +2,7 @@ name: Update domain on: schedule: - - cron: '0 0 * * *' + - cron: '0 */8 * * *' workflow_dispatch: jobs: diff --git a/README.md b/README.md index 9629e1f..cc31597 100644 --- a/README.md +++ b/README.md @@ -404,7 +404,7 @@ The `run-container` command mounts also the `config.json` file, so any change to | [Altadefinizione](https://altadefinizione.prof/) | ✅ | | [AnimeUnity](https://animeunity.so/) | ✅ | | [Ilcorsaronero](https://ilcorsaronero.link/) | ✅ | -| [CB01New](https://cb01new.pics/) | ✅ | +| [CB01New](https://cb01new.quest/) | ✅ | | [DDLStreamItaly](https://ddlstreamitaly.co/) | ✅ | | [GuardaSerie](https://guardaserie.academy/) | ✅ | | [MostraGuarda](https://mostraguarda.stream/) | ✅ | diff --git a/StreamingCommunity/Api/Site/ilcorsaronero/util/ilCorsarScraper.py b/StreamingCommunity/Api/Site/ilcorsaronero/util/ilCorsarScraper.py index 4d8049c..9c60757 100644 --- a/StreamingCommunity/Api/Site/ilcorsaronero/util/ilCorsarScraper.py +++ b/StreamingCommunity/Api/Site/ilcorsaronero/util/ilCorsarScraper.py @@ -54,7 +54,7 @@ class IlCorsaroNeroScraper: return response.text except Exception as e: - logging.error(f"Error fetching {url}: {e}") + logging.error(f"Error fetching from {url}: {e}") return None def parse_torrents(self, html: str) -> List[Dict[str, str]]: diff --git a/StreamingCommunity/Api/Template/Util/get_domain.py b/StreamingCommunity/Api/Template/Util/get_domain.py index 4eb8996..1186e97 100644 --- a/StreamingCommunity/Api/Template/Util/get_domain.py +++ b/StreamingCommunity/Api/Template/Util/get_domain.py @@ -1,6 +1,6 @@ # 18.06.24 -from urllib.parse import urlparse +from urllib.parse import urlparse, unquote # External libraries @@ -14,6 +14,21 @@ from StreamingCommunity.Util.console import console, msg from StreamingCommunity.Util._jsonConfig import config_manager +def get_tld(url_str): + """Extract the TLD (Top-Level Domain) from the URL without using external libraries.""" + url_str = unquote(url_str) + + parsed = urlparse(url_str) + domain = parsed.netloc.lower() + if domain.startswith('www.'): + domain = domain[4:] + + parts = domain.split('.') + + if len(parts) >= 2: + return parts[-1] + return None + def get_base_domain(url_str): """Extract base domain without protocol, www and path""" parsed = urlparse(url_str) @@ -49,7 +64,7 @@ def validate_url(url, base_url, max_timeout): ) as client: response = client.get(url) if not check_response(response, 1): - return False + return False, None # Check 2: Follow redirects and verify final domain console.print("[cyan]Checking redirect destination...") @@ -61,33 +76,32 @@ def validate_url(url, base_url, max_timeout): response = client.get(url) if not check_response(response, 2): - return False + return False, None # Compare base domains original_base = get_base_domain(url) final_base = get_base_domain(str(response.url)) console.print(f"[cyan]Comparing domains:") - console.print(f"Original base domain: [yellow]{original_base}") - console.print(f"Final base domain: [yellow]{final_base}") + console.print(f"Original base domain: [yellow]{original_base}.{get_tld(str(url))}") + console.print(f"Final base domain: [yellow]{final_base}.{get_tld(str(response.url))}") if original_base != final_base: - console.print(f"[red]Domain mismatch: Redirected to different base domain") - return False + return False, None - # Verify against expected base_url expected_base = get_base_domain(base_url) if final_base != expected_base: - console.print(f"[red]Domain mismatch: Final domain does not match expected base URL") - console.print(f"Expected: [yellow]{expected_base}") - return False + return False, None + + if get_tld(str(url)) != get_tld(str(response.url)): + return True, get_tld(str(response.url)) console.print(f"[green]All checks passed: URL is valid and matches expected domain") - return True + return True, None except Exception as e: console.print(f"[red]Error during validation: {str(e)}") - return False + return False, None def search_domain(site_name: str, base_url: str, get_first: bool = False): """ @@ -100,7 +114,15 @@ def search_domain(site_name: str, base_url: str, get_first: bool = False): console.print(f"\n[cyan]Testing initial URL[white]: [yellow]{test_url}") try: - if validate_url(test_url, base_url, max_timeout): + is_correct, redirect_tld = validate_url(test_url, base_url, max_timeout) + + if is_correct and redirect_tld is not None: + config_manager.config['SITE'][site_name]['domain'] = redirect_tld + config_manager.write_config() + console.print(f"[green]Successfully validated initial URL") + return redirect_tld, test_url + + if is_correct: parsed_url = urlparse(test_url) tld = parsed_url.netloc.split('.')[-1] config_manager.config['SITE'][site_name]['domain'] = tld @@ -114,24 +136,25 @@ def search_domain(site_name: str, base_url: str, get_first: bool = False): # Google search phase query = base_url.split("/")[-1] console.print(f"\n[cyan]Performing Google search for[white]: [yellow]{query}") - search_results = list(search(query, num_results=15, lang="it")) + search_results = list(search(query, num_results=20, lang="it")) for idx, result_url in enumerate(search_results, 1): - console.print(f"\n[cyan]Checking Google result {idx}/15[white]: [yellow]{result_url}") - - if validate_url(result_url, base_url, max_timeout): - parsed_result = urlparse(result_url) - new_domain = parsed_result.netloc.split(".")[-1] + if get_base_domain(result_url) == get_base_domain(test_url): + console.print(f"\n[cyan]Checking Google result {idx}/20[white]: [yellow]{result_url}") - if get_first or msg.ask( - f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'", - choices=["y", "n"], - default="y" - ).lower() == "y": + if validate_url(result_url, base_url, max_timeout): + parsed_result = urlparse(result_url) + new_domain = parsed_result.netloc.split(".")[-1] - config_manager.config['SITE'][site_name]['domain'] = new_domain - config_manager.write_config() - return new_domain, f"{base_url}.{new_domain}" + if get_first or msg.ask( + f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'", + choices=["y", "n"], + default="y" + ).lower() == "y": + + config_manager.config['SITE'][site_name]['domain'] = new_domain + config_manager.write_config() + return new_domain, f"{base_url}.{new_domain}" console.print("[bold red]No valid URLs found matching the base URL.") return domain, f"{base_url}.{domain}" \ No newline at end of file diff --git a/StreamingCommunity/Lib/Downloader/TOR/downloader.py b/StreamingCommunity/Lib/Downloader/TOR/downloader.py index c23b1bd..08bfd8e 100644 --- a/StreamingCommunity/Lib/Downloader/TOR/downloader.py +++ b/StreamingCommunity/Lib/Downloader/TOR/downloader.py @@ -139,9 +139,8 @@ class TOR_downloader: # Formatta e stampa le informazioni console.print("\n[bold green]🔗 Dettagli Torrent Aggiunto:[/bold green]") - console.print(f"[yellow]Nome:[/yellow] {torrent_info.get('name', torrent_name)}") + console.print(f"[yellow]Name:[/yellow] {torrent_info.get('name', torrent_name)}") console.print(f"[yellow]Hash:[/yellow] {torrent_info['hash']}") - console.print(f"[yellow]Dimensione:[/yellow] {internet_manager.format_file_size(torrent_info.get('size'))}") print() # Salva l'hash per usi successivi e il path @@ -288,7 +287,8 @@ class TOR_downloader: raise # Delete the torrent data - #self.qb.delete_permanently(self.qb.torrents()[-1]['hash']) + time.sleep(5) + self.qb.delete_permanently(self.qb.torrents()[-1]['hash']) return True except Exception as e: diff --git a/Test/call_updateDomain.py b/Test/call_updateDomain.py index f15fe2c..07186e3 100644 --- a/Test/call_updateDomain.py +++ b/Test/call_updateDomain.py @@ -118,16 +118,17 @@ if __name__ == "__main__": for alias, (site_name, use_for) in site_names.items(): original_domain = config_manager.get_list("SITE", alias)['domain'] - if site_name == "animeunity": - domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://www.{site_name}", get_first=True) - else: - domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://{site_name}", get_first=True) - - # Update readme - if original_domain != domain_to_use: - print("\n") - print("Return domain: ", domain_to_use) - update_readme(alias, domain_to_use) - - print("------------------------------------") - time.sleep(2) \ No newline at end of file + if site_name != "ilcorsaronero": + if site_name == "animeunity": + domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://www.{site_name}", get_first=True) + else: + domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://{site_name}", get_first=True) + + # Update readme + if original_domain != domain_to_use: + print("\n") + print("Return domain: ", domain_to_use) + update_readme(alias, domain_to_use) + + print("------------------------------------") + time.sleep(3) \ No newline at end of file diff --git a/config.json b/config.json index 9216015..9fdba69 100644 --- a/config.json +++ b/config.json @@ -10,7 +10,7 @@ "serie_folder_name": "TV", "map_episode_name": "E%(episode)_%(episode_name)", "config_qbit_tor": { - "host": "192.168.1.58", + "host": "192.168.1.99", "port": "7060", "user": "admin", "pass": "adminadmin" @@ -20,7 +20,7 @@ "not_close": false }, "REQUESTS": { - "timeout": 20, + "timeout": 35, "max_retry": 8, "proxy_start_min": 0.1, "proxy_start_max": 0.5 @@ -81,7 +81,7 @@ "domain": "so" }, "cb01new": { - "domain": "pics" + "domain": "quest" }, "1337xx": { "domain": "to"