From d929a79941670f9430259fb445dc94c2c0e814bc Mon Sep 17 00:00:00 2001 From: Lovi <62809003+Lovi-0@users.noreply.github.com> Date: Mon, 6 Jan 2025 18:50:24 +0100 Subject: [PATCH] Update get_domain --- .github/workflows/python-script.yml | 2 +- README.md | 2 +- .../Api/Template/Util/get_domain.py | 122 +++++++++--------- Test/call_updateDomain.py | 11 +- config.json | 2 +- 5 files changed, 69 insertions(+), 70 deletions(-) diff --git a/.github/workflows/python-script.yml b/.github/workflows/python-script.yml index c844bfa..a644411 100644 --- a/.github/workflows/python-script.yml +++ b/.github/workflows/python-script.yml @@ -2,7 +2,7 @@ name: Update domain on: schedule: - - cron: '0 */8 * * *' + - cron: '0 */6 * * *' workflow_dispatch: jobs: diff --git a/README.md b/README.md index df26349..cc31597 100644 --- a/README.md +++ b/README.md @@ -406,7 +406,7 @@ The `run-container` command mounts also the `config.json` file, so any change to | [Ilcorsaronero](https://ilcorsaronero.link/) | ✅ | | [CB01New](https://cb01new.quest/) | ✅ | | [DDLStreamItaly](https://ddlstreamitaly.co/) | ✅ | -| [GuardaSerie](https://guardaserie.help/) | ✅ | +| [GuardaSerie](https://guardaserie.academy/) | ✅ | | [MostraGuarda](https://mostraguarda.stream/) | ✅ | | [StreamingCommunity](https://streamingcommunity.prof/) | ✅ | diff --git a/StreamingCommunity/Api/Template/Util/get_domain.py b/StreamingCommunity/Api/Template/Util/get_domain.py index 1186e97..338c010 100644 --- a/StreamingCommunity/Api/Template/Util/get_domain.py +++ b/StreamingCommunity/Api/Template/Util/get_domain.py @@ -37,12 +37,12 @@ def get_base_domain(url_str): domain = domain[4:] return domain.split('.')[0] -def validate_url(url, base_url, max_timeout): +def validate_url(url, base_url, max_timeout, max_retries=5): """ - Validate if URL is accessible and matches expected base domain + Validate if URL is accessible and matches expected base domain, with retry mechanism for 403 errors. """ console.print(f"\n[cyan]Starting validation for URL[white]: [yellow]{url}") - + def check_response(response, check_num): if response.status_code == 403: console.print(f"[red]Check {check_num} failed: Access forbidden (403)") @@ -53,55 +53,63 @@ def validate_url(url, base_url, max_timeout): console.print(f"[green]Check {check_num} passed: HTTP {response.status_code}") return True - try: - - # Check 1: Initial request without following redirects - console.print("[cyan]Performing initial connection check...") - with httpx.Client( - headers={'User-Agent': get_headers()}, - follow_redirects=False, - timeout=max_timeout - ) as client: - response = client.get(url) - if not check_response(response, 1): - return False, None + retries = 0 - # Check 2: Follow redirects and verify final domain - console.print("[cyan]Checking redirect destination...") - with httpx.Client( - headers={'User-Agent': get_headers()}, - follow_redirects=True, - timeout=max_timeout - ) as client: - - response = client.get(url) - if not check_response(response, 2): - return False, None - - # Compare base domains - original_base = get_base_domain(url) - final_base = get_base_domain(str(response.url)) - - console.print(f"[cyan]Comparing domains:") - console.print(f"Original base domain: [yellow]{original_base}.{get_tld(str(url))}") - console.print(f"Final base domain: [yellow]{final_base}.{get_tld(str(response.url))}") - - if original_base != final_base: - return False, None - - expected_base = get_base_domain(base_url) - if final_base != expected_base: - return False, None + while retries < max_retries: + try: + # Check 1: Initial request without following redirects + #console.print("[cyan]Performing initial connection check...") + with httpx.Client( + headers={'User-Agent': get_headers()}, + follow_redirects=False, + timeout=max_timeout + ) as client: + response = client.get(url) + if not check_response(response, 1): + if response.status_code == 403: + retries += 1 + console.print(f"[yellow]Retrying... Attempt {retries}/{max_retries}") + continue # Retry on 403 error + return False, None - if get_tld(str(url)) != get_tld(str(response.url)): - return True, get_tld(str(response.url)) - - console.print(f"[green]All checks passed: URL is valid and matches expected domain") - return True, None - - except Exception as e: - console.print(f"[red]Error during validation: {str(e)}") - return False, None + # Check 2: Follow redirects and verify final domain + #console.print("[cyan]Checking redirect destination...") + with httpx.Client( + headers={'User-Agent': get_headers()}, + follow_redirects=True, + timeout=max_timeout + ) as client: + response = client.get(url) + if not check_response(response, 2): + return False, None + + # Compare base domains + original_base = get_base_domain(url) + final_base = get_base_domain(str(response.url)) + + """console.print(f"[cyan]Comparing domains:") + console.print(f"Original base domain: [yellow]{original_base}.{get_tld(str(url))}") + console.print(f"Final base domain: [yellow]{final_base}.{get_tld(str(response.url))}")""" + + if original_base != final_base: + return False, None + + expected_base = get_base_domain(base_url) + if final_base != expected_base: + return False, None + + if get_tld(str(url)) != get_tld(str(response.url)): + return True, get_tld(str(response.url)) + + #console.print(f"[green]All checks passed: URL is valid and matches expected domain") + return True, None + + except Exception as e: + console.print(f"[red]Error during validation: {str(e)}") + return False, None + + console.print(f"[red]Maximum retries reached for URL: {url}") + return False, None def search_domain(site_name: str, base_url: str, get_first: bool = False): """ @@ -111,10 +119,8 @@ def search_domain(site_name: str, base_url: str, get_first: bool = False): domain = str(config_manager.get_dict("SITE", site_name)['domain']) test_url = f"{base_url}.{domain}" - console.print(f"\n[cyan]Testing initial URL[white]: [yellow]{test_url}") - try: - is_correct, redirect_tld = validate_url(test_url, base_url, max_timeout) + is_correct, redirect_tld = validate_url(test_url, base_url, max_timeout, max_retries=5) if is_correct and redirect_tld is not None: config_manager.config['SITE'][site_name]['domain'] = redirect_tld @@ -129,7 +135,7 @@ def search_domain(site_name: str, base_url: str, get_first: bool = False): config_manager.write_config() console.print(f"[green]Successfully validated initial URL") return tld, test_url - + except Exception as e: console.print(f"[red]Error testing initial URL: {str(e)}") @@ -137,21 +143,21 @@ def search_domain(site_name: str, base_url: str, get_first: bool = False): query = base_url.split("/")[-1] console.print(f"\n[cyan]Performing Google search for[white]: [yellow]{query}") search_results = list(search(query, num_results=20, lang="it")) - + for idx, result_url in enumerate(search_results, 1): if get_base_domain(result_url) == get_base_domain(test_url): console.print(f"\n[cyan]Checking Google result {idx}/20[white]: [yellow]{result_url}") - + if validate_url(result_url, base_url, max_timeout): parsed_result = urlparse(result_url) new_domain = parsed_result.netloc.split(".")[-1] - + if get_first or msg.ask( f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'", choices=["y", "n"], default="y" ).lower() == "y": - + config_manager.config['SITE'][site_name]['domain'] = new_domain config_manager.write_config() return new_domain, f"{base_url}.{new_domain}" diff --git a/Test/call_updateDomain.py b/Test/call_updateDomain.py index 07186e3..a3cf3f5 100644 --- a/Test/call_updateDomain.py +++ b/Test/call_updateDomain.py @@ -97,14 +97,12 @@ def update_readme(site_names, domain_to_use): alias = f"{site_name.lower()}" if alias in site_names: - print("Update line: ", line) - if site_name == "animeunity": updated_line = f"| [{site_name}](https://www.{alias}.{domain_to_use}/) | ✅ |\n" else: updated_line = f"| [{site_name}](https://{alias}.{domain_to_use}/) | ✅ |\n" - print("To: ", updated_line.strip()) + print("Update: ", updated_line.strip()) updated_lines.append(updated_line) continue @@ -124,11 +122,6 @@ if __name__ == "__main__": else: domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://{site_name}", get_first=True) - # Update readme - if original_domain != domain_to_use: - print("\n") - print("Return domain: ", domain_to_use) - update_readme(alias, domain_to_use) - + update_readme(alias, domain_to_use) print("------------------------------------") time.sleep(3) \ No newline at end of file diff --git a/config.json b/config.json index 00baa84..9fdba69 100644 --- a/config.json +++ b/config.json @@ -64,7 +64,7 @@ "domain": "prof" }, "guardaserie": { - "domain": "help" + "domain": "academy" }, "mostraguarda": { "domain": "stream"