From 7b94b17d2dabffe320c51f8d0ab1627e1e8b9f08 Mon Sep 17 00:00:00 2001
From: Lovi <62809003+Lovi-0@users.noreply.github.com>
Date: Mon, 6 Jan 2025 22:17:24 +0100
Subject: [PATCH] Update get_domain

---
 README.md                                     |   2 +-
 StreamingCommunity/Api/Site/1337xx/site.py    |   2 +-
 .../Api/Site/altadefinizione/site.py          |   2 +-
 .../Api/Site/animeunity/site.py               |   2 +-
 StreamingCommunity/Api/Site/cb01new/site.py   |   2 +-
 .../Api/Site/ddlstreamitaly/site.py           |   2 +-
 .../Api/Site/guardaserie/site.py              |   2 +-
 .../Api/Site/ilcorsaronero/site.py            |   2 +-
 .../Api/Site/streamingcommunity/site.py       |   2 +-
 .../Api/Template/Util/get_domain.py           | 229 +++++++++---------
 Test/call_updateDomain.py                     |   2 +-
 config.json                                   |   2 +-
 12 files changed, 121 insertions(+), 130 deletions(-)

diff --git a/README.md b/README.md
index eabde7a..cc31597 100644
--- a/README.md
+++ b/README.md
@@ -406,7 +406,7 @@ The `run-container` command mounts also the `config.json` file, so any change to
 | [Ilcorsaronero](https://ilcorsaronero.link/) |   ✅   |
 | [CB01New](https://cb01new.quest/) |   ✅   |
 | [DDLStreamItaly](https://ddlstreamitaly.co/) |   ✅   |
-| [GuardaSerie](https://guardaserie.com/) |   ✅   |
+| [GuardaSerie](https://guardaserie.academy/) |   ✅   |
 | [MostraGuarda](https://mostraguarda.stream/) |   ✅   |
 | [StreamingCommunity](https://streamingcommunity.prof/) |   ✅   |
 
diff --git a/StreamingCommunity/Api/Site/1337xx/site.py b/StreamingCommunity/Api/Site/1337xx/site.py
index 6b9a54c..4ad4590 100644
--- a/StreamingCommunity/Api/Site/1337xx/site.py
+++ b/StreamingCommunity/Api/Site/1337xx/site.py
@@ -43,7 +43,7 @@ def title_search(word_to_search: str) -> int:
     domain_to_use = DOMAIN_NOW
 
     if not disable_searchDomain:
-        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
 
     # Construct the full site URL and load the search page
     try:
diff --git a/StreamingCommunity/Api/Site/altadefinizione/site.py b/StreamingCommunity/Api/Site/altadefinizione/site.py
index 3045fde..20b89fd 100644
--- a/StreamingCommunity/Api/Site/altadefinizione/site.py
+++ b/StreamingCommunity/Api/Site/altadefinizione/site.py
@@ -43,7 +43,7 @@ def title_search(title_search: str) -> int:
     domain_to_use = DOMAIN_NOW
     
     if not disable_searchDomain:
-        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
         
     # Send request to search for title
     client = httpx.Client()
diff --git a/StreamingCommunity/Api/Site/animeunity/site.py b/StreamingCommunity/Api/Site/animeunity/site.py
index 0e6d949..c83238c 100644
--- a/StreamingCommunity/Api/Site/animeunity/site.py
+++ b/StreamingCommunity/Api/Site/animeunity/site.py
@@ -110,7 +110,7 @@ def title_search(title: str) -> int:
     domain_to_use = DOMAIN_NOW
     
     if not disable_searchDomain:
-        domain_to_use, base_url = search_domain(SITE_NAME, f"https://www.{SITE_NAME}")
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://www.{SITE_NAME}.{DOMAIN_NOW}")
     
     data = get_token(SITE_NAME, domain_to_use)
 
diff --git a/StreamingCommunity/Api/Site/cb01new/site.py b/StreamingCommunity/Api/Site/cb01new/site.py
index 20f7173..0b3f910 100644
--- a/StreamingCommunity/Api/Site/cb01new/site.py
+++ b/StreamingCommunity/Api/Site/cb01new/site.py
@@ -42,7 +42,7 @@ def title_search(word_to_search: str) -> int:
     domain_to_use = DOMAIN_NOW
     
     if not disable_searchDomain:
-        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
 
     response = httpx.get(
         url=f"https://{SITE_NAME}.{domain_to_use}/?s={word_to_search}",
diff --git a/StreamingCommunity/Api/Site/ddlstreamitaly/site.py b/StreamingCommunity/Api/Site/ddlstreamitaly/site.py
index ab46b72..3fceee0 100644
--- a/StreamingCommunity/Api/Site/ddlstreamitaly/site.py
+++ b/StreamingCommunity/Api/Site/ddlstreamitaly/site.py
@@ -46,7 +46,7 @@ def title_search(word_to_search: str) -> int:
     domain_to_use = DOMAIN_NOW
     
     if not disable_searchDomain:
-        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
 
     # Send request to search for titles
     try:
diff --git a/StreamingCommunity/Api/Site/guardaserie/site.py b/StreamingCommunity/Api/Site/guardaserie/site.py
index 25e9eae..8611b19 100644
--- a/StreamingCommunity/Api/Site/guardaserie/site.py
+++ b/StreamingCommunity/Api/Site/guardaserie/site.py
@@ -43,7 +43,7 @@ def title_search(word_to_search: str) -> int:
     domain_to_use = DOMAIN_NOW
 
     if not disable_searchDomain:
-        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
 
     # Send request to search for titles
     try:
diff --git a/StreamingCommunity/Api/Site/ilcorsaronero/site.py b/StreamingCommunity/Api/Site/ilcorsaronero/site.py
index 14df4c0..90aa769 100644
--- a/StreamingCommunity/Api/Site/ilcorsaronero/site.py
+++ b/StreamingCommunity/Api/Site/ilcorsaronero/site.py
@@ -38,7 +38,7 @@ async def title_search(word_to_search: str) -> int:
     domain_to_use = DOMAIN_NOW
     
     if not disable_searchDomain:
-        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
 
     # Create scraper and collect result
     print("\n")
diff --git a/StreamingCommunity/Api/Site/streamingcommunity/site.py b/StreamingCommunity/Api/Site/streamingcommunity/site.py
index c85ac69..ba75c2a 100644
--- a/StreamingCommunity/Api/Site/streamingcommunity/site.py
+++ b/StreamingCommunity/Api/Site/streamingcommunity/site.py
@@ -81,7 +81,7 @@ def get_version_and_domain():
     domain_to_use = DOMAIN_NOW
 
     if not disable_searchDomain:
-        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
 
     version = get_version(domain_to_use)
         
diff --git a/StreamingCommunity/Api/Template/Util/get_domain.py b/StreamingCommunity/Api/Template/Util/get_domain.py
index 6f6ed25..e4088e2 100644
--- a/StreamingCommunity/Api/Template/Util/get_domain.py
+++ b/StreamingCommunity/Api/Template/Util/get_domain.py
@@ -1,5 +1,8 @@
 # 18.06.24
 
+import ssl
+import time
+import certifi
 from urllib.parse import urlparse, unquote
 
 
@@ -26,158 +29,146 @@ base_headers = {
     'sec-fetch-site': 'none',
     'sec-fetch-user': '?1',
     'upgrade-insecure-requests': '1',
-    'user-agent': None
+    'user-agent': ''
 }
 
 
 def get_tld(url_str):
-    """Extract the TLD (Top-Level Domain) from the URL without using external libraries."""
-    url_str = unquote(url_str)
-    
-    parsed = urlparse(url_str)
-    domain = parsed.netloc.lower()
-    if domain.startswith('www.'):
-        domain = domain[4:]
-    
-    parts = domain.split('.')
-    
-    if len(parts) >= 2:
-        return parts[-1]
-    return None
+    """Extract the TLD (Top-Level Domain) from the URL."""
+    try:
+        url_str = unquote(url_str)
+        parsed = urlparse(url_str)
+        domain = parsed.netloc.lower()
+        if domain.startswith('www.'):
+            domain = domain[4:]
+        parts = domain.split('.')
+        return parts[-1] if len(parts) >= 2 else None
+    except Exception:
+        return None
 
 def get_base_domain(url_str):
-    """Extract base domain without protocol, www and path"""
-    parsed = urlparse(url_str)
-    domain = parsed.netloc.lower()
-    if domain.startswith('www.'):
-        domain = domain[4:]
-    return domain.split('.')[0]
+    """Extract base domain without protocol, www and path."""
+    try:
+        parsed = urlparse(url_str)
+        domain = parsed.netloc.lower()
+        if domain.startswith('www.'):
+            domain = domain[4:]
+        # Check if domain has multiple parts separated by dots
+        parts = domain.split('.')
+        if len(parts) > 2:  # Handle subdomains
+            return '.'.join(parts[:-1])  # Return everything except TLD
+        return parts[0]  # Return base domain
+    except Exception:
+        return None
 
 def validate_url(url, base_url, max_timeout, max_retries=5):
-    """
-    Validate if URL is accessible and matches expected base domain, with retry mechanism for 403 errors.
-    """
+    """Validate if URL is accessible and matches expected base domain."""
     console.print(f"\n[cyan]Starting validation for URL[white]: [yellow]{url}")
+    
+    # Verify URL structure matches base_url structure
+    base_domain = get_base_domain(base_url)
+    url_domain = get_base_domain(url)
+    base_headers['user-agent'] = get_headers()
+    
+    if base_domain != url_domain:
+        console.print(f"[red]Domain structure mismatch: {url_domain} != {base_domain}")
+        return False, None
+    
+    # Count dots to ensure we don't have extra subdomains
+    base_dots = base_url.count('.')
+    url_dots = url.count('.')
+    if url_dots > base_dots + 1:  # Allow for one extra dot for TLD change
+        console.print(f"[red]Too many subdomains in URL")
+        return False, None
 
-    def check_response(response, check_num):
-        if response.status_code == 403:
-            console.print(f"[red]Check {check_num} failed: Access forbidden (403)")
-            return False
-        if response.status_code >= 400:
-            console.print(f"[red]Check {check_num} failed: HTTP {response.status_code}")
-            return False
-        console.print(f"[green]Check {check_num} passed: HTTP {response.status_code}")
-        return True
+    client = httpx.Client(
+        verify=certifi.where(),
+        headers=base_headers,
+        timeout=max_timeout
+    )
 
-    retries = 0
-
-    while retries < max_retries:
+    for retry in range(max_retries):
         try:
-            # Check 1: Initial request without following redirects
-            #console.print("[cyan]Performing initial connection check...")
-            base_headers['user-agent'] = get_headers()
-
-            with httpx.Client(
-                headers=base_headers,
-                follow_redirects=False,
-                timeout=max_timeout
-            ) as client:
-                response = client.get(url)
-                if not check_response(response, 1):
-                    if response.status_code == 403:
-                        retries += 1
-                        console.print(f"[yellow]Retrying... Attempt {retries}/{max_retries}")
-                        continue  # Retry on 403 error
-                    return False, None
-
-            # Check 2: Follow redirects and verify final domain
-            #console.print("[cyan]Checking redirect destination...")
-            with httpx.Client(
-                headers=base_headers,
-                follow_redirects=True,
-                timeout=max_timeout
-            ) as client:
-                response = client.get(url)
-                if not check_response(response, 2):
-                    return False, None
-
-                # Compare base domains
-                original_base = get_base_domain(url)
-                final_base = get_base_domain(str(response.url))
-
-                """console.print(f"[cyan]Comparing domains:")
-                console.print(f"Original base domain: [yellow]{original_base}.{get_tld(str(url))}")
-                console.print(f"Final base domain: [yellow]{final_base}.{get_tld(str(response.url))}")"""
-
-                if original_base != final_base:
-                    return False, None
-
-                expected_base = get_base_domain(base_url)
-                if final_base != expected_base:
-                    return False, None
-
-                if get_tld(str(url)) != get_tld(str(response.url)):
-                    return True, get_tld(str(response.url))
-
-                #console.print(f"[green]All checks passed: URL is valid and matches expected domain")
-                return True, None
-
-        except Exception as e:
-            console.print(f"[red]Error during validation: {str(e)}")
-            return False, None
-
-    console.print(f"[red]Maximum retries reached for URL: {url}")
+            time.sleep(2)  # Add delay between retries
+            
+            # Initial check without redirects
+            response = client.get(url, follow_redirects=False)
+            if response.status_code == 403:
+                console.print(f"[red]Check failed (403) - Attempt {retry + 1}/{max_retries}")
+                continue
+                
+            if response.status_code >= 400:
+                console.print(f"[red]Check failed: HTTP {response.status_code}")
+                return False, None
+                
+            # Follow redirects and verify final domain
+            final_response = client.get(url, follow_redirects=True)
+            final_domain = get_base_domain(str(final_response.url))
+            console.print(f"[cyan]Redirect url: [red]{final_response.url}")
+            
+            if final_domain != base_domain:
+                console.print(f"[red]Final domain mismatch: {final_domain} != {base_domain}")
+                return False, None
+                
+            new_tld = get_tld(str(final_response.url))
+            if new_tld != get_tld(url):
+                return True, new_tld
+                
+            return True, None
+            
+        except (httpx.RequestError, ssl.SSLError) as e:
+            console.print(f"[red]Connection error: {str(e)}")
+            time.sleep(2)  # Add delay after error
+            continue
+            
     return False, None
 
 def search_domain(site_name: str, base_url: str, get_first: bool = False):
-    """
-    Search for valid domain matching site name and base URL.
-    """
+    """Search for valid domain matching site name and base URL."""
     max_timeout = config_manager.get_int("REQUESTS", "timeout")
     domain = str(config_manager.get_dict("SITE", site_name)['domain'])
-
+    
+    # Test initial URL
     try:
-        is_correct, redirect_tld = validate_url(base_url, base_url, max_timeout, max_retries=5)
-
-        if is_correct and redirect_tld is not None:
-            config_manager.config['SITE'][site_name]['domain'] = redirect_tld
-            config_manager.write_config()
-            console.print(f"[green]Successfully validated initial URL")
-            return redirect_tld, base_url
-
+        is_correct, redirect_tld = validate_url(base_url, base_url, max_timeout)
         if is_correct:
-            parsed_url = urlparse(base_url)
-            tld = parsed_url.netloc.split('.')[-1]
+            tld = redirect_tld or get_tld(base_url)
             config_manager.config['SITE'][site_name]['domain'] = tld
             config_manager.write_config()
             console.print(f"[green]Successfully validated initial URL")
             return tld, base_url
-
     except Exception as e:
         console.print(f"[red]Error testing initial URL: {str(e)}")
 
     # Google search phase
-    query = base_url.split("/")[-1]
-    console.print(f"\n[cyan]Performing Google search for[white]: [yellow]{query}")
-    search_results = list(search(query, num_results=20, lang="it"))
-
-    for idx, result_url in enumerate(search_results, 1):
-        if get_base_domain(result_url) == get_base_domain(base_url):
-            console.print(f"\n[cyan]Checking Google result {idx}/20[white]: [yellow]{result_url}")
-
-            if validate_url(result_url, base_url, max_timeout):
-                parsed_result = urlparse(result_url)
-                new_domain = parsed_result.netloc.split(".")[-1]
+    base_domain = get_base_domain(base_url)
+    console.print(f"\n[cyan]Searching for alternate domains for[white]: [yellow]{base_domain}")
+    
+    try:
+        search_results = list(search(base_domain, num_results=20, lang="it"))
+        filtered_results = [
+            url for url in search_results 
+            if get_base_domain(url) == base_domain 
+            and url.count('.') <= base_url.count('.') + 1
+        ]
 
+        for idx, result_url in enumerate(filtered_results, 1):
+            console.print(f"\n[cyan]Checking result {idx}/{len(filtered_results)}[white]: [yellow]{result_url}")
+            
+            is_valid, new_tld = validate_url(result_url, base_url, max_timeout)
+            if is_valid:
+                final_tld = new_tld or get_tld(result_url)
                 if get_first or msg.ask(
-                    f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'",
+                    f"\n[cyan]Update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{final_tld}'",
                     choices=["y", "n"],
                     default="y"
                 ).lower() == "y":
-
-                    config_manager.config['SITE'][site_name]['domain'] = new_domain
+                    config_manager.config['SITE'][site_name]['domain'] = final_tld
                     config_manager.write_config()
-                    return new_domain, f"{base_url}.{new_domain}"
+                    return final_tld, f"{base_url}.{final_tld}"
+                    
+    except Exception as e:
+        console.print(f"[red]Error during search: {str(e)}")
 
     console.print("[bold red]No valid URLs found matching the base URL.")
     return domain, f"{base_url}.{domain}"
\ No newline at end of file
diff --git a/Test/call_updateDomain.py b/Test/call_updateDomain.py
index 355792b..0fb5d6d 100644
--- a/Test/call_updateDomain.py
+++ b/Test/call_updateDomain.py
@@ -123,4 +123,4 @@ if __name__ == "__main__":
                 
             update_readme(alias, domain_to_use)
             print("------------------------------------")
-            time.sleep(3)
\ No newline at end of file
+            time.sleep(2)
\ No newline at end of file
diff --git a/config.json b/config.json
index 70f3e81..9fdba69 100644
--- a/config.json
+++ b/config.json
@@ -64,7 +64,7 @@
             "domain": "prof"
         },
         "guardaserie": {
-            "domain": "com"
+            "domain": "academy"
         },
         "mostraguarda": {
             "domain": "stream"