Remove domain database

Lovi 2025-05-31 10:52:16 +02:00
parent 86c7293779
commit ded66f446e
4 changed files with 331 additions and 18 deletions

.github/.domain/domain_update.py vendored Normal file

@@ -0,0 +1,253 @@
# 20.04.2024

import os
import re
import time
import json
from datetime import datetime
from urllib.parse import urlparse, urlunparse

import httpx
import ua_generator

JSON_FILE_PATH = os.path.join(".github", ".domain", "domains.json")


def load_domains(file_path):
    """Load the domain database from disk, returning None on any error."""
    if not os.path.exists(file_path):
        print(f"Error: The file {file_path} was not found.")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    except Exception as e:
        print(f"Error reading the file {file_path}: {e}")
        return None


def save_domains(file_path, data):
    """Write the (possibly updated) domain database back to disk."""
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        print(f"Data successfully saved to {file_path}")

    except Exception as e:
        print(f"Error saving the file {file_path}: {e}")


def get_new_tld(full_url):
    """Return the last label of the URL's hostname (e.g. 'to' for example.to)."""
    try:
        parsed_url = urlparse(full_url)
        hostname = parsed_url.hostname
        if hostname:
            parts = hostname.split('.')
            return parts[-1]

    except Exception:
        pass

    return None


def try_url_with_retries(url_to_try, headers, timeout=15, retries=3, backoff_factor=0.5):
    """GET url_to_try with exponential backoff; on 403/429/503 (suspected
    Cloudflare), try to recover the real origin from a <base href> tag."""
    for attempt in range(retries):
        try:
            with httpx.Client(headers=headers, timeout=timeout, follow_redirects=True) as client:
                response = client.get(url_to_try)
                response.raise_for_status()
                return response

        except (httpx.TimeoutException, httpx.ConnectError) as e:
            print(f" [!] Attempt {attempt + 1}/{retries} for {url_to_try}: Network error ({type(e).__name__}). Retrying in {backoff_factor * (2 ** attempt)}s...")
            if attempt + 1 == retries:
                print(f" [!] Failed all {retries} attempts for {url_to_try} due to {type(e).__name__}.")
                return None
            time.sleep(backoff_factor * (2 ** attempt))

        except httpx.HTTPStatusError as http_err:
            if http_err.response.status_code in [403, 429, 503]:
                print(f" [!] HTTP error {http_err.response.status_code} for {url_to_try}. Suspected Cloudflare, checking for <base href>...")
                try:
                    # Re-fetch without following redirects so the challenge page body is available.
                    with httpx.Client(headers=headers, timeout=timeout, follow_redirects=False) as cf_client:
                        cf_page_response = cf_client.get(url_to_try)
                        if cf_page_response.status_code != http_err.response.status_code and not (200 <= cf_page_response.status_code < 300):
                            cf_page_response.raise_for_status()

                        match = re.search(r'<base\s+href="([^"]+)"', cf_page_response.text, re.IGNORECASE)
                        if match:
                            base_href_url = match.group(1)
                            parsed_base_href = urlparse(base_href_url)
                            # A relative <base href> is resolved against the original URL.
                            if not parsed_base_href.scheme or not parsed_base_href.netloc:
                                original_parsed_url = urlparse(url_to_try)
                                base_href_url = urlunparse(original_parsed_url._replace(path=base_href_url if base_href_url.startswith('/') else '/' + base_href_url, query='', fragment=''))

                            print(f" [+] Found <base href>: {base_href_url}")
                            try:
                                print(f" [] Attempting request to <base href> URL: {base_href_url}")
                                with httpx.Client(headers=headers, timeout=timeout, follow_redirects=True) as base_client:
                                    final_response_from_base = base_client.get(base_href_url)
                                    final_response_from_base.raise_for_status()
                                    print(" [+] Successfully fetched from <base href> URL.")
                                    return final_response_from_base

                            except httpx.RequestError as base_req_e:
                                print(f" [!] Error requesting <base href> URL {base_href_url}: {base_req_e}")
                                return None
                        else:
                            print(f" [!] No <base href> found in page content for {url_to_try}.")
                            return None

                except httpx.RequestError as cf_req_e:
                    print(f" [!] Error fetching Cloudflare-like page content for {url_to_try}: {cf_req_e}")
                    return None
            else:
                print(f" [!] HTTP error {http_err.response.status_code} for {url_to_try}. No retry.")
                return None

        except httpx.RequestError as e:
            print(f" [!] Generic error for {url_to_try}: {e}. No retry.")
            return None

    return None


def update_domain_entries(data):
    """Probe every stored URL (plus www/no-www, trailing-slash and HTTP
    variants) and record any domain change. Returns True if anything changed."""
    if not data:
        return False

    updated_count = 0

    for key, entry in data.items():
        print(f"\n--- [DOMAIN] {key} ---")
        original_full_url = entry.get("full_url")
        original_domain_in_entry = entry.get("domain")

        if not original_full_url:
            print(" [!] 'full_url' missing. Skipped.")
            continue

        # A fresh randomized User-Agent per entry reduces the chance of blocking.
        ua = ua_generator.generate(device=('desktop', 'mobile'), browser=('chrome', 'edge', 'firefox', 'safari'))
        current_headers = ua.headers.get()

        print(f" [] Stored URL: {original_full_url}")
        if original_domain_in_entry:
            print(f" [] Stored Domain (TLD): {original_domain_in_entry}")

        # Build the candidate list: the stored URL plus simple variations.
        potential_urls_to_try = [("Original", original_full_url)]

        try:
            parsed_original = urlparse(original_full_url)

            current_netloc = parsed_original.netloc
            if current_netloc.startswith("www."):
                varied_netloc = current_netloc[4:]
                potential_urls_to_try.append(("Without www", urlunparse(parsed_original._replace(netloc=varied_netloc))))
            else:
                varied_netloc = "www." + current_netloc
                potential_urls_to_try.append(("With www", urlunparse(parsed_original._replace(netloc=varied_netloc))))

            current_path = parsed_original.path
            if not current_path:
                potential_urls_to_try.append(("With trailing slash", urlunparse(parsed_original._replace(path='/'))))
            elif current_path.endswith('/'):
                potential_urls_to_try.append(("Without trailing slash", urlunparse(parsed_original._replace(path=current_path[:-1]))))
            else:
                potential_urls_to_try.append(("With trailing slash", urlunparse(parsed_original._replace(path=current_path + '/'))))

        except Exception as e:
            print(f" [!] Error generating URL variations: {e}")

        entry_updated_in_this_run = False

        # Deduplicate while preserving order.
        seen_urls_for_entry = set()
        unique_potential_urls = []
        for label, url_val in potential_urls_to_try:
            if url_val not in seen_urls_for_entry:
                unique_potential_urls.append((label, url_val))
                seen_urls_for_entry.add(url_val)

        # As a last resort, also try plain HTTP.
        parsed_original_for_http_check = urlparse(original_full_url)
        if parsed_original_for_http_check.scheme == 'https':
            http_url = urlunparse(parsed_original_for_http_check._replace(scheme='http'))
            if http_url not in seen_urls_for_entry:
                unique_potential_urls.append(("HTTP Fallback", http_url))

        for label, url_to_check in unique_potential_urls:
            if entry_updated_in_this_run:
                break

            print(f" [] Testing URL ({label}): {url_to_check}")
            response = try_url_with_retries(url_to_check, current_headers)
            if response:
                final_url_from_request = str(response.url)
                print(f" [+] Redirect/Response to: {final_url_from_request}")

                # Normalize to scheme://host/ so cosmetic differences don't count as changes.
                parsed_final_url = urlparse(final_url_from_request)
                normalized_full_url = urlunparse(parsed_final_url._replace(path='/', params='', query='', fragment=''))
                if parsed_final_url.path == '' and not normalized_full_url.endswith('/'):
                    normalized_full_url += '/'

                if normalized_full_url != final_url_from_request:
                    print(f" [+] Normalized URL: {normalized_full_url}")

                if normalized_full_url != original_full_url:
                    new_tld_val = get_new_tld(final_url_from_request)

                    if new_tld_val:
                        entry["full_url"] = normalized_full_url

                        if new_tld_val != original_domain_in_entry:
                            print(f" [-] Domain TLD Changed: '{original_domain_in_entry}' -> '{new_tld_val}'")
                            entry["old_domain"] = original_domain_in_entry if original_domain_in_entry else entry.get("old_domain", "")
                            entry["domain"] = new_tld_val
                            entry["time_change"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                            print(f" [-] Domain & URL Updated: New TLD '{new_tld_val}', New URL '{normalized_full_url}'")
                        else:
                            entry["domain"] = new_tld_val
                            print(f" [-] URL Updated (TLD Unchanged '{new_tld_val}'): New URL '{normalized_full_url}'")

                        updated_count += 1
                        entry_updated_in_this_run = True
                    else:
                        print(f" [!] Could not extract TLD from {final_url_from_request}. URL not updated despite potential change.")
                else:
                    if final_url_from_request != original_full_url:
                        print(f" [] Same Domain (after normalization): {final_url_from_request} -> {normalized_full_url}")
                    else:
                        print(f" [] Same Domain: {final_url_from_request}")

                    if label == "Original" or normalized_full_url == original_full_url:
                        entry_updated_in_this_run = True

        if not entry_updated_in_this_run:
            print(f" [-] No Update for {key} after {len(unique_potential_urls)} attempts.")

    return updated_count > 0


def main():
    print("Starting domain update script...")
    domain_data = load_domains(JSON_FILE_PATH)

    if domain_data:
        if update_domain_entries(domain_data):
            save_domains(JSON_FILE_PATH, domain_data)
            print("\nUpdate complete. Some entries were modified.")
        else:
            print("\nUpdate complete. No domains were modified.")
    else:
        print("\nCannot proceed without domain data.")

    print("Script finished.")


if __name__ == "__main__":
    main()
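The script reads and rewrites .github/.domain/domains.json in place, so it is meant to run from the repository root. For reference, a minimal sketch of the helpers in isolation; the sys.path tweak and the module name domain_update are illustrative assumptions, not part of the commit:

import sys
sys.path.insert(0, ".github/.domain")  # make the vendored script importable

from domain_update import get_new_tld, load_domains

# get_new_tld() keeps only the last hostname label, e.g. "to" for example.to
assert get_new_tld("https://www.1337xx.to/") == "to"

data = load_domains(".github/.domain/domains.json")
if data is not None:
    for site, entry in data.items():
        print(site, entry.get("domain"), entry.get("full_url"))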

.github/.domain/domains.json vendored Normal file

@@ -0,0 +1,62 @@
{
  "1337xx": {
    "domain": "to",
    "full_url": "https://www.1337xx.to/",
    "old_domain": "to",
    "time_change": "2025-03-19 12:20:19"
  },
  "cb01new": {
    "domain": "download",
    "full_url": "https://cb01net.download/",
    "old_domain": "my",
    "time_change": "2025-05-26 22:23:24"
  },
  "animeunity": {
    "domain": "so",
    "full_url": "https://www.animeunity.so/",
    "old_domain": "so",
    "time_change": "2025-03-19 12:20:23"
  },
  "animeworld": {
    "domain": "ac",
    "full_url": "https://www.animeworld.ac/",
    "old_domain": "ac",
    "time_change": "2025-03-21 12:20:27"
  },
  "guardaserie": {
    "domain": "meme",
    "full_url": "https://guardaserie.meme/",
    "old_domain": "meme",
    "time_change": "2025-03-19 12:20:24"
  },
  "ddlstreamitaly": {
    "domain": "co",
    "full_url": "https://ddlstreamitaly.co/",
    "old_domain": "co",
    "time_change": "2025-03-19 12:20:26"
  },
  "streamingwatch": {
    "domain": "org",
    "full_url": "https://www.streamingwatch.org/",
    "old_domain": "org",
    "time_change": "2025-04-29 12:30:30"
  },
  "altadefinizione": {
    "domain": "spa",
    "full_url": "https://altadefinizione.spa/",
    "old_domain": "locker",
    "time_change": "2025-05-26 23:22:45"
  },
  "streamingcommunity": {
    "domain": "blog",
    "full_url": "https://streamingunity.blog/",
    "old_domain": "to",
    "time_change": "2025-05-31 10:45:55"
  },
  "altadefinizionegratis": {
    "domain": "icu",
    "full_url": "https://altadefinizionegratis.icu/",
    "old_domain": "taipei",
    "time_change": "2025-05-18 11:21:05"
  }
}
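The updater only requires "full_url" in each entry; "domain", "old_domain" and "time_change" are rewritten when a change is detected, and "domain" always holds the last hostname label of "full_url". A small consistency check over this file (a sketch, not part of the commit):

import json
from urllib.parse import urlparse

with open(".github/.domain/domains.json", encoding="utf-8") as f:
    domains = json.load(f)

for site, entry in domains.items():
    host = urlparse(entry["full_url"]).hostname or ""
    # the stored "domain" field is the TLD, i.e. the last hostname label
    assert entry["domain"] == host.split(".")[-1], f"{site}: TLD mismatch"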

.gitignore vendored

@@ -52,5 +52,4 @@ cmd.txt
 bot_config.json
 scripts.json
 active_requests.json
-domains.json
 working_proxies.json


@@ -268,33 +268,32 @@ class ConfigManager:
         self._load_site_data_from_file()
 
     def _load_site_data_from_api(self) -> None:
-        """Load site data from API."""
+        """Load site data from GitHub."""
+        domains_github_url = "https://raw.githubusercontent.com/Arrowar/StreamingCommunity/refs/heads/main/.github/.domain/domains.json"
         headers = {
-            "apikey": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Inp2Zm5ncG94d3Jnc3duenl0YWRoIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDAxNTIxNjMsImV4cCI6MjA1NTcyODE2M30.FNTCCMwi0QaKjOu8gtZsT5yQttUW8QiDDGXmzkn89QE",
-            "Authorization": f"Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Inp2Zm5ncG94d3Jnc3duenl0YWRoIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDAxNTIxNjMsImV4cCI6MjA1NTcyODE2M30.FNTCCMwi0QaKjOu8gtZsT5yQttUW8QiDDGXmzkn89QE",
-            "Content-Type": "application/json",
-            "User-Agent": get_userAgent()
+            "User-Agent": get_userAgent()
         }
 
         try:
-            console.print("[bold cyan]Retrieving site data from API...[/bold cyan]")
-            response = requests.get("https://zvfngpoxwrgswnzytadh.supabase.co/rest/v1/public", timeout=8, headers=headers)
+            console.print(f"[bold cyan]Retrieving site data from GitHub:[/bold cyan] [green]{domains_github_url}[/green]")
+            response = requests.get(domains_github_url, timeout=8, headers=headers)
 
             if response.ok:
-                data = response.json()
-                if data and len(data) > 0:
-                    self.configSite = data[0]['data']
-                    site_count = len(self.configSite) if isinstance(self.configSite, dict) else 0
-                else:
-                    console.print("[bold yellow]API returned an empty data set[/bold yellow]")
+                self.configSite = response.json()
+                site_count = len(self.configSite) if isinstance(self.configSite, dict) else 0
+                console.print(f"[bold green]Site data loaded from GitHub:[/bold green] {site_count} streaming services found.")
             else:
-                console.print(f"[bold red]API request failed:[/bold red] HTTP {response.status_code}, {response.text[:100]}")
+                console.print(f"[bold red]GitHub request failed:[/bold red] HTTP {response.status_code}, {response.text[:100]}")
                 self._handle_site_data_fallback()
 
+        except json.JSONDecodeError as e:
+            console.print(f"[bold red]Error parsing JSON from GitHub:[/bold red] {str(e)}")
+            self._handle_site_data_fallback()
+
         except Exception as e:
-            console.print(f"[bold red]API connection error:[/bold red] {str(e)}")
+            console.print(f"[bold red]GitHub connection error:[/bold red] {str(e)}")
             self._handle_site_data_fallback()
 
     def _load_site_data_from_file(self) -> None:
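Stripped of console output and the file-based fallback, the new load path boils down to a single GET against the raw GitHub URL, whose JSON body is already the site dictionary (the old Supabase endpoint wrapped it as data[0]['data']). A standalone sketch of the same fetch, assuming only the requests library:

import requests

DOMAINS_URL = (
    "https://raw.githubusercontent.com/Arrowar/StreamingCommunity"
    "/refs/heads/main/.github/.domain/domains.json"
)

def fetch_site_data(timeout: int = 8) -> dict:
    response = requests.get(DOMAINS_URL, timeout=timeout)
    response.raise_for_status()
    data = response.json()  # the raw domains.json is already the site dict
    return data if isinstance(data, dict) else {}

if __name__ == "__main__":
    sites = fetch_site_data()
    print(f"{len(sites)} streaming services found.")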