Remove domain database

Lovi 2025-05-31 10:52:16 +02:00
parent 86c7293779
commit ded66f446e
4 changed files with 331 additions and 18 deletions

.github/.domain/domain_update.py vendored Normal file

@@ -0,0 +1,253 @@
# 20.04.2024

import os
import re
import time
import json
from datetime import datetime
from urllib.parse import urlparse, urlunparse

import httpx
import ua_generator

JSON_FILE_PATH = os.path.join(".github", ".domain", "domains.json")


def load_domains(file_path):
    """Load the domain database from disk, returning None on any error."""
    if not os.path.exists(file_path):
        print(f"Error: The file {file_path} was not found.")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    except Exception as e:
        print(f"Error reading the file {file_path}: {e}")
        return None


def save_domains(file_path, data):
    """Write the (possibly updated) domain database back to disk."""
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        print(f"Data successfully saved to {file_path}")

    except Exception as e:
        print(f"Error saving the file {file_path}: {e}")


def get_new_tld(full_url):
    """Return the last label of the URL's hostname (e.g. 'to' for example.to)."""
    try:
        parsed_url = urlparse(full_url)
        hostname = parsed_url.hostname
        if hostname:
            parts = hostname.split('.')
            return parts[-1]

    except Exception:
        pass

    return None


def try_url_with_retries(url_to_try, headers, timeout=15, retries=3, backoff_factor=0.5):
    """GET url_to_try with exponential backoff; on 403/429/503 (suspected
    Cloudflare), try to recover the real origin from a <base href> tag."""
    for attempt in range(retries):
        try:
            with httpx.Client(headers=headers, timeout=timeout, follow_redirects=True) as client:
                response = client.get(url_to_try)
                response.raise_for_status()
                return response

        except (httpx.TimeoutException, httpx.ConnectError) as e:
            print(f" [!] Attempt {attempt + 1}/{retries} for {url_to_try}: Network error ({type(e).__name__}). Retrying in {backoff_factor * (2 ** attempt)}s...")
            if attempt + 1 == retries:
                print(f" [!] Failed all {retries} attempts for {url_to_try} due to {type(e).__name__}.")
                return None
            time.sleep(backoff_factor * (2 ** attempt))

        except httpx.HTTPStatusError as http_err:
            if http_err.response.status_code in [403, 429, 503]:
                print(f" [!] HTTP error {http_err.response.status_code} for {url_to_try}. Suspected Cloudflare, checking for <base href>...")
                try:
                    # Re-fetch without following redirects so the challenge page body is available.
                    with httpx.Client(headers=headers, timeout=timeout, follow_redirects=False) as cf_client:
                        cf_page_response = cf_client.get(url_to_try)
                        if cf_page_response.status_code != http_err.response.status_code and not (200 <= cf_page_response.status_code < 300):
                            cf_page_response.raise_for_status()

                        match = re.search(r'<base\s+href="([^"]+)"', cf_page_response.text, re.IGNORECASE)
                        if match:
                            base_href_url = match.group(1)
                            parsed_base_href = urlparse(base_href_url)
                            # A relative <base href> is resolved against the original URL.
                            if not parsed_base_href.scheme or not parsed_base_href.netloc:
                                original_parsed_url = urlparse(url_to_try)
                                base_href_url = urlunparse(original_parsed_url._replace(path=base_href_url if base_href_url.startswith('/') else '/' + base_href_url, query='', fragment=''))

                            print(f" [+] Found <base href>: {base_href_url}")
                            try:
                                print(f" [] Attempting request to <base href> URL: {base_href_url}")
                                with httpx.Client(headers=headers, timeout=timeout, follow_redirects=True) as base_client:
                                    final_response_from_base = base_client.get(base_href_url)
                                    final_response_from_base.raise_for_status()
                                    print(" [+] Successfully fetched from <base href> URL.")
                                    return final_response_from_base

                            except httpx.RequestError as base_req_e:
                                print(f" [!] Error requesting <base href> URL {base_href_url}: {base_req_e}")
                                return None
                        else:
                            print(f" [!] No <base href> found in page content for {url_to_try}.")
                            return None

                except httpx.RequestError as cf_req_e:
                    print(f" [!] Error fetching Cloudflare-like page content for {url_to_try}: {cf_req_e}")
                    return None
            else:
                print(f" [!] HTTP error {http_err.response.status_code} for {url_to_try}. No retry.")
                return None

        except httpx.RequestError as e:
            print(f" [!] Generic error for {url_to_try}: {e}. No retry.")
            return None

    return None


def update_domain_entries(data):
    """Probe every stored URL (plus www/no-www, trailing-slash and HTTP
    variants) and record any domain change. Returns True if anything changed."""
    if not data:
        return False

    updated_count = 0

    for key, entry in data.items():
        print(f"\n--- [DOMAIN] {key} ---")
        original_full_url = entry.get("full_url")
        original_domain_in_entry = entry.get("domain")

        if not original_full_url:
            print(" [!] 'full_url' missing. Skipped.")
            continue

        # A fresh randomized User-Agent per entry reduces the chance of blocking.
        ua = ua_generator.generate(device=('desktop', 'mobile'), browser=('chrome', 'edge', 'firefox', 'safari'))
        current_headers = ua.headers.get()

        print(f" [] Stored URL: {original_full_url}")
        if original_domain_in_entry:
            print(f" [] Stored Domain (TLD): {original_domain_in_entry}")

        # Build the candidate list: the stored URL plus simple variations.
        potential_urls_to_try = [("Original", original_full_url)]

        try:
            parsed_original = urlparse(original_full_url)

            current_netloc = parsed_original.netloc
            if current_netloc.startswith("www."):
                varied_netloc = current_netloc[4:]
                potential_urls_to_try.append(("Without www", urlunparse(parsed_original._replace(netloc=varied_netloc))))
            else:
                varied_netloc = "www." + current_netloc
                potential_urls_to_try.append(("With www", urlunparse(parsed_original._replace(netloc=varied_netloc))))

            current_path = parsed_original.path
            if not current_path:
                potential_urls_to_try.append(("With trailing slash", urlunparse(parsed_original._replace(path='/'))))
            elif current_path.endswith('/'):
                potential_urls_to_try.append(("Without trailing slash", urlunparse(parsed_original._replace(path=current_path[:-1]))))
            else:
                potential_urls_to_try.append(("With trailing slash", urlunparse(parsed_original._replace(path=current_path + '/'))))

        except Exception as e:
            print(f" [!] Error generating URL variations: {e}")

        entry_updated_in_this_run = False

        # Deduplicate while preserving order.
        seen_urls_for_entry = set()
        unique_potential_urls = []
        for label, url_val in potential_urls_to_try:
            if url_val not in seen_urls_for_entry:
                unique_potential_urls.append((label, url_val))
                seen_urls_for_entry.add(url_val)

        # As a last resort, also try plain HTTP.
        parsed_original_for_http_check = urlparse(original_full_url)
        if parsed_original_for_http_check.scheme == 'https':
            http_url = urlunparse(parsed_original_for_http_check._replace(scheme='http'))
            if http_url not in seen_urls_for_entry:
                unique_potential_urls.append(("HTTP Fallback", http_url))

        for label, url_to_check in unique_potential_urls:
            if entry_updated_in_this_run:
                break

            print(f" [] Testing URL ({label}): {url_to_check}")
            response = try_url_with_retries(url_to_check, current_headers)
            if response:
                final_url_from_request = str(response.url)
                print(f" [+] Redirect/Response to: {final_url_from_request}")

                # Normalize to scheme://host/ so cosmetic differences don't count as changes.
                parsed_final_url = urlparse(final_url_from_request)
                normalized_full_url = urlunparse(parsed_final_url._replace(path='/', params='', query='', fragment=''))
                if parsed_final_url.path == '' and not normalized_full_url.endswith('/'):
                    normalized_full_url += '/'

                if normalized_full_url != final_url_from_request:
                    print(f" [+] Normalized URL: {normalized_full_url}")

                if normalized_full_url != original_full_url:
                    new_tld_val = get_new_tld(final_url_from_request)

                    if new_tld_val:
                        entry["full_url"] = normalized_full_url

                        if new_tld_val != original_domain_in_entry:
                            print(f" [-] Domain TLD Changed: '{original_domain_in_entry}' -> '{new_tld_val}'")
                            entry["old_domain"] = original_domain_in_entry if original_domain_in_entry else entry.get("old_domain", "")
                            entry["domain"] = new_tld_val
                            entry["time_change"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                            print(f" [-] Domain & URL Updated: New TLD '{new_tld_val}', New URL '{normalized_full_url}'")
                        else:
                            entry["domain"] = new_tld_val
                            print(f" [-] URL Updated (TLD Unchanged '{new_tld_val}'): New URL '{normalized_full_url}'")

                        updated_count += 1
                        entry_updated_in_this_run = True
                    else:
                        print(f" [!] Could not extract TLD from {final_url_from_request}. URL not updated despite potential change.")
                else:
                    if final_url_from_request != original_full_url:
                        print(f" [] Same Domain (after normalization): {final_url_from_request} -> {normalized_full_url}")
                    else:
                        print(f" [] Same Domain: {final_url_from_request}")

                    if label == "Original" or normalized_full_url == original_full_url:
                        entry_updated_in_this_run = True

        if not entry_updated_in_this_run:
            print(f" [-] No Update for {key} after {len(unique_potential_urls)} attempts.")

    return updated_count > 0


def main():
    print("Starting domain update script...")
    domain_data = load_domains(JSON_FILE_PATH)

    if domain_data:
        if update_domain_entries(domain_data):
            save_domains(JSON_FILE_PATH, domain_data)
            print("\nUpdate complete. Some entries were modified.")
        else:
            print("\nUpdate complete. No domains were modified.")
    else:
        print("\nCannot proceed without domain data.")

    print("Script finished.")


if __name__ == "__main__":
    main()
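The script reads and rewrites .github/.domain/domains.json in place, so it is meant to run from the repository root. For reference, a minimal sketch of the helpers in isolation; the sys.path tweak and the module name domain_update are illustrative assumptions, not part of the commit:

import sys
sys.path.insert(0, ".github/.domain")  # make the vendored script importable

from domain_update import get_new_tld, load_domains

# get_new_tld() keeps only the last hostname label, e.g. "to" for example.to
assert get_new_tld("https://www.1337xx.to/") == "to"

data = load_domains(".github/.domain/domains.json")
if data is not None:
    for site, entry in data.items():
        print(site, entry.get("domain"), entry.get("full_url"))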

.github/.domain/domains.json vendored Normal file

@@ -0,0 +1,62 @@
{
  "1337xx": {
    "domain": "to",
    "full_url": "https://www.1337xx.to/",
    "old_domain": "to",
    "time_change": "2025-03-19 12:20:19"
  },
  "cb01new": {
    "domain": "download",
    "full_url": "https://cb01net.download/",
    "old_domain": "my",
    "time_change": "2025-05-26 22:23:24"
  },
  "animeunity": {
    "domain": "so",
    "full_url": "https://www.animeunity.so/",
    "old_domain": "so",
    "time_change": "2025-03-19 12:20:23"
  },
  "animeworld": {
    "domain": "ac",
    "full_url": "https://www.animeworld.ac/",
    "old_domain": "ac",
    "time_change": "2025-03-21 12:20:27"
  },
  "guardaserie": {
    "domain": "meme",
    "full_url": "https://guardaserie.meme/",
    "old_domain": "meme",
    "time_change": "2025-03-19 12:20:24"
  },
  "ddlstreamitaly": {
    "domain": "co",
    "full_url": "https://ddlstreamitaly.co/",
    "old_domain": "co",
    "time_change": "2025-03-19 12:20:26"
  },
  "streamingwatch": {
    "domain": "org",
    "full_url": "https://www.streamingwatch.org/",
    "old_domain": "org",
    "time_change": "2025-04-29 12:30:30"
  },
  "altadefinizione": {
    "domain": "spa",
    "full_url": "https://altadefinizione.spa/",
    "old_domain": "locker",
    "time_change": "2025-05-26 23:22:45"
  },
  "streamingcommunity": {
    "domain": "blog",
    "full_url": "https://streamingunity.blog/",
    "old_domain": "to",
    "time_change": "2025-05-31 10:45:55"
  },
  "altadefinizionegratis": {
    "domain": "icu",
    "full_url": "https://altadefinizionegratis.icu/",
    "old_domain": "taipei",
    "time_change": "2025-05-18 11:21:05"
  }
}
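The updater only requires "full_url" in each entry; "domain", "old_domain" and "time_change" are rewritten when a change is detected, and "domain" always holds the last hostname label of "full_url". A small consistency check over this file (a sketch, not part of the commit):

import json
from urllib.parse import urlparse

with open(".github/.domain/domains.json", encoding="utf-8") as f:
    domains = json.load(f)

for site, entry in domains.items():
    host = urlparse(entry["full_url"]).hostname or ""
    # the stored "domain" field is the TLD, i.e. the last hostname label
    assert entry["domain"] == host.split(".")[-1], f"{site}: TLD mismatch"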

.gitignore vendored

@@ -52,5 +52,4 @@ cmd.txt
 bot_config.json
 scripts.json
 active_requests.json
-domains.json
 working_proxies.json


@@ -268,33 +268,32 @@ class ConfigManager:
         self._load_site_data_from_file()
 
     def _load_site_data_from_api(self) -> None:
-        """Load site data from API."""
+        """Load site data from GitHub."""
+        domains_github_url = "https://raw.githubusercontent.com/Arrowar/StreamingCommunity/refs/heads/main/.github/.domain/domains.json"
         headers = {
-            "apikey": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Inp2Zm5ncG94d3Jnc3duenl0YWRoIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDAxNTIxNjMsImV4cCI6MjA1NTcyODE2M30.FNTCCMwi0QaKjOu8gtZsT5yQttUW8QiDDGXmzkn89QE",
-            "Authorization": f"Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Inp2Zm5ncG94d3Jnc3duenl0YWRoIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDAxNTIxNjMsImV4cCI6MjA1NTcyODE2M30.FNTCCMwi0QaKjOu8gtZsT5yQttUW8QiDDGXmzkn89QE",
-            "Content-Type": "application/json",
-            "User-Agent": get_userAgent()
+            "User-Agent": get_userAgent()
         }
 
         try:
-            console.print("[bold cyan]Retrieving site data from API...[/bold cyan]")
-            response = requests.get("https://zvfngpoxwrgswnzytadh.supabase.co/rest/v1/public", timeout=8, headers=headers)
+            console.print(f"[bold cyan]Retrieving site data from GitHub:[/bold cyan] [green]{domains_github_url}[/green]")
+            response = requests.get(domains_github_url, timeout=8, headers=headers)
 
             if response.ok:
-                data = response.json()
-                if data and len(data) > 0:
-                    self.configSite = data[0]['data']
-                    site_count = len(self.configSite) if isinstance(self.configSite, dict) else 0
-                else:
-                    console.print("[bold yellow]API returned an empty data set[/bold yellow]")
+                self.configSite = response.json()
+                site_count = len(self.configSite) if isinstance(self.configSite, dict) else 0
+                console.print(f"[bold green]Site data loaded from GitHub:[/bold green] {site_count} streaming services found.")
             else:
-                console.print(f"[bold red]API request failed:[/bold red] HTTP {response.status_code}, {response.text[:100]}")
+                console.print(f"[bold red]GitHub request failed:[/bold red] HTTP {response.status_code}, {response.text[:100]}")
                 self._handle_site_data_fallback()
 
+        except json.JSONDecodeError as e:
+            console.print(f"[bold red]Error parsing JSON from GitHub:[/bold red] {str(e)}")
+            self._handle_site_data_fallback()
+
         except Exception as e:
-            console.print(f"[bold red]API connection error:[/bold red] {str(e)}")
+            console.print(f"[bold red]GitHub connection error:[/bold red] {str(e)}")
             self._handle_site_data_fallback()
 
     def _load_site_data_from_file(self) -> None:
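Stripped of console output and the file-based fallback, the new load path boils down to a single GET against the raw GitHub URL, whose JSON body is already the site dictionary (the old Supabase endpoint wrapped it as data[0]['data']). A standalone sketch of the same fetch, assuming only the requests library:

import requests

DOMAINS_URL = (
    "https://raw.githubusercontent.com/Arrowar/StreamingCommunity"
    "/refs/heads/main/.github/.domain/domains.json"
)

def fetch_site_data(timeout: int = 8) -> dict:
    response = requests.get(DOMAINS_URL, timeout=timeout)
    response.raise_for_status()
    data = response.json()  # the raw domains.json is already the site dict
    return data if isinstance(data, dict) else {}

if __name__ == "__main__":
    sites = fetch_site_data()
    print(f"{len(sites)} streaming services found.")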