diff --git a/.github/.domain/domain_update.py b/.github/.domain/domain_update.py
index e4be02d..937661f 100644
--- a/.github/.domain/domain_update.py
+++ b/.github/.domain/domain_update.py
@@ -1,17 +1,14 @@
# 20.04.2024
-import os
import re
-import time
+import os
import json
from datetime import datetime
from urllib.parse import urlparse, urlunparse
-
import httpx
import ua_generator
-
JSON_FILE_PATH = os.path.join(".github", ".domain", "domains.json")
@@ -50,69 +47,137 @@ def get_new_tld(full_url):
return None
-def try_url_with_retries(url_to_try, headers, timeout=15, retries=3, backoff_factor=0.5):
- for attempt in range(retries):
- try:
- with httpx.Client(headers=headers, timeout=timeout, follow_redirects=True) as client:
- response = client.get(url_to_try)
- response.raise_for_status()
- return response
-
- except (httpx.TimeoutException, httpx.ConnectError) as e:
- print(f" [!] Attempt {attempt + 1}/{retries} for {url_to_try}: Network error ({type(e).__name__}). Retrying in {backoff_factor * (2 ** attempt)}s...")
- if attempt + 1 == retries:
- print(f" [!] Failed all {retries} attempts for {url_to_try} due to {type(e).__name__}.")
- return None
- time.sleep(backoff_factor * (2 ** attempt))
+def extract_domain_from_response(response, original_url):
+ if 'location' in response.headers:
+ return response.headers['location']
+
+ if str(response.url) != original_url:
+ return str(response.url)
+
+ try:
+ content_type = response.headers.get('content-type', '').lower()
+ if 'text/html' in content_type or 'text/plain' in content_type:
+ response_text = response.text
- except httpx.HTTPStatusError as http_err:
- if http_err.response.status_code in [403, 429, 503]:
- print(f" [!] HTTP error {http_err.response.status_code} for {url_to_try}. Suspected Cloudflare, checking for <base href> ...")
- try:
- with httpx.Client(headers=headers, timeout=timeout, follow_redirects=False) as cf_client:
- cf_page_response = cf_client.get(url_to_try)
- if cf_page_response.status_code != http_err.response.status_code and not (200 <= cf_page_response.status_code < 300) :
- cf_page_response.raise_for_status()
-
- match = re.search(r'<base\s+href=["\']([^"\']+)["\']', cf_page_response.text, re.IGNORECASE)
- if match:
- base_href_url = match.group(1)
- print(f" [+] Found <base href>: {base_href_url}")
- try:
- print(f" [] Attempting request to URL: {base_href_url}")
- with httpx.Client(headers=headers, timeout=timeout, follow_redirects=True) as base_client:
- final_response_from_base = base_client.get(base_href_url)
- final_response_from_base.raise_for_status()
- print(f" [+] Successfully fetched from URL.")
- return final_response_from_base
-
- except httpx.RequestError as base_req_e:
- print(f" [!] Error requesting URL {base_href_url}: {base_req_e}")
- return None
-
- else:
- print(f" [!] No <base href> found in page content for {url_to_try}.")
- return None
-
- except httpx.RequestError as cf_req_e:
- print(f" [!] Error fetching Cloudflare-like page content for {url_to_try}: {cf_req_e}")
- return None
-
- else:
- print(f" [!] HTTP error {http_err.response.status_code} for {url_to_try}. No retry.")
- return None
+ js_redirect_patterns = [
+ r'window\.location\.href\s*=\s*["\']([^"\']+)["\']',
+ r'window\.location\s*=\s*["\']([^"\']+)["\']',
+ r'location\.href\s*=\s*["\']([^"\']+)["\']',
+ r'document\.location\s*=\s*["\']([^"\']+)["\']'
+ ]
- except httpx.RequestError as e:
- print(f" [!] Generic error for {url_to_try}: {e}. No retry.")
- return None
-
+ for pattern in js_redirect_patterns:
+ js_match = re.search(pattern, response_text, re.IGNORECASE)
+ if js_match:
+ return js_match.group(1)
+
+ meta_patterns = [
+ r'<meta[^>]*http-equiv=["\']?refresh["\']?[^>]*content=["\'][^"\']*url=([^"\'>\s]+)',
+ r'<meta[^>]*content=["\'][^"\']*url=([^"\'>\s]+)[^>]*http-equiv=["\']?refresh["\']?'
+ ]
+
+ for pattern in meta_patterns:
+ meta_match = re.search(pattern, response_text, re.IGNORECASE)
+ if meta_match:
+ return meta_match.group(1)
+
+ canonical_match = re.search(r'<link[^>]*rel=["\']?canonical["\']?[^>]*href=["\']([^"\']+)["\']', response_text, re.IGNORECASE)
+ if canonical_match:
+ return canonical_match.group(1)
+
+ base_match = re.search(r'<base[^>]*href=["\']([^"\']+)["\']', response_text, re.IGNORECASE)
+ if base_match:
+ return base_match.group(1)
+
+ error_redirect_patterns = [
+ r'[Rr]edirect(?:ed)?\s+to:?\s*([^\s<>"\']+)',
+ r'[Nn]ew\s+[Uu][Rr][Ll]:?\s*([^\s<>"\']+)',
+ r'[Mm]oved\s+to:?\s*([^\s<>"\']+)',
+ r'[Ff]ound\s+at:?\s*([^\s<>"\']+)'
+ ]
+
+ for pattern in error_redirect_patterns:
+ error_match = re.search(pattern, response_text)
+ if error_match:
+ potential_url = error_match.group(1)
+ if potential_url.startswith(('http://', 'https://', '//')):
+ return potential_url
+
+ except Exception as e:
+ print(f" [!] Error extracting from response content: {e}")
+
return None
+def try_url(url_to_try, headers, timeout=15):
+ try:
+ with httpx.Client(headers=headers, timeout=timeout, follow_redirects=False) as client:
+ response = client.get(url_to_try)
+
+ if response.status_code in [301, 302, 303, 307, 308]:
+ location = response.headers.get('location')
+ if location:
+ print(f" [+] Found redirect ({response.status_code}) to: {location}")
+ try:
+ final_response = client.get(location)
+ if 200 <= final_response.status_code < 400:
+ return final_response
+ else:
+ return httpx.Response(
+ status_code=200,
+ headers={"location": location},
+ content=b"",
+ request=response.request
+ )
+ except Exception:
+ return httpx.Response(
+ status_code=200,
+ headers={"location": location},
+ content=b"",
+ request=response.request
+ )
+
+ elif response.status_code in [403, 409, 429, 503]:
+ print(f" [!] HTTP {response.status_code} - attempting to extract redirect info")
+
+ location = response.headers.get('location')
+ if location:
+ print(f" [+] Found location header in error response: {location}")
+ return httpx.Response(
+ status_code=200,
+ headers={"location": location},
+ content=b"",
+ request=response.request
+ )
+
+ new_url = extract_domain_from_response(response, url_to_try)
+ if new_url and new_url != url_to_try:
+ print(f" [+] Found redirect URL in error response content: {new_url}")
+ return httpx.Response(
+ status_code=200,
+ headers={"location": new_url},
+ content=b"",
+ request=response.request
+ )
+
+ if 200 <= response.status_code < 400:
+ return response
+
+ print(f" [!] HTTP {response.status_code} for {url_to_try}")
+
+ except httpx.HTTPStatusError as http_err:
+ new_url = extract_domain_from_response(http_err.response, url_to_try)
+ if new_url:
+ print(f" [+] Found new URL from HTTPStatusError response: {new_url}")
+ return httpx.Response(
+ status_code=200,
+ headers={"location": new_url},
+ content=b"",
+ request=http_err.request
+ )
+ except Exception as e:
+ print(f" [!] Error for {url_to_try}: {type(e).__name__}")
+
+ return None
def update_domain_entries(data):
if not data:
@@ -135,100 +200,47 @@ def update_domain_entries(data):
print(f" [] Stored URL: {original_full_url}")
if original_domain_in_entry:
print(f" [] Stored Domain (TLD): {original_domain_in_entry}")
-
- potential_urls_to_try = []
- potential_urls_to_try.append(("Original", original_full_url))
-
- try:
- parsed_original = urlparse(original_full_url)
-
- current_netloc = parsed_original.netloc
- if current_netloc.startswith("www."):
- varied_netloc = current_netloc[4:]
- potential_urls_to_try.append(("Without www", urlunparse(parsed_original._replace(netloc=varied_netloc))))
- else:
- varied_netloc = "www." + current_netloc
- potential_urls_to_try.append(("With www", urlunparse(parsed_original._replace(netloc=varied_netloc))))
-
- current_path = parsed_original.path
- if not current_path:
- potential_urls_to_try.append(("With trailing slash", urlunparse(parsed_original._replace(path='/'))))
- elif current_path.endswith('/'):
- potential_urls_to_try.append(("Without trailing slash", urlunparse(parsed_original._replace(path=current_path[:-1]))))
- else:
- potential_urls_to_try.append(("With trailing slash", urlunparse(parsed_original._replace(path=current_path + '/'))))
-
- except Exception as e:
- print(f" [!] Error generating URL variations: {e}")
-
- entry_updated_in_this_run = False
- seen_urls_for_entry = set()
- unique_potential_urls = []
- for label, url_val in potential_urls_to_try:
- if url_val not in seen_urls_for_entry:
- unique_potential_urls.append((label, url_val))
- seen_urls_for_entry.add(url_val)
-
- parsed_original_for_http_check = urlparse(original_full_url)
- if parsed_original_for_http_check.scheme == 'https':
- http_url = urlunparse(parsed_original_for_http_check._replace(scheme='http'))
- if http_url not in seen_urls_for_entry:
- unique_potential_urls.append(("HTTP Fallback", http_url))
+ print(f" [] Testing URL: {original_full_url}")
+ response = try_url(original_full_url, current_headers)
- for label, url_to_check in unique_potential_urls:
- if entry_updated_in_this_run:
- break
+ if response:
+ final_url_from_request = str(response.url)
+ print(f" [+] Redirect/Response to: {final_url_from_request}")
+
+ parsed_final_url = urlparse(final_url_from_request)
+ normalized_full_url = urlunparse(parsed_final_url._replace(path='/', params='', query='', fragment=''))
+ if parsed_final_url.path == '' and not normalized_full_url.endswith('/'):
+ normalized_full_url += '/'
- print(f" [] Testing URL ({label}): {url_to_check}")
- response = try_url_with_retries(url_to_check, current_headers)
+ if normalized_full_url != final_url_from_request:
+ print(f" [+] Normalized URL: {normalized_full_url}")
- if response:
- final_url_from_request = str(response.url)
- print(f" [+] Redirect/Response to: {final_url_from_request}")
-
- parsed_final_url = urlparse(final_url_from_request)
- normalized_full_url = urlunparse(parsed_final_url._replace(path='/', params='', query='', fragment=''))
- if parsed_final_url.path == '' and not normalized_full_url.endswith('/'):
- normalized_full_url += '/'
+ if normalized_full_url != original_full_url:
+ new_tld_val = get_new_tld(final_url_from_request)
- if normalized_full_url != final_url_from_request:
- print(f" [+] Normalized URL: {normalized_full_url}")
-
- if normalized_full_url != original_full_url:
- new_tld_val = get_new_tld(final_url_from_request)
+ if new_tld_val:
+ entry["full_url"] = normalized_full_url
- if new_tld_val:
- entry["full_url"] = normalized_full_url
-
- if new_tld_val != original_domain_in_entry:
- print(f" [-] Domain TLD Changed: '{original_domain_in_entry}' -> '{new_tld_val}'")
- entry["old_domain"] = original_domain_in_entry if original_domain_in_entry else entry.get("old_domain", "")
- entry["domain"] = new_tld_val
- entry["time_change"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- print(f" [-] Domain & URL Updated: New TLD '{new_tld_val}', New URL '{normalized_full_url}'")
-
- else:
- entry["domain"] = new_tld_val
- print(f" [-] URL Updated (TLD Unchanged '{new_tld_val}'): New URL '{normalized_full_url}'")
-
- updated_count += 1
- entry_updated_in_this_run = True
-
+ if new_tld_val != original_domain_in_entry:
+ print(f" [-] Domain TLD Changed: '{original_domain_in_entry}' -> '{new_tld_val}'")
+ entry["old_domain"] = original_domain_in_entry if original_domain_in_entry else entry.get("old_domain", "")
+ entry["domain"] = new_tld_val
+ entry["time_change"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+ print(f" [-] Domain & URL Updated: New TLD '{new_tld_val}', New URL '{normalized_full_url}'")
else:
- print(f" [!] Could not extract TLD from {final_url_from_request}. URL not updated despite potential change.")
+ entry["domain"] = new_tld_val
+ print(f" [-] URL Updated (TLD Unchanged '{new_tld_val}'): New URL '{normalized_full_url}'")
+
+ updated_count += 1
+
else:
- if final_url_from_request != original_full_url:
- print(f" [] Same Domain (after normalization): {final_url_from_request} -> {normalized_full_url}")
+ print(f" [!] Could not extract TLD from {final_url_from_request}. URL not updated despite potential change.")
+ else:
+ print(f" [] Same Domain: {final_url_from_request}")
- else:
- print(f" [] Same Domain: {final_url_from_request}")
-
- if label == "Original" or normalized_full_url == original_full_url :
- entry_updated_in_this_run = True
-
- if not entry_updated_in_this_run:
- print(f" [-] No Update for {key} after {len(unique_potential_urls)} attempts.")
+ else:
+ print(f" [-] No response for {key}")
return updated_count > 0
@@ -240,10 +252,8 @@ def main():
if update_domain_entries(domain_data):
save_domains(JSON_FILE_PATH, domain_data)
print("\nUpdate complete. Some entries were modified.")
-
else:
print("\nUpdate complete. No domains were modified.")
-
else:
print("\nCannot proceed without domain data.")
diff --git a/.github/workflows/update_domain.yml b/.github/workflows/update_domain.yml
index 3d7a0bc..231c795 100644
--- a/.github/workflows/update_domain.yml
+++ b/.github/workflows/update_domain.yml
@@ -1,8 +1,8 @@
-name: Aggiorna Domini Periodicamente
+name: Update domains
on:
schedule:
- - cron: "*/45 * * * *"
+ - cron: "0 */2 * * *"
workflow_dispatch:
jobs:
@@ -12,37 +12,38 @@ jobs:
contents: write
steps:
- - name: Checkout del codice
+ - name: Checkout code
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
- python-version: '3.12'
+ python-version: '3.12'
+
+ - name: Install dependencies
+ run: |
+ pip install httpx ua-generator requests
+ pip install --upgrade pip setuptools wheel
- - name: Installa dipendenze
- run: pip install httpx ua-generator
-
- - name: Configura DNS
+ - name: Configure DNS
run: |
sudo sh -c 'echo "nameserver 9.9.9.9" > /etc/resolv.conf'
- sudo sh -c 'echo "nameserver 149.112.112.122" >> /etc/resolv.conf'
cat /etc/resolv.conf
- - name: Esegui lo script di aggiornamento domini
- run: python domain_updater.py
+ - name: Execute domain update script
+ run: python .github/.domain/domain_update.py
- - name: Commit e Push delle modifiche (se presenti)
+ - name: Commit and push changes (if any)
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
- # Controlla se domain.json รจ stato modificato
- if ! git diff --quiet domain.json; then
- git add domain.json
- git commit -m "Aggiornamento automatico domini [skip ci]"
- echo "Modifiche committate. Tentativo di push..."
+ # Check if domains.json was modified
+ if ! git diff --quiet .github/.domain/domains.json; then
+ git add .github/.domain/domains.json
+ git commit -m "Automatic domain update [skip ci]"
+ echo "Changes committed. Attempting to push..."
git push
else
- echo "Nessuna modifica a domain.json da committare."
- fi
+ echo "No changes to .github/.domain/domains.json to commit."
+ fi
\ No newline at end of file