diff --git a/.github/.domain/domain_update.py b/.github/.domain/domain_update.py
index 937661f..52c2ded 100644
--- a/.github/.domain/domain_update.py
+++ b/.github/.domain/domain_update.py
@@ -1,5 +1,3 @@
-# 20.04.2024
-
import re
import os
import json
@@ -47,6 +45,90 @@ def get_new_tld(full_url):
return None
+def get_enhanced_headers():
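+    """Build desktop Chrome headers with ua_generator, then layer on common browser headers so automated requests look less like a bot."""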
+    ua = ua_generator.generate(device='desktop', browser='chrome')
+    headers = ua.headers.get()
+
+    additional_headers = {
+        'DNT': '1',
+        'Upgrade-Insecure-Requests': '1',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.9,it;q=0.8',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Cache-Control': 'max-age=0',
+        'Connection': 'keep-alive',
+        'Referer': 'https://www.google.com/',
+    }
+
+    headers.update(additional_headers)
+    return headers
+
+def extract_redirect_from_403(response, original_url):
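+    """Try to recover a redirect target from a 403 response.
+
+    Checks redirect-style headers first, then scans the body for JavaScript
+    redirects, meta refresh tags, plain-text hints and prominent links.
+    Returns the recovered URL, or None if nothing usable is found.
+    """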
+    redirect_headers = ['location', 'refresh', 'x-redirect-to', 'x-location', 'redirect']
+    for header in redirect_headers:
+        if header in response.headers:
+            return response.headers[header]
+
+    try:
+        content = response.text
+
+        js_patterns = [
+            r'window\.location\.href\s*=\s*["\']([^"\']+)["\']',
+            r'window\.location\s*=\s*["\']([^"\']+)["\']',
+            r'location\.href\s*=\s*["\']([^"\']+)["\']',
+            r'document\.location\s*=\s*["\']([^"\']+)["\']',
+            r'top\.location\.href\s*=\s*["\']([^"\']+)["\']',
+            r'parent\.location\s*=\s*["\']([^"\']+)["\']'
+        ]
+
+        for pattern in js_patterns:
+            match = re.search(pattern, content, re.IGNORECASE)
+            if match:
+                return match.group(1)
+
+        meta_patterns = [
+            r'<meta[^>]*http-equiv=["\']?refresh["\']?[^>]*content=["\'][^"\']*url=([^"\'>\s]+)',
+            r'<meta[^>]*content=["\'][^"\']*url=([^"\'>\s]+)[^>]*http-equiv=["\']?refresh["\']?'
+        ]
+
+        for pattern in meta_patterns:
+            match = re.search(pattern, content, re.IGNORECASE)
+            if match:
+                return match.group(1)
+
+        text_patterns = [
+            r'[Rr]edirect(?:ed)?\s+to:?\s*([^\s<>"\']+)',
+            r'[Nn]ew\s+[Uu][Rr][Ll]:?\s*([^\s<>"\']+)',
+            r'[Mm]oved\s+to:?\s*([^\s<>"\']+)',
+            r'[Ff]ound\s+at:?\s*([^\s<>"\']+)',
+            r'[Gg]o\s+to:?\s*([^\s<>"\']+)',
+            r'[Vv]isit:?\s*([^\s<>"\']+)',
+            r'https?://[^\s<>"\']+\.[a-z]{2,}[^\s<>"\']*'
+        ]
+
+        for pattern in text_patterns:
+            match = re.search(pattern, content)
+            if match:
+                potential_url = match.group(1) if '(' in pattern else match.group(0)
+                if potential_url.startswith(('http://', 'https://', '//')):
+                    return potential_url
+
+        link_patterns = [
+            r'<a[^>]*href=["\']([^"\']+)["\'][^>]*>(?:click here|continue|proceed|go here)',
+            r'<link[^>]*rel=["\']?canonical["\']?[^>]*href=["\']([^"\']+)["\']',
+            r'<a[^>]*href=["\']([^"\']+)["\']'
+        ]
+
+        for pattern in link_patterns:
+            match = re.search(pattern, content, re.IGNORECASE)
+            if match:
+                return match.group(1)
+
+    except Exception:
+        pass
+
+    return None
+
def extract_domain_from_response(response, original_url):
if 'location' in response.headers:
return response.headers['location']
@@ -108,7 +190,10 @@ def extract_domain_from_response(response, original_url):
return None
-def try_url(url_to_try, headers, timeout=15):
+def try_url(url_to_try, headers=None, timeout=15):
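+    # Default to the enhanced browser-like headers when the caller does not supply any.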
+    if headers is None:
+        headers = get_enhanced_headers()
+
try:
with httpx.Client(headers=headers, timeout=timeout, follow_redirects=False) as client:
response = client.get(url_to_try)
@@ -136,7 +221,20 @@ def try_url(url_to_try, headers, timeout=15):
request=response.request
)
- elif response.status_code in [403, 409, 429, 503]:
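+            # Some hosts answer 403 but still expose the new location in a header or in the page body; try to recover it before giving up.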
+            elif response.status_code == 403:
+                print(f" [!] HTTP 403 - attempting enhanced extraction")
+
+                redirect_url = extract_redirect_from_403(response, url_to_try)
+                if redirect_url:
+                    print(f" [+] Found redirect URL in 403 response: {redirect_url}")
+                    return httpx.Response(
+                        status_code=200,
+                        headers={"location": redirect_url},
+                        content=b"",
+                        request=response.request
+                    )
+
+            elif response.status_code in [409, 429, 503]:
print(f" [!] HTTP {response.status_code} - attempting to extract redirect info")
location = response.headers.get('location')
@@ -194,15 +292,12 @@ def update_domain_entries(data):
print(f" [!] 'full_url' missing. Skipped.")
continue
- ua = ua_generator.generate(device=('desktop', 'mobile'), browser=('chrome', 'edge', 'firefox', 'safari'))
- current_headers = ua.headers.get()
-
print(f" [] Stored URL: {original_full_url}")
if original_domain_in_entry:
print(f" [] Stored Domain (TLD): {original_domain_in_entry}")
print(f" [] Testing URL: {original_full_url}")
- response = try_url(original_full_url, current_headers)
+ response = try_url(original_full_url)
if response:
final_url_from_request = str(response.url)
diff --git a/.github/workflows/update_domain.yml b/.github/workflows/update_domain.yml
index 231c795..205596d 100644
--- a/.github/workflows/update_domain.yml
+++ b/.github/workflows/update_domain.yml
@@ -2,7 +2,7 @@ name: Update domains
on:
schedule:
- - cron: "0 */2 * * *"
+ - cron: "0 */3 * * *"
workflow_dispatch:
jobs:
diff --git a/StreamingCommunity/Upload/version.py b/StreamingCommunity/Upload/version.py
index 535de5b..da2e3af 100644
--- a/StreamingCommunity/Upload/version.py
+++ b/StreamingCommunity/Upload/version.py
@@ -1,5 +1,5 @@
__title__ = 'StreamingCommunity'
-__version__ = '3.0.8'
+__version__ = '3.0.9'
__author__ = 'Arrowar'
__description__ = 'A command-line program to download film'
__copyright__ = 'Copyright 2024'
diff --git a/setup.py b/setup.py
index 1fe021a..760cae5 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ with open(os.path.join(os.path.dirname(__file__), "requirements.txt"), "r", enco
setup(
name="StreamingCommunity",
- version="3.0.8",
+ version="3.0.9",
long_description=read_readme(),
long_description_content_type="text/markdown",
author="Lovi-0",