mirror of
https://github.com/Arrowar/StreamingCommunity.git
synced 2025-06-06 19:45:24 +00:00
Update get_domain
This commit is contained in:
parent
5a091d94d7
commit
7b94b17d2d
@ -406,7 +406,7 @@ The `run-container` command mounts also the `config.json` file, so any change to
|
|||||||
| [Ilcorsaronero](https://ilcorsaronero.link/) | ✅ |
|
| [Ilcorsaronero](https://ilcorsaronero.link/) | ✅ |
|
||||||
| [CB01New](https://cb01new.quest/) | ✅ |
|
| [CB01New](https://cb01new.quest/) | ✅ |
|
||||||
| [DDLStreamItaly](https://ddlstreamitaly.co/) | ✅ |
|
| [DDLStreamItaly](https://ddlstreamitaly.co/) | ✅ |
|
||||||
| [GuardaSerie](https://guardaserie.com/) | ✅ |
|
| [GuardaSerie](https://guardaserie.academy/) | ✅ |
|
||||||
| [MostraGuarda](https://mostraguarda.stream/) | ✅ |
|
| [MostraGuarda](https://mostraguarda.stream/) | ✅ |
|
||||||
| [StreamingCommunity](https://streamingcommunity.prof/) | ✅ |
|
| [StreamingCommunity](https://streamingcommunity.prof/) | ✅ |
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ def title_search(word_to_search: str) -> int:
|
|||||||
domain_to_use = DOMAIN_NOW
|
domain_to_use = DOMAIN_NOW
|
||||||
|
|
||||||
if not disable_searchDomain:
|
if not disable_searchDomain:
|
||||||
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
|
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
|
||||||
|
|
||||||
# Construct the full site URL and load the search page
|
# Construct the full site URL and load the search page
|
||||||
try:
|
try:
|
||||||
|
@ -43,7 +43,7 @@ def title_search(title_search: str) -> int:
|
|||||||
domain_to_use = DOMAIN_NOW
|
domain_to_use = DOMAIN_NOW
|
||||||
|
|
||||||
if not disable_searchDomain:
|
if not disable_searchDomain:
|
||||||
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
|
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
|
||||||
|
|
||||||
# Send request to search for title
|
# Send request to search for title
|
||||||
client = httpx.Client()
|
client = httpx.Client()
|
||||||
|
@ -110,7 +110,7 @@ def title_search(title: str) -> int:
|
|||||||
domain_to_use = DOMAIN_NOW
|
domain_to_use = DOMAIN_NOW
|
||||||
|
|
||||||
if not disable_searchDomain:
|
if not disable_searchDomain:
|
||||||
domain_to_use, base_url = search_domain(SITE_NAME, f"https://www.{SITE_NAME}")
|
domain_to_use, base_url = search_domain(SITE_NAME, f"https://www.{SITE_NAME}.{DOMAIN_NOW}")
|
||||||
|
|
||||||
data = get_token(SITE_NAME, domain_to_use)
|
data = get_token(SITE_NAME, domain_to_use)
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ def title_search(word_to_search: str) -> int:
|
|||||||
domain_to_use = DOMAIN_NOW
|
domain_to_use = DOMAIN_NOW
|
||||||
|
|
||||||
if not disable_searchDomain:
|
if not disable_searchDomain:
|
||||||
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
|
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
|
||||||
|
|
||||||
response = httpx.get(
|
response = httpx.get(
|
||||||
url=f"https://{SITE_NAME}.{domain_to_use}/?s={word_to_search}",
|
url=f"https://{SITE_NAME}.{domain_to_use}/?s={word_to_search}",
|
||||||
|
@ -46,7 +46,7 @@ def title_search(word_to_search: str) -> int:
|
|||||||
domain_to_use = DOMAIN_NOW
|
domain_to_use = DOMAIN_NOW
|
||||||
|
|
||||||
if not disable_searchDomain:
|
if not disable_searchDomain:
|
||||||
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
|
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
|
||||||
|
|
||||||
# Send request to search for titles
|
# Send request to search for titles
|
||||||
try:
|
try:
|
||||||
|
@ -43,7 +43,7 @@ def title_search(word_to_search: str) -> int:
|
|||||||
domain_to_use = DOMAIN_NOW
|
domain_to_use = DOMAIN_NOW
|
||||||
|
|
||||||
if not disable_searchDomain:
|
if not disable_searchDomain:
|
||||||
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
|
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
|
||||||
|
|
||||||
# Send request to search for titles
|
# Send request to search for titles
|
||||||
try:
|
try:
|
||||||
|
@ -38,7 +38,7 @@ async def title_search(word_to_search: str) -> int:
|
|||||||
domain_to_use = DOMAIN_NOW
|
domain_to_use = DOMAIN_NOW
|
||||||
|
|
||||||
if not disable_searchDomain:
|
if not disable_searchDomain:
|
||||||
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
|
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
|
||||||
|
|
||||||
# Create scraper and collect result
|
# Create scraper and collect result
|
||||||
print("\n")
|
print("\n")
|
||||||
|
@ -81,7 +81,7 @@ def get_version_and_domain():
|
|||||||
domain_to_use = DOMAIN_NOW
|
domain_to_use = DOMAIN_NOW
|
||||||
|
|
||||||
if not disable_searchDomain:
|
if not disable_searchDomain:
|
||||||
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
|
domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}.{DOMAIN_NOW}")
|
||||||
|
|
||||||
version = get_version(domain_to_use)
|
version = get_version(domain_to_use)
|
||||||
|
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
# 18.06.24
|
# 18.06.24
|
||||||
|
|
||||||
|
import ssl
|
||||||
|
import time
|
||||||
|
import certifi
|
||||||
from urllib.parse import urlparse, unquote
|
from urllib.parse import urlparse, unquote
|
||||||
|
|
||||||
|
|
||||||
@ -26,158 +29,146 @@ base_headers = {
|
|||||||
'sec-fetch-site': 'none',
|
'sec-fetch-site': 'none',
|
||||||
'sec-fetch-user': '?1',
|
'sec-fetch-user': '?1',
|
||||||
'upgrade-insecure-requests': '1',
|
'upgrade-insecure-requests': '1',
|
||||||
'user-agent': None
|
'user-agent': ''
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_tld(url_str):
|
def get_tld(url_str):
|
||||||
"""Extract the TLD (Top-Level Domain) from the URL without using external libraries."""
|
"""Extract the TLD (Top-Level Domain) from the URL."""
|
||||||
url_str = unquote(url_str)
|
try:
|
||||||
|
url_str = unquote(url_str)
|
||||||
parsed = urlparse(url_str)
|
parsed = urlparse(url_str)
|
||||||
domain = parsed.netloc.lower()
|
domain = parsed.netloc.lower()
|
||||||
if domain.startswith('www.'):
|
if domain.startswith('www.'):
|
||||||
domain = domain[4:]
|
domain = domain[4:]
|
||||||
|
parts = domain.split('.')
|
||||||
parts = domain.split('.')
|
return parts[-1] if len(parts) >= 2 else None
|
||||||
|
except Exception:
|
||||||
if len(parts) >= 2:
|
return None
|
||||||
return parts[-1]
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_base_domain(url_str):
|
def get_base_domain(url_str):
|
||||||
"""Extract base domain without protocol, www and path"""
|
"""Extract base domain without protocol, www and path."""
|
||||||
parsed = urlparse(url_str)
|
try:
|
||||||
domain = parsed.netloc.lower()
|
parsed = urlparse(url_str)
|
||||||
if domain.startswith('www.'):
|
domain = parsed.netloc.lower()
|
||||||
domain = domain[4:]
|
if domain.startswith('www.'):
|
||||||
return domain.split('.')[0]
|
domain = domain[4:]
|
||||||
|
# Check if domain has multiple parts separated by dots
|
||||||
|
parts = domain.split('.')
|
||||||
|
if len(parts) > 2: # Handle subdomains
|
||||||
|
return '.'.join(parts[:-1]) # Return everything except TLD
|
||||||
|
return parts[0] # Return base domain
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
def validate_url(url, base_url, max_timeout, max_retries=5):
|
def validate_url(url, base_url, max_timeout, max_retries=5):
|
||||||
"""
|
"""Validate if URL is accessible and matches expected base domain."""
|
||||||
Validate if URL is accessible and matches expected base domain, with retry mechanism for 403 errors.
|
|
||||||
"""
|
|
||||||
console.print(f"\n[cyan]Starting validation for URL[white]: [yellow]{url}")
|
console.print(f"\n[cyan]Starting validation for URL[white]: [yellow]{url}")
|
||||||
|
|
||||||
|
# Verify URL structure matches base_url structure
|
||||||
|
base_domain = get_base_domain(base_url)
|
||||||
|
url_domain = get_base_domain(url)
|
||||||
|
base_headers['user-agent'] = get_headers()
|
||||||
|
|
||||||
|
if base_domain != url_domain:
|
||||||
|
console.print(f"[red]Domain structure mismatch: {url_domain} != {base_domain}")
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
# Count dots to ensure we don't have extra subdomains
|
||||||
|
base_dots = base_url.count('.')
|
||||||
|
url_dots = url.count('.')
|
||||||
|
if url_dots > base_dots + 1: # Allow for one extra dot for TLD change
|
||||||
|
console.print(f"[red]Too many subdomains in URL")
|
||||||
|
return False, None
|
||||||
|
|
||||||
def check_response(response, check_num):
|
client = httpx.Client(
|
||||||
if response.status_code == 403:
|
verify=certifi.where(),
|
||||||
console.print(f"[red]Check {check_num} failed: Access forbidden (403)")
|
headers=base_headers,
|
||||||
return False
|
timeout=max_timeout
|
||||||
if response.status_code >= 400:
|
)
|
||||||
console.print(f"[red]Check {check_num} failed: HTTP {response.status_code}")
|
|
||||||
return False
|
|
||||||
console.print(f"[green]Check {check_num} passed: HTTP {response.status_code}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
retries = 0
|
for retry in range(max_retries):
|
||||||
|
|
||||||
while retries < max_retries:
|
|
||||||
try:
|
try:
|
||||||
# Check 1: Initial request without following redirects
|
time.sleep(2) # Add delay between retries
|
||||||
#console.print("[cyan]Performing initial connection check...")
|
|
||||||
base_headers['user-agent'] = get_headers()
|
# Initial check without redirects
|
||||||
|
response = client.get(url, follow_redirects=False)
|
||||||
with httpx.Client(
|
if response.status_code == 403:
|
||||||
headers=base_headers,
|
console.print(f"[red]Check failed (403) - Attempt {retry + 1}/{max_retries}")
|
||||||
follow_redirects=False,
|
continue
|
||||||
timeout=max_timeout
|
|
||||||
) as client:
|
if response.status_code >= 400:
|
||||||
response = client.get(url)
|
console.print(f"[red]Check failed: HTTP {response.status_code}")
|
||||||
if not check_response(response, 1):
|
return False, None
|
||||||
if response.status_code == 403:
|
|
||||||
retries += 1
|
# Follow redirects and verify final domain
|
||||||
console.print(f"[yellow]Retrying... Attempt {retries}/{max_retries}")
|
final_response = client.get(url, follow_redirects=True)
|
||||||
continue # Retry on 403 error
|
final_domain = get_base_domain(str(final_response.url))
|
||||||
return False, None
|
console.print(f"[cyan]Redirect url: [red]{final_response.url}")
|
||||||
|
|
||||||
# Check 2: Follow redirects and verify final domain
|
if final_domain != base_domain:
|
||||||
#console.print("[cyan]Checking redirect destination...")
|
console.print(f"[red]Final domain mismatch: {final_domain} != {base_domain}")
|
||||||
with httpx.Client(
|
return False, None
|
||||||
headers=base_headers,
|
|
||||||
follow_redirects=True,
|
new_tld = get_tld(str(final_response.url))
|
||||||
timeout=max_timeout
|
if new_tld != get_tld(url):
|
||||||
) as client:
|
return True, new_tld
|
||||||
response = client.get(url)
|
|
||||||
if not check_response(response, 2):
|
return True, None
|
||||||
return False, None
|
|
||||||
|
except (httpx.RequestError, ssl.SSLError) as e:
|
||||||
# Compare base domains
|
console.print(f"[red]Connection error: {str(e)}")
|
||||||
original_base = get_base_domain(url)
|
time.sleep(2) # Add delay after error
|
||||||
final_base = get_base_domain(str(response.url))
|
continue
|
||||||
|
|
||||||
"""console.print(f"[cyan]Comparing domains:")
|
|
||||||
console.print(f"Original base domain: [yellow]{original_base}.{get_tld(str(url))}")
|
|
||||||
console.print(f"Final base domain: [yellow]{final_base}.{get_tld(str(response.url))}")"""
|
|
||||||
|
|
||||||
if original_base != final_base:
|
|
||||||
return False, None
|
|
||||||
|
|
||||||
expected_base = get_base_domain(base_url)
|
|
||||||
if final_base != expected_base:
|
|
||||||
return False, None
|
|
||||||
|
|
||||||
if get_tld(str(url)) != get_tld(str(response.url)):
|
|
||||||
return True, get_tld(str(response.url))
|
|
||||||
|
|
||||||
#console.print(f"[green]All checks passed: URL is valid and matches expected domain")
|
|
||||||
return True, None
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
console.print(f"[red]Error during validation: {str(e)}")
|
|
||||||
return False, None
|
|
||||||
|
|
||||||
console.print(f"[red]Maximum retries reached for URL: {url}")
|
|
||||||
return False, None
|
return False, None
|
||||||
|
|
||||||
def search_domain(site_name: str, base_url: str, get_first: bool = False):
|
def search_domain(site_name: str, base_url: str, get_first: bool = False):
|
||||||
"""
|
"""Search for valid domain matching site name and base URL."""
|
||||||
Search for valid domain matching site name and base URL.
|
|
||||||
"""
|
|
||||||
max_timeout = config_manager.get_int("REQUESTS", "timeout")
|
max_timeout = config_manager.get_int("REQUESTS", "timeout")
|
||||||
domain = str(config_manager.get_dict("SITE", site_name)['domain'])
|
domain = str(config_manager.get_dict("SITE", site_name)['domain'])
|
||||||
|
|
||||||
|
# Test initial URL
|
||||||
try:
|
try:
|
||||||
is_correct, redirect_tld = validate_url(base_url, base_url, max_timeout, max_retries=5)
|
is_correct, redirect_tld = validate_url(base_url, base_url, max_timeout)
|
||||||
|
|
||||||
if is_correct and redirect_tld is not None:
|
|
||||||
config_manager.config['SITE'][site_name]['domain'] = redirect_tld
|
|
||||||
config_manager.write_config()
|
|
||||||
console.print(f"[green]Successfully validated initial URL")
|
|
||||||
return redirect_tld, base_url
|
|
||||||
|
|
||||||
if is_correct:
|
if is_correct:
|
||||||
parsed_url = urlparse(base_url)
|
tld = redirect_tld or get_tld(base_url)
|
||||||
tld = parsed_url.netloc.split('.')[-1]
|
|
||||||
config_manager.config['SITE'][site_name]['domain'] = tld
|
config_manager.config['SITE'][site_name]['domain'] = tld
|
||||||
config_manager.write_config()
|
config_manager.write_config()
|
||||||
console.print(f"[green]Successfully validated initial URL")
|
console.print(f"[green]Successfully validated initial URL")
|
||||||
return tld, base_url
|
return tld, base_url
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
console.print(f"[red]Error testing initial URL: {str(e)}")
|
console.print(f"[red]Error testing initial URL: {str(e)}")
|
||||||
|
|
||||||
# Google search phase
|
# Google search phase
|
||||||
query = base_url.split("/")[-1]
|
base_domain = get_base_domain(base_url)
|
||||||
console.print(f"\n[cyan]Performing Google search for[white]: [yellow]{query}")
|
console.print(f"\n[cyan]Searching for alternate domains for[white]: [yellow]{base_domain}")
|
||||||
search_results = list(search(query, num_results=20, lang="it"))
|
|
||||||
|
try:
|
||||||
for idx, result_url in enumerate(search_results, 1):
|
search_results = list(search(base_domain, num_results=20, lang="it"))
|
||||||
if get_base_domain(result_url) == get_base_domain(base_url):
|
filtered_results = [
|
||||||
console.print(f"\n[cyan]Checking Google result {idx}/20[white]: [yellow]{result_url}")
|
url for url in search_results
|
||||||
|
if get_base_domain(url) == base_domain
|
||||||
if validate_url(result_url, base_url, max_timeout):
|
and url.count('.') <= base_url.count('.') + 1
|
||||||
parsed_result = urlparse(result_url)
|
]
|
||||||
new_domain = parsed_result.netloc.split(".")[-1]
|
|
||||||
|
|
||||||
|
for idx, result_url in enumerate(filtered_results, 1):
|
||||||
|
console.print(f"\n[cyan]Checking result {idx}/{len(filtered_results)}[white]: [yellow]{result_url}")
|
||||||
|
|
||||||
|
is_valid, new_tld = validate_url(result_url, base_url, max_timeout)
|
||||||
|
if is_valid:
|
||||||
|
final_tld = new_tld or get_tld(result_url)
|
||||||
if get_first or msg.ask(
|
if get_first or msg.ask(
|
||||||
f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'",
|
f"\n[cyan]Update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{final_tld}'",
|
||||||
choices=["y", "n"],
|
choices=["y", "n"],
|
||||||
default="y"
|
default="y"
|
||||||
).lower() == "y":
|
).lower() == "y":
|
||||||
|
config_manager.config['SITE'][site_name]['domain'] = final_tld
|
||||||
config_manager.config['SITE'][site_name]['domain'] = new_domain
|
|
||||||
config_manager.write_config()
|
config_manager.write_config()
|
||||||
return new_domain, f"{base_url}.{new_domain}"
|
return final_tld, f"{base_url}.{final_tld}"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
console.print(f"[red]Error during search: {str(e)}")
|
||||||
|
|
||||||
console.print("[bold red]No valid URLs found matching the base URL.")
|
console.print("[bold red]No valid URLs found matching the base URL.")
|
||||||
return domain, f"{base_url}.{domain}"
|
return domain, f"{base_url}.{domain}"
|
@ -123,4 +123,4 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
update_readme(alias, domain_to_use)
|
update_readme(alias, domain_to_use)
|
||||||
print("------------------------------------")
|
print("------------------------------------")
|
||||||
time.sleep(3)
|
time.sleep(2)
|
@ -64,7 +64,7 @@
|
|||||||
"domain": "prof"
|
"domain": "prof"
|
||||||
},
|
},
|
||||||
"guardaserie": {
|
"guardaserie": {
|
||||||
"domain": "com"
|
"domain": "academy"
|
||||||
},
|
},
|
||||||
"mostraguarda": {
|
"mostraguarda": {
|
||||||
"domain": "stream"
|
"domain": "stream"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user