Update config.json

This commit is contained in:
Lovi 2025-01-02 17:31:47 +01:00
parent 738de7d52f
commit af3a429b9f
3 changed files with 113 additions and 107 deletions

View File

@ -6,9 +6,6 @@
<a href="https://pypi.org/project/streamingcommunity"> <a href="https://pypi.org/project/streamingcommunity">
<img src="https://img.shields.io/pypi/v/streamingcommunity?logo=pypi&labelColor=555555&style=for-the-badge" alt="PyPI"/> <img src="https://img.shields.io/pypi/v/streamingcommunity?logo=pypi&labelColor=555555&style=for-the-badge" alt="PyPI"/>
</a> </a>
<a href="https://www.python.org">
<img src="https://img.shields.io/badge/Python->=3.8-3776AB?style=for-the-badge&logo=python&logoColor=white" alt="Python"/>
</a>
<a href="https://www.paypal.com/donate/?hosted_button_id=UXTWMT8P6HE2C"> <a href="https://www.paypal.com/donate/?hosted_button_id=UXTWMT8P6HE2C">
<img src="https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge" alt="Donate"/> <img src="https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge" alt="Donate"/>
</a> </a>

View File

@ -18,17 +18,16 @@ from StreamingCommunity.Util._jsonConfig import config_manager
def google_search(query): def google_search(query):
""" """
Perform a Google search and return the first result. Perform a Google search and return the first result.
Args:
query (str): The search query to execute on Google.
Returns:
str: The first URL result from the search, or None if no result is found.
"""
# Perform the search on Google and limit to 1 result
search_results = search(query, num_results=1)
# Extract the first result Args:
query (str): Search query to execute
Returns:
str: First URL from search results, None if no results found
"""
# Perform search with single result limit
search_results = search(query, num_results=1)
first_result = next(search_results, None) first_result = next(search_results, None)
if not first_result: if not first_result:
@ -36,18 +35,51 @@ def google_search(query):
return first_result return first_result
def validate_url(url, max_timeout):
"""
Validate if a URL is accessible via GET request.
Args:
url (str): URL to validate
max_timeout (int): Maximum timeout for request
Returns:
bool: True if URL is valid and accessible, False otherwise
"""
try:
with httpx.Client(
headers={
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
'User-Agent': get_headers()
},
follow_redirects=False,
timeout=max_timeout
) as client:
# Attempt GET request
response = client.get(url)
if response.status_code == 403:
return False
response.raise_for_status()
return True
except Exception:
return False
def get_final_redirect_url(initial_url, max_timeout): def get_final_redirect_url(initial_url, max_timeout):
""" """
Follow redirects from the initial URL and return the final URL after all redirects. Follow all redirects for a URL and return final destination.
Args: Args:
initial_url (str): The URL to start with and follow redirects. initial_url (str): Starting URL to follow redirects from
max_timeout (int): Maximum timeout for request
Returns: Returns:
str: The final URL after all redirects are followed. str: Final URL after all redirects, None if error occurs
""" """
# Create a client with redirects enabled
try: try:
with httpx.Client( with httpx.Client(
headers={ headers={
@ -57,8 +89,9 @@ def get_final_redirect_url(initial_url, max_timeout):
}, },
follow_redirects=True, follow_redirects=True,
timeout=max_timeout timeout=max_timeout
) as client: ) as client:
# Follow redirects and get response
response = client.get(initial_url) response = client.get(initial_url)
if response.status_code == 403: if response.status_code == 403:
@ -66,11 +99,7 @@ def get_final_redirect_url(initial_url, max_timeout):
raise raise
response.raise_for_status() response.raise_for_status()
return response.url
# Capture the final URL after all redirects
final_url = response.url
return final_url
except Exception as e: except Exception as e:
console.print(f"\n[cyan]Test url[white]: [red]{initial_url}, [cyan]error[white]: [red]{e}") console.print(f"\n[cyan]Test url[white]: [red]{initial_url}, [cyan]error[white]: [red]{e}")
@ -78,97 +107,77 @@ def get_final_redirect_url(initial_url, max_timeout):
def search_domain(site_name: str, base_url: str, get_first: bool = False): def search_domain(site_name: str, base_url: str, get_first: bool = False):
""" """
Search for a valid domain for the given site name and base URL. Search for valid domain matching site name and base URL.
Parameters:
- site_name (str): The name of the site to search the domain for.
- base_url (str): The base URL to construct complete URLs.
- get_first (bool): If True, automatically update to the first valid match without user confirmation.
Returns:
tuple: The found domain and the complete URL.
"""
# Extract config domain Args:
site_name (str): Name of site to find domain for
base_url (str): Base URL to construct complete URLs
get_first (bool): Auto-update config with first valid match if True
Returns:
tuple: (found_domain, complete_url)
"""
# Get configuration values
max_timeout = config_manager.get_int("REQUESTS", "timeout") max_timeout = config_manager.get_int("REQUESTS", "timeout")
domain = str(config_manager.get_dict("SITE", site_name)['domain']) domain = str(config_manager.get_dict("SITE", site_name)['domain'])
try: try:
# Test the current domain # Test current domain configuration
with httpx.Client( test_url = f"{base_url}.{domain}"
headers={
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
'User-Agent': get_headers()
},
follow_redirects=True,
timeout=max_timeout
) as client:
response_follow = client.get(f"{base_url}.{domain}")
response_follow.raise_for_status()
except Exception as e: if validate_url(test_url, max_timeout):
query = base_url.split("/")[-1] parsed_url = urlparse(test_url)
tld = parsed_url.netloc.split('.')[-1]
config_manager.config['SITE'][site_name]['domain'] = tld
config_manager.write_config()
return tld, test_url
# Perform a Google search with multiple results except Exception:
search_results = list(search(query, num_results=10, lang="it")) pass
#console.print(f"\nGoogle search results: {search_results}")
def normalize_for_comparison(url): # Perform Google search if current domain fails
"""Normalize URL by removing protocol, www, and trailing slashes""" query = base_url.split("/")[-1]
url = url.lower() search_results = list(search(query, num_results=10, lang="it"))
url = url.replace("https://", "").replace("http://", "") console.print(f"Google search: {search_results}")
url = url.replace("www.", "")
return url.rstrip("/")
# Normalize the base_url we're looking for def normalize_for_comparison(url):
target_url = normalize_for_comparison(base_url) """Normalize URL by removing protocol, www, and trailing slashes"""
url = url.lower()
url = url.replace("https://", "").replace("http://", "")
url = url.replace("www.", "")
return url.rstrip("/")
# Iterate through search results target_url = normalize_for_comparison(base_url)
for first_url in search_results:
console.print(f"[green]Checking url[white]: [red]{first_url}")
# Get just the domain part of the search result # Check each search result
parsed_result = urlparse(first_url) for result_url in search_results:
result_domain = normalize_for_comparison(parsed_result.netloc) #console.print(f"[green]Checking url[white]: [red]{result_url}")
# Compare with our target URL (without the protocol part) # Skip invalid URLs
if result_domain.startswith(target_url.split("/")[-1]): if not validate_url(result_url, max_timeout):
try: console.print(f"[red]URL validation failed for: {result_url}")
final_url = get_final_redirect_url(first_url, max_timeout) continue
if final_url is not None: parsed_result = urlparse(result_url)
def extract_domain(url): result_domain = normalize_for_comparison(parsed_result.netloc)
parsed_url = urlparse(url)
domain = parsed_url.netloc
return domain.split(".")[-1]
new_domain_extract = extract_domain(str(final_url)) # Check if domain matches target
if result_domain.startswith(target_url.split("/")[-1]):
final_url = get_final_redirect_url(result_url, max_timeout)
if final_url is not None:
new_domain = urlparse(str(final_url)).netloc.split(".")[-1]
if get_first or msg.ask(f"\n[cyan]Do you want to auto update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain_extract}'.", choices=["y", "n"], default="y").lower() == "y": # Update config if auto-update enabled or user confirms
# Update domain in config.json if get_first or msg.ask(
config_manager.config['SITE'][site_name]['domain'] = new_domain_extract f"\n[cyan]Do you want to auto update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'.",
config_manager.write_config() choices=["y", "n"],
default="y"
).lower() == "y":
config_manager.config['SITE'][site_name]['domain'] = new_domain
config_manager.write_config()
return new_domain, f"{base_url}.{new_domain}"
return new_domain_extract, f"{base_url}.{new_domain_extract}" # Return original domain if no valid matches found
console.print("[bold red]No valid URL found matching the base URL.[/bold red]")
except Exception as redirect_error: return domain, f"{base_url}.{domain}"
console.print(f"[red]Error following redirect for {first_url}: {redirect_error}")
continue
# If no matching URL is found return base domain
console.print("[bold red]No valid URL found matching the base URL.[/bold red]")
return domain, f"{base_url}.{domain}"
# Handle successful initial domain check
parsed_url = urlparse(str(response_follow.url))
parse_domain = parsed_url.netloc
tld = parse_domain.split('.')[-1]
if tld is not None:
# Update domain in config.json
config_manager.config['SITE'][site_name]['domain'] = tld
config_manager.write_config()
# Return config domain
return tld, f"{base_url}.{tld}"

View File

@ -59,7 +59,7 @@
"domain": "prof" "domain": "prof"
}, },
"altadefinizione": { "altadefinizione": {
"domain": "deal" "domain": "prof"
}, },
"guardaserie": { "guardaserie": {
"domain": "academy" "domain": "academy"
@ -76,10 +76,10 @@
} }
}, },
"animeunity": { "animeunity": {
"domain": "to" "domain": "so"
}, },
"cb01new": { "cb01new": {
"domain": "stream" "domain": "pics"
}, },
"1337xx": { "1337xx": {
"domain": "to" "domain": "to"