mirror of
https://github.com/Arrowar/StreamingCommunity.git
synced 2025-06-06 19:45:24 +00:00
Update domain
This commit is contained in:
parent
2fe1f449eb
commit
d076073a08
@ -25,13 +25,21 @@ class IlCorsaroNeroScraper:
|
|||||||
self.base_url = base_url
|
self.base_url = base_url
|
||||||
self.max_page = max_page
|
self.max_page = max_page
|
||||||
self.headers = {
|
self.headers = {
|
||||||
'User-Agent': get_headers(),
|
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
|
||||||
'Accept-Language': 'en-US,en;q=0.5',
|
'cache-control': 'max-age=0',
|
||||||
'Connection': 'keep-alive',
|
'priority': 'u=0, i',
|
||||||
'Upgrade-Insecure-Requests': '1'
|
'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
||||||
|
'sec-ch-ua-mobile': '?0',
|
||||||
|
'sec-ch-ua-platform': '"Windows"',
|
||||||
|
'sec-fetch-dest': 'document',
|
||||||
|
'sec-fetch-mode': 'navigate',
|
||||||
|
'sec-fetch-site': 'same-origin',
|
||||||
|
'sec-fetch-user': '?1',
|
||||||
|
'upgrade-insecure-requests': '1',
|
||||||
|
'user-agent': get_headers()
|
||||||
}
|
}
|
||||||
|
|
||||||
async def fetch_url(self, url: str) -> Optional[str]:
|
async def fetch_url(self, url: str) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Fetch the HTML content of a given URL.
|
Fetch the HTML content of a given URL.
|
||||||
|
@ -39,7 +39,7 @@ def download_film(select_title: MediaItem) -> str:
|
|||||||
|
|
||||||
# Start message and display film information
|
# Start message and display film information
|
||||||
start_message()
|
start_message()
|
||||||
console.print(f"[yellow]Download: [red]{select_title.slug} \n")
|
console.print(f"[yellow]Download: [red]{select_title.name} \n")
|
||||||
|
|
||||||
# Init class
|
# Init class
|
||||||
video_source = VideoSource(SITE_NAME, False)
|
video_source = VideoSource(SITE_NAME, False)
|
||||||
|
@ -76,17 +76,19 @@ def get_final_redirect_url(initial_url, max_timeout):
|
|||||||
console.print(f"\n[cyan]Test url[white]: [red]{initial_url}, [cyan]error[white]: [red]{e}")
|
console.print(f"\n[cyan]Test url[white]: [red]{initial_url}, [cyan]error[white]: [red]{e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def search_domain(site_name: str, base_url: str):
|
def search_domain(site_name: str, base_url: str, get_first: bool = False):
|
||||||
"""
|
"""
|
||||||
Search for a valid domain for the given site name and base URL.
|
Search for a valid domain for the given site name and base URL.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- site_name (str): The name of the site to search the domain for.
|
- site_name (str): The name of the site to search the domain for.
|
||||||
- base_url (str): The base URL to construct complete URLs.
|
- base_url (str): The base URL to construct complete URLs.
|
||||||
|
- get_first (bool): If True, automatically update to the first valid match without user confirmation.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: The found domain and the complete URL.
|
tuple: The found domain and the complete URL.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Extract config domain
|
# Extract config domain
|
||||||
max_timeout = config_manager.get_int("REQUESTS", "timeout")
|
max_timeout = config_manager.get_int("REQUESTS", "timeout")
|
||||||
domain = str(config_manager.get_dict("SITE", site_name)['domain'])
|
domain = str(config_manager.get_dict("SITE", site_name)['domain'])
|
||||||
@ -107,10 +109,10 @@ def search_domain(site_name: str, base_url: str):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
query = base_url.split("/")[-1]
|
query = base_url.split("/")[-1]
|
||||||
|
|
||||||
# Perform a Google search with multiple results
|
# Perform a Google search with multiple results
|
||||||
search_results = list(search(query, num_results=10, lang="it"))
|
search_results = list(search(query, num_results=10, lang="it"))
|
||||||
console.print(f"\nGoogle search results: {search_results}")
|
#console.print(f"\nGoogle search results: {search_results}")
|
||||||
|
|
||||||
def normalize_for_comparison(url):
|
def normalize_for_comparison(url):
|
||||||
"""Normalize URL by removing protocol, www, and trailing slashes"""
|
"""Normalize URL by removing protocol, www, and trailing slashes"""
|
||||||
@ -121,15 +123,15 @@ def search_domain(site_name: str, base_url: str):
|
|||||||
|
|
||||||
# Normalize the base_url we're looking for
|
# Normalize the base_url we're looking for
|
||||||
target_url = normalize_for_comparison(base_url)
|
target_url = normalize_for_comparison(base_url)
|
||||||
|
|
||||||
# Iterate through search results
|
# Iterate through search results
|
||||||
for first_url in search_results:
|
for first_url in search_results:
|
||||||
console.print(f"[green]Checking url[white]: [red]{first_url}")
|
console.print(f"[green]Checking url[white]: [red]{first_url}")
|
||||||
|
|
||||||
# Get just the domain part of the search result
|
# Get just the domain part of the search result
|
||||||
parsed_result = urlparse(first_url)
|
parsed_result = urlparse(first_url)
|
||||||
result_domain = normalize_for_comparison(parsed_result.netloc)
|
result_domain = normalize_for_comparison(parsed_result.netloc)
|
||||||
|
|
||||||
# Compare with our target URL (without the protocol part)
|
# Compare with our target URL (without the protocol part)
|
||||||
if result_domain.startswith(target_url.split("/")[-1]):
|
if result_domain.startswith(target_url.split("/")[-1]):
|
||||||
try:
|
try:
|
||||||
@ -143,21 +145,20 @@ def search_domain(site_name: str, base_url: str):
|
|||||||
|
|
||||||
new_domain_extract = extract_domain(str(final_url))
|
new_domain_extract = extract_domain(str(final_url))
|
||||||
|
|
||||||
if msg.ask(f"\n[cyan]Do you want to auto update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain_extract}'.", choices=["y", "n"], default="y").lower() == "y":
|
if get_first or msg.ask(f"\n[cyan]Do you want to auto update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain_extract}'.", choices=["y", "n"], default="y").lower() == "y":
|
||||||
|
|
||||||
# Update domain in config.json
|
# Update domain in config.json
|
||||||
config_manager.config['SITE'][site_name]['domain'] = new_domain_extract
|
config_manager.config['SITE'][site_name]['domain'] = new_domain_extract
|
||||||
config_manager.write_config()
|
config_manager.write_config()
|
||||||
|
|
||||||
return new_domain_extract, f"{base_url}.{new_domain_extract}"
|
return new_domain_extract, f"{base_url}.{new_domain_extract}"
|
||||||
|
|
||||||
except Exception as redirect_error:
|
except Exception as redirect_error:
|
||||||
console.print(f"[red]Error following redirect for {first_url}: {redirect_error}")
|
console.print(f"[red]Error following redirect for {first_url}: {redirect_error}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# If no matching URL is found
|
# If no matching URL is found return base domain
|
||||||
console.print("[bold red]No valid URL found matching the base URL.[/bold red]")
|
console.print("[bold red]No valid URL found matching the base URL.[/bold red]")
|
||||||
raise Exception("No matching domain found")
|
return domain, f"{base_url}.{domain}"
|
||||||
|
|
||||||
# Handle successful initial domain check
|
# Handle successful initial domain check
|
||||||
parsed_url = urlparse(str(response_follow.url))
|
parsed_url = urlparse(str(response_follow.url))
|
||||||
|
90
Test/call_updateDomain.py
Normal file
90
Test/call_updateDomain.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
# 12.11.24
|
||||||
|
|
||||||
|
# Fix import
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||||
|
sys.path.append(src_path)
|
||||||
|
|
||||||
|
|
||||||
|
# Other
|
||||||
|
import glob
|
||||||
|
import logging
|
||||||
|
import importlib
|
||||||
|
from rich.console import Console
|
||||||
|
|
||||||
|
|
||||||
|
# Util
|
||||||
|
from StreamingCommunity.Api.Template.Util import search_domain
|
||||||
|
|
||||||
|
|
||||||
|
# Variable
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
def load_site_names():
    """
    Collect the SITE_NAME of every non-deprecated site module.

    Looks for ``StreamingCommunity/Api/Site/*/__init__.py`` relative to this
    file, imports each package found and reads its ``indice``, ``_deprecate``
    and ``_useFor`` attributes. A module is kept only when ``_deprecate`` is
    falsy; a module that does not define ``_deprecate`` is treated as
    deprecated and skipped. Import failures are printed and the module is
    skipped.

    Returns:
        dict: Maps ``"<module_name>_site_name"`` to a ``(SITE_NAME, use_for)``
        tuple, inserted in ascending ``indice`` order. Modules without a
        truthy ``SITE_NAME`` are omitted.
    """
    modules = []
    site_names = {}

    # Traverse the Api directory
    api_dir = os.path.join(os.path.dirname(__file__), '..', 'StreamingCommunity', 'Api', 'Site')

    # glob returns paths in arbitrary order; sorting them keeps the result
    # deterministic when several modules share the same 'indice'
    # (the sort below is stable).
    init_files = sorted(glob.glob(os.path.join(api_dir, '*', '__init__.py')))

    # Retrieve modules and their indices
    for init_file in init_files:

        # Get folder name as module name
        module_name = os.path.basename(os.path.dirname(init_file))
        logging.info("Load module name: %s", module_name)

        try:
            # Dynamically import the module
            mod = importlib.import_module(f'StreamingCommunity.Api.Site.{module_name}')

            # Get 'indice' from the module
            indice = getattr(mod, 'indice', 0)
            is_deprecate = bool(getattr(mod, '_deprecate', True))
            use_for = getattr(mod, '_useFor', 'other')

            if not is_deprecate:
                # Keep the module object so it does not have to be imported again
                modules.append((module_name, indice, use_for, mod))

        except Exception as e:
            console.print(f"[red]Failed to import module {module_name}: {str(e)}")

    # Sort modules by 'indice'
    modules.sort(key=lambda x: x[1])

    # Load SITE_NAME from each module in the sorted order
    for module_name, _, use_for, mod in modules:

        # Construct a unique alias for the module
        module_alias = f'{module_name}_site_name'
        logging.info("Module alias: %s", module_alias)

        try:
            # Get the SITE_NAME variable from the module
            site_name = getattr(mod, 'SITE_NAME', None)

        except Exception as e:
            console.print(f"[red]Failed to load SITE_NAME from module {module_name}: {str(e)}")
            continue

        if site_name:
            # Add the SITE_NAME to the dictionary
            site_names[module_alias] = (site_name, use_for)

    return site_names
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Refresh the configured domain of every site returned by load_site_names().
    site_names = load_site_names()

    for alias, (site_name, use_for) in site_names.items():
        print(f"Alias: {alias}, SITE_NAME: {site_name}, Use for: {use_for}")

        # animeunity is looked up with a "www." host prefix; every other
        # site uses its bare name.
        host_prefix = "www." if site_name == "animeunity" else ""

        # get_first=True takes the first valid match without asking the user.
        # The returned (domain, url) pair is not needed here.
        search_domain(site_name=site_name, base_url=f"https://{host_prefix}{site_name}", get_first=True)
|
@ -79,7 +79,7 @@
|
|||||||
"domain": "to"
|
"domain": "to"
|
||||||
},
|
},
|
||||||
"cb01new": {
|
"cb01new": {
|
||||||
"domain": "icu"
|
"domain": "stream"
|
||||||
},
|
},
|
||||||
"1337xx": {
|
"1337xx": {
|
||||||
"domain": "to"
|
"domain": "to"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user