Update get_domain

Lovi 2025-01-06 15:02:58 +01:00
parent 90adae0c3e
commit e0f76fe93c
7 changed files with 74 additions and 50 deletions

View File

@@ -2,7 +2,7 @@ name: Update domain
 
 on:
   schedule:
-    - cron: '0 0 * * *'
+    - cron: '0 */8 * * *'
   workflow_dispatch:
 
 jobs:

View File

@@ -404,7 +404,7 @@ The `run-container` command mounts also the `config.json` file, so any change to
 | [Altadefinizione](https://altadefinizione.prof/) | ✅ |
 | [AnimeUnity](https://animeunity.so/) | ✅ |
 | [Ilcorsaronero](https://ilcorsaronero.link/) | ✅ |
-| [CB01New](https://cb01new.pics/) | ✅ |
+| [CB01New](https://cb01new.quest/) | ✅ |
 | [DDLStreamItaly](https://ddlstreamitaly.co/) | ✅ |
 | [GuardaSerie](https://guardaserie.academy/) | ✅ |
 | [MostraGuarda](https://mostraguarda.stream/) | ✅ |

View File

@@ -54,7 +54,7 @@ class IlCorsaroNeroScraper:
             return response.text
 
         except Exception as e:
-            logging.error(f"Error fetching {url}: {e}")
+            logging.error(f"Error fetching from {url}: {e}")
             return None
 
     def parse_torrents(self, html: str) -> List[Dict[str, str]]:

View File

@@ -1,6 +1,6 @@
 # 18.06.24
 
-from urllib.parse import urlparse
+from urllib.parse import urlparse, unquote
 
 
 # External libraries
@@ -14,6 +14,21 @@ from StreamingCommunity.Util.console import console, msg
 from StreamingCommunity.Util._jsonConfig import config_manager
 
 
+def get_tld(url_str):
+    """Extract the TLD (Top-Level Domain) from the URL without using external libraries."""
+    url_str = unquote(url_str)
+
+    parsed = urlparse(url_str)
+    domain = parsed.netloc.lower()
+
+    if domain.startswith('www.'):
+        domain = domain[4:]
+
+    parts = domain.split('.')
+    if len(parts) >= 2:
+        return parts[-1]
+
+    return None
+
 def get_base_domain(url_str):
     """Extract base domain without protocol, www and path"""
     parsed = urlparse(url_str)
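As a quick illustration of the `get_tld` helper added above (the URLs below are examples, not part of the commit): it keeps only the last dot-separated label of the hostname, which is the bare TLD string the `SITE` config stores under `domain`.

```python
# Illustrative calls against the get_tld helper introduced in this hunk.
get_tld("https://www.cb01new.quest/film/")   # -> 'quest'
get_tld("https://animeunity.so")             # -> 'so'
get_tld("https://localhost")                 # -> None (netloc has no dot)
```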
@@ -49,7 +64,7 @@ def validate_url(url, base_url, max_timeout):
         ) as client:
             response = client.get(url)
             if not check_response(response, 1):
-                return False
+                return False, None
 
         # Check 2: Follow redirects and verify final domain
         console.print("[cyan]Checking redirect destination...")
@@ -61,33 +76,32 @@ def validate_url(url, base_url, max_timeout):
             response = client.get(url)
             if not check_response(response, 2):
-                return False
+                return False, None
 
             # Compare base domains
             original_base = get_base_domain(url)
             final_base = get_base_domain(str(response.url))
 
             console.print(f"[cyan]Comparing domains:")
-            console.print(f"Original base domain: [yellow]{original_base}")
-            console.print(f"Final base domain: [yellow]{final_base}")
+            console.print(f"Original base domain: [yellow]{original_base}.{get_tld(str(url))}")
+            console.print(f"Final base domain: [yellow]{final_base}.{get_tld(str(response.url))}")
 
             if original_base != final_base:
-                console.print(f"[red]Domain mismatch: Redirected to different base domain")
-                return False
+                return False, None
 
-            # Verify against expected base_url
             expected_base = get_base_domain(base_url)
             if final_base != expected_base:
-                console.print(f"[red]Domain mismatch: Final domain does not match expected base URL")
-                console.print(f"Expected: [yellow]{expected_base}")
-                return False
+                return False, None
+
+            if get_tld(str(url)) != get_tld(str(response.url)):
+                return True, get_tld(str(response.url))
 
             console.print(f"[green]All checks passed: URL is valid and matches expected domain")
-            return True
+            return True, None
 
     except Exception as e:
         console.print(f"[red]Error during validation: {str(e)}")
-        return False
+        return False, None
 
 
 def search_domain(site_name: str, base_url: str, get_first: bool = False):
     """
@@ -100,7 +114,15 @@ def search_domain(site_name: str, base_url: str, get_first: bool = False):
     console.print(f"\n[cyan]Testing initial URL[white]: [yellow]{test_url}")
 
     try:
-        if validate_url(test_url, base_url, max_timeout):
+        is_correct, redirect_tld = validate_url(test_url, base_url, max_timeout)
+
+        if is_correct and redirect_tld is not None:
+            config_manager.config['SITE'][site_name]['domain'] = redirect_tld
+            config_manager.write_config()
+            console.print(f"[green]Successfully validated initial URL")
+            return redirect_tld, test_url
+
+        if is_correct:
             parsed_url = urlparse(test_url)
             tld = parsed_url.netloc.split('.')[-1]
             config_manager.config['SITE'][site_name]['domain'] = tld
@@ -114,24 +136,25 @@ def search_domain(site_name: str, base_url: str, get_first: bool = False):
     # Google search phase
     query = base_url.split("/")[-1]
     console.print(f"\n[cyan]Performing Google search for[white]: [yellow]{query}")
-    search_results = list(search(query, num_results=15, lang="it"))
+    search_results = list(search(query, num_results=20, lang="it"))
 
     for idx, result_url in enumerate(search_results, 1):
-        console.print(f"\n[cyan]Checking Google result {idx}/15[white]: [yellow]{result_url}")
+        if get_base_domain(result_url) == get_base_domain(test_url):
+            console.print(f"\n[cyan]Checking Google result {idx}/20[white]: [yellow]{result_url}")
 
-        if validate_url(result_url, base_url, max_timeout):
-            parsed_result = urlparse(result_url)
-            new_domain = parsed_result.netloc.split(".")[-1]
+            if validate_url(result_url, base_url, max_timeout):
+                parsed_result = urlparse(result_url)
+                new_domain = parsed_result.netloc.split(".")[-1]
 
-            if get_first or msg.ask(
-                f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'",
-                choices=["y", "n"],
-                default="y"
-            ).lower() == "y":
-                config_manager.config['SITE'][site_name]['domain'] = new_domain
-                config_manager.write_config()
-                return new_domain, f"{base_url}.{new_domain}"
+                if get_first or msg.ask(
+                    f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'",
+                    choices=["y", "n"],
+                    default="y"
+                ).lower() == "y":
+                    config_manager.config['SITE'][site_name]['domain'] = new_domain
+                    config_manager.write_config()
+                    return new_domain, f"{base_url}.{new_domain}"
 
     console.print("[bold red]No valid URLs found matching the base URL.")
     return domain, f"{base_url}.{domain}"
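A minimal sketch of how a caller might consume the new return shapes after this change; the site name and URLs are placeholders, and `validate_url`, `search_domain`, and `config_manager` are the objects patched above.

```python
# Sketch only: placeholder site name and URLs; helpers come from the patched module.
is_correct, redirect_tld = validate_url("https://streamingcommunity.example",
                                        "https://streamingcommunity", 10)
if is_correct and redirect_tld is not None:
    # The probe redirected to a different TLD: persist it, as search_domain now does.
    config_manager.config['SITE']['streamingcommunity']['domain'] = redirect_tld
    config_manager.write_config()

# search_domain still returns (tld, full_url) for the caller to use directly.
domain, full_url = search_domain(site_name="streamingcommunity",
                                 base_url="https://streamingcommunity",
                                 get_first=True)
```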

View File

@@ -139,9 +139,8 @@ class TOR_downloader:
 
         # Formatta e stampa le informazioni
         console.print("\n[bold green]🔗 Dettagli Torrent Aggiunto:[/bold green]")
-        console.print(f"[yellow]Nome:[/yellow] {torrent_info.get('name', torrent_name)}")
+        console.print(f"[yellow]Name:[/yellow] {torrent_info.get('name', torrent_name)}")
         console.print(f"[yellow]Hash:[/yellow] {torrent_info['hash']}")
-        console.print(f"[yellow]Dimensione:[/yellow] {internet_manager.format_file_size(torrent_info.get('size'))}")
         print()
 
         # Salva l'hash per usi successivi e il path
@@ -288,7 +287,8 @@ class TOR_downloader:
                 raise
 
             # Delete the torrent data
-            #self.qb.delete_permanently(self.qb.torrents()[-1]['hash'])
+            time.sleep(5)
+            self.qb.delete_permanently(self.qb.torrents()[-1]['hash'])
             return True
 
         except Exception as e:
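For context, the now-active cleanup goes through the qBittorrent client the class already holds; a standalone sketch of the same calls, assuming the python-qbittorrent `Client` and reusing the `config_qbit_tor` values from this commit.

```python
# Sketch under the assumption that self.qb is a python-qbittorrent Client;
# host/port/credentials mirror config_qbit_tor in this commit.
import time
from qbittorrent import Client

qb = Client("http://192.168.1.99:7060/")
qb.login("admin", "adminadmin")

time.sleep(5)                                # small delay before touching the torrent list
last_torrent = qb.torrents()[-1]             # the entry the downloader just added
qb.delete_permanently(last_torrent['hash'])  # remove the torrent together with its data
```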

View File

@@ -118,16 +118,17 @@ if __name__ == "__main__":
     for alias, (site_name, use_for) in site_names.items():
         original_domain = config_manager.get_list("SITE", alias)['domain']
 
-        if site_name == "animeunity":
-            domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://www.{site_name}", get_first=True)
-        else:
-            domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://{site_name}", get_first=True)
+        if site_name != "ilcorsaronero":
+            if site_name == "animeunity":
+                domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://www.{site_name}", get_first=True)
+            else:
+                domain_to_use, _ = search_domain(site_name=site_name, base_url=f"https://{site_name}", get_first=True)
 
-        # Update readme
-        if original_domain != domain_to_use:
-            print("\n")
-            print("Return domain: ", domain_to_use)
-            update_readme(alias, domain_to_use)
-            print("------------------------------------")
+            # Update readme
+            if original_domain != domain_to_use:
+                print("\n")
+                print("Return domain: ", domain_to_use)
+                update_readme(alias, domain_to_use)
+                print("------------------------------------")
 
-        time.sleep(2)
+            time.sleep(3)

View File

@@ -10,7 +10,7 @@
         "serie_folder_name": "TV",
         "map_episode_name": "E%(episode)_%(episode_name)",
         "config_qbit_tor": {
-            "host": "192.168.1.58",
+            "host": "192.168.1.99",
             "port": "7060",
             "user": "admin",
             "pass": "adminadmin"
@@ -20,7 +20,7 @@
         "not_close": false
     },
     "REQUESTS": {
-        "timeout": 20,
+        "timeout": 35,
         "max_retry": 8,
         "proxy_start_min": 0.1,
         "proxy_start_max": 0.5
@@ -81,7 +81,7 @@
             "domain": "so"
         },
         "cb01new": {
-            "domain": "pics"
+            "domain": "quest"
         },
         "1337xx": {
             "domain": "to"