2024-11-07 11:22:11 +01:00

142 lines
4.8 KiB
Python

# 18.06.24
import sys
from urllib.parse import urlparse
# External libraries
import httpx
from googlesearch import search
# Internal utilities
from Src.Util.headers import get_headers
from Src.Util.console import console, msg
from Src.Util._jsonConfig import config_manager
def google_search(query):
    """
    Run a Google search and hand back the top hit.

    Args:
        query (str): Text to search for on Google.

    Returns:
        The first item yielded by the search, or None when the search yields nothing.
    """
    # Only the top hit is needed, so request a single result
    hits = search(query, num_results=1)
    top_hit = next(hits, None)

    if top_hit:
        return top_hit

    # Nothing usable came back: tell the user, then return the falsy value as-is
    console.print("[red]No results found.[/red]")
    return top_hit
def get_final_redirect_url(initial_url, max_timeout):
    """
    Request a URL with redirects enabled and report where the request ended up.

    Args:
        initial_url (str): The URL to request first.
        max_timeout: Timeout value passed to the httpx client.

    Returns:
        The `url` attribute of the final response, or None if anything in the
        request (including `raise_for_status()`) raised an exception.
    """
    try:
        # The client follows redirects on its own, so the response it returns is the last hop
        with httpx.Client(
            follow_redirects=True,
            timeout=max_timeout,
            headers={'user-agent': get_headers()},
        ) as session:
            last_response = session.get(initial_url)
            last_response.raise_for_status()
            return last_response.url

    except Exception as e:
        # Best effort: report the failure and let the caller deal with a missing URL
        console.print(f"[cyan]Test url[white]: [red]{initial_url}, [cyan]error[white]: [red]{e}")
        return None
def _extract_tld(url) -> str:
    """Return the last dot-separated label of the URL's network location."""
    # str() because httpx hands back URL objects rather than plain strings
    return urlparse(str(url)).netloc.split(".")[-1]


def search_domain(site_name: str, base_url: str):
    """
    Search for a valid domain for the given site name and base URL.

    The domain stored in the config is tested first. If that request fails,
    the last path segment of `base_url` is searched on Google, the first hit
    is followed through its redirects, and the user is asked whether the
    resulting domain should be written to the config.

    Parameters:
        - site_name (str): The name of the site to search the domain for.
        - base_url (str): The base URL to construct complete URLs.

    Returns:
        tuple: The found domain and the complete URL (`f"{base_url}.{domain}"`).

    Raises:
        Exception: The error raised while testing the configured domain is
            re-raised when the first Google result cannot be resolved, or when
            no replacement domain was accepted and the request produced no
            response to fall back on.
    """
    # Extract config domain
    max_timeout = config_manager.get_int("REQUESTS", "timeout")
    domain = str(config_manager.get_dict("SITE", site_name)['domain'])

    # Stays None when the request fails before any response object exists
    response_follow = None

    try:
        # Test the current domain
        response_follow = httpx.get(
            f"{base_url}.{domain}",
            headers={'user-agent': get_headers()},
            timeout=max_timeout,
            follow_redirects=True,
        )
        response_follow.raise_for_status()

    except Exception:
        # The configured domain did not answer correctly: look for a replacement
        query = base_url.split("/")[-1]
        first_url = google_search(query)
        console.print(f"[green]First url from google seach[white]: [red]{first_url}")

        if first_url:
            final_url = get_final_redirect_url(first_url, max_timeout)

            if final_url is None:
                console.print("[bold red]\nManually change the domain in the JSON file.[/bold red]")
                # Bare raise re-raises the error from testing the configured domain
                raise

            console.print(f"\n[bold yellow]Suggestion:[/bold yellow] [white](Experimental)\n"
                          f"[cyan]New final URL[white]: [green]{final_url}")

            new_domain_extract = _extract_tld(final_url)

            if msg.ask(f"[red]Do you want to auto update config.json - '[green]{site_name}[red]' with domain: [green]{new_domain_extract}", choices=["y", "n"], default="y").lower() == "y":

                # Update domain in config.json
                config_manager.config['SITE'][site_name]['domain'] = new_domain_extract
                config_manager.write_config()

                # Return config domain
                return new_domain_extract, f"{base_url}.{new_domain_extract}"

        else:
            console.print("[bold red]No valid URL to follow redirects.[/bold red]")

        if response_follow is None:
            # No replacement was accepted and there is no response to read a
            # URL from; surface the original request error instead of failing
            # later on an unbound/None response.
            raise

    # Reached on success, or when a response with an error status exists and
    # no replacement domain was accepted: use the URL the request ended on.
    tld = _extract_tld(response_follow.url)

    # Update domain in config.json
    config_manager.config['SITE'][site_name]['domain'] = tld
    config_manager.write_config()

    # Return config domain
    return tld, f"{base_url}.{tld}"