# 04.4.24 # Note: verify_ssl need to be set to false for macOs import base64 import json import logging import ssl import time import re import urllib.parse import urllib.request import urllib.error from typing import Dict, Optional, Union, TypedDict, Any try: from typing import Unpack except ImportError: # (Python <= 3.10), try: from typing_extensions import Unpack except ImportError: raise ImportError("Unable to import Unpack from typing or typing_extensions. " "Please make sure you have the necessary libraries installed.") # External library from bs4 import BeautifulSoup # Constants HTTP_TIMEOUT = 3 HTTP_RETRIES = 1 HTTP_DELAY = 1 class RequestError(Exception): """Custom exception class for request errors.""" def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None: """ Initialize a RequestError instance. Args: message (str): The error message. original_exception (Optional[Exception], optional): The original exception that occurred. Defaults to None. """ super().__init__(message) self.original_exception = original_exception def __str__(self) -> str: """Return a string representation of the exception.""" if self.original_exception: return f"{super().__str__()} Original Exception: {type(self.original_exception).__name__}: {str(self.original_exception)}" else: return super().__str__() class Response: """Class representing an HTTP response.""" def __init__( self, status: int, text: str, is_json: bool = False, content: bytes = b"", headers: Optional[Dict[str, str]] = None, cookies: Optional[Dict[str, str]] = None, redirect_url: Optional[str] = None, response_time: Optional[float] = None, timeout: Optional[float] = None, ): """ Initialize a Response object. Args: status (int): The HTTP status code of the response. text (str): The response content as text. is_json (bool, optional): Indicates if the response content is JSON. Defaults to False. content (bytes, optional): The response content as bytes. Defaults to b"". headers (Optional[Dict[str, str]], optional): The response headers. Defaults to None. cookies (Optional[Dict[str, str]], optional): The cookies set in the response. Defaults to None. redirect_url (Optional[str], optional): The URL if a redirection occurred. Defaults to None. response_time (Optional[float], optional): The time taken to receive the response. Defaults to None. timeout (Optional[float], optional): The request timeout. Defaults to None. """ self.status_code = status self.text = text self.is_json = is_json self.content = content self.headers = headers or {} self.cookies = cookies or {} self.redirect_url = redirect_url self.response_time = response_time self.timeout = timeout self.ok = 200 <= status < 300 def raise_for_status(self): """Raise an error if the response status code is not in the 2xx range.""" if not self.ok: raise RequestError(f"Request failed with status code {self.status_code}") def json(self): """ Return the response content as JSON if it is JSON. Returns: dict or list or None: A Python dictionary or list parsed from JSON if the response content is JSON, otherwise None. """ if self.is_json: return json.loads(self.text) else: return None def get_redirects(self): """ Extracts unique site URLs from HTML elements within the section. Returns: list or None: A list of unique site URLs if found, otherwise None. """ site_find = [] if self.text: soup = BeautifulSoup(self.text, "html.parser") for links in soup.find("head").find_all('link'): if links is not None: parsed_url = urllib.parse.urlparse(links.get('href')) site = parsed_url.scheme + "://" + parsed_url.netloc if site not in site_find: site_find.append(site) if site_find: return site_find else: return None class ManageRequests: """Class for managing HTTP requests.""" def __init__( self, url: str, method: str = 'GET', headers: Optional[Dict[str, str]] = None, timeout: float = HTTP_TIMEOUT, retries: int = HTTP_RETRIES, params: Optional[Dict[str, str]] = None, verify_ssl: bool = True, auth: Optional[tuple] = None, proxy: Optional[str] = None, cookies: Optional[Dict[str, str]] = None, json_data: Optional[Dict[str, Any]] = None, redirection_handling: bool = True, ): """ Initialize a ManageRequests object. Args: url (str): The URL to which the request will be sent. method (str, optional): The HTTP method to be used for the request. Defaults to 'GET'. headers (Optional[Dict[str, str]], optional): The request headers. Defaults to None. timeout (float, optional): The request timeout. Defaults to HTTP_TIMEOUT. retries (int, optional): The number of retries in case of request failure. Defaults to HTTP_RETRIES. params (Optional[Dict[str, str]], optional): The query parameters for the request. Defaults to None. verify_ssl (bool, optional): Indicates whether SSL certificate verification should be performed. Defaults to True. auth (Optional[tuple], optional): Tuple containing the username and password for basic authentication. Defaults to None. proxy (Optional[str], optional): The proxy URL. Defaults to None. cookies (Optional[Dict[str, str]], optional): The cookies to be included in the request. Defaults to None. redirection_handling (bool, optional): Indicates whether redirections should be followed. Defaults to True. """ self.url = url self.method = method self.headers = headers or {} self.timeout = timeout self.retries = retries self.params = params self.verify_ssl = verify_ssl self.auth = auth self.proxy = proxy self.cookies = cookies self.json_data = json_data self.redirection_handling = redirection_handling def add_header(self, key: str, value: str) -> None: """Add a header to the request.""" self.headers[key] = value def send(self) -> Response: """Send the HTTP request.""" start_time = time.time() self.attempt = 0 redirect_url = None while self.attempt < self.retries: try: req = self._build_request() response = self._perform_request(req) return self._process_response(response, start_time, redirect_url) except (urllib.error.URLError, urllib.error.HTTPError) as e: self._handle_error(e) self.attempt += 1 def _build_request(self) -> urllib.request.Request: """Build the urllib Request object.""" headers = self.headers.copy() if self.params: url = self.url + '?' + urllib.parse.urlencode(self.params) else: url = self.url req = urllib.request.Request(url, headers=headers, method=self.method) if self.json_data: req.add_header('Content-Type', 'application/json') req.body = json.dumps(self.json_data).encode('utf-8') else: req = urllib.request.Request(url, headers=headers, method=self.method) if self.auth: req.add_header('Authorization', 'Basic ' + base64.b64encode(f"{self.auth[0]}:{self.auth[1]}".encode()).decode()) if self.cookies: cookie_str = '; '.join([f"{name}={value}" for name, value in self.cookies.items()]) req.add_header('Cookie', cookie_str) if self.headers: for key, value in self.headers.items(): req.add_header(key, value) # Add default user agent if True: there_is_agent = False for key, value in self.headers.items(): if str(key).lower() == 'user-agent': there_is_agent = True if not there_is_agent: default_user_agent = 'Mozilla/5.0' req.add_header('user-agent', default_user_agent) return req def _perform_request(self, req: urllib.request.Request) -> urllib.response.addinfourl: """Perform the HTTP request.""" if self.proxy: proxy_handler = urllib.request.ProxyHandler({'http': self.proxy, 'https': self.proxy}) opener = urllib.request.build_opener(proxy_handler) urllib.request.install_opener(opener) if not self.verify_ssl: ssl_context = ssl.create_default_context() ssl_context.check_hostname = False ssl_context.verify_mode = ssl.CERT_NONE response = urllib.request.urlopen(req, timeout=self.timeout, context=ssl_context) else: response = urllib.request.urlopen(req, timeout=self.timeout) return response def _process_response(self, response: urllib.response.addinfourl, start_time: float, redirect_url: Optional[str]) -> Response: """Process the HTTP response.""" response_data = response.read() content_type = response.headers.get('Content-Type', '').lower() if self.redirection_handling and response.status in (301, 302, 303, 307, 308): location = response.headers.get('Location') logging.info(f"Redirecting to: {location}") redirect_url = location self.url = location return self.send() return self._build_response(response, response_data, start_time, redirect_url, content_type) def _build_response(self, response: urllib.response.addinfourl, response_data: bytes, start_time: float, redirect_url: Optional[str], content_type: str) -> Response: """Build the Response object.""" response_time = time.time() - start_time response_headers = dict(response.headers) response_cookies = {} for cookie in response.headers.get_all('Set-Cookie', []): cookie_parts = cookie.split(';') cookie_name, cookie_value = cookie_parts[0].split('=', 1) response_cookies[cookie_name.strip()] = cookie_value.strip() return Response( status=response.status, text=response_data.decode('latin-1'), is_json=("json" in content_type), content=response_data, headers=response_headers, cookies=response_cookies, redirect_url=redirect_url, response_time=response_time, timeout=self.timeout, ) def _handle_error(self, e: Union[urllib.error.URLError, urllib.error.HTTPError]) -> None: """Handle request error.""" logging.error(f"Request failed for URL '{self.url}': {str(e)}") if self.attempt < self.retries: logging.info(f"Retrying request for URL '{self.url}' (attempt {self.attempt}/{self.retries})") time.sleep(HTTP_DELAY) else: logging.error(f"Maximum retries reached for URL '{self.url}'") raise RequestError(str(e)) class ValidateRequest: """Class for validating request inputs.""" @staticmethod def validate_url(url: str) -> bool: """Validate URL format.""" url_regex = re.compile( r'^(?:http|ftp)s?://' # http:// or https:// r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' # domain... r'localhost|' # localhost... r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', re.IGNORECASE) return re.match(url_regex, url) is not None @staticmethod def validate_headers(headers: Dict[str, str]) -> bool: """Validate header values.""" for key, value in headers.items(): if not isinstance(key, str) or not isinstance(value, str): return False return True class ValidateResponse: """Class for validating response data.""" @staticmethod def is_valid_json(data: str) -> bool: """Check if response data is a valid JSON.""" try: json.loads(data) return True except ValueError: return False class SSLHandler: """Class for handling SSL certificates.""" @staticmethod def load_certificate(custom_cert_path: str) -> None: """Load custom SSL certificate.""" ssl_context = ssl.create_default_context(cafile=custom_cert_path) ssl_context.check_hostname = False ssl_context.verify_mode = ssl.CERT_NONE class KwargsRequest(TypedDict, total = False): url: str headers: Optional[Dict[str, str]] = None timeout: float = HTTP_TIMEOUT retries: int = HTTP_RETRIES params: Optional[Dict[str, str]] = None cookies: Optional[Dict[str, str]] = None json_data: Optional[Dict[str, Any]] = None class Request: """Class for making HTTP requests.""" def __init__(self) -> None: pass def get(self, url: str, **kwargs: Unpack[KwargsRequest])-> 'Response': """ Send a GET request. Args: url (str): The URL to which the request will be sent. **kwargs: Additional keyword arguments for the request. Returns: Response: The response object. """ return self._send_request(url, 'GET', **kwargs) def post(self, url: str, **kwargs: Unpack[KwargsRequest]) -> 'Response': """ Send a POST request. Args: url (str): The URL to which the request will be sent. **kwargs: Additional keyword arguments for the request. Returns: Response: The response object. """ return self._send_request(url, 'POST', **kwargs) def _send_request(self, url: str, method: str, **kwargs: Unpack[KwargsRequest]) -> 'Response': """Send an HTTP request.""" if not ValidateRequest.validate_url(url): raise ValueError("Invalid URL format") if 'headers' in kwargs and not ValidateRequest.validate_headers(kwargs['headers']): raise ValueError("Invalid header values") return ManageRequests(url, method, **kwargs).send() # Out requests: Request = Request()