youlama/youtube_handler.py
2025-05-23 10:17:52 +02:00

118 lines
3.5 KiB
Python

import os
import re
import shutil
import tempfile
from typing import Optional, Tuple
from urllib.parse import urlparse, parse_qs

import yt_dlp
def is_youtube_url(url: str) -> bool:
    """Report whether *url* matches the YouTube URL pattern.

    The pattern is applied with ``re.match``, so it is anchored at the start
    of the string only; anything following the 11-character ID is ignored.
    The scheme (``http://`` / ``https://``) and a ``www.`` prefix are both
    optional.

    Args:
        url: Candidate URL string.

    Returns:
        ``True`` when the pattern matches at the start of ``url``,
        ``False`` otherwise.
    """
    pattern = r"(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
    found = re.match(pattern, url)
    return found is not None
def extract_video_id(url: str) -> Optional[str]:
    """Extract the video ID from a YouTube URL.

    Supported forms (scheme and ``www.`` are optional, matching what
    ``is_youtube_url`` accepts):

    * ``youtu.be/<id>``
    * ``youtube.com/watch?v=<id>``
    * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``
    * the same paths on ``youtube-nocookie.com``

    Args:
        url: Candidate URL.

    Returns:
        The video ID, or ``None`` when ``url`` is rejected by
        ``is_youtube_url`` or no ID can be located in it.
    """
    if not is_youtube_url(url):
        return None

    # is_youtube_url tolerates a missing scheme, but urlparse only populates
    # the host when one is present, so supply a default before parsing.
    if not url.startswith(("http://", "https://")):
        url = f"https://{url}"
    parsed_url = urlparse(url)

    # Decide on the parsed host rather than on a substring of the whole URL:
    # a watch link can carry "youtu.be" in its query string
    # (e.g. "&feature=youtu.be") and must not be treated as a short link.
    host = parsed_url.hostname or ""
    if host.startswith("www."):
        host = host[len("www."):]

    if host == "youtu.be":
        # First path segment only; the query string and any trailing slash
        # are already separated out by urlparse / the split.
        return parsed_url.path.lstrip("/").split("/")[0] or None

    if host in ("youtube.com", "youtube-nocookie.com"):
        if parsed_url.path == "/watch":
            return parse_qs(parsed_url.query).get("v", [None])[0]
        if parsed_url.path.startswith(("/embed/", "/v/", "/shorts/", "/live/")):
            # Path looks like "/<kind>/<id>[/...]", so index 2 is the ID.
            return parsed_url.path.split("/")[2] or None
    return None
def get_video_info(url: str) -> dict:
    """Fetch metadata for a URL with yt-dlp without downloading media.

    Args:
        url: URL handed to ``yt_dlp.YoutubeDL.extract_info``.

    Returns:
        The info dictionary returned by ``extract_info``.

    Raises:
        Exception: If extraction fails. The message is prefixed with
            "Error fetching video info: " and the original error is attached
            as ``__cause__``.
    """
    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        # Per the yt-dlp option docs, nested URL results (such as playlist
        # entries) are listed but not resolved further.
        "extract_flat": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            return ydl.extract_info(url, download=False)
        except Exception as e:
            # Chain explicitly so the underlying yt-dlp error stays visible
            # in tracebacks as the direct cause.
            raise Exception(f"Error fetching video info: {e}") from e
def download_video(url: str) -> Tuple[str, str]:
    """Download a video's audio track and convert it to MP3.

    The audio is written into a freshly created temporary directory. On
    success that directory is left in place and the caller is responsible
    for deleting it once the file is no longer needed.

    Args:
        url: URL of the video to download.

    Returns:
        A ``(audio_path, title)`` tuple: the expected path of the MP3 file
        and the title reported by yt-dlp.

    Raises:
        Exception: If the download or post-processing fails. The message is
            prefixed with "Error downloading video: ", the original error is
            attached as ``__cause__``, and the temporary directory is
            removed before raising.
    """
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "%(id)s.%(ext)s")
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
        "outtmpl": output_path,
        # A watch URL may also reference a playlist; this function returns a
        # single file, so ask yt-dlp for the single video in that case.
        "noplaylist": True,
        "quiet": True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            # The FFmpegExtractAudio post-processor is configured for mp3,
            # so the final file carries that extension.
            audio_path = os.path.join(temp_dir, f"{info['id']}.mp3")
            return audio_path, info["title"]
    except Exception as e:
        # Nothing usable was produced: do not leak the temporary directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise Exception(f"Error downloading video: {e}") from e
def get_available_subtitles(url: str) -> list:
    """List the subtitle language keys yt-dlp reports for a video.

    Only the ``"subtitles"`` entry of the extracted info is consulted.
    This is a best-effort lookup: any error raised during extraction is
    swallowed and an empty list is returned.

    Args:
        url: URL of the video to inspect.

    Returns:
        The keys of the info dictionary's ``"subtitles"`` mapping, or ``[]``
        when the key is absent or extraction fails.
    """
    options = {
        "writesubtitles": True,
        "writeautomaticsub": True,
        "subtitleslangs": ["en"],
        "skip_download": True,
        "quiet": True,
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        try:
            metadata = downloader.extract_info(url, download=False)
            tracks = metadata.get("subtitles", {})
            return list(tracks.keys())
        except Exception:
            return []
def download_subtitles(url: str, lang: str = "en") -> Optional[str]:
    """Download subtitles for a video as a WebVTT file.

    Both uploaded and automatic subtitles are requested. The file is written
    into a freshly created temporary directory. On success that directory is
    left in place and the caller is responsible for deleting it; when no
    subtitle file is produced the directory is removed before returning.

    This is a best-effort operation: errors raised while extracting or
    downloading are swallowed and reported as ``None``.

    Args:
        url: URL of the video.
        lang: Subtitle language code to request.

    Returns:
        Path to the downloaded ``.vtt`` file, or ``None`` if no such file
        was produced.
    """
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "%(id)s.%(ext)s")
    ydl_opts = {
        "writesubtitles": True,
        "writeautomaticsub": True,
        "subtitleslangs": [lang],
        # The path built below assumes a .vtt file, so request that format
        # explicitly instead of relying on yt-dlp's default choice.
        "subtitlesformat": "vtt",
        "skip_download": True,
        "outtmpl": output_path,
        # Keep to a single video when the URL also references a playlist,
        # since the path below is derived from one video ID.
        "noplaylist": True,
        "quiet": True,
    }
    subtitle_path: Optional[str] = None
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            try:
                info = ydl.extract_info(url, download=True)
                candidate = os.path.join(temp_dir, f"{info['id']}.{lang}.vtt")
                if os.path.exists(candidate):
                    subtitle_path = candidate
            except Exception:
                # Best effort by design: any failure means "no subtitles".
                subtitle_path = None
        return subtitle_path
    finally:
        # Only a successful result needs the directory to outlive the call.
        if subtitle_path is None:
            shutil.rmtree(temp_dir, ignore_errors=True)