Compare commits

...

No commits in common. "968586e0cdafce6743b238d30cc6b5e7566649f3" and "e5add935537bfac4507bde517b6644b1c52bcf1b" have entirely different histories.

28 changed files with 1115 additions and 1115 deletions

57
.dockerignore Normal file
View File

@@ -0,0 +1,57 @@
# Version control
.git
.gitignore
.gitattributes
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.python-version
# IDE
.idea/
.vscode/
*.swp
*.swo
*~
# Docker
Dockerfile
docker-compose.yml
.dockerignore
# Project specific
models/
*.log
*.mp3
*.wav
*.mp4
*.avi
*.mkv
*.mov
*.flac
*.ogg
*.m4a
*.aac
# Documentation
README.md
LICENSE
*.md
docs/
# Test files
tests/
test/
*.test
*.spec

15
.gitignore vendored
View File

@@ -1,4 +1,11 @@
transcript_cache/*.json
__pycache__
.env
downloads/output.m4a
__pycache__/
*.pyc
*.pyo
*.pyd
*.pyw
*.pyz
*.pywz
*.pyzw
*.pyzwz
config.ini
.venv

31
.gradio/certificate.pem Normal file
View File

@@ -0,0 +1,31 @@
-----BEGIN CERTIFICATE-----
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
-----END CERTIFICATE-----

38
Dockerfile Normal file
View File

@@ -0,0 +1,38 @@
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
# Define the path to PyTorch's bundled NVIDIA libraries (adjust if necessary for your specific Python version/setup)
# This path assumes nvidia-cudnn-cuXX or similar packages install here.
ENV PYTORCH_NVIDIA_LIBS_DIR=/usr/local/lib/python3.10/dist-packages/nvidia/cudnn/lib
# Prepend PyTorch's NVIDIA library directory to LD_LIBRARY_PATH
# Also include the standard NVIDIA paths that the base image might set for other CUDA components.
ENV LD_LIBRARY_PATH=${PYTORCH_NVIDIA_LIBS_DIR}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
# Install system dependencies
RUN apt-get update && apt-get install -y \
python3.10 \
python3-pip \
ffmpeg \
&& rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Copy requirements first to leverage Docker cache
COPY requirements.txt .
# Install Python dependencies
RUN pip3 install --no-cache-dir -r requirements.txt
# Copy application code
COPY . .
# Expose port
EXPOSE 7860
# Set entrypoint
ENTRYPOINT ["python3", "app.py"]

View File

@@ -1,13 +0,0 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2024 TCSenpai <tcsenpai@discus.sh>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.

245
README.md
View File

@@ -1,137 +1,152 @@
# YouLama by TCSenpai
# YouLama
[![justforfunnoreally.dev badge](https://img.shields.io/badge/justforfunnoreally-dev-9ff)](https://justforfunnoreally.dev)
YouLama is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models and optionally Whisper for transcription.
- [YouLama by TCSenpai](#youlama-by-tcsenpai)
- [Features](#features)
- [Installation](#installation)
- [Usage](#usage)
- [Global Installation](#global-installation)
- [Run with the included binary](#run-with-the-included-binary)
- [Dependencies](#dependencies)
- [Project Structure](#project-structure)
- [Contributing](#contributing)
- [License](#license)
- [Credits](#credits)
![Screenshot](screenshot.png)
A powerful web application for transcribing and summarizing YouTube videos and local media files using faster-whisper and Ollama.
## Features
- Supports multiple YouTube frontends (e.g. YouTube, Invidious)
- Fetch and cache YouTube video transcripts
- Summarize video content using Ollama AI models
- Display video information (title and channel)
- Customizable Ollama URL and model selection
- Fall back to Whisper for transcription if no transcript is found
- Customizable Whisper URL and model selection
- Option to force Whisper transcription
- 🎥 YouTube video transcription with subtitle extraction
- 🎙️ Local audio/video file transcription
- 🤖 Automatic language detection
- 📝 Multiple Whisper model options
- 📚 AI-powered text summarization using Ollama
- 🎨 Modern web interface with Gradio
- 🐳 Docker support with CUDA
- ⚙️ Configurable settings via config.ini
## Requirements
- Docker and Docker Compose
- NVIDIA GPU with CUDA support
- NVIDIA Container Toolkit
- Ollama installed locally (optional, for summarization)
## Installation
1. Clone the repository:
```
git clone git@github.com:tcsenpai/youlama.git
cd youlama
```
2. Install the required dependencies:
2a. Using pip:
```
pip install -r requirements.txt
```
2b. Using conda:
```
conda env create -f environment.yml
```
Note: You might need to install `conda` first.
3. Set up environment variables:
Create a `.env` file in the root directory and add the following:
```
YOUTUBE_API_KEY=your_youtube_api_key
OLLAMA_MODEL=default_model_name
WHISPER_URL=http://localhost:8000/
WHISPER_MODEL=Systran/faster-whisper-large-v3
PASTEBIN_API_KEY=your_pastebin_api_key
```
- Note: you can copy the `env.example` file to `.env` and modify the values.
- Important: the `WHISPER_URL` should point to the Whisper server you want to use. You can leave it unchanged if you do not plan to use Whisper.
- Important: the `PASTEBIN_API_KEY` is optional, but if you want to use it, you need to get one from [Pastebin](https://pastebin.com/doc_api).
## Usage
1. Run the Streamlit app:
```
streamlit run src/main.py
```
2. Open your web browser and navigate to the provided local URL (usually `http://localhost:8501`).
3. Enter a YouTube video URL in the input field.
4. (Optional) Customize the Ollama URL and select a different AI model.
5. (Optional) Customize the Whisper URL and select a different Whisper model.
6. Click the "Summarize" button to generate a summary of the video.
## Global Installation
You can install the application globally on your system by running the following command:
```
sudo ./install.sh
```bash
git clone <repository-url>
cd youlama
```
This will create a new command `youlama` that you can use to run the application.
2. Install NVIDIA Container Toolkit (if not already installed):
```bash
# Add NVIDIA package repositories
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
## Run with the included binary
# Install nvidia-docker2 package
sudo apt-get update
sudo apt-get install -y nvidia-docker2
You can also run the application with the included binary:
```
./youlama
# Restart the Docker daemon
sudo systemctl restart docker
```
## Dependencies
3. Install Ollama locally (optional, for summarization):
```bash
curl https://ollama.ai/install.sh | sh
```
- Streamlit
- Pytube
- Ollama
- YouTube Data API
- Python-dotenv
- pytubefix
- Gradio
4. Copy the example configuration file:
```bash
cp .env.example .env
```
## Project Structure
5. Edit the configuration files:
- `.env`: Set your environment variables
- `config.ini`: Configure Whisper, Ollama, and application settings
- `src/main.py`: Main Streamlit application
- `src/ollama_client.py`: Ollama API client for model interaction
- `src/video_info.py`: YouTube API integration for video information
- `src/whisper_module.py`: Whisper API client for transcription
- `src/yt_audiophile.py`: Audio downloader for YouTube videos
- `transcript_cache/`: Directory for caching video transcripts
- `downloads/`: Directory for downloaded audio files (may be empty)
## Running the Application
## Contributing
1. Start Ollama locally (if you want to use summarization):
```bash
ollama serve
```
Contributions are welcome! Please feel free to submit a Pull Request.
2. Build and start the YouLama container:
```bash
docker-compose up --build
```
3. Open your web browser and navigate to:
```
http://localhost:7860
```
## Configuration
### Environment Variables (.env)
```ini
# Server configuration
SERVER_NAME=0.0.0.0
SERVER_PORT=7860
SHARE=true
```
### Application Settings (config.ini)
```ini
[whisper]
default_model = base
device = cuda
compute_type = float16
beam_size = 5
vad_filter = true
[app]
max_duration = 3600
server_name = 0.0.0.0
server_port = 7860
share = true
[models]
available_models = tiny,base,small,medium,large-v1,large-v2,large-v3
[languages]
available_languages = en,es,fr,de,it,pt,nl,ja,ko,zh
[ollama]
enabled = false
url = http://host.docker.internal:11434
default_model = mistral
summarize_prompt = Please provide a comprehensive yet concise summary of the following text. Focus on the main points, key arguments, and important details while maintaining accuracy and completeness. Here's the text to summarize:
```
## Features in Detail
### YouTube Video Processing
- Supports youtube.com, youtu.be, and invidious URLs
- Automatically extracts subtitles if available
- Falls back to transcription if no subtitles found
- Optional AI-powered summarization with Ollama
### Local File Transcription
- Supports various audio and video formats
- Automatic language detection
- Multiple Whisper model options
- Optional AI-powered summarization with Ollama
### AI Summarization
- Uses locally running Ollama for text summarization
- Configurable model selection
- Customizable prompt
- Available for both local files and YouTube videos
## Tips
- For better accuracy, use larger models (medium, large)
- Processing time increases with model size
- GPU is recommended for faster processing
- Maximum audio duration is configurable (default: 60 minutes)
- YouTube videos will first try to use available subtitles
- If no subtitles are available, the video will be transcribed
- Ollama summarization is optional and requires Ollama to be running locally
- The application runs in a Docker container with CUDA support
- Models are downloaded and cached in the `models` directory
- The container connects to the local Ollama instance using host.docker.internal
## License
WTFPL License
## Credits
Icon: "https://www.flaticon.com/free-icons/subtitles" by Freepik - Flaticon
This project is licensed under the MIT License - see the LICENSE file for details.

539
app.py Normal file
View File

@@ -0,0 +1,539 @@
import os
import gradio as gr
import torch
import configparser
from typing import List, Tuple, Optional
import youtube_handler as yt
from ollama_handler import OllamaHandler
import logging
from faster_whisper import WhisperModel
import subprocess
import sys
# Configure logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
def check_cuda_compatibility():
"""Check if the current CUDA setup is compatible with faster-whisper."""
logger.info("Checking CUDA compatibility...")
# Check PyTorch CUDA
if not torch.cuda.is_available():
logger.warning("CUDA is not available in PyTorch")
return False
cuda_version = torch.version.cuda
cudnn_version = torch.backends.cudnn.version()
device_name = torch.cuda.get_device_name(0)
logger.info(f"CUDA Version: {cuda_version}")
logger.info(f"cuDNN Version: {cudnn_version}")
logger.info(f"GPU Device: {device_name}")
# Check CUDA version
try:
cuda_major = int(cuda_version.split(".")[0])
if cuda_major > 11:
logger.warning(
f"CUDA {cuda_version} might not be fully compatible with faster-whisper. Recommended: CUDA 11.x"
)
logger.info(
"Consider creating a new environment with CUDA 11.x if you encounter issues"
)
except Exception as e:
logger.error(f"Error parsing CUDA version: {str(e)}")
return True
def load_config() -> configparser.ConfigParser:
"""Load configuration from config.ini file."""
config = configparser.ConfigParser()
config_path = os.path.join(os.path.dirname(__file__), "config.ini")
config.read(config_path)
return config
# Load configuration
config = load_config()
# Whisper configuration
DEFAULT_MODEL = config["whisper"]["default_model"]
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
COMPUTE_TYPE = "float16" if DEVICE == "cuda" else "float32"
BEAM_SIZE = config["whisper"].getint("beam_size")
VAD_FILTER = config["whisper"].getboolean("vad_filter")
# Log device and compute type
logger.info(f"PyTorch CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
logger.info(f"CUDA device: {torch.cuda.get_device_name(0)}")
logger.info(f"CUDA version: {torch.version.cuda}")
logger.info(f"cuDNN version: {torch.backends.cudnn.version()}")
logger.info(f"Using device: {DEVICE}, compute type: {COMPUTE_TYPE}")
logger.info(
f"Default model: {DEFAULT_MODEL}, beam size: {BEAM_SIZE}, VAD filter: {VAD_FILTER}"
)
# App configuration
MAX_DURATION = config["app"].getint("max_duration")
SERVER_NAME = config["app"]["server_name"]
SERVER_PORT = config["app"].getint("server_port")
SHARE = config["app"].getboolean("share")
# Available models and languages
WHISPER_MODELS = config["models"]["available_models"].split(",")
AVAILABLE_LANGUAGES = config["languages"]["available_languages"].split(",")
# Initialize Ollama handler
ollama = OllamaHandler()
OLLAMA_AVAILABLE = ollama.is_available()
OLLAMA_MODELS = ollama.get_available_models() if OLLAMA_AVAILABLE else []
DEFAULT_OLLAMA_MODEL = ollama.get_default_model() if OLLAMA_AVAILABLE else None
def load_model(model_name: str):
"""Load the Whisper model with the specified configuration."""
try:
logger.info(f"Loading Whisper model: {model_name}")
return WhisperModel(
model_name,
device=DEVICE,
compute_type=COMPUTE_TYPE,
download_root=os.path.join(os.path.dirname(__file__), "models"),
)
except Exception as e:
logger.error(f"Error loading model with CUDA: {str(e)}")
logger.info("Falling back to CPU")
return WhisperModel(
model_name,
device="cpu",
compute_type="float32",
download_root=os.path.join(os.path.dirname(__file__), "models"),
)
def transcribe_audio(
audio_file: str,
model_name: str,
language: str = None,
summarize: bool = False,
ollama_model: str = None,
) -> tuple[str, str, Optional[str]]:
"""Transcribe audio using the selected Whisper model."""
try:
logger.info(f"Starting transcription of {audio_file}")
logger.info(
f"Model: {model_name}, Language: {language}, Summarize: {summarize}"
)
# Load the model
model = load_model(model_name)
# Transcribe the audio
logger.info("Starting audio transcription...")
segments, info = model.transcribe(
audio_file,
language=language if language != "Auto-detect" else None,
beam_size=BEAM_SIZE,
vad_filter=VAD_FILTER,
)
# Get the full text with timestamps
full_text = " ".join([segment.text for segment in segments])
logger.info(
f"Transcription completed. Text length: {len(full_text)} characters"
)
logger.info(f"Detected language: {info.language}")
# Generate summary if requested
summary = None
if summarize and OLLAMA_AVAILABLE:
logger.info(f"Generating summary using Ollama model: {ollama_model}")
summary = ollama.summarize(full_text, ollama_model)
if summary:
logger.info(f"Summary generated. Length: {len(summary)} characters")
else:
logger.warning("Failed to generate summary")
return full_text, info.language, summary
except Exception as e:
logger.error(f"Error during transcription: {str(e)}")
return f"Error during transcription: {str(e)}", None, None
def process_youtube_url(
url: str,
model_name: str,
language: str = None,
summarize: bool = False,
ollama_model: str = None,
) -> Tuple[str, str, str, Optional[str]]:
"""Process a YouTube URL and return transcription or subtitles."""
try:
logger.info(f"Processing YouTube URL: {url}")
logger.info(
f"Model: {model_name}, Language: {language}, Summarize: {summarize}"
)
# First try to get available subtitles
logger.info("Checking for available subtitles...")
available_subs = yt.get_available_subtitles(url)
if available_subs:
logger.info(f"Found available subtitles: {', '.join(available_subs)}")
# Try to download English subtitles first, then fall back to any available
subtitle_path = yt.download_subtitles(url, "en")
if not subtitle_path:
logger.info(
"English subtitles not available, trying first available language"
)
subtitle_path = yt.download_subtitles(url, available_subs[0])
if subtitle_path:
logger.info(f"Successfully downloaded subtitles to: {subtitle_path}")
with open(subtitle_path, "r", encoding="utf-8") as f:
text = f.read()
summary = None
if summarize and OLLAMA_AVAILABLE:
logger.info(
f"Generating summary from subtitles using Ollama model: {ollama_model}"
)
summary = ollama.summarize(text, ollama_model)
if summary:
logger.info(
f"Summary generated. Length: {len(summary)} characters"
)
else:
logger.warning("Failed to generate summary")
return text, "en", "Subtitles", summary
# If no subtitles available, download and transcribe
logger.info("No subtitles available, downloading video for transcription...")
audio_path, video_title = yt.download_video(url)
logger.info(f"Video downloaded: {video_title}")
transcription, detected_lang, summary = transcribe_audio(
audio_path, model_name, language, summarize, ollama_model
)
# Clean up the temporary audio file
try:
os.remove(audio_path)
logger.info("Cleaned up temporary audio file")
except Exception as e:
logger.warning(f"Failed to clean up temporary file: {str(e)}")
return transcription, detected_lang, "Transcription", summary
except Exception as e:
logger.error(f"Error processing YouTube video: {str(e)}")
return f"Error processing YouTube video: {str(e)}", None, "Error", None
def create_interface():
"""Create and return the Gradio interface."""
with gr.Blocks(theme=gr.themes.Soft()) as app:
gr.Markdown("# 🎥 YouLama")
gr.Markdown("### AI-powered YouTube video transcription and summarization")
with gr.Tabs() as tabs:
with gr.TabItem("YouTube"):
gr.Markdown(
"""
### YouTube Video Processing
Enter a YouTube URL to transcribe the video or extract available subtitles.
- Supports youtube.com, youtu.be, and invidious URLs
- Automatically extracts subtitles if available
- Falls back to transcription if no subtitles found
- Optional AI-powered summarization with Ollama
"""
)
with gr.Row():
with gr.Column():
# YouTube input components
youtube_url = gr.Textbox(
label="YouTube URL",
placeholder="Enter YouTube URL (youtube.com, youtu.be, or invidious)",
)
yt_model_dropdown = gr.Dropdown(
choices=WHISPER_MODELS,
value=DEFAULT_MODEL,
label="Select Whisper Model",
)
yt_language_dropdown = gr.Dropdown(
choices=["Auto-detect"] + AVAILABLE_LANGUAGES,
value="Auto-detect",
label="Language (optional)",
)
with gr.Group():
yt_summarize_checkbox = gr.Checkbox(
label="Generate AI Summary",
value=False,
interactive=OLLAMA_AVAILABLE,
)
yt_ollama_model_dropdown = gr.Dropdown(
choices=(
OLLAMA_MODELS
if OLLAMA_AVAILABLE
else ["No models available"]
),
value=(
DEFAULT_OLLAMA_MODEL if OLLAMA_AVAILABLE else None
),
label="Ollama Model",
interactive=OLLAMA_AVAILABLE,
)
# Add status bar
yt_status = gr.Textbox(
label="Status",
value="Waiting for input...",
interactive=False,
elem_classes=["status-bar"],
)
yt_process_btn = gr.Button("Process Video", variant="primary")
with gr.Column():
# YouTube output components
yt_output_text = gr.Textbox(
label="Transcription", lines=10, max_lines=20
)
yt_detected_language = gr.Textbox(
label="Detected Language", interactive=False
)
yt_source = gr.Textbox(label="Source", interactive=False)
# Add summary text box below the main output
if OLLAMA_AVAILABLE:
yt_summary_text = gr.Textbox(
label="AI Summary", lines=5, max_lines=10, value=""
)
# Set up the event handler
def process_yt_with_summary(url, model, lang, summarize, ollama_model):
try:
# Update status for each step
status = "Checking URL and fetching video information..."
result = process_youtube_url(
url, model, lang, summarize, ollama_model
)
if len(result) == 4:
text, lang, source, summary = result
if source == "Subtitles":
status = "Processing subtitles..."
else:
status = "Transcribing video..."
if summarize and summary:
status = "Generating AI summary..."
return (
text,
lang,
source,
summary if summary else "",
"Processing complete!",
)
else:
return (
result[0],
result[1],
result[2],
"",
f"Error: {result[0]}",
)
except Exception as e:
logger.error(f"Error in process_yt_with_summary: {str(e)}")
return f"Error: {str(e)}", None, None, "", "Processing failed!"
yt_process_btn.click(
fn=process_yt_with_summary,
inputs=[
youtube_url,
yt_model_dropdown,
yt_language_dropdown,
yt_summarize_checkbox,
yt_ollama_model_dropdown,
],
outputs=[
yt_output_text,
yt_detected_language,
yt_source,
yt_summary_text if OLLAMA_AVAILABLE else gr.Textbox(),
yt_status,
],
)
with gr.TabItem("Local File"):
gr.Markdown(
"""
### Local File Transcription
Upload an audio or video file to transcribe it using Whisper.
- Supports various audio and video formats
- Automatic language detection
- Optional AI-powered summarization with Ollama
"""
)
with gr.Row():
with gr.Column():
# Input components
audio_input = gr.Audio(
label="Upload Audio/Video", type="filepath", format="mp3"
)
model_dropdown = gr.Dropdown(
choices=WHISPER_MODELS,
value=DEFAULT_MODEL,
label="Select Whisper Model",
)
language_dropdown = gr.Dropdown(
choices=["Auto-detect"] + AVAILABLE_LANGUAGES,
value="Auto-detect",
label="Language (optional)",
)
with gr.Group():
summarize_checkbox = gr.Checkbox(
label="Generate AI Summary",
value=False,
interactive=OLLAMA_AVAILABLE,
)
ollama_model_dropdown = gr.Dropdown(
choices=(
OLLAMA_MODELS
if OLLAMA_AVAILABLE
else ["No models available"]
),
value=(
DEFAULT_OLLAMA_MODEL if OLLAMA_AVAILABLE else None
),
label="Ollama Model",
interactive=OLLAMA_AVAILABLE,
)
# Add status bar
file_status = gr.Textbox(
label="Status",
value="Waiting for input...",
interactive=False,
elem_classes=["status-bar"],
)
transcribe_btn = gr.Button("Transcribe", variant="primary")
with gr.Column():
# Output components
output_text = gr.Textbox(
label="Transcription", lines=10, max_lines=20
)
detected_language = gr.Textbox(
label="Detected Language", interactive=False
)
if OLLAMA_AVAILABLE:
summary_text = gr.Textbox(
label="AI Summary", lines=5, max_lines=10, value=""
)
# Set up the event handler
def transcribe_with_summary(
audio, model, lang, summarize, ollama_model
):
try:
if not audio:
return "", None, "", "Please upload an audio file"
# Update status for each step
status = "Loading model..."
model = load_model(model)
status = "Transcribing audio..."
segments, info = model.transcribe(
audio,
language=lang if lang != "Auto-detect" else None,
beam_size=BEAM_SIZE,
vad_filter=VAD_FILTER,
)
# Get the full text with timestamps
full_text = " ".join([segment.text for segment in segments])
if summarize and OLLAMA_AVAILABLE:
status = "Generating AI summary..."
summary = ollama.summarize(full_text, ollama_model)
return (
full_text,
info.language,
summary if summary else "",
"Processing complete!",
)
else:
return (
full_text,
info.language,
"",
"Processing complete!",
)
except Exception as e:
logger.error(f"Error in transcribe_with_summary: {str(e)}")
return f"Error: {str(e)}", None, "", "Processing failed!"
transcribe_btn.click(
fn=transcribe_with_summary,
inputs=[
audio_input,
model_dropdown,
language_dropdown,
summarize_checkbox,
ollama_model_dropdown,
],
outputs=[
output_text,
detected_language,
summary_text if OLLAMA_AVAILABLE else gr.Textbox(),
file_status,
],
)
# Add some helpful information
gr.Markdown(
f"""
### Tips:
- For better accuracy, use larger models (medium, large)
- Processing time increases with model size
- GPU is recommended for faster processing
- Maximum audio duration is {MAX_DURATION // 60} minutes
- YouTube videos will first try to use available subtitles
- If no subtitles are available, the video will be transcribed
{"- AI-powered summarization is available for both local files and YouTube videos" if OLLAMA_AVAILABLE else "- AI-powered summarization is currently unavailable"}
### Status:
- Device: {DEVICE}
- Compute Type: {COMPUTE_TYPE}
- Ollama Status: {"Available" if OLLAMA_AVAILABLE else "Not Available"}
{"- Available Ollama Models: " + ", ".join(OLLAMA_MODELS) if OLLAMA_AVAILABLE else ""}
"""
)
return app
if __name__ == "__main__":
logger.info("Starting Whisper Transcription Web App")
# Check CUDA compatibility before starting
if not check_cuda_compatibility():
logger.warning(
"CUDA compatibility check failed. The application might not work as expected."
)
logger.info(f"Server will be available at http://{SERVER_NAME}:{SERVER_PORT}")
app = create_interface()
app.launch(share=SHARE, server_name=SERVER_NAME, server_port=SERVER_PORT)
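
The Gradio UI is only assembled inside `create_interface()`; the transcription helpers above are plain functions and can be reused on their own. A minimal sketch, assuming a `config.ini` sits next to `app.py` and using a placeholder audio file name:

```python
# Sketch: calling app.py's transcription helper without the Gradio UI.
# Importing app loads config.ini and initializes the Ollama handler, but does
# not launch the web server (that only happens under __main__).
from app import transcribe_audio

text, detected_language, summary = transcribe_audio(
    "sample.mp3",          # placeholder path to a local audio file
    model_name="base",     # any entry from [models] available_models
    language=None,         # None lets Whisper auto-detect the language
    summarize=False,       # set True only if an Ollama server is reachable
)
print(detected_language, text[:200])
```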

41
config.ini.example Normal file
View File

@@ -0,0 +1,41 @@
[whisper]
default_model = base
device = cuda
compute_type = float16
beam_size = 5
vad_filter = true
[app]
max_duration = 3600
server_name = 0.0.0.0
server_port = 7860
share = true
[models]
available_models = tiny,base,small,medium,large-v1,large-v2,large-v3
[languages]
available_languages = en,es,fr,de,it,pt,nl,ja,ko,zh
[ollama]
enabled = false
url = http://host.docker.internal:11434
default_model = mistral
summarize_prompt = Your mission is to create a **detailed and comprehensive summary**.
Before you dive into summarizing, a quick heads-up on the input:
* If the text looks like a subtitle file (you know the drill: timestamps, short, disconnected lines), first mentally stitch it together into a flowing, continuous narrative. Then, summarize *that* coherent version.
Now, for the summary itself, here's what I'm looking for:
1. **Focus on Comprehensive Coverage:** As you generate a more detailed summary, ensure you thoroughly cover the main ideas, key arguments, significant supporting details, important examples or explanations offered in the text, and the overall conclusions or takeaways. Don't just skim the surface.
2. **Depth and Desired Length (This is Crucial!):**
* **Target Range:** Produce a summary that is approximately **10 percent to 25 percent of the original text's length**. For example, if the original text is 1000 words, aim for a summary in the 100-250 word range. If it's 100 lines, aim for 10-25 lines. Use your best judgment to hit this target.
* **Information Density:** The goal here is not just arbitrary length, but to fill that length with **all genuinely significant information**. Prioritize retaining details that contribute to a deeper understanding of the subject. It's better to include a supporting detail that seems relevant than to omit it and risk losing nuance.
* **Beyond a Basic Abstract:** This should be much more than a high-level overview. Think of it as creating a condensed version of the text that preserves a good deal of its informative richness and narrative flow. The emphasis is on **thoroughness and completeness of key information** rather than extreme brevity.
3. **Accuracy is King:** What you write needs to be a faithful representation of the source material. No making things up, and no injecting your own opinions unless they're explicitly in the text.
4. **Clarity and Cohesion:** Even though it's longer, the summary should still be well-organized, clear, and easy to read.
* "Present the summary as a series of well-developed paragraphs."
* "Give me a detailed summary of approximately [calculate 10-25 percent of expected input length] words."
* "The summary should be extensive, aiming for about 15 percent of the original content's length."

25
docker-compose.yml Normal file
View File

@@ -0,0 +1,25 @@
version: '3.8'
services:
youlama:
build: .
ports:
- "7860:7860"
volumes:
- .:/app
- ./models:/app/models
environment:
- NVIDIA_VISIBLE_DEVICES=all
- OLLAMA_HOST=host.docker.internal
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
ollama_data:

View File

View File

@@ -1,7 +0,0 @@
OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=llama3.1:8b
YOUTUBE_API_KEY=your_youtube_api_key
WHISPER_URL=http://localhost:8000/
WHISPER_MODEL=Systran/faster-whisper-large-v3
PASTEBIN_API_KEY=your_pastebin_api_key
USE_PO_TOKEN=true

View File

@@ -1,104 +0,0 @@
name: youlama
channels:
- conda-forge
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- bzip2=1.0.8=h4bc722e_7
- ca-certificates=2024.12.14=hbcca054_0
- ld_impl_linux-64=2.43=h712a8e2_2
- libffi=3.4.2=h7f98852_5
- libgcc=14.2.0=h77fa898_1
- libgcc-ng=14.2.0=h69a702a_1
- libgomp=14.2.0=h77fa898_1
- liblzma=5.6.3=hb9d3cd8_1
- libnsl=2.0.1=hd590300_0
- libsqlite=3.47.2=hee588c1_0
- libuuid=2.38.1=h0b41bf4_0
- libxcrypt=4.4.36=hd590300_1
- libzlib=1.3.1=hb9d3cd8_2
- ncurses=6.5=he02047a_1
- openssl=3.4.0=hb9d3cd8_0
- pip=24.3.1=pyh8b19718_0
- python=3.10.16=he725a3c_1_cpython
- readline=8.2=h8228510_1
- setuptools=75.6.0=pyhff2d567_1
- tk=8.6.13=noxft_h4845f30_101
- wheel=0.45.1=pyhd8ed1ab_1
- pip:
- altair==5.5.0
- anyio==4.7.0
- attrs==24.2.0
- blinker==1.9.0
- cachetools==5.5.0
- certifi==2024.12.14
- charset-normalizer==3.4.0
- click==8.1.7
- exceptiongroup==1.2.2
- filelock==3.16.1
- fsspec==2024.10.0
- gitdb==4.0.11
- gitpython==3.1.43
- google-api-core==2.24.0
- google-api-python-client==2.101.0
- google-auth==2.37.0
- google-auth-httplib2==0.2.0
- googleapis-common-protos==1.66.0
- gradio-client==1.5.2
- h11==0.14.0
- httpcore==1.0.7
- httplib2==0.22.0
- httpx==0.28.1
- huggingface-hub==0.26.5
- idna==3.10
- importlib-metadata==7.2.1
- jinja2==3.1.4
- jsonschema==4.23.0
- jsonschema-specifications==2024.10.1
- markdown-it-py==3.0.0
- markupsafe==3.0.2
- mdurl==0.1.2
- narwhals==1.18.3
- numpy==1.26.4
- packaging==23.2
- pandas==2.2.3
- pillow==10.4.0
- proto-plus==1.25.0
- protobuf==4.25.5
- pyarrow==18.1.0
- pyasn1==0.6.1
- pyasn1-modules==0.4.1
- pydeck==0.9.1
- pydub==0.25.1
- pygments==2.18.0
- pyparsing==3.2.0
- python-dateutil==2.9.0.post0
- python-dotenv==1.0.1
- pytube==15.0.0
- pytubefix==8.8.1
- pytz==2024.2
- pyyaml==6.0.2
- referencing==0.35.1
- requests==2.31.0
- rich==13.9.4
- rpds-py==0.22.3
- rsa==4.9
- six==1.17.0
- smmap==5.0.1
- sniffio==1.3.1
- streamlit==1.31.1
- tenacity==8.5.0
- toml==0.10.2
- tornado==6.4.2
- tqdm==4.67.1
- typing-extensions==4.12.2
- tzdata==2024.2
- tzlocal==5.2
- uritemplate==4.1.1
- urllib3==2.2.3
- validators==0.34.0
- watchdog==6.0.0
- websockets==14.1
- youtube-transcript-api==0.6.2
- yt-dlp==2024.12.13
- zipp==3.21.0

View File

@@ -1 +0,0 @@
I am still working on this.

View File

@@ -1,25 +0,0 @@
#! /bin/bash
# Ensure the script is run with sudo
if [ "$EUID" -ne 0 ]; then
echo "Please run this script with sudo"
exit 1
fi
# Install dependencies
pip install -r requirements.txt || exit 1
# Get the directory of the script
DIR=$(pwd)
# Create a proper executable
CURRENT_DIR=$(pwd)
CMD="""
#!/bin/bash
cd $CURRENT_DIR
streamlit run src/main.py
"""
echo "$CMD" > /usr/local/bin/youlama
chmod +x /usr/local/bin/youlama

112
ollama_handler.py Normal file
View File

@@ -0,0 +1,112 @@
import os
import configparser
import logging
from typing import List, Optional
from ollama import Client
# Configure logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
class OllamaHandler:
def __init__(self):
"""Initialize Ollama handler with configuration."""
self.config = self._load_config()
self.endpoint = self.config["ollama"]["url"]
self.default_model = self.config["ollama"]["default_model"]
self.summarize_prompt = self.config["ollama"]["summarize_prompt"]
self.client = Client(host=self.endpoint)
self.available = self._check_availability()
logger.info(f"Initialized Ollama handler with endpoint: {self.endpoint}")
logger.info(f"Default model: {self.default_model}")
logger.info(f"Ollama available: {self.available}")
def _load_config(self) -> configparser.ConfigParser:
"""Load configuration from config.ini file."""
config = configparser.ConfigParser()
config_path = os.path.join(os.path.dirname(__file__), "config.ini")
config.read(config_path)
return config
def _check_availability(self) -> bool:
"""Check if Ollama server is available."""
try:
self.client.list()
logger.info("Ollama server is available")
return True
except Exception as e:
logger.warning(f"Ollama server is not available: {str(e)}")
return False
def is_available(self) -> bool:
"""Return whether Ollama is available."""
return self.available
def get_available_models(self) -> List[str]:
"""Get list of available Ollama models."""
try:
models = self.client.list()
# Model entries are listed under the "models" key; each entry's name is in its "model" field
model_names = [model["model"] for model in models["models"]]
logger.info(f"Found {len(model_names)} available models")
return model_names
except Exception as e:
logger.error(f"Error getting available models: {str(e)}")
return []
def get_default_model(self) -> str:
"""Get the default model, falling back to first available if configured model not found."""
if not self.available:
return None
available_models = self.get_available_models()
if not available_models:
return None
if self.default_model in available_models:
logger.info(f"Using configured default model: {self.default_model}")
return self.default_model
else:
logger.warning(
f"Configured model '{self.default_model}' not found, using first available model: {available_models[0]}"
)
return available_models[0]
def summarize(self, text: str, model: str = None) -> Optional[str]:
"""Summarize text using Ollama."""
if not self.available:
logger.warning("Cannot summarize: Ollama is not available")
return None
if not text:
logger.warning("Cannot summarize: Empty text provided")
return None
model = model or self.default_model
if not model:
logger.warning("Cannot summarize: No model specified")
return None
try:
logger.info(f"Generating summary using model: {model}")
logger.info(f"Input text length: {len(text)} characters")
# Generate the summary using the prompt from config
response = self.client.chat(
model=model,
messages=[
{"role": "system", "content": self.summarize_prompt},
{"role": "user", "content": text},
],
)
summary = response["message"]["content"]
logger.info(f"Summary generated. Length: {len(summary)} characters")
return summary
except Exception as e:
logger.error(f"Error generating summary: {str(e)}")
return None
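
A minimal usage sketch for the handler above, assuming a `config.ini` with an `[ollama]` section sits next to `ollama_handler.py` and an Ollama server is reachable at the configured URL (the sample text is a placeholder):

```python
# Sketch: summarizing a transcript through OllamaHandler.
from ollama_handler import OllamaHandler

handler = OllamaHandler()
if handler.is_available():
    model = handler.get_default_model()   # falls back to the first listed model
    summary = handler.summarize("Some long transcript text ...", model)
    print(summary)
else:
    print("Ollama server not reachable; summarization is disabled")
```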

View File

@@ -1,10 +1,14 @@
streamlit==1.31.1
python-dotenv==1.0.1
youtube-transcript-api==0.6.2
requests==2.31.0
google-api-python-client==2.101.0
yt-dlp
pydub
gradio-client
pytube
pytubefix
gradio>=4.0.0
# Choose one of these whisper implementations:
faster-whisper>=0.9.0
torch>=2.0.0
torchvision>=0.15.0
torchaudio>=2.0.0
yt-dlp>=2023.12.30
python-dotenv>=1.0.0
requests>=2.31.0
ollama>=0.1.0
# WhisperX dependencies
ffmpeg-python>=0.2.0
pyannote.audio>=3.1.1
configparser>=6.0.0

Binary file not shown (before: 74 KiB).

View File

@@ -1,182 +0,0 @@
/* Base theme */
:root {
--primary-color: #00b4d8;
--bg-color: #1b1b1b;
--card-bg: #2d2d2d;
--text-color: #f0f0f0;
--border-color: #404040;
--hover-color: #90e0ef;
}
/* Main container */
.stApp {
background-color: var(--bg-color);
color: var(--text-color);
}
/* Responsive container */
.stApp > div:nth-child(2) {
padding: 2rem !important;
max-width: 1200px;
margin: 0 auto;
}
@media (min-width: 768px) {
.stApp > div:nth-child(2) {
padding: 2rem 5rem !important;
}
}
/* Headers */
h1,
h2,
h3,
h4,
h5,
h6 {
color: white !important;
font-weight: 600 !important;
margin-bottom: 1rem !important;
}
/* Input fields */
.stTextInput input,
.stSelectbox select {
background-color: var(--card-bg) !important;
color: var(--text-color) !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px;
padding: 0.75rem 1rem;
font-size: clamp(14px, 2vw, 16px);
transition: all 0.3s;
width: 100% !important;
}
.stTextInput input:focus,
.stSelectbox select:focus {
border-color: var(--primary-color) !important;
box-shadow: 0 0 0 2px rgba(114, 137, 218, 0.2);
}
/* Buttons */
.stButton button {
background: linear-gradient(45deg, var(--primary-color), #8ea1e1) !important;
color: white !important;
border: none !important;
border-radius: 8px !important;
padding: 0.75rem 1.5rem !important;
font-weight: 600 !important;
width: 100% !important;
transition: all 0.3s !important;
font-size: clamp(14px, 2vw, 16px);
}
.stButton button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(114, 137, 218, 0.3);
}
/* Cards */
.card {
background-color: var(--card-bg);
border-radius: 12px;
padding: clamp(1rem, 3vw, 1.5rem);
border: 1px solid var(--border-color);
margin-bottom: 1rem;
transition: all 0.3s;
}
.card:hover {
border-color: var(--primary-color);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
}
/* Expander */
.streamlit-expanderHeader {
background-color: var(--card-bg) !important;
color: var(--text-color) !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
padding: 1rem !important;
font-size: clamp(14px, 2vw, 16px);
}
.streamlit-expanderContent {
border: none !important;
padding: 1rem 0 0 0 !important;
}
/* Status messages */
.stSuccess,
.stInfo,
.stWarning,
.stError {
background-color: var(--card-bg) !important;
color: var(--text-color) !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
padding: 1rem !important;
font-size: clamp(14px, 2vw, 16px);
}
/* Hide Streamlit branding */
#MainMenu {
visibility: hidden;
}
footer {
visibility: hidden;
}
/* Column spacing */
[data-testid="column"] {
padding: 0.5rem !important;
}
/* Checkbox and radio */
.stCheckbox,
.stRadio {
font-size: clamp(14px, 2vw, 16px);
}
/* Mobile optimizations */
@media (max-width: 768px) {
.stButton button {
padding: 0.5rem 1rem !important;
}
[data-testid="column"] {
padding: 0.25rem !important;
}
.card {
padding: 1rem;
}
}
/* Add these styles for sections */
.stTextInput,
.stSelectbox,
.stButton {
background-color: var(--card-bg);
border-radius: 8px;
padding: 1rem;
margin-bottom: 1rem;
}
.streamlit-expanderHeader {
background-color: var(--card-bg) !important;
border-radius: 8px !important;
margin-bottom: 1rem;
}
.streamlit-expanderContent {
background-color: var(--card-bg);
border-radius: 8px;
padding: 1rem !important;
margin-top: 0.5rem;
}
/* Add spacing between sections */
.stMarkdown {
margin-bottom: 1.5rem;
}

Binary file not shown (before: 12 KiB).

View File

@@ -1,448 +0,0 @@
import os
import json
import streamlit as st
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
from ollama_client import OllamaClient
from video_info import get_video_info
from yt_audiophile import download_audio, get_po_token_setting
from whisper_module import transcribe
from pastebin_client import create_paste
from pathlib import Path
# Load environment variables
load_dotenv()
# Set page config first, before any other st commands
st.set_page_config(
page_title="YouTube Video Companion by TCSenpai",
page_icon="src/assets/subtitles.png",
layout="wide",
)
def load_css():
css_file = Path(__file__).parent / "assets" / "style.css"
with open(css_file) as f:
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
def get_ollama_models(ollama_url):
ollama_client = OllamaClient(ollama_url, "")
models = ollama_client.get_models()
return models
def main():
# Load CSS
load_css()
# st.write("###### YouTube Video Companion")
# Ollama Settings section
# st.subheader("🎯 Ollama Settings")
default_ollama_url = os.getenv("OLLAMA_URL")
ollama_url = st.text_input(
"Ollama URL",
value=default_ollama_url,
placeholder="Enter Ollama URL",
)
if not ollama_url:
ollama_url = default_ollama_url
available_models = get_ollama_models(ollama_url)
default_model = os.getenv("OLLAMA_MODEL")
if default_model not in available_models:
available_models.append(default_model)
selected_model = st.selectbox(
"Model",
options=available_models,
index=(
available_models.index(default_model)
if default_model in available_models
else 0
),
)
# Video URL and buttons section
video_url = st.text_input(
"🎥 Video URL",
placeholder="https://www.youtube.com/watch?v=...",
)
col1, col2 = st.columns(2)
with col1:
summarize_button = st.button("🚀 Summarize", use_container_width=True)
with col2:
read_button = st.button("📖 Read", use_container_width=True)
# Advanced settings section
with st.expander("⚙️ Advanced Settings", expanded=False):
col1, col2 = st.columns(2)
with col1:
fallback_to_whisper = st.checkbox(
"Fallback to Whisper",
value=True,
help="If no transcript is available, try to generate one using Whisper",
)
force_whisper = st.checkbox(
"Force Whisper",
value=False,
help="Always use Whisper for transcription",
)
with col2:
use_po_token = st.checkbox(
"Use PO Token",
value=get_po_token_setting(),
help="Use PO token for YouTube authentication (helps bypass restrictions)",
)
# Initialize session state for messages if not exists
if "messages" not in st.session_state:
st.session_state.messages = []
# Initialize session state for rephrased transcript if not exists
if "rephrased_transcript" not in st.session_state:
st.session_state.rephrased_transcript = None
# Create a single header container
header = st.container()
def show_warning(message):
update_header("⚠️ " + message)
def show_error(message):
update_header("🚫 " + message)
def show_info(message):
update_header("💡 " + message)
def update_header(message):
with header:
st.markdown(
f"""
<div class='fixed-header'>
<div class='header-message'>{message}</div>
<div class='header-title'>YouLama - A YouTube Video Companion</div>
<style>
div.fixed-header {{
position: fixed;
top: 2.875rem;
left: 0;
right: 0;
z-index: 999;
padding: 10px;
margin: 0 1rem;
border-radius: 0.5rem;
border: 1px solid rgba(128, 128, 128, 0.2);
height: 45px !important;
background-color: rgba(40, 40, 40, 0.95);
backdrop-filter: blur(5px);
box-shadow: 0 4px 6px rgb(0, 0, 0, 0.1);
transition: all 0.3s ease;
display: flex;
align-items: center;
justify-content: space-between;
}}
.header-message {{
flex: 1;
}}
.header-title {{
color: rgba(250, 250, 250, 0.8);
font-size: 0.9em;
margin-left: 20px;
}}
</style>
</div>
""",
unsafe_allow_html=True,
)
# Initialize the header with a ready message
update_header("✅ Ready to summarize!")
# Add spacing after the fixed header
# st.markdown("<div style='margin-top: 120px;'></div>", unsafe_allow_html=True)
def get_transcript(video_id):
cache_dir = "transcript_cache"
cache_file = os.path.join(cache_dir, f"{video_id}.json")
# Create cache directory if it doesn't exist
os.makedirs(cache_dir, exist_ok=True)
# Check if transcript is cached
if os.path.exists(cache_file):
with open(cache_file, "r") as f:
return json.load(f)["transcript"]
try:
transcript = YouTubeTranscriptApi.get_transcript(video_id)
full_transcript = " ".join([entry["text"] for entry in transcript])
# Cache the transcript
with open(cache_file, "w") as f:
json.dump({"transcript": full_transcript}, f)
return full_transcript
except Exception as e:
print(f"Error fetching transcript: {e}")
return None
def summarize_video(
video_url,
model,
ollama_url,
fallback_to_whisper=True,
force_whisper=False,
use_po_token=None,
):
video_id = None
# Get the video id from the url if it's a valid youtube or invidious or any other url that contains a video id
if "v=" in video_url:
video_id = video_url.split("v=")[-1]
# Support short urls as well
elif "youtu.be/" in video_url:
video_id = video_url.split("youtu.be/")[-1]
# Also cut out any part of the url after the video id
video_id = video_id.split("&")[0]
st.write(f"Video ID: {video_id}")
with st.spinner("Fetching transcript..."):
transcript = get_transcript(video_id)
show_info("Summarizer fetched successfully!")
# Forcing whisper if specified
if force_whisper:
show_warning("Forcing whisper...")
fallback_to_whisper = True
transcript = None
if not transcript:
print("No transcript found, trying to download audio...")
if not fallback_to_whisper:
print("Fallback to whisper is disabled")
return (
"Unable to fetch transcript (and fallback to whisper is disabled)"
)
if not force_whisper:
show_warning("Unable to fetch transcript. Trying to download audio...")
try:
print("Downloading audio...")
download_audio(video_url, use_po_token=use_po_token)
show_info("Audio downloaded successfully!")
show_warning("Starting transcription...it might take a while...")
transcript = transcribe("downloads/output.m4a")
show_info("Transcription completed successfully!")
os.remove("downloads/output.m4a")
except Exception as e:
print(f"Error downloading audio or transcribing: {e}")
show_error(f"Error downloading audio or transcribing: {e}")
if os.path.exists("downloads/output.m4a"):
os.remove("downloads/output.m4a")
return "Unable to fetch transcript."
print(f"Transcript: {transcript}")
ollama_client = OllamaClient(ollama_url, model)
show_info(f"Ollama client created with model: {model}")
show_warning("Starting summary generation, this might take a while...")
with st.spinner("Generating summary..."):
prompt = f"Summarize the following YouTube video transcript in a concise yet detailed manner:\n\n```{transcript}```\n\nSummary with introduction and conclusion formatted in markdown:"
summary = ollama_client.generate(prompt)
print(summary)
show_info("Summary generated successfully (scroll down to see the summary)!")
with st.spinner("Fetching video info..."):
video_info = get_video_info(video_id)
st.success("Video info fetched successfully!")
return {
"title": video_info["title"],
"channel": video_info["channel"],
"transcript": transcript,
"summary": summary,
}
def fix_transcript(
video_url,
model,
ollama_url,
fallback_to_whisper=True,
force_whisper=False,
use_po_token=None,
):
video_id = None
# Get the video id from the url if it's a valid youtube or invidious or any other url that contains a video id
if "v=" in video_url:
video_id = video_url.split("v=")[-1]
# Support short urls as well
elif "youtu.be/" in video_url:
video_id = video_url.split("youtu.be/")[-1]
# Also cut out any part of the url after the video id
video_id = video_id.split("&")[0]
st.write(f"Video ID: {video_id}")
with st.spinner("Fetching transcript..."):
transcript = get_transcript(video_id)
show_info("Transcript fetched successfully!")
# Forcing whisper if specified
if force_whisper:
show_warning("Forcing whisper...")
fallback_to_whisper = True
transcript = None
if not transcript:
print("No transcript found, trying to download audio...")
if not fallback_to_whisper:
print("Fallback to whisper is disabled")
return (
"Unable to fetch transcript (and fallback to whisper is disabled)"
)
if not force_whisper:
show_warning("Unable to fetch transcript. Trying to download audio...")
try:
print("Downloading audio...")
download_audio(video_url, use_po_token=use_po_token)
show_info("Audio downloaded successfully!")
show_warning("Starting transcription...it might take a while...")
transcript = transcribe("downloads/output.m4a")
show_info("Transcription completed successfully!")
os.remove("downloads/output.m4a")
except Exception as e:
print(f"Error downloading audio or transcribing: {e}")
show_error(f"Error downloading audio or transcribing: {e}")
if os.path.exists("downloads/output.m4a"):
os.remove("downloads/output.m4a")
return "Unable to fetch transcript."
ollama_client = OllamaClient(ollama_url, model)
show_info(f"Ollama client created with model: {model}")
show_warning("Starting transcript enhancement...")
with st.spinner("Enhancing transcript..."):
prompt = f"""Fix the grammar and punctuation of the following transcript, maintaining the exact same content and meaning.
Only correct grammatical errors, add proper punctuation, and fix sentence structure where needed.
Do not rephrase or change the content:\n\n{transcript}"""
enhanced = ollama_client.generate(prompt)
show_info(
"Transcript enhanced successfully (scroll down to see the enhanced transcript)!"
)
with st.spinner("Fetching video info..."):
video_info = get_video_info(video_id)
st.success("Video info fetched successfully!")
return {
"title": video_info["title"],
"channel": video_info["channel"],
"transcript": transcript,
"enhanced": enhanced,
}
if (summarize_button or read_button) and video_url:
if read_button:
# Enhance transcript (now called read)
result = fix_transcript(
video_url,
selected_model,
ollama_url,
fallback_to_whisper=fallback_to_whisper,
force_whisper=force_whisper,
use_po_token=use_po_token,
)
# Display results
st.subheader("📺 Video Information")
info_col1, info_col2 = st.columns(2)
with info_col1:
st.write(f"**Title:** {result['title']}")
with info_col2:
st.write(f"**Channel:** {result['channel']}")
st.subheader("📝 Enhanced Transcript")
st.markdown(result["enhanced"])
# Original transcript in expander
with st.expander("📝 Original Transcript", expanded=False):
st.text_area(
"Raw Transcript",
result["transcript"],
height=200,
disabled=True,
)
elif summarize_button:
# Continue with existing summarize functionality
summary = summarize_video(
video_url,
selected_model,
ollama_url,
fallback_to_whisper=fallback_to_whisper,
force_whisper=force_whisper,
use_po_token=use_po_token,
)
# Video Information
st.subheader("📺 Video Information")
info_col1, info_col2 = st.columns(2)
with info_col1:
st.write(f"**Title:** {summary['title']}")
with info_col2:
st.write(f"**Channel:** {summary['channel']}")
# Transcript Section
with st.expander("📝 Original Transcript", expanded=False):
col1, col2 = st.columns([3, 1])
with col1:
st.text_area(
"Raw Transcript",
summary["transcript"],
height=200,
disabled=True,
)
with col2:
if st.button("🔄 Rephrase"):
with st.spinner("Rephrasing transcript..."):
ollama_client = OllamaClient(ollama_url, selected_model)
prompt = f"Rephrase the following transcript to make it more readable and well-formatted, keeping the main content intact:\n\n{summary['transcript']}"
st.session_state.rephrased_transcript = (
ollama_client.generate(prompt)
)
if st.button("📋 Share"):
try:
content = f"""Video Title: {summary['title']}
Channel: {summary['channel']}
URL: {video_url}
--- Transcript ---
{summary['transcript']}"""
paste_url = create_paste(
f"Transcript: {summary['title']}", content
)
st.success(
f"Transcript shared successfully! [View here]({paste_url})"
)
except Exception as e:
if "PASTEBIN_API_KEY" not in os.environ:
st.warning(
"PASTEBIN_API_KEY not found in environment variables"
)
else:
st.error(f"Error sharing transcript: {str(e)}")
# Summary Section
st.subheader("📊 AI Summary")
st.markdown(summary["summary"])
# After the rephrase button, add:
if st.session_state.rephrased_transcript:
st.markdown(st.session_state.rephrased_transcript)
if __name__ == "__main__":
main()

View File

@@ -1,54 +0,0 @@
import requests
import os
from dotenv import load_dotenv
load_dotenv()
ollama_model = os.getenv("OLLAMA_MODEL") or "llama3.1:8b"
class OllamaClient:
def __init__(self, base_url, model):
self.base_url = base_url
self.model = model
self.context_size_table = {
"llama3.1": 128000,
"mistral-nemo": 128000,
"mistral_small_obliterated_22b": 128000,
}
self.context_size = 2048
if self.model not in self.context_size_table:
print(
f"Model {self.model} not found in context size table: using default {self.context_size}"
)
else:
self.context_size = self.context_size_table[self.model]
print(f"Using context size {self.context_size} for model {self.model}")
def get_models(self):
url = f"{self.base_url}/api/tags"
response = requests.get(url)
models = []
response_json = response.json()
all_models = response_json["models"]
for model in all_models:
models.append(model["name"])
return models
def generate(self, prompt):
url = f"{self.base_url}/api/generate"
data = {
"model": self.model,
"prompt": prompt,
"stream": False,
"num_ctx": self.context_size,
}
response = requests.post(url, json=data)
if response.status_code == 200:
try:
return response.json()["response"]
except Exception as e:
print(response)
return response
else:
raise Exception(f"Error generating text: {response.text}")

View File

@@ -1,26 +0,0 @@
import requests
import os
from dotenv import load_dotenv
load_dotenv()
def create_paste(title, content):
api_key = os.getenv("PASTEBIN_API_KEY")
if not api_key:
raise Exception("PASTEBIN_API_KEY not found in environment variables")
url = 'https://pastebin.com/api/api_post.php'
data = {
'api_dev_key': api_key,
'api_option': 'paste',
'api_paste_code': content,
'api_paste_private': '0', # 0=public, 1=unlisted, 2=private
'api_paste_name': title,
'api_paste_expire_date': '1W' # Expires in 1 week
}
response = requests.post(url, data=data)
if response.status_code == 200 and not response.text.startswith('Bad API request'):
return response.text
else:
raise Exception(f"Error creating paste: {response.text}")

View File

@@ -1,29 +0,0 @@
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import os
from dotenv import load_dotenv
load_dotenv()
def get_video_info(video_id):
youtube = build("youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))
try:
request = youtube.videos().list(
part="snippet",
id=video_id
)
response = request.execute()
if response["items"]:
snippet = response["items"][0]["snippet"]
return {
"title": snippet["title"],
"channel": snippet["channelTitle"]
}
else:
return {"title": "Unknown", "channel": "Unknown"}
except HttpError as e:
print(f"An HTTP error occurred: {e}")
return {"title": "Error", "channel": "Error"}

View File

@@ -1,18 +0,0 @@
from gradio_client import Client, handle_file
from yt_audiophile import download_audio
def transcribe(file_path):
client = Client("http://192.168.178.121:8300/")
result = client.predict(
file_path=handle_file(file_path),
model="Systran/faster-whisper-large-v3",
task="transcribe",
temperature=0,
stream=False,
api_name="/predict",
)
print(result)
return result

View File

@@ -1,67 +0,0 @@
from pytubefix import YouTube
from pytubefix.cli import on_progress
from dotenv import load_dotenv
import os
load_dotenv()
def get_po_token_setting():
env_setting = os.getenv("USE_PO_TOKEN", "true").lower() == "true"
return env_setting
def download_audio(url, use_po_token=None):
try:
# If use_po_token is not provided, use the environment variable
if use_po_token is None:
use_po_token = get_po_token_setting()
# Create YouTube object with bot detection bypass
yt = YouTube(
url,
on_progress_callback=on_progress,
use_oauth=True,
allow_oauth_cache=True,
use_po_token=use_po_token, # Now configurable
)
# Get audio stream
audio_stream = yt.streams.filter(only_audio=True).order_by("abr").desc().first()
if not audio_stream:
raise Exception("No audio stream found")
# Download audio
audio_stream.download("downloads", "output.m4a")
return True
except Exception as e:
print(f"Error in download_audio: {str(e)}")
raise Exception(f"Download failed: {str(e)}")
def itags(yt: YouTube, resolution="1080p"):
try:
# Get best audio stream
audio_stream = yt.streams.filter(only_audio=True).order_by("abr").desc().first()
audio_value = audio_stream.itag if audio_stream else None
# Get video stream
video_stream = None
for fps in [60, 30, 24]:
try:
video_stream = yt.streams.filter(res=resolution, fps=fps).first()
if video_stream:
print(f"Found {fps} FPS stream")
break
except IndexError:
continue
if not video_stream:
raise Exception(f"No video stream found for resolution {resolution}")
return audio_value, video_stream.itag
except Exception as e:
print(f"Error in itags: {str(e)}")
raise Exception(f"Stream selection failed: {str(e)}")

12
youlama
View File

@@ -1,12 +0,0 @@
#!/bin/bash
# Install dependencies
if [ ! -f .installed ]; then
echo "Installing dependencies..."
pip install -r requirements.txt || exit 1
touch .installed
fi
# Run the app
echo "Running the app..."
streamlit run src/main.py

117
youtube_handler.py Normal file
View File

@@ -0,0 +1,117 @@
import re
import os
import tempfile
from typing import Optional, Tuple
import yt_dlp
from urllib.parse import urlparse, parse_qs
def is_youtube_url(url: str) -> bool:
"""Check if the URL is a valid YouTube URL."""
youtube_regex = r"(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
return bool(re.match(youtube_regex, url))
def extract_video_id(url: str) -> Optional[str]:
"""Extract video ID from various YouTube URL formats."""
if not is_youtube_url(url):
return None
# Handle youtu.be URLs
if "youtu.be" in url:
return url.split("/")[-1].split("?")[0]
# Handle youtube.com URLs
parsed_url = urlparse(url)
if parsed_url.netloc in ["www.youtube.com", "youtube.com"]:
if parsed_url.path == "/watch":
return parse_qs(parsed_url.query).get("v", [None])[0]
elif parsed_url.path.startswith(("/embed/", "/v/")):
return parsed_url.path.split("/")[2]
return None
def get_video_info(url: str) -> dict:
"""Get video information using yt-dlp."""
ydl_opts = {
"quiet": True,
"no_warnings": True,
"extract_flat": True,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
try:
return ydl.extract_info(url, download=False)
except Exception as e:
raise Exception(f"Error fetching video info: {str(e)}")
def download_video(url: str) -> Tuple[str, str]:
"""Download video and return the path to the audio file."""
temp_dir = tempfile.mkdtemp()
output_path = os.path.join(temp_dir, "%(id)s.%(ext)s")
ydl_opts = {
"format": "bestaudio/best",
"postprocessors": [
{
"key": "FFmpegExtractAudio",
"preferredcodec": "mp3",
"preferredquality": "192",
}
],
"outtmpl": output_path,
"quiet": True,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
try:
info = ydl.extract_info(url, download=True)
audio_path = os.path.join(temp_dir, f"{info['id']}.mp3")
return audio_path, info["title"]
except Exception as e:
raise Exception(f"Error downloading video: {str(e)}")
def get_available_subtitles(url: str) -> list:
"""Get available subtitles for the video."""
ydl_opts = {
"writesubtitles": True,
"writeautomaticsub": True,
"subtitleslangs": ["en"],
"skip_download": True,
"quiet": True,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
try:
info = ydl.extract_info(url, download=False)
return list(info.get("subtitles", {}).keys())
except Exception:
return []
def download_subtitles(url: str, lang: str = "en") -> Optional[str]:
"""Download subtitles for the video."""
temp_dir = tempfile.mkdtemp()
output_path = os.path.join(temp_dir, "%(id)s.%(ext)s")
ydl_opts = {
"writesubtitles": True,
"writeautomaticsub": True,
"subtitleslangs": [lang],
"skip_download": True,
"outtmpl": output_path,
"quiet": True,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
try:
info = ydl.extract_info(url, download=True)
subtitle_path = os.path.join(temp_dir, f"{info['id']}.{lang}.vtt")
if os.path.exists(subtitle_path):
return subtitle_path
return None
except Exception:
return None