tcsenpai 2025-05-23 13:05:40 +02:00
parent ee8ab99f48
commit e5add93553
3 changed files with 34 additions and 36 deletions

README.md

@@ -1,17 +1,17 @@
-# Audio/Video Transcription Web App
+# YouLama
-A web application for transcribing audio and video files using faster-whisper, with support for YouTube videos and optional summarization using Ollama.
+A powerful web application for transcribing and summarizing YouTube videos and local media files using faster-whisper and Ollama.
 ## Features
-- Transcribe local audio/video files
-- Process YouTube videos (with subtitle extraction when available)
-- Automatic language detection
-- Multiple Whisper model options
-- Optional text summarization using Ollama
-- Modern web interface with Gradio
-- Docker support with CUDA
-- Configurable settings via config.ini
+- 🎥 YouTube video transcription with subtitle extraction
+- 🎙️ Local audio/video file transcription
+- 🤖 Automatic language detection
+- 📝 Multiple Whisper model options
+- 📚 AI-powered text summarization using Ollama
+- 🎨 Modern web interface with Gradio
+- 🐳 Docker support with CUDA
+- ⚙️ Configurable settings via config.ini
 ## Requirements
@@ -25,7 +25,7 @@ A web application for transcribing audio and video files using faster-whisper, w
 1. Clone the repository:
 ```bash
 git clone <repository-url>
-cd whisperapp
+cd youlama
 ```
 2. Install NVIDIA Container Toolkit (if not already installed):
@@ -64,7 +64,7 @@ cp .env.example .env
 ollama serve
 ```
-2. Build and start the Whisper app container:
+2. Build and start the YouLama container:
 ```bash
 docker-compose up --build
 ```
@@ -116,25 +116,25 @@ summarize_prompt = Please provide a comprehensive yet concise summary of the fol
 ## Features in Detail
-### Local File Transcription
-- Supports various audio and video formats
-- Automatic language detection
-- Multiple Whisper model options
-- Optional summarization with Ollama
 ### YouTube Video Processing
 - Supports youtube.com, youtu.be, and invidious URLs
 - Automatically extracts subtitles if available
 - Falls back to transcription if no subtitles found
-- Optional summarization with Ollama
-### Summarization
+- Optional AI-powered summarization with Ollama
+### Local File Transcription
+- Supports various audio and video formats
+- Automatic language detection
+- Multiple Whisper model options
+- Optional AI-powered summarization with Ollama
+### AI Summarization
 - Uses locally running Ollama for text summarization
 - Configurable model selection
 - Customizable prompt
 - Available for both local files and YouTube videos
-## Notes
+## Tips
 - For better accuracy, use larger models (medium, large)
 - Processing time increases with model size
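
The hunk above describes the YouTube flow: reuse subtitles when YouTube provides them, otherwise transcribe the audio. A minimal sketch of that fallback, assuming yt-dlp and faster-whisper as the underlying libraries; the function and option names are illustrative, not taken from the repository:

```python
# Sketch of the subtitle-first, transcription-fallback flow described above.
# Assumes yt-dlp and faster-whisper; names are illustrative, not YouLama's API.
import yt_dlp
from faster_whisper import WhisperModel


def get_youtube_text(url: str, model_size: str = "base") -> str:
    # Probe the video without downloading to see whether subtitles exist.
    with yt_dlp.YoutubeDL({"skip_download": True, "quiet": True}) as ydl:
        info = ydl.extract_info(url, download=False)

    if info.get("subtitles"):
        # Subtitles are available; a full implementation would download and
        # parse the subtitle track here instead of running Whisper at all.
        raise NotImplementedError("download and parse the subtitle track")

    # No subtitles: grab the audio and transcribe it with faster-whisper.
    opts = {"format": "bestaudio", "outtmpl": "audio.%(ext)s", "quiet": True}
    with yt_dlp.YoutubeDL(opts) as ydl:
        downloaded = ydl.extract_info(url, download=True)
        audio_path = ydl.prepare_filename(downloaded)

    model = WhisperModel(model_size, device="cuda", compute_type="float16")
    segments, ts_info = model.transcribe(audio_path)  # ts_info.language holds the detected language
    return " ".join(segment.text for segment in segments)
```

Preferring existing subtitles is what keeps GPU time down; Whisper transcription only runs as the fallback.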

app.py

@@ -238,10 +238,8 @@ def process_youtube_url(
 def create_interface():
 """Create and return the Gradio interface."""
 with gr.Blocks(theme=gr.themes.Soft()) as app:
-gr.Markdown("# 🎙️ Audio/Video Transcription with Whisper")
-gr.Markdown(
-"### A powerful tool for transcribing and summarizing audio/video content"
-)
+gr.Markdown("# 🎥 YouLama")
+gr.Markdown("### AI-powered YouTube video transcription and summarization")
 with gr.Tabs() as tabs:
 with gr.TabItem("YouTube"):
@@ -252,7 +250,7 @@ def create_interface():
 - Supports youtube.com, youtu.be, and invidious URLs
 - Automatically extracts subtitles if available
 - Falls back to transcription if no subtitles found
-- Optional summarization with Ollama
+- Optional AI-powered summarization with Ollama
 """
 )
@@ -275,7 +273,7 @@
 )
 with gr.Group():
 yt_summarize_checkbox = gr.Checkbox(
-label="Generate Summary",
+label="Generate AI Summary",
 value=False,
 interactive=OLLAMA_AVAILABLE,
 )
@@ -305,7 +303,7 @@
 with gr.Column():
 # YouTube output components
 yt_output_text = gr.Textbox(
-label="Result", lines=10, max_lines=20
+label="Transcription", lines=10, max_lines=20
 )
 yt_detected_language = gr.Textbox(
 label="Detected Language", interactive=False
@@ -315,7 +313,7 @@
 # Add summary text box below the main output
 if OLLAMA_AVAILABLE:
 yt_summary_text = gr.Textbox(
-label="Summary", lines=5, max_lines=10, value=""
+label="AI Summary", lines=5, max_lines=10, value=""
 )
 # Set up the event handler
@@ -335,7 +333,7 @@
 status = "Transcribing video..."
 if summarize and summary:
-status = "Generating summary..."
+status = "Generating AI summary..."
 return (
 text,
@@ -381,7 +379,7 @@
 Upload an audio or video file to transcribe it using Whisper.
 - Supports various audio and video formats
 - Automatic language detection
-- Optional summarization with Ollama
+- Optional AI-powered summarization with Ollama
 """
 )
@@ -403,7 +401,7 @@
 )
 with gr.Group():
 summarize_checkbox = gr.Checkbox(
-label="Generate Summary",
+label="Generate AI Summary",
 value=False,
 interactive=OLLAMA_AVAILABLE,
 )
@@ -440,7 +438,7 @@
 )
 if OLLAMA_AVAILABLE:
 summary_text = gr.Textbox(
-label="Summary", lines=5, max_lines=10, value=""
+label="AI Summary", lines=5, max_lines=10, value=""
 )
 # Set up the event handler
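
The hunks above wire a "Generate AI Summary" checkbox and an "AI Summary" textbox into the Gradio Blocks layout. A compact, self-contained sketch of that pattern with a stub handler in place of the real Whisper/Ollama calls; the names below are illustrative rather than lifted from app.py:

```python
# Minimal Gradio sketch of the checkbox + summary-textbox pattern shown above.
# The handler is a stub; the real app transcribes with Whisper and summarizes with Ollama.
import gradio as gr


def handle_file(file_path: str, summarize: bool):
    transcription = f"(transcription of {file_path} would go here)"
    summary = "(AI summary would go here)" if summarize else ""
    return transcription, summary


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 YouLama")
    with gr.Tabs():
        with gr.TabItem("Local File"):
            file_input = gr.Textbox(label="Path to audio/video file")
            summarize_checkbox = gr.Checkbox(label="Generate AI Summary", value=False)
            output_text = gr.Textbox(label="Transcription", lines=10, max_lines=20)
            summary_text = gr.Textbox(label="AI Summary", lines=5, max_lines=10)
            run_button = gr.Button("Transcribe")
            run_button.click(
                fn=handle_file,
                inputs=[file_input, summarize_checkbox],
                outputs=[output_text, summary_text],
            )

if __name__ == "__main__":
    demo.launch()
```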
@@ -467,7 +465,7 @@
 full_text = " ".join([segment.text for segment in segments])
 if summarize and OLLAMA_AVAILABLE:
-status = "Generating summary..."
+status = "Generating AI summary..."
 summary = ollama.summarize(full_text, ollama_model)
 return (
 full_text,
@@ -514,7 +512,7 @@
 - Maximum audio duration is {MAX_DURATION // 60} minutes
 - YouTube videos will first try to use available subtitles
 - If no subtitles are available, the video will be transcribed
-{"- Ollama summarization is available for both local files and YouTube videos" if OLLAMA_AVAILABLE else "- Ollama summarization is currently unavailable"}
+{"- AI-powered summarization is available for both local files and YouTube videos" if OLLAMA_AVAILABLE else "- AI-powered summarization is currently unavailable"}
 ### Status:
 - Device: {DEVICE}
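
The handler above calls `ollama.summarize(full_text, ollama_model)` once transcription finishes. A rough sketch of what such a helper can look like against a locally running Ollama server via its standard `/api/generate` endpoint; the helper name and default prompt are placeholders (the real instruction comes from `summarize_prompt` in config.ini), not the project's actual module:

```python
# Sketch of an Ollama summarization helper; assumes the default local server
# on port 11434. Mirrors the ollama.summarize() call in app.py but is not
# taken from the project's source.
import requests

OLLAMA_URL = "http://localhost:11434/api/generate"


def summarize(text: str, model: str, prompt: str = "Summarize the following text.") -> str:
    # In the real app the instruction text would come from config.ini's summarize_prompt.
    payload = {
        "model": model,
        "prompt": f"{prompt}\n\n{text}",
        "stream": False,  # request a single JSON response instead of a token stream
    }
    response = requests.post(OLLAMA_URL, json=payload, timeout=300)
    response.raise_for_status()
    return response.json()["response"]
```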

docker-compose.yml

@@ -1,7 +1,7 @@
 version: '3.8'
 services:
-whisperapp:
+youlama:
 build: .
 ports:
 - "7860:7860"