mirror of
https://github.com/tcsenpai/youlama.git
synced 2025-06-03 18:00:31 +00:00
rebrand
This commit is contained in:
parent
ee8ab99f48
commit
e5add93553
42
README.md
42
README.md
@ -1,17 +1,17 @@
|
||||
# Audio/Video Transcription Web App
|
||||
# YouLama
|
||||
|
||||
A web application for transcribing audio and video files using faster-whisper, with support for YouTube videos and optional summarization using Ollama.
|
||||
A powerful web application for transcribing and summarizing YouTube videos and local media files using faster-whisper and Ollama.
|
||||
|
||||
## Features
|
||||
|
||||
- Transcribe local audio/video files
|
||||
- Process YouTube videos (with subtitle extraction when available)
|
||||
- Automatic language detection
|
||||
- Multiple Whisper model options
|
||||
- Optional text summarization using Ollama
|
||||
- Modern web interface with Gradio
|
||||
- Docker support with CUDA
|
||||
- Configurable settings via config.ini
|
||||
- 🎥 YouTube video transcription with subtitle extraction
|
||||
- 🎙️ Local audio/video file transcription
|
||||
- 🤖 Automatic language detection
|
||||
- 📝 Multiple Whisper model options
|
||||
- 📚 AI-powered text summarization using Ollama
|
||||
- 🎨 Modern web interface with Gradio
|
||||
- 🐳 Docker support with CUDA
|
||||
- ⚙️ Configurable settings via config.ini
|
||||
|
||||
## Requirements
|
||||
|
||||
@ -25,7 +25,7 @@ A web application for transcribing audio and video files using faster-whisper, w
|
||||
1. Clone the repository:
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
cd whisperapp
|
||||
cd youlama
|
||||
```
|
||||
|
||||
2. Install NVIDIA Container Toolkit (if not already installed):
|
||||
@ -64,7 +64,7 @@ cp .env.example .env
|
||||
ollama serve
|
||||
```
|
||||
|
||||
2. Build and start the Whisper app container:
|
||||
2. Build and start the YouLama container:
|
||||
```bash
|
||||
docker-compose up --build
|
||||
```
|
||||
@ -116,25 +116,25 @@ summarize_prompt = Please provide a comprehensive yet concise summary of the fol
|
||||
|
||||
## Features in Detail
|
||||
|
||||
### Local File Transcription
|
||||
- Supports various audio and video formats
|
||||
- Automatic language detection
|
||||
- Multiple Whisper model options
|
||||
- Optional summarization with Ollama
|
||||
|
||||
### YouTube Video Processing
|
||||
- Supports youtube.com, youtu.be, and Invidious URLs
|
||||
- Automatically extracts subtitles if available
|
||||
- Falls back to transcription if no subtitles found
|
||||
- Optional summarization with Ollama
|
||||
- Optional AI-powered summarization with Ollama
|
||||
|
||||
### Summarization
|
||||
### Local File Transcription
|
||||
- Supports various audio and video formats
|
||||
- Automatic language detection
|
||||
- Multiple Whisper model options
|
||||
- Optional AI-powered summarization with Ollama
|
||||
|
||||
### AI Summarization
|
||||
- Uses locally running Ollama for text summarization
|
||||
- Configurable model selection
|
||||
- Customizable prompt
|
||||
- Available for both local files and YouTube videos
|
||||
|
||||
## Notes
|
||||
## Tips
|
||||
|
||||
- For better accuracy, use larger models (medium, large)
|
||||
- Processing time increases with model size
|
||||
|
26
app.py
26
app.py
@ -238,10 +238,8 @@ def process_youtube_url(
|
||||
def create_interface():
|
||||
"""Create and return the Gradio interface."""
|
||||
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
||||
gr.Markdown("# 🎙️ Audio/Video Transcription with Whisper")
|
||||
gr.Markdown(
|
||||
"### A powerful tool for transcribing and summarizing audio/video content"
|
||||
)
|
||||
gr.Markdown("# 🎥 YouLama")
|
||||
gr.Markdown("### AI-powered YouTube video transcription and summarization")
|
||||
|
||||
with gr.Tabs() as tabs:
|
||||
with gr.TabItem("YouTube"):
|
||||
@ -252,7 +250,7 @@ def create_interface():
|
||||
- Supports youtube.com, youtu.be, and invidious URLs
|
||||
- Automatically extracts subtitles if available
|
||||
- Falls back to transcription if no subtitles found
|
||||
- Optional summarization with Ollama
|
||||
- Optional AI-powered summarization with Ollama
|
||||
"""
|
||||
)
|
||||
|
||||
@ -275,7 +273,7 @@ def create_interface():
|
||||
)
|
||||
with gr.Group():
|
||||
yt_summarize_checkbox = gr.Checkbox(
|
||||
label="Generate Summary",
|
||||
label="Generate AI Summary",
|
||||
value=False,
|
||||
interactive=OLLAMA_AVAILABLE,
|
||||
)
|
||||
@ -305,7 +303,7 @@ def create_interface():
|
||||
with gr.Column():
|
||||
# YouTube output components
|
||||
yt_output_text = gr.Textbox(
|
||||
label="Result", lines=10, max_lines=20
|
||||
label="Transcription", lines=10, max_lines=20
|
||||
)
|
||||
yt_detected_language = gr.Textbox(
|
||||
label="Detected Language", interactive=False
|
||||
@ -315,7 +313,7 @@ def create_interface():
|
||||
# Add summary text box below the main output
|
||||
if OLLAMA_AVAILABLE:
|
||||
yt_summary_text = gr.Textbox(
|
||||
label="Summary", lines=5, max_lines=10, value=""
|
||||
label="AI Summary", lines=5, max_lines=10, value=""
|
||||
)
|
||||
|
||||
# Set up the event handler
|
||||
@ -335,7 +333,7 @@ def create_interface():
|
||||
status = "Transcribing video..."
|
||||
|
||||
if summarize and summary:
|
||||
status = "Generating summary..."
|
||||
status = "Generating AI summary..."
|
||||
|
||||
return (
|
||||
text,
|
||||
@ -381,7 +379,7 @@ def create_interface():
|
||||
Upload an audio or video file to transcribe it using Whisper.
|
||||
- Supports various audio and video formats
|
||||
- Automatic language detection
|
||||
- Optional summarization with Ollama
|
||||
- Optional AI-powered summarization with Ollama
|
||||
"""
|
||||
)
|
||||
|
||||
@ -403,7 +401,7 @@ def create_interface():
|
||||
)
|
||||
with gr.Group():
|
||||
summarize_checkbox = gr.Checkbox(
|
||||
label="Generate Summary",
|
||||
label="Generate AI Summary",
|
||||
value=False,
|
||||
interactive=OLLAMA_AVAILABLE,
|
||||
)
|
||||
@ -440,7 +438,7 @@ def create_interface():
|
||||
)
|
||||
if OLLAMA_AVAILABLE:
|
||||
summary_text = gr.Textbox(
|
||||
label="Summary", lines=5, max_lines=10, value=""
|
||||
label="AI Summary", lines=5, max_lines=10, value=""
|
||||
)
|
||||
|
||||
# Set up the event handler
|
||||
@ -467,7 +465,7 @@ def create_interface():
|
||||
full_text = " ".join([segment.text for segment in segments])
|
||||
|
||||
if summarize and OLLAMA_AVAILABLE:
|
||||
status = "Generating summary..."
|
||||
status = "Generating AI summary..."
|
||||
summary = ollama.summarize(full_text, ollama_model)
|
||||
return (
|
||||
full_text,
|
||||
@ -514,7 +512,7 @@ def create_interface():
|
||||
- Maximum audio duration is {MAX_DURATION // 60} minutes
|
||||
- YouTube videos will first try to use available subtitles
|
||||
- If no subtitles are available, the video will be transcribed
|
||||
{"- Ollama summarization is available for both local files and YouTube videos" if OLLAMA_AVAILABLE else "- Ollama summarization is currently unavailable"}
|
||||
{"- AI-powered summarization is available for both local files and YouTube videos" if OLLAMA_AVAILABLE else "- AI-powered summarization is currently unavailable"}
|
||||
|
||||
### Status:
|
||||
- Device: {DEVICE}
|
||||
|
@ -1,7 +1,7 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
whisperapp:
|
||||
youlama:
|
||||
build: .
|
||||
ports:
|
||||
- "7860:7860"
|
||||
|
Loading…
x
Reference in New Issue
Block a user