mirror of
https://github.com/tcsenpai/youlama.git
synced 2025-06-05 18:55:39 +00:00
rebrand
This commit is contained in:
parent
ee8ab99f48
commit
e5add93553
42
README.md
42
README.md
@ -1,17 +1,17 @@
|
|||||||
# Audio/Video Transcription Web App
|
# YouLama
|
||||||
|
|
||||||
A web application for transcribing audio and video files using faster-whisper, with support for YouTube videos and optional summarization using Ollama.
|
A powerful web application for transcribing and summarizing YouTube videos and local media files using faster-whisper and Ollama.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Transcribe local audio/video files
|
- 🎥 YouTube video transcription with subtitle extraction
|
||||||
- Process YouTube videos (with subtitle extraction when available)
|
- 🎙️ Local audio/video file transcription
|
||||||
- Automatic language detection
|
- 🤖 Automatic language detection
|
||||||
- Multiple Whisper model options
|
- 📝 Multiple Whisper model options
|
||||||
- Optional text summarization using Ollama
|
- 📚 AI-powered text summarization using Ollama
|
||||||
- Modern web interface with Gradio
|
- 🎨 Modern web interface with Gradio
|
||||||
- Docker support with CUDA
|
- 🐳 Docker support with CUDA
|
||||||
- Configurable settings via config.ini
|
- ⚙️ Configurable settings via config.ini
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ A web application for transcribing audio and video files using faster-whisper, w
|
|||||||
1. Clone the repository:
|
1. Clone the repository:
|
||||||
```bash
|
```bash
|
||||||
git clone <repository-url>
|
git clone <repository-url>
|
||||||
cd whisperapp
|
cd youlama
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install NVIDIA Container Toolkit (if not already installed):
|
2. Install NVIDIA Container Toolkit (if not already installed):
|
||||||
@ -64,7 +64,7 @@ cp .env.example .env
|
|||||||
ollama serve
|
ollama serve
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Build and start the Whisper app container:
|
2. Build and start the YouLama container:
|
||||||
```bash
|
```bash
|
||||||
docker-compose up --build
|
docker-compose up --build
|
||||||
```
|
```
|
||||||
@ -116,25 +116,25 @@ summarize_prompt = Please provide a comprehensive yet concise summary of the fol
|
|||||||
|
|
||||||
## Features in Detail
|
## Features in Detail
|
||||||
|
|
||||||
### Local File Transcription
|
|
||||||
- Supports various audio and video formats
|
|
||||||
- Automatic language detection
|
|
||||||
- Multiple Whisper model options
|
|
||||||
- Optional summarization with Ollama
|
|
||||||
|
|
||||||
### YouTube Video Processing
|
### YouTube Video Processing
|
||||||
- Supports youtube.com, youtu.be, and invidious URLs
|
- Supports youtube.com, youtu.be, and invidious URLs
|
||||||
- Automatically extracts subtitles if available
|
- Automatically extracts subtitles if available
|
||||||
- Falls back to transcription if no subtitles found
|
- Falls back to transcription if no subtitles found
|
||||||
- Optional summarization with Ollama
|
- Optional AI-powered summarization with Ollama
|
||||||
|
|
||||||
### Summarization
|
### Local File Transcription
|
||||||
|
- Supports various audio and video formats
|
||||||
|
- Automatic language detection
|
||||||
|
- Multiple Whisper model options
|
||||||
|
- Optional AI-powered summarization with Ollama
|
||||||
|
|
||||||
|
### AI Summarization
|
||||||
- Uses locally running Ollama for text summarization
|
- Uses locally running Ollama for text summarization
|
||||||
- Configurable model selection
|
- Configurable model selection
|
||||||
- Customizable prompt
|
- Customizable prompt
|
||||||
- Available for both local files and YouTube videos
|
- Available for both local files and YouTube videos
|
||||||
|
|
||||||
## Notes
|
## Tips
|
||||||
|
|
||||||
- For better accuracy, use larger models (medium, large)
|
- For better accuracy, use larger models (medium, large)
|
||||||
- Processing time increases with model size
|
- Processing time increases with model size
|
||||||
|
26
app.py
26
app.py
@ -238,10 +238,8 @@ def process_youtube_url(
|
|||||||
def create_interface():
|
def create_interface():
|
||||||
"""Create and return the Gradio interface."""
|
"""Create and return the Gradio interface."""
|
||||||
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
||||||
gr.Markdown("# 🎙️ Audio/Video Transcription with Whisper")
|
gr.Markdown("# 🎥 YouLama")
|
||||||
gr.Markdown(
|
gr.Markdown("### AI-powered YouTube video transcription and summarization")
|
||||||
"### A powerful tool for transcribing and summarizing audio/video content"
|
|
||||||
)
|
|
||||||
|
|
||||||
with gr.Tabs() as tabs:
|
with gr.Tabs() as tabs:
|
||||||
with gr.TabItem("YouTube"):
|
with gr.TabItem("YouTube"):
|
||||||
@ -252,7 +250,7 @@ def create_interface():
|
|||||||
- Supports youtube.com, youtu.be, and invidious URLs
|
- Supports youtube.com, youtu.be, and invidious URLs
|
||||||
- Automatically extracts subtitles if available
|
- Automatically extracts subtitles if available
|
||||||
- Falls back to transcription if no subtitles found
|
- Falls back to transcription if no subtitles found
|
||||||
- Optional summarization with Ollama
|
- Optional AI-powered summarization with Ollama
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -275,7 +273,7 @@ def create_interface():
|
|||||||
)
|
)
|
||||||
with gr.Group():
|
with gr.Group():
|
||||||
yt_summarize_checkbox = gr.Checkbox(
|
yt_summarize_checkbox = gr.Checkbox(
|
||||||
label="Generate Summary",
|
label="Generate AI Summary",
|
||||||
value=False,
|
value=False,
|
||||||
interactive=OLLAMA_AVAILABLE,
|
interactive=OLLAMA_AVAILABLE,
|
||||||
)
|
)
|
||||||
@ -305,7 +303,7 @@ def create_interface():
|
|||||||
with gr.Column():
|
with gr.Column():
|
||||||
# YouTube output components
|
# YouTube output components
|
||||||
yt_output_text = gr.Textbox(
|
yt_output_text = gr.Textbox(
|
||||||
label="Result", lines=10, max_lines=20
|
label="Transcription", lines=10, max_lines=20
|
||||||
)
|
)
|
||||||
yt_detected_language = gr.Textbox(
|
yt_detected_language = gr.Textbox(
|
||||||
label="Detected Language", interactive=False
|
label="Detected Language", interactive=False
|
||||||
@ -315,7 +313,7 @@ def create_interface():
|
|||||||
# Add summary text box below the main output
|
# Add summary text box below the main output
|
||||||
if OLLAMA_AVAILABLE:
|
if OLLAMA_AVAILABLE:
|
||||||
yt_summary_text = gr.Textbox(
|
yt_summary_text = gr.Textbox(
|
||||||
label="Summary", lines=5, max_lines=10, value=""
|
label="AI Summary", lines=5, max_lines=10, value=""
|
||||||
)
|
)
|
||||||
|
|
||||||
# Set up the event handler
|
# Set up the event handler
|
||||||
@ -335,7 +333,7 @@ def create_interface():
|
|||||||
status = "Transcribing video..."
|
status = "Transcribing video..."
|
||||||
|
|
||||||
if summarize and summary:
|
if summarize and summary:
|
||||||
status = "Generating summary..."
|
status = "Generating AI summary..."
|
||||||
|
|
||||||
return (
|
return (
|
||||||
text,
|
text,
|
||||||
@ -381,7 +379,7 @@ def create_interface():
|
|||||||
Upload an audio or video file to transcribe it using Whisper.
|
Upload an audio or video file to transcribe it using Whisper.
|
||||||
- Supports various audio and video formats
|
- Supports various audio and video formats
|
||||||
- Automatic language detection
|
- Automatic language detection
|
||||||
- Optional summarization with Ollama
|
- Optional AI-powered summarization with Ollama
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -403,7 +401,7 @@ def create_interface():
|
|||||||
)
|
)
|
||||||
with gr.Group():
|
with gr.Group():
|
||||||
summarize_checkbox = gr.Checkbox(
|
summarize_checkbox = gr.Checkbox(
|
||||||
label="Generate Summary",
|
label="Generate AI Summary",
|
||||||
value=False,
|
value=False,
|
||||||
interactive=OLLAMA_AVAILABLE,
|
interactive=OLLAMA_AVAILABLE,
|
||||||
)
|
)
|
||||||
@ -440,7 +438,7 @@ def create_interface():
|
|||||||
)
|
)
|
||||||
if OLLAMA_AVAILABLE:
|
if OLLAMA_AVAILABLE:
|
||||||
summary_text = gr.Textbox(
|
summary_text = gr.Textbox(
|
||||||
label="Summary", lines=5, max_lines=10, value=""
|
label="AI Summary", lines=5, max_lines=10, value=""
|
||||||
)
|
)
|
||||||
|
|
||||||
# Set up the event handler
|
# Set up the event handler
|
||||||
@ -467,7 +465,7 @@ def create_interface():
|
|||||||
full_text = " ".join([segment.text for segment in segments])
|
full_text = " ".join([segment.text for segment in segments])
|
||||||
|
|
||||||
if summarize and OLLAMA_AVAILABLE:
|
if summarize and OLLAMA_AVAILABLE:
|
||||||
status = "Generating summary..."
|
status = "Generating AI summary..."
|
||||||
summary = ollama.summarize(full_text, ollama_model)
|
summary = ollama.summarize(full_text, ollama_model)
|
||||||
return (
|
return (
|
||||||
full_text,
|
full_text,
|
||||||
@ -514,7 +512,7 @@ def create_interface():
|
|||||||
- Maximum audio duration is {MAX_DURATION // 60} minutes
|
- Maximum audio duration is {MAX_DURATION // 60} minutes
|
||||||
- YouTube videos will first try to use available subtitles
|
- YouTube videos will first try to use available subtitles
|
||||||
- If no subtitles are available, the video will be transcribed
|
- If no subtitles are available, the video will be transcribed
|
||||||
{"- Ollama summarization is available for both local files and YouTube videos" if OLLAMA_AVAILABLE else "- Ollama summarization is currently unavailable"}
|
{"- AI-powered summarization is available for both local files and YouTube videos" if OLLAMA_AVAILABLE else "- AI-powered summarization is currently unavailable"}
|
||||||
|
|
||||||
### Status:
|
### Status:
|
||||||
- Device: {DEVICE}
|
- Device: {DEVICE}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
version: '3.8'
|
version: '3.8'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
whisperapp:
|
youlama:
|
||||||
build: .
|
build: .
|
||||||
ports:
|
ports:
|
||||||
- "7860:7860"
|
- "7860:7860"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user