mirror of
https://github.com/tcsenpai/youlama.git
synced 2025-06-07 03:35:41 +00:00
added ollama and youtube summarization features
This commit is contained in:
parent
5d41615a40
commit
bb592bcc55
155
app.py
155
app.py
@ -5,6 +5,7 @@ import torch
|
|||||||
import configparser
|
import configparser
|
||||||
from typing import List, Tuple, Optional
|
from typing import List, Tuple, Optional
|
||||||
import youtube_handler as yt
|
import youtube_handler as yt
|
||||||
|
from ollama_handler import OllamaHandler
|
||||||
|
|
||||||
|
|
||||||
def load_config() -> configparser.ConfigParser:
|
def load_config() -> configparser.ConfigParser:
|
||||||
@ -35,6 +36,11 @@ SHARE = config["app"].getboolean("share")
|
|||||||
WHISPER_MODELS = config["models"]["available_models"].split(",")
|
WHISPER_MODELS = config["models"]["available_models"].split(",")
|
||||||
AVAILABLE_LANGUAGES = config["languages"]["available_languages"].split(",")
|
AVAILABLE_LANGUAGES = config["languages"]["available_languages"].split(",")
|
||||||
|
|
||||||
|
# Initialize Ollama handler
|
||||||
|
ollama = OllamaHandler()
|
||||||
|
OLLAMA_AVAILABLE = ollama.is_available()
|
||||||
|
OLLAMA_MODELS = ollama.get_available_models() if OLLAMA_AVAILABLE else []
|
||||||
|
|
||||||
|
|
||||||
def load_model(model_name: str) -> WhisperModel:
|
def load_model(model_name: str) -> WhisperModel:
|
||||||
"""Load the Whisper model with the specified configuration."""
|
"""Load the Whisper model with the specified configuration."""
|
||||||
@ -42,8 +48,12 @@ def load_model(model_name: str) -> WhisperModel:
|
|||||||
|
|
||||||
|
|
||||||
def transcribe_audio(
|
def transcribe_audio(
|
||||||
audio_file: str, model_name: str, language: str = None
|
audio_file: str,
|
||||||
) -> tuple[str, str]:
|
model_name: str,
|
||||||
|
language: str = None,
|
||||||
|
summarize: bool = False,
|
||||||
|
ollama_model: str = None,
|
||||||
|
) -> tuple[str, str, Optional[str]]:
|
||||||
"""Transcribe audio using the selected Whisper model."""
|
"""Transcribe audio using the selected Whisper model."""
|
||||||
try:
|
try:
|
||||||
# Load the model
|
# Load the model
|
||||||
@ -60,14 +70,23 @@ def transcribe_audio(
|
|||||||
# Combine all segments into one text
|
# Combine all segments into one text
|
||||||
full_text = " ".join([segment.text for segment in segments])
|
full_text = " ".join([segment.text for segment in segments])
|
||||||
|
|
||||||
return full_text, info.language
|
# Generate summary if requested
|
||||||
|
summary = None
|
||||||
|
if summarize and OLLAMA_AVAILABLE:
|
||||||
|
summary = ollama.summarize(full_text, ollama_model)
|
||||||
|
|
||||||
|
return full_text, info.language, summary
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error during transcription: {str(e)}", None
|
return f"Error during transcription: {str(e)}", None, None
|
||||||
|
|
||||||
|
|
||||||
def process_youtube_url(
|
def process_youtube_url(
|
||||||
url: str, model_name: str, language: str = None
|
url: str,
|
||||||
) -> Tuple[str, str, str]:
|
model_name: str,
|
||||||
|
language: str = None,
|
||||||
|
summarize: bool = False,
|
||||||
|
ollama_model: str = None,
|
||||||
|
) -> Tuple[str, str, str, Optional[str]]:
|
||||||
"""Process a YouTube URL and return transcription or subtitles."""
|
"""Process a YouTube URL and return transcription or subtitles."""
|
||||||
try:
|
try:
|
||||||
# First try to get available subtitles
|
# First try to get available subtitles
|
||||||
@ -81,12 +100,16 @@ def process_youtube_url(
|
|||||||
|
|
||||||
if subtitle_path:
|
if subtitle_path:
|
||||||
with open(subtitle_path, "r", encoding="utf-8") as f:
|
with open(subtitle_path, "r", encoding="utf-8") as f:
|
||||||
return f.read(), "en", "Subtitles"
|
text = f.read()
|
||||||
|
summary = None
|
||||||
|
if summarize and OLLAMA_AVAILABLE:
|
||||||
|
summary = ollama.summarize(text, ollama_model)
|
||||||
|
return text, "en", "Subtitles", summary
|
||||||
|
|
||||||
# If no subtitles available, download and transcribe
|
# If no subtitles available, download and transcribe
|
||||||
audio_path, video_title = yt.download_video(url)
|
audio_path, video_title = yt.download_video(url)
|
||||||
transcription, detected_lang = transcribe_audio(
|
transcription, detected_lang, summary = transcribe_audio(
|
||||||
audio_path, model_name, language
|
audio_path, model_name, language, summarize, ollama_model
|
||||||
)
|
)
|
||||||
|
|
||||||
# Clean up the temporary audio file
|
# Clean up the temporary audio file
|
||||||
@ -95,10 +118,10 @@ def process_youtube_url(
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return transcription, detected_lang, "Transcription"
|
return transcription, detected_lang, "Transcription", summary
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error processing YouTube video: {str(e)}", None, "Error"
|
return f"Error processing YouTube video: {str(e)}", None, "Error", None
|
||||||
|
|
||||||
|
|
||||||
def create_interface():
|
def create_interface():
|
||||||
@ -128,6 +151,22 @@ def create_interface():
|
|||||||
value="Auto-detect",
|
value="Auto-detect",
|
||||||
label="Language (optional)",
|
label="Language (optional)",
|
||||||
)
|
)
|
||||||
|
if OLLAMA_AVAILABLE:
|
||||||
|
with gr.Group():
|
||||||
|
summarize_checkbox = gr.Checkbox(
|
||||||
|
label="Generate Summary", value=False
|
||||||
|
)
|
||||||
|
ollama_model_dropdown = gr.Dropdown(
|
||||||
|
choices=OLLAMA_MODELS,
|
||||||
|
value=OLLAMA_MODELS[0] if OLLAMA_MODELS else None,
|
||||||
|
label="Ollama Model",
|
||||||
|
visible=False,
|
||||||
|
)
|
||||||
|
summarize_checkbox.change(
|
||||||
|
fn=lambda x: gr.Dropdown.update(visible=x),
|
||||||
|
inputs=[summarize_checkbox],
|
||||||
|
outputs=[ollama_model_dropdown],
|
||||||
|
)
|
||||||
transcribe_btn = gr.Button("Transcribe", variant="primary")
|
transcribe_btn = gr.Button("Transcribe", variant="primary")
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
@ -138,12 +177,45 @@ def create_interface():
|
|||||||
detected_language = gr.Textbox(
|
detected_language = gr.Textbox(
|
||||||
label="Detected Language", interactive=False
|
label="Detected Language", interactive=False
|
||||||
)
|
)
|
||||||
|
if OLLAMA_AVAILABLE:
|
||||||
|
summary_text = gr.Textbox(
|
||||||
|
label="Summary", lines=5, max_lines=10, visible=False
|
||||||
|
)
|
||||||
|
|
||||||
# Set up the event handler
|
# Set up the event handler
|
||||||
|
def transcribe_with_summary(
|
||||||
|
audio, model, lang, summarize, ollama_model
|
||||||
|
):
|
||||||
|
result = transcribe_audio(
|
||||||
|
audio, model, lang, summarize, ollama_model
|
||||||
|
)
|
||||||
|
if len(result) == 3:
|
||||||
|
text, lang, summary = result
|
||||||
|
return text, lang, summary if summary else ""
|
||||||
|
return result[0], result[1], ""
|
||||||
|
|
||||||
transcribe_btn.click(
|
transcribe_btn.click(
|
||||||
fn=transcribe_audio,
|
fn=transcribe_with_summary,
|
||||||
inputs=[audio_input, model_dropdown, language_dropdown],
|
inputs=[
|
||||||
outputs=[output_text, detected_language],
|
audio_input,
|
||||||
|
model_dropdown,
|
||||||
|
language_dropdown,
|
||||||
|
(
|
||||||
|
summarize_checkbox
|
||||||
|
if OLLAMA_AVAILABLE
|
||||||
|
else gr.Checkbox(value=False)
|
||||||
|
),
|
||||||
|
(
|
||||||
|
ollama_model_dropdown
|
||||||
|
if OLLAMA_AVAILABLE
|
||||||
|
else gr.Dropdown(value=None)
|
||||||
|
),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
output_text,
|
||||||
|
detected_language,
|
||||||
|
summary_text if OLLAMA_AVAILABLE else gr.Textbox(),
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
with gr.TabItem("YouTube"):
|
with gr.TabItem("YouTube"):
|
||||||
@ -168,6 +240,22 @@ def create_interface():
|
|||||||
value="Auto-detect",
|
value="Auto-detect",
|
||||||
label="Language (optional)",
|
label="Language (optional)",
|
||||||
)
|
)
|
||||||
|
if OLLAMA_AVAILABLE:
|
||||||
|
with gr.Group():
|
||||||
|
yt_summarize_checkbox = gr.Checkbox(
|
||||||
|
label="Generate Summary", value=False
|
||||||
|
)
|
||||||
|
yt_ollama_model_dropdown = gr.Dropdown(
|
||||||
|
choices=OLLAMA_MODELS,
|
||||||
|
value=OLLAMA_MODELS[0] if OLLAMA_MODELS else None,
|
||||||
|
label="Ollama Model",
|
||||||
|
visible=False,
|
||||||
|
)
|
||||||
|
yt_summarize_checkbox.change(
|
||||||
|
fn=lambda x: gr.Dropdown.update(visible=x),
|
||||||
|
inputs=[yt_summarize_checkbox],
|
||||||
|
outputs=[yt_ollama_model_dropdown],
|
||||||
|
)
|
||||||
yt_process_btn = gr.Button("Process Video", variant="primary")
|
yt_process_btn = gr.Button("Process Video", variant="primary")
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
@ -179,12 +267,44 @@ def create_interface():
|
|||||||
label="Detected Language", interactive=False
|
label="Detected Language", interactive=False
|
||||||
)
|
)
|
||||||
yt_source = gr.Textbox(label="Source", interactive=False)
|
yt_source = gr.Textbox(label="Source", interactive=False)
|
||||||
|
if OLLAMA_AVAILABLE:
|
||||||
|
yt_summary_text = gr.Textbox(
|
||||||
|
label="Summary", lines=5, max_lines=10, visible=False
|
||||||
|
)
|
||||||
|
|
||||||
# Set up the event handler
|
# Set up the event handler
|
||||||
|
def process_yt_with_summary(url, model, lang, summarize, ollama_model):
|
||||||
|
result = process_youtube_url(
|
||||||
|
url, model, lang, summarize, ollama_model
|
||||||
|
)
|
||||||
|
if len(result) == 4:
|
||||||
|
text, lang, source, summary = result
|
||||||
|
return text, lang, source, summary if summary else ""
|
||||||
|
return result[0], result[1], result[2], ""
|
||||||
|
|
||||||
yt_process_btn.click(
|
yt_process_btn.click(
|
||||||
fn=process_youtube_url,
|
fn=process_yt_with_summary,
|
||||||
inputs=[youtube_url, yt_model_dropdown, yt_language_dropdown],
|
inputs=[
|
||||||
outputs=[yt_output_text, yt_detected_language, yt_source],
|
youtube_url,
|
||||||
|
yt_model_dropdown,
|
||||||
|
yt_language_dropdown,
|
||||||
|
(
|
||||||
|
yt_summarize_checkbox
|
||||||
|
if OLLAMA_AVAILABLE
|
||||||
|
else gr.Checkbox(value=False)
|
||||||
|
),
|
||||||
|
(
|
||||||
|
yt_ollama_model_dropdown
|
||||||
|
if OLLAMA_AVAILABLE
|
||||||
|
else gr.Dropdown(value=None)
|
||||||
|
),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
yt_output_text,
|
||||||
|
yt_detected_language,
|
||||||
|
yt_source,
|
||||||
|
yt_summary_text if OLLAMA_AVAILABLE else gr.Textbox(),
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add some helpful information
|
# Add some helpful information
|
||||||
@ -197,6 +317,7 @@ def create_interface():
|
|||||||
- Maximum audio duration is {MAX_DURATION // 60} minutes
|
- Maximum audio duration is {MAX_DURATION // 60} minutes
|
||||||
- YouTube videos will first try to use available subtitles
|
- YouTube videos will first try to use available subtitles
|
||||||
- If no subtitles are available, the video will be transcribed
|
- If no subtitles are available, the video will be transcribed
|
||||||
|
{"- Ollama summarization is available for both local files and YouTube videos" if OLLAMA_AVAILABLE else ""}
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -15,4 +15,10 @@ share = true
|
|||||||
available_models = tiny,base,small,medium,large-v1,large-v2,large-v3
|
available_models = tiny,base,small,medium,large-v1,large-v2,large-v3
|
||||||
|
|
||||||
[languages]
|
[languages]
|
||||||
available_languages = en,es,fr,de,it,pt,nl,ja,ko,zh
|
available_languages = en,es,fr,de,it,pt,nl,ja,ko,zh
|
||||||
|
|
||||||
|
[ollama]
|
||||||
|
enabled = false
|
||||||
|
url = http://localhost:11434
|
||||||
|
default_model = mistral
|
||||||
|
summarize_prompt = Please provide a comprehensive yet concise summary of the following text. Focus on the main points, key arguments, and important details while maintaining accuracy and completeness. Here's the text to summarize:
|
64
ollama_handler.py
Normal file
64
ollama_handler.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import requests
|
||||||
|
from typing import Optional
|
||||||
|
import configparser
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def load_config() -> configparser.ConfigParser:
    """Load configuration from the config.ini file next to this module.

    Returns:
        A ``ConfigParser`` populated from ``config.ini``. ``ConfigParser.read``
        silently skips files it cannot open, so a missing file yields an
        empty parser rather than an exception.
    """
    config = configparser.ConfigParser()
    config_path = os.path.join(os.path.dirname(__file__), "config.ini")
    # Decode explicitly as UTF-8 so free-text values such as the summarize
    # prompt are read the same way regardless of the platform's locale.
    config.read(config_path, encoding="utf-8")
    return config
|
||||||
|
|
||||||
|
|
||||||
|
# Parsed once at import time; OllamaHandler.__init__ reads its settings from this object.
config = load_config()
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaHandler:
    """Client for an Ollama server configured by the ``[ollama]`` section of config.ini.

    The settings are read once in ``__init__`` from the module-level ``config``:
    ``enabled`` (bool), ``url`` (server base URL), ``default_model`` and
    ``prompt`` (text placed before the material to summarize).
    """

    # Seconds allowed for connecting to the server and for the /api/tags probe.
    # Without a timeout, requests can block indefinitely on an unreachable host.
    REQUEST_TIMEOUT = 5

    def __init__(self):
        section = config["ollama"]
        self.enabled = section.getboolean("enabled")
        self.url = section["url"]
        self.default_model = section["default_model"]
        self.prompt = section["summarize_prompt"]

    def _endpoint(self, path: str) -> str:
        """Join the configured base URL and an API path without doubling the slash."""
        return f"{self.url.rstrip('/')}{path}"

    def is_available(self) -> bool:
        """Check if Ollama is available and enabled.

        Returns:
            False when disabled in the configuration or when the server
            cannot be reached; otherwise whether ``/api/tags`` answered 200.
        """
        if not self.enabled:
            return False
        try:
            response = requests.get(
                self._endpoint("/api/tags"), timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException:
            # Connection refused, timeout, malformed URL, ...: treat as "not available".
            return False
        return response.status_code == 200

    def get_available_models(self) -> list:
        """Get list of available Ollama models.

        Returns:
            The ``name`` of every entry under ``models`` in the ``/api/tags``
            response, or an empty list on any request or parsing failure.
        """
        try:
            response = requests.get(
                self._endpoint("/api/tags"), timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code == 200:
                return [model["name"] for model in response.json()["models"]]
            return []
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # ValueError: body is not JSON; KeyError/TypeError: unexpected payload shape.
            return []

    def summarize(self, text: str, model: Optional[str] = None) -> Optional[str]:
        """Summarize text using Ollama.

        Args:
            text: The text to summarize.
            model: Ollama model name; falls back to the configured default model.

        Returns:
            The ``response`` field of the ``/api/generate`` reply, or None when
            there is nothing to summarize, the server is unavailable, the
            reply is not a 200, or the request fails.
        """
        # Nothing to summarize: skip the round trip instead of prompting the
        # model with an empty body.
        if not text or not text.strip():
            return None
        if not self.is_available():
            return None

        model = model or self.default_model
        prompt = f"{self.prompt}\n\n{text}"

        try:
            response = requests.post(
                self._endpoint("/api/generate"),
                json={"model": model, "prompt": prompt, "stream": False},
                # Bound only the connect phase; generation time is left unlimited.
                timeout=(self.REQUEST_TIMEOUT, None),
            )
            if response.status_code == 200:
                return response.json()["response"]
            return None
        except Exception as e:
            # Summaries are best-effort: report the problem and let the caller
            # keep the transcription it already has.
            print(f"Error summarizing text: {str(e)}")
            return None
|
@ -4,4 +4,5 @@ python-dotenv>=1.0.0
|
|||||||
torch>=2.0.0
|
torch>=2.0.0
|
||||||
torchaudio>=2.0.0
|
torchaudio>=2.0.0
|
||||||
yt-dlp>=2023.12.30
|
yt-dlp>=2023.12.30
|
||||||
pytube>=15.0.0
|
pytube>=15.0.0
|
||||||
|
requests>=2.31.0
|
Loading…
x
Reference in New Issue
Block a user