mirror of
https://github.com/tcsenpai/youlama.git
synced 2025-06-07 03:35:41 +00:00
added whisper support
This commit is contained in:
parent
629cc5b881
commit
08afab9d02
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
transcript_cache/*.json
|
transcript_cache/*.json
|
||||||
__pycache__
|
__pycache__
|
||||||
.env
|
.env
|
||||||
|
downloads/output.m4a
|
||||||
|
20
README.md
20
README.md
@ -1,15 +1,19 @@
|
|||||||
# YouTube Summarizer by TCSenpai
|
# YouTube Summarizer by TCSenpai
|
||||||
|
|
||||||
YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models.
|
YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models and optionally Whisper for transcription.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
|
- Supports multiple YouTube frontends (e.g. YouTube, Invidious)
|
||||||
- Fetch and cache YouTube video transcripts
|
- Fetch and cache YouTube video transcripts
|
||||||
- Summarize video content using Ollama AI models
|
- Summarize video content using Ollama AI models
|
||||||
- Display video information (title and channel)
|
- Display video information (title and channel)
|
||||||
- Customizable Ollama URL and model selection
|
- Customizable Ollama URL and model selection
|
||||||
|
- Fallback to Whisper for transcription if no transcript is found
|
||||||
|
- Customizable Whisper URL and model selection
|
||||||
|
- Optionally force Whisper transcription even when a transcript is available
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
@ -28,11 +32,17 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen
|
|||||||
|
|
||||||
3. Set up environment variables:
|
3. Set up environment variables:
|
||||||
Create a `.env` file in the root directory and add the following:
|
Create a `.env` file in the root directory and add the following:
|
||||||
|
|
||||||
```
|
```
|
||||||
YOUTUBE_API_KEY=your_youtube_api_key
|
YOUTUBE_API_KEY=your_youtube_api_key
|
||||||
OLLAMA_MODEL=default_model_name
|
OLLAMA_MODEL=default_model_name
|
||||||
|
WHISPER_URL=http://localhost:8000/
|
||||||
|
WHISPER_MODEL=Systran/faster-whisper-large-v3
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- Note: you can copy the `env.example` file to `.env` and modify the values.
|
||||||
|
- Important: `WHISPER_URL` must point to the Whisper server you want to use. If you do not plan to use Whisper, you can leave the default value unchanged.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
1. Run the Streamlit app:
|
1. Run the Streamlit app:
|
||||||
@ -46,8 +56,9 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen
|
|||||||
3. Enter a YouTube video URL in the input field.
|
3. Enter a YouTube video URL in the input field.
|
||||||
|
|
||||||
4. (Optional) Customize the Ollama URL and select a different AI model.
|
4. (Optional) Customize the Ollama URL and select a different AI model.
|
||||||
|
5. (Optional) Customize the Whisper URL and select a different Whisper model.
|
||||||
|
|
||||||
5. Click the "Summarize" button to generate a summary of the video.
|
6. Click the "Summarize" button to generate a summary of the video.
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
@ -56,13 +67,18 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen
|
|||||||
- Ollama
|
- Ollama
|
||||||
- YouTube Data API
|
- YouTube Data API
|
||||||
- Python-dotenv
|
- Python-dotenv
|
||||||
|
- pytubefix
|
||||||
|
- Gradio client (`gradio-client`)
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
- `src/main.py`: Main Streamlit application
|
- `src/main.py`: Main Streamlit application
|
||||||
- `src/ollama_client.py`: Ollama API client for model interaction
|
- `src/ollama_client.py`: Ollama API client for model interaction
|
||||||
- `src/video_info.py`: YouTube API integration for video information
|
- `src/video_info.py`: YouTube API integration for video information
|
||||||
|
- `src/whisper_module.py`: Whisper API client for transcription
|
||||||
|
- `src/yt_audiophile.py`: Audio downloader for YouTube videos
|
||||||
- `transcript_cache/`: Directory for caching video transcripts
|
- `transcript_cache/`: Directory for caching video transcripts
|
||||||
|
- `downloads/`: Directory for temporarily downloaded audio files (usually empty, because the audio file is deleted after transcription)
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
|
0
downloads/placeholder
Normal file
0
downloads/placeholder
Normal file
@ -1,3 +1,5 @@
|
|||||||
OLLAMA_URL=http://localhost:11434
|
OLLAMA_URL=http://localhost:11434
|
||||||
OLLAMA_MODEL=llama3.1:8b
|
OLLAMA_MODEL=llama3.1:8b
|
||||||
YOUTUBE_API_KEY=your_youtube_api_key
|
YOUTUBE_API_KEY=your_youtube_api_key
|
||||||
|
WHISPER_URL=http://localhost:8000/
|
||||||
|
WHISPER_MODEL=Systran/faster-whisper-large-v3
|
@ -2,4 +2,9 @@ streamlit==1.31.1
|
|||||||
python-dotenv==1.0.1
|
python-dotenv==1.0.1
|
||||||
youtube-transcript-api==0.6.2
|
youtube-transcript-api==0.6.2
|
||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
google-api-python-client==2.101.0
|
google-api-python-client==2.101.0
|
||||||
|
yt-dlp
|
||||||
|
pydub
|
||||||
|
gradio-client
|
||||||
|
pytube
|
||||||
|
pytubefix
|
BIN
screenshot.png
BIN
screenshot.png
Binary file not shown.
Before Width: | Height: | Size: 208 KiB After Width: | Height: | Size: 171 KiB |
58
src/main.py
58
src/main.py
@ -5,6 +5,8 @@ from dotenv import load_dotenv
|
|||||||
from youtube_transcript_api import YouTubeTranscriptApi
|
from youtube_transcript_api import YouTubeTranscriptApi
|
||||||
from ollama_client import OllamaClient
|
from ollama_client import OllamaClient
|
||||||
from video_info import get_video_info
|
from video_info import get_video_info
|
||||||
|
from yt_audiophile import download_audio
|
||||||
|
from whisper_module import transcribe
|
||||||
|
|
||||||
# Load environment variables
|
# Load environment variables
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@ -91,7 +93,9 @@ def get_ollama_models(ollama_url):
|
|||||||
return models
|
return models
|
||||||
|
|
||||||
|
|
||||||
def summarize_video(video_url, model, ollama_url):
|
def summarize_video(
|
||||||
|
video_url, model, ollama_url, fallback_to_whisper=True, force_whisper=False
|
||||||
|
):
|
||||||
video_id = video_url.split("v=")[-1]
|
video_id = video_url.split("v=")[-1]
|
||||||
st.write(f"Video ID: {video_id}")
|
st.write(f"Video ID: {video_id}")
|
||||||
|
|
||||||
@ -99,8 +103,29 @@ def summarize_video(video_url, model, ollama_url):
|
|||||||
transcript = get_transcript(video_id)
|
transcript = get_transcript(video_id)
|
||||||
st.success("Summarizer fetched successfully!")
|
st.success("Summarizer fetched successfully!")
|
||||||
|
|
||||||
|
# Forcing whisper if specified
|
||||||
|
if force_whisper:
|
||||||
|
st.warning("Forcing whisper...")
|
||||||
|
fallback_to_whisper = True
|
||||||
|
transcript = None
|
||||||
|
|
||||||
if not transcript:
|
if not transcript:
|
||||||
return "Unable to fetch transcript."
|
if not fallback_to_whisper:
|
||||||
|
return "Unable to fetch transcript (and fallback to whisper is disabled)"
|
||||||
|
if not force_whisper:
|
||||||
|
st.warning("Unable to fetch transcript. Trying to download audio...")
|
||||||
|
try:
|
||||||
|
download_audio(video_url)
|
||||||
|
st.success("Audio downloaded successfully!")
|
||||||
|
st.warning("Starting transcription...it might take a while...")
|
||||||
|
transcript = transcribe("downloads/output.m4a")
|
||||||
|
st.success("Transcription completed successfully!")
|
||||||
|
os.remove("downloads/output.m4a")
|
||||||
|
except Exception as e:
|
||||||
|
st.error(f"Error downloading audio or transcribing: {e}")
|
||||||
|
if os.path.exists("downloads/output.m4a"):
|
||||||
|
os.remove("downloads/output.m4a")
|
||||||
|
return "Unable to fetch transcript."
|
||||||
|
|
||||||
ollama_client = OllamaClient(ollama_url, model)
|
ollama_client = OllamaClient(ollama_url, model)
|
||||||
st.success(f"Ollama client created with model: {model}")
|
st.success(f"Ollama client created with model: {model}")
|
||||||
@ -140,6 +165,20 @@ def main():
|
|||||||
if not default_model in available_models:
|
if not default_model in available_models:
|
||||||
available_models.append(default_model)
|
available_models.append(default_model)
|
||||||
|
|
||||||
|
# Sets whisper options
|
||||||
|
default_whisper_url = os.getenv("WHISPER_URL")
|
||||||
|
whisper_url = st.text_input(
|
||||||
|
"Whisper URL (optional)",
|
||||||
|
value=default_whisper_url,
|
||||||
|
placeholder="Enter custom Whisper URL",
|
||||||
|
)
|
||||||
|
if not whisper_url:
|
||||||
|
whisper_url = default_whisper_url
|
||||||
|
whisper_model = os.getenv("WHISPER_MODEL")
|
||||||
|
if not whisper_model:
|
||||||
|
whisper_model = "Systran/faster-whisper-large-v3"
|
||||||
|
st.caption(f"Whisper model: {whisper_model}")
|
||||||
|
|
||||||
# Create model selection dropdown
|
# Create model selection dropdown
|
||||||
selected_model = st.selectbox(
|
selected_model = st.selectbox(
|
||||||
"Select Ollama Model",
|
"Select Ollama Model",
|
||||||
@ -153,6 +192,13 @@ def main():
|
|||||||
|
|
||||||
video_url = st.text_input("Enter the YouTube video URL:")
|
video_url = st.text_input("Enter the YouTube video URL:")
|
||||||
|
|
||||||
|
# Add checkboxes for whisper options
|
||||||
|
col1, col2 = st.columns(2)
|
||||||
|
with col1:
|
||||||
|
force_whisper = st.checkbox("Force Whisper", value=False)
|
||||||
|
with col2:
|
||||||
|
fallback_to_whisper = st.checkbox("Fallback to Whisper", value=True)
|
||||||
|
|
||||||
# Support any video that has a valid YouTube ID
|
# Support any video that has a valid YouTube ID
|
||||||
if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url:
|
if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url:
|
||||||
if "watch?v=" in video_url:
|
if "watch?v=" in video_url:
|
||||||
@ -167,7 +213,13 @@ def main():
|
|||||||
|
|
||||||
if st.button("Summarize"):
|
if st.button("Summarize"):
|
||||||
if video_url:
|
if video_url:
|
||||||
summary = summarize_video(video_url, selected_model, ollama_url)
|
summary = summarize_video(
|
||||||
|
video_url,
|
||||||
|
selected_model,
|
||||||
|
ollama_url,
|
||||||
|
fallback_to_whisper=fallback_to_whisper,
|
||||||
|
force_whisper=force_whisper,
|
||||||
|
)
|
||||||
st.subheader("Summary:")
|
st.subheader("Summary:")
|
||||||
st.write(summary)
|
st.write(summary)
|
||||||
else:
|
else:
|
||||||
|
18
src/whisper_module.py
Normal file
18
src/whisper_module.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
from gradio_client import Client, handle_file
|
||||||
|
from yt_audiophile import download_audio
|
||||||
|
|
||||||
|
|
||||||
|
def transcribe(file_path, whisper_url=None, model=None):
    """Transcribe an audio file through a Gradio-hosted Whisper server.

    Args:
        file_path: Path of the local audio file to upload.
        whisper_url: Base URL of the Whisper Gradio server. When omitted,
            the ``WHISPER_URL`` environment variable is used, falling back
            to ``http://localhost:8000/``.
        model: Name of the Whisper model to request. When omitted, the
            ``WHISPER_MODEL`` environment variable is used, falling back to
            ``Systran/faster-whisper-large-v3``.

    Returns:
        Whatever the server's ``/predict`` endpoint returns for the file.
    """
    # Local import: this module's import block does not bring in ``os``.
    import os

    # Resolve the endpoint and model from the same settings the application
    # reads elsewhere instead of a developer-specific LAN address.
    whisper_url = whisper_url or os.getenv("WHISPER_URL") or "http://localhost:8000/"
    model = (
        model
        or os.getenv("WHISPER_MODEL")
        or "Systran/faster-whisper-large-v3"
    )

    client = Client(whisper_url)
    return client.predict(
        file_path=handle_file(file_path),
        model=model,
        task="transcribe",
        temperature=0,
        stream=False,
        api_name="/predict",
    )
|
||||||
|
|
||||||
|
|
36
src/yt_audiophile.py
Normal file
36
src/yt_audiophile.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from pytubefix import YouTube
|
||||||
|
from pytubefix.cli import on_progress
|
||||||
|
|
||||||
|
"""e.g.
|
||||||
|
https://www.youtube.com/watch?v=vwTDiLH6mqg
|
||||||
|
"""
|
||||||
|
|
||||||
|
def download_audio(url):
    """Download the highest-bitrate audio-only stream of a YouTube video.

    The file is always written to ``downloads/output.m4a``.

    Args:
        url: URL of the YouTube video.

    Raises:
        RuntimeError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url, on_progress_callback=on_progress)

    # Only audio is needed here, so pick the audio stream directly rather
    # than also requiring a 1080p video stream to exist.
    best_stream = None
    best_abr = -1
    for stream in yt.streams.filter(only_audio=True):
        # ``abr`` looks like "128kbps"; treat a missing value as 0.
        abr = int(str(stream.abr or "0").replace("kbps", ""))
        if abr > best_abr:
            best_abr = abr
            best_stream = stream

    if best_stream is None:
        raise RuntimeError(f"No audio-only stream available for {url}")

    best_stream.download("downloads", "output.m4a")  # downloads audio
|
||||||
|
|
||||||
|
|
||||||
|
def itags(yt: "YouTube", resolution="1080p"):
    """Select the itags of the best audio stream and a matching video stream.

    Args:
        yt: Video object whose ``streams`` attribute supports
            ``filter(only_audio=...)`` and ``filter(res=..., fps=...)``.
        resolution: Desired video resolution label, e.g. ``"1080p"``.

    Returns:
        A ``(audio_itag, video_itag)`` tuple. ``audio_itag`` is the itag of
        the audio-only stream with the highest bitrate (``0`` when there is
        none); ``video_itag`` is the first stream at ``resolution``,
        preferring 60, then 30, then 24 FPS.

    Raises:
        IndexError: If no video stream at ``resolution`` exists for any of
            the supported frame rates.
    """
    best_abr = 0
    audio_itag = 0
    for stream in yt.streams.filter(only_audio=True):
        # ``abr`` looks like "128kbps"; a stream without one counts as 0
        # instead of crashing on ``None.replace``.
        abr = int(str(stream.abr or "0").replace("kbps", ""))
        if abr > best_abr:
            best_abr = abr
            audio_itag = stream.itag

    # Try the frame rates from most to least preferred, uniformly.
    for fps in (60, 30, 24):
        candidates = yt.streams.filter(res=resolution, fps=fps)
        if candidates:
            print(f"{fps} FPS")
            return audio_itag, candidates[0].itag

    raise IndexError(f"No {resolution} video stream at 60, 30 or 24 FPS")
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user