diff --git a/.gitignore b/.gitignore index ae46c25..aeddbec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ transcript_cache/*.json __pycache__ -.env \ No newline at end of file +.env +downloads/output.m4a diff --git a/README.md b/README.md index 7870ee7..c5e2d38 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,19 @@ # YouTube Summarizer by TCSenpai -YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models. +YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models and optionally Whisper for transcription. ![Screenshot](screenshot.png) ## Features +- Supports multiple YouTube frontends (e.g. YouTube, Invidious, etc.) - Fetch and cache YouTube video transcripts - Summarize video content using Ollama AI models - Display video information (title and channel) - Customizable Ollama URL and model selection +- Fallback to Whisper for transcription if no transcript is found +- Customizable Whisper URL and model selection +- Optional force Whisper transcription ## Installation @@ -28,11 +32,17 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen 3. Set up environment variables: Create a `.env` file in the root directory and add the following: + ``` YOUTUBE_API_KEY=your_youtube_api_key OLLAMA_MODEL=default_model_name + WHISPER_URL=http://localhost:8000/ + WHISPER_MODEL=Systran/faster-whisper-large-v3 ``` + - Note: you can copy the `env.example` file to `.env` and modify the values. + - Important: the `WHISPER_URL` should point to the whisper server you want to use. You can leave it as it is if you are not planning on using Whisper. + ## Usage 1. Run the Streamlit app: @@ -46,8 +56,9 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen 3. Enter a YouTube video URL in the input field. 4. 
(Optional) Customize the Ollama URL and select a different AI model. +5. (Optional) Customize the Whisper URL and select a different Whisper model. -5. Click the "Summarize" button to generate a summary of the video. +6. Click the "Summarize" button to generate a summary of the video. ## Dependencies @@ -56,13 +67,18 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen - Ollama - YouTube Data API - Python-dotenv +- pytubefix +- Gradio ## Project Structure - `src/main.py`: Main Streamlit application - `src/ollama_client.py`: Ollama API client for model interaction - `src/video_info.py`: YouTube API integration for video information +- `src/whisper_module.py`: Whisper API client for transcription +- `src/yt_audiophile.py`: Audio downloader for YouTube videos - `transcript_cache/`: Directory for caching video transcripts +- `downloads/`: Directory for downloaded audio files, might be empty ## Contributing diff --git a/downloads/placeholder b/downloads/placeholder new file mode 100644 index 0000000..e69de29 diff --git a/env.example b/env.example index 0fbf9a0..d54cc1f 100644 --- a/env.example +++ b/env.example @@ -1,3 +1,5 @@ OLLAMA_URL=http://localhost:11434 OLLAMA_MODEL=llama3.1:8b -YOUTUBE_API_KEY=your_youtube_api_key \ No newline at end of file +YOUTUBE_API_KEY=your_youtube_api_key +WHISPER_URL=http://localhost:8000/ +WHISPER_MODEL=Systran/faster-whisper-large-v3 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4a288e1..2dc9ae5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,9 @@ streamlit==1.31.1 python-dotenv==1.0.1 youtube-transcript-api==0.6.2 requests==2.31.0 -google-api-python-client==2.101.0 \ No newline at end of file +google-api-python-client==2.101.0 +yt-dlp +pydub +gradio-client +pytube +pytubefix \ No newline at end of file diff --git a/screenshot.png b/screenshot.png index a17d6f1..b3dc620 100644 Binary files a/screenshot.png and b/screenshot.png differ diff --git 
a/src/main.py b/src/main.py index 9cf531b..3d8d58b 100644 --- a/src/main.py +++ b/src/main.py @@ -5,6 +5,8 @@ from dotenv import load_dotenv from youtube_transcript_api import YouTubeTranscriptApi from ollama_client import OllamaClient from video_info import get_video_info +from yt_audiophile import download_audio +from whisper_module import transcribe # Load environment variables load_dotenv() @@ -91,7 +93,9 @@ def get_ollama_models(ollama_url): return models -def summarize_video(video_url, model, ollama_url): +def summarize_video( + video_url, model, ollama_url, fallback_to_whisper=True, force_whisper=False +): video_id = video_url.split("v=")[-1] st.write(f"Video ID: {video_id}") @@ -99,8 +103,29 @@ def summarize_video(video_url, model, ollama_url): transcript = get_transcript(video_id) st.success("Summarizer fetched successfully!") + # Forcing whisper if specified + if force_whisper: + st.warning("Forcing whisper...") + fallback_to_whisper = True + transcript = None + if not transcript: - return "Unable to fetch transcript." + if not fallback_to_whisper: + return "Unable to fetch transcript (and fallback to whisper is disabled)" + if not force_whisper: + st.warning("Unable to fetch transcript. Trying to download audio...") + try: + download_audio(video_url) + st.success("Audio downloaded successfully!") + st.warning("Starting transcription...it might take a while...") + transcript = transcribe("downloads/output.m4a") + st.success("Transcription completed successfully!") + os.remove("downloads/output.m4a") + except Exception as e: + st.error(f"Error downloading audio or transcribing: {e}") + if os.path.exists("downloads/output.m4a"): + os.remove("downloads/output.m4a") + return "Unable to fetch transcript." 
ollama_client = OllamaClient(ollama_url, model) st.success(f"Ollama client created with model: {model}") @@ -140,6 +165,20 @@ def main(): if not default_model in available_models: available_models.append(default_model) + # Sets whisper options + default_whisper_url = os.getenv("WHISPER_URL") + whisper_url = st.text_input( + "Whisper URL (optional)", + value=default_whisper_url, + placeholder="Enter custom Whisper URL", + ) + if not whisper_url: + whisper_url = default_whisper_url + whisper_model = os.getenv("WHISPER_MODEL") + if not whisper_model: + whisper_model = "Systran/faster-whisper-large-v3" + st.caption(f"Whisper model: {whisper_model}") + # Create model selection dropdown selected_model = st.selectbox( "Select Ollama Model", @@ -153,6 +192,13 @@ def main(): video_url = st.text_input("Enter the YouTube video URL:") + # Add checkboxes for whisper options + col1, col2 = st.columns(2) + with col1: + force_whisper = st.checkbox("Force Whisper", value=False) + with col2: + fallback_to_whisper = st.checkbox("Fallback to Whisper", value=True) + # Support any video that has a valid YouTube ID if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url: if "watch?v=" in video_url: @@ -167,7 +213,13 @@ def main(): if st.button("Summarize"): if video_url: - summary = summarize_video(video_url, selected_model, ollama_url) + summary = summarize_video( + video_url, + selected_model, + ollama_url, + fallback_to_whisper=fallback_to_whisper, + force_whisper=force_whisper, + ) st.subheader("Summary:") st.write(summary) else: diff --git a/src/whisper_module.py b/src/whisper_module.py new file mode 100644 index 0000000..b911f40 --- /dev/null +++ b/src/whisper_module.py @@ -0,0 +1,18 @@ +from gradio_client import Client, handle_file +from yt_audiophile import download_audio + + +def transcribe(file_path): + client = Client("http://192.168.178.121:8300/") + result = client.predict( + file_path=handle_file(file_path), + model="Systran/faster-whisper-large-v3", + 
task="transcribe",
+        temperature=0,
+        stream=False,
+        api_name="/predict",
+    )
+    print(result)
+    return result
+
+
diff --git a/src/yt_audiophile.py b/src/yt_audiophile.py
new file mode 100644
index 0000000..3f5fa11
--- /dev/null
+++ b/src/yt_audiophile.py
@@ -0,0 +1,57 @@
+"""Download the audio track of a YouTube video with pytubefix.
+
+Example URL: https://www.youtube.com/watch?v=vwTDiLH6mqg
+"""
+
+from pytubefix import YouTube
+from pytubefix.cli import on_progress
+
+
+def download_audio(url):
+    """Download the highest-bitrate audio stream of *url*.
+
+    The file is written to ``downloads/output.m4a``.
+
+    Returns the path reported by the stream's ``download`` call.
+    Raises RuntimeError when the video has no usable audio stream.
+    """
+    yt = YouTube(url, on_progress_callback=on_progress)
+    audio_tag = _best_audio_itag(yt)
+    if not audio_tag:
+        raise RuntimeError(f"No audio stream available for {url}")
+    # Only audio is downloaded, so no video stream is looked up here;
+    # requiring a 1080p stream would fail on lower-resolution videos.
+    stream = yt.streams.get_by_itag(audio_tag)
+    return stream.download(output_path="downloads", filename="output.m4a")
+
+
+def _best_audio_itag(yt):
+    """Return the itag of the highest-bitrate audio stream, or 0 if none."""
+    best_abr = 0
+    best_tag = 0
+    for stream in yt.streams.filter(only_audio=True):
+        if not stream.abr:
+            continue  # stream without bitrate metadata cannot be ranked
+        abr = int(stream.abr.replace("kbps", ""))
+        if abr > best_abr:
+            best_abr = abr
+            best_tag = stream.itag
+    return best_tag
+
+
+def itags(yt: YouTube, resolution="1080p"):
+    """Return ``(audio_itag, video_itag)`` for *yt*.
+
+    ``audio_itag`` is the highest-bitrate audio stream, or 0 when there is
+    none. ``video_itag`` is the first stream at *resolution*, trying 60,
+    then 30, then 24 fps; it is None when no stream matches, so a missing
+    resolution no longer raises IndexError.
+    """
+    video_tag = None
+    for fps in (60, 30, 24):
+        match = yt.streams.filter(res=resolution, fps=fps).first()
+        if match is not None:
+            video_tag = match.itag
+            print(f"{fps} FPS")
+            break
+    return _best_audio_itag(yt), video_tag