commit eb125fcac4725b399c84eed3f50c78090b18eb3b Author: tcsenpai Date: Sat Oct 5 12:30:21 2024 +0200 First commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ae46c25 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +transcript_cache/*.json +__pycache__ +.env \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..82c90ac --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2024 TCSenpai + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2789754 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ +# YouTube Summarizer by TCSenpai + +YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models. + +## Features + +- Fetch and cache YouTube video transcripts +- Summarize video content using Ollama AI models +- Display video information (title and channel) +- Customizable Ollama URL and model selection + +## Installation + +1. Clone the repository: + ``` + git clone https://github.com/yourusername/youtube-summarizer.git + cd youtube-summarizer + ``` + +2. Install the required dependencies: + ``` + pip install -r requirements.txt + ``` + +3. Set up environment variables: + Create a `.env` file in the root directory and add the following: + ``` + YOUTUBE_API_KEY=your_youtube_api_key + OLLAMA_MODEL=default_model_name + ``` + +## Usage + +1. Run the Streamlit app: + ``` + streamlit run src/main.py + ``` + +2. 
Open your web browser and navigate to the provided local URL (usually `http://localhost:8501`). + +3. Enter a YouTube video URL in the input field. + +4. (Optional) Customize the Ollama URL and select a different AI model. + +5. Click the "Summarize" button to generate a summary of the video. + +## Dependencies + +- Streamlit +- youtube-transcript-api +- Ollama +- YouTube Data API +- Python-dotenv + + +## Project Structure + +- `src/main.py`: Main Streamlit application +- `src/ollama_client.py`: Ollama API client for model interaction +- `src/video_info.py`: YouTube API integration for video information +- `transcript_cache/`: Directory for caching video transcripts + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## License + +WTFPL License + +## Credits + +Icon: "https://www.flaticon.com/free-icons/subtitles" by Freepik - Flaticon \ No newline at end of file diff --git a/env.example b/env.example new file mode 100644 index 0000000..0fbf9a0 --- /dev/null +++ b/env.example @@ -0,0 +1,3 @@ +OLLAMA_URL=http://localhost:11434 +OLLAMA_MODEL=llama3.1:8b +YOUTUBE_API_KEY=your_youtube_api_key \ No newline at end of file diff --git a/firefox_extension/placeholder b/firefox_extension/placeholder new file mode 100644 index 0000000..c7d43c7 --- /dev/null +++ b/firefox_extension/placeholder @@ -0,0 +1 @@ +I am still working on this. 
def get_transcript(video_id):
    """Return the full transcript text of a YouTube video, with local caching.

    Looks up transcript_cache/<video_id>.json first; on a cache miss the
    transcript is fetched via YouTubeTranscriptApi, flattened into one
    space-joined string, written to the cache, and returned. Returns None
    when the transcript cannot be fetched.
    """
    cache_path = os.path.join("transcript_cache", f"{video_id}.json")

    # Make sure the cache directory exists before any read/write.
    os.makedirs("transcript_cache", exist_ok=True)

    # Serve from cache when a previous run already stored this video.
    if os.path.exists(cache_path):
        with open(cache_path, "r") as cached:
            return json.load(cached)["transcript"]

    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        text = " ".join(segment["text"] for segment in segments)

        # Persist for next time before handing the transcript back.
        with open(cache_path, "w") as out:
            json.dump({"transcript": text}, out)

        return text
    except Exception as error:
        print(f"Error fetching transcript: {error}")
        return None
+ ollama_client = OllamaClient(ollama_url, "") + models = ollama_client.get_models() + return models + + +def summarize_video(video_url, model, ollama_url): + video_id = video_url.split("v=")[-1] + st.write(f"Video ID: {video_id}") + + with st.spinner("Fetching transcript..."): + transcript = get_transcript(video_id) + st.success("Summarizer fetched successfully!") + + if not transcript: + return "Unable to fetch transcript." + + ollama_client = OllamaClient(ollama_url, model) + st.success(f"Ollama client created with model: {model}") + + st.warning("Starting summary generation, this might take a while...") + with st.spinner("Generating summary..."): + prompt = f"Summarize the following YouTube video transcript:\n\n{transcript}\n\nSummary:" + summary = ollama_client.generate(prompt) + st.success("Summary generated successfully!") + + with st.spinner("Fetching video info..."): + video_info = get_video_info(video_id) + st.success("Video info fetched successfully!") + + return f"Title: {video_info['title']}\n\nChannel: {video_info['channel']}\n\nSummary:\n{summary}" + + +def main(): + # Remove the existing title + # st.title("YouTube Video Summarizer") + + # Add input for custom Ollama URL + default_ollama_url = os.getenv("OLLAMA_URL") + ollama_url = st.text_input( + "Ollama URL (optional)", + value=default_ollama_url, + placeholder="Enter custom Ollama URL", + ) + + if not ollama_url: + ollama_url = default_ollama_url + + # Fetch available models using the specified Ollama URL + available_models = get_ollama_models(ollama_url) + default_model = os.getenv("OLLAMA_MODEL") + + if not default_model in available_models: + available_models.append(default_model) + + # Create model selection dropdown + selected_model = st.selectbox( + "Select Ollama Model", + options=available_models, + index=( + available_models.index(default_model) + if default_model in available_models + else 0 + ), + ) + + video_url = st.text_input("Enter the YouTube video URL:") + + # Support any video 
that has a valid YouTube ID + if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url: + if "watch?v=" in video_url: + st.warning( + "This is not a YouTube URL. Might be a privacy-fronted embed. Trying to extract the YouTube ID..." + ) + video_id = video_url.split("watch?v=")[-1] + video_url = f"https://www.youtube.com/watch?v={video_id}" + else: + st.error("Please enter a valid YouTube video URL.") + return + + if st.button("Summarize"): + if video_url: + summary = summarize_video(video_url, selected_model, ollama_url) + st.subheader("Summary:") + st.write(summary) + else: + st.error("Please enter a valid YouTube video URL.") + + +if __name__ == "__main__": + main() diff --git a/src/ollama_client.py b/src/ollama_client.py new file mode 100644 index 0000000..3d0a150 --- /dev/null +++ b/src/ollama_client.py @@ -0,0 +1,40 @@ +import requests +import os +from dotenv import load_dotenv + +load_dotenv() + +ollama_model = os.getenv("OLLAMA_MODEL") or "llama3.1:8b" + + +class OllamaClient: + def __init__(self, base_url, model): + self.base_url = base_url + self.model = model + + def get_models(self): + url = f"{self.base_url}/api/tags" + response = requests.get(url) + models = [] + response_json = response.json() + all_models = response_json["models"] + for model in all_models: + models.append(model["name"]) + return models + + def generate(self, prompt): + url = f"{self.base_url}/api/generate" + data = { + "model": self.model, + "prompt": prompt, + "stream": False, + } + response = requests.post(url, json=data) + if response.status_code == 200: + try: + return response.json()["response"] + except Exception as e: + print(response) + return response + else: + raise Exception(f"Error generating text: {response.text}") diff --git a/src/video_info.py b/src/video_info.py new file mode 100644 index 0000000..368af34 --- /dev/null +++ b/src/video_info.py @@ -0,0 +1,29 @@ +from googleapiclient.discovery import build +from googleapiclient.errors import 
def get_video_info(video_id):
    """Look up a video's title and channel via the YouTube Data API.

    Returns a dict with "title" and "channel" keys. Both values are "Unknown"
    when the video is not found, and "Error" when the API call fails with an
    HttpError. Reads the API key from the YOUTUBE_API_KEY environment variable.
    """
    service = build("youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))

    try:
        result = service.videos().list(part="snippet", id=video_id).execute()
    except HttpError as http_err:
        print(f"An HTTP error occurred: {http_err}")
        return {"title": "Error", "channel": "Error"}

    # An empty "items" list means no video matched the given ID.
    if not result["items"]:
        return {"title": "Unknown", "channel": "Unknown"}

    snippet = result["items"][0]["snippet"]
    return {"title": snippet["title"], "channel": snippet["channelTitle"]}