First commit

This commit is contained in:
tcsenpai 2024-10-05 12:30:21 +02:00
commit eb125fcac4
11 changed files with 345 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
transcript_cache/*.json
__pycache__
.env

13
LICENSE.md Normal file
View File

@ -0,0 +1,13 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2024 TCSenpai <tcsenpai@discus.sh>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.

73
README.md Normal file
View File

@ -0,0 +1,73 @@
# YouTube Summarizer by TCSenpai
YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models.
## Features
- Fetch and cache YouTube video transcripts
- Summarize video content using Ollama AI models
- Display video information (title and channel)
- Customizable Ollama URL and model selection
## Installation
1. Clone the repository:
```
git clone https://github.com/yourusername/youtube-summarizer.git
cd youtube-summarizer
```
2. Install the required dependencies:
```
pip install -r requirements.txt
```
3. Set up environment variables:
Create a `.env` file in the root directory and add the following:
```
OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=llama3.1:8b
YOUTUBE_API_KEY=your_youtube_api_key
```
## Usage
1. Run the Streamlit app:
```
streamlit run src/main.py
```
2. Open your web browser and navigate to the provided local URL (usually `http://localhost:8501`).
3. Enter a YouTube video URL in the input field.
4. (Optional) Customize the Ollama URL and select a different AI model.
5. Click the "Summarize" button to generate a summary of the video.
## Dependencies
- Streamlit
- youtube-transcript-api
- Ollama
- YouTube Data API
- Python-dotenv
## Project Structure
- `src/main.py`: Main Streamlit application
- `src/ollama_client.py`: Ollama API client for model interaction
- `src/video_info.py`: YouTube API integration for video information
- `transcript_cache/`: Directory for caching video transcripts
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.
## License
WTFPL License
## Credits
Icon: "https://www.flaticon.com/free-icons/subtitles" by Freepik - Flaticon

3
env.example Normal file
View File

@ -0,0 +1,3 @@
OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=llama3.1:8b
YOUTUBE_API_KEY=your_youtube_api_key

View File

@ -0,0 +1 @@
I am still working on this.

5
requirements.txt Normal file
View File

@ -0,0 +1,5 @@
streamlit==1.31.1
python-dotenv==1.0.1
youtube-transcript-api==0.6.2
requests==2.31.0
google-api-python-client==2.101.0

BIN
src/assets/subtitles.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

178
src/main.py Normal file
View File

@ -0,0 +1,178 @@
import os
import json
import streamlit as st
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
from ollama_client import OllamaClient
from video_info import get_video_info
# Load environment variables
load_dotenv()
# Set page config for favicon
st.set_page_config(
page_title="YouTube Summarizer by TCSenpai",
page_icon="src/assets/subtitles.png",
)
# Custom CSS for the banner
st.markdown(
"""
<style>
.banner {
position: fixed;
top: 60px; /* Adjusted to position below Streamlit header */
left: 0;
right: 0;
z-index: 998; /* Reduced z-index to be below Streamlit elements */
display: flex;
align-items: center;
justify-content: center;
background-color: black;
padding: 0.5rem 1rem;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.banner-title {
color: white;
text-align: center;
margin: 0;
font-size: 1rem;
}
.stApp {
margin-top: 120px; /* Increased to accommodate both headers */
}
</style>
""",
unsafe_allow_html=True,
)
# Banner with icon and title
st.markdown(
"""
<div class="banner">
<h3 class="banner-title" align="center">YouTube Summarizer by TCSenpai</h3>
</div>
""",
unsafe_allow_html=True,
)
# Initialize Rich console
def get_transcript(video_id):
    """Return the full transcript text for a YouTube video, using a JSON file cache.

    Args:
        video_id: The YouTube video id (the ``v=`` query parameter value).

    Returns:
        The transcript as one space-joined string, or ``None`` when it
        could not be fetched (no captions, network error, ...).
    """
    cache_dir = "transcript_cache"
    cache_file = os.path.join(cache_dir, f"{video_id}.json")

    # Create cache directory if it doesn't exist
    os.makedirs(cache_dir, exist_ok=True)

    # Serve from cache when possible. A corrupted or truncated cache file
    # previously crashed the app with an unhandled JSONDecodeError/KeyError;
    # instead, ignore it and fall through to a fresh fetch.
    if os.path.exists(cache_file):
        try:
            with open(cache_file, "r") as f:
                return json.load(f)["transcript"]
        except (json.JSONDecodeError, KeyError, OSError) as e:
            print(f"Ignoring unreadable cache file {cache_file}: {e}")

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_transcript = " ".join(entry["text"] for entry in transcript)
        # Cache the transcript for future runs.
        with open(cache_file, "w") as f:
            json.dump({"transcript": full_transcript}, f)
        return full_transcript
    except Exception as e:
        # Best-effort: callers treat None as "no transcript available".
        print(f"Error fetching transcript: {e}")
        return None
def get_ollama_models(ollama_url):
    """Return the list of model names available on the given Ollama server."""
    client = OllamaClient(ollama_url, "")
    return client.get_models()
def summarize_video(video_url, model, ollama_url):
    """Fetch a video's transcript, summarize it with Ollama, and prepend video info.

    Args:
        video_url: A YouTube watch URL containing a ``v=`` parameter.
        model: Name of the Ollama model to use.
        ollama_url: Base URL of the Ollama server.

    Returns:
        A formatted string with title, channel, and summary, or an error
        message when the transcript is unavailable.
    """
    # Extract the video id robustly: drop any trailing query parameters
    # (e.g. "&t=42s") that would otherwise leak into the id and cache key.
    video_id = video_url.split("v=")[-1].split("&")[0]
    st.write(f"Video ID: {video_id}")

    with st.spinner("Fetching transcript..."):
        transcript = get_transcript(video_id)
    # Fixed message: it previously said "Summarizer fetched successfully!".
    st.success("Transcript fetched successfully!")
    if not transcript:
        return "Unable to fetch transcript."

    ollama_client = OllamaClient(ollama_url, model)
    st.success(f"Ollama client created with model: {model}")

    st.warning("Starting summary generation, this might take a while...")
    with st.spinner("Generating summary..."):
        prompt = f"Summarize the following YouTube video transcript:\n\n{transcript}\n\nSummary:"
        summary = ollama_client.generate(prompt)
    st.success("Summary generated successfully!")

    with st.spinner("Fetching video info..."):
        video_info = get_video_info(video_id)
    st.success("Video info fetched successfully!")

    return f"Title: {video_info['title']}\n\nChannel: {video_info['channel']}\n\nSummary:\n{summary}"
def main():
    """Render the Streamlit UI and drive the summarization workflow."""
    # Ollama endpoint, defaulting to the value from .env.
    default_ollama_url = os.getenv("OLLAMA_URL")
    ollama_url = st.text_input(
        "Ollama URL (optional)",
        value=default_ollama_url,
        placeholder="Enter custom Ollama URL",
    )
    if not ollama_url:
        ollama_url = default_ollama_url

    # Fetch available models using the specified Ollama URL.
    available_models = get_ollama_models(ollama_url)
    default_model = os.getenv("OLLAMA_MODEL")
    # Keep the configured default selectable even when the server does not
    # report it; guard against OLLAMA_MODEL being unset (None).
    if default_model and default_model not in available_models:
        available_models.append(default_model)

    # Create model selection dropdown, preselecting the configured default.
    selected_model = st.selectbox(
        "Select Ollama Model",
        options=available_models,
        index=(
            available_models.index(default_model)
            if default_model in available_models
            else 0
        ),
    )

    video_url = st.text_input("Enter the YouTube video URL:")

    # Accept canonical YouTube URLs; for anything else, try to salvage a
    # video id from a "watch?v=" fragment (privacy front-ends such as
    # Invidious use the same query parameter).
    # BUG FIX: the original condition
    #   `if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url:`
    # always reduced to the youtu.be substring test (a non-empty string
    # literal is truthy), which rejected valid youtu.be links outright.
    if video_url and not (
        video_url.startswith("https://www.youtube.com/watch?v=")
        or video_url.startswith("https://youtu.be/")
    ):
        if "watch?v=" in video_url:
            st.warning(
                "This is not a YouTube URL. Might be a privacy-fronted embed. Trying to extract the YouTube ID..."
            )
            video_id = video_url.split("watch?v=")[-1]
            video_url = f"https://www.youtube.com/watch?v={video_id}"
        else:
            st.error("Please enter a valid YouTube video URL.")
            return

    if st.button("Summarize"):
        if video_url:
            summary = summarize_video(video_url, selected_model, ollama_url)
            st.subheader("Summary:")
            st.write(summary)
        else:
            st.error("Please enter a valid YouTube video URL.")

40
src/ollama_client.py Normal file
View File

@ -0,0 +1,40 @@
import requests
import os
from dotenv import load_dotenv

load_dotenv()

# Fallback model name from the environment.
# NOTE(review): this module-level value is not referenced by OllamaClient,
# which receives its model via the constructor — presumably kept for
# external callers; verify before removing.
ollama_model = os.getenv("OLLAMA_MODEL") or "llama3.1:8b"
class OllamaClient:
    """Minimal client for the Ollama HTTP API (non-streaming)."""

    def __init__(self, base_url, model):
        # base_url: e.g. "http://localhost:11434"; model: e.g. "llama3.1:8b"
        self.base_url = base_url
        self.model = model

    def get_models(self):
        """Return the names of all models installed on the server.

        Queries ``GET /api/tags`` and extracts each model's ``name`` field.
        """
        url = f"{self.base_url}/api/tags"
        response = requests.get(url)
        response_json = response.json()
        return [model["name"] for model in response_json["models"]]

    def generate(self, prompt):
        """Generate a completion for *prompt* via ``POST /api/generate``.

        Returns:
            The generated text as a string.

        Raises:
            Exception: when the server answers with a non-200 status.
        """
        url = f"{self.base_url}/api/generate"
        data = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
        }
        response = requests.post(url, json=data)
        if response.status_code == 200:
            try:
                return response.json()["response"]
            except Exception as e:
                # BUG FIX: the original returned the raw requests.Response
                # object here, leaking a non-string into callers that format
                # the result as text. Return the body so callers always get
                # a str.
                print(f"Unexpected Ollama payload: {e}")
                return response.text
        else:
            raise Exception(f"Error generating text: {response.text}")

29
src/video_info.py Normal file
View File

@ -0,0 +1,29 @@
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import os
from dotenv import load_dotenv
load_dotenv()
def get_video_info(video_id):
    """Look up a video's title and channel via the YouTube Data API.

    Returns a dict with "title" and "channel" keys. Falls back to
    "Unknown"/"Unknown" for a missing video and "Error"/"Error" when the
    API call fails, so callers never have to handle an exception.
    """
    youtube = build("youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))
    try:
        response = youtube.videos().list(part="snippet", id=video_id).execute()
        items = response["items"]
        if not items:
            return {"title": "Unknown", "channel": "Unknown"}
        snippet = items[0]["snippet"]
        return {
            "title": snippet["title"],
            "channel": snippet["channelTitle"],
        }
    except HttpError as e:
        print(f"An HTTP error occurred: {e}")
        return {"title": "Error", "channel": "Error"}

View File