mirror of
https://github.com/tcsenpai/youlama.git
synced 2025-06-10 13:07:41 +00:00
First commit
This commit is contained in:
commit
eb125fcac4
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
transcript_cache/*.json
|
||||||
|
__pycache__
|
||||||
|
.env
|
13
LICENSE.md
Normal file
13
LICENSE.md
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||||
|
Version 2, December 2004
|
||||||
|
|
||||||
|
Copyright (C) 2024 TCSenpai <tcsenpai@discus.sh>
|
||||||
|
|
||||||
|
Everyone is permitted to copy and distribute verbatim or modified
|
||||||
|
copies of this license document, and changing it is allowed as long
|
||||||
|
as the name is changed.
|
||||||
|
|
||||||
|
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. You just DO WHAT THE FUCK YOU WANT TO.
|
73
README.md
Normal file
73
README.md
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# YouTube Summarizer by TCSenpai
|
||||||
|
|
||||||
|
YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- Fetch and cache YouTube video transcripts
|
||||||
|
- Summarize video content using Ollama AI models
|
||||||
|
- Display video information (title and channel)
|
||||||
|
- Customizable Ollama URL and model selection
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
1. Clone the repository:
|
||||||
|
```
|
||||||
|
git clone https://github.com/yourusername/youtube-summarizer.git
|
||||||
|
cd youtube-summarizer
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Install the required dependencies:
|
||||||
|
```
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up environment variables:
|
||||||
|
Create a `.env` file in the root directory and add the following:
|
||||||
|
```
|
||||||
|
OLLAMA_URL=http://localhost:11434
YOUTUBE_API_KEY=your_youtube_api_key
|
||||||
|
OLLAMA_MODEL=llama3.1:8b
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Run the Streamlit app:
|
||||||
|
```
|
||||||
|
streamlit run src/main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Open your web browser and navigate to the provided local URL (usually `http://localhost:8501`).
|
||||||
|
|
||||||
|
3. Enter a YouTube video URL in the input field.
|
||||||
|
|
||||||
|
4. (Optional) Customize the Ollama URL and select a different AI model.
|
||||||
|
|
||||||
|
5. Click the "Summarize" button to generate a summary of the video.
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- Streamlit
|
||||||
|
- YouTube Transcript API
|
||||||
|
- Ollama
|
||||||
|
- YouTube Data API
|
||||||
|
- Python-dotenv
|
||||||
|
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
- `src/main.py`: Main Streamlit application
|
||||||
|
- `src/ollama_client.py`: Ollama API client for model interaction
|
||||||
|
- `src/video_info.py`: YouTube API integration for video information
|
||||||
|
- `transcript_cache/`: Directory for caching video transcripts
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
Contributions are welcome! Please feel free to submit a Pull Request.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
WTFPL License
|
||||||
|
|
||||||
|
## Credits
|
||||||
|
|
||||||
|
Icon: "https://www.flaticon.com/free-icons/subtitles" by Freepik - Flaticon
|
3
env.example
Normal file
3
env.example
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
OLLAMA_URL=http://localhost:11434
|
||||||
|
OLLAMA_MODEL=llama3.1:8b
|
||||||
|
YOUTUBE_API_KEY=your_youtube_api_key
|
1
firefox_extension/placeholder
Normal file
1
firefox_extension/placeholder
Normal file
@ -0,0 +1 @@
|
|||||||
|
I am still working on this.
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
streamlit==1.31.1
|
||||||
|
python-dotenv==1.0.1
|
||||||
|
youtube-transcript-api==0.6.2
|
||||||
|
requests==2.31.0
|
||||||
|
google-api-python-client==2.101.0
|
BIN
src/assets/subtitles.png
Normal file
BIN
src/assets/subtitles.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
178
src/main.py
Normal file
178
src/main.py
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
import streamlit as st
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from youtube_transcript_api import YouTubeTranscriptApi
|
||||||
|
from ollama_client import OllamaClient
|
||||||
|
from video_info import get_video_info
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Set page config for favicon
|
||||||
|
st.set_page_config(
|
||||||
|
page_title="YouTube Summarizer by TCSenpai",
|
||||||
|
page_icon="src/assets/subtitles.png",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Custom CSS for the banner
|
||||||
|
st.markdown(
|
||||||
|
"""
|
||||||
|
<style>
|
||||||
|
.banner {
|
||||||
|
position: fixed;
|
||||||
|
top: 60px; /* Adjusted to position below Streamlit header */
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
z-index: 998; /* Reduced z-index to be below Streamlit elements */
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
background-color: black;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
}
|
||||||
|
.banner-title {
|
||||||
|
color: white;
|
||||||
|
text-align: center;
|
||||||
|
margin: 0;
|
||||||
|
font-size: 1rem;
|
||||||
|
}
|
||||||
|
.stApp {
|
||||||
|
margin-top: 120px; /* Increased to accommodate both headers */
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
""",
|
||||||
|
unsafe_allow_html=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Banner with icon and title
|
||||||
|
st.markdown(
|
||||||
|
"""
|
||||||
|
<div class="banner">
|
||||||
|
<h3 class="banner-title" align="center">YouTube Summarizer by TCSenpai</h3>
|
||||||
|
</div>
|
||||||
|
""",
|
||||||
|
unsafe_allow_html=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize Rich console
|
||||||
|
|
||||||
|
|
||||||
|
def get_transcript(video_id):
|
||||||
|
cache_dir = "transcript_cache"
|
||||||
|
cache_file = os.path.join(cache_dir, f"{video_id}.json")
|
||||||
|
|
||||||
|
# Create cache directory if it doesn't exist
|
||||||
|
os.makedirs(cache_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Check if transcript is cached
|
||||||
|
if os.path.exists(cache_file):
|
||||||
|
with open(cache_file, "r") as f:
|
||||||
|
return json.load(f)["transcript"]
|
||||||
|
|
||||||
|
try:
|
||||||
|
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
||||||
|
full_transcript = " ".join([entry["text"] for entry in transcript])
|
||||||
|
|
||||||
|
# Cache the transcript
|
||||||
|
with open(cache_file, "w") as f:
|
||||||
|
json.dump({"transcript": full_transcript}, f)
|
||||||
|
|
||||||
|
return full_transcript
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error fetching transcript: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_ollama_models(ollama_url):
|
||||||
|
ollama_client = OllamaClient(ollama_url, "")
|
||||||
|
models = ollama_client.get_models()
|
||||||
|
return models
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_video(video_url, model, ollama_url):
|
||||||
|
video_id = video_url.split("v=")[-1]
|
||||||
|
st.write(f"Video ID: {video_id}")
|
||||||
|
|
||||||
|
with st.spinner("Fetching transcript..."):
|
||||||
|
transcript = get_transcript(video_id)
|
||||||
|
st.success("Summarizer fetched successfully!")
|
||||||
|
|
||||||
|
if not transcript:
|
||||||
|
return "Unable to fetch transcript."
|
||||||
|
|
||||||
|
ollama_client = OllamaClient(ollama_url, model)
|
||||||
|
st.success(f"Ollama client created with model: {model}")
|
||||||
|
|
||||||
|
st.warning("Starting summary generation, this might take a while...")
|
||||||
|
with st.spinner("Generating summary..."):
|
||||||
|
prompt = f"Summarize the following YouTube video transcript:\n\n{transcript}\n\nSummary:"
|
||||||
|
summary = ollama_client.generate(prompt)
|
||||||
|
st.success("Summary generated successfully!")
|
||||||
|
|
||||||
|
with st.spinner("Fetching video info..."):
|
||||||
|
video_info = get_video_info(video_id)
|
||||||
|
st.success("Video info fetched successfully!")
|
||||||
|
|
||||||
|
return f"Title: {video_info['title']}\n\nChannel: {video_info['channel']}\n\nSummary:\n{summary}"
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Remove the existing title
|
||||||
|
# st.title("YouTube Video Summarizer")
|
||||||
|
|
||||||
|
# Add input for custom Ollama URL
|
||||||
|
default_ollama_url = os.getenv("OLLAMA_URL")
|
||||||
|
ollama_url = st.text_input(
|
||||||
|
"Ollama URL (optional)",
|
||||||
|
value=default_ollama_url,
|
||||||
|
placeholder="Enter custom Ollama URL",
|
||||||
|
)
|
||||||
|
|
||||||
|
if not ollama_url:
|
||||||
|
ollama_url = default_ollama_url
|
||||||
|
|
||||||
|
# Fetch available models using the specified Ollama URL
|
||||||
|
available_models = get_ollama_models(ollama_url)
|
||||||
|
default_model = os.getenv("OLLAMA_MODEL")
|
||||||
|
|
||||||
|
if not default_model in available_models:
|
||||||
|
available_models.append(default_model)
|
||||||
|
|
||||||
|
# Create model selection dropdown
|
||||||
|
selected_model = st.selectbox(
|
||||||
|
"Select Ollama Model",
|
||||||
|
options=available_models,
|
||||||
|
index=(
|
||||||
|
available_models.index(default_model)
|
||||||
|
if default_model in available_models
|
||||||
|
else 0
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
video_url = st.text_input("Enter the YouTube video URL:")
|
||||||
|
|
||||||
|
# Support any video that has a valid YouTube ID
|
||||||
|
if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url:
|
||||||
|
if "watch?v=" in video_url:
|
||||||
|
st.warning(
|
||||||
|
"This is not a YouTube URL. Might be a privacy-fronted embed. Trying to extract the YouTube ID..."
|
||||||
|
)
|
||||||
|
video_id = video_url.split("watch?v=")[-1]
|
||||||
|
video_url = f"https://www.youtube.com/watch?v={video_id}"
|
||||||
|
else:
|
||||||
|
st.error("Please enter a valid YouTube video URL.")
|
||||||
|
return
|
||||||
|
|
||||||
|
if st.button("Summarize"):
|
||||||
|
if video_url:
|
||||||
|
summary = summarize_video(video_url, selected_model, ollama_url)
|
||||||
|
st.subheader("Summary:")
|
||||||
|
st.write(summary)
|
||||||
|
else:
|
||||||
|
st.error("Please enter a valid YouTube video URL.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
40
src/ollama_client.py
Normal file
40
src/ollama_client.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import requests
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
ollama_model = os.getenv("OLLAMA_MODEL") or "llama3.1:8b"
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaClient:
|
||||||
|
def __init__(self, base_url, model):
|
||||||
|
self.base_url = base_url
|
||||||
|
self.model = model
|
||||||
|
|
||||||
|
def get_models(self):
|
||||||
|
url = f"{self.base_url}/api/tags"
|
||||||
|
response = requests.get(url)
|
||||||
|
models = []
|
||||||
|
response_json = response.json()
|
||||||
|
all_models = response_json["models"]
|
||||||
|
for model in all_models:
|
||||||
|
models.append(model["name"])
|
||||||
|
return models
|
||||||
|
|
||||||
|
def generate(self, prompt):
|
||||||
|
url = f"{self.base_url}/api/generate"
|
||||||
|
data = {
|
||||||
|
"model": self.model,
|
||||||
|
"prompt": prompt,
|
||||||
|
"stream": False,
|
||||||
|
}
|
||||||
|
response = requests.post(url, json=data)
|
||||||
|
if response.status_code == 200:
|
||||||
|
try:
|
||||||
|
return response.json()["response"]
|
||||||
|
except Exception as e:
|
||||||
|
print(response)
|
||||||
|
return response
|
||||||
|
else:
|
||||||
|
raise Exception(f"Error generating text: {response.text}")
|
29
src/video_info.py
Normal file
29
src/video_info.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
from googleapiclient.discovery import build
|
||||||
|
from googleapiclient.errors import HttpError
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
def get_video_info(video_id):
|
||||||
|
youtube = build("youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))
|
||||||
|
|
||||||
|
try:
|
||||||
|
request = youtube.videos().list(
|
||||||
|
part="snippet",
|
||||||
|
id=video_id
|
||||||
|
)
|
||||||
|
response = request.execute()
|
||||||
|
|
||||||
|
if response["items"]:
|
||||||
|
snippet = response["items"][0]["snippet"]
|
||||||
|
return {
|
||||||
|
"title": snippet["title"],
|
||||||
|
"channel": snippet["channelTitle"]
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {"title": "Unknown", "channel": "Unknown"}
|
||||||
|
|
||||||
|
except HttpError as e:
|
||||||
|
print(f"An HTTP error occurred: {e}")
|
||||||
|
return {"title": "Error", "channel": "Error"}
|
0
transcript_cache/placeholder
Normal file
0
transcript_cache/placeholder
Normal file
Loading…
x
Reference in New Issue
Block a user