First commit

This commit is contained in:
tcsenpai 2024-10-05 12:30:21 +02:00
commit eb125fcac4
11 changed files with 345 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
transcript_cache/*.json
__pycache__
.env

13
LICENSE.md Normal file
View File

@ -0,0 +1,13 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2024 TCSenpai <tcsenpai@discus.sh>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.

73
README.md Normal file
View File

@ -0,0 +1,73 @@
# YouTube Summarizer by TCSenpai
YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models.
## Features
- Fetch and cache YouTube video transcripts
- Summarize video content using Ollama AI models
- Display video information (title and channel)
- Customizable Ollama URL and model selection
## Installation
1. Clone the repository:
```
git clone https://github.com/yourusername/youtube-summarizer.git
cd youtube-summarizer
```
2. Install the required dependencies:
```
pip install -r requirements.txt
```
3. Set up environment variables:
Create a `.env` file in the root directory and add the following:
```
OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=llama3.1:8b
YOUTUBE_API_KEY=your_youtube_api_key
```
## Usage
1. Run the Streamlit app:
```
streamlit run src/main.py
```
2. Open your web browser and navigate to the provided local URL (usually `http://localhost:8501`).
3. Enter a YouTube video URL in the input field.
4. (Optional) Customize the Ollama URL and select a different AI model.
5. Click the "Summarize" button to generate a summary of the video.
## Dependencies
- Streamlit
- youtube-transcript-api
- Ollama
- YouTube Data API
- Python-dotenv
## Project Structure
- `src/main.py`: Main Streamlit application
- `src/ollama_client.py`: Ollama API client for model interaction
- `src/video_info.py`: YouTube API integration for video information
- `transcript_cache/`: Directory for caching video transcripts
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.
## License
WTFPL License
## Credits
Icon: "https://www.flaticon.com/free-icons/subtitles" by Freepik - Flaticon

3
env.example Normal file
View File

@ -0,0 +1,3 @@
OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=llama3.1:8b
YOUTUBE_API_KEY=your_youtube_api_key

View File

@ -0,0 +1 @@
I am still working on this.

5
requirements.txt Normal file
View File

@ -0,0 +1,5 @@
streamlit==1.31.1
python-dotenv==1.0.1
youtube-transcript-api==0.6.2
requests==2.31.0
google-api-python-client==2.101.0

BIN
src/assets/subtitles.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

178
src/main.py Normal file
View File

@ -0,0 +1,178 @@
import os
import json
import streamlit as st
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
from ollama_client import OllamaClient
from video_info import get_video_info
# Load environment variables
load_dotenv()
# Set page config for favicon
st.set_page_config(
page_title="YouTube Summarizer by TCSenpai",
page_icon="src/assets/subtitles.png",
)
# Custom CSS for the banner
st.markdown(
"""
<style>
.banner {
position: fixed;
top: 60px; /* Adjusted to position below Streamlit header */
left: 0;
right: 0;
z-index: 998; /* Reduced z-index to be below Streamlit elements */
display: flex;
align-items: center;
justify-content: center;
background-color: black;
padding: 0.5rem 1rem;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.banner-title {
color: white;
text-align: center;
margin: 0;
font-size: 1rem;
}
.stApp {
margin-top: 120px; /* Increased to accommodate both headers */
}
</style>
""",
unsafe_allow_html=True,
)
# Banner with icon and title
st.markdown(
"""
<div class="banner">
<h3 class="banner-title" align="center">YouTube Summarizer by TCSenpai</h3>
</div>
""",
unsafe_allow_html=True,
)
# Initialize Rich console
def get_transcript(video_id):
    """Return the full transcript text for a YouTube video, using a JSON file cache.

    Args:
        video_id: The YouTube video id (the ``v=`` query parameter value).

    Returns:
        The transcript as one space-joined string, or ``None`` when it
        could not be fetched (no captions, network error, ...).
    """
    cache_dir = "transcript_cache"
    cache_file = os.path.join(cache_dir, f"{video_id}.json")

    # Create cache directory if it doesn't exist
    os.makedirs(cache_dir, exist_ok=True)

    # Serve from cache when possible. A corrupted or truncated cache file
    # previously crashed the app with an unhandled JSONDecodeError/KeyError;
    # instead, ignore it and fall through to a fresh fetch.
    if os.path.exists(cache_file):
        try:
            with open(cache_file, "r") as f:
                return json.load(f)["transcript"]
        except (json.JSONDecodeError, KeyError, OSError) as e:
            print(f"Ignoring unreadable cache file {cache_file}: {e}")

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_transcript = " ".join(entry["text"] for entry in transcript)
        # Cache the transcript for future runs.
        with open(cache_file, "w") as f:
            json.dump({"transcript": full_transcript}, f)
        return full_transcript
    except Exception as e:
        # Best-effort: callers treat None as "no transcript available".
        print(f"Error fetching transcript: {e}")
        return None
def get_ollama_models(ollama_url):
    """Return the list of model names available on the given Ollama server."""
    client = OllamaClient(ollama_url, "")
    return client.get_models()
def summarize_video(video_url, model, ollama_url):
    """Fetch a video's transcript, summarize it with Ollama, and prepend video info.

    Args:
        video_url: A YouTube watch URL containing a ``v=`` parameter.
        model: Name of the Ollama model to use.
        ollama_url: Base URL of the Ollama server.

    Returns:
        A formatted string with title, channel, and summary, or an error
        message when the transcript is unavailable.
    """
    # Extract the video id robustly: drop any trailing query parameters
    # (e.g. "&t=42s") that would otherwise leak into the id and cache key.
    video_id = video_url.split("v=")[-1].split("&")[0]
    st.write(f"Video ID: {video_id}")

    with st.spinner("Fetching transcript..."):
        transcript = get_transcript(video_id)
    # Fixed message: it previously said "Summarizer fetched successfully!".
    st.success("Transcript fetched successfully!")
    if not transcript:
        return "Unable to fetch transcript."

    ollama_client = OllamaClient(ollama_url, model)
    st.success(f"Ollama client created with model: {model}")

    st.warning("Starting summary generation, this might take a while...")
    with st.spinner("Generating summary..."):
        prompt = f"Summarize the following YouTube video transcript:\n\n{transcript}\n\nSummary:"
        summary = ollama_client.generate(prompt)
    st.success("Summary generated successfully!")

    with st.spinner("Fetching video info..."):
        video_info = get_video_info(video_id)
    st.success("Video info fetched successfully!")

    return f"Title: {video_info['title']}\n\nChannel: {video_info['channel']}\n\nSummary:\n{summary}"
def main():
    """Render the Streamlit UI and drive the summarization workflow."""
    # Ollama endpoint, defaulting to the value from .env.
    default_ollama_url = os.getenv("OLLAMA_URL")
    ollama_url = st.text_input(
        "Ollama URL (optional)",
        value=default_ollama_url,
        placeholder="Enter custom Ollama URL",
    )
    if not ollama_url:
        ollama_url = default_ollama_url

    # Fetch available models using the specified Ollama URL.
    available_models = get_ollama_models(ollama_url)
    default_model = os.getenv("OLLAMA_MODEL")
    # Keep the configured default selectable even when the server does not
    # report it; guard against OLLAMA_MODEL being unset (None).
    if default_model and default_model not in available_models:
        available_models.append(default_model)

    # Create model selection dropdown, preselecting the configured default.
    selected_model = st.selectbox(
        "Select Ollama Model",
        options=available_models,
        index=(
            available_models.index(default_model)
            if default_model in available_models
            else 0
        ),
    )

    video_url = st.text_input("Enter the YouTube video URL:")

    # Accept canonical YouTube URLs; for anything else, try to salvage a
    # video id from a "watch?v=" fragment (privacy front-ends such as
    # Invidious use the same query parameter).
    # BUG FIX: the original condition
    #   `if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url:`
    # always reduced to the youtu.be substring test (a non-empty string
    # literal is truthy), which rejected valid youtu.be links outright.
    if video_url and not (
        video_url.startswith("https://www.youtube.com/watch?v=")
        or video_url.startswith("https://youtu.be/")
    ):
        if "watch?v=" in video_url:
            st.warning(
                "This is not a YouTube URL. Might be a privacy-fronted embed. Trying to extract the YouTube ID..."
            )
            video_id = video_url.split("watch?v=")[-1]
            video_url = f"https://www.youtube.com/watch?v={video_id}"
        else:
            st.error("Please enter a valid YouTube video URL.")
            return

    if st.button("Summarize"):
        if video_url:
            summary = summarize_video(video_url, selected_model, ollama_url)
            st.subheader("Summary:")
            st.write(summary)
        else:
            st.error("Please enter a valid YouTube video URL.")

40
src/ollama_client.py Normal file
View File

@ -0,0 +1,40 @@
import requests
import os
from dotenv import load_dotenv

load_dotenv()

# Fallback model name from the environment.
# NOTE(review): this module-level value is not referenced by OllamaClient,
# which receives its model via the constructor — presumably kept for
# external callers; verify before removing.
ollama_model = os.getenv("OLLAMA_MODEL") or "llama3.1:8b"
class OllamaClient:
    """Minimal client for the Ollama HTTP API (non-streaming)."""

    def __init__(self, base_url, model):
        # base_url: e.g. "http://localhost:11434"; model: e.g. "llama3.1:8b"
        self.base_url = base_url
        self.model = model

    def get_models(self):
        """Return the names of all models installed on the server.

        Queries ``GET /api/tags`` and extracts each model's ``name`` field.
        """
        url = f"{self.base_url}/api/tags"
        response = requests.get(url)
        response_json = response.json()
        return [model["name"] for model in response_json["models"]]

    def generate(self, prompt):
        """Generate a completion for *prompt* via ``POST /api/generate``.

        Returns:
            The generated text as a string.

        Raises:
            Exception: when the server answers with a non-200 status.
        """
        url = f"{self.base_url}/api/generate"
        data = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
        }
        response = requests.post(url, json=data)
        if response.status_code == 200:
            try:
                return response.json()["response"]
            except Exception as e:
                # BUG FIX: the original returned the raw requests.Response
                # object here, leaking a non-string into callers that format
                # the result as text. Return the body so callers always get
                # a str.
                print(f"Unexpected Ollama payload: {e}")
                return response.text
        else:
            raise Exception(f"Error generating text: {response.text}")

29
src/video_info.py Normal file
View File

@ -0,0 +1,29 @@
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import os
from dotenv import load_dotenv
load_dotenv()
def get_video_info(video_id):
    """Look up a video's title and channel via the YouTube Data API.

    Returns a dict with "title" and "channel" keys. Falls back to
    "Unknown"/"Unknown" for a missing video and "Error"/"Error" when the
    API call fails, so callers never have to handle an exception.
    """
    youtube = build("youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))
    try:
        response = youtube.videos().list(part="snippet", id=video_id).execute()
        items = response["items"]
        if not items:
            return {"title": "Unknown", "channel": "Unknown"}
        snippet = items[0]["snippet"]
        return {
            "title": snippet["title"],
            "channel": snippet["channelTitle"],
        }
    except HttpError as e:
        print(f"An HTTP error occurred: {e}")
        return {"title": "Error", "channel": "Error"}

View File