mirror of
https://github.com/tcsenpai/youlama.git
synced 2025-06-10 13:07:41 +00:00
First commit
This commit is contained in:
commit
eb125fcac4
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
transcript_cache/*.json
|
||||||
|
__pycache__
|
||||||
|
.env
|
13
LICENSE.md
Normal file
13
LICENSE.md
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||||
|
Version 2, December 2004
|
||||||
|
|
||||||
|
Copyright (C) 2024 TCSenpai <tcsenpai@discus.sh>
|
||||||
|
|
||||||
|
Everyone is permitted to copy and distribute verbatim or modified
|
||||||
|
copies of this license document, and changing it is allowed as long
|
||||||
|
as the name is changed.
|
||||||
|
|
||||||
|
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. You just DO WHAT THE FUCK YOU WANT TO.
|
73
README.md
Normal file
73
README.md
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# YouTube Summarizer by TCSenpai
|
||||||
|
|
||||||
|
YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- Fetch and cache YouTube video transcripts
|
||||||
|
- Summarize video content using Ollama AI models
|
||||||
|
- Display video information (title and channel)
|
||||||
|
- Customizable Ollama URL and model selection
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
1. Clone the repository:
|
||||||
|
```
|
||||||
|
git clone https://github.com/yourusername/youtube-summarizer.git
|
||||||
|
cd youtube-summarizer
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Install the required dependencies:
|
||||||
|
```
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up environment variables:
|
||||||
|
Create a `.env` file in the root directory and add the following:
|
||||||
|
```
|
||||||
|
OLLAMA_URL=http://localhost:11434
YOUTUBE_API_KEY=your_youtube_api_key
|
||||||
|
OLLAMA_MODEL=llama3.1:8b
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Run the Streamlit app:
|
||||||
|
```
|
||||||
|
streamlit run src/main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Open your web browser and navigate to the provided local URL (usually `http://localhost:8501`).
|
||||||
|
|
||||||
|
3. Enter a YouTube video URL in the input field.
|
||||||
|
|
||||||
|
4. (Optional) Customize the Ollama URL and select a different AI model.
|
||||||
|
|
||||||
|
5. Click the "Summarize" button to generate a summary of the video.
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- Streamlit
|
||||||
|
- YouTube Transcript API
|
||||||
|
- Ollama
|
||||||
|
- YouTube Data API
|
||||||
|
- Python-dotenv
|
||||||
|
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
- `src/main.py`: Main Streamlit application
|
||||||
|
- `src/ollama_client.py`: Ollama API client for model interaction
|
||||||
|
- `src/video_info.py`: YouTube API integration for video information
|
||||||
|
- `transcript_cache/`: Directory for caching video transcripts
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
Contributions are welcome! Please feel free to submit a Pull Request.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
WTFPL License
|
||||||
|
|
||||||
|
## Credits
|
||||||
|
|
||||||
|
Icon: "https://www.flaticon.com/free-icons/subtitles" by Freepik - Flaticon
|
3
env.example
Normal file
3
env.example
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
OLLAMA_URL=http://localhost:11434
|
||||||
|
OLLAMA_MODEL=llama3.1:8b
|
||||||
|
YOUTUBE_API_KEY=your_youtube_api_key
|
1
firefox_extension/placeholder
Normal file
1
firefox_extension/placeholder
Normal file
@ -0,0 +1 @@
|
|||||||
|
I am still working on this.
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
streamlit==1.31.1
|
||||||
|
python-dotenv==1.0.1
|
||||||
|
youtube-transcript-api==0.6.2
|
||||||
|
requests==2.31.0
|
||||||
|
google-api-python-client==2.101.0
|
BIN
src/assets/subtitles.png
Normal file
BIN
src/assets/subtitles.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
178
src/main.py
Normal file
178
src/main.py
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
import streamlit as st
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from youtube_transcript_api import YouTubeTranscriptApi
|
||||||
|
from ollama_client import OllamaClient
|
||||||
|
from video_info import get_video_info
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Set page config for favicon
|
||||||
|
st.set_page_config(
|
||||||
|
page_title="YouTube Summarizer by TCSenpai",
|
||||||
|
page_icon="src/assets/subtitles.png",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Custom CSS for the banner
|
||||||
|
st.markdown(
|
||||||
|
"""
|
||||||
|
<style>
|
||||||
|
.banner {
|
||||||
|
position: fixed;
|
||||||
|
top: 60px; /* Adjusted to position below Streamlit header */
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
z-index: 998; /* Reduced z-index to be below Streamlit elements */
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
background-color: black;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
}
|
||||||
|
.banner-title {
|
||||||
|
color: white;
|
||||||
|
text-align: center;
|
||||||
|
margin: 0;
|
||||||
|
font-size: 1rem;
|
||||||
|
}
|
||||||
|
.stApp {
|
||||||
|
margin-top: 120px; /* Increased to accommodate both headers */
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
""",
|
||||||
|
unsafe_allow_html=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Banner with icon and title
|
||||||
|
st.markdown(
|
||||||
|
"""
|
||||||
|
<div class="banner">
|
||||||
|
<h3 class="banner-title" align="center">YouTube Summarizer by TCSenpai</h3>
|
||||||
|
</div>
|
||||||
|
""",
|
||||||
|
unsafe_allow_html=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize Rich console
|
||||||
|
|
||||||
|
|
||||||
|
def get_transcript(video_id):
|
||||||
|
cache_dir = "transcript_cache"
|
||||||
|
cache_file = os.path.join(cache_dir, f"{video_id}.json")
|
||||||
|
|
||||||
|
# Create cache directory if it doesn't exist
|
||||||
|
os.makedirs(cache_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Check if transcript is cached
|
||||||
|
if os.path.exists(cache_file):
|
||||||
|
with open(cache_file, "r") as f:
|
||||||
|
return json.load(f)["transcript"]
|
||||||
|
|
||||||
|
try:
|
||||||
|
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
||||||
|
full_transcript = " ".join([entry["text"] for entry in transcript])
|
||||||
|
|
||||||
|
# Cache the transcript
|
||||||
|
with open(cache_file, "w") as f:
|
||||||
|
json.dump({"transcript": full_transcript}, f)
|
||||||
|
|
||||||
|
return full_transcript
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error fetching transcript: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_ollama_models(ollama_url):
|
||||||
|
ollama_client = OllamaClient(ollama_url, "")
|
||||||
|
models = ollama_client.get_models()
|
||||||
|
return models
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_video(video_url, model, ollama_url):
|
||||||
|
video_id = video_url.split("v=")[-1]
|
||||||
|
st.write(f"Video ID: {video_id}")
|
||||||
|
|
||||||
|
with st.spinner("Fetching transcript..."):
|
||||||
|
transcript = get_transcript(video_id)
|
||||||
|
st.success("Summarizer fetched successfully!")
|
||||||
|
|
||||||
|
if not transcript:
|
||||||
|
return "Unable to fetch transcript."
|
||||||
|
|
||||||
|
ollama_client = OllamaClient(ollama_url, model)
|
||||||
|
st.success(f"Ollama client created with model: {model}")
|
||||||
|
|
||||||
|
st.warning("Starting summary generation, this might take a while...")
|
||||||
|
with st.spinner("Generating summary..."):
|
||||||
|
prompt = f"Summarize the following YouTube video transcript:\n\n{transcript}\n\nSummary:"
|
||||||
|
summary = ollama_client.generate(prompt)
|
||||||
|
st.success("Summary generated successfully!")
|
||||||
|
|
||||||
|
with st.spinner("Fetching video info..."):
|
||||||
|
video_info = get_video_info(video_id)
|
||||||
|
st.success("Video info fetched successfully!")
|
||||||
|
|
||||||
|
return f"Title: {video_info['title']}\n\nChannel: {video_info['channel']}\n\nSummary:\n{summary}"
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Remove the existing title
|
||||||
|
# st.title("YouTube Video Summarizer")
|
||||||
|
|
||||||
|
# Add input for custom Ollama URL
|
||||||
|
default_ollama_url = os.getenv("OLLAMA_URL")
|
||||||
|
ollama_url = st.text_input(
|
||||||
|
"Ollama URL (optional)",
|
||||||
|
value=default_ollama_url,
|
||||||
|
placeholder="Enter custom Ollama URL",
|
||||||
|
)
|
||||||
|
|
||||||
|
if not ollama_url:
|
||||||
|
ollama_url = default_ollama_url
|
||||||
|
|
||||||
|
# Fetch available models using the specified Ollama URL
|
||||||
|
available_models = get_ollama_models(ollama_url)
|
||||||
|
default_model = os.getenv("OLLAMA_MODEL")
|
||||||
|
|
||||||
|
if not default_model in available_models:
|
||||||
|
available_models.append(default_model)
|
||||||
|
|
||||||
|
# Create model selection dropdown
|
||||||
|
selected_model = st.selectbox(
|
||||||
|
"Select Ollama Model",
|
||||||
|
options=available_models,
|
||||||
|
index=(
|
||||||
|
available_models.index(default_model)
|
||||||
|
if default_model in available_models
|
||||||
|
else 0
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
video_url = st.text_input("Enter the YouTube video URL:")
|
||||||
|
|
||||||
|
# Support any video that has a valid YouTube ID
|
||||||
|
if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url:
|
||||||
|
if "watch?v=" in video_url:
|
||||||
|
st.warning(
|
||||||
|
"This is not a YouTube URL. Might be a privacy-fronted embed. Trying to extract the YouTube ID..."
|
||||||
|
)
|
||||||
|
video_id = video_url.split("watch?v=")[-1]
|
||||||
|
video_url = f"https://www.youtube.com/watch?v={video_id}"
|
||||||
|
else:
|
||||||
|
st.error("Please enter a valid YouTube video URL.")
|
||||||
|
return
|
||||||
|
|
||||||
|
if st.button("Summarize"):
|
||||||
|
if video_url:
|
||||||
|
summary = summarize_video(video_url, selected_model, ollama_url)
|
||||||
|
st.subheader("Summary:")
|
||||||
|
st.write(summary)
|
||||||
|
else:
|
||||||
|
st.error("Please enter a valid YouTube video URL.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
40
src/ollama_client.py
Normal file
40
src/ollama_client.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import requests
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
ollama_model = os.getenv("OLLAMA_MODEL") or "llama3.1:8b"
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaClient:
|
||||||
|
def __init__(self, base_url, model):
|
||||||
|
self.base_url = base_url
|
||||||
|
self.model = model
|
||||||
|
|
||||||
|
def get_models(self):
|
||||||
|
url = f"{self.base_url}/api/tags"
|
||||||
|
response = requests.get(url)
|
||||||
|
models = []
|
||||||
|
response_json = response.json()
|
||||||
|
all_models = response_json["models"]
|
||||||
|
for model in all_models:
|
||||||
|
models.append(model["name"])
|
||||||
|
return models
|
||||||
|
|
||||||
|
def generate(self, prompt):
|
||||||
|
url = f"{self.base_url}/api/generate"
|
||||||
|
data = {
|
||||||
|
"model": self.model,
|
||||||
|
"prompt": prompt,
|
||||||
|
"stream": False,
|
||||||
|
}
|
||||||
|
response = requests.post(url, json=data)
|
||||||
|
if response.status_code == 200:
|
||||||
|
try:
|
||||||
|
return response.json()["response"]
|
||||||
|
except Exception as e:
|
||||||
|
print(response)
|
||||||
|
return response
|
||||||
|
else:
|
||||||
|
raise Exception(f"Error generating text: {response.text}")
|
29
src/video_info.py
Normal file
29
src/video_info.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
from googleapiclient.discovery import build
|
||||||
|
from googleapiclient.errors import HttpError
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
def get_video_info(video_id):
|
||||||
|
youtube = build("youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))
|
||||||
|
|
||||||
|
try:
|
||||||
|
request = youtube.videos().list(
|
||||||
|
part="snippet",
|
||||||
|
id=video_id
|
||||||
|
)
|
||||||
|
response = request.execute()
|
||||||
|
|
||||||
|
if response["items"]:
|
||||||
|
snippet = response["items"][0]["snippet"]
|
||||||
|
return {
|
||||||
|
"title": snippet["title"],
|
||||||
|
"channel": snippet["channelTitle"]
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {"title": "Unknown", "channel": "Unknown"}
|
||||||
|
|
||||||
|
except HttpError as e:
|
||||||
|
print(f"An HTTP error occurred: {e}")
|
||||||
|
return {"title": "Error", "channel": "Error"}
|
0
transcript_cache/placeholder
Normal file
0
transcript_cache/placeholder
Normal file
Loading…
x
Reference in New Issue
Block a user