mirror of
https://github.com/tcsenpai/youlama.git
synced 2025-06-05 18:55:39 +00:00
First commit
This commit is contained in:
commit
eb125fcac4
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
transcript_cache/*.json
|
||||
__pycache__
|
||||
.env
|
13
LICENSE.md
Normal file
13
LICENSE.md
Normal file
@ -0,0 +1,13 @@
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
Version 2, December 2004
|
||||
|
||||
Copyright (C) 2024 TCSenpai <tcsenpai@discus.sh>
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim or modified
|
||||
copies of this license document, and changing it is allowed as long
|
||||
as the name is changed.
|
||||
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. You just DO WHAT THE FUCK YOU WANT TO.
|
73
README.md
Normal file
73
README.md
Normal file
@ -0,0 +1,73 @@
|
||||
# YouTube Summarizer by TCSenpai
|
||||
|
||||
YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models.
|
||||
|
||||
## Features
|
||||
|
||||
- Fetch and cache YouTube video transcripts
|
||||
- Summarize video content using Ollama AI models
|
||||
- Display video information (title and channel)
|
||||
- Customizable Ollama URL and model selection
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone the repository:
|
||||
```
|
||||
git clone https://github.com/yourusername/youtube-summarizer.git
|
||||
cd youtube-summarizer
|
||||
```
|
||||
|
||||
2. Install the required dependencies:
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Set up environment variables:
|
||||
Create a `.env` file in the root directory and add the following:
|
||||
```
|
||||
YOUTUBE_API_KEY=your_youtube_api_key
OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=default_model_name
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
1. Run the Streamlit app:
|
||||
```
|
||||
streamlit run src/main.py
|
||||
```
|
||||
|
||||
2. Open your web browser and navigate to the provided local URL (usually `http://localhost:8501`).
|
||||
|
||||
3. Enter a YouTube video URL in the input field.
|
||||
|
||||
4. (Optional) Customize the Ollama URL and select a different AI model.
|
||||
|
||||
5. Click the "Summarize" button to generate a summary of the video.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- Streamlit
|
||||
- YouTube Transcript API (youtube-transcript-api)
|
||||
- Ollama
|
||||
- YouTube Data API
|
||||
- Python-dotenv
|
||||
|
||||
|
||||
## Project Structure
|
||||
|
||||
- `src/main.py`: Main Streamlit application
|
||||
- `src/ollama_client.py`: Ollama API client for model interaction
|
||||
- `src/video_info.py`: YouTube API integration for video information
|
||||
- `transcript_cache/`: Directory for caching video transcripts
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please feel free to submit a Pull Request.
|
||||
|
||||
## License
|
||||
|
||||
WTFPL License
|
||||
|
||||
## Credits
|
||||
|
||||
Icon: "https://www.flaticon.com/free-icons/subtitles" by Freepik - Flaticon
|
3
env.example
Normal file
3
env.example
Normal file
@ -0,0 +1,3 @@
|
||||
OLLAMA_URL=http://localhost:11434
|
||||
OLLAMA_MODEL=llama3.1:8b
|
||||
YOUTUBE_API_KEY=your_youtube_api_key
|
1
firefox_extension/placeholder
Normal file
1
firefox_extension/placeholder
Normal file
@ -0,0 +1 @@
|
||||
I am still working on this.
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@ -0,0 +1,5 @@
|
||||
streamlit==1.31.1
|
||||
python-dotenv==1.0.1
|
||||
youtube-transcript-api==0.6.2
|
||||
requests==2.31.0
|
||||
google-api-python-client==2.101.0
|
BIN
src/assets/subtitles.png
Normal file
BIN
src/assets/subtitles.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
178
src/main.py
Normal file
178
src/main.py
Normal file
@ -0,0 +1,178 @@
|
||||
import os
|
||||
import json
|
||||
import streamlit as st
|
||||
from dotenv import load_dotenv
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
from ollama_client import OllamaClient
|
||||
from video_info import get_video_info
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Set page config for favicon
|
||||
st.set_page_config(
|
||||
page_title="YouTube Summarizer by TCSenpai",
|
||||
page_icon="src/assets/subtitles.png",
|
||||
)
|
||||
|
||||
# Custom CSS for the banner
|
||||
st.markdown(
|
||||
"""
|
||||
<style>
|
||||
.banner {
|
||||
position: fixed;
|
||||
top: 60px; /* Adjusted to position below Streamlit header */
|
||||
left: 0;
|
||||
right: 0;
|
||||
z-index: 998; /* Reduced z-index to be below Streamlit elements */
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
background-color: black;
|
||||
padding: 0.5rem 1rem;
|
||||
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||
}
|
||||
.banner-title {
|
||||
color: white;
|
||||
text-align: center;
|
||||
margin: 0;
|
||||
font-size: 1rem;
|
||||
}
|
||||
.stApp {
|
||||
margin-top: 120px; /* Increased to accommodate both headers */
|
||||
}
|
||||
</style>
|
||||
""",
|
||||
unsafe_allow_html=True,
|
||||
)
|
||||
|
||||
# Banner with icon and title
|
||||
st.markdown(
|
||||
"""
|
||||
<div class="banner">
|
||||
<h3 class="banner-title" align="center">YouTube Summarizer by TCSenpai</h3>
|
||||
</div>
|
||||
""",
|
||||
unsafe_allow_html=True,
|
||||
)
|
||||
|
||||
# NOTE(review): stale comment — no Rich console is initialized in this file.
|
||||
|
||||
|
||||
def get_transcript(video_id):
|
||||
cache_dir = "transcript_cache"
|
||||
cache_file = os.path.join(cache_dir, f"{video_id}.json")
|
||||
|
||||
# Create cache directory if it doesn't exist
|
||||
os.makedirs(cache_dir, exist_ok=True)
|
||||
|
||||
# Check if transcript is cached
|
||||
if os.path.exists(cache_file):
|
||||
with open(cache_file, "r") as f:
|
||||
return json.load(f)["transcript"]
|
||||
|
||||
try:
|
||||
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
||||
full_transcript = " ".join([entry["text"] for entry in transcript])
|
||||
|
||||
# Cache the transcript
|
||||
with open(cache_file, "w") as f:
|
||||
json.dump({"transcript": full_transcript}, f)
|
||||
|
||||
return full_transcript
|
||||
except Exception as e:
|
||||
print(f"Error fetching transcript: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_ollama_models(ollama_url):
|
||||
ollama_client = OllamaClient(ollama_url, "")
|
||||
models = ollama_client.get_models()
|
||||
return models
|
||||
|
||||
|
||||
def summarize_video(video_url, model, ollama_url):
|
||||
video_id = video_url.split("v=")[-1]
|
||||
st.write(f"Video ID: {video_id}")
|
||||
|
||||
with st.spinner("Fetching transcript..."):
|
||||
transcript = get_transcript(video_id)
|
||||
st.success("Summarizer fetched successfully!")
|
||||
|
||||
if not transcript:
|
||||
return "Unable to fetch transcript."
|
||||
|
||||
ollama_client = OllamaClient(ollama_url, model)
|
||||
st.success(f"Ollama client created with model: {model}")
|
||||
|
||||
st.warning("Starting summary generation, this might take a while...")
|
||||
with st.spinner("Generating summary..."):
|
||||
prompt = f"Summarize the following YouTube video transcript:\n\n{transcript}\n\nSummary:"
|
||||
summary = ollama_client.generate(prompt)
|
||||
st.success("Summary generated successfully!")
|
||||
|
||||
with st.spinner("Fetching video info..."):
|
||||
video_info = get_video_info(video_id)
|
||||
st.success("Video info fetched successfully!")
|
||||
|
||||
return f"Title: {video_info['title']}\n\nChannel: {video_info['channel']}\n\nSummary:\n{summary}"
|
||||
|
||||
|
||||
def main():
|
||||
# Remove the existing title
|
||||
# st.title("YouTube Video Summarizer")
|
||||
|
||||
# Add input for custom Ollama URL
|
||||
default_ollama_url = os.getenv("OLLAMA_URL")
|
||||
ollama_url = st.text_input(
|
||||
"Ollama URL (optional)",
|
||||
value=default_ollama_url,
|
||||
placeholder="Enter custom Ollama URL",
|
||||
)
|
||||
|
||||
if not ollama_url:
|
||||
ollama_url = default_ollama_url
|
||||
|
||||
# Fetch available models using the specified Ollama URL
|
||||
available_models = get_ollama_models(ollama_url)
|
||||
default_model = os.getenv("OLLAMA_MODEL")
|
||||
|
||||
if not default_model in available_models:
|
||||
available_models.append(default_model)
|
||||
|
||||
# Create model selection dropdown
|
||||
selected_model = st.selectbox(
|
||||
"Select Ollama Model",
|
||||
options=available_models,
|
||||
index=(
|
||||
available_models.index(default_model)
|
||||
if default_model in available_models
|
||||
else 0
|
||||
),
|
||||
)
|
||||
|
||||
video_url = st.text_input("Enter the YouTube video URL:")
|
||||
|
||||
# Support any video that has a valid YouTube ID
|
||||
if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url:
|
||||
if "watch?v=" in video_url:
|
||||
st.warning(
|
||||
"This is not a YouTube URL. Might be a privacy-fronted embed. Trying to extract the YouTube ID..."
|
||||
)
|
||||
video_id = video_url.split("watch?v=")[-1]
|
||||
video_url = f"https://www.youtube.com/watch?v={video_id}"
|
||||
else:
|
||||
st.error("Please enter a valid YouTube video URL.")
|
||||
return
|
||||
|
||||
if st.button("Summarize"):
|
||||
if video_url:
|
||||
summary = summarize_video(video_url, selected_model, ollama_url)
|
||||
st.subheader("Summary:")
|
||||
st.write(summary)
|
||||
else:
|
||||
st.error("Please enter a valid YouTube video URL.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
40
src/ollama_client.py
Normal file
40
src/ollama_client.py
Normal file
@ -0,0 +1,40 @@
|
||||
import requests
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
ollama_model = os.getenv("OLLAMA_MODEL") or "llama3.1:8b"
|
||||
|
||||
|
||||
class OllamaClient:
|
||||
def __init__(self, base_url, model):
|
||||
self.base_url = base_url
|
||||
self.model = model
|
||||
|
||||
def get_models(self):
|
||||
url = f"{self.base_url}/api/tags"
|
||||
response = requests.get(url)
|
||||
models = []
|
||||
response_json = response.json()
|
||||
all_models = response_json["models"]
|
||||
for model in all_models:
|
||||
models.append(model["name"])
|
||||
return models
|
||||
|
||||
def generate(self, prompt):
|
||||
url = f"{self.base_url}/api/generate"
|
||||
data = {
|
||||
"model": self.model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
}
|
||||
response = requests.post(url, json=data)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
return response.json()["response"]
|
||||
except Exception as e:
|
||||
print(response)
|
||||
return response
|
||||
else:
|
||||
raise Exception(f"Error generating text: {response.text}")
|
29
src/video_info.py
Normal file
29
src/video_info.py
Normal file
@ -0,0 +1,29 @@
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
def get_video_info(video_id):
|
||||
youtube = build("youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))
|
||||
|
||||
try:
|
||||
request = youtube.videos().list(
|
||||
part="snippet",
|
||||
id=video_id
|
||||
)
|
||||
response = request.execute()
|
||||
|
||||
if response["items"]:
|
||||
snippet = response["items"][0]["snippet"]
|
||||
return {
|
||||
"title": snippet["title"],
|
||||
"channel": snippet["channelTitle"]
|
||||
}
|
||||
else:
|
||||
return {"title": "Unknown", "channel": "Unknown"}
|
||||
|
||||
except HttpError as e:
|
||||
print(f"An HTTP error occurred: {e}")
|
||||
return {"title": "Error", "channel": "Error"}
|
0
transcript_cache/placeholder
Normal file
0
transcript_cache/placeholder
Normal file
Loading…
x
Reference in New Issue
Block a user