mirror of
https://github.com/tcsenpai/easy-web-summarizer.git
synced 2025-06-07 02:55:20 +00:00
Merge pull request #1 from emre570/emre570-yt-summarizer
YouTube Summarizer
This commit is contained in:
commit
add9d7da95
@ -45,7 +45,7 @@ def setup_summarization_chain():
|
||||
input_variables=["text"],
|
||||
)
|
||||
|
||||
llm = ChatOllama(model="llama3", base_url="http://0.0.0.0:11434")
|
||||
llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434")
|
||||
llm_chain = LLMChain(llm=llm, prompt=prompt_template)
|
||||
return llm_chain
|
||||
|
||||
|
22
webui.py
22
webui.py
@ -1,32 +1,34 @@
|
||||
import gradio as gr
|
||||
|
||||
from summarizer import load_document, setup_summarization_chain
|
||||
from yt_summarizer import summarize_video, check_link
|
||||
from translator import setup_translator_chain
|
||||
|
||||
|
||||
def summarize(url):
|
||||
docs = load_document(url)
|
||||
llm_chain = setup_summarization_chain()
|
||||
result = llm_chain.run(docs)
|
||||
if check_link(url):
|
||||
result = summarize_video(url)
|
||||
else:
|
||||
docs = load_document(url)
|
||||
llm_chain = setup_summarization_chain()
|
||||
result = llm_chain.run(docs)
|
||||
|
||||
return [result, gr.Button("🇹🇷 Translate ", visible=True)]
|
||||
|
||||
|
||||
def translate(text):
    """Translate *text* with the translator chain.

    Args:
        text: The text handed to the chain's ``run`` method.

    Returns:
        Whatever the chain built by ``setup_translator_chain`` returns from
        ``run(text)``.
    """
    # A fresh chain is built on every call, as before.
    translator = setup_translator_chain()
    return translator.run(text)
|
||||
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
gr.Markdown(
|
||||
"""# Cobanov Web Summarizer
|
||||
Easily summarize any web page with a single click."""
|
||||
"""# Cobanov Web and Video Summarizer
|
||||
Easily summarize any web page or YouTube video with a single click."""
|
||||
)
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
url = gr.Text(label="URL", placeholder="Enter URL here")
|
||||
|
||||
btn_generate = gr.Button("Generate")
|
||||
|
||||
summary = gr.Markdown(label="Summary")
|
||||
@ -36,6 +38,7 @@ with gr.Blocks() as demo:
|
||||
[
|
||||
"https://cobanov.dev/haftalik-bulten/hafta-13",
|
||||
"https://bawolf.substack.com/p/embeddings-are-a-good-starting-point",
|
||||
"https://www.youtube.com/watch?v=4pOpQwiUVXc",
|
||||
],
|
||||
inputs=[url],
|
||||
)
|
||||
@ -51,5 +54,4 @@ with gr.Blocks() as demo:
|
||||
btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate])
|
||||
btn_translate.click(translate, inputs=[summary], outputs=[summary])
|
||||
|
||||
|
||||
demo.launch()
|
||||
demo.launch()
|
58
yt_summarizer.py
Normal file
58
yt_summarizer.py
Normal file
@ -0,0 +1,58 @@
|
||||
from langchain_community.document_loaders import YoutubeLoader
|
||||
from langchain.text_splitter import TokenTextSplitter
|
||||
from langchain_community.chat_models import ChatOllama
|
||||
from langchain.chains.summarize import load_summarize_chain
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
import re
|
||||
|
||||
def check_link(link):
    """Report whether *link* looks like a YouTube video URL.

    Two forms are recognised: ``youtube.com/watch?v=<id>`` and
    ``youtu.be/<id>``. The scheme and a ``www.`` prefix are both optional.
    The pattern is only anchored at the start of the string, so anything
    following the video id (for example extra query parameters) is accepted.

    Args:
        link: Candidate URL. A non-string value such as ``None`` is treated
            as "not a YouTube link" instead of raising ``TypeError``.

    Returns:
        bool: ``True`` when the beginning of *link* matches the pattern,
        ``False`` otherwise.
    """
    # re.match raises TypeError on non-string input; report a plain mismatch.
    if not isinstance(link, str):
        return False

    yt_regex = r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+"
    return re.match(yt_regex, link) is not None
|
||||
|
||||
def get_transcript(video_link):
    """Load the transcript of a YouTube video.

    Args:
        video_link: URL of the video; it is validated with ``check_link``
            before any loading is attempted.

    Returns:
        The result of ``YoutubeLoader.load()`` when the link is accepted,
        otherwise the string ``"Invalid YouTube URL."``.
    """
    # Guard clause: bail out early for anything check_link rejects.
    if not check_link(video_link):
        return "Invalid YouTube URL."

    # Language codes handed to the loader: "en" first, then "en-US".
    yt_loader = YoutubeLoader.from_youtube_url(video_link, language=["en", "en-US"])
    return yt_loader.load()
|
||||
|
||||
def split_chunks(transcript):
    """Split the transcript into chunks with a token-based splitter.

    Args:
        transcript: The documents to split; passed unchanged to
            ``TokenTextSplitter.split_documents``.

    Returns:
        The chunks produced by the splitter.
    """
    # Sizing note from the original author: the Llama 3 model takes up to
    # 8192 input tokens, so the chunk size is set to 7500 to leave some
    # space for the model.
    token_splitter = TokenTextSplitter(chunk_size=7500, chunk_overlap=100)
    return token_splitter.split_documents(transcript)
|
||||
|
||||
def yt_summarization_chain():
    """Build the summarization chain used for YouTube transcripts.

    The chain combines a ``ChatOllama`` client for the ``llama3`` model with
    a prompt that asks for a detailed summary of the transcript supplied
    through the ``text`` variable.

    Returns:
        The chain object created by ``load_summarize_chain``.
    """
    # Prompt text kept in its own local so the chain wiring below stays short.
    summary_instructions = """As a professional summarizer specialized in video content, create a detailed and comprehensive summary of the YouTube video transcript provided. While crafting your summary, adhere to these guidelines:
1. Capture the essence of the video, focusing on main ideas and key details. Ensure the summary is in-depth and insightful, reflecting any narrative or instructional elements present in the video.

2. Exclude any redundant expressions and non-critical details to enhance the clarity and conciseness of the summary.

3. Base the summary strictly on the transcript provided, avoiding assumptions or additions from external sources.

4. Present the summary in a well-structured paragraph form, making it easy to read and understand.

5. Conclude with "[End of Notes, Message #X]", where "X" is the sequence number of the summarizing request, to indicate the completion of the task.

By adhering to this optimized prompt, you are expected to produce a clear, detailed, and audience-friendly summary that effectively conveys the core content and themes of the YouTube video.

"{text}"

DETAILED SUMMARY:"""
    video_prompt = PromptTemplate(
        template=summary_instructions,
        input_variables=["text"],
    )

    chat_model = ChatOllama(model="llama3")

    # NOTE(review): no chain_type is passed, so the library default applies;
    # confirm that default handles multi-chunk input the way split_chunks
    # intends.
    return load_summarize_chain(llm=chat_model, prompt=video_prompt, verbose=True)
|
||||
|
||||
def summarize_video(video_link):
    """Summarize a YouTube video from its transcript.

    The transcript is fetched with ``get_transcript``, split with
    ``split_chunks`` and then run through the chain built by
    ``yt_summarization_chain``.

    Args:
        video_link: URL of the video to summarize.

    Returns:
        The output of the summarization chain, or the message string that
        ``get_transcript`` returns when it rejects the link.
    """
    transcript = get_transcript(video_link)

    # get_transcript reports a rejected link by returning a plain message
    # string instead of documents. Handing that string to the splitter would
    # fail, so pass the message back to the caller as the result.
    if isinstance(transcript, str):
        return transcript

    chunks = split_chunks(transcript)
    sum_chain = yt_summarization_chain()
    return sum_chain.run(chunks)
|
Loading…
x
Reference in New Issue
Block a user