Merge pull request #1 from emre570/emre570-yt-summarizer

YouTube Summarizer
2025-06-07 19:15:20 +00:00 · 2024-05-01 19:29:15 +03:00 · 2024-05-01 19:29:15 +03:00 · add9d7da95
commit add9d7da95
parent 961b78d72d af677b26a1
3 changed files with 71 additions and 11 deletions
--- a/summarizer.py
+++ b/summarizer.py
@ -45,7 +45,7 @@ def setup_summarization_chain():
        input_variables=["text"],
    )
-    llm = ChatOllama(model="llama3", base_url="http://0.0.0.0:11434")
+    llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434")
    llm_chain = LLMChain(llm=llm, prompt=prompt_template)
    return llm_chain
--- a/webui.py
+++ b/webui.py
@ -1,32 +1,34 @@
 import gradio as gr
 from summarizer import load_document, setup_summarization_chain
 from yt_summarizer import summarize_video, check_link
 from translator import setup_translator_chain
 def summarize(url):
    """Summarize the web page or YouTube video found at ``url``.

    YouTube links (as judged by ``check_link``) go through
    ``summarize_video``; every other URL is loaded as a document and run
    through the generic summarization chain.

    Returns a two-item list: the summary and a visible translate button.
    """
    if not check_link(url):
        # Regular web page: load it, then summarize with the generic chain.
        docs = load_document(url)
        summary_text = setup_summarization_chain().run(docs)
    else:
        summary_text = summarize_video(url)

    translate_button = gr.Button("🇹🇷 Translate ", visible=True)
    return [summary_text, translate_button]
 def translate(text):
    """Run ``text`` through a freshly built translator chain and return the output."""
    return setup_translator_chain().run(text)
 with gr.Blocks() as demo:
    gr.Markdown(
-        """# Cobanov Web Summarizer
+        """# Cobanov Web and Video Summarizer
-    Easily summarize any web page with a single click."""
+    Easily summarize any web page or YouTube video with a single click."""
    )
    with gr.Row():
        with gr.Column():
            url = gr.Text(label="URL", placeholder="Enter URL here")
            btn_generate = gr.Button("Generate")
            summary = gr.Markdown(label="Summary")
@ -36,6 +38,7 @@ with gr.Blocks() as demo:
        [
            "https://cobanov.dev/haftalik-bulten/hafta-13",
            "https://bawolf.substack.com/p/embeddings-are-a-good-starting-point",
            "https://www.youtube.com/watch?v=4pOpQwiUVXc",
        ],
        inputs=[url],
    )
@ -51,5 +54,4 @@ with gr.Blocks() as demo:
    btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate])
    btn_translate.click(translate, inputs=[summary], outputs=[summary])
 demo.launch()
--- a/yt_summarizer.py
+++ b/yt_summarizer.py
@ -0,0 +1,58 @@
 from langchain_community.document_loaders import YoutubeLoader
 from langchain.text_splitter import TokenTextSplitter
 from langchain_community.chat_models import ChatOllama
 from langchain.chains.summarize import load_summarize_chain
 from langchain_core.prompts import PromptTemplate
 import re
 def check_link(link):
    """Report whether ``link`` starts with a YouTube watch or youtu.be URL.

    The scheme and the ``www.`` prefix are both optional. Only the beginning
    of the string is matched, so anything after the video id is ignored.
    """
    pattern = re.compile(
        r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+"
    )
    # A match object is always truthy, so bool() maps match/None to True/False.
    return bool(pattern.match(link))
 def get_transcript(video_link):
    """Load the transcript for a YouTube video.

    Returns the result of ``YoutubeLoader.load()`` for ``video_link``
    (languages requested: "en", "en-US"), or the string
    "Invalid YouTube URL." when ``check_link`` rejects the link.
    """
    # Bail out early on anything that does not look like a YouTube link.
    if not check_link(video_link):
        return "Invalid YouTube URL."

    yt_loader = YoutubeLoader.from_youtube_url(video_link, language=["en", "en-US"])
    return yt_loader.load()
 def split_chunks(transcript, chunk_size=7500, chunk_overlap=100):
    """Split transcript documents into token-bounded chunks.

    Args:
        transcript: Documents to split, as returned by ``get_transcript``.
        chunk_size: Maximum number of tokens per chunk. The default of 7500
            leaves some headroom below the 8192 input tokens that the
            Llama 3 model accepts.
        chunk_overlap: Number of tokens shared between consecutive chunks.

    Returns:
        The chunks produced by ``TokenTextSplitter.split_documents``.
    """
    splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(transcript)
 def yt_summarization_chain():
    """Build the summarization chain used for YouTube transcripts.

    The chain pairs a video-specific summarization prompt (single input
    variable ``text``) with a ``ChatOllama`` client for the "llama3" model
    and is created with ``verbose=True``.
    """
    summary_template = """As a professional summarizer specialized in video content, create a detailed and comprehensive summary of the YouTube video transcript provided. While crafting your summary, adhere to these guidelines:
            1. Capture the essence of the video, focusing on main ideas and key details. Ensure the summary is in-depth and insightful, reflecting any narrative or instructional elements present in the video.
            2. Exclude any redundant expressions and non-critical details to enhance the clarity and conciseness of the summary.
            3. Base the summary strictly on the transcript provided, avoiding assumptions or additions from external sources.
            4. Present the summary in a well-structured paragraph form, making it easy to read and understand.
            5. Conclude with "[End of Notes, Message #X]", where "X" is the sequence number of the summarizing request, to indicate the completion of the task.
        By adhering to this optimized prompt, you are expected to produce a clear, detailed, and audience-friendly summary that effectively conveys the core content and themes of the YouTube video.
        "{text}"
        DETAILED SUMMARY:"""
    summary_prompt = PromptTemplate(
        template=summary_template,
        input_variables=["text"],
    )
    ollama_llm = ChatOllama(model="llama3")
    # NOTE(review): no chain_type is passed, so the library default applies;
    # confirm that default handles a multi-chunk transcript as intended.
    return load_summarize_chain(llm=ollama_llm, prompt=summary_prompt, verbose=True)
 def summarize_video(video_link):
    """Summarize a YouTube video from its transcript.

    Args:
        video_link: URL of the video to summarize.

    Returns:
        The text produced by the summarization chain, or the message
        returned by ``get_transcript`` when the link is rejected.
    """
    transcript = get_transcript(video_link)
    # get_transcript reports a rejected link as a plain string rather than
    # a list of documents; that string must not reach the splitter, so hand
    # the message back to the caller instead.
    if isinstance(transcript, str):
        return transcript

    chunks = split_chunks(transcript)
    return yt_summarization_chain().run(chunks)