diff --git a/summarizer.py b/summarizer.py index 9dbcf44..0489919 100644 --- a/summarizer.py +++ b/summarizer.py @@ -45,7 +45,7 @@ def setup_summarization_chain(): input_variables=["text"], ) - llm = ChatOllama(model="llama3", base_url="http://0.0.0.0:11434") + llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434") llm_chain = LLMChain(llm=llm, prompt=prompt_template) return llm_chain diff --git a/webui.py b/webui.py index 60f8c52..c74146a 100644 --- a/webui.py +++ b/webui.py @@ -1,32 +1,34 @@ import gradio as gr from summarizer import load_document, setup_summarization_chain +from yt_summarizer import summarize_video, check_link from translator import setup_translator_chain - def summarize(url): - docs = load_document(url) - llm_chain = setup_summarization_chain() - result = llm_chain.run(docs) + if check_link(url): + result = summarize_video(url) + else: + docs = load_document(url) + llm_chain = setup_summarization_chain() + result = llm_chain.run(docs) return [result, gr.Button("🇹🇷 Translate ", visible=True)] - def translate(text): llm_chain = setup_translator_chain() result = llm_chain.run(text) return result - with gr.Blocks() as demo: gr.Markdown( - """# Cobanov Web Summarizer - Easily summarize any web page with a single click.""" + """# Cobanov Web and Video Summarizer + Easily summarize any web page or YouTube video with a single click.""" ) with gr.Row(): with gr.Column(): url = gr.Text(label="URL", placeholder="Enter URL here") + btn_generate = gr.Button("Generate") summary = gr.Markdown(label="Summary") @@ -36,6 +38,7 @@ with gr.Blocks() as demo: [ "https://cobanov.dev/haftalik-bulten/hafta-13", "https://bawolf.substack.com/p/embeddings-are-a-good-starting-point", + "https://www.youtube.com/watch?v=4pOpQwiUVXc", ], inputs=[url], ) @@ -51,5 +54,4 @@ with gr.Blocks() as demo: btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate]) btn_translate.click(translate, inputs=[summary], outputs=[summary]) - -demo.launch() +demo.launch() \ No newline at end of file diff --git a/yt_summarizer.py b/yt_summarizer.py new file mode 100644 index 0000000..ae74759 --- /dev/null +++ b/yt_summarizer.py @@ -0,0 +1,58 @@ +from langchain_community.document_loaders import YoutubeLoader +from langchain.text_splitter import TokenTextSplitter +from langchain_community.chat_models import ChatOllama +from langchain.chains.summarize import load_summarize_chain +from langchain_core.prompts import PromptTemplate +import re + +def check_link(link): + yt_regex = r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+" + return re.match(yt_regex, link) is not None + +def get_transcript(video_link): + # Get video transcript + if check_link(video_link): + loader = YoutubeLoader.from_youtube_url(video_link, language=["en", "en-US"]) + transcript = loader.load() + return transcript + return "Invalid YouTube URL." + +def split_chunks(transcript): + # Split the transcript into chunks + # Llama 3 model takes up to 8192 input tokens, so I set chunk size to 7500 for leaving some space to model. + splitter = TokenTextSplitter(chunk_size = 7500, chunk_overlap = 100) + chunks = splitter.split_documents(transcript) + return chunks + +def yt_summarization_chain(): + prompt_template = PromptTemplate( + template="""As a professional summarizer specialized in video content, create a detailed and comprehensive summary of the YouTube video transcript provided. While crafting your summary, adhere to these guidelines: + 1. Capture the essence of the video, focusing on main ideas and key details. Ensure the summary is in-depth and insightful, reflecting any narrative or instructional elements present in the video. + + 2. Exclude any redundant expressions and non-critical details to enhance the clarity and conciseness of the summary. + + 3. Base the summary strictly on the transcript provided, avoiding assumptions or additions from external sources. + + 4. Present the summary in a well-structured paragraph form, making it easy to read and understand. + + 5. Conclude with "[End of Notes, Message #X]", where "X" is the sequence number of the summarizing request, to indicate the completion of the task. + + By adhering to this optimized prompt, you are expected to produce a clear, detailed, and audience-friendly summary that effectively conveys the core content and themes of the YouTube video. + + "{text}" + + DETAILED SUMMARY:""", + input_variables=["text"], + ) + llm = ChatOllama(model="llama3") + summarize_chain = load_summarize_chain(llm=llm, prompt=prompt_template, verbose=True) + return summarize_chain + +def summarize_video(video_link): + transcript = get_transcript(video_link) + chunks = split_chunks(transcript) + + sum_chain = yt_summarization_chain() + result = sum_chain.run(chunks) + + return result \ No newline at end of file