From 5cf4681a5f990bca442e8d82aa5b806eb998ad8a Mon Sep 17 00:00:00 2001
From: Cemal Emre Albayrak <70805503+emre570@users.noreply.github.com>
Date: Sat, 27 Apr 2024 16:47:24 +0300
Subject: [PATCH 1/5] Initial Commit

---
 yt_summarizer.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 yt_summarizer.py

diff --git a/yt_summarizer.py b/yt_summarizer.py
new file mode 100644
index 0000000..35af3e6
--- /dev/null
+++ b/yt_summarizer.py
@@ -0,0 +1,25 @@
+from langchain_community.document_loaders import YoutubeLoader
+from langchain.text_splitter import TokenTextSplitter
+from langchain_community.chat_models import ChatOllama
+from langchain.chains.summarize import load_summarize_chain
+from dotenv import load_dotenv
+
+# Get video transcript
+videos = ["https://www.youtube.com/watch?v=bYjQ9fzinT8", "https://www.youtube.com/watch?v=QCg0axyXxs4"]
+
+loader = YoutubeLoader.from_youtube_url(videos[0], language=["en", "en-US"])
+transcript = loader.load()
+#print(transcript)
+
+# Split the transcript into chunks
+# Llama 3 model takes up to 8192 input tokens, so I set chunk size to 7500 for leaving some space to model.
+splitter = TokenTextSplitter(chunk_size = 7500, chunk_overlap = 100)
+chunks = splitter.split_documents(transcript)
+#print(chunks)
+#print("chunks: ", len(chunks))
+
+llm = ChatOllama(model="llama3")
+summarize_chain = load_summarize_chain(llm=llm, chain_type="refine", verbose=True)
+
+summary = summarize_chain.run(chunks)
+print(summary)
\ No newline at end of file

From 41d866530023824525592e1ba84fbb71ff126d56 Mon Sep 17 00:00:00 2001
From: Cemal Emre Albayrak <70805503+emre570@users.noreply.github.com>
Date: Sat, 27 Apr 2024 18:19:01 +0300
Subject: [PATCH 2/5] Full Working Code

---
 yt_summarizer.py | 47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/yt_summarizer.py b/yt_summarizer.py
index 35af3e6..78feaba 100644
--- a/yt_summarizer.py
+++ b/yt_summarizer.py
@@ -2,24 +2,39 @@ from langchain_community.document_loaders import YoutubeLoader
 from langchain.text_splitter import TokenTextSplitter
 from langchain_community.chat_models import ChatOllama
 from langchain.chains.summarize import load_summarize_chain
-from dotenv import load_dotenv
+import re
 
-# Get video transcript
-videos = ["https://www.youtube.com/watch?v=bYjQ9fzinT8", "https://www.youtube.com/watch?v=QCg0axyXxs4"]
+def check_link(link):
+    yt_regex = r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+"
+    return re.match(yt_regex, link) is not None
 
-loader = YoutubeLoader.from_youtube_url(videos[0], language=["en", "en-US"])
-transcript = loader.load()
-#print(transcript)
+def get_transcript(video_link):
+    # Get video transcript
+    if check_link(video_link):
+        loader = YoutubeLoader.from_youtube_url(video_link, language=["en", "en-US"])
+        transcript = loader.load()
+        return transcript
+    return "Invalid YouTube URL."
 
-# Split the transcript into chunks
-# Llama 3 model takes up to 8192 input tokens, so I set chunk size to 7500 for leaving some space to model.
-splitter = TokenTextSplitter(chunk_size = 7500, chunk_overlap = 100)
-chunks = splitter.split_documents(transcript)
-#print(chunks)
-#print("chunks: ", len(chunks))
+def split_chunks(transcript):
+    # Split the transcript into chunks
+    # The Llama 3 model accepts up to 8192 input tokens, so the chunk size is set to 7500 to leave some headroom for the model.
+    splitter = TokenTextSplitter(chunk_size = 7500, chunk_overlap = 100)
+    chunks = splitter.split_documents(transcript)
+    return chunks
 
-llm = ChatOllama(model="llama3")
-summarize_chain = load_summarize_chain(llm=llm, chain_type="refine", verbose=True)
+def yt_summarization_chain():
+    llm = ChatOllama(model="llama3")
+    summarize_chain = load_summarize_chain(llm=llm, chain_type="refine", verbose=True)
+    return summarize_chain
 
-summary = summarize_chain.run(chunks)
-print(summary)
\ No newline at end of file
+if __name__ == "__main__":
+    videos = ["https://www.youtube.com/watch?v=bYjQ9fzinT8", "https://www.youtube.com/watch?v=QCg0axyXxs4"]
+
+    transcript = get_transcript(videos[0])
+    chunks = split_chunks(transcript)
+
+    sum_chain = yt_summarization_chain()
+    result = sum_chain.run(chunks)
+    
+    print(result)
\ No newline at end of file

From e9a7d7c7851681656d1907153111dee702e8b959 Mon Sep 17 00:00:00 2001
From: Cemal Emre Albayrak <70805503+emre570@users.noreply.github.com>
Date: Sat, 27 Apr 2024 18:27:06 +0300
Subject: [PATCH 3/5] summarize_video main function

---
 yt_summarizer.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/yt_summarizer.py b/yt_summarizer.py
index 78feaba..6b98640 100644
--- a/yt_summarizer.py
+++ b/yt_summarizer.py
@@ -28,13 +28,14 @@ def yt_summarization_chain():
     summarize_chain = load_summarize_chain(llm=llm, chain_type="refine", verbose=True)
     return summarize_chain
 
-if __name__ == "__main__":
-    videos = ["https://www.youtube.com/watch?v=bYjQ9fzinT8", "https://www.youtube.com/watch?v=QCg0axyXxs4"]
-
-    transcript = get_transcript(videos[0])
+def summarize_video(video_link):
+    transcript = get_transcript(video_link)
     chunks = split_chunks(transcript)
 
     sum_chain = yt_summarization_chain()
     result = sum_chain.run(chunks)
-    
-    print(result)
\ No newline at end of file
+
+    return result
+
+if __name__ == "__main__":
+    #summarize_video()
\ No newline at end of file

From eb95fc41a5f6ad7e9118de68c98f535ddbbfca80 Mon Sep 17 00:00:00 2001
From: Cemal Emre Albayrak <70805503+emre570@users.noreply.github.com>
Date: Mon, 29 Apr 2024 15:08:29 +0300
Subject: [PATCH 4/5] Added yt_summarizer to webui

---
 summarizer.py    |  2 +-
 webui.py         | 38 +++++++++++++++++++++++++++++++-------
 yt_summarizer.py |  5 +----
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/summarizer.py b/summarizer.py
index 9dbcf44..0489919 100644
--- a/summarizer.py
+++ b/summarizer.py
@@ -45,7 +45,7 @@ def setup_summarization_chain():
         input_variables=["text"],
     )
 
-    llm = ChatOllama(model="llama3", base_url="http://0.0.0.0:11434")
+    llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434")
     llm_chain = LLMChain(llm=llm, prompt=prompt_template)
     return llm_chain
 
diff --git a/webui.py b/webui.py
index 60f8c52..1747a8b 100644
--- a/webui.py
+++ b/webui.py
@@ -1,9 +1,9 @@
 import gradio as gr
 
 from summarizer import load_document, setup_summarization_chain
+from yt_summarizer import summarize_video
 from translator import setup_translator_chain
 
-
 def summarize(url):
     docs = load_document(url)
     llm_chain = setup_summarization_chain()
@@ -11,26 +11,49 @@ def summarize(url):
 
     return [result, gr.Button("🇹🇷 Translate ", visible=True)]
 
-
 def translate(text):
     llm_chain = setup_translator_chain()
     result = llm_chain.run(text)
     return result
 
+def update_ui(content_type):
+    if content_type == "Web":
+        # Set visibility for Web URL input to True and Video URL input to False
+        url_visibility = ""  # Empty string signifies no change for a text box.
+        video_url_visibility = ""  # Set video URL field to empty since it should be hidden.
+        btn_text = "Generate Summary"  # Button text for generating summary from a web URL
+    elif content_type == "Video":
+        # Set visibility for Web URL input to False and Video URL input to True
+        url_visibility = ""  # Set web URL field to empty since it should be hidden.
+        video_url_visibility = ""  # Empty string signifies no change for a text box.
+        btn_text = "Summarize Video"  # Button text for summarizing video content
+    else:
+        # Hide both inputs (unlikely to need this else, but just in case)
+        url_visibility = ""
+        video_url_visibility = ""
+        btn_text = ""  # Clear the button text
+
+    return url_visibility, video_url_visibility, btn_text
 
 with gr.Blocks() as demo:
     gr.Markdown(
-        """# Cobanov Web Summarizer
-    Easily summarize any web page with a single click."""
+        """# Cobanov Web and Video Summarizer
+    Easily summarize any web page or YouTube video with a single click."""
     )
 
     with gr.Row():
         with gr.Column():
+            content_type = gr.Radio(choices=["Web", "Video"], label="Select Content Type", value="Web")
+            
             url = gr.Text(label="URL", placeholder="Enter URL here")
+            video_url = gr.Text(label="YouTube Video URL", placeholder="Enter YouTube video URL here", visible=False)
+
             btn_generate = gr.Button("Generate")
 
             summary = gr.Markdown(label="Summary")
             btn_translate = gr.Button(visible=False)
+        
+        content_type.change(update_ui, inputs=[content_type], outputs=[url, video_url, btn_generate])
 
     gr.Examples(
         [
@@ -48,8 +71,9 @@ with gr.Blocks() as demo:
         Repo: github.com/mertcobanov/easy-web-summarizer
         ```"""
     )
-    btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate])
+    btn_generate.click(lambda url, video_url: summarize(url) if url else summarize_video(video_url), 
+                     inputs=[url, video_url], 
+                     outputs=[summary, btn_translate])
     btn_translate.click(translate, inputs=[summary], outputs=[summary])
 
-
-demo.launch()
+demo.launch()
\ No newline at end of file
diff --git a/yt_summarizer.py b/yt_summarizer.py
index 6b98640..428524b 100644
--- a/yt_summarizer.py
+++ b/yt_summarizer.py
@@ -35,7 +35,4 @@ def summarize_video(video_link):
     sum_chain = yt_summarization_chain()
     result = sum_chain.run(chunks)
 
-    return result
-
-if __name__ == "__main__":
-    #summarize_video()
\ No newline at end of file
+    return result[0]
\ No newline at end of file

From af677b26a180f4048cb483029986050933c00bb6 Mon Sep 17 00:00:00 2001
From: Cemal Emre Albayrak <70805503+emre570@users.noreply.github.com>
Date: Wed, 1 May 2024 12:43:03 +0300
Subject: [PATCH 5/5] Improved Code

Added a prompt template to the YouTube summarization chain
Added a YouTube link check (check_link) to the Web UI summarize handler
---
 webui.py         | 40 +++++++++-------------------------------
 yt_summarizer.py | 24 ++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/webui.py b/webui.py
index 1747a8b..c74146a 100644
--- a/webui.py
+++ b/webui.py
@@ -1,13 +1,16 @@
 import gradio as gr
 
 from summarizer import load_document, setup_summarization_chain
-from yt_summarizer import summarize_video
+from yt_summarizer import summarize_video, check_link
 from translator import setup_translator_chain
 
 def summarize(url):
-    docs = load_document(url)
-    llm_chain = setup_summarization_chain()
-    result = llm_chain.run(docs)
+    if check_link(url):
+        result = summarize_video(url)
+    else:
+        docs = load_document(url)
+        llm_chain = setup_summarization_chain()
+        result = llm_chain.run(docs)
 
     return [result, gr.Button("🇹🇷 Translate ", visible=True)]
 
@@ -16,25 +19,6 @@ def translate(text):
     result = llm_chain.run(text)
     return result
 
-def update_ui(content_type):
-    if content_type == "Web":
-        # Set visibility for Web URL input to True and Video URL input to False
-        url_visibility = ""  # Empty string signifies no change for a text box.
-        video_url_visibility = ""  # Set video URL field to empty since it should be hidden.
-        btn_text = "Generate Summary"  # Button text for generating summary from a web URL
-    elif content_type == "Video":
-        # Set visibility for Web URL input to False and Video URL input to True
-        url_visibility = ""  # Set web URL field to empty since it should be hidden.
-        video_url_visibility = ""  # Empty string signifies no change for a text box.
-        btn_text = "Summarize Video"  # Button text for summarizing video content
-    else:
-        # Hide both inputs (unlikely to need this else, but just in case)
-        url_visibility = ""
-        video_url_visibility = ""
-        btn_text = ""  # Clear the button text
-
-    return url_visibility, video_url_visibility, btn_text
-
 with gr.Blocks() as demo:
     gr.Markdown(
         """# Cobanov Web and Video Summarizer
@@ -43,22 +27,18 @@ with gr.Blocks() as demo:
 
     with gr.Row():
         with gr.Column():
-            content_type = gr.Radio(choices=["Web", "Video"], label="Select Content Type", value="Web")
-            
             url = gr.Text(label="URL", placeholder="Enter URL here")
-            video_url = gr.Text(label="YouTube Video URL", placeholder="Enter YouTube video URL here", visible=False)
 
             btn_generate = gr.Button("Generate")
 
             summary = gr.Markdown(label="Summary")
             btn_translate = gr.Button(visible=False)
-        
-        content_type.change(update_ui, inputs=[content_type], outputs=[url, video_url, btn_generate])
 
     gr.Examples(
         [
             "https://cobanov.dev/haftalik-bulten/hafta-13",
             "https://bawolf.substack.com/p/embeddings-are-a-good-starting-point",
+            "https://www.youtube.com/watch?v=4pOpQwiUVXc",
         ],
         inputs=[url],
     )
@@ -71,9 +51,7 @@ with gr.Blocks() as demo:
         Repo: github.com/mertcobanov/easy-web-summarizer
         ```"""
     )
-    btn_generate.click(lambda url, video_url: summarize(url) if url else summarize_video(video_url), 
-                     inputs=[url, video_url], 
-                     outputs=[summary, btn_translate])
+    btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate])
     btn_translate.click(translate, inputs=[summary], outputs=[summary])
 
 demo.launch()
\ No newline at end of file
diff --git a/yt_summarizer.py b/yt_summarizer.py
index 428524b..ae74759 100644
--- a/yt_summarizer.py
+++ b/yt_summarizer.py
@@ -2,6 +2,7 @@ from langchain_community.document_loaders import YoutubeLoader
 from langchain.text_splitter import TokenTextSplitter
 from langchain_community.chat_models import ChatOllama
 from langchain.chains.summarize import load_summarize_chain
+from langchain_core.prompts import PromptTemplate
 import re
 
 def check_link(link):
@@ -24,8 +25,27 @@ def split_chunks(transcript):
     return chunks
 
 def yt_summarization_chain():
+    prompt_template = PromptTemplate(
+        template="""As a professional summarizer specialized in video content, create a detailed and comprehensive summary of the YouTube video transcript provided. While crafting your summary, adhere to these guidelines:
+            1. Capture the essence of the video, focusing on main ideas and key details. Ensure the summary is in-depth and insightful, reflecting any narrative or instructional elements present in the video.
+
+            2. Exclude any redundant expressions and non-critical details to enhance the clarity and conciseness of the summary.
+
+            3. Base the summary strictly on the transcript provided, avoiding assumptions or additions from external sources.
+
+            4. Present the summary in a well-structured paragraph form, making it easy to read and understand.
+
+            5. Conclude with "[End of Notes, Message #X]", where "X" is the sequence number of the summarizing request, to indicate the completion of the task.
+
+        By adhering to this optimized prompt, you are expected to produce a clear, detailed, and audience-friendly summary that effectively conveys the core content and themes of the YouTube video.
+
+        "{text}"
+
+        DETAILED SUMMARY:""",
+        input_variables=["text"],
+    )
     llm = ChatOllama(model="llama3")
-    summarize_chain = load_summarize_chain(llm=llm, chain_type="refine", verbose=True)
+    summarize_chain = load_summarize_chain(llm=llm, prompt=prompt_template, verbose=True)
     return summarize_chain
 
 def summarize_video(video_link):
@@ -35,4 +55,4 @@ def summarize_video(video_link):
     sum_chain = yt_summarization_chain()
     result = sum_chain.run(chunks)
 
-    return result[0]
\ No newline at end of file
+    return result
\ No newline at end of file