From 935a605f6b0eea78bb5cbb15298375751037be5f Mon Sep 17 00:00:00 2001 From: Mert Cobanov Date: Wed, 1 May 2024 19:38:01 +0300 Subject: [PATCH] Update translator.py and yt_summarizer.py with correct base_url --- translator.py | 2 +- yt_summarizer.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/translator.py b/translator.py index 816fbef..430432d 100644 --- a/translator.py +++ b/translator.py @@ -14,6 +14,6 @@ def setup_translator_chain(): input_variables=["text"], ) - llm = ChatOllama(model="llama3", base_url="http://0.0.0.0:11434") + llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434") llm_chain = LLMChain(llm=llm, prompt=prompt_template) return llm_chain diff --git a/yt_summarizer.py b/yt_summarizer.py index ae74759..aaa4032 100644 --- a/yt_summarizer.py +++ b/yt_summarizer.py @@ -5,10 +5,12 @@ from langchain.chains.summarize import load_summarize_chain from langchain_core.prompts import PromptTemplate import re + def check_link(link): yt_regex = r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+" return re.match(yt_regex, link) is not None + def get_transcript(video_link): # Get video transcript if check_link(video_link): @@ -17,13 +19,15 @@ def get_transcript(video_link): return transcript return "Invalid YouTube URL." + def split_chunks(transcript): # Split the transcript into chunks # Llama 3 model takes up to 8192 input tokens, so I set chunk size to 7500 for leaving some space to model. - splitter = TokenTextSplitter(chunk_size = 7500, chunk_overlap = 100) + splitter = TokenTextSplitter(chunk_size=7500, chunk_overlap=100) chunks = splitter.split_documents(transcript) return chunks + def yt_summarization_chain(): prompt_template = PromptTemplate( template="""As a professional summarizer specialized in video content, create a detailed and comprehensive summary of the YouTube video transcript provided. While crafting your summary, adhere to these guidelines: @@ -44,10 +48,13 @@ def yt_summarization_chain(): DETAILED SUMMARY:""", input_variables=["text"], ) - llm = ChatOllama(model="llama3") - summarize_chain = load_summarize_chain(llm=llm, prompt=prompt_template, verbose=True) + llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434") + summarize_chain = load_summarize_chain( + llm=llm, prompt=prompt_template, verbose=True + ) return summarize_chain + def summarize_video(video_link): transcript = get_transcript(video_link) chunks = split_chunks(transcript) @@ -55,4 +62,4 @@ def summarize_video(video_link): sum_chain = yt_summarization_chain() result = sum_chain.run(chunks) - return result \ No newline at end of file + return result