Update translator.py and yt_summarizer.py with correct base_url

This commit is contained in:
Mert Cobanov 2024-05-01 19:38:01 +03:00
parent 9e383c0012
commit 935a605f6b
2 changed files with 12 additions and 5 deletions

View File

@ -14,6 +14,6 @@ def setup_translator_chain():
input_variables=["text"],
)
llm = ChatOllama(model="llama3", base_url="http://0.0.0.0:11434")
llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434")
llm_chain = LLMChain(llm=llm, prompt=prompt_template)
return llm_chain

View File

@ -5,10 +5,12 @@ from langchain.chains.summarize import load_summarize_chain
from langchain_core.prompts import PromptTemplate
import re
def check_link(link):
    """Tell whether ``link`` begins with a YouTube video URL.

    The scheme (``http://`` or ``https://``) and the ``www.`` prefix are
    both optional. Two URL shapes are recognised: ``youtube.com/watch?v=<id>``
    and ``youtu.be/<id>``, where ``<id>`` is at least one word character or
    hyphen. The pattern is anchored at the start of the string only, so any
    text following the id (extra query parameters, for instance) is ignored.

    Args:
        link: The candidate URL string.

    Returns:
        True if the start of ``link`` matches the pattern, otherwise False.
    """
    pattern = r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+"
    found = re.match(pattern, link)
    if found is None:
        return False
    return True
def get_transcript(video_link):
# Get video transcript
if check_link(video_link):
@ -17,13 +19,15 @@ def get_transcript(video_link):
return transcript
return "Invalid YouTube URL."
def split_chunks(transcript):
    """Split transcript documents into token-bounded chunks.

    Args:
        transcript: The transcript documents to split; handed unchanged to
            ``TokenTextSplitter.split_documents``.

    Returns:
        The chunks returned by ``split_documents``.

    Raises:
        ValueError: If ``transcript`` is a plain string. ``get_transcript``
            returns the message "Invalid YouTube URL." for a rejected link,
            and a string cannot be split as documents.
    """
    # Fail early with a readable message instead of letting the splitter
    # trip over a string further down.
    if isinstance(transcript, str):
        raise ValueError(
            f"Expected transcript documents, got a string: {transcript!r}"
        )

    # Per the original author's note: Llama 3 accepts up to 8192 input
    # tokens, so chunks are capped at 7500 to leave headroom for the prompt.
    splitter = TokenTextSplitter(chunk_size=7500, chunk_overlap=100)
    return splitter.split_documents(transcript)
def yt_summarization_chain():
prompt_template = PromptTemplate(
template="""As a professional summarizer specialized in video content, create a detailed and comprehensive summary of the YouTube video transcript provided. While crafting your summary, adhere to these guidelines:
@ -44,10 +48,13 @@ def yt_summarization_chain():
DETAILED SUMMARY:""",
input_variables=["text"],
)
llm = ChatOllama(model="llama3")
summarize_chain = load_summarize_chain(llm=llm, prompt=prompt_template, verbose=True)
llm = ChatOllama(model="llama3", base_url="http://127.0.0.1:11434")
summarize_chain = load_summarize_chain(
llm=llm, prompt=prompt_template, verbose=True
)
return summarize_chain
def summarize_video(video_link):
transcript = get_transcript(video_link)
chunks = split_chunks(transcript)
@ -55,4 +62,4 @@ def summarize_video(video_link):
sum_chain = yt_summarization_chain()
result = sum_chain.run(chunks)
return result
return result