Refactor code to improve readability and maintainability

Mert Cobanov 2024-05-01 20:05:00 +03:00
parent a1e28966fb
commit d9de8deab6
2 changed files with 13 additions and 8 deletions

View File

@@ -1,8 +1,9 @@
 import gradio as gr
+
 from summarizer import load_document, setup_summarization_chain
-from yt_summarizer import summarize_video, check_link
 from translator import setup_translator_chain
+from yt_summarizer import check_link, summarize_video
 
 
 def summarize(url):
     if check_link(url):
@@ -14,11 +15,13 @@ def summarize(url):
     return [result, gr.Button("🇹🇷 Translate ", visible=True)]
 
+
 def translate(text):
     llm_chain = setup_translator_chain()
     result = llm_chain.run(text)
     return result
 
+
 with gr.Blocks() as demo:
     gr.Markdown(
         """# Cobanov Web and Video Summarizer
@@ -54,4 +57,4 @@ with gr.Blocks() as demo:
     btn_generate.click(summarize, inputs=[url], outputs=[summary, btn_translate])
     btn_translate.click(translate, inputs=[summary], outputs=[summary])
 
-demo.launch()
\ No newline at end of file
+demo.launch()
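
For readers skimming the diff, here is a minimal sketch of the control flow these imports serve. The body of summarize beyond "if check_link(url):" is truncated above, so the else branch is an assumption based on the summarizer imports; check_link is assumed to return True for YouTube links.

import gradio as gr
from summarizer import load_document, setup_summarization_chain
from translator import setup_translator_chain
from yt_summarizer import check_link, summarize_video


def summarize(url):
    # Route YouTube links to the transcript summarizer; everything else
    # is assumed to go through the generic web-page summarization chain.
    if check_link(url):
        result = summarize_video(url)
    else:
        document = load_document(url)  # assumed: returns LangChain documents
        llm_chain = setup_summarization_chain()
        result = llm_chain.run(document)
    # The second return value makes the hidden Translate button visible.
    return [result, gr.Button("🇹🇷 Translate ", visible=True)]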

View File

@@ -1,9 +1,10 @@
-from langchain_community.document_loaders import YoutubeLoader
+import re
+
+from langchain.chains.summarize import load_summarize_chain
 from langchain.text_splitter import TokenTextSplitter
 from langchain_community.chat_models import ChatOllama
-from langchain.chains.summarize import load_summarize_chain
+from langchain_community.document_loaders import YoutubeLoader
 from langchain_core.prompts import PromptTemplate
-import re
 
 
 def check_link(link):
@@ -23,9 +24,10 @@ def get_transcript(video_link):
 
 
 def split_chunks(transcript):
     # Split the transcript into chunks
-    # Llama 3 model takes up to 8192 input tokens, so I set chunk size to 7500 for leaving some space to model.
-    splitter = TokenTextSplitter(chunk_size=7500, chunk_overlap=100)
+    splitter = TokenTextSplitter(
+        chunk_size=7500, chunk_overlap=100
+    )  # Llama 3 model takes up to 8192 input tokens, so I set chunk size to 7500 for leaving some space to model.
     chunks = splitter.split_documents(transcript)
     return chunks
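
As a usage note on the reformatted split_chunks: TokenTextSplitter counts tokens rather than characters, so chunk_size=7500 keeps each chunk below Llama 3's 8192-token input limit with headroom, while chunk_overlap=100 carries a little context across chunk boundaries. A minimal sketch using the same imports as the file above (the video URL is a placeholder):

from langchain.text_splitter import TokenTextSplitter
from langchain_community.document_loaders import YoutubeLoader

# Fetch a YouTube transcript as LangChain documents (placeholder URL).
loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=...")
transcript = loader.load()

# Token-based splitting: 7500-token chunks stay below the 8192-token
# input limit; the 100-token overlap preserves continuity between chunks.
splitter = TokenTextSplitter(chunk_size=7500, chunk_overlap=100)
chunks = splitter.split_documents(transcript)
print(f"{len(chunks)} chunk(s)")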