From eb125fcac4725b399c84eed3f50c78090b18eb3b Mon Sep 17 00:00:00 2001 From: tcsenpai Date: Sat, 5 Oct 2024 12:30:21 +0200 Subject: [PATCH] First commit --- .gitignore | 3 + LICENSE.md | 13 +++ README.md | 73 ++++++++++++++ env.example | 3 + firefox_extension/placeholder | 1 + requirements.txt | 5 + src/assets/subtitles.png | Bin 0 -> 12032 bytes src/main.py | 178 ++++++++++++++++++++++++++++++++++ src/ollama_client.py | 40 ++++++++ src/video_info.py | 29 ++++++ transcript_cache/placeholder | 0 11 files changed, 345 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE.md create mode 100644 README.md create mode 100644 env.example create mode 100644 firefox_extension/placeholder create mode 100644 requirements.txt create mode 100644 src/assets/subtitles.png create mode 100644 src/main.py create mode 100644 src/ollama_client.py create mode 100644 src/video_info.py create mode 100644 transcript_cache/placeholder diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ae46c25 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +transcript_cache/*.json +__pycache__ +.env \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..82c90ac --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2024 TCSenpai + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2789754 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ +# YouTube Summarizer by TCSenpai + +YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models. + +## Features + +- Fetch and cache YouTube video transcripts +- Summarize video content using Ollama AI models +- Display video information (title and channel) +- Customizable Ollama URL and model selection + +## Installation + +1. Clone the repository: + ``` + git clone https://github.com/yourusername/youtube-summarizer.git + cd youtube-summarizer + ``` + +2. Install the required dependencies: + ``` + pip install -r requirements.txt + ``` + +3. Set up environment variables: + Create a `.env` file in the root directory and add the following: + ``` + YOUTUBE_API_KEY=your_youtube_api_key + OLLAMA_MODEL=default_model_name + ``` + +## Usage + +1. Run the Streamlit app: + ``` + streamlit run src/main.py + ``` + +2. Open your web browser and navigate to the provided local URL (usually `http://localhost:8501`). + +3. Enter a YouTube video URL in the input field. + +4. (Optional) Customize the Ollama URL and select a different AI model. + +5. Click the "Summarize" button to generate a summary of the video. + +## Dependencies + +- Streamlit +- youtube-transcript-api +- Ollama +- YouTube Data API +- Python-dotenv + + +## Project Structure + +- `src/main.py`: Main Streamlit application +- `src/ollama_client.py`: Ollama API client for model interaction +- `src/video_info.py`: YouTube API integration for video information +- `transcript_cache/`: Directory for caching video transcripts + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. 
+ +## License + +WTFPL License + +## Credits + +Icon: "https://www.flaticon.com/free-icons/subtitles" by Freepik - Flaticon \ No newline at end of file diff --git a/env.example b/env.example new file mode 100644 index 0000000..0fbf9a0 --- /dev/null +++ b/env.example @@ -0,0 +1,3 @@ +OLLAMA_URL=http://localhost:11434 +OLLAMA_MODEL=llama3.1:8b +YOUTUBE_API_KEY=your_youtube_api_key \ No newline at end of file diff --git a/firefox_extension/placeholder b/firefox_extension/placeholder new file mode 100644 index 0000000..c7d43c7 --- /dev/null +++ b/firefox_extension/placeholder @@ -0,0 +1 @@ +I am still working on this. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4a288e1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +streamlit==1.31.1 +python-dotenv==1.0.1 +youtube-transcript-api==0.6.2 +requests==2.31.0 +google-api-python-client==2.101.0 \ No newline at end of file diff --git a/src/assets/subtitles.png b/src/assets/subtitles.png new file mode 100644 index 0000000000000000000000000000000000000000..08dbf2d7ca312502010d35ce614b4f671c37f05e GIT binary patch literal 12032 zcmb7qi9b}|8~?pC##jegvo9n2QXxBK9kNr1Oi3E+$XduuK1IpWpk#>{3E8q{$^Cp3 zr6QB$W1Uj=Wn>NUyYv10et*Eviy8OabD#6v^PKZM&v`%ZJC62N+#I4D003~~tj(PP z00IAt0N7aJul1P09r%kS{1ncW4gQH^yOa!HvlFd7!T|vDn)wH2t6;?7#={X7?h!7b zmm{JtT=fH@qodUcA%Wq(7l?l9p;!I$mJLM#KpMcApLC7MUmA~Y_jW7TT6JKP!BP=V zwkDkBym=if1YI3fhb^UI^L`oj>-ODCiyRyX%kMdM^V#gP;xfBHNFYq%*`-2cxn!K0)YX)XF- z0J;wCK$egzo`%nRF?eu(>iVn0~_aA(B=I*m7Ga51zPs+!0g zlwsq|O=0>4`ceG(gaa>}TRo8Ry^1}EJxF^y+y#$(5V5%cekOe>Mr`7bFxbi!Tcl!$ zwc}_*&EdyvwhHeMrWNm4@K$)VvsGurXhz7snlWmOeN20M&7O{->(JjpUd{>;t7cXe zG+nsM+Oc5Hy;!k3R-|*JbLfN##Ms-lG9~QB-#pmz=qa)=7|Y#cgqxhT;@PmB{}&)+ z(QlWIY*4k)6w9{=Q_;s?$V+6S2LZ8Qp!w}X8A6ih(OwuYa8FWL#-{QTC|xxy3Qa&M zR8TikTCE3RYBR*LDHFM!1ng3#*S^H{-+#(^gLCgti$IQ8 zuC+1=MQRO-rC*^d(xsjGiZ=6Xl@WbXLD8?f?_YEWp4oPTXdnsMlX+&rHX13+)5M7r zlfYH^gGEiFA~7hUDj{Yv^tWiQA+43XMh*{)Fjcz@7U_&|#mEy8wNf8PPCEF#6x=?_ 
zh^KGTRh`cW0Rbs$q=%XMfBvj?RP6}~BPZpStfaMbr6>U}Qc0QU(dEBA1|^Wt7bu-{ zaKvcJG(ema(Eg}PW9%zY-^4AqmRYBeRLD{Y|BT0>(zxC$JajQ?*$3|7AXL*esuQ_u z^?kM|bfVm*Nrs_AucGSRC|BBNiR0JhE_9Q|bv2>q62rmO$@!qB8zXVFXHb}~cj-U6 zh$83Hs0o(#lm@fHL7nJKOVlf~>tl7;$PrF*D6*gTAo%0u%94)?f?%60P9j2@!AVn| zpXIrfr&i%@pDRh!I^$efb+R>nCH-{*V55=tdA4*R@ai6?0olmZ3x+fW@$L0zWmmw>25(@o})2kJEq;${6{9t*`iY zJ>Y0^1ew6y&SP+++};87VR>clsUe-3gCbo&db^?GK#II^v)BNN1FyQ_zqO1z30eU?bfQOrVD+n#gC*>C=%^3D!~>548ZdCS)I~8tU^= z9+!mtpe^EOwW^!xWrMx<(EAu&gJ0Ij64bS8|$ROPBTBNUn* zIEmc|)@L+BbJPvQAv{<#GnzfPDs@Q0+&StYas^$TjwwE-i;}k9T1$A)@Ok*m-yT>u z-{W*wS_32q)z^gyeB=WpkZ}^jTKdVkAhqCOd0 zAtqUM(lD~++RLa3(-Bi^wQsV}raS-kA%-8#-iMO)UVxqo$K-wIH}#$z@G%^L`E9G*141k4go=~xoJD$;U?K3z z#&cBzSDIrzqDKTLl!ry^L9H%xdIbF^Jdp&!S3K>=9DWINvwA>rG79p5(wNOTAuQ)p zn>mC&Y6E@-m=+;}1l-3L26Ocb&t|1P^}YgG1OY}E{oq?%C-t%@x*Ze4vF%M1sv@CG zuMJ_1-p(L(E#%7oJtLQjIL&(F&N(*WH<+iYfk+c12A(P0qXJYummxgp9h9kF6H6a5 zDj9ASeEFcEX>MRv`YIn@Y4eB?zN^p2Id2K&b&E4E1rPGB2tqaFjp9UWXYM9!uu#O6 zh>YrbAbA6%D?r<`(iGQ{atH;9l_W+yiUwc*7GOBEV9cCPq#j8nsRQafeKed9upx$u zW8b!htIsKcVxtt#hb4It_K%5>1eU`#!g8E&X|m zDdJl-m`lsDYCJ4SZ)(7ncSwSpfaG1wA$ZRb>XQpE%xK`eYe5E*re0GpEedS263t-)I0x&Xk069C z(RT(nlt`Jl%6)m!fQC><8wD-YAhu$lXQka1{s|_PkV1Xo*Yj*Fry7@R{P7ij3QX|qxiO#i4YIg? 
z<{uGw!EgH|F{93bb*siZ*0Hnnax!kvK&ejNvCmlIu$ne++f= zG2fl}KAygS?x7Z4eQWz*6`(saiZ~*xn}jX*UIwE|E&4g!jr2_~AKWWA=v9?AVw<}_ zm+{mslrQ&pA*ZsAszai6VRswk842WcEA+sxO=%-q7_?~;%*Sw{H@5H>Q@=erz4KxF z@Z53iPWW@`rYa#1FL+5{POonuG>tOyhFWY*x!$NiWM3`xmv6M)Z7uFy{`k66$%c?Z za##;PDo;4Pf)+Y-@cWF7!IRGSGT6Nl zt(VL@7R`QX&C`bFIAOgvbGs#)xv1`hB9*M zZu^7zuKwf%{;W?=GdE2$s1;SWK3AuV65`L>Zp>>V1(gmU)P%dn|$MIo2h3H#Zfs*9hr&FgH0L7}osXXJI08uV^qzxaW~ z^AX9X7CH09C5lzL5nFsd8`-q;qmw_gsE_l__hx*QmQIScU@Gs(0V}LJDHx}W(GsNf zJk})k!Enyt32mFt(`rY6XmG&-t@M@^DuG>4WS=F~AS27rzEwLuDk9@szz7?ZRJZ0c zG&KNXBPl+Hk3+rb@dGlJ#W*=&f%59lu4~&&AT3a_+?%x+DKM=>Q#6&6Y@VR z`A}S|l^}Gn8P{2tS4JU-G4T0e396~xHE#LFbUEOCo`onXMpplAg_a)+eG1-Nl-?7} zu#)`k%0JCL8><5;W`uuW*#+XDdz~IMB=E}d1-l#|^$#q0P&^QnO2O1AmnOt|iM0sl zh~$@CD($>Y8F|k~%Fg2qGP{2D;(Z$_Fh&Y?HPq?D=r~RN*N!&(n=L|fqX(^j>)D+B z1KF`7KY?hoNV<~z|GF~9SlUVdRG+<5(8C8=49JL5 z`@Ry9^i1u?4z#y>KJ@#d{bya1&a`xqeJqmPJ@23(J?QBF?Pf(E675$YT#7=3RIbJe z0bS9?#E6LG((EQy{5krwSzZd(FG`>`mf3U$Xixuj3`fc8+ATqq6LOs;3KL6SY1!>XcPtH8RmJ6UkF^hcN3P6|7 z-Dq}<0tZ8F`WGjKG+vz3I4-6%MXn1)wnP0xMW}rJra~^|z^AvR$D5*{>~CFQ|(4pzBmWgmEjRrty&^;EWeb zXlW|DJqj);0%^~_iBqtR?`$iLPfDPmZ`%206sk^noP_C@wVuk%kb^w0)oZOEv;j(S z3u}>nxOFqKuV$XbEge`)E$GDJ5-N3F}Btw~#^hd?835)Y~{8!5~ z8)fR}mV(7egju_xs{{691m^Y+8ddaPJs@(EsEXC#vk5QS(t7{2G0{y9Fu2GR)Y%QT zxrNayI7mok{c2cHeW)P1ssbQ_MR2y}4l6ZyGFt#}QRACzZ~Ck$RAs+B*0&x&P~oaO1zvljKlC$DBrNXZ1yCC2j1DeXFoloGD*(Km zJT20IpuGRO{mVh}s@zmSI{4{7ufQP=m@#kmk2A51e!m*|sp@Gm54Vv@yP!?kWiDKC@L}))TKiFE1BWmAs$co^z`aJHgA|MWax_cTh7M3W!SXg=x z8)o7lQ+dsenozoE;3oj%UvG9qYHvg&-?anxThk{e^CE1i4aXJ+CY-c3%2r;T=&||S zhRe%Rw7gS!%LKu= z{EQ)Nk_FT@SF?uRHlY8#KlSXE5MqTo*5^|P&pEt9MFPq=;p92#+1trX{|iHMR4c^R zIqPTf#vFUc;6>$T4$@&P>lcdtc|hja?pb=_amf`{ItxCk+qdE+E6L&hgg>o!-5dBp z8-=k9;5Tq6_58?Ocz1xzX`3*#)S0|A_kL z^Z_o{Bs4di&j0F<7KRjm`D)$2nUs~*91B}<*eziLxuJTqx-e}b8s7(ha zUa<|S0BV#i_I#gU&qwW%c2>I1r?1;Fbm_pc#oa2|KabWWcZ~^JjRZK&j2M=OY?>Cvf{=4?f!4d{ytz=w6=c(D&!1TTEMd+4mt-tf z*S1p?_@0kye^H2B|Dy@gi~;8w$tS<1Zo1{^4TmrRD3SUdBhxf(z~Qp)Z7fdo)vK@U 
zKNW$?m;QHU%n7GBFpBoSPo_7;DE~jNW;Cz@ii5CIy|yz^_CHTMeXU^<`D553MBKPn z%lUIOB5TlVnMGtu53d?;d{+Kz7fBoi_m@nke0vh0#{rP$9;M0QD7PQ@?M5)i$$j&` z5qf`7o&US!V-BlmTNucU3iV?JQ_`@`Y(=nOdrUEf<&usUmS({QxHyL z5bywvFH`{*D`xDV$Z`VAe=3FwuSzJ+s9l=i5b0V_Q@Yy>4h8aQvgc$@;o(sBm{qbj z1`dD?ug5RV@4*rQqRaFYEb_;6KKi{kFqZQ>22RS9T7hlf|Jonqqr;gY;qqHg;K%us zukft?HC~-SNh{rr1_x*KXyZ{HkCDJ{SiIbn(On9bon(?(x0bY?YSb)77Ir0i zHNJ0!K?fPgGBG&ritJAYmZ3@1AWAXcEtPj2)Z*} zoPOt%dBDv{E3`-&&;8rCf;JkSUXvlJ6B-nEM*gh`ttV+9KR@e#3}a#c+&f599u23; z;`wT;rh_isXh1z<79<*{V~)wNN8@DO$gNp zmSITXFD`SsJeds~Ds9@*^vS$X`8_?%a4x;$?i@<3hCKynM)a{prD2MR{)kX{?!=h} z?m43-c!Dfj4(mR?#}i-v$uxqL=5D%kC_7SLZA;eFI$?MjUG%02QQh6=^DYqAHST*% zGCYFgs2f2Wnbn%-zdM(XLFT$Vk7oZPvX zQaDFCbNOlnui&>4WsIjA$Y{HpR(bh1oa4aGY)%bYI3_-G4zfR-=29#{*04x&+Z0e0NoM5f^_HR;y*`#$x z{$|3iPmmUTPGYie6VY!LpMB)CUHo@^n$1d;Klu+ zoUyI^wR->@RLmP(H?W(YbUk5Y=Xu)u4xGNCplm@&pPwU&`XtfPhf1XLH_G1a<;_1; zA^BJ8$9$=5Nbp`=mDetePI;dD^;w>>*nZOdr0YZ1xVY@OZ?ix^R?_wD0;?BgilghZ z0LN;cGSB%)kSH-HX&Gs&AvHT+eSSj2dx2N2qRk&u&Qo>z3knK{LRE~Ddc&_p0NUL^ zM~9F4dkHGhdv{AYe%HtGC;NQP*{h;Ov3*B^FDO^_MnXuVy@~#Cw9>FFy?vIqVjSgG zSd@mK?}_$3d0}{W*YrwE-{R`la*07+s`sg_A;V|4U)5I^G%T|;RnK{OyUd4N@s4p! 
z*;(VQRb!k^?@Z7Xtf8nw01@~pWMItoOtcv}dN!WY8lo1qT@hO?V^rqt`RKrGrND%G zZM8QEm-r{=szHKV*#qPHY6N{Ox3AKRA9$Ek8^JA1fx~T{*P<*|-cVcj$n))bRH|dCW_ z{Za>M5wSVHl;XLe-XQMwn_9x{N^^E5I$HOy=5QNYdqPnY_&mNWKLdIUotG9H6yF<@ z{!uP?X;*qqr@NliX2!n}BCT)w-plfOXA{lpPEGB|5LU@%ea2^WB-SeG_pZxUbB`U4 zGctFls?q$sEzW805Tzfe)zh9B^4vQ9R7yIS7mgYaTJGjrjA+k{O z;`T6s)ZU&(Db_`0^Z>Q;+ADs1M-dEQE(8uGSy4UD~ZZyff%NW)6fgxd4$Va)8{LR_#$u6@9j>;|M@AugFRk#tsrcD^b^ye!VhUw#k(mU}SHK z+RS6DfZL(K@U4c4DmM940NCSe(PC=%sKDVQtQ{PF<7AlHJ&HHhhhEr-8%tV!L@%^*B@0G(?@PHMF#Vt8qDm(*2Qb(Zz=<>ix(W1exj?TM!h}r-i z#5u6v676vY2KzG92Rkj`(m}a{4;AW@>fPVe{(G1g zvZZN&>4?azNimGu4wlq7UYX(fnGib$w4mLO>s$YAlK^9N59I)r6Jo=Mm{?tJmLURQ zurX4Z=u+yZvx+TiNtff84{(aWNWP;TY?wyaFe;l`gr5h#11E1g@2~+>C*Ui>nv(ln zMPTB$S@*;K7L`tu3E8@Y7#>0I7$Zz^pOQl(oK(nrJPI zPPIbg)IVT@dFFW-g8R+}tr^+vY#zkG2<4StSUTtYGU>gTgoLTH!ta0U+ciFA^hxDz zErkWsB*0}%%WF%h>pyT4TA$@bT@x6-t;O+)%}x{qVh(M)MkN0eB*J_)SX`n=^T0Gw zKjF%G5Fm)sC*a=ys_T=e_P^@=?~IZ|Et5pbj|c&4q)sNB>dS;vsZ%iU`6E8rPI!Q(w2;IT%wZ zl^B_6bWQJ*-G8I;#11_)&jgu?r=`cQ3H*6^_LMo(YX$ zj}e{Id)MgjgmW|kErKx_4VS;a8`h<8ywI6@DVqlvOif=@S-VDu;pVG^bD$<)3%8WB z46b+K2F-eHaaQ`J)~W|!_9#^jpiuYr?+Qike|iNwsMBKwu*2#<3|)ht3mg@IE{cIC zkS-;O))dhtcDw>zW8mafcnK8A*5forR^h@QE_G9##(&^hRt0Z~{oom-2D3;OK(!@x z;hKRa`#es7^pR{rS;l0**x6$+3bmYuan5eC2UT@4wItvif*0pp635aat#6(^VvGRh z?2~+ua3TShQT}-Y-EWAPO{h~B0Dw{81i%j$R4I3Lki)4BfdQ3`@f`R%1za;R<}3+I zPz*&?-@`a1RSN(amOzy3Qn(t9_M#6kJ}gnJ?)?bi9;D1bN%e2DyF`=0<9f!q@9|!VntPB!nr$tGn z<6J?&;$gU}Yo$i!^u9smdlw)j6{X%IYf5P?u3y`?tp}`ikk#m+JMcVK47yOS9mEzV znlJofKFrhp=z+=hNs4|5Y29P;cb^noscRU;eFo9YvZC8}DN?9;lHx5Fp#Owu3!AR0 zelk|6vYvGIoAt^;E#N*eLbbcu#57RvcL(Sd^Z_pG)dB8x5~NC!64RLRkHAIlF}Qr7 zrH=5bGXZOmLYA=LjUYY-Of)$AmRW=p)0nK??awOrjXBt(vuQ42#EHfK5qD z&bL$s8sK!)z40idK{jJf*6p9p^ipW=ztZO84eJu{98+EHxifTtmX;uTlUYdR_5zd#?&PAs{%XWe6@15;jqy8oE5~%Jrh zyg0Gp7RvNSCOL&{1aB*6TQ7(Cy+XJ|4LiIq?!qlB!`krNYL97C6?T6U3qOo;w1}o> zN=n7q!I}lsE|5xU*J}Qf1HR;PtV|| z9s;qxGd^6xh1q`U3?$x{ehVh;C_OwtoVOY|>~^abQ0zZNI}h??KEC-=F_rpM6fkCw 
z<6T+RNDBmdH;>VisAU{B&Y2%rC%E>sbd%2fgD8p{rC753kKWN0fhLq5s12NKxQfwu zxPqYr^zX2ST3r5_A;C>+8u@`h?ge#5$KQoX-`=KDe{lhYsELy!^Io^=a%OXF_`H3#D_gvh% z0@GviVR}sTAqF@86m0T>qnt4cb2@(7StwJ3MYeo$8``E~;tRBZOFw=#Q4jSAla8EZ z6#~(|7Zj8_LHeY1Nty%9yJ3wISPBcO$nIp26M-8>SAHyQ+EpxBc^Gj zpIkA;X1WXSOq+@y{%HUEu1)?Vo{T8*#gu8a7j zwgqa)RdF+PBykx-?D3=s^!0B67~q3E54%t2oN!X~H)4W<9^9A?)Er4=H57rT zrehgQL!@=}b=HruJ2q2>P=@G5^m9ouE9YB}k*_%NO?O7mCh4JyN|(%ch`LOQ)?zg< zfgBT&F!zJ8aND-*2Hx2iIvHl2{mQW)>E4oF+-npl0~%zTGj(`8`G@%@6`c7$qKvuQ zG28J(Awd{0!yhIAx!n`kj9px zf+AAztxP>A0a1vQ#K0R;rBR)sLFK8M)pO3PdKWAN6S=zDsL#bg$Ly!~n3nsq~MWxAu*AU+ss36I-(dQ%rwbY&C_l@C_cg3AZ4NoC{}qz@v56V~XP zP^fYQ=j!B#peE%J32Mi&F)drlnTtGhEj+j?HKKFQBv_ImM<?n_9IDzU2$zjGCg9p_%QqYE-g znB;*cguzHoQVK~b_#71H66X|02Jwqo-D*L%WDI^g5M@?V zDw6)Az4}2HsQ=(?g2caEVx2p;{{{|@Z5OAkkE#OHOIi#B-4ymQ*7@pJQf5eEI0^WE zDOCUD4iy2H-&Ay1hs9!!&IxtJlOvh5TS;U!q8g}Xsg?_-$q|%<02(9Msx7X0_I&IvA8amalx|WbX>K83&ee*i@Jp~nCK literal 0 HcmV?d00001 diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..9cf531b --- /dev/null +++ b/src/main.py @@ -0,0 +1,178 @@ +import os +import json +import streamlit as st +from dotenv import load_dotenv +from youtube_transcript_api import YouTubeTranscriptApi +from ollama_client import OllamaClient +from video_info import get_video_info + +# Load environment variables +load_dotenv() + +# Set page config for favicon +st.set_page_config( + page_title="YouTube Summarizer by TCSenpai", + page_icon="src/assets/subtitles.png", +) + +# Custom CSS for the banner +st.markdown( + """ + + """, + unsafe_allow_html=True, +) + +# Banner with icon and title +st.markdown( + """ + + """, + unsafe_allow_html=True, +) + +# Initialize Rich console + + +def get_transcript(video_id): + cache_dir = "transcript_cache" + cache_file = os.path.join(cache_dir, f"{video_id}.json") + + # Create cache directory if it doesn't exist + os.makedirs(cache_dir, exist_ok=True) + + # Check if transcript is cached + if os.path.exists(cache_file): + with open(cache_file, 
"r") as f: + return json.load(f)["transcript"] + + try: + transcript = YouTubeTranscriptApi.get_transcript(video_id) + full_transcript = " ".join([entry["text"] for entry in transcript]) + + # Cache the transcript + with open(cache_file, "w") as f: + json.dump({"transcript": full_transcript}, f) + + return full_transcript + except Exception as e: + print(f"Error fetching transcript: {e}") + return None + + +def get_ollama_models(ollama_url): + ollama_client = OllamaClient(ollama_url, "") + models = ollama_client.get_models() + return models + + +def summarize_video(video_url, model, ollama_url): + video_id = video_url.split("v=")[-1] + st.write(f"Video ID: {video_id}") + + with st.spinner("Fetching transcript..."): + transcript = get_transcript(video_id) + st.success("Summarizer fetched successfully!") + + if not transcript: + return "Unable to fetch transcript." + + ollama_client = OllamaClient(ollama_url, model) + st.success(f"Ollama client created with model: {model}") + + st.warning("Starting summary generation, this might take a while...") + with st.spinner("Generating summary..."): + prompt = f"Summarize the following YouTube video transcript:\n\n{transcript}\n\nSummary:" + summary = ollama_client.generate(prompt) + st.success("Summary generated successfully!") + + with st.spinner("Fetching video info..."): + video_info = get_video_info(video_id) + st.success("Video info fetched successfully!") + + return f"Title: {video_info['title']}\n\nChannel: {video_info['channel']}\n\nSummary:\n{summary}" + + +def main(): + # Remove the existing title + # st.title("YouTube Video Summarizer") + + # Add input for custom Ollama URL + default_ollama_url = os.getenv("OLLAMA_URL") + ollama_url = st.text_input( + "Ollama URL (optional)", + value=default_ollama_url, + placeholder="Enter custom Ollama URL", + ) + + if not ollama_url: + ollama_url = default_ollama_url + + # Fetch available models using the specified Ollama URL + available_models = get_ollama_models(ollama_url) + 
class OllamaClient:
    """Minimal HTTP client for an Ollama server.

    Wraps the two endpoints used by the app: ``/api/tags`` (list models) and
    ``/api/generate`` (non-streaming text generation).
    """

    # Fallback when no URL is configured; same value as OLLAMA_URL in env.example.
    DEFAULT_BASE_URL = "http://localhost:11434"
    # Seconds allowed for connecting to the server (and for the model listing).
    CONNECT_TIMEOUT = 10

    def __init__(self, base_url, model):
        """Store the server URL and the model name used by ``generate``.

        Args:
            base_url: Root URL of the Ollama server. ``None`` or an empty
                string falls back to ``DEFAULT_BASE_URL``; trailing slashes
                are dropped so endpoint paths can be appended directly.
            model: Name of the model to generate with.
        """
        self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")
        self.model = model

    def get_models(self):
        """Return the names of the models the server reports.

        Returns:
            A list of model name strings (empty if the server lists none).

        Raises:
            requests.RequestException: If the server cannot be reached or
                answers with an HTTP error status.
        """
        response = requests.get(
            f"{self.base_url}/api/tags", timeout=self.CONNECT_TIMEOUT
        )
        response.raise_for_status()
        return [entry["name"] for entry in response.json().get("models", [])]

    def generate(self, prompt):
        """Generate text for ``prompt`` with the configured model.

        Args:
            prompt: Prompt text sent to the model.

        Returns:
            The generated text (the ``response`` field of the server reply).

        Raises:
            RuntimeError: If the server answers with a non-200 status or the
                reply is not JSON containing a ``response`` field.
        """
        response = requests.post(
            f"{self.base_url}/api/generate",
            json={
                "model": self.model,
                "prompt": prompt,
                "stream": False,
            },
            # Generation can legitimately take minutes, so only the
            # connection attempt is bounded; reading is left unlimited.
            timeout=(self.CONNECT_TIMEOUT, None),
        )
        if response.status_code != 200:
            raise RuntimeError(f"Error generating text: {response.text}")
        try:
            return response.json()["response"]
        except (ValueError, KeyError, TypeError) as err:
            # Callers format the return value into the summary text, so a
            # malformed reply must fail loudly rather than leak a Response.
            raise RuntimeError(
                f"Unexpected response from Ollama: {response.text}"
            ) from err