From 9a20b17970d0b584d88e75625f300d0b35085813 Mon Sep 17 00:00:00 2001 From: yihong Date: Sat, 6 Jan 2024 22:21:53 +0800 Subject: [PATCH] feat Gemini (#366) * feat: gemini init Signed-off-by: yihong0618 * fix: useless code Signed-off-by: yihong0618 * feat: gemini model --------- Signed-off-by: yihong0618 --- .lemo.temp.bin | Bin 11108 -> 0 bytes README-CN.md | 4 + README.md | 7 +- book_maker/cli.py | 10 +++ book_maker/translator/__init__.py | 4 +- .../translator/chatgptapi_translator.py | 3 +- book_maker/translator/gemini_translator.py | 83 ++++++++++++++++++ setup.py | 1 + 8 files changed, 109 insertions(+), 3 deletions(-) delete mode 100644 .lemo.temp.bin create mode 100644 book_maker/translator/gemini_translator.py diff --git a/.lemo.temp.bin b/.lemo.temp.bin deleted file mode 100644 index 10d065f69e81ec53412c2ac55033b40ce3f186a0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11108 zcma)?OK)pgR>zyB#h{rXBnAlSs)P`z)g`KFL>FR$7%*Xg1cS=})J1opE>zu(sA>sj z&cliA_>sp+oW#!a#>vggcHVx*h#BHrRC(`nPEQIx0sQ`Z*{)D9;L5s+kJny%?e+Yx zwNK_7n}7AKuZ4eq9sPFkm;Aw@zqpEuf98)L$GuH=(DPSE=|cI{4?g(GtH1s5)u;dX z!B0N<_=8VB{NbxlfBNc^kAD0QAAj_BAMx!^e)Q2#KK$gvS08-x>ib_=`~B$k>DRyZ zwQ;B9Dy{gS5$`VYcid=dGsw{?_N#cQ1rR2 zB$G{A_kO$O%0)l-_*rI?k89t!g-Y6JrrC9ITP=?VyS|=x%~86PA9phT@JY*C6?bxO zWZJ#g8+Q+&jQbT=UJ_aJA-?I42LoFOjjulX`T>$4n7@8wl<{6B8SOwk`<1V8{Xq2f zd_3Qc?+)U-O;)+?Bf?AC+Tpm<^Yd@Uox-@c~hwn&&##)v?>E!z6w#yVITY;BMSqMO{WSQOk3A+!;cXw5qPs;8%AFtAcyFMP^?; z^mm(Xgk`HpsDqd8#=}vZ@A^e_+jpy1F1qpc9t)Gvw!b}cxwgdUoQykp*RCXmc^N3T zhAHEVxp?u>FCHXk*sc^uH_7tFxYKv_c6_;$JUscm23w%&aX&Ne=Ej|R(ilz7Vv^L~ zBKEj*!4kjKb#wK2b4OCu4*m6sZ3IDFyGJMPpzJzmdnK*j`-eIIZ~buBNi#1(tW29X&k0)RSHxYdpu$Rkiy z((GXt=#zT|f!}E*H%HP6wwizAdS}T|HJ#f)%yFmemY)3JP!>i3-CLf=wT^E^{F*Ko z)4gMt&HDb)xOeMv%jisptLJ@jUQW7L@%Po_=#8(JWZwRryI%b~GdJ$uj5`Z1dnrF9 z+Ik0)v%ia8`5$w^MDn0uwAia9^G#Su)Z7RqNpt&+Bm>PdCknd3fpDXVCh99@B*kRy zty?dqZ_4B6hh#Uy=DwbZ`#CpUANR_xz3oES%udW#cf8~7&)rc`N6C%c&5FxB`>i@( 
zUFAZDJ1HS=937;aBMjoJy|`AJoV)sn;1>Ki1QG657yLp&r(9k0)kd=RfIPnbHYwjG zg&Q}>_~<#<`+?jf=2OGMBzb7#9}4xuCpaWeEuxo=%t6sakZ=u3*B-h;ZQMIUDyiqJ zltw$aF9b;w7w@4N%w_RB?p>t|L;aK~$`MZc_QY?t^hvfmMjd@%))F(IACoV8IQg_lEHgD1WsB@ ztdi0~Y``Qv-E!F+(z@ncx~EvORq=T*DIM{F*jKaZDGTKz5a6wDIU^>$g4@3gOm^Jb zozLK{J)S4K1=qnJ=W$Z(=$>=4CH~pAIxu+L|FW-rMs zXzSB?zhB325c}#mNy_I|%?gB<9ZDK&3@6!YxMB9fGv$_pj5}Lli7SM7LWc0fwU_+D z9l?t5>2cj}?fA#}nN*UtdYJC*qb0;PhB(_S&~0BSq(bbHnhyyOR>=+6qI6H%g`3M# zGp^+xqbGMVF)Id2c6MD?xt?qYJ z)>^J)l|AwGtE6`+vt}Qu@DQ-CMKR7m3UPgsteoG@3oF#Z*SL2~EXVhlbuR5M@_(|i z$6?*-BAfcXqqx^4zW86nVRyeLWg@e?xp6HyfZskyR)<j(904aNz*5z(MXRg$$c+c$~b+qO`cSPHr=Y$L=I8JE^K&W7%gev>laTP$3PI zov=F^;~`KZdAbW~{C8#iep$z7)oPi&q*XJ0$Zw|DIC|@b=SluHF8Aa0g_%y=pc_{X zC1LmAr6F92qIeaT^X%eUU3ijc0Wnazr4nj)8*{F`r+ukdYj@=MNkJVRyQUO4?p@*= zELVhvnnwkI9;3{}&{=FPRjoTrYgymPy5)R$e*DGYfGTUr&CVq#)9Ls^!8I4ifyt=m zw(n6OzsCIyh0pP2oO|@GQgZ*soyZ)VD`|FQ4y$;V4cEPge;n15dR^slw4_4?80RY6 z3hI*^Ozf5yO%fA8&cD_656!sTcFR4q;a1OB_)-Jo{%Aa`xy~s^o*jo(5Y<I!c;Ac-iY#$k@e@tB;pLA^ath>fzz z=^W3cN#(8+ms?_Mo|vz)bsX&`%XflAwc70086OrJ_O-2FE_BWPbSH~{Y9rOGC%>@9 zsRc}l$B-V!!#YRPs$Ner43}O;3MVxUDCp{KDup{ai2K4gvH~^=3SnXN`g0UZrStdw zalhp27ic5Nu2SDQ0-%7JqE#Ve*v>$h4A+W;8ao}gwK(pcx?UA-WDAfdk~XNJCV zl0NVFb_kHt1WbW$+$`dr8cDvVI(}cHm*4}y@P`A|#;c9Ek}-zy=qMf?@^^YFZt=#W zwp%#xTkK23Ra{PhB$)?=&GUycOi4kC@c0;}6b2}Ol_s@j5(zlVW$9CwB1Cc;VD) zsOeVRx~K6?N$0vg#WXrjvl`d75s3gF_XRl!8EYHZF1?P@tAV6Cd?E%FiPP>Yt`9&I z@Qhyn+BXD(13ajn`<=I+Wg=#+6WA#mM?)TdZ6>n zmxxfvkWCKuq_)%A4>-qnTjW*T9x|TC^&XH6%#+;jVXyRVGrbw$)yd5g(G>4C<7-kM zzbZGHgCUYYDT>9TdPVANy0e&!LLsB1M(VF-+i^zMe8lr@Q&R;1CT?b2X>;6PiTgBQ zoI)mhNhv0wU*$QTEJNor@zQ~xS2anB3+UV}WfAGi81Gfn4P}*NXNbm3y|eDuYL=VD zav)|GLSQDIdPNdDAsqTAS{XEEeAtM@<+q=cHDwH2;@6faThN$A=!zUDZs*9|T}%0u zHh2L4VHe^BJ_;y%I2eq(0u?M|?vU=}=-XJ5_SgJi$?uKWf}ez)g1`#5&=`yD=#oy= zHB@x0Po`s3+7FEv9dB|rKOR)Ac;SSuxTTbPaK|`!P33;q*Awnk~|jNbB5?}hfDZ2U(-uK)cmX#z}yt5 ztrw1x)je0LyNa@nDS6yK5#WE=ca<&+t>^O(9khy6Ks7DASwfRawv?639X`ni;^_7F zNDedp>eL`(;m$5<*EHn0EMbCMrkPz+xIA`mAK(!$T&GoG8;c-|dUKv>=U<>+5S3sj 
zc6TRd>GlN$6jbA>s)81~lU#Am$e;=D}d z1oYNm&Lm7)d*V=OshYPbWF;yZlnhOh){P+qGKo$)YxnyM{8kXg7`o+-Av8qt^H4>_km{EDWJw`(qj?afg&^0%CVffw5)Z#rpR6*i6@ z5YkuYlIN~=udk36uvFYTiTg_sB>O#IT@lS4BetH)Hn0w7gxDqt6zf^ZNa;Iv-dYIA znHvM^Xox1Cds1#W*m(r*~Yb;U8Dj{!Js>fSs4dhp$#S;O_U%F!#PCY}a2q_xtzB$GDV|;lq z10HVU0y*89no5={?ns`~K`K8%i8CwiHb;P48wb%}a7DHJ<~VSqe~zxq!89Zhh9_YjspVN2S>BsKL2zKMoeuh{KFmVP{j}PL z16d2b!9i2IF!M3TV@@mG77ktR9N%#5!ML{z{Yy&z#vJ}8pR}%%)-x(qCz5L;t%_G^ z%b#y^7X{`|-e#y!R!8{B=k81rD_aykIKZ6hFaF^wfP=Wo8=KrGg**CF*wWN+8-&1S zAoJmgqG$>hJILT439c}_>YV%CEmkE*8UJ`ItJ%P0DXWPVsUtc`mzc4K#`91i|5@gg zPgMN3mrf#)EN@SJ8=hDnhH65&)(8oQhj=M77Wxzh9rlN-W;Jy|wzDPvkPIP&&3Tg1 z(6uzKbdiwuET79Lhfhh9k?fkwtlMNx18ZUjL4H z2yq(Ma&nBTH)LA=NRE_+aFUs3{B7Qlt%N))k&cp^3xz=NH=CwH9G}azzAVJ`Qrc)M z^-+b7=pWTkNNH1#l5E4$%T)}nlVjbtFrE^+=AL9KjQJR2Wc-kp+;E6)Y4R)XNj|0t zz!{kNU^3ZL%re{8RMyJ3FI>tk3zxc5G5Vt`pAUGflADzHaQA6?Mhu&Z!yzjErmM=b zvj)LnrwrX8rI$#sNEp5T`Xop7hxX9!5dp@0tm3;`8}tZs3D&wK8OB>{Tr~1k+-3_& zP0Y-}ISla{f=QNL@gjRT)3ZJIrDJNb>Rk{9`WJO-3s0ajs*W(CKjA*@FkYEgKZ}+^ z`XzIc^g;=qUfGn^llB!fTu)9x=^hZ&)%H=fgvXS%XI)`VQ}?W1L`7nmL6ABuV3>9Z zw~0``9AS}g3cmLP&*ZE&$ zwqvP2h^msUnl7D~O37)Kqx3bDQE~;L$9TkdR{dj+=`O3})ze$e*{Dqo1&vRbw!xEA zYY2I_IhiO1dwpfD=tgH$WR=w3T6{H#hv!5h!k9>AT$w498--+nIp;D3mk_fg7H{49 z1>H1BfniP(M}4+pmDuh4>g}&3I*0%YT zWWJcp=j8&;6()-+5nH#eAn%Y$Y?IsOxT9H}4ff1=%}{t&Xe@G!YMAIy!+D~;n1UZF zfTH5R2=|doLU(Sj3&0tgiIegY#Ri#AF$%q)Vutxv#y?b&vmEW040ZHi*H(r#P)Uf7 zd(1fLhUvGucg!}hAxKt73ojph2|7Y9v}Of>mR=O0K_m^srq^h=Cplp9;>U{|5EY|$ z(3(up>e}a>Svs55%TOJ`uNp1Q8X@eh`88XG3oUvrUQs~C`0`Kz_CjZH$5W;LNEh?7 zZc3U6&h!pDON91~6~K3*;?L!K;iAp#Bqe5Z=-OZd+gj6Vwxl=LJ1f&|*=hz|6nB}1 zqZj7a@=AQHD?)oXd2T=TxkG<88&CY>sgL`yFBe#B_mW|J)=zg%_=yt$x1dLE)3th# zZhunXxQIVIQkSjwu!f{8-NS~f6BYf9JsTH9#g8Xf*iF*-Ds1H|!C)Mg=_QhL-I+Ws zaIT;x4I<*vfDb<@?!ud2Hd)n$_3=)6#8^qeFiS5a5N@}55*7c$TrDoQ(j(%%hM$=; z$?|~}=S4OyO#S>~s7f6lF=ArSDkIykNqYfdtY+EE6_%6JTkban-H8^qvq)1CW6q$9 z@+^5w?~oj*owQsCL>1y|JU(Pa<#KHhHg0^4!w0h^kA?{(j#I+Wy)GUOOA=A(< 
zErt%R_?ISOe09alL=G6xh23UyE~^FbQ6y6!rePCbZ@Jt$a871j-H--CiksX%7yt0v zc4HyMg(rnzKF^e9NFt$BWEje~?N=z_78U>NtEvn;vK`ZK zd*cD`kpKkYb2GluorH=TD{<-$#^zgSF3a0jORgkT{Zt)^J~ig<*gxu7%KXN z?cAe(quUg<5L~V-m6Y>JbK&+>FC3T|NUmT_UvEv<@&?JZTz~2}b{|RFkfrI9qXTzx z7U$@nJ1|j90%WHc5hW*&Z>&T_#UF4S`rhQ|NzpYL`kV0bm9kUDoP#6N8CpWp!vH-e z{?Wd7?W-4JgclNR@_2vldm1FsNh+|X;~0Od7KO36Z#>YG2p4u%toSL@+vQNmCS1@0 zk{n5$JG1%657V8}GgU2jAgEf%m@S>$&%SmzS_yZZFQ~ z&@zotRQy$b|E^vnxVrkTW9&G2{Oi1Oq;5alO$IS|+ijij#wVlVFTeNxcYa??;TaWw zC3)Up)Ctw!zx%UKKl&p!U)|NEWS|APyF@cZvZAHMtFdFxOQ diff --git a/README-CN.md b/README-CN.md index a07ef89..1686c22 100644 --- a/README-CN.md +++ b/README-CN.md @@ -25,6 +25,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 - 可以使用 [Claude](https://console.anthropic.com/docs) 模型进行翻译 `--model claude --claude_key ${claude_key}` - 可以使用 google 来翻译 `--model google` - 可用使用彩云进行翻译 `--model caiyun --caiyun_key ${caiyun_key}` +- 可用使用 Gemini 进行翻译 `--model gemini --gemini_key ${gemini_key}` - 使用 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢) - 使用 `--language` 指定目标语言,例如: `--language "Simplified Chinese"`,预设值为 `"Simplified Chinese"`. 
请阅读 helper message 来查找可用的目标语言: `python make_book.py --help` @@ -57,6 +58,9 @@ python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${open # 或翻译完整本书 python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --language zh-hans +# Or translate the whole book using Gemini +python3 make_book.py --book_name test_books/animal_farm.epub --gemini_key ${gemini_key} --model gemini + # 指定环境变量来略过 --openai_key export OPENAI_API_KEY=${your_api_key} diff --git a/README.md b/README.md index fd18901..72b457b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u ![image](https://user-images.githubusercontent.com/15976103/222317531-a05317c5-4eee-49de-95cd-04063d9539d9.png) ## Supported Models -gpt-4, gpt-3.5-turbo, claude-2, palm, llama-2, azure-openai, command-nightly +gpt-4, gpt-3.5-turbo, claude-2, palm, llama-2, azure-openai, command-nightly, gemini For using Non-OpenAI models, use class `liteLLM()` - liteLLM supports all models above. Find more info here for using liteLLM: https://github.com/BerriAI/litellm/blob/main/setup.py @@ -28,6 +28,7 @@ Find more info here for using liteLLM: https://github.com/BerriAI/litellm/blob/m If using `GPT4`, you can add `--use_context` to add a context paragraph to each passage sent to the model for translation (see below) - support DeepL model [DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator) need pay to get the token use `--model deepl --deepl_key ${deepl_key}` - support DeepL free model `--model deeplfree` +- support Google [Gemini](https://makersuite.google.com/app/apikey) model `--model gemini --gemini_key ${gemini_key}` - Support [Claude](https://console.anthropic.com/docs) model, use `--model claude --claude_key ${claude_key}` - Use `--test` option to preview the result if you haven't paid for the service. Note that there is a limit and it may take some time. 
- Set the target language like `--language "Simplified Chinese"`. Default target language is `"Simplified Chinese"`. @@ -72,6 +73,9 @@ python3 make_book.py --book_name test_books/Lex_Fridman_episode_322.srt --openai # Or translate the whole book python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --language zh-hans +# Or translate the whole book using Gemini +python3 make_book.py --book_name test_books/animal_farm.epub --gemini_key ${gemini_key} --model gemini + # Set env OPENAI_API_KEY to ignore option --openai_key export OPENAI_API_KEY=${your_api_key} @@ -81,6 +85,7 @@ python3 make_book.py --book_name test_books/animal_farm.epub --model gpt4 --use_ # Use the DeepL model with Japanese python3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key} --language ja + # Use the Claude model with Japanese python3 make_book.py --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key} --language ja diff --git a/book_maker/cli.py b/book_maker/cli.py index 139c52c..1004439 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -106,6 +106,14 @@ def main(): help="you should build your own translation api", ) + # for Google Gemini + parser.add_argument( + "--gemini_key", + dest="gemini_key", + type=str, + help="You can get Gemini Key from https://makersuite.google.com/app/apikey", + ) + parser.add_argument( "--test", dest="test", @@ -308,6 +316,8 @@ So you are close to reaching the limit. 
You have to choose your own value, there API_KEY = options.custom_api or env.get("BBM_CUSTOM_API") if not API_KEY: raise Exception("Please provide custom translate api") + elif options.model == "gemini": + API_KEY = options.gemini_key or env.get("BBM_GOOGLE_GEMINI_KEY") else: API_KEY = "" diff --git a/book_maker/translator/__init__.py b/book_maker/translator/__init__.py index 4e494ee..c0f265a 100644 --- a/book_maker/translator/__init__.py +++ b/book_maker/translator/__init__.py @@ -4,6 +4,7 @@ from book_maker.translator.deepl_translator import DeepL from book_maker.translator.deepl_free_translator import DeepLFree from book_maker.translator.google_translator import Google from book_maker.translator.claude_translator import Claude +from book_maker.translator.gemini_translator import Gemini from book_maker.translator.custom_api_translator import CustomAPI MODEL_DICT = { @@ -14,6 +15,7 @@ MODEL_DICT = { "deeplfree": DeepLFree, "gpt4": ChatGPTAPI, "claude": Claude, - "customapi": CustomAPI + "gemini": Gemini, + "customapi": CustomAPI, # add more here } diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py index 038a9d4..e880bfb 100644 --- a/book_maker/translator/chatgptapi_translator.py +++ b/book_maker/translator/chatgptapi_translator.py @@ -128,7 +128,8 @@ class ChatGPTAPI(Base): print(f"Get {attempt_count} consecutive exceptions") raise except Exception as e: - print(str(e), "!!") + print(str(e)) + return # todo: Determine whether to print according to the cli option if needprint: diff --git a/book_maker/translator/gemini_translator.py b/book_maker/translator/gemini_translator.py new file mode 100644 index 0000000..872772d --- /dev/null +++ b/book_maker/translator/gemini_translator.py @@ -0,0 +1,83 @@ +import re +import time + +import google.generativeai as genai +from google.generativeai.types.generation_types import ( + StopCandidateException, + BlockedPromptException, +) +from rich import print + +from 
from .base_translator import Base

# Sampling parameters handed to the Gemini model when it is constructed.
generation_config = {
    "temperature": 0.7,
    "top_p": 1,
    "top_k": 1,
    "max_output_tokens": 2048,
}

# One threshold entry per harm category, handed to the Gemini model when it
# is constructed.
safety_settings = [
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_MEDIUM_AND_ABOVE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_MEDIUM_AND_ABOVE",
    },
]

# Captures the first double-quoted ``text:`` value found in the string form of
# a StopCandidateException. Compiled once instead of on every failure.
# NOTE(review): the capture ends at the first '"', so a candidate containing an
# escaped quote would be truncated -- confirm against real exception output.
_STOP_CANDIDATE_TEXT_RE = re.compile(
    r'content\s*{\s*parts\s*{\s*text:\s*"([^"]+)"'
)


class Gemini(Base):
    """Translator that sends each passage to a ``gemini-pro`` chat session.

    A single chat session is created in ``__init__`` and reused for every
    ``translate`` call.
    """

    DEFAULT_PROMPT = "Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text"

    # Once the chat history holds more than this many entries, the two oldest
    # are dropped after each call.
    # NOTE(review): presumably those two are one request/response pair -- confirm.
    MAX_HISTORY = 10

    # Seconds to sleep after every request (the original comment: "for limit").
    REQUEST_INTERVAL = 0.5

    def __init__(self, key: str, language: str, **kwargs) -> None:
        """Configure the Gemini client and open the chat session.

        Args:
            key: Gemini API key, passed to ``genai.configure``.
            language: Target language, forwarded to ``Base.__init__``.
            **kwargs: Accepted for signature compatibility with the other
                translators; not used here.
        """
        genai.configure(api_key=key)
        super().__init__(key, language)
        model = genai.GenerativeModel(
            model_name="gemini-pro",
            generation_config=generation_config,
            safety_settings=safety_settings,
        )
        self.convo = model.start_chat()

    def rotate_key(self) -> None:
        """Do nothing: this translator is configured with a single key."""
        pass

    def translate(self, text: str) -> str:
        """Translate ``text`` into ``self.language`` and return the result.

        Failures are never propagated to the caller; a human-readable
        placeholder string is returned instead:

        * ``StopCandidateException``: the partial text is recovered from the
          exception's string form when possible, else ``"Can not translate"``.
        * ``BlockedPromptException``: a "SAFETY reason" placeholder.
        * any other exception: an "other reason" placeholder.

        Every call ends with a ``REQUEST_INTERVAL`` second pause.
        """
        t_text = ""
        try:
            self.convo.send_message(
                self.DEFAULT_PROMPT.format(text=text, language=self.language)
            )
            print(text)
            t_text = self.convo.last.text.strip()
        except StopCandidateException as e:
            # The candidate text is only reachable through the exception's
            # string form here, so it is pulled out with a regex.
            match = _STOP_CANDIDATE_TEXT_RE.search(str(e))
            if match:
                # The captured text carries literal backslash-n sequences;
                # turn them back into real newlines.
                t_text = match.group(1).replace("\\n", "\n")
            else:
                t_text = "Can not translate"
        except BlockedPromptException as e:
            print(str(e))
            t_text = "Can not translate by SAFETY reason.(因安全问题不能翻译)"
        except Exception as e:
            # Deliberate best-effort: one failed passage must not abort the
            # whole book, so the error is reported and a placeholder returned.
            print(str(e))
            # The Chinese half previously repeated the "safety" wording from
            # the handler above, which misreported the cause.
            t_text = "Can not translate by other reason.(因其他原因不能翻译)"

        if len(self.convo.history) > self.MAX_HISTORY:
            self.convo.history = self.convo.history[2:]

        # Collapse runs of 3+ newlines for display only; the returned text is
        # left untouched.
        print("[bold green]" + re.sub("\n{3,}", "\n\n", t_text) + "[/bold green]")
        # for limit
        time.sleep(self.REQUEST_INTERVAL)
        return t_text
a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ packages = [ "tqdm", "tiktoken", "PyDeepLX", + "google-generativeai", ]