mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-02 09:30:24 +00:00

* add prompt template * format output for ChatGPTAPITranslator * black format files * fix: google txt loader failed --------- Co-authored-by: yihong0618 <zouzou0208@gmail.com>
193 lines
5.7 KiB
Python
193 lines
5.7 KiB
Python
import argparse
|
|
import os
|
|
from os import environ as env
|
|
|
|
from book_maker.loader import BOOK_LOADER_DICT
|
|
from book_maker.translator import MODEL_DICT
|
|
from book_maker.utils import LANGUAGES, TO_LANGUAGE_CODE
|
|
import book_maker.obok as obok
|
|
|
|
|
|
def parse_prompt_arg(prompt_arg):
|
|
prompt = None
|
|
if prompt_arg is None:
|
|
return prompt
|
|
if not prompt_arg.endswith(".txt"):
|
|
prompt = prompt_arg
|
|
else:
|
|
if os.path.exists(prompt_arg):
|
|
with open(prompt_arg, "r") as f:
|
|
prompt = f.read()
|
|
else:
|
|
raise FileNotFoundError(f"{prompt_arg} not found")
|
|
if prompt is None or not (all(c in prompt for c in ["{text}", "{language}"])):
|
|
raise ValueError("prompt must contain `{text}` and `{language}`")
|
|
return prompt
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument(
|
|
"--book_name",
|
|
dest="book_name",
|
|
type=str,
|
|
help="path of the epub file to be translated",
|
|
)
|
|
parser.add_argument(
|
|
"--book_from",
|
|
dest="book_from",
|
|
type=str,
|
|
choices=["kobo"], # support kindle later
|
|
metavar="E-READER",
|
|
help="e-reader type, available: {%(choices)s}",
|
|
)
|
|
parser.add_argument(
|
|
"--device_path",
|
|
dest="device_path",
|
|
type=str,
|
|
help="Path of e-reader device",
|
|
)
|
|
parser.add_argument(
|
|
"--openai_key",
|
|
dest="openai_key",
|
|
type=str,
|
|
default="",
|
|
help="OpenAI api key,if you have more than one key, please use comma"
|
|
" to split them to go beyond the rate limits",
|
|
)
|
|
parser.add_argument(
|
|
"--test",
|
|
dest="test",
|
|
action="store_true",
|
|
help="only the first 10 paragraphs will be translated, for testing",
|
|
)
|
|
parser.add_argument(
|
|
"--test_num",
|
|
dest="test_num",
|
|
type=int,
|
|
default=10,
|
|
help="how many paragraphs will be translated for testing",
|
|
)
|
|
parser.add_argument(
|
|
"-m",
|
|
"--model",
|
|
dest="model",
|
|
type=str,
|
|
default="chatgptapi",
|
|
choices=["chatgptapi", "gpt3", "google"], # support DeepL later
|
|
metavar="MODEL",
|
|
help="model to use, available: {%(choices)s}",
|
|
)
|
|
parser.add_argument(
|
|
"--language",
|
|
type=str,
|
|
choices=sorted(LANGUAGES.keys())
|
|
+ sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
|
|
default="zh-hans",
|
|
metavar="LANGUAGE",
|
|
help="language to translate to, available: {%(choices)s}",
|
|
)
|
|
parser.add_argument(
|
|
"--resume",
|
|
dest="resume",
|
|
action="store_true",
|
|
help="if program stop unexpected you can use this to resume",
|
|
)
|
|
parser.add_argument(
|
|
"-p",
|
|
"--proxy",
|
|
dest="proxy",
|
|
type=str,
|
|
default="",
|
|
help="use proxy like http://127.0.0.1:7890",
|
|
)
|
|
# args to change api_base
|
|
parser.add_argument(
|
|
"--api_base",
|
|
dest="api_base",
|
|
type=str,
|
|
help="specify base url other than the OpenAI's official API address",
|
|
)
|
|
parser.add_argument(
|
|
"--translate-tags",
|
|
dest="translate_tags",
|
|
type=str,
|
|
default="p",
|
|
help="example --translate-tags p,blockquote",
|
|
)
|
|
parser.add_argument(
|
|
"--allow_navigable_strings",
|
|
dest="allow_navigable_strings",
|
|
action="store_true",
|
|
default=False,
|
|
help="allow NavigableStrings to be translated",
|
|
)
|
|
parser.add_argument(
|
|
"--prompt",
|
|
dest="prompt_template",
|
|
type=str,
|
|
metavar="PROMPT_TEMPLATE",
|
|
help="used for customizing the prompt. It can be the prompt template string, or a path to the template file. The valid placeholders are `{text}` and `{language}`.",
|
|
)
|
|
|
|
options = parser.parse_args()
|
|
PROXY = options.proxy
|
|
if PROXY != "":
|
|
os.environ["http_proxy"] = PROXY
|
|
os.environ["https_proxy"] = PROXY
|
|
|
|
translate_model = MODEL_DICT.get(options.model)
|
|
assert translate_model is not None, "unsupported model"
|
|
if options.model in ["gpt3", "chatgptapi"]:
|
|
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
|
|
if not OPENAI_API_KEY:
|
|
raise Exception(
|
|
"OpenAI API key not provided, please google how to obtain it"
|
|
)
|
|
else:
|
|
OPENAI_API_KEY = ""
|
|
|
|
if options.book_from == "kobo":
|
|
device_path = options.device_path
|
|
if device_path is None:
|
|
raise Exception(
|
|
"Device path is not given, please specify the path by --device_path <DEVICE_PATH>"
|
|
)
|
|
options.book_name = obok.cli_main(device_path)
|
|
|
|
book_type = options.book_name.split(".")[-1]
|
|
support_type_list = list(BOOK_LOADER_DICT.keys())
|
|
if book_type not in support_type_list:
|
|
raise Exception(
|
|
f"now only support files of these formats: {','.join(support_type_list)}"
|
|
)
|
|
|
|
book_loader = BOOK_LOADER_DICT.get(book_type)
|
|
assert book_loader is not None, "unsupported loader"
|
|
language = options.language
|
|
if options.language in LANGUAGES:
|
|
# use the value for prompt
|
|
language = LANGUAGES.get(language, language)
|
|
|
|
# change api_base for issue #42
|
|
model_api_base = options.api_base
|
|
|
|
e = book_loader(
|
|
options.book_name,
|
|
translate_model,
|
|
OPENAI_API_KEY,
|
|
options.resume,
|
|
language=language,
|
|
model_api_base=model_api_base,
|
|
is_test=options.test,
|
|
test_num=options.test_num,
|
|
translate_tags=options.translate_tags,
|
|
allow_navigable_strings=options.allow_navigable_strings,
|
|
prompt_template=parse_prompt_arg(options.prompt_template),
|
|
)
|
|
e.make_bilingual_book()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|