diff --git a/.github/workflows/make_test_ebook.yaml b/.github/workflows/make_test_ebook.yaml index 8565ae8..90d0f42 100644 --- a/.github/workflows/make_test_ebook.yaml +++ b/.github/workflows/make_test_ebook.yaml @@ -37,6 +37,10 @@ jobs: run: | python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --test_num 20 --model google + - name: make txt book test with batch_size + run: | + python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --batch_size 30 --test_num 20 --model google + - name: make openai key ebook test if: env.OPENAI_API_KEY != null diff --git a/README-CN.md b/README-CN.md index ea89315..028139f 100644 --- a/README-CN.md +++ b/README-CN.md @@ -42,7 +42,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 16. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 17. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 18. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** - +19. 使用`--batch_size` 参数,指定批量翻译的行数(默认行数为10,目前只对txt生效) ### 示范用例 **如果使用 `pip install bbook_maker` 以下命令都可以改成 `bbook args`** @@ -72,6 +72,9 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo # 翻译 txt 文件 python3 make_book.py --book_name test_books/the_little_prince.txt --test +# 聚合多行翻译 txt 文件 +python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20 + ``` 更加小白的示例 diff --git a/README.md b/README.md index 7b55549..0150d4f 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u 16. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. 17. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. 18. 
If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.** +19. Use the `--batch_size` parameter to specify the number of lines for batch translation (default is 10, currently only effective for txt files). ### Examples @@ -74,6 +75,8 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo # translate txt file python3 make_book.py --book_name test_books/the_little_prince.txt --test --language zh-hans +# translate a txt file, aggregating multiple lines per translation request +python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20 ``` More understandable example diff --git a/book_maker/cli.py b/book_maker/cli.py index 67eeff3..8314a09 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -156,6 +156,13 @@ def main(): metavar="PROMPT_ARG", help="used for customizing the prompt. It can be the prompt template string, or a path to the template file. 
The valid placeholders are `{text}` and `{language}`.", ) + parser.add_argument( + "--batch_size", + dest="batch_size", + type=int, + default=10, + help="how many lines will be translated by aggregated translation(This options currently only applies to txt files)", + ) options = parser.parse_args() PROXY = options.proxy @@ -219,6 +226,7 @@ def main(): translate_tags=options.translate_tags, allow_navigable_strings=options.allow_navigable_strings, prompt_config=parse_prompt_arg(options.prompt_arg), + batch_size=options.batch_size, ) e.make_bilingual_book() diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index 462a5b5..3587ae7 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -22,6 +22,7 @@ class EPUBBookLoader(BaseBookLoader): key, resume, language, + batch_size, model_api_base=None, is_test=False, test_num=5, diff --git a/book_maker/loader/txt_loader.py b/book_maker/loader/txt_loader.py index b64c827..f5238a7 100644 --- a/book_maker/loader/txt_loader.py +++ b/book_maker/loader/txt_loader.py @@ -14,6 +14,7 @@ class TXTBookLoader(BaseBookLoader): key, resume, language, + batch_size, translate_tags, allow_navigable_strings, model_api_base=None, @@ -33,6 +34,7 @@ class TXTBookLoader(BaseBookLoader): self.bilingual_result = [] self.bilingual_temp_result = [] self.test_num = test_num + self.batch_size = batch_size try: with open(f"{txt_name}", "r", encoding="utf-8") as f: @@ -58,17 +60,22 @@ class TXTBookLoader(BaseBookLoader): p_to_save_len = len(self.p_to_save) try: - for i in self.origin_book: - if self._is_special_text(i): + sliced_list = [ + self.origin_book[i : i + self.batch_size] + for i in range(0, len(self.origin_book), self.batch_size) + ] + for i in sliced_list: + batch_text = "".join(i) + if self._is_special_text(batch_text): continue if self.resume and index < p_to_save_len: pass else: - temp = self.translate_model.translate(i) + temp = self.translate_model.translate(batch_text) 
self.p_to_save.append(temp) - self.bilingual_result.append(i) + self.bilingual_result.append(batch_text) self.bilingual_result.append(temp) - index += 1 + index += self.batch_size if self.is_test and index > self.test_num: break @@ -86,8 +93,14 @@ class TXTBookLoader(BaseBookLoader): def _save_temp_book(self): index = 0 - for i in range(0, len(self.origin_book)): - self.bilingual_temp_result.append(self.origin_book[i]) + sliced_list = [ + self.origin_book[i : i + self.batch_size] + for i in range(0, len(self.origin_book), self.batch_size) + ] + + for i in range(0, len(sliced_list)): + batch_text = "".join(sliced_list[i]) + self.bilingual_temp_result.append(batch_text) if self._is_special_text(self.origin_book[i]): continue if index < len(self.p_to_save):