mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-05 19:15:34 +00:00
feat: batch translate txt file (#153)
* feat: batch translate txt file * feat: batch size customed * resolve conflicts
This commit is contained in:
parent
f09c2717f1
commit
5d2c89a841
4
.github/workflows/make_test_ebook.yaml
vendored
4
.github/workflows/make_test_ebook.yaml
vendored
@ -37,6 +37,10 @@ jobs:
|
||||
run: |
|
||||
python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --test_num 20 --model google
|
||||
|
||||
- name: make txt book test with batch_size
|
||||
run: |
|
||||
python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --batch_size 30 --test_num 20 --model google
|
||||
|
||||
|
||||
- name: make openai key ebook test
|
||||
if: env.OPENAI_API_KEY != null
|
||||
|
@ -42,7 +42,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制
|
||||
16. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
|
||||
17. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
|
||||
18. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书**
|
||||
|
||||
19. 使用`--batch_size` 参数,指定批量翻译的行数(默认行数为10,目前只对txt生效)
|
||||
### 示范用例
|
||||
|
||||
**如果使用 `pip install bbook_maker` 以下命令都可以改成 `bbook args`**
|
||||
@ -72,6 +72,9 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo
|
||||
|
||||
# 翻译 txt 文件
|
||||
python3 make_book.py --book_name test_books/the_little_prince.txt --test
|
||||
# 聚合多行翻译 txt 文件
|
||||
python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20
|
||||
|
||||
```
|
||||
|
||||
更加小白的示例
|
||||
|
@ -41,6 +41,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u
|
||||
16. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` will be generated.
|
||||
17. If an error occurs or you interrupt the translation by pressing `CTRL+C`, a book named `${book_name}_bilingual_temp.epub` will be generated. You can simply rename it to any desired name.
|
||||
18. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**
|
||||
19. Use the `--batch_size` parameter to specify the number of lines for batch translation (default is 10, currently only effective for txt files).
|
||||
|
||||
### Examples
|
||||
|
||||
@ -74,6 +75,8 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo
|
||||
|
||||
# translate txt file
|
||||
python3 make_book.py --book_name test_books/the_little_prince.txt --test --language zh-hans
|
||||
# translate a txt file, aggregating multiple lines per batch
|
||||
python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20
|
||||
```
|
||||
|
||||
More understandable example
|
||||
|
@ -156,6 +156,13 @@ def main():
|
||||
metavar="PROMPT_ARG",
|
||||
help="used for customizing the prompt. It can be the prompt template string, or a path to the template file. The valid placeholders are `{text}` and `{language}`.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--batch_size",
|
||||
dest="batch_size",
|
||||
type=int,
|
||||
default=10,
|
||||
help="how many lines will be translated by aggregated translation(This options currently only applies to txt files)",
|
||||
)
|
||||
|
||||
options = parser.parse_args()
|
||||
PROXY = options.proxy
|
||||
@ -219,6 +226,7 @@ def main():
|
||||
translate_tags=options.translate_tags,
|
||||
allow_navigable_strings=options.allow_navigable_strings,
|
||||
prompt_config=parse_prompt_arg(options.prompt_arg),
|
||||
batch_size=options.batch_size,
|
||||
)
|
||||
e.make_bilingual_book()
|
||||
|
||||
|
@ -22,6 +22,7 @@ class EPUBBookLoader(BaseBookLoader):
|
||||
key,
|
||||
resume,
|
||||
language,
|
||||
batch_size,
|
||||
model_api_base=None,
|
||||
is_test=False,
|
||||
test_num=5,
|
||||
|
@ -14,6 +14,7 @@ class TXTBookLoader(BaseBookLoader):
|
||||
key,
|
||||
resume,
|
||||
language,
|
||||
batch_size,
|
||||
translate_tags,
|
||||
allow_navigable_strings,
|
||||
model_api_base=None,
|
||||
@ -33,6 +34,7 @@ class TXTBookLoader(BaseBookLoader):
|
||||
self.bilingual_result = []
|
||||
self.bilingual_temp_result = []
|
||||
self.test_num = test_num
|
||||
self.batch_size = batch_size
|
||||
|
||||
try:
|
||||
with open(f"{txt_name}", "r", encoding="utf-8") as f:
|
||||
@ -58,17 +60,22 @@ class TXTBookLoader(BaseBookLoader):
|
||||
p_to_save_len = len(self.p_to_save)
|
||||
|
||||
try:
|
||||
for i in self.origin_book:
|
||||
if self._is_special_text(i):
|
||||
sliced_list = [
|
||||
self.origin_book[i : i + self.batch_size]
|
||||
for i in range(0, len(self.origin_book), self.batch_size)
|
||||
]
|
||||
for i in sliced_list:
|
||||
batch_text = "".join(i)
|
||||
if self._is_special_text(batch_text):
|
||||
continue
|
||||
if self.resume and index < p_to_save_len:
|
||||
pass
|
||||
else:
|
||||
temp = self.translate_model.translate(i)
|
||||
temp = self.translate_model.translate(batch_text)
|
||||
self.p_to_save.append(temp)
|
||||
self.bilingual_result.append(i)
|
||||
self.bilingual_result.append(batch_text)
|
||||
self.bilingual_result.append(temp)
|
||||
index += 1
|
||||
index += self.batch_size
|
||||
if self.is_test and index > self.test_num:
|
||||
break
|
||||
|
||||
@ -86,8 +93,14 @@ class TXTBookLoader(BaseBookLoader):
|
||||
|
||||
def _save_temp_book(self):
|
||||
index = 0
|
||||
for i in range(0, len(self.origin_book)):
|
||||
self.bilingual_temp_result.append(self.origin_book[i])
|
||||
sliced_list = [
|
||||
self.origin_book[i : i + self.batch_size]
|
||||
for i in range(0, len(self.origin_book), self.batch_size)
|
||||
]
|
||||
|
||||
for i in range(0, len(sliced_list)):
|
||||
batch_text = "".join(sliced_list[i])
|
||||
self.bilingual_temp_result.append(batch_text)
|
||||
if self._is_special_text(self.origin_book[i]):
|
||||
continue
|
||||
if index < len(self.p_to_save):
|
||||
|
Loading…
x
Reference in New Issue
Block a user