From b5060b8746d9f570a8d2c512d640a06174b70c07 Mon Sep 17 00:00:00 2001 From: zstone12 <522089185@qq.com> Date: Sun, 7 May 2023 21:22:46 +0800 Subject: [PATCH] feat: support single_translate for txt & srt (#265) * feat: support single_translate for txt & srt * style: black format * chore: change func name --- book_maker/loader/srt_loader.py | 28 +++++++++++++++++++++------- book_maker/loader/txt_loader.py | 4 +++- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/book_maker/loader/srt_loader.py b/book_maker/loader/srt_loader.py index a4562b8..3cabda1 100644 --- a/book_maker/loader/srt_loader.py +++ b/book_maker/loader/srt_loader.py @@ -43,6 +43,7 @@ class SRTBookLoader(BaseBookLoader): self.test_num = test_num self.accumulated_num = 1 self.blocks = [] + self.single_translate = single_translate self.resume = resume self.bin_path = f"{Path(srt_name).parent}/.{Path(srt_name).stem}.temp.bin" @@ -76,6 +77,9 @@ class SRTBookLoader(BaseBookLoader): def _get_block_text(self, block): return f"{block['number']}\n{block['time']}\n{block['text']}" + def _get_block_except_text(self, block): + return f"{block['number']}\n{block['time']}" + def _concat_blocks(self, sliced_text: str, text: str): return f"{sliced_text}\n\n{text}" if sliced_text else text @@ -186,7 +190,7 @@ class SRTBookLoader(BaseBookLoader): translated_blocks = [] # try to translate one by one, so don't accumulate too much print( - f"retry it one by one: {self.blocks[begin]['number']} - {self.blocks[end-1]['number']}" + f"retry it one by one: {self.blocks[begin]['number']} - {self.blocks[end - 1]['number']}" ) for block in self.blocks[begin:end]: try: @@ -210,15 +214,25 @@ class SRTBookLoader(BaseBookLoader): for i, block in enumerate(translated_blocks): text = block.get("text", "") self.p_to_save.append(text) - self.bilingual_result.append( - f"{self._get_block_text(self.blocks[begin + i])}\n{text}" - ) + if self.single_translate: + self.bilingual_result.append( + f"{self._get_block_except_text(self.blocks[begin + i])}\n{text}" + ) + else: + self.bilingual_result.append( + f"{self._get_block_text(self.blocks[begin + i])}\n{text}" + ) else: for i, block in enumerate(self.blocks[begin:end]): text = self.p_to_save[begin + i] - self.bilingual_result.append( - f"{self._get_block_text(self.blocks[begin + i])}\n{text}" - ) + if self.single_translate: + self.bilingual_result.append( + f"{self._get_block_except_text(self.blocks[begin + i])}\n{text}" + ) + else: + self.bilingual_result.append( + f"{self._get_block_text(self.blocks[begin + i])}\n{text}" + ) index += end - begin if self.is_test and index > self.test_num: diff --git a/book_maker/loader/txt_loader.py b/book_maker/loader/txt_loader.py index 9a365a6..a771799 100644 --- a/book_maker/loader/txt_loader.py +++ b/book_maker/loader/txt_loader.py @@ -33,6 +33,7 @@ class TXTBookLoader(BaseBookLoader): self.bilingual_temp_result = [] self.test_num = test_num self.batch_size = 10 + self.single_translate = single_translate try: with open(f"{txt_name}", encoding="utf-8") as f: @@ -73,7 +74,8 @@ class TXTBookLoader(BaseBookLoader): print(e) raise Exception("Something is wrong when translate") from e self.p_to_save.append(temp) - self.bilingual_result.append(batch_text) + if not self.single_translate: + self.bilingual_result.append(batch_text) self.bilingual_result.append(temp) index += self.batch_size if self.is_test and index > self.test_num: