feat: add txt book loader (#143)

* feat: add txt book loader * chore: add test book * style: black * feat: add _save_temp_book * doc: add txt support desc
2025-07-17 15:40:06 +00:00 · 2023-03-11 14:37:25 +08:00 · 2023-03-11 14:37:25 +08:00 · aaa1ab4d7b
commit aaa1ab4d7b
parent d95f0b3942
6 changed files with 3280 additions and 6 deletions
--- a/.github/workflows/make_test_ebook.yaml
+++ b/.github/workflows/make_test_ebook.yaml
@ -33,6 +33,11 @@ jobs:
            python3 make_book.py --book_name "test_books/Liber_Esther.epub" --test --test_num 10 --model google --translate-tags div,p
            python3 make_book.py --book_name "test_books/Liber_Esther.epub" --test --test_num 20 --model google

+      - name: make txt book test using google translate
+        run: |
+          python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --test_num 20 --model google
+
+
      - name: make openai key ebook test
        if: env.OPENAI_API_KEY != null
        run: |
--- a/README-CN.md
+++ b/README-CN.md
@ -1,6 +1,6 @@
 # bilingual_book_maker

-bilingual_book_maker 是一个 AI 翻译工具，使用 ChatGPT 帮助用户制作多语言版本的 epub 文件和图书。该工具仅适用于翻译进入公共版权领域的 epub 图书，不适用于有版权的书籍。请在使用之前阅读项目的 **[免责声明](./disclaimer.md)**。
+bilingual_book_maker 是一个 AI 翻译工具，使用 ChatGPT 帮助用户制作多语言版本的 epub/txt 文件和图书。该工具仅适用于翻译进入公共版权领域的 epub/txt 图书，不适用于有版权的书籍。请在使用之前阅读项目的 **[免责声明](./disclaimer.md)**。

 ![image](https://user-images.githubusercontent.com/15976103/222317531-a05317c5-4eee-49de-95cd-04063d9539d9.png)

@ -8,7 +8,7 @@ bilingual_book_maker 是一个 AI 翻译工具，使用 ChatGPT 帮助用户制
 ## 准备

 1. ChatGPT or OpenAI token [^token]
-2. epub books
+2. epub/txt books
 3. 能正常联网的环境或 proxy
 4. python3.8+

@ -50,6 +50,9 @@ python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --lang

 # Translate contents in <div> and <p>
 python3 make_book.py --book_name test_books/animal_farm.epub --translate-tags div,p
+
+# 翻译 txt 文件
+python3 make_book.py --book_name test_books/the_little_prince.txt --openai_key ${openai_key} --test
 ```

 更加小白的示例
--- a/README.md
+++ b/README.md
@ -5,7 +5,7 @@ Usage: make sure to add `--model google` in the command.
 **[中文](./README-CN.md) | English**

 # bilingual_book_maker
-The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub files and books. This tool is exclusively designed for translating epub books that have entered the public domain and is not intended for copyrighted works. Before using this tool, please review the project's **[disclaimer](./disclaimer.md)**.
+The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books. This tool is exclusively designed for translating epub/txt books that have entered the public domain and is not intended for copyrighted works. Before using this tool, please review the project's **[disclaimer](./disclaimer.md)**.

 ![image](https://user-images.githubusercontent.com/15976103/222317531-a05317c5-4eee-49de-95cd-04063d9539d9.png)

@ -13,7 +13,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u
 ## Preparation

 1. ChatGPT or OpenAI token [^token]
-2. epub books
+2. epub/txt books
 3. Environment with internet access or proxy
 4. Python 3.8+

@ -55,6 +55,9 @@ python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --lang

 # Translate contents in <div> and <p>
 python3 make_book.py --book_name test_books/animal_farm.epub --translate-tags div,p
+
+# translate txt file
+python3 make_book.py --book_name test_books/the_little_prince.txt --openai_key ${openai_key} --test --language zh-hans
 ```

 More understandable example
--- a/book_maker/loader/init.py
+++ b/book_maker/loader/init.py
@ -1,6 +1,9 @@
 from book_maker.loader.epub_loader import EPUBBookLoader

+from book_maker.loader.txt_loader import TXTBookLoader
+
 BOOK_LOADER_DICT = {
-    "epub": EPUBBookLoader
+    "epub": EPUBBookLoader,
+    "txt": TXTBookLoader
    # TODO add more here
 }
--- a/book_maker/loader/txt_loader.py
+++ b/book_maker/loader/txt_loader.py
@ -1 +1,113 @@
-"""TODO"""
+import sys
+from pathlib import Path
+
+from .base_loader import BaseBookLoader
+
+
+class TXTBookLoader(BaseBookLoader):
+    def __init__(
+        self,
+        txt_name,
+        model,
+        key,
+        resume,
+        language,
+        translate_tags,
+        allow_navigable_strings,
+        model_api_base=None,
+        is_test=False,
+        test_num=5,
+    ):
+        self.txt_name = txt_name
+        self.translate_model = model(key, language, model_api_base)
+        self.is_test = is_test
+        self.p_to_save = []
+        self.bilingual_result = []
+        self.bilingual_temp_result = []
+        self.test_num = test_num
+
+        try:
+            with open(f"{txt_name}", "r", encoding="utf-8") as f:
+                self.origin_book = f.read().split("\n")
+
+        except Exception:
+            raise Exception("can not load file")
+
+        self.resume = resume
+        self.bin_path = f"{Path(txt_name).parent}/.{Path(txt_name).stem}.temp.bin"
+        if self.resume:
+            self.load_state()
+
+    @staticmethod
+    def _is_special_text(text):
+        return text.isdigit() or text.isspace() or len(text) == 0
+
+    def _make_new_book(self, book):
+        pass
+
+    def make_bilingual_book(self):
+        index = 0
+        p_to_save_len = len(self.p_to_save)
+
+        try:
+            for i in self.origin_book:
+                if self._is_special_text(i):
+                    continue
+                if self.resume and index < p_to_save_len:
+                    pass
+                else:
+                    temp = self.translate_model.translate(i)
+                    self.p_to_save.append(temp)
+                    self.bilingual_result.append(i)
+                    self.bilingual_result.append(temp)
+                index += 1
+                if self.is_test and index > self.test_num:
+                    break
+
+            self.save_file(
+                f"{Path(self.txt_name).parent}/{Path(self.txt_name).stem}_bilingual.txt",
+                self.bilingual_result,
+            )
+
+        except (KeyboardInterrupt, Exception) as e:
+            print(e)
+            print("you can resume it next time")
+            self._save_progress()
+            self._save_temp_book()
+            sys.exit(0)
+
+    def _save_temp_book(self):
+        index = 0
+        for i in range(0, len(self.origin_book)):
+            self.bilingual_temp_result.append(self.origin_book[i])
+            if self._is_special_text(self.origin_book[i]):
+                continue
+            if index < len(self.p_to_save):
+                self.bilingual_temp_result.append(self.p_to_save[index])
+            index += 1
+
+        self.save_file(
+            f"{Path(self.txt_name).parent}/{Path(self.txt_name).stem}_bilingual_temp.txt",
+            self.bilingual_temp_result,
+        )
+
+    def _save_progress(self):
+        try:
+            with open(self.bin_path, "w") as f:
+                f.write("\n".join(self.p_to_save))
+        except:
+            raise Exception("can not save resume file")
+
+    def load_state(self):
+        try:
+            with open(self.bin_path, "r", encoding="utf-8") as f:
+                self.p_to_save = f.read().split("\n")
+        except Exception:
+            raise Exception("can not load resume file")
+
+    def save_file(self, book_path, content):
+        try:
+            with open(book_path, "w", encoding="utf-8") as f:
+                f.write("\n".join(content))
+        except:
+            raise Exception("can not save file")
--- a/test_books/the_little_prince.txt
+++ b/test_books/the_little_prince.txt