Merge branch 'yihong0618:main' into main

2025-07-23 18:40:14 +00:00 · 2024-05-05 11:52:02 +08:00 · 2024-05-05 11:52:02 +08:00 · 9de922c5b4
commit 9de922c5b4
parent 216f5b14d8 307898f5cd
12 changed files with 2204 additions and 74 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@ -0,0 +1,37 @@
+name: Release and Build Docker Image
+
+permissions:
+  contents: write
+
+on:
+  push:
+    tags:
+      - "*"
+
+jobs:
+  release-pypi:
+    name: Build and Release PyPI
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - uses: actions/setup-node@v3
+        with:
+          node-version: 16
+
+      - name: Build artifacts
+        run: |
+          pip install build
+          python -m build
+
+      - uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
+
--- a/.pdm-python
+++ b/.pdm-python
@ -0,0 +1 @@
+/home/yihong/use_now/bilingual_book_maker/.venv/bin/python
--- a/README.md
+++ b/README.md
@ -25,8 +25,9 @@ Find more info here for using liteLLM: https://github.com/BerriAI/litellm/blob/m
   Or, just set environment variable `BBM_OPENAI_API_KEY` instead.
 - A sample book, `test_books/animal_farm.epub`, is provided for testing purposes.
 - The default underlying model is [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis), which is used by ChatGPT currently. Use `--model gpt4` to change the underlying model to `GPT4`.
-   If using `GPT4`, you can add `--use_context` to add a context paragraph to each passage sent to the model for translation (see below)
- support DeepL model [DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator) need pay to get the token use `--model deepl --deepl_key ${deepl_key}`
+  - Important to note that `gpt-4` is significantly more expensive than `gpt-4-turbo`, but to avoid bumping into rate limits, we automatically balance queries across `gpt-4-1106-preview`, `gpt-4`, `gpt-4-32k`, `gpt-4-0613`,`gpt-4-32k-0613`.
+    - If you want to use a specific model alias with OpenAI (eg `gpt-4-1106-preview` or `gpt-3.5-turbo-0125`), you can use `--model openai --model_list gpt-4-1106-preview,gpt-3.5-turbo-0125`. `--model_list` takes a comma-separated list of model aliases.
+  - If using `GPT4`, you can add `--use_context` to add a context paragraph to each passage sent to the model for translation (see below).
+- support DeepL model [DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator) need pay to get the token use `--model deepl --deepl_key ${deepl_key}`
 - support DeepL free model `--model deeplfree`
 - support Google [Gemini](https://makersuite.google.com/app/apikey) model `--model gemini --gemini_key ${gemini_key}`
 - Support [Claude](https://console.anthropic.com/docs) model, use `--model claude --claude_key ${claude_key}`
@ -83,10 +84,18 @@ export OPENAI_API_KEY=${your_api_key}
 # Use the GPT-4 model with context to Japanese
 python3 make_book.py --book_name test_books/animal_farm.epub --model gpt4 --use_context --language ja

+# Use a specific OpenAI model alias
+python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list gpt-4-1106-preview --openai_key ${openai_key}
+
+# Note: you can use other OpenAI-compatible (`openai like`) models in this way
+python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list yi-34b-chat-0205 --openai_key ${openai_key} --api_base "https://api.lingyiwanwu.com/v1"
+
+# Use a specific list of OpenAI model aliases
+python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list gpt-4-1106-preview,gpt-4-0125-preview,gpt-3.5-turbo-0125 --openai_key ${openai_key}
+
 # Use the DeepL model with Japanese
 python3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key} --language ja

-
 # Use the Claude model with Japanese
 python3 make_book.py --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key} --language ja

--- a/book_maker/cli.py
+++ b/book_maker/cli.py
@ -137,6 +137,14 @@ def main():
        metavar="MODEL",
        help="model to use, available: {%(choices)s}",
    )
+    parser.add_argument(
+        "--ollama_model",
+        dest="ollama_model",
+        type=str,
+        default="",  # must be falsy when the flag is absent: later code branches on `if options.ollama_model`
+        metavar="MODEL",
+        help="use ollama",
+    )
    parser.add_argument(
        "--language",
        type=str,
@ -275,6 +283,12 @@ So you are close to reaching the limit. You have to choose your own value, there
        default=-1,
        help="merge multiple paragraphs into one block, may increase accuracy and speed up the process, but disturb the original format, must be used with `--single_translate`",
    )
+    parser.add_argument(
+        "--model_list",
+        type=str,
+        dest="model_list",
+        help="Rather than using our preset lists of models, specify exactly the models you want as a comma separated list `gpt-4-32k,gpt-3.5-turbo-0125` (Currently only supports: `openai`)",
+    )

    options = parser.parse_args()

@ -290,7 +304,7 @@ So you are close to reaching the limit. You have to choose your own value, there
    translate_model = MODEL_DICT.get(options.model)
    assert translate_model is not None, "unsupported model"
    API_KEY = ""
-    if options.model in ["chatgptapi", "gpt4"]:
+    if options.model in ["openai", "chatgptapi", "gpt4"]:
        if OPENAI_API_KEY := (
            options.openai_key
            or env.get(
@ -302,6 +316,9 @@ So you are close to reaching the limit. You have to choose your own value, there
        ):
            API_KEY = OPENAI_API_KEY
            # patch
+        elif options.ollama_model:
+            # any string is ok, can't be empty
+            API_KEY = "ollama"
        else:
            raise Exception(
                "OpenAI API key not provided, please google how to obtain it",
@ -359,6 +376,10 @@ So you are close to reaching the limit. You have to choose your own value, there
    # change api_base for issue #42
    model_api_base = options.api_base

+    if options.ollama_model and not model_api_base:
+        # ollama default api_base
+        model_api_base = "http://localhost:11434/v1"
+
    e = book_loader(
        options.book_name,
        translate_model,
@ -402,9 +423,20 @@ So you are close to reaching the limit. You have to choose your own value, there
        if not options.api_base:
            raise ValueError("`api_base` must be provided when using `deployment_id`")
        e.translate_model.set_deployment_id(options.deployment_id)
+    if options.model == "openai":
+        # Currently only supports `openai` when you also have --model_list set
+        if options.model_list:
+            e.translate_model.set_model_list(options.model_list.split(","))
+        else:
+            raise ValueError(
+                "When using `openai` model, you must also provide `--model_list`. For default model sets use `--model chatgptapi` or `--model gpt4`",
+            )
    # TODO refactor, quick fix for gpt4 model
    if options.model == "chatgptapi":
-        e.translate_model.set_gpt35_models()
+        if options.ollama_model:
+            e.translate_model.set_gpt35_models(ollama_model=options.ollama_model)
+        else:
+            e.translate_model.set_gpt35_models()
    if options.model == "gpt4":
        e.translate_model.set_gpt4_models()
    if options.block_size > 0:
--- a/book_maker/loader/helper.py
+++ b/book_maker/loader/helper.py
@ -1,5 +1,10 @@
 import re
 from copy import copy
+import backoff
+import logging
+
+logging.basicConfig(level=logging.WARNING)
+logger = logging.getLogger(__name__)


 class EPUBBookLoaderHelper:
@ -27,13 +32,20 @@ class EPUBBookLoaderHelper:
        if single_translate:
            p.extract()

+    # Retry failed translation calls with exponential backoff.
+    # NOTE: no max_tries/max_time is passed, so with backoff's defaults a
+    # persistently failing call keeps retrying and on_giveup does not fire.
+    @backoff.on_exception(
+        backoff.expo,
+        Exception,
+        on_backoff=lambda details: logger.warning(f"retry backoff: {details}"),
+        on_giveup=lambda details: logger.warning(f"retry abort: {details}"),
+    )
+    def translate_with_backoff(self, *args, **kwargs):
+        """Call ``self.translate_model.translate`` and retry it on any exception.
+
+        Positional and keyword arguments are forwarded unchanged, so this can
+        be called exactly like ``translate`` itself, e.g. with
+        ``(text, context_flag)``. Returns whatever ``translate`` returns.
+        """
+        return self.translate_model.translate(*args, **kwargs)
+
    def deal_new(self, p, wait_p_list, single_translate=False):
        self.deal_old(wait_p_list, single_translate, self.context_flag)
        self.insert_trans(
            p,
-            shorter_result_link(
-                self.translate_model.translate(p.text, self.context_flag)
-            ),
+            shorter_result_link(self.translate_with_backoff(p.text, self.context_flag)),
            self.translation_style,
            single_translate,
        )
--- a/book_maker/translator/init.py
+++ b/book_maker/translator/init.py
@ -9,12 +9,13 @@ from book_maker.translator.tencent_transmart_translator import TencentTranSmart
 from book_maker.translator.custom_api_translator import CustomAPI

 MODEL_DICT = {
+    "openai": ChatGPTAPI,
    "chatgptapi": ChatGPTAPI,
+    "gpt4": ChatGPTAPI,
    "google": Google,
    "caiyun": Caiyun,
    "deepl": DeepL,
    "deeplfree": DeepLFree,
-    "gpt4": ChatGPTAPI,
    "claude": Claude,
    "gemini": Gemini,
    "tencentransmart": TencentTranSmart,
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@ -307,7 +307,10 @@ class ChatGPTAPI(Base):
            azure_deployment=self.deployment_id,
        )

-    def set_gpt35_models(self):
+    def set_gpt35_models(self, ollama_model=""):
+        if ollama_model:
+            self.model_list = cycle([ollama_model])
+            return
        # gpt3 all models for save the limit
        if self.deployment_id:
            self.model_list = cycle(["gpt-35-turbo"])
@ -330,3 +333,8 @@ class ChatGPTAPI(Base):
            model_list = list(set(my_model_list) & set(GPT4_MODEL_LIST))
            print(f"Using model list {model_list}")
            self.model_list = cycle(model_list)
+
+    def set_model_list(self, model_list):
+        model_list = list(set(model_list))
+        print(f"Using model list {model_list}")
+        self.model_list = cycle(model_list)
--- a/book_maker/translator/claude_translator.py
+++ b/book_maker/translator/claude_translator.py
@ -1,6 +1,7 @@
 import re
-import requests
+import time
 from rich import print
+from anthropic import Anthropic

 from .base_translator import Base

@ -16,23 +17,9 @@ class Claude(Base):
        **kwargs,
    ) -> None:
        super().__init__(key, language)
-        self.api_url = (
-            f"{api_base}v1/complete"
-            if api_base
-            else "https://api.anthropic.com/v1/complete"
-        )
-        self.headers = {
-            "Content-Type": "application/json",
-            "x-api-key": key,
-        }
-        self.data = {
-            "prompt": "",
-            "model": "claude-v1.3",
-            "max_tokens_to_sample": 1024,
-            "temperature": temperature,
-            "stop_sequences": ["\n\nHuman:"],
-        }
-        self.session = requests.session()
+        self.api_url = f"{api_base}" if api_base else "https://api.anthropic.com"
+        self.client = Anthropic(base_url=api_base, api_key=key, timeout=20)
+
        self.language = language
        self.prompt_template = (
            prompt_template
@ -45,14 +32,19 @@ class Claude(Base):
    def translate(self, text):
        print(text)
        self.rotate_key()
-        self.data["prompt"] = self.prompt_template.format(
+        prompt = self.prompt_template.format(
            text=text,
            language=self.language,
        )
-        r = self.session.post(self.api_url, headers=self.headers, json=self.data)
-        if not r.ok:
-            return text
-        t_text = r.json().get("completion").strip()
+        message = [{"role": "user", "content": prompt}]
+        r = self.client.messages.create(
+            max_tokens=4096,
+            messages=message,
+            model="claude-3-haiku-20240307",  # default it for now
+        )
+        t_text = r.content[0].text
+        # api limit rate and spider rule
+        time.sleep(1)

        print("[bold green]" + re.sub("\n{3,}", "\n\n", t_text) + "[/bold green]")
        return t_text
--- a/pdm.lock
+++ b/pdm.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,47 @@
+[project]
+name = "bbook-maker"
+description = "The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books."
+readme = "README.md"
+license = {text = "MIT"}
+dynamic = ["version"]
+requires-python = ">=3.9"
+authors = [
+    { name = "yihong0618", email = "zouzou0208@gmail.com" },
+]
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+]
+dependencies = [
+    "anthropic",
+    "backoff",
+    "bs4",
+    "ebooklib",
+    "google-generativeai",
+    "langdetect",
+    "litellm",
+    "openai>=1.1.1",
+    "PyDeepLX",
+    "requests",
+    "rich",
+    "tiktoken",
+    "tqdm",
+]
+
+[project.scripts]
+bbook_maker = "book_maker.cli:main"
+
+[project.urls]
+Homepage = "https://github.com/yihong0618/bilingual_book_maker"
+
+[tool.pdm]
+plugins = ["pdm-autoexport"]
+[[tool.pdm.autoexport]]
+filename = "requirements.txt"
+without-hashes = true
+[build-system]
+requires = ["pdm-backend>=2.0.0"]
+build-backend = "pdm.backend"
+[tool.pdm.version]
+source = "scm"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,3 +1,81 @@
-e .
-mkdocs
-mkdocs-material
+# This file is @generated by PDM.
+# Please do not edit it manually.
+
+aiohttp==3.9.5
+aiosignal==1.3.1
+annotated-types==0.6.0
+anthropic==0.25.7
+anyio==4.3.0
+async-timeout==4.0.3; python_version < "3.11"
+attrs==23.2.0
+backoff==2.2.1
+beautifulsoup4==4.12.3
+brotli==1.1.0; platform_python_implementation == "CPython"
+brotlicffi==1.1.0.0; platform_python_implementation != "CPython"
+bs4==0.0.2
+cachetools==5.3.3
+certifi==2024.2.2
+cffi==1.16.0; platform_python_implementation != "CPython"
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6; platform_system == "Windows"
+distro==1.9.0
+ebooklib==0.18
+exceptiongroup==1.2.1; python_version < "3.11"
+filelock==3.14.0
+frozenlist==1.4.1
+fsspec==2024.3.1
+google-ai-generativelanguage==0.6.2
+google-api-core==2.19.0
+google-api-python-client==2.127.0
+google-auth==2.29.0
+google-auth-httplib2==0.2.0
+google-generativeai==0.5.2
+googleapis-common-protos==1.63.0
+grpcio==1.63.0
+grpcio-status==1.62.2
+h11==0.14.0
+httpcore==1.0.5
+httplib2==0.22.0
+httpx==0.27.0
+huggingface-hub==0.22.2
+idna==3.7
+importlib-metadata==7.1.0
+jinja2==3.1.3
+langdetect==1.0.9
+litellm==1.35.34
+lxml==5.2.1
+markdown-it-py==3.0.0
+markupsafe==2.1.5
+mdurl==0.1.2
+multidict==6.0.5
+openai==1.25.0
+packaging==24.0
+proto-plus==1.23.0
+protobuf==4.25.3
+pyasn1==0.6.0
+pyasn1-modules==0.4.0
+pycparser==2.22; platform_python_implementation != "CPython"
+pydantic==2.7.1
+pydantic-core==2.18.2
+pydeeplx==1.0.7
+pygments==2.17.2
+pyparsing==3.1.2; python_version > "3.0"
+python-dotenv==1.0.1
+pyyaml==6.0.1
+regex==2024.4.28
+requests==2.31.0
+rich==13.7.1
+rsa==4.9
+six==1.16.0
+sniffio==1.3.1
+socksio==1.0.0
+soupsieve==2.5
+tiktoken==0.6.0
+tokenizers==0.19.1
+tqdm==4.66.2
+typing-extensions==4.11.0
+uritemplate==4.1.1
+urllib3==2.2.1
+yarl==1.9.4
+zipp==3.18.1
--- a/setup.py
+++ b/setup.py
@ -1,38 +0,0 @@
-#!/usr/bin/env python3
-from setuptools import find_packages, setup
-
-packages = [
-    "bs4",
-    "openai>=1.1.1",
-    "litellm",
-    "requests",
-    "ebooklib",
-    "rich",
-    "tqdm",
-    "tiktoken",
-    "PyDeepLX",
-    "google-generativeai",
-    "langdetect",
-]
-
-
-setup(
-    name="bbook_maker",
-    description="The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books.",
-    version="0.7.8",
-    license="MIT",
-    author="yihong0618",
-    author_email="zouzou0208@gmail.com",
-    packages=find_packages(),
-    url="https://github.com/yihong0618/bilingual_book_maker",
-    python_requires=">=3.8",
-    install_requires=packages,
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-    ],
-    entry_points={
-        "console_scripts": ["bbook_maker = book_maker.cli:main"],
-    },
-)
				`@ -0,0 +1 @@`
				`/home/yihong/use_now/bilingual_book_maker/.venv/bin/python`