mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-08 04:25:34 +00:00
Merge branch 'yihong0618:main' into main
This commit is contained in:
commit
9de922c5b4
37
.github/workflows/release.yaml
vendored
Normal file
37
.github/workflows/release.yaml
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
name: Release and Build Docker Image
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "*"
|
||||
|
||||
jobs:
|
||||
release-pypi:
|
||||
name: Build and Release PyPI
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 16
|
||||
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
pip install build
|
||||
python -m build
|
||||
|
||||
- uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
||||
|
1
.pdm-python
Normal file
1
.pdm-python
Normal file
@ -0,0 +1 @@
|
||||
/home/yihong/use_now/bilingual_book_maker/.venv/bin/python
|
15
README.md
15
README.md
@ -25,8 +25,9 @@ Find more info here for using liteLLM: https://github.com/BerriAI/litellm/blob/m
|
||||
Or, just set environment variable `BBM_OPENAI_API_KEY` instead.
|
||||
- A sample book, `test_books/animal_farm.epub`, is provided for testing purposes.
|
||||
- The default underlying model is [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis), which is used by ChatGPT currently. Use `--model gpt4` to change the underlying model to `GPT4`.
|
||||
If using `GPT4`, you can add `--use_context` to add a context paragraph to each passage sent to the model for translation (see below)
|
||||
- support DeepL model [DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator) need pay to get the token use `--model deepl --deepl_key ${deepl_key}`
|
||||
- Important to note that `gpt-4` is significantly more expensive than `gpt-4-turbo`, but to avoid bumping into rate limits, we automatically balance queries across `gpt-4-1106-preview`, `gpt-4`, `gpt-4-32k`, `gpt-4-0613`,`gpt-4-32k-0613`.
|
||||
- If you want to use a specific model alias with OpenAI (eg `gpt-4-1106-preview` or `gpt-3.5-turbo-0125`), you can use `--model openai --model_list gpt-4-1106-preview,gpt-3.5-turbo-0125`. `--model_list` takes a comma-separated list of model aliases.
|
||||
- If using `GPT4`, you can add `--use_context` to add a context paragraph to each passage sent to the model for translation (see below).
- Support the DeepL model via [DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator) (a paid token is required): use `--model deepl --deepl_key ${deepl_key}`.
|
||||
- support DeepL free model `--model deeplfree`
|
||||
- support Google [Gemini](https://makersuite.google.com/app/apikey) model `--model gemini --gemini_key ${gemini_key}`
|
||||
- Support [Claude](https://console.anthropic.com/docs) model, use `--model claude --claude_key ${claude_key}`
|
||||
@ -83,10 +84,18 @@ export OPENAI_API_KEY=${your_api_key}
|
||||
# Use the GPT-4 model with context to Japanese
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --model gpt4 --use_context --language ja
|
||||
|
||||
# Use a specific OpenAI model alias
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list gpt-4-1106-preview --openai_key ${openai_key}
|
||||
|
||||
# Note: you can use other OpenAI-compatible models in the same way
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list yi-34b-chat-0205 --openai_key ${openai_key} --api_base "https://api.lingyiwanwu.com/v1"
|
||||
|
||||
# Use a specific list of OpenAI model aliases
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list gpt-4-1106-preview,gpt-4-0125-preview,gpt-3.5-turbo-0125 --openai_key ${openai_key}
|
||||
|
||||
# Use the DeepL model with Japanese
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key} --language ja
|
||||
|
||||
|
||||
# Use the Claude model with Japanese
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key} --language ja
|
||||
|
||||
|
@ -137,6 +137,14 @@ def main():
|
||||
metavar="MODEL",
|
||||
help="model to use, available: {%(choices)s}",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ollama_model",
|
||||
dest="ollama_model",
|
||||
type=str,
|
||||
default="ollama_model",
|
||||
metavar="MODEL",
|
||||
help="use ollama",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--language",
|
||||
type=str,
|
||||
@ -275,6 +283,12 @@ So you are close to reaching the limit. You have to choose your own value, there
|
||||
default=-1,
|
||||
help="merge multiple paragraphs into one block, may increase accuracy and speed up the process, but disturb the original format, must be used with `--single_translate`",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model_list",
|
||||
type=str,
|
||||
dest="model_list",
|
||||
help="Rather than using our preset lists of models, specify exactly the models you want as a comma separated list `gpt-4-32k,gpt-3.5-turbo-0125` (Currently only supports: `openai`)",
|
||||
)
|
||||
|
||||
options = parser.parse_args()
|
||||
|
||||
@ -290,7 +304,7 @@ So you are close to reaching the limit. You have to choose your own value, there
|
||||
translate_model = MODEL_DICT.get(options.model)
|
||||
assert translate_model is not None, "unsupported model"
|
||||
API_KEY = ""
|
||||
if options.model in ["chatgptapi", "gpt4"]:
|
||||
if options.model in ["openai", "chatgptapi", "gpt4"]:
|
||||
if OPENAI_API_KEY := (
|
||||
options.openai_key
|
||||
or env.get(
|
||||
@ -302,6 +316,9 @@ So you are close to reaching the limit. You have to choose your own value, there
|
||||
):
|
||||
API_KEY = OPENAI_API_KEY
|
||||
# patch
|
||||
elif options.ollama_model:
|
||||
# any string is ok, can't be empty
|
||||
API_KEY = "ollama"
|
||||
else:
|
||||
raise Exception(
|
||||
"OpenAI API key not provided, please google how to obtain it",
|
||||
@ -359,6 +376,10 @@ So you are close to reaching the limit. You have to choose your own value, there
|
||||
# change api_base for issue #42
|
||||
model_api_base = options.api_base
|
||||
|
||||
if options.ollama_model and not model_api_base:
|
||||
# ollama default api_base
|
||||
model_api_base = "http://localhost:11434/v1"
|
||||
|
||||
e = book_loader(
|
||||
options.book_name,
|
||||
translate_model,
|
||||
@ -402,9 +423,20 @@ So you are close to reaching the limit. You have to choose your own value, there
|
||||
if not options.api_base:
|
||||
raise ValueError("`api_base` must be provided when using `deployment_id`")
|
||||
e.translate_model.set_deployment_id(options.deployment_id)
|
||||
if options.model == "openai":
|
||||
# Currently only supports `openai` when you also have --model_list set
|
||||
if options.model_list:
|
||||
e.translate_model.set_model_list(options.model_list.split(","))
|
||||
else:
|
||||
raise ValueError(
|
||||
"When using `openai` model, you must also provide `--model_list`. For default model sets use `--model chatgptapi` or `--model gpt4`",
|
||||
)
|
||||
# TODO refactor, quick fix for gpt4 model
|
||||
if options.model == "chatgptapi":
|
||||
e.translate_model.set_gpt35_models()
|
||||
if options.ollama_model:
|
||||
e.translate_model.set_gpt35_models(ollama_model=options.ollama_model)
|
||||
else:
|
||||
e.translate_model.set_gpt35_models()
|
||||
if options.model == "gpt4":
|
||||
e.translate_model.set_gpt4_models()
|
||||
if options.block_size > 0:
|
||||
|
@ -1,5 +1,10 @@
|
||||
import re
|
||||
from copy import copy
|
||||
import backoff
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.WARNING)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EPUBBookLoaderHelper:
|
||||
@ -27,13 +32,20 @@ class EPUBBookLoaderHelper:
|
||||
if single_translate:
|
||||
p.extract()
|
||||
|
||||
@backoff.on_exception(
    backoff.expo,
    Exception,
    on_backoff=lambda details: logger.warning(f"retry backoff: {details}"),
    on_giveup=lambda details: logger.warning(f"retry abort: {details}"),
)
def translate_with_backoff(self, *args, **kwargs):
    """Call ``self.translate_model.translate`` and retry it on any exception.

    Retries use exponential backoff; each retry is logged as a warning.

    Positional as well as keyword arguments are forwarded unchanged, so
    callers may use either ``translate_with_backoff(text, context_flag)``
    or ``translate_with_backoff(text=...)``. The earlier keyword-only
    signature raised ``TypeError`` for positional calls.

    NOTE(review): no ``max_tries`` / ``max_time`` is passed to the decorator,
    so per the ``backoff`` documentation retries are unbounded -- confirm
    that retrying forever on a permanent failure is intended.

    Returns:
        Whatever ``self.translate_model.translate`` returns.
    """
    return self.translate_model.translate(*args, **kwargs)
|
||||
|
||||
def deal_new(self, p, wait_p_list, single_translate=False):
|
||||
self.deal_old(wait_p_list, single_translate, self.context_flag)
|
||||
self.insert_trans(
|
||||
p,
|
||||
shorter_result_link(
|
||||
self.translate_model.translate(p.text, self.context_flag)
|
||||
),
|
||||
shorter_result_link(self.translate_with_backoff(p.text, self.context_flag)),
|
||||
self.translation_style,
|
||||
single_translate,
|
||||
)
|
||||
|
@ -9,12 +9,13 @@ from book_maker.translator.tencent_transmart_translator import TencentTranSmart
|
||||
from book_maker.translator.custom_api_translator import CustomAPI
|
||||
|
||||
MODEL_DICT = {
|
||||
"openai": ChatGPTAPI,
|
||||
"chatgptapi": ChatGPTAPI,
|
||||
"gpt4": ChatGPTAPI,
|
||||
"google": Google,
|
||||
"caiyun": Caiyun,
|
||||
"deepl": DeepL,
|
||||
"deeplfree": DeepLFree,
|
||||
"gpt4": ChatGPTAPI,
|
||||
"claude": Claude,
|
||||
"gemini": Gemini,
|
||||
"tencentransmart": TencentTranSmart,
|
||||
|
@ -307,7 +307,10 @@ class ChatGPTAPI(Base):
|
||||
azure_deployment=self.deployment_id,
|
||||
)
|
||||
|
||||
def set_gpt35_models(self):
|
||||
def set_gpt35_models(self, ollama_model=""):
|
||||
if ollama_model:
|
||||
self.model_list = cycle([ollama_model])
|
||||
return
|
||||
# gpt3 all models for save the limit
|
||||
if self.deployment_id:
|
||||
self.model_list = cycle(["gpt-35-turbo"])
|
||||
@ -330,3 +333,8 @@ class ChatGPTAPI(Base):
|
||||
model_list = list(set(my_model_list) & set(GPT4_MODEL_LIST))
|
||||
print(f"Using model list {model_list}")
|
||||
self.model_list = cycle(model_list)
|
||||
|
||||
def set_model_list(self, model_list):
    """Use exactly the given model aliases, rotating through them in order.

    Args:
        model_list: iterable of model alias strings, e.g. the result of
            splitting the ``--model_list`` command-line value on commas.

    Raises:
        ValueError: if no non-empty alias remains after clean-up. An empty
            ``cycle`` would otherwise only fail later, on the first
            ``next()``, with an unhelpful ``StopIteration``.
    """
    # Tolerate input such as "gpt-4, gpt-3.5-turbo" or a trailing comma.
    cleaned = (name.strip() for name in model_list)
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would make the rotation order arbitrary and vary between runs.
    model_list = list(dict.fromkeys(name for name in cleaned if name))
    if not model_list:
        raise ValueError("model_list must contain at least one model name")
    print(f"Using model list {model_list}")
    self.model_list = cycle(model_list)
|
||||
|
@ -1,6 +1,7 @@
|
||||
import re
|
||||
import requests
|
||||
import time
|
||||
from rich import print
|
||||
from anthropic import Anthropic
|
||||
|
||||
from .base_translator import Base
|
||||
|
||||
@ -16,23 +17,9 @@ class Claude(Base):
|
||||
**kwargs,
|
||||
) -> None:
|
||||
super().__init__(key, language)
|
||||
self.api_url = (
|
||||
f"{api_base}v1/complete"
|
||||
if api_base
|
||||
else "https://api.anthropic.com/v1/complete"
|
||||
)
|
||||
self.headers = {
|
||||
"Content-Type": "application/json",
|
||||
"x-api-key": key,
|
||||
}
|
||||
self.data = {
|
||||
"prompt": "",
|
||||
"model": "claude-v1.3",
|
||||
"max_tokens_to_sample": 1024,
|
||||
"temperature": temperature,
|
||||
"stop_sequences": ["\n\nHuman:"],
|
||||
}
|
||||
self.session = requests.session()
|
||||
self.api_url = f"{api_base}" if api_base else "https://api.anthropic.com"
|
||||
self.client = Anthropic(base_url=api_base, api_key=key, timeout=20)
|
||||
|
||||
self.language = language
|
||||
self.prompt_template = (
|
||||
prompt_template
|
||||
@ -45,14 +32,19 @@ class Claude(Base):
|
||||
def translate(self, text):
|
||||
print(text)
|
||||
self.rotate_key()
|
||||
self.data["prompt"] = self.prompt_template.format(
|
||||
prompt = self.prompt_template.format(
|
||||
text=text,
|
||||
language=self.language,
|
||||
)
|
||||
r = self.session.post(self.api_url, headers=self.headers, json=self.data)
|
||||
if not r.ok:
|
||||
return text
|
||||
t_text = r.json().get("completion").strip()
|
||||
message = [{"role": "user", "content": prompt}]
|
||||
r = self.client.messages.create(
|
||||
max_tokens=4096,
|
||||
messages=message,
|
||||
model="claude-3-haiku-20240307", # default it for now
|
||||
)
|
||||
t_text = r.content[0].text
|
||||
# api limit rate and spider rule
|
||||
time.sleep(1)
|
||||
|
||||
print("[bold green]" + re.sub("\n{3,}", "\n\n", t_text) + "[/bold green]")
|
||||
return t_text
|
||||
|
47
pyproject.toml
Normal file
47
pyproject.toml
Normal file
@ -0,0 +1,47 @@
|
||||
[project]
|
||||
name = "bbook-maker"
|
||||
description = "The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books."
|
||||
readme = "README.md"
|
||||
license = {text = "MIT"}
|
||||
dynamic = ["version"]
|
||||
requires-python = ">=3.9"
|
||||
authors = [
|
||||
{ name = "yihong0618", email = "zouzou0208@gmail.com" },
|
||||
]
|
||||
classifiers = [
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
]
|
||||
dependencies = [
|
||||
"anthropic",
|
||||
"backoff",
|
||||
"bs4",
|
||||
"ebooklib",
|
||||
"google-generativeai",
|
||||
"langdetect",
|
||||
"litellm",
|
||||
"openai>=1.1.1",
|
||||
"PyDeepLX",
|
||||
"requests",
|
||||
"rich",
|
||||
"tiktoken",
|
||||
"tqdm",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
bbook_maker = "book_maker.cli:main"
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/yihong0618/bilingual_book_maker"
|
||||
|
||||
[tool.pdm]
|
||||
plugins = ["pdm-autoexport"]
|
||||
[[tool.pdm.autoexport]]
|
||||
filename = "requirements.txt"
|
||||
without-hashes = true
|
||||
[build-system]
|
||||
requires = ["pdm-backend>=2.0.0"]
|
||||
build-backend = "pdm.backend"
|
||||
[tool.pdm.version]
|
||||
source = "scm"
|
@ -1,3 +1,81 @@
|
||||
-e .
|
||||
mkdocs
|
||||
mkdocs-material
|
||||
# This file is @generated by PDM.
|
||||
# Please do not edit it manually.
|
||||
|
||||
aiohttp==3.9.5
|
||||
aiosignal==1.3.1
|
||||
annotated-types==0.6.0
|
||||
anthropic==0.25.7
|
||||
anyio==4.3.0
|
||||
async-timeout==4.0.3; python_version < "3.11"
|
||||
attrs==23.2.0
|
||||
backoff==2.2.1
|
||||
beautifulsoup4==4.12.3
|
||||
brotli==1.1.0; platform_python_implementation == "CPython"
|
||||
brotlicffi==1.1.0.0; platform_python_implementation != "CPython"
|
||||
bs4==0.0.2
|
||||
cachetools==5.3.3
|
||||
certifi==2024.2.2
|
||||
cffi==1.16.0; platform_python_implementation != "CPython"
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
colorama==0.4.6; platform_system == "Windows"
|
||||
distro==1.9.0
|
||||
ebooklib==0.18
|
||||
exceptiongroup==1.2.1; python_version < "3.11"
|
||||
filelock==3.14.0
|
||||
frozenlist==1.4.1
|
||||
fsspec==2024.3.1
|
||||
google-ai-generativelanguage==0.6.2
|
||||
google-api-core==2.19.0
|
||||
google-api-python-client==2.127.0
|
||||
google-auth==2.29.0
|
||||
google-auth-httplib2==0.2.0
|
||||
google-generativeai==0.5.2
|
||||
googleapis-common-protos==1.63.0
|
||||
grpcio==1.63.0
|
||||
grpcio-status==1.62.2
|
||||
h11==0.14.0
|
||||
httpcore==1.0.5
|
||||
httplib2==0.22.0
|
||||
httpx==0.27.0
|
||||
huggingface-hub==0.22.2
|
||||
idna==3.7
|
||||
importlib-metadata==7.1.0
|
||||
jinja2==3.1.3
|
||||
langdetect==1.0.9
|
||||
litellm==1.35.34
|
||||
lxml==5.2.1
|
||||
markdown-it-py==3.0.0
|
||||
markupsafe==2.1.5
|
||||
mdurl==0.1.2
|
||||
multidict==6.0.5
|
||||
openai==1.25.0
|
||||
packaging==24.0
|
||||
proto-plus==1.23.0
|
||||
protobuf==4.25.3
|
||||
pyasn1==0.6.0
|
||||
pyasn1-modules==0.4.0
|
||||
pycparser==2.22; platform_python_implementation != "CPython"
|
||||
pydantic==2.7.1
|
||||
pydantic-core==2.18.2
|
||||
pydeeplx==1.0.7
|
||||
pygments==2.17.2
|
||||
pyparsing==3.1.2; python_version > "3.0"
|
||||
python-dotenv==1.0.1
|
||||
pyyaml==6.0.1
|
||||
regex==2024.4.28
|
||||
requests==2.31.0
|
||||
rich==13.7.1
|
||||
rsa==4.9
|
||||
six==1.16.0
|
||||
sniffio==1.3.1
|
||||
socksio==1.0.0
|
||||
soupsieve==2.5
|
||||
tiktoken==0.6.0
|
||||
tokenizers==0.19.1
|
||||
tqdm==4.66.2
|
||||
typing-extensions==4.11.0
|
||||
uritemplate==4.1.1
|
||||
urllib3==2.2.1
|
||||
yarl==1.9.4
|
||||
zipp==3.18.1
|
||||
|
38
setup.py
38
setup.py
@ -1,38 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
packages = [
|
||||
"bs4",
|
||||
"openai>=1.1.1",
|
||||
"litellm",
|
||||
"requests",
|
||||
"ebooklib",
|
||||
"rich",
|
||||
"tqdm",
|
||||
"tiktoken",
|
||||
"PyDeepLX",
|
||||
"google-generativeai",
|
||||
"langdetect",
|
||||
]
|
||||
|
||||
|
||||
setup(
|
||||
name="bbook_maker",
|
||||
description="The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books.",
|
||||
version="0.7.8",
|
||||
license="MIT",
|
||||
author="yihong0618",
|
||||
author_email="zouzou0208@gmail.com",
|
||||
packages=find_packages(),
|
||||
url="https://github.com/yihong0618/bilingual_book_maker",
|
||||
python_requires=">=3.8",
|
||||
install_requires=packages,
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
entry_points={
|
||||
"console_scripts": ["bbook_maker = book_maker.cli:main"],
|
||||
},
|
||||
)
|
Loading…
x
Reference in New Issue
Block a user