Merge branch 'yihong0618:main' into main

Commit 9de922c5b4 by umm, 2024-05-05 11:52:02 +08:00, committed by GitHub
12 changed files with 2204 additions and 74 deletions

.github/workflows/release.yaml (vendored, new file, +37)

@@ -0,0 +1,37 @@
name: Release and Build Docker Image

permissions:
  contents: write

on:
  push:
    tags:
      - "*"

jobs:
  release-pypi:
    name: Build and Release PyPI
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - uses: actions/setup-node@v3
        with:
          node-version: 16
      - name: Build artifacts
        run: |
          pip install build
          python -m build
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}

.pdm-python (new file, +1)

@@ -0,0 +1 @@
/home/yihong/use_now/bilingual_book_maker/.venv/bin/python

README.md

@@ -25,8 +25,9 @@ Find more info here for using liteLLM: https://github.com/BerriAI/litellm/blob/m
Or, just set environment variable `BBM_OPENAI_API_KEY` instead.
- A sample book, `test_books/animal_farm.epub`, is provided for testing purposes.
- The default underlying model is [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis), which is used by ChatGPT currently. Use `--model gpt4` to change the underlying model to `GPT4`.
  If using `GPT4`, you can add `--use_context` to add a context paragraph to each passage sent to the model for translation (see below).
- Note that `gpt-4` is significantly more expensive than `gpt-4-turbo`; to avoid bumping into rate limits, we automatically balance queries across `gpt-4-1106-preview`, `gpt-4`, `gpt-4-32k`, `gpt-4-0613`, `gpt-4-32k-0613`.
- If you want to use a specific model alias with OpenAI (eg `gpt-4-1106-preview` or `gpt-3.5-turbo-0125`), you can use `--model openai --model_list gpt-4-1106-preview,gpt-3.5-turbo-0125`. `--model_list` takes a comma-separated list of model aliases.
- support DeepL model [DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator) (a paid token is required), use `--model deepl --deepl_key ${deepl_key}`
- support DeepL free model `--model deeplfree`
- support Google [Gemini](https://makersuite.google.com/app/apikey) model `--model gemini --gemini_key ${gemini_key}`
- Support [Claude](https://console.anthropic.com/docs) model, use `--model claude --claude_key ${claude_key}`
@@ -83,10 +84,18 @@ export OPENAI_API_KEY=${your_api_key}
# Use the GPT-4 model with context, translating to Japanese
python3 make_book.py --book_name test_books/animal_farm.epub --model gpt4 --use_context --language ja
# Use a specific OpenAI model alias
python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list gpt-4-1106-preview --openai_key ${openai_key}
**Note**: you can use other OpenAI-compatible models this way
python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list yi-34b-chat-0205 --openai_key ${openai_key} --api_base "https://api.lingyiwanwu.com/v1"
# Use a specific list of OpenAI model aliases
python3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list gpt-4-1106-preview,gpt-4-0125-preview,gpt-3.5-turbo-0125 --openai_key ${openai_key}
# Use the DeepL model with Japanese
python3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key} --language ja
# Use the Claude model with Japanese
python3 make_book.py --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key} --language ja
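# Use a local model served by Ollama (a sketch based on this commit's cli.py changes:
# `--ollama_model` pairs with `--model chatgptapi`, any non-empty key is accepted, and
# api_base defaults to http://localhost:11434/v1; the model name `llama2` is only an example)
python3 make_book.py --book_name test_books/animal_farm.epub --model chatgptapi --ollama_model llama2 --language ja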

book_maker/cli.py

@@ -137,6 +137,14 @@ def main():
        metavar="MODEL",
        help="model to use, available: {%(choices)s}",
    )
    parser.add_argument(
        "--ollama_model",
        dest="ollama_model",
        type=str,
        default="",
        metavar="MODEL",
        help="ollama model name to use with a local Ollama server",
    )
    parser.add_argument(
        "--language",
        type=str,
@@ -275,6 +283,12 @@ So you are close to reaching the limit. You have to choose your own value, there
        default=-1,
        help="merge multiple paragraphs into one block, may increase accuracy and speed up the process, but disturbs the original format; must be used with `--single_translate`",
    )
    parser.add_argument(
        "--model_list",
        type=str,
        dest="model_list",
        help="rather than using our preset lists of models, specify exactly the models you want as a comma-separated list, e.g. `gpt-4-32k,gpt-3.5-turbo-0125` (currently only supported for `openai`)",
    )

    options = parser.parse_args()
@@ -290,7 +304,7 @@ So you are close to reaching the limit. You have to choose your own value, there
    translate_model = MODEL_DICT.get(options.model)
    assert translate_model is not None, "unsupported model"
    API_KEY = ""
    if options.model in ["openai", "chatgptapi", "gpt4"]:
        if OPENAI_API_KEY := (
            options.openai_key
            or env.get(
@@ -302,6 +316,9 @@ So you are close to reaching the limit. You have to choose your own value, there
        ):
            API_KEY = OPENAI_API_KEY
        # patch
        elif options.ollama_model:
            # any non-empty string works; a local Ollama server ignores the key
            API_KEY = "ollama"
        else:
            raise Exception(
                "OpenAI API key not provided, please google how to obtain it",
            )
@@ -359,6 +376,10 @@ So you are close to reaching the limit. You have to choose your own value, there
    # change api_base for issue #42
    model_api_base = options.api_base

    if options.ollama_model and not model_api_base:
        # ollama default api_base
        model_api_base = "http://localhost:11434/v1"

    e = book_loader(
        options.book_name,
        translate_model,
@@ -402,9 +423,20 @@ So you are close to reaching the limit. You have to choose your own value, there
        if not options.api_base:
            raise ValueError("`api_base` must be provided when using `deployment_id`")
        e.translate_model.set_deployment_id(options.deployment_id)
    if options.model == "openai":
        # `--model_list` is required when using `openai` directly
        if options.model_list:
            e.translate_model.set_model_list(options.model_list.split(","))
        else:
            raise ValueError(
                "When using the `openai` model you must also provide `--model_list`. For the default model sets, use `--model chatgptapi` or `--model gpt4`",
            )
    # TODO refactor, quick fix for gpt4 model
    if options.model == "chatgptapi":
        if options.ollama_model:
            e.translate_model.set_gpt35_models(ollama_model=options.ollama_model)
        else:
            e.translate_model.set_gpt35_models()
    if options.model == "gpt4":
        e.translate_model.set_gpt4_models()
    if options.block_size > 0:
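The reason a dummy key is enough: Ollama exposes an OpenAI-compatible API, so the existing ChatGPTAPI translator can talk to it unchanged. A minimal sketch of that interaction (the model name `llama2` is an assumption; the base URL is the default hard-coded above):

```python
from openai import OpenAI

# Point the OpenAI client at a local Ollama server; the key only has to be non-empty.
client = OpenAI(api_key="ollama", base_url="http://localhost:11434/v1")
resp = client.chat.completions.create(
    model="llama2",  # hypothetical: any model pulled locally with `ollama pull`
    messages=[{"role": "user", "content": "Translate to Japanese: Hello"}],
)
print(resp.choices[0].message.content)
```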

book_maker/loader/helper.py

@@ -1,5 +1,10 @@
import re
from copy import copy

import backoff
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


class EPUBBookLoaderHelper:
@@ -27,13 +32,20 @@ class EPUBBookLoaderHelper:
            if single_translate:
                p.extract()

    @backoff.on_exception(
        backoff.expo,
        Exception,
        on_backoff=lambda details: logger.warning(f"retry backoff: {details}"),
        on_giveup=lambda details: logger.warning(f"retry abort: {details}"),
    )
    def translate_with_backoff(self, *args, **kwargs):
        # accept positional args too; deal_new calls this with (text, context_flag)
        return self.translate_model.translate(*args, **kwargs)

    def deal_new(self, p, wait_p_list, single_translate=False):
        self.deal_old(wait_p_list, single_translate, self.context_flag)
        self.insert_trans(
            p,
            shorter_result_link(self.translate_with_backoff(p.text, self.context_flag)),
            self.translation_style,
            single_translate,
        )
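`translate_with_backoff` retries failed `translate` calls with exponential backoff, logging each retry. A minimal standalone sketch of the same pattern (`max_tries=4` is illustrative, not part of this commit):

```python
import backoff

@backoff.on_exception(backoff.expo, ValueError, max_tries=4)
def flaky():
    # each retry waits roughly 1s, 2s, 4s... until max_tries is reached
    raise ValueError("transient failure")

try:
    flaky()
except ValueError:
    print("gave up after 4 attempts")
```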

book_maker/translator/__init__.py

@@ -9,12 +9,13 @@ from book_maker.translator.tencent_transmart_translator import TencentTranSmart
from book_maker.translator.custom_api_translator import CustomAPI

MODEL_DICT = {
    "openai": ChatGPTAPI,
    "chatgptapi": ChatGPTAPI,
    "gpt4": ChatGPTAPI,
    "google": Google,
    "caiyun": Caiyun,
    "deepl": DeepL,
    "deeplfree": DeepLFree,
    "claude": Claude,
    "gemini": Gemini,
    "tencentransmart": TencentTranSmart,

book_maker/translator/chatgptapi_translator.py

@@ -307,7 +307,10 @@ class ChatGPTAPI(Base):
            azure_deployment=self.deployment_id,
        )

    def set_gpt35_models(self, ollama_model=""):
        # when an ollama model is requested, use it exclusively
        if ollama_model:
            self.model_list = cycle([ollama_model])
            return
        # rotate across all gpt-3.5 models to spread out the rate limit
        if self.deployment_id:
            self.model_list = cycle(["gpt-35-turbo"])
@@ -330,3 +333,8 @@
        model_list = list(set(my_model_list) & set(GPT4_MODEL_LIST))
        print(f"Using model list {model_list}")
        self.model_list = cycle(model_list)

    def set_model_list(self, model_list):
        model_list = list(set(model_list))  # deduplicate user-supplied aliases
        print(f"Using model list {model_list}")
        self.model_list = cycle(model_list)
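Both setters store the aliases in an `itertools.cycle`, which is what balances successive requests across models. A quick illustration (note that `list(set(...))` deduplicates but does not preserve the user's order):

```python
from itertools import cycle

model_list = cycle(["gpt-4-32k", "gpt-3.5-turbo-0125"])
print(next(model_list))  # gpt-4-32k
print(next(model_list))  # gpt-3.5-turbo-0125
print(next(model_list))  # gpt-4-32k again, wrapping around
```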

book_maker/translator/claude_translator.py

@@ -1,6 +1,7 @@
import re
import time

from rich import print
from anthropic import Anthropic

from .base_translator import Base
@@ -16,23 +17,9 @@ class Claude(Base):
        **kwargs,
    ) -> None:
        super().__init__(key, language)
        self.api_url = api_base if api_base else "https://api.anthropic.com"
        self.client = Anthropic(base_url=api_base, api_key=key, timeout=20)
        self.language = language
        self.prompt_template = (
            prompt_template
@@ -45,14 +32,19 @@ class Claude(Base):
    def translate(self, text):
        print(text)
        self.rotate_key()
        prompt = self.prompt_template.format(
            text=text,
            language=self.language,
        )
        message = [{"role": "user", "content": prompt}]
        r = self.client.messages.create(
            max_tokens=4096,
            messages=message,
            model="claude-3-haiku-20240307",  # default it for now
        )
        t_text = r.content[0].text
        # sleep a bit to respect the API rate limit
        time.sleep(1)
        print("[bold green]" + re.sub("\n{3,}", "\n\n", t_text) + "[/bold green]")
        return t_text
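The translator now goes through the official Anthropic SDK rather than hand-rolled HTTP against `/v1/complete`. A minimal sketch of the equivalent standalone call (the API key is a placeholder):

```python
from anthropic import Anthropic

client = Anthropic(api_key="sk-ant-...", timeout=20)  # placeholder key
r = client.messages.create(
    max_tokens=4096,
    messages=[{"role": "user", "content": "Please translate to Japanese: Hello"}],
    model="claude-3-haiku-20240307",
)
print(r.content[0].text)  # the translated text
```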

pdm.lock (generated, new file, +1951): diff suppressed because it is too large

pyproject.toml (new file, +47)

@@ -0,0 +1,47 @@
[project]
name = "bbook-maker"
description = "The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books."
readme = "README.md"
license = {text = "MIT"}
dynamic = ["version"]
requires-python = ">=3.9"
authors = [
    { name = "yihong0618", email = "zouzou0208@gmail.com" },
]
classifiers = [
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
]
dependencies = [
    "anthropic",
    "backoff",
    "bs4",
    "ebooklib",
    "google-generativeai",
    "langdetect",
    "litellm",
    "openai>=1.1.1",
    "PyDeepLX",
    "requests",
    "rich",
    "tiktoken",
    "tqdm",
]

[project.scripts]
bbook_maker = "book_maker.cli:main"

[project.urls]
Homepage = "https://github.com/yihong0618/bilingual_book_maker"

[tool.pdm]
plugins = ["pdm-autoexport"]

[[tool.pdm.autoexport]]
filename = "requirements.txt"
without-hashes = true

[build-system]
requires = ["pdm-backend>=2.0.0"]
build-backend = "pdm.backend"

[tool.pdm.version]
source = "scm"
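The `[tool.pdm.autoexport]` table is what produces the pinned `requirements.txt` below: with the `pdm-autoexport` plugin installed, updating the lock file re-exports the requirements without hashes. Likewise, `[tool.pdm.version]` with `source = "scm"` means the version now comes from git tags instead of the hard-coded `version="0.7.8"` in the deleted setup.py.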

requirements.txt

@@ -1,3 +1,81 @@
# This file is @generated by PDM.
# Please do not edit it manually.
aiohttp==3.9.5
aiosignal==1.3.1
annotated-types==0.6.0
anthropic==0.25.7
anyio==4.3.0
async-timeout==4.0.3; python_version < "3.11"
attrs==23.2.0
backoff==2.2.1
beautifulsoup4==4.12.3
brotli==1.1.0; platform_python_implementation == "CPython"
brotlicffi==1.1.0.0; platform_python_implementation != "CPython"
bs4==0.0.2
cachetools==5.3.3
certifi==2024.2.2
cffi==1.16.0; platform_python_implementation != "CPython"
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6; platform_system == "Windows"
distro==1.9.0
ebooklib==0.18
exceptiongroup==1.2.1; python_version < "3.11"
filelock==3.14.0
frozenlist==1.4.1
fsspec==2024.3.1
google-ai-generativelanguage==0.6.2
google-api-core==2.19.0
google-api-python-client==2.127.0
google-auth==2.29.0
google-auth-httplib2==0.2.0
google-generativeai==0.5.2
googleapis-common-protos==1.63.0
grpcio==1.63.0
grpcio-status==1.62.2
h11==0.14.0
httpcore==1.0.5
httplib2==0.22.0
httpx==0.27.0
huggingface-hub==0.22.2
idna==3.7
importlib-metadata==7.1.0
jinja2==3.1.3
langdetect==1.0.9
litellm==1.35.34
lxml==5.2.1
markdown-it-py==3.0.0
markupsafe==2.1.5
mdurl==0.1.2
multidict==6.0.5
openai==1.25.0
packaging==24.0
proto-plus==1.23.0
protobuf==4.25.3
pyasn1==0.6.0
pyasn1-modules==0.4.0
pycparser==2.22; platform_python_implementation != "CPython"
pydantic==2.7.1
pydantic-core==2.18.2
pydeeplx==1.0.7
pygments==2.17.2
pyparsing==3.1.2; python_version > "3.0"
python-dotenv==1.0.1
pyyaml==6.0.1
regex==2024.4.28
requests==2.31.0
rich==13.7.1
rsa==4.9
six==1.16.0
sniffio==1.3.1
socksio==1.0.0
soupsieve==2.5
tiktoken==0.6.0
tokenizers==0.19.1
tqdm==4.66.2
typing-extensions==4.11.0
uritemplate==4.1.1
urllib3==2.2.1
yarl==1.9.4
zipp==3.18.1

setup.py (deleted)

@@ -1,38 +0,0 @@
#!/usr/bin/env python3
from setuptools import find_packages, setup

packages = [
    "bs4",
    "openai>=1.1.1",
    "litellm",
    "requests",
    "ebooklib",
    "rich",
    "tqdm",
    "tiktoken",
    "PyDeepLX",
    "google-generativeai",
    "langdetect",
]

setup(
    name="bbook_maker",
    description="The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books.",
    version="0.7.8",
    license="MIT",
    author="yihong0618",
    author_email="zouzou0208@gmail.com",
    packages=find_packages(),
    url="https://github.com/yihong0618/bilingual_book_maker",
    python_requires=">=3.8",
    install_requires=packages,
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    entry_points={
        "console_scripts": ["bbook_maker = book_maker.cli:main"],
    },
)