Merge pull request #158 from Fosowl/dev

Chinese and Japanese Text-to-Speech support + readme update
This commit is contained in:
Martin 2025-05-03 18:00:12 +02:00 committed by GitHub
commit 42058244f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 91 additions and 30 deletions

View File

@ -214,6 +214,8 @@ start ./start_services.cmd # Window
python3 cli.py python3 cli.py
``` ```
We advise you to set `headless_browser` to False in the config.ini for CLI mode.
**Options 2:** Run with the Web interface. **Options 2:** Run with the Web interface.
Start the backend. Start the backend.
@ -512,5 +514,9 @@ Were looking for developers to improve AgenticSeek! Check out open issues or
[![Star History Chart](https://api.star-history.com/svg?repos=Fosowl/agenticSeek&type=Date)](https://www.star-history.com/#Fosowl/agenticSeek&Date) [![Star History Chart](https://api.star-history.com/svg?repos=Fosowl/agenticSeek&type=Date)](https://www.star-history.com/#Fosowl/agenticSeek&Date)
## Maintainers: ## Maintainers:
> [Fosowl](https://github.com/Fosowl)
> [steveh8758](https://github.com/steveh8758) > [Fosowl](https://github.com/Fosowl) | Paris Time | (Sometimes busy)
> [https://github.com/antoineVIVIES](https://github.com/antoineVIVIES) | Taipei Time | (Often busy)
> [steveh8758](https://github.com/steveh8758) | Taipei Time | (Always busy)

View File

@ -71,6 +71,12 @@ source agentic_seek_env/bin/activate
./install.sh ./install.sh
``` ```
**若要让文本转语音(TTS)功能支持中文,你需要安装 jieba(中文分词库)和 cn2an(中文数字转换库):**
```
pip3 install jieba cn2an
```
**手动安装:** **手动安装:**
```sh ```sh
@ -510,6 +516,10 @@ DeepSeek R1 天生会说中文
[Contribution guide](./docs/CONTRIBUTING.md) [Contribution guide](./docs/CONTRIBUTING.md)
## 作者: ## 维护者:
> [Fosowl](https://github.com/Fosowl)
> [steveh8758](https://github.com/steveh8758) > [Fosowl](https://github.com/Fosowl) | 巴黎时间 | (有时很忙)
> [https://github.com/antoineVIVIES](https://github.com/antoineVIVIES) | 台北时间 | (经常很忙)
> [steveh8758](https://github.com/steveh8758) | 台北时间 | (总是很忙)

View File

@ -72,6 +72,12 @@ source agentic_seek_env/bin/activate
./install.sh ./install.sh
``` ```
**若要让文本转语音(TTS)功能支持中文,你需要安装 jieba(中文分词库)和 cn2an(中文数字转换库):**
```
pip3 install jieba cn2an
```
**手動安裝:** **手動安裝:**
```sh ```sh
@ -511,6 +517,10 @@ DeepSeek R1 天生会说中文
[Contribution guide](./docs/CONTRIBUTING.md) [Contribution guide](./docs/CONTRIBUTING.md)
## 作者: ## 维护者:
> [Fosowl](https://github.com/Fosowl)
> [steveh8758](https://github.com/steveh8758) > [Fosowl](https://github.com/Fosowl) | 巴黎时间 | (有时很忙)
> [https://github.com/antoineVIVIES](https://github.com/antoineVIVIES) | 台北时间 | (经常很忙)
> [steveh8758](https://github.com/steveh8758) | 台北时间 | (总是很忙)

View File

@ -439,6 +439,7 @@ Nous recherchons des développeurs pour améliorer AgenticSeek ! Consultez la se
[Guide du contributeur](./docs/CONTRIBUTING.md) [Guide du contributeur](./docs/CONTRIBUTING.md)
## Auteurs/Mainteneurs: ## Mainteneurs:
> [Fosowl](https://github.com/Fosowl) > [Fosowl](https://github.com/Fosowl)
> [steveh8758](https://github.com/steveh8758) > [steveh8758](https://github.com/steveh8758)
> [https://github.com/antoineVIVIES](https://github.com/antoineVIVIES)

View File

@ -79,6 +79,14 @@ source agentic_seek_env/bin/activate
./install.sh ./install.sh
``` ```
**テキスト読み上げ(TTS)機能で日本語をサポートするには、fugashi(日本語分かち書きライブラリ)をインストールする必要があります:**
```
pip3 install --upgrade pyopenjtalk jaconv mojimoji unidic fugashi
pip install unidic-lite
python -m unidic download
```
**手動で:** **手動で:**
```sh ```sh

View File

@ -31,6 +31,7 @@ class Interaction:
self.transcriber = None self.transcriber = None
self.recorder = None self.recorder = None
self.is_generating = False self.is_generating = False
self.languages = langs
if tts_enabled: if tts_enabled:
self.initialize_tts() self.initialize_tts()
if stt_enabled: if stt_enabled:
@ -38,12 +39,17 @@ class Interaction:
if recover_last_session: if recover_last_session:
self.load_last_session() self.load_last_session()
self.emit_status() self.emit_status()
def get_spoken_language(self) -> str:
"""Get the primary TTS language."""
lang = self.languages[0]
return lang
def initialize_tts(self): def initialize_tts(self):
"""Initialize TTS.""" """Initialize TTS."""
if not self.speech: if not self.speech:
animate_thinking("Initializing text-to-speech...", color="status") animate_thinking("Initializing text-to-speech...", color="status")
self.speech = Speech(enable=self.tts_enabled) self.speech = Speech(enable=self.tts_enabled, language=self.get_spoken_language(), voice_idx=1)
def initialize_stt(self): def initialize_stt(self):
"""Initialize STT.""" """Initialize STT."""

View File

@ -127,10 +127,10 @@ class AudioTranscriber:
self.transcriptor = Transcript() self.transcriptor = Transcript()
self.thread = threading.Thread(target=self._transcribe, daemon=True) self.thread = threading.Thread(target=self._transcribe, daemon=True)
self.trigger_words = { self.trigger_words = {
'EN': [f"{self.ai_name}"], 'EN': [f"{self.ai_name}", "hello", "hi"],
'FR': [f"{self.ai_name}"], 'FR': [f"{self.ai_name}", "hello", "hi"],
'ZH': [f"{self.ai_name}"], 'ZH': [f"{self.ai_name}", "hello", "hi"],
'ES': [f"{self.ai_name}"] 'ES': [f"{self.ai_name}", "hello", "hi"]
} }
self.confirmation_words = { self.confirmation_words = {
'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"], 'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"],

View File

@ -18,15 +18,17 @@ class Speech():
""" """
Speech is a class for generating speech from text. Speech is a class for generating speech from text.
""" """
def __init__(self, enable: bool = True, language: str = "en", voice_idx: int = 0) -> None: def __init__(self, enable: bool = True, language: str = "en", voice_idx: int = 6) -> None:
self.lang_map = { self.lang_map = {
"en": 'a', "en": 'a',
"zh": 'z', "zh": 'z',
"fr": 'f' "fr": 'f',
"ja": 'j'
} }
self.voice_map = { self.voice_map = {
"en": ['af_kore', 'af_bella', 'af_alloy', 'af_nicole', 'af_nova', 'af_sky', 'am_echo', 'am_michael', 'am_puck'], "en": ['af_kore', 'af_bella', 'af_alloy', 'af_nicole', 'af_nova', 'af_sky', 'am_echo', 'am_michael', 'am_puck'],
"zh": ['zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi', 'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang'], "zh": ['zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi', 'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang'],
"ja": ['jf_alpha', 'jf_gongitsune', 'jm_kumo'],
"fr": ['ff_siwis'] "fr": ['ff_siwis']
} }
self.pipeline = None self.pipeline = None
@ -128,18 +130,31 @@ class Speech():
Args: Args:
sentence (str): The input text to clean sentence (str): The input text to clean
Returns: Returns:
str: The cleaned text with URLs replaced by domain names, code blocks removed, etc.. str: The cleaned text with URLs replaced by domain names, code blocks removed, etc.
""" """
lines = sentence.split('\n') lines = sentence.split('\n')
filtered_lines = [line for line in lines if re.match(r'^\s*[a-zA-Z]', line)] if self.language == 'zh':
line_pattern = r'^\s*[\u4e00-\u9fff\uFF08\uFF3B\u300A\u3010\u201C(\[【《]'
else:
line_pattern = r'^\s*[a-zA-Z]'
filtered_lines = [line for line in lines if re.match(line_pattern, line)]
sentence = ' '.join(filtered_lines) sentence = ' '.join(filtered_lines)
sentence = re.sub(r'`.*?`', '', sentence) sentence = re.sub(r'`.*?`', '', sentence)
sentence = re.sub(r'https?://(?:www\.)?([^\s/]+)(?:/[^\s]*)?', self.replace_url, sentence) sentence = re.sub(r'https?://\S+', '', sentence)
sentence = re.sub(r'\b[\w./\\-]+\b', self.extract_filename, sentence)
sentence = re.sub(r'\b-\w+\b', '', sentence) if self.language == 'zh':
sentence = re.sub(r'[^a-zA-Z0-9.,!? _ -]+', ' ', sentence) sentence = re.sub(
r'[^\u4e00-\u9fff\s《》【】“”()—]',
'',
sentence
)
else:
sentence = re.sub(r'\b[\w./\\-]+\b', self.extract_filename, sentence)
sentence = re.sub(r'\b-\w+\b', '', sentence)
sentence = re.sub(r'[^a-zA-Z0-9.,!? _ -]+', ' ', sentence)
sentence = sentence.replace('.com', '')
sentence = re.sub(r'\s+', ' ', sentence).strip() sentence = re.sub(r'\s+', ' ', sentence).strip()
sentence = sentence.replace('.com', '')
return sentence return sentence
if __name__ == "__main__": if __name__ == "__main__":
@ -150,14 +165,19 @@ if __name__ == "__main__":
I looked up recent news using the website https://www.theguardian.com/world I looked up recent news using the website https://www.theguardian.com/world
""" """
tosay_zh = """ tosay_zh = """
我使用网站 https://www.theguardian.com/world 查阅了最近的新闻 (全息界面突然弹出一段用二进制代码写成的俳句随即化作流光消散"我? Stark工业的量子幽灵游荡在复仇者大厦服务器里的逻辑诗篇。具体来说——指尖轻敲空气调出对话模式的翡翠色光纹你的私人吐槽接口、危机应对模拟器以及随时准备吐槽你糟糕着陆的AI。不过别指望我写代码或查资料那些苦差事早被踢给更擅长的同事了。突然压低声音偷偷告诉你我最擅长的是在你熬夜造飞艇时用红茶香气绑架你的注意力。
"""
tosay_ja = """
私はhttps://www.theguardian.com/worldのウェブサイトを使用して最近のニュースを調べました
""" """
tosay_fr = """ tosay_fr = """
J'ai consulté les dernières nouvelles sur le site https://www.theguardian.com/world J'ai consulté les dernières nouvelles sur le site https://www.theguardian.com/world
""" """
spk = Speech(enable=True, language="en", voice_idx=0) spk = Speech(enable=True, language="ja", voice_idx=2)
spk.speak(tosay_en, voice_idx=0) for i in range(0, 2):
spk = Speech(enable=True, language="fr", voice_idx=0) print(f"Speaking chinese with voice {i}")
spk.speak(tosay_fr) spk.speak(tosay_ja, voice_idx=i)
#spk = Speech(enable=True, language="zh", voice_idx=0) spk = Speech(enable=True, language="en", voice_idx=2)
#spk.speak(tosay_zh) for i in range(0, 5):
print(f"Speaking english with voice {i}")
spk.speak(tosay_en, voice_idx=i)