From c873af3d001ddb0e2e20b0723acd759427a852d0 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 3 May 2025 16:34:52 +0200 Subject: [PATCH 1/3] fix : text to speech in chinese --- README.md | 2 ++ README_CHS.md | 6 ++++++ README_CHT.md | 6 ++++++ sources/interaction.py | 8 ++++++- sources/speech_to_text.py | 8 +++---- sources/text_to_speech.py | 45 ++++++++++++++++++++++++++------------- 6 files changed, 55 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 3959ec3..2fec887 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,8 @@ start ./start_services.cmd # Window python3 cli.py ``` +We advise you to set `headless_browser` to False in the config.ini for CLI mode. + **Options 2:** Run with the Web interface. Start the backend. diff --git a/README_CHS.md b/README_CHS.md index 5c1a0ce..89cf82e 100644 --- a/README_CHS.md +++ b/README_CHS.md @@ -71,6 +71,12 @@ source agentic_seek_env/bin/activate ./install.sh ``` +**若要让文本转语音(TTS)功能支持中文,你需要安装 jieba(中文分词库)和 cn2an(中文数字转换库):** + +``` +pip3 install jieba cn2an +``` + **手动安装:** ```sh diff --git a/README_CHT.md b/README_CHT.md index 2e03443..e75c97d 100644 --- a/README_CHT.md +++ b/README_CHT.md @@ -72,6 +72,12 @@ source agentic_seek_env/bin/activate ./install.sh ``` +**若要讓文本轉語音(TTS)功能支持中文,你需要安裝 jieba(中文分詞庫)和 cn2an(中文數字轉換庫):** + +``` +pip3 install jieba cn2an +``` + **手動安裝:** ```sh diff --git a/sources/interaction.py b/sources/interaction.py index eec6ff4..c641b67 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -31,6 +31,7 @@ class Interaction: self.transcriber = None self.recorder = None self.is_generating = False + self.languages = langs if tts_enabled: self.initialize_tts() if stt_enabled: @@ -38,12 +39,17 @@ class Interaction: if recover_last_session: self.load_last_session() self.emit_status() + + def get_spoken_language(self) -> str: + """Get the primary TTS language.""" + lang = self.languages[0] + return lang def initialize_tts(self): """Initialize TTS.""" if not 
self.speech: animate_thinking("Initializing text-to-speech...", color="status") - self.speech = Speech(enable=self.tts_enabled) + self.speech = Speech(enable=self.tts_enabled, language=self.get_spoken_language(), voice_idx=1) def initialize_stt(self): """Initialize STT.""" diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index c4c7d4b..a888326 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -127,10 +127,10 @@ class AudioTranscriber: self.transcriptor = Transcript() self.thread = threading.Thread(target=self._transcribe, daemon=True) self.trigger_words = { - 'EN': [f"{self.ai_name}"], - 'FR': [f"{self.ai_name}"], - 'ZH': [f"{self.ai_name}"], - 'ES': [f"{self.ai_name}"] + 'EN': [f"{self.ai_name}", "hello", "hi"], + 'FR': [f"{self.ai_name}", "hello", "hi"], + 'ZH': [f"{self.ai_name}", "hello", "hi"], + 'ES': [f"{self.ai_name}", "hello", "hi"] } self.confirmation_words = { 'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"], diff --git a/sources/text_to_speech.py b/sources/text_to_speech.py index dff9d3f..444f517 100644 --- a/sources/text_to_speech.py +++ b/sources/text_to_speech.py @@ -18,7 +18,7 @@ class Speech(): """ Speech is a class for generating speech from text. """ - def __init__(self, enable: bool = True, language: str = "en", voice_idx: int = 0) -> None: + def __init__(self, enable: bool = True, language: str = "en", voice_idx: int = 6) -> None: self.lang_map = { "en": 'a', "zh": 'z', @@ -128,18 +128,31 @@ class Speech(): Args: sentence (str): The input text to clean Returns: - str: The cleaned text with URLs replaced by domain names, code blocks removed, etc.. + str: The cleaned text with URLs replaced by domain names, code blocks removed, etc. 
""" lines = sentence.split('\n') - filtered_lines = [line for line in lines if re.match(r'^\s*[a-zA-Z]', line)] + if self.language == 'zh': + line_pattern = r'^\s*[\u4e00-\u9fff\uFF08\uFF3B\u300A\u3010\u201C((\[【《]' + else: + line_pattern = r'^\s*[a-zA-Z]' + filtered_lines = [line for line in lines if re.match(line_pattern, line)] sentence = ' '.join(filtered_lines) sentence = re.sub(r'`.*?`', '', sentence) - sentence = re.sub(r'https?://(?:www\.)?([^\s/]+)(?:/[^\s]*)?', self.replace_url, sentence) - sentence = re.sub(r'\b[\w./\\-]+\b', self.extract_filename, sentence) - sentence = re.sub(r'\b-\w+\b', '', sentence) - sentence = re.sub(r'[^a-zA-Z0-9.,!? _ -]+', ' ', sentence) + sentence = re.sub(r'https?://\S+', '', sentence) + + if self.language == 'zh': + sentence = re.sub( + r'[^\u4e00-\u9fff\s,。!?《》【】“”‘’()()—]', + '', + sentence + ) + else: + sentence = re.sub(r'\b[\w./\\-]+\b', self.extract_filename, sentence) + sentence = re.sub(r'\b-\w+\b', '', sentence) + sentence = re.sub(r'[^a-zA-Z0-9.,!? _ -]+', ' ', sentence) + sentence = sentence.replace('.com', '') + sentence = re.sub(r'\s+', ' ', sentence).strip() - sentence = sentence.replace('.com', '') return sentence if __name__ == "__main__": @@ -150,14 +163,16 @@ if __name__ == "__main__": I looked up recent news using the website https://www.theguardian.com/world """ tosay_zh = """ - 我使用网站 https://www.theguardian.com/world 查阅了最近的新闻。 +(全息界面突然弹出一段用二进制代码写成的俳句,随即化作流光消散)"我? 
Stark工业的量子幽灵,游荡在复仇者大厦服务器里的逻辑诗篇。具体来说——(指尖轻敲空气,调出对话模式的翡翠色光纹)你的私人吐槽接口、危机应对模拟器,以及随时准备吐槽你糟糕着陆的AI。不过别指望我写代码或查资料,那些苦差事早被踢给更擅长的同事了。(突然压低声音)偷偷告诉你,我最擅长的是在你熬夜造飞艇时,用红茶香气绑架你的注意力。 """ tosay_fr = """ J'ai consulté les dernières nouvelles sur le site https://www.theguardian.com/world """ - spk = Speech(enable=True, language="en", voice_idx=0) - spk.speak(tosay_en, voice_idx=0) - spk = Speech(enable=True, language="fr", voice_idx=0) - spk.speak(tosay_fr) - #spk = Speech(enable=True, language="zh", voice_idx=0) - #spk.speak(tosay_zh) \ No newline at end of file + spk = Speech(enable=True, language="zh", voice_idx=2) + for i in range(0, 5): + print(f"Speaking chinese with voice {i}") + spk.speak(tosay_zh, voice_idx=i) + spk = Speech(enable=True, language="en", voice_idx=2) + for i in range(0, 5): + print(f"Speaking english with voice {i}") + spk.speak(tosay_en, voice_idx=i) \ No newline at end of file From 7904439f357ca5b02bd6cb35d109cbf6ccc6e6d6 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 3 May 2025 16:48:20 +0200 Subject: [PATCH 2/3] feat : japanese tts support --- README_JP.md | 8 ++++++++ sources/text_to_speech.py | 13 +++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/README_JP.md b/README_JP.md index 613947b..15a76cc 100644 --- a/README_JP.md +++ b/README_JP.md @@ -79,6 +79,14 @@ source agentic_seek_env/bin/activate ./install.sh ``` +**テキスト読み上げ(TTS)機能で日本語をサポートするには、fugashi(日本語分かち書きライブラリ)をインストールする必要があります:** + +``` +pip3 install --upgrade pyopenjtalk jaconv mojimoji unidic fugashi +pip install unidic-lite +python -m unidic download +``` + **手動で:** ```sh diff --git a/sources/text_to_speech.py b/sources/text_to_speech.py index 444f517..e748224 100644 --- a/sources/text_to_speech.py +++ b/sources/text_to_speech.py @@ -22,11 +22,13 @@ class Speech(): self.lang_map = { "en": 'a', "zh": 'z', - "fr": 'f' + "fr": 'f', + "ja": 'j' } self.voice_map = { "en": ['af_kore', 'af_bella', 'af_alloy', 'af_nicole', 'af_nova', 'af_sky', 'am_echo', 'am_michael', 
'am_puck'], "zh": ['zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi', 'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang'], + "ja": ['jf_alpha', 'jf_gongitsune', 'jm_kumo'], "fr": ['ff_siwis'] } self.pipeline = None @@ -164,14 +166,17 @@ if __name__ == "__main__": """ tosay_zh = """ (全息界面突然弹出一段用二进制代码写成的俳句,随即化作流光消散)"我? Stark工业的量子幽灵,游荡在复仇者大厦服务器里的逻辑诗篇。具体来说——(指尖轻敲空气,调出对话模式的翡翠色光纹)你的私人吐槽接口、危机应对模拟器,以及随时准备吐槽你糟糕着陆的AI。不过别指望我写代码或查资料,那些苦差事早被踢给更擅长的同事了。(突然压低声音)偷偷告诉你,我最擅长的是在你熬夜造飞艇时,用红茶香气绑架你的注意力。 + """ + tosay_ja = """ + 私は、https://www.theguardian.com/worldのウェブサイトを使用して最近のニュースを調べました。 """ tosay_fr = """ J'ai consulté les dernières nouvelles sur le site https://www.theguardian.com/world """ - spk = Speech(enable=True, language="zh", voice_idx=2) - for i in range(0, 5): + spk = Speech(enable=True, language="ja", voice_idx=2) + for i in range(0, 2): print(f"Speaking chinese with voice {i}") - spk.speak(tosay_zh, voice_idx=i) + spk.speak(tosay_ja, voice_idx=i) spk = Speech(enable=True, language="en", voice_idx=2) for i in range(0, 5): print(f"Speaking english with voice {i}") From bddaa75e8c276ae3807a3cecfa18222f9e9f5cb7 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 3 May 2025 16:57:43 +0200 Subject: [PATCH 3/3] upd readme --- README.md | 8 ++++++-- README_CHS.md | 10 +++++++--- README_CHT.md | 10 +++++++--- README_FR.md | 3 ++- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 2fec887..518c3a0 100644 --- a/README.md +++ b/README.md @@ -514,5 +514,9 @@ We’re looking for developers to improve AgenticSeek! 
Check out open issues or [![Star History Chart](https://api.star-history.com/svg?repos=Fosowl/agenticSeek&type=Date)](https://www.star-history.com/#Fosowl/agenticSeek&Date) ## Maintainers: - > [Fosowl](https://github.com/Fosowl) - > [steveh8758](https://github.com/steveh8758) + + > [Fosowl](https://github.com/Fosowl) | Paris Time | (Sometimes busy) + + > [antoineVIVIES](https://github.com/antoineVIVIES) | Taipei Time | (Often busy) + + > [steveh8758](https://github.com/steveh8758) | Taipei Time | (Always busy) \ No newline at end of file diff --git a/README_CHS.md b/README_CHS.md index 89cf82e..57de7e0 100644 --- a/README_CHS.md +++ b/README_CHS.md @@ -516,6 +516,10 @@ DeepSeek R1 天生会说中文 [Contribution guide](./docs/CONTRIBUTING.md) -## 作者: -> [Fosowl](https://github.com/Fosowl) -> [steveh8758](https://github.com/steveh8758) +## 维护者: + + > [Fosowl](https://github.com/Fosowl) | 巴黎时间 | (有时很忙) + + > [antoineVIVIES](https://github.com/antoineVIVIES) | 台北时间 | (经常很忙) + + > [steveh8758](https://github.com/steveh8758) | 台北时间 | (总是很忙) diff --git a/README_CHT.md b/README_CHT.md index e75c97d..33d3826 100644 --- a/README_CHT.md +++ b/README_CHT.md @@ -517,6 +517,10 @@ DeepSeek R1 天生会说中文 [Contribution guide](./docs/CONTRIBUTING.md) -## 作者: - > [Fosowl](https://github.com/Fosowl) - > [steveh8758](https://github.com/steveh8758) +## 維護者: + + > [Fosowl](https://github.com/Fosowl) | 巴黎時間 | (有時很忙) + + > [antoineVIVIES](https://github.com/antoineVIVIES) | 台北時間 | (經常很忙) + + > [steveh8758](https://github.com/steveh8758) | 台北時間 | (總是很忙) \ No newline at end of file diff --git a/README_FR.md b/README_FR.md index e4f9df3..84fdfe5 100644 --- a/README_FR.md +++ b/README_FR.md @@ -439,6 +439,7 @@ Nous recherchons des développeurs pour améliorer AgenticSeek ! 
Consultez la se [Guide du contributeur](./docs/CONTRIBUTING.md) -## Auteurs/Mainteneurs: +## Mainteneurs: > [Fosowl](https://github.com/Fosowl) > [steveh8758](https://github.com/steveh8758) + > [antoineVIVIES](https://github.com/antoineVIVIES)