Merge pull request #159 from Fosowl/dev

Fixed TTS not working with the web interface
This commit is contained in:
Martin 2025-05-03 19:23:16 +02:00 committed by GitHub
commit 9707dbcbf9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 19 additions and 7 deletions

7
api.py
View File

@ -151,9 +151,8 @@ async def get_latest_answer():
return JSONResponse(status_code=200, content=query_resp_history[-1])
return JSONResponse(status_code=404, content={"error": "No answer available"})
async def think_wrapper(interaction, query, tts_enabled):
async def think_wrapper(interaction, query):
try:
interaction.tts_enabled = tts_enabled
interaction.last_query = query
logger.info("Agents request is being processed")
success = await interaction.think()
@ -162,6 +161,8 @@ async def think_wrapper(interaction, query, tts_enabled):
interaction.last_success = False
else:
interaction.last_success = True
pretty_print(interaction.last_answer)
interaction.speak_answer()
return success
except Exception as e:
logger.error(f"Error in think_wrapper: {str(e)}")
@ -188,7 +189,7 @@ async def process_query(request: QueryRequest):
try:
is_generating = True
success = await think_wrapper(interaction, request.query, request.tts_enabled)
success = await think_wrapper(interaction, request.query)
is_generating = False
if not success:

1
cli.py
View File

@ -62,6 +62,7 @@ async def main():
interaction.get_user()
if await interaction.think():
interaction.show_answer()
interaction.speak_answer()
except Exception as e:
if config.getboolean('MAIN', 'save_session'):
interaction.save_session()

View File

@ -250,6 +250,7 @@ class BrowserAgent(Agent):
Expand on the finding or step that lead to success, and provide a conclusion that answer the request. Include link when possible.
Do not give advices or try to answer the human. Just structure the AI finding in a structured and clear way.
You should answer in the same language as the user.
"""
def search_prompt(self, user_prompt: str) -> str:

View File

@ -5,6 +5,7 @@ from sources.text_to_speech import Speech
from sources.utility import pretty_print, animate_thinking
from sources.router import AgentRouter
from sources.speech_to_text import AudioTranscriber, AudioRecorder
import threading
class Interaction:
@ -173,12 +174,20 @@ class Interaction:
return None
return self.current_agent.get_last_block_answer()
def speak_answer(self) -> None:
    """Read the last answer aloud on a background thread.

    Does nothing when no query has been recorded yet, or when any of
    ``tts_enabled``, ``last_answer`` or ``speech`` is falsy. Otherwise a
    new thread is started that calls ``speech.speak(last_answer)``; this
    method returns right after starting it, without joining.
    """
    if self.last_query is None:
        return
    if not (self.tts_enabled and self.last_answer and self.speech):
        return

    # Capture the engine and the text now so the worker uses the values
    # that were current when this method was called.
    engine = self.speech
    text = self.last_answer

    def _run_speech():
        engine.speak(text)

    worker = threading.Thread(target=_run_speech)
    worker.start()
def show_answer(self) -> None:
"""Show the answer to the user."""
if self.last_query is None:
return
if self.current_agent is not None:
self.current_agent.show_answer()
if self.tts_enabled and self.last_answer:
self.speech.speak(self.last_answer)

View File

@ -173,10 +173,10 @@ if __name__ == "__main__":
tosay_fr = """
J'ai consulté les dernières nouvelles sur le site https://www.theguardian.com/world
"""
spk = Speech(enable=True, language="ja", voice_idx=2)
spk = Speech(enable=True, language="zh", voice_idx=0)
for i in range(0, 2):
print(f"Speaking chinese with voice {i}")
spk.speak(tosay_ja, voice_idx=i)
spk.speak(tosay_zh, voice_idx=i)
spk = Speech(enable=True, language="en", voice_idx=2)
for i in range(0, 5):
print(f"Speaking english with voice {i}")