first version

2025-07-22 18:10:06 +00:00 · 2024-08-17 23:17:36 -04:00 · 2024-08-17 23:17:36 -04:00 · 61102738b7
commit 61102738b7
parent 661f4200a6
4 changed files with 107 additions and 4 deletions
--- a/alltalk_test.py
+++ b/alltalk_test.py
@ -0,0 +1,39 @@
+import requests
+import winsound
+
+# Use the API endpoint to generate TTS
+url = "http://127.0.0.1:7851/api/tts-generate"
+
+# Prepare the form data
+data = {
+    "text_input": "This is a test stream.",
+    "text_filtering": "standard",
+    "character_voice_gen": "maxheadroom_00000005.wav",
+    "narrator_enabled": "false",
+    "narrator_voice_gen": "male_01.wav",
+    "text_not_inside": "character",
+    "language": "en",
+    "output_file_name": "stream_output",
+    "output_file_timestamp": "true",
+    "autoplay": "false",
+    "autoplay_volume": "0.8"
+}
+
+# Send the POST request to generate TTS
+response = requests.post(url, data=data)
+
+# Check if the request was successful
+if response.status_code == 200:
+    # Parse the JSON response to get the file URL
+    result = response.json()
+    audio_url = result['output_file_url']
+    
+    # Download the audio file
+    audio_response = requests.get(audio_url)
+    
+    # Save the audio file locally
+    with open("output.wav", "wb") as f:
+        f.write(audio_response.content)
+    winsound.PlaySound('output.wav', winsound.SND_FILENAME)
+else:
+    print(f"Failed with status code {response.status_code}: {response.text}")
--- a/main.py
+++ b/main.py
@ -1,11 +1,13 @@
-from modules import adapter, stt
+from modules import adapter, speak

-stt = stt.STT()
+
+sp = speak.Speak()
 ad = adapter.Adapter("openai")


 while True:
-    text = stt.listen()
+    text = sp.listen()
    response = ad.chat(text)
-    stt.speak(response)
+    sp.max_headroom(response)
+    
    print("Listening again...")
--- a/modules/speak.py
+++ b/modules/speak.py
@ -0,0 +1,62 @@
+import requests
+import winsound
+import speech_recognition as sr
+import pyttsx3 
+import os
+
+class Speak:
+    def __init__(self):
+        self.url = "http://127.0.0.1:7851/api/tts-generate"
+        self.recognizer = sr.Recognizer()
+        self.microphone = sr.Microphone()
+        self.engine = pyttsx3.init()
+        self.engine.setProperty('rate', 150)
+
+    def max_headroom(self, text):
+        data = {
+            "text_input": str(text),
+            "text_filtering": "standard",
+            "character_voice_gen": "maxheadroom_00000045.wav",
+            "narrator_enabled": "false",
+            "narrator_voice_gen": "male_01.wav",
+            "text_not_inside": "character",
+            "language": "en",
+            "output_file_name": "stream_output",
+            "output_file_timestamp": "true",
+            "autoplay": "false",
+            "autoplay_volume": "0.8"
+        }
+        # Send the POST request to generate TTS
+        response = requests.post(self.url, data=data)
+
+        # Check if the request was successful
+        if response.status_code == 200:
+            # Parse the JSON response to get the file URL
+            result = response.json()
+            audio_url = result['output_file_url']
+            
+            # Download the audio file
+            audio_response = requests.get(audio_url)
+            
+            output_path = os.path.abspath("tmp/output.wav")
+            # Save the audio file locally
+            with open(output_path, "wb") as f:
+                f.write(audio_response.content)
+            winsound.PlaySound(output_path, winsound.SND_FILENAME)
+        else:
+            print(f"Failed with status code {response.status_code}: {response.text}")
+            self.engine.say(text)
+            self.engine.runAndWait()
+            
+    def listen(self):
+        with self.microphone as source:
+            print("Listening...")
+            audio = self.recognizer.listen(source)
+            try:
+                text = self.recognizer.recognize_google(audio)
+                print("You said: ", text)
+                return text
+            except sr.UnknownValueError:
+                print("Sorry, I didn't get that.")
+            except sr.RequestError as e:
+                print("Sorry, I couldn't request results; {0}".format(e))
--- a/tmp/output.wav
+++ b/tmp/output.wav