This commit is contained in:
maglore9900 2024-10-02 14:03:21 -04:00
parent 70869b910d
commit c244ccc04d
4 changed files with 128 additions and 1 deletions

View File

@ -10,6 +10,7 @@ written to work on Windows. Agent and logic will run on linux but some tools are
it currently will respond as an LLM like usual, but also has the following capabilities:
- custom prompt options
- can also control spotify
- can open applications on windows
- can change the focused window
@ -66,3 +67,7 @@ this tool will open an application. when you run max it will create an index of
This tool will set a timer with a popup. You tell Max to set a timer for X amount of time; it will convert it to seconds on the backend and create the timer.
the default timer will have a "clippy" popup, with potentially custom text
# Custom Prompt
Max Headroom is the default prompt. If you want to make a custom prompt, look in modules/prompts.py and add it there, then set the name in .env.

View File

@ -11,7 +11,6 @@ asyncio.set_event_loop(loop)
if os.name == "nt":
print("windows")
op = "windows"
elif os.name == "posix":
# Further check to differentiate between Linux and macOS
@ -38,4 +37,5 @@ while True:
response = loop.run_until_complete(graph.invoke_agent(text))
if response:
graph.spk.glitch_stream_output(response)
# graph.spk.stream(response)

View File

@ -212,6 +212,67 @@ class Speak:
self.engine.say(text)
self.engine.runAndWait()
def stream(self, text):
    """Stream TTS audio for *text* from a local AllTalk-style server and play it.

    Sends the text to the streaming endpoint on localhost:7851, plays the
    returned 16-bit mono 24 kHz PCM through PyAudio as it arrives, and falls
    back to the offline pyttsx3 engine (`self.engine`) if anything fails.

    Args:
        text: The text to synthesize and speak.
    """
    voice = ""  # empty -> server default voice
    language = "en"
    output_file = "stream_output.wav"
    # Encode the text so it is safe to embed in the URL query string.
    encoded_text = urllib.parse.quote(text)
    streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"

    p = None
    stream = None
    try:
        # Timeout so a dead server fails over to pyttsx3 instead of hanging.
        response = requests.get(streaming_url, stream=True, timeout=10)
        response.raise_for_status()  # treat HTTP errors as failure, not silence

        p = pyaudio.PyAudio()
        chunk_size = 1024 * 6  # bytes per playback chunk; adjust if choppy
        audio_buffer = b''

        for chunk in response.iter_content(chunk_size=chunk_size):
            audio_buffer += chunk
            if len(audio_buffer) < chunk_size:
                continue
            segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,    # 16-bit samples
                frame_rate=24000,  # assumed server output rate -- TODO confirm
                channels=1,        # assumed mono -- TODO confirm
            )
            if stream is None:
                # Open the output device lazily, once we know the frame rate.
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=segment.frame_rate,
                                output=True)
            stream.write(segment.raw_data)
            audio_buffer = b''

        # BUG FIX: play the trailing partial buffer instead of dropping it
        # (the original truncated the last < chunk_size bytes of audio).
        # Trim to an even byte count so it aligns to whole 16-bit samples.
        audio_buffer = audio_buffer[:len(audio_buffer) - (len(audio_buffer) % 2)]
        if audio_buffer:
            segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,
                frame_rate=24000,
                channels=1,
            )
            if stream is None:
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=segment.frame_rate,
                                output=True)
            stream.write(segment.raw_data)
    except Exception:
        # Deliberate best-effort: fall back to the local pyttsx3 engine.
        self.engine.say(text)
        self.engine.runAndWait()
    finally:
        # Always release audio resources, even if streaming failed midway.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
# Example usage:
# sp = Speak(model="whisper") # or "whisper" or "google"
# transcription = sp.transcoder(time_listen=10)

View File

@ -221,6 +221,67 @@ class Speak:
self.engine.say(text)
self.engine.runAndWait()
def stream(self, text):
    """Stream TTS audio for *text* from a local AllTalk-style server and play it.

    Sends the text to the streaming endpoint on localhost:7851, plays the
    returned 16-bit mono 24 kHz PCM through PyAudio as it arrives, and falls
    back to the offline pyttsx3 engine (`self.engine`) if anything fails.

    Args:
        text: The text to synthesize and speak.
    """
    voice = ""  # empty -> server default voice
    language = "en"
    output_file = "stream_output.wav"
    # Encode the text so it is safe to embed in the URL query string.
    encoded_text = urllib.parse.quote(text)
    streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"

    p = None
    stream = None
    try:
        # Timeout so a dead server fails over to pyttsx3 instead of hanging.
        response = requests.get(streaming_url, stream=True, timeout=10)
        response.raise_for_status()  # treat HTTP errors as failure, not silence

        p = pyaudio.PyAudio()
        chunk_size = 1024 * 6  # bytes per playback chunk; adjust if choppy
        audio_buffer = b''

        for chunk in response.iter_content(chunk_size=chunk_size):
            audio_buffer += chunk
            if len(audio_buffer) < chunk_size:
                continue
            segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,    # 16-bit samples
                frame_rate=24000,  # assumed server output rate -- TODO confirm
                channels=1,        # assumed mono -- TODO confirm
            )
            if stream is None:
                # Open the output device lazily, once we know the frame rate.
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=segment.frame_rate,
                                output=True)
            stream.write(segment.raw_data)
            audio_buffer = b''

        # BUG FIX: play the trailing partial buffer instead of dropping it
        # (the original truncated the last < chunk_size bytes of audio).
        # Trim to an even byte count so it aligns to whole 16-bit samples.
        audio_buffer = audio_buffer[:len(audio_buffer) - (len(audio_buffer) % 2)]
        if audio_buffer:
            segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,
                frame_rate=24000,
                channels=1,
            )
            if stream is None:
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=segment.frame_rate,
                                output=True)
            stream.write(segment.raw_data)
    except Exception:
        # Deliberate best-effort: fall back to the local pyttsx3 engine.
        self.engine.say(text)
        self.engine.runAndWait()
    finally:
        # Always release audio resources, even if streaming failed midway.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
# Example usage:
# sp = Speak(model="vosk") # or "vosk" or "google"
# transcription = sp.transcoder(time_listen=10)