mirror of
https://github.com/maglore9900/max_headroom.git
synced 2025-07-22 18:10:06 +00:00
clean up
This commit is contained in:
parent
70869b910d
commit
c244ccc04d
@ -10,6 +10,7 @@ written to work on Windows. Agent and logic will run on linux but some tools are
|
|||||||
|
|
||||||
it currently will respond as an LLM like usual, but also has the following capabilities:
|
it currently will respond as an LLM like usual, but also has the following capabilities:
|
||||||
|
|
||||||
|
- custom prompt options
|
||||||
- can also control spotify
|
- can also control spotify
|
||||||
- can open applications on windows
|
- can open applications on windows
|
||||||
- can change the focused window
|
- can change the focused window
|
||||||
@ -66,3 +67,7 @@ this tool will open an application. when you run max it will create an index of
|
|||||||
this tool will set a timer with a popup. you tell max to set a timer for X time, it will convert it to seconds on the backend and create the timer.
|
this tool will set a timer with a popup. you tell max to set a time for X time, it will convert it to seconds on the backend and create the timer.
|
||||||
|
|
||||||
the default timer will have a "clippy" popup, with potentially custom text
|
the default timer will have a "clippy" popup, with potentially custom text
|
||||||
|
|
||||||
|
# Custom Prompt
|
||||||
|
|
||||||
|
Max Headroom is the default prompt. If you want to make a custom prompt, look in modules/prompts.py and add it there. Then set the name in .env
|
||||||
|
2
main.py
2
main.py
@ -11,7 +11,6 @@ asyncio.set_event_loop(loop)
|
|||||||
|
|
||||||
|
|
||||||
if os.name == "nt":
|
if os.name == "nt":
|
||||||
print("windows")
|
|
||||||
op = "windows"
|
op = "windows"
|
||||||
elif os.name == "posix":
|
elif os.name == "posix":
|
||||||
# Further check to differentiate between Linux and macOS
|
# Further check to differentiate between Linux and macOS
|
||||||
@ -38,4 +37,5 @@ while True:
|
|||||||
response = loop.run_until_complete(graph.invoke_agent(text))
|
response = loop.run_until_complete(graph.invoke_agent(text))
|
||||||
if response:
|
if response:
|
||||||
graph.spk.glitch_stream_output(response)
|
graph.spk.glitch_stream_output(response)
|
||||||
|
# graph.spk.stream(response)
|
||||||
|
|
||||||
|
@ -212,6 +212,67 @@ class Speak:
|
|||||||
self.engine.say(text)
|
self.engine.say(text)
|
||||||
self.engine.runAndWait()
|
self.engine.runAndWait()
|
||||||
|
|
||||||
|
def stream(self, text):
    """Stream TTS audio for *text* from a local AllTalk-style server and play it.

    Requests streamed PCM audio over HTTP and plays it chunk-by-chunk with
    PyAudio. On any failure (server down, bad response, playback error) it
    falls back to the offline engine (self.engine.say).

    Args:
        text: The text to speak.
    """
    # TTS request parameters; empty voice lets the server pick its default.
    voice = ""
    language = "en"
    output_file = "stream_output.wav"

    # URL-encode the text so spaces/punctuation survive the query string.
    encoded_text = urllib.parse.quote(text)
    streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"

    p = None
    stream = None
    try:
        # Stream the audio data; raise_for_status makes HTTP errors take
        # the offline-fallback path instead of failing mid-playback.
        response = requests.get(streaming_url, stream=True)
        response.raise_for_status()

        p = pyaudio.PyAudio()

        chunk_size = 1024 * 6  # bytes per playback write; tune if audio stutters
        audio_buffer = b''

        for chunk in response.iter_content(chunk_size=chunk_size):
            audio_buffer += chunk
            # Accumulate until a full chunk is available for smooth playback.
            if len(audio_buffer) < chunk_size:
                continue

            audio_segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,    # 16-bit PCM
                frame_rate=24000,  # assumed server output rate — TODO confirm
                channels=1,        # mono
            )

            # Lazily open the output stream on the first full chunk.
            if stream is None:
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=audio_segment.frame_rate,
                                output=True)

            stream.write(audio_segment.raw_data)
            audio_buffer = b''

        # BUGFIX: the original dropped any trailing partial chunk, cutting
        # off the end of the speech. Flush whatever remains.
        if audio_buffer:
            tail = AudioSegment(data=audio_buffer, sample_width=2,
                                frame_rate=24000, channels=1)
            if stream is None:
                stream = p.open(format=pyaudio.paInt16, channels=1,
                                rate=tail.frame_rate, output=True)
            stream.write(tail.raw_data)
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit). Best-effort offline fallback.
        self.engine.say(text)
        self.engine.runAndWait()
    finally:
        # BUGFIX: release audio resources even when an exception fired
        # mid-stream; the original leaked the stream/PyAudio instance then.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
|
||||||
|
|
||||||
|
|
||||||
# Example usage:
|
# Example usage:
|
||||||
# sp = Speak(model="whisper") # or "whisper" or "google"
|
# sp = Speak(model="whisper") # or "whisper" or "google"
|
||||||
# transcription = sp.transcoder(time_listen=10)
|
# transcription = sp.transcoder(time_listen=10)
|
||||||
|
@ -221,6 +221,67 @@ class Speak:
|
|||||||
self.engine.say(text)
|
self.engine.say(text)
|
||||||
self.engine.runAndWait()
|
self.engine.runAndWait()
|
||||||
|
|
||||||
|
def stream(self, text):
    """Speak *text* by streaming TTS audio from a local server.

    Fetches chunked audio from the local AllTalk-style endpoint and plays
    it with PyAudio as it arrives; any failure falls back to the offline
    pyttsx3 engine (self.engine).

    Args:
        text: The text to speak.
    """
    # Request parameters; empty voice means server default.
    voice = ""
    language = "en"
    output_file = "stream_output.wav"

    # Percent-encode the text for safe inclusion in the query string.
    encoded_text = urllib.parse.quote(text)
    streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"

    player = None
    out_stream = None
    try:
        response = requests.get(streaming_url, stream=True)
        # Treat HTTP error codes as failure so the fallback engine runs.
        response.raise_for_status()

        player = pyaudio.PyAudio()

        chunk_size = 1024 * 6  # playback granularity in bytes
        pending = b''

        for chunk in response.iter_content(chunk_size=chunk_size):
            pending += chunk
            if len(pending) < chunk_size:
                # Not enough buffered yet for a smooth write.
                continue

            segment = AudioSegment(
                data=pending,
                sample_width=2,    # 16-bit samples
                frame_rate=24000,  # assumed rate from the server — TODO confirm
                channels=1,        # mono
            )
            if out_stream is None:
                # Open the output stream once, on the first full chunk.
                out_stream = player.open(format=pyaudio.paInt16,
                                         channels=1,
                                         rate=segment.frame_rate,
                                         output=True)
            out_stream.write(segment.raw_data)
            pending = b''

        # BUGFIX: flush the final partial chunk the original discarded,
        # which truncated the tail of every utterance.
        if pending:
            segment = AudioSegment(data=pending, sample_width=2,
                                   frame_rate=24000, channels=1)
            if out_stream is None:
                out_stream = player.open(format=pyaudio.paInt16, channels=1,
                                         rate=segment.frame_rate, output=True)
            out_stream.write(segment.raw_data)
    except Exception:
        # Narrowed from bare `except:`; keep the deliberate best-effort
        # fallback to local TTS.
        self.engine.say(text)
        self.engine.runAndWait()
    finally:
        # BUGFIX: guarantee cleanup on both success and failure paths;
        # the original skipped it when an exception interrupted playback.
        if out_stream is not None:
            out_stream.stop_stream()
            out_stream.close()
        if player is not None:
            player.terminate()
|
||||||
|
|
||||||
|
|
||||||
# Example usage:
|
# Example usage:
|
||||||
# sp = Speak(model="vosk") # or "vosk" or "google"
|
# sp = Speak(model="vosk") # or "vosk" or "google"
|
||||||
# transcription = sp.transcoder(time_listen=10)
|
# transcription = sp.transcoder(time_listen=10)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user