This commit is contained in:
maglore9900 2024-10-02 14:03:21 -04:00
parent 70869b910d
commit c244ccc04d
4 changed files with 128 additions and 1 deletions

View File

@ -10,6 +10,7 @@ written to work on Windows. Agent and logic will run on linux but some tools are
it currently will respond as an LLM like usual, but also has the following capabilities:
- custom prompt options
- can also control spotify
- can open applications on windows
- can change the focused window
@ -66,3 +67,7 @@ this tool will open an application. when you run max it will create an index of
This tool will set a timer with a popup. You tell Max to set a timer for X amount of time; it will convert it to seconds on the backend and create the timer.
the default timer will have a "clippy" popup, with potentially custom text
# Custom Prompt
Max Headroom is the default prompt. If you want to make a custom prompt, look in modules/prompts.py and add it there, then set the name in .env.

View File

@ -11,7 +11,6 @@ asyncio.set_event_loop(loop)
if os.name == "nt":
print("windows")
op = "windows"
elif os.name == "posix":
# Further check to differentiate between Linux and macOS
@ -38,4 +37,5 @@ while True:
response = loop.run_until_complete(graph.invoke_agent(text))
if response:
graph.spk.glitch_stream_output(response)
# graph.spk.stream(response)

View File

@ -212,6 +212,67 @@ class Speak:
self.engine.say(text)
self.engine.runAndWait()
def stream(self, text):
    """Stream TTS audio for *text* from a local AllTalk-style server and play it.

    Sends the text to the streaming endpoint on localhost:7851, plays the
    returned 16-bit mono 24 kHz PCM through PyAudio as it arrives, and falls
    back to the offline pyttsx3 engine (`self.engine`) if anything fails.

    Args:
        text: The text to synthesize and speak.
    """
    voice = ""  # empty -> server default voice
    language = "en"
    output_file = "stream_output.wav"
    # Encode the text so it is safe to embed in the URL query string.
    encoded_text = urllib.parse.quote(text)
    streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"

    p = None
    stream = None
    try:
        # Timeout so a dead server fails over to pyttsx3 instead of hanging.
        response = requests.get(streaming_url, stream=True, timeout=10)
        response.raise_for_status()  # treat HTTP errors as failure, not silence

        p = pyaudio.PyAudio()
        chunk_size = 1024 * 6  # bytes per playback chunk; adjust if choppy
        audio_buffer = b''

        for chunk in response.iter_content(chunk_size=chunk_size):
            audio_buffer += chunk
            if len(audio_buffer) < chunk_size:
                continue
            segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,    # 16-bit samples
                frame_rate=24000,  # assumed server output rate -- TODO confirm
                channels=1,        # assumed mono -- TODO confirm
            )
            if stream is None:
                # Open the output device lazily, once we know the frame rate.
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=segment.frame_rate,
                                output=True)
            stream.write(segment.raw_data)
            audio_buffer = b''

        # BUG FIX: play the trailing partial buffer instead of dropping it
        # (the original truncated the last < chunk_size bytes of audio).
        # Trim to an even byte count so it aligns to whole 16-bit samples.
        audio_buffer = audio_buffer[:len(audio_buffer) - (len(audio_buffer) % 2)]
        if audio_buffer:
            segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,
                frame_rate=24000,
                channels=1,
            )
            if stream is None:
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=segment.frame_rate,
                                output=True)
            stream.write(segment.raw_data)
    except Exception:
        # Deliberate best-effort: fall back to the local pyttsx3 engine.
        self.engine.say(text)
        self.engine.runAndWait()
    finally:
        # Always release audio resources, even if streaming failed midway.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
# Example usage:
# sp = Speak(model="whisper") # or "whisper" or "google"
# transcription = sp.transcoder(time_listen=10)

View File

@ -221,6 +221,67 @@ class Speak:
self.engine.say(text)
self.engine.runAndWait()
def stream(self, text):
    """Stream TTS audio for *text* from a local AllTalk-style server and play it.

    Sends the text to the streaming endpoint on localhost:7851, plays the
    returned 16-bit mono 24 kHz PCM through PyAudio as it arrives, and falls
    back to the offline pyttsx3 engine (`self.engine`) if anything fails.

    Args:
        text: The text to synthesize and speak.
    """
    voice = ""  # empty -> server default voice
    language = "en"
    output_file = "stream_output.wav"
    # Encode the text so it is safe to embed in the URL query string.
    encoded_text = urllib.parse.quote(text)
    streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"

    p = None
    stream = None
    try:
        # Timeout so a dead server fails over to pyttsx3 instead of hanging.
        response = requests.get(streaming_url, stream=True, timeout=10)
        response.raise_for_status()  # treat HTTP errors as failure, not silence

        p = pyaudio.PyAudio()
        chunk_size = 1024 * 6  # bytes per playback chunk; adjust if choppy
        audio_buffer = b''

        for chunk in response.iter_content(chunk_size=chunk_size):
            audio_buffer += chunk
            if len(audio_buffer) < chunk_size:
                continue
            segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,    # 16-bit samples
                frame_rate=24000,  # assumed server output rate -- TODO confirm
                channels=1,        # assumed mono -- TODO confirm
            )
            if stream is None:
                # Open the output device lazily, once we know the frame rate.
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=segment.frame_rate,
                                output=True)
            stream.write(segment.raw_data)
            audio_buffer = b''

        # BUG FIX: play the trailing partial buffer instead of dropping it
        # (the original truncated the last < chunk_size bytes of audio).
        # Trim to an even byte count so it aligns to whole 16-bit samples.
        audio_buffer = audio_buffer[:len(audio_buffer) - (len(audio_buffer) % 2)]
        if audio_buffer:
            segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,
                frame_rate=24000,
                channels=1,
            )
            if stream is None:
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=segment.frame_rate,
                                output=True)
            stream.write(segment.raw_data)
    except Exception:
        # Deliberate best-effort: fall back to the local pyttsx3 engine.
        self.engine.say(text)
        self.engine.runAndWait()
    finally:
        # Always release audio resources, even if streaming failed midway.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
# Example usage:
# sp = Speak(model="vosk") # or "vosk" or "google"
# transcription = sp.transcoder(time_listen=10)