mirror of
https://github.com/maglore9900/max_headroom.git
synced 2025-07-22 18:10:06 +00:00
clean up
This commit is contained in:
parent
70869b910d
commit
c244ccc04d
@ -10,6 +10,7 @@ written to work on Windows. Agent and logic will run on linux but some tools are
|
|||||||
|
|
||||||
it currently will respond as an LLM like usual, but also has the following capabilities:
|
it currently will respond as an LLM like usual, but also has the following capabilities:
|
||||||
|
|
||||||
|
- custom prompt options
|
||||||
- can also control spotify
|
- can also control spotify
|
||||||
- can open applications on windows
|
- can open applications on windows
|
||||||
- can change the focused window
|
- can change the focused window
|
||||||
@ -66,3 +67,7 @@ this tool will open an application. when you run max it will create an index of
|
|||||||
this tool will set a timer with a popup. you tell max to set a timer for X time, it will convert it to seconds on the backend and create the timer.
|
this tool will set a timer with a popup. you tell max to set a time for X time, it will convert it to seconds on the backend and create the timer.
|
||||||
|
|
||||||
the default timer will have a "clippy" popup, with potentially custom text
|
the default timer will have a "clippy" popup, with potentially custom text
|
||||||
|
|
||||||
|
# Custom Prompt
|
||||||
|
|
||||||
|
Max Headroom is the default prompt. If you want to make a custom prompt, look in modules/prompts.py and add it there. Then set the name in .env
|
||||||
|
2
main.py
2
main.py
@ -11,7 +11,6 @@ asyncio.set_event_loop(loop)
|
|||||||
|
|
||||||
|
|
||||||
if os.name == "nt":
|
if os.name == "nt":
|
||||||
print("windows")
|
|
||||||
op = "windows"
|
op = "windows"
|
||||||
elif os.name == "posix":
|
elif os.name == "posix":
|
||||||
# Further check to differentiate between Linux and macOS
|
# Further check to differentiate between Linux and macOS
|
||||||
@ -38,4 +37,5 @@ while True:
|
|||||||
response = loop.run_until_complete(graph.invoke_agent(text))
|
response = loop.run_until_complete(graph.invoke_agent(text))
|
||||||
if response:
|
if response:
|
||||||
graph.spk.glitch_stream_output(response)
|
graph.spk.glitch_stream_output(response)
|
||||||
|
# graph.spk.stream(response)
|
||||||
|
|
||||||
|
@ -212,6 +212,67 @@ class Speak:
|
|||||||
self.engine.say(text)
|
self.engine.say(text)
|
||||||
self.engine.runAndWait()
|
self.engine.runAndWait()
|
||||||
|
|
||||||
|
def stream(self, text):
    """Stream TTS audio for *text* from a local AllTalk-style server and play it.

    Requests streamed PCM audio over HTTP and plays it chunk-by-chunk with
    PyAudio. On any failure (server down, bad response, playback error) it
    falls back to the offline engine (self.engine.say).

    Args:
        text: The text to speak.
    """
    # TTS request parameters; empty voice lets the server pick its default.
    voice = ""
    language = "en"
    output_file = "stream_output.wav"

    # URL-encode the text so spaces/punctuation survive the query string.
    encoded_text = urllib.parse.quote(text)
    streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"

    p = None
    stream = None
    try:
        # Stream the audio data; raise_for_status makes HTTP errors take
        # the offline-fallback path instead of failing mid-playback.
        response = requests.get(streaming_url, stream=True)
        response.raise_for_status()

        p = pyaudio.PyAudio()

        chunk_size = 1024 * 6  # bytes per playback write; tune if audio stutters
        audio_buffer = b''

        for chunk in response.iter_content(chunk_size=chunk_size):
            audio_buffer += chunk
            # Accumulate until a full chunk is available for smooth playback.
            if len(audio_buffer) < chunk_size:
                continue

            audio_segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,    # 16-bit PCM
                frame_rate=24000,  # assumed server output rate — TODO confirm
                channels=1,        # mono
            )

            # Lazily open the output stream on the first full chunk.
            if stream is None:
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=audio_segment.frame_rate,
                                output=True)

            stream.write(audio_segment.raw_data)
            audio_buffer = b''

        # BUGFIX: the original dropped any trailing partial chunk, cutting
        # off the end of the speech. Flush whatever remains.
        if audio_buffer:
            tail = AudioSegment(data=audio_buffer, sample_width=2,
                                frame_rate=24000, channels=1)
            if stream is None:
                stream = p.open(format=pyaudio.paInt16, channels=1,
                                rate=tail.frame_rate, output=True)
            stream.write(tail.raw_data)
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit). Best-effort offline fallback.
        self.engine.say(text)
        self.engine.runAndWait()
    finally:
        # BUGFIX: release audio resources even when an exception fired
        # mid-stream; the original leaked the stream/PyAudio instance then.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
|
||||||
|
|
||||||
|
|
||||||
# Example usage:
|
# Example usage:
|
||||||
# sp = Speak(model="whisper") # or "whisper" or "google"
|
# sp = Speak(model="whisper") # or "whisper" or "google"
|
||||||
# transcription = sp.transcoder(time_listen=10)
|
# transcription = sp.transcoder(time_listen=10)
|
||||||
|
@ -221,6 +221,67 @@ class Speak:
|
|||||||
self.engine.say(text)
|
self.engine.say(text)
|
||||||
self.engine.runAndWait()
|
self.engine.runAndWait()
|
||||||
|
|
||||||
|
def stream(self, text):
    """Speak *text* by streaming TTS audio from a local server.

    Fetches chunked audio from the local AllTalk-style endpoint and plays
    it with PyAudio as it arrives; any failure falls back to the offline
    pyttsx3 engine (self.engine).

    Args:
        text: The text to speak.
    """
    # Request parameters; empty voice means server default.
    voice = ""
    language = "en"
    output_file = "stream_output.wav"

    # Percent-encode the text for safe inclusion in the query string.
    encoded_text = urllib.parse.quote(text)
    streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"

    player = None
    out_stream = None
    try:
        response = requests.get(streaming_url, stream=True)
        # Treat HTTP error codes as failure so the fallback engine runs.
        response.raise_for_status()

        player = pyaudio.PyAudio()

        chunk_size = 1024 * 6  # playback granularity in bytes
        pending = b''

        for chunk in response.iter_content(chunk_size=chunk_size):
            pending += chunk
            if len(pending) < chunk_size:
                # Not enough buffered yet for a smooth write.
                continue

            segment = AudioSegment(
                data=pending,
                sample_width=2,    # 16-bit samples
                frame_rate=24000,  # assumed rate from the server — TODO confirm
                channels=1,        # mono
            )
            if out_stream is None:
                # Open the output stream once, on the first full chunk.
                out_stream = player.open(format=pyaudio.paInt16,
                                         channels=1,
                                         rate=segment.frame_rate,
                                         output=True)
            out_stream.write(segment.raw_data)
            pending = b''

        # BUGFIX: flush the final partial chunk the original discarded,
        # which truncated the tail of every utterance.
        if pending:
            segment = AudioSegment(data=pending, sample_width=2,
                                   frame_rate=24000, channels=1)
            if out_stream is None:
                out_stream = player.open(format=pyaudio.paInt16, channels=1,
                                         rate=segment.frame_rate, output=True)
            out_stream.write(segment.raw_data)
    except Exception:
        # Narrowed from bare `except:`; keep the deliberate best-effort
        # fallback to local TTS.
        self.engine.say(text)
        self.engine.runAndWait()
    finally:
        # BUGFIX: guarantee cleanup on both success and failure paths;
        # the original skipped it when an exception interrupted playback.
        if out_stream is not None:
            out_stream.stop_stream()
            out_stream.close()
        if player is not None:
            player.terminate()
|
||||||
|
|
||||||
|
|
||||||
# Example usage:
|
# Example usage:
|
||||||
# sp = Speak(model="vosk") # or "vosk" or "google"
|
# sp = Speak(model="vosk") # or "vosk" or "google"
|
||||||
# transcription = sp.transcoder(time_listen=10)
|
# transcription = sp.transcoder(time_listen=10)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user