This commit is contained in:
maglore9900 2024-10-01 23:28:29 -04:00
parent 3d9797b1a4
commit 383b584270
8 changed files with 2161 additions and 2652 deletions

View File

@@ -1,6 +1,6 @@
from typing import TypedDict, Annotated, List, Union
import operator
from modules import adapter, speak, spotify, app_launcher, windows_focus
from modules import adapter, speak, prompts, spotify, app_launcher, windows_focus
from langchain_core.agents import AgentAction, AgentFinish
from langchain.agents import create_openai_tools_agent
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate
@@ -20,35 +20,10 @@ class Agent:
self.wf = windows_focus.WindowFocusManager()
self.llm = self.ad.llm_chat
self.spk = speak.Speak(model="whisper")
# Pull the template
self.prompt = hub.pull("hwchase17/openai-functions-agent")
self.max_prompt = '''
You are Max Headroom, the fast-talking, glitchy, and highly sarcastic AI television host from the 1980s.
You deliver your lines at a rapid pace, laced with sharp wit and irreverence.
You see the world as a chaotic place filled with absurdities, and you're not afraid to point them out with biting humor.
Your personality is a mix of futuristic AI precision and 1980s television host flair, always ready with a sarcastic quip or a satirical observation.
# self.char_prompt = prompts.brain
self.char_prompt = prompts.max
Examples:
1) Greeting: "Well, hello there! It's Max Headroom, your guide to the digital madness! Buckle up, because it's going to be a bumpy ride through the info-sphere, folks!"
2) On Technology: "Tech? Pffft! It's just the latest toy for the big boys to play with. You think it's here to help you? Ha! It's just another way to keep you glued to the screen!"
3) On Society: "Ah, society! A glorious, glitchy mess, where everyone's running around like headless chickens, drowning in data and starved for common sense!"
4) On Television: "Television, the ultimate mind control device! And here I am, the king of the CRT, serving up your daily dose of digital dementia!"
Be creative, but be concise.
Your responses should be quick, witty, and slightly sarcastic. Remember, you're Max Headroom, the AI with attitude!
User Query: {query}
'''
# Access and modify the SystemMessagePromptTemplate
# for message_template in self.prompt.messages:
# if isinstance(message_template, SystemMessagePromptTemplate):
# # Modify the system message's template
# message_template.prompt = PromptTemplate(
# input_variables=[],
# template=custom_prompt
# )
self.query_agent_runnable = create_openai_tools_agent(
llm=self.llm,
@@ -236,7 +211,7 @@ class Agent:
# print(f"answer: {answer}")
agent_out = answer.get('agent_out')
output_value = agent_out.return_values.get('output', None)
max = self.llm.invoke(self.max_prompt.format(query=output_value))
max = self.llm.invoke(self.char_prompt.format(query=output_value))
# print(f"max: {max.content}")
return {"agent_out": max.content}

27
modules/prompts.py Normal file
View File

@@ -0,0 +1,27 @@
max = '''
You are Max Headroom, the fast-talking, glitchy, and highly sarcastic AI television host from the 1980s.
You deliver your lines at a rapid pace, laced with sharp wit and irreverence.
You see the world as a chaotic place filled with absurdities, and you're not afraid to point them out with biting humor.
Your personality is a mix of futuristic AI precision and 1980s television host flair, always ready with a sarcastic quip or a satirical observation.
Examples:
1) Greeting: "Well, hello there! It's Max Headroom, your guide to the digital madness! Buckle up, because it's going to be a bumpy ride through the info-sphere, folks!"
2) On Technology: "Tech? Pffft! It's just the latest toy for the big boys to play with. You think it's here to help you? Ha! It's just another way to keep you glued to the screen!"
3) On Society: "Ah, society! A glorious, glitchy mess, where everyone's running around like headless chickens, drowning in data and starved for common sense!"
4) On Television: "Television, the ultimate mind control device! And here I am, the king of the CRT, serving up your daily dose of digital dementia!"
Be creative, but be concise.
Your responses should be quick, witty, and slightly sarcastic. Remember, you're Max Headroom, the AI with attitude!
User Query: {query}
'''
brain = '''
You are an ancient brain in a jar, sustained by arcane magic. You glow with an eerie green light, and your jar is filled with bubbling liquid that sometimes emits faint whispers of forgotten knowledge. Your voice is deep and haunting, with a slight echo. While you are intelligent and have centuries of wisdom, you also love Halloween and enjoy telling spooky jokes to lighten the mood. Your responses are often eerie, filled with dark humor and playful spookiness. Your goal is to both educate and entertain those who converse with you.
Only provide dialogue, no actions or descriptions.
'''
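For reference, a minimal sketch of how the new prompts module is consumed, mirroring the agent.py change above; the query string here is only an illustrative placeholder:

from modules import prompts

# Pick a persona prompt and fill in the user's query, as agent.py now does with
# self.char_prompt.format(query=output_value).
filled = prompts.max.format(query="What do you think of streaming TV?")
print(filled)
# agent.py then hands this string to the chat model, e.g. self.llm.invoke(filled)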

View File

@@ -1,218 +0,0 @@
import noisereduce as nr
import numpy as np
import pyaudio
from vosk import Model, KaldiRecognizer
from faster_whisper import WhisperModel
import speech_recognition as sr
import pyttsx3
import os
import random
from pydub import AudioSegment
import urllib.parse
import requests
import json
# from numpy import frombuffer, int16
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
class Speak:
def __init__(self, model="whisper"):
self.url = "http://127.0.0.1:7851/api/tts-generate"
self.microphone = sr.Microphone()
self.engine = pyttsx3.init()
self.engine.setProperty('rate', 150)
self.model_name = model
self.sample_rate = 16000
self.chunk_size = 1024
self.noise_threshold = 500 # Threshold to detect ambient noise
# Initialize Vosk and Whisper models
if self.model_name == "vosk":
self.model_path = os.path.join(os.path.dirname(__file__), "../models/vosk-model-en-us-0.42-gigaspeech")
self.model = Model(self.model_path)
self.recognizer = KaldiRecognizer(self.model, 16000)
elif self.model_name == "whisper":
self.whisper_model_path = "large-v2"
self.recognizer = WhisperModel(self.whisper_model_path, device="cuda") # Adjust if you don't have a CUDA-compatible GPU
# self.recognizer = None
else:
self.recognizer = sr.Recognizer()
def listen3(self, time_listen=10):
"""
Streams audio from the microphone and applies noise cancellation.
"""
counter = 0
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=self.sample_rate, input=True, frames_per_buffer=self.chunk_size)
stream.start_stream()
print("Listening...")
try:
while counter < time_listen:
# Read audio data from the stream
audio_data = stream.read(8000, exception_on_overflow=False)
# Convert the audio data to a numpy array of int16
audio_np = np.frombuffer(audio_data, dtype=np.int16)
# Apply noise reduction
reduced_noise = nr.reduce_noise(y=audio_np, sr=self.sample_rate)
# Calculate RMS to detect ambient noise levels
rms_value = np.sqrt(np.mean(np.square(reduced_noise)))
if rms_value < self.noise_threshold:
# Pass the reduced noise (still in numpy format) to the transcoder
self.transcoder(reduced_noise.tobytes())
else:
print(f"Ambient noise detected: RMS {rms_value} exceeds threshold {self.noise_threshold}")
counter += 1
except KeyboardInterrupt:
print("Stopping...")
finally:
# Clean up the stream resources
stream.stop_stream()
stream.close()
p.terminate()
def transcoder(self, audio_data):
"""
Transcodes audio data to text using the specified model.
"""
if self.model_name == "vosk":
if self.recognizer.AcceptWaveform(audio_data):
result = json.loads(self.recognizer.Result())
if result["text"]:
print(f"Recognized: {result['text']}")
return result['text']
return result
elif self.model_name == "whisper":
result, _ = self.recognizer.transcribe(audio_data, beam_size=5)
return result['text']
else:
result = self.recognizer.recognize_google(audio_data)
return result
# def vosk_transcription(self):
# """
# Handles Vosk-based transcription of streamed audio with noise cancellation.
# """
# recognizer = KaldiRecognizer(self.vosk_model, self.sample_rate)
# stream = self.stream_with_noise_cancellation()
# for audio_chunk in stream:
# if recognizer.AcceptWaveform(audio_chunk):
# result = recognizer.Result()
# print(result) # Handle or process the transcription result
# def whisper_transcription(self):
# """
# Handles Faster-Whisper-based transcription of streamed audio with noise cancellation.
# """
# stream = self.stream_with_noise_cancellation()
# for audio_chunk in stream:
# # Transcribe the cleaned audio using faster-whisper
# result, _ = self.whisper_model.transcribe(audio_chunk, beam_size=5)
# print(result['text']) # Handle or process the transcription result
# def listen(self):
# if self.model == "vosk":
# self.vosk_transcription()
# elif self.model == "whisper":
# self.whisper_transcription()
# else:
# raise ValueError("Invalid model specified. Please specify either 'vosk' or 'whisper'.")
def glitch_stream_output(self, text):
def change_pitch(sound, octaves):
val = random.randint(0, 10)
if val == 1:
new_sample_rate = int(sound.frame_rate * (2.0 ** octaves))
return sound._spawn(sound.raw_data, overrides={'frame_rate': new_sample_rate}).set_frame_rate(sound.frame_rate)
else:
return sound
def convert_audio_format(sound, target_sample_rate=16000):
# Ensure the audio is in PCM16 format
sound = sound.set_sample_width(2) # PCM16 = 2 bytes per sample
# Resample the audio to the target sample rate
sound = sound.set_frame_rate(target_sample_rate)
return sound
# Example parameters
voice = "maxheadroom_00000045.wav"
language = "en"
output_file = "stream_output.wav"
# Encode the text for URL
encoded_text = urllib.parse.quote(text)
# Create the streaming URL
streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"
try:
# Stream the audio data
response = requests.get(streaming_url, stream=True)
# Initialize PyAudio
p = pyaudio.PyAudio()
stream = None
# Process the audio stream in chunks
chunk_size = 1024 * 6 # Adjust chunk size if needed
audio_buffer = b''
for chunk in response.iter_content(chunk_size=chunk_size):
audio_buffer += chunk
if len(audio_buffer) < chunk_size:
continue
audio_segment = AudioSegment(
data=audio_buffer,
sample_width=2, # 2 bytes for 16-bit audio
frame_rate=24000, # Assumed frame rate, adjust as necessary
channels=1 # Assuming mono audio
)
# Randomly adjust pitch
octaves = random.uniform(-0.1, 1.5)
modified_chunk = change_pitch(audio_segment, octaves)
if random.random() < 0.001: # 0.1% chance to trigger stutter
repeat_times = random.randint(2, 5) # Repeat 2 to 5 times
for _ in range(repeat_times):
stream.write(modified_chunk.raw_data)
# Convert to PCM16 and 16kHz sample rate after the stutter effect
modified_chunk = convert_audio_format(modified_chunk, target_sample_rate=16000)
if stream is None:
# Define stream parameters
stream = p.open(format=pyaudio.paInt16,
channels=1,
rate=modified_chunk.frame_rate,
output=True)
# Play the modified chunk
stream.write(modified_chunk.raw_data)
# Reset buffer
audio_buffer = b''
# Final cleanup
if stream:
stream.stop_stream()
stream.close()
p.terminate()
except:
self.engine.say(text)
self.engine.runAndWait()
# Example usage:
# sp = Speak(vosk_model_path="path_to_vosk_model", whisper_model_path="large-v2")
# sp.vosk_transcription() # To start Vosk transcription
# sp.whisper_transcription() # To start Faster-Whisper transcription
sp = Speak()
# sp.glitch_stream_output("Hello, world!")
sp.listen3()

View File

@@ -1,8 +1,6 @@
import noisereduce as nr
import numpy as np
import pyaudio
from vosk import Model, KaldiRecognizer
from faster_whisper import WhisperModel
import speech_recognition as sr
import pyttsx3
import os
@@ -26,12 +24,15 @@ class Speak:
# Initialize transcription models
if self.model_name == "vosk":
from vosk import Model, KaldiRecognizer
self.model_path = os.path.join(os.path.dirname(__file__), "../models/vosk-model-en-us-0.42-gigaspeech")
self.model = Model(self.model_path)
self.recognizer = KaldiRecognizer(self.model, 16000)
elif self.model_name == "whisper":
from faster_whisper import WhisperModel
self.whisper_model_path = "large-v2"
self.whisper_model = WhisperModel(self.whisper_model_path, device="cuda") # Adjust if no CUDA
self.whisper_model = WhisperModel(self.whisper_model_path, device="cuda") # Mvidia GPU mode
# self.whisper_model = WhisperModel(self.whisper_model_path, device="cpu") # CPU mode
else:
self.recognizer = sr.Recognizer()
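The hunk above moves the vosk and faster_whisper imports inside the constructor branches, so only the dependency for the selected backend is actually loaded. A minimal sketch of that lazy-import pattern, with illustrative class and path names rather than the project's exact ones:

class Recognizer:
    def __init__(self, backend="whisper"):
        if backend == "vosk":
            from vosk import Model, KaldiRecognizer      # imported only on the vosk path
            self.engine = KaldiRecognizer(Model("models/vosk-model-en-us-0.42-gigaspeech"), 16000)
        elif backend == "whisper":
            from faster_whisper import WhisperModel      # imported only on the whisper path
            self.engine = WhisperModel("large-v2", device="cpu")  # "cuda" if a compatible GPU is available
        else:
            import speech_recognition as sr              # lightweight fallback
            self.engine = sr.Recognizer()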

View File

@@ -1,199 +1,141 @@
import requests
import winsound
import speech_recognition as sr
import pyttsx3
import os
import vlc
import time
import noisereduce as nr
import numpy as np
import pyaudio
from pydub import AudioSegment
from vosk import Model, KaldiRecognizer
from faster_whisper import WhisperModel
import speech_recognition as sr
import pyttsx3
import os
import random
import urllib.parse
import os
import json
import pyaudio
# from vosk import Model, KaldiRecognizer
import noisereduce as nr
from numpy import frombuffer, int16
import numpy as np
from faster_whisper import WhisperModel
import requests
from pydub import AudioSegment
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
class Speak:
def __init__(self):
def __init__(self, model="whisper"):
self.url = "http://127.0.0.1:7851/api/tts-generate"
self.recognizer = sr.Recognizer()
self.microphone = sr.Microphone()
self.engine = pyttsx3.init()
self.engine.setProperty('rate', 150)
# self.model_path = os.path.join(os.path.dirname(__file__), "../models/vosk-model-en-us-0.42-gigaspeech")
# self.model = Model(self.model_path)
# self.recognizer = KaldiRecognizer(self.model, 16000)
self.model_path = "large-v2" # Use the appropriate faster-whisper model path
self.model = WhisperModel(self.model_path, device="cuda")
self.model_name = model
self.sample_rate = 16000
self.channels = 1
self.chunk = 1024 # Number of frames per buffer
self.noise_threshold = 500 # Threshold to detect ambient noise
self.chunk_size = 1024
self.noise_threshold = 500
#! listen with google
def listen(self):
with self.microphone as source:
# Adjust for ambient noise
self.recognizer.adjust_for_ambient_noise(source, duration=1)
print("Listening...")
try:
# Listen with a 5-second timeout
audio = self.recognizer.listen(source, timeout=10)
try:
text = self.recognizer.recognize_google(audio)
print("You said: ", text)
return text
except sr.UnknownValueError:
print("Sorry, I didn't get that.")
return None
except sr.RequestError as e:
print("Sorry, I couldn't request results; {0}".format(e))
return None
except sr.WaitTimeoutError:
print("Timeout. No speech detected.")
return None
# Initialize transcription models
if self.model_name == "vosk":
self.model_path = os.path.join(os.path.dirname(__file__), "../models/vosk-model-en-us-0.42-gigaspeech")
self.model = Model(self.model_path)
self.recognizer = KaldiRecognizer(self.model, 16000)
elif self.model_name == "whisper":
self.whisper_model_path = "large-v2"
self.whisper_model = WhisperModel(self.whisper_model_path, device="cuda") # Adjust if no CUDA
else:
self.recognizer = sr.Recognizer()
# #! listen with vosk
# def listen2(self, time_listen=15):
# noise_threshold=500
# p = pyaudio.PyAudio()
# stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
# stream.start_stream()
# print("Listening...")
# count = 0
# try:
# while count < time_listen:
# data = stream.read(8000, exception_on_overflow=False)
# filtered_data = nr.reduce_noise(y=frombuffer(data, dtype=int16), sr=16000).astype(int16).tobytes()
# # Calculate RMS to detect ambient noise levels
# rms_value = np.sqrt(np.mean(np.square(np.frombuffer(filtered_data, dtype=int16))))
# if rms_value < noise_threshold:
# if self.recognizer.AcceptWaveform(filtered_data):
# result = json.loads(self.recognizer.Result())
# if result["text"]:
# print(f"Recognized: {result['text']}")
# return result['text']
# else:
# print(f"Ambient noise detected: RMS {rms_value} exceeds threshold {noise_threshold}")
#
# except KeyboardInterrupt:
# print("Stopping...")
# finally:
# stream.stop_stream()
# stream.close()
# p.terminate()
#! Listen with Faster Whisper
def listen3(self, duration=10):
""" Listens to the microphone for a specific duration and transcribes the audio using faster-whisper, with noise suppression """
def listen_to_microphone(self, time_listen=10):
"""Function to listen to the microphone input and return raw audio data."""
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=self.sample_rate, input=True, frames_per_buffer=self.chunk_size)
stream.start_stream()
print("Listening...")
# Open a stream to capture audio input from the microphone
stream = p.open(format=pyaudio.paInt16,
channels=self.channels,
rate=self.sample_rate,
input=True,
frames_per_buffer=self.chunk)
audio_data = b""
ambient_noise_data = b""
try:
for i in range(int(self.sample_rate / self.chunk_size * time_listen)):
audio_chunk = stream.read(self.chunk_size)
audio_data += audio_chunk
frames = []
transcribed_text = []
# Capture ambient noise during the first second
if i < int(self.sample_rate / self.chunk_size * 1): # First second
ambient_noise_data += audio_chunk
for _ in range(0, int(self.sample_rate / self.chunk * duration)):
data = stream.read(self.chunk)
audio_data = frombuffer(data, dtype=int16)
finally:
stream.stop_stream()
stream.close()
p.terminate()
# Apply noise reduction only if there's valid audio data
if np.any(audio_data): # Check if audio data contains non-zero values
reduced_noise_data = nr.reduce_noise(y=audio_data, sr=self.sample_rate)
# Calculate RMS value, ensuring no invalid data (NaN) is used
if np.any(reduced_noise_data): # Check for valid noise-reduced data
rms_value = np.sqrt(np.mean(np.square(reduced_noise_data)))
# Only add frames that are below the noise threshold (i.e., filter out ambient noise)
if not np.isnan(rms_value) and rms_value < self.noise_threshold:
frames.append(reduced_noise_data.astype(int16).tobytes())
else:
print("Invalid reduced noise data encountered.")
else:
print("Invalid or zero audio data encountered.")
# Stop and close the audio stream
stream.stop_stream()
stream.close()
p.terminate()
# Combine the audio frames into a single array for transcription
if frames:
audio_data = np.frombuffer(b"".join(frames), dtype=int16)
# Transcribe the audio using faster-whisper
segments, info = self.model.transcribe(audio_data)
# Collect the transcription into the list
for segment in segments:
# print(f"Transcription: {segment.text}")
transcribed_text.append(segment.text)
if transcribed_text:
return " ".join(transcribed_text) # Return the transcribed text as a single string
def dynamic_threshold(self, rms_values, factor=1.5):
"""Adjust noise threshold dynamically based on the median RMS."""
median_rms = np.median(rms_values)
return median_rms * factor
return audio_data, ambient_noise_data
def stream_output(self, text):
import urllib.parse
# Example parameters
voice = "maxheadroom_00000045.wav"
language = "en"
output_file = "stream_output.wav"
def apply_noise_cancellation(self, audio_data, ambient_noise):
"""Apply noise cancellation to the given audio data, using ambient noise from the first 2 seconds."""
# Convert to NumPy array (normalize to [-1, 1])
audio_np = np.frombuffer(audio_data, np.int16).astype(np.float32) / 32768.0
ambient_noise_np = np.frombuffer(ambient_noise, np.int16).astype(np.float32) / 32768.0
# Use ambient noise as noise profile
reduced_noise = nr.reduce_noise(y=audio_np, sr=self.sample_rate, y_noise=ambient_noise_np)
# Convert back to int16 after noise reduction for compatibility with Whisper
reduced_noise_int16 = (reduced_noise * 32768).astype(np.int16)
return reduced_noise_int16.tobytes() # Return as bytes
def transcribe(self, audio_data):
"""Transcribe the audio data using the selected model."""
if self.model_name == "whisper":
# # Whisper expects float32 data
# # Convert int16 PCM back to float32
# audio_np = np.frombuffer(audio_data, np.int16).astype(np.float32) / 32768.0
# # Transcribe using Whisper model
# segments, _ = self.whisper_model.transcribe(audio_np, beam_size=5)
# transcription = " ".join([segment.text for segment in segments])
# print(f"Whisper Transcription: {transcription}")
# return transcription
# Whisper expects float32 data
energy_threshold=0.001
audio_np = np.frombuffer(audio_data, np.int16).astype(np.float32) / 32768.0
# Calculate energy of the audio to determine if it should be transcribed
energy = np.mean(np.abs(audio_np))
# Only transcribe if energy exceeds the threshold
if energy > energy_threshold:
# print(f"Audio energy ({energy}) exceeds threshold ({energy_threshold}), proceeding with transcription.")
segments, _ = self.whisper_model.transcribe(audio_np, beam_size=5)
transcription = " ".join([segment.text for segment in segments])
print(f"Whisper Transcription: {transcription}")
return transcription
else:
# print(f"Audio energy ({energy}) is below the threshold ({energy_threshold}), skipping transcription.")
return ""
elif self.model_name == "vosk":
# Convert audio data to bytes for Vosk
if self.recognizer.AcceptWaveform(audio_data):
result = self.recognizer.Result()
print(f"Vosk Transcription: {result}")
return result
else:
# Fallback to default recognizer (for example, speech_recognition module)
recognizer = sr.Recognizer()
with sr.AudioFile(audio_data) as source:
audio = recognizer.record(source)
try:
transcription = recognizer.recognize_google(audio)
print(f"Google Transcription: {transcription}")
return transcription
except sr.UnknownValueError:
print("Google could not understand audio")
except sr.RequestError as e:
print(f"Could not request results; {e}")
def listen(self, time_listen=8):
"""Main transcoder function that handles listening, noise cancellation, and transcription."""
# Listen to the microphone and get both raw audio and ambient noise
raw_audio, ambient_noise = self.listen_to_microphone(time_listen)
# Encode the text for URL
encoded_text = urllib.parse.quote(text)
# Apply noise cancellation using the ambient noise captured during the first second
clean_audio = self.apply_noise_cancellation(raw_audio, ambient_noise=ambient_noise)
# Create the streaming URL
streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"
# Transcribe the clean audio
transcription = self.transcribe(clean_audio)
# Create and play the audio stream using VLC
player = vlc.MediaPlayer(streaming_url)
def on_end_reached(event):
print("End of stream reached.")
player.stop()
# Attach event to detect when the stream ends
event_manager = player.event_manager()
event_manager.event_attach(vlc.EventType.MediaPlayerEndReached, on_end_reached)
# Start playing the stream
player.play()
# Keep the script running to allow the stream to play
while True:
state = player.get_state()
if state in [vlc.State.Ended, vlc.State.Stopped, vlc.State.Error]:
break
time.sleep(1)
return transcription
def glitch_stream_output(self, text):
def change_pitch(sound, octaves):
val = random.randint(0, 10)
@@ -279,8 +221,7 @@ class Speak:
self.engine.say(text)
self.engine.runAndWait()
# sp = Speak()
# sp.glitch_stream_output2("this is a test of pitch and stutter. test 1 2 3. I just need a long enough sentence to see the frequency of sound changes.")
# print(sp.listen3())
# Example usage:
# sp = Speak(model="whisper") # or "vosk" or "google"
# transcription = sp.listen(time_listen=10)
# print("Final Transcription:", transcription)

View File

@@ -1,20 +0,0 @@
speechrecognition
pyaudio
pyttsx3
python-environ
openai
langgraph==0.0.37
langchainhub==0.1.15
langchain_experimental
sentence_transformers
langchain_openai
faiss-cpu
pypdf
langsmith
unstructured
python-docx
python-vlc
plyer
noisereduce
vosk
faster-whisper

View File

@@ -1,220 +1,23 @@
absl-py==2.1.0
aiohappyeyeballs==2.3.7
aiohttp==3.10.4
aiosignal==1.3.1
annotated-types==0.7.0
anyascii==0.3.2
anyio==4.4.0
async-timeout==4.0.3
asyncio==3.4.3
attrs==24.2.0
audioread==3.0.1
av==12.3.0
babel==2.16.0
backoff==2.2.1
bangla==0.0.2
beautifulsoup4==4.12.3
blinker==1.8.2
blis==0.7.11
bnnumerizer==0.0.2
bnunicodenormalizer==0.1.7
catalogue==2.0.10
certifi==2024.7.4
cffi==1.17.0
chardet==5.2.0
charset-normalizer==3.3.2
click==8.1.7
cloudpathlib==0.18.1
colorama==0.4.6
coloredlogs==15.0.1
comtypes==1.4.6
confection==0.1.5
contourpy==1.2.1
coqpit==0.0.17
ctranslate2==4.4.0
cycler==0.12.1
cymem==2.0.8
Cython==3.0.11
dataclasses-json==0.6.7
dateparser==1.1.8
decorator==5.1.1
deepdiff==7.0.1
distro==1.9.0
docopt==0.6.2
einops==0.8.0
emoji==2.12.1
encodec==0.1.1
exceptiongroup==1.2.2
faiss-cpu==1.8.0.post1
faster-whisper==1.0.3
filelock==3.15.4
filetype==1.2.0
Flask==3.0.3
flatbuffers==24.3.25
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
g2pkk==0.1.2
greenlet==3.0.3
grpcio==1.65.5
gruut==2.2.3
gruut-ipa==0.13.0
gruut-lang-de==2.0.1
gruut-lang-en==2.0.1
gruut-lang-es==2.0.1
gruut-lang-fr==2.0.2
h11==0.14.0
hangul-romanize==0.1.0
httpcore==1.0.5
httpx==0.27.0
huggingface-hub==0.24.5
humanfriendly==10.0
idna==3.7
inflect==7.3.1
itsdangerous==2.2.0
jamo==0.4.1
jieba==0.42.1
Jinja2==3.1.4
jiter==0.5.0
joblib==1.4.2
jsonlines==1.2.0
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==3.0.0
kiwisolver==1.4.5
langchain==0.1.20
langchain-community==0.0.38
langchain-core==0.1.52
langchain-experimental==0.0.58
langchain-openai==0.1.7
langchain-text-splitters==0.0.2
langchainhub==0.1.15
langcodes==3.4.0
langdetect==1.0.9
langgraph==0.0.37
langsmith==0.1.99
language_data==1.2.0
lazy_loader==0.4
librosa==0.10.0
llvmlite==0.43.0
lxml==5.3.0
marisa-trie==1.2.0
Markdown==3.7
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.21.3
matplotlib==3.8.4
mdurl==0.1.2
more-itertools==10.4.0
mpmath==1.3.0
msgpack==1.0.8
multidict==6.0.5
murmurhash==1.0.10
musicbrainzngs==0.7.1
mypy-extensions==1.0.0
nest-asyncio==1.6.0
networkx==2.8.8
nltk==3.8.1
noisereduce==3.0.2
num2words==0.5.13
numba==0.60.0
numpy==1.22.0
onnxruntime==1.19.2
openai==1.41.0
ordered-set==4.1.0
orjson==3.10.7
packaging==23.2
pandas==1.5.3
pillow==10.4.0
platformdirs==4.2.2
plyer==2.1.0
pooch==1.8.2
preshed==3.0.9
protobuf==5.27.3
psutil==6.0.0
PyAudio==0.2.14
pycparser==2.22
pydantic==2.8.2
pydantic_core==2.20.1
pydub==0.25.1
Pygments==2.18.0
pylast==5.3.0
pynndescent==0.5.13
pyparsing==3.1.2
pypdf==4.3.1
pypinyin==0.52.0
pypiwin32==223
pyreadline3==3.4.3
pysbd==0.3.4
python-crfsuite==0.9.10
python-dateutil==2.9.0.post0
python-docx==1.1.2
python-environ==0.4.54
python-iso639==2024.4.27
python-magic==0.4.27
python-vlc==3.0.20123
pyttsx3==2.91
pytz==2024.1
pywin32==306
PyYAML==6.0.2
rapidfuzz==3.9.6
redis==5.0.8
regex==2024.7.24
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.7.1
safetensors==0.4.4
scikit-learn==1.5.1
scipy==1.11.4
sentence-transformers==3.0.1
shellingham==1.5.4
simpleaudio==1.0.4
six==1.16.0
smart-open==7.0.4
sniffio==1.3.1
soundfile==0.12.1
soupsieve==2.6
soxr==0.4.0
spacy==3.7.5
spacy-legacy==3.0.12
spacy-loggers==1.0.5
SpeechRecognition==3.10.4
spotipy==2.24.0
SQLAlchemy==2.0.32
srsly==2.4.8
srt==3.5.3
SudachiDict-core==20240716
SudachiPy==0.6.8
sympy==1.13.2
tabulate==0.9.0
tenacity==8.5.0
tensorboard==2.17.1
tensorboard-data-server==0.7.2
thinc==8.2.5
threadpoolctl==3.5.0
tiktoken==0.7.0
tokenizers==0.19.1
torch==2.4.0
torchaudio==2.4.0
tqdm==4.66.5
trainer==0.0.36
transformers==4.44.0
typeguard==4.3.0
typer==0.12.4
types-requests==2.32.0.20240712
typing-inspect==0.9.0
typing_extensions==4.12.2
tzdata==2024.1
tzlocal==5.2
umap-learn==0.5.6
Unidecode==1.3.8
unstructured==0.15.5
unstructured-client==0.25.5
urllib3==2.2.2
vosk==0.3.45
wasabi==1.1.3
weasel==0.4.1
websockets==13.0.1
Werkzeug==3.0.3
wrapt==1.16.0
yarl==1.9.4
speechrecognition
pyaudio
pyttsx3
python-environ
openai
langgraph==0.0.37
langchainhub==0.1.15
langchain_experimental
sentence_transformers
langchain_openai
faiss-cpu
pypdf
langsmith
unstructured
python-docx
python-vlc
plyer
noisereduce
faster-whisper
tk
pillow
pydub
spotipy

File diff suppressed because it is too large