created env for specifying listen_model

This commit is contained in:
maglore9900 2024-10-02 13:27:09 -04:00
parent 1ed477c639
commit 9ed115a100
4 changed files with 9 additions and 7 deletions

View File

@ -19,8 +19,8 @@ this is a fun work in progress. if you want to use it and or develop for it be m
Note:
1) This will work with OpenAI or Ollama models. You will need to set up the .env for that, as well as Spotify.
2) This is designed to pull a custom voice from the alltalk project https://github.com/erew123/alltalk_tts; that is how I am cloning Max Headroom's voice. You can alter this or simply not use it; it will currently fall back to pyttsx3.
3) Speech-to-text can use google, vosk, or faster-whisper. faster-whisper is currently the default and optimal method.
2) This is designed to pull a custom voice from the alltalk project https://github.com/erew123/alltalk_tts; that is how I am cloning Max Headroom's voice. You can alter this or simply not use it; it will currently fall back to pyttsx3, aka a robot voice.
3) Speech-to-text can use google or faster-whisper. faster-whisper is currently the default and optimal method.
Will move more variables to the .env file soon.
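
The speech-to-text backend is selected through the new LISTEN_MODEL variable shown in the .env diff below; for example (the google line is only the alternative setting, shown commented out for illustration):

LISTEN_MODEL='whisper'    # default, uses faster-whisper
#LISTEN_MODEL='google'    # switch to Google speech recognition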

View File

@ -6,9 +6,11 @@ OPENAI_MODEL='gpt-4o-mini'
OLLAMA_MODEL='gemma2'
OLLAMA_URL='http://localhost:11434'
CHARACTER='max'
spotify_client_id = ''
spotify_client_secret = ''
spotify_redirect_uri = 'http://localhost:8888/callback'
#LISTEN_MODEL accepts whisper or google; whisper is the default option and best for most cases
LISTEN_MODEL='whisper'
SPOTIFY_CLIENT_ID = ''
SPOTIFY_CLIENT_SECRET = ''
SPOTIFY_REDIRECT_URI = 'http://localhost:8888/callback'
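
A minimal sketch of how these variables might be read at startup, assuming the environs package (the agent code below calls env("...") in that style); the fallback value passed for LISTEN_MODEL is an assumption, not something this diff sets:

from environs import Env

env = Env()
env.read_env()  # load the .env file from the working directory

listen_model = env("LISTEN_MODEL", "whisper")   # 'whisper' or 'google'; default here is an assumption
spotify_client_id = env("SPOTIFY_CLIENT_ID")
spotify_client_secret = env("SPOTIFY_CLIENT_SECRET")
spotify_redirect_uri = env("SPOTIFY_REDIRECT_URI")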

View File

@ -17,7 +17,7 @@ class Agent:
self.ap = app_launcher.AppLauncher()
self.wf = windows_focus.WindowFocusManager()
self.llm = self.ad.llm_chat
self.spk = speak.Speak(model="whisper")
self.spk = speak.Speak(model=env("LISTEN_MODEL"))
self.prompt = hub.pull("hwchase17/openai-functions-agent")
self.char = env("CHARACTER").lower()
self.char_prompt = getattr(prompts, self.char, "You are a helpful assistant. User Query: {query}")
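
For context, a hedged before/after sketch of the Agent wiring, assuming env is an environs Env instance created at import time (only the speak.Speak and env("LISTEN_MODEL") calls come from this diff; the rest is illustrative):

from environs import Env
import speak

env = Env()
env.read_env()

# before this commit: the speech-to-text backend was hard-coded
# spk = speak.Speak(model="whisper")

# after this commit: the backend follows the .env setting
spk = speak.Speak(model=env("LISTEN_MODEL"))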

View File

@ -17,7 +17,7 @@ class Speak:
self.microphone = sr.Microphone()
self.engine = pyttsx3.init()
self.engine.setProperty('rate', 150)
self.model_name = model
self.model_name = model.lower()
self.sample_rate = 16000
self.chunk_size = 1024
self.noise_threshold = 500
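
Lower-casing the model name makes the LISTEN_MODEL value case-insensitive before it is compared against backend names. A standalone sketch of how the normalized value might pick between the two supported backends (the function shape, model size, and file-based input are assumptions, not the repo's actual Speak methods):

import speech_recognition as sr
from faster_whisper import WhisperModel

def transcribe(model_name: str, audio_path: str) -> str:
    # model_name is expected to be lower-cased already, e.g. env("LISTEN_MODEL").lower()
    if model_name == "google":
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)
    # default: faster-whisper
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _info = model.transcribe(audio_path)
    return " ".join(segment.text.strip() for segment in segments)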