created env for specifying listen_model

This commit is contained in:
maglore9900 2024-10-02 13:27:09 -04:00
parent 1ed477c639
commit 9ed115a100
4 changed files with 9 additions and 7 deletions

View File

@@ -19,8 +19,8 @@ this is a fun work in progress. if you want to use it and or develop for it be m
 Note:
 1) this will work with openai or ollama models. you will need to set up the .env for that as well as spotify
-2) this is designed to pull a custom voice from the alltalk project https://github.com/erew123/alltalk_tts, that is how I am cloning max headroom's voice. You can alter or simply not use this, it will currently fallback to pyttsx3.
-3) speech-to-text can use google, vosk, or faster-whisper. faster-whisper is currently the default and optimal method.
+2) this is designed to pull a custom voice from the alltalk project https://github.com/erew123/alltalk_tts, that is how I am cloning max headroom's voice. You can alter or simply not use this, it will currently fallback to pyttsx3 aka a robot voice.
+3) speech-to-text can use google, or faster-whisper. faster-whisper is currently the default and optimal method.
 Will move more variables to the .env folders soon.

View File

@@ -6,9 +6,11 @@ OPENAI_MODEL='gpt-4o-mini'
 OLLAMA_MODEL='gemma2'
 OLLAMA_URL='http://localhost:11434'
 CHARACTER='max'
-spotify_client_id = ''
-spotify_client_secret = ''
-spotify_redirect_uri = 'http://localhost:8888/callback'
+#LISTEN_MODEL will take whisper or google, whisper is the default option and best for most cases
+LISTEN_MODEL='whisper'
+SPOTIFY_CLIENT_ID = ''
+SPOTIFY_CLIENT_SECRET = ''
+SPOTIFY_REDIRECT_URI = 'http://localhost:8888/callback'

View File

@@ -17,7 +17,7 @@ class Agent:
 self.ap = app_launcher.AppLauncher()
 self.wf = windows_focus.WindowFocusManager()
 self.llm = self.ad.llm_chat
-self.spk = speak.Speak(model="whisper")
+self.spk = speak.Speak(model=env("LISTEN_MODEL"))
 self.prompt = hub.pull("hwchase17/openai-functions-agent")
 self.char = env("CHARACTER").lower()
 self.char_prompt = getattr(prompts, self.char, "You are a helpful assistant. User Query: {query}")

View File

@@ -17,7 +17,7 @@ class Speak:
 self.microphone = sr.Microphone()
 self.engine = pyttsx3.init()
 self.engine.setProperty('rate', 150)
-self.model_name = model
+self.model_name = model.lower()
 self.sample_rate = 16000
 self.chunk_size = 1024
 self.noise_threshold = 500