diff --git a/README.md b/README.md index 0c4ebfd..18f813f 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ this is a fun work in progress. if you want to use it and or develop for it be m Note: 1) this will work with openai or ollama models. you will need to set up the .env for that as well as spotify -2) this is designed to pull a custom voice from the alltalk project https://github.com/erew123/alltalk_tts, that is how I am cloning max headroom's voice. You can alter or simply not use this, it will currently fallback to pyttsx3. -3) speech-to-text can use google, vosk, or faster-whisper. faster-whisper is currently the default and optimal method. +2) this is designed to pull a custom voice from the alltalk project https://github.com/erew123/alltalk_tts; that is how I am cloning max headroom's voice. You can alter or simply not use this; it will currently fall back to pyttsx3, aka a robot voice. +3) speech-to-text can use google or faster-whisper. faster-whisper is currently the default and optimal method. Will move more variables to the .env folders soon. 
diff --git a/example_env.txt b/example_env.txt index 1cb5d2f..c9f2799 100644 --- a/example_env.txt +++ b/example_env.txt @@ -6,9 +6,11 @@ OPENAI_MODEL='gpt-4o-mini' OLLAMA_MODEL='gemma2' OLLAMA_URL='http://localhost:11434' CHARACTER='max' -spotify_client_id = '' -spotify_client_secret = '' -spotify_redirect_uri = 'http://localhost:8888/callback' +#LISTEN_MODEL accepts whisper or google; whisper is the default and the best option for most cases +LISTEN_MODEL='whisper' +SPOTIFY_CLIENT_ID = '' +SPOTIFY_CLIENT_SECRET = '' +SPOTIFY_REDIRECT_URI = 'http://localhost:8888/callback' diff --git a/modules/agent.py b/modules/agent.py index e3dfb13..de2997b 100644 --- a/modules/agent.py +++ b/modules/agent.py @@ -17,7 +17,7 @@ class Agent: self.ap = app_launcher.AppLauncher() self.wf = windows_focus.WindowFocusManager() self.llm = self.ad.llm_chat - self.spk = speak.Speak(model="whisper") + self.spk = speak.Speak(model=env("LISTEN_MODEL")) self.prompt = hub.pull("hwchase17/openai-functions-agent") self.char = env("CHARACTER").lower() self.char_prompt = getattr(prompts, self.char, "You are a helpful assistant. User Query: {query}") diff --git a/modules/speak.py b/modules/speak.py index 41448c6..f920317 100644 --- a/modules/speak.py +++ b/modules/speak.py @@ -17,7 +17,7 @@ class Speak: self.microphone = sr.Microphone() self.engine = pyttsx3.init() self.engine.setProperty('rate', 150) - self.model_name = model + self.model_name = model.lower() self.sample_rate = 16000 self.chunk_size = 1024 self.noise_threshold = 500