created env for specifying listen_model

This commit is contained in:
maglore9900 2024-10-02 13:27:09 -04:00
parent 1ed477c639
commit 9ed115a100
4 changed files with 9 additions and 7 deletions

View File

@ -19,8 +19,8 @@ this is a fun work in progress. if you want to use it and or develop for it be m
Note:
1) This will work with OpenAI or Ollama models. You will need to set up the .env for that, as well as Spotify.
2) This is designed to pull a custom voice from the alltalk project https://github.com/erew123/alltalk_tts; that is how I am cloning Max Headroom's voice. You can alter this or simply not use it; it will currently fall back to pyttsx3.
3) Speech-to-text can use google, vosk, or faster-whisper. faster-whisper is currently the default and optimal method.
2) This is designed to pull a custom voice from the alltalk project https://github.com/erew123/alltalk_tts; that is how I am cloning Max Headroom's voice. You can alter this or simply not use it; it will currently fall back to pyttsx3, aka a robot voice.
3) Speech-to-text can use google or faster-whisper. faster-whisper is currently the default and optimal method.
Will move more variables to the .env file soon.
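
The speech-to-text backend is selected through the new LISTEN_MODEL variable shown in the .env diff below; for example (the google line is only the alternative setting, shown commented out for illustration):

LISTEN_MODEL='whisper'    # default, uses faster-whisper
#LISTEN_MODEL='google'    # switch to Google speech recognition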

View File

@ -6,9 +6,11 @@ OPENAI_MODEL='gpt-4o-mini'
OLLAMA_MODEL='gemma2'
OLLAMA_URL='http://localhost:11434'
CHARACTER='max'
spotify_client_id = ''
spotify_client_secret = ''
spotify_redirect_uri = 'http://localhost:8888/callback'
#LISTEN_MODEL accepts whisper or google; whisper is the default option and best for most cases
LISTEN_MODEL='whisper'
SPOTIFY_CLIENT_ID = ''
SPOTIFY_CLIENT_SECRET = ''
SPOTIFY_REDIRECT_URI = 'http://localhost:8888/callback'
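
A minimal sketch of how these variables might be read at startup, assuming the environs package (the agent code below calls env("...") in that style); the fallback value passed for LISTEN_MODEL is an assumption, not something this diff sets:

from environs import Env

env = Env()
env.read_env()  # load the .env file from the working directory

listen_model = env("LISTEN_MODEL", "whisper")   # 'whisper' or 'google'; default here is an assumption
spotify_client_id = env("SPOTIFY_CLIENT_ID")
spotify_client_secret = env("SPOTIFY_CLIENT_SECRET")
spotify_redirect_uri = env("SPOTIFY_REDIRECT_URI")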

View File

@ -17,7 +17,7 @@ class Agent:
self.ap = app_launcher.AppLauncher()
self.wf = windows_focus.WindowFocusManager()
self.llm = self.ad.llm_chat
self.spk = speak.Speak(model="whisper")
self.spk = speak.Speak(model=env("LISTEN_MODEL"))
self.prompt = hub.pull("hwchase17/openai-functions-agent")
self.char = env("CHARACTER").lower()
self.char_prompt = getattr(prompts, self.char, "You are a helpful assistant. User Query: {query}")
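
For context, a hedged before/after sketch of the Agent wiring, assuming env is an environs Env instance created at import time (only the speak.Speak and env("LISTEN_MODEL") calls come from this diff; the rest is illustrative):

from environs import Env
import speak

env = Env()
env.read_env()

# before this commit: the speech-to-text backend was hard-coded
# spk = speak.Speak(model="whisper")

# after this commit: the backend follows the .env setting
spk = speak.Speak(model=env("LISTEN_MODEL"))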

View File

@ -17,7 +17,7 @@ class Speak:
self.microphone = sr.Microphone()
self.engine = pyttsx3.init()
self.engine.setProperty('rate', 150)
self.model_name = model
self.model_name = model.lower()
self.sample_rate = 16000
self.chunk_size = 1024
self.noise_threshold = 500
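
Lower-casing the model name makes the LISTEN_MODEL value case-insensitive before it is compared against backend names. A standalone sketch of how the normalized value might pick between the two supported backends (the function shape, model size, and file-based input are assumptions, not the repo's actual Speak methods):

import speech_recognition as sr
from faster_whisper import WhisperModel

def transcribe(model_name: str, audio_path: str) -> str:
    # model_name is expected to be lower-cased already, e.g. env("LISTEN_MODEL").lower()
    if model_name == "google":
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)
    # default: faster-whisper
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _info = model.transcribe(audio_path)
    return " ".join(segment.text.strip() for segment in segments)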