expanded readme, updates to agent and speak for customization

This commit is contained in:
maglore9900 2024-10-02 13:50:51 -04:00
parent 9ed115a100
commit 4f79a3f01a
4 changed files with 51 additions and 13 deletions

View File

@ -2,6 +2,8 @@ this is a personal project to create a voice directed digital assistant based on
![alt text](https://www.cartoonbrew.com/wp-content/uploads/2013/05/maxheadroom_main-1280x600.jpg) ![alt text](https://www.cartoonbrew.com/wp-content/uploads/2013/05/maxheadroom_main-1280x600.jpg)
#SUMMARY
written in python, using langchain, langgraph, etc. written in python, using langchain, langgraph, etc.
written to work on Windows. Agent and logic will run on linux but tools are currently windows only. written to work on Windows. Agent and logic will run on linux but tools are currently windows only.
@ -26,7 +28,7 @@ Will move more variables to the .env folders soon.
.env is under the module folder for now .env is under the module folder for now
INSTALLATION #INSTALLATION
so basically the steps are pretty simple so basically the steps are pretty simple
@ -38,3 +40,35 @@ so basically the steps are pretty simple
- then copy example_env.txt to `.env` - then copy example_env.txt to `.env`
- open that, and put in your info, like openai key or ollama or whatever - open that, and put in your info, like openai key or ollama or whatever
- then run `python main.py` to start the whole thing up - then run `python main.py` to start the whole thing up
# TOOLS
## Spotify
you will need to get your Spotify credentials in order to have Max control your Spotify software.
you can find information on getting that information here: https://developer.spotify.com/documentation/web-api/concepts/apps
max can take the following commands: play, pause, stop, next, previous, favorite
*note: you can say really any words that are similar, max will attempt to read your intent and use the right command
## Window Focus
this tool brings the focus of whatever app you name to the front; it will not open an app
*note: only works on windows
## Open App
this tool will open an application. when you run max it will create an index of the apps installed on your system
*note: only works on windows
## Timer
this tool will set a timer with a popup. you tell max to set a timer for X time, and it will convert it to seconds on the backend and create the timer.
the default timer will have a "clippy" popup, with potentially custom text

View File

@ -1,13 +1,16 @@
OPENAI_API_KEY='' OPENAI_API_KEY = ''
#LLM_TYPE will take openai, local, or hybrid #LLM_TYPE will take openai, local
LLM_TYPE='openai' LLM_TYPE = 'openai'
OPENAI_MODEL='gpt-4o-mini' OPENAI_MODEL = 'gpt-4o-mini'
#OLLAMA_MODEL will take any model you can load in ollama #OLLAMA_MODEL will take any model you can load in ollama
OLLAMA_MODEL='gemma2' OLLAMA_MODEL = 'gemma2'
OLLAMA_URL='http://localhost:11434' OLLAMA_URL = 'http://localhost:11434'
CHARACTER='max' #CHARACTER will take any character prompt you have in the modules/prompts.py file. 'max' or 'none' are the default options
CHARACTER = 'max'
#LISTEN_MODEL will take whisper or google, whisper is the default option and best for most cases #LISTEN_MODEL will take whisper or google, whisper is the default option and best for most cases
LISTEN_MODEL='whisper' LISTEN_MODEL='whisper'
#STREAM SPEAK URL is using the default url for AllTalk. If you don't have AllTalk you can ignore this; if you want to use a different service, simply replace the url
STREAM_SPEAK_URL = 'http://127.0.0.1:7851/api/tts-generate'
SPOTIFY_CLIENT_ID = '' SPOTIFY_CLIENT_ID = ''
SPOTIFY_CLIENT_SECRET = '' SPOTIFY_CLIENT_SECRET = ''
SPOTIFY_REDIRECT_URI = 'http://localhost:8888/callback' SPOTIFY_REDIRECT_URI = 'http://localhost:8888/callback'

View File

@ -17,7 +17,7 @@ class Agent:
self.ap = app_launcher.AppLauncher() self.ap = app_launcher.AppLauncher()
self.wf = windows_focus.WindowFocusManager() self.wf = windows_focus.WindowFocusManager()
self.llm = self.ad.llm_chat self.llm = self.ad.llm_chat
self.spk = speak.Speak(model=env("LISTEN_MODEL")) self.spk = speak.Speak(env)
self.prompt = hub.pull("hwchase17/openai-functions-agent") self.prompt = hub.pull("hwchase17/openai-functions-agent")
self.char = env("CHARACTER").lower() self.char = env("CHARACTER").lower()
self.char_prompt = getattr(prompts, self.char, "You are a helpful assistant. User Query: {query}") self.char_prompt = getattr(prompts, self.char, "You are a helpful assistant. User Query: {query}")

View File

@ -12,16 +12,17 @@ from pydub import AudioSegment
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
class Speak: class Speak:
def __init__(self, model="whisper"): def __init__(self, env):
self.url = "http://127.0.0.1:7851/api/tts-generate" self.url = env("STREAM_SPEAK_URL")
self.microphone = sr.Microphone() self.microphone = sr.Microphone()
self.engine = pyttsx3.init() self.engine = pyttsx3.init()
self.engine.setProperty('rate', 150) self.engine.setProperty('rate', 150)
self.model_name = model.lower() self.model_name = env("LISTEN_MODEL".lower(), default="whisper")
self.sample_rate = 16000 self.sample_rate = 16000
self.chunk_size = 1024 self.chunk_size = 1024
self.noise_threshold = 500 self.noise_threshold = 500
# Initialize transcription models # Initialize transcription models
if self.model_name == "whisper": if self.model_name == "whisper":
from faster_whisper import WhisperModel from faster_whisper import WhisperModel