From 4f79a3f01aeef632e8d2cb03141e30e3ca2e6947 Mon Sep 17 00:00:00 2001
From: maglore9900
Date: Wed, 2 Oct 2024 13:50:51 -0400
Subject: [PATCH] expanded readme, updates to agent and speak for customization

---
 README.md        | 36 +++++++++++++++++++++++++++++++++++-
 example_env.txt  | 19 +++++++++++--------
 modules/agent.py |  2 +-
 modules/speak.py |  7 ++++---
 4 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 18f813f..35ebf8d 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@ this is a personal project to create a voice directed digital assistant based on
 
 ![alt text](https://www.cartoonbrew.com/wp-content/uploads/2013/05/maxheadroom_main-1280x600.jpg)
 
+# SUMMARY
+
 written in python, using langchain, langgraph, etc.
 
 written to work on Windows. Agent and logic will run on linux but tools are currently windows only.
@@ -26,7 +28,7 @@ Will move more variables to the .env folders soon.
 
 .env is under the module folder for now
 
-INSTALLATION
+# INSTALLATION
 
 so basically the steps are pretty simple
 
@@ -38,3 +40,35 @@ so basically the steps are pretty simple
 - then copy example_env.txt to `.env`
 - open that, and put in your info, like openai key or ollama or whatever
 - then run `python main.py` to start the whole thing up
+
+
+
+# TOOLS
+
+## Spotify
+
+you will need to get your spotify credentials in order to have Max control your spotify software.
+
+you can find information on getting those credentials here: https://developer.spotify.com/documentation/web-api/concepts/apps
+
+max can take the following commands: play, pause, stop, next, previous, favorite
+
+*note: you can say pretty much any words that are similar, max will attempt to read your intent and use the right command
+
+## Window Focus
+
+this tool brings the focus of whatever app you name to the front; it will not open an app
+
+*note: only works on windows
+
+## Open App
+
+this tool will open an application. when you run max, it will create an index of the apps installed on your system
+
+*note: only works on windows
+
+## Timer
+
+this tool will set a timer with a popup. you tell max to set a timer for X time, and it will convert it to seconds on the backend and create the timer.
+
+the default timer will have a "clippy" popup, with potentially custom text
diff --git a/example_env.txt b/example_env.txt
index c9f2799..637c4b0 100644
--- a/example_env.txt
+++ b/example_env.txt
@@ -1,13 +1,16 @@
-OPENAI_API_KEY=''
-#LLM_TYPE will take openai, local, or hybrid
-LLM_TYPE='openai'
-OPENAI_MODEL='gpt-4o-mini'
+OPENAI_API_KEY = ''
+#LLM_TYPE will take openai, local
+LLM_TYPE = 'openai'
+OPENAI_MODEL = 'gpt-4o-mini'
 #OLLAMA_MODEL will take any model you can load in ollama
-OLLAMA_MODEL='gemma2'
-OLLAMA_URL='http://localhost:11434'
-CHARACTER='max'
+OLLAMA_MODEL = 'gemma2'
+OLLAMA_URL = 'http://localhost:11434'
+#CHARACTER will take any character prompt you have in the modules/prompts.py file. 'max' or 'none' are the default options
+CHARACTER = 'max'
 #LISTEN_MODEL will take whisper or google, whisper is the default option and best for most cases
-LISTEN_MODEL='whisper'
+LISTEN_MODEL = 'whisper'
+#STREAM_SPEAK_URL uses the default url for Alltalk. If you don't have Alltalk you can ignore this; if you want to use a different service, simply replace the url
+STREAM_SPEAK_URL = 'http://127.0.0.1:7851/api/tts-generate'
 SPOTIFY_CLIENT_ID = ''
 SPOTIFY_CLIENT_SECRET = ''
 SPOTIFY_REDIRECT_URI = 'http://localhost:8888/callback'
diff --git a/modules/agent.py b/modules/agent.py
index de2997b..3796bcf 100644
--- a/modules/agent.py
+++ b/modules/agent.py
@@ -17,7 +17,7 @@ class Agent:
         self.ap = app_launcher.AppLauncher()
         self.wf = windows_focus.WindowFocusManager()
         self.llm = self.ad.llm_chat
-        self.spk = speak.Speak(model=env("LISTEN_MODEL"))
+        self.spk = speak.Speak(env)
         self.prompt = hub.pull("hwchase17/openai-functions-agent")
         self.char = env("CHARACTER").lower()
         self.char_prompt = getattr(prompts, self.char, "You are a helpful assistant. User Query: {query}")
diff --git a/modules/speak.py b/modules/speak.py
index f920317..b8e1888 100644
--- a/modules/speak.py
+++ b/modules/speak.py
@@ -12,16 +12,17 @@ from pydub import AudioSegment
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
 
 class Speak:
-    def __init__(self, model="whisper"):
-        self.url = "http://127.0.0.1:7851/api/tts-generate"
+    def __init__(self, env):
+        self.url = env("STREAM_SPEAK_URL")
         self.microphone = sr.Microphone()
         self.engine = pyttsx3.init()
         self.engine.setProperty('rate', 150)
-        self.model_name = model.lower()
+        self.model_name = env("LISTEN_MODEL", default="whisper").lower()
         self.sample_rate = 16000
         self.chunk_size = 1024
         self.noise_threshold = 500
+        # Initialize transcription models
         if self.model_name == "whisper":
             from faster_whisper import WhisperModel
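
Note for reviewers: below is a minimal usage sketch of how the reconfigured Speak constructor is expected to be wired up after this patch. It assumes the project builds its env() callable with the environs package (inferred from the env("...") calls in modules/agent.py, not confirmed by this diff) and that modules/speak.py is importable as modules.speak; the setting names come from example_env.txt.

    # Usage sketch; assumptions: env() comes from the environs package and
    # modules/ is importable as a package (both inferred from modules/agent.py).
    from environs import Env

    from modules import speak

    env = Env()
    env.read_env()  # loads .env, including STREAM_SPEAK_URL and LISTEN_MODEL

    spk = speak.Speak(env)   # Speak now pulls its settings from the env object
    print(spk.url)           # e.g. 'http://127.0.0.1:7851/api/tts-generate'
    print(spk.model_name)    # 'whisper' unless LISTEN_MODEL says otherwise

Passing the whole env object instead of individual keyword arguments lets Speak pick up new .env settings (such as STREAM_SPEAK_URL here) without the constructor signature changing each time.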