From 4f79a3f01aeef632e8d2cb03141e30e3ca2e6947 Mon Sep 17 00:00:00 2001
From: maglore9900
Date: Wed, 2 Oct 2024 13:50:51 -0400
Subject: [PATCH] expanded readme, updates to agent and speak for customization

---
 README.md        | 36 +++++++++++++++++++++++++++++++++++-
 example_env.txt  | 19 +++++++++++--------
 modules/agent.py |  2 +-
 modules/speak.py |  7 ++++---
 4 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 18f813f..35ebf8d 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@ this is a personal project to create a voice directed digital assistant based on
 
 ![alt text](https://www.cartoonbrew.com/wp-content/uploads/2013/05/maxheadroom_main-1280x600.jpg)
 
+# SUMMARY
+
 written in python, using langchain, langgraph, etc.
 
 written to work on Windows. Agent and logic will run on linux but tools are currently windows only.
@@ -26,7 +28,7 @@ Will move more variables to the .env folders soon.
 
 .env is under the module folder for now
 
-INSTALLATION
+# INSTALLATION
 
 so basically the steps are pretty simple
 
@@ -38,3 +40,35 @@ so basically the steps are pretty simple
 - then copy example_env.txt to `.env`
 - open that, and put in your info, like openai key or ollama or whatever
 - then run `python main.py` to start the whole thing up
+
+
+
+# TOOLS
+
+## Spotify
+
+you will need to get your spotify credentials in order to have Max control your spotify software.
+
+you can find information on getting those credentials here: https://developer.spotify.com/documentation/web-api/concepts/apps
+
+max can take the following commands: play, pause, stop, next, previous, favorite
+
+*note: you can say pretty much any words that are similar, max will attempt to read your intent and use the right command
+
+## Window Focus
+
+this tool brings the focus of whatever app you name to the front; it will not open an app
+
+*note: only works on windows
+
+## Open App
+
+this tool will open an application. when you run max, it will create an index of the apps installed on your system
+
+*note: only works on windows
+
+## Timer
+
+this tool will set a timer with a popup. you tell max to set a timer for X time, and it will convert it to seconds on the backend and create the timer.
+
+the default timer will have a "clippy" popup, with potentially custom text
diff --git a/example_env.txt b/example_env.txt
index c9f2799..637c4b0 100644
--- a/example_env.txt
+++ b/example_env.txt
@@ -1,13 +1,16 @@
-OPENAI_API_KEY=''
-#LLM_TYPE will take openai, local, or hybrid
-LLM_TYPE='openai'
-OPENAI_MODEL='gpt-4o-mini'
+OPENAI_API_KEY = ''
+#LLM_TYPE will take openai, local
+LLM_TYPE = 'openai'
+OPENAI_MODEL = 'gpt-4o-mini'
 #OLLAMA_MODEL will take any model you can load in ollama
-OLLAMA_MODEL='gemma2'
-OLLAMA_URL='http://localhost:11434'
-CHARACTER='max'
+OLLAMA_MODEL = 'gemma2'
+OLLAMA_URL = 'http://localhost:11434'
+#CHARACTER will take any character prompt you have in the modules/prompts.py file. 'max' or 'none' are the default options
+CHARACTER = 'max'
 #LISTEN_MODEL will take whisper or google, whisper is the default option and best for most cases
-LISTEN_MODEL='whisper'
+LISTEN_MODEL = 'whisper'
+#STREAM_SPEAK_URL uses the default url for Alltalk. If you don't have Alltalk you can ignore this; if you want to use a different service, simply replace the url
+STREAM_SPEAK_URL = 'http://127.0.0.1:7851/api/tts-generate'
 SPOTIFY_CLIENT_ID = ''
 SPOTIFY_CLIENT_SECRET = ''
 SPOTIFY_REDIRECT_URI = 'http://localhost:8888/callback'
diff --git a/modules/agent.py b/modules/agent.py
index de2997b..3796bcf 100644
--- a/modules/agent.py
+++ b/modules/agent.py
@@ -17,7 +17,7 @@ class Agent:
         self.ap = app_launcher.AppLauncher()
         self.wf = windows_focus.WindowFocusManager()
         self.llm = self.ad.llm_chat
-        self.spk = speak.Speak(model=env("LISTEN_MODEL"))
+        self.spk = speak.Speak(env)
         self.prompt = hub.pull("hwchase17/openai-functions-agent")
         self.char = env("CHARACTER").lower()
         self.char_prompt = getattr(prompts, self.char, "You are a helpful assistant. User Query: {query}")
diff --git a/modules/speak.py b/modules/speak.py
index f920317..b8e1888 100644
--- a/modules/speak.py
+++ b/modules/speak.py
@@ -12,16 +12,17 @@ from pydub import AudioSegment
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
 
 class Speak:
-    def __init__(self, model="whisper"):
-        self.url = "http://127.0.0.1:7851/api/tts-generate"
+    def __init__(self, env):
+        self.url = env("STREAM_SPEAK_URL")
         self.microphone = sr.Microphone()
         self.engine = pyttsx3.init()
         self.engine.setProperty('rate', 150)
-        self.model_name = model.lower()
+        self.model_name = env("LISTEN_MODEL", default="whisper").lower()
         self.sample_rate = 16000
         self.chunk_size = 1024
         self.noise_threshold = 500
+        # Initialize transcription models
         if self.model_name == "whisper":
             from faster_whisper import WhisperModel
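
Note for reviewers: below is a minimal usage sketch of how the reconfigured Speak constructor is expected to be wired up after this patch. It assumes the project builds its env() callable with the environs package (inferred from the env("...") calls in modules/agent.py, not confirmed by this diff) and that modules/speak.py is importable as modules.speak; the setting names come from example_env.txt.

    # Usage sketch; assumptions: env() comes from the environs package and
    # modules/ is importable as a package (both inferred from modules/agent.py).
    from environs import Env

    from modules import speak

    env = Env()
    env.read_env()  # loads .env, including STREAM_SPEAK_URL and LISTEN_MODEL

    spk = speak.Speak(env)   # Speak now pulls its settings from the env object
    print(spk.url)           # e.g. 'http://127.0.0.1:7851/api/tts-generate'
    print(spk.model_name)    # 'whisper' unless LISTEN_MODEL says otherwise

Passing the whole env object instead of individual keyword arguments lets Speak pick up new .env settings (such as STREAM_SPEAK_URL here) without the constructor signature changing each time.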