diff --git a/main.py b/main.py index 5aa913d..dba32c4 100644 --- a/main.py +++ b/main.py @@ -6,14 +6,14 @@ asyncio.set_event_loop(loop) sp = speak.Speak() -graph = agent.Agent("openai") +graph = agent.Agent() while True: + print("Listening...") text = sp.listen() if text and "max" in text.lower(): response = loop.run_until_complete(graph.invoke_agent(text)) if response: sp.glitch_stream_output(response) - print("Listening again...") \ No newline at end of file diff --git a/modules/adapter.py b/modules/adapter.py index 74af9c4..bede04c 100644 --- a/modules/adapter.py +++ b/modules/adapter.py @@ -5,8 +5,8 @@ env = environ.Env() environ.Env.read_env() class Adapter: - def __init__(self, llm_type): - self.llm_text = llm_type + def __init__(self): + self.llm_text = env("LLM_TYPE") if self.llm_text.lower() == "openai": from langchain_openai import OpenAIEmbeddings, OpenAI from langchain_openai import ChatOpenAI @@ -15,16 +15,15 @@ class Adapter: "answer the following request: {topic}" ) self.llm_chat = ChatOpenAI( - temperature=0.3, model="gpt-4o-mini", openai_api_key=env("OPENAI_API_KEY") + temperature=0.3, model=env("OPENAI_MODEL"), openai_api_key=env("OPENAI_API_KEY") ) self.embedding = OpenAIEmbeddings(model="text-embedding-ada-002") elif self.llm_text.lower() == "local": from langchain_community.llms import Ollama from langchain_community.embeddings import HuggingFaceBgeEmbeddings from langchain_community.chat_models import ChatOllama - llm_model = "llama3" - # llm_model = "notus" - self.llm = Ollama(base_url="http://10.0.0.231:11434", model=llm_model) + llm_model = env("OLLAMA_MODEL") + self.llm = Ollama(base_url=env("OLLAMA_URL"), model=llm_model) self.prompt = ChatPromptTemplate.from_template( "answer the following request: {topic}" ) diff --git a/modules/agent.py b/modules/agent.py index 7e41279..1bf5cef 100644 --- a/modules/agent.py +++ b/modules/agent.py @@ -1,5 +1,4 @@ from typing import TypedDict, Annotated, List, Union -import json import operator from 
modules import adapter, spotify from langchain_core.agents import AgentAction, AgentFinish @@ -13,8 +12,8 @@ import asyncio class Agent: - def __init__(self, model): - self.ad = adapter.Adapter(model) + def __init__(self): + self.ad = adapter.Adapter() self.sp = spotify.Spotify() self.llm = self.ad.llm_chat # self.final_answer_llm = self.llm.bind_tools( diff --git a/modules/example env b/modules/example env new file mode 100644 index 0000000..6345784 --- /dev/null +++ b/modules/example env @@ -0,0 +1,15 @@ +OPENAI_API_KEY='' +#LLM_TYPE will take openai, local, or hybrid +LLM_TYPE='openai' +OPENAI_MODEL='gpt-4o-mini' +#OLLAMA_MODEL will take any model you can load in ollama +OLLAMA_MODEL='llama3' +OLLAMA_URL='http://10.0.0.231:11434' +spotify_client_id = '' +spotify_client_secret = '' +spotify_redirect_uri = 'http://localhost:8888/callback' + + + + + diff --git a/modules/speak.py b/modules/speak.py index 90323e8..5d1067e 100644 --- a/modules/speak.py +++ b/modules/speak.py @@ -56,20 +56,21 @@ class Speak: def listen(self): with self.microphone as source: - # Adjust for ambient noise + #! Adjust for ambient noise self.recognizer.adjust_for_ambient_noise(source, duration=1) - - print("Listening...") - audio = self.recognizer.listen(source) - + #! 
added 5 second timeout so ambient noise detection can compensate for music that started playing + audio = self.recognizer.listen(source, timeout=5) try: + text = self.recognizer.recognize_google(audio) print("You said: ", text) return text - except sr.UnknownValueError: - print("Sorry, I didn't get that.") - except sr.RequestError as e: - print("Sorry, I couldn't request results; {0}".format(e)) + except: + pass + # except sr.UnknownValueError: + # print("Sorry, I didn't get that.") + # except sr.RequestError as e: + # print("Sorry, I couldn't request results; {0}".format(e)) def stream_output(self, text): import urllib.parse @@ -152,7 +153,7 @@ class Speak: # Randomly adjust pitch # octaves = random.uniform(-0.5, 0.5) - octaves = random.uniform(-1, 1) + octaves = random.uniform(-0.5, 1) modified_chunk = change_pitch(audio_segment, octaves) if stream is None: @@ -162,7 +163,7 @@ class Speak: rate=modified_chunk.frame_rate, output=True) - if random.random() < 0.01: # 1% chance to trigger stutter + if random.random() < 0.001: # 0.1% chance to trigger stutter repeat_times = random.randint(2, 5) # Repeat 2 to 5 times for _ in range(repeat_times): stream.write(modified_chunk.raw_data)