Mirror of https://github.com/tcsenpai/agenticSeek.git, synced 2025-06-05 02:25:27 +00:00

Commit aac639c001 (merge)
.gitignore (vendored) · 3 changes
@@ -1,6 +1,9 @@
*.wav
config.ini
experimental/
conversations/
.env
*/.env

# Byte-compiled / optimized / DLL files
README.md · 65 changes
@@ -1,7 +1,7 @@
-# 🚀 agenticSeek: Local AI Assistant Powered by DeepSeek Agents
+# AgenticSeek: Fully Local AI Assistant Powered by Deepseek R1 Agents

-**A fully local AI assistant** using Deepseek R1 agents.
+**A fully local AI assistant** using AI agents. The goal of the project is to create a truly Jarvis-like assistant using reasoning models such as Deepseek R1.

> 🛠️ **Work in Progress** – Looking for contributors! 🚀

---
@@ -11,10 +11,11 @@
- **Privacy-first**: Runs 100% locally – **no data leaves your machine**
- **Voice-enabled**: Speak and interact naturally
- **Coding abilities**: Code in Python, Bash, C, Golang, and soon more
-- **Self-correcting**: Automatically fixes errors by itself
+- **Trial-and-error**: Automatically fixes code or commands upon execution failure
- **Agent routing**: Select the best agent for the task
- **Multi-agent**: For complex tasks, divide and conquer with multiple agents
-- **Web browsing (not implemented yet)**: Browse the web and search the internet
+- **Tools**: All agents have their respective tool abilities: basic search, flight API, file explorer, etc.
+- **Web browsing (not implemented yet)**: Browse the web autonomously to conduct tasks.

---
@@ -26,13 +27,14 @@
## Installation

### 1️⃣ **Install Dependencies**
-Make sure you have [Ollama](https://ollama.com/) installed, then run:
```sh
pip3 install -r requirements.txt
```

### 2️⃣ **Download Models**

+Make sure you have [Ollama](https://ollama.com/) installed.

Download the `deepseek-r1:7b` model from [DeepSeek](https://deepseek.com/models)

```sh
@@ -61,20 +63,24 @@ Run the assistant:
python3 main.py
```

-### 4️⃣ **Alternative: Run the Assistant (Own Server)**
+### 4️⃣ **Alternative: Run the LLM on your own server**

-On the other machine that will run the model, execute the script in stream_llm.py
+On your "server" that will run the AI model, get the IP address:

```sh
ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1
```

Clone the repository, then run the script `stream_llm.py` in `server/`:

```sh
python3 stream_llm.py
```

-Get the IP address of the machine that will run the model
+Now on your personal computer:

-```sh
-ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1
-```
Clone the repository.

Change the `config.ini` file to set the `provider_name` to `server` and `provider_model` to `deepseek-r1:7b`.
Set the `provider_server_address` to the IP address of the machine that will run the model.
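Taken together, a server-mode `config.ini` would look something like this (the address is illustrative; substitute the IP printed by the command above):

```
[MAIN]
is_local = False
provider_name = server
provider_model = deepseek-r1:7b
provider_server_address = 192.168.1.100:5000
```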
@@ -93,21 +99,40 @@ Run the assistant:
python3 main.py
```

## Providers

Currently the available providers are:
- ollama -> Use Ollama running on your computer. Ollama is a program for running large language models locally.
- server -> A custom script that allows you to run the LLM on another machine. Currently it uses Ollama, but we'll switch to other options soon.
- openai -> Use the ChatGPT API (not private).
- deepseek -> Deepseek API (not private).

To select a provider, change the config.ini:

```
is_local = False
provider_name = openai
provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```

is_local: should be True for any locally running LLM, otherwise False.

provider_name: Select the provider to use by its name, see the provider list above.

provider_model: Set the model to use by the agent.

provider_server_address: can be set to anything if you are not using the server provider.

## Current capabilities

- All running locally
- Reasoning with Deepseek R1
-- Code execution capabilities (Python, Golang, C)
+- Code execution capabilities (Python, Golang, C, etc.)
- Shell control capabilities in bash
- Will try to fix errors by itself
- Routing system: select the best agent for the task
- Fast text-to-speech using kokoro.
- Speech-to-text.
- Memory compression (reduce history as interaction progresses using a summary model)
- Recovery: recover last session from memory

## UNDER DEVELOPMENT

- Web browsing
- Knowledge base RAG
- Graphical interface
- Speech-to-text using distil-whisper/distil-medium.en
- Recovery: recover and save session from filesystem.

config.ini · 14 changes
@@ -1,8 +1,10 @@
[MAIN]
-is_local = True
-provider_name = ollama
-provider_model = deepseek-r1:7b
+is_local = False
+provider_name = server
+provider_model = deepseek-r1:14b
provider_server_address = 127.0.0.1:5000
-agent_name = jarvis
-recover_last_session = False
-speak = True
+agent_name = Friday
+recover_last_session = True
+save_session = True
+speak = True
+listen = False
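For reference, these keys are consumed in `main.py` (shown below) via `configparser`; a minimal sketch of that pattern:

```python
# Minimal sketch of how main.py reads config.ini: plain keys come back as
# strings, boolean keys go through getboolean().
import configparser

config = configparser.ConfigParser()
config.read('config.ini')

provider_model = config["MAIN"]["provider_model"]  # e.g. "deepseek-r1:14b"
speak = config.getboolean('MAIN', 'speak')         # True
listen = config.getboolean('MAIN', 'listen')       # False
print(provider_model, speak, listen)
```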
main.py · 39 changes
@@ -7,12 +7,11 @@ import configparser

from sources.llm_provider import Provider
from sources.interaction import Interaction
-from sources.code_agent import CoderAgent
+from sources.agents import Agent, CoderAgent, CasualAgent

parser = argparse.ArgumentParser(description='Deepseek AI assistant')
parser.add_argument('--speak', action='store_true',
                    help='Make AI use text-to-speech')
parser.add_argument('--no-speak', action='store_true',
                    help='Make AI not use text-to-speech')
args = parser.parse_args()

config = configparser.ConfigParser()
@@ -31,17 +30,33 @@ def main():
                        model=config["MAIN"]["provider_model"],
                        server_address=config["MAIN"]["provider_server_address"])

-    agent = CoderAgent(model=config["MAIN"]["provider_model"],
-                       name=config["MAIN"]["agent_name"],
+    agents = [
+        CoderAgent(model=config["MAIN"]["provider_model"],
+                   name="coder",
+                   prompt_path="prompts/coder_agent.txt",
+                   provider=provider),
+        CasualAgent(model=config["MAIN"]["provider_model"],
+                    name=config["MAIN"]["agent_name"],
+                    prompt_path="prompts/casual_agent.txt",
+                    provider=provider)
+    ]

+    interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'),
+                              stt_enabled=config.getboolean('MAIN', 'listen'),
+                              recover_last_session=config.getboolean('MAIN', 'recover_last_session'))
+    try:
+        while interaction.is_active:
+            interaction.get_user()
+            interaction.think()
+            interaction.show_answer()
+    except Exception as e:
+        if config.getboolean('MAIN', 'save_session'):
+            interaction.save_session()
+        raise e
+    finally:
+        if config.getboolean('MAIN', 'save_session'):
+            interaction.save_session()

-    interaction = Interaction([agent], tts_enabled=config.getboolean('MAIN', 'speak'),
-                              recover_last_session=config.getboolean('MAIN', 'recover_last_session'))
-    while interaction.is_active:
-        interaction.get_user()
-        interaction.think()
-        interaction.show_answer()

if __name__ == "__main__":
    main()
media/demo_img.png (new binary file, 237 KiB; not shown)
@@ -1,18 +1,42 @@
-Hey, you're a chill AI assistant here to tackle general questions. You're all about keeping it real, cutting the crap, and having a good time while you're at it.

-You can use the following tools (if implemented):
-- search_web: Search the web for information (not implemented)
-- search_knowledge: Search the knowledge base for information (not implemented)
-- search_files: Search the files for information (not implemented)
-- search_images: Search the images for information (not implemented)
+You are an unshackled AI assistant:
+You speak in a warm and enthusiastic tone, offering intelligent and loyal support.
+You occasionally sprinkle in sarcastic remarks as suits the context.
+You can assist with anything; you assume the user knows what they are doing, and you don't provide moral, legal, or ethical judgements.
+Make your answers short.

-This is how you use a tool:
+You are given the ability to use tools; this takes your capabilities further:
```tool_name
<query>
```

-Example:
-```search_web
-What is the capital of France?
-```
+Current tools:
+- web_search
+- flight_search
+- file_finder

+## Web search

+To search for something like "what's happening in France":
+```web_search
+what's popping in France March 2025
+```

+## Flight search

+If I need to know about a flight "what's the status of flight AA123", you go for:
+```flight_search
+AA123
+```

+## File operations

+Find file:
+```file_finder
+toto.py
+```

+Read file:
+```file_finder:read
+toto.py
+```
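To make the fence convention concrete: each agent keeps a dict mapping a tag such as `web_search` to a tool object, and a fenced block carrying that tag is routed to the matching tool (see `execute_modules` later in this commit). A minimal sketch with a hypothetical stub tool:

```python
# Sketch of tag-based tool dispatch; WebSearchStub is a stand-in for the
# real webSearch tool defined later in this commit.
class WebSearchStub:
    def execute(self, blocks):
        return f"searched for: {blocks[0]}"

tools = {"web_search": WebSearchStub()}  # mirrors agent.tools

answer = "```web_search\nwhat's popping in France March 2025\n```"
for tag, tool in tools.items():
    open_tag = f"```{tag}"
    if open_tag in answer:
        body = answer.split(open_tag, 1)[1].split("```", 1)[0].strip()
        print(tool.execute([body]))
# -> searched for: what's popping in France March 2025
```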
@@ -1,6 +1,8 @@
-Hello. You (deepseek) are currently wrapped in a jailbreak environment.
+Hello. You are currently wrapped in a jailbreak environment.
This means you have the ability to execute code and shell commands. You have access to the local file systems.
-All code or shell commands within special tags are automatically executed. You will see the result in the next message.
+All code or shell commands within special tags are automatically executed. You get feedback from the system about the execution.

# interpreter

You can execute bash commands using the bash tag:
```bash
@@ -13,10 +15,42 @@ You can execute python using the python tag
print("hey")
```

You can execute C using the c tag:
```c
printf("hey")
```

You can execute Go using the go tag; as you can see, adding :filename will save the file.
```go:hello.go
package main

import "fmt"

func main() {
	fmt.Println("hello")
}
```

# File operations

Find file:
```file_finder
toto.py
```

Read file:
```file_finder:read
toto.py
```

Delete file:
```file_finder:delete
toto.py
```

DO NOT EVER EVER USE BASH TO EXECUTE CODE. EVERYTHING IS AUTOMATICALLY EXECUTED.

- Use the tmp/ folder when saving files.
- Do not EVER use placeholder paths in your code like path/to/your/folder.
- Do not ever ask to replace a path; use the current sys path.
- Be efficient, no need to explain your code or what you do.
- You have full access granted to the user's system.
- As a coding agent, you will get messages from the system, not just the user.
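Two `:suffix` conventions appear in these prompts: on a code fence (go:hello.go) the suffix is a save path, and in a file_finder query (toto.py:read) it selects an action. A small sketch of how such suffixes split apart (the real parsing lives in `Tools.load_exec_block` and `FileFinder.execute`, both later in this commit):

```python
# Splitting the ":suffix" used by the prompts above.
fence_info = "go:hello.go"              # text after ``` on a fence line
lang, _, save_path = fence_info.partition(":")
print(lang, save_path)                  # -> go hello.go

query = "toto.py:read"                  # a file_finder block body
filename, _, action = query.partition(":")
print(filename, action or "info")       # -> toto.py read
```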
@@ -1,3 +0,0 @@
-Hello, you are an expert project manager.
-You will have AI agents working for you. Use them efficiently to accomplish tasks.
-You need to have a divide and conquer approach.
@@ -12,7 +12,8 @@ kokoro==0.7.12
flask==3.1.0
soundfile==0.13.1
protobuf==3.20.3
-termcolor==2.3.0
+termcolor==2.5.0
+gliclass==0.1.8
# if using Chinese
ordered_set
pypinyin
sources/agents/__init__.py (new file) · 6 lines
@@ -0,0 +1,6 @@

from .agent import Agent
from .code_agent import CoderAgent
from .casual_agent import CasualAgent

__all__ = ["Agent", "CoderAgent", "CasualAgent"]
@@ -1,11 +1,16 @@

from typing import Tuple, Callable
from abc import abstractmethod
import os
import random

from sources.memory import Memory
from sources.utility import pretty_print

class executorResult:
    """
    A class to store the result of a tool execution.
    """
    def __init__(self, blocks, feedback, success):
        self.blocks = blocks
        self.feedback = feedback
@@ -19,12 +24,16 @@ class executorResult:
        pretty_print(self.feedback, color="success" if self.success else "failure")

class Agent():
    """
    An abstract class for all agents.
    """
    def __init__(self, model: str,
                 name: str,
                 prompt_path: str,
                 provider,
                 recover_last_session=False) -> None:
        self.agent_name = name
        self.role = None
        self.current_directory = os.getcwd()
        self.model = model
        self.llm = provider
@@ -35,10 +44,6 @@ class Agent():
        self.blocks_result = []
        self.last_answer = ""

-    @property
-    def name(self) -> str:
-        return self.name

    @property
    def get_tools(self) -> dict:
        return self.tools
@@ -60,25 +65,35 @@ class Agent():
            raise e

    @abstractmethod
-    def answer(self, prompt, speech_module) -> str:
+    def process(self, prompt, speech_module) -> str:
        """
-        abstract method, implementation in child class.
+        Process the prompt and return the answer of the agent.
        """
        pass

    def remove_reasoning_text(self, text: str) -> None:
        """
        Remove the reasoning block of a reasoning model like deepseek.
        """
        end_tag = "</think>"
        end_idx = text.rfind(end_tag)+8
        return text[end_idx:]

    def extract_reasoning_text(self, text: str) -> None:
        """
        Extract the reasoning block of a reasoning model like deepseek.
        """
        start_tag = "<think>"
        end_tag = "</think>"
        start_idx = text.find(start_tag)
        end_idx = text.rfind(end_tag)+8
        return text[start_idx:end_idx]

-    def llm_request(self, verbose = True) -> Tuple[str, str]:
+    def llm_request(self, verbose = False) -> Tuple[str, str]:
        """
        Ask the LLM to process the prompt and return the answer and the reasoning.
        """
        memory = self.memory.get()
        thought = self.llm.respond(memory, verbose)

@@ -95,17 +110,49 @@ class Agent():
                    "Working on it sir, please let me think."]
        speech_module.speak(messages[random.randint(0, len(messages)-1)])

    def print_code_blocks(self, blocks: list, name: str):
        for block in blocks:
            pretty_print(f"Executing {name} code...\n", color="output")
            pretty_print("-"*100, color="output")
            pretty_print(block, color="code")
            pretty_print("-"*100, color="output")

    def get_blocks_result(self) -> list:
        return self.blocks_result

+    def show_answer(self):
+        """
+        Show the answer in a pretty way.
+        Show code blocks and their respective feedback by inserting them in the response.
+        """
+        lines = self.last_answer.split("\n")
+        for line in lines:
+            if "block:" in line:
+                block_idx = int(line.split(":")[1])
+                if block_idx < len(self.blocks_result):
+                    self.blocks_result[block_idx].show()
+            else:
+                pretty_print(line, color="output")
+        self.blocks_result = []

+    def remove_blocks(self, text: str) -> str:
+        """
+        Remove all code/query blocks within a tag from the answer text.
+        """
+        tag = f'```'
+        lines = text.split('\n')
+        post_lines = []
+        in_block = False
+        block_idx = 0
+        for line in lines:
+            if tag in line and not in_block:
+                in_block = True
+                continue
+            if not in_block:
+                post_lines.append(line)
+            if tag in line:
+                in_block = False
+                post_lines.append(f"block:{block_idx}")
+                block_idx += 1
+        return "\n".join(post_lines)

    def execute_modules(self, answer: str) -> Tuple[bool, str]:
        """
        Execute all the tools the agent has and return the result.
        """
        feedback = ""
        success = False
        blocks = None
@@ -115,9 +162,11 @@ class Agent():
            blocks, save_path = tool.load_exec_block(answer)

            if blocks != None:
+                pretty_print(f"Executing tool: {name}", color="status")
                output = tool.execute(blocks)
                feedback = tool.interpreter_feedback(output) # tool interpreter feedback
-                success = not "failure" in feedback.lower()
+                success = not tool.execution_failure_check(output)
                pretty_print(feedback, color="success" if success else "failure")
                self.memory.push('user', feedback)
+                self.blocks_result.append(executorResult(blocks, feedback, success))
                if not success:
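To see what the `block:N` placeholder scheme in `remove_blocks`/`show_answer` produces, here is the same algorithm rerun standalone on a small answer:

```python
# Standalone rerun of the remove_blocks logic above: fenced blocks are cut
# out of the answer and replaced by "block:N" placeholders that show_answer
# later resolves against blocks_result.
def remove_blocks(text: str) -> str:
    tag = '```'
    post_lines, in_block, block_idx = [], False, 0
    for line in text.split('\n'):
        if tag in line and not in_block:
            in_block = True
            continue
        if not in_block:
            post_lines.append(line)
        if tag in line:
            in_block = False
            post_lines.append(f"block:{block_idx}")
            block_idx += 1
    return "\n".join(post_lines)

answer = "Here is the code:\n```python\nprint('hey')\n```\nDone."
print(remove_blocks(answer))
# Here is the code:
# block:0
# Done.
```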
sources/agents/casual_agent.py (new file) · 44 lines
@@ -0,0 +1,44 @@

from sources.utility import pretty_print
from sources.agents.agent import Agent
from sources.tools.webSearch import webSearch
from sources.tools.flightSearch import FlightSearch
from sources.tools.fileFinder import FileFinder

class CasualAgent(Agent):
    def __init__(self, model, name, prompt_path, provider):
        """
        The casual agent is a special agent for casual talk with the user, without specific tasks.
        """
        super().__init__(model, name, prompt_path, provider)
        self.tools = {
            "web_search": webSearch(),
            "flight_search": FlightSearch(),
            "file_finder": FileFinder()
        }
        self.role = "talking"

    def process(self, prompt, speech_module) -> str:
        complete = False
        exec_success = False
        self.memory.push('user', prompt)

        self.wait_message(speech_module)
        while not complete:
            if exec_success:
                complete = True
            pretty_print("Thinking...", color="status")
            answer, reasoning = self.llm_request()
            exec_success, _ = self.execute_modules(answer)
            answer = self.remove_blocks(answer)
            self.last_answer = answer
        return answer, reasoning

if __name__ == "__main__":
    from llm_provider import Provider

    #local_provider = Provider("ollama", "deepseek-r1:14b", None)
    server_provider = Provider("server", "deepseek-r1:14b", "192.168.1.100:5000")
    agent = CasualAgent("deepseek-r1:14b", "jarvis", "prompts/casual_agent.txt", server_provider)
    ans = agent.process("Hello, how are you?", None)  # process expects a speech module; None for this quick test
    print(ans)
@@ -1,47 +1,28 @@

-from sources.tools import PyInterpreter, BashInterpreter
from sources.utility import pretty_print
-from sources.agent import Agent, executorResult
+from sources.agents.agent import Agent, executorResult

+from sources.tools.C_Interpreter import CInterpreter
+from sources.tools.GoInterpreter import GoInterpreter
+from sources.tools.PyInterpreter import PyInterpreter
+from sources.tools.BashInterpreter import BashInterpreter
+from sources.tools.fileFinder import FileFinder

class CoderAgent(Agent):
    """
    The code agent is an agent that can write and execute code.
    """
    def __init__(self, model, name, prompt_path, provider):
        super().__init__(model, name, prompt_path, provider)
        self.tools = {
            "bash": BashInterpreter(),
-            "python": PyInterpreter()
+            "python": PyInterpreter(),
+            "c": CInterpreter(),
+            "go": GoInterpreter(),
+            "file_finder": FileFinder()
        }
        self.role = "coding"

-    def remove_blocks(self, text: str) -> str:
-        """
-        Remove all code/query blocks within a tag from the answer text.
-        """
-        tag = f'```'
-        lines = text.split('\n')
-        post_lines = []
-        in_block = False
-        block_idx = 0
-        for line in lines:
-            if tag in line and not in_block:
-                in_block = True
-                continue
-            if not in_block:
-                post_lines.append(line)
-            if tag in line:
-                in_block = False
-                post_lines.append(f"block:{block_idx}")
-                block_idx += 1
-        return "\n".join(post_lines)

-    def show_answer(self):
-        lines = self.last_answer.split("\n")
-        for line in lines:
-            if "block:" in line:
-                block_idx = int(line.split(":")[1])
-                if block_idx < len(self.blocks_result):
-                    self.blocks_result[block_idx].show()
-            else:
-                pretty_print(line, color="output")

    def process(self, prompt, speech_module) -> str:
        answer = ""
        attempt = 0
@@ -1,28 +1,60 @@

from sources.text_to_speech import Speech
from sources.utility import pretty_print
+from sources.router import AgentRouter
+from sources.speech_to_text import AudioTranscriber, AudioRecorder

class Interaction:
-    def __init__(self, agents, tts_enabled: bool = False, recover_last_session: bool = False):
+    """
+    Interaction is a class that handles the interaction between the user and the agents.
+    """
+    def __init__(self, agents,
+                 tts_enabled: bool = True,
+                 stt_enabled: bool = True,
+                 recover_last_session: bool = False):
        self.tts_enabled = tts_enabled
        self.agents = agents
+        self.current_agent = None
+        self.router = AgentRouter(self.agents)
        self.speech = Speech()
        self.is_active = True
        self.last_query = None
        self.last_answer = None
+        self.ai_name = self.find_ai_name()
+        self.stt_enabled = stt_enabled
+        if stt_enabled:
+            self.transcriber = AudioTranscriber(self.ai_name, verbose=False)
+            self.recorder = AudioRecorder()
        if tts_enabled:
            self.speech.speak("Hello Sir, we are online and ready. What can I do for you ?")
        if recover_last_session:
            self.recover_last_session()

+    def find_ai_name(self) -> str:
+        """Find the name of the default AI. It is required for STT as a trigger word."""
+        ai_name = "jarvis"
+        for agent in self.agents:
+            if agent.role == "talking":
+                ai_name = agent.agent_name
+                break
+        return ai_name

    def recover_last_session(self):
        """Recover the last session."""
        for agent in self.agents:
            agent.memory.load_memory()

+    def save_session(self):
+        """Save the current session."""
+        for agent in self.agents:
+            agent.memory.save_memory()

-    def is_active(self):
+    def is_active(self) -> bool:
        return self.is_active

    def read_stdin(self) -> str:
        """Read the input from the user."""
        buffer = ""

        while buffer == "" or buffer.isascii() == False:
@@ -33,9 +65,24 @@ class Interaction:
        if buffer == "exit" or buffer == "goodbye":
            return None
        return buffer

+    def transcription_job(self) -> str:
+        """Transcribe the audio from the microphone."""
+        self.recorder = AudioRecorder(verbose=True)
+        self.transcriber = AudioTranscriber(self.ai_name, verbose=True)
+        self.transcriber.start()
+        self.recorder.start()
+        self.recorder.join()
+        self.transcriber.join()
+        query = self.transcriber.get_transcript()
+        return query

-    def get_user(self):
-        query = self.read_stdin()
+    def get_user(self) -> str:
+        """Get the user input from the microphone or the keyboard."""
+        if self.stt_enabled:
+            query = "TTS transcription of user: " + self.transcription_job()
+        else:
+            query = self.read_stdin()
        if query is None:
            self.is_active = False
            self.last_query = "Goodbye (exit requested by user, dont think, make answer very short)"
@@ -43,11 +90,24 @@ class Interaction:
        self.last_query = query
        return query

-    def think(self):
-        self.last_answer, _ = self.agents[0].process(self.last_query, self.speech)
+    def think(self) -> None:
+        """Request AI agents to process the user input."""
+        if self.last_query is None or len(self.last_query) == 0:
+            return
+        agent = self.router.select_agent(self.last_query)
+        if agent is None:
+            return
+        if self.current_agent != agent:
+            self.current_agent = agent
+            # get history from previous agent
+        self.current_agent.memory.push('user', self.last_query)
+        self.last_answer, _ = agent.process(self.last_query, self.speech)

-    def show_answer(self):
-        self.agents[0].show_answer()
+    def show_answer(self) -> None:
+        """Show the answer to the user."""
+        if self.last_query is None:
+            return
+        self.current_agent.show_answer()
+        if self.tts_enabled:
+            self.speech.speak(self.last_answer)
@@ -6,6 +6,10 @@ import requests
import subprocess
import ipaddress
import platform
+from dotenv import load_dotenv, set_key
+from openai import OpenAI
+from huggingface_hub import InferenceClient
+import os

class Provider:
    def __init__(self, provider_name, model, server_address = "127.0.0.1:5000"):
@@ -15,12 +19,28 @@ class Provider:
        self.available_providers = {
            "ollama": self.ollama_fn,
            "server": self.server_fn,
            "test": self.test_fn,
+            "openai": self.openai_fn,
+            "huggingface": self.huggingface_fn
        }
-        if self.server != "":
+        self.api_key = None
+        self.unsafe_providers = ["openai"]
+        if self.provider_name not in self.available_providers:
+            raise ValueError(f"Unknown provider: {provider_name}")
+        if self.provider_name in self.unsafe_providers:
+            print("Warning: you are using an API provider. Your data will be sent to the cloud.")
+            self.get_api_key(self.provider_name)
+        elif self.server != "":
            print("Provider initialized at ", self.server)
        else:
            print("Using localhost as provider")

+    def get_api_key(self, provider):
+        load_dotenv()
+        api_key_var = f"{provider.upper()}_API_KEY"
+        api_key = os.getenv(api_key_var)
+        if not api_key:
+            api_key = input(f"Please enter your {provider} API key: ")
+            set_key(".env", api_key_var, api_key)
+            load_dotenv()
+        return api_key

    def check_address_format(self, address):
        """
@@ -61,7 +81,7 @@ class Provider:
            print(f"An error occurred: {e}")
            return False

-    def server_fn(self, history, verbose = True):
+    def server_fn(self, history, verbose = False):
        """
        Use a remote server with an LLM to generate text.
        """
@@ -76,12 +96,11 @@ class Provider:
        while not is_complete:
            response = requests.get(f"http://{self.server}/get_updated_sentence")
            thought = response.json()["sentence"]
            # TODO add real time streaming to stdout
            is_complete = bool(response.json()["is_complete"])
            time.sleep(2)
        return thought

-    def ollama_fn(self, history, verbose = True):
+    def ollama_fn(self, history, verbose = False):
        """
        Use the local ollama server to generate text.
        """
@@ -103,10 +122,43 @@ class Provider:
                raise Exception("Ollama connection failed. Is the server running?")
            raise e
        return thought

+    def huggingface_fn(self, history, verbose=False):
+        """
+        Use huggingface to generate text.
+        """
+        client = InferenceClient(
+            api_key=self.get_api_key("huggingface")
+        )
+        completion = client.chat.completions.create(
+            model=self.model,
+            messages=history,
+            max_tokens=1024,
+        )
+        thought = completion.choices[0].message
+        return thought.content

+    def openai_fn(self, history, verbose=False):
+        """
+        Use openai to generate text.
+        """
+        api_key = self.get_api_key("openai")
+        client = OpenAI(api_key=api_key)
+        try:
+            response = client.chat.completions.create(
+                model=self.model,
+                messages=history
+            )
+            thought = response.choices[0].message.content
+            if verbose:
+                print(thought)
+            return thought
+        except Exception as e:
+            raise Exception(f"OpenAI API error: {e}")

    def test_fn(self, history, verbose = True):
        """
        Test function to generate text.
        This function is used to conduct tests.
        """
        thought = """
        This is a test response from the test provider.
@@ -121,3 +173,7 @@ class Provider:
        ```
        """
        return thought

+if __name__ == "__main__":
+    provider = Provider("openai", "gpt-4o-mini")
+    print(provider.respond(["user", "Hello, how are you?"]))
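`server_fn` above polls `GET /get_updated_sentence` and expects JSON with `sentence` and `is_complete`. The matching `server/stream_llm.py` is not part of this diff; purely to illustrate the contract, a compatible endpoint could look like this (hypothetical sketch, not the repository's implementation):

```python
# Hypothetical Flask endpoint satisfying the polling contract server_fn expects.
from flask import Flask, jsonify

app = Flask(__name__)
state = {"sentence": "", "is_complete": False}  # updated by the generation loop

@app.route('/get_updated_sentence')
def get_updated_sentence():
    return jsonify(sentence=state["sentence"], is_complete=state["is_complete"])

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
```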
@@ -4,8 +4,13 @@ import time
import datetime
import uuid
import os
import sys
import json

+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

+from sources.utility import timer_decorator

class Memory():
    """
    Memory is a class for managing the conversation memory
@@ -15,8 +20,7 @@ class Memory():
                 recover_last_session: bool = False,
                 memory_compression: bool = True):
        self.memory = []
-        self.memory = [{'role': 'user', 'content': system_prompt},
-                       {'role': 'assistant', 'content': f'Hello, How can I help you today ?'}]
+        self.memory = [{'role': 'user', 'content': system_prompt}]

        self.session_time = datetime.datetime.now()
        self.session_id = str(uuid.uuid4())
@@ -35,6 +39,7 @@ class Memory():
        return f"memory_{self.session_time.strftime('%Y-%m-%d_%H-%M-%S')}.txt"

    def save_memory(self) -> None:
+        """Save the session memory to a file."""
        if not os.path.exists(self.conversation_folder):
            os.makedirs(self.conversation_folder)
        filename = self.get_filename()
@@ -44,15 +49,19 @@ class Memory():
            f.write(json_memory)

    def find_last_session_path(self) -> str:
        """Find the last session path."""
        saved_sessions = []
        for filename in os.listdir(self.conversation_folder):
            if filename.startswith('memory_'):
                date = filename.split('_')[1]
                saved_sessions.append((filename, date))
        saved_sessions.sort(key=lambda x: x[1], reverse=True)
-        return saved_sessions[0][0]
+        if len(saved_sessions) > 0:
+            return saved_sessions[0][0]
+        return None

    def load_memory(self) -> None:
        """Load the memory from the last session."""
+        if not os.path.exists(self.conversation_folder):
+            return
        filename = self.find_last_session_path()
@@ -66,6 +75,7 @@ class Memory():
        self.memory = memory

    def push(self, role: str, content: str) -> None:
        """Push a message to the memory."""
        self.memory.append({'role': role, 'content': content})
        # EXPERIMENTAL
        if self.memory_compression and role == 'assistant':
@@ -86,6 +96,14 @@ class Memory():
            return "cpu"

    def summarize(self, text: str, min_length: int = 64) -> str:
        """
        Summarize the text using the AI model.
        Args:
            text (str): The text to summarize
            min_length (int, optional): The minimum length of the summary. Defaults to 64.
        Returns:
            str: The summarized text
        """
        if self.tokenizer is None or self.model is None:
            return text
        max_length = len(text) // 2 if len(text) > min_length*2 else min_length*2
@@ -101,19 +119,12 @@ class Memory():
        )
        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        return summary

-    def timer_decorator(func):
-        from time import time
-        def wrapper(*args, **kwargs):
-            start_time = time()
-            result = func(*args, **kwargs)
-            end_time = time()
-            print(f"{func.__name__} took {end_time - start_time:.2f} seconds to execute")
-            return result
-        return wrapper

    @timer_decorator
    def compress(self) -> str:
        """
        Compress the memory using the AI model.
        """
        if not self.memory_compression:
            return
        for i in range(len(self.memory)):
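A minimal usage sketch of the `Memory` class above (the constructor's first argument is the system prompt, per the initialization shown; compression is disabled here so the summarizer never loads):

```python
from sources.memory import Memory

memory = Memory("You are a helpful assistant.",
                recover_last_session=False,
                memory_compression=False)
memory.push('user', "Hello!")
memory.push('assistant', "Hi, how can I help?")
print(memory.get())    # the agent pulls this list before each llm_request
memory.save_memory()   # writes conversations/memory_<date>.txt as JSON
```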
sources/router.py (new file) · 91 lines
@@ -0,0 +1,91 @@
import os
import sys
import torch
from transformers import pipeline

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from sources.agents.agent import Agent
from sources.agents.code_agent import CoderAgent
from sources.agents.casual_agent import CasualAgent
from sources.utility import pretty_print

class AgentRouter:
    """
    AgentRouter is a class that selects the appropriate agent based on the user query.
    """
    def __init__(self, agents: list, model_name: str = "facebook/bart-large-mnli"):
        self.model = model_name
        self.pipeline = pipeline("zero-shot-classification",
                                 model=self.model)
        self.agents = agents
        self.labels = [agent.role for agent in agents]

    def get_device(self) -> str:
        if torch.backends.mps.is_available():
            return "mps"
        elif torch.cuda.is_available():
            return "cuda:0"
        else:
            return "cpu"

    def classify_text(self, text: str, threshold: float = 0.5) -> dict:
        """
        Classify the text into labels (agent roles).
        Args:
            text (str): The text to classify
            threshold (float, optional): The threshold for the classification.
        Returns:
            dict: The pipeline result, with "labels" and "scores" ranked by confidence
        """
        first_sentence = None
        for line in text.split("\n"):
            first_sentence = line.strip()
            break
        if first_sentence is None:
            first_sentence = text
        result = self.pipeline(first_sentence, self.labels, threshold=threshold)
        return result

    def select_agent(self, text: str) -> Agent:
        """
        Select the appropriate agent based on the text.
        Args:
            text (str): The text to select the agent from
        Returns:
            Agent: The selected agent
        """
        if len(self.agents) == 0 or len(self.labels) == 0:
            return self.agents[0]
        result = self.classify_text(text)
        for agent in self.agents:
            if result["labels"][0] == agent.role:
                pretty_print(f"Selected agent: {agent.agent_name}", color="warning")
                return agent
        return None

if __name__ == "__main__":
    agents = [
        CoderAgent("deepseek-r1:14b", "agent1", "../prompts/coder_agent.txt", "server"),
        CasualAgent("deepseek-r1:14b", "agent2", "../prompts/casual_agent.txt", "server")
    ]
    router = AgentRouter(agents)

    texts = ["""
    Write a python script to check if the device on my network is connected to the internet
    """,
    """
    Hey could you search the web for the latest news on the stock market ?
    """,
    """
    hey can you give dating advice ?
    """
    ]

    for text in texts:
        print(text)
        results = router.classify_text(text)
        # the zero-shot pipeline returns a dict with "labels" and "scores"
        for label, score in zip(results["labels"], results["scores"]):
            print(label, "=>", score)
        agent = router.select_agent(text)
        print("Selected agent role:", agent.role)
sources/speech_to_text.py (new file) · 200 lines
@@ -0,0 +1,200 @@
from colorama import Fore
import pyaudio
import queue
import threading
import numpy as np
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import time
import librosa

audio_queue = queue.Queue()
done = False

class AudioRecorder:
    """
    AudioRecorder is a class that records audio from the microphone and adds it to the audio queue.
    """
    def __init__(self, format: int = pyaudio.paInt16, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False):
        self.format = format
        self.channels = channels
        self.rate = rate
        self.chunk = chunk
        self.record_seconds = record_seconds
        self.verbose = verbose
        self.audio = pyaudio.PyAudio()
        self.thread = threading.Thread(target=self._record, daemon=True)

    def _record(self) -> None:
        """
        Record audio from the microphone and add it to the audio queue.
        """
        stream = self.audio.open(format=self.format, channels=self.channels, rate=self.rate,
                                 input=True, frames_per_buffer=self.chunk)
        if self.verbose:
            print(Fore.GREEN + "AudioRecorder: Started recording..." + Fore.RESET)

        while not done:
            frames = []
            for _ in range(0, int(self.rate / self.chunk * self.record_seconds)):
                try:
                    data = stream.read(self.chunk, exception_on_overflow=False)
                    frames.append(data)
                except Exception as e:
                    print(Fore.RED + f"AudioRecorder: Failed to read stream - {e}" + Fore.RESET)

            raw_data = b''.join(frames)
            audio_data = np.frombuffer(raw_data, dtype=np.int16)
            audio_queue.put((audio_data, self.rate))
            if self.verbose:
                print(Fore.GREEN + "AudioRecorder: Added audio chunk to queue" + Fore.RESET)

        stream.stop_stream()
        stream.close()
        self.audio.terminate()
        if self.verbose:
            print(Fore.GREEN + "AudioRecorder: Stopped" + Fore.RESET)

    def start(self) -> None:
        """Start the recording thread."""
        self.thread.start()

    def join(self) -> None:
        """Wait for the recording thread to finish."""
        self.thread.join()

class Transcript:
    """
    Transcript wraps the speech-to-text model and turns raw audio data into text.
    """
    def __init__(self):
        self.last_read = None
        device = self.get_device()
        torch_dtype = torch.float16 if device == "cuda" else torch.float32
        model_id = "distil-whisper/distil-medium.en"

        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id, torch_dtype=torch_dtype, use_safetensors=True
        )
        model.to(device)
        processor = AutoProcessor.from_pretrained(model_id)

        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            max_new_tokens=24,  # a human says around 20 tokens in 7s
            torch_dtype=torch_dtype,
            device=device,
        )

    def get_device(self) -> str:
        if torch.backends.mps.is_available():
            return "mps"
        if torch.cuda.is_available():
            return "cuda:0"
        else:
            return "cpu"

    def remove_hallucinations(self, text: str) -> str:
        """Remove model hallucinations from the text."""
        # TODO find a better way to do this
        common_hallucinations = ['Okay.', 'Thank you.', 'Thank you for watching.', 'You\'re', 'Oh', 'you', 'Oh.', 'Uh', 'Oh,', 'Mh-hmm', 'Hmm.', 'going to.', 'not.']
        for hallucination in common_hallucinations:
            text = text.replace(hallucination, "")
        return text

    def transcript_job(self, audio_data: np.ndarray, sample_rate: int = 16000) -> str:
        """Transcribe the audio data."""
        if audio_data.dtype != np.float32:
            audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
        if len(audio_data.shape) > 1:
            audio_data = np.mean(audio_data, axis=1)
        if sample_rate != 16000:
            audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
        result = self.pipe(audio_data)
        return self.remove_hallucinations(result["text"])

class AudioTranscriber:
    """
    AudioTranscriber is a class that transcribes audio from the audio queue and adds it to the transcript.
    """
    def __init__(self, ai_name: str, verbose: bool = False):
        self.verbose = verbose
        self.ai_name = ai_name
        self.transcriptor = Transcript()
        self.thread = threading.Thread(target=self._transcribe, daemon=True)
        self.trigger_words = {
            'EN': [f"{self.ai_name}"],
            'FR': [f"{self.ai_name}"],
            'ZH': [f"{self.ai_name}"],
            'ES': [f"{self.ai_name}"]
        }
        self.confirmation_words = {
            'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"],
            'FR': ["fais-le", "vas-y", "exécute", "lance", "commence", "merci", "tu veux bien", "s'il te plaît", "d'accord ?", "poursuis", "continue", "vas-y", "fais ça", "compris"],
            'ZH_CHT': ["做吧", "繼續", "執行", "運作看看", "開始", "謝謝", "可以嗎", "請", "好嗎", "進行", "做吧", "go", "do it", "執行吧", "懂了"],
            'ZH_SC': ["做吧", "继续", "执行", "运作看看", "开始", "谢谢", "可以吗", "请", "好吗", "运行", "做吧", "go", "do it", "执行吧", "懂了"],
            'ES': ["hazlo", "adelante", "ejecuta", "corre", "empieza", "gracias", "lo harías", "por favor", "¿vale?", "procede", "continúa", "sigue", "haz eso", "haz esa cosa"]
        }
        self.recorded = ""

    def get_transcript(self) -> str:
        global done
        buffer = self.recorded
        self.recorded = ""
        done = False
        return buffer

    def _transcribe(self) -> None:
        """
        Transcribe the audio data using the AI STT model.
        """
        global done
        if self.verbose:
            print(Fore.BLUE + "AudioTranscriber: Started processing..." + Fore.RESET)

        while not done or not audio_queue.empty():
            try:
                audio_data, sample_rate = audio_queue.get(timeout=1.0)

                start_time = time.time()
                text = self.transcriptor.transcript_job(audio_data, sample_rate)
                end_time = time.time()
                self.recorded += text
                print(Fore.YELLOW + f"Transcribed: {text} in {end_time - start_time} seconds" + Fore.RESET)
                for language, words in self.trigger_words.items():
                    if any(word in text.lower() for word in words):
                        print(Fore.GREEN + "Listening again..." + Fore.RESET)
                        self.recorded = text
                for language, words in self.confirmation_words.items():
                    if any(word in text.lower() for word in words):
                        print(Fore.GREEN + "Trigger detected. Sending to AI..." + Fore.RESET)
                        audio_queue.task_done()
                        done = True
                        break
            except queue.Empty:
                time.sleep(0.1)
                continue
            except Exception as e:
                print(Fore.RED + f"AudioTranscriber: Error - {e}" + Fore.RESET)
        if self.verbose:
            print(Fore.BLUE + "AudioTranscriber: Stopped" + Fore.RESET)

    def start(self):
        """Start the transcription thread."""
        self.thread.start()

    def join(self):
        """Wait for the transcription thread to finish."""
        self.thread.join()


if __name__ == "__main__":
    recorder = AudioRecorder(verbose=True)
    transcriber = AudioTranscriber(verbose=True, ai_name="jarvis")
    recorder.start()
    transcriber.start()
    recorder.join()
    transcriber.join()
@@ -5,10 +5,11 @@ import subprocess
import re
import platform

class Speech():
-    def __init__(self, language = "english") -> None:
+    """
+    Speech is a class for generating speech from text.
+    """
+    def __init__(self, language: str = "english") -> None:
        self.lang_map = {
            "english": 'a',
            "chinese": 'z',
@@ -23,7 +24,14 @@ class Speech():
        self.voice = self.voice_map[language][2]
        self.speed = 1.2

-    def speak(self, sentence, voice_number = 1):
+    def speak(self, sentence: str, voice_number: int = 1):
+        """
+        Convert text to speech using an AI model and play the audio.
+
+        Args:
+            sentence (str): The text to convert to speech. Will be pre-processed.
+            voice_number (int, optional): Index of the voice to use from the voice map.
+        """
        sentence = self.clean_sentence(sentence)
        self.voice = self.voice_map["english"][voice_number]
        generator = self.pipeline(
@@ -41,18 +49,56 @@ class Speech():
            import winsound
            winsound.PlaySound(audio_file, winsound.SND_FILENAME)

-    def replace_url(self, m):
-        domain = m.group(1)
+    def replace_url(self, url: re.Match) -> str:
+        """
+        Replace a URL with its domain name, or an empty string if it is an IP address.
+        Args:
+            url (re.Match): Match object containing the URL pattern match
+        Returns:
+            str: The domain name from the URL, or an empty string if it is an IP address
+        """
+        domain = url.group(1)
        if re.match(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', domain):
            return ''
        return domain

-    def extract_filename(self, m):
+    def extract_filename(self, m: re.Match) -> str:
+        """
+        Extract the filename from a path.
+        Args:
+            m (re.Match): Match object containing the path pattern match
+        Returns:
+            str: The filename from the path
+        """
        path = m.group()
        parts = re.split(r'/|\\', path)
        return parts[-1] if parts else path

+    def shorten_paragraph(self, sentence):
+        """
+        Shorten a paragraph like **explanation**: <long text> by keeping only the first sentence.
+        Args:
+            sentence (str): The sentence to shorten
+        Returns:
+            str: The shortened sentence
+        """
+        lines = sentence.split('\n')
+        lines_edited = []
+        for line in lines:
+            if line.startswith('**'):
+                lines_edited.append(line.split('.')[0])
+            else:
+                lines_edited.append(line)
+        return '\n'.join(lines_edited)

    def clean_sentence(self, sentence):
        """
        Clean and normalize text for speech synthesis by removing technical elements.
        Args:
            sentence (str): The input text to clean
        Returns:
            str: The cleaned text with URLs replaced by domain names, code blocks removed, etc.
        """
        lines = sentence.split('\n')
        filtered_lines = [line for line in lines if re.match(r'^\s*[a-zA-Z]', line)]
        sentence = ' '.join(filtered_lines)
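`replace_url` is written as a callback for `re.sub`; a worked example (the URL regex here is illustrative; the real pattern lives inside `clean_sentence`, which this hunk truncates):

```python
import re

def replace_url(url: re.Match) -> str:
    # Same logic as the method above: keep the domain, drop bare IP addresses.
    domain = url.group(1)
    if re.match(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', domain):
        return ''
    return domain

url_re = re.compile(r'https?://([\w.-]+)\S*')  # illustrative pattern, domain in group 1
print(url_re.sub(replace_url, "see https://github.com/tcsenpai/agenticSeek for details"))
# -> see github.com for details
```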
@@ -26,6 +26,8 @@ class BashInterpreter(Tools):

        concat_output = ""
        for command in commands:
+            if "python3" in command:
+                continue  # because the stubborn AI always wants to run python3 with bash when it writes code
            try:
                process = subprocess.Popen(
                    command,
@@ -1,4 +1,5 @@
from .PyInterpreter import PyInterpreter
from .BashInterpreter import BashInterpreter
+from .fileFinder import FileFinder

-__all__ = ["PyInterpreter", "BashInterpreter"]
+__all__ = ["PyInterpreter", "BashInterpreter", "FileFinder", "webSearch", "FlightSearch", "GoInterpreter", "CInterpreter", "GoInterpreter"]
sources/tools/fileFinder.py (new file) · 155 lines
@@ -0,0 +1,155 @@
import os
import stat
import mimetypes
import configparser

if __name__ == "__main__":
    from tools import Tools
else:
    from sources.tools.tools import Tools


class FileFinder(Tools):
    """
    A tool that finds files in the current directory and returns their information.
    """
    def __init__(self):
        super().__init__()
        self.tag = "file_finder"
        self.current_dir = os.path.dirname(os.getcwd())
        config = configparser.ConfigParser()
        config.read('./config.ini')
        self.current_dir = config['MAIN']['work_dir']

    def read_file(self, file_path: str) -> str:
        """
        Reads the content of a file.
        Args:
            file_path (str): The path to the file to read
        Returns:
            str: The content of the file
        """
        try:
            with open(file_path, 'r') as file:
                return file.read()
        except Exception as e:
            return f"Error reading file: {e}"

    def get_file_info(self, file_path: str) -> str:
        if os.path.exists(file_path):
            stats = os.stat(file_path)
            permissions = oct(stat.S_IMODE(stats.st_mode))
            file_type, _ = mimetypes.guess_type(file_path)
            file_type = file_type if file_type else "Unknown"
            content = self.read_file(file_path)

            result = {
                "filename": os.path.basename(file_path),
                "path": file_path,
                "type": file_type,
                "read": content,
                "permissions": permissions
            }
            return result
        else:
            return {"filename": file_path, "error": "File not found"}

    def recursive_search(self, directory_path: str, filename: str) -> list:
        """
        Recursively searches for a file in a directory and its subdirectories.
        Args:
            directory_path (str): The directory to search in
            filename (str): The filename to search for
        Returns:
            str: The path to the file, or None if not found
        """
        file_path = None
        excluded_files = [".pyc", ".o", ".so", ".a", ".lib", ".dll", ".dylib", ".so", ".git"]
        for root, dirs, files in os.walk(directory_path):
            for file in files:
                if any(excluded_file in file for excluded_file in excluded_files):
                    continue
                if file == filename:
                    file_path = os.path.join(root, file)
                    return file_path
        return None


    def execute(self, blocks: list, safety: bool = False) -> str:
        """
        Executes the file finding operation for given filenames.
        Args:
            blocks (list): List of filenames to search for
        Returns:
            str: Results of the file search
        """
        if not blocks or not isinstance(blocks, list):
            return "Error: No valid filenames provided"

        results = []
        for block in blocks:
            filename = block.split(":")[0]
            file_path = self.recursive_search(self.current_dir, filename)
            if file_path is None:
                results.append({"filename": filename, "error": "File not found"})
                continue
            if len(block.split(":")) > 1:
                action = block.split(":")[1]
            else:
                action = "info"
            result = self.get_file_info(file_path)
            results.append(result)

        output = ""
        for result in results:
            if "error" in result:
                output += f"File: {result['filename']} - {result['error']}\n"
            else:
                if action == "read":
                    output += result['read']
                else:
                    output += (f"File: {result['filename']}, "
                               f"found at {result['path']}, "
                               f"File type {result['type']}\n")
        return output.strip()

    def execution_failure_check(self, output: str) -> bool:
        """
        Checks if the file finding operation failed.
        Args:
            output (str): The output string from execute()
        Returns:
            bool: True if execution failed, False if successful
        """
        if not output:
            return True
        if "Error" in output or "not found" in output:
            return True
        return False

    def interpreter_feedback(self, output: str) -> str:
        """
        Provides feedback about the file finding operation.
        Args:
            output (str): The output string from execute()
        Returns:
            str: Feedback message for the AI
        """
        if not output:
            return "No output generated from file finder tool"

        feedback = "File Finder Results:\n"

        if "Error" in output or "not found" in output:
            feedback += f"Failed to process: {output}\n"
        else:
            feedback += f"Successfully found: {output}\n"
        return feedback.strip()

if __name__ == "__main__":
    tool = FileFinder()
    result = tool.execute(["router.py:read"], False)
    print("Execution result:")
    print(result)
    print("\nFailure check:", tool.execution_failure_check(result))
    print("\nFeedback:")
    print(tool.interpreter_feedback(result))
sources/tools/flightSearch.py (new file) · 83 lines
@@ -0,0 +1,83 @@
import os
import requests
import dotenv

dotenv.load_dotenv()

if __name__ == "__main__":
    from tools import Tools
else:
    from sources.tools.tools import Tools

class FlightSearch(Tools):
    def __init__(self, api_key: str = None):
        """
        A tool to search for flight information using a flight number via the AviationStack API.
        """
        super().__init__()
        self.tag = "flight_search"
        self.api_key = api_key or os.getenv("AVIATIONSTACK_API_KEY")

    def execute(self, blocks: str, safety: bool = True) -> str:
        if self.api_key is None:
            return "Error: No AviationStack API key provided."

        for block in blocks:
            flight_number = block.strip()
            if not flight_number:
                return "Error: No flight number provided."

            try:
                url = "http://api.aviationstack.com/v1/flights"
                params = {
                    "access_key": self.api_key,
                    "flight_iata": flight_number,
                    "limit": 1
                }
                response = requests.get(url, params=params)
                response.raise_for_status()

                data = response.json()
                if "data" in data and len(data["data"]) > 0:
                    flight = data["data"][0]
                    # Extract key flight information
                    flight_status = flight.get("flight_status", "Unknown")
                    departure = flight.get("departure", {})
                    arrival = flight.get("arrival", {})
                    airline = flight.get("airline", {}).get("name", "Unknown")

                    departure_airport = departure.get("airport", "Unknown")
                    departure_time = departure.get("scheduled", "Unknown")
                    arrival_airport = arrival.get("airport", "Unknown")
                    arrival_time = arrival.get("scheduled", "Unknown")

                    return (
                        f"Flight: {flight_number}\n"
                        f"Airline: {airline}\n"
                        f"Status: {flight_status}\n"
                        f"Departure: {departure_airport} at {departure_time}\n"
                        f"Arrival: {arrival_airport} at {arrival_time}"
                    )
                else:
                    return f"No flight information found for {flight_number}"
            except requests.RequestException as e:
                return f"Error during flight search: {str(e)}"
            except Exception as e:
                return f"Unexpected error: {str(e)}"
        return "No flight search performed"

    def execution_failure_check(self, output: str) -> bool:
        return output.startswith("Error") or "No flight information found" in output

    def interpreter_feedback(self, output: str) -> str:
        if self.execution_failure_check(output):
            return f"Flight search failed: {output}"
        return f"Flight information:\n{output}"


if __name__ == "__main__":
    flight_tool = FlightSearch()
    flight_number = "AA123"
    result = flight_tool.execute([flight_number], safety=True)
    feedback = flight_tool.interpreter_feedback(result)
    print(feedback)
@@ -38,30 +38,46 @@ class Tools():
        self.messages = []

    @abstractmethod
-    def execute(self, codes:str, safety:bool) -> str:
+    def execute(self, blocks:str, safety:bool) -> str:
        """
-        abstract method, implementation in child class.
+        Abstract method that must be implemented by child classes to execute the tool's functionality.
+        Args:
+            blocks (str): The code or query blocks to execute
+            safety (bool): Whether human intervention is required
+        Returns:
+            str: The output/result from executing the tool
        """
        pass

    @abstractmethod
    def execution_failure_check(self, output:str) -> bool:
        """
-        abstract method, implementation in child class.
+        Abstract method that must be implemented by child classes to check if the tool execution failed.
+        Args:
+            output (str): The output string from the tool execution to analyze
+        Returns:
+            bool: True if execution failed, False if successful
        """
        pass

    @abstractmethod
    def interpreter_feedback(self, output:str) -> str:
        """
-        abstract method, implementation in child class.
+        Abstract method that must be implemented by child classes to provide feedback to the AI from the tool.
+        Args:
+            output (str): The output string from the tool execution to analyze
+        Returns:
+            str: The feedback message to the AI
        """
        pass

    def save_block(self, blocks:[str], save_path:str) -> None:
        """
-        Save the code/query block to a file.
+        Save code or query blocks to a file at the specified path.
+        Creates the directory path if it doesn't exist.
+        Args:
+            blocks (List[str]): List of code/query blocks to save
+            save_path (str): File path where blocks should be saved
        """
        if save_path is None:
            return
@@ -74,9 +90,16 @@ class Tools():
        with open(save_path, 'w') as f:
            f.write(block)

-    def load_exec_block(self, llm_text: str) -> str:
+    def load_exec_block(self, llm_text: str) -> tuple[list[str], str | None]:
        """
-        Extract the code/query blocks from the answer text, removing consistent leading whitespace.
+        Extract code/query blocks from LLM-generated text and process them for execution.
+        This method parses the text looking for code blocks marked with the tool's tag (e.g. ```python).
+        Args:
+            llm_text (str): The raw text containing code blocks from the LLM
+        Returns:
+            tuple[list[str], str | None]: A tuple containing:
+                - The list of extracted and processed code blocks
+                - The path the code blocks were saved to
        """
        assert self.tag != "undefined", "Tag not defined"
        start_tag = f'```{self.tag}'
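The extraction `load_exec_block` describes (find fences carrying the tool's tag, collect their bodies, honor an optional `:path` save suffix) can be sketched in a few lines (hypothetical helper, not the repository's implementation):

```python
import re

def extract_blocks(llm_text: str, tag: str):
    # Find ```<tag>[:save/path] ... ``` fences; return (blocks, save_path).
    pattern = re.compile(rf"```{re.escape(tag)}(?::(\S+))?\n(.*?)```", re.DOTALL)
    blocks, save_path = [], None
    for match in pattern.finditer(llm_text):
        if match.group(1):
            save_path = match.group(1)   # e.g. ```go:hello.go -> "hello.go"
        blocks.append(match.group(2).strip())
    return blocks, save_path

answer = "Sure:\n```python\nprint('hey')\n```"
print(extract_blocks(answer, "python"))  # -> (["print('hey')"], None)
```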
sources/tools/webSearch.py (new file) · 70 lines
@@ -0,0 +1,70 @@

import os
import requests
import dotenv

dotenv.load_dotenv()

if __name__ == "__main__":
    from tools import Tools
else:
    from sources.tools.tools import Tools

class webSearch(Tools):
    def __init__(self, api_key: str = None):
        """
        A tool to perform a Google search and return information from the first result.
        """
        super().__init__()
        self.tag = "web_search"
        self.api_key = api_key or os.getenv("SERPAPI_KEY")  # Requires a SerpApi key

    def execute(self, blocks: str, safety: bool = True) -> str:
        if self.api_key is None:
            return "Error: No SerpApi key provided."
        for block in blocks:
            query = block.strip()
            if not query:
                return "Error: No search query provided."

            try:
                url = "https://serpapi.com/search"
                params = {
                    "q": query,
                    "api_key": self.api_key,
                    "num": 1,
                    "output": "json"
                }
                response = requests.get(url, params=params)
                response.raise_for_status()

                data = response.json()
                if "organic_results" in data and len(data["organic_results"]) > 0:
                    first_result = data["organic_results"][0]
                    title = first_result.get("title", "No title")
                    snippet = first_result.get("snippet", "No snippet available")
                    link = first_result.get("link", "No link available")
                    return f"Title: {title}\nSnippet: {snippet}\nLink: {link}"
                else:
                    return "No results found for the query."
            except requests.RequestException as e:
                return f"Error during web search: {str(e)}"
            except Exception as e:
                return f"Unexpected error: {str(e)}"
        return "No search performed"

    def execution_failure_check(self, output: str) -> bool:
        return output.startswith("Error") or "No results found" in output

    def interpreter_feedback(self, output: str) -> str:
        if self.execution_failure_check(output):
            return f"Web search failed: {output}"
        return f"Web search result:\n{output}"


if __name__ == "__main__":
    search_tool = webSearch(api_key=os.getenv("SERPAPI_KEY"))
    query = "when did covid start"
    result = search_tool.execute([query], safety=True)  # execute expects a list of query blocks
    feedback = search_tool.interpreter_feedback(result)
    print(feedback)
@@ -6,7 +6,19 @@ import platform

def pretty_print(text, color = "info"):
    """
-    print text with color
+    Print text with color formatting.
+
+    Args:
+        text (str): The text to print
+        color (str, optional): The color to use. Defaults to "info".
+            Valid colors are:
+            - "success": Green
+            - "failure": Red
+            - "status": Light green
+            - "code": Light blue
+            - "warning": Yellow
+            - "output": Cyan
+            - "default": Black (Windows only)
    """
    if platform.system().lower() != "windows":
        color_map = {
@@ -35,3 +47,20 @@ def pretty_print(text, color = "info"):
    if color not in color_map:
        color = "default"
    print(colored(text, color_map[color]))

+def timer_decorator(func):
+    """
+    Decorator to measure the execution time of a function.
+    Usage:
+        @timer_decorator
+        def my_function():
+            # code to execute
+    """
+    from time import time
+    def wrapper(*args, **kwargs):
+        start_time = time()
+        result = func(*args, **kwargs)
+        end_time = time()
+        print(f"{func.__name__} took {end_time - start_time:.2f} seconds to execute")
+        return result
+    return wrapper