Mirror of https://github.com/tcsenpai/agenticSeek.git, synced 2025-06-06 11:05:26 +00:00
Fix: TTS crash + perf improvement + router error fix
parent 39bf625d6b
commit d2154d5769
@@ -21,13 +21,14 @@
 ## Installation
 
 ### 1️⃣ **Install Dependencies**
-Make sure you have [Ollama](https://ollama.com/) installed, then run:
 ```sh
 pip3 install -r requirements.txt
 ```
 
 ### 2️⃣ **Download Models**
 
+Make sure you have [Ollama](https://ollama.com/) installed.
+
 Download the `deepseek-r1:7b` model from [DeepSeek](https://deepseek.com/models)
 
 ```sh
config.ini (11 changed lines)

@@ -1,9 +1,10 @@
 [MAIN]
-is_local = True
+is_local = False
-provider_name = ollama
+provider_name = server
 provider_model = deepseek-r1:14b
-provider_server_address = 127.0.0.1:11434
+provider_server_address = 192.168.1.100:5000
-agent_name = jarvis
+agent_name = Eva
 recover_last_session = True
+save_session = True
 speak = True
-listen = False
+listen = True
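These keys are read with Python's standard configparser (the config.getboolean('MAIN', 'save_session') call in main.py below confirms the module); a minimal sketch of consuming the updated values:

```python
# Minimal sketch, assuming the stdlib configparser; the variable names are
# illustrative, only the keys come from config.ini.
import configparser

config = configparser.ConfigParser()
config.read('config.ini')

is_local = config.getboolean('MAIN', 'is_local')          # now False: use a remote provider
provider = config.get('MAIN', 'provider_name')            # "server" instead of "ollama"
address = config.get('MAIN', 'provider_server_address')   # e.g. "192.168.1.100:5000"
save_session = config.getboolean('MAIN', 'save_session')  # new key added by this commit
listen = config.getboolean('MAIN', 'listen')              # voice input now enabled
```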
main.py (2 changed lines)

@@ -49,6 +49,8 @@ def main():
         interaction.get_user()
         interaction.think()
         interaction.show_answer()
+    if config.getboolean('MAIN', 'save_session'):
+        interaction.save_session()
 
 if __name__ == "__main__":
     main()
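With save_session enabled in config.ini, main() now persists every agent's memory through Interaction.save_session(), the write-side counterpart of the existing recover_last_session() restore path (see the next hunk).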
@@ -39,6 +39,10 @@ class Interaction:
     def recover_last_session(self):
         for agent in self.agents:
             agent.memory.load_memory()
 
+    def save_session(self):
+        for agent in self.agents:
+            agent.memory.save_memory()
+
     def is_active(self):
         return self.is_active
@@ -78,9 +82,11 @@ class Interaction:
         return query
 
     def think(self):
-        if self.last_query is None:
+        if self.last_query is None or len(self.last_query) == 0:
             return
         agent = self.router.select_agent(self.last_query)
+        if agent is None:
+            return
         if self.current_agent != agent:
             self.current_agent = agent
             # get history from previous agent
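These two guards are the router error fix from the commit message: think() now skips empty queries before invoking the router, and a None result from select_agent() no longer reaches the current_agent comparison.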
@@ -55,7 +55,9 @@ class Memory():
             date = filename.split('_')[1]
             saved_sessions.append((filename, date))
         saved_sessions.sort(key=lambda x: x[1], reverse=True)
-        return saved_sessions[0][0]
+        if len(saved_sessions) > 0:
+            return saved_sessions[0][0]
+        return None
 
     def load_memory(self) -> None:
         if not os.path.exists(self.conversation_folder):
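The contract changes here: with no saved sessions the lookup now returns None instead of raising IndexError on the empty list, so callers must check for None before loading a file.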
@@ -26,7 +26,7 @@ class AgentRouter:
         return result
 
     def select_agent(self, text: str) -> Agent:
-        if text is None:
+        if len(self.agents) == 0 or len(self.labels) == 0:
             return self.agents[0]
         result = self.classify_text(text)
         for agent in self.agents:
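Note that the old `text is None` check is superseded by the empty-query guard added to think() above; the new condition instead short-circuits classification when the router has no agents or labels configured.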
@@ -12,7 +12,7 @@ audio_queue = queue.Queue()
 done = False
 
 class AudioRecorder:
-    def __init__(self, format=pyaudio.paInt16, channels=1, rate=44100, chunk=8192, record_seconds=7, verbose=False):
+    def __init__(self, format=pyaudio.paInt16, channels=1, rate=4096, chunk=8192, record_seconds=7, verbose=False):
         self.format = format
         self.channels = channels
         self.rate = rate
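The rate drop from 44100 to 4096 Hz appears to be the advertised perf improvement: each 7-second recording now carries roughly a tenth of the samples the transcription pipeline has to chew through. A back-of-the-envelope check:

```python
# Rough arithmetic behind the perf win (assuming mono 16-bit capture).
samples_old = 44100 * 7   # 308,700 samples per 7 s recording
samples_new = 4096 * 7    #  28,672 samples per 7 s recording
print(samples_old / samples_new)  # ~10.8x less audio per transcription job
```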
@@ -60,8 +60,8 @@ class AudioRecorder:
 class Transcript:
     def __init__(self) -> None:
         self.last_read = None
-        device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        device = self.get_device()
-        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        torch_dtype = torch.float16 if device == "cuda" else torch.float32
         model_id = "distil-whisper/distil-medium.en"
 
         model = AutoModelForSpeechSeq2Seq.from_pretrained(
@@ -75,11 +75,26 @@ class Transcript:
             model=model,
             tokenizer=processor.tokenizer,
             feature_extractor=processor.feature_extractor,
-            max_new_tokens=128,
+            max_new_tokens=24, # a human say around 20 token in 7s
             torch_dtype=torch_dtype,
             device=device,
         )
 
+    def get_device(self):
+        if torch.backends.mps.is_available():
+            return "mps"
+        if torch.cuda.is_available():
+            return "cuda:0"
+        else:
+            return "cpu"
+
+    def remove_hallucinations(self, text: str):
+        # TODO find a better way to do this
+        common_hallucinations = ['Okay.', 'Thank you.', 'Thank you for watching.', 'You\'re', 'Oh', 'you', 'Oh.', 'Uh', 'Oh,', 'Mh-hmm', 'Hmm.']
+        for hallucination in common_hallucinations:
+            text = text.replace(hallucination, "")
+        return text
+
     def transcript_job(self, audio_data: np.ndarray, sample_rate: int = 16000):
         if audio_data.dtype != np.float32:
             audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
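A small illustration of the new remove_hallucinations() filter on a hypothetical transcript; note that plain str.replace() is case-sensitive and also strips matches embedded in longer words (e.g. 'you' inside 'yours'):

```python
# Hypothetical input run through the same replacement loop as
# Transcript.remove_hallucinations (hallucination list abbreviated here).
text = "Okay. open the browser Thank you."
for hallucination in ['Okay.', 'Thank you.', 'Thank you for watching.']:
    text = text.replace(hallucination, "")
print(text)  # " open the browser "
```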
@@ -88,7 +103,7 @@ class Transcript:
         if sample_rate != 16000:
             audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
         result = self.pipe(audio_data)
-        return result["text"]
+        return self.remove_hallucinations(result["text"])
 
 class AudioTranscriber:
     def __init__(self, ai_name: str, verbose=False):
@@ -103,16 +118,18 @@ class AudioTranscriber:
             'ES': [f"{self.ai_name}"]
         }
         self.confirmation_words = {
-            'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "do that thing"],
+            'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"],
-            'FR': ["fais-le", "vas-y", "exécute", "lance", "commence", "merci", "tu veux bien", "s'il te plaît", "d'accord ?", "poursuis", "continue", "vas-y", "fais ça", "fais ce truc"],
+            'FR': ["fais-le", "vas-y", "exécute", "lance", "commence", "merci", "tu veux bien", "s'il te plaît", "d'accord ?", "poursuis", "continue", "vas-y", "fais ça", "compris"],
-            'ZH': ["做吧", "继续", "执行", "运行", "开始", "谢谢", "可以吗", "请", "好吗", "进行", "继续", "往前走", "做那个", "做那件事"],
+            'ZH': ["做吧", "继续", "执行", "运行", "开始", "谢谢", "可以吗", "请", "好吗", "进行", "继续", "往前走", "做那个", "做那件事", "聽得懂"],
             'ES': ["hazlo", "adelante", "ejecuta", "corre", "empieza", "gracias", "lo harías", "por favor", "¿vale?", "procede", "continúa", "sigue", "haz eso", "haz esa cosa"]
         }
         self.recorded = ""
 
     def get_transcript(self):
+        global done
         buffer = self.recorded
         self.recorded = ""
+        done = False
         return buffer
 
     def _transcribe(self):
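The `global done` declaration is what makes the reset effective: without it, the assignment in get_transcript() would only bind a function-local name and the module-level flag would never change. A standalone sketch:

```python
# Standalone sketch of why `global` is needed to reset a module-level flag.
done = True

def reset_without_global():
    done = False  # binds a new local; the module-level flag stays True

def reset_with_global():
    global done
    done = False  # rebinds the module-level flag

reset_without_global()
print(done)  # True
reset_with_global()
print(done)  # False
```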