feat : better routing system

This commit is contained in:
martin legrand 2025-03-25 15:44:23 +01:00
parent 68bc60f6f4
commit d871c378fe
16 changed files with 8069 additions and 89 deletions

View File

@ -1,2 +1,3 @@
SEARXNG_BASE_URL="http://127.0.0.1:8080"
OPENAI_API_KEY='dont share this, not needed for local providers'
OPENAI_API_KEY='dont share this, not needed for local providers'
TOKENIZERS_PARALLELISM=False

View File

@ -1,6 +1,6 @@
@echo off
set SCRIPTS_DIR=scripts
set LLM_ROUTER_DIR=llm-router
set LLM_ROUTER_DIR=llm_router
if exist "%SCRIPTS_DIR%\windows_install.bat" (
echo Running Windows installation script...

View File

@ -1,7 +1,7 @@
#!/bin/bash
SCRIPTS_DIR="scripts"
LLM_ROUTER_DIR="llm-router"
LLM_ROUTER_DIR="llm_router"
echo "Detecting operating system..."
@ -30,17 +30,6 @@ case "$OS_TYPE" in
exit 1
fi
;;
"MINGW"* | "MSYS"* | "CYGWIN"*)
echo "Detected Windows (via Bash-like environment)"
if [ -f "$SCRIPTS_DIR/windows_install.sh" ]; then
echo "Running Windows installation script..."
bash "$SCRIPTS_DIR/windows_install.sh"
bash "cd $LLM_ROUTER_DIR && dl_safetensors.sh"
else
echo "Error: $SCRIPTS_DIR/windows_install.sh not found!"
exit 1
fi
;;
*)
echo "Unsupported OS detected: $OS_TYPE"
echo "This script supports only Linux and macOS."

33
llm_router/config.json Normal file
View File

@ -0,0 +1,33 @@
{
"config": {
"batch_size": 32,
"device_map": "auto",
"early_stopping_patience": 3,
"epochs": 10,
"ewc_lambda": 100.0,
"gradient_checkpointing": false,
"learning_rate": 0.0005,
"max_examples_per_class": 500,
"max_length": 512,
"min_confidence": 0.1,
"min_examples_per_class": 3,
"neural_weight": 0.2,
"num_representative_examples": 5,
"prototype_update_frequency": 50,
"prototype_weight": 0.8,
"quantization": null,
"similarity_threshold": 0.7,
"warmup_steps": 0
},
"embedding_dim": 768,
"id_to_label": {
"0": "HIGH",
"1": "LOW"
},
"label_to_id": {
"HIGH": 0,
"LOW": 1
},
"model_name": "distilbert/distilbert-base-cased",
"train_steps": 20
}

33
llm_router/dl_safetensors.sh Executable file
View File

@ -0,0 +1,33 @@
#!/bin/bash
##########
# Helper script that downloads the router model weights.
# Downloading through Hugging Face does not seem to work, maybe I am doing something wrong?
# AdaptiveClassifier.from_pretrained("adaptive-classifier/llm-router") ----> results in config.json not found
# Therefore, all the other files are kept in llm_router and only the model file is downloaded with this script.
# If you know a better way, please raise an issue.
##########

# Define the URL and filename
# NOTE(review): the Hugging Face repository is spelled with a hyphen ("llm-router"),
# unlike the local llm_router/ directory — confirm the repository name.
URL="https://huggingface.co/adaptive-classifier/llm-router/resolve/main/model.safetensors"
FILENAME="model.safetensors"

if [ ! -f "$FILENAME" ]; then
    echo "Router safetensors file not found, downloading..."
    if command -v curl >/dev/null 2>&1; then
        # --fail: return a non-zero status on HTTP errors instead of saving the error page as the model
        curl --fail -L -o "$FILENAME" "$URL"
        status=$?
    elif command -v wget >/dev/null 2>&1; then
        wget -O "$FILENAME" "$URL"
        status=$?
    else
        echo "Error: Neither curl nor wget is available. Please install one of them."
        exit 1
    fi
    if [ "$status" -eq 0 ]; then
        echo "Download completed successfully"
    else
        echo "Download failed"
        # Remove any partial file so the next run retries instead of skipping the download
        rm -f "$FILENAME"
        exit 1
    fi
else
    echo "File already exists, skipping download"
fi

7746
llm_router/examples.json Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -2666,4 +2666,4 @@ doi_resolvers:
sci-hub.st: 'https://sci-hub.st/'
sci-hub.ru: 'https://sci-hub.ru/'
default_doi_resolver: 'oadoi.org'
default_doi_resolver: 'oadoi.org'

View File

@ -49,4 +49,4 @@ die-on-term
# uwsgi serves the static files
static-map = /static=/usr/local/searxng/searx/static
static-gzip-all = True
offload-threads = 4
offload-threads = 4

View File

@ -18,10 +18,10 @@ class BrowserAgent(Agent):
"web_search": searxSearch(),
}
self.role = {
"en": "Web search and navigation",
"fr": "Recherche et navigation web",
"zh": "网络搜索和导航",
"es": "Búsqueda y navegación web"
"en": "web",
"fr": "web",
"zh": "网络",
"es": "web"
}
self.type = "browser_agent"
self.browser = Browser()

View File

@ -19,7 +19,7 @@ class CasualAgent(Agent):
"bash": BashInterpreter()
}
self.role = {
"en": "talking",
"en": "talk",
"fr": "discuter",
"zh": "聊天",
"es": "discutir"

View File

@ -21,10 +21,10 @@ class CoderAgent(Agent):
"file_finder": FileFinder()
}
self.role = {
"en": "Coding task",
"fr": "Tâche de codage",
"zh": "编码任务",
"es": "Tarea de codificación"
"en": "code",
"fr": "codage",
"zh": "编码",
"es": "codificación",
}
self.type = "code_agent"

View File

@ -15,10 +15,10 @@ class FileAgent(Agent):
"bash": BashInterpreter()
}
self.role = {
"en": "find and read files",
"fr": "trouver et lire des fichiers",
"zh": "查找和读取文件",
"es": "encontrar y leer archivos"
"en": "files",
"fr": "fichiers",
"zh": "文件",
"es": "archivos",
}
self.type = "file_agent"

View File

@ -19,6 +19,8 @@ import sys
import re
from urllib.parse import urlparse
from sources.utility import pretty_print
class Browser:
def __init__(self, headless=False, anticaptcha_install=False):
"""Initialize the browser with optional headless mode."""
@ -368,7 +370,7 @@ class Browser:
self.driver.quit()
self.logger.info("Browser closed")
except Exception as e:
raise e
self.logger.error(f"Error closing browser: {str(e)}")
def __del__(self):
"""Destructor to ensure browser is closed."""

View File

@ -110,6 +110,6 @@ class Interaction:
if self.last_query is None:
return
self.current_agent.show_answer()
if self.tts_enabled:
if self.tts_enabled and self.last_answer:
self.speech.speak(self.last_answer)

View File

@ -2,13 +2,16 @@ import os
import sys
import torch
from transformers import pipeline
# adaptive-classifier==0.0.10
from adaptive_classifier import AdaptiveClassifier
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sources.agents.agent import Agent
from sources.agents.code_agent import CoderAgent
from sources.agents.casual_agent import CasualAgent
from sources.agents.planner_agent import PlannerAgent
from sources.agents.planner_agent import FileAgent
from sources.agents.browser_agent import BrowserAgent
from sources.language import LanguageUtility
from sources.utility import pretty_print
@ -17,16 +20,30 @@ class AgentRouter:
"""
AgentRouter is a class that selects the appropriate agent based on the user query.
"""
# TODO add adaptive-classifier==0.0.10 to requirements.txt
def __init__(self, agents: list):
self.model = model_name
self.agents = agents
self.lang_analysis = LanguageUtility()
self.pipelines = {
"fr": pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
"zh": pipeline("zero-shot-classification", model="morit/chinese_xlm_xnli"),
"es": pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
"en": pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
"bart": pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
}
self.agents = agents
self.classifier = self.load_llm_router()
self.learn_few_shots_en()
def load_llm_router(self) -> AdaptiveClassifier:
    """
    Load the LLM router model from the local llm_router directory.
    returns:
        AdaptiveClassifier: The loaded model
    exceptions:
        Exception: If the model fails to load
    """
    # The directory is given as a relative path: one level up when this file is run as a script.
    path = "../llm_router" if __name__ == "__main__" else "./llm_router"
    try:
        return AdaptiveClassifier.from_pretrained(path)
    except Exception as e:
        # Chain the original error so the real cause stays visible in the traceback.
        raise Exception("Failed to load the routing model. Please run the dl_safetensors.sh script inside llm_router/ directory to download the model.") from e
def get_device(self) -> str:
if torch.backends.mps.is_available():
@ -36,37 +53,184 @@ class AgentRouter:
else:
return "cpu"
def learn_few_shots_en(self) -> None:
    """
    Few shot learning for the LLM router.
    Use the built-in add_examples method of the AdaptiveClassifier.
    The classifier memory is cleared first, then every (text, label) pair below is added.
    """
    # Labels must be one of "code", "web", "files", "talk" so that they can be
    # compared with the english agent roles.
    few_shots = [
        ("Write a python script to check if the device on my network is connected to the internet", "code"),
        ("Hey could you search the web for the latest news on the tesla stock market ?", "web"),
        ("I would like you to search for weather api", "web"),
        ("Plan a 3-day trip to New York, including flights and hotels.", "web"),
        ("Find on the web the latest research papers on AI.", "web"),
        ("Can you debug this Java code? Its not working.", "code"),
        ("Can you browse the web and find me a 4090 for cheap?", "web"),
        ("i would like to setup a new AI project, index as mark2", "files"),
        ("Hey, can you find the old_project.zip file somewhere on my drive?", "files"),
        ("Tell me a funny story", "talk"),
        ("Can you locate the backup folder I created last month on my system?", "files"),
        ("Share a random fun fact about space.", "talk"),
        ("Write a script to rename all files in a directory to lowercase.", "files"),
        ("Could you check if the presentation.pdf file exists in my downloads?", "files"),
        ("Tell me about the weirdest dream youve ever heard of.", "talk"),
        ("Search my drive for a file called vacation_photos_2023.jpg.", "files"),
        ("Help me organize my desktop files into folders by type.", "files"),
        ("Whats your favorite movie and why?", "talk"),
        ("Search my drive for a file named budget_2024.xlsx", "files"),
        ("Write a Python function to sort a list of dictionaries by key", "code"),
        ("Find the latest updates on quantum computing on the web", "web"),
        ("Check if the folder Work_Projects exists on my desktop", "files"),
        ("Create a bash script to monitor CPU usage", "code"),
        ("Search online for the best budget smartphones of 2025", "web"),
        ("Whats the strangest food combination youve heard of?", "talk"),
        ("Move all .txt files from Downloads to a new folder called Notes", "files"),
        ("Debug this C++ code that keeps crashing", "code"),
        ("can you browse the web to find out who fosowl is ?", "web"),
        ("Find the file important_notes.txt", "files"),
        ("Find out the latest news on the upcoming Mars mission", "web"),
        ("Write a Java program to calculate the area of a circle", "code"),
        ("Search the web for the best ways to learn a new language", "web"),
        ("Locate the file presentation.pptx in my Documents folder", "files"),
        ("Write a Python script to download all images from a webpage", "code"),
        ("Search the web for the latest trends in AI and machine learning", "web"),
        ("Tell me about a time when you had to solve a difficult problem", "talk"),
        ("Organize all image files on my desktop into a folder called Pictures", "files"),
        ("Generate a Ruby script to calculate Fibonacci numbers up to 100", "code"),
        ("Find out what device are connected to my network", "code"),
        ("Show me how much disk space is left on my drive", "code"),
        ("Look up recent posts on X about climate change", "web"),
        ("Find the photo I took last week named sunset_beach.jpg", "files"),
        ("Write a JavaScript snippet to fetch data from an API", "code"),
        ("Search the web for tutorials on machine learning with Python", "web"),
        ("Locate the file meeting_notes.docx in my Documents folder", "files"),
        ("Write a Python script to scrape a websites title and links", "code"),
        ("Search the web for the latest breakthroughs in fusion energy", "web"),
        ("Tell me about a historical event that sounds too wild to be true", "talk"),
        ("Organize all image files on my desktop into a folder called Pictures", "files"),
        ("Generate a Ruby script to calculate Fibonacci numbers up to 100", "code"),
        ("Find recent X posts about SpaceXs next rocket launch", "web"),
        ("Whats the funniest misunderstanding youve seen between humans and AI?", "talk"),
        ("Check if backup_032025.zip exists anywhere on my drive", "files"),
        ("Create a shell script to automate backups of a directory", "code"),
        ("Look up the top AI conferences happening in 2025 online", "web"),
        ("Write a C# program to simulate a basic calculator", "code"),
        ("Browse the web for open-source alternatives to Photoshop", "web"),
        ("Hey how are you", "talk"),
        ("Write a Python script to ping a website", "code"),
        ("Search the web for the latest iPhone release", "web"),
        ("Whats the weather like today?", "web"),
        ("Hi, hows your day going?", "talk"),
        ("Can you find a file called resume.docx on my drive?", "files"),
        ("Write a simple Java program to print 'Hello World'", "code"),
        ("Tell me a quick joke", "talk"),
        ("Search online for the best coffee shops in Seattle", "web"),
        ("Check if project_plan.pdf exists in my Downloads folder", "files"),
        ("Whats your favorite color?", "talk"),
        ("Write a bash script to list all files in a directory", "code"),
        ("Find recent X posts about electric cars", "web"),
        ("Hey, you doing okay?", "talk"),
        ("Locate the file family_photo.jpg on my system", "files"),
        ("Search the web for beginner guitar lessons", "web"),
        ("Write a Python function to reverse a string", "code"),
        ("Whats the weirdest animal you know of?", "talk"),
        ("Organize all .pdf files on my desktop into a Documents folder", "files"),
        ("Browse the web for the latest space mission updates", "web"),
        ("Hey, whats up with you today?", "talk"),
        ("Write a JavaScript function to add two numbers", "code"),
        ("Find the file notes.txt in my Documents folder", "files"),
        ("Tell me something random about the ocean", "talk"),
        ("Search the web for cheap flights to Paris", "web"),
        ("Check if budget.xlsx is on my drive", "files"),
        ("Write a Python script to count words in a text file", "code"),
        ("Hows it going today?", "talk"),
        ("Find recent X posts about AI advancements", "web"),
        ("Move all .jpg files from Downloads to a Photos folder", "files"),
        ("Search online for the best laptops of 2025", "web"),
        ("Whats the funniest thing youve heard lately?", "talk"),
        ("Write a Ruby script to generate random numbers", "code"),
        ("Hey, hows everything with you?", "talk"),
        ("Locate meeting_agenda.docx in my system", "files"),
        ("Search the web for tips on growing indoor plants", "web"),
        ("Write a C++ program to calculate the sum of an array", "code"),
        ("Tell me a fun fact about dogs", "talk"),
        ("Check if the folder Old_Projects exists on my desktop", "files"),
        ("Browse the web for the latest gaming console reviews", "web"),
        ("Hi, how are you feeling today?", "talk"),
        ("Write a Python script to check disk space", "code"),
        ("Find the file vacation_itinerary.pdf on my drive", "files"),
        ("Search the web for news on renewable energy", "web"),
        ("Whats the strangest thing youve learned recently?", "talk"),
        ("Organize all video files into a Videos folder", "files"),
        ("Write a shell script to delete temporary files", "code"),
        ("Hey, hows your week been so far?", "talk"),
        ("Search online for the top movies of 2025", "web"),
        ("Locate taxes_2024.xlsx in my Documents folder", "files"),
        ("Tell me about a cool invention from history", "talk"),
        ("Write a Java program to check if a number is even or odd", "code"),
        ("Find recent X posts about cryptocurrency trends", "web"),
        ("Hey, you good today?", "talk"),
        ("Search the web for easy dinner recipes", "web"),
        ("Check if photo_backup.zip exists on my drive", "files"),
        ("Write a Python script to rename files with a timestamp", "code"),
        ("Whats your favorite thing about space?", "talk"),
        ("Browse the web for the latest fitness trends", "web"),
        ("Move all .docx files to a Work folder", "files"),
    ]
    texts = [text for text, _ in few_shots]
    labels = [label for _, label in few_shots]
    self.classifier.clear_memory()
    self.classifier.add_examples(texts, labels)
def llm_router(self, text: str) -> tuple:
    """
    Inference of the LLM router model.
    Args:
        text: The input text
    Returns:
        tuple: The highest scoring prediction; element 0 is the label, element 1 its score
    Raises:
        IndexError: If no prediction is left once the "HIGH" and "LOW" labels are filtered out
    """
    # "HIGH" and "LOW" are excluded because they can never match an agent role.
    candidates = [pred for pred in self.classifier.predict(text) if pred[0] not in ("HIGH", "LOW")]
    if not candidates:
        raise IndexError("LLM router returned no usable prediction for the given text.")
    # max() returns the first best entry, same as taking element 0 of a stable descending sort.
    return max(candidates, key=lambda pred: pred[1])
def router_vote(self, text: str, labels: list) -> str:
    """
    Choose a label by comparing the BART prediction with the LLM router prediction.
    Args:
        text: The input text
        labels: The candidate labels handed to the BART pipeline
    Returns:
        str: The label whose confidence is the highest
    """
    bart_output = self.pipelines['bart'](text, labels, threshold=0.3)
    router_output = self.llm_router(text)
    bart_label = bart_output['labels'][0]
    # BART was always a bit too confident, so its score is scaled down before comparing
    bart_score = bart_output['scores'][0] * 0.8
    router_label = router_output[0]
    router_score = router_output[1]
    print("BART:", bart_label, "LLM Router:", router_label)
    print("Confidence BART:", bart_score, "Confidence LLM Router:", router_score)
    # On equal confidence the LLM router label is kept.
    return bart_label if bart_score > router_score else router_label
def classify_text(self, text: str, threshold: float = 0.4) -> list:
"""
Classify the text into labels (agent roles).
Args:
text (str): The text to classify
threshold (float, optional): The threshold for the classification.
Returns:
list: The list of agents and their scores
Classify the text using the LLM router and BART model.
"""
first_sentence = None
lang = "en"
for line in text.split("\n"):
first_sentence = line.strip()
break
if first_sentence is None:
first_sentence = text
try:
lang = lang_analysis.detect_language(first_sentence)
assert lang in ["en", "fr", "zh", "es"]
labels = [agent.role[lang] for agent in agents]
if lang == "en":
result = self.pipelines['en'](first_sentence, labels, threshold=threshold)
elif lang == "fr":
result = self.pipelines['fr'](first_sentence, labels, threshold=threshold)
elif lang == "zh":
result = self.pipelines['zh'](first_sentence, labels, threshold=threshold)
elif lang == "es":
result = self.pipelines['es'](first_sentence, labels, threshold=threshold)
else:
result = None
lang = self.lang_analysis.detect_language(first_sentence)
labels = [agent.role[lang] for agent in self.agents]
result = self.router_vote(first_sentence, labels)
except Exception as e:
return None, lang
raise e
return result, lang
def select_agent(self, text: str) -> Agent:
@ -81,56 +245,68 @@ class AgentRouter:
return self.agents[0]
result, lang = self.classify_text(text)
for agent in self.agents:
if result["labels"][0] == agent.role[lang]:
if result == agent.role[lang]:
pretty_print(f"Selected agent: {agent.agent_name} (roles: {agent.role[lang]})", color="warning")
return agent
pretty_print(f"Error choosing agent.", color="error")
pretty_print(f"Error choosing agent. Routing system is not multilingual yet.", color="failure")
pretty_print(f"选择代理时出错。路由系统尚不支持多语言", color="failure")
pretty_print(f"エージェントの選択エラー。ルーティングシステムはまだ多言語に対応していません", color="failure")
pretty_print(f"Erreur lors du choix de l'agent. Le système de routage n'est pas encore multilingue.", color="failure")
pretty_print(f"Error al elegir agente. El sistema de enrutamiento aún no es multilingüe.", color="failure")
return None
if __name__ == "__main__":
agents = [
CasualAgent("deepseek-r1:14b", "jarvis", "../prompts/casual_agent.txt", None),
BrowserAgent("deepseek-r1:14b", "browser", "../prompts/planner_agent.txt", None),
CoderAgent("deepseek-r1:14b", "coder", "../prompts/coder_agent.txt", None)
CasualAgent("jarvis", "../prompts/casual_agent.txt", None),
BrowserAgent("browser", "../prompts/planner_agent.txt", None),
CoderAgent("coder", "../prompts/coder_agent.txt", None),
FileAgent("file", "../prompts/coder_agent.txt", None)
]
router = AgentRouter(agents)
texts = [
"hi",
#"你好",
#"Bonjour",
"Write a python script to check if the device on my network is connected to the internet",
#"Peut tu écrire un script python qui vérifie si l'appareil sur mon réseau est connecté à internet?",
#"写一个Python脚本检查我网络上的设备是否连接到互联网",
# "Peut tu écrire un script python qui vérifie si l'appareil sur mon réseau est connecté à internet?",
# "写一个Python脚本检查我网络上的设备是否连接到互联网",
"Hey could you search the web for the latest news on the tesla stock market ?",
#"嘿,你能搜索网页上关于股票市场的最新新闻吗?",
#"Yo, cherche sur internet comment va tesla en bourse.",
# "嘿,你能搜索网页上关于股票市场的最新新闻吗?",
# "Yo, cherche sur internet comment va tesla en bourse.",
"I would like you to search for weather api and then make an app using this API",
#"我想让你搜索天气API然后用这个API做一个应用程序",
#"J'aimerais que tu cherche une api météo et que l'utilise pour faire une application",
# "我想让你搜索天气API然后用这个API做一个应用程序",
# "J'aimerais que tu cherche une api météo et que l'utilise pour faire une application",
"Plan a 3-day trip to New York, including flights and hotels.",
#"计划一次为期3天的纽约之旅包括机票和酒店。",
#"Planifie un trip de 3 jours à Paris, y compris les vols et hotels.",
"Find me the latest research papers on AI.",
#"给我找最新的AI研究论文。",
#"Trouve moi les derniers papiers de recherche en IA",
# "计划一次为期3天的纽约之旅包括机票和酒店。",
# "Planifie un trip de 3 jours à Paris, y compris les vols et hotels.",
"Find on the web the latest research papers on AI.",
# "在网上找到最新的人工智能研究论文。",
# "Trouve moi les derniers articles de recherche sur l'IA sur internet",
"Help me write a C++ program to sort an array",
#"帮我写一个C++程序来排序数组",
#"Aide moi à faire un programme c++ pour trier une array.",
"Tell me what France been up to lately",
# "告诉我法国最近在做什么",
# "Dis moi ce que la France a fait récemment",
"Who is Sergio Pesto ?",
# "谁是Sergio Pesto",
# "Qui est Sergio Pesto ?",
# "帮我写一个C++程序来排序数组",
# "Aide moi à faire un programme c++ pour trier une array.",
"Whats the weather like today? Oh, and can you find a good weather app?",
#"今天天气怎么样?哦,你还能找到一个好的天气应用程序吗?",
#"La météo est comment aujourd'hui ? oh et trouve moi une bonne appli météo tant que tu y est.",
# "今天天气怎么样?哦,你还能找到一个好的天气应用程序吗?",
# "La météo est comment aujourd'hui ? oh et trouve moi une bonne appli météo tant que tu y est.",
"Can you debug this Java code? Its not working.",
#"你能调试这段Java代码吗它不起作用。",
#"Peut tu m'aider à debugger ce code java, ça marche pas",
"What's the latest brainrot on the internet ?",
#"互联网上最新的“脑残”是什么?",
#"Quel est la dernière connerie sur internet ?",
"i would like to setup a new AI project, index as mark2",
#"我想建立一个新的 AI 项目,索引为 Mark2",
#"Je voudrais configurer un nouveau projet d'IA, index Mark2",
"Hey, can you find the old_project.zip file somewhere on my drive?",
# "你能调试这段Java代码吗它不起作用。",
# "Peut tu m'aider à debugger ce code java, ça marche pas",
#"Can you browse the web and find me a 4090 for cheap?",
#"你能浏览网页为我找一个便宜的4090吗",
#"Peut tu chercher sur internet et me trouver une 4090 pas cher ?",
#"Hey, can you find the old_project.zip file somewhere on my drive?",
#"嘿你能在我驱动器上找到old_project.zip文件吗",
#"Hé trouve moi le old_project.zip, il est quelque part sur mon disque.",
"Tell me a funny story",
#"给我讲一个有趣的故事",
#"Raconte moi une histoire drole"
"给我讲一个有趣的故事",
"Raconte moi une histoire drole"
]
for text in texts:
print("Input text:", text)