feat : support any language set in config

This commit is contained in:
martin legrand 2025-04-11 11:13:07 +02:00
parent e66f535dd3
commit 4198e932ca
5 changed files with 22 additions and 16 deletions

View File

@ -10,6 +10,7 @@ speak = False
listen = False
work_dir = /Users/mlg/Documents/ai_folder
jarvis_personality = False
languages = en zh fr
[BROWSER]
headless_browser = False
stealth_mode = False
stealth_mode = True

View File

@ -18,17 +18,19 @@ config.read('config.ini')
def main():
pretty_print("Initializing...", color="status")
stealth_mode = config.getboolean('BROWSER', 'stealth_mode')
personality_folder = "jarvis" if config.getboolean('MAIN', 'jarvis_personality') else "base"
languages = config["MAIN"]["languages"].split(' ')
provider = Provider(provider_name=config["MAIN"]["provider_name"],
model=config["MAIN"]["provider_model"],
server_address=config["MAIN"]["provider_server_address"],
is_local=config.getboolean('MAIN', 'is_local'))
stealth_mode = config.getboolean('BROWSER', 'stealth_mode')
browser = Browser(
create_driver(headless=config.getboolean('BROWSER', 'headless_browser'), stealth_mode=stealth_mode),
anticaptcha_manual_install=stealth_mode
)
personality_folder = "jarvis" if config.getboolean('MAIN', 'jarvis_personality') else "base"
agents = [
CasualAgent(name=config["MAIN"]["agent_name"],
@ -52,6 +54,7 @@ def main():
tts_enabled=config.getboolean('MAIN', 'speak'),
stt_enabled=config.getboolean('MAIN', 'listen'),
recover_last_session=config.getboolean('MAIN', 'recover_last_session'),
langs=languages
)
try:
while interaction.is_active:

View File

@ -15,6 +15,7 @@ class Interaction:
tts_enabled: bool = True,
stt_enabled: bool = True,
recover_last_session: bool = False,
langs: List[str] = ["en", "zh"]
):
self.is_active = True
self.current_agent = None
@ -25,7 +26,7 @@ class Interaction:
self.tts_enabled = tts_enabled
self.stt_enabled = stt_enabled
self.recover_last_session = recover_last_session
self.router = AgentRouter(self.agents)
self.router = AgentRouter(self.agents, supported_language=langs)
if tts_enabled:
animate_thinking("Initializing text-to-speech...", color="status")
self.speech = Speech(enable=tts_enabled)

View File

@ -10,11 +10,17 @@ from sources.logger import Logger
class LanguageUtility:
"""LanguageUtility for language, or emotion identification"""
def __init__(self):
def __init__(self, supported_language: List[str] = None):
    """
    Initialize the LanguageUtility class
    args:
        supported_language: list of ISO639-1 language codes to support.
            It determines which Helsinki-NLP translation models are loaded
            and is kept on the instance for language detection.
            Defaults to ["en", "fr", "zh"] when omitted.
    """
    # Build the default list per call: a list literal in the signature is
    # created once and shared by every instance that relies on the default.
    if supported_language is None:
        supported_language = ["en", "fr", "zh"]
    self.sid = None
    self.translators_tokenizer = None
    self.translators_model = None
    self.logger = Logger("language.log")
    # Must be set before load_model(), which reads it.
    self.supported_language = supported_language
    self.load_model()
def load_model(self) -> None:
@ -24,23 +30,18 @@ class LanguageUtility:
except LookupError:
nltk.download('vader_lexicon')
self.sid = SentimentIntensityAnalyzer()
self.translators_tokenizer = {
"fr": MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-fr-en"),
"zh": MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
}
self.translators_model = {
"fr": MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-fr-en"),
"zh": MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
}
self.translators_tokenizer = {lang: MarianTokenizer.from_pretrained(f"Helsinki-NLP/opus-mt-{lang}-en") for lang in self.supported_language if lang != "en"}
self.translators_model = {lang: MarianMTModel.from_pretrained(f"Helsinki-NLP/opus-mt-{lang}-en") for lang in self.supported_language if lang != "en"}
def detect_language(self, text: str) -> str:
"""
Detect the language of the given text using langid
Candidates are limited to the supported languages list because of the model's tendency to mistake similar languages
The text, the detected language and the classifier score are written to the logger
Args:
text: string to analyze
Returns: ISO639-1 language code
"""
langid.set_languages(['fr', 'en', 'zh'])
langid.set_languages(self.supported_language)
lang, score = langid.classify(text)
self.logger.info(f"Identified: {text} as {lang} with conf {score}")
return lang

View File

@ -20,10 +20,10 @@ class AgentRouter:
"""
AgentRouter is a class that selects the appropriate agent based on the user query.
"""
def __init__(self, agents: list):
def __init__(self, agents: list, supported_language: List[str] = ["en", "fr", "zh"]):
self.agents = agents
self.logger = Logger("router.log")
self.lang_analysis = LanguageUtility()
self.lang_analysis = LanguageUtility(supported_language=supported_language)
self.pipelines = self.load_pipelines()
self.talk_classifier = self.load_llm_router()
self.complexity_classifier = self.load_llm_router()