feat: support any language set in config

This commit is contained in:
martin legrand 2025-04-11 11:13:07 +02:00
parent e66f535dd3
commit 4198e932ca
5 changed files with 22 additions and 16 deletions

View File

@ -10,6 +10,7 @@ speak = False
listen = False listen = False
work_dir = /Users/mlg/Documents/ai_folder work_dir = /Users/mlg/Documents/ai_folder
jarvis_personality = False jarvis_personality = False
languages = en zh fr
[BROWSER] [BROWSER]
headless_browser = False headless_browser = False
stealth_mode = False stealth_mode = True

View File

@ -18,17 +18,19 @@ config.read('config.ini')
def main(): def main():
pretty_print("Initializing...", color="status") pretty_print("Initializing...", color="status")
stealth_mode = config.getboolean('BROWSER', 'stealth_mode')
personality_folder = "jarvis" if config.getboolean('MAIN', 'jarvis_personality') else "base"
languages = config["MAIN"]["languages"].split(' ')
provider = Provider(provider_name=config["MAIN"]["provider_name"], provider = Provider(provider_name=config["MAIN"]["provider_name"],
model=config["MAIN"]["provider_model"], model=config["MAIN"]["provider_model"],
server_address=config["MAIN"]["provider_server_address"], server_address=config["MAIN"]["provider_server_address"],
is_local=config.getboolean('MAIN', 'is_local')) is_local=config.getboolean('MAIN', 'is_local'))
stealth_mode = config.getboolean('BROWSER', 'stealth_mode')
browser = Browser( browser = Browser(
create_driver(headless=config.getboolean('BROWSER', 'headless_browser'), stealth_mode=stealth_mode), create_driver(headless=config.getboolean('BROWSER', 'headless_browser'), stealth_mode=stealth_mode),
anticaptcha_manual_install=stealth_mode anticaptcha_manual_install=stealth_mode
) )
personality_folder = "jarvis" if config.getboolean('MAIN', 'jarvis_personality') else "base"
agents = [ agents = [
CasualAgent(name=config["MAIN"]["agent_name"], CasualAgent(name=config["MAIN"]["agent_name"],
@ -52,6 +54,7 @@ def main():
tts_enabled=config.getboolean('MAIN', 'speak'), tts_enabled=config.getboolean('MAIN', 'speak'),
stt_enabled=config.getboolean('MAIN', 'listen'), stt_enabled=config.getboolean('MAIN', 'listen'),
recover_last_session=config.getboolean('MAIN', 'recover_last_session'), recover_last_session=config.getboolean('MAIN', 'recover_last_session'),
langs=languages
) )
try: try:
while interaction.is_active: while interaction.is_active:

View File

@ -15,6 +15,7 @@ class Interaction:
tts_enabled: bool = True, tts_enabled: bool = True,
stt_enabled: bool = True, stt_enabled: bool = True,
recover_last_session: bool = False, recover_last_session: bool = False,
langs: List[str] = ["en", "zh"]
): ):
self.is_active = True self.is_active = True
self.current_agent = None self.current_agent = None
@ -25,7 +26,7 @@ class Interaction:
self.tts_enabled = tts_enabled self.tts_enabled = tts_enabled
self.stt_enabled = stt_enabled self.stt_enabled = stt_enabled
self.recover_last_session = recover_last_session self.recover_last_session = recover_last_session
self.router = AgentRouter(self.agents) self.router = AgentRouter(self.agents, supported_language=langs)
if tts_enabled: if tts_enabled:
animate_thinking("Initializing text-to-speech...", color="status") animate_thinking("Initializing text-to-speech...", color="status")
self.speech = Speech(enable=tts_enabled) self.speech = Speech(enable=tts_enabled)

View File

@ -10,11 +10,17 @@ from sources.logger import Logger
class LanguageUtility: class LanguageUtility:
"""LanguageUtility for language, or emotion identification""" """LanguageUtility for language, or emotion identification"""
def __init__(self): def __init__(self, supported_language: List[str] = ["en", "fr", "zh"]):
"""
Initialize the LanguageUtility class
args:
supported_language: list of language codes to support for translation; determines which Helsinki-NLP models are loaded
"""
self.sid = None self.sid = None
self.translators_tokenizer = None self.translators_tokenizer = None
self.translators_model = None self.translators_model = None
self.logger = Logger("language.log") self.logger = Logger("language.log")
self.supported_language = supported_language
self.load_model() self.load_model()
def load_model(self) -> None: def load_model(self) -> None:
@ -24,23 +30,18 @@ class LanguageUtility:
except LookupError: except LookupError:
nltk.download('vader_lexicon') nltk.download('vader_lexicon')
self.sid = SentimentIntensityAnalyzer() self.sid = SentimentIntensityAnalyzer()
self.translators_tokenizer = { self.translators_tokenizer = {lang: MarianTokenizer.from_pretrained(f"Helsinki-NLP/opus-mt-{lang}-en") for lang in self.supported_language if lang != "en"}
"fr": MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-fr-en"), self.translators_model = {lang: MarianMTModel.from_pretrained(f"Helsinki-NLP/opus-mt-{lang}-en") for lang in self.supported_language if lang != "en"}
"zh": MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
}
self.translators_model = {
"fr": MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-fr-en"),
"zh": MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
}
def detect_language(self, text: str) -> str: def detect_language(self, text: str) -> str:
""" """
Detect the language of the given text using langdetect Detect the language of the given text using langdetect
Limited to the supported languages list because of the model's tendency to confuse similar languages
Args: Args:
text: string to analyze text: string to analyze
Returns: ISO639-1 language code Returns: ISO639-1 language code
""" """
langid.set_languages(['fr', 'en', 'zh']) langid.set_languages(self.supported_language)
lang, score = langid.classify(text) lang, score = langid.classify(text)
self.logger.info(f"Identified: {text} as {lang} with conf {score}") self.logger.info(f"Identified: {text} as {lang} with conf {score}")
return lang return lang

View File

@ -20,10 +20,10 @@ class AgentRouter:
""" """
AgentRouter is a class that selects the appropriate agent based on the user query. AgentRouter is a class that selects the appropriate agent based on the user query.
""" """
def __init__(self, agents: list): def __init__(self, agents: list, supported_language: List[str] = ["en", "fr", "zh"]):
self.agents = agents self.agents = agents
self.logger = Logger("router.log") self.logger = Logger("router.log")
self.lang_analysis = LanguageUtility() self.lang_analysis = LanguageUtility(supported_language=supported_language)
self.pipelines = self.load_pipelines() self.pipelines = self.load_pipelines()
self.talk_classifier = self.load_llm_router() self.talk_classifier = self.load_llm_router()
self.complexity_classifier = self.load_llm_router() self.complexity_classifier = self.load_llm_router()