From ed76c8415b4acc9db6bf9c0db5d4dc8fa9c4ca55 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Fri, 2 May 2025 14:10:16 +0200 Subject: [PATCH] feat : fileFinder read pdf, browser better chromedriver install --- sources/browser.py | 30 ++++++++++++++++------ sources/text_to_speech.py | 1 + sources/tools/BashInterpreter.py | 13 +++++----- sources/tools/C_Interpreter.py | 10 ++++---- sources/tools/GoInterpreter.py | 10 ++++---- sources/tools/JavaInterpreter.py | 10 ++++---- sources/tools/PyInterpreter.py | 8 +++--- sources/tools/fileFinder.py | 44 +++++++++++++++++++++++++++----- sources/tools/flightSearch.py | 10 ++++---- sources/tools/mcpFinder.py | 11 ++++---- sources/tools/searxSearch.py | 8 +++--- sources/tools/tools.py | 4 +++ sources/tools/webSearch.py | 10 ++------ 13 files changed, 106 insertions(+), 63 deletions(-) diff --git a/sources/browser.py b/sources/browser.py index 0351cb9..a5dc67f 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -65,6 +65,25 @@ def get_random_user_agent() -> str: ] return random.choice(user_agents) +def install_chromedriver() -> str: + """ + Install the ChromeDriver if not already installed. Return the path. + """ + chromedriver_path = shutil.which("chromedriver") + if not chromedriver_path: + try: + chromedriver_path = chromedriver_autoinstaller.install() + except Exception as e: + raise FileNotFoundError( + "ChromeDriver not found and could not be installed automatically. " + "Please install it manually from https://chromedriver.chromium.org/downloads." + "and ensure it's in your PATH or specify the path directly." + "See know issues in readme if your chrome version is above 115." + ) from e + if not chromedriver_path: + raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.") + return chromedriver_path + def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx") -> webdriver.Chrome: """Create a Chrome WebDriver with specified options.""" chrome_options = Options() @@ -97,14 +116,9 @@ def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx pretty_print(f"Anti-captcha CRX not found at {crx_path}.", color="failure") else: chrome_options.add_extension(crx_path) - - chromedriver_path = shutil.which("chromedriver") - if not chromedriver_path: - chromedriver_path = chromedriver_autoinstaller.install() - - if not chromedriver_path: - raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.") - + + chromedriver_path = install_chromedriver() + service = Service(chromedriver_path) if stealth_mode: chrome_options.add_argument("--disable-blink-features=AutomationControlled") diff --git a/sources/text_to_speech.py b/sources/text_to_speech.py index e881f50..e50d742 100644 --- a/sources/text_to_speech.py +++ b/sources/text_to_speech.py @@ -140,6 +140,7 @@ class Speech(): return sentence if __name__ == "__main__": + # TODO add info message for cn2an, jieba chinese related import sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) speech = Speech() tosay_en = """ diff --git a/sources/tools/BashInterpreter.py b/sources/tools/BashInterpreter.py index cbc644a..e6ae113 100644 --- a/sources/tools/BashInterpreter.py +++ b/sources/tools/BashInterpreter.py @@ -1,15 +1,14 @@ -import sys +import os, sys import re from io import StringIO import subprocess -if __name__ == "__main__": - from tools import Tools - from safety import is_unsafe -else: - from sources.tools.tools import Tools - from sources.tools.safety import is_unsafe +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from sources.tools.tools import Tools +from sources.tools.safety import is_unsafe class BashInterpreter(Tools): """ diff --git a/sources/tools/C_Interpreter.py b/sources/tools/C_Interpreter.py index 45c501f..2c0cbf9 100644 --- a/sources/tools/C_Interpreter.py +++ b/sources/tools/C_Interpreter.py @@ -1,12 +1,12 @@ import subprocess -import os +import os, sys import tempfile import re -if __name__ == "__main__": - from tools import Tools -else: - from sources.tools.tools import Tools +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from sources.tools.tools import Tools class CInterpreter(Tools): """ diff --git a/sources/tools/GoInterpreter.py b/sources/tools/GoInterpreter.py index 2f6053b..ab23f76 100644 --- a/sources/tools/GoInterpreter.py +++ b/sources/tools/GoInterpreter.py @@ -1,12 +1,12 @@ import subprocess -import os +import os, sys import tempfile import re -if __name__ == "__main__": - from tools import Tools -else: - from sources.tools.tools import Tools +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from sources.tools.tools import Tools class GoInterpreter(Tools): """ diff --git a/sources/tools/JavaInterpreter.py b/sources/tools/JavaInterpreter.py index a6bbc0a..4658c13 100644 --- a/sources/tools/JavaInterpreter.py +++ b/sources/tools/JavaInterpreter.py @@ -1,12 +1,12 @@ import subprocess -import os +import os, sys import tempfile import re -if __name__ == "__main__": - from tools import Tools -else: - from sources.tools.tools import Tools +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from sources.tools.tools import Tools class JavaInterpreter(Tools): """ diff --git a/sources/tools/PyInterpreter.py b/sources/tools/PyInterpreter.py index 350e590..2b4f965 100644 --- a/sources/tools/PyInterpreter.py +++ b/sources/tools/PyInterpreter.py @@ -4,10 +4,10 @@ import os import re from io import StringIO -if __name__ == "__main__": - from tools import Tools -else: - from sources.tools.tools import Tools +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from sources.tools.tools import Tools class PyInterpreter(Tools): """ diff --git a/sources/tools/fileFinder.py b/sources/tools/fileFinder.py index 90130b7..1977395 100644 --- a/sources/tools/fileFinder.py +++ b/sources/tools/fileFinder.py @@ -1,13 +1,12 @@ -import os +import os, sys import stat import mimetypes import configparser -if __name__ == "__main__": - from tools import Tools -else: - from sources.tools.tools import Tools +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +from sources.tools.tools import Tools class FileFinder(Tools): """ @@ -30,14 +29,45 @@ class FileFinder(Tools): return file.read() except Exception as e: return f"Error reading file: {e}" + + def read_arbitrary_file(self, file_path: str, file_type: str) -> str: + """ + Reads the content of a file with arbitrary encoding. + Args: + file_path (str): The path to the file to read + Returns: + str: The content of the file in markdown format + """ + content_raw = self.read_file(file_path) + language = ["python", "bash", "javascript", "html", "css", "json"] + if "text" in file_type: + content = content_raw + elif any(lang in file_type for lang in language): + content = content_raw + elif "pdf" in file_type: + from pypdf import PdfReader + reader = PdfReader(file_path) + content = '\n'.join([pt.extract_text() for pt in reader.pages]) + elif "binary" in file_type: + content = content_raw.decode('utf-8', errors='replace') + else: + content = f"Can't read file: ![{file_type}]({file_path})" + return content def get_file_info(self, file_path: str) -> str: + """ + Gets information about a file, including its name, path, type, content, and permissions. + Args: + file_path (str): The path to the file + Returns: + str: A dictionary containing the file information + """ if os.path.exists(file_path): stats = os.stat(file_path) permissions = oct(stat.S_IMODE(stats.st_mode)) file_type, _ = mimetypes.guess_type(file_path) file_type = file_type if file_type else "Unknown" - content = self.read_file(file_path) + content = self.read_arbitrary_file(file_path, file_type) result = { "filename": os.path.basename(file_path), @@ -148,7 +178,7 @@ if __name__ == "__main__": tool = FileFinder() result = tool.execute([""" action=read -name=tools.py +name=cover_letter_martin_legrand.pdf """], False) print("Execution result:") print(result) diff --git a/sources/tools/flightSearch.py b/sources/tools/flightSearch.py index 43246f1..3c6f759 100644 --- a/sources/tools/flightSearch.py +++ b/sources/tools/flightSearch.py @@ -1,13 +1,13 @@ -import os +import os, sys import requests import dotenv dotenv.load_dotenv() -if __name__ == "__main__": - from tools import Tools -else: - from sources.tools.tools import Tools +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from sources.tools.tools import Tools class FlightSearch(Tools): def __init__(self, api_key: str = None): diff --git a/sources/tools/mcpFinder.py b/sources/tools/mcpFinder.py index c055405..96e07f5 100644 --- a/sources/tools/mcpFinder.py +++ b/sources/tools/mcpFinder.py @@ -1,12 +1,13 @@ -import os +import os, sys import requests from urllib.parse import urljoin from typing import Dict, Any, Optional -if __name__ == "__main__": - from tools import Tools -else: - from sources.tools.tools import Tools +from sources.tools.tools import Tools + +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + class MCP_finder(Tools): """ diff --git a/sources/tools/searxSearch.py b/sources/tools/searxSearch.py index 83a6203..15a4438 100644 --- a/sources/tools/searxSearch.py +++ b/sources/tools/searxSearch.py @@ -2,10 +2,10 @@ import requests from bs4 import BeautifulSoup import os -if __name__ == "__main__": - from tools import Tools -else: - from sources.tools.tools import Tools +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from sources.tools.tools import Tools class searxSearch(Tools): def __init__(self, base_url: str = None): diff --git a/sources/tools/tools.py b/sources/tools/tools.py index e3b5b86..80bf25a 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -21,6 +21,10 @@ import sys import os import configparser from abc import abstractmethod + +if __name__ == "__main__": # if running as a script for individual testing + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + from sources.logger import Logger class Tools(): diff --git a/sources/tools/webSearch.py b/sources/tools/webSearch.py index 6cf6e5f..3da083d 100644 --- a/sources/tools/webSearch.py +++ b/sources/tools/webSearch.py @@ -5,14 +5,8 @@ import dotenv dotenv.load_dotenv() -if __name__ == "__main__": - import sys - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - from utility import animate_thinking, pretty_print - from tools import Tools -else: - from sources.tools.tools import Tools - from sources.utility import animate_thinking, pretty_print +from sources.tools.tools import Tools +from sources.utility import animate_thinking, pretty_print """ WARNING