feat : fileFinder read pdf, browser better chromedriver install

This commit is contained in:
martin legrand 2025-05-02 14:10:16 +02:00
parent 3cf1cab68f
commit ed76c8415b
13 changed files with 106 additions and 63 deletions

View File

@ -65,6 +65,25 @@ def get_random_user_agent() -> str:
] ]
return random.choice(user_agents) return random.choice(user_agents)
def install_chromedriver() -> str:
    """
    Locate the ChromeDriver executable, installing it automatically if it is missing.

    The PATH is searched first with shutil.which; chromedriver_autoinstaller is
    only invoked when no executable is found there.

    Returns:
        str: The path to the chromedriver executable.
    Raises:
        FileNotFoundError: If the automatic installation raises, or if it
            completes without yielding a path.
    """
    chromedriver_path = shutil.which("chromedriver")
    if chromedriver_path:
        return chromedriver_path
    try:
        chromedriver_path = chromedriver_autoinstaller.install()
    except Exception as e:
        # Any installer failure is surfaced as a missing driver, with manual-install guidance.
        raise FileNotFoundError(
            "ChromeDriver not found and could not be installed automatically. "
            "Please install it manually from https://chromedriver.chromium.org/downloads "
            "and ensure it's in your PATH or specify the path directly. "
            "See known issues in readme if your chrome version is above 115."
        ) from e
    if not chromedriver_path:
        raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
    return chromedriver_path
def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx") -> webdriver.Chrome: def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx") -> webdriver.Chrome:
"""Create a Chrome WebDriver with specified options.""" """Create a Chrome WebDriver with specified options."""
chrome_options = Options() chrome_options = Options()
@ -97,14 +116,9 @@ def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx
pretty_print(f"Anti-captcha CRX not found at {crx_path}.", color="failure") pretty_print(f"Anti-captcha CRX not found at {crx_path}.", color="failure")
else: else:
chrome_options.add_extension(crx_path) chrome_options.add_extension(crx_path)
chromedriver_path = shutil.which("chromedriver") chromedriver_path = install_chromedriver()
if not chromedriver_path:
chromedriver_path = chromedriver_autoinstaller.install()
if not chromedriver_path:
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
service = Service(chromedriver_path) service = Service(chromedriver_path)
if stealth_mode: if stealth_mode:
chrome_options.add_argument("--disable-blink-features=AutomationControlled") chrome_options.add_argument("--disable-blink-features=AutomationControlled")

View File

@ -140,6 +140,7 @@ class Speech():
return sentence return sentence
if __name__ == "__main__": if __name__ == "__main__":
# TODO: add an info message about the Chinese-related imports (cn2an, jieba)
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
speech = Speech() speech = Speech()
tosay_en = """ tosay_en = """

View File

@ -1,15 +1,14 @@
import sys import os, sys
import re import re
from io import StringIO from io import StringIO
import subprocess import subprocess
if __name__ == "__main__": if __name__ == "__main__": # if running as a script for individual testing
from tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from safety import is_unsafe
else: from sources.tools.tools import Tools
from sources.tools.tools import Tools from sources.tools.safety import is_unsafe
from sources.tools.safety import is_unsafe
class BashInterpreter(Tools): class BashInterpreter(Tools):
""" """

View File

@ -1,12 +1,12 @@
import subprocess import subprocess
import os import os, sys
import tempfile import tempfile
import re import re
if __name__ == "__main__": if __name__ == "__main__": # if running as a script for individual testing
from tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
else:
from sources.tools.tools import Tools from sources.tools.tools import Tools
class CInterpreter(Tools): class CInterpreter(Tools):
""" """

View File

@ -1,12 +1,12 @@
import subprocess import subprocess
import os import os, sys
import tempfile import tempfile
import re import re
if __name__ == "__main__": if __name__ == "__main__": # if running as a script for individual testing
from tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
else:
from sources.tools.tools import Tools from sources.tools.tools import Tools
class GoInterpreter(Tools): class GoInterpreter(Tools):
""" """

View File

@ -1,12 +1,12 @@
import subprocess import subprocess
import os import os, sys
import tempfile import tempfile
import re import re
if __name__ == "__main__": if __name__ == "__main__": # if running as a script for individual testing
from tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
else:
from sources.tools.tools import Tools from sources.tools.tools import Tools
class JavaInterpreter(Tools): class JavaInterpreter(Tools):
""" """

View File

@ -4,10 +4,10 @@ import os
import re import re
from io import StringIO from io import StringIO
if __name__ == "__main__": if __name__ == "__main__": # if running as a script for individual testing
from tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
else:
from sources.tools.tools import Tools from sources.tools.tools import Tools
class PyInterpreter(Tools): class PyInterpreter(Tools):
""" """

View File

@ -1,13 +1,12 @@
import os import os, sys
import stat import stat
import mimetypes import mimetypes
import configparser import configparser
if __name__ == "__main__": if __name__ == "__main__": # if running as a script for individual testing
from tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
else:
from sources.tools.tools import Tools
from sources.tools.tools import Tools
class FileFinder(Tools): class FileFinder(Tools):
""" """
@ -30,14 +29,45 @@ class FileFinder(Tools):
return file.read() return file.read()
except Exception as e: except Exception as e:
return f"Error reading file: {e}" return f"Error reading file: {e}"
def read_arbitrary_file(self, file_path: str, file_type: str) -> str:
    """
    Reads a file and returns its content as text, picking the reader from the file type.
    Args:
        file_path (str): The path to the file to read
        file_type (str): The type description of the file (get_file_info passes a guessed MIME type or "Unknown")
    Returns:
        str: The content of the file as text, or a "Can't read file" placeholder for unsupported types
    """
    language = ("python", "bash", "javascript", "html", "css", "json")
    # The file is only read in the branches that actually use its content.
    if "text" in file_type or any(lang in file_type for lang in language):
        return self.read_file(file_path)
    if "pdf" in file_type:
        # Imported lazily so pypdf is only required when a PDF is actually read.
        from pypdf import PdfReader
        reader = PdfReader(file_path)
        # "or ''" keeps the join safe should a page yield no text.
        return '\n'.join(page.extract_text() or "" for page in reader.pages)
    if "binary" in file_type:
        content_raw = self.read_file(file_path)
        # read_file may hand back already-decoded text; only bytes need decoding.
        if isinstance(content_raw, (bytes, bytearray)):
            return content_raw.decode('utf-8', errors='replace')
        return content_raw
    return f"Can't read file: ![{file_type}]({file_path})"
def get_file_info(self, file_path: str) -> str: def get_file_info(self, file_path: str) -> str:
"""
Gets information about a file, including its name, path, type, content, and permissions.
Args:
file_path (str): The path to the file
Returns:
str: A dictionary containing the file information
"""
if os.path.exists(file_path): if os.path.exists(file_path):
stats = os.stat(file_path) stats = os.stat(file_path)
permissions = oct(stat.S_IMODE(stats.st_mode)) permissions = oct(stat.S_IMODE(stats.st_mode))
file_type, _ = mimetypes.guess_type(file_path) file_type, _ = mimetypes.guess_type(file_path)
file_type = file_type if file_type else "Unknown" file_type = file_type if file_type else "Unknown"
content = self.read_file(file_path) content = self.read_arbitrary_file(file_path, file_type)
result = { result = {
"filename": os.path.basename(file_path), "filename": os.path.basename(file_path),
@ -148,7 +178,7 @@ if __name__ == "__main__":
tool = FileFinder() tool = FileFinder()
result = tool.execute([""" result = tool.execute(["""
action=read action=read
name=tools.py name=cover_letter_martin_legrand.pdf
"""], False) """], False)
print("Execution result:") print("Execution result:")
print(result) print(result)

View File

@ -1,13 +1,13 @@
import os import os, sys
import requests import requests
import dotenv import dotenv
dotenv.load_dotenv() dotenv.load_dotenv()
if __name__ == "__main__": if __name__ == "__main__": # if running as a script for individual testing
from tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
else:
from sources.tools.tools import Tools from sources.tools.tools import Tools
class FlightSearch(Tools): class FlightSearch(Tools):
def __init__(self, api_key: str = None): def __init__(self, api_key: str = None):

View File

@ -1,12 +1,13 @@
import os import os, sys
import requests import requests
from urllib.parse import urljoin from urllib.parse import urljoin
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
if __name__ == "__main__": from sources.tools.tools import Tools
from tools import Tools
else: if __name__ == "__main__": # if running as a script for individual testing
from sources.tools.tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
class MCP_finder(Tools): class MCP_finder(Tools):
""" """

View File

@ -2,10 +2,10 @@ import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import os import os
if __name__ == "__main__": if __name__ == "__main__": # if running as a script for individual testing
from tools import Tools sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
else:
from sources.tools.tools import Tools from sources.tools.tools import Tools
class searxSearch(Tools): class searxSearch(Tools):
def __init__(self, base_url: str = None): def __init__(self, base_url: str = None):

View File

@ -21,6 +21,10 @@ import sys
import os import os
import configparser import configparser
from abc import abstractmethod from abc import abstractmethod
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sources.logger import Logger from sources.logger import Logger
class Tools(): class Tools():

View File

@ -5,14 +5,8 @@ import dotenv
dotenv.load_dotenv() dotenv.load_dotenv()
if __name__ == "__main__": from sources.tools.tools import Tools
import sys from sources.utility import animate_thinking, pretty_print
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utility import animate_thinking, pretty_print
from tools import Tools
else:
from sources.tools.tools import Tools
from sources.utility import animate_thinking, pretty_print
""" """
WARNING WARNING