feat : fileFinder read pdf, browser better chromedriver install

This commit is contained in:
martin legrand 2025-05-02 14:10:16 +02:00
parent 3cf1cab68f
commit ed76c8415b
13 changed files with 106 additions and 63 deletions

View File

@ -65,6 +65,25 @@ def get_random_user_agent() -> str:
]
return random.choice(user_agents)
def install_chromedriver() -> str:
    """
    Locate the ChromeDriver executable, installing it if it is missing.

    The PATH is searched first; only when no executable is found there is
    chromedriver_autoinstaller asked to install one.

    Returns:
        str: The path to the ChromeDriver executable.
    Raises:
        FileNotFoundError: If ChromeDriver is not on the PATH and the
            automatic installation fails or does not yield a path.
    """
    chromedriver_path = shutil.which("chromedriver")
    if chromedriver_path:
        return chromedriver_path
    try:
        chromedriver_path = chromedriver_autoinstaller.install()
    except Exception as e:
        # Any installer failure is reported as a single, actionable error;
        # the original exception stays attached as the cause.
        raise FileNotFoundError(
            "ChromeDriver not found and could not be installed automatically. "
            "Please install it manually from https://chromedriver.chromium.org/downloads "
            "and ensure it's in your PATH or specify the path directly. "
            "See known issues in the README if your Chrome version is above 115."
        ) from e
    # The installer may finish without raising yet still give no usable path.
    if not chromedriver_path:
        raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
    return chromedriver_path
def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx") -> webdriver.Chrome:
"""Create a Chrome WebDriver with specified options."""
chrome_options = Options()
@ -97,14 +116,9 @@ def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx
pretty_print(f"Anti-captcha CRX not found at {crx_path}.", color="failure")
else:
chrome_options.add_extension(crx_path)
chromedriver_path = shutil.which("chromedriver")
if not chromedriver_path:
chromedriver_path = chromedriver_autoinstaller.install()
if not chromedriver_path:
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
chromedriver_path = install_chromedriver()
service = Service(chromedriver_path)
if stealth_mode:
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

View File

@ -140,6 +140,7 @@ class Speech():
return sentence
if __name__ == "__main__":
# TODO add info message for cn2an, jieba chinese related import
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
speech = Speech()
tosay_en = """

View File

@ -1,15 +1,14 @@
import sys
import os, sys
import re
from io import StringIO
import subprocess
if __name__ == "__main__":
from tools import Tools
from safety import is_unsafe
else:
from sources.tools.tools import Tools
from sources.tools.safety import is_unsafe
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from sources.tools.tools import Tools
from sources.tools.safety import is_unsafe
class BashInterpreter(Tools):
"""

View File

@ -1,12 +1,12 @@
import subprocess
import os
import os, sys
import tempfile
import re
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from sources.tools.tools import Tools
class CInterpreter(Tools):
"""

View File

@ -1,12 +1,12 @@
import subprocess
import os
import os, sys
import tempfile
import re
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from sources.tools.tools import Tools
class GoInterpreter(Tools):
"""

View File

@ -1,12 +1,12 @@
import subprocess
import os
import os, sys
import tempfile
import re
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from sources.tools.tools import Tools
class JavaInterpreter(Tools):
"""

View File

@ -4,10 +4,10 @@ import os
import re
from io import StringIO
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from sources.tools.tools import Tools
class PyInterpreter(Tools):
"""

View File

@ -1,13 +1,12 @@
import os
import os, sys
import stat
import mimetypes
import configparser
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from sources.tools.tools import Tools
class FileFinder(Tools):
"""
@ -30,14 +29,45 @@ class FileFinder(Tools):
return file.read()
except Exception as e:
return f"Error reading file: {e}"
def read_arbitrary_file(self, file_path: str, file_type: str) -> str:
    """
    Reads the content of a file, choosing the reading strategy from its type.
    Args:
        file_path (str): The path to the file to read
        file_type (str): The type of the file (get_file_info passes the guessed MIME type, or "Unknown")
    Returns:
        str: The content of the file as text, an "Error reading file" message
             if a PDF cannot be parsed, or a "Can't read file" message for
             unsupported types
    """
    language = ("python", "bash", "javascript", "html", "css", "json")
    # Plain text and known source/markup types are returned as read.
    if "text" in file_type or any(lang in file_type for lang in language):
        return self.read_file(file_path)
    if "pdf" in file_type:
        # Report failures (missing pypdf, unreadable file) as a message,
        # the same way read_file reports its own errors.
        try:
            from pypdf import PdfReader
            reader = PdfReader(file_path)
            return '\n'.join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            return f"Error reading file: {e}"
    if "binary" in file_type:
        content_raw = self.read_file(file_path)
        # read_file can hand back a str (e.g. its error message); only bytes need decoding.
        if isinstance(content_raw, bytes):
            return content_raw.decode('utf-8', errors='replace')
        return content_raw
    return f"Can't read file: ![{file_type}]({file_path})"
def get_file_info(self, file_path: str) -> str:
"""
Gets information about a file, including its name, path, type, content, and permissions.
Args:
file_path (str): The path to the file
Returns:
str: A dictionary containing the file information
"""
if os.path.exists(file_path):
stats = os.stat(file_path)
permissions = oct(stat.S_IMODE(stats.st_mode))
file_type, _ = mimetypes.guess_type(file_path)
file_type = file_type if file_type else "Unknown"
content = self.read_file(file_path)
content = self.read_arbitrary_file(file_path, file_type)
result = {
"filename": os.path.basename(file_path),
@ -148,7 +178,7 @@ if __name__ == "__main__":
tool = FileFinder()
result = tool.execute(["""
action=read
name=tools.py
name=cover_letter_martin_legrand.pdf
"""], False)
print("Execution result:")
print(result)

View File

@ -1,13 +1,13 @@
import os
import os, sys
import requests
import dotenv
dotenv.load_dotenv()
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from sources.tools.tools import Tools
class FlightSearch(Tools):
def __init__(self, api_key: str = None):

View File

@ -1,12 +1,13 @@
import os
import os, sys
import requests
from urllib.parse import urljoin
from typing import Dict, Any, Optional
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
# The project root must be on sys.path *before* the project import below,
# otherwise running this file directly as a script cannot resolve `sources`.
if __name__ == "__main__": # if running as a script for individual testing
    sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from sources.tools.tools import Tools
class MCP_finder(Tools):
"""

View File

@ -2,10 +2,10 @@ import requests
from bs4 import BeautifulSoup
import os
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from sources.tools.tools import Tools
class searxSearch(Tools):
def __init__(self, base_url: str = None):

View File

@ -21,6 +21,10 @@ import sys
import os
import configparser
from abc import abstractmethod
if __name__ == "__main__": # if running as a script for individual testing
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sources.logger import Logger
class Tools():

View File

@ -5,14 +5,8 @@ import dotenv
dotenv.load_dotenv()
if __name__ == "__main__":
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utility import animate_thinking, pretty_print
from tools import Tools
else:
from sources.tools.tools import Tools
from sources.utility import animate_thinking, pretty_print
from sources.tools.tools import Tools
from sources.utility import animate_thinking, pretty_print
"""
WARNING