Add support for jupyter notebooks

Credits to @pakio and @mateuslatrova for the contributions
This commit is contained in:
fernandocrz 2023-12-04 16:50:56 -03:00 committed by Alan Barzilay
parent 03c92488de
commit b50b4a76eb
12 changed files with 1397 additions and 12 deletions

View File

@ -39,13 +39,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install coverage docopt yarg requests
pip install poetry
poetry install --with dev
- name: Calculate coverage
run: coverage run --source=pipreqs -m unittest discover
run: poetry run coverage run --source=pipreqs -m unittest discover
- name: Create XML report
run: coverage xml
run: poetry run coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3

View File

@ -25,6 +25,14 @@ Installation
pip install pipreqs
Obs.: if you don't want support for jupyter notebooks, you can install pipreqs without the dependencies that give support to it.
To do so, run:
.. code-block:: sh
pip install --no-deps pipreqs
pip install yarg==0.1.9 docopt==0.6.2
Usage
-----
@ -57,6 +65,7 @@ Usage
<compat> | e.g. Flask~=1.1.2
<gt> | e.g. Flask>=1.1.2
<no-pin> | e.g. Flask
--scan-notebooks Look for imports in jupyter notebook files.
Example
-------

View File

@ -35,6 +35,7 @@ Options:
<compat> | e.g. Flask~=1.1.2
<gt> | e.g. Flask>=1.1.2
<no-pin> | e.g. Flask
--scan-notebooks Look for imports in jupyter notebook files.
"""
from contextlib import contextmanager
import os
@ -53,6 +54,18 @@ from pipreqs import __version__
REGEXP = [re.compile(r"^import (.+)$"), re.compile(r"^from ((?!\.+).*?) import (?:.*)$")]
scan_noteboooks = False
class NbconvertNotInstalled(ImportError):
default_message = (
"In order to scan jupyter notebooks, please install the nbconvert and ipython libraries"
)
def __init__(self, message=default_message):
super().__init__(message)
@contextmanager
def _open(filename=None, mode="r"):
"""Open a file or ``sys.stdout`` depending on the provided filename.
@ -89,7 +102,16 @@ def get_all_imports(path, encoding="utf-8", extra_ignore_dirs=None, follow_links
raw_imports = set()
candidates = []
ignore_errors = False
ignore_dirs = [".hg", ".svn", ".git", ".tox", "__pycache__", "env", "venv"]
ignore_dirs = [
".hg",
".svn",
".git",
".tox",
"__pycache__",
"env",
"venv",
".ipynb_checkpoints",
]
if extra_ignore_dirs:
ignore_dirs_parsed = []
@ -97,18 +119,22 @@ def get_all_imports(path, encoding="utf-8", extra_ignore_dirs=None, follow_links
ignore_dirs_parsed.append(os.path.basename(os.path.realpath(e)))
ignore_dirs.extend(ignore_dirs_parsed)
extensions = get_file_extensions()
walk = os.walk(path, followlinks=follow_links)
for root, dirs, files in walk:
dirs[:] = [d for d in dirs if d not in ignore_dirs]
candidates.append(os.path.basename(root))
files = [fn for fn in files if os.path.splitext(fn)[1] == ".py"]
py_files = [file for file in files if file_ext_is_allowed(file, [".py"])]
candidates.extend([os.path.splitext(filename)[0] for filename in py_files])
files = [fn for fn in files if file_ext_is_allowed(fn, extensions)]
candidates += [os.path.splitext(fn)[0] for fn in files]
for file_name in files:
file_name = os.path.join(root, file_name)
with open(file_name, "r", encoding=encoding) as f:
contents = f.read()
contents = read_file_content(file_name, encoding)
try:
tree = ast.parse(contents)
for node in ast.walk(tree):
@ -145,6 +171,40 @@ def get_all_imports(path, encoding="utf-8", extra_ignore_dirs=None, follow_links
return list(packages - data)
def get_file_extensions():
return [".py", ".ipynb"] if scan_noteboooks else [".py"]
def read_file_content(file_name: str, encoding="utf-8"):
if file_ext_is_allowed(file_name, [".py"]):
with open(file_name, "r", encoding=encoding) as f:
contents = f.read()
elif file_ext_is_allowed(file_name, [".ipynb"]) and scan_noteboooks:
contents = ipynb_2_py(file_name, encoding=encoding)
return contents
def file_ext_is_allowed(file_name, acceptable):
return os.path.splitext(file_name)[1] in acceptable
def ipynb_2_py(file_name, encoding="utf-8"):
"""
Args:
file_name (str): notebook file path to parse as python script
encoding (str): encoding of file
Returns:
str: parsed string
"""
exporter = PythonExporter()
(body, _) = exporter.from_filename(file_name)
return body.encode(encoding)
def generate_requirements_file(path, imports, symbol):
with _open(path, "w") as out_file:
logging.debug(
@ -427,10 +487,27 @@ def dynamic_versioning(scheme, imports):
return imports, symbol
def handle_scan_noteboooks():
if not scan_noteboooks:
logging.info("Not scanning for jupyter notebooks.")
return
try:
global PythonExporter
from nbconvert import PythonExporter
except ImportError:
raise NbconvertNotInstalled()
def init(args):
global scan_noteboooks
encoding = args.get("--encoding")
extra_ignore_dirs = args.get("--ignore")
follow_links = not args.get("--no-follow-links")
scan_noteboooks = args.get("--scan-notebooks", False)
handle_scan_noteboooks()
input_path = args["<path>"]
if encoding is None:

988
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,8 @@ pipreqs = "pipreqs.pipreqs:main"
python = ">=3.8.1,<=3.12"
yarg = "0.1.9"
docopt = "0.6.2"
nbconvert = "^7.11.0"
ipython = "8.12.3"
[tool.poetry.group.dev.dependencies]
flake8 = "^6.1.0"

View File

@ -0,0 +1,65 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Magic test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%automagic true"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ls -la\n",
"logstate"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ls -la"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%automagic false"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ls -la"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,37 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Markdown test\n",
"import sklearn\n",
"\n",
"```python\n",
"import FastAPI\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

View File

@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"unused import\"\"\"\n",
"# pylint: disable=undefined-all-variable, import-error, no-absolute-import, too-few-public-methods, missing-docstring\n",
"import xml.etree # [unused-import]\n",
"import xml.sax # [unused-import]\n",
"import os.path as test # [unused-import]\n",
"from sys import argv as test2 # [unused-import]\n",
"from sys import flags # [unused-import]\n",
"# +1:[unused-import,unused-import]\n",
"from collections import deque, OrderedDict, Counter\n",
"# All imports above should be ignored\n",
"import requests # [unused-import]\n",
"\n",
"# setuptools\n",
"import zipimport # command/easy_install.py\n",
"\n",
"# twisted\n",
"from importlib import invalidate_caches # python/test/test_deprecate.py\n",
"\n",
"# astroid\n",
"import zipimport # manager.py\n",
"# IPython\n",
"from importlib.machinery import all_suffixes # core/completerlib.py\n",
"import importlib # html/notebookapp.py\n",
"\n",
"from IPython.utils.importstring import import_item # Many files\n",
"\n",
"# pyflakes\n",
"# test/test_doctests.py\n",
"from pyflakes.test.test_imports import Test as TestImports\n",
"\n",
"# Nose\n",
"from nose.importer import Importer, add_path, remove_path # loader.py\n",
"\n",
"import atexit\n",
"from __future__ import print_function\n",
"from docopt import docopt\n",
"import curses, logging, sqlite3\n",
"import logging\n",
"import os\n",
"import sqlite3\n",
"import time\n",
"import sys\n",
"import signal\n",
"import bs4\n",
"import nonexistendmodule\n",
"import boto as b, peewee as p\n",
"# import django\n",
"import flask.ext.somext # # #\n",
"from sqlalchemy import model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" import ujson as json\n",
"except ImportError:\n",
" import json\n",
"\n",
"import models\n",
"\n",
"\n",
"def main():\n",
" pass\n",
"\n",
"import after_method_is_valid_even_if_not_pep8"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,34 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cd ."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -10,7 +10,7 @@ Tests for `pipreqs` module.
from io import StringIO
import logging
from unittest.mock import patch
from unittest.mock import patch, Mock
import unittest
import os
import requests
@ -48,7 +48,7 @@ class TestPipreqs(unittest.TestCase):
"after_method_is_valid_even_if_not_pep8",
]
cls.modules2 = ["beautifulsoup4"]
cls.local = ["docopt", "requests", "nose", "pyflakes"]
cls.local = ["docopt", "requests", "nose", "pyflakes", "ipython"]
cls.project = os.path.join(os.path.dirname(__file__), "_data")
cls.empty_filepath = os.path.join(cls.project, "empty.txt")
cls.imports_filepath = os.path.join(cls.project, "imports.txt")
@ -84,6 +84,12 @@ class TestPipreqs(unittest.TestCase):
cls.alt_requirement_path = os.path.join(cls.project, "requirements2.txt")
cls.non_existing_filepath = "xpto"
cls.project_with_notebooks = os.path.join(os.path.dirname(__file__), "_data_notebook")
cls.project_with_invalid_notebooks = os.path.join(os.path.dirname(__file__), "_invalid_data_notebook")
cls.python_path_same_imports = os.path.join(os.path.dirname(__file__), "_data/test.py")
cls.notebook_path_same_imports = os.path.join(os.path.dirname(__file__), "_data_notebook/test.ipynb")
def test_get_all_imports(self):
imports = pipreqs.get_all_imports(self.project)
self.assertEqual(len(imports), 15)
@ -519,6 +525,44 @@ class TestPipreqs(unittest.TestCase):
stdout_content = capturedOutput.getvalue().lower()
self.assertTrue(file_content == stdout_content)
def test_import_notebooks(self):
"""
Test the function get_all_imports() using .ipynb file
"""
self.mock_scan_notebooks()
imports = pipreqs.get_all_imports(self.project_with_notebooks)
for item in imports:
self.assertTrue(item.lower() in self.modules, "Import is missing: " + item)
not_desired_imports = ["time", "logging", "curses", "__future__", "django", "models", "FastAPI", "sklearn"]
for not_desired_import in not_desired_imports:
self.assertFalse(
not_desired_import in imports,
f"{not_desired_import} was imported, but it should not have been."
)
def test_invalid_notebook(self):
"""
Test that invalid notebook files cannot be imported.
"""
self.mock_scan_notebooks()
self.assertRaises(SyntaxError, pipreqs.get_all_imports, self.project_with_invalid_notebooks)
def test_ipynb_2_py(self):
"""
Test the function ipynb_2_py() which converts .ipynb file to .py format
"""
python_imports = pipreqs.get_all_imports(self.python_path_same_imports)
notebook_imports = pipreqs.get_all_imports(self.notebook_path_same_imports)
self.assertEqual(python_imports, notebook_imports)
def test_file_ext_is_allowed(self):
"""
Test the function file_ext_is_allowed()
"""
self.assertTrue(pipreqs.file_ext_is_allowed("main.py", [".py"]))
self.assertTrue(pipreqs.file_ext_is_allowed("main.py", [".py", ".ipynb"]))
self.assertFalse(pipreqs.file_ext_is_allowed("main.py", [".ipynb"]))
def test_parse_requirements(self):
"""
Test parse_requirements function
@ -561,6 +605,34 @@ class TestPipreqs(unittest.TestCase):
self.assertEqual(printed_text, "File xpto was not found. Please, fix it and run again.")
def test_ignore_notebooks(self):
"""
Test if notebooks are ignored when the scan-notebooks parameter is False
"""
notebook_requirement_path = os.path.join(self.project_with_notebooks, "requirements.txt")
pipreqs.init(
{
"<path>": self.project_with_notebooks,
"--savepath": None,
"--use-local": None,
"--force": True,
"--proxy": None,
"--pypi-server": None,
"--print": False,
"--diff": None,
"--clean": None,
"--mode": None,
"--scan-notebooks": False,
}
)
assert os.path.exists(notebook_requirement_path) == 1
assert os.path.getsize(notebook_requirement_path) == 1 # file only has a "\n", meaning it's empty
def mock_scan_notebooks(self):
pipreqs.scan_noteboooks = Mock(return_value=True)
pipreqs.handle_scan_noteboooks()
def tearDown(self):
"""
Remove requiremnts.txt files that were written

View File

@ -23,4 +23,4 @@ exclude =
tests/_data_duplicated_deps/
tests/_data_ignore/
tests/_invalid_data/
max-line-length = 120
max-line-length = 120