From 60983f0fde791ab3b9d4e61982acd7e1369646b4 Mon Sep 17 00:00:00 2001 From: Kay Sackey Date: Thu, 28 Jan 2016 01:04:58 -0500 Subject: [PATCH 1/6] Walk Abstract Syntax Tree to find imports Rationale: Using regex means that you can only find the imports that are at the top of the page without any spaces between them. There's likely other issues, but that was the one that bit me. Caveat: Old method would parse files with broken syntax. This one will not. --- pipreqs/pipreqs.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/pipreqs/pipreqs.py b/pipreqs/pipreqs.py index 29bdd13..b2d63e2 100755 --- a/pipreqs/pipreqs.py +++ b/pipreqs/pipreqs.py @@ -42,8 +42,11 @@ else: open_func = codecs.open +import ast, traceback + def get_all_imports(path, encoding=None): - imports = [] + imports = set() + raw_imports = set() candidates = [] ignore_dirs = [".hg", ".svn", ".git", "__pycache__", "env", "venv"] @@ -56,19 +59,27 @@ def get_all_imports(path, encoding=None): candidates += [os.path.splitext(fn)[0] for fn in files] for file_name in files: with open_func(os.path.join(root, file_name), "r", encoding=encoding) as f: - contents = re.sub(re.compile("'''.+?'''", re.DOTALL), '', f.read()) - contents = re.sub(re.compile('""".+?"""', re.DOTALL), "", contents) - lines = contents.split("\n") - lines = filter( - filter_line, map(lambda l: l.partition("#")[0].strip(), lines)) - for line in lines: - if "(" in line: - break - for rex in REGEXP: - s = rex.findall(line) - for item in s: - res = map(get_name_without_alias, item.split(",")) - imports = imports + [x for x in res if len(x) > 0] + contents = f.read() + try: + tree = ast.parse(contents) + except Exception, e: + traceback.print_exc(e) + print("Failed on file: %s" % os.path.join(root, file_name)) + exit(1) + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for subnode in node.names: + raw_imports.add(subnode.name) + elif isinstance(node, ast.ImportFrom): + raw_imports.add(node.module) + + # Clean up imports + for name in [n for n in raw_imports if n]: + # Sanity check: Name could have been None if the import statement was as from . import X + # Cleanup: We only want to first part of the import. + # Ex: from django.conf --> django.conf. But we only want django as an import + cleaned_name, _, _ = name.partition('.') + imports.add(cleaned_name) packages = set(imports) - set(set(candidates) & set(imports)) logging.debug('Found packages: {0}'.format(packages)) From 415ede7691519252eda87d33481adea19bf3d7ba Mon Sep 17 00:00:00 2001 From: Kay Sackey Date: Thu, 28 Jan 2016 01:08:38 -0500 Subject: [PATCH 2/6] Cleanup Moved the import statement to the top of the page where it belongs --- pipreqs/pipreqs.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pipreqs/pipreqs.py b/pipreqs/pipreqs.py index b2d63e2..e20939c 100755 --- a/pipreqs/pipreqs.py +++ b/pipreqs/pipreqs.py @@ -23,7 +23,7 @@ import sys import re import logging import codecs - +import ast, traceback from docopt import docopt import requests from yarg import json2package @@ -42,8 +42,6 @@ else: open_func = codecs.open -import ast, traceback - def get_all_imports(path, encoding=None): imports = set() raw_imports = set() @@ -60,6 +58,9 @@ def get_all_imports(path, encoding=None): for file_name in files: with open_func(os.path.join(root, file_name), "r", encoding=encoding) as f: contents = f.read() + #contents = re.sub(re.compile("'''.+?'''", re.DOTALL), '', f.read()) + #contents = re.sub(re.compile('""".+?"""', re.DOTALL), "", contents) + try: tree = ast.parse(contents) except Exception, e: From 20596265831c71546801be63fd0806b5f9183bb5 Mon Sep 17 00:00:00 2001 From: Kay Sackey Date: Thu, 28 Jan 2016 15:01:39 -0500 Subject: [PATCH 3/6] Passing Tests Also added test documentation. The changes to the tests were necessary as AST walking will retrieve all valid dependencies and fail on syntactically incorrect .py files. --- pipreqs/pipreqs.py | 31 +++++++++++--------- tests/_data/test.py | 4 +-- tests/_invalid_data/invalid.py | 1 + tests/test_pipreqs.py | 53 ++++++++++++++++++++++++++++++---- 4 files changed, 68 insertions(+), 21 deletions(-) create mode 100644 tests/_invalid_data/invalid.py diff --git a/pipreqs/pipreqs.py b/pipreqs/pipreqs.py index e20939c..74061a2 100755 --- a/pipreqs/pipreqs.py +++ b/pipreqs/pipreqs.py @@ -23,7 +23,8 @@ import sys import re import logging import codecs -import ast, traceback +import ast +import traceback from docopt import docopt import requests from yarg import json2package @@ -46,6 +47,7 @@ def get_all_imports(path, encoding=None): imports = set() raw_imports = set() candidates = [] + ignore_errors = False ignore_dirs = [".hg", ".svn", ".git", "__pycache__", "env", "venv"] for root, dirs, files in os.walk(path): @@ -58,21 +60,24 @@ def get_all_imports(path, encoding=None): for file_name in files: with open_func(os.path.join(root, file_name), "r", encoding=encoding) as f: contents = f.read() - #contents = re.sub(re.compile("'''.+?'''", re.DOTALL), '', f.read()) - #contents = re.sub(re.compile('""".+?"""', re.DOTALL), "", contents) - try: tree = ast.parse(contents) + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for subnode in node.names: + raw_imports.add(subnode.name) + elif isinstance(node, ast.ImportFrom): + raw_imports.add(node.module) except Exception, e: - traceback.print_exc(e) - print("Failed on file: %s" % os.path.join(root, file_name)) - exit(1) - for node in ast.walk(tree): - if isinstance(node, ast.Import): - for subnode in node.names: - raw_imports.add(subnode.name) - elif isinstance(node, ast.ImportFrom): - raw_imports.add(node.module) + if ignore_errors: + traceback.print_exc(e) + logging.warn("Failed on file: %s" % os.path.join(root, file_name)) + continue + else: + logging.error("Failed on file: %s" % os.path.join(root, file_name)) + raise e + + # Clean up imports for name in [n for n in raw_imports if n]: diff --git a/tests/_data/test.py b/tests/_data/test.py index 2ea6692..cfd039c 100644 --- a/tests/_data/test.py +++ b/tests/_data/test.py @@ -43,7 +43,7 @@ import sys import signal import bs4 import nonexistendmodule -import boto as b, import peewee as p, +import boto as b, peewee as p # import django import flask.ext.somext # # # from sqlalchemy import model @@ -58,4 +58,4 @@ import models def main(): pass -import after_method_should_be_ignored +import after_method_is_valid_even_if_not_pep8 diff --git a/tests/_invalid_data/invalid.py b/tests/_invalid_data/invalid.py new file mode 100644 index 0000000..aa3e849 --- /dev/null +++ b/tests/_invalid_data/invalid.py @@ -0,0 +1 @@ +import boto as b, import peewee as p, diff --git a/tests/test_pipreqs.py b/tests/test_pipreqs.py index acf26d8..a50275b 100755 --- a/tests/test_pipreqs.py +++ b/tests/test_pipreqs.py @@ -20,17 +20,18 @@ class TestPipreqs(unittest.TestCase): def setUp(self): self.modules = ['flask', 'requests', 'sqlalchemy', 'docopt', 'boto', 'ipython', 'pyflakes', 'nose', - 'peewee', 'ujson', 'nonexistendmodule', 'bs4', ] + 'peewee', 'ujson', 'nonexistendmodule', 'bs4', 'after_method_is_valid_even_if_not_pep8' ] self.modules2 = ['beautifulsoup4'] - self.local = ["docopt", "requests", "nose"] + self.local = ["docopt", "requests", "nose", 'pyflakes'] self.project = os.path.join(os.path.dirname(__file__), "_data") + self.project_invalid = os.path.join(os.path.dirname(__file__), "_invalid_data") self.requirements_path = os.path.join(self.project, "requirements.txt") self.alt_requirement_path = os.path.join( self.project, "requirements2.txt") def test_get_all_imports(self): imports = pipreqs.get_all_imports(self.project) - self.assertEqual(len(imports), 12) + self.assertEqual(len(imports), 13) for item in imports: self.assertTrue( item.lower() in self.modules, "Import is missing: " + item) @@ -41,10 +42,20 @@ class TestPipreqs(unittest.TestCase): self.assertFalse("django" in imports) self.assertFalse("models" in imports) + def test_invalid_python(self): + """ + Test that invalid python files cannot be imported. + """ + with self.assertRaises(SyntaxError) as exc: + imports = pipreqs.get_all_imports(self.project_invalid) + def test_get_imports_info(self): + """ + Test to see that the right number of packages were found on PyPI + """ imports = pipreqs.get_all_imports(self.project) with_info = pipreqs.get_imports_info(imports) - # Should contain only 5 Elements without the "nonexistendmodule" + # Should contain 10 items without the "nonexistendmodule" and "after_method_is_valid_even_if_not_pep8" self.assertEqual(len(with_info), 10) for item in with_info: self.assertTrue( @@ -52,21 +63,33 @@ class TestPipreqs(unittest.TestCase): "Import item appears to be missing " + item['name']) def test_get_use_local_only(self): + """ + Test without checking PyPI, check to see if names of local imports matches what we expect + + - Note even though pyflakes isn't in requirements.txt, + It's added to locals since it is a development dependency for testing + """ # should find only docopt and requests imports_with_info = pipreqs.get_import_local(self.modules) for item in imports_with_info: self.assertTrue(item['name'].lower() in self.local) def test_init(self): + """ + Test that all modules we will test upon, are in requirements file + """ pipreqs.init({'': self.project, '--savepath': None, '--use-local': None, '--force': True, '--proxy':None, '--pypi-server':None}) assert os.path.exists(self.requirements_path) == 1 with open(self.requirements_path, "r") as f: data = f.read().lower() - for item in self.modules[:-2]: + for item in self.modules[:-3]: self.assertTrue(item.lower() in data) def test_init_local_only(self): + """ + Test that items listed in requirements.text are the same as locals expected + """ pipreqs.init({'': self.project, '--savepath': None, '--use-local': True, '--force': True, '--proxy':None, '--pypi-server':None}) assert os.path.exists(self.requirements_path) == 1 @@ -77,17 +100,23 @@ class TestPipreqs(unittest.TestCase): self.assertTrue(item[0].lower() in self.local) def test_init_savepath(self): + """ + Test that we can save requiremnts.tt correctly to a different path + """ pipreqs.init({'': self.project, '--savepath': self.alt_requirement_path, '--use-local': None, '--proxy':None, '--pypi-server':None}) assert os.path.exists(self.alt_requirement_path) == 1 with open(self.alt_requirement_path, "r") as f: data = f.read().lower() - for item in self.modules[:-2]: + for item in self.modules[:-3]: self.assertTrue(item.lower() in data) for item in self.modules2: self.assertTrue(item.lower() in data) def test_init_overwrite(self): + """ + Test that if requiremnts.txt exists, it will not automatically be overwritten + """ with open(self.requirements_path, "w") as f: f.write("should_not_be_overwritten") pipreqs.init({'': self.project, '--savepath': None, @@ -98,6 +127,10 @@ class TestPipreqs(unittest.TestCase): self.assertEqual(data, "should_not_be_overwritten") def test_get_import_name_without_alias(self): + """ + Test that function get_name_without_alias() will work on a string. + - Note: This isn't truly needed when pipreqs is walking the AST to find imports + """ import_name_with_alias = "requests as R" expected_import_name_without_alias = "requests" import_name_without_aliases = pipreqs.get_name_without_alias( @@ -106,16 +139,24 @@ class TestPipreqs(unittest.TestCase): import_name_without_aliases, expected_import_name_without_alias) def test_custom_pypi_server(self): + """ + Test that trying to get a custom pypi sever fails correctly + """ self.assertRaises(requests.exceptions.MissingSchema, pipreqs.init, {'': self.project, '--savepath': None, '--use-local': None, '--force': True, '--proxy': None, '--pypi-server': 'nonexistent'}) def tearDown(self): + """ + Remove requiremnts.txt files that were written + """ try: os.remove(self.requirements_path) + pass except OSError: pass try: os.remove(self.alt_requirement_path) + pass except OSError: pass From bc5cd169ececebd878c302dd8b419a284a01fbad Mon Sep 17 00:00:00 2001 From: Kay Sackey Date: Thu, 28 Jan 2016 15:05:04 -0500 Subject: [PATCH 4/6] Python 3 Compatability --- pipreqs/pipreqs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipreqs/pipreqs.py b/pipreqs/pipreqs.py index 74061a2..a3dd64f 100755 --- a/pipreqs/pipreqs.py +++ b/pipreqs/pipreqs.py @@ -68,14 +68,14 @@ def get_all_imports(path, encoding=None): raw_imports.add(subnode.name) elif isinstance(node, ast.ImportFrom): raw_imports.add(node.module) - except Exception, e: + except Exception as exc: if ignore_errors: - traceback.print_exc(e) + traceback.print_exc(exc) logging.warn("Failed on file: %s" % os.path.join(root, file_name)) continue else: logging.error("Failed on file: %s" % os.path.join(root, file_name)) - raise e + raise exc From e4a58ee0ada0e809860d49d725524405969f6091 Mon Sep 17 00:00:00 2001 From: Kay Sackey Date: Thu, 28 Jan 2016 15:38:00 -0500 Subject: [PATCH 5/6] Python 2.6 Compatibility --- tests/test_pipreqs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_pipreqs.py b/tests/test_pipreqs.py index a50275b..039ceb3 100755 --- a/tests/test_pipreqs.py +++ b/tests/test_pipreqs.py @@ -46,8 +46,7 @@ class TestPipreqs(unittest.TestCase): """ Test that invalid python files cannot be imported. """ - with self.assertRaises(SyntaxError) as exc: - imports = pipreqs.get_all_imports(self.project_invalid) + self.assertRaises(SyntaxError, pipreqs.get_all_imports, self.project_invalid) def test_get_imports_info(self): """ From 6ab86c37e4b2da8ffabeb55ca790e0b6d9275186 Mon Sep 17 00:00:00 2001 From: Kay Sackey Date: Thu, 28 Jan 2016 15:45:17 -0500 Subject: [PATCH 6/6] Removing accidentally added lines --- tests/test_pipreqs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_pipreqs.py b/tests/test_pipreqs.py index 039ceb3..2a51bb5 100755 --- a/tests/test_pipreqs.py +++ b/tests/test_pipreqs.py @@ -46,7 +46,7 @@ class TestPipreqs(unittest.TestCase): """ Test that invalid python files cannot be imported. """ - self.assertRaises(SyntaxError, pipreqs.get_all_imports, self.project_invalid) + self.assertRaises(SyntaxError, pipreqs.get_all_imports, self.project_invalid) def test_get_imports_info(self): """ @@ -150,12 +150,10 @@ class TestPipreqs(unittest.TestCase): """ try: os.remove(self.requirements_path) - pass except OSError: pass try: os.remove(self.alt_requirement_path) - pass except OSError: pass