Walk Abstract Syntax Tree to find imports

Rationale: Using regex means that you can only find the imports that are at the top of the file without any spaces between them. There are likely other issues, but that was the one that bit me.

Caveat: The old regex-based method would still process files with broken syntax. This one will not: a file that fails to parse aborts the run.
This commit is contained in:
Kay Sackey 2016-01-28 01:04:58 -05:00
parent 5930fdde61
commit 60983f0fde

View File

@ -42,8 +42,11 @@ else:
open_func = codecs.open open_func = codecs.open
import ast, traceback
def get_all_imports(path, encoding=None): def get_all_imports(path, encoding=None):
imports = [] imports = set()
raw_imports = set()
candidates = [] candidates = []
ignore_dirs = [".hg", ".svn", ".git", "__pycache__", "env", "venv"] ignore_dirs = [".hg", ".svn", ".git", "__pycache__", "env", "venv"]
@ -56,19 +59,27 @@ def get_all_imports(path, encoding=None):
candidates += [os.path.splitext(fn)[0] for fn in files] candidates += [os.path.splitext(fn)[0] for fn in files]
for file_name in files: for file_name in files:
with open_func(os.path.join(root, file_name), "r", encoding=encoding) as f: with open_func(os.path.join(root, file_name), "r", encoding=encoding) as f:
contents = re.sub(re.compile("'''.+?'''", re.DOTALL), '', f.read()) contents = f.read()
contents = re.sub(re.compile('""".+?"""', re.DOTALL), "", contents) try:
lines = contents.split("\n") tree = ast.parse(contents)
lines = filter( except Exception, e:
filter_line, map(lambda l: l.partition("#")[0].strip(), lines)) traceback.print_exc(e)
for line in lines: print("Failed on file: %s" % os.path.join(root, file_name))
if "(" in line: exit(1)
break for node in ast.walk(tree):
for rex in REGEXP: if isinstance(node, ast.Import):
s = rex.findall(line) for subnode in node.names:
for item in s: raw_imports.add(subnode.name)
res = map(get_name_without_alias, item.split(",")) elif isinstance(node, ast.ImportFrom):
imports = imports + [x for x in res if len(x) > 0] raw_imports.add(node.module)
# Clean up imports
for name in [n for n in raw_imports if n]:
# Sanity check: a name can be None when the import statement has the form `from . import X`.
# Cleanup: we only want the first part of the import.
# Ex: `from django.conf import X` yields `django.conf`, but we only want `django` as an import.
cleaned_name, _, _ = name.partition('.')
imports.add(cleaned_name)
packages = set(imports) - set(set(candidates) & set(imports)) packages = set(imports) - set(set(candidates) & set(imports))
logging.debug('Found packages: {0}'.format(packages)) logging.debug('Found packages: {0}'.format(packages))