Merge pull request #364 from adeadfed/master

Mitigation for dependency confusion in pipreqs
This commit is contained in:
Vadim Kravcenko 2023-04-13 20:38:09 +02:00 committed by GitHub
commit 717d4926bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -176,6 +176,11 @@ def get_imports_info(
for item in imports: for item in imports:
try: try:
logging.warning(
'Import named "%s" not found locally. '
'Trying to resolve it at the PyPI server.',
item
)
response = requests.get( response = requests.get(
"{0}{1}/json".format(pypi_server, item), proxies=proxy) "{0}{1}/json".format(pypi_server, item), proxies=proxy)
if response.status_code == 200: if response.status_code == 200:
@ -187,15 +192,24 @@ def get_imports_info(
raise HTTPError(status_code=response.status_code, raise HTTPError(status_code=response.status_code,
reason=response.reason) reason=response.reason)
except HTTPError: except HTTPError:
logging.debug( logging.warning(
'Package %s does not exist or network problems', item) 'Package "%s" does not exist or network problems', item)
continue continue
logging.warning(
'Import named "%s" was resolved to "%s:%s" package (%s).\n'
'Please, verify manually the final list of requirements.txt '
'to avoid possible dependency confusions.',
item,
data.name,
data.latest_release_id,
data.pypi_url
)
result.append({'name': item, 'version': data.latest_release_id}) result.append({'name': item, 'version': data.latest_release_id})
return result return result
def get_locally_installed_packages(encoding=None): def get_locally_installed_packages(encoding=None):
packages = {} packages = []
ignore = ["tests", "_tests", "egg", "EGG", "info"] ignore = ["tests", "_tests", "egg", "EGG", "info"]
for path in sys.path: for path in sys.path:
for root, dirs, files in os.walk(path): for root, dirs, files in os.walk(path):
@ -205,22 +219,35 @@ def get_locally_installed_packages(encoding=None):
with open(item, "r", encoding=encoding) as f: with open(item, "r", encoding=encoding) as f:
package = root.split(os.sep)[-1].split("-") package = root.split(os.sep)[-1].split("-")
try: try:
package_import = f.read().strip().split("\n") top_level_modules = f.read().strip().split("\n")
except: # NOQA except: # NOQA
# TODO: What errors do we intend to suppress here? # TODO: What errors do we intend to suppress here?
continue continue
for i_item in package_import:
if ((i_item not in ignore) and
(package[0] not in ignore)):
version = None
if len(package) > 1:
version = package[1].replace(
".dist", "").replace(".egg", "")
packages[i_item] = { # filter off explicitly ignored top-level modules
'version': version, # such as test, egg, etc.
'name': package[0] filtered_top_level_modules = list()
}
for module in top_level_modules:
if (
(module not in ignore) and
(package[0] not in ignore)
):
# append exported top level modules to the list
filtered_top_level_modules.append(module)
version = None
if len(package) > 1:
version = package[1].replace(
".dist", "").replace(".egg", "")
# append package: top_level_modules pairs
# instead of top_level_module: package pairs
packages.append({
'name': package[0],
'version': version,
'exports': filtered_top_level_modules
})
return packages return packages
@ -228,16 +255,19 @@ def get_import_local(imports, encoding=None):
local = get_locally_installed_packages() local = get_locally_installed_packages()
result = [] result = []
for item in imports: for item in imports:
if item.lower() in local: # search through local packages
result.append(local[item.lower()]) for package in local:
# if the candidate import name matches one of the package's exported modules
# or equals the package name itself,
# append the package to the result
if item in package['exports'] or item == package['name']:
result.append(package)
# removing duplicates of package/version # removing duplicates of package/version
result_unique = [ # had to use second method instead of the previous one,
dict(t) # because we have a list in the 'exports' field
for t in set([ # https://stackoverflow.com/questions/9427163/remove-duplicate-dict-in-list-in-python
tuple(d.items()) for d in result result_unique = [i for n, i in enumerate(result) if i not in result[n+1:]]
])
]
return result_unique return result_unique
@ -443,9 +473,20 @@ def init(args):
else: else:
logging.debug("Getting packages information from Local/PyPI") logging.debug("Getting packages information from Local/PyPI")
local = get_import_local(candidates, encoding=encoding) local = get_import_local(candidates, encoding=encoding)
# Get packages that were not found locally
difference = [x for x in candidates # check if candidate name is not found in
if x.lower() not in [z['name'].lower() for z in local]] # the list of modules exported by locally installed packages
# and is not in the list of local package names;
# if so, add it to difference
difference = [x for x in candidates if
# aggregate all export lists into one
# flatten the list
# check if candidate is in exports
x.lower() not in [y for x in local for y in x['exports']]
and
# check if candidate is package names
x.lower() not in [x['name'] for x in local]]
imports = local + get_imports_info(difference, imports = local + get_imports_info(difference,
proxy=proxy, proxy=proxy,
pypi_server=pypi_server) pypi_server=pypi_server)