Skip to content

Commit

Permalink
Add package name deduplication by stemming
Browse files Browse the repository at this point in the history
  • Loading branch information
peterdemin committed Nov 23, 2022
1 parent 4c2fd1a commit 5266cf7
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
31 changes: 30 additions & 1 deletion pipcompilemulti/deduplicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,39 @@ def ignored_packages(self, in_path):
if self.env_confs is None:
return {}
rrefs = recursive_refs(self.env_confs, in_path)
return merged_packages(self.env_packages, rrefs)
return IgnoredPackages(merged_packages(self.env_packages, rrefs))

def recursive_refs(self, in_path):
    """Return the environment names that in_path references recursively.

    Yields an empty dict when no environment configurations have been
    stored on the instance yet (``env_confs`` is None); otherwise the
    lookup is delegated to the module-level ``recursive_refs`` helper.
    """
    confs = self.env_confs
    return {} if confs is None else recursive_refs(confs, in_path)


class IgnoredPackages:
    """Read-only mapping from package name to version.

    Lookups ignore letter case and the delimiters ``_``, ``-`` and ``.``,
    so ``zope.interface``, ``zope-interface`` and ``Zope_Interface`` all
    resolve to the same entry.

    NOTE: delimiters are removed, not collapsed, which means names such as
    ``foo-bar`` and ``foobar`` also share a single stem.
    """

    _DELIMITERS = ('_', '-', '.')

    def __init__(self, package_versions):
        """Index ``package_versions`` by normalized name.

        Args:
            package_versions: Mapping of package name to version. It is
                kept by reference, not copied. When several names reduce
                to the same stem, the one iterated last wins.
        """
        self._package_versions = package_versions
        self._stems = {
            self._make_stem(name): name
            for name in self._package_versions
        }

    def __getitem__(self, key):
        """Return the version stored for ``key`` or any spelling variant.

        Raises:
            KeyError: If no registered package shares the stem of ``key``.
                The error carries the name as requested by the caller,
                not the internal stem.
        """
        try:
            canonical_key = self._stems[self._make_stem(key)]
        except KeyError:
            # Report the caller's spelling; the stem is an internal detail.
            raise KeyError(key) from None
        return self._package_versions[canonical_key]

    def __contains__(self, key):
        """Return True if a package with the same stem as ``key`` exists."""
        return self._make_stem(key) in self._stems

    def get(self, key, default=None):
        """Return the version for ``key``, or ``default`` if it is unknown.

        Mirrors ``dict.get`` so this object can be used interchangeably
        with a plain dict of package versions.
        """
        try:
            return self[key]
        except KeyError:
            return default

    @classmethod
    def _make_stem(cls, name):
        """Return ``name`` lower-cased with every delimiter removed."""
        for delim in cls._DELIMITERS:
            name = name.replace(delim, '')
        return name.lower()
15 changes: 15 additions & 0 deletions tests/test_deduplicate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Package name deduplication tests"""
from pipcompilemulti.deduplicate import PackageDeduplicator


def test_package_deduplicator_handles_delimiters_normalization():
    """Lookups must tolerate delimiter and case variations in names."""
    # Environment 'a' references 'b', so packages of 'b' are ignored in 'a'.
    env_confs = [
        {'in_path': 'a', 'refs': ['b']},
        {'in_path': 'b', 'refs': []}
    ]
    deduplicator = PackageDeduplicator()
    deduplicator.on_discover(env_confs)
    deduplicator.register_packages_for_env('b', {'pkg.name': '1.0'})

    ignored = deduplicator.ignored_packages('a')

    assert 'pkg-name' in ignored
    assert ignored['Pkg_Name'] == '1.0'

0 comments on commit 5266cf7

Please sign in to comment.