From d247c1e8c500dda395954c8a1e8f546fdfa49b85 Mon Sep 17 00:00:00 2001 From: Richard Si Date: Sat, 27 Apr 2024 19:56:49 -0400 Subject: [PATCH] importlib: Read distribution name/version from metadata directory name, if possible importlib does not cache metadata in memory, so querying even simple attributes like distribution names and versions can quickly become expensive (as each access requires reading METADATA). Fortunately, `Distribution.canonical_name` is optimized to parse the metadata directory name to query the name if possible. This commit extends this optimization to the finder implementation and version attribute. .egg-info directory names tend not to include the version, so they are not considered for optimizing version lookup. simplewheel-2.0-1-py2.py3-none-any.whl had to be modified to rename the .dist-info directory, which mistakenly included the wheel build tag (in violation of the wheel specification). The wheel originally contained: simplewheel/__init__.py simplewheel-2.0-1.dist-info/DESCRIPTION.rst simplewheel-2.0-1.dist-info/metadata.json simplewheel-2.0-1.dist-info/top_level.txt simplewheel-2.0-1.dist-info/WHEEL simplewheel-2.0-1.dist-info/METADATA simplewheel-2.0-1.dist-info/RECORD Otherwise, the build tag was mistaken for part of the version and led pip to think the wheel was a post-release, breaking tests. 
--- news/12656.feature.rst | 3 ++ ...1b-1578-4128-8db3-9aa72b3a6a84.trivial.rst | 0 .../_internal/metadata/importlib/_compat.py | 37 ++++++++++++++++-- .../_internal/metadata/importlib/_dists.py | 24 ++++-------- src/pip/_internal/metadata/importlib/_envs.py | 9 ++--- .../simplewheel-2.0-1-py2.py3-none-any.whl | Bin 1872 -> 2156 bytes tests/functional/test_install.py | 2 +- tests/functional/test_install_report.py | 2 +- 8 files changed, 50 insertions(+), 27 deletions(-) create mode 100644 news/12656.feature.rst create mode 100644 news/aa82171b-1578-4128-8db3-9aa72b3a6a84.trivial.rst diff --git a/news/12656.feature.rst b/news/12656.feature.rst new file mode 100644 index 00000000000..fdbba5484ba --- /dev/null +++ b/news/12656.feature.rst @@ -0,0 +1,3 @@ +Improve discovery performance of installed packages when the +``importlib.metadata`` backend is used to load distribution metadata +(used by default under Python 3.11+). diff --git a/news/aa82171b-1578-4128-8db3-9aa72b3a6a84.trivial.rst b/news/aa82171b-1578-4128-8db3-9aa72b3a6a84.trivial.rst new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/pip/_internal/metadata/importlib/_compat.py b/src/pip/_internal/metadata/importlib/_compat.py index 593bff23ede..76e45a5dc03 100644 --- a/src/pip/_internal/metadata/importlib/_compat.py +++ b/src/pip/_internal/metadata/importlib/_compat.py @@ -1,5 +1,8 @@ import importlib.metadata -from typing import Any, Optional, Protocol, cast +import os +from typing import Any, Optional, Protocol, Tuple, cast + +from pip._vendor.packaging.utils import NormalizedName, canonicalize_name class BadMetadata(ValueError): @@ -43,13 +46,39 @@ def get_info_location(d: importlib.metadata.Distribution) -> Optional[BasePath]: return getattr(d, "_path", None) -def get_dist_name(dist: importlib.metadata.Distribution) -> str: - """Get the distribution's project name. 
+def parse_name_and_version_from_info_directory( + dist: importlib.metadata.Distribution, +) -> Tuple[Optional[str], Optional[str]]: + """Get a name and version from the metadata directory name. + + This is much faster than reading distribution metadata. + """ + info_location = get_info_location(dist) + if info_location is None: + return None, None + + stem, suffix = os.path.splitext(info_location.name) + if suffix == ".dist-info" and stem.count("-") == 1: + name, version = stem.split("-") + return name, version + + if suffix == ".egg-info": + name = stem.split("-", 1)[0] + return name, None + + return None, None + + +def get_dist_canonical_name(dist: importlib.metadata.Distribution) -> NormalizedName: + """Get the distribution's normalized name. The ``name`` attribute is only available in Python 3.10 or later. We are targeting exactly that, but Mypy does not know this. """ + if name := parse_name_and_version_from_info_directory(dist)[0]: + return canonicalize_name(name) + name = cast(Any, dist).name if not isinstance(name, str): raise BadMetadata(dist, reason="invalid metadata entry 'name'") - return name + return canonicalize_name(name) diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index f65ccb1e706..ee1a84cc78e 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -1,6 +1,5 @@ import email.message import importlib.metadata -import os import pathlib import zipfile from typing import ( @@ -30,7 +29,11 @@ from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file -from ._compat import BasePath, get_dist_name +from ._compat import ( + BasePath, + get_dist_canonical_name, + parse_name_and_version_from_info_directory, +) class WheelDistribution(importlib.metadata.Distribution): @@ -153,25 +156,14 @@ def installed_location(self) -> Optional[str]: return None return 
normalize_path(str(self._installed_location)) - def _get_dist_name_from_location(self) -> Optional[str]: - """Try to get the name from the metadata directory name. - - This is much faster than reading metadata. - """ - if self._info_location is None: - return None - stem, suffix = os.path.splitext(self._info_location.name) - if suffix not in (".dist-info", ".egg-info"): - return None - return stem.split("-", 1)[0] - @property def canonical_name(self) -> NormalizedName: - name = self._get_dist_name_from_location() or get_dist_name(self._dist) - return canonicalize_name(name) + return get_dist_canonical_name(self._dist) @property def version(self) -> Version: + if version := parse_name_and_version_from_info_directory(self._dist)[1]: + return parse_version(version) return parse_version(self._dist.version) @property diff --git a/src/pip/_internal/metadata/importlib/_envs.py b/src/pip/_internal/metadata/importlib/_envs.py index 048dc55dcb2..7791db32471 100644 --- a/src/pip/_internal/metadata/importlib/_envs.py +++ b/src/pip/_internal/metadata/importlib/_envs.py @@ -15,7 +15,7 @@ from pip._internal.utils.deprecation import deprecated from pip._internal.utils.filetypes import WHEEL_EXTENSION -from ._compat import BadMetadata, BasePath, get_dist_name, get_info_location +from ._compat import BadMetadata, BasePath, get_dist_canonical_name, get_info_location from ._dists import Distribution logger = logging.getLogger(__name__) @@ -61,14 +61,13 @@ def _find_impl(self, location: str) -> Iterator[FoundResult]: for dist in importlib.metadata.distributions(path=[location]): info_location = get_info_location(dist) try: - raw_name = get_dist_name(dist) + name = get_dist_canonical_name(dist) except BadMetadata as e: logger.warning("Skipping %s due to %s", info_location, e.reason) continue - normalized_name = canonicalize_name(raw_name) - if normalized_name in self._found_names: + if name in self._found_names: continue - self._found_names.add(normalized_name) + 
self._found_names.add(name) yield dist, info_location def find(self, location: str) -> Iterator[BaseDistribution]: diff --git a/tests/data/packages/simplewheel-2.0-1-py2.py3-none-any.whl b/tests/data/packages/simplewheel-2.0-1-py2.py3-none-any.whl index cd34cf8a0463fea5047ffc1a8b67362646d3c7a3..ba852ba289574377fbcf49fb9d83d4646772aa42 100644 GIT binary patch delta 983 zcmcb>_eNlXza|$02pros(I-x~{t}1>!kP>+48@tb1v#nZ8L6o`x<+~ilN(v3)Iviz z8JI&hUyB0a(h6<{MwS=M3=Ci*)X&?`KirRti)(T}la!MZrV)B6nZ+f#nR#jX`o69q zjxLTNj`$3me35CEg92V-f?S>bgIw?#vUwtN9wQSo%Vd8xF?)HuhJ|~$y87TVYqC01 z*~E1`!s?jDOgt|kgHP+k=NpiM`h4n_SRP<7gD@7uCL6LzXy7vqDQdVT|7G#Clw-yf zj}kyD7#J9VxS`P-NFr&4#j0#*2rDF3(PA{fn~_Ow@?19YKqUrbb9P{wgEeaLnfjhh z++P93)G0tmqPY&qR9H;oGt-Yv+)p0G%qU>u#BC-t!toiofK^;k9mPm#77QbSkspdT o43T07HR_Q=Rf847U{FHP!)-Dv8!#dm*n#jM69dCkpne7h0F$Kt!2kdN delta 819 zcmaDOaDi`ve}D)B0|N)cv27E5;&kgT@c?;TK&%bK#hJMUIjQ9tsi`@-MtTOihI%QP z#U;9#d1?9jF0R4OL7o92p8kG%Ma3oDUq65Q%>VuIqeo1WrC6kOH3?|XO)W`GNi0d! z%PP*#n_R=PArRq=^Qm8A`GERCP9bDYNq#|mPHI_dj$TPciT1fOx+gq!&wKcueaOTx zIf_|IRgr)@!aZDFeI{>aE}I<5#-pJ^K(()Hh@*>Rh~s1t##w4g1QZ9kI{OE?Y<|a> z$2fU9tGl8IGs27LAq_N&fnoB0RtZIXfgIq?$RskkkzKqV6f7{%(6|7|#1~g6fekhU z$tmcejWBjHBQ9elA!z_5@FC_-j$;G&H`zX$-^^a1=ni=Eo*p4~hdAXlU$b z!sP~tW>}1%`|A3=IXpQ)i-UmrK@kN54UK`!cukbmhs0ZeH!B-R6FU&v0uA}g0^$Jx DpWfp< diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index eaea12a163c..ba1c75bc9ce 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1172,7 +1172,7 @@ def test_install_nonlocal_compatible_wheel( ) assert result.returncode == SUCCESS - distinfo = Path("scratch") / "target" / "simplewheel-2.0-1.dist-info" + distinfo = Path("scratch") / "target" / "simplewheel-2.0.dist-info" result.did_create(distinfo) # Test install without --target diff --git a/tests/functional/test_install_report.py b/tests/functional/test_install_report.py index a1e7f8375d9..a25de64a3d1 100644 --- a/tests/functional/test_install_report.py +++ 
b/tests/functional/test_install_report.py @@ -39,7 +39,7 @@ def test_install_report_basic( assert url.endswith("/packages/simplewheel-2.0-1-py2.py3-none-any.whl") assert ( simplewheel_report["download_info"]["archive_info"]["hash"] - == "sha256=191d6520d0570b13580bf7642c97ddfbb46dd04da5dd2cf7bef9f32391dfe716" + == "sha256=71e1ca6b16ae3382a698c284013f66504f2581099b2ce4801f60e9536236ceee" )