Skip to content

Commit

Permalink
importlib: Read distribution name/version from metadata directory nam…
Browse files Browse the repository at this point in the history
…e, if possible

importlib does not cache metadata in-memory, so querying even simple
attributes like distribution names and versions can quickly become
expensive (as each access requires reading METADATA). Fortunately,
`Distribution.canonical_name` is optimized to parse the metadata
directory name to query the name if possible. This commit extends this
optimization to the finder implementation and version attribute.

.egg-info directory names tend to not include the version so they are
not considered for optimizing version lookup.

simplewheel-2.0-1-py2.py3-none-any.whl had to be modified to rename the
.dist-info directory which mistakenly included the wheel build tag (in
violation of the wheel specification).

    simplewheel/__init__.py
    simplewheel-2.0-1.dist-info/DESCRIPTION.rst
    simplewheel-2.0-1.dist-info/metadata.json
    simplewheel-2.0-1.dist-info/top_level.txt
    simplewheel-2.0-1.dist-info/WHEEL
    simplewheel-2.0-1.dist-info/METADATA
    simplewheel-2.0-1.dist-info/RECORD

Otherwise, the build tag was mistaken for part of the version, which led
pip to treat the wheel as a post-release and broke tests.
  • Loading branch information
ichard26 committed May 8, 2024
1 parent 5545a15 commit d247c1e
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 27 deletions.
3 changes: 3 additions & 0 deletions news/12656.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Improve discovery performance of installed packages when the
``importlib.metadata`` backend is used to load distribution metadata
(used by default under Python 3.11+).
Empty file.
37 changes: 33 additions & 4 deletions src/pip/_internal/metadata/importlib/_compat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import importlib.metadata
from typing import Any, Optional, Protocol, cast
import os
from typing import Any, Optional, Protocol, Tuple, cast

from pip._vendor.packaging.utils import NormalizedName, canonicalize_name


class BadMetadata(ValueError):
Expand Down Expand Up @@ -43,13 +46,39 @@ def get_info_location(d: importlib.metadata.Distribution) -> Optional[BasePath]:
return getattr(d, "_path", None)


def get_dist_name(dist: importlib.metadata.Distribution) -> str:
"""Get the distribution's project name.
def parse_name_and_version_from_info_directory(
    dist: importlib.metadata.Distribution,
) -> Tuple[Optional[str], Optional[str]]:
    """Derive a project name and version from the metadata directory's name.

    This is much faster than reading distribution metadata.

    Returns ``(name, version)`` for a ``.dist-info`` directory whose stem
    contains exactly one hyphen, ``(name, None)`` for an ``.egg-info``
    directory, and ``(None, None)`` in every other case (including when the
    distribution has no info location).
    """
    location = get_info_location(dist)
    if location is None:
        return None, None

    base, extension = os.path.splitext(location.name)

    if extension == ".egg-info":
        # Only the name is taken; whatever follows the first hyphen is ignored.
        return base.partition("-")[0], None

    if extension == ".dist-info":
        pieces = base.split("-")
        # Only a stem with exactly one hyphen is split into name and version.
        if len(pieces) == 2:
            return pieces[0], pieces[1]

    return None, None


def get_dist_canonical_name(dist: importlib.metadata.Distribution) -> NormalizedName:
    """Get the distribution's normalized name.

    The name is parsed from the metadata directory name when possible;
    otherwise it is read from the distribution's ``name`` attribute. Either
    way the result is passed through ``canonicalize_name``.

    The ``name`` attribute is only available in Python 3.10 or later. We are
    targeting exactly that, but Mypy does not know this.

    Raises ``BadMetadata`` if the ``name`` attribute is not a string.
    """
    if name := parse_name_and_version_from_info_directory(dist)[0]:
        return canonicalize_name(name)

    name = cast(Any, dist).name
    if not isinstance(name, str):
        raise BadMetadata(dist, reason="invalid metadata entry 'name'")
    # Always return the canonical form so the result matches the annotated
    # NormalizedName type on both code paths.
    return canonicalize_name(name)
24 changes: 8 additions & 16 deletions src/pip/_internal/metadata/importlib/_dists.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import email.message
import importlib.metadata
import os
import pathlib
import zipfile
from typing import (
Expand Down Expand Up @@ -30,7 +29,11 @@
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file

from ._compat import BasePath, get_dist_name
from ._compat import (
BasePath,
get_dist_canonical_name,
parse_name_and_version_from_info_directory,
)


class WheelDistribution(importlib.metadata.Distribution):
Expand Down Expand Up @@ -153,25 +156,14 @@ def installed_location(self) -> Optional[str]:
return None
return normalize_path(str(self._installed_location))

def _get_dist_name_from_location(self) -> Optional[str]:
"""Try to get the name from the metadata directory name.
This is much faster than reading metadata.
"""
if self._info_location is None:
return None
stem, suffix = os.path.splitext(self._info_location.name)
if suffix not in (".dist-info", ".egg-info"):
return None
return stem.split("-", 1)[0]

@property
def canonical_name(self) -> NormalizedName:
    """Normalized project name of the wrapped distribution.

    Delegates to ``get_dist_canonical_name`` so that this property and the
    environment finder resolve names through the same helper.
    """
    return get_dist_canonical_name(self._dist)

@property
def version(self) -> Version:
    """Parsed version of the wrapped distribution.

    Uses the version taken from the metadata directory name when one is
    available, and otherwise falls back to the distribution's ``version``
    attribute.
    """
    _, dir_version = parse_name_and_version_from_info_directory(self._dist)
    raw_version = dir_version or self._dist.version
    return parse_version(raw_version)

@property
Expand Down
9 changes: 4 additions & 5 deletions src/pip/_internal/metadata/importlib/_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pip._internal.utils.deprecation import deprecated
from pip._internal.utils.filetypes import WHEEL_EXTENSION

from ._compat import BadMetadata, BasePath, get_dist_name, get_info_location
from ._compat import BadMetadata, BasePath, get_dist_canonical_name, get_info_location
from ._dists import Distribution

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -61,14 +61,13 @@ def _find_impl(self, location: str) -> Iterator[FoundResult]:
for dist in importlib.metadata.distributions(path=[location]):
info_location = get_info_location(dist)
try:
raw_name = get_dist_name(dist)
name = get_dist_canonical_name(dist)
except BadMetadata as e:
logger.warning("Skipping %s due to %s", info_location, e.reason)
continue
normalized_name = canonicalize_name(raw_name)
if normalized_name in self._found_names:
if name in self._found_names:
continue
self._found_names.add(normalized_name)
self._found_names.add(name)
yield dist, info_location

def find(self, location: str) -> Iterator[BaseDistribution]:
Expand Down
Binary file modified tests/data/packages/simplewheel-2.0-1-py2.py3-none-any.whl
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/functional/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -1172,7 +1172,7 @@ def test_install_nonlocal_compatible_wheel(
)
assert result.returncode == SUCCESS

distinfo = Path("scratch") / "target" / "simplewheel-2.0-1.dist-info"
distinfo = Path("scratch") / "target" / "simplewheel-2.0.dist-info"
result.did_create(distinfo)

# Test install without --target
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_install_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_install_report_basic(
assert url.endswith("/packages/simplewheel-2.0-1-py2.py3-none-any.whl")
assert (
simplewheel_report["download_info"]["archive_info"]["hash"]
== "sha256=191d6520d0570b13580bf7642c97ddfbb46dd04da5dd2cf7bef9f32391dfe716"
== "sha256=71e1ca6b16ae3382a698c284013f66504f2581099b2ce4801f60e9536236ceee"
)


Expand Down

0 comments on commit d247c1e

Please sign in to comment.