diff --git a/pex/dist_metadata.py b/pex/dist_metadata.py index 34f347b5e..39cc3f2ee 100644 --- a/pex/dist_metadata.py +++ b/pex/dist_metadata.py @@ -4,34 +4,205 @@ from __future__ import absolute_import +import os +import tarfile +import zipfile +from collections import namedtuple +from contextlib import closing from email.message import Message from email.parser import Parser +from pex import pex_warnings +from pex.common import open_zip from pex.third_party.packaging.specifiers import SpecifierSet from pex.third_party.pkg_resources import DistInfoDistribution, Distribution, Requirement -from pex.typing import TYPE_CHECKING +from pex.typing import TYPE_CHECKING, cast if TYPE_CHECKING: - from typing import Dict, Iterator, Optional + from typing import Dict, Iterator, Optional, Text, Union + + DistributionLike = Union[Distribution, str] + + +class MetadataError(Exception): + """Indicates an error reading distribution metadata.""" _PKG_INFO_BY_DIST = {} # type: Dict[Distribution, Optional[Message]] -def _parse_pkg_info(dist): +def _strip_sdist_path(sdist_path): + # type: (str) -> Optional[str] + if not sdist_path.endswith((".sdist", ".tar.gz", ".zip")): + return None + + sdist_basename = os.path.basename(sdist_path) + filename, _ = os.path.splitext(sdist_basename) + if filename.endswith(".tar"): + filename, _ = os.path.splitext(filename) + return filename + + +def _parse_message(message): + # type: (Text) -> Message + return cast(Message, Parser().parsestr(message)) + + +def _parse_sdist_package_info(sdist_path): + # type: (str) -> Optional[Message] + sdist_filename = _strip_sdist_path(sdist_path) + if sdist_filename is None: + return None + + pkg_info_path = os.path.join(sdist_filename, Distribution.PKG_INFO) + + if zipfile.is_zipfile(sdist_path): + with open_zip(sdist_path) as zip: + try: + return _parse_message(zip.read(pkg_info_path).decode("utf-8")) + except KeyError as e: + pex_warnings.warn( + "Source distribution {} did not have the expected metadata file {}: {}".format( + sdist_path, pkg_info_path, e + ) + ) + return None + + if tarfile.is_tarfile(sdist_path): + with tarfile.open(sdist_path) as tf: + try: + pkg_info = tf.extractfile(pkg_info_path) + if pkg_info is None: + # N.B.: `extractfile` returns None for directories and special files. + return None + with closing(pkg_info) as fp: + return _parse_message(fp.read().decode("utf-8")) + except KeyError as e: + pex_warnings.warn( + "Source distribution {} did not have the expected metadata file {}: {}".format( + sdist_path, pkg_info_path, e + ) + ) + return None + + return None + + +def _parse_wheel_package_info(wheel_path): + # type: (str) -> Optional[Message] + if not wheel_path.endswith(".whl") or not zipfile.is_zipfile(wheel_path): + return None + project_name, version, _ = os.path.basename(wheel_path).split("-", 2) + dist_info_dir = "{}-{}.dist-info".format(project_name, version) + with open_zip(wheel_path) as whl: + with whl.open(os.path.join(dist_info_dir, DistInfoDistribution.PKG_INFO)) as fp: + return _parse_message(fp.read().decode("utf-8")) + + +def _parse_distribution_package_info(dist): # type: (Distribution) -> Optional[Message] + if not dist.has_metadata(DistInfoDistribution.PKG_INFO): + return None + metadata = dist.get_metadata(DistInfoDistribution.PKG_INFO) + return _parse_message(metadata) + + +def _parse_pkg_info(dist): + # type: (DistributionLike) -> Optional[Message] if dist not in _PKG_INFO_BY_DIST: - if not dist.has_metadata(DistInfoDistribution.PKG_INFO): - pkg_info = None + if isinstance(dist, Distribution): + pkg_info = _parse_distribution_package_info(dist) + elif dist.endswith(".whl"): + pkg_info = _parse_wheel_package_info(dist) else: - metadata = dist.get_metadata(DistInfoDistribution.PKG_INFO) - pkg_info = Parser().parsestr(metadata) + pkg_info = _parse_sdist_package_info(dist) _PKG_INFO_BY_DIST[dist] = pkg_info return _PKG_INFO_BY_DIST[dist] +class ProjectNameAndVersion(namedtuple("ProjectNameAndVersion", ["project_name", "version"])): + @classmethod + def from_parsed_pkg_info(cls, source, pkg_info): + # type: (DistributionLike, Message) -> ProjectNameAndVersion + project_name = pkg_info.get("Name", None) + version = pkg_info.get("Version", None) + if project_name is None or version is None: + raise MetadataError( + "The 'Name' and 'Version' fields are not both present in package metadata for " + "{source}:\n{fields}".format( + source=source, + fields="\n".join("{}: {}".format(k, v) for k, v in pkg_info.items()), + ) + ) + return cls(project_name=pkg_info["Name"], version=pkg_info["Version"]) + + @classmethod + def from_distribution(cls, dist): + # type: (Distribution) -> ProjectNameAndVersion + project_name = dist.project_name + try: + version = dist.version + except ValueError as e: + raise MetadataError( + "The version could not be determined for project {} @ {}: {}".format( + project_name, dist.location, e + ) + ) + return cls(project_name=project_name, version=version) + + @classmethod + def from_filename(cls, path): + # type: (str) -> ProjectNameAndVersion + # Handle wheels: + # + # The wheel filename convention is specified here: + # https://www.python.org/dev/peps/pep-0427/#file-name-convention. + if path.endswith(".whl"): + project_name, version, _ = os.path.basename(path).split("-", 2) + return cls(project_name=project_name, version=version) + + # Handle sdists: + # + # The sdist name format has no accepted specification yet, but there is a proposal here: + # https://www.python.org/dev/peps/pep-0625/#specification. + # + # We do the best we can to support the current landscape. A version number can technically + # contain a dash though, even under the standards, in un-normalized form: + # https://www.python.org/dev/peps/pep-0440/#pre-release-separators. + # For those cases this logic will produce incorrect results and it does not seem there is + # much we can do since both project names and versions can contain both alphanumeric + # characters and dashes. + fname = _strip_sdist_path(path) + if fname is not None: + project_name, version = fname.rsplit("-", 1) + return cls(project_name=project_name, version=version) + + raise MetadataError( + "The distribution at path {!r} does not have a file name matching known sdist or wheel " + "file name formats.".format(path) + ) + + +def project_name_and_version(dist, fallback_to_filename=True): + # type: (DistributionLike, bool) -> Optional[ProjectNameAndVersion] + """Extracts name and version metadata from dist. + + :param dist: A distribution to extract project name and version metadata from. + :return: The project name and version. + :raise: MetadataError if dist has invalid metadata. + """ + pkg_info = _parse_pkg_info(dist) + if pkg_info is not None: + return ProjectNameAndVersion.from_parsed_pkg_info(dist, pkg_info) + if isinstance(dist, Distribution): + return ProjectNameAndVersion.from_distribution(dist) + if fallback_to_filename: + return ProjectNameAndVersion.from_filename(dist) + return None + + def requires_python(dist): - # type: (Distribution) -> Optional[SpecifierSet] + # type: (DistributionLike) -> Optional[SpecifierSet] """Examines dist for `Python-Requires` metadata and returns version constraints if any. See: https://www.python.org/dev/peps/pep-0345/#requires-python @@ -50,7 +221,7 @@ def requires_python(dist): def requires_dists( - dist, # type: Distribution + dist, # type: DistributionLike include_1_1_requires=True, # type: bool ): # type: (...) -> Iterator[Requirement] diff --git a/tests/test_dist_metadata.py b/tests/test_dist_metadata.py index 4bcda883a..1405fb38f 100644 --- a/tests/test_dist_metadata.py +++ b/tests/test_dist_metadata.py @@ -4,83 +4,249 @@ from __future__ import absolute_import import os +import tarfile +import warnings from contextlib import contextmanager -from pex.common import temporary_dir -from pex.dist_metadata import requires_dists, requires_python +import pytest + +from pex.common import open_zip, temporary_dir +from pex.dist_metadata import ( + MetadataError, + ProjectNameAndVersion, + project_name_and_version, + requires_dists, + requires_python, +) +from pex.pex_warnings import PEXWarning from pex.pip import get_pip from pex.third_party.packaging.specifiers import SpecifierSet from pex.third_party.pkg_resources import Distribution, Requirement +from pex.typing import TYPE_CHECKING from pex.util import DistributionHelper +from pex.variables import ENV - -def install_wheel( - wheel_path, # type: str - install_dir, # type: str -): - # type: (...) -> Distribution - get_pip().spawn_install_wheel(wheel=wheel_path, install_dir=install_dir).wait() - dist = DistributionHelper.distribution_from_path(install_dir) - assert dist is not None, "Could not load a distribution from {}".format(install_dir) - return dist +if TYPE_CHECKING: + from typing import Tuple, Iterator, Any -def example_package(name): - # type: (str) -> str - return os.path.join("./tests/example_packages", name) +@contextmanager +def installed_wheel(wheel_path): + # type: (str) -> Iterator[Distribution] + with temporary_dir() as install_dir: + get_pip().spawn_install_wheel(wheel=wheel_path, install_dir=install_dir).wait() + dist = DistributionHelper.distribution_from_path(install_dir) + assert dist is not None, "Could not load a distribution from {}.".format(install_dir) + yield dist @contextmanager def example_distribution(name): - # type: (str) -> Distribution - wheel_path = example_package(name) - with temporary_dir() as install_dir: - yield install_wheel(wheel_path, install_dir=install_dir) + # type: (str) -> Iterator[Tuple[str, Distribution]] + wheel_path = os.path.join("./tests/example_packages", name) + with installed_wheel(wheel_path) as distribution: + yield wheel_path, distribution @contextmanager -def resolved_distribution(requirement): - # type: (str) -> Distribution +def downloaded_sdist(requirement): + # type: (str) -> Iterator[str] with temporary_dir() as td: download_dir = os.path.join(td, "download") get_pip().spawn_download_distributions( - download_dir=download_dir, requirements=[requirement], transitive=False + download_dir=download_dir, + requirements=[requirement], + transitive=False, + use_wheel=False, ).wait() - wheels = os.listdir(download_dir) - assert len(wheels) == 1, "Expected 1 wheel to be downloaded for {}".format(requirement) - wheel_path = os.path.join(download_dir, wheels[0]) - install_dir = os.path.join(td, "install") - yield install_wheel(wheel_path, install_dir=install_dir) + dists = os.listdir(download_dir) + assert len(dists) == 1, "Expected 1 dist to be downloaded for {}.".format(requirement) + sdist = os.path.join(download_dir, dists[0]) + assert sdist.endswith((".sdist", ".tar.gz", ".zip")) + yield sdist + + +def as_requirement(project_name_and_version): + # type: (ProjectNameAndVersion) -> str + return "{}=={}".format(project_name_and_version.project_name, project_name_and_version.version) + + +PYGOOGLEEARTH_PROJECT_NAME_AND_VERSION = ProjectNameAndVersion("pygoogleearth", "0.0.2") + + +@pytest.fixture(scope="module") +def pygoogleearth_zip_sdist(): + # type: () -> Iterator[str] + with downloaded_sdist(as_requirement(PYGOOGLEEARTH_PROJECT_NAME_AND_VERSION)) as sdist: + assert sdist.endswith(".zip") + yield sdist + + +PIP_PROJECT_NAME_AND_VERSION = ProjectNameAndVersion("pip", "20.3.1") + +@pytest.fixture(scope="module") +def pip_tgz_sdist(): + # type: () -> Iterator[str] + with downloaded_sdist(as_requirement(PIP_PROJECT_NAME_AND_VERSION)) as sdist: + assert sdist.endswith(".tar.gz") + yield sdist -def test_requires_python(): + +@pytest.fixture(scope="module") +def pip_wheel(pip_tgz_sdist): + # type: (str) -> Iterator[str] + with temporary_dir() as wheel_dir: + get_pip().spawn_build_wheels([pip_tgz_sdist], wheel_dir=wheel_dir).wait() + wheels = os.listdir(wheel_dir) + assert len(wheels) == 1, "Expected 1 wheel to be built for {}.".format(pip_tgz_sdist) + wheel = os.path.join(wheel_dir, wheels[0]) + assert wheel.endswith(".whl") + yield wheel + + +@pytest.fixture(scope="module") +def pip_distribution(pip_wheel): + # type: (str) -> Iterator[Distribution] + with installed_wheel(pip_wheel) as distribution: + yield distribution + + +def test_project_name_and_version_from_filename( + pygoogleearth_zip_sdist, # type: str + pip_tgz_sdist, # type: str + pip_wheel, # type: str +): + # type: (...) -> None + assert PYGOOGLEEARTH_PROJECT_NAME_AND_VERSION == ProjectNameAndVersion.from_filename( + pygoogleearth_zip_sdist + ) + assert PIP_PROJECT_NAME_AND_VERSION == ProjectNameAndVersion.from_filename(pip_tgz_sdist) + assert PIP_PROJECT_NAME_AND_VERSION == ProjectNameAndVersion.from_filename(pip_wheel) + + +def test_project_name_and_version_from_filename_pep625(): # type: () -> None - with resolved_distribution("pip==20.3.1") as dist: - assert SpecifierSet(">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*") == requires_python( - dist - ) + assert ProjectNameAndVersion( + "a-distribution-name", "1.2.3" + ) == ProjectNameAndVersion.from_filename("a-distribution-name-1.2.3.sdist") -def test_requires_python_none(): +def test_project_name_and_version_from_filename_invalid(): # type: () -> None - with example_distribution("aws_cfn_bootstrap-1.4-py2-none-any.whl") as dist: + with pytest.raises(MetadataError): + ProjectNameAndVersion.from_filename("unknown_distribution.format") + + +def test_project_name_and_version_from_metadata( + pygoogleearth_zip_sdist, # type: str + pip_tgz_sdist, # type: str + pip_wheel, # type: str + pip_distribution, # type: Distribution +): + # type: (...) -> None + assert PYGOOGLEEARTH_PROJECT_NAME_AND_VERSION == project_name_and_version( + pygoogleearth_zip_sdist, fallback_to_filename=False + ) + assert PIP_PROJECT_NAME_AND_VERSION == project_name_and_version( + pip_tgz_sdist, fallback_to_filename=False + ) + assert PIP_PROJECT_NAME_AND_VERSION == project_name_and_version( + pip_wheel, fallback_to_filename=False + ) + assert PIP_PROJECT_NAME_AND_VERSION == project_name_and_version( + pip_distribution, fallback_to_filename=False + ) + + +def test_project_name_and_version_fallback(tmpdir): + # type: (Any) -> None + def tmp_path(relpath): + # type: (str) -> str + return os.path.join(str(tmpdir), relpath) + + expected_metadata_project_name_and_version = ProjectNameAndVersion("foo", "1.2.3") + + pkg_info_src = tmp_path("PKG-INFO") + with open(pkg_info_src, "w") as fp: + fp.write("Name: {}\n".format(expected_metadata_project_name_and_version.project_name)) + fp.write("Version: {}\n".format(expected_metadata_project_name_and_version.version)) + + sdist_path = tmp_path("bar-baz-4.5.6.tar.gz") + with tarfile.open(sdist_path, mode="w:gz") as tf: + # N.B.: Valid PKG-INFO at an invalid location. + tf.add(pkg_info_src, arcname="PKG-INFO") + + with ENV.patch(PEX_EMIT_WARNINGS="True"), warnings.catch_warnings(record=True) as events: + assert project_name_and_version(sdist_path, fallback_to_filename=False) is None + assert 1 == len(events) + warning = events[0] + assert PEXWarning == warning.category + assert "bar-baz-4.5.6/PKG-INFO" in str(warning.message) + + assert ProjectNameAndVersion("bar-baz", "4.5.6") == project_name_and_version( + sdist_path, fallback_to_filename=True + ) + + name_and_version = "eggs-7.8.9" + pkf_info_path = "{}/PKG-INFO".format(name_and_version) + + def write_sdist_tgz(extension): + sdist_path = tmp_path("{}.{}".format(name_and_version, extension)) + with tarfile.open(sdist_path, mode="w:gz") as tf: + tf.add(pkg_info_src, arcname=pkf_info_path) + return sdist_path + + assert expected_metadata_project_name_and_version == project_name_and_version( + write_sdist_tgz("tar.gz"), fallback_to_filename=False + ) + assert expected_metadata_project_name_and_version == project_name_and_version( + write_sdist_tgz("sdist"), fallback_to_filename=False + ) + + zip_sdist_path = tmp_path("{}.zip".format(name_and_version)) + with open_zip(zip_sdist_path, mode="w") as zf: + zf.write(pkg_info_src, arcname=pkf_info_path) + + assert expected_metadata_project_name_and_version == project_name_and_version( + zip_sdist_path, fallback_to_filename=False + ) + + +def test_requires_python( + pip_tgz_sdist, # type: str + pip_wheel, # type: str + pip_distribution, # type: Distribution +): + # type: (...) -> None + expected_requires_python = SpecifierSet(">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*") + assert expected_requires_python == requires_python(pip_tgz_sdist) + assert expected_requires_python == requires_python(pip_wheel) + assert expected_requires_python == requires_python(pip_distribution) + + +def test_requires_python_none(pygoogleearth_zip_sdist): + # type: (str) -> None + assert requires_python(pygoogleearth_zip_sdist) is None + with example_distribution("aws_cfn_bootstrap-1.4-py2-none-any.whl") as (wheel_path, dist): + assert requires_python(wheel_path) is None assert requires_python(dist) is None def test_requires_dists(): # type: () -> None - with example_distribution("aws_cfn_bootstrap-1.4-py2-none-any.whl") as dist: - assert [ + with example_distribution("aws_cfn_bootstrap-1.4-py2-none-any.whl") as (wheel_path, dist): + expected_requirements = [ Requirement.parse(req) - for req in ( - "python-daemon>=1.5.2,<2.0", - "pystache>=0.4.0", - "setuptools", - ) - ] == list(requires_dists(dist)) + for req in ("python-daemon>=1.5.2,<2.0", "pystache>=0.4.0", "setuptools") + ] + assert expected_requirements == list(requires_dists(wheel_path)) + assert expected_requirements == list(requires_dists(dist)) -def test_requires_dists_none(): - # type: () -> None - with example_distribution("MarkupSafe-1.0-cp27-cp27mu-linux_x86_64.whl") as dist: +def test_requires_dists_none(pygoogleearth_zip_sdist): + # type: (str) -> None + assert [] == list(requires_dists(pygoogleearth_zip_sdist)) + with example_distribution("MarkupSafe-1.0-cp27-cp27mu-linux_x86_64.whl") as (wheel_path, dist): + assert [] == list(requires_dists(wheel_path)) assert [] == list(requires_dists(dist))