Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better freeze of distributions installed from direct URL references #7612

Merged
merged 8 commits into from
Apr 1, 2020
2 changes: 2 additions & 0 deletions news/609.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pip now implements PEP 610, so ``pip freeze`` has better fidelity
in the presence of distributions installed from Direct URL requirements.
34 changes: 32 additions & 2 deletions src/pip/_internal/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,16 @@ def __init__(self, format_control):
)


class CacheEntry(object):
    """A link to a cached item plus a flag telling which cache held it.

    ``persistent`` is True when the item came from the persistent cache
    and False when it came from the ephemeral one.
    """

    def __init__(
        self,
        link,  # type: Link
        persistent,  # type: bool
    ):
        self.persistent = persistent
        self.link = link


class WheelCache(Cache):
"""Wraps EphemWheelCache and SimpleWheelCache into a single Cache

Expand Down Expand Up @@ -304,16 +314,36 @@ def get(
supported_tags, # type: List[Tag]
):
# type: (...) -> Link
cache_entry = self.get_cache_entry(link, package_name, supported_tags)
if cache_entry is None:
return link
return cache_entry.link

def get_cache_entry(
    self,
    link,  # type: Link
    package_name,  # type: Optional[str]
    supported_tags,  # type: List[Tag]
):
    # type: (...) -> Optional[CacheEntry]
    """Returns a CacheEntry with a link to a cached item if it exists or
    None. The cache entry indicates if the item was found in the persistent
    or ephemeral cache.
    """
    # A lookup result that is the very same ``link`` object is treated as
    # a miss; anything else is a hit and gets wrapped in a CacheEntry.
    retval = self._wheel_cache.get(
        link=link,
        package_name=package_name,
        supported_tags=supported_tags,
    )
    if retval is not link:
        return CacheEntry(retval, persistent=True)

    # Nothing in the first cache: fall back to the ephemeral one.
    retval = self._ephem_cache.get(
        link=link,
        package_name=package_name,
        supported_tags=supported_tags,
    )
    if retval is not link:
        return CacheEntry(retval, persistent=False)

    return None
245 changes: 245 additions & 0 deletions src/pip/_internal/models/direct_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
""" PEP 610 """
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this file would likely end up in https://github.com/pypa/packaging once the PEP is accepted?

Copy link
Member Author

@sbidoul sbidoul Feb 6, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly. I've been careful to put only (and all) the PEP 610 implementation in this file with the corresponding tests, so it can be extracted easily. pip-specific stuff is in direct_url_helpers.py.

import json
import re

from pip._vendor import six
from pip._vendor.six.moves.urllib import parse as urllib_parse

from pip._internal.utils.typing import MYPY_CHECK_RUNNING

if MYPY_CHECK_RUNNING:
from typing import (
Any, Dict, Iterable, Optional, Type, TypeVar, Union
)

T = TypeVar("T")


DIRECT_URL_METADATA_NAME = "direct_url.json"
ENV_VAR_RE = re.compile(r"^\$\{[A-Za-z0-9-_]+\}(:\$\{[A-Za-z0-9-_]+\})?$")

__all__ = [
"DirectUrl",
"DirectUrlValidationError",
"DirInfo",
"ArchiveInfo",
"VcsInfo",
]


class DirectUrlValidationError(Exception):
    """Raised when direct URL data has a missing, surplus or mistyped
    value.
    """


def _get(d, expected_type, key, default=None):
# type: (Dict[str, Any], Type[T], str, Optional[T]) -> Optional[T]
"""Get value from dictionary and verify expected type."""
if key not in d:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try:
    value = d[key]
except KeyError:
    return default

is more Pythonic IMO.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. Is it? I've used both over time and I now try to avoid exceptions for flow control.

return default
value = d[key]
if six.PY2 and expected_type is str:
expected_type = six.string_types # type: ignore
if not isinstance(value, expected_type):
raise DirectUrlValidationError(
"{!r} has unexpected type for {} (expected {})".format(
value, key, expected_type
)
)
return value


def _get_required(d, expected_type, key, default=None):
    # type: (Dict[str, Any], Type[T], str, Optional[T]) -> T
    """Get a type-checked value via ``_get`` and insist it is not None.

    Raises DirectUrlValidationError when the looked-up value (or the
    default that stood in for a missing key) is None.
    """
    found = _get(d, expected_type, key, default)
    if found is not None:
        return found
    raise DirectUrlValidationError("{} must have a value".format(key))


def _exactly_one_of(infos):
# type: (Iterable[Optional[InfoType]]) -> InfoType
infos = [info for info in infos if info is not None]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
infos = [info for info in infos if info is not None]
infos = list(filter(None, infos))

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

filter(None, iterable) removes falsy entries, not just None. It’s probably not a problem here, but I prefer the current comprehension implementation for clarity.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also find list comprehension more readable, as it's more explicit. With filter I have to think for a fraction of a second what None means as a filter function.

if not infos:
raise DirectUrlValidationError(
"missing one of archive_info, dir_info, vcs_info"
)
if len(infos) > 1:
raise DirectUrlValidationError(
"more than one of archive_info, dir_info, vcs_info"
)
assert infos[0] is not None
return infos[0]


def _filter_none(**kwargs):
# type: (Any) -> Dict[str, Any]
"""Make dict excluding None values."""
return {k: v for k, v in kwargs.items() if v is not None}


class VcsInfo(object):
    """The ``vcs_info`` part of a direct URL: VCS name, commit id and
    optional revision details.
    """

    # Key under which this info is stored in the serialized dict.
    name = "vcs_info"

    def __init__(
        self,
        vcs,  # type: str
        commit_id,  # type: str
        requested_revision=None,  # type: Optional[str]
        resolved_revision=None,  # type: Optional[str]
        resolved_revision_type=None,  # type: Optional[str]
    ):
        self.vcs = vcs
        self.commit_id = commit_id
        self.requested_revision = requested_revision
        self.resolved_revision = resolved_revision
        self.resolved_revision_type = resolved_revision_type

    @classmethod
    def _from_dict(cls, d):
        # type: (Optional[Dict[str, Any]]) -> Optional[VcsInfo]
        """Build an instance from a parsed dict; ``None`` maps to ``None``."""
        if d is None:
            return None
        # Required keys first, then the optional ones.
        vcs = _get_required(d, str, "vcs")
        commit_id = _get_required(d, str, "commit_id")
        requested_revision = _get(d, str, "requested_revision")
        resolved_revision = _get(d, str, "resolved_revision")
        resolved_revision_type = _get(d, str, "resolved_revision_type")
        return cls(
            vcs=vcs,
            commit_id=commit_id,
            requested_revision=requested_revision,
            resolved_revision=resolved_revision,
            resolved_revision_type=resolved_revision_type,
        )

    def _to_dict(self):
        # type: () -> Dict[str, Any]
        """Serialize to a dict, omitting attributes that are None."""
        as_dict = _filter_none(
            vcs=self.vcs,
            requested_revision=self.requested_revision,
            commit_id=self.commit_id,
            resolved_revision=self.resolved_revision,
            resolved_revision_type=self.resolved_revision_type,
        )
        return as_dict


class ArchiveInfo(object):
    """The ``archive_info`` part of a direct URL, carrying an optional
    hash string.
    """

    # Key under which this info is stored in the serialized dict.
    name = "archive_info"

    def __init__(
        self,
        hash=None,  # type: Optional[str]
    ):
        self.hash = hash

    @classmethod
    def _from_dict(cls, d):
        # type: (Optional[Dict[str, Any]]) -> Optional[ArchiveInfo]
        """Build an instance from a parsed dict; ``None`` maps to ``None``."""
        if d is not None:
            return cls(hash=_get(d, str, "hash"))
        return None

    def _to_dict(self):
        # type: () -> Dict[str, Any]
        """Serialize to a dict; the hash is left out when it is None."""
        as_dict = _filter_none(hash=self.hash)
        return as_dict


class DirInfo(object):
    """The ``dir_info`` part of a direct URL, carrying the editable flag."""

    # Key under which this info is stored in the serialized dict.
    name = "dir_info"

    def __init__(
        self,
        editable=False,  # type: bool
    ):
        self.editable = editable

    @classmethod
    def _from_dict(cls, d):
        # type: (Optional[Dict[str, Any]]) -> Optional[DirInfo]
        """Build an instance from a parsed dict; ``None`` maps to ``None``.

        A missing ``editable`` key is read as False.
        """
        if d is not None:
            editable = _get_required(d, bool, "editable", default=False)
            return cls(editable=editable)
        return None

    def _to_dict(self):
        # type: () -> Dict[str, Any]
        """Serialize to a dict; ``editable`` only appears when it is truthy."""
        # A falsy flag is turned into None so that _filter_none drops it.
        editable = self.editable if self.editable else None
        return _filter_none(editable=editable)


if MYPY_CHECK_RUNNING:
InfoType = Union[ArchiveInfo, DirInfo, VcsInfo]


class DirectUrl(object):
    """A PEP 610 direct URL: a URL, an info object and an optional
    subdirectory, convertible to and from dict and JSON form.

    ``info`` is an ``ArchiveInfo``, ``DirInfo`` or ``VcsInfo`` instance.
    """

    def __init__(
        self,
        url,  # type: str
        info,  # type: InfoType
        subdirectory=None,  # type: Optional[str]
    ):
        self.url = url
        self.info = info
        self.subdirectory = subdirectory

    def _remove_auth_from_netloc(self, netloc):
        # type: (str) -> str
        """Return ``netloc`` with its ``user:password@`` prefix removed.

        The prefix is kept when it is the literal user ``git`` of a git
        VCS URL, or when it matches ``ENV_VAR_RE``.
        """
        if "@" not in netloc:
            return netloc
        # Split on the LAST "@": the host is what follows the final "@",
        # so an unescaped "@" inside the password must not leave a piece
        # of the credentials in the value we return.
        user_pass, netloc_no_user_pass = netloc.rsplit("@", 1)
        if (
            isinstance(self.info, VcsInfo) and
            self.info.vcs == "git" and
            user_pass == "git"
        ):
            return netloc
        if ENV_VAR_RE.match(user_pass):
            return netloc
        return netloc_no_user_pass

    @property
    def redacted_url(self):
        # type: () -> str
        """url with user:password part removed unless it is formed with
        environment variables as specified in PEP 610, or it is ``git``
        in the case of a git URL.
        """
        purl = urllib_parse.urlsplit(self.url)
        netloc = self._remove_auth_from_netloc(purl.netloc)
        surl = urllib_parse.urlunsplit(
            (purl.scheme, netloc, purl.path, purl.query, purl.fragment)
        )
        return surl

    def validate(self):
        # type: () -> None
        """Check this object by round-tripping it through its dict form.

        Raises DirectUrlValidationError if a value is missing or has an
        unexpected type.
        """
        self.from_dict(self.to_dict())

    @classmethod
    def from_dict(cls, d):
        # type: (Dict[str, Any]) -> DirectUrl
        """Build a DirectUrl from a parsed dict.

        Exactly one of the ``archive_info``, ``dir_info`` and ``vcs_info``
        keys must be present; otherwise DirectUrlValidationError is raised.
        """
        return DirectUrl(
            url=_get_required(d, str, "url"),
            subdirectory=_get(d, str, "subdirectory"),
            info=_exactly_one_of(
                [
                    ArchiveInfo._from_dict(_get(d, dict, "archive_info")),
                    DirInfo._from_dict(_get(d, dict, "dir_info")),
                    VcsInfo._from_dict(_get(d, dict, "vcs_info")),
                ]
            ),
        )

    def to_dict(self):
        # type: () -> Dict[str, Any]
        """Serialize to a dict.

        The URL is written in its redacted form, a None subdirectory is
        omitted, and the info object is stored under its own ``name`` key.
        """
        res = _filter_none(
            url=self.redacted_url,
            subdirectory=self.subdirectory,
        )
        res[self.info.name] = self.info._to_dict()
        return res

    @classmethod
    def from_json(cls, s):
        # type: (str) -> DirectUrl
        """Parse a JSON string and build a DirectUrl from the result."""
        return cls.from_dict(json.loads(s))

    def to_json(self):
        # type: () -> str
        """Serialize to a JSON string with sorted keys."""
        return json.dumps(self.to_dict(), sort_keys=True)
16 changes: 16 additions & 0 deletions src/pip/_internal/operations/freeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
install_req_from_line,
)
from pip._internal.req.req_file import COMMENT_RE
from pip._internal.utils.direct_url_helpers import (
direct_url_as_pep440_direct_reference,
dist_get_direct_url,
)
from pip._internal.utils.misc import (
dist_is_editable,
get_installed_distributions,
Expand Down Expand Up @@ -250,8 +254,20 @@ def __init__(self, name, req, editable, comments=()):
@classmethod
def from_dist(cls, dist):
    # type: (Distribution) -> FrozenRequirement
    """Build a FrozenRequirement for an installed distribution.

    The requirement reported by ``get_requirement_info`` is used when
    there is one. Otherwise, for non-editable distributions, PEP 610
    metadata is tried, and finally a name==version requirement is used.
    """
    # TODO `get_requirement_info` is taking care of editable requirements.
    # TODO This should be refactored when we add detection of editables
    #      that provide .dist-info metadata.
    req, editable, comments = get_requirement_info(dist)
    if req is None:
        if not editable:
            # if PEP 610 metadata is present, attempt to use it
            pep610_url = dist_get_direct_url(dist)
            if pep610_url:
                req = direct_url_as_pep440_direct_reference(
                    pep610_url, dist.project_name
                )
                comments = []
        if req is None:
            # name==version requirement
            req = dist.as_requirement()

    return cls(dist.project_name, req, editable, comments=comments)
Expand Down
14 changes: 13 additions & 1 deletion src/pip/_internal/operations/install/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from pip._internal.exceptions import InstallationError
from pip._internal.locations import get_major_minor_version
from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, DirectUrl
from pip._internal.utils.filesystem import adjacent_tmp_file, replace
from pip._internal.utils.misc import captured_stdout, ensure_dir, hash_file
from pip._internal.utils.temp_dir import TempDirectory
Expand Down Expand Up @@ -289,7 +290,8 @@ def install_unpacked_wheel(
scheme, # type: Scheme
req_description, # type: str
pycompile=True, # type: bool
warn_script_location=True # type: bool
warn_script_location=True, # type: bool
direct_url=None, # type: Optional[DirectUrl]
):
# type: (...) -> None
"""Install a wheel.
Expand Down Expand Up @@ -570,6 +572,14 @@ def is_entrypoint_wrapper(name):
replace(installer_file.name, installer_path)
generated.append(installer_path)

# Record the PEP 610 direct URL reference
if direct_url is not None:
direct_url_path = os.path.join(dest_info_dir, DIRECT_URL_METADATA_NAME)
with adjacent_tmp_file(direct_url_path) as direct_url_file:
direct_url_file.write(direct_url.to_json().encode("utf-8"))
replace(direct_url_file.name, direct_url_path)
generated.append(direct_url_path)

# Record details of all files installed
record_path = os.path.join(dest_info_dir, 'RECORD')
with open(record_path, **csv_io_kwargs('r')) as record_file:
Expand All @@ -593,6 +603,7 @@ def install_wheel(
pycompile=True, # type: bool
warn_script_location=True, # type: bool
_temp_dir_for_testing=None, # type: Optional[str]
direct_url=None, # type: Optional[DirectUrl]
):
# type: (...) -> None
with TempDirectory(
Expand All @@ -607,4 +618,5 @@ def install_wheel(
req_description=req_description,
pycompile=pycompile,
warn_script_location=warn_script_location,
direct_url=direct_url,
)
Loading