Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get hashes from PyPI JSON API #1109

Merged
merged 1 commit into from
Apr 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 91 additions & 7 deletions piptools/repositories/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pip._internal.cache import WheelCache
from pip._internal.commands import create_command
from pip._internal.models.index import PyPI
from pip._internal.models.index import PackageIndex, PyPI
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.req import RequirementSet
Expand All @@ -18,12 +18,14 @@
from pip._internal.utils.misc import normalize_path
from pip._internal.utils.temp_dir import TempDirectory, global_tempdir_manager
from pip._internal.utils.urls import path_to_url, url_to_path
from pip._vendor.requests import RequestException

from .._compat import PIP_VERSION, TemporaryDirectory, contextlib
from ..click import progressbar
from ..exceptions import NoCandidateFound
from ..logging import log
from ..utils import (
as_tuple,
fs_str,
is_pinned_requirement,
is_url_requirement,
Expand Down Expand Up @@ -227,6 +229,47 @@ def get_dependencies(self, ireq):

return self._dependencies_cache[ireq]

def _get_project(self, ireq):
    """
    Return a dict of a project info from PyPI JSON API for a given
    InstallRequirement. Return None on HTTP/JSON error or if a package
    is not found on PyPI server.

    Every index URL in ``self.finder.search_scope.index_urls`` is tried in
    order. The first index that answers with a non-error status and a JSON
    object wins; any index that fails is skipped so the next one gets
    a chance.

    API reference: https://warehouse.readthedocs.io/api-reference/json/
    """
    package_indexes = (
        PackageIndex(url=index_url, file_storage_domain="")
        for index_url in self.finder.search_scope.index_urls
    )
    for package_index in package_indexes:
        url = "{url}/{name}/json".format(url=package_index.pypi_url, name=ireq.name)
        try:
            response = self.session.get(url)
        except RequestException as e:
            log.debug(
                "Fetch package info from PyPI failed: {url}: {e}".format(
                    url=url, e=e
                )
            )
            continue

        # Skip this PyPI server, because there is no package
        # or JSON API might be not supported
        if response.status_code == 404:
            continue

        # Any other client/server error (401, 403, 5xx, ...) may still carry
        # a JSON error body. Treating that body as project data would stop
        # the loop before the remaining indexes are consulted.
        if response.status_code >= 400:
            log.debug(
                "Fetch package info from PyPI failed: {url}: {e}".format(
                    url=url, e="HTTP status {}".format(response.status_code)
                )
            )
            continue

        try:
            data = response.json()
        except ValueError as e:
            log.debug(
                "Cannot parse JSON response from PyPI: {url}: {e}".format(
                    url=url, e=e
                )
            )
            continue

        # Valid JSON is not necessarily a JSON object; callers index the
        # result by key, so only a dict is an acceptable answer.
        if not isinstance(data, dict):
            log.debug(
                "Cannot parse JSON response from PyPI: {url}: {e}".format(
                    url=url, e="expected a JSON object"
                )
            )
            continue
        return data
    return None

def get_hashes(self, ireq):
"""
Given an InstallRequirement, return a set of hashes that represent all
Expand Down Expand Up @@ -257,6 +300,50 @@ def get_hashes(self, ireq):
if not is_pinned_requirement(ireq):
raise TypeError("Expected pinned requirement, got {}".format(ireq))

log.debug("{}".format(ireq.name))

with log.indentation():
hashes = self._get_hashes_from_pypi(ireq)
if hashes is None:
log.log("Couldn't get hashes from PyPI, fallback to hashing files")
return self._get_hashes_from_files(ireq)

return hashes

def _get_hashes_from_pypi(self, ireq):
    """
    Return a set of hashes from PyPI JSON API for a given InstallRequirement.

    Return None if fetching data failed, if the pinned version has no
    release files listed, or if any release file lacks a digest for
    FAVORITE_HASH. None lets the caller fall back to another way of
    obtaining hashes; an empty set would be taken as a valid answer.
    """
    project = self._get_project(ireq)
    if project is None:
        return None

    # as_tuple() is unpacked into three items; only the middle one
    # (the pinned version) is needed here.
    _, version, _ = as_tuple(ireq)

    try:
        release_files = project["releases"][version]
    except (KeyError, TypeError):
        # TypeError covers payloads where "releases" is not a mapping.
        log.debug("Missing release files on PyPI")
        return None

    # A release that exists but lists no files carries no digests at all.
    # Report it as a miss instead of returning an empty set of hashes.
    if not release_files:
        log.debug("Missing release files on PyPI")
        return None

    try:
        hashes = {
            "{algo}:{digest}".format(
                algo=FAVORITE_HASH, digest=file_["digests"][FAVORITE_HASH]
            )
            for file_ in release_files
        }
    except (KeyError, TypeError):
        # TypeError covers malformed entries such as "digests": null.
        log.debug("Missing digests of release files on PyPI")
        return None

    return hashes

def _get_hashes_from_files(self, ireq):
"""
Return a set of hashes for all release files of a given InstallRequirement.
"""
# We need to get all of the candidates that match our current version
# pin, these will represent all of the files that could possibly
# satisfy this constraint.
Expand All @@ -267,12 +354,9 @@ def get_hashes(self, ireq):
)
matching_candidates = candidates_by_version[matching_versions[0]]

log.debug(ireq.name)

with log.indentation():
return {
self._get_file_hash(candidate.link) for candidate in matching_candidates
}
return {
self._get_file_hash(candidate.link) for candidate in matching_candidates
}

def _get_file_hash(self, link):
log.debug("Hashing {}".format(link.url_without_fragment))
Expand Down
137 changes: 136 additions & 1 deletion tests/test_repository_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from pip._internal.models.link import Link
from pip._internal.utils.urls import path_to_url
from pip._vendor.requests import Session
from pip._vendor.requests import HTTPError, Session

from piptools._compat import PIP_VERSION
from piptools.repositories import PyPIRepository
Expand Down Expand Up @@ -166,3 +166,138 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir):
)

assert not pypi_repository.options.cache_dir


@pytest.mark.parametrize(
    ("project_data", "expected_hashes"),
    [
        pytest.param(
            {"releases": {"0.1": [{"digests": {"sha256": "fake-hash"}}]}},
            {"sha256:fake-hash"},
            id="return single hash",
        ),
        pytest.param(
            {
                "releases": {
                    "0.1": [
                        {"digests": {"sha256": "fake-hash-number1"}},
                        {"digests": {"sha256": "fake-hash-number2"}},
                    ]
                }
            },
            {"sha256:fake-hash-number1", "sha256:fake-hash-number2"},
            id="return multiple hashes",
        ),
        pytest.param(None, None, id="not found project data"),
        pytest.param({}, None, id="not found releases key"),
        pytest.param({"releases": {}}, None, id="not found version"),
        pytest.param({"releases": {"0.1": [{}]}}, None, id="not found digests"),
        pytest.param(
            {"releases": {"0.1": [{"digests": {}}]}}, None, id="digests are empty"
        ),
        pytest.param(
            {"releases": {"0.1": [{"digests": {"md5": "fake-hash"}}]}},
            None,
            id="not found sha256 algo",
        ),
    ],
)
def test_get_hashes_from_pypi(from_line, tmpdir, project_data, expected_hashes):
    """
    Check that PyPIRepository._get_hashes_from_pypi() produces the expected
    set of hashes, or None when the project data cannot supply them.
    """

    class StubbedPyPIRepository(PyPIRepository):
        # Replace the project lookup so each case is fed its parametrized
        # payload directly.
        def _get_project(self, ireq):
            return project_data

    cache_dir = str(tmpdir / "pypi-repo-cache")
    repository = StubbedPyPIRepository(["--no-cache-dir"], cache_dir=cache_dir)
    pinned_ireq = from_line("fake-package==0.1")

    assert repository._get_hashes_from_pypi(pinned_ireq) == expected_hashes


def test_get_project__returns_data(from_line, tmpdir, monkeypatch, pypi_repository):
    """
    Check that PyPIRepository._get_project() hands back the JSON payload
    of a successful response unchanged.
    """
    payload = {"releases": {"0.1": [{"digests": {"sha256": "fake-hash"}}]}}

    class FakeResponse:
        # Minimal stand-in exposing only what _get_project() touches.
        status_code = 200

        @staticmethod
        def json():
            return payload

    # Every session.get() call, whatever its arguments, yields the fake.
    monkeypatch.setattr(
        pypi_repository.session, "get", lambda *args, **kwargs: FakeResponse()
    )
    pinned_ireq = from_line("fake-package==0.1")

    assert pypi_repository._get_project(pinned_ireq) == payload


def test_get_project__handles_http_error(
    from_line, tmpdir, monkeypatch, pypi_repository
):
    """
    Check that PyPIRepository._get_project() yields None when the HTTP
    request itself raises.
    """

    def failing_get(*args, **kwargs):
        # Simulate a transport-level failure for any requested URL.
        raise HTTPError("test http error")

    monkeypatch.setattr(pypi_repository.session, "get", failing_get)
    pinned_ireq = from_line("fake-package==0.1")

    project = pypi_repository._get_project(pinned_ireq)
    assert project is None


def test_get_project__handles_json_decode_error(
    from_line, tmpdir, monkeypatch, pypi_repository
):
    """
    Check that PyPIRepository._get_project() yields None when the response
    body cannot be decoded as JSON.
    """

    class UndecodableResponse:
        # Successful status, but decoding the body always fails.
        status_code = 200

        @staticmethod
        def json():
            raise ValueError("test json error")

    monkeypatch.setattr(
        pypi_repository.session,
        "get",
        lambda *args, **kwargs: UndecodableResponse(),
    )
    pinned_ireq = from_line("fake-package==0.1")

    project = pypi_repository._get_project(pinned_ireq)
    assert project is None


def test_get_project__handles_404(from_line, tmpdir, monkeypatch, pypi_repository):
    """
    Check that PyPIRepository._get_project() yields None when the server
    answers with HTTP status 404.
    """

    class NotFoundResponse:
        # Only the status code is defined on this stand-in.
        status_code = 404

    monkeypatch.setattr(
        pypi_repository.session, "get", lambda *args, **kwargs: NotFoundResponse()
    )
    pinned_ireq = from_line("fake-package==0.1")

    project = pypi_repository._get_project(pinned_ireq)
    assert project is None