From 7781e31880f7994e9b75905728a4d7cb40e73732 Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Fri, 3 Feb 2023 20:09:25 +0100 Subject: [PATCH] honor pagination when getting tags and adjust API to match types add raise_for_status and test getting many tags from ghcr.io Signed-off-by: Wolf Vollprecht --- examples/conda-mirror.py | 1 - oras/client.py | 2 +- oras/container.py | 4 +++- oras/provider.py | 33 +++++++++++++++++++++++++++++---- oras/tests/test_oras.py | 15 ++++++++++++--- oras/tests/test_utils.py | 1 - setup.py | 1 - 7 files changed, 45 insertions(+), 12 deletions(-) diff --git a/examples/conda-mirror.py b/examples/conda-mirror.py index 1b5d795..a74be55 100644 --- a/examples/conda-mirror.py +++ b/examples/conda-mirror.py @@ -33,7 +33,6 @@ class CondaMirror(oras.provider.Registry): } def inspect(self, name): - # Parse the name into a container container = self.get_container(name) diff --git a/oras/client.py b/oras/client.py index e3a6628..a3ac500 100644 --- a/oras/client.py +++ b/oras/client.py @@ -103,7 +103,7 @@ def get_tags(self, name: str, N: int = 10_000) -> List[str]: :param N: number of tags :type N: int """ - return self.remote.get_tags(name, N=N).json() + return self.remote.get_tags(name, N=N) def push(self, *args, **kwargs): """ diff --git a/oras/container.py b/oras/container.py index 4761344..884d7fd 100644 --- a/oras/container.py +++ b/oras/container.py @@ -54,7 +54,9 @@ def get_blob_url(self, digest: str) -> str: def upload_blob_url(self) -> str: return f"{self.registry}/v2/{self.api_prefix}/blobs/uploads/" - def tags_url(self, N=10_000) -> str: + def tags_url(self, N=10_000, query=None) -> str: + if query: + return f"{self.registry}/v2/{self.api_prefix}/tags/list?{query}" return f"{self.registry}/v2/{self.api_prefix}/tags/list?n={N}" def put_manifest_url(self) -> str: diff --git a/oras/provider.py b/oras/provider.py index 111c031..20d5806 100644 --- a/oras/provider.py +++ b/oras/provider.py @@ -4,6 +4,7 @@ import copy import os +import urllib from typing import List, Optional, Tuple, Union import jsonschema @@ -236,8 +237,34 @@ def get_tags( :param N: number of tags :type N: int """ - tags_url = f"{self.prefix}://{container.tags_url(N)}" # type: ignore - return self.do_request(tags_url, "GET", headers=self.headers) + tags_url = f"{self.prefix}://{container.tags_url(N=N)}" # type: ignore + + tags: List[str] = [] + has_next_link = True + # get all tags using the pagination + while len(tags) < N and has_next_link: + res = self.do_request(tags_url, "GET", headers=self.headers) + + # raise before trying to get `json` value + res.raise_for_status() + + if res.headers.get("Link"): + link = res.headers.get("Link") + # if we have a next link, that looks something like: + # ; rel="next" + # we want to extract the url and get the rest of the tags + assert link.endswith('; rel="next"') + next_link = link[link.find("<") + 1 : link.find(">")] + query = urllib.parse.urlparse(next_link).query + tags_url = f"{self.prefix}://{container.tags_url(query=query)}" # type: ignore + else: + has_next_link = False + + # if the package does not exist, the response is an + # {"errors":[{"code":"NAME_UNKNOWN","message":"repository name not known to registry"}]} + tags += res.json().get("tags", []) + + return tags @ensure_container def get_blob( @@ -548,7 +575,6 @@ def push(self, *args, **kwargs) -> requests.Response: # Upload files as blobs for blob in kwargs.get("files", []): - # You can provide a blob + content type if ":" in str(blob): blob, media_type = str(blob).split(":", 1) @@ -809,7 +835,6 @@ def authenticate_request(self, originalResponse: requests.Response) -> bool: h = oras.auth.parse_auth_header(authHeaderRaw) if "Authorization" not in headers: - # First try to request an anonymous token logger.debug("No Authorization, requesting anonymous token") if self.request_anonymous_token(h): diff --git a/oras/tests/test_oras.py b/oras/tests/test_oras.py index dfea823..e64e956 100644 --- a/oras/tests/test_oras.py +++ b/oras/tests/test_oras.py @@ -72,9 +72,7 @@ def test_basic_push_pull(tmp_path): # Test getting tags tags = client.get_tags(target) - for key in ["name", "tags"]: - assert key in tags - assert "v1" in tags["tags"] + assert "v1" in tags # Test pulling elsewhere files = client.pull(target=target, outdir=tmp_path) @@ -94,6 +92,17 @@ def test_basic_push_pull(tmp_path): assert res.status_code == 201 +def test_get_many_tags(): + """ + Test getting many tags + """ + client = oras.client.OrasClient(hostname="ghcr.io", insecure=False) + tags = client.get_tags( + "channel-mirrors/conda-forge/linux-aarch64/arrow-cpp", N=100000 + ) + assert len(tags) > 1000 + + @pytest.mark.skipif(with_auth, reason="token auth is needed for push and pull") def test_directory_push_pull(tmp_path): """ diff --git a/oras/tests/test_utils.py b/oras/tests/test_utils.py index deb3e13..a08c3b5 100644 --- a/oras/tests/test_utils.py +++ b/oras/tests/test_utils.py @@ -45,7 +45,6 @@ def test_write_bad_json(tmp_path): def test_write_json(tmp_path): - good_json = {"Wakkawakkawakka": [True, "2", 3]} tmpfile = str(tmp_path / "good_json_file.txt") diff --git a/setup.py b/setup.py index 1203271..ad6234c 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,6 @@ def get_reqs(lookup=None, key="INSTALL_REQUIRES"): ################################################################################ if __name__ == "__main__": - INSTALL_REQUIRES = get_reqs(lookup) TESTS_REQUIRES = get_reqs(lookup, "TESTS_REQUIRES") INSTALL_REQUIRES_ALL = get_reqs(lookup, "INSTALL_REQUIRES_ALL")