Skip to content

Commit

Permalink
Fix get tags (#68)
Browse files Browse the repository at this point in the history
* honor pagination when getting tags and adjust API to match types
add raise_for_status and test getting many tags from ghcr.io

* add general function to get tags

this extends the updated get tags function (with pagination!) to
use a general function, so a future caller can use the same functionality.

* use callable in paginated request, and use links dictionary of requests to find link
* update types for tags function, add more tests and docs

Signed-off-by: Wolf Vollprecht <[email protected]>
  • Loading branch information
wolfv authored Feb 4, 2023
1 parent cb88582 commit 7e2a092
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
The versions coincide with releases on pip. Only major versions will be released as tags on Github.

## [0.0.x](https://github.com/oras-project/oras-py/tree/main) (0.0.x)
- pagination for tags (and general function for pagination) (0.1.14)
- expose upload_blob function to be consistent (0.1.13)
- ensure we always strip path separators before pull/push (0.1.12)
- exposing download_blob to the user since it uses streaming (0.1.11)
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
project = "Oras Python"
html_title = "Oras Python"

copyright = "2022, Oras Python Developers"
copyright = "2023, Oras Python Developers"
author = "@vsoch"

# The full version, including alpha/beta/rc tags
Expand Down
24 changes: 24 additions & 0 deletions docs/getting_started/user-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,30 @@ you should do blobs (layers) and the config first.

</details>

### Tags

The client provides a simple `get_tags` function for asking a registry which tags exist for a repository.
Let's say we want the tags of a conda-forge package that is mirrored on ghcr.io. First, create a client:

```python
import oras.client

client = oras.client.OrasClient(hostname="ghcr.io", insecure=False)
```

And then ask for either a specific number of tags:

```python
tags = client.get_tags("channel-mirrors/conda-forge/linux-aarch64/arrow-cpp", N=1005)
```

Or more likely, just ask for all tags (the default).

```python
tags = client.get_tags("channel-mirrors/conda-forge/linux-aarch64/arrow-cpp")
```

You can read more about how registries provide tags [in the distribution spec](https://github.com/opencontainers/distribution-spec/blob/067a0f5b0e256583bb9a088f72cba85ed043d1d2/spec.md?plain=1#L471-L513).

### Push Interactions

Let's start with a very basic push interaction, and this one
Expand Down
1 change: 0 additions & 1 deletion examples/conda-mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ class CondaMirror(oras.provider.Registry):
}

def inspect(self, name):

# Parse the name into a container
container = self.get_container(name)

Expand Down
6 changes: 3 additions & 3 deletions oras/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,16 @@ def version(self, return_items: bool = False) -> Union[dict, str]:
# Otherwise return a string that can be printed
return "\n".join(["%s: %s" % (k, v) for k, v in versions.items()])

def get_tags(self, name: str, N: Union[int, None] = -1) -> List[str]:
    """
    Retrieve tags for a package.

    This is a thin wrapper that delegates to the remote registry provider,
    which returns the list of tag names directly (not a response object).

    :param name: container URI to parse
    :type name: str
    :param N: maximum number of tags to return; -1 (the default) asks for
              all tags, and None is accepted and handled the same way
    :type N: Optional[int]
    :return: list of tag names
    :rtype: List[str]
    """
    return self.remote.get_tags(name, N=N)

def push(self, *args, **kwargs):
"""
Expand Down
2 changes: 1 addition & 1 deletion oras/defaults.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__author__ = "Vanessa Sochat"
__copyright__ = "Copyright 2021-2022, Vanessa Sochat"
__copyright__ = "Copyright The ORAS Authors"
__license__ = "Apache-2.0"


Expand Down
70 changes: 62 additions & 8 deletions oras/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import copy
import os
from typing import List, Optional, Tuple, Union
import urllib
from typing import Callable, List, Optional, Tuple, Union

import jsonschema
import requests
Expand Down Expand Up @@ -226,18 +227,73 @@ def upload_blob(

@ensure_container
def get_tags(
    self, container: Union[str, oras.container.Container], N: Optional[int] = -1
) -> List[str]:
    """
    Retrieve tags for a package, following registry pagination.

    :param container: parsed container URI
    :type container: oras.container.Container or str
    :param N: limit number of tags; -1 (default), None, or any other
              non-positive value retrieves all tags
    :type N: Optional[int]
    :return: list of tag names, at most N long when a positive N is given
    :rtype: List[str]
    """
    # A positive N is a hard limit; anything else means "no limit".
    limit: Optional[int] = N if (N and N > 0) else None

    # Page-size hint sent to the registry. When retrieving everything we ask
    # for an arbitrarily high number and rely on pagination for the rest.
    page_size = limit if limit is not None else 100_000

    tags_url = f"{self.prefix}://{container.tags_url(N=page_size)}"  # type: ignore
    tags: List[str] = []

    def extract_tags(response: requests.Response) -> bool:
        """
        Collect tags from one page and decide whether to request another.

        Returns True only if this page had tags and we are still under
        the limit (or there is no limit).
        """
        # Guard against a missing or null "tags" value in the payload
        new_tags = response.json().get("tags") or []
        tags.extend(new_tags)
        return bool(new_tags) and (limit is None or len(tags) < limit)

    self._do_paginated_request(tags_url, callable=extract_tags)

    # The last page may overshoot the limit; never truncate when the caller
    # asked for all tags.
    return tags if limit is None else tags[:limit]

def _do_paginated_request(
    self, url: str, callable: Callable[[requests.Response], bool]
) -> None:
    """
    Paginate a GET request for a URL.

    Each response is checked for success and then handed to the callable.
    We look for the "Link" header (rel="next") to get the next URL to ping.
    If the callable returns True and a next link exists we continue to the
    next page, otherwise we stop.

    :param url: the first URL to request
    :type url: str
    :param callable: function that consumes each response and returns
                     True to keep paginating, False to stop
    :type callable: Callable[[requests.Response], bool]
    """
    # Imported here so urllib.parse is guaranteed to be loaded; a bare
    # "import urllib" does not promise the submodule is available.
    from urllib.parse import urljoin

    # get all results using the pagination
    while True:
        response = self.do_request(url, "GET", headers=self.headers)

        # Check 200 response, show errors if any
        self._check_200_response(response)

        # The caller decides whether it wants another page
        if not callable(response):
            break

        # requests parses the Link header into response.links
        link = response.links.get("next", {}).get("url")
        if not link:
            break

        # Resolve against the current URL: this handles both path-only
        # links and links that are already absolute URLs.
        url = urljoin(url, link)

@ensure_container
def get_blob(
Expand Down Expand Up @@ -548,7 +604,6 @@ def push(self, *args, **kwargs) -> requests.Response:

# Upload files as blobs
for blob in kwargs.get("files", []):

# You can provide a blob + content type
if ":" in str(blob):
blob, media_type = str(blob).split(":", 1)
Expand Down Expand Up @@ -809,7 +864,6 @@ def authenticate_request(self, originalResponse: requests.Response) -> bool:
h = oras.auth.parse_auth_header(authHeaderRaw)

if "Authorization" not in headers:

# First try to request an anonymous token
logger.debug("No Authorization, requesting anonymous token")
if self.request_anonymous_token(h):
Expand Down
38 changes: 35 additions & 3 deletions oras/tests/test_oras.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ def test_basic_push_pull(tmp_path):

# Test getting tags
tags = client.get_tags(target)
for key in ["name", "tags"]:
assert key in tags
assert "v1" in tags["tags"]
assert "v1" in tags

# Test pulling elsewhere
files = client.pull(target=target, outdir=tmp_path)
Expand All @@ -94,6 +92,40 @@ def test_basic_push_pull(tmp_path):
assert res.status_code == 201


def test_get_many_tags():
    """
    Test getting many tags (with and without a limit) from ghcr.io
    """
    repository = "channel-mirrors/conda-forge/linux-aarch64/arrow-cpp"
    client = oras.client.OrasClient(hostname="ghcr.io", insecure=False)

    # A limit larger than one page should be honored exactly
    limited = client.get_tags(repository, N=1005)
    assert len(limited) == 1005

    # No N given: the default (-1) retrieves every tag
    tags = client.get_tags(repository)
    assert len(tags) > 1500

    # Explicit -1 gives the same result (assuming no change in between)
    same_tags = client.get_tags(repository, N=-1)
    assert set(tags) <= set(same_tags)

    # None behaves like -1 too
    same_tags = client.get_tags(repository, N=None)
    assert set(tags) <= set(same_tags)

    # A small limit returns a subset of the full listing
    tags = client.get_tags(repository, N=10)
    assert set(tags) <= set(same_tags)
    assert len(tags) == 10


@pytest.mark.skipif(with_auth, reason="token auth is needed for push and pull")
def test_directory_push_pull(tmp_path):
"""
Expand Down
1 change: 0 additions & 1 deletion oras/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def test_write_bad_json(tmp_path):


def test_write_json(tmp_path):

good_json = {"Wakkawakkawakka": [True, "2", 3]}
tmpfile = str(tmp_path / "good_json_file.txt")

Expand Down
2 changes: 1 addition & 1 deletion oras/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__copyright__ = "Copyright The ORAS Authors."
__license__ = "Apache-2.0"

__version__ = "0.1.13"
__version__ = "0.1.14"
AUTHOR = "Vanessa Sochat"
EMAIL = "[email protected]"
NAME = "oras"
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def get_reqs(lookup=None, key="INSTALL_REQUIRES"):
################################################################################

if __name__ == "__main__":

INSTALL_REQUIRES = get_reqs(lookup)
TESTS_REQUIRES = get_reqs(lookup, "TESTS_REQUIRES")
INSTALL_REQUIRES_ALL = get_reqs(lookup, "INSTALL_REQUIRES_ALL")
Expand Down

0 comments on commit 7e2a092

Please sign in to comment.