Skip to content

Commit

Permalink
Merge pull request #589 from bioimage-io/resolve_ids
Browse files Browse the repository at this point in the history
Resolve version specific collection IDs
  • Loading branch information
FynnBe authored Apr 30, 2024
2 parents 0debec1 + 9a413e8 commit 2c3b5e6
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 13 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ Made with [contrib.rocks](https://contrib.rocks).

### bioimageio.spec Python package

#### bioimageio.spec 0.5.2post2

* resolve version (un)specific collection IDs, e.g. `load_description('affable-shark')`, `load_description('affable-shark/1')`

#### bioimageio.spec 0.5.2post1

* fix model packaging with weights format priority
Expand Down
2 changes: 1 addition & 1 deletion bioimageio/spec/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "0.5.2post1"
"version": "0.5.2post2"
}
39 changes: 29 additions & 10 deletions bioimageio/spec/_internal/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,49 @@ class Settings(BaseSettings, extra="ignore"):
env_prefix="BIOIMAGEIO_", env_file=".env", env_file_encoding="utf-8"
)

github_username: Optional[str] = None
"""GitHub username for API requests"""
cache_path: Path = pooch.os_cache("bioimageio")
"""bioimageio cache location"""

github_token: Optional[str] = None
"""GitHub token for API requests"""
collection: str = (
"https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/collection.json"
)
"""url to bioimageio collection.json to resolve collection specific resource IDs.
"""

log_warnings: bool = True
"""log validation warnings to console"""
collection_staged: str = (
"https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/collection_staged.json"
)
"""url to bioimageio collection_staged.json to resolve collection specific, staged
resource IDs."""

resolve_staged: bool = True
"""Flag to resolve staged resource versions following the pattern
<resource id>/staged/<stage number>.
Note that anyone may stage a new resource version and that such a staged version
may not have been reviewed.
Set this flag to False to avoid this potential security risk."""

perform_io_checks: bool = True
"""whether or not to perform validation that requires file io,
e.g. downloading a remote file.
Existence of local absolute file paths is still being checked."""
Existence of any local absolute file paths is still being checked."""

log_warnings: bool = True
"""log validation warnings to console"""

github_username: Optional[str] = None
"""GitHub username for API requests"""

github_token: Optional[str] = None
"""GitHub token for API requests"""

CI: Annotated[Union[bool, str], Field(alias="CI")] = False
"""whether or not the execution happens in a continuous integration (CI) environment"""

user_agent: Optional[str] = None
"""user agent for http requests"""

cache_path: Path = pooch.os_cache("bioimageio")
"""bioimageio cache location"""

@property
def github_auth(self):
if self.github_username is None or self.github_token is None:
Expand Down
2 changes: 2 additions & 0 deletions bioimageio/spec/_internal/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,8 @@ def download(
strict_source = strict_source.absolute

if isinstance(strict_source, PurePath):
if not strict_source.exists():
raise FileNotFoundError(strict_source)
local_source = strict_source
root: Union[RootHttpUrl, DirectoryPath] = strict_source.parent
else:
Expand Down
152 changes: 150 additions & 2 deletions bioimageio/spec/_internal/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,38 @@
import io
import warnings
from contextlib import nullcontext
from functools import lru_cache
from pathlib import Path
from typing import IO, Any, Dict, Mapping, Optional, TextIO, Union, cast
from typing import (
IO,
Any,
Dict,
List,
Mapping,
NamedTuple,
Optional,
TextIO,
Union,
cast,
)
from zipfile import ZipFile, is_zipfile

import numpy
import requests
from loguru import logger
from numpy.typing import NDArray
from pydantic import DirectoryPath, FilePath, NewPath
from ruyaml import YAML
from typing_extensions import Unpack

from ._settings import settings
from .io import (
BIOIMAGEIO_YAML,
BioimageioYamlContent,
FileDescr,
HashKwargs,
OpenedBioimageioYaml,
Sha256,
YamlValue,
download,
find_bioimageio_yaml_file_name,
Expand Down Expand Up @@ -68,7 +84,44 @@ def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
def open_bioimageio_yaml(
source: PermissiveFileSource, /, **kwargs: Unpack[HashKwargs]
) -> OpenedBioimageioYaml:
downloaded = download(source, **kwargs)
try:
downloaded = download(source, **kwargs)
except Exception:
# check if `source` is a collection id
if (
not isinstance(source, str)
or not isinstance(settings.collection, str)
or "/" not in settings.collection
):
raise

collection = get_collection()
if source not in collection:
if "/staged/" in source:
if settings.resolve_staged:
collection_url = settings.collection_staged
else:
collection_url = ""
logger.error(
"Did not try to resolve '{}' as BIOIMAGEIO_RESOLVE_STAGED is set to False",
source,
)
else:
collection_url = settings.collection

logger.error("'{}' not found in collection {}", source, collection_url)
raise

entry = collection[source]
logger.info(
"{} loading {} {} from {}",
entry.emoji,
entry.id,
entry.version,
entry.url,
)
downloaded = download(entry.url, sha256=entry.sha256)

local_source = downloaded.path
root = downloaded.original_root

Expand All @@ -84,6 +137,101 @@ def open_bioimageio_yaml(
return OpenedBioimageioYaml(content, root, downloaded.original_file_name)


class _CollectionEntry(NamedTuple):
    # One resolved record of a bioimageio collection JSON, as built by
    # `_get_one_collection`. Field order matters: entries are constructed
    # positionally (only `version` is passed by keyword).

    # resource id, taken from the collection entry's "id" field
    id: str
    # taken from the collection entry's "id_emoji" field; used in log output
    emoji: str
    # taken from the collection entry's "entry_source" field; passed to `download`
    url: str
    # taken from the collection entry's "entry_sha256" field; passed to `download`
    sha256: Optional[Sha256]
    # `str(...)` of the collection entry's "version_number" field;
    # the code comparing versions expects "<int>" or "staged/<int>"
    version: str


def _get_one_collection(url: str) -> Dict[str, _CollectionEntry]:
    """Download one collection JSON and index its entries by resource ID.

    Every valid entry is stored under ``"<id>/<version>"``. In addition the
    'latest' version of each resource is stored under the plain ``"<id>"`` key,
    where any published version beats a staged one (version string starting
    with ``"staged"``) and otherwise the higher version number wins.

    This function never raises for bad input: problems are logged and the
    offending entry (or the whole collection) is skipped.

    Args:
        url: URL of a JSON document with a top-level ``collection`` list.

    Returns:
        Mapping of resource IDs to collection entries; empty if the URL is
        invalid, the request fails or the document is malformed.
    """
    ret: Dict[str, _CollectionEntry] = {}
    if not isinstance(url, str) or "/" not in url:
        logger.error("invalid collection url: {}", url)
        # do not attempt a request that is known to be pointless
        return ret

    try:
        # without a timeout an unresponsive server would block forever
        response = requests.get(url, timeout=30)
        # fail with a clear HTTP error rather than a JSON decoding error
        response.raise_for_status()
        collection: List[Dict[Any, Any]] = response.json().get("collection")
    except Exception as e:
        logger.error("failed to get {}: {}", url, e)
        return ret

    if not isinstance(collection, list):
        logger.error("`collection` field of {} has type {}", url, type(collection))
        return ret

    for entry in collection:
        if not isinstance(entry, dict):
            logger.error("entry has type {}", type(entry))
            continue

        # use `.get` throughout: a missing key must only skip this entry,
        # not abort parsing of the whole collection with a KeyError
        entry_id = entry.get("id")
        if not isinstance(entry_id, str):
            logger.error("entry['id'] has type {}", type(entry_id))
            continue

        bad_key = next(
            (
                key
                for key in ("id_emoji", "entry_source", "entry_sha256")
                if not isinstance(entry.get(key), str)
            ),
            None,
        )
        if bad_key is not None:
            logger.error(
                "{}.{} has type {}", entry_id, bad_key, type(entry.get(bad_key))
            )
            continue

        if "version_number" not in entry:
            logger.error("{} has no version_number", entry_id)
            continue

        version = str(entry["version_number"])
        c_entry = _CollectionEntry(
            entry_id,
            entry["id_emoji"],
            entry["entry_source"],
            # always a str here; entries without a hash were skipped above
            Sha256(entry["entry_sha256"]),
            version=version,
        )
        # set version specific entry
        ret[entry_id + "/" + version] = c_entry

        # update 'latest version' entry
        previous = ret.get(entry_id)
        if previous is None:
            update = True
        else:
            old_v = previous.version
            try:
                if old_v.startswith("staged"):
                    # any published version beats a staged one;
                    # among staged versions the higher stage number wins
                    update = not version.startswith("staged") or int(
                        version.replace("staged/", "")
                    ) > int(old_v.replace("staged/", ""))
                else:
                    update = not version.startswith("staged") and int(version) > int(
                        old_v
                    )
            except ValueError:
                # keep the version specific entry, but do not let one
                # unparsable version discard the whole collection
                logger.error(
                    "cannot compare versions '{}' and '{}' of {}",
                    version,
                    old_v,
                    entry_id,
                )
                continue

        if update:
            ret[entry_id] = c_entry

    return ret


@lru_cache
def get_collection() -> Mapping[str, _CollectionEntry]:
    """Return the mapping of collection resource IDs to their entries.

    Staged entries are only loaded if `settings.resolve_staged` is set.
    The published collection is merged in afterwards, so its entries replace
    staged ones stored under the same key.

    The result is memoized by `lru_cache`, so the collection is fetched at most
    once per process; this includes an empty mapping returned after a failure.
    """
    try:
        id_map: Dict[str, _CollectionEntry] = (
            _get_one_collection(settings.collection_staged)
            if settings.resolve_staged
            else {}
        )
        id_map.update(_get_one_collection(settings.collection))
    except Exception as e:
        logger.error("failed to get resource id mapping: {}", e)
        return {}
    else:
        return id_map


def unzip(
zip_file: Union[FilePath, ZipFile],
out_path: Optional[DirectoryPath] = None,
Expand Down
1 change: 1 addition & 0 deletions bioimageio/spec/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ._internal.io import download as download
from ._internal.io import extract_file_name as extract_file_name
from ._internal.io import get_sha256 as get_sha256
from ._internal.io import (
identify_bioimageio_yaml_file_name as identify_bioimageio_yaml_file_name,
)
Expand Down
1 change: 1 addition & 0 deletions dev/env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies:
- pyright
- pooch
- pre-commit
- requests
- rich
- typing-extensions
- pytest
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"pydantic>=2.6.3",
"python-dateutil",
"python-dotenv",
"requests",
"rich",
"ruyaml",
"tqdm",
Expand Down
16 changes: 16 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,19 @@ def test_load_non_existing_rdf():

with pytest.raises(FileNotFoundError):
_ = load_description(spec_path)


@pytest.mark.parametrize(
    "rid",
    [
        "invigorating-lab-coat",
        "invigorating-lab-coat/1",
        "invigorating-lab-coat/staged/1",
    ],
)
def test_load_by_id(rid: str):
    """Load a description by its plain, versioned and staged collection ID."""
    from bioimageio.spec import InvalidDescr, load_description

    # the part before the first "/" is the version independent resource id
    expected_id, _, _ = rid.partition("/")

    descr = load_description(rid)
    assert not isinstance(descr, InvalidDescr)
    assert descr.id == expected_id

0 comments on commit 2c3b5e6

Please sign in to comment.