From 2b3b4f5ee9c575f035421903ea5f83553133a70b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 16:18:19 +0100 Subject: [PATCH 01/17] set id in collection entries --- bioimageio/spec/collection/v0_2/converters.py | 7 +++++++ bioimageio/spec/commands.py | 8 +++++++- bioimageio/spec/model/v0_4/converters.py | 2 -- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/bioimageio/spec/collection/v0_2/converters.py b/bioimageio/spec/collection/v0_2/converters.py index 6058c0a39..102b57024 100644 --- a/bioimageio/spec/collection/v0_2/converters.py +++ b/bioimageio/spec/collection/v0_2/converters.py @@ -8,10 +8,17 @@ def maybe_convert(data: Dict[str, Any]) -> Dict[str, Any]: # move all type groups to the 'collection' field if "collection" not in data: data["collection"] = [] + for group in ["application", "model", "dataset", "notebook"]: if group in data: data["collection"] += data[group] + config = data.get("config") + if config and isinstance(config, dict): + id_ = config.pop("id", data.get("id")) + if id_ is not None: + data["id"] = id_ + data["format_version"] = "0.2.2" return maybe_convert_rdf(data) diff --git a/bioimageio/spec/commands.py b/bioimageio/spec/commands.py index a1ca1bf97..c3fca7922 100644 --- a/bioimageio/spec/commands.py +++ b/bioimageio/spec/commands.py @@ -80,6 +80,7 @@ def validate( if raw_rd is not None and raw_rd.type == "collection": assert hasattr(raw_rd, "collection") + seen_ids = set() for idx, entry in enumerate(raw_rd.collection): # type: ignore entry_error: Optional[str] = None rdf_update = entry.rdf_update @@ -90,7 +91,7 @@ def validate( rdf_data.pop("collection") # ... 
without the collection field to avoid recursion root_id = rdf_data.pop("id", missing) - # update rdf entry with entrie's rdf_source + # update rdf entry with entry's rdf_source sub_id: Union[str, _Missing] = missing if entry.rdf_source is not missing: try: @@ -106,8 +107,13 @@ def validate( sub_id = rdf_update.pop("id", sub_id) if sub_id is missing: entry_error = f"collection[{idx}]: Missing `id` field for collection entry" + elif sub_id in seen_ids: + entry_error = f"collection[{idx}]: Duplicate `id` value {sub_id} for collection entry" + else: + seen_ids.add(sub_id) rdf_data.update(rdf_update) + rdf_data["id"] = f"{root_id}/{sub_id}" if entry_error: entry_summary = {"error": entry_error} diff --git a/bioimageio/spec/model/v0_4/converters.py b/bioimageio/spec/model/v0_4/converters.py index ba99d72ba..d378082b1 100644 --- a/bioimageio/spec/model/v0_4/converters.py +++ b/bioimageio/spec/model/v0_4/converters.py @@ -3,8 +3,6 @@ from marshmallow import missing -from bioimageio.spec.exceptions import UnconvertibleError - def convert_model_from_v0_3_to_0_4_0(data: Dict[str, Any]) -> Dict[str, Any]: from bioimageio.spec.model import v0_3 From f7f74b8e72839853e156854643b3b55ded2b91d0 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 16:18:59 +0100 Subject: [PATCH 02/17] refactor resolve_collection_entries --- bioimageio/spec/collection/v0_2/utils.py | 49 ++++++++++++++++++++++++ bioimageio/spec/commands.py | 38 ++---------------- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/bioimageio/spec/collection/v0_2/utils.py b/bioimageio/spec/collection/v0_2/utils.py index e67e9b1f0..22f34e19f 100644 --- a/bioimageio/spec/collection/v0_2/utils.py +++ b/bioimageio/spec/collection/v0_2/utils.py @@ -1,5 +1,54 @@ +from typing import List, Optional, Tuple, Union + +from marshmallow import missing +from marshmallow.utils import _Missing + from . 
import raw_nodes def filter_resource_description(raw_rd: raw_nodes.RDF) -> raw_nodes.RDF: return raw_rd + + +def resolve_collection_entries(raw_rd: raw_nodes.Collection) -> List[Tuple[dict, Optional[str]]]: + from bioimageio.spec import serialize_raw_resource_description_to_dict + from bioimageio.spec.shared.utils import resolve_rdf_source + + ret = [] + seen_ids = set() + for idx, entry in enumerate(raw_rd.collection): # type: ignore + entry_error: Optional[str] = None + rdf_update = entry.rdf_update + id_info = f"(id={rdf_update['id']}) " if "id" in rdf_update else "" + + # rdf entries are based on collection RDF... + rdf_data = serialize_raw_resource_description_to_dict(raw_rd) + rdf_data.pop("collection") # ... without the collection field to avoid recursion + + root_id = rdf_data.pop("id", missing) + # update rdf entry with entry's rdf_source + sub_id: Union[str, _Missing] = missing + if entry.rdf_source is not missing: + try: + rdf_update, _, _ = resolve_rdf_source(entry.rdf_source) + except Exception as e: + entry_error = f"collection[{idx}]: {id_info}Invalid rdf_source: {e}" + else: + sub_id = rdf_update.pop("id", missing) + rdf_data.update(rdf_update) + + # update rdf entry with fields specified directly in the entry + rdf_update = dict(entry.rdf_update) + sub_id = rdf_update.pop("id", sub_id) + if sub_id is missing: + entry_error = f"collection[{idx}]: Missing `id` field for collection entry" + elif sub_id in seen_ids: + entry_error = f"collection[{idx}]: Duplicate `id` value {sub_id} for collection entry" + else: + seen_ids.add(sub_id) + + rdf_data.update(rdf_update) + rdf_data["id"] = f"{root_id}/{sub_id}" + ret.append((rdf_data, entry_error)) + + return ret diff --git a/bioimageio/spec/commands.py b/bioimageio/spec/commands.py index c3fca7922..2edb774dd 100644 --- a/bioimageio/spec/commands.py +++ b/bioimageio/spec/commands.py @@ -7,6 +7,7 @@ from marshmallow import ValidationError, missing from marshmallow.utils import _Missing +from 
.collection.v0_2.utils import resolve_collection_entries from .io_ import ( load_raw_resource_description, resolve_rdf_source, @@ -80,41 +81,7 @@ def validate( if raw_rd is not None and raw_rd.type == "collection": assert hasattr(raw_rd, "collection") - seen_ids = set() - for idx, entry in enumerate(raw_rd.collection): # type: ignore - entry_error: Optional[str] = None - rdf_update = entry.rdf_update - id_info = f"(id={rdf_update['id']}) " if "id" in rdf_update else "" - - # rdf entries are based on collection RDF... - rdf_data = serialize_raw_resource_description_to_dict(raw_rd) - rdf_data.pop("collection") # ... without the collection field to avoid recursion - - root_id = rdf_data.pop("id", missing) - # update rdf entry with entry's rdf_source - sub_id: Union[str, _Missing] = missing - if entry.rdf_source is not missing: - try: - rdf_update, _, _ = resolve_rdf_source(entry.rdf_source) - except Exception as e: - entry_error = f"collection[{idx}]: {id_info}Invalid rdf_source: {e}" - else: - sub_id = rdf_update.pop("id", missing) - rdf_data.update(rdf_update) - - # update rdf entry with fields specified directly in the entry - rdf_update = dict(entry.rdf_update) - sub_id = rdf_update.pop("id", sub_id) - if sub_id is missing: - entry_error = f"collection[{idx}]: Missing `id` field for collection entry" - elif sub_id in seen_ids: - entry_error = f"collection[{idx}]: Duplicate `id` value {sub_id} for collection entry" - else: - seen_ids.add(sub_id) - - rdf_data.update(rdf_update) - rdf_data["id"] = f"{root_id}/{sub_id}" - + for idx, (rdf_data, entry_error) in resolve_collection_entries(raw_rd): # type: ignore if entry_error: entry_summary = {"error": entry_error} else: @@ -124,6 +91,7 @@ def validate( wrns: Union[str, dict] = entry_summary.get("warnings", {}) assert isinstance(wrns, dict) + id_info = f"(id={rdf_data['id']}) " if "id" in rdf_data else "" for k, v in wrns.items(): warnings.warn(f"collection[{idx}]:{k}: {id_info}{v}", category=ValidationWarning) From 
cf19c585efda4e73eccf74c7fc8e43bcc7fa0654 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 16:27:36 +0100 Subject: [PATCH 03/17] fix enumerate --- bioimageio/spec/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/spec/commands.py b/bioimageio/spec/commands.py index 2edb774dd..74424c4f4 100644 --- a/bioimageio/spec/commands.py +++ b/bioimageio/spec/commands.py @@ -81,7 +81,7 @@ def validate( if raw_rd is not None and raw_rd.type == "collection": assert hasattr(raw_rd, "collection") - for idx, (rdf_data, entry_error) in resolve_collection_entries(raw_rd): # type: ignore + for idx, (rdf_data, entry_error) in enumerate(resolve_collection_entries(raw_rd)): # type: ignore if entry_error: entry_summary = {"error": entry_error} else: From d15daa0a48d487608496a34624fe4118248e2c2e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 16:29:29 +0100 Subject: [PATCH 04/17] fix raw_nodes.RDF --- bioimageio/spec/rdf/v0_2/raw_nodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bioimageio/spec/rdf/v0_2/raw_nodes.py b/bioimageio/spec/rdf/v0_2/raw_nodes.py index fe2033d57..327b26eab 100644 --- a/bioimageio/spec/rdf/v0_2/raw_nodes.py +++ b/bioimageio/spec/rdf/v0_2/raw_nodes.py @@ -84,10 +84,12 @@ class RDF(ResourceDescription): documentation: Path = missing format_version: FormatVersion = missing git_repo: Union[_Missing, str] = missing + id: Union[_Missing, str] = missing icon: Union[_Missing, str] = missing license: Union[_Missing, str] = missing links: Union[_Missing, List[str]] = missing maintainers: Union[_Missing, List[Maintainer]] = missing + rdf_source: Union[_Missing, URI] = missing tags: List[str] = missing # manual __init__ to allow for unknown kwargs From 7f38532207334b12211038c45fcc83ef8e16a591 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 16:30:40 +0100 Subject: [PATCH 05/17] briefer entry errors --- bioimageio/spec/collection/v0_2/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/bioimageio/spec/collection/v0_2/utils.py b/bioimageio/spec/collection/v0_2/utils.py index 22f34e19f..28d7ba0d0 100644 --- a/bioimageio/spec/collection/v0_2/utils.py +++ b/bioimageio/spec/collection/v0_2/utils.py @@ -41,9 +41,9 @@ def resolve_collection_entries(raw_rd: raw_nodes.Collection) -> List[Tuple[dict, rdf_update = dict(entry.rdf_update) sub_id = rdf_update.pop("id", sub_id) if sub_id is missing: - entry_error = f"collection[{idx}]: Missing `id` field for collection entry" + entry_error = f"collection[{idx}]: Missing `id` field" elif sub_id in seen_ids: - entry_error = f"collection[{idx}]: Duplicate `id` value {sub_id} for collection entry" + entry_error = f"collection[{idx}]: Duplicate `id` value {sub_id}" else: seen_ids.add(sub_id) From 44b9ad1a88186e110cbee3fb62396f6c06ce17e8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 16:46:02 +0100 Subject: [PATCH 06/17] cosmetics in resolve_collection_entries --- bioimageio/spec/collection/v0_2/utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bioimageio/spec/collection/v0_2/utils.py b/bioimageio/spec/collection/v0_2/utils.py index 28d7ba0d0..9a2fd4470 100644 --- a/bioimageio/spec/collection/v0_2/utils.py +++ b/bioimageio/spec/collection/v0_2/utils.py @@ -18,8 +18,7 @@ def resolve_collection_entries(raw_rd: raw_nodes.Collection) -> List[Tuple[dict, seen_ids = set() for idx, entry in enumerate(raw_rd.collection): # type: ignore entry_error: Optional[str] = None - rdf_update = entry.rdf_update - id_info = f"(id={rdf_update['id']}) " if "id" in rdf_update else "" + id_info = f"(id={entry.rdf_update['id']}) " if "id" in entry.rdf_update else "" # rdf entries are based on collection RDF... 
rdf_data = serialize_raw_resource_description_to_dict(raw_rd) @@ -30,12 +29,12 @@ def resolve_collection_entries(raw_rd: raw_nodes.Collection) -> List[Tuple[dict, sub_id: Union[str, _Missing] = missing if entry.rdf_source is not missing: try: - rdf_update, _, _ = resolve_rdf_source(entry.rdf_source) + remote_rdf_update, _, _ = resolve_rdf_source(entry.rdf_source) except Exception as e: entry_error = f"collection[{idx}]: {id_info}Invalid rdf_source: {e}" else: - sub_id = rdf_update.pop("id", missing) - rdf_data.update(rdf_update) + sub_id = remote_rdf_update.pop("id", missing) + rdf_data.update(remote_rdf_update) # update rdf entry with fields specified directly in the entry rdf_update = dict(entry.rdf_update) From 78a7a87b06c3e9d8f57ba7e63d2425c7e83e8d74 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 17:54:58 +0100 Subject: [PATCH 07/17] fix calling dataclasses.replace on WithUnknown raw_nodes --- bioimageio/spec/collection/v0_2/raw_nodes.py | 15 ++++++++++----- bioimageio/spec/rdf/v0_2/raw_nodes.py | 10 ++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/bioimageio/spec/collection/v0_2/raw_nodes.py b/bioimageio/spec/collection/v0_2/raw_nodes.py index b26b1a385..c285b8fae 100644 --- a/bioimageio/spec/collection/v0_2/raw_nodes.py +++ b/bioimageio/spec/collection/v0_2/raw_nodes.py @@ -5,7 +5,7 @@ RDF <--schema--> raw nodes """ import distutils.version -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, NewType, Union @@ -30,9 +30,12 @@ class CollectionEntry(RawNode): rdf_source: Union[_Missing, URI] = missing rdf_update: Dict[str, Any] = missing - def __init__(self, rdf_source=missing, **rdf_update): + def __init__( + self, rdf_source: Union[_Missing, URI] = missing, rdf_update: Dict[str, Any] = missing, **implicit_rdf_update + ): self.rdf_source = rdf_source - self.rdf_update = rdf_update + self.rdf_update = rdf_update or {} + 
self.rdf_update.update(implicit_rdf_update) super().__init__() @@ -65,7 +68,8 @@ def __init__( tags: List[str], # collection RDF collection: List[CollectionEntry], - **unknown, + unknown: Dict[str, Any] = missing, + **implicitly_unknown, ): self.collection = collection super().__init__( @@ -87,4 +91,5 @@ def __init__( type=type, version=version, ) - self.unknown = unknown + self.unknown = unknown or {} + self.unknown.update(implicitly_unknown) diff --git a/bioimageio/spec/rdf/v0_2/raw_nodes.py b/bioimageio/spec/rdf/v0_2/raw_nodes.py index 327b26eab..1582a77d1 100644 --- a/bioimageio/spec/rdf/v0_2/raw_nodes.py +++ b/bioimageio/spec/rdf/v0_2/raw_nodes.py @@ -33,9 +33,15 @@ class Attachments(RawNode): files: Union[_Missing, List[Union[Path, URI]]] = missing unknown: Dict[str, Any] = missing - def __init__(self, files: Union[_Missing, List[Union[Path, URI]]] = missing, **unknown): + def __init__( + self, + files: Union[_Missing, List[Union[Path, URI]]] = missing, + unknown: Dict[str, Any] = missing, + **implicitly_unknown, + ): self.files = files - self.unknown = unknown + self.unknown = unknown or {} + self.unknown.update(implicitly_unknown) super().__init__() From b2e844466edb202e6e825f8f952d0bbf81d26572 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 18:27:05 +0100 Subject: [PATCH 08/17] add option to overwrite collection_id --- bioimageio/spec/collection/v0_2/utils.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/bioimageio/spec/collection/v0_2/utils.py b/bioimageio/spec/collection/v0_2/utils.py index 9a2fd4470..ac6a69727 100644 --- a/bioimageio/spec/collection/v0_2/utils.py +++ b/bioimageio/spec/collection/v0_2/utils.py @@ -10,21 +10,23 @@ def filter_resource_description(raw_rd: raw_nodes.RDF) -> raw_nodes.RDF: return raw_rd -def resolve_collection_entries(raw_rd: raw_nodes.Collection) -> List[Tuple[dict, Optional[str]]]: +def resolve_collection_entries( + collection: raw_nodes.Collection, collection_id: 
Optional[str] = None +) -> List[Tuple[dict, Optional[str]]]: from bioimageio.spec import serialize_raw_resource_description_to_dict from bioimageio.spec.shared.utils import resolve_rdf_source ret = [] seen_ids = set() - for idx, entry in enumerate(raw_rd.collection): # type: ignore + for idx, entry in enumerate(collection.collection): # type: ignore entry_error: Optional[str] = None id_info = f"(id={entry.rdf_update['id']}) " if "id" in entry.rdf_update else "" # rdf entries are based on collection RDF... - rdf_data = serialize_raw_resource_description_to_dict(raw_rd) + rdf_data = serialize_raw_resource_description_to_dict(collection) rdf_data.pop("collection") # ... without the collection field to avoid recursion - root_id = rdf_data.pop("id", missing) + root_id = rdf_data.pop("id", None) if collection_id is None else collection_id # update rdf entry with entry's rdf_source sub_id: Union[str, _Missing] = missing if entry.rdf_source is not missing: @@ -47,7 +49,11 @@ def resolve_collection_entries(raw_rd: raw_nodes.Collection) -> List[Tuple[dict, seen_ids.add(sub_id) rdf_data.update(rdf_update) - rdf_data["id"] = f"{root_id}/{sub_id}" + if root_id is None: + rdf_data["id"] = sub_id + else: + rdf_data["id"] = f"{root_id}/{sub_id}" + ret.append((rdf_data, entry_error)) return ret From f6a31a2b8bcf45a3cbbcd5320f5acc80270f8df5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 20:17:49 +0100 Subject: [PATCH 09/17] fix rdf_update serialization --- bioimageio/spec/collection/v0_2/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bioimageio/spec/collection/v0_2/utils.py b/bioimageio/spec/collection/v0_2/utils.py index ac6a69727..b3199c23e 100644 --- a/bioimageio/spec/collection/v0_2/utils.py +++ b/bioimageio/spec/collection/v0_2/utils.py @@ -3,7 +3,7 @@ from marshmallow import missing from marshmallow.utils import _Missing -from . import raw_nodes +from . 
import raw_nodes, schema def filter_resource_description(raw_rd: raw_nodes.RDF) -> raw_nodes.RDF: @@ -39,7 +39,7 @@ def resolve_collection_entries( rdf_data.update(remote_rdf_update) # update rdf entry with fields specified directly in the entry - rdf_update = dict(entry.rdf_update) + rdf_update = schema.CollectionEntry().dump(entry)["rdf_update"] sub_id = rdf_update.pop("id", sub_id) if sub_id is missing: entry_error = f"collection[{idx}]: Missing `id` field" @@ -54,6 +54,7 @@ def resolve_collection_entries( else: rdf_data["id"] = f"{root_id}/{sub_id}" + assert missing not in rdf_data.values() ret.append((rdf_data, entry_error)) return ret From 2684d45f6d5c8c916ec93852c3cf0e465c9169d0 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 20:34:05 +0100 Subject: [PATCH 10/17] add asserts --- bioimageio/spec/collection/v0_2/utils.py | 2 ++ bioimageio/spec/io_.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/bioimageio/spec/collection/v0_2/utils.py b/bioimageio/spec/collection/v0_2/utils.py index b3199c23e..43255f5ca 100644 --- a/bioimageio/spec/collection/v0_2/utils.py +++ b/bioimageio/spec/collection/v0_2/utils.py @@ -24,6 +24,7 @@ def resolve_collection_entries( # rdf entries are based on collection RDF... rdf_data = serialize_raw_resource_description_to_dict(collection) + assert missing not in rdf_data.values() rdf_data.pop("collection") # ... 
without the collection field to avoid recursion root_id = rdf_data.pop("id", None) if collection_id is None else collection_id @@ -36,6 +37,7 @@ def resolve_collection_entries( entry_error = f"collection[{idx}]: {id_info}Invalid rdf_source: {e}" else: sub_id = remote_rdf_update.pop("id", missing) + assert missing not in remote_rdf_update.values() rdf_data.update(remote_rdf_update) # update rdf entry with fields specified directly in the entry diff --git a/bioimageio/spec/io_.py b/bioimageio/spec/io_.py index ab9f7e494..56c23e7dc 100644 --- a/bioimageio/spec/io_.py +++ b/bioimageio/spec/io_.py @@ -11,6 +11,8 @@ from types import ModuleType from typing import Dict, IO, Optional, Sequence, Tuple, Union +from marshmallow import missing + from bioimageio.spec.shared import raw_nodes from bioimageio.spec.shared.common import ( BIOIMAGEIO_CACHE_PATH, @@ -206,6 +208,7 @@ def serialize_raw_resource_description_to_dict(raw_rd: RawResourceDescription) - schema: SharedBioImageIOSchema = getattr(sub_spec.schema, class_name)() serialized = schema.dump(raw_rd) assert isinstance(serialized, dict) + assert missing not in serialized.values() return serialized From 5a0cae45081a17078cfee2a6b632c724bfafaac0 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 20:36:08 +0100 Subject: [PATCH 11/17] fix serialization in resolve_collection_entries --- bioimageio/spec/collection/v0_2/utils.py | 4 +++- bioimageio/spec/shared/fields.py | 14 +++++++++++--- bioimageio/spec/shared/schema.py | 4 +++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/bioimageio/spec/collection/v0_2/utils.py b/bioimageio/spec/collection/v0_2/utils.py index 43255f5ca..fa4b34889 100644 --- a/bioimageio/spec/collection/v0_2/utils.py +++ b/bioimageio/spec/collection/v0_2/utils.py @@ -41,7 +41,8 @@ def resolve_collection_entries( rdf_data.update(remote_rdf_update) # update rdf entry with fields specified directly in the entry - rdf_update = schema.CollectionEntry().dump(entry)["rdf_update"] + 
rdf_update = schema.CollectionEntry().dump(entry) + assert missing not in rdf_update.values() sub_id = rdf_update.pop("id", sub_id) if sub_id is missing: entry_error = f"collection[{idx}]: Missing `id` field" @@ -56,6 +57,7 @@ def resolve_collection_entries( else: rdf_data["id"] = f"{root_id}/{sub_id}" + rdf_data.pop("rdf_source", None) # remove rdf_source as we return a plain dict that has no simple source file assert missing not in rdf_data.values() ret.append((rdf_data, entry_error)) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index f9945b096..409c45231 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -7,7 +7,7 @@ import marshmallow_union import numpy -from marshmallow import ValidationError, fields as marshmallow_fields, Schema +from marshmallow import Schema, ValidationError, fields as marshmallow_fields, missing from . import field_validators, raw_nodes @@ -119,13 +119,21 @@ class YamlDict(Dict): @staticmethod def _make_yaml_friendly(obj): if isinstance(obj, (list, tuple)): - return [YamlDict._make_yaml_friendly(ob) for ob in obj] + return [YamlDict._make_yaml_friendly(ob) for ob in obj if ob is not missing] elif isinstance(obj, dict): - return {YamlDict._make_yaml_friendly(k): YamlDict._make_yaml_friendly(v) for k, v in obj.items()} + return { + YamlDict._make_yaml_friendly(k): YamlDict._make_yaml_friendly(v) + for k, v in obj.items() + if v is not missing + } elif obj is None or isinstance(obj, (float, int, str, bool)): return obj elif isinstance(obj, pathlib.PurePath): return obj.as_posix() + elif isinstance(obj, raw_nodes.URI): + return str(obj) + elif obj is missing: + return missing else: raise TypeError(f"Encountered YAML unfriendly type: {type(obj)}") diff --git a/bioimageio/spec/shared/schema.py b/bioimageio/spec/shared/schema.py index d53b64753..38b7d7c1a 100644 --- a/bioimageio/spec/shared/schema.py +++ b/bioimageio/spec/shared/schema.py @@ -74,7 +74,9 @@ def 
make_object(self, data, **kwargs): @post_dump(pass_original=True) def keep_unknowns(self, output, orig, **kwargs): if orig and hasattr(orig, self.field_name_unknown_dict): - out_w_unknown = dict(getattr(orig, self.field_name_unknown_dict)) + out_w_unknown = fields.YamlDict()._serialize( + getattr(orig, self.field_name_unknown_dict), self.field_name_unknown_dict, self + ) out_w_unknown.update(output) return out_w_unknown else: From 2d64d6bab82b19287375e4babcdcac4364275add Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 20:53:31 +0100 Subject: [PATCH 12/17] fix collection converter --- bioimageio/spec/collection/v0_2/converters.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bioimageio/spec/collection/v0_2/converters.py b/bioimageio/spec/collection/v0_2/converters.py index 102b57024..3d1cf5311 100644 --- a/bioimageio/spec/collection/v0_2/converters.py +++ b/bioimageio/spec/collection/v0_2/converters.py @@ -1,7 +1,5 @@ from typing import Any, Dict -from bioimageio.spec.rdf.v0_2.converters import maybe_convert as maybe_convert_rdf - def maybe_convert(data: Dict[str, Any]) -> Dict[str, Any]: if data.get("format_version") in ("0.2.0", "0.2.1"): @@ -21,4 +19,4 @@ def maybe_convert(data: Dict[str, Any]) -> Dict[str, Any]: data["format_version"] = "0.2.2" - return maybe_convert_rdf(data) + return data From ce0d6a5cc715da2b601f0a62e304d226b3c13561 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 21:10:30 +0100 Subject: [PATCH 13/17] deep copy in converters otherwise we are messing with marshmallow internals --- bioimageio/spec/collection/v0_2/converters.py | 2 ++ bioimageio/spec/model/v0_3/converters.py | 4 ++++ bioimageio/spec/rdf/v0_2/converters.py | 3 +++ 3 files changed, 9 insertions(+) diff --git a/bioimageio/spec/collection/v0_2/converters.py b/bioimageio/spec/collection/v0_2/converters.py index 3d1cf5311..48f2b67da 100644 --- a/bioimageio/spec/collection/v0_2/converters.py +++
b/bioimageio/spec/collection/v0_2/converters.py @@ -1,7 +1,9 @@ +import copy from typing import Any, Dict def maybe_convert(data: Dict[str, Any]) -> Dict[str, Any]: + data = copy.deepcopy(data) if data.get("format_version") in ("0.2.0", "0.2.1"): # move all type groups to the 'collection' field if "collection" not in data: diff --git a/bioimageio/spec/model/v0_3/converters.py b/bioimageio/spec/model/v0_3/converters.py index 0b290c226..4293e5655 100644 --- a/bioimageio/spec/model/v0_3/converters.py +++ b/bioimageio/spec/model/v0_3/converters.py @@ -1,3 +1,4 @@ +import copy import pathlib from typing import Any, Dict @@ -91,6 +92,9 @@ def convert_model_v0_3_2_to_v0_3_3(data: Dict[str, Any]) -> Dict[str, Any]: def maybe_convert(data: Dict[str, Any]) -> Dict[str, Any]: """auto converts model 'data' to newest format""" + + data = copy.deepcopy(data) + if data.get("format_version", "0.3.0") == "0.3.0": # no breaking change, bump to 0.3.1 data["format_version"] = "0.3.1" diff --git a/bioimageio/spec/rdf/v0_2/converters.py b/bioimageio/spec/rdf/v0_2/converters.py index 28e34514c..558b7454d 100644 --- a/bioimageio/spec/rdf/v0_2/converters.py +++ b/bioimageio/spec/rdf/v0_2/converters.py @@ -1,7 +1,10 @@ +import copy from typing import Any, Dict def maybe_convert(data: Dict[str, Any]) -> Dict[str, Any]: + data = copy.deepcopy(data) + # we unofficially accept strings as author entries... 
authors = data.get("authors") if isinstance(authors, list): From d979a847233701c0a24cbec7efca5a00dd697653 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 23:03:49 +0100 Subject: [PATCH 14/17] fix raw_nodes.RDF.icon --- bioimageio/spec/commands.py | 6 ++---- bioimageio/spec/rdf/v0_2/raw_nodes.py | 4 +++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bioimageio/spec/commands.py b/bioimageio/spec/commands.py index 74424c4f4..4893a98b3 100644 --- a/bioimageio/spec/commands.py +++ b/bioimageio/spec/commands.py @@ -2,17 +2,15 @@ import traceback import warnings from pathlib import Path -from typing import Any, Dict, IO, Optional, Union +from typing import Any, Dict, IO, Union -from marshmallow import ValidationError, missing -from marshmallow.utils import _Missing +from marshmallow import ValidationError from .collection.v0_2.utils import resolve_collection_entries from .io_ import ( load_raw_resource_description, resolve_rdf_source, save_raw_resource_description, - serialize_raw_resource_description_to_dict, ) from .shared.common import ValidationWarning, nested_default_dict_as_nested_dict diff --git a/bioimageio/spec/rdf/v0_2/raw_nodes.py b/bioimageio/spec/rdf/v0_2/raw_nodes.py index 1582a77d1..600c8fc2d 100644 --- a/bioimageio/spec/rdf/v0_2/raw_nodes.py +++ b/bioimageio/spec/rdf/v0_2/raw_nodes.py @@ -118,6 +118,7 @@ def __init__( documentation: Path, git_repo: Union[_Missing, str] = missing, id: Union[_Missing, str] = missing, + icon: Union[_Missing, str] = missing, license: Union[_Missing, str] = missing, links: Union[_Missing, List[str]] = missing, maintainers: Union[_Missing, List[Maintainer]] = missing, @@ -135,6 +136,7 @@ def __init__( self.documentation = documentation self.git_repo = git_repo self.id = id + self.icon = icon self.license = license self.links = links self.maintainers = maintainers @@ -146,7 +148,7 @@ def __init__( # make sure we didn't forget a defined field field_names = set(f.name for f in dataclasses.fields(self)) 
for uk in unknown_kwargs: - assert uk not in field_names + assert uk not in field_names, uk warnings.warn(f"discarding unknown RDF fields: {unknown_kwargs}") From 1a2e9a1440e69218a4eda646f7d58920d6f0f7aa Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 23:12:16 +0100 Subject: [PATCH 15/17] support datetime.datetime in YamlDict --- bioimageio/spec/shared/fields.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 409c45231..721ea37d4 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -132,6 +132,8 @@ def _make_yaml_friendly(obj): return obj.as_posix() elif isinstance(obj, raw_nodes.URI): return str(obj) + elif isinstance(obj, (datetime.datetime, datetime.time)): + return obj.isoformat() elif obj is missing: return missing else: From b938d8173281b87a4275026f9deee975bebe21ac Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 23:14:21 +0100 Subject: [PATCH 16/17] fix latest patch version in tests --- .../{rdf_v0_3_4.yaml => rdf_v0_3_6.yaml} | 2 +- tests/conftest.py | 6 +++--- tests/test_dump_spec.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename example_specs/models/unet2d_nuclei_broad/{rdf_v0_3_4.yaml => rdf_v0_3_6.yaml} (99%) diff --git a/example_specs/models/unet2d_nuclei_broad/rdf_v0_3_4.yaml b/example_specs/models/unet2d_nuclei_broad/rdf_v0_3_6.yaml similarity index 99% rename from example_specs/models/unet2d_nuclei_broad/rdf_v0_3_4.yaml rename to example_specs/models/unet2d_nuclei_broad/rdf_v0_3_6.yaml index 0c095fb26..cedad7277 100644 --- a/example_specs/models/unet2d_nuclei_broad/rdf_v0_3_4.yaml +++ b/example_specs/models/unet2d_nuclei_broad/rdf_v0_3_6.yaml @@ -1,5 +1,5 @@ # TODO physical scale of the data -format_version: 0.3.4 +format_version: 0.3.6 name: UNet 2D Nuclei Broad description: A 2d U-Net trained on the nuclei broad dataset. 
diff --git a/tests/conftest.py b/tests/conftest.py index 794426530..0cb7611a4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,12 +21,12 @@ def get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) -> dict: return yaml.load(path) -@pytest.fixture(params=["v0_3_0", "v0_3_1", "v0_3_2", "v0_3_3", "v0_3_4", "v0_4_0", "v0_4_2"]) +@pytest.fixture(params=["v0_3_0", "v0_3_1", "v0_3_2", "v0_3_3", "v0_3_6", "v0_4_0", "v0_4_2"]) def unet2d_nuclei_broad_any(unet2d_nuclei_broad_base_path, request): yield get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) -@pytest.fixture(params=["v0_3_0", "v0_3_1", "v0_3_2", "v0_3_3", "v0_3_4", "v0_4_0"]) +@pytest.fixture(params=["v0_3_0", "v0_3_1", "v0_3_2", "v0_3_3", "v0_3_6", "v0_4_0"]) def unet2d_nuclei_broad_before_latest(unet2d_nuclei_broad_base_path, request): yield get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) @@ -36,7 +36,7 @@ def unet2d_nuclei_broad_latest(unet2d_nuclei_broad_base_path, request): yield get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) -@pytest.fixture(params=["v0_3_4", "v0_4_2"]) +@pytest.fixture(params=["v0_3_6", "v0_4_2"]) def unet2d_nuclei_broad_any_minor(unet2d_nuclei_broad_base_path, request): yield get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) diff --git a/tests/test_dump_spec.py b/tests/test_dump_spec.py index 37cd181c4..b87c336c7 100644 --- a/tests/test_dump_spec.py +++ b/tests/test_dump_spec.py @@ -46,7 +46,7 @@ def test_dataset_rdf_round_trip(): covers=["https://raw.githubusercontent.com/ilastik/bioimage-io-models/main/dataset_src/platy-cover0.png"], description="Training data for EM segmentation of cellular membranes, nuclei, cuticle and cilia in Platynereis.", documentation="https://raw.githubusercontent.com/ilastik/bioimage-io-models/main/dataset_src/platy.md", - format_version="0.2.1", + format_version="0.2.2", license="CC-BY-4.0", name="Platynereis EM Traning Data", source="https://doi.org/10.5281/zenodo.3675220", From 
839360ed9978c1356fa7768b09e99e48702f8f78 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 12 Jan 2022 23:18:01 +0100 Subject: [PATCH 17/17] bump post --- bioimageio/spec/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/spec/VERSION b/bioimageio/spec/VERSION index 70753fc9d..4fc4ac5ad 100644 --- a/bioimageio/spec/VERSION +++ b/bioimageio/spec/VERSION @@ -1,3 +1,3 @@ { - "version": "0.4.2" + "version": "0.4.2patch1" }