From 738ea6411450157f29c4a5328da452f69305d8e0 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 15 Jun 2021 15:48:37 +0200 Subject: [PATCH 01/14] Add utility function for adding additional weight formats to model spec --- bioimageio/spec/latest/__init__.py | 2 +- bioimageio/spec/latest/build_spec.py | 22 +++++++++++++++++++++- bioimageio/spec/v0_3/utils.py | 6 +++--- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/bioimageio/spec/latest/__init__.py b/bioimageio/spec/latest/__init__.py index 1a9f68d18..a12252c4f 100644 --- a/bioimageio/spec/latest/__init__.py +++ b/bioimageio/spec/latest/__init__.py @@ -1,2 +1,2 @@ from bioimageio.spec.v0_3 import * # noqa -from .build_spec import build_spec +from .build_spec import build_spec, add_weights diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py index 3235ebc43..11aa3d00d 100644 --- a/bioimageio/spec/latest/build_spec.py +++ b/bioimageio/spec/latest/build_spec.py @@ -48,7 +48,6 @@ def _infer_weight_type(path): raise ValueError(f"Could not infer weight type from extension {ext} for weight file {path}") -# TODO extend supported weight types def _get_weights(weight_uri, weight_type, source, root, **kwargs): weight_path = _get_local_path(weight_uri, root) if weight_type is None: @@ -219,6 +218,7 @@ def _build_cite(cite): return citation_list +# TODO we should make the name more specific: "build_model_spec"? # NOTE does not support multiple input / output tensors yet # to implement this we should wait for 0.4.0, see also # https://github.com/bioimage-io/spec-bioimage-io/issues/70#issuecomment-825737433 @@ -412,3 +412,23 @@ def build_spec( model = spec.schema.Model().load(serialized) return model + + +def add_weights( + model: spec.raw_nodes.Model, + weight_uri: str, + root: Optional[str] = None, + weight_type: Optional[str] = None, + **weight_kwargs +): + """ Add weight entry to bioimage.io model. + """ + new_weights = _get_weights(weight_uri, weight_type, None, root, **weight_kwargs)[0] + model.weights.update(new_weights) + + # FIXME this fails with + # ImportedSource(factory=) has unexpected type + serialized = spec.schema.Model().dump(model) + model = spec.schema.Model().load(serialized) + + return model diff --git a/bioimageio/spec/v0_3/utils.py b/bioimageio/spec/v0_3/utils.py index d1ab884e3..203cb77a4 100644 --- a/bioimageio/spec/v0_3/utils.py +++ b/bioimageio/spec/v0_3/utils.py @@ -35,9 +35,9 @@ def _(source: dict, root_path: Optional[pathlib.Path] = None) -> Tuple[raw_nodes def _(source: os.PathLike, root_path: Optional[pathlib.Path] = None) -> Tuple[raw_nodes.Model, pathlib.Path]: source = pathlib.Path(source) - suffixes = source.suffixes - if len(suffixes) < 2 or suffixes[-1] not in (".yml", ".yaml") or source.suffixes[-2] != ".model": - raise ValidationError(f"invalid suffixes {''.join(suffixes)} for source {source}") + # suffixes = source.suffixes + # if len(suffixes) < 2 or suffixes[-1] not in (".yml", ".yaml") or source.suffixes[-2] != ".model": + # raise ValidationError(f"invalid suffixes {''.join(suffixes)} for source {source}") data = yaml.load(source) From f981dd4b7bfae41f8cc40f0f30c405b39a777f98 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 15 Jun 2021 18:03:50 +0200 Subject: [PATCH 02/14] Add model serialization function that removes empty defaults --- bioimageio/spec/latest/__init__.py | 2 +- bioimageio/spec/latest/build_spec.py | 15 ++++++++++++--- setup.py | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/bioimageio/spec/latest/__init__.py b/bioimageio/spec/latest/__init__.py index a12252c4f..54b35e232 100644 --- a/bioimageio/spec/latest/__init__.py +++ b/bioimageio/spec/latest/__init__.py @@ -1,2 +1,2 @@ from bioimageio.spec.v0_3 import * # noqa -from .build_spec import build_spec, add_weights +from .build_spec import add_weights, build_spec, serialize_spec diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py index 11aa3d00d..08d08c327 100644 --- a/bioimageio/spec/latest/build_spec.py +++ b/bioimageio/spec/latest/build_spec.py @@ -5,6 +5,7 @@ import numpy as np import bioimageio.spec as spec +from boltons.iterutils import remap # # utility functions to build the spec from python @@ -415,7 +416,7 @@ def build_spec( def add_weights( - model: spec.raw_nodes.Model, + model, weight_uri: str, root: Optional[str] = None, weight_type: Optional[str] = None, @@ -426,9 +427,17 @@ def add_weights( new_weights = _get_weights(weight_uri, weight_type, None, root, **weight_kwargs)[0] model.weights.update(new_weights) - # FIXME this fails with - # ImportedSource(factory=) has unexpected type serialized = spec.schema.Model().dump(model) model = spec.schema.Model().load(serialized) return model + + +def serialize_spec(model, out_path, clear_defaults=True): + serialized = spec.schema.Model().dump(model) + # clear the default values using boltons remap + if clear_defaults: + defaults = ([], {}, None) + cleared = remap(serialized, visit=lambda p, k, v: v not in defaults) + with open(out_path, 'w') as f: + spec.utils.yaml.dump(cleared, f) diff --git a/setup.py b/setup.py index 06fd045d8..a7557317a 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ packages=find_namespace_packages(exclude=["tests"]), # Required install_requires=[ "PyYAML>=5.2", + "boltons", "imageio>=2.5", "marshmallow>=3.6.0,<4.0", "marshmallow_jsonschema", From fa8b190344713cd20522448117d86c033a311b7a Mon Sep 17 00:00:00 2001 From: FynnBe Date: Thu, 17 Jun 2021 10:09:53 +0200 Subject: [PATCH 03/14] do not check for .model suffix but yml/yaml --- bioimageio/spec/v0_3/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bioimageio/spec/v0_3/utils.py b/bioimageio/spec/v0_3/utils.py index 203cb77a4..1e4c741d1 100644 --- a/bioimageio/spec/v0_3/utils.py +++ b/bioimageio/spec/v0_3/utils.py @@ -35,9 +35,8 @@ def _(source: dict, root_path: Optional[pathlib.Path] = None) -> Tuple[raw_nodes def _(source: os.PathLike, root_path: Optional[pathlib.Path] = None) -> Tuple[raw_nodes.Model, pathlib.Path]: source = pathlib.Path(source) - # suffixes = source.suffixes - # if len(suffixes) < 2 or suffixes[-1] not in (".yml", ".yaml") or source.suffixes[-2] != ".model": - # raise ValidationError(f"invalid suffixes {''.join(suffixes)} for source {source}") + if source.suffix not in (".yml", ".yaml"): + raise ValidationError(f"invalid suffix {source.suffix} for source {source}") data = yaml.load(source) From ed9340a3aabf945bbe8cb7f8c940942149a6d4c5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 10:29:49 +0200 Subject: [PATCH 04/14] mkdir dist --- scripts/generate_docs.py | 4 +++- scripts/generate_json_specs.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/generate_docs.py b/scripts/generate_docs.py index 13277edee..04b334dca 100644 --- a/scripts/generate_docs.py +++ b/scripts/generate_docs.py @@ -126,4 +126,6 @@ def export_markdown_docs(folder: Path): if __name__ == "__main__": - export_markdown_docs(Path(__file__).parent / "../dist") + dist = Path(__file__).parent / "../dist" + dist.mkdir(exist_ok=True) + export_markdown_docs(dist) diff --git a/scripts/generate_json_specs.py b/scripts/generate_json_specs.py index e631ba624..498a10af7 100644 --- a/scripts/generate_json_specs.py +++ b/scripts/generate_json_specs.py @@ -16,4 +16,7 @@ def export_json_model_spec(path: Path): if __name__ == "__main__": - export_json_model_spec(Path(__file__).parent / f"../dist/model_spec_{bioimageio.spec.__version__}.json") + dist = Path(__file__).parent / "../dist" + dist.mkdir(exist_ok=True) + + export_json_model_spec(dist / f"model_spec_{bioimageio.spec.__version__}.json") From 964735f9a1918e6ddcb77fa1b6e62a1a16bfb2e9 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 10:30:00 +0200 Subject: [PATCH 05/14] warn about ".yml" --- bioimageio/spec/v0_3/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bioimageio/spec/v0_3/utils.py b/bioimageio/spec/v0_3/utils.py index 1e4c741d1..5971226f3 100644 --- a/bioimageio/spec/v0_3/utils.py +++ b/bioimageio/spec/v0_3/utils.py @@ -1,5 +1,6 @@ import os import pathlib +import warnings from functools import singledispatch from typing import Optional, Sequence, Tuple @@ -37,6 +38,11 @@ def _(source: os.PathLike, root_path: Optional[pathlib.Path] = None) -> Tuple[ra if source.suffix not in (".yml", ".yaml"): raise ValidationError(f"invalid suffix {source.suffix} for source {source}") + elif source.suffix == ".yml": + warnings.warn( + "suffix '.yml' is not recommended and will raise a ValidationError in the future. Use '.yaml' instead " + "(https://yaml.org/faq.html)" + ) data = yaml.load(source) From a5fc68ebc6a4540668c7e6a2618aac43c74ae1d1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 11:56:19 +0200 Subject: [PATCH 06/14] remove empty default (None, [], {}) --- bioimageio/spec/shared/fields.py | 4 +- bioimageio/spec/v0_3/converters.py | 4 + bioimageio/spec/v0_3/raw_nodes.py | 71 ++++----- bioimageio/spec/v0_3/schema.py | 135 ++++++++---------- .../RandomForestClassifier_v0_3_1.model.yaml | 1 - .../RandomForestClassifier_v0_3_2.model.yaml | 2 - tests/conftest.py | 5 + tests/test_dump_spec.py | 22 +-- tests/test_format_version_conversion.py | 10 +- tests/test_load_spec.py | 2 +- tests/test_schema.py | 8 +- 11 files changed, 115 insertions(+), 149 deletions(-) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 97d90e3e4..086fee36e 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -260,8 +260,8 @@ def __init__(self, **super_kwargs): class Kwargs(Dict): - def __init__(self, keys=String, missing=dict, bioimageio_description="Key word arguments.", **super_kwargs): - super().__init__(keys, missing=missing, bioimageio_description=bioimageio_description, **super_kwargs) + def __init__(self, keys=String, bioimageio_description="Key word arguments.", **super_kwargs): + super().__init__(keys, bioimageio_description=bioimageio_description, **super_kwargs) class OutputShape(Union): diff --git a/bioimageio/spec/v0_3/converters.py b/bioimageio/spec/v0_3/converters.py index d8b9324e0..acc99feef 100644 --- a/bioimageio/spec/v0_3/converters.py +++ b/bioimageio/spec/v0_3/converters.py @@ -194,6 +194,10 @@ def maybe_convert_model(data: Dict[str, Any]) -> Dict[str, Any]: if config.get("future") == {}: del config["future"] + # remove 'config' if now empty + if data.get("config") == {}: + del data["config"] + return data diff --git a/bioimageio/spec/v0_3/raw_nodes.py b/bioimageio/spec/v0_3/raw_nodes.py index 292ee150f..5e92fc59b 100644 --- a/bioimageio/spec/v0_3/raw_nodes.py +++ b/bioimageio/spec/v0_3/raw_nodes.py @@ -1,9 +1,10 @@ import distutils.version from dataclasses import dataclass from datetime import datetime -from typing import Any, Dict, List, NewType, Optional, Tuple, Union +from typing import Any, Dict, List, NewType, Tuple, Union from marshmallow import missing +from marshmallow.utils import _Missing from bioimageio.spec.shared.raw_nodes import ( ImplicitInputShape, @@ -51,8 +52,8 @@ @dataclass class Author(Node): name: str = missing - affiliation: Optional[str] = missing - orcid: Optional[str] = missing + affiliation: Union[_Missing, str] = missing + orcid: Union[_Missing, str] = missing ImportableSource = Union[ImportableModule, ImportablePath] @@ -61,37 +62,37 @@ class Author(Node): @dataclass class CiteEntry(Node): text: str = missing - doi: Optional[str] = missing - url: Optional[str] = missing + doi: Union[_Missing, str] = missing + url: Union[_Missing, str] = missing @dataclass class RunMode(Node): name: str = missing - kwargs: Dict[str, Any] = missing + kwargs: Union[_Missing, Dict[str, Any]] = missing @dataclass class RDF(Node): - attachments: Dict[str, Any] = missing + attachments: Union[_Missing, Dict[str, Any]] = missing authors: List[Author] = missing cite: List[CiteEntry] = missing - config: dict = missing - covers: List[URI] = missing - dependencies: Optional[Dependencies] = missing + config: Union[_Missing, dict] = missing + covers: Union[_Missing, List[URI]] = missing + dependencies: Union[_Missing, Dependencies] = missing description: str = missing documentation: URI = missing format_version: FormatVersion = missing - framework: Framework = missing - git_repo: Optional[str] = missing - language: Language = missing + framework: Union[_Missing, Framework] = missing + git_repo: Union[_Missing, str] = missing + language: Union[_Missing, Language] = missing license: str = missing name: str = missing - run_mode: Optional[RunMode] = missing + run_mode: Union[_Missing, RunMode] = missing tags: List[str] = missing timestamp: datetime = missing type: Type = missing - version: Optional[distutils.version.StrictVersion] = missing + version: Union[_Missing, distutils.version.StrictVersion] = missing @dataclass @@ -112,9 +113,9 @@ class InputTensor: data_type: str = missing axes: Axes = missing shape: Union[List[int], ImplicitInputShape] = missing - preprocessing: List[Preprocessing] = missing - description: Optional[str] = missing - data_range: Tuple[float, float] = missing + preprocessing: Union[_Missing, List[Preprocessing]] = missing + description: Union[_Missing, str] = missing + data_range: Union[_Missing, Tuple[float, float]] = missing @dataclass @@ -123,24 +124,24 @@ class OutputTensor: data_type: str = missing axes: Axes = missing shape: Union[List[int], ImplicitOutputShape] = missing - halo: List[int] = missing - postprocessing: List[Postprocessing] = missing - description: Optional[str] = missing - data_range: Tuple[float, float] = missing + halo: Union[_Missing, List[int]] = missing + postprocessing: Union[_Missing, List[Postprocessing]] = missing + description: Union[_Missing, str] = missing + data_range: Union[_Missing, Tuple[float, float]] = missing @dataclass class WeightsEntry(Node): - authors: List[Author] = missing - attachments: Dict = missing - parent: Optional[str] = missing + authors: Union[_Missing, List[Author]] = missing + attachments: Union[_Missing, Dict] = missing + parent: Union[_Missing, str] = missing # ONNX specific - opset_version: Optional[int] = missing + opset_version: Union[_Missing, int] = missing # tag: Optional[str] # todo: check schema. only valid for tensorflow_saved_model_bundle format # todo: check schema. only valid for tensorflow_saved_model_bundle format - sha256: str = missing + sha256: Union[_Missing, str] = missing source: URI = missing - tensorflow_version: Optional[distutils.version.StrictVersion] = missing + tensorflow_version: Union[_Missing, distutils.version.StrictVersion] = missing @dataclass @@ -152,14 +153,14 @@ class ModelParent(Node): @dataclass class Model(RDF): inputs: List[InputTensor] = missing - kwargs: Dict[str, Any] = missing + kwargs: Union[_Missing, Dict[str, Any]] = missing outputs: List[OutputTensor] = missing - packaged_by: List[Author] = missing - parent: ModelParent = missing - sample_inputs: List[URI] = missing - sample_outputs: List[URI] = missing - sha256: str = missing - source: Optional[ImportableSource] = missing + packaged_by: Union[_Missing, List[Author]] = missing + parent: Union[_Missing, ModelParent] = missing + sample_inputs: Union[_Missing, List[URI]] = missing + sample_outputs: Union[_Missing, List[URI]] = missing + sha256: Union[_Missing, str] = missing + source: Union[_Missing, ImportableSource] = missing test_inputs: List[URI] = missing test_outputs: List[URI] = missing weights: Dict[WeightsFormat, WeightsEntry] = missing diff --git a/bioimageio/spec/v0_3/schema.py b/bioimageio/spec/v0_3/schema.py index a9f0c4bd0..cd865d11d 100644 --- a/bioimageio/spec/v0_3/schema.py +++ b/bioimageio/spec/v0_3/schema.py @@ -2,7 +2,7 @@ import warnings import stdnum.iso7064.mod_11_2 -from marshmallow import Schema, ValidationError, post_load, validates, validates_schema +from marshmallow import Schema, ValidationError, missing as missing_, post_load, validates, validates_schema from spdx_license_list import LICENSES from bioimageio.spec.shared import field_validators, fields @@ -21,14 +21,13 @@ class PyBioSchema(SharedPyBioSchema): class Author(PyBioSchema): name = fields.String(required=True, bioimageio_description="Full name.") - affiliation = fields.String(missing=None, bioimageio_description="Affiliation.") + affiliation = fields.String(bioimageio_description="Affiliation.") orcid = fields.String( validate=[ field_validators.Length(19), lambda oid: all(oid[idx] == "-" for idx in [4, 9, 14]), lambda oid: stdnum.iso7064.mod_11_2.is_valid(oid.replace("-", "")), ], - missing=None, bioimageio_description="[orcid](https://support.orcid.org/hc/en-us/sections/360001495313-What-is-ORCID) id " "in hyphenated groups of 4 digits, e.g. '0000-0001-2345-6789' (and [valid](" "https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier" @@ -38,12 +37,12 @@ class Author(PyBioSchema): class CiteEntry(PyBioSchema): text = fields.String(required=True) - doi = fields.String(missing=None) - url = fields.String(missing=None) + doi = fields.String(bioimageio_maybe_required=True) + url = fields.String(bioimageio_maybe_required=True) @validates_schema def doi_or_url(self, data, **kwargs): - if data["doi"] is None and data["url"] is None: + if data.get("doi") is None and data.get("url") is None: raise ValidationError("doi or url needs to be specified in a citation") @@ -87,7 +86,6 @@ class RDF(PyBioSchema): git_repo = fields.String( validate=field_validators.URL(schemes=["http", "https"]), - missing=None, bioimageio_description="""A url to the git repository, e.g. to Github or Gitlab. If the model is contained in a subfolder of a git repository, then a url to the exact folder (which contains the configuration yaml file) should be used.""", @@ -119,7 +117,6 @@ def warn_about_deprecated_spdx_license(self, value: str): ) covers = fields.List( fields.URI, - missing=list, bioimageio_description="A list of cover images provided by either a relative path to the model folder, or a " "hyperlink starting with 'https'.Please use an image smaller than 500KB and an aspect ratio width to height " "of 2:1. The supported image formats are: 'jpg', 'png', 'gif'.", # todo: field_validators image format @@ -127,7 +124,6 @@ def warn_about_deprecated_spdx_license(self, value: str): attachments = fields.Dict( fields.String, fields.Union([fields.URI(), fields.List(fields.URI)]), - missing=dict, bioimageio_maybe_required=True, bioimageio_description="""Dictionary of text keys and URI (or a list of URI) values to additional, relevant files. E.g. we can place a list of URIs under the `files` to list images and other files that are necessary for the @@ -136,31 +132,27 @@ def warn_about_deprecated_spdx_license(self, value: str): run_mode = fields.Nested( RunMode, - missing=None, bioimageio_description="Custom run mode for this model: for more complex prediction procedures like test time " "data augmentation that currently cannot be expressed in the specification. The different run modes should be " "listed in [supported_formats_and_operations.md#Run Modes]" "(https://github.com/bioimage-io/configuration/blob/master/supported_formats_and_operations.md#run-modes).", ) - config = fields.Dict(missing=dict) + config = fields.Dict() language = fields.String( validate=field_validators.OneOf(get_args(raw_nodes.Language)), - missing=None, bioimageio_maybe_required=True, bioimageio_description=f"Programming language of the source code. One of: " f"{', '.join(get_args(raw_nodes.Language))}. This field is only required if the field `source` is present.", ) framework = fields.String( validate=field_validators.OneOf(get_args(raw_nodes.Framework)), - missing=None, bioimageio_description=f"The deep learning framework of the source code. One of: " f"{', '.join(get_args(raw_nodes.Framework))}. This field is only required if the field `source` is present.", ) dependencies = fields.Dependencies( - missing=None, bioimageio_description="Dependency manager and dependency file, specified as `:`. For example: 'conda:./environment.yaml', 'maven:./pom.xml', or 'pip:./requirements.txt'", + "path to file>`. For example: 'conda:./environment.yaml', 'maven:./pom.xml', or 'pip:./requirements.txt'" ) timestamp = fields.DateTime( required=True, @@ -169,10 +161,9 @@ def warn_about_deprecated_spdx_license(self, value: str): ) type = fields.String(validate=field_validators.OneOf(get_args(raw_nodes.Type))) version = fields.StrictVersion( - missing=None, bioimageio_description="The version number of the model. The version number format must be a string in " "`MAJOR.MINOR.PATCH` format following the guidelines in Semantic Versioning 2.0.0 (see https://semver.org/), " - "e.g. the initial version number should be `0.1.0`.", + "e.g. the initial version number should be `0.1.0`." ) @@ -185,7 +176,7 @@ class Tensor(PyBioSchema): name = fields.String( required=True, validate=field_validators.Predicate("isidentifier"), bioimageio_description="Tensor name." ) - description = fields.String(missing=None) + description = fields.String() axes = fields.Axes( required=True, bioimageio_description="""Axes identifying characters from: bitczyx. Same length and order as the axes in `shape`. @@ -209,7 +200,6 @@ class Tensor(PyBioSchema): ) data_range = fields.Tuple( (fields.Float(allow_nan=True), fields.Float(allow_nan=True)), - missing=(None, None), bioimageio_description="Tuple `(minimum, maximum)` specifying the allowed range of the data in this tensor. " "If not specified, the full data range that can be expressed in `data_type` is allowed.", ) @@ -231,6 +221,7 @@ def validate_processing_kwargs(self, data, **kwargs): class Processing(PyBioSchema): class Binarize(Schema): # do not inherit from PyBioSchema, return only a validated dict, no specific node + # todo: inherit from a "TransformSchema" that allows generation of docs for pre and postprocessing threshold = fields.Float(required=True) class Clip(PyBioSchema): @@ -260,7 +251,7 @@ def kwargs_match_selected_preprocessing_name(self, data, **kwargs): f"Schema {schema_name} for {data['name']} {self.__class__.__name__.lower()}" ) from missing_schema_error - kwargs_validation_errors = schema_class().validate(data["kwargs"]) + kwargs_validation_errors = schema_class().validate(data.get("kwargs", {})) if kwargs_validation_errors: raise ValidationError(f"Invalid `kwargs` for '{data['name']}': {kwargs_validation_errors}") @@ -270,17 +261,17 @@ class Sigmoid(PyBioSchema): class ZeroMeanUnitVariance(PyBioSchema): mode = fields.ProcMode(required=True) axes = fields.Axes(required=True, valid_axes="czyx") - mean = fields.Array(fields.Float(), missing=None) # todo: check if means match input axes (for mode 'fixed') - std = fields.Array(fields.Float(), missing=None) + mean = fields.Array(fields.Float()) # todo: check if means match input axes (for mode 'fixed') + std = fields.Array(fields.Float()) eps = fields.Float(missing=1e-6) @validates_schema def mean_and_std_match_mode(self, data, **kwargs): - if data["mode"] == "fixed" and (data["mean"] is None or data["std"] is None): + if data["mode"] == "fixed" and (data["mean"] is missing_ or data["std"] is missing_): raise ValidationError( "`kwargs` for 'zero_mean_unit_variance' preprocessing with `mode` 'fixed' require additional `kwargs`: `mean` and `std`." ) - elif data["mode"] != "fixed" and (data.get("mean") is not None or data.get("std") is not None): + elif data["mode"] != "fixed" and (data.get("mean") is not missing_ or data.get("std") is not missing_): raise ValidationError( "`kwargs`: `mean` and `std` for 'zero_mean_unit_variance' preprocessing are only valid for `mode` 'fixed'." ) @@ -338,7 +329,6 @@ class InputTensor(Tensor): shape = fields.InputShape(required=True, bioimageio_description="Specification of tensor shape.") preprocessing = fields.List( fields.Nested(Preprocessing), - missing=list, bioimageio_description="Description of how this input should be preprocessed.", ) processing_name = "preprocessing" @@ -376,7 +366,6 @@ class OutputTensor(Tensor): shape = fields.OutputShape(required=True) halo = fields.List( fields.Integer, - missing=None, bioimageio_description="The halo to crop from the output tensor (for example to crop away boundary effects or " "for tiling). The halo should be cropped from both sides, i.e. `shape_after_crop = shape - 2 * halo`. The " "`halo` is not cropped by the bioimage.io model, but is left to be cropped by the consumer software. Use " @@ -384,7 +373,6 @@ class OutputTensor(Tensor): ) postprocessing = fields.List( fields.Nested(Postprocessing), - missing=list, bioimageio_description="Description of how this output should be postprocessed.", ) processing_name = "postprocessing" @@ -392,7 +380,7 @@ class OutputTensor(Tensor): @validates_schema def matching_halo_length(self, data, **kwargs): shape = data["shape"] - halo = data["halo"] + halo = data.get("halo") if halo is None: return elif isinstance(shape, list) or isinstance(shape, raw_nodes.ImplicitOutputShape): @@ -401,14 +389,14 @@ def matching_halo_length(self, data, **kwargs): else: raise NotImplementedError(type(shape)) - @post_load - def make_object(self, data, **kwargs): - shape = data["shape"] - halo = data["halo"] - if halo is None: - data["halo"] = [0] * len(shape) - - return super().make_object(data, **kwargs) + # @post_load + # def make_object(self, data, **kwargs): + # shape = data["shape"] + # halo = data["halo"] + # if halo is missing_: + # data["halo"] = [0] * len(shape) + # + # return super().make_object(data, **kwargs) _common_sha256_hint = ( @@ -436,32 +424,28 @@ def make_object(self, data, **kwargs): class WeightsEntry(PyBioSchema): authors = fields.List( fields.Nested(Author), - missing=list, bioimageio_description="A list of authors. If this is the root weight (it does not have a `parent` field): the " "person(s) that have trained this model. If this is a child weight (it has a `parent` field): the person(s) " "who have converted the weights to this format.", ) # todo: copy root authors if missing attachments = fields.Dict( - missing=dict, bioimageio_description="Dictionary of text keys and URI (or a list of URI) values to additional, relevant " "files that are specific to the current weight format. A list of URIs can be listed under the `files` key to " "included additional files for generating the model package.", ) parent = fields.String( - missing=None, bioimageio_description="The source weights used as input for converting the weights to this format. For " "example, if the weights were converted from the format `pytorch_state_dict` to `pytorch_script`, the parent " "is `pytorch_state_dict`. All weight entries except one (the initial set of weights resulting from training " "the model), need to have this field.", ) - opset_version = fields.Number(missing=None) # ONNX Specific + opset_version = fields.Number() # ONNX Specific sha256 = fields.String( validate=field_validators.Length(equal=64), - missing=None, bioimageio_description="SHA256 checksum of the source file specified. " + _common_sha256_hint, ) source = fields.URI(required=True, bioimageio_description="Link to the source file. Preferably a url.") - tensorflow_version = fields.StrictVersion(missing=None) + tensorflow_version = fields.StrictVersion() class ModelParent(PyBioSchema): @@ -491,21 +475,18 @@ class Model(RDF): packaged_by = fields.List( fields.Nested(Author), - missing=list, bioimageio_description=f"The persons that have packaged and uploaded this model. Only needs to be specified if " f"different from `authors` in root or any {WeightsEntry.__name__}.", ) parent = fields.Nested( ModelParent, - missing=None, bioimageio_description="Parent model from which the trained weights of this model have been derived, e.g. by " "finetuning the weights of this model on a different dataset. For format changes of the same trained model " "checkpoint, see `weights`.", ) source = fields.ImportableSource( - missing=None, bioimageio_maybe_required=True, bioimageio_description="Language and framework specific implementation. As some weights contain the model " "architecture, the source is optional depending on the present weight formats. `source` can either point to a " @@ -515,7 +496,6 @@ class Model(RDF): ) sha256 = fields.String( validate=field_validators.Length(equal=64), - missing=None, bioimageio_description="SHA256 checksum of the model source code file." + _common_sha256_hint + " This field is only required if the field source is present.", @@ -557,18 +537,14 @@ class Model(RDF): sample_inputs = fields.List( fields.URI, - missing=[], bioimageio_description="List of URIs to sample inputs to illustrate possible inputs for the model, for example " "stored as png or tif images.", ) sample_outputs = fields.List( - fields.URI, - missing=[], - bioimageio_description="List of URIs to sample outputs corresponding to the `sample_inputs`.", + fields.URI, bioimageio_description="List of URIs to sample outputs corresponding to the `sample_inputs`." ) config = fields.Dict( - missing=dict, bioimageio_description=""" A custom configuration field that can contain any other keys which are not defined above. It can be very specifc to a framework or specific tool. To avoid conflicted definitions, it is recommended to wrap configuration into a sub-field named with the specific framework or tool name. @@ -590,34 +566,34 @@ class Model(RDF): runtime: 78.8s # Time it took to run the model pixel_size: [9.658E-4µmx9.658E-4µm] # Size of the pixels of the input ``` -""", +""" ) @validates_schema def language_and_framework_match(self, data, **kwargs): field_names = ("language", "framework") valid_combinations = [ - ("python", "scikit-learn"), + ("python", "scikit-learn"), # todo: remove ("python", "pytorch"), ("python", "tensorflow"), ("java", "tensorflow"), ] - if data["source"] is None: - valid_combinations.append((None, None)) - valid_combinations.append(("python", None)) # todo: in py3.9 use typing.get_args(raw_nodes.Langauge) - valid_combinations.append(("java", None)) + if "source" not in data: + valid_combinations.append((missing_, missing_)) + valid_combinations.append(("python", missing_)) + valid_combinations.append(("java", missing_)) - combination = tuple(data[name] for name in field_names) + combination = tuple(data.get(name, missing_) for name in field_names) if combination not in valid_combinations: raise ValidationError(f"invalid combination of {dict(zip(field_names, combination))}") @validates_schema def source_specified_if_required(self, data, **kwargs): - if data["source"] is not None: + if "source" in data: return weight_format_requires_source = { - "pickle": True, + "pickle": True, # todo: remove "pytorch_state_dict": True, "pytorch_script": False, "keras_hdf5": False, @@ -635,9 +611,12 @@ def source_specified_if_required(self, data, **kwargs): def validate_reference_tensor_names(self, data, **kwargs): valid_input_tensor_references = [ipt.name for ipt in data["inputs"]] for out in data["outputs"]: + if out.postprocessing is missing_: + continue + for postpr in out.postprocessing: - ref_tensor = postpr.kwargs.get("reference_tensor", None) - if ref_tensor is not None and ref_tensor not in valid_input_tensor_references: + ref_tensor = postpr.kwargs.get("reference_tensor", missing_) + if ref_tensor is not missing_ and ref_tensor not in valid_input_tensor_references: raise ValidationError(f"{ref_tensor} not found in inputs") @validates_schema @@ -645,19 +624,19 @@ def weights_entries_match_weights_formats(self, data, **kwargs): weights: typing.Dict[str, WeightsEntry] = data["weights"] for weights_format, weights_entry in weights.items(): if weights_format in ["keras_hdf5", "tensorflow_js", "tensorflow_saved_model_bundle"]: - if weights_entry.tensorflow_version is None: + if weights_entry.tensorflow_version is missing_: # todo: raise ValidationError (allow -> require)? warnings.warn(f"missing 'tensorflow_version' entry for weights format {weights_format}") else: - if weights_entry.tensorflow_version is not None: + if weights_entry.tensorflow_version is not missing_: raise ValidationError(f"invalid 'tensorflow_version' entry for weights format {weights_format}") if weights_format == "onnx": - if weights_entry.opset_version is None: + if weights_entry.opset_version is missing_: # todo: raise ValidationError? warnings.warn(f"missing 'opset_version' entry for weights format {weights_format}") else: - if weights_entry.opset_version is not None: + if weights_entry.opset_version is not missing_: raise ValidationError( f"invalid 'opset_version' entry for weights format {weights_format} (only valid for onnx)" ) @@ -667,7 +646,7 @@ def weights_entries_match_weights_formats(self, data, **kwargs): class BioImageIoManifestModelEntry(PyBioSchema): id = fields.String(required=True) source = fields.String(validate=field_validators.URL(schemes=["http", "https"])) - links = fields.List(fields.String, missing=list) + links = fields.List(fields.String) download_url = fields.String(validate=field_validators.URL(schemes=["http", "https"])) @@ -689,24 +668,24 @@ class BioImageIoManifestNotebookEntry(PyBioSchema): ) description = fields.String(required=True) - cite = fields.List(fields.Nested(CiteEntry), missing=list) + cite = fields.List(fields.Nested(CiteEntry)) authors = fields.List(fields.Nested(Author), required=True) - covers = fields.List(fields.URI, missing=list) + covers = fields.List(fields.URI) - badges = fields.List(fields.Nested(Badge), missing=list) - tags = fields.List(fields.String, missing=list) + badges = fields.List(fields.Nested(Badge)) + tags = fields.List(fields.String) source = fields.URI(required=True) - links = fields.List(fields.String, missing=list) # todo: make List[URI]? + links = fields.List(fields.String) # todo: make List[URI]? class BioImageIoManifest(PyBioSchema): format_version = fields.String( validate=field_validators.OneOf(get_args(raw_nodes.ManifestFormatVersion)), required=True ) - config = fields.Dict(missing=dict) + config = fields.Dict() - application = fields.List(fields.Dict, missing=list) - collection = fields.List(fields.Dict, missing=list) - model = fields.List(fields.Nested(BioImageIoManifestModelEntry), missing=list) - dataset = fields.List(fields.Dict, missing=list) - notebook = fields.List(fields.Nested(BioImageIoManifestNotebookEntry), missing=list) + application = fields.List(fields.Dict) + collection = fields.List(fields.Dict) + model = fields.List(fields.Nested(BioImageIoManifestModelEntry)) + dataset = fields.List(fields.Dict) + notebook = fields.List(fields.Nested(BioImageIoManifestNotebookEntry)) diff --git a/specs/models/sklearn/RandomForestClassifier_v0_3_1.model.yaml b/specs/models/sklearn/RandomForestClassifier_v0_3_1.model.yaml index 07ab8aa03..2e09b247b 100644 --- a/specs/models/sklearn/RandomForestClassifier_v0_3_1.model.yaml +++ b/specs/models/sklearn/RandomForestClassifier_v0_3_1.model.yaml @@ -15,7 +15,6 @@ license: MIT documentation: sklearnbased.md covers: [] -attachments: {} inputs: - name: raw diff --git a/specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml b/specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml index 164c58167..6caac0148 100644 --- a/specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml +++ b/specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml @@ -15,7 +15,6 @@ license: MIT documentation: sklearnbased.md covers: [] -attachments: {} inputs: - name: raw @@ -74,4 +73,3 @@ weights: sha256: abcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefgh type: model -version: null diff --git a/tests/conftest.py b/tests/conftest.py index bf1e721ef..8882e875a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,11 @@ def rf_config_path_v0_3_2(): return Path(__file__).parent / "../specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml" +@pytest.fixture +def rf_config_path_v0_3(rf_config_path_v0_3_2): + return rf_config_path_v0_3_2 + + @pytest.fixture def rf_config_path(rf_config_path_v0_3_2): return rf_config_path_v0_3_2 diff --git a/tests/test_dump_spec.py b/tests/test_dump_spec.py index a59bd42b7..3fd999e7c 100644 --- a/tests/test_dump_spec.py +++ b/tests/test_dump_spec.py @@ -15,29 +15,9 @@ def test_spec_roundtrip(rf_config_path): # yaml.dump(serialized, Path() / "serialized.yml") - # manually remove all inserted defaults to test round trip at raw data level - serialized_wo_defaults = deepcopy(serialized) - serialized_wo_defaults["cite"][0].pop("doi") - serialized_wo_defaults.pop("config") - serialized_wo_defaults["inputs"][0].pop("preprocessing") - serialized_wo_defaults["outputs"][0].pop("halo") - serialized_wo_defaults["outputs"][0].pop("postprocessing") - serialized_wo_defaults.pop("packaged_by") - serialized_wo_defaults.pop("parent") - serialized_wo_defaults.pop("run_mode") - serialized_wo_defaults.pop("sample_inputs") - serialized_wo_defaults.pop("sample_outputs") - serialized_wo_defaults.pop("sha256") - serialized_wo_defaults["weights"]["pickle"].pop("attachments") - serialized_wo_defaults["weights"]["pickle"].pop("authors") - serialized_wo_defaults["weights"]["pickle"].pop("opset_version") - serialized_wo_defaults["weights"]["pickle"].pop("parent") - serialized_wo_defaults["weights"]["pickle"].pop("tensorflow_version") - - assert serialized_wo_defaults == data + assert serialized == data assert not schema.Model().validate(serialized) - assert not schema.Model().validate(serialized_wo_defaults) raw_model_from_serialized, _ = load_raw_model(serialized) assert raw_model_from_serialized == raw_model diff --git a/tests/test_format_version_conversion.py b/tests/test_format_version_conversion.py index 543421434..51de25e8a 100644 --- a/tests/test_format_version_conversion.py +++ b/tests/test_format_version_conversion.py @@ -8,9 +8,9 @@ yaml = YAML(typ="safe") -def test_model_nodes_format_0_1_to_0_3(rf_config_path_v0_1, rf_config_path): +def test_model_nodes_format_0_1_to_0_3(rf_config_path_v0_1, rf_config_path_v0_3): rf_model_data_v0_1 = yaml.load(rf_config_path_v0_1) - rf_model_data = yaml.load(rf_config_path) + rf_model_data = yaml.load(rf_config_path_v0_3) expected = asdict(schema.Model().load(rf_model_data)) converted_data = maybe_convert_model(rf_model_data_v0_1) @@ -24,9 +24,9 @@ def test_model_nodes_format_0_1_to_0_3(rf_config_path_v0_1, rf_config_path): out["description"] = out["name"] for key, item in expected.items(): - assert key in actual - assert actual[key] == item + assert key in actual, key + assert actual[key] == item, key for key, item in actual.items(): assert key in expected - assert expected[key] == item + assert item == expected[key] diff --git a/tests/test_load_spec.py b/tests/test_load_spec.py index b101164e9..9f9fe5966 100644 --- a/tests/test_load_spec.py +++ b/tests/test_load_spec.py @@ -12,7 +12,7 @@ def test_load_non_existing_spec(): def test_load_non_valid_spec_name(): - spec_path = "some/none/existing/path/to/spec.not_valid.yaml" + spec_path = "some/none/existing/path/to/spec.not_valid_suffix" with pytest.raises(ValidationError): load_model(spec_path) diff --git a/tests/test_schema.py b/tests/test_schema.py index 6486ea3ba..5782ad780 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1,6 +1,8 @@ import pytest from datetime import datetime +from marshmallow import missing + from bioimageio.spec import nodes, schema @@ -41,22 +43,20 @@ def test_tensor_schema_preprocessing(): "data_type": "float32", "axes": "xyc", "shape": [128, 128, 3], - "preprocessing": [], }, {"name": "input_1", "description": "Input 1", "data_type": "float32", "axes": "xyc", "shape": [128, 128, 3]}, ], ) def test_tensor_schema_no_preprocessing(data): validated_data = schema.InputTensor().load(data) - assert isinstance(validated_data.preprocessing, list) - assert len(validated_data.preprocessing) == 0 + assert validated_data.preprocessing is missing @pytest.mark.parametrize("schema_instance", [schema.InputTensor(), schema.OutputTensor()]) def test_tensor_schema_optional_description(schema_instance): data = {"name": "input_1", "data_type": "float32", "axes": "xyc", "shape": [128, 128, 3]} validated_data = schema_instance.load(data) - assert validated_data.description is None + assert validated_data.description is missing @pytest.fixture From d0e6b53b7bf5f200e95d47ed312c118a309e4ce8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 11:58:25 +0200 Subject: [PATCH 07/14] rename PyBioSchema -> BioImageIOSchema --- bioimageio/spec/shared/schema.py | 6 ++--- bioimageio/spec/v0_3/schema.py | 44 ++++++++++++++++---------------- scripts/generate_docs.py | 6 ++--- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/bioimageio/spec/shared/schema.py b/bioimageio/spec/shared/schema.py index 828b2febb..70a1d3a3b 100644 --- a/bioimageio/spec/shared/schema.py +++ b/bioimageio/spec/shared/schema.py @@ -6,7 +6,7 @@ from . import raw_nodes -class SharedPyBioSchema(Schema): +class SharedBioImageIOSchema(Schema): raw_nodes: ModuleType = raw_nodes # should be overwritten in subclass by version specific raw nodes module bioimageio_description: str = "" @@ -31,7 +31,7 @@ def make_object(self, data, **kwargs): raise e -class ImplicitInputShape(SharedPyBioSchema): +class ImplicitInputShape(SharedBioImageIOSchema): min = fields.List( fields.Integer, required=True, bioimageio_description="The minimum input shape with same length as `axes`" ) @@ -50,7 +50,7 @@ def matching_lengths(self, data, **kwargs): raise ValidationError(f"'min' and 'step' have to have the same length! (min: {min_}, step: {step})") -class ImplicitOutputShape(SharedPyBioSchema): +class ImplicitOutputShape(SharedBioImageIOSchema): reference_input = fields.String(required=True, bioimageio_description="Name of the reference input tensor.") scale = fields.List( fields.Float, required=True, bioimageio_description="'output_pix/input_pix' for each dimension." diff --git a/bioimageio/spec/v0_3/schema.py b/bioimageio/spec/v0_3/schema.py index cd865d11d..34c47d2d1 100644 --- a/bioimageio/spec/v0_3/schema.py +++ b/bioimageio/spec/v0_3/schema.py @@ -6,7 +6,7 @@ from spdx_license_list import LICENSES from bioimageio.spec.shared import field_validators, fields -from bioimageio.spec.shared.schema import SharedPyBioSchema +from bioimageio.spec.shared.schema import SharedBioImageIOSchema from . import raw_nodes try: @@ -15,11 +15,11 @@ from typing_extensions import get_args -class PyBioSchema(SharedPyBioSchema): +class BioImageIOSchema(SharedBioImageIOSchema): raw_nodes = raw_nodes -class Author(PyBioSchema): +class Author(BioImageIOSchema): name = fields.String(required=True, bioimageio_description="Full name.") affiliation = fields.String(bioimageio_description="Affiliation.") orcid = fields.String( @@ -35,7 +35,7 @@ class Author(PyBioSchema): ) -class CiteEntry(PyBioSchema): +class CiteEntry(BioImageIOSchema): text = fields.String(required=True) doi = fields.String(bioimageio_maybe_required=True) url = fields.String(bioimageio_maybe_required=True) @@ -46,14 +46,14 @@ def doi_or_url(self, data, **kwargs): raise ValidationError("doi or url needs to be specified in a citation") -class RunMode(PyBioSchema): +class RunMode(BioImageIOSchema): name = fields.String( required=True, bioimageio_description="The name of the `run_mode`" ) # todo: limit valid run mode names kwargs = fields.Kwargs() -class RDF(PyBioSchema): +class RDF(BioImageIOSchema): """not the reference for RDF; todo: match definition of rdf json schema; move other fields to Model""" format_version = fields.String( @@ -167,12 +167,12 @@ def warn_about_deprecated_spdx_license(self, value: str): ) -class SpecWithKwargs(PyBioSchema): +class SpecWithKwargs(BioImageIOSchema): spec: fields.SpecURI kwargs = fields.Kwargs() -class Tensor(PyBioSchema): +class Tensor(BioImageIOSchema): name = fields.String( required=True, validate=field_validators.Predicate("isidentifier"), bioimageio_description="Tensor name." ) @@ -219,16 +219,16 @@ def validate_processing_kwargs(self, data, **kwargs): raise ValidationError("`kwargs.axes` needs to be subset of axes") -class Processing(PyBioSchema): - class Binarize(Schema): # do not inherit from PyBioSchema, return only a validated dict, no specific node +class Processing(BioImageIOSchema): + class Binarize(Schema): # do not inherit from BioImageIOSchema, return only a validated dict, no specific node # todo: inherit from a "TransformSchema" that allows generation of docs for pre and postprocessing threshold = fields.Float(required=True) - class Clip(PyBioSchema): + class Clip(BioImageIOSchema): min = fields.Float(required=True) max = fields.Float(required=True) - class ScaleLinear(PyBioSchema): + class ScaleLinear(BioImageIOSchema): axes = fields.Axes(required=True, valid_axes="czyx") gain = fields.Array(fields.Float(), missing=fields.Float(missing=1.0)) # todo: check if gain match input axes offset = fields.Array( @@ -255,10 +255,10 @@ def kwargs_match_selected_preprocessing_name(self, data, **kwargs): if kwargs_validation_errors: raise ValidationError(f"Invalid `kwargs` for '{data['name']}': {kwargs_validation_errors}") - class Sigmoid(PyBioSchema): + class Sigmoid(BioImageIOSchema): pass - class ZeroMeanUnitVariance(PyBioSchema): + class ZeroMeanUnitVariance(BioImageIOSchema): mode = fields.ProcMode(required=True) axes = fields.Axes(required=True, valid_axes="czyx") mean = fields.Array(fields.Float()) # todo: check if means match input axes (for mode 'fixed') @@ -288,7 +288,7 @@ class Preprocessing(Processing): ) kwargs = fields.Kwargs() - class ScaleRange(PyBioSchema): + class ScaleRange(BioImageIOSchema): mode = fields.ProcMode(required=True, valid_modes=("per_dataset", "per_sample")) axes = fields.Axes(required=True, valid_axes="czyx") min_percentile = fields.Float( @@ -320,7 +320,7 @@ class Postprocessing(Processing): class ScaleRange(Preprocessing.ScaleRange): reference_tensor: fields.String(required=True, validate=field_validators.Predicate("isidentifier")) - class ScaleMeanVariance(PyBioSchema): + class ScaleMeanVariance(BioImageIOSchema): mode = fields.ProcMode(required=True, valid_modes=("per_dataset", "per_sample")) reference_tensor: fields.String(required=True, validate=field_validators.Predicate("isidentifier")) @@ -421,7 +421,7 @@ def matching_halo_length(self, data, **kwargs): ) -class WeightsEntry(PyBioSchema): +class WeightsEntry(BioImageIOSchema): authors = fields.List( fields.Nested(Author), bioimageio_description="A list of authors. If this is the root weight (it does not have a `parent` field): the " @@ -448,7 +448,7 @@ class WeightsEntry(PyBioSchema): tensorflow_version = fields.StrictVersion() -class ModelParent(PyBioSchema): +class ModelParent(BioImageIOSchema): uri = fields.URI( bioimageio_description="Url of another model available on bioimage.io or path to a local model in the " "bioimage.io specification. If it is a url, it needs to be a github url linking to the page containing the " @@ -643,20 +643,20 @@ def weights_entries_match_weights_formats(self, data, **kwargs): # Manifest -class BioImageIoManifestModelEntry(PyBioSchema): +class BioImageIoManifestModelEntry(BioImageIOSchema): id = fields.String(required=True) source = fields.String(validate=field_validators.URL(schemes=["http", "https"])) links = fields.List(fields.String) download_url = fields.String(validate=field_validators.URL(schemes=["http", "https"])) -class Badge(PyBioSchema): +class Badge(BioImageIOSchema): label = fields.String(required=True) icon = fields.URI() url = fields.URI() -class BioImageIoManifestNotebookEntry(PyBioSchema): +class BioImageIoManifestNotebookEntry(BioImageIOSchema): id = fields.String(required=True) name = fields.String(required=True) documentation = fields.RelativeLocalPath( @@ -678,7 +678,7 @@ class BioImageIoManifestNotebookEntry(PyBioSchema): links = fields.List(fields.String) # todo: make List[URI]? -class BioImageIoManifest(PyBioSchema): +class BioImageIoManifest(BioImageIOSchema): format_version = fields.String( validate=field_validators.OneOf(get_args(raw_nodes.ManifestFormatVersion)), required=True ) diff --git a/scripts/generate_docs.py b/scripts/generate_docs.py index 04b334dca..69f0b02ba 100644 --- a/scripts/generate_docs.py +++ b/scripts/generate_docs.py @@ -41,11 +41,11 @@ def doc_from_schema(obj) -> typing.Union[typing.Dict[str, DocNode], DocNode]: details = [] sub_docs = [] required = True - if inspect.isclass(obj) and issubclass(obj, schema.SharedPyBioSchema): + if inspect.isclass(obj) and issubclass(obj, schema.SharedBioImageIOSchema): obj = obj() - if isinstance(obj, schema.SharedPyBioSchema): + if isinstance(obj, schema.SharedBioImageIOSchema): def sort_key(name_and_nested_field): name, nested_field = name_and_nested_field @@ -115,7 +115,7 @@ def markdown_from_doc(doc: DocNode, indent: int = 0): return f"{type_name}{doc.description}\n{sub_doc}" -def markdown_from_schema(schema: schema.SharedPyBioSchema) -> str: +def markdown_from_schema(schema: schema.SharedBioImageIOSchema) -> str: doc = doc_from_schema(schema) return markdown_from_doc(doc) From ce7b743922150ab1bd5518ab7b9b22ad62a8b9de Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 12:31:03 +0200 Subject: [PATCH 08/14] actually make type field mandatory --- bioimageio/spec/v0_3/schema.py | 2 +- tests/test_schema.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/bioimageio/spec/v0_3/schema.py b/bioimageio/spec/v0_3/schema.py index 34c47d2d1..86f1adee8 100644 --- a/bioimageio/spec/v0_3/schema.py +++ b/bioimageio/spec/v0_3/schema.py @@ -159,7 +159,7 @@ def warn_about_deprecated_spdx_license(self, value: str): bioimageio_description="Timestamp of the initial creation of this model in [ISO 8601]" "(#https://en.wikipedia.org/wiki/ISO_8601) format.", ) - type = fields.String(validate=field_validators.OneOf(get_args(raw_nodes.Type))) + type = fields.String(required=True, validate=field_validators.OneOf(get_args(raw_nodes.Type))) version = fields.StrictVersion( bioimageio_description="The version number of the model. The version number format must be a string in " "`MAJOR.MINOR.PATCH` format following the guidelines in Semantic Versioning 2.0.0 (see https://semver.org/), " diff --git a/tests/test_schema.py b/tests/test_schema.py index 5782ad780..eb83640f1 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -96,6 +96,7 @@ def model_dict(): "weights": {}, "test_inputs": [], "test_outputs": [], + "type": "model", } From f62324735c2767782f68c38edc4e8774ac300833 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 12:31:21 +0200 Subject: [PATCH 09/14] set a default for raw model to avoid raw_nodes.Model(... type="model") --- bioimageio/spec/v0_3/raw_nodes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bioimageio/spec/v0_3/raw_nodes.py b/bioimageio/spec/v0_3/raw_nodes.py index 5e92fc59b..827480485 100644 --- a/bioimageio/spec/v0_3/raw_nodes.py +++ b/bioimageio/spec/v0_3/raw_nodes.py @@ -163,6 +163,7 @@ class Model(RDF): source: Union[_Missing, ImportableSource] = missing test_inputs: List[URI] = missing test_outputs: List[URI] = missing + type: Type = "model" weights: Dict[WeightsFormat, WeightsEntry] = missing From 2f33382319224edd8eb3e9b84342e9e1f7c49848 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 12:32:27 +0200 Subject: [PATCH 10/14] black --- bioimageio/spec/v0_3/schema.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/bioimageio/spec/v0_3/schema.py b/bioimageio/spec/v0_3/schema.py index 86f1adee8..906a61783 100644 --- a/bioimageio/spec/v0_3/schema.py +++ b/bioimageio/spec/v0_3/schema.py @@ -328,8 +328,7 @@ class ScaleMeanVariance(BioImageIOSchema): class InputTensor(Tensor): shape = fields.InputShape(required=True, bioimageio_description="Specification of tensor shape.") preprocessing = fields.List( - fields.Nested(Preprocessing), - bioimageio_description="Description of how this input should be preprocessed.", + fields.Nested(Preprocessing), bioimageio_description="Description of how this input should be preprocessed." ) processing_name = "preprocessing" @@ -372,8 +371,7 @@ class OutputTensor(Tensor): "`shape:offset` if the model output itself is cropped and input and output shapes not fixed.", ) postprocessing = fields.List( - fields.Nested(Postprocessing), - bioimageio_description="Description of how this output should be postprocessed.", + fields.Nested(Postprocessing), bioimageio_description="Description of how this output should be postprocessed." ) processing_name = "postprocessing" @@ -431,13 +429,13 @@ class WeightsEntry(BioImageIOSchema): attachments = fields.Dict( bioimageio_description="Dictionary of text keys and URI (or a list of URI) values to additional, relevant " "files that are specific to the current weight format. A list of URIs can be listed under the `files` key to " - "included additional files for generating the model package.", + "included additional files for generating the model package." ) parent = fields.String( bioimageio_description="The source weights used as input for converting the weights to this format. For " "example, if the weights were converted from the format `pytorch_state_dict` to `pytorch_script`, the parent " "is `pytorch_state_dict`. All weight entries except one (the initial set of weights resulting from training " - "the model), need to have this field.", + "the model), need to have this field." ) opset_version = fields.Number() # ONNX Specific sha256 = fields.String( From 00cee49fd885d45c7803e6fb0a3ce3316970f7d1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 12:33:39 +0200 Subject: [PATCH 11/14] raise TypeError on missing required Node inputs --- bioimageio/spec/shared/raw_nodes.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bioimageio/spec/shared/raw_nodes.py b/bioimageio/spec/shared/raw_nodes.py index cb5006aa8..6b7e196ff 100644 --- a/bioimageio/spec/shared/raw_nodes.py +++ b/bioimageio/spec/shared/raw_nodes.py @@ -1,15 +1,25 @@ """shared raw nodes that shared transformer act on""" - +import dataclasses from dataclasses import dataclass from pathlib import Path -from typing import List +from typing import List, Union + +try: + from typing import get_args, get_origin +except ImportError: + from typing_extensions import get_args, get_origin from marshmallow import missing @dataclass class Node: - pass + def __post_init__(self): + for f in dataclasses.fields(self): + if getattr(self, f.name) is missing and ( + get_origin(f.type) is not Union or not isinstance(missing, get_args(f.type)) + ): + raise TypeError(f"{self.__class__}.__init__() missing required argument: '{f.name}'") @dataclass From 5b40a3167050b1e0435a832a36be4d8a3a1e1947 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 14:08:03 +0200 Subject: [PATCH 12/14] improve build_spec build all raw_node objects --- bioimageio/spec/latest/build_spec.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py index 08d08c327..21d599000 100644 --- a/bioimageio/spec/latest/build_spec.py +++ b/bioimageio/spec/latest/build_spec.py @@ -213,13 +213,18 @@ def _get_output_tensor(test_out, name, reference_input, scale, offset, axes, dat return outputs +def _build_authors(authors: List[Dict[str, str]]): + return [spec.raw_nodes.Author(**a) for a in authors] + + # TODO The citation entry should be improved so that we can properly derive doi vs. url -def _build_cite(cite): +def _build_cite(cite: Dict[str, str]): citation_list = [spec.raw_nodes.CiteEntry(text=k, url=v) for k, v in cite.items()] return citation_list # TODO we should make the name more specific: "build_model_spec"? +# TODO maybe "build_raw_model" as it return raw_nodes.Model # NOTE does not support multiple input / output tensors yet # to implement this we should wait for 0.4.0, see also # https://github.com/bioimage-io/spec-bioimage-io/issues/70#issuecomment-825737433 @@ -385,8 +390,13 @@ def build_spec( } kwargs = {k: v for k, v in optional_kwargs.items() if v is not None} - # build the citation object + # build raw_nodes objects + authors = _build_authors(authors) cite = _build_cite(cite) + documentation = spec.fields.URI().deserialize(documentation) + covers = [spec.fields.URI().deserialize(uri) for uri in covers] + test_inputs = [spec.fields.URI().deserialize(uri) for uri in test_inputs] + test_outputs = [spec.fields.URI().deserialize(uri) for uri in test_outputs] model = spec.raw_nodes.Model( format_version=format_version, @@ -415,15 +425,8 @@ def build_spec( return model -def add_weights( - model, - weight_uri: str, - root: Optional[str] = None, - weight_type: Optional[str] = None, - **weight_kwargs -): - """ Add weight entry to bioimage.io model. - """ +def add_weights(model, weight_uri: str, root: Optional[str] = None, weight_type: Optional[str] = None, **weight_kwargs): + """Add weight entry to bioimage.io model.""" new_weights = _get_weights(weight_uri, weight_type, None, root, **weight_kwargs)[0] model.weights.update(new_weights) @@ -439,5 +442,5 @@ def serialize_spec(model, out_path, clear_defaults=True): if clear_defaults: defaults = ([], {}, None) cleared = remap(serialized, visit=lambda p, k, v: v not in defaults) - with open(out_path, 'w') as f: + with open(out_path, "w") as f: spec.utils.yaml.dump(cleared, f) From e88579e59f51530d3233e7666fad79be07953f7f Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 14:14:18 +0200 Subject: [PATCH 13/14] remove clearing of default values in serialization --- bioimageio/spec/latest/build_spec.py | 14 +++++--------- setup.py | 1 - 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py index 21d599000..9fe82ccb3 100644 --- a/bioimageio/spec/latest/build_spec.py +++ b/bioimageio/spec/latest/build_spec.py @@ -1,11 +1,12 @@ -import os import datetime import hashlib +import os from typing import Any, Dict, List, Optional, Union import numpy as np + import bioimageio.spec as spec -from boltons.iterutils import remap + # # utility functions to build the spec from python @@ -436,11 +437,6 @@ def add_weights(model, weight_uri: str, root: Optional[str] = None, weight_type: return model -def serialize_spec(model, out_path, clear_defaults=True): +def serialize_spec(model, out_path): # TODO change name to include model (see build_model_spec) serialized = spec.schema.Model().dump(model) - # clear the default values using boltons remap - if clear_defaults: - defaults = ([], {}, None) - cleared = remap(serialized, visit=lambda p, k, v: v not in defaults) - with open(out_path, "w") as f: - spec.utils.yaml.dump(cleared, f) + spec.utils.yaml.dump(serialized, out_path) diff --git a/setup.py b/setup.py index a7557317a..06fd045d8 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,6 @@ packages=find_namespace_packages(exclude=["tests"]), # Required install_requires=[ "PyYAML>=5.2", - "boltons", "imageio>=2.5", "marshmallow>=3.6.0,<4.0", "marshmallow_jsonschema", From 0316007f3141744ccea644da1aa26a5a4d2454d0 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Jun 2021 16:16:22 +0200 Subject: [PATCH 14/14] fix documentation field in build_spec --- bioimageio/spec/latest/build_spec.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py index 9fe82ccb3..68a8ef687 100644 --- a/bioimageio/spec/latest/build_spec.py +++ b/bioimageio/spec/latest/build_spec.py @@ -1,6 +1,7 @@ import datetime import hashlib import os +from pathlib import Path from typing import Any, Dict, List, Optional, Union import numpy as np @@ -394,7 +395,7 @@ def build_spec( # build raw_nodes objects authors = _build_authors(authors) cite = _build_cite(cite) - documentation = spec.fields.URI().deserialize(documentation) + documentation = Path(documentation) covers = [spec.fields.URI().deserialize(uri) for uri in covers] test_inputs = [spec.fields.URI().deserialize(uri) for uri in test_inputs] test_outputs = [spec.fields.URI().deserialize(uri) for uri in test_outputs]