From 738ea6411450157f29c4a5328da452f69305d8e0 Mon Sep 17 00:00:00 2001
From: Constantin Pape <constantin.pape@embl.de>
Date: Tue, 15 Jun 2021 15:48:37 +0200
Subject: [PATCH 01/14] Add utility function for adding additional weight
 formats to model spec

---
 bioimageio/spec/latest/__init__.py   |  2 +-
 bioimageio/spec/latest/build_spec.py | 22 +++++++++++++++++++++-
 bioimageio/spec/v0_3/utils.py        |  6 +++---
 3 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/bioimageio/spec/latest/__init__.py b/bioimageio/spec/latest/__init__.py
index 1a9f68d18..a12252c4f 100644
--- a/bioimageio/spec/latest/__init__.py
+++ b/bioimageio/spec/latest/__init__.py
@@ -1,2 +1,2 @@
 from bioimageio.spec.v0_3 import *  # noqa
-from .build_spec import build_spec
+from .build_spec import build_spec, add_weights
diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py
index 3235ebc43..11aa3d00d 100644
--- a/bioimageio/spec/latest/build_spec.py
+++ b/bioimageio/spec/latest/build_spec.py
@@ -48,7 +48,6 @@ def _infer_weight_type(path):
         raise ValueError(f"Could not infer weight type from extension {ext} for weight file {path}")
 
 
-# TODO extend supported weight types
 def _get_weights(weight_uri, weight_type, source, root, **kwargs):
     weight_path = _get_local_path(weight_uri, root)
     if weight_type is None:
@@ -219,6 +218,7 @@ def _build_cite(cite):
     return citation_list
 
 
+# TODO we should make the name more specific: "build_model_spec"?
 # NOTE does not support multiple input / output tensors yet
 # to implement this we should wait for 0.4.0, see also
 # https://github.com/bioimage-io/spec-bioimage-io/issues/70#issuecomment-825737433
@@ -412,3 +412,23 @@ def build_spec(
     model = spec.schema.Model().load(serialized)
 
     return model
+
+
+def add_weights(
+    model: spec.raw_nodes.Model,
+    weight_uri: str,
+    root: Optional[str] = None,
+    weight_type: Optional[str] = None,
+    **weight_kwargs
+):
+    """ Add weight entry to bioimage.io model.
+    """
+    new_weights = _get_weights(weight_uri, weight_type, None, root, **weight_kwargs)[0]
+    model.weights.update(new_weights)
+
+    # FIXME this fails with
+    # ImportedSource(factory=<class 'user_imports.5e008e787272408180a19fd72b83134b.UNet2d'>) has unexpected type <class 'bioimageio.spec.shared.nodes.ImportedSource'>
+    serialized = spec.schema.Model().dump(model)
+    model = spec.schema.Model().load(serialized)
+
+    return model
diff --git a/bioimageio/spec/v0_3/utils.py b/bioimageio/spec/v0_3/utils.py
index d1ab884e3..203cb77a4 100644
--- a/bioimageio/spec/v0_3/utils.py
+++ b/bioimageio/spec/v0_3/utils.py
@@ -35,9 +35,9 @@ def _(source: dict, root_path: Optional[pathlib.Path] = None) -> Tuple[raw_nodes
 def _(source: os.PathLike, root_path: Optional[pathlib.Path] = None) -> Tuple[raw_nodes.Model, pathlib.Path]:
     source = pathlib.Path(source)
 
-    suffixes = source.suffixes
-    if len(suffixes) < 2 or suffixes[-1] not in (".yml", ".yaml") or source.suffixes[-2] != ".model":
-        raise ValidationError(f"invalid suffixes {''.join(suffixes)} for source {source}")
+    # suffixes = source.suffixes
+    # if len(suffixes) < 2 or suffixes[-1] not in (".yml", ".yaml") or source.suffixes[-2] != ".model":
+    #     raise ValidationError(f"invalid suffixes {''.join(suffixes)} for source {source}")
 
     data = yaml.load(source)
 

From f981dd4b7bfae41f8cc40f0f30c405b39a777f98 Mon Sep 17 00:00:00 2001
From: Constantin Pape <constantin.pape@embl.de>
Date: Tue, 15 Jun 2021 18:03:50 +0200
Subject: [PATCH 02/14] Add model serialization function that removes empty
 defaults

---
 bioimageio/spec/latest/__init__.py   |  2 +-
 bioimageio/spec/latest/build_spec.py | 15 ++++++++++++---
 setup.py                             |  1 +
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/bioimageio/spec/latest/__init__.py b/bioimageio/spec/latest/__init__.py
index a12252c4f..54b35e232 100644
--- a/bioimageio/spec/latest/__init__.py
+++ b/bioimageio/spec/latest/__init__.py
@@ -1,2 +1,2 @@
 from bioimageio.spec.v0_3 import *  # noqa
-from .build_spec import build_spec, add_weights
+from .build_spec import add_weights, build_spec, serialize_spec
diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py
index 11aa3d00d..08d08c327 100644
--- a/bioimageio/spec/latest/build_spec.py
+++ b/bioimageio/spec/latest/build_spec.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 import bioimageio.spec as spec
+from boltons.iterutils import remap
 
 #
 # utility functions to build the spec from python
@@ -415,7 +416,7 @@ def build_spec(
 
 
 def add_weights(
-    model: spec.raw_nodes.Model,
+    model,
     weight_uri: str,
     root: Optional[str] = None,
     weight_type: Optional[str] = None,
@@ -426,9 +427,17 @@ def add_weights(
     new_weights = _get_weights(weight_uri, weight_type, None, root, **weight_kwargs)[0]
     model.weights.update(new_weights)
 
-    # FIXME this fails with
-    # ImportedSource(factory=<class 'user_imports.5e008e787272408180a19fd72b83134b.UNet2d'>) has unexpected type <class 'bioimageio.spec.shared.nodes.ImportedSource'>
     serialized = spec.schema.Model().dump(model)
     model = spec.schema.Model().load(serialized)
 
     return model
+
+
+def serialize_spec(model, out_path, clear_defaults=True):
+    """Write `model` as YAML to `out_path`; if `clear_defaults`, drop None, [] and {} values first."""
+    serialized = spec.schema.Model().dump(model)
+    if clear_defaults:
+        # boltons remap drops every item for which `visit` returns False
+        serialized = remap(serialized, visit=lambda p, k, v: v not in ([], {}, None))
+    with open(out_path, 'w') as f:
+        spec.utils.yaml.dump(serialized, f)
diff --git a/setup.py b/setup.py
index 06fd045d8..a7557317a 100644
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,7 @@
     packages=find_namespace_packages(exclude=["tests"]),  # Required
     install_requires=[
         "PyYAML>=5.2",
+        "boltons",
         "imageio>=2.5",
         "marshmallow>=3.6.0,<4.0",
         "marshmallow_jsonschema",

From fa8b190344713cd20522448117d86c033a311b7a Mon Sep 17 00:00:00 2001
From: FynnBe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 10:09:53 +0200
Subject: [PATCH 03/14] do not check for .model suffix but yml/yaml

---
 bioimageio/spec/v0_3/utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bioimageio/spec/v0_3/utils.py b/bioimageio/spec/v0_3/utils.py
index 203cb77a4..1e4c741d1 100644
--- a/bioimageio/spec/v0_3/utils.py
+++ b/bioimageio/spec/v0_3/utils.py
@@ -35,9 +35,8 @@ def _(source: dict, root_path: Optional[pathlib.Path] = None) -> Tuple[raw_nodes
 def _(source: os.PathLike, root_path: Optional[pathlib.Path] = None) -> Tuple[raw_nodes.Model, pathlib.Path]:
     source = pathlib.Path(source)
 
-    # suffixes = source.suffixes
-    # if len(suffixes) < 2 or suffixes[-1] not in (".yml", ".yaml") or source.suffixes[-2] != ".model":
-    #     raise ValidationError(f"invalid suffixes {''.join(suffixes)} for source {source}")
+    if source.suffix not in (".yml", ".yaml"):
+        raise ValidationError(f"invalid suffix {source.suffix} for source {source}")
 
     data = yaml.load(source)
 

From ed9340a3aabf945bbe8cb7f8c940942149a6d4c5 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 10:29:49 +0200
Subject: [PATCH 04/14] mkdir dist

---
 scripts/generate_docs.py       | 4 +++-
 scripts/generate_json_specs.py | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/generate_docs.py b/scripts/generate_docs.py
index 13277edee..04b334dca 100644
--- a/scripts/generate_docs.py
+++ b/scripts/generate_docs.py
@@ -126,4 +126,6 @@ def export_markdown_docs(folder: Path):
 
 
 if __name__ == "__main__":
-    export_markdown_docs(Path(__file__).parent / "../dist")
+    dist = Path(__file__).parent / "../dist"
+    dist.mkdir(exist_ok=True)
+    export_markdown_docs(dist)
diff --git a/scripts/generate_json_specs.py b/scripts/generate_json_specs.py
index e631ba624..498a10af7 100644
--- a/scripts/generate_json_specs.py
+++ b/scripts/generate_json_specs.py
@@ -16,4 +16,7 @@ def export_json_model_spec(path: Path):
 
 
 if __name__ == "__main__":
-    export_json_model_spec(Path(__file__).parent / f"../dist/model_spec_{bioimageio.spec.__version__}.json")
+    dist = Path(__file__).parent / "../dist"
+    dist.mkdir(exist_ok=True)
+
+    export_json_model_spec(dist / f"model_spec_{bioimageio.spec.__version__}.json")

From 964735f9a1918e6ddcb77fa1b6e62a1a16bfb2e9 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 10:30:00 +0200
Subject: [PATCH 05/14] warn about ".yml"

---
 bioimageio/spec/v0_3/utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/bioimageio/spec/v0_3/utils.py b/bioimageio/spec/v0_3/utils.py
index 1e4c741d1..5971226f3 100644
--- a/bioimageio/spec/v0_3/utils.py
+++ b/bioimageio/spec/v0_3/utils.py
@@ -1,5 +1,6 @@
 import os
 import pathlib
+import warnings
 from functools import singledispatch
 from typing import Optional, Sequence, Tuple
 
@@ -37,6 +38,11 @@ def _(source: os.PathLike, root_path: Optional[pathlib.Path] = None) -> Tuple[ra
 
     if source.suffix not in (".yml", ".yaml"):
         raise ValidationError(f"invalid suffix {source.suffix} for source {source}")
+    elif source.suffix == ".yml":
+        warnings.warn(
+            "suffix '.yml' is not recommended and will raise a ValidationError in the future. Use '.yaml' instead "
+            "(https://yaml.org/faq.html)"
+        )
 
     data = yaml.load(source)
 

From a5fc68ebc6a4540668c7e6a2618aac43c74ae1d1 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 11:56:19 +0200
Subject: [PATCH 06/14] remove empty default (None, [], {})

---
 bioimageio/spec/shared/fields.py              |   4 +-
 bioimageio/spec/v0_3/converters.py            |   4 +
 bioimageio/spec/v0_3/raw_nodes.py             |  71 ++++-----
 bioimageio/spec/v0_3/schema.py                | 135 ++++++++----------
 .../RandomForestClassifier_v0_3_1.model.yaml  |   1 -
 .../RandomForestClassifier_v0_3_2.model.yaml  |   2 -
 tests/conftest.py                             |   5 +
 tests/test_dump_spec.py                       |  22 +--
 tests/test_format_version_conversion.py       |  10 +-
 tests/test_load_spec.py                       |   2 +-
 tests/test_schema.py                          |   8 +-
 11 files changed, 115 insertions(+), 149 deletions(-)

diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py
index 97d90e3e4..086fee36e 100644
--- a/bioimageio/spec/shared/fields.py
+++ b/bioimageio/spec/shared/fields.py
@@ -260,8 +260,8 @@ def __init__(self, **super_kwargs):
 
 
 class Kwargs(Dict):
-    def __init__(self, keys=String, missing=dict, bioimageio_description="Key word arguments.", **super_kwargs):
-        super().__init__(keys, missing=missing, bioimageio_description=bioimageio_description, **super_kwargs)
+    def __init__(self, keys=String, bioimageio_description="Key word arguments.", **super_kwargs):
+        super().__init__(keys, bioimageio_description=bioimageio_description, **super_kwargs)
 
 
 class OutputShape(Union):
diff --git a/bioimageio/spec/v0_3/converters.py b/bioimageio/spec/v0_3/converters.py
index d8b9324e0..acc99feef 100644
--- a/bioimageio/spec/v0_3/converters.py
+++ b/bioimageio/spec/v0_3/converters.py
@@ -194,6 +194,10 @@ def maybe_convert_model(data: Dict[str, Any]) -> Dict[str, Any]:
     if config.get("future") == {}:
         del config["future"]
 
+    # remove 'config' if now empty
+    if data.get("config") == {}:
+        del data["config"]
+
     return data
 
 
diff --git a/bioimageio/spec/v0_3/raw_nodes.py b/bioimageio/spec/v0_3/raw_nodes.py
index 292ee150f..5e92fc59b 100644
--- a/bioimageio/spec/v0_3/raw_nodes.py
+++ b/bioimageio/spec/v0_3/raw_nodes.py
@@ -1,9 +1,10 @@
 import distutils.version
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Any, Dict, List, NewType, Optional, Tuple, Union
+from typing import Any, Dict, List, NewType, Tuple, Union
 
 from marshmallow import missing
+from marshmallow.utils import _Missing
 
 from bioimageio.spec.shared.raw_nodes import (
     ImplicitInputShape,
@@ -51,8 +52,8 @@
 @dataclass
 class Author(Node):
     name: str = missing
-    affiliation: Optional[str] = missing
-    orcid: Optional[str] = missing
+    affiliation: Union[_Missing, str] = missing
+    orcid: Union[_Missing, str] = missing
 
 
 ImportableSource = Union[ImportableModule, ImportablePath]
@@ -61,37 +62,37 @@ class Author(Node):
 @dataclass
 class CiteEntry(Node):
     text: str = missing
-    doi: Optional[str] = missing
-    url: Optional[str] = missing
+    doi: Union[_Missing, str] = missing
+    url: Union[_Missing, str] = missing
 
 
 @dataclass
 class RunMode(Node):
     name: str = missing
-    kwargs: Dict[str, Any] = missing
+    kwargs: Union[_Missing, Dict[str, Any]] = missing
 
 
 @dataclass
 class RDF(Node):
-    attachments: Dict[str, Any] = missing
+    attachments: Union[_Missing, Dict[str, Any]] = missing
     authors: List[Author] = missing
     cite: List[CiteEntry] = missing
-    config: dict = missing
-    covers: List[URI] = missing
-    dependencies: Optional[Dependencies] = missing
+    config: Union[_Missing, dict] = missing
+    covers: Union[_Missing, List[URI]] = missing
+    dependencies: Union[_Missing, Dependencies] = missing
     description: str = missing
     documentation: URI = missing
     format_version: FormatVersion = missing
-    framework: Framework = missing
-    git_repo: Optional[str] = missing
-    language: Language = missing
+    framework: Union[_Missing, Framework] = missing
+    git_repo: Union[_Missing, str] = missing
+    language: Union[_Missing, Language] = missing
     license: str = missing
     name: str = missing
-    run_mode: Optional[RunMode] = missing
+    run_mode: Union[_Missing, RunMode] = missing
     tags: List[str] = missing
     timestamp: datetime = missing
     type: Type = missing
-    version: Optional[distutils.version.StrictVersion] = missing
+    version: Union[_Missing, distutils.version.StrictVersion] = missing
 
 
 @dataclass
@@ -112,9 +113,9 @@ class InputTensor:
     data_type: str = missing
     axes: Axes = missing
     shape: Union[List[int], ImplicitInputShape] = missing
-    preprocessing: List[Preprocessing] = missing
-    description: Optional[str] = missing
-    data_range: Tuple[float, float] = missing
+    preprocessing: Union[_Missing, List[Preprocessing]] = missing
+    description: Union[_Missing, str] = missing
+    data_range: Union[_Missing, Tuple[float, float]] = missing
 
 
 @dataclass
@@ -123,24 +124,24 @@ class OutputTensor:
     data_type: str = missing
     axes: Axes = missing
     shape: Union[List[int], ImplicitOutputShape] = missing
-    halo: List[int] = missing
-    postprocessing: List[Postprocessing] = missing
-    description: Optional[str] = missing
-    data_range: Tuple[float, float] = missing
+    halo: Union[_Missing, List[int]] = missing
+    postprocessing: Union[_Missing, List[Postprocessing]] = missing
+    description: Union[_Missing, str] = missing
+    data_range: Union[_Missing, Tuple[float, float]] = missing
 
 
 @dataclass
 class WeightsEntry(Node):
-    authors: List[Author] = missing
-    attachments: Dict = missing
-    parent: Optional[str] = missing
+    authors: Union[_Missing, List[Author]] = missing
+    attachments: Union[_Missing, Dict] = missing
+    parent: Union[_Missing, str] = missing
     # ONNX specific
-    opset_version: Optional[int] = missing
+    opset_version: Union[_Missing, int] = missing
     # tag: Optional[str]  # todo: check schema. only valid for tensorflow_saved_model_bundle format
     # todo: check schema. only valid for tensorflow_saved_model_bundle format
-    sha256: str = missing
+    sha256: Union[_Missing, str] = missing
     source: URI = missing
-    tensorflow_version: Optional[distutils.version.StrictVersion] = missing
+    tensorflow_version: Union[_Missing, distutils.version.StrictVersion] = missing
 
 
 @dataclass
@@ -152,14 +153,14 @@ class ModelParent(Node):
 @dataclass
 class Model(RDF):
     inputs: List[InputTensor] = missing
-    kwargs: Dict[str, Any] = missing
+    kwargs: Union[_Missing, Dict[str, Any]] = missing
     outputs: List[OutputTensor] = missing
-    packaged_by: List[Author] = missing
-    parent: ModelParent = missing
-    sample_inputs: List[URI] = missing
-    sample_outputs: List[URI] = missing
-    sha256: str = missing
-    source: Optional[ImportableSource] = missing
+    packaged_by: Union[_Missing, List[Author]] = missing
+    parent: Union[_Missing, ModelParent] = missing
+    sample_inputs: Union[_Missing, List[URI]] = missing
+    sample_outputs: Union[_Missing, List[URI]] = missing
+    sha256: Union[_Missing, str] = missing
+    source: Union[_Missing, ImportableSource] = missing
     test_inputs: List[URI] = missing
     test_outputs: List[URI] = missing
     weights: Dict[WeightsFormat, WeightsEntry] = missing
diff --git a/bioimageio/spec/v0_3/schema.py b/bioimageio/spec/v0_3/schema.py
index a9f0c4bd0..cd865d11d 100644
--- a/bioimageio/spec/v0_3/schema.py
+++ b/bioimageio/spec/v0_3/schema.py
@@ -2,7 +2,7 @@
 import warnings
 
 import stdnum.iso7064.mod_11_2
-from marshmallow import Schema, ValidationError, post_load, validates, validates_schema
+from marshmallow import Schema, ValidationError, missing as missing_, post_load, validates, validates_schema
 from spdx_license_list import LICENSES
 
 from bioimageio.spec.shared import field_validators, fields
@@ -21,14 +21,13 @@ class PyBioSchema(SharedPyBioSchema):
 
 class Author(PyBioSchema):
     name = fields.String(required=True, bioimageio_description="Full name.")
-    affiliation = fields.String(missing=None, bioimageio_description="Affiliation.")
+    affiliation = fields.String(bioimageio_description="Affiliation.")
     orcid = fields.String(
         validate=[
             field_validators.Length(19),
             lambda oid: all(oid[idx] == "-" for idx in [4, 9, 14]),
             lambda oid: stdnum.iso7064.mod_11_2.is_valid(oid.replace("-", "")),
         ],
-        missing=None,
         bioimageio_description="[orcid](https://support.orcid.org/hc/en-us/sections/360001495313-What-is-ORCID) id "
         "in hyphenated groups of 4 digits, e.g. '0000-0001-2345-6789' (and [valid]("
         "https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier"
@@ -38,12 +37,12 @@ class Author(PyBioSchema):
 
 class CiteEntry(PyBioSchema):
     text = fields.String(required=True)
-    doi = fields.String(missing=None)
-    url = fields.String(missing=None)
+    doi = fields.String(bioimageio_maybe_required=True)
+    url = fields.String(bioimageio_maybe_required=True)
 
     @validates_schema
     def doi_or_url(self, data, **kwargs):
-        if data["doi"] is None and data["url"] is None:
+        if data.get("doi") is None and data.get("url") is None:
             raise ValidationError("doi or url needs to be specified in a citation")
 
 
@@ -87,7 +86,6 @@ class RDF(PyBioSchema):
 
     git_repo = fields.String(
         validate=field_validators.URL(schemes=["http", "https"]),
-        missing=None,
         bioimageio_description="""A url to the git repository, e.g. to Github or Gitlab.
 If the model is contained in a subfolder of a git repository, then a url to the exact folder
 (which contains the configuration yaml file) should be used.""",
@@ -119,7 +117,6 @@ def warn_about_deprecated_spdx_license(self, value: str):
     )
     covers = fields.List(
         fields.URI,
-        missing=list,
         bioimageio_description="A list of cover images provided by either a relative path to the model folder, or a "
         "hyperlink starting with 'https'.Please use an image smaller than 500KB and an aspect ratio width to height "
         "of 2:1. The supported image formats are: 'jpg', 'png', 'gif'.",  # todo: field_validators image format
@@ -127,7 +124,6 @@ def warn_about_deprecated_spdx_license(self, value: str):
     attachments = fields.Dict(
         fields.String,
         fields.Union([fields.URI(), fields.List(fields.URI)]),
-        missing=dict,
         bioimageio_maybe_required=True,
         bioimageio_description="""Dictionary of text keys and URI (or a list of URI) values to additional, relevant
 files. E.g. we can place a list of URIs under the `files` to list images and other files that are necessary for the
@@ -136,31 +132,27 @@ def warn_about_deprecated_spdx_license(self, value: str):
 
     run_mode = fields.Nested(
         RunMode,
-        missing=None,
         bioimageio_description="Custom run mode for this model: for more complex prediction procedures like test time "
         "data augmentation that currently cannot be expressed in the specification. The different run modes should be "
         "listed in [supported_formats_and_operations.md#Run Modes]"
         "(https://github.com/bioimage-io/configuration/blob/master/supported_formats_and_operations.md#run-modes).",
     )
-    config = fields.Dict(missing=dict)
+    config = fields.Dict()
 
     language = fields.String(
         validate=field_validators.OneOf(get_args(raw_nodes.Language)),
-        missing=None,
         bioimageio_maybe_required=True,
         bioimageio_description=f"Programming language of the source code. One of: "
         f"{', '.join(get_args(raw_nodes.Language))}. This field is only required if the field `source` is present.",
     )
     framework = fields.String(
         validate=field_validators.OneOf(get_args(raw_nodes.Framework)),
-        missing=None,
         bioimageio_description=f"The deep learning framework of the source code. One of: "
         f"{', '.join(get_args(raw_nodes.Framework))}. This field is only required if the field `source` is present.",
     )
     dependencies = fields.Dependencies(
-        missing=None,
         bioimageio_description="Dependency manager and dependency file, specified as `<dependency manager>:<relative "
-        "path to file>`. For example: 'conda:./environment.yaml', 'maven:./pom.xml', or 'pip:./requirements.txt'",
+        "path to file>`. For example: 'conda:./environment.yaml', 'maven:./pom.xml', or 'pip:./requirements.txt'"
     )
     timestamp = fields.DateTime(
         required=True,
@@ -169,10 +161,9 @@ def warn_about_deprecated_spdx_license(self, value: str):
     )
     type = fields.String(validate=field_validators.OneOf(get_args(raw_nodes.Type)))
     version = fields.StrictVersion(
-        missing=None,
         bioimageio_description="The version number of the model. The version number format must be a string in "
         "`MAJOR.MINOR.PATCH` format following the guidelines in Semantic Versioning 2.0.0 (see https://semver.org/), "
-        "e.g. the initial version number should be `0.1.0`.",
+        "e.g. the initial version number should be `0.1.0`."
     )
 
 
@@ -185,7 +176,7 @@ class Tensor(PyBioSchema):
     name = fields.String(
         required=True, validate=field_validators.Predicate("isidentifier"), bioimageio_description="Tensor name."
     )
-    description = fields.String(missing=None)
+    description = fields.String()
     axes = fields.Axes(
         required=True,
         bioimageio_description="""Axes identifying characters from: bitczyx. Same length and order as the axes in `shape`.
@@ -209,7 +200,6 @@ class Tensor(PyBioSchema):
     )
     data_range = fields.Tuple(
         (fields.Float(allow_nan=True), fields.Float(allow_nan=True)),
-        missing=(None, None),
         bioimageio_description="Tuple `(minimum, maximum)` specifying the allowed range of the data in this tensor. "
         "If not specified, the full data range that can be expressed in `data_type` is allowed.",
     )
@@ -231,6 +221,7 @@ def validate_processing_kwargs(self, data, **kwargs):
 
 class Processing(PyBioSchema):
     class Binarize(Schema):  # do not inherit from PyBioSchema, return only a validated dict, no specific node
+        # todo: inherit from a "TransformSchema" that allows generation of docs for pre and postprocessing
         threshold = fields.Float(required=True)
 
     class Clip(PyBioSchema):
@@ -260,7 +251,7 @@ def kwargs_match_selected_preprocessing_name(self, data, **kwargs):
                 f"Schema {schema_name} for {data['name']} {self.__class__.__name__.lower()}"
             ) from missing_schema_error
 
-        kwargs_validation_errors = schema_class().validate(data["kwargs"])
+        kwargs_validation_errors = schema_class().validate(data.get("kwargs", {}))
         if kwargs_validation_errors:
             raise ValidationError(f"Invalid `kwargs` for '{data['name']}': {kwargs_validation_errors}")
 
@@ -270,17 +261,17 @@ class Sigmoid(PyBioSchema):
     class ZeroMeanUnitVariance(PyBioSchema):
         mode = fields.ProcMode(required=True)
         axes = fields.Axes(required=True, valid_axes="czyx")
-        mean = fields.Array(fields.Float(), missing=None)  # todo: check if means match input axes (for mode 'fixed')
-        std = fields.Array(fields.Float(), missing=None)
+        mean = fields.Array(fields.Float())  # todo: check if means match input axes (for mode 'fixed')
+        std = fields.Array(fields.Float())
         eps = fields.Float(missing=1e-6)
 
         @validates_schema
         def mean_and_std_match_mode(self, data, **kwargs):
-            if data["mode"] == "fixed" and (data["mean"] is None or data["std"] is None):
+            if data["mode"] == "fixed" and ("mean" not in data or "std" not in data):  # unset fields are absent
                 raise ValidationError(
                     "`kwargs` for 'zero_mean_unit_variance' preprocessing with `mode` 'fixed' require additional `kwargs`: `mean` and `std`."
                 )
-            elif data["mode"] != "fixed" and (data.get("mean") is not None or data.get("std") is not None):
+            elif data["mode"] != "fixed" and ("mean" in data or "std" in data):
                 raise ValidationError(
                     "`kwargs`: `mean` and `std` for 'zero_mean_unit_variance' preprocessing are only valid for `mode` 'fixed'."
                 )
@@ -338,7 +329,6 @@ class InputTensor(Tensor):
     shape = fields.InputShape(required=True, bioimageio_description="Specification of tensor shape.")
     preprocessing = fields.List(
         fields.Nested(Preprocessing),
-        missing=list,
         bioimageio_description="Description of how this input should be preprocessed.",
     )
     processing_name = "preprocessing"
@@ -376,7 +366,6 @@ class OutputTensor(Tensor):
     shape = fields.OutputShape(required=True)
     halo = fields.List(
         fields.Integer,
-        missing=None,
         bioimageio_description="The halo to crop from the output tensor (for example to crop away boundary effects or "
         "for tiling). The halo should be cropped from both sides, i.e. `shape_after_crop = shape - 2 * halo`. The "
         "`halo` is not cropped by the bioimage.io model, but is left to be cropped by the consumer software. Use "
@@ -384,7 +373,6 @@ class OutputTensor(Tensor):
     )
     postprocessing = fields.List(
         fields.Nested(Postprocessing),
-        missing=list,
         bioimageio_description="Description of how this output should be postprocessed.",
     )
     processing_name = "postprocessing"
@@ -392,7 +380,7 @@ class OutputTensor(Tensor):
     @validates_schema
     def matching_halo_length(self, data, **kwargs):
         shape = data["shape"]
-        halo = data["halo"]
+        halo = data.get("halo")
         if halo is None:
             return
         elif isinstance(shape, list) or isinstance(shape, raw_nodes.ImplicitOutputShape):
@@ -401,14 +389,14 @@ def matching_halo_length(self, data, **kwargs):
         else:
             raise NotImplementedError(type(shape))
 
-    @post_load
-    def make_object(self, data, **kwargs):
-        shape = data["shape"]
-        halo = data["halo"]
-        if halo is None:
-            data["halo"] = [0] * len(shape)
-
-        return super().make_object(data, **kwargs)
+    # @post_load
+    # def make_object(self, data, **kwargs):
+    #     shape = data["shape"]
+    #     halo = data["halo"]
+    #     if halo is missing_:
+    #         data["halo"] = [0] * len(shape)
+    #
+    #     return super().make_object(data, **kwargs)
 
 
 _common_sha256_hint = (
@@ -436,32 +424,28 @@ def make_object(self, data, **kwargs):
 class WeightsEntry(PyBioSchema):
     authors = fields.List(
         fields.Nested(Author),
-        missing=list,
         bioimageio_description="A list of authors. If this is the root weight (it does not have a `parent` field): the "
         "person(s) that have trained this model. If this is a child weight (it has a `parent` field): the person(s) "
         "who have converted the weights to this format.",
     )  # todo: copy root authors if missing
     attachments = fields.Dict(
-        missing=dict,
         bioimageio_description="Dictionary of text keys and URI (or a list of URI) values to additional, relevant "
         "files that are specific to the current weight format. A list of URIs can be listed under the `files` key to "
         "included additional files for generating the model package.",
     )
     parent = fields.String(
-        missing=None,
         bioimageio_description="The source weights used as input for converting the weights to this format. For "
         "example, if the weights were converted from the format `pytorch_state_dict` to `pytorch_script`, the parent "
         "is `pytorch_state_dict`. All weight entries except one (the initial set of weights resulting from training "
         "the model), need to have this field.",
     )
-    opset_version = fields.Number(missing=None)  # ONNX Specific
+    opset_version = fields.Number()  # ONNX Specific
     sha256 = fields.String(
         validate=field_validators.Length(equal=64),
-        missing=None,
         bioimageio_description="SHA256 checksum of the source file specified. " + _common_sha256_hint,
     )
     source = fields.URI(required=True, bioimageio_description="Link to the source file. Preferably a url.")
-    tensorflow_version = fields.StrictVersion(missing=None)
+    tensorflow_version = fields.StrictVersion()
 
 
 class ModelParent(PyBioSchema):
@@ -491,21 +475,18 @@ class Model(RDF):
 
     packaged_by = fields.List(
         fields.Nested(Author),
-        missing=list,
         bioimageio_description=f"The persons that have packaged and uploaded this model. Only needs to be specified if "
         f"different from `authors` in root or any {WeightsEntry.__name__}.",
     )
 
     parent = fields.Nested(
         ModelParent,
-        missing=None,
         bioimageio_description="Parent model from which the trained weights of this model have been derived, e.g. by "
         "finetuning the weights of this model on a different dataset. For format changes of the same trained model "
         "checkpoint, see `weights`.",
     )
 
     source = fields.ImportableSource(
-        missing=None,
         bioimageio_maybe_required=True,
         bioimageio_description="Language and framework specific implementation. As some weights contain the model "
         "architecture, the source is optional depending on the present weight formats. `source` can either point to a "
@@ -515,7 +496,6 @@ class Model(RDF):
     )
     sha256 = fields.String(
         validate=field_validators.Length(equal=64),
-        missing=None,
         bioimageio_description="SHA256 checksum of the model source code file."
         + _common_sha256_hint
         + " This field is only required if the field source is present.",
@@ -557,18 +537,14 @@ class Model(RDF):
 
     sample_inputs = fields.List(
         fields.URI,
-        missing=[],
         bioimageio_description="List of URIs to sample inputs to illustrate possible inputs for the model, for example "
         "stored as png or tif images.",
     )
     sample_outputs = fields.List(
-        fields.URI,
-        missing=[],
-        bioimageio_description="List of URIs to sample outputs corresponding to the `sample_inputs`.",
+        fields.URI, bioimageio_description="List of URIs to sample outputs corresponding to the `sample_inputs`."
     )
 
     config = fields.Dict(
-        missing=dict,
         bioimageio_description="""
 A custom configuration field that can contain any other keys which are not defined above. It can be very specifc to a framework or specific tool. To avoid conflicted definitions, it is recommended to wrap configuration into a sub-field named with the specific framework or tool name.
 
@@ -590,34 +566,34 @@ class Model(RDF):
       runtime: 78.8s # Time it took to run the model
       pixel_size: [9.658E-4µmx9.658E-4µm] # Size of the pixels of the input
 ```
-""",
+"""
     )
 
     @validates_schema
     def language_and_framework_match(self, data, **kwargs):
         field_names = ("language", "framework")
         valid_combinations = [
-            ("python", "scikit-learn"),
+            ("python", "scikit-learn"),  # todo: remove
             ("python", "pytorch"),
             ("python", "tensorflow"),
             ("java", "tensorflow"),
         ]
-        if data["source"] is None:
-            valid_combinations.append((None, None))
-            valid_combinations.append(("python", None))  # todo: in py3.9 use typing.get_args(raw_nodes.Langauge)
-            valid_combinations.append(("java", None))
+        if "source" not in data:
+            valid_combinations.append((missing_, missing_))
+            valid_combinations.append(("python", missing_))
+            valid_combinations.append(("java", missing_))
 
-        combination = tuple(data[name] for name in field_names)
+        combination = tuple(data.get(name, missing_) for name in field_names)
         if combination not in valid_combinations:
             raise ValidationError(f"invalid combination of {dict(zip(field_names, combination))}")
 
     @validates_schema
     def source_specified_if_required(self, data, **kwargs):
-        if data["source"] is not None:
+        if "source" in data:
             return
 
         weight_format_requires_source = {
-            "pickle": True,
+            "pickle": True,  # todo: remove
             "pytorch_state_dict": True,
             "pytorch_script": False,
             "keras_hdf5": False,
@@ -635,9 +611,12 @@ def source_specified_if_required(self, data, **kwargs):
     def validate_reference_tensor_names(self, data, **kwargs):
         valid_input_tensor_references = [ipt.name for ipt in data["inputs"]]
         for out in data["outputs"]:
+            if out.postprocessing is missing_:
+                continue
+
             for postpr in out.postprocessing:
-                ref_tensor = postpr.kwargs.get("reference_tensor", None)
-                if ref_tensor is not None and ref_tensor not in valid_input_tensor_references:
+                ref_tensor = postpr.kwargs.get("reference_tensor", missing_)
+                if ref_tensor is not missing_ and ref_tensor not in valid_input_tensor_references:
                     raise ValidationError(f"{ref_tensor} not found in inputs")
 
     @validates_schema
@@ -645,19 +624,19 @@ def weights_entries_match_weights_formats(self, data, **kwargs):
         weights: typing.Dict[str, WeightsEntry] = data["weights"]
         for weights_format, weights_entry in weights.items():
             if weights_format in ["keras_hdf5", "tensorflow_js", "tensorflow_saved_model_bundle"]:
-                if weights_entry.tensorflow_version is None:
+                if weights_entry.tensorflow_version is missing_:
                     # todo: raise ValidationError (allow -> require)?
                     warnings.warn(f"missing 'tensorflow_version' entry for weights format {weights_format}")
             else:
-                if weights_entry.tensorflow_version is not None:
+                if weights_entry.tensorflow_version is not missing_:
                     raise ValidationError(f"invalid 'tensorflow_version' entry for weights format {weights_format}")
 
             if weights_format == "onnx":
-                if weights_entry.opset_version is None:
+                if weights_entry.opset_version is missing_:
                     # todo: raise ValidationError?
                     warnings.warn(f"missing 'opset_version' entry for weights format {weights_format}")
             else:
-                if weights_entry.opset_version is not None:
+                if weights_entry.opset_version is not missing_:
                     raise ValidationError(
                         f"invalid 'opset_version' entry for weights format {weights_format} (only valid for onnx)"
                     )
@@ -667,7 +646,7 @@ def weights_entries_match_weights_formats(self, data, **kwargs):
 class BioImageIoManifestModelEntry(PyBioSchema):
     id = fields.String(required=True)
     source = fields.String(validate=field_validators.URL(schemes=["http", "https"]))
-    links = fields.List(fields.String, missing=list)
+    links = fields.List(fields.String)
     download_url = fields.String(validate=field_validators.URL(schemes=["http", "https"]))
 
 
@@ -689,24 +668,24 @@ class BioImageIoManifestNotebookEntry(PyBioSchema):
     )
     description = fields.String(required=True)
 
-    cite = fields.List(fields.Nested(CiteEntry), missing=list)
+    cite = fields.List(fields.Nested(CiteEntry))
     authors = fields.List(fields.Nested(Author), required=True)
-    covers = fields.List(fields.URI, missing=list)
+    covers = fields.List(fields.URI)
 
-    badges = fields.List(fields.Nested(Badge), missing=list)
-    tags = fields.List(fields.String, missing=list)
+    badges = fields.List(fields.Nested(Badge))
+    tags = fields.List(fields.String)
     source = fields.URI(required=True)
-    links = fields.List(fields.String, missing=list)  # todo: make List[URI]?
+    links = fields.List(fields.String)  # todo: make List[URI]?
 
 
 class BioImageIoManifest(PyBioSchema):
     format_version = fields.String(
         validate=field_validators.OneOf(get_args(raw_nodes.ManifestFormatVersion)), required=True
     )
-    config = fields.Dict(missing=dict)
+    config = fields.Dict()
 
-    application = fields.List(fields.Dict, missing=list)
-    collection = fields.List(fields.Dict, missing=list)
-    model = fields.List(fields.Nested(BioImageIoManifestModelEntry), missing=list)
-    dataset = fields.List(fields.Dict, missing=list)
-    notebook = fields.List(fields.Nested(BioImageIoManifestNotebookEntry), missing=list)
+    application = fields.List(fields.Dict)
+    collection = fields.List(fields.Dict)
+    model = fields.List(fields.Nested(BioImageIoManifestModelEntry))
+    dataset = fields.List(fields.Dict)
+    notebook = fields.List(fields.Nested(BioImageIoManifestNotebookEntry))
diff --git a/specs/models/sklearn/RandomForestClassifier_v0_3_1.model.yaml b/specs/models/sklearn/RandomForestClassifier_v0_3_1.model.yaml
index 07ab8aa03..2e09b247b 100644
--- a/specs/models/sklearn/RandomForestClassifier_v0_3_1.model.yaml
+++ b/specs/models/sklearn/RandomForestClassifier_v0_3_1.model.yaml
@@ -15,7 +15,6 @@ license: MIT
 
 documentation: sklearnbased.md
 covers: []
-attachments: {}
 
 inputs:
   - name: raw
diff --git a/specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml b/specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml
index 164c58167..6caac0148 100644
--- a/specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml
+++ b/specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml
@@ -15,7 +15,6 @@ license: MIT
 
 documentation: sklearnbased.md
 covers: []
-attachments: {}
 
 inputs:
   - name: raw
@@ -74,4 +73,3 @@ weights:
     sha256: abcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefgh
 
 type: model
-version: null
diff --git a/tests/conftest.py b/tests/conftest.py
index bf1e721ef..8882e875a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -22,6 +22,11 @@ def rf_config_path_v0_3_2():
     return Path(__file__).parent / "../specs/models/sklearn/RandomForestClassifier_v0_3_2.model.yaml"
 
 
+@pytest.fixture
+def rf_config_path_v0_3(rf_config_path_v0_3_2):
+    return rf_config_path_v0_3_2
+
+
 @pytest.fixture
 def rf_config_path(rf_config_path_v0_3_2):
     return rf_config_path_v0_3_2
diff --git a/tests/test_dump_spec.py b/tests/test_dump_spec.py
index a59bd42b7..3fd999e7c 100644
--- a/tests/test_dump_spec.py
+++ b/tests/test_dump_spec.py
@@ -15,29 +15,9 @@ def test_spec_roundtrip(rf_config_path):
 
     # yaml.dump(serialized, Path() / "serialized.yml")
 
-    # manually remove all inserted defaults to test round trip at raw data level
-    serialized_wo_defaults = deepcopy(serialized)
-    serialized_wo_defaults["cite"][0].pop("doi")
-    serialized_wo_defaults.pop("config")
-    serialized_wo_defaults["inputs"][0].pop("preprocessing")
-    serialized_wo_defaults["outputs"][0].pop("halo")
-    serialized_wo_defaults["outputs"][0].pop("postprocessing")
-    serialized_wo_defaults.pop("packaged_by")
-    serialized_wo_defaults.pop("parent")
-    serialized_wo_defaults.pop("run_mode")
-    serialized_wo_defaults.pop("sample_inputs")
-    serialized_wo_defaults.pop("sample_outputs")
-    serialized_wo_defaults.pop("sha256")
-    serialized_wo_defaults["weights"]["pickle"].pop("attachments")
-    serialized_wo_defaults["weights"]["pickle"].pop("authors")
-    serialized_wo_defaults["weights"]["pickle"].pop("opset_version")
-    serialized_wo_defaults["weights"]["pickle"].pop("parent")
-    serialized_wo_defaults["weights"]["pickle"].pop("tensorflow_version")
-
-    assert serialized_wo_defaults == data
+    assert serialized == data
 
     assert not schema.Model().validate(serialized)
-    assert not schema.Model().validate(serialized_wo_defaults)
 
     raw_model_from_serialized, _ = load_raw_model(serialized)
     assert raw_model_from_serialized == raw_model
diff --git a/tests/test_format_version_conversion.py b/tests/test_format_version_conversion.py
index 543421434..51de25e8a 100644
--- a/tests/test_format_version_conversion.py
+++ b/tests/test_format_version_conversion.py
@@ -8,9 +8,9 @@
 yaml = YAML(typ="safe")
 
 
-def test_model_nodes_format_0_1_to_0_3(rf_config_path_v0_1, rf_config_path):
+def test_model_nodes_format_0_1_to_0_3(rf_config_path_v0_1, rf_config_path_v0_3):
     rf_model_data_v0_1 = yaml.load(rf_config_path_v0_1)
-    rf_model_data = yaml.load(rf_config_path)
+    rf_model_data = yaml.load(rf_config_path_v0_3)
 
     expected = asdict(schema.Model().load(rf_model_data))
     converted_data = maybe_convert_model(rf_model_data_v0_1)
@@ -24,9 +24,9 @@ def test_model_nodes_format_0_1_to_0_3(rf_config_path_v0_1, rf_config_path):
         out["description"] = out["name"]
 
     for key, item in expected.items():
-        assert key in actual
-        assert actual[key] == item
+        assert key in actual, key
+        assert actual[key] == item, key
 
     for key, item in actual.items():
         assert key in expected
-        assert expected[key] == item
+        assert item == expected[key]
diff --git a/tests/test_load_spec.py b/tests/test_load_spec.py
index b101164e9..9f9fe5966 100644
--- a/tests/test_load_spec.py
+++ b/tests/test_load_spec.py
@@ -12,7 +12,7 @@ def test_load_non_existing_spec():
 
 
 def test_load_non_valid_spec_name():
-    spec_path = "some/none/existing/path/to/spec.not_valid.yaml"
+    spec_path = "some/none/existing/path/to/spec.not_valid_suffix"
 
     with pytest.raises(ValidationError):
         load_model(spec_path)
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 6486ea3ba..5782ad780 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -1,6 +1,8 @@
 import pytest
 from datetime import datetime
 
+from marshmallow import missing
+
 from bioimageio.spec import nodes, schema
 
 
@@ -41,22 +43,20 @@ def test_tensor_schema_preprocessing():
             "data_type": "float32",
             "axes": "xyc",
             "shape": [128, 128, 3],
-            "preprocessing": [],
         },
         {"name": "input_1", "description": "Input 1", "data_type": "float32", "axes": "xyc", "shape": [128, 128, 3]},
     ],
 )
 def test_tensor_schema_no_preprocessing(data):
     validated_data = schema.InputTensor().load(data)
-    assert isinstance(validated_data.preprocessing, list)
-    assert len(validated_data.preprocessing) == 0
+    assert validated_data.preprocessing is missing
 
 
 @pytest.mark.parametrize("schema_instance", [schema.InputTensor(), schema.OutputTensor()])
 def test_tensor_schema_optional_description(schema_instance):
     data = {"name": "input_1", "data_type": "float32", "axes": "xyc", "shape": [128, 128, 3]}
     validated_data = schema_instance.load(data)
-    assert validated_data.description is None
+    assert validated_data.description is missing
 
 
 @pytest.fixture

From d0e6b53b7bf5f200e95d47ed312c118a309e4ce8 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 11:58:25 +0200
Subject: [PATCH 07/14] rename PyBioSchema -> BioImageIOSchema

---
 bioimageio/spec/shared/schema.py |  6 ++---
 bioimageio/spec/v0_3/schema.py   | 44 ++++++++++++++++----------------
 scripts/generate_docs.py         |  6 ++---
 3 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/bioimageio/spec/shared/schema.py b/bioimageio/spec/shared/schema.py
index 828b2febb..70a1d3a3b 100644
--- a/bioimageio/spec/shared/schema.py
+++ b/bioimageio/spec/shared/schema.py
@@ -6,7 +6,7 @@
 from . import raw_nodes
 
 
-class SharedPyBioSchema(Schema):
+class SharedBioImageIOSchema(Schema):
     raw_nodes: ModuleType = raw_nodes  # should be overwritten in subclass by version specific raw nodes module
     bioimageio_description: str = ""
 
@@ -31,7 +31,7 @@ def make_object(self, data, **kwargs):
             raise e
 
 
-class ImplicitInputShape(SharedPyBioSchema):
+class ImplicitInputShape(SharedBioImageIOSchema):
     min = fields.List(
         fields.Integer, required=True, bioimageio_description="The minimum input shape with same length as `axes`"
     )
@@ -50,7 +50,7 @@ def matching_lengths(self, data, **kwargs):
             raise ValidationError(f"'min' and 'step' have to have the same length! (min: {min_}, step: {step})")
 
 
-class ImplicitOutputShape(SharedPyBioSchema):
+class ImplicitOutputShape(SharedBioImageIOSchema):
     reference_input = fields.String(required=True, bioimageio_description="Name of the reference input tensor.")
     scale = fields.List(
         fields.Float, required=True, bioimageio_description="'output_pix/input_pix' for each dimension."
diff --git a/bioimageio/spec/v0_3/schema.py b/bioimageio/spec/v0_3/schema.py
index cd865d11d..34c47d2d1 100644
--- a/bioimageio/spec/v0_3/schema.py
+++ b/bioimageio/spec/v0_3/schema.py
@@ -6,7 +6,7 @@
 from spdx_license_list import LICENSES
 
 from bioimageio.spec.shared import field_validators, fields
-from bioimageio.spec.shared.schema import SharedPyBioSchema
+from bioimageio.spec.shared.schema import SharedBioImageIOSchema
 from . import raw_nodes
 
 try:
@@ -15,11 +15,11 @@
     from typing_extensions import get_args
 
 
-class PyBioSchema(SharedPyBioSchema):
+class BioImageIOSchema(SharedBioImageIOSchema):
     raw_nodes = raw_nodes
 
 
-class Author(PyBioSchema):
+class Author(BioImageIOSchema):
     name = fields.String(required=True, bioimageio_description="Full name.")
     affiliation = fields.String(bioimageio_description="Affiliation.")
     orcid = fields.String(
@@ -35,7 +35,7 @@ class Author(PyBioSchema):
     )
 
 
-class CiteEntry(PyBioSchema):
+class CiteEntry(BioImageIOSchema):
     text = fields.String(required=True)
     doi = fields.String(bioimageio_maybe_required=True)
     url = fields.String(bioimageio_maybe_required=True)
@@ -46,14 +46,14 @@ def doi_or_url(self, data, **kwargs):
             raise ValidationError("doi or url needs to be specified in a citation")
 
 
-class RunMode(PyBioSchema):
+class RunMode(BioImageIOSchema):
     name = fields.String(
         required=True, bioimageio_description="The name of the `run_mode`"
     )  # todo: limit valid run mode names
     kwargs = fields.Kwargs()
 
 
-class RDF(PyBioSchema):
+class RDF(BioImageIOSchema):
     """not the reference for RDF; todo: match definition of rdf json schema; move other fields to Model"""
 
     format_version = fields.String(
@@ -167,12 +167,12 @@ def warn_about_deprecated_spdx_license(self, value: str):
     )
 
 
-class SpecWithKwargs(PyBioSchema):
+class SpecWithKwargs(BioImageIOSchema):
     spec: fields.SpecURI
     kwargs = fields.Kwargs()
 
 
-class Tensor(PyBioSchema):
+class Tensor(BioImageIOSchema):
     name = fields.String(
         required=True, validate=field_validators.Predicate("isidentifier"), bioimageio_description="Tensor name."
     )
@@ -219,16 +219,16 @@ def validate_processing_kwargs(self, data, **kwargs):
                 raise ValidationError("`kwargs.axes` needs to be subset of axes")
 
 
-class Processing(PyBioSchema):
-    class Binarize(Schema):  # do not inherit from PyBioSchema, return only a validated dict, no specific node
+class Processing(BioImageIOSchema):
+    class Binarize(Schema):  # do not inherit from BioImageIOSchema, return only a validated dict, no specific node
         # todo: inherit from a "TransformSchema" that allows generation of docs for pre and postprocessing
         threshold = fields.Float(required=True)
 
-    class Clip(PyBioSchema):
+    class Clip(BioImageIOSchema):
         min = fields.Float(required=True)
         max = fields.Float(required=True)
 
-    class ScaleLinear(PyBioSchema):
+    class ScaleLinear(BioImageIOSchema):
         axes = fields.Axes(required=True, valid_axes="czyx")
         gain = fields.Array(fields.Float(), missing=fields.Float(missing=1.0))  # todo: check if gain match input axes
         offset = fields.Array(
@@ -255,10 +255,10 @@ def kwargs_match_selected_preprocessing_name(self, data, **kwargs):
         if kwargs_validation_errors:
             raise ValidationError(f"Invalid `kwargs` for '{data['name']}': {kwargs_validation_errors}")
 
-    class Sigmoid(PyBioSchema):
+    class Sigmoid(BioImageIOSchema):
         pass
 
-    class ZeroMeanUnitVariance(PyBioSchema):
+    class ZeroMeanUnitVariance(BioImageIOSchema):
         mode = fields.ProcMode(required=True)
         axes = fields.Axes(required=True, valid_axes="czyx")
         mean = fields.Array(fields.Float())  # todo: check if means match input axes (for mode 'fixed')
@@ -288,7 +288,7 @@ class Preprocessing(Processing):
     )
     kwargs = fields.Kwargs()
 
-    class ScaleRange(PyBioSchema):
+    class ScaleRange(BioImageIOSchema):
         mode = fields.ProcMode(required=True, valid_modes=("per_dataset", "per_sample"))
         axes = fields.Axes(required=True, valid_axes="czyx")
         min_percentile = fields.Float(
@@ -320,7 +320,7 @@ class Postprocessing(Processing):
     class ScaleRange(Preprocessing.ScaleRange):
         reference_tensor: fields.String(required=True, validate=field_validators.Predicate("isidentifier"))
 
-    class ScaleMeanVariance(PyBioSchema):
+    class ScaleMeanVariance(BioImageIOSchema):
         mode = fields.ProcMode(required=True, valid_modes=("per_dataset", "per_sample"))
         reference_tensor: fields.String(required=True, validate=field_validators.Predicate("isidentifier"))
 
@@ -421,7 +421,7 @@ def matching_halo_length(self, data, **kwargs):
 )
 
 
-class WeightsEntry(PyBioSchema):
+class WeightsEntry(BioImageIOSchema):
     authors = fields.List(
         fields.Nested(Author),
         bioimageio_description="A list of authors. If this is the root weight (it does not have a `parent` field): the "
@@ -448,7 +448,7 @@ class WeightsEntry(PyBioSchema):
     tensorflow_version = fields.StrictVersion()
 
 
-class ModelParent(PyBioSchema):
+class ModelParent(BioImageIOSchema):
     uri = fields.URI(
         bioimageio_description="Url of another model available on bioimage.io or path to a local model in the "
         "bioimage.io specification. If it is a url, it needs to be a github url linking to the page containing the "
@@ -643,20 +643,20 @@ def weights_entries_match_weights_formats(self, data, **kwargs):
 
 
 # Manifest
-class BioImageIoManifestModelEntry(PyBioSchema):
+class BioImageIoManifestModelEntry(BioImageIOSchema):
     id = fields.String(required=True)
     source = fields.String(validate=field_validators.URL(schemes=["http", "https"]))
     links = fields.List(fields.String)
     download_url = fields.String(validate=field_validators.URL(schemes=["http", "https"]))
 
 
-class Badge(PyBioSchema):
+class Badge(BioImageIOSchema):
     label = fields.String(required=True)
     icon = fields.URI()
     url = fields.URI()
 
 
-class BioImageIoManifestNotebookEntry(PyBioSchema):
+class BioImageIoManifestNotebookEntry(BioImageIOSchema):
     id = fields.String(required=True)
     name = fields.String(required=True)
     documentation = fields.RelativeLocalPath(
@@ -678,7 +678,7 @@ class BioImageIoManifestNotebookEntry(PyBioSchema):
     links = fields.List(fields.String)  # todo: make List[URI]?
 
 
-class BioImageIoManifest(PyBioSchema):
+class BioImageIoManifest(BioImageIOSchema):
     format_version = fields.String(
         validate=field_validators.OneOf(get_args(raw_nodes.ManifestFormatVersion)), required=True
     )
diff --git a/scripts/generate_docs.py b/scripts/generate_docs.py
index 04b334dca..69f0b02ba 100644
--- a/scripts/generate_docs.py
+++ b/scripts/generate_docs.py
@@ -41,11 +41,11 @@ def doc_from_schema(obj) -> typing.Union[typing.Dict[str, DocNode], DocNode]:
     details = []
     sub_docs = []
     required = True
-    if inspect.isclass(obj) and issubclass(obj, schema.SharedPyBioSchema):
+    if inspect.isclass(obj) and issubclass(obj, schema.SharedBioImageIOSchema):
 
         obj = obj()
 
-    if isinstance(obj, schema.SharedPyBioSchema):
+    if isinstance(obj, schema.SharedBioImageIOSchema):
 
         def sort_key(name_and_nested_field):
             name, nested_field = name_and_nested_field
@@ -115,7 +115,7 @@ def markdown_from_doc(doc: DocNode, indent: int = 0):
     return f"{type_name}{doc.description}\n{sub_doc}"
 
 
-def markdown_from_schema(schema: schema.SharedPyBioSchema) -> str:
+def markdown_from_schema(schema: schema.SharedBioImageIOSchema) -> str:
     doc = doc_from_schema(schema)
     return markdown_from_doc(doc)
 

From ce7b743922150ab1bd5518ab7b9b22ad62a8b9de Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 12:31:03 +0200
Subject: [PATCH 08/14] actually make type field mandatory

---
 bioimageio/spec/v0_3/schema.py | 2 +-
 tests/test_schema.py           | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/bioimageio/spec/v0_3/schema.py b/bioimageio/spec/v0_3/schema.py
index 34c47d2d1..86f1adee8 100644
--- a/bioimageio/spec/v0_3/schema.py
+++ b/bioimageio/spec/v0_3/schema.py
@@ -159,7 +159,7 @@ def warn_about_deprecated_spdx_license(self, value: str):
         bioimageio_description="Timestamp of the initial creation of this model in [ISO 8601]"
         "(#https://en.wikipedia.org/wiki/ISO_8601) format.",
     )
-    type = fields.String(validate=field_validators.OneOf(get_args(raw_nodes.Type)))
+    type = fields.String(required=True, validate=field_validators.OneOf(get_args(raw_nodes.Type)))
     version = fields.StrictVersion(
         bioimageio_description="The version number of the model. The version number format must be a string in "
         "`MAJOR.MINOR.PATCH` format following the guidelines in Semantic Versioning 2.0.0 (see https://semver.org/), "
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 5782ad780..eb83640f1 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -96,6 +96,7 @@ def model_dict():
         "weights": {},
         "test_inputs": [],
         "test_outputs": [],
+        "type": "model",
     }
 
 

From f62324735c2767782f68c38edc4e8774ac300833 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 12:31:21 +0200
Subject: [PATCH 09/14] set a default for raw model to avoid
 raw_nodes.Model(... type="model")

---
 bioimageio/spec/v0_3/raw_nodes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bioimageio/spec/v0_3/raw_nodes.py b/bioimageio/spec/v0_3/raw_nodes.py
index 5e92fc59b..827480485 100644
--- a/bioimageio/spec/v0_3/raw_nodes.py
+++ b/bioimageio/spec/v0_3/raw_nodes.py
@@ -163,6 +163,7 @@ class Model(RDF):
     source: Union[_Missing, ImportableSource] = missing
     test_inputs: List[URI] = missing
     test_outputs: List[URI] = missing
+    type: Type = "model"
     weights: Dict[WeightsFormat, WeightsEntry] = missing
 
 

From 2f33382319224edd8eb3e9b84342e9e1f7c49848 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 12:32:27 +0200
Subject: [PATCH 10/14] black

---
 bioimageio/spec/v0_3/schema.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/bioimageio/spec/v0_3/schema.py b/bioimageio/spec/v0_3/schema.py
index 86f1adee8..906a61783 100644
--- a/bioimageio/spec/v0_3/schema.py
+++ b/bioimageio/spec/v0_3/schema.py
@@ -328,8 +328,7 @@ class ScaleMeanVariance(BioImageIOSchema):
 class InputTensor(Tensor):
     shape = fields.InputShape(required=True, bioimageio_description="Specification of tensor shape.")
     preprocessing = fields.List(
-        fields.Nested(Preprocessing),
-        bioimageio_description="Description of how this input should be preprocessed.",
+        fields.Nested(Preprocessing), bioimageio_description="Description of how this input should be preprocessed."
     )
     processing_name = "preprocessing"
 
@@ -372,8 +371,7 @@ class OutputTensor(Tensor):
         "`shape:offset` if the model output itself is cropped and input and output shapes not fixed.",
     )
     postprocessing = fields.List(
-        fields.Nested(Postprocessing),
-        bioimageio_description="Description of how this output should be postprocessed.",
+        fields.Nested(Postprocessing), bioimageio_description="Description of how this output should be postprocessed."
     )
     processing_name = "postprocessing"
 
@@ -431,13 +429,13 @@ class WeightsEntry(BioImageIOSchema):
     attachments = fields.Dict(
         bioimageio_description="Dictionary of text keys and URI (or a list of URI) values to additional, relevant "
         "files that are specific to the current weight format. A list of URIs can be listed under the `files` key to "
-        "included additional files for generating the model package.",
+        "included additional files for generating the model package."
     )
     parent = fields.String(
         bioimageio_description="The source weights used as input for converting the weights to this format. For "
         "example, if the weights were converted from the format `pytorch_state_dict` to `pytorch_script`, the parent "
         "is `pytorch_state_dict`. All weight entries except one (the initial set of weights resulting from training "
-        "the model), need to have this field.",
+        "the model), need to have this field."
     )
     opset_version = fields.Number()  # ONNX Specific
     sha256 = fields.String(

From 00cee49fd885d45c7803e6fb0a3ce3316970f7d1 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 12:33:39 +0200
Subject: [PATCH 11/14] raise TypeError on missing required Node inputs

---
 bioimageio/spec/shared/raw_nodes.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/bioimageio/spec/shared/raw_nodes.py b/bioimageio/spec/shared/raw_nodes.py
index cb5006aa8..6b7e196ff 100644
--- a/bioimageio/spec/shared/raw_nodes.py
+++ b/bioimageio/spec/shared/raw_nodes.py
@@ -1,15 +1,25 @@
 """shared raw nodes that shared transformer act on"""
-
+import dataclasses
 from dataclasses import dataclass
 from pathlib import Path
-from typing import List
+from typing import List, Union
+
+try:
+    from typing import get_args, get_origin
+except ImportError:
+    from typing_extensions import get_args, get_origin
 
 from marshmallow import missing
 
 
 @dataclass
 class Node:
-    pass
+    def __post_init__(self):
+        for f in dataclasses.fields(self):
+            if getattr(self, f.name) is missing and (
+                get_origin(f.type) is not Union or not isinstance(missing, get_args(f.type))
+            ):
+                raise TypeError(f"{self.__class__}.__init__() missing required argument: '{f.name}'")
 
 
 @dataclass

From 5b40a3167050b1e0435a832a36be4d8a3a1e1947 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 14:08:03 +0200
Subject: [PATCH 12/14] improve build_spec

build all raw_node objects
---
 bioimageio/spec/latest/build_spec.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py
index 08d08c327..21d599000 100644
--- a/bioimageio/spec/latest/build_spec.py
+++ b/bioimageio/spec/latest/build_spec.py
@@ -213,13 +213,18 @@ def _get_output_tensor(test_out, name, reference_input, scale, offset, axes, dat
     return outputs
 
 
+def _build_authors(authors: List[Dict[str, str]]):
+    return [spec.raw_nodes.Author(**a) for a in authors]
+
+
 # TODO The citation entry should be improved so that we can properly derive doi vs. url
-def _build_cite(cite):
+def _build_cite(cite: Dict[str, str]):
     citation_list = [spec.raw_nodes.CiteEntry(text=k, url=v) for k, v in cite.items()]
     return citation_list
 
 
 # TODO we should make the name more specific: "build_model_spec"?
+# TODO maybe "build_raw_model" as it returns raw_nodes.Model
 # NOTE does not support multiple input / output tensors yet
 # to implement this we should wait for 0.4.0, see also
 # https://github.com/bioimage-io/spec-bioimage-io/issues/70#issuecomment-825737433
@@ -385,8 +390,13 @@ def build_spec(
     }
     kwargs = {k: v for k, v in optional_kwargs.items() if v is not None}
 
-    # build the citation object
+    # build raw_nodes objects
+    authors = _build_authors(authors)
     cite = _build_cite(cite)
+    documentation = spec.fields.URI().deserialize(documentation)
+    covers = [spec.fields.URI().deserialize(uri) for uri in covers]
+    test_inputs = [spec.fields.URI().deserialize(uri) for uri in test_inputs]
+    test_outputs = [spec.fields.URI().deserialize(uri) for uri in test_outputs]
 
     model = spec.raw_nodes.Model(
         format_version=format_version,
@@ -415,15 +425,8 @@ def build_spec(
     return model
 
 
-def add_weights(
-    model,
-    weight_uri: str,
-    root: Optional[str] = None,
-    weight_type: Optional[str] = None,
-    **weight_kwargs
-):
-    """ Add weight entry to bioimage.io model.
-    """
+def add_weights(model, weight_uri: str, root: Optional[str] = None, weight_type: Optional[str] = None, **weight_kwargs):
+    """Add weight entry to bioimage.io model."""
     new_weights = _get_weights(weight_uri, weight_type, None, root, **weight_kwargs)[0]
     model.weights.update(new_weights)
 
@@ -439,5 +442,5 @@ def serialize_spec(model, out_path, clear_defaults=True):
     if clear_defaults:
         defaults = ([], {}, None)
         cleared = remap(serialized, visit=lambda p, k, v: v not in defaults)
-    with open(out_path, 'w') as f:
+    with open(out_path, "w") as f:
         spec.utils.yaml.dump(cleared, f)

From e88579e59f51530d3233e7666fad79be07953f7f Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 14:14:18 +0200
Subject: [PATCH 13/14] remove clearing of default values in serialization

---
 bioimageio/spec/latest/build_spec.py | 14 +++++---------
 setup.py                             |  1 -
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py
index 21d599000..9fe82ccb3 100644
--- a/bioimageio/spec/latest/build_spec.py
+++ b/bioimageio/spec/latest/build_spec.py
@@ -1,11 +1,12 @@
-import os
 import datetime
 import hashlib
+import os
 from typing import Any, Dict, List, Optional, Union
 
 import numpy as np
+
 import bioimageio.spec as spec
-from boltons.iterutils import remap
+
 
 #
 # utility functions to build the spec from python
@@ -436,11 +437,6 @@ def add_weights(model, weight_uri: str, root: Optional[str] = None, weight_type:
     return model
 
 
-def serialize_spec(model, out_path, clear_defaults=True):
+def serialize_spec(model, out_path):  # TODO change name to include model (see build_model_spec)
     serialized = spec.schema.Model().dump(model)
-    # clear the default values using boltons remap
-    if clear_defaults:
-        defaults = ([], {}, None)
-        cleared = remap(serialized, visit=lambda p, k, v: v not in defaults)
-    with open(out_path, "w") as f:
-        spec.utils.yaml.dump(cleared, f)
+    spec.utils.yaml.dump(serialized, out_path)
diff --git a/setup.py b/setup.py
index a7557317a..06fd045d8 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,6 @@
     packages=find_namespace_packages(exclude=["tests"]),  # Required
     install_requires=[
         "PyYAML>=5.2",
-        "boltons",
         "imageio>=2.5",
         "marshmallow>=3.6.0,<4.0",
         "marshmallow_jsonschema",

From 0316007f3141744ccea644da1aa26a5a4d2454d0 Mon Sep 17 00:00:00 2001
From: fynnbe <thefynnbe@gmail.com>
Date: Thu, 17 Jun 2021 16:16:22 +0200
Subject: [PATCH 14/14] fix documentation field in build_spec

---
 bioimageio/spec/latest/build_spec.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bioimageio/spec/latest/build_spec.py b/bioimageio/spec/latest/build_spec.py
index 9fe82ccb3..68a8ef687 100644
--- a/bioimageio/spec/latest/build_spec.py
+++ b/bioimageio/spec/latest/build_spec.py
@@ -1,6 +1,7 @@
 import datetime
 import hashlib
 import os
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
 import numpy as np
@@ -394,7 +395,7 @@ def build_spec(
     # build raw_nodes objects
     authors = _build_authors(authors)
     cite = _build_cite(cite)
-    documentation = spec.fields.URI().deserialize(documentation)
+    documentation = Path(documentation)
     covers = [spec.fields.URI().deserialize(uri) for uri in covers]
     test_inputs = [spec.fields.URI().deserialize(uri) for uri in test_inputs]
     test_outputs = [spec.fields.URI().deserialize(uri) for uri in test_outputs]