diff --git a/README.md b/README.md
index 84c41a4c2..40f766cd0 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ This repository contains specifications defined by the BioImage.IO community. Th
 
 All the BioImage.IO-compatible RDF must fulfill the following rules:
 
- * Must be a YAML file encoded as UTF-8;
+ * Must be a YAML file encoded as UTF-8. Unless the first line declares YAML 1.1 with the `%YAML 1.1` directive, the file must be interpreted identically under YAML 1.1 and YAML 1.2. For the differences see https://yaml.readthedocs.io/en/latest/pyyaml.html#differences-with-pyyaml.
  * The RDF file extension must be `.yaml` (not `.yml`)
  * The RDF file can be saved in a folder (or virtual folder) or in a zip package, the following additional rules must apply:
     1. When stored in a local file system folder, github repo, zenodo deposition, blob storage virtual folder or similar kind, the RDF file name should match the pattern of `*.rdf.yaml`, for example `my-model.rdf.yaml`.
diff --git a/bioimageio/spec/shared/common.py b/bioimageio/spec/shared/common.py
index 2b410b0a8..bdf94e253 100644
--- a/bioimageio/spec/shared/common.py
+++ b/bioimageio/spec/shared/common.py
@@ -1,18 +1,91 @@
 import dataclasses
 import os
 import pathlib
+import re
 import tempfile
 import warnings
 from collections import UserDict
 from typing import Any, Dict, Generic, Optional
 
-from ruamel.yaml import YAML
 
 try:
     from typing import Literal, get_args, get_origin, Protocol
 except ImportError:
     from typing_extensions import Literal, get_args, get_origin, Protocol  # type: ignore
 
+import yaml as _yaml
+
+
+class PyYAML:
+    """ruamel.yaml.YAML replacement.
+
+    This uses PyYAML's yaml 1.1 implementation with some manually added yaml 1.2 'fixes'.
+    Only typ="safe" is implemented; any other value raises NotImplementedError.
+    """
+
+    def __init__(self, typ="safe"):
+        if typ != "safe":
+            raise NotImplementedError(typ)
+
+        # Register the resolver on a private subclass. add_implicit_resolver modifies the class it is
+        # called on, so calling it on yaml.SafeLoader itself would change PyYAML's safe loading globally.
+        class _Loader(_yaml.SafeLoader):
+            pass
+
+        # floating point 'fix' for yaml 1.1 from https://stackoverflow.com/a/30462009
+        _Loader.add_implicit_resolver(
+            "tag:yaml.org,2002:float",
+            re.compile(
+                """^(?:
+                 [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
+                |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
+                |\\.[0-9_]+(?:[eE][-+][0-9]+)?
+                |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
+                |[-+]?\\.(?:inf|Inf|INF)
+                |\\.(?:nan|NaN|NAN))$""",
+                re.X,
+            ),
+            list("-+0123456789."),
+        )
+        self.loader = _Loader
+
+    def load(self, stream):
+        """Load yaml from an os.PathLike path; any other `stream` is passed to yaml.load unchanged."""
+        if isinstance(stream, os.PathLike):
+            # read as UTF-8 (required by the spec) instead of the platform default encoding
+            with pathlib.Path(stream).open(encoding="utf-8") as f:
+                return _yaml.load(f, Loader=self.loader)
+        else:
+            return _yaml.load(stream, Loader=self.loader)
+
+    @staticmethod
+    def dump(data, stream):
+        """Dump data as yaml to an os.PathLike path; any other `stream` is passed to yaml.dump unchanged."""
+        if isinstance(stream, os.PathLike):
+            with pathlib.Path(stream).open("w", encoding="utf-8") as f:
+                return _yaml.dump(data, f)
+        else:
+            return _yaml.dump(data, stream)
+
+
+pyyaml_yaml = PyYAML()
+
+try:
+    from ruamel.yaml import YAML
+except ImportError:
+    # annotate with Any, not Optional[YAML]: module-level annotations are evaluated at runtime and
+    # the name YAML is undefined in this branch
+    ruamel_yaml: Any = None
+    yaml: Any = pyyaml_yaml
+else:
+    ruamel_yaml = YAML(typ="safe")
+    yaml = ruamel_yaml
+
+
+BIOIMAGEIO_CACHE_PATH = pathlib.Path(
+    os.getenv("BIOIMAGEIO_CACHE_PATH", pathlib.Path(tempfile.gettempdir()) / "bioimageio_cache")
+)
+
 
 def get_format_version_module(type_: str, format_version: str):
     assert "." in format_version
@@ -66,13 +139,6 @@ def get_args_flat(tp):
     return tuple(flat_args)
 
 
-yaml = YAML(typ="safe")
-
-BIOIMAGEIO_CACHE_PATH = pathlib.Path(
-    os.getenv("BIOIMAGEIO_CACHE_PATH", pathlib.Path(tempfile.gettempdir()) / "bioimageio_cache")
-)
-
-
 class Singleton(type):
     _instances: dict = {}
 
diff --git a/scripts/compare_yaml_syntax.py b/scripts/compare_yaml_syntax.py
new file mode 100644
index 000000000..14a7f0c80
--- /dev/null
+++ b/scripts/compare_yaml_syntax.py
@@ -0,0 +1,56 @@
+import json
+import sys
+from argparse import ArgumentParser
+from pathlib import Path
+
+from marshmallow_jsonschema import JSONSchema
+
+import bioimageio.spec
+
+from bioimageio.spec.shared.common import ruamel_yaml, pyyaml_yaml
+
+if ruamel_yaml is None:
+    raise RuntimeError("Cannot compare yaml syntax without the ruamel.yaml package")
+
+
+def parse_args():
+    """Parse the command line; the only argument is the path to a resource description file."""
+    p = ArgumentParser(
+        description="Check for differences between yaml 1.1 (using PyYAML) and yaml 1.2 syntax (using ruamel.yaml)."
+    )
+    p.add_argument("resource_description_path", type=Path)
+    args = p.parse_args()
+    return args
+
+
+def main(resource_description_path: Path):
+    """Load the file with PyYAML and ruamel.yaml and report top-level entries that differ.
+
+    Returns the number of differing entries; the script uses it as its exit code.
+    """
+    pyyaml = pyyaml_yaml.load(resource_description_path)
+    assert isinstance(pyyaml, dict)
+    ruamel = ruamel_yaml.load(resource_description_path)
+    assert isinstance(ruamel, dict)
+
+    # Compare over the union of keys: an entry that only one parser produced is a difference too
+    # (indexing ruamel[key] for every PyYAML key would raise KeyError and miss ruamel-only keys).
+    missing = "(missing)"
+    keys = list(pyyaml) + [key for key in ruamel if key not in pyyaml]
+    diff = {
+        key: (pyyaml.get(key, missing), ruamel.get(key, missing))
+        for key in keys
+        if key not in pyyaml or key not in ruamel or pyyaml[key] != ruamel[key]
+    }
+    if diff:
+        print(f"Found differences between yaml syntax 1.1/1.2 for {resource_description_path}:")
+        print(diff)
+    else:
+        print(f"No differences found between yaml syntax 1.1/1.2 for {resource_description_path}:")
+
+    return len(diff)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    sys.exit(main(args.resource_description_path))
diff --git a/setup.py b/setup.py
index 8e6821ef6..8b5a1accf 100644
--- a/setup.py
+++ b/setup.py
@@ -32,12 +32,11 @@
         "marshmallow-jsonschema",
         "marshmallow-union",
         "requests",
-        "ruamel.yaml",
         "typer",
         "typing-extensions",
     ],
     entry_points={"console_scripts": ["bioimageio = bioimageio.spec.__main__:app"]},
-    extras_require={"test": ["pytest", "tox", "torch", "numpy", "mypy"], "dev": ["pre-commit"]},
+    extras_require={"test": ["pytest", "tox", "torch", "numpy", "mypy", "ruamel.yaml"], "dev": ["pre-commit"]},
     scripts=["scripts/generate_docs.py"],
     include_package_data=True,
     project_urls={  # Optional
diff --git a/tests/conftest.py b/tests/conftest.py
index 6d4b22a8a..9cb260088 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 
 import pytest
-from ruamel.yaml import YAML
 
 from bioimageio import spec
 from bioimageio.spec import export_resource_package
@@ -14,8 +13,6 @@
 except ImportError:
     from typing_extensions import get_args  # type: ignore
 
-yaml = YAML(typ="safe")
-
 
 def get_unet2d_nuclei_broad_path(version: str):
     assert isinstance(version, str), version
diff --git a/tests/test_format_version_conversion.py b/tests/test_format_version_conversion.py
index 49521d5a8..41e63a144 100644
--- a/tests/test_format_version_conversion.py
+++ b/tests/test_format_version_conversion.py
@@ -1,10 +1,7 @@
 from dataclasses import asdict
 
-from ruamel.yaml import YAML
-
 from bioimageio.spec.model import schema
-
-yaml = YAML(typ="safe")
+from bioimageio.spec.shared import yaml
 
 
 def test_model_format_version_conversion(unet2d_nuclei_broad_v0_1_0_path, unet2d_nuclei_broad_latest_path):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2b1126206..3c24dcda8 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,12 +3,9 @@
 from typing import Any
 
 import pytest
-from ruamel.yaml import YAML
 
 from bioimageio.spec.shared import nodes, raw_nodes, utils
 
-yaml = YAML(typ="safe")
-
 
 @dataclass
 class MyNode(nodes.Node):
diff --git a/tests/test_yaml.py b/tests/test_yaml.py
new file mode 100644
index 000000000..f91c15896
--- /dev/null
+++ b/tests/test_yaml.py
@@ -0,0 +1,53 @@
+"""
+Tests ruamel.yaml replacement.
+
+Why?
+PyYAML defaults to YAML 1.1 and does not yet support YAML 1.2 (which is the default for ruamel.yaml at the moment).
+For yaml 1.2 in PyYAML see https://github.com/yaml/pyyaml/issues/116 and https://github.com/yaml/pyyaml/pull/512 .
+We want to ensure compatibility with yaml 1.2, while using yaml 1.1 for now to drop the ruamel.yaml dependency
+in order to more easily run with pyodide.
+For differences between yaml 1.1 and 1.2, see:
+https://yaml.readthedocs.io/en/latest/pyyaml.html#defaulting-to-yaml-1-2-support
+"""
+import json
+
+
+def test_unet2d_nuclei_broad_is_indifferent(unet2d_nuclei_broad_any_path):
+    from bioimageio.spec.shared.common import pyyaml_yaml, ruamel_yaml
+
+    expected = ruamel_yaml.load(unet2d_nuclei_broad_any_path)
+    actual = pyyaml_yaml.load(unet2d_nuclei_broad_any_path)
+
+    # ignore known difference:
+    # timestamp contains default utc timezone (tzinfo.utc) for ruamel.yaml, but not for PyYAML
+    expected.pop("timestamp")
+    actual.pop("timestamp")
+
+    assert expected == actual
+
+
+def test_floating_point_numbers_pyyaml():
+    from bioimageio.spec.shared.common import pyyaml_yaml
+
+    expected = {"one": 1, "low": 0.000001}
+    data_str = json.dumps(expected)
+    actual = pyyaml_yaml.load(data_str)
+    assert expected == actual
+
+
+# just to be sure we test ruamel.yaml as well...
+def test_floating_point_numbers_ruamel():
+    from bioimageio.spec.shared.common import ruamel_yaml
+
+    expected = {"one": 1, "low": 0.000001}
+    data_str = json.dumps(expected)
+    actual = ruamel_yaml.load(data_str)
+    assert expected == actual
+
+
+def test_compare_script(unet2d_nuclei_broad_any_path):
+    from scripts.compare_yaml_syntax import main
+
+    diff = main(unet2d_nuclei_broad_any_path)
+
+    assert diff == 1  # ignore difference for timestamp