Skip to content

Commit

Permalink
Merge pull request #212 from bioimage-io/remove_ruamel_yaml
Browse files Browse the repository at this point in the history
Remove ruamel.yaml dependency
  • Loading branch information
FynnBe authored Aug 2, 2021
2 parents 169f55f + 32fabe9 commit 65cdee7
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 21 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ This repository contains specifications defined by the BioImage.IO community. Th


All the BioImage.IO-compatible RDF must fulfill the following rules:
* Must be a YAML file encoded as UTF-8;
* Must be a YAML file encoded as UTF-8. If the YAML syntax version is not declared to be 1.1 by a `%YAML 1.1` directive in the first line, the file must be interpreted identically under YAML 1.1 and YAML 1.2. For the differences see https://yaml.readthedocs.io/en/latest/pyyaml.html#differences-with-pyyaml.
* The RDF file extension must be `.yaml` (not `.yml`)
* The RDF file can be saved in a folder (or virtual folder) or in a zip package, the following additional rules must apply:
1. When stored in a local file system folder, github repo, zenodo deposition, blob storage virtual folder or similar kind, the RDF file name should match the pattern of `*.rdf.yaml`, for example `my-model.rdf.yaml`.
Expand Down
68 changes: 60 additions & 8 deletions bioimageio/spec/shared/common.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,77 @@
import dataclasses
import os
import pathlib
import re
import tempfile
import warnings
from collections import UserDict
from typing import Any, Dict, Generic, Optional

from ruamel.yaml import YAML

try:
from typing import Literal, get_args, get_origin, Protocol
except ImportError:
from typing_extensions import Literal, get_args, get_origin, Protocol # type: ignore

import yaml as _yaml


class PyYAML:
    """ruamel.yaml.YAML replacement.

    This uses PyYAML's yaml 1.1 implementation with some manually added yaml 1.2 'fixes'.
    Only the "safe" flavour is implemented.
    """

    def __init__(self, typ="safe"):
        """Set up the loader class used by `load`.

        Args:
            typ: loader flavour; only "safe" is supported.

        Raises:
            NotImplementedError: if `typ` is anything other than "safe".
        """
        if typ != "safe":
            raise NotImplementedError(typ)

        # Patch a private subclass instead of _yaml.SafeLoader itself: add_implicit_resolver
        # modifies the class it is called on, so patching SafeLoader directly would change
        # parsing for every other PyYAML user in the process and would append the same
        # resolver again each time this class is instantiated.
        class _Loader(_yaml.SafeLoader):
            pass

        # floating point 'fix' for yaml 1.1 from https://stackoverflow.com/a/30462009
        # (whitespace inside the pattern is ignored because of re.X)
        _Loader.add_implicit_resolver(
            "tag:yaml.org,2002:float",
            re.compile(
                """^(?:
                [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
                |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
                |\\.[0-9_]+(?:[eE][-+][0-9]+)?
                |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
                |[-+]?\\.(?:inf|Inf|INF)
                |\\.(?:nan|NaN|NAN))$""",
                re.X,
            ),
            list("-+0123456789."),
        )
        self.loader = _Loader

    def load(self, stream):
        """Parse yaml from `stream` and return the resulting Python object.

        Args:
            stream: an `os.PathLike` (the file is opened and read as UTF-8) or anything
                `yaml.load` accepts directly. A plain `str` is not path-like and is
                therefore parsed as yaml content, not opened as a file.
        """
        if isinstance(stream, os.PathLike):
            # resource description files are required to be UTF-8, independent of the locale
            with pathlib.Path(stream).open(encoding="utf-8") as f:
                return _yaml.load(f, Loader=self.loader)

        return _yaml.load(stream, Loader=self.loader)

    @staticmethod
    def dump(data, stream):
        """Serialize `data` as yaml into `stream`.

        Args:
            data: object to serialize.
            stream: an `os.PathLike` (the file is opened for writing as UTF-8) or a
                stream object that is handed to `yaml.dump` unchanged.

        Returns:
            Whatever `yaml.dump` returns for the given stream.
        """
        if isinstance(stream, os.PathLike):
            with pathlib.Path(stream).open("w", encoding="utf-8") as f:
                return _yaml.dump(data, f)

        return _yaml.dump(data, stream)


# PyYAML based implementation (yaml 1.1 plus the float fix); does not need ruamel.yaml.
pyyaml_yaml = PyYAML()

# ruamel.yaml is optional: prefer it when it can be imported, otherwise fall back to PyYAML.
try:
    from ruamel.yaml import YAML
except ImportError:
    # The annotation is quoted on purpose: module-level annotations are evaluated at
    # runtime, and `YAML` is unbound in this branch, so an unquoted `Optional[YAML]`
    # would raise NameError exactly when ruamel.yaml is not installed.
    ruamel_yaml: "Optional[YAML]" = None
    yaml = pyyaml_yaml
else:
    ruamel_yaml = YAML(typ="safe")
    yaml = ruamel_yaml


# Cache location: taken from the BIOIMAGEIO_CACHE_PATH environment variable when it is set,
# otherwise "bioimageio_cache" inside the system temporary directory.
BIOIMAGEIO_CACHE_PATH = pathlib.Path(
    os.environ.get("BIOIMAGEIO_CACHE_PATH", os.path.join(tempfile.gettempdir(), "bioimageio_cache"))
)


def get_format_version_module(type_: str, format_version: str):
assert "." in format_version
Expand Down Expand Up @@ -66,13 +125,6 @@ def get_args_flat(tp):
return tuple(flat_args)


# Shared ruamel.yaml instance created in "safe" mode.
yaml = YAML(typ="safe")

# Cache location: value of the BIOIMAGEIO_CACHE_PATH environment variable if set,
# otherwise "bioimageio_cache" inside the system temporary directory.
BIOIMAGEIO_CACHE_PATH = pathlib.Path(
os.getenv("BIOIMAGEIO_CACHE_PATH", pathlib.Path(tempfile.gettempdir()) / "bioimageio_cache")
)


class Singleton(type):
_instances: dict = {}

Expand Down
44 changes: 44 additions & 0 deletions scripts/compare_yaml_syntax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import json
import sys
from argparse import ArgumentParser
from pathlib import Path

from marshmallow_jsonschema import JSONSchema

import bioimageio.spec

from bioimageio.spec.shared.common import ruamel_yaml, pyyaml_yaml

# The comparison needs both parsers; fail at import time when the ruamel.yaml backend is unavailable.
if ruamel_yaml is None:
    raise RuntimeError("Cannot compare yaml syntax without the ruamel.yaml package")


def parse_args():
    """Parse the command line.

    Returns:
        The argparse namespace; `resource_description_path` holds the single
        positional argument converted to a `Path`.
    """
    parser = ArgumentParser(
        description="Check for differences between yaml 1.1 (using PyYAML) and yaml 1.2 syntax (using ruamel.yaml)."
    )
    parser.add_argument("resource_description_path", type=Path)
    return parser.parse_args()


class _Missing:
    """Marker shown in the diff for a key that only one of the two parsers produced."""

    def __repr__(self):
        return "<missing>"


_MISSING = _Missing()


def main(resource_description_path: Path):
    """Compare how PyYAML (yaml 1.1) and ruamel.yaml (yaml 1.2) parse one file.

    Only the top-level entries of the two parsed mappings are compared. A key that is
    present in just one of the results counts as a difference; this matters because
    yaml 1.1 and 1.2 can disagree on the keys themselves (e.g. yaml 1.1 reads an
    unquoted `on` as a boolean).

    Args:
        resource_description_path: path of the yaml file to load with both parsers.

    Returns:
        The number of differing top-level keys (0 if both parsers agree).
    """
    pyyaml = pyyaml_yaml.load(resource_description_path)
    assert isinstance(pyyaml, dict)
    ruamel = ruamel_yaml.load(resource_description_path)
    assert isinstance(ruamel, dict)

    # Keys of the PyYAML result first, then those only ruamel.yaml produced. The keys are
    # deliberately not sorted: keys of different types (True vs "on") cannot be ordered.
    all_keys = list(pyyaml) + [key for key in ruamel if key not in pyyaml]

    diff = {}
    for key in all_keys:
        pyyaml_value = pyyaml.get(key, _MISSING)
        ruamel_value = ruamel.get(key, _MISSING)
        if pyyaml_value != ruamel_value:
            diff[key] = (pyyaml_value, ruamel_value)

    if diff:
        print(f"Found differences between yaml syntax 1.1/1.2 for {resource_description_path}:")
        print(diff)
    else:
        print(f"No differences found between yaml syntax 1.1/1.2 for {resource_description_path}:")

    return len(diff)


if __name__ == "__main__":
    # the number of differing keys doubles as the process exit status (0 means no differences)
    cli_args = parse_args()
    exit_status = main(cli_args.resource_description_path)
    sys.exit(exit_status)
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,11 @@
"marshmallow-jsonschema",
"marshmallow-union",
"requests",
"ruamel.yaml",
"typer",
"typing-extensions",
],
entry_points={"console_scripts": ["bioimageio = bioimageio.spec.__main__:app"]},
extras_require={"test": ["pytest", "tox", "torch", "numpy", "mypy"], "dev": ["pre-commit"]},
extras_require={"test": ["pytest", "tox", "torch", "numpy", "mypy", "ruamel.yaml"], "dev": ["pre-commit"]},
scripts=["scripts/generate_docs.py"],
include_package_data=True,
project_urls={ # Optional
Expand Down
3 changes: 0 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path

import pytest
from ruamel.yaml import YAML

from bioimageio import spec
from bioimageio.spec import export_resource_package
Expand All @@ -14,8 +13,6 @@
except ImportError:
from typing_extensions import get_args # type: ignore

yaml = YAML(typ="safe")


def get_unet2d_nuclei_broad_path(version: str):
assert isinstance(version, str), version
Expand Down
5 changes: 1 addition & 4 deletions tests/test_format_version_conversion.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from dataclasses import asdict

from ruamel.yaml import YAML

from bioimageio.spec.model import schema

yaml = YAML(typ="safe")
from bioimageio.spec.shared import yaml


def test_model_format_version_conversion(unet2d_nuclei_broad_v0_1_0_path, unet2d_nuclei_broad_latest_path):
Expand Down
3 changes: 0 additions & 3 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@
from typing import Any

import pytest
from ruamel.yaml import YAML

from bioimageio.spec.shared import nodes, raw_nodes, utils

yaml = YAML(typ="safe")


@dataclass
class MyNode(nodes.Node):
Expand Down
53 changes: 53 additions & 0 deletions tests/test_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
Tests ruamel.yaml replacement.
Why?
PyYAML defaults to YAML 1.1 and does not yet support YAML 1.2 (which is the default for ruamel.yaml atm).
For yaml 1.2 in PyYAML see https://github.com/yaml/pyyaml/issues/116 and https://github.com/yaml/pyyaml/pull/512 .
We want to ensure compatibility with yaml 1.2, while using yaml 1.1 for now to drop the ruamel.yaml dependency
in order to more easily run with pyodide.
For differences between yaml 1.1 and 1.2, see:
https://yaml.readthedocs.io/en/latest/pyyaml.html#defaulting-to-yaml-1-2-support
"""
import json


def test_unet2d_nuclei_broad_is_indifferent(unet2d_nuclei_broad_any_path):
    """ruamel.yaml and the PyYAML replacement must load the description identically."""
    from bioimageio.spec.shared.common import pyyaml_yaml, ruamel_yaml

    loaded = {
        "ruamel": ruamel_yaml.load(unet2d_nuclei_broad_any_path),
        "pyyaml": pyyaml_yaml.load(unet2d_nuclei_broad_any_path),
    }

    # ignore known difference:
    # timestamp contains default utc timezone (tzinfo.utc) for ruamel.yaml, but not for PyYAML
    for parsed in loaded.values():
        parsed.pop("timestamp")

    assert loaded["ruamel"] == loaded["pyyaml"]


def test_flaoting_point_numbers_pyyaml():
    """The PyYAML replacement must read exponent-only floats as floats."""
    # only the PyYAML backend is exercised here, so ruamel_yaml is not imported
    from bioimageio.spec.shared.common import pyyaml_yaml

    expected = {"one": 1, "low": 0.000001}
    # json.dumps writes 0.000001 as 1e-06 (no decimal point); this is the form that
    # needs the added float resolver to be read back as a float
    data_str = json.dumps(expected)
    actual = pyyaml_yaml.load(data_str)
    assert expected == actual


# just to be sure we test ruamel.yaml as well...
def test_flaoting_point_numbers_ruamel():
    """ruamel.yaml must round-trip the json encoded numbers unchanged."""
    from bioimageio.spec.shared.common import ruamel_yaml

    reference = {"one": 1, "low": 0.000001}
    round_tripped = ruamel_yaml.load(json.dumps(reference))
    assert reference == round_tripped


def test_compare_script(unet2d_nuclei_broad_any_path):
    """The comparison script must report exactly one differing key for the test model."""
    from scripts.compare_yaml_syntax import main as compare_yaml_syntax

    n_differences = compare_yaml_syntax(unet2d_nuclei_broad_any_path)

    # ignore difference for timestamp
    assert n_differences == 1

0 comments on commit 65cdee7

Please sign in to comment.