diff --git a/0.4/examples/bf2raw/.config.json b/0.4/examples/bf2raw/.config.json new file mode 100644 index 00000000..2525328c --- /dev/null +++ b/0.4/examples/bf2raw/.config.json @@ -0,0 +1,3 @@ +{ + "schema": "schemas/bf2raw.schema" +} diff --git a/0.4/examples/bf2raw/image.json b/0.4/examples/bf2raw/image.json new file mode 100644 index 00000000..c5eadb8a --- /dev/null +++ b/0.4/examples/bf2raw/image.json @@ -0,0 +1,3 @@ +{ + "bioformats2raw.layout" : 3 +} \ No newline at end of file diff --git a/0.4/examples/bf2raw/plate.json b/0.4/examples/bf2raw/plate.json new file mode 100644 index 00000000..bd98a16e --- /dev/null +++ b/0.4/examples/bf2raw/plate.json @@ -0,0 +1,22 @@ +{ + "bioformats2raw.layout" : 3, + "plate" : { + "columns" : [ { + "name" : "1" + } ], + "name" : "Plate Name 0", + "wells" : [ { + "path" : "A/1", + "rowIndex" : 0, + "columnIndex" : 0 + } ], + "field_count" : 1, + "rows" : [ { + "name" : "A" + } ], + "acquisitions" : [ { + "id" : 0 + } ], + "version" : "0.4" + } +} diff --git a/0.4/examples/ome/.config.json b/0.4/examples/ome/.config.json new file mode 100644 index 00000000..8a611ccf --- /dev/null +++ b/0.4/examples/ome/.config.json @@ -0,0 +1,3 @@ +{ + "schema": "schemas/ome.schema" +} diff --git a/0.4/examples/ome/series-2.json b/0.4/examples/ome/series-2.json new file mode 100644 index 00000000..be7e9ed6 --- /dev/null +++ b/0.4/examples/ome/series-2.json @@ -0,0 +1,3 @@ +{ + "series" : [ "0", "1" ] +} diff --git a/0.4/index.bs b/0.4/index.bs index 18a6656f..d39b2c5f 100644 --- a/0.4/index.bs +++ b/0.4/index.bs @@ -99,6 +99,14 @@ The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL “RECOMMENDED”, “MAY”, and “OPTIONAL” are to be interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). +

+Transitional metadata is added to the specification with the +intention of removing it in a future version. Implementations can be required (MUST) or +encouraged (SHOULD) to support reading such metadata, but writing it will usually +be optional (MAY). Examples of transitional metadata include custom additions by +implementations that are later submitted as a formal specification (see [[#bf2raw]]). +

+ Some of the JSON examples in this document include commments. However, these are only for clarity purposes and comments MUST NOT be included in JSON objects. @@ -240,6 +248,85 @@ keys as specified below for discovering certain types of data, especially images If part of [[#multiscale-md]], the length of "axes" MUST be equal to the number of dimensions of the arrays that contain the image data. +"bioformats2raw.layout" (transitional) {#bf2raw} +------------------------------------------------ + +[=Transitional=] "bioformats2raw.layout" metadata identifies a group which implicitly describes a series of images. +The need for the collection stems from the common "multi-image file" scenario in microscopy. Parsers like Bio-Formats +define a strict, stable ordering of the images in a single container that can be used to refer to them by other tools. + +In order to capture that information within an OME-NGFF dataset, `bioformats2raw` internally introduced a wrapping layer. +The bioformats2raw layout has been added to v0.4 as a transitional specification to specify filesets that already exist +in the wild. An upcoming NGFF specification will replace this layout with explicit metadata. + +

Layout

+ +Typical Zarr layout produced by running `bioformats2raw` on a fileset that contains more than one image (series > 1): + +
+series.ome.zarr               # One converted fileset from bioformats2raw
+    ├── .zgroup
+    ├── .zattrs               # Contains "bioformats2raw.layout" metadata
+    ├── OME                   # Special group for containing OME metadata
+    │   ├── .zgroup
+    │   ├── .zattrs           # Contains "series" metadata
+    │   └── METADATA.ome.xml  # OME-XML file stored within the Zarr fileset
+    ├── 0                     # First image in the collection
+    ├── 1                     # Second image in the collection
+    └── ...
+
+ +

Attributes

+ +The top-level `.zattrs` file must contain the `bioformats2raw.layout` key: +
+path: examples/bf2raw/image.json
+highlight: json
+
+ +If the top-level group represents a plate, the `bioformats2raw.layout` metadata will be present, but +the "plate" key MUST also be present and takes precedence; parsing of such datasets should follow [[#plate-md]]. It is not +possible to mix collections of images with plates at present. + +
+path: examples/bf2raw/plate.json
+highlight: json
+
+ +The `.zattrs` file within the OME group may contain the "series" key: + +
+path: examples/ome/series-2.json
+highlight: json
+
+ +

Details

+ +Conforming groups: + +- MUST have the value "3" for the "bioformats2raw.layout" key in their `.zattrs` metadata at the top of the hierarchy; +- SHOULD have OME metadata representing the entire collection of images in a file named "OME/METADATA.ome.xml" which: + - MUST adhere to the OME-XML specification but + - MUST use `<MetadataOnly/>` elements as opposed to `<BinData/>`, `<BinaryOnly/>` or `<TiffData/>`; + - MAY make use of the [minimum specification](https://docs.openmicroscopy.org/ome-model/6.2.2/specifications/minimum.html). + +Additionally, the Zarr group for each image is located using the following logic: + +- If "plate" metadata is present, images MUST be located at the defined location. + - Matching "series" metadata (as described next) SHOULD be provided for tools that are unaware of the "plate" specification. +- If the "OME" Zarr group exists, it: + - MAY contain a "series" attribute. If so: + - "series" MUST be a list of string objects, each of which is a path to an image group. + - The order of the paths MUST match the order of the "Image" elements in "OME/METADATA.ome.xml" if provided. +- If the "series" attribute does not exist and no "plate" is present: + - separate "multiscales" images MUST be stored in consecutively numbered groups starting from 0 (i.e. "0/", "1/", "2/", "3/", ...). +- Every "multiscales" group MUST represent exactly one OME-XML "Image" in the same order as either the series index or the group numbers. + +Conforming readers: +- SHOULD make users aware of the presence of more than one image (i.e. SHOULD NOT default to only opening the first image); +- MAY use the "series" attribute in the "OME" group to determine a list of valid groups to display; +- MAY choose to show all images within the collection or offer the user a choice of images, as with HCS plates; +- MAY ignore other groups or arrays under the root of the hierarchy. 
"coordinateTransformations" metadata {#trafo-md} ------------------------------------- @@ -315,10 +402,10 @@ if not datasets: datasets = [x["path"] for x in multiscales[0]["datasets"]] ``` -"omero" metadata {#omero-md} ----------------------------- +"omero" metadata (transitional) {#omero-md} +------------------------------------------- -Information specific to the channels of an image and how to render it +[=Transitional=] information specific to the channels of an image and how to render it can be found under the "omero" key in the group-level metadata: ```json @@ -581,6 +668,11 @@ Version History {#history} Description + + 0.4.1 + 2022-09-26 + transitional metadata for image collections ("bioformats2raw.layout") + 0.4.0 2022-02-08 diff --git a/0.4/schemas/bf2raw.schema b/0.4/schemas/bf2raw.schema new file mode 100644 index 00000000..834aee24 --- /dev/null +++ b/0.4/schemas/bf2raw.schema @@ -0,0 +1,14 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://ngff.openmicroscopy.org/latest/schemas/bf2raw.schema", + "title": "NGFF container produced by bioformats2raw", + "description": "JSON from OME-NGFF .zattrs", + "type": "object", + "properties": { + "bioformats2raw.layout": { + "description": "The top-level identifier metadata added by bioformats2raw", + "type": "number", + "enum": [3] + } + } +} diff --git a/0.4/schemas/ome.schema b/0.4/schemas/ome.schema new file mode 100644 index 00000000..bd600a2a --- /dev/null +++ b/0.4/schemas/ome.schema @@ -0,0 +1,17 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://ngff.openmicroscopy.org/latest/schemas/ome.schema", + "title": "NGFF group produced by bioformats2raw to contain OME metadata", + "description": "JSON from OME-NGFF OME/.zattrs linked to an OME-XML file", + "type": "object", + "properties": { + "series": { + "description": "An array of the same length and the same order as the images defined in the OME-XML", + "type": "array", + "items": { + 
"type": "string" + }, + "minContains": 1 + } + } +} diff --git a/latest/examples/bf2raw/.config.json b/latest/examples/bf2raw/.config.json new file mode 100644 index 00000000..2525328c --- /dev/null +++ b/latest/examples/bf2raw/.config.json @@ -0,0 +1,3 @@ +{ + "schema": "schemas/bf2raw.schema" +} diff --git a/latest/examples/bf2raw/image.json b/latest/examples/bf2raw/image.json new file mode 100644 index 00000000..c5eadb8a --- /dev/null +++ b/latest/examples/bf2raw/image.json @@ -0,0 +1,3 @@ +{ + "bioformats2raw.layout" : 3 +} \ No newline at end of file diff --git a/latest/examples/bf2raw/plate.json b/latest/examples/bf2raw/plate.json new file mode 100644 index 00000000..bd98a16e --- /dev/null +++ b/latest/examples/bf2raw/plate.json @@ -0,0 +1,22 @@ +{ + "bioformats2raw.layout" : 3, + "plate" : { + "columns" : [ { + "name" : "1" + } ], + "name" : "Plate Name 0", + "wells" : [ { + "path" : "A/1", + "rowIndex" : 0, + "columnIndex" : 0 + } ], + "field_count" : 1, + "rows" : [ { + "name" : "A" + } ], + "acquisitions" : [ { + "id" : 0 + } ], + "version" : "0.4" + } +} diff --git a/latest/examples/ome/.config.json b/latest/examples/ome/.config.json new file mode 100644 index 00000000..8a611ccf --- /dev/null +++ b/latest/examples/ome/.config.json @@ -0,0 +1,3 @@ +{ + "schema": "schemas/ome.schema" +} diff --git a/latest/examples/ome/series-2.json b/latest/examples/ome/series-2.json new file mode 100644 index 00000000..be7e9ed6 --- /dev/null +++ b/latest/examples/ome/series-2.json @@ -0,0 +1,3 @@ +{ + "series" : [ "0", "1" ] +} diff --git a/latest/index.bs b/latest/index.bs index 3ed6d7d7..bb935c48 100644 --- a/latest/index.bs +++ b/latest/index.bs @@ -101,6 +101,14 @@ The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL “RECOMMENDED”, “MAY”, and “OPTIONAL” are to be interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). +

+Transitional metadata is added to the specification with the +intention of removing it in a future version. Implementations can be required (MUST) or +encouraged (SHOULD) to support reading such metadata, but writing it will usually +be optional (MAY). Examples of transitional metadata include custom additions by +implementations that are later submitted as a formal specification (see [[#bf2raw]]). +

+ Some of the JSON examples in this document include commments. However, these are only for clarity purposes and comments MUST NOT be included in JSON objects. @@ -242,9 +250,89 @@ keys as specified below for discovering certain types of data, especially images If part of [[#multiscale-md]], the length of "axes" MUST be equal to the number of dimensions of the arrays that contain the image data. +"bioformats2raw.layout" (transitional) {#bf2raw} +------------------------------------------------ + +[=Transitional=] "bioformats2raw.layout" metadata identifies a group which implicitly describes a series of images. +The need for the collection stems from the common "multi-image file" scenario in microscopy. Parsers like Bio-Formats +define a strict, stable ordering of the images in a single container that can be used to refer to them by other tools. + +In order to capture that information within an OME-NGFF dataset, `bioformats2raw` internally introduced a wrapping layer. +The bioformats2raw layout has been added to v0.4 as a transitional specification to specify filesets that already exist +in the wild. An upcoming NGFF specification will replace this layout with explicit metadata. + +

Layout

+ +Typical Zarr layout produced by running `bioformats2raw` on a fileset that contains more than one image (series > 1): + +
+series.ome.zarr               # One converted fileset from bioformats2raw
+    ├── .zgroup
+    ├── .zattrs               # Contains "bioformats2raw.layout" metadata
+    ├── OME                   # Special group for containing OME metadata
+    │   ├── .zgroup
+    │   ├── .zattrs           # Contains "series" metadata
+    │   └── METADATA.ome.xml  # OME-XML file stored within the Zarr fileset
+    ├── 0                     # First image in the collection
+    ├── 1                     # Second image in the collection
+    └── ...
+
+ +

Attributes

+ +The top-level `.zattrs` file must contain the `bioformats2raw.layout` key: +
+path: examples/bf2raw/image.json
+highlight: json
+
+ +If the top-level group represents a plate, the `bioformats2raw.layout` metadata will be present, but +the "plate" key MUST also be present and takes precedence; parsing of such datasets should follow [[#plate-md]]. It is not +possible to mix collections of images with plates at present. + +
+path: examples/bf2raw/plate.json
+highlight: json
+
+ +The `.zattrs` file within the OME group may contain the "series" key: + +
+path: examples/ome/series-2.json
+highlight: json
+
+ +

Details

+ +Conforming groups: + +- MUST have the value "3" for the "bioformats2raw.layout" key in their `.zattrs` metadata at the top of the hierarchy; +- SHOULD have OME metadata representing the entire collection of images in a file named "OME/METADATA.ome.xml" which: + - MUST adhere to the OME-XML specification but + - MUST use `<MetadataOnly/>` elements as opposed to `<BinData/>`, `<BinaryOnly/>` or `<TiffData/>`; + - MAY make use of the [minimum specification](https://docs.openmicroscopy.org/ome-model/6.2.2/specifications/minimum.html). + +Additionally, the Zarr group for each image is located using the following logic: + +- If "plate" metadata is present, images MUST be located at the defined location. + - Matching "series" metadata (as described next) SHOULD be provided for tools that are unaware of the "plate" specification. +- If the "OME" Zarr group exists, it: + - MAY contain a "series" attribute. If so: + - "series" MUST be a list of string objects, each of which is a path to an image group. + - The order of the paths MUST match the order of the "Image" elements in "OME/METADATA.ome.xml" if provided. +- If the "series" attribute does not exist and no "plate" is present: + - separate "multiscales" images MUST be stored in consecutively numbered groups starting from 0 (i.e. "0/", "1/", "2/", "3/", ...). +- Every "multiscales" group MUST represent exactly one OME-XML "Image" in the same order as either the series index or the group numbers. + +Conforming readers: +- SHOULD make users aware of the presence of more than one image (i.e. SHOULD NOT default to only opening the first image); +- MAY use the "series" attribute in the "OME" group to determine a list of valid groups to display; +- MAY choose to show all images within the collection or offer the user a choice of images, as with HCS plates; +- MAY ignore other groups or arrays under the root of the hierarchy. 
+ "coordinateTransformations" metadata {#trafo-md} -------------------------------------- +------------------------------------------------ "coordinateTransformations" describe a series of transformations that map between two coordinate spaces (defined by "axes"). For example, to map a discrete data space of an array to the corresponding physical space. @@ -317,10 +405,10 @@ if not datasets: datasets = [x["path"] for x in multiscales[0]["datasets"]] ``` -"omero" metadata {#omero-md} ----------------------------- +"omero" metadata (transitional) {#omero-md} +------------------------------------------- -Information specific to the channels of an image and how to render it +[=Transitional=] information specific to the channels of an image and how to render it can be found under the "omero" key in the group-level metadata: ```json @@ -583,6 +671,11 @@ Version History {#history} Description + + 0.4.1 + 2022-09-26 + transitional metadata for image collections ("bioformats2raw.layout") + 0.4.0 2022-02-08 diff --git a/latest/schemas/bf2raw.schema b/latest/schemas/bf2raw.schema new file mode 100644 index 00000000..834aee24 --- /dev/null +++ b/latest/schemas/bf2raw.schema @@ -0,0 +1,14 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://ngff.openmicroscopy.org/latest/schemas/bf2raw.schema", + "title": "NGFF container produced by bioformats2raw", + "description": "JSON from OME-NGFF .zattrs", + "type": "object", + "properties": { + "bioformats2raw.layout": { + "description": "The top-level identifier metadata added by bioformats2raw", + "type": "number", + "enum": [3] + } + } +} diff --git a/latest/schemas/ome.schema b/latest/schemas/ome.schema new file mode 100644 index 00000000..bd600a2a --- /dev/null +++ b/latest/schemas/ome.schema @@ -0,0 +1,17 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://ngff.openmicroscopy.org/latest/schemas/ome.schema", + "title": "NGFF group produced by bioformats2raw to contain 
OME metadata", + "description": "JSON from OME-NGFF OME/.zattrs linked to an OME-XML file", + "type": "object", + "properties": { + "series": { + "description": "An array of the same length and the same order as the images defined in the OME-XML", + "type": "array", + "items": { + "type": "string" + }, + "minContains": 1 + } + } +} diff --git a/latest/tests/test_validation.py b/latest/tests/test_validation.py index 9e441fad..8c13b113 100644 --- a/latest/tests/test_validation.py +++ b/latest/tests/test_validation.py @@ -89,3 +89,17 @@ def test_run(suite): resolver = RefResolver.from_schema(suite.schema, store=schema_store) validator = Validator(suite.schema, resolver=resolver) suite.validate(validator) + + +def test_example_configs(): + """ + Test that all example folders have a config file + """ + missing = [] + for subdir in os.walk("examples"): + has_examples = glob.glob(f"{subdir[0]}/*.json") + has_config = glob.glob(f"{subdir[0]}/.config.json") + if has_examples and not has_config: + missing.append(subdir[0]) + if missing: + raise Exception(f"Directories missing configs: {missing}")