pydata · shoyer · Apr 14, 2022 · Mar 25, 2022 · Mar 25, 2022 · Mar 25, 2022
diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst
@@ -32,9 +32,11 @@ the variable dimension names and then removed from the attributes dictionary
 returned to the user.
 
 Because of these choices, Xarray cannot read arbitrary array data, but only
-Zarr data with valid ``_ARRAY_DIMENSIONS`` attributes on each array.
+Zarr data with valid ``_ARRAY_DIMENSIONS`` or
+`NCZarr <https://docs.unidata.ucar.edu/nug/current/nczarr_head.html>`_ attributes
+on each array (NCZarr dimension names are defined in the ``.zarray`` file).
 
-After decoding the ``_ARRAY_DIMENSIONS`` attribute and assigning the variable
+After decoding the ``_ARRAY_DIMENSIONS`` or NCZarr attribute and assigning the variable
 dimensions, Xarray proceeds to [optionally] decode each variable using its
 standard CF decoding machinery used for NetCDF data (see :py:func:`decode_cf`).
 

diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst
@@ -519,7 +519,7 @@ the ability to store and analyze datasets far too large fit onto disk
 Xarray can't open just any zarr dataset, because xarray requires special
 metadata (attributes) describing the dataset dimensions and coordinates.
 At this time, xarray can only open zarr datasets that have been written by
-xarray. For implementation details, see :ref:`zarr_encoding`.
+xarray or `NCZarr`_. For implementation details, see :ref:`zarr_encoding`.
 
 To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method.
 
@@ -548,6 +548,11 @@ store is already present at that path, an error will be raised, preventing it
 from being overwritten. To override this behavior and overwrite an existing
 store, add ``mode='w'`` when invoking :py:meth:`~Dataset.to_zarr`.
 
+.. note::
+
+    xarray does not write NCZarr attributes. Therefore, NCZarr data must be
+    opened in read-only mode.
+
 To store variable length strings, convert them to object arrays first with
 ``dtype=object``.
 
@@ -606,6 +611,7 @@ instance and pass this, as follows:
 .. _Amazon S3: https://aws.amazon.com/s3/
 .. _Google Cloud Storage: https://cloud.google.com/storage/
 .. _gcsfs: https://github.com/fsspec/gcsfs
+.. _NCZarr: https://docs.unidata.ucar.edu/nug/current/nczarr_head.html
 
 Zarr Compressors and Filters
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -22,6 +22,8 @@ v2022.03.1 (unreleased)
 New Features
 ~~~~~~~~~~~~
 
+- The `zarr` backend is now able to read NCZarr.
+  By `Mattia Almansi <https://github.com/malmans2>`_.
 - Add a weighted ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and
   :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`). By
   `Christian Jauvin <https://github.com/cjauvin>`_ and `David Huard <https://github.com/huard>`_.

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
@@ -1,3 +1,4 @@
+import json
 import os
 import warnings
 
@@ -178,19 +179,37 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
     raise AssertionError("We should never get here. Function logic must be wrong.")
 
 
-def _get_zarr_dims_and_attrs(zarr_obj, dimension_key):
+def _get_zarr_dims_and_attrs(zarr_obj, dimension_key, try_nczarr):
     # Zarr arrays do not have dimensions. To get around this problem, we add
     # an attribute that specifies the dimension. We have to hide this attribute
     # when we send the attributes to the user.
     # zarr_obj can be either a zarr group or zarr array
     try:
+        # Xarray-Zarr
         dimensions = zarr_obj.attrs[dimension_key]
-    except KeyError:
-        raise KeyError(
-            f"Zarr object is missing the attribute `{dimension_key}`, which is "
-            "required for xarray to determine variable dimensions."
-        )
-    attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key])
+    except KeyError as e:
+        if not try_nczarr:
+            raise KeyError(
+                f"Zarr object is missing the attribute `{dimension_key}`, which is "
+                "required for xarray to determine variable dimensions."
+            ) from e
+
+        # NCZarr defines dimensions through metadata in .zarray
+        zarray_path = os.path.join(zarr_obj.path, ".zarray")
+        zarray = json.loads(zarr_obj.store[zarray_path])
+        try:
+            # NCZarr uses Fully Qualified Names
+            dimensions = [
+                os.path.basename(dim) for dim in zarray["_NCZARR_ARRAY"]["dimrefs"]
+            ]
+        except KeyError as e:
+            raise KeyError(
+                f"Zarr object is missing the attribute `{dimension_key}` and the NCZarr metadata, "
+                "which are required for xarray to determine variable dimensions."
+            ) from e
+
+    nc_attrs = [attr for attr in zarr_obj.attrs if attr.startswith("_NC")]
+    attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key] + nc_attrs)
     return dimensions, attributes
 
 
@@ -409,7 +428,10 @@ def ds(self):
 
     def open_store_variable(self, name, zarr_array):
         data = indexing.LazilyIndexedArray(ZarrArrayWrapper(name, self))
-        dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, DIMENSION_KEY)
+        try_nczarr = self._mode == "r"
+        dimensions, attributes = _get_zarr_dims_and_attrs(
+            zarr_array, DIMENSION_KEY, try_nczarr
+        )
         attributes = dict(attributes)
         encoding = {
             "chunks": zarr_array.chunks,
@@ -430,26 +452,24 @@ def get_variables(self):
         )
 
     def get_attrs(self):
-        return dict(self.zarr_group.attrs.asdict())
+        return {
+            k: v
+            for k, v in self.zarr_group.attrs.asdict().items()
+            if not k.startswith("_NC")
+        }
 
     def get_dimensions(self):
+        try_nczarr = self._mode == "r"
         dimensions = {}
         for k, v in self.zarr_group.arrays():
-            try:
-                for d, s in zip(v.attrs[DIMENSION_KEY], v.shape):
-                    if d in dimensions and dimensions[d] != s:
-                        raise ValueError(
-                            f"found conflicting lengths for dimension {d} "
-                            f"({s} != {dimensions[d]})"
-                        )
-                    dimensions[d] = s
-
-            except KeyError:
-                raise KeyError(
-                    f"Zarr object is missing the attribute `{DIMENSION_KEY}`, "
-                    "which is required for xarray to determine "
-                    "variable dimensions."
-                )
+            dim_names, _ = _get_zarr_dims_and_attrs(v, DIMENSION_KEY, try_nczarr)
+            for d, s in zip(dim_names, v.shape):
+                if d in dimensions and dimensions[d] != s:
+                    raise ValueError(
+                        f"found conflicting lengths for dimension {d} "
+                        f"({s} != {dimensions[d]})"
+                    )
+                dimensions[d] = s
         return dimensions
 
     def set_dimensions(self, variables, unlimited_dims=None):
@@ -645,7 +665,7 @@ def open_zarr(
 
     The `store` object should be a valid store for a Zarr group. `store`
     variables must contain dimension metadata encoded in the
-    `_ARRAY_DIMENSIONS` attribute.
+    `_ARRAY_DIMENSIONS` attribute or must have NCZarr format.
 
     Parameters
     ----------

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
@@ -4,6 +4,7 @@
 import math
 import os.path
 import pickle
+import platform
 import re
 import shutil
 import sys
@@ -5427,3 +5428,51 @@ def test_write_file_from_np_str(str_type, tmpdir) -> None:
     txr = tdf.to_xarray()
 
     txr.to_netcdf(tmpdir.join("test.nc"))
+
+
+@requires_zarr
+@requires_netCDF4
+class TestNCZarr:
+    @staticmethod
+    def _create_nczarr(filename):
+        netcdfc_version = Version(nc4.getlibversion().split()[0])
+        if netcdfc_version < Version("4.8.1"):
+            pytest.skip("requires netcdf-c>=4.8.1")
+        if (platform.system() == "Windows") and (netcdfc_version == Version("4.8.1")):
+            # Bug in netcdf-c==4.8.1 (typo: Nan instead of NaN)
+            # https://github.com/Unidata/netcdf-c/issues/2265
+            pytest.skip("netcdf-c==4.8.1 has issues on Windows")
+
+        ds = create_test_data()
+        # Drop dim3: netcdf-c does not support dtype='<U1'
+        # https://github.com/Unidata/netcdf-c/issues/2259
+        ds = ds.drop_vars("dim3")
+
+        # netcdf-c>4.8.1 will add _ARRAY_DIMENSIONS by default
+        mode = "nczarr" if netcdfc_version == Version("4.8.1") else "nczarr,noxarray"
+        ds.to_netcdf(f"file://{filename}#mode={mode}")
+        return ds
+
+    def test_open_nczarr(self):
+        with create_tmp_file(suffix=".zarr") as tmp:
+            expected = self._create_nczarr(tmp)
+            actual = xr.open_zarr(tmp, consolidated=False)
+            assert_identical(expected, actual)
+
+    def test_overwriting_nczarr(self):
+        with create_tmp_file(suffix=".zarr") as tmp:
+            ds = self._create_nczarr(tmp)
+            expected = ds[["var1"]]
+            expected.to_zarr(tmp, mode="w")
+            actual = xr.open_zarr(tmp, consolidated=False)
+            assert_identical(expected, actual)
+
+    @pytest.mark.parametrize("mode", ["a", "r+"])
+    @pytest.mark.filterwarnings("ignore:.*non-consolidated metadata.*")
+    def test_raise_writing_to_nczarr(self, mode):
+        with create_tmp_file(suffix=".zarr") as tmp:
+            ds = self._create_nczarr(tmp)
+            with pytest.raises(
+                KeyError, match="missing the attribute `_ARRAY_DIMENSIONS`,"
+            ):
+                ds.to_zarr(tmp, mode=mode)