Xarray open_mfdataset with engine Zarr #4187

Merged
105 commits merged on Sep 22, 2020
Changes from 97 commits
Commits
f55ed1c
create def for multiple zarr files and added commentary/definition, w…
Apr 23, 2020
49f6512
just as with ``xr.open_mfdatasets``, identify the paths as local dire…
Apr 23, 2020
f35a3e5
added error if no path
Apr 23, 2020
9f728aa
finished copying similar code from `xr.open_mfdatasets`
Apr 23, 2020
8d0a844
remove blank lines
Apr 23, 2020
b3b0f1d
fixed typo
Apr 23, 2020
2221943
added ``xr.open_mzarr()`` to the list of available modules to call
Apr 23, 2020
ac35e7c
imported missing function
Apr 23, 2020
64654f3
imported missing glob
Apr 23, 2020
d5a5cef
imported function from backend.api
Apr 23, 2020
4c0ef19
imported function to facilitate mzarr
Apr 23, 2020
d158c21
correctly imported functions from core to mzarr
Apr 23, 2020
5171420
imported to use on open_mzarr
Apr 23, 2020
e1e51bb
removed lock and autoclose since not taken by ``open_zarr``
Apr 23, 2020
b6bf2cf
fixed typo
Apr 23, 2020
3bc4be8
class is not needed since zarr stores don`t remain open
Apr 23, 2020
a79b125
removed old behavior
Apr 23, 2020
2d3bbb5
set default
Apr 23, 2020
f7cf580
listed open_mzarr
Apr 25, 2020
53c8623
removed unused imported function
Apr 25, 2020
34d755e
imported Path - hadn`t before
Apr 25, 2020
b39b37e
remove unncessesary comments
Apr 25, 2020
276006a
modified comments
Apr 25, 2020
6f04be6
isorted zarr
Apr 25, 2020
aa97e1a
isorted
Apr 25, 2020
06de16a
erased open_mzarr. Added capability to open_dataset to open zarr files
Apr 28, 2020
f94fc9f
removed imported but unused
Apr 28, 2020
16e08e3
comment to `zarr` engine
Apr 28, 2020
22828fc
added chunking code from `open_zarr`
Apr 28, 2020
021f2cc
remove import `open_mzarr``
Apr 28, 2020
985f28c
removed `open_mzarr`` from top-level-function
Apr 28, 2020
e8ed887
missing return in nested function
Apr 29, 2020
d693514
moved outside of nested function, had touble with reading before assi…
Apr 29, 2020
df34f18
added missing argument associated with zarr stores, onto the definiti…
Apr 29, 2020
98351c7
isort zarr.py
Apr 29, 2020
160bd67
removed blank lines, fixed typo on `chunks`
Apr 29, 2020
7e57e9b
removed imported but unused
Apr 29, 2020
ac0f093
restored conditional for `auto`
Apr 29, 2020
6a1516c
removed imported but unused `dask.array`
Apr 29, 2020
8999faf
added capabilities for file_or_obj to be a mutablemapper such as `fss…
Apr 29, 2020
5df0985
moved to a different conditional since file_or_obj is a mutablemappin…
Apr 29, 2020
2d94ea2
isort api.py
Apr 29, 2020
377ef53
restored the option for when file_or_obk is a str, such as an url.
Apr 29, 2020
f48c84b
fixed relabel
Apr 29, 2020
8376cca
update open_dataset for zarr files
Apr 29, 2020
aed1cc5
remove open_zarr from tests, now open_dataset(engine=`zarr`)
Apr 29, 2020
b488363
remove extra file, and raise deprecating warning on open_zarr
Apr 29, 2020
bae7f10
added internal call to open_dataset from depricated open_zarr
Apr 29, 2020
37ff214
defined engine=`zarr`
Apr 29, 2020
b8b98f5
correct argument for open_dataset
Apr 29, 2020
5c37329
pass arguments as backend_kwargs
Apr 29, 2020
831f15b
pass backend_kwargs as argument
Apr 29, 2020
80dd7da
typo
Apr 29, 2020
4ebf380
set `overwrite_enconded_chunks as backend_kwargs
Apr 29, 2020
4ce3007
do not pass as backend, use for chunking
Apr 29, 2020
89a780b
removed commented code
May 22, 2020
6f6eb23
moved definitions to zarr backends
May 22, 2020
62893ab
Merge pull request #1 from Mikejmnez/new_branch
May 22, 2020
13be3e0
Ensure class functions have necessary variables
weiji14 Jun 29, 2020
1977ba1
Merge remote-tracking branch 'origin/master' into open_mfzarr
weiji14 Jun 29, 2020
afbcf78
Combine MutableMapping and Zarr engine condition
weiji14 Jun 29, 2020
cba93c3
Pop out overwrite_encoded_chunks after shallow copy backend_kwargs dict
weiji14 Jun 29, 2020
bc740f6
Fix some errors noticed by PEP8
weiji14 Jun 30, 2020
746caa6
Reorganize code in backends api.py and actually test using engine zarr
weiji14 Jun 30, 2020
ef2a0f6
Add back missing decode_timedelta kwarg
weiji14 Jun 30, 2020
a7b24ff
Add back a missing engine="zarr" to test_distributed.py
weiji14 Jun 30, 2020
6823721
Ensure conditional statements make sense
weiji14 Jun 30, 2020
3fa73a7
Fix UnboundLocalError on 'chunks' referenced before assignment
weiji14 Jun 30, 2020
70fa30e
Run isort to fix import order
weiji14 Jun 30, 2020
cb6d066
Fix tests where kwargs needs to be inside of backend_kwargs dict now
weiji14 Jun 30, 2020
cd0b9ef
Change open_zarr to open_dataset with engine="zarr" in io.rst
weiji14 Jun 30, 2020
f2c368a
Fix test_distributed by wrapping consolidated in backend_kwargs dict
weiji14 Jun 30, 2020
0530bba
Ensure read-only mode when using open_dataset with engine="zarr"
weiji14 Jul 1, 2020
738303b
Turn chunks from "auto" to None if dask is not available
weiji14 Jul 1, 2020
38fc6f7
Add back a missing else statement in maybe_chunk
weiji14 Jul 1, 2020
aca2012
Allow xfail test_vectorized_indexing when has_dask
weiji14 Jul 1, 2020
0b34ab8
Typo on chunks arg in open_dataset
weiji14 Jul 1, 2020
6fbeadf
Fix ZeroDivisionError by adding back check that chunks is not False
weiji14 Jul 1, 2020
6f6aae7
Fix a typo that was causing TypeError: 'method' object is not iterable
weiji14 Jul 1, 2020
543a1c7
Move the `if not chunks` block to after auto detect
weiji14 Jul 1, 2020
aa4833f
Revert "Allow xfail test_vectorized_indexing when has_dask"
weiji14 Jul 1, 2020
6b99225
Temporarily xfail test_vectorized_indexing with or without dask
weiji14 Jul 1, 2020
5571fff
Put zarr in open_mfdataset engine list
weiji14 Jul 1, 2020
b9a239e
Test open_mfdataset_manyfiles with engine zarr
weiji14 Jul 1, 2020
e9c35f9
Remember to set a ._file_obj when using Zarr
weiji14 Jul 2, 2020
827e546
Expect np.ndarray when using open_mfdataset on Zarr with chunks None
weiji14 Jul 2, 2020
1484a2a
Merge branch 'master' into open_mfzarr
weiji14 Jul 2, 2020
ad6f31e
Add an entry to what's new for open_mfdataset with Zarr engine
weiji14 Jul 2, 2020
e2e1c81
Make zarr engine's custom chunk mechanism more in line with ds.chunk
weiji14 Jul 2, 2020
dce4e7c
Workaround problem where dask arrays aren't returned when chunks is None
weiji14 Jul 3, 2020
31ce87d
Default to chunks="auto" for Zarr tests to fix test_vectorized_indexing
weiji14 Jul 3, 2020
09bf681
Merge branch 'master' into open_mfzarr
weiji14 Jul 5, 2020
47f1e32
Merge remote-tracking branch 'origin/master' into open_mfzarr
weiji14 Jul 20, 2020
da42dab
Merge branch 'master' into open_mfzarr
weiji14 Aug 12, 2020
9b9dc3a
Merge branch 'master' into open_mfzarr
dcherian Aug 21, 2020
4a0b922
Merge branch 'master' into open_mfzarr
weiji14 Aug 26, 2020
cd783a5
Fix test by passing in chunk_store to backend_kwargs
weiji14 Aug 26, 2020
2c28a98
Merge remote-tracking branch 'origin/master' into open_mfzarr
weiji14 Sep 17, 2020
7b34d1b
Revert "Change open_zarr to open_dataset with engine="zarr" in io.rst"
weiji14 Jun 30, 2020
40c4d46
Remove open_zarr DeprecationWarning
weiji14 Sep 17, 2020
2c73e0b
Merge branch 'master' into open_mfzarr
weiji14 Sep 21, 2020
3b0e9b1
Update open_dataset docstring to specify chunk options for zarr engine
weiji14 Sep 21, 2020
d4398d4
Let only chunks = None return non-chunked arrays
weiji14 Sep 21, 2020
da7baae
Remove for-loop in test_manual_chunk since testing only one no_chunk
weiji14 Sep 21, 2020
48dae50
Update open_dataset docstring to remove mention of chunks=None with Zarr
weiji14 Sep 21, 2020
14 changes: 7 additions & 7 deletions doc/io.rst
@@ -898,11 +898,11 @@ can be omitted as it will internally be set to ``'a'``.
To store variable length strings use ``dtype=object``.

To read back a zarr dataset that has been created this way, we use the
:py:func:`open_zarr` method:
:py:func:`open_dataset` method with ``engine="zarr"``:

.. ipython:: python

ds_zarr = xr.open_zarr("path/to/directory.zarr")
ds_zarr = xr.open_dataset("path/to/directory.zarr", engine="zarr")
ds_zarr

Cloud Storage Buckets
@@ -919,7 +919,7 @@ pass to xarray::
# write to the bucket
ds.to_zarr(store=gcsmap)
# read it back
ds_gcs = xr.open_zarr(gcsmap)
ds_gcs = xr.open_dataset(gcsmap, engine="zarr")

.. _Zarr: http://zarr.readthedocs.io/
.. _Amazon S3: https://aws.amazon.com/s3/
@@ -970,12 +970,12 @@ with consolidated metadata. To write consolidated metadata, pass the
``consolidated=True`` option to the
:py:attr:`Dataset.to_zarr` method::

ds.to_zarr('foo.zarr', consolidated=True)
ds.to_zarr("foo.zarr", consolidated=True)

To read a consolidated store, pass the ``consolidated=True`` option to
:py:func:`open_zarr`::
To read a consolidated store, pass ``{"consolidated": True}`` to the
backend_kwargs option when using :py:func:`open_dataset` with ``engine="zarr"``::

ds = xr.open_zarr('foo.zarr', consolidated=True)
ds = xr.open_dataset("foo.zarr", engine="zarr", backend_kwargs={"consolidated": True})

Xarray can't perform consolidation on pre-existing zarr datasets. This should
be done directly from zarr, as described in the
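
The io.rst changes above switch the documented read path from ``open_zarr`` to the generic ``open_dataset``/``open_mfdataset`` entry points. A minimal sketch of the usage this PR enables (the store paths and the concatenation dimension are hypothetical)::

    import xarray as xr

    # Open a single zarr store read-only through the generic entry point.
    ds = xr.open_dataset("store1.zarr", engine="zarr", chunks="auto")

    # open_mfdataset now accepts engine="zarr" as well, so several stores
    # can be opened and combined in one call.
    ds_multi = xr.open_mfdataset(
        ["store1.zarr", "store2.zarr"],
        engine="zarr",
        combine="nested",
        concat_dim="time",
    )
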
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -54,6 +54,9 @@ New Features
of :py:class:`DataArray` and :py:class:`Dataset` objects and
document the new method in :doc:`internals`. (:pull:`4248`).
By `Justus Magin <https://github.com/keewis>`_.
- :py:func:`open_dataset` and :py:func:`open_mfdataset`
now works with ``engine="zarr"`` (:issue:`3668`, :pull:`4003`, :pull:`4187`).
By `Miguel Jimenez <https://github.com/Mikejmnez>`_ and `Wei Ji Leong <https://github.com/weiji14>`_.
- Add support for parsing datetime strings formatted following the default
string representation of cftime objects, i.e. YYYY-MM-DD hh:mm:ss, in
partial datetime string indexing, as well as :py:meth:`~xarray.cftime_range`
70 changes: 61 additions & 9 deletions xarray/backends/api.py
@@ -1,5 +1,6 @@
import os.path
import warnings
from collections.abc import MutableMapping
from glob import glob
from io import BytesIO
from numbers import Number
@@ -344,12 +345,12 @@ def open_dataset(
If True, decode the 'coordinates' attribute to identify coordinates in
the resulting dataset.
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \
"pseudonetcdf"}, optional
"pseudonetcdf", "zarr"}, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4".
chunks : int or dict, optional
If chunks is provided, it used to load the new dataset into dask
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks={}`` loads the dataset with dask using a single
chunk for all arrays.
lock : False or lock-like, optional
@@ -413,6 +414,7 @@ def open_dataset(
"pynio",
"cfgrib",
"pseudonetcdf",
"zarr",
]
if engine not in engines:
raise ValueError(
@@ -447,7 +449,7 @@ def open_dataset(
if backend_kwargs is None:
backend_kwargs = {}

def maybe_decode_store(store, lock=False):
def maybe_decode_store(store, chunks, lock=False):
ds = conventions.decode_cf(
store,
mask_and_scale=mask_and_scale,
@@ -461,7 +463,7 @@ def maybe_decode_store(store, lock=False):

_protect_dataset_variables_inplace(ds, cache)

if chunks is not None:
if chunks is not None and engine != "zarr":
from dask.base import tokenize

# if passed an actual file path, augment the token with
@@ -487,10 +489,40 @@ def maybe_decode_store(store, lock=False):
)
name_prefix = "open_dataset-%s" % token
ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
ds2._file_obj = ds._file_obj

elif engine == "zarr":
Member

My main concern with this code is that introducing an entirely separate code-path inside open_dataset() for chunking zarr in particular feels strange and a little unexpected. Any time we use totally separate code branches for some logic, the odds of introducing inconsistencies/bugs increases greatly.

I wonder if we could consolidate this logic somehow in order to avoid adding a separate branch for the code here? For example, we could put a get_chunk method on all xarray backends classes, even if it currently only returns a filler value and/or raises an error for chunks='auto'? Chunking is not unique to zarr, e.g., netCDF4 files also have chunks, although the default "auto" chunking logic should probably be different.

I would be OK holding this off for a later clean-up, but this really would be worth doing eventually. CC @alexamici RE: the backends refactor.

weiji14 (Contributor Author), Sep 17, 2020

> My main concern with this code is that introducing an entirely separate code-path inside open_dataset() for chunking zarr in particular feels strange and a little unexpected. Any time we use totally separate code branches for some logic, the odds of introducing inconsistencies/bugs increases greatly.
>
> I wonder if we could consolidate this logic somehow in order to avoid adding a separate branch for the code here? For example, we could put a get_chunk method on all xarray backends classes, even if it currently only returns a filler value and/or raises an error for chunks='auto'? Chunking is not unique to zarr, e.g., netCDF4 files also have chunks, although the default "auto" chunking logic should probably be different.

Thanks for pointing this out! I agree completely that the open_dataset() function is overdue for a refactor, it was a nightmare to go through all the if-then branches, but the comprehensive test suite helped to catch most of the bugs and I've tested it on my own real world dataset so the logic should be ok for now 🤞.

> I would be OK holding this off for a later clean-up, but this really would be worth doing eventually. CC @alexamici RE: the backends refactor.

Personally I would prefer to hold this off, since this open_mfdataset PR (and the previous one at #4003) has been sitting around for months, and I've had to resolve quite a few merge conflicts to keep up. No point in contaminating this complex PR by refactoring the NetCDF logic either.
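
As a purely illustrative sketch of the get_chunk idea floated above (nothing like this exists in the PR; the class and method names here are hypothetical), each backend store could expose a common chunking hook so that open_dataset needs no zarr-only branch::

    class AbstractDataStore:
        def get_chunk(self, name, var, chunks):
            # Backends without native chunk metadata simply echo the request
            # and refuse "auto".
            if chunks == "auto":
                raise NotImplementedError("auto-chunking is not supported here")
            return chunks

    class ZarrStore(AbstractDataStore):
        def get_chunk(self, name, var, chunks):
            # Zarr knows its on-disk chunks, so "auto" can fall back to them
            # (var is assumed to be an xarray Variable).
            if chunks == "auto":
                return dict(zip(var.dims, var.encoding.get("chunks", var.shape)))
            return chunks
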

# adapted from Dataset.Chunk() and taken from open_zarr
if not (isinstance(chunks, (int, dict)) or chunks is None):
if chunks != "auto":
raise ValueError(
"chunks must be an int, dict, 'auto', or None. "
"Instead found %s. " % chunks
)

if chunks == "auto":
try:
import dask.array # noqa
except ImportError:
chunks = None

# auto chunking needs to be here and not in ZarrStore because
# the variable chunks does not survive decode_cf
# return trivial case
if not chunks: # e.g. chunks is 0, None or {}
Member

Can we make this if chunks is None instead?

I know this is a discrepancy from how open_zarr() works today, but currently open_dataset(..., chunks={}) is a way to open a dataset with dask chunks equal to the full size of any arrays.

I doubt the (different) behavior of open_zarr in this case was intentional....

Contributor Author

I'll try it locally and see if the tests break, felt like there was a reason it had to be not chunks but can't remember the context now.

Member

Thanks! Not a big deal if we have to push off this clean-up.

Contributor Author

Yep, 2 tests would break with a ZeroDivisionError if we switch to if chunks is None. Specifically:

  • TestZarrDictStore.test_manual_chunk
  • TestZarrDirectoryStore.test_manual_chunk

Related to my comment hidden in the mess above at #4187 (comment) 😄

Member

Well, I would expect test_manual_chunk to fail here: it is explicitly verifying that chunks=0 and chunks={} result in in-memory numpy arrays. Does it work if you remove those cases, e.g., by setting NO_CHUNKS = (None,) ?

Contributor Author

Yes, setting NO_CHUNKS = (None,) works with if chunks is None. I'll make the change?

Contributor Author

If it helps by the way, test_manual_chunk with NO_CHUNKS = (None, 0, {}) was added in #2530.

Member

Yes, I think we can go ahead and change that. It doesn't look like that was carefully evaluated in #2530.
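
The behavioural difference being debated comes down to Python truthiness: 0, {} and None are all falsy, so "if not chunks" skips chunking for every one of them, whereas "if chunks is None" lets chunks=0 reach the chunking code, and a zero chunk size is what produced the ZeroDivisionError. A quick check::

    for chunks in (None, 0, {}):
        skipped_by_not = not chunks          # True for all three values
        skipped_by_is_none = chunks is None  # True only for None
        print(repr(chunks), skipped_by_not, skipped_by_is_none)
    # None True True
    # 0 True False
    # {} True False
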

return ds
weiji14 (Contributor Author), Jul 3, 2020

Note that this code was more or less copied from open_zarr. The biggest thing to note is that users switching from open_zarr to open_dataset(..., engine="zarr") will encounter a different default chunks setting:

  • open_zarr defaults to chunks="auto"
  • open_dataset defaults to chunks=None

What this means in practice is that data will be returned as numpy arrays instead of dask arrays with the default chunks=None setting (for users who didn't explicitly set a chunks argument). If I'm not mistaken, this is because the try: import dask.array block isn't executed, and so dask isn't used by default.

dcherian (Contributor), Sep 17, 2020

> open_zarr defaults to chunks="auto"
> open_dataset defaults to chunks=None

At one of our past meetings, the consensus was to keep open_zarr around with chunks="auto" as default for backward compatibility.

open_dataset(engine="zarr") should use the current default for chunks kwarg.

With that in mind can we avoid adding this stuff here?

weiji14 (Contributor Author), Sep 17, 2020

> open_zarr defaults to chunks="auto"
> open_dataset defaults to chunks=None

Happy with this 👌

> With that in mind can we avoid adding this stuff here?

Do you mean removing the if chunks == "auto": ... code block? I think it should be kept here in open_dataset (instead of putting it in the open_zarr function) because users might want to do open_mfdataset(engine="zarr", chunks="auto"), and this would require the chunks="auto" logic to be in open_dataset.
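
In practical terms, the defaults agreed on above mean the two entry points hand back different array types unless chunks is passed explicitly; a small sketch (the store path is hypothetical)::

    import xarray as xr

    # open_zarr keeps its historical default of chunks="auto", so variables
    # come back backed by dask arrays (when dask is installed).
    ds_lazy = xr.open_zarr("example.zarr")

    # open_dataset(engine="zarr") uses the generic default chunks=None,
    # so variables are backed by numpy arrays instead.
    ds_numpy = xr.open_dataset("example.zarr", engine="zarr")

    # Passing chunks explicitly restores dask-backed variables.
    ds_dask = xr.open_dataset("example.zarr", engine="zarr", chunks="auto")
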


if isinstance(chunks, int):
chunks = dict.fromkeys(ds.dims, chunks)

variables = {
k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks)
for k, v in ds.variables.items()
}
ds2 = ds._replace(variables)

else:
ds2 = ds

ds2._file_obj = ds._file_obj
return ds2

if isinstance(filename_or_obj, Path):
@@ -499,6 +531,17 @@ def maybe_decode_store(store, lock=False):
if isinstance(filename_or_obj, AbstractDataStore):
store = filename_or_obj

elif isinstance(filename_or_obj, MutableMapping) and engine == "zarr":
# Zarr supports a wide range of access modes, but for now xarray either
# reads or writes from a store, never both.
# For open_dataset(engine="zarr"), we only read (i.e. mode="r")
mode = "r"
_backend_kwargs = backend_kwargs.copy()
overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None)
store = backends.ZarrStore.open_group(
filename_or_obj, mode=mode, group=group, **_backend_kwargs
)

elif isinstance(filename_or_obj, str):
filename_or_obj = _normalize_path(filename_or_obj)

@@ -526,7 +569,16 @@ def maybe_decode_store(store, lock=False):
store = backends.CfGribDataStore(
filename_or_obj, lock=lock, **backend_kwargs
)

elif engine == "zarr":
# on ZarrStore, mode='r', synchronizer=None, group=None,
# consolidated=False.
_backend_kwargs = backend_kwargs.copy()
overwrite_encoded_chunks = _backend_kwargs.pop(
"overwrite_encoded_chunks", None
)
store = backends.ZarrStore.open_group(
filename_or_obj, group=group, **_backend_kwargs
)
else:
if engine not in [None, "scipy", "h5netcdf"]:
raise ValueError(
@@ -542,7 +594,7 @@ def open_dataset(
)

with close_on_error(store):
ds = maybe_decode_store(store)
ds = maybe_decode_store(store, chunks)

# Ensure source filename always stored in dataset object (GH issue #2550)
if "source" not in ds.encoding:
@@ -794,7 +846,7 @@ def open_mfdataset(
If provided, call this function on each dataset prior to concatenation.
You can find the file-name from which each dataset was loaded in
``ds.encoding["source"]``.
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib"}, \
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", "zarr"}, \
optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
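
With the new code path, zarr-specific reader options travel inside backend_kwargs rather than as dedicated keyword arguments, and overwrite_encoded_chunks is popped out of that dict before the rest is forwarded to ZarrStore.open_group. A hedged usage sketch (the store path is hypothetical)::

    import xarray as xr

    ds = xr.open_dataset(
        "consolidated-store.zarr",  # hypothetical path
        engine="zarr",
        chunks={"time": 100},  # request dask chunks along "time"
        backend_kwargs={
            # the store was written with consolidated metadata
            "consolidated": True,
            # ignore the chunk sizes encoded in the store when re-chunking
            "overwrite_encoded_chunks": True,
        },
    )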