# Allow fsspec URLs in open_(mf)dataset #4823

**Merged** · 24 commits · Feb 16, 2021 · diff shown below is from the first 7 commits
**xarray/backends/api.py** (18 changes: 12 additions & 6 deletions)

```diff
@@ -870,13 +870,19 @@ def open_mfdataset(
     """
     if isinstance(paths, str):
         if is_remote_uri(paths):
-            raise ValueError(
-                "cannot do wild-card matching for paths that are remote URLs: "
-                "{!r}. Instead, supply paths as an explicit list of strings.".format(
-                    paths
-                )
-            )
-        paths = sorted(glob(_normalize_path(paths)))
+            from fsspec.core import get_fs_token_paths
+
+            # get_fs_token_paths also allows arguments embedded in URLs
+            fs, _, _ = get_fs_token_paths(
+                paths,
+                mode="rb",
+                storage_options=kwargs.get("backend_kwargs", {}).get("storage_options"),
+                expand=False,
+            )
+            paths = fs.glob(fs._strip_protocol(paths))
+            paths = [fs.get_mapper(path) for path in paths]
+        else:
+            paths = sorted(glob(_normalize_path(paths)))
     else:
         paths = [str(p) if isinstance(p, Path) else p for p in paths]
```

**Member** (commenting on `paths = [fs.get_mapper(path) for path in paths]`):
This is a bit tricky. It assumes the backend wants a mapper object (as the zarr backend does). But what if the glob returns a list of netCDF files? Wouldn't we want a list of file objects?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, this is my earlier question of whether we should special-case zarr. It could produce file objects instead; for now it would just error. We don't have tests for this yet, but now might be the time to start.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now tracking with the comments above, I think we have two options:

1. inject some backend-specific logic into the API here to decide what sort of object to return (if `engine == "zarr"`, return a mapper; else return a file object)
2. only support globbing remote zarr stores

(1) seems to be the more reasonable thing to do here, but it is slightly less principled, as we have been working to cleanly separate the API from the backends.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose a third alternative might be to pass the paths through and create mappers in the zarr backend (this will re-instantiate the filesystem, but that's fine), then add the opening of remote files to each of the other backends that can handle it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have essentially done (1), but excluded HTTP from the non-zarr path, because HTTP has a special role for some backends (DAP, ...). In any case, I don't suppose anyone is globbing over HTTP, since it's generally unreliable.

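The thread above settled on option (1). As a rough sketch only (not this PR's final code), the engine-aware expansion could look like the following; `_expand_remote_paths` and its parameters are hypothetical names for illustration:

```python
# Sketch of option (1): glob a remote URL with fsspec, then hand each backend
# the object type it expects. Hypothetical helper, not xarray API.
from fsspec.core import get_fs_token_paths


def _expand_remote_paths(paths, engine, storage_options=None):
    # get_fs_token_paths also understands chained URLs such as
    # "simplecache::s3://bucket/*.zarr" and options embedded in the URL
    fs, _, _ = get_fs_token_paths(
        paths, mode="rb", storage_options=storage_options, expand=False
    )
    expanded = sorted(fs.glob(fs._strip_protocol(paths)))
    if engine == "zarr":
        # the zarr backend consumes a key/value mapper over the store
        return [fs.get_mapper(p) for p in expanded]
    # file-based backends (netCDF4, h5netcdf, ...) consume open file objects
    return [fs.open(p, mode="rb") for p in expanded]
```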
**xarray/backends/zarr.py** (18 changes: 16 additions & 2 deletions)

```diff
@@ -3,6 +3,8 @@
 
 import numpy as np
 
+from xarray.tests import LooseVersion
+
 from .. import coding, conventions
 from ..core import indexing
 from ..core.pycompat import integer_types
@@ -286,6 +288,7 @@ def open_group(
         consolidated=False,
         consolidate_on_close=False,
         chunk_store=None,
+        storage_options=None,
         append_dim=None,
         write_region=None,
     ):
@@ -295,7 +298,15 @@
         if isinstance(store, pathlib.Path):
             store = os.fspath(store)
 
-        open_kwargs = dict(mode=mode, synchronizer=synchronizer, path=group)
+        open_kwargs = dict(
+            mode=mode,
+            synchronizer=synchronizer,
+            path=group,
+        )
+        if LooseVersion(zarr.__version__) >= "2.5.0":
+            open_kwargs["storage_options"] = storage_options
+        elif storage_options:
+            raise ValueError("Storage options only compatible with zarr>=2.5.0")
         if chunk_store:
             open_kwargs["chunk_store"] = chunk_store
@@ -530,6 +541,7 @@ def open_zarr(
     consolidated=False,
     overwrite_encoded_chunks=False,
     chunk_store=None,
+    storage_options=None,
     decode_timedelta=None,
     use_cftime=None,
     **kwargs,
@@ -642,6 +654,7 @@
         "consolidated": consolidated,
         "overwrite_encoded_chunks": overwrite_encoded_chunks,
         "chunk_store": chunk_store,
+        "storage_options": storage_options,
     }
 
     ds = open_dataset(
@@ -678,8 +691,8 @@ def open_backend_dataset_zarr(
     consolidated=False,
     consolidate_on_close=False,
     chunk_store=None,
+    storage_options=None,
 ):
-
     store = ZarrStore.open_group(
         filename_or_obj,
         group=group,
@@ -688,6 +701,7 @@
         consolidated=consolidated,
         consolidate_on_close=consolidate_on_close,
         chunk_store=chunk_store,
+        storage_options=storage_options,
     )
 
     with close_on_error(store):
```
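With the changes above, `storage_options` flows from `open_zarr` through `ZarrStore.open_group` into zarr itself, which requires zarr>=2.5.0. A hedged usage sketch; the bucket URL and the credentials option are made up:

```python
# Usage sketch for the new storage_options parameter (needs zarr >= 2.5.0);
# the S3 URL and the anonymous-access option below are hypothetical.
import xarray as xr

ds = xr.open_zarr(
    "s3://some-bucket/store.zarr",
    storage_options={"anon": True},  # forwarded to fsspec via zarr's FSStore
)
```

On older zarr, the version gate in `open_group` turns the same call into a `ValueError`.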
**xarray/core/utils.py** (7 changes: 6 additions & 1 deletion)

```diff
@@ -645,7 +645,12 @@ def close_on_error(f):
 
 
 def is_remote_uri(path: str) -> bool:
-    return bool(re.search(r"^https?\://", path))
+    """Finds URLs of the form protocol:// or protocol::
+
+    This also matches for http[s]://, which were the only remote URLs
+    supported in <=v0.16.2.
+    """
+    return bool(re.search(r"^[a-z][a-z0-9]*(\://|\:\:)", path))
 
 
 def read_magic_number(filename_or_obj, count=8):
```
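A few concrete cases the broadened pattern accepts or rejects (the example paths are made up):

```python
# Illustrating the new is_remote_uri matching; example paths are made up.
from xarray.core.utils import is_remote_uri

assert is_remote_uri("https://example.com/data.nc")      # matched in <=v0.16.2 too
assert is_remote_uri("s3://bucket/store.zarr")           # any protocol://
assert is_remote_uri("simplecache::s3://bucket/a.zarr")  # chained protocol::
assert not is_remote_uri("local/relative/data.nc")       # no protocol prefix
```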
**xarray/tests/__init__.py** (1 change: 1 addition & 0 deletions)

```diff
@@ -73,6 +73,7 @@ def LooseVersion(vstring):
 has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis")
 has_rasterio, requires_rasterio = _importorskip("rasterio")
 has_zarr, requires_zarr = _importorskip("zarr")
+has_fsspec, requires_fsspec = _importorskip("fsspec")
 has_iris, requires_iris = _importorskip("iris")
 has_cfgrib, requires_cfgrib = _importorskip("cfgrib")
 has_numbagg, requires_numbagg = _importorskip("numbagg")
```
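For context, `_importorskip` returns a boolean flag plus a pytest skip marker; roughly like this (a sketch, not xarray's exact implementation, which also handles minimum versions):

```python
# Rough sketch of the _importorskip helper used above.
import importlib

import pytest


def _importorskip(modname):
    try:
        importlib.import_module(modname)
        has = True
    except ImportError:
        has = False
    func = pytest.mark.skipif(not has, reason=f"requires {modname}")
    return has, func
```

So tests decorated with `@requires_fsspec` are skipped automatically when fsspec is not installed.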
**xarray/tests/test_backends.py** (50 changes: 49 additions & 1 deletion)

```diff
@@ -54,6 +54,7 @@
     requires_cfgrib,
     requires_cftime,
     requires_dask,
+    requires_fsspec,
     requires_h5netcdf,
     requires_netCDF4,
     requires_pseudonetcdf,
@@ -3041,7 +3042,12 @@ def test_open_mfdataset(self):
         with raises_regex(IOError, "no files to open"):
             open_mfdataset("foo-bar-baz-*.nc")
 
-        with raises_regex(ValueError, "wild-card"):
+    @requires_fsspec
+    def test_open_mfdataset_no_files(self):
+        pytest.importorskip("aiobotocore")
+
+        with raises_regex(OSError, "no files"):
+            # glob is attempted as of #4823, but finds no files
             open_mfdataset("http://some/remote/uri")
 
     def test_open_mfdataset_2d(self):
```

**Collaborator** (commenting on `pytest.importorskip("aiobotocore")`):

This means the test is always skipped. Should aiobotocore be added to some of the CI environments?

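To summarize the behavior change that test pins down (a sketch; the `memory://` URL is made up): before this PR a remote wildcard raised `ValueError` immediately, while now the URL is globbed via fsspec and an empty result raises `OSError`.

```python
# Sketch of the behavior change tested above; the URL below is made up.
import xarray as xr

try:
    # pre-#4823: ValueError ("cannot do wild-card matching ... remote URLs")
    # post-#4823: the URL is globbed via fsspec; nothing matches, so OSError
    xr.open_mfdataset("memory://nonexistent/*.zarr", engine="zarr")
except OSError as err:
    print(err)  # no files to open
```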
```diff
@@ -4799,6 +4805,48 @@ def test_extract_zarr_variable_encoding():
     )
 
 
+@requires_zarr
+@requires_fsspec
+def test_open_fsspec():
+    import fsspec  # type: ignore
+    import zarr
+
+    if not hasattr(zarr.storage, "FSStore") or not hasattr(
+        zarr.storage.FSStore, "getitems"
+    ):
+        pytest.skip("zarr too old")
+
+    ds = open_dataset(os.path.join(os.path.dirname(__file__), "data", "example_1.nc"))
+
+    m = fsspec.filesystem("memory")
+    mm = m.get_mapper("out1.zarr")
+    ds.to_zarr(mm)  # old interface
+    ds0 = ds.copy()
+    ds0["time"] = ds.time + pd.to_timedelta("1 day")
+    mm = m.get_mapper("out2.zarr")
+    ds0.to_zarr(mm)  # old interface
+
+    # single dataset
+    url = "memory://out2.zarr"
+    ds2 = open_dataset(url, engine="zarr")
+    assert ds0 == ds2
+
+    # single dataset with caching
+    url = "simplecache::memory://out2.zarr"
+    ds2 = open_dataset(url, engine="zarr")
+    assert ds0 == ds2
+
+    # multi dataset
+    url = "memory://out*.zarr"
+    ds2 = open_mfdataset(url, engine="zarr")
+    assert xr.concat([ds, ds0], dim="time") == ds2
+
+    # multi dataset with caching
+    url = "simplecache::memory://out*.zarr"
+    ds2 = open_mfdataset(url, engine="zarr")
+    assert xr.concat([ds, ds0], dim="time") == ds2
+
+
 @requires_h5netcdf
 def test_load_single_value_h5netcdf(tmp_path):
     """Test that numeric single-element vector attributes are handled fine.
```
Expand Down