From 4eb13ff3ce42067393655ac03c13c074227ab6da Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 18 Jan 2021 11:20:51 -0500 Subject: [PATCH 01/19] 'Add fsspec hooks --- xarray/backends/api.py | 16 ++++++++----- xarray/backends/zarr.py | 6 ++++- xarray/core/utils.py | 7 +++++- xarray/tests/__init__.py | 1 + xarray/tests/test_backends.py | 43 +++++++++++++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 8 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 4958062a262..3131e7d3f9d 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -881,13 +881,17 @@ def open_mfdataset( """ if isinstance(paths, str): if is_remote_uri(paths): - raise ValueError( - "cannot do wild-card matching for paths that are remote URLs: " - "{!r}. Instead, supply paths as an explicit list of strings.".format( - paths - ) + from fsspec.core import get_fs_token_paths + # get_fs_token_paths also allows arguments embedded in URLs + fs, _, _ = get_fs_token_paths( + paths, mode='rb', + storage_options=kwargs.get("backend_kwargs", {}).get("storage_options"), + expand=False ) - paths = sorted(glob(_normalize_path(paths))) + paths = fs.glob(fs._strip_protocol(paths)) + paths = [fs.get_mapper(path) for path in paths] + else: + paths = sorted(glob(_normalize_path(paths))) else: paths = [str(p) if isinstance(p, Path) else p for p in paths] diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 3b4b3a3d9d5..37f2793f4c2 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -286,6 +286,7 @@ def open_group( consolidated=False, consolidate_on_close=False, chunk_store=None, + storage_options=None, append_dim=None, write_region=None, ): @@ -295,7 +296,8 @@ def open_group( if isinstance(store, pathlib.Path): store = os.fspath(store) - open_kwargs = dict(mode=mode, synchronizer=synchronizer, path=group) + open_kwargs = dict(mode=mode, synchronizer=synchronizer, path=group, + storage_options=storage_options) if chunk_store: 
open_kwargs["chunk_store"] = chunk_store @@ -530,6 +532,7 @@ def open_zarr( consolidated=False, overwrite_encoded_chunks=False, chunk_store=None, + storage_options=None, decode_timedelta=None, use_cftime=None, **kwargs, @@ -642,6 +645,7 @@ def open_zarr( "consolidated": consolidated, "overwrite_encoded_chunks": overwrite_encoded_chunks, "chunk_store": chunk_store, + "storage_options": storage_options } ds = open_dataset( diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ced688f32dd..9648458ec6d 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -645,7 +645,12 @@ def close_on_error(f): def is_remote_uri(path: str) -> bool: - return bool(re.search(r"^https?\://", path)) + """Finds URLs of the form protocol:// or protocol:: + + This also matches for http[s]://, which were the only remote URLs + supported in <=v0.16.2. + """ + return bool(re.search(r"^[a-z][a-z0-9]*(\://|\:\:)", path)) def read_magic_number(filename_or_obj, count=8): diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 7c18f1a8c8a..6e883fd90f5 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -73,6 +73,7 @@ def LooseVersion(vstring): has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") has_rasterio, requires_rasterio = _importorskip("rasterio") has_zarr, requires_zarr = _importorskip("zarr") +has_fsspec, requires_fsspec = _importorskip("fsspec") has_iris, requires_iris = _importorskip("iris") has_cfgrib, requires_cfgrib = _importorskip("cfgrib") has_numbagg, requires_numbagg = _importorskip("numbagg") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3750c0715ae..be3a57361a8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -54,6 +54,7 @@ requires_cfgrib, requires_cftime, requires_dask, + requires_fsspec, requires_h5netcdf, requires_netCDF4, requires_pseudonetcdf, @@ -4799,6 +4800,48 @@ def test_extract_zarr_variable_encoding(): ) +@requires_zarr 
+@requires_fsspec +def test_open_fsspec(): + import fsspec # type: ignore + import zarr + + if not hasattr(zarr.storage, "FSStore") or not hasattr( + zarr.storage.FSStore, "getitems" + ): + pytest.skip("zarr too old") + + ds = open_dataset(os.path.join(os.path.dirname(__file__), "data", "example_1.nc")) + + m = fsspec.filesystem("memory") + mm = m.get_mapper("out1.zarr") + ds.to_zarr(mm) # old interface + ds0 = ds.copy() + ds0["time"] = ds.time + pd.to_timedelta("1 day") + mm = m.get_mapper("out2.zarr") + ds0.to_zarr(mm) # old interface + + # single dataset + url = "memory://out2.zarr" + ds2 = open_dataset(url, engine="zarr") + assert ds0 == ds2 + + # single dataset with caching + url = "simplecache::memory://out2.zarr" + ds2 = open_dataset(url, engine="zarr") + assert ds0 == ds2 + + # multi dataset + url = "memory://out*.zarr" + ds2 = open_mfdataset(url, engine="zarr") + assert xr.concat([ds, ds0], dim="time") == ds2 + + # multi dataset with caching + url = "simplecache::memory://out*.zarr" + ds2 = open_mfdataset(url, engine="zarr") + assert xr.concat([ds, ds0], dim="time") == ds2 + + @requires_h5netcdf def test_load_single_value_h5netcdf(tmp_path): """Test that numeric single-element vector attributes are handled fine. 
From bb4174e99e4ca93ea6d59cb1a4dcb76f0bbf03b1 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 18 Jan 2021 11:21:26 -0500 Subject: [PATCH 02/19] run black --- xarray/backends/api.py | 6 ++++-- xarray/backends/zarr.py | 10 +++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 3131e7d3f9d..a2ee6f1a2e7 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -882,11 +882,13 @@ def open_mfdataset( if isinstance(paths, str): if is_remote_uri(paths): from fsspec.core import get_fs_token_paths + # get_fs_token_paths also allows arguments embedded in URLs fs, _, _ = get_fs_token_paths( - paths, mode='rb', + paths, + mode="rb", storage_options=kwargs.get("backend_kwargs", {}).get("storage_options"), - expand=False + expand=False, ) paths = fs.glob(fs._strip_protocol(paths)) paths = [fs.get_mapper(path) for path in paths] diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 37f2793f4c2..a7659eae003 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -296,8 +296,12 @@ def open_group( if isinstance(store, pathlib.Path): store = os.fspath(store) - open_kwargs = dict(mode=mode, synchronizer=synchronizer, path=group, - storage_options=storage_options) + open_kwargs = dict( + mode=mode, + synchronizer=synchronizer, + path=group, + storage_options=storage_options, + ) if chunk_store: open_kwargs["chunk_store"] = chunk_store @@ -645,7 +649,7 @@ def open_zarr( "consolidated": consolidated, "overwrite_encoded_chunks": overwrite_encoded_chunks, "chunk_store": chunk_store, - "storage_options": storage_options + "storage_options": storage_options, } ds = open_dataset( From 86e045fcafea652c5ab4a1cee11b5f3e500e659d Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 18 Jan 2021 11:53:16 -0500 Subject: [PATCH 03/19] fix outdated test --- xarray/tests/test_backends.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index be3a57361a8..d6c635463bc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3042,7 +3042,12 @@ def test_open_mfdataset(self): with raises_regex(IOError, "no files to open"): open_mfdataset("foo-bar-baz-*.nc") - with raises_regex(ValueError, "wild-card"): + @requires_fsspec + def test_open_mfdataset_no_files(self): + pytest.importorskip("aiobotocore") + + with raises_regex(OSError, "no files"): + # glob is attempted as of #4823, but finds no files open_mfdataset("http://some/remote/uri") def test_open_mfdataset_2d(self): From 0582d57e2952147b576a8b291c4ab00c02353110 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 18 Jan 2021 12:11:26 -0500 Subject: [PATCH 04/19] nother slot --- xarray/backends/zarr.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index a7659eae003..afd7a182f5f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -686,6 +686,7 @@ def open_backend_dataset_zarr( consolidated=False, consolidate_on_close=False, chunk_store=None, + storage_options=None, ): store = ZarrStore.open_group( @@ -696,6 +697,7 @@ def open_backend_dataset_zarr( consolidated=consolidated, consolidate_on_close=consolidate_on_close, chunk_store=chunk_store, + storage_options=storage_options, ) with close_on_error(store): From e220b3c519b1a3a92a28abc8e5aa75b997556fa1 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 18 Jan 2021 12:36:45 -0500 Subject: [PATCH 05/19] plumb deeper --- xarray/backends/zarr.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index afd7a182f5f..a28de3d91d8 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -3,6 +3,7 @@ import numpy as np +from xarray.tests import LooseVersion from .. 
import coding, conventions from ..core import indexing from ..core.pycompat import integer_types @@ -300,8 +301,11 @@ def open_group( mode=mode, synchronizer=synchronizer, path=group, - storage_options=storage_options, ) + if LooseVersion(zarr.__version__) >= "2.5.0": + open_kwargs["storage_options"] = storage_options + elif storage_options: + raise ValueError("Storage options only compatible with zarr>=2.5.0") if chunk_store: open_kwargs["chunk_store"] = chunk_store @@ -688,7 +692,6 @@ def open_backend_dataset_zarr( chunk_store=None, storage_options=None, ): - store = ZarrStore.open_group( filename_or_obj, group=group, From 906e920e18cac7ec7f65486c19ff3ccf57ae6a61 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 18 Jan 2021 12:44:00 -0500 Subject: [PATCH 06/19] spacing --- xarray/backends/zarr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index a28de3d91d8..1f691aa6952 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -4,6 +4,7 @@ import numpy as np from xarray.tests import LooseVersion + from .. import coding, conventions from ..core import indexing from ..core.pycompat import integer_types From f6b46346375dddc79d08f0e2e5c209b13c5ea8b0 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 19 Jan 2021 08:54:23 -0500 Subject: [PATCH 07/19] Update xarray/backends/zarr.py Co-authored-by: keewis --- xarray/backends/zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 1f691aa6952..7e302ccc273 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -3,7 +3,7 @@ import numpy as np -from xarray.tests import LooseVersion +from distutils.version import LooseVersion from .. 
import coding, conventions from ..core import indexing From 74b33606549552a30ab53e03d6927c2cff9f0cb7 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 19 Jan 2021 09:01:40 -0500 Subject: [PATCH 08/19] import reorder --- xarray/backends/zarr.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 7e302ccc273..7a2402bf4c6 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1,10 +1,9 @@ import os import pathlib +from distutils.version import LooseVersion import numpy as np -from distutils.version import LooseVersion - from .. import coding, conventions from ..core import indexing from ..core.pycompat import integer_types From ff737b4ceab2a82c8c465935d4cd34d2a5ca3058 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 27 Jan 2021 09:47:28 -0500 Subject: [PATCH 09/19] Separate zarr engine code --- xarray/backends/api.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 17f3a7cb124..b3c66d6bf72 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -871,16 +871,24 @@ def open_mfdataset( if isinstance(paths, str): if is_remote_uri(paths): from fsspec.core import get_fs_token_paths + from fsspec import open_files + so = kwargs.get("backend_kwargs", {}).get("storage_options", {}) + # zarr requires mappers, other backends may work with file-like objects # get_fs_token_paths also allows arguments embedded in URLs fs, _, _ = get_fs_token_paths( paths, mode="rb", - storage_options=kwargs.get("backend_kwargs", {}).get("storage_options"), + storage_options=so, expand=False, ) paths = fs.glob(fs._strip_protocol(paths)) - paths = [fs.get_mapper(path) for path in paths] + if engine == "zarr": + paths = [fs.get_mapper(path) for path in paths] + else: + if not paths.startswith("http"): + # pass through HTTP unchanged for backward compatibility + paths = open_files(paths, mode="rb", **so) 
else: paths = sorted(glob(_normalize_path(paths))) else: From 414d81c5d5a22225b1c9e34e5a48d8e77c275f08 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 27 Jan 2021 09:51:50 -0500 Subject: [PATCH 10/19] lint: import order --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index b3c66d6bf72..8acfa275092 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -870,8 +870,8 @@ def open_mfdataset( """ if isinstance(paths, str): if is_remote_uri(paths): - from fsspec.core import get_fs_token_paths from fsspec import open_files + from fsspec.core import get_fs_token_paths so = kwargs.get("backend_kwargs", {}).get("storage_options", {}) # zarr requires mappers, other backends may work with file-like objects From f029213005ce5619d7f0dbcd19be884800237413 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 27 Jan 2021 09:58:22 -0500 Subject: [PATCH 11/19] Don't glob twice for fileobjs --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 8acfa275092..f692c5fd6ab 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -882,8 +882,8 @@ def open_mfdataset( storage_options=so, expand=False, ) - paths = fs.glob(fs._strip_protocol(paths)) if engine == "zarr": + paths = fs.glob(fs._strip_protocol(paths)) paths = [fs.get_mapper(path) for path in paths] else: if not paths.startswith("http"): From cf9519aecff940f19cc2611f20f4924376ee84cb Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 27 Jan 2021 10:31:34 -0500 Subject: [PATCH 12/19] Add docs --- doc/io.rst | 28 ++++++++++++++++++++++++++-- doc/whats-new.rst | 5 +++++ xarray/backends/api.py | 4 +++- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index 2e46879929b..d6e7b5f67cb 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -890,8 +890,29 @@ Cloud Storage Buckets It is possible 
to read and write xarray datasets directly from / to cloud storage buckets using zarr. This example uses the `gcsfs`_ package to provide -a ``MutableMapping`` interface to `Google Cloud Storage`_, which we can then -pass to xarray:: +an interface to `Google Cloud Storage`_. + +From v0.16.2: general `fsspec`_ URLs are parsed and the store set up for you +automatically when reading, such that you can open a dataset in a single +call. You should include any arguments to the storage backend as the +key ``storage_options``, part of ``backend_kwargs``. + +.. code:: python + + ds_gcs = xr.open_dataset( + "gcs:///path.zarr", + backend_kwargs={"storage_options": {"project": '', "token": None}}, + engine="zarr" + ) + + +This also works with ``open_mfdataset``, allowing you to pass a list of paths or +a URL to be interpreted as a glob string. + +For older versions, and for writing, you must explicitly set up a ``MutableMapping`` +instance and pass this, as follows: + +.. code:: python import gcsfs fs = gcsfs.GCSFileSystem(project='', token=None) @@ -901,6 +922,9 @@ pass to xarray:: # read it back ds_gcs = xr.open_zarr(gcsmap) +(or use the utility function ``fsspec.get_mapper()``). + +.. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ .. _Zarr: http://zarr.readthedocs.io/ .. _Amazon S3: https://aws.amazon.com/s3/ .. _Google Cloud Storage: https://cloud.google.com/storage/ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e873a76cab0..46e9812113e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -48,6 +48,11 @@ New Features - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. 
By `Deepak Cherian `_ +- :py:func:`open_dataset` and :py:func:`open_mfdataset` now accept ``fsspec`` URLs + (including globs for the latter) for ``engine="zarr"``, and so allow reading from + many remote and other file systems (:pull:`4461`) + By `Martin Durant `_ + Bug fixes ~~~~~~~~~ - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` do not trigger computations anymore if :py:meth:`Dataset.weighted` or :py:meth:`DataArray.weighted` are applied (:issue:`4625`, :pull:`4668`). By `Julius Busecke `_. diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f692c5fd6ab..c8a5a069467 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -643,7 +643,9 @@ def open_dataarray( backend_kwargs: dict, optional A dictionary of keyword arguments to pass on to the backend. This may be useful when backend options would improve performance or - allow user control of dataset processing. + allow user control of dataset processing. If using fsspec URLs, + include the key "storage_options" to pass arguments to the + storage layer. use_cftime: bool, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not From 05f8e0815cf8cc3d671d719d1a46702bde286813 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 27 Jan 2021 10:35:05 -0500 Subject: [PATCH 13/19] doc formatting --- doc/io.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index d6e7b5f67cb..b97f1f5a699 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -901,8 +901,10 @@ key ``storage_options``, part of ``backend_kwargs``. ds_gcs = xr.open_dataset( "gcs:///path.zarr", - backend_kwargs={"storage_options": {"project": '', "token": None}}, - engine="zarr" + backend_kwargs={ + "storage_options": {"project": "", "token": None} + }, + engine="zarr", ) @@ -915,8 +917,9 @@ instance and pass this, as follows: .. 
code:: python import gcsfs - fs = gcsfs.GCSFileSystem(project='', token=None) - gcsmap = gcsfs.mapping.GCSMap('', gcs=fs, check=True, create=False) + + fs = gcsfs.GCSFileSystem(project="", token=None) + gcsmap = gcsfs.mapping.GCSMap("", gcs=fs, check=True, create=False) # write to the bucket ds.to_zarr(store=gcsmap) # read it back From c40ce4e036a3674c4dbb196de67d3126f7156934 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 28 Jan 2021 12:12:41 -0500 Subject: [PATCH 14/19] Only implement for zarr --- xarray/backends/api.py | 26 ++++++++++++-------------- xarray/tests/test_backends.py | 4 +++- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c8a5a069467..129fd6cea5e 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -871,26 +871,24 @@ def open_mfdataset( .. [2] http://xarray.pydata.org/en/stable/dask.html#chunking-and-performance """ if isinstance(paths, str): - if is_remote_uri(paths): - from fsspec import open_files - from fsspec.core import get_fs_token_paths + if is_remote_uri(paths) and engine == "zarr": + try: + from fsspec.core import get_fs_token_paths + except ImportError as e: + raise ImportError( + "The use of remote URLs for opening zarr requires the package fsspec" + ) from e - so = kwargs.get("backend_kwargs", {}).get("storage_options", {}) - # zarr requires mappers, other backends may work with file-like objects - # get_fs_token_paths also allows arguments embedded in URLs fs, _, _ = get_fs_token_paths( paths, mode="rb", - storage_options=so, + storage_options=kwargs.get("backend_kwargs", {}).get( + "storage_options", {} + ), expand=False, ) - if engine == "zarr": - paths = fs.glob(fs._strip_protocol(paths)) - paths = [fs.get_mapper(path) for path in paths] - else: - if not paths.startswith("http"): - # pass through HTTP unchanged for backward compatibility - paths = open_files(paths, mode="rb", **so) + paths = fs.glob(fs._strip_protocol(paths)) # finds 
directories + paths = [fs.get_mapper(path) for path in paths] else: paths = sorted(glob(_normalize_path(paths))) else: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d6c635463bc..89897c84f6f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3046,9 +3046,11 @@ def test_open_mfdataset(self): def test_open_mfdataset_no_files(self): pytest.importorskip("aiobotocore") + # glob is attempted as of #4823, but finds no files with raises_regex(OSError, "no files"): - # glob is attempted as of #4823, but finds no files open_mfdataset("http://some/remote/uri") + with raises_regex(OSError, "no files"): + open_mfdataset("http://some/remote/uri", engine="zarr") def test_open_mfdataset_2d(self): original = Dataset({"foo": (["x", "y"], np.random.randn(10, 8))}) From e6eb41b239b3c1401db1e604bd2763b5d3887e2c Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 28 Jan 2021 12:25:18 -0500 Subject: [PATCH 15/19] Reinstate original exception --- xarray/backends/api.py | 7 +++++++ xarray/backends/zarr.py | 2 +- xarray/tests/test_backends.py | 4 ++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 129fd6cea5e..0791d1cdaf1 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -889,6 +889,13 @@ def open_mfdataset( ) paths = fs.glob(fs._strip_protocol(paths)) # finds directories paths = [fs.get_mapper(path) for path in paths] + elif is_remote_uri(paths): + raise ValueError( + "cannot do wild-card matching for paths that are remote URLs: " + "{!r}. 
Instead, supply paths as an explicit list of strings.".format( + paths + ) + ) else: paths = sorted(glob(_normalize_path(paths))) else: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c71c2aeaf37..1cfd0cb4502 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -709,7 +709,7 @@ def open_dataset( consolidated=consolidated, consolidate_on_close=consolidate_on_close, chunk_store=chunk_store, - storage_options=storage_options + storage_options=storage_options, ) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 89897c84f6f..0c36eb18b01 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3041,14 +3041,14 @@ def test_open_mfdataset(self): with raises_regex(IOError, "no files to open"): open_mfdataset("foo-bar-baz-*.nc") + with raises_regex(ValueError, "wild-card"): + open_mfdataset("http://some/remote/uri") @requires_fsspec def test_open_mfdataset_no_files(self): pytest.importorskip("aiobotocore") # glob is attempted as of #4823, but finds no files - with raises_regex(OSError, "no files"): - open_mfdataset("http://some/remote/uri") with raises_regex(OSError, "no files"): open_mfdataset("http://some/remote/uri", engine="zarr") From 5e3b6a41963e15b9c77373447c43246d00f35966 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Sun, 14 Feb 2021 13:41:46 -0500 Subject: [PATCH 16/19] apply suggestions --- ci/requirements/py37-min-all-deps.yml | 1 + ci/requirements/py38-all-but-dask.yml | 1 + setup.cfg | 2 ++ xarray/tests/test_backends.py | 2 +- 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/requirements/py37-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml index feef86ddf5c..a494a49417d 100644 --- a/ci/requirements/py37-min-all-deps.yml +++ b/ci/requirements/py37-min-all-deps.yml @@ -7,6 +7,7 @@ dependencies: # Run ci/min_deps_check.py to verify that this file respects the policy. 
# When upgrading python, numpy, or pandas, must also change # doc/installing.rst and setup.py. + - aiobotocore=1.1.2 - python=3.7 - black - boto3=1.9 diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index 14930f5272d..c8f600aaf9a 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -5,6 +5,7 @@ channels: dependencies: - python=3.8 - black + - aiobotocore - boto3 - bottleneck - cartopy diff --git a/setup.cfg b/setup.cfg index a695191bf02..ccc5808dae6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -183,6 +183,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-distributed.*] ignore_missing_imports = True +[mypy-fsspec.*] +ignore_missing_imports = True [mypy-h5netcdf.*] ignore_missing_imports = True [mypy-h5py.*] diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0c36eb18b01..75e0edb4fb2 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4810,7 +4810,7 @@ def test_extract_zarr_variable_encoding(): @requires_zarr @requires_fsspec def test_open_fsspec(): - import fsspec # type: ignore + import fsspec import zarr if not hasattr(zarr.storage, "FSStore") or not hasattr( From c82ba9eab2c6cd83d002c64db9de54af8bc78f2b Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Tue, 16 Feb 2021 05:55:21 -0500 Subject: [PATCH 17/19] add aiobotocore --- ci/requirements/environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 0f59d9570c8..1bbe349ab21 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -3,6 +3,7 @@ channels: - conda-forge - nodefaults dependencies: + - aiobotocore - boto3 - bottleneck - cartopy From 3cdf30fe1284c4f99097cfe5726776e0c25368fe Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 16 Feb 2021 10:02:08 -0500 Subject: [PATCH 18/19] try min --- ci/requirements/py37-min-all-deps.yml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/py37-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml index a324df09ff8..7847e8ebad7 100644 --- a/ci/requirements/py37-min-all-deps.yml +++ b/ci/requirements/py37-min-all-deps.yml @@ -7,7 +7,7 @@ dependencies: # Run ci/min_deps_check.py to verify that this file respects the policy. # When upgrading python, numpy, or pandas, must also change # doc/installing.rst and setup.py. - - aiobotocore=1.1.2 + - aiobotocore=0.12 - python=3.7 - boto3=1.9 - bottleneck=1.2 From 40f96033c01a50ffc45036db5a6c8dd771ae4e4f Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 16 Feb 2021 10:12:28 -0500 Subject: [PATCH 19/19] remove aiobotocore from min because it pins other deps too tightly, makes solver panic --- ci/requirements/py37-min-all-deps.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/requirements/py37-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml index 7847e8ebad7..166836243b4 100644 --- a/ci/requirements/py37-min-all-deps.yml +++ b/ci/requirements/py37-min-all-deps.yml @@ -7,7 +7,6 @@ dependencies: # Run ci/min_deps_check.py to verify that this file respects the policy. # When upgrading python, numpy, or pandas, must also change # doc/installing.rst and setup.py. - - aiobotocore=0.12 - python=3.7 - boto3=1.9 - bottleneck=1.2