From f8a00147124a2468ad7e94d66c719cdae5f198a4 Mon Sep 17 00:00:00 2001 From: garciam Date: Fri, 17 Feb 2023 16:55:43 +0100 Subject: [PATCH 01/25] Support for the new compression arguments. Use a dict for the arguments and update it with the encoding, so all variables are passed. --- xarray/backends/netCDF4_.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 0c6e083158d..7a1b347bc6d 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -485,20 +485,24 @@ def prepare_variable( if name in self.ds.variables: nc4_var = self.ds.variables[name] else: - nc4_var = self.ds.createVariable( + default_args = dict( varname=name, datatype=datatype, dimensions=variable.dims, - zlib=encoding.get("zlib", False), - complevel=encoding.get("complevel", 4), - shuffle=encoding.get("shuffle", True), - fletcher32=encoding.get("fletcher32", False), - contiguous=encoding.get("contiguous", False), - chunksizes=encoding.get("chunksizes"), + zlib=False, + complevel=4, + shuffle=True, + fletcher32=False, + contiguous=False, + chunksizes=None, endian="native", - least_significant_digit=encoding.get("least_significant_digit"), + least_significant_digit=None, fill_value=fill_value, ) + default_args.update(encoding) + if "_FillValue" in default_args: + default_args.pop("_FillValue") + nc4_var = self.ds.createVariable(**default_args) nc4_var.setncatts(attrs) From 7abf1037c419024dc7f91e7822e5764f5d077645 Mon Sep 17 00:00:00 2001 From: markelg Date: Wed, 22 Mar 2023 17:33:13 +0100 Subject: [PATCH 02/25] significant_digit and other missing keys added Should close #7634 --- xarray/backends/netCDF4_.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 7a1b347bc6d..0bde92754ab 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -239,6 +239,12 @@ def _extract_nc4_variable_encoding( "_FillValue", "dtype", "compression", + "significant_digits", + "quantize_mode", + "blosc_shuffle", + "szip_coding", + "szip_pixels_per_block", + "endian" } if lsd_okay: valid_encodings.add("least_significant_digit") From fa408c5e021f4a79363943eefb80efaaeb3f119b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Mar 2023 16:33:54 +0000 Subject: [PATCH 03/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/netCDF4_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 0bde92754ab..403d08fd639 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -244,7 +244,7 @@ def _extract_nc4_variable_encoding( "blosc_shuffle", "szip_coding", "szip_pixels_per_block", - "endian" + "endian", } if lsd_okay: valid_encodings.add("least_significant_digit") From e489a32bbd2dfad31c4ad6d6e7169051b135ebaa Mon Sep 17 00:00:00 2001 From: markelg Date: Wed, 21 Jun 2023 13:47:03 +0200 Subject: [PATCH 04/25] test for the new compression argument --- xarray/tests/__init__.py | 16 ++++++++--- xarray/tests/test_backends.py | 50 ++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 7e1b964ecba..18d4ddc8fd8 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -2,8 +2,10 @@ import importlib import platform 
+import string import warnings from contextlib import contextmanager, nullcontext +from typing import Tuple from unittest import mock # noqa: F401 import numpy as np @@ -239,18 +241,24 @@ def assert_allclose(a, b, check_default_indexes=True, **kwargs): xarray.testing._assert_internal_invariants(b, check_default_indexes) -def create_test_data(seed: int | None = None, add_attrs: bool = True) -> Dataset: +def create_test_data( + seed: int | None = None, + add_attrs: bool = True, + dim_sizes: Tuple[int, int, int] = (8, 9, 10) +) -> Dataset: rs = np.random.RandomState(seed) _vars = { "var1": ["dim1", "dim2"], "var2": ["dim1", "dim2"], "var3": ["dim3", "dim1"], } - _dims = {"dim1": 8, "dim2": 9, "dim3": 10} + _dims = {"dim1": dim_sizes[0], "dim2": dim_sizes[1], "dim3": dim_sizes[2]} obj = Dataset() obj["dim2"] = ("dim2", 0.5 * np.arange(_dims["dim2"])) - obj["dim3"] = ("dim3", list("abcdefghij")) + if _dims["dim3"] > 26: + raise RuntimeError(f'Not enough letters for filling this dimension size ({_dims["dim3"]})') + obj["dim3"] = ("dim3", list(string.ascii_lowercase[0:_dims["dim3"]])) obj["time"] = ("time", pd.date_range("2000-01-01", periods=20)) for v, dims in sorted(_vars.items()): data = rs.normal(size=tuple(_dims[d] for d in dims)) @@ -259,7 +267,7 @@ def create_test_data(seed: int | None = None, add_attrs: bool = True) -> Dataset obj[v].attrs = {"foo": "variable"} obj.coords["numbers"] = ( "dim3", - np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64"), + np.random.randint(0, 3, _dims["dim3"], dtype="int64"), ) obj.encoding = {"foo": "bar"} assert all(obj.data.flags.writeable for obj in obj.variables.values()) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e0a2262a339..132fde11633 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1481,7 +1481,7 @@ def test_dump_and_open_encodings(self) -> None: assert ds.variables["time"].getncattr("units") == units assert_array_equal(ds.variables["time"], np.arange(10) + 4) - def test_compression_encoding(self) -> None: + def test_compression_encoding_legacy(self) -> None: data = create_test_data() data["var2"].encoding.update( { @@ -1501,6 +1501,54 @@ def test_compression_encoding(self) -> None: with self.roundtrip(expected) as actual: assert_equal(expected, actual) + def test_compression_encoding(self) -> None: + data = create_test_data(dim_sizes=(20, 40, 10)) + compression_vals = (None, "zlib", "szip", "zstd", "blosc_lz", "blosc_lz4", "blosc_lz4hc", "blosc_zlib", "blosc_zstd") + for compression in compression_vals: + encoding_params = dict(compression=compression, blosc_shuffle=1) + data["var2"].encoding.update(encoding_params) + data["var2"].encoding.update( + { + "chunksizes": (20, 20), + "original_shape": data.var2.shape, + "blosc_shuffle": 1, + "fletcher32": False, + "zlib": False + } + ) + with self.roundtrip(data) as actual: + expected_encoding = data["var2"].encoding.copy() + # compression does not appear in the retrieved encoding, that differs + # from the input encoding. shuffle also chantges. 
Here we modify the + # expected encoding to account for this + compression = expected_encoding.pop("compression") + blosc_shuffle = expected_encoding.pop("blosc_shuffle") + if compression is not None: + if "blosc" in compression and blosc_shuffle: + expected_encoding["blosc"] = {"compressor": compression, "shuffle": blosc_shuffle} + expected_encoding["shuffle"] = False + elif compression == "szip": + expected_encoding["szip"] = {'coding': 'nn', + 'pixels_per_block': 8} + expected_encoding["shuffle"] = False + else: + # This will set a key like zlib=true which is what appears in + # the encoding when we read it. + expected_encoding[compression] = True + if compression == "zstd": + expected_encoding["shuffle"] = False + else: + expected_encoding["shuffle"] = False + + actual_encoding = actual["var2"].encoding + for k, v in expected_encoding.items(): + assert v == actual_encoding[k] + if encoding_params["compression"] is not None and "blosc" not in encoding_params["compression"]: + # regression test for #156 + expected = data.isel(dim1=0) + with self.roundtrip(expected) as actual: + assert_equal(expected, actual) + def test_encoding_kwarg_compression(self) -> None: ds = Dataset({"x": np.arange(10.0)}) encoding = dict( From fff18a81c230b3a7794a49d0adc2e0d0779b633d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Jun 2023 11:47:56 +0000 Subject: [PATCH 05/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/__init__.py | 9 +++++---- xarray/tests/test_backends.py | 30 ++++++++++++++++++++++++------ 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 18d4ddc8fd8..5bf913f0631 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -5,7 +5,6 @@ import string import warnings from contextlib import contextmanager, nullcontext -from typing import Tuple from unittest import mock # noqa: F401 import numpy as np @@ -244,7 +243,7 @@ def assert_allclose(a, b, check_default_indexes=True, **kwargs): def create_test_data( seed: int | None = None, add_attrs: bool = True, - dim_sizes: Tuple[int, int, int] = (8, 9, 10) + dim_sizes: tuple[int, int, int] = (8, 9, 10), ) -> Dataset: rs = np.random.RandomState(seed) _vars = { @@ -257,8 +256,10 @@ def create_test_data( obj = Dataset() obj["dim2"] = ("dim2", 0.5 * np.arange(_dims["dim2"])) if _dims["dim3"] > 26: - raise RuntimeError(f'Not enough letters for filling this dimension size ({_dims["dim3"]})') - obj["dim3"] = ("dim3", list(string.ascii_lowercase[0:_dims["dim3"]])) + raise RuntimeError( + f'Not enough letters for filling this dimension size ({_dims["dim3"]})' + ) + obj["dim3"] = ("dim3", list(string.ascii_lowercase[0 : _dims["dim3"]])) obj["time"] = ("time", pd.date_range("2000-01-01", periods=20)) for v, dims in sorted(_vars.items()): data = rs.normal(size=tuple(_dims[d] for d in dims)) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 132fde11633..1c036789287 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1503,7 +1503,17 @@ def test_compression_encoding_legacy(self) -> None: def test_compression_encoding(self) -> None: data = create_test_data(dim_sizes=(20, 40, 10)) - compression_vals = (None, "zlib", "szip", "zstd", "blosc_lz", "blosc_lz4", "blosc_lz4hc", "blosc_zlib", "blosc_zstd") + compression_vals = ( + None, + "zlib", + "szip", + "zstd", + "blosc_lz", + 
"blosc_lz4", + "blosc_lz4hc", + "blosc_zlib", + "blosc_zstd", + ) for compression in compression_vals: encoding_params = dict(compression=compression, blosc_shuffle=1) data["var2"].encoding.update(encoding_params) @@ -1513,7 +1523,7 @@ def test_compression_encoding(self) -> None: "original_shape": data.var2.shape, "blosc_shuffle": 1, "fletcher32": False, - "zlib": False + "zlib": False, } ) with self.roundtrip(data) as actual: @@ -1525,11 +1535,16 @@ def test_compression_encoding(self) -> None: blosc_shuffle = expected_encoding.pop("blosc_shuffle") if compression is not None: if "blosc" in compression and blosc_shuffle: - expected_encoding["blosc"] = {"compressor": compression, "shuffle": blosc_shuffle} + expected_encoding["blosc"] = { + "compressor": compression, + "shuffle": blosc_shuffle, + } expected_encoding["shuffle"] = False elif compression == "szip": - expected_encoding["szip"] = {'coding': 'nn', - 'pixels_per_block': 8} + expected_encoding["szip"] = { + "coding": "nn", + "pixels_per_block": 8, + } expected_encoding["shuffle"] = False else: # This will set a key like zlib=true which is what appears in @@ -1543,7 +1558,10 @@ def test_compression_encoding(self) -> None: actual_encoding = actual["var2"].encoding for k, v in expected_encoding.items(): assert v == actual_encoding[k] - if encoding_params["compression"] is not None and "blosc" not in encoding_params["compression"]: + if ( + encoding_params["compression"] is not None + and "blosc" not in encoding_params["compression"] + ): # regression test for #156 expected = data.isel(dim1=0) with self.roundtrip(expected) as actual: From a89f816bdf6c347eb269a3619738ee5bf8305852 Mon Sep 17 00:00:00 2001 From: markelg Date: Wed, 21 Jun 2023 14:26:51 +0200 Subject: [PATCH 06/25] move the new test to TestNetCDF4Data --- xarray/tests/test_backends.py | 95 +++++++++++++++++------------------ 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 132fde11633..752d1fbb69b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1501,54 +1501,6 @@ def test_compression_encoding_legacy(self) -> None: with self.roundtrip(expected) as actual: assert_equal(expected, actual) - def test_compression_encoding(self) -> None: - data = create_test_data(dim_sizes=(20, 40, 10)) - compression_vals = (None, "zlib", "szip", "zstd", "blosc_lz", "blosc_lz4", "blosc_lz4hc", "blosc_zlib", "blosc_zstd") - for compression in compression_vals: - encoding_params = dict(compression=compression, blosc_shuffle=1) - data["var2"].encoding.update(encoding_params) - data["var2"].encoding.update( - { - "chunksizes": (20, 20), - "original_shape": data.var2.shape, - "blosc_shuffle": 1, - "fletcher32": False, - "zlib": False - } - ) - with self.roundtrip(data) as actual: - expected_encoding = data["var2"].encoding.copy() - # compression does not appear in the retrieved encoding, that differs - # from the input encoding. shuffle also chantges. 
Here we modify the - # expected encoding to account for this - compression = expected_encoding.pop("compression") - blosc_shuffle = expected_encoding.pop("blosc_shuffle") - if compression is not None: - if "blosc" in compression and blosc_shuffle: - expected_encoding["blosc"] = {"compressor": compression, "shuffle": blosc_shuffle} - expected_encoding["shuffle"] = False - elif compression == "szip": - expected_encoding["szip"] = {'coding': 'nn', - 'pixels_per_block': 8} - expected_encoding["shuffle"] = False - else: - # This will set a key like zlib=true which is what appears in - # the encoding when we read it. - expected_encoding[compression] = True - if compression == "zstd": - expected_encoding["shuffle"] = False - else: - expected_encoding["shuffle"] = False - - actual_encoding = actual["var2"].encoding - for k, v in expected_encoding.items(): - assert v == actual_encoding[k] - if encoding_params["compression"] is not None and "blosc" not in encoding_params["compression"]: - # regression test for #156 - expected = data.isel(dim1=0) - with self.roundtrip(expected) as actual: - assert_equal(expected, actual) - def test_encoding_kwarg_compression(self) -> None: ds = Dataset({"x": np.arange(10.0)}) encoding = dict( @@ -1719,6 +1671,53 @@ def test_setncattr_string(self) -> None: assert_array_equal(one_element_list_of_strings, totest.attrs["bar"]) assert one_string == totest.attrs["baz"] + def test_compression_encoding(self) -> None: + data = create_test_data(dim_sizes=(20, 40, 10)) + compression_vals = (None, "zlib", "szip", "zstd", "blosc_lz", "blosc_lz4", "blosc_lz4hc", "blosc_zlib", "blosc_zstd") + for compression in compression_vals: + encoding_params = dict(compression=compression, blosc_shuffle=1) + data["var2"].encoding.update(encoding_params) + data["var2"].encoding.update( + { + "chunksizes": (20, 20), + "original_shape": data.var2.shape, + "blosc_shuffle": 1, + "fletcher32": False, + } + ) + with self.roundtrip(data) as actual: + expected_encoding = data["var2"].encoding.copy() + # compression does not appear in the retrieved encoding, that differs + # from the input encoding. shuffle also chantges. Here we modify the + # expected encoding to account for this + compression = expected_encoding.pop("compression") + blosc_shuffle = expected_encoding.pop("blosc_shuffle") + if compression is not None: + if "blosc" in compression and blosc_shuffle: + expected_encoding["blosc"] = {"compressor": compression, "shuffle": blosc_shuffle} + expected_encoding["shuffle"] = False + elif compression == "szip": + expected_encoding["szip"] = {'coding': 'nn', + 'pixels_per_block': 8} + expected_encoding["shuffle"] = False + else: + # This will set a key like zlib=true which is what appears in + # the encoding when we read it. 
+ expected_encoding[compression] = True + if compression == "zstd": + expected_encoding["shuffle"] = False + else: + expected_encoding["shuffle"] = False + + actual_encoding = actual["var2"].encoding + for k, v in expected_encoding.items(): + assert v == actual_encoding[k] + if encoding_params["compression"] is not None and "blosc" not in encoding_params["compression"]: + # regression test for #156 + expected = data.isel(dim1=0) + with self.roundtrip(expected) as actual: + assert_equal(expected, actual) + @pytest.mark.skip(reason="https://github.com/Unidata/netcdf4-python/issues/1195") def test_refresh_from_disk(self) -> None: super().test_refresh_from_disk() From 371d2d732aff61dd41073003f11321897444d382 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Jun 2023 12:31:37 +0000 Subject: [PATCH 07/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_backends.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 752d1fbb69b..0eb00bed70a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1673,7 +1673,17 @@ def test_setncattr_string(self) -> None: def test_compression_encoding(self) -> None: data = create_test_data(dim_sizes=(20, 40, 10)) - compression_vals = (None, "zlib", "szip", "zstd", "blosc_lz", "blosc_lz4", "blosc_lz4hc", "blosc_zlib", "blosc_zstd") + compression_vals = ( + None, + "zlib", + "szip", + "zstd", + "blosc_lz", + "blosc_lz4", + "blosc_lz4hc", + "blosc_zlib", + "blosc_zstd", + ) for compression in compression_vals: encoding_params = dict(compression=compression, blosc_shuffle=1) data["var2"].encoding.update(encoding_params) @@ -1694,11 +1704,16 @@ def test_compression_encoding(self) -> None: blosc_shuffle = expected_encoding.pop("blosc_shuffle") if compression is not None: if "blosc" in compression and blosc_shuffle: - expected_encoding["blosc"] = {"compressor": compression, "shuffle": blosc_shuffle} + expected_encoding["blosc"] = { + "compressor": compression, + "shuffle": blosc_shuffle, + } expected_encoding["shuffle"] = False elif compression == "szip": - expected_encoding["szip"] = {'coding': 'nn', - 'pixels_per_block': 8} + expected_encoding["szip"] = { + "coding": "nn", + "pixels_per_block": 8, + } expected_encoding["shuffle"] = False else: # This will set a key like zlib=true which is what appears in @@ -1712,7 +1727,10 @@ def test_compression_encoding(self) -> None: actual_encoding = actual["var2"].encoding for k, v in expected_encoding.items(): assert v == actual_encoding[k] - if encoding_params["compression"] is not None and "blosc" not in encoding_params["compression"]: + if ( + encoding_params["compression"] is not None + and "blosc" not in encoding_params["compression"] + ): # regression test for #156 expected = data.isel(dim1=0) with self.roundtrip(expected) as actual: From f77d1269f89da109e0844f84017b48de5953f1d1 Mon Sep 17 00:00:00 2001 From: Markel Date: Mon, 30 Oct 2023 14:48:44 +0100 Subject: [PATCH 08/25] simplify this line (code review) --- xarray/backends/netCDF4_.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 9491a518685..495dae58959 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -528,8 +528,7 @@ def prepare_variable( fill_value=fill_value, ) 
default_args.update(encoding) - if "_FillValue" in default_args: - default_args.pop("_FillValue") + default_args.pop("_FillValue", None) nc4_var = self.ds.createVariable(**default_args) nc4_var.setncatts(attrs) From 2df3387b097d2591101fa2c3803876eecf236aac Mon Sep 17 00:00:00 2001 From: markelg Date: Mon, 30 Oct 2023 17:34:26 +0100 Subject: [PATCH 09/25] Added entry to whats-new Also removed an unnecesary call to monkeypatch fixture. --- doc/whats-new.rst | 4 ++++ xarray/tests/test_backends.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b24a19c9129..2aa4a23f895 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,10 @@ New Features - Use `opt_einsum `_ for :py:func:`xarray.dot` by default if installed. By `Deepak Cherian `_. (:issue:`7764`, :pull:`8373`). +- Accept the compression arguments new in netCDF 1.6.0 in the netCDF4 backend. + See `netCDF4 documentation `_ for details. + By `Markel García-Díez `_. (:issue:`6929`, :pull:`7551`) Note that some + new compression filters needs plugins to be installed which may not be available in all netCDF distributions. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e6e834183f1..e91cedcc6af 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4741,7 +4741,7 @@ def test_extract_nc4_variable_encoding(self) -> None: assert {} == encoding @requires_netCDF4 - def test_extract_nc4_variable_encoding_netcdf4(self, monkeypatch): + def test_extract_nc4_variable_encoding_netcdf4(self): # New netCDF4 1.6.0 compression argument. var = xr.Variable(("x",), [1, 2, 3], {}, {"compression": "szlib"}) _extract_nc4_variable_encoding(var, backend="netCDF4", raise_on_invalid=True) From c2ce8d5c92e8f5823ad6bbafdd79b5c7f1148d5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 18 Dec 2023 14:45:12 +0100 Subject: [PATCH 10/25] bump netcdf4 to 1.6.2 in min-all-deps.yml --- ci/requirements/min-all-deps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 22076f88854..b4f246aeb72 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -29,7 +29,7 @@ dependencies: - nc-time-axis=1.4 # netcdf follows a 1.major.minor[.patch] convention # (see https://github.com/Unidata/netcdf4-python/issues/1090) - - netcdf4=1.6.0 + - netcdf4=1.6.2 - numba=0.55 - numpy=1.22 - packaging=21.3 From e5d06092b08cb95f63598331b9251191835521bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 18 Dec 2023 14:45:52 +0100 Subject: [PATCH 11/25] parametrize compression in test --- xarray/tests/test_backends.py | 108 +++++++++++++++++----------------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 68de35ae131..57abd6b6f4d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1767,9 +1767,9 @@ def test_setncattr_string(self) -> None: assert_array_equal(one_element_list_of_strings, totest.attrs["bar"]) assert one_string == totest.attrs["baz"] - def test_compression_encoding(self) -> None: - data = create_test_data(dim_sizes=(20, 40, 10)) - compression_vals = ( + @pytest.mark.parametrize( + "compression", + [ None, "zlib", "szip", @@ -1779,58 +1779,60 @@ def test_compression_encoding(self) -> None: "blosc_lz4hc", "blosc_zlib", "blosc_zstd", + ], + ) + def 
test_compression_encoding(self, compression) -> None: + data = create_test_data(dim_sizes=(20, 40, 10)) + encoding_params = dict(compression=compression, blosc_shuffle=1) + data["var2"].encoding.update(encoding_params) + data["var2"].encoding.update( + { + "chunksizes": (20, 20), + "original_shape": data.var2.shape, + "blosc_shuffle": 1, + "fletcher32": False, + } ) - for compression in compression_vals: - encoding_params = dict(compression=compression, blosc_shuffle=1) - data["var2"].encoding.update(encoding_params) - data["var2"].encoding.update( - { - "chunksizes": (20, 20), - "original_shape": data.var2.shape, - "blosc_shuffle": 1, - "fletcher32": False, - } - ) - with self.roundtrip(data) as actual: - expected_encoding = data["var2"].encoding.copy() - # compression does not appear in the retrieved encoding, that differs - # from the input encoding. shuffle also chantges. Here we modify the - # expected encoding to account for this - compression = expected_encoding.pop("compression") - blosc_shuffle = expected_encoding.pop("blosc_shuffle") - if compression is not None: - if "blosc" in compression and blosc_shuffle: - expected_encoding["blosc"] = { - "compressor": compression, - "shuffle": blosc_shuffle, - } - expected_encoding["shuffle"] = False - elif compression == "szip": - expected_encoding["szip"] = { - "coding": "nn", - "pixels_per_block": 8, - } - expected_encoding["shuffle"] = False - else: - # This will set a key like zlib=true which is what appears in - # the encoding when we read it. - expected_encoding[compression] = True - if compression == "zstd": - expected_encoding["shuffle"] = False - else: + with self.roundtrip(data) as actual: + expected_encoding = data["var2"].encoding.copy() + # compression does not appear in the retrieved encoding, that differs + # from the input encoding. shuffle also chantges. Here we modify the + # expected encoding to account for this + compression = expected_encoding.pop("compression") + blosc_shuffle = expected_encoding.pop("blosc_shuffle") + if compression is not None: + if "blosc" in compression and blosc_shuffle: + expected_encoding["blosc"] = { + "compressor": compression, + "shuffle": blosc_shuffle, + } expected_encoding["shuffle"] = False - - actual_encoding = actual["var2"].encoding - for k, v in expected_encoding.items(): - assert v == actual_encoding[k] - if ( - encoding_params["compression"] is not None - and "blosc" not in encoding_params["compression"] - ): - # regression test for #156 - expected = data.isel(dim1=0) - with self.roundtrip(expected) as actual: - assert_equal(expected, actual) + elif compression == "szip": + expected_encoding["szip"] = { + "coding": "nn", + "pixels_per_block": 8, + } + expected_encoding["shuffle"] = False + else: + # This will set a key like zlib=true which is what appears in + # the encoding when we read it. 
+ expected_encoding[compression] = True + if compression == "zstd": + expected_encoding["shuffle"] = False + else: + expected_encoding["shuffle"] = False + + actual_encoding = actual["var2"].encoding + for k, v in expected_encoding.items(): + assert v == actual_encoding[k] + if ( + encoding_params["compression"] is not None + and "blosc" not in encoding_params["compression"] + ): + # regression test for #156 + expected = data.isel(dim1=0) + with self.roundtrip(expected) as actual: + assert_equal(expected, actual) @pytest.mark.skip(reason="https://github.com/Unidata/netcdf4-python/issues/1195") def test_refresh_from_disk(self) -> None: From d77e3fd6c468bf994eec55bc955363746743feae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 18 Dec 2023 15:03:29 +0100 Subject: [PATCH 12/25] Revert "bump netcdf4 to 1.6.2 in min-all-deps.yml" This reverts commit c2ce8d5c92e8f5823ad6bbafdd79b5c7f1148d5e. --- ci/requirements/min-all-deps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index b4f246aeb72..22076f88854 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -29,7 +29,7 @@ dependencies: - nc-time-axis=1.4 # netcdf follows a 1.major.minor[.patch] convention # (see https://github.com/Unidata/netcdf4-python/issues/1090) - - netcdf4=1.6.2 + - netcdf4=1.6.0 - numba=0.55 - numpy=1.22 - packaging=21.3 From fbffef24944df064e0147340b105939b4600fbf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 18 Dec 2023 15:13:30 +0100 Subject: [PATCH 13/25] check netCDF4 version and skip test if netcdf4 version <1.6.2 --- xarray/tests/test_backends.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 57abd6b6f4d..8e604dc5f58 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1710,6 +1710,9 @@ def test_raise_on_forward_slashes_in_names(self) -> None: @requires_netCDF4 class TestNetCDF4Data(NetCDF4Base): + def nc4_version(self): + return Version(nc4.__version__) + @contextlib.contextmanager def create_store(self): with create_tmp_file() as tmp_file: @@ -1782,6 +1785,10 @@ def test_setncattr_string(self) -> None: ], ) def test_compression_encoding(self, compression) -> None: + if self.nc4_version < Version("1.6.2"): + pytest.skip( + "Compression options only available for netcdf4-python >= 1.6.2, running with {self.nc_version}." 
+ ) data = create_test_data(dim_sizes=(20, 40, 10)) encoding_params = dict(compression=compression, blosc_shuffle=1) data["var2"].encoding.update(encoding_params) From 5b271fab36ec84fbcb73f786c99bd277fd23be9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 18 Dec 2023 15:17:04 +0100 Subject: [PATCH 14/25] fix typing --- xarray/tests/test_backends.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8e604dc5f58..6791bae946d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1710,7 +1710,8 @@ def test_raise_on_forward_slashes_in_names(self) -> None: @requires_netCDF4 class TestNetCDF4Data(NetCDF4Base): - def nc4_version(self): + @property + def nc4_version(self) -> Version: return Version(nc4.__version__) @contextlib.contextmanager @@ -1784,7 +1785,7 @@ def test_setncattr_string(self) -> None: "blosc_zstd", ], ) - def test_compression_encoding(self, compression) -> None: + def test_compression_encoding(self, compression: str | None) -> None: if self.nc4_version < Version("1.6.2"): pytest.skip( "Compression options only available for netcdf4-python >= 1.6.2, running with {self.nc_version}." From a2a41abc0219a0255cf191c16a26b64ad3c305b3 Mon Sep 17 00:00:00 2001 From: markelg Date: Mon, 18 Dec 2023 19:34:30 +0100 Subject: [PATCH 15/25] Larger chunks to avoid random blosc errors With smaller chunks it raises "Blosc_FIlter Error: blosc_filter: Buffer is uncompressible." one out of three times. --- xarray/tests/test_backends.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6791bae946d..bfdf73a11e8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1790,12 +1790,12 @@ def test_compression_encoding(self, compression: str | None) -> None: pytest.skip( "Compression options only available for netcdf4-python >= 1.6.2, running with {self.nc_version}." 
) - data = create_test_data(dim_sizes=(20, 40, 10)) + data = create_test_data(dim_sizes=(20, 80, 10)) encoding_params = dict(compression=compression, blosc_shuffle=1) data["var2"].encoding.update(encoding_params) data["var2"].encoding.update( { - "chunksizes": (20, 20), + "chunksizes": (20, 40), "original_shape": data.var2.shape, "blosc_shuffle": 1, "fletcher32": False, From b98c9268c41a2fcb79b6cc7beea2a9fe01160de1 Mon Sep 17 00:00:00 2001 From: markelg Date: Mon, 18 Dec 2023 20:06:10 +0100 Subject: [PATCH 16/25] use decorator to skip old netCDF4 versions --- xarray/tests/__init__.py | 4 ++++ xarray/tests/test_backends.py | 9 +++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 61d544cc3aa..5fe5420c9bd 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -113,6 +113,10 @@ def _importorskip( not has_h5netcdf_ros3[0], reason="requires h5netcdf 1.3.0" ) +has_netCDF4_1_6_2_or_above = _importorskip("netCDF4", "1.6.2") +requires_netCDF4_1_6_2_or_above = pytest.mark.skipif( + not has_netCDF4_1_6_2_or_above, reason="requires netCDF4 1.6.2 or above.") + # change some global options for tests set_options(warn_for_unclosed_files=True) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index bfdf73a11e8..7ac4931ce42 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -76,7 +76,7 @@ requires_pynio, requires_scipy, requires_scipy_or_netCDF4, - requires_zarr, + requires_zarr, requires_netCDF4_1_6_2_or_above, ) from xarray.tests.test_coding_times import ( _ALL_CALENDARS, @@ -1785,13 +1785,10 @@ def test_setncattr_string(self) -> None: "blosc_zstd", ], ) + @requires_netCDF4_1_6_2_or_above def test_compression_encoding(self, compression: str | None) -> None: - if self.nc4_version < Version("1.6.2"): - pytest.skip( - "Compression options only available for netcdf4-python >= 1.6.2, running with {self.nc_version}." - ) data = create_test_data(dim_sizes=(20, 80, 10)) - encoding_params = dict(compression=compression, blosc_shuffle=1) + encoding_params: dict[str, Any] = dict(compression=compression, blosc_shuffle=1) data["var2"].encoding.update(encoding_params) data["var2"].encoding.update( { From 74b74d7c92ff5678d2f9e7ab0ab8dac05b8d1e62 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Dec 2023 19:06:52 +0000 Subject: [PATCH 17/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/__init__.py | 3 ++- xarray/tests/test_backends.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 5fe5420c9bd..f638763bd65 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -115,7 +115,8 @@ def _importorskip( has_netCDF4_1_6_2_or_above = _importorskip("netCDF4", "1.6.2") requires_netCDF4_1_6_2_or_above = pytest.mark.skipif( - not has_netCDF4_1_6_2_or_above, reason="requires netCDF4 1.6.2 or above.") + not has_netCDF4_1_6_2_or_above, reason="requires netCDF4 1.6.2 or above." 
+) # change some global options for tests set_options(warn_for_unclosed_files=True) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 7ac4931ce42..f51c7e2bbf4 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -72,11 +72,12 @@ requires_h5netcdf_ros3, requires_iris, requires_netCDF4, + requires_netCDF4_1_6_2_or_above, requires_pydap, requires_pynio, requires_scipy, requires_scipy_or_netCDF4, - requires_zarr, requires_netCDF4_1_6_2_or_above, + requires_zarr, ) from xarray.tests.test_coding_times import ( _ALL_CALENDARS, From a4e4d8c64c10904c08dc6c58b065594a4b5abe33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 19 Dec 2023 07:42:57 +0100 Subject: [PATCH 18/25] remove stale version-property --- xarray/tests/test_backends.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f51c7e2bbf4..e5943a81220 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1711,10 +1711,6 @@ def test_raise_on_forward_slashes_in_names(self) -> None: @requires_netCDF4 class TestNetCDF4Data(NetCDF4Base): - @property - def nc4_version(self) -> Version: - return Version(nc4.__version__) - @contextlib.contextmanager def create_store(self): with create_tmp_file() as tmp_file: From 4ae2bca5875b60cae68deb77d2241dbada839a43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 19 Dec 2023 07:47:02 +0100 Subject: [PATCH 19/25] fix whats-new.rst --- doc/whats-new.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 97926c9b569..c77c380897d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,10 @@ New Features - :py:meth:`xr.cov` and :py:meth:`xr.corr` now support using weights (:issue:`8527`, :pull:`7392`). By `Llorenç Lledó `_. +- Accept the compression arguments new in netCDF 1.6.0 in the netCDF4 backend. + See `netCDF4 documentation `_ for details. + By `Markel García-Díez `_. (:issue:`6929`, :pull:`7551`) Note that some + new compression filters needs plugins to be installed which may not be available in all netCDF distributions. Breaking changes ~~~~~~~~~~~~~~~~ @@ -200,10 +204,6 @@ New Features - Enable VLEN string fill_values, preserve VLEN string dtypes (:issue:`1647`, :issue:`7652`, :issue:`7868`, :pull:`7869`). By `Kai Mühlbauer `_. -- Accept the compression arguments new in netCDF 1.6.0 in the netCDF4 backend. - See `netCDF4 documentation `_ for details. - By `Markel García-Díez `_. (:issue:`6929`, :pull:`7551`) Note that some - new compression filters needs plugins to be installed which may not be available in all netCDF distributions. Breaking changes ~~~~~~~~~~~~~~~~ - drop support for `cdms2 `_. Please use From f8f37f4458765ecee89585a409fd15fbff4033a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 19 Dec 2023 08:02:57 +0100 Subject: [PATCH 20/25] fix requires-decorator --- xarray/tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index f638763bd65..f345c062589 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -115,7 +115,7 @@ def _importorskip( has_netCDF4_1_6_2_or_above = _importorskip("netCDF4", "1.6.2") requires_netCDF4_1_6_2_or_above = pytest.mark.skipif( - not has_netCDF4_1_6_2_or_above, reason="requires netCDF4 1.6.2 or above." 
+ not has_netCDF4_1_6_2_or_above[0], reason="requires netCDF4 1.6.2 or above." ) # change some global options for tests From b7e56fef5d0e03b2d21d8306213632d0894df9dc Mon Sep 17 00:00:00 2001 From: markelg Date: Tue, 19 Dec 2023 14:33:16 +0100 Subject: [PATCH 21/25] fix for asserts of other tests that use test data --- xarray/tests/__init__.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index f345c062589..5c34c6479fa 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -268,10 +268,13 @@ def assert_allclose(a, b, check_default_indexes=True, **kwargs): xarray.testing._assert_internal_invariants(b, check_default_indexes) +_DEFAULT_TEST_DIM_SIZES = (8, 9, 10) + + def create_test_data( seed: int | None = None, add_attrs: bool = True, - dim_sizes: tuple[int, int, int] = (8, 9, 10), + dim_sizes: tuple[int, int, int] = _DEFAULT_TEST_DIM_SIZES, ) -> Dataset: rs = np.random.RandomState(seed) _vars = { @@ -294,10 +297,12 @@ def create_test_data( obj[v] = (dims, data) if add_attrs: obj[v].attrs = {"foo": "variable"} - obj.coords["numbers"] = ( - "dim3", - np.random.randint(0, 3, _dims["dim3"], dtype="int64"), - ) + + if dim_sizes == _DEFAULT_TEST_DIM_SIZES: + numbers_values = np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64") + else: + numbers_values = np.random.randint(0, 3, _dims["dim3"], dtype="int64") + obj.coords["numbers"] = ("dim3", numbers_values) obj.encoding = {"foo": "bar"} assert all(obj.data.flags.writeable for obj in obj.variables.values()) return obj From 138a439ced98d4c245b5be589eec76abfac4a35b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 20 Dec 2023 08:45:59 +0100 Subject: [PATCH 22/25] Apply suggestions from code review --- xarray/tests/__init__.py | 4 +--- xarray/tests/test_backends.py | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 5c34c6479fa..686e00d5fb1 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -113,9 +113,7 @@ def _importorskip( not has_h5netcdf_ros3[0], reason="requires h5netcdf 1.3.0" ) -has_netCDF4_1_6_2_or_above = _importorskip("netCDF4", "1.6.2") -requires_netCDF4_1_6_2_or_above = pytest.mark.skipif( - not has_netCDF4_1_6_2_or_above[0], reason="requires netCDF4 1.6.2 or above." 
+has_netCDF4_1_6_2_or_above, requires_netCDF4_1_6_2_or_above = _importorskip("netCDF4", "1.6.2") ) # change some global options for tests diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e5943a81220..b61db7d1aa8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1825,8 +1825,7 @@ def test_compression_encoding(self, compression: str | None) -> None: expected_encoding["shuffle"] = False actual_encoding = actual["var2"].encoding - for k, v in expected_encoding.items(): - assert v == actual_encoding[k] + assert expected_encoding.items() <= actual_encoding.items() if ( encoding_params["compression"] is not None and "blosc" not in encoding_params["compression"] From 9e25e6eed63b214402afa813b8f0c7089ae7746e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 20 Dec 2023 08:47:13 +0100 Subject: [PATCH 23/25] Update xarray/tests/__init__.py --- xarray/tests/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 686e00d5fb1..23083ad6607 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -114,7 +114,6 @@ def _importorskip( ) has_netCDF4_1_6_2_or_above, requires_netCDF4_1_6_2_or_above = _importorskip("netCDF4", "1.6.2") -) # change some global options for tests set_options(warn_for_unclosed_files=True) From 4e59c9a1c111e40dbac6c6978d288b5d3e9389fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Dec 2023 07:47:50 +0000 Subject: [PATCH 24/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 23083ad6607..7e173528222 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -113,7 +113,9 @@ def _importorskip( not has_h5netcdf_ros3[0], reason="requires h5netcdf 1.3.0" ) -has_netCDF4_1_6_2_or_above, requires_netCDF4_1_6_2_or_above = _importorskip("netCDF4", "1.6.2") +has_netCDF4_1_6_2_or_above, requires_netCDF4_1_6_2_or_above = _importorskip( + "netCDF4", "1.6.2" +) # change some global options for tests set_options(warn_for_unclosed_files=True) From bc74cd834034c72f83ea5a75ae1af8d3fcdd72c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 20 Dec 2023 16:07:50 +0100 Subject: [PATCH 25/25] Update xarray/tests/test_backends.py --- xarray/tests/test_backends.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b61db7d1aa8..a8722d59659 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1783,6 +1783,7 @@ def test_setncattr_string(self) -> None: ], ) @requires_netCDF4_1_6_2_or_above + @pytest.mark.xfail(ON_WINDOWS, reason="new compression not yet implemented") def test_compression_encoding(self, compression: str | None) -> None: data = create_test_data(dim_sizes=(20, 80, 10)) encoding_params: dict[str, Any] = dict(compression=compression, blosc_shuffle=1)
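
Editor's usage sketch (not part of the patch series above): the series makes the netCDF4 backend accept the compression arguments introduced in netcdf4-python 1.6.x through a variable's encoding. The snippet below shows how that could look from the user side; it assumes netcdf4-python >= 1.6.2 with the relevant compression plugins (e.g. zstd) available, and the dataset, variable name, and output filename are illustrative placeholders only.

    import numpy as np
    import xarray as xr

    # A small dataset; "var", "x", "y" and "example.nc" are placeholder names.
    ds = xr.Dataset({"var": (("x", "y"), np.random.rand(100, 100))})

    # Pass the new netCDF4 compression arguments through the encoding dict.
    # "compression" supersedes the legacy zlib=True flag; other keys added to
    # valid_encodings by this series ("blosc_shuffle", "significant_digits",
    # "quantize_mode", ...) would be supplied the same way.
    ds.to_netcdf(
        "example.nc",
        engine="netcdf4",
        encoding={"var": {"compression": "zstd", "complevel": 4, "chunksizes": (50, 50)}},
    )

    # Reading the file back exposes the effective compression in the variable's
    # encoding (e.g. a "zstd": True entry), which is what test_compression_encoding
    # in this series asserts against.
    with xr.open_dataset("example.nc", engine="netcdf4") as actual:
        print(actual["var"].encoding)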