diff --git a/doc/data-structures.rst b/doc/data-structures.rst index 504d820a234..70e34adabed 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -353,6 +353,8 @@ setting) variables and attributes: This is particularly useful in an exploratory context, because you can tab-complete these variable names with tools like IPython. +.. _dictionary_like_methods: + Dictionary like methods ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 80266bd3b84..84c0c786027 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -11,6 +11,8 @@ How do I ... * - How do I... - Solution + * - add a DataArray to my dataset as a new variable + - ``my_dataset[varname] = my_dataArray`` or :py:meth:`Dataset.assign` (see also :ref:`dictionary_like_methods`) * - add variables from other datasets to my dataset - :py:meth:`Dataset.merge` * - add a new dimension and/or coordinate @@ -57,3 +59,4 @@ How do I ... - ``obj.dt.ceil``, ``obj.dt.floor``, ``obj.dt.round``. See :ref:`dt_accessor` for more. * - make a mask that is ``True`` where an object contains any of the values in a array - :py:meth:`Dataset.isin`, :py:meth:`DataArray.isin` + diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 00d1c50780e..1ad344a208d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,12 @@ New Features - Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen` and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ +- Added the ``attrs_file`` option in :py:func:`~xarray.open_mfdataset` to choose the + source file for global attributes in a multi-file dataset (:issue:`2382`, + :pull:`3498`) by `Julien Seguinot `_. +- :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` + now allow swapping to dimension names that don't exist yet. (:pull:`3636`) + By `Justus Magin `_. - Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties and support `.dt` accessor for timedelta via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) @@ -44,6 +50,7 @@ New Features Bug fixes ~~~~~~~~~ + - Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger `_. @@ -62,6 +69,16 @@ Bug fixes By `Tom Augspurger `_. - Ensure :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` issue the correct error when ``q`` is out of bounds (:issue:`3634`) by `Mathias Hauser `_. +- Raise an error when trying to use :py:meth:`Dataset.rename_dims` to + rename to an existing name (:issue:`3438`, :pull:`3645`) + By `Justus Magin `_. +- :py:meth:`Dataset.rename`, :py:meth:`DataArray.rename` now check for conflicts with + MultiIndex level names. +- :py:meth:`Dataset.merge` no longer fails when passed a `DataArray` instead of a `Dataset` object. + By `Tom Nicholas `_. +- Fix a regression in :py:meth:`Dataset.drop`: allow passing any + iterable when dropping variables (:issue:`3552`, :pull:`3693`) + By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ @@ -80,9 +97,14 @@ Documentation - Added examples for :py:meth:`DataArray.quantile`, :py:meth:`Dataset.quantile` and ``GroupBy.quantile``. (:pull:`3576`) By `Justus Magin `_. +- Added example for :py:func:`~xarray.map_blocks`. (:pull:`3667`) + By `Riley X. Brady `_. Internal Changes ~~~~~~~~~~~~~~~~ +- Make sure dask names change when rechunking by different chunk sizes. Conversely, make sure they + stay the same when rechunking by the same chunk size. (:issue:`3350`) + By `Deepak Cherian `_.
- 2x to 5x speed boost (on small arrays) for :py:meth:`Dataset.isel`, :py:meth:`DataArray.isel`, and :py:meth:`DataArray.__getitem__` when indexing by int, slice, list of int, scalar ndarray, or 1-dimensional ndarray. diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 23d09ba5e33..eea1fb9ddce 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -718,6 +718,7 @@ def open_mfdataset( autoclose=None, parallel=False, join="outer", + attrs_file=None, **kwargs, ): """Open multiple files as a single dataset. @@ -729,8 +730,8 @@ def open_mfdataset( ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated) ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or ``combine='nested'`` in future. Requires dask to be installed. See documentation for - details on dask [1]_. Attributes from the first dataset file are used for the - combined dataset. + details on dask [1]_. Global attributes from the ``attrs_file`` are used + for the combined dataset. Parameters ---------- @@ -827,6 +828,10 @@ def open_mfdataset( - 'override': if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. + attrs_file : str or pathlib.Path, optional + Path of the file used to read global attributes from. + By default global attributes are read from the first file provided, + with wildcard matches sorted by filename. **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. @@ -961,7 +966,15 @@ def open_mfdataset( raise combined._file_obj = _MultiFileCloser(file_objs) - combined.attrs = datasets[0].attrs + + # read global attributes from the attrs_file or from the first dataset + if attrs_file is not None: + if isinstance(attrs_file, Path): + attrs_file = str(attrs_file) + combined.attrs = datasets[paths.index(attrs_file)].attrs + else: + combined.attrs = datasets[0].attrs + return combined diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 31aa4da57b2..15db6ed468e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1480,8 +1480,7 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": ---------- dims_dict : dict-like Dictionary whose keys are current dimension names and whose values - are new names. Each value must already be a coordinate on this - array. + are new names. Returns ------- @@ -1504,6 +1503,13 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": Coordinates: x (y) >> arr.swap_dims({"x": "z"}) + + array([0, 1]) + Coordinates: + x (z) dict: naming conventions. Converts all variables and attributes to native Python objects. - Useful for coverting to json. To avoid datetime incompatibility + Useful for converting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. 
Parameters diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6be06fed117..79f1030fabe 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -85,11 +85,16 @@ either_dict_or_kwargs, hashable, is_dict_like, - is_list_like, is_scalar, maybe_wrap_array, ) -from .variable import IndexVariable, Variable, as_variable, broadcast_variables +from .variable import ( + IndexVariable, + Variable, + as_variable, + broadcast_variables, + assert_unique_multiindex_level_names, +) if TYPE_CHECKING: from ..backends import AbstractDataStore, ZarrStore @@ -1748,7 +1753,10 @@ def maybe_chunk(name, var, chunks): if not chunks: chunks = None if var.ndim > 0: - token2 = tokenize(name, token if token else var._data) + # when rechunking by different amounts, make sure dask names change + # by providing chunks as an input to tokenize. + # subtle bugs result otherwise. see GH3350 + token2 = tokenize(name, token if token else var._data, chunks) name2 = f"{name_prefix}{name}-{token2}" return var.chunk(chunks, name=name2, lock=lock) else: @@ -2780,6 +2788,7 @@ def rename( variables, coord_names, dims, indexes = self._rename_all( name_dict=name_dict, dims_dict=name_dict ) + assert_unique_multiindex_level_names(variables) return self._replace(variables, coord_names, dims=dims, indexes=indexes) def rename_dims( @@ -2791,7 +2800,8 @@ def rename_dims( ---------- dims_dict : dict-like, optional Dictionary whose keys are current dimension names and - whose values are the desired names. + whose values are the desired names. The desired names must + not be the name of an existing dimension or Variable in the Dataset. **dims, optional Keyword form of ``dims_dict``. One of dims_dict or dims must be provided. @@ -2809,12 +2819,17 @@ def rename_dims( DataArray.rename """ dims_dict = either_dict_or_kwargs(dims_dict, dims, "rename_dims") - for k in dims_dict: + for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( "cannot rename %r because it is not a " "dimension in this dataset" % k ) + if v in self.dims or v in self: + raise ValueError( + f"Cannot rename {k} to {v} because {v} already exists. " + "Try using swap_dims instead." + ) variables, coord_names, sizes, indexes = self._rename_all( name_dict={}, dims_dict=dims_dict @@ -2868,8 +2883,7 @@ def swap_dims( ---------- dims_dict : dict-like Dictionary whose keys are current dimension names and whose values - are new names. Each value must already be a variable in the - dataset. + are new names. Returns ------- @@ -2898,6 +2912,16 @@ def swap_dims( Data variables: a (y) int64 5 7 b (y) float64 0.1 2.4 + >>> ds.swap_dims({"x": "z"}) + + Dimensions: (z: 2) + Coordinates: + x (z) "Dataset": def merge( self, - other: "CoercibleMapping", + other: Union["CoercibleMapping", "DataArray"], inplace: bool = None, overwrite_vars: Union[Hashable, Iterable[Hashable]] = frozenset(), compat: str = "no_conflicts", @@ -3582,6 +3606,7 @@ def merge( If any variables conflict (see ``compat``).
""" _check_inplace(inplace) + other = other.to_dataset() if isinstance(other, xr.DataArray) else other merge_result = dataset_merge_method( self, other, @@ -3664,7 +3689,7 @@ def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): raise ValueError("cannot specify dim and dict-like arguments.") labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") - if dim is None and (is_list_like(labels) or is_scalar(labels)): + if dim is None and (is_scalar(labels) or isinstance(labels, Iterable)): warnings.warn( "dropping variables using `drop` will be deprecated; using drop_vars is encouraged.", PendingDeprecationWarning, @@ -4127,7 +4152,7 @@ def combine_first(self, other: "Dataset") -> "Dataset": Returns ------- - DataArray + Dataset """ out = ops.fillna(self, other, join="outer", dataset_join="outer") return out @@ -4641,7 +4666,7 @@ def to_dict(self, data=True): conventions. Converts all variables and attributes to native Python objects - Useful for coverting to json. To avoid datetime incompatibility + Useful for converting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. Parameters diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index dd6c67338d8..e4fb5803191 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -154,6 +154,48 @@ def map_blocks( -------- dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks, xarray.DataArray.map_blocks + + Examples + -------- + + Calculate an anomaly from climatology using ``.groupby()``. Using + ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, + its indices, and its methods like ``.groupby()``. + + >>> def calculate_anomaly(da, groupby_type='time.month'): + ... # Necessary workaround to xarray's check with zero dimensions + ... # https://github.com/pydata/xarray/issues/3575 + ... if sum(da.shape) == 0: + ... return da + ... gb = da.groupby(groupby_type) + ... clim = gb.mean(dim='time') + ... return gb - clim + >>> time = xr.cftime_range('1990-01', '1992-01', freq='M') + >>> np.random.seed(123) + >>> array = xr.DataArray(np.random.rand(len(time)), + ... dims="time", coords=[time]).chunk() + >>> xr.map_blocks(calculate_anomaly, array).compute() + + array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, + 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, + -0.19063865, 0.0590131 , -0.12894847, -0.11323072, 0.0855964 , + 0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108, + 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + + Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments + to the function being applied in ``xr.map_blocks()``: + + >>> xr.map_blocks(calculate_anomaly, array, kwargs={'groupby_type': 'time.year'}) + + array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , + -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, + -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, + 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , + 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 
1991-12-31 00:00:00 """ def _wrapper(func, obj, to_array, args, kwargs): diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 6681375c18e..e335365d5ca 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -547,7 +547,12 @@ def __eq__(self, other) -> bool: return False def __hash__(self) -> int: - return hash((ReprObject, self._value)) + return hash((type(self), self._value)) + + def __dask_tokenize__(self): + from dask.base import normalize_token + + return normalize_token((type(self), self._value)) @contextlib.contextmanager diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a23527bd49a..4fccdf2dd6c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2832,6 +2832,42 @@ def test_attrs_mfdataset(self): with raises_regex(AttributeError, "no attribute"): actual.test2 + def test_open_mfdataset_attrs_file(self): + original = Dataset({"foo": ("x", np.random.randn(10))}) + with create_tmp_files(2) as (tmp1, tmp2): + ds1 = original.isel(x=slice(5)) + ds2 = original.isel(x=slice(5, 10)) + ds1.attrs["test1"] = "foo" + ds2.attrs["test2"] = "bar" + ds1.to_netcdf(tmp1) + ds2.to_netcdf(tmp2) + with open_mfdataset( + [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 + ) as actual: + # attributes are inherited from the master file + assert actual.attrs["test2"] == ds2.attrs["test2"] + # attributes from ds1 are not retained, e.g., + assert "test1" not in actual.attrs + + def test_open_mfdataset_attrs_file_path(self): + original = Dataset({"foo": ("x", np.random.randn(10))}) + with create_tmp_files(2) as (tmp1, tmp2): + tmp1 = Path(tmp1) + tmp2 = Path(tmp2) + ds1 = original.isel(x=slice(5)) + ds2 = original.isel(x=slice(5, 10)) + ds1.attrs["test1"] = "foo" + ds2.attrs["test2"] = "bar" + ds1.to_netcdf(tmp1) + ds2.to_netcdf(tmp2) + with open_mfdataset( + [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 + ) as actual: + # attributes are inherited from the master file + assert actual.attrs["test2"] == ds2.attrs["test2"] + # attributes from ds1 are not retained, e.g., + assert "test1" not in actual.attrs + def test_open_mfdataset_auto_combine(self): original = Dataset({"foo": ("x", np.random.randn(10)), "x": np.arange(10)}) with create_tmp_file() as tmp1: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index d012fb36c35..00c34940ce4 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -451,7 +451,7 @@ def test_cf_datetime_nan(num_dates, units, expected_list): warnings.filterwarnings("ignore", "All-NaN") actual = coding.times.decode_cf_datetime(num_dates, units) # use pandas because numpy will deprecate timezone-aware conversions - expected = pd.to_datetime(expected_list) + expected = pd.to_datetime(expected_list).to_numpy(dtype="datetime64[ns]") assert_array_equal(expected, actual) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index d0e2654eed3..cc554850839 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1083,7 +1083,7 @@ def func(obj): actual = xr.map_blocks(func, obj) expected = func(obj) assert_chunks_equal(expected.chunk(), actual) - xr.testing.assert_identical(actual.compute(), expected.compute()) + assert_identical(actual, expected) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) @@ -1092,7 +1092,7 @@ def test_map_blocks_convert_args_to_list(obj): with raise_if_dask_computes(): actual = xr.map_blocks(operator.add, obj, [10]) assert_chunks_equal(expected.chunk(), actual) - 
xr.testing.assert_identical(actual.compute(), expected.compute()) + assert_identical(actual, expected) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) @@ -1107,7 +1107,7 @@ def add_attrs(obj): with raise_if_dask_computes(): actual = xr.map_blocks(add_attrs, obj) - xr.testing.assert_identical(actual.compute(), expected.compute()) + assert_identical(actual, expected) def test_map_blocks_change_name(map_da): @@ -1120,7 +1120,7 @@ def change_name(obj): with raise_if_dask_computes(): actual = xr.map_blocks(change_name, map_da) - xr.testing.assert_identical(actual.compute(), expected.compute()) + assert_identical(actual, expected) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) @@ -1129,7 +1129,7 @@ def test_map_blocks_kwargs(obj): with raise_if_dask_computes(): actual = xr.map_blocks(xr.full_like, obj, kwargs=dict(fill_value=np.nan)) assert_chunks_equal(expected.chunk(), actual) - xr.testing.assert_identical(actual.compute(), expected.compute()) + assert_identical(actual, expected) def test_map_blocks_to_array(map_ds): @@ -1137,7 +1137,7 @@ def test_map_blocks_to_array(map_ds): actual = xr.map_blocks(lambda x: x.to_array(), map_ds) # to_array does not preserve name, so cannot use assert_identical - assert_equal(actual.compute(), map_ds.to_array().compute()) + assert_equal(actual, map_ds.to_array()) @pytest.mark.parametrize( @@ -1156,7 +1156,7 @@ def test_map_blocks_da_transformations(func, map_da): with raise_if_dask_computes(): actual = xr.map_blocks(func, map_da) - assert_identical(actual.compute(), func(map_da).compute()) + assert_identical(actual, func(map_da)) @pytest.mark.parametrize( @@ -1175,7 +1175,7 @@ def test_map_blocks_ds_transformations(func, map_ds): with raise_if_dask_computes(): actual = xr.map_blocks(func, map_ds) - assert_identical(actual.compute(), func(map_ds).compute()) + assert_identical(actual, func(map_ds)) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) @@ -1188,7 +1188,7 @@ def func(obj): expected = xr.map_blocks(func, obj) actual = obj.map_blocks(func) - assert_identical(expected.compute(), actual.compute()) + assert_identical(expected, actual) def test_map_blocks_hlg_layers(): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f957316d8ac..786eb5007a6 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -752,12 +752,19 @@ def test_chunk(self): blocked = unblocked.chunk() assert blocked.chunks == ((3,), (4,)) + first_dask_name = blocked.data.name blocked = unblocked.chunk(chunks=((2, 1), (2, 2))) assert blocked.chunks == ((2, 1), (2, 2)) + assert blocked.data.name != first_dask_name blocked = unblocked.chunk(chunks=(3, 3)) assert blocked.chunks == ((3,), (3, 1)) + assert blocked.data.name != first_dask_name + + # name doesn't change when rechunking by same amount + # this fails if ReprObject doesn't have __dask_tokenize__ defined + assert unblocked.chunk(2).data.name == unblocked.chunk(2).data.name assert blocked.load().chunks is None @@ -1530,6 +1537,11 @@ def test_swap_dims(self): actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) + array = DataArray(np.random.randn(3), {"x": list("abc")}, "x") + expected = DataArray(array.values, {"x": ("y", list("abc"))}, dims="y") + actual = array.swap_dims({"x": "y"}) + assert_identical(expected, actual) + def test_expand_dims_error(self): array = DataArray( np.random.randn(3, 4), diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 7db1911621b..f9eb37dbf2f 100644 --- 
a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -936,19 +936,35 @@ def test_chunk(self): expected_chunks = {"dim1": (8,), "dim2": (9,), "dim3": (10,)} assert reblocked.chunks == expected_chunks + def get_dask_names(ds): + return {k: v.data.name for k, v in ds.items()} + + orig_dask_names = get_dask_names(reblocked) + reblocked = data.chunk({"time": 5, "dim1": 5, "dim2": 5, "dim3": 5}) # time is not a dim in any of the data_vars, so it # doesn't get chunked expected_chunks = {"dim1": (5, 3), "dim2": (5, 4), "dim3": (5, 5)} assert reblocked.chunks == expected_chunks + # make sure dask names change when rechunking by different amounts + # regression test for GH3350 + new_dask_names = get_dask_names(reblocked) + for k, v in new_dask_names.items(): + assert v != orig_dask_names[k] + reblocked = data.chunk(expected_chunks) assert reblocked.chunks == expected_chunks # reblock on already blocked data + orig_dask_names = get_dask_names(reblocked) reblocked = reblocked.chunk(expected_chunks) + new_dask_names = get_dask_names(reblocked) assert reblocked.chunks == expected_chunks assert_identical(reblocked, data) + # rechunking with same chunk sizes should not change names + for k, v in new_dask_names.items(): + assert v == orig_dask_names[k] with raises_regex(ValueError, "some chunks"): data.chunk({"foo": 10}) @@ -2151,6 +2167,10 @@ def test_drop_variables(self): actual = data.drop(["time", "not_found_here"], errors="ignore") assert_identical(expected, actual) + with pytest.warns(PendingDeprecationWarning): + actual = data.drop({"time", "not_found_here"}, errors="ignore") + assert_identical(expected, actual) + def test_drop_index_labels(self): data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) @@ -2444,6 +2464,9 @@ def test_rename_dims(self): with pytest.raises(ValueError): original.rename_dims(dims_dict_bad) + with pytest.raises(ValueError): + original.rename_dims({"x": "z"}) + def test_rename_vars(self): original = Dataset({"x": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42}) expected = Dataset( @@ -2461,6 +2484,14 @@ def test_rename_vars(self): with pytest.raises(ValueError): original.rename_vars(names_dict_bad) + def test_rename_multiindex(self): + mindex = pd.MultiIndex.from_tuples( + [([1, 2]), ([3, 4])], names=["level0", "level1"] + ) + data = Dataset({}, {"x": mindex}) + with raises_regex(ValueError, "conflicting MultiIndex"): + data.rename({"x": "level0"}) + @requires_cftime def test_rename_does_not_change_CFTimeIndex_type(self): # make sure CFTimeIndex is not converted to DatetimeIndex #3522 @@ -2525,6 +2556,12 @@ def test_swap_dims(self): with raises_regex(ValueError, "replacement dimension"): original.swap_dims({"x": "z"}) + expected = Dataset( + {"y": ("u", list("abc")), "z": 42}, coords={"x": ("u", [1, 2, 3])} + ) + actual = original.swap_dims({"x": "u"}) + assert_identical(expected, actual) + def test_expand_dims_error(self): original = Dataset( { diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index c1e6c7a5ce8..6c8f3f65657 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -3,6 +3,7 @@ import xarray as xr from xarray.core import dtypes, merge +from xarray.testing import assert_identical from .
import raises_regex from .test_dataset import create_test_data @@ -253,3 +254,9 @@ def test_merge_no_conflicts(self): with pytest.raises(xr.MergeError): ds3 = xr.Dataset({"a": ("y", [2, 3]), "y": [1, 2]}) ds1.merge(ds3, compat="no_conflicts") + + def test_merge_dataarray(self): + ds = xr.Dataset({"a": 0}) + da = xr.DataArray(data=1, name="b") + + assert_identical(ds.merge(da), xr.merge([ds, da]))
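
To make the behavioural changes in this patch concrete, a few usage sketches follow. First, the ``Dataset.merge`` fix exercised by ``test_merge_dataarray`` above: a named ``DataArray`` is now coerced via ``to_dataset()`` instead of raising. This is an illustrative doctest-style sketch, not part of the diff; the variable names are arbitrary:

    >>> import xarray as xr
    >>> ds = xr.Dataset({"a": 0})
    >>> da = xr.DataArray(data=1, name="b")
    >>> merged = ds.merge(da)  # previously failed; now coerced via to_dataset()
    >>> sorted(merged.data_vars)
    ['a', 'b']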
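
Likewise, a sketch of the new ``attrs_file`` argument to ``open_mfdataset`` added in ``xarray/backends/api.py``. The file names ``part1.nc`` and ``part2.nc`` are hypothetical placeholders, so this will not run as-is; only the keyword usage is meant to match the patched signature:

    >>> import xarray as xr
    >>> # global attributes come from part2.nc rather than the first file
    >>> ds = xr.open_mfdataset(
    ...     ["part1.nc", "part2.nc"],
    ...     combine="nested",
    ...     concat_dim="x",
    ...     attrs_file="part2.nc",
    ... )

A ``pathlib.Path`` is also accepted for ``attrs_file``; it is converted to ``str`` before being looked up in ``paths``, as covered by ``test_open_mfdataset_attrs_file_path``.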
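
A sketch of the relaxed ``swap_dims`` behaviour: the target dimension name no longer has to be an existing variable, matching the new docstring examples and the ``test_swap_dims`` cases in this patch:

    >>> import xarray as xr
    >>> arr = xr.DataArray(data=[0, 1], dims="x", coords={"x": ["a", "b"]})
    >>> swapped = arr.swap_dims({"x": "z"})  # "z" does not exist yet
    >>> swapped.dims
    ('z',)

The old ``x`` coordinate is kept as a non-dimension coordinate along the new ``z`` dimension.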
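
Finally, a sketch of the dask-name guarantees behind the ``tokenize`` change in ``maybe_chunk`` and the ``ReprObject.__dask_tokenize__`` addition. It assumes dask is installed; the array contents are arbitrary, and the determinism shown is the within-session behaviour asserted by the updated ``test_chunk`` tests:

    >>> import numpy as np
    >>> import xarray as xr
    >>> da = xr.DataArray(np.arange(10), dims="x")
    >>> # rechunking by the same size keeps the dask name stable ...
    >>> da.chunk(5).data.name == da.chunk(5).data.name
    True
    >>> # ... while a different chunk size produces a new name (GH3350)
    >>> da.chunk(5).data.name == da.chunk(2).data.name
    False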