diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4d9d78cf83e..1d00cb369d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,8 @@ New Features Bug fixes ~~~~~~~~~ +- Fix wrong order when converting a ``pd.Series`` with a MultiIndex to a ``DataArray``. (:issue:`3951`) + By `Keisuke Fujii <https://github.com/fujiisoup>`_. - Fix renaming of coords when one or more stacked coords is not in sorted order during stack+groupby+apply operations. (:issue:`3287`, :pull:`3906`) By `Spencer Hill <https://github.com/spencerahill>`_ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 97b3caf2b6e..dd1e31cc61a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4604,8 +4604,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas if not dataframe.columns.is_unique: raise ValueError("cannot convert DataFrame with non-unique columns") - idx = remove_unused_levels_categories(dataframe.index) - dataframe = dataframe.set_index(idx) + idx, dataframe = remove_unused_levels_categories(dataframe.index, dataframe) obj = cls() if isinstance(idx, pd.MultiIndex): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index dea1767d50c..a4a5fa2c466 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -9,7 +9,7 @@ from .variable import Variable -def remove_unused_levels_categories(index): +def remove_unused_levels_categories(index, dataframe=None): """ Remove unused levels from MultiIndex and unused categories from CategoricalIndex """ @@ -28,7 +28,11 @@ def remove_unused_levels_categories(index): index = pd.MultiIndex.from_arrays(levels, names=index.names) elif isinstance(index, pd.CategoricalIndex): index = index.remove_unused_categories() - return index + + if dataframe is None: + return index + dataframe = dataframe.set_index(index) + return dataframe.index, dataframe class Indexes(collections.abc.Mapping): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index cf31182ed30..c3e5aafabfe 100644 --- 
a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3515,6 +3515,14 @@ def test_to_and_from_series(self): expected_da, DataArray.from_series(actual).drop_vars(["x", "y"]) ) + def test_from_series_multiindex(self): + # GH:3951 + df = pd.DataFrame({"B": [1, 2, 3], "A": [4, 5, 6]}) + df = df.rename_axis("num").rename_axis("alpha", axis=1) + actual = df.stack("alpha").to_xarray() + assert (actual.sel(alpha="B") == [1, 2, 3]).all() + assert (actual.sel(alpha="A") == [4, 5, 6]).all() + @requires_sparse def test_from_series_sparse(self): import sparse @@ -4524,7 +4532,7 @@ def test_argmax(self, x, minindex, maxindex, nanindex): def test_idxmin(self, x, minindex, maxindex, nanindex): ar0 = xr.DataArray( - x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs, + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) # dim doesn't exist @@ -4620,7 +4628,7 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): def test_idxmax(self, x, minindex, maxindex, nanindex): ar0 = xr.DataArray( - x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs, + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) # dim doesn't exist