Skip to content

Commit

Permalink
Fix wrong order of coordinate converted from pd.Series with MultiIndex (#3953)
Browse files Browse the repository at this point in the history

* Fix 3951

* lint

* black

* Update whatsnew
  • Loading branch information
fujiisoup authored Apr 8, 2020
1 parent f07adb2 commit 1eedc5c
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 6 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ New Features

Bug fixes
~~~~~~~~~
- Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`)
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- Fix renaming of coords when one or more stacked coords is not in
sorted order during stack+groupby+apply operations. (:issue:`3287`,
:pull:`3906`) By `Spencer Hill <https://github.com/spencerahill>`_
Expand Down
3 changes: 1 addition & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4604,8 +4604,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas
if not dataframe.columns.is_unique:
raise ValueError("cannot convert DataFrame with non-unique columns")

idx = remove_unused_levels_categories(dataframe.index)
dataframe = dataframe.set_index(idx)
idx, dataframe = remove_unused_levels_categories(dataframe.index, dataframe)
obj = cls()

if isinstance(idx, pd.MultiIndex):
Expand Down
8 changes: 6 additions & 2 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .variable import Variable


def remove_unused_levels_categories(index):
def remove_unused_levels_categories(index, dataframe=None):
"""
Remove unused levels from MultiIndex and unused categories from CategoricalIndex
"""
Expand All @@ -28,7 +28,11 @@ def remove_unused_levels_categories(index):
index = pd.MultiIndex.from_arrays(levels, names=index.names)
elif isinstance(index, pd.CategoricalIndex):
index = index.remove_unused_categories()
return index

if dataframe is None:
return index
dataframe = dataframe.set_index(index)
return dataframe.index, dataframe


class Indexes(collections.abc.Mapping):
Expand Down
12 changes: 10 additions & 2 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3515,6 +3515,14 @@ def test_to_and_from_series(self):
expected_da, DataArray.from_series(actual).drop_vars(["x", "y"])
)

def test_from_series_multiindex(self):
# GH:3951 -- regression test: converting a Series with a MultiIndex to
# xarray must keep every value attached to its own "alpha" label.
# The columns are listed as "B" then "A", i.e. not in sorted order.
df = pd.DataFrame({"B": [1, 2, 3], "A": [4, 5, 6]})
# Name the row index "num" and the column axis "alpha" so that both
# levels carry a name once the columns are stacked into the index.
df = df.rename_axis("num").rename_axis("alpha", axis=1)
# stack("alpha") moves the column labels into the index, giving a Series
# indexed by (num, alpha), which is then converted with to_xarray().
actual = df.stack("alpha").to_xarray()
# Selecting a label must return the values of the column it came from.
assert (actual.sel(alpha="B") == [1, 2, 3]).all()
assert (actual.sel(alpha="A") == [4, 5, 6]).all()

@requires_sparse
def test_from_series_sparse(self):
import sparse
Expand Down Expand Up @@ -4524,7 +4532,7 @@ def test_argmax(self, x, minindex, maxindex, nanindex):

def test_idxmin(self, x, minindex, maxindex, nanindex):
ar0 = xr.DataArray(
x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs,
x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
)

# dim doesn't exist
Expand Down Expand Up @@ -4620,7 +4628,7 @@ def test_idxmin(self, x, minindex, maxindex, nanindex):

def test_idxmax(self, x, minindex, maxindex, nanindex):
ar0 = xr.DataArray(
x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs,
x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
)

# dim doesn't exist
Expand Down

0 comments on commit 1eedc5c

Please sign in to comment.