Fix wrong order of coordinate converted from pd.Series with MultiIndex (#3953)

* Fix 3951 * lint * black * Update whatsnew
pydata · Apr 8, 2020 · 1eedc5c · 1eedc5c
1 parent f07adb2
commit 1eedc5c
Show file tree

Hide file tree

Showing 4 changed files with 19 additions and 6 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -58,6 +58,8 @@ New Features
 
 Bug fixes
 ~~~~~~~~~
+- Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`)
+  By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 - Fix renaming of coords when one or more stacked coords is not in
   sorted order during stack+groupby+apply operations. (:issue:`3287`,
   :pull:`3906`) By `Spencer Hill <https://github.com/spencerahill>`_

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -4604,8 +4604,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas
         if not dataframe.columns.is_unique:
             raise ValueError("cannot convert DataFrame with non-unique columns")
 
-        idx = remove_unused_levels_categories(dataframe.index)
-        dataframe = dataframe.set_index(idx)
+        idx, dataframe = remove_unused_levels_categories(dataframe.index, dataframe)
         obj = cls()
 
         if isinstance(idx, pd.MultiIndex):

diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
@@ -9,7 +9,7 @@
 from .variable import Variable
 
 
-def remove_unused_levels_categories(index):
+def remove_unused_levels_categories(index, dataframe=None):
     """
     Remove unused levels from MultiIndex and unused categories from CategoricalIndex
     """
@@ -28,7 +28,11 @@ def remove_unused_levels_categories(index):
             index = pd.MultiIndex.from_arrays(levels, names=index.names)
     elif isinstance(index, pd.CategoricalIndex):
         index = index.remove_unused_categories()
-    return index
+
+    if dataframe is None:
+        return index
+    dataframe = dataframe.set_index(index)
+    return dataframe.index, dataframe
 
 
 class Indexes(collections.abc.Mapping):

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
@@ -3515,6 +3515,14 @@ def test_to_and_from_series(self):
             expected_da, DataArray.from_series(actual).drop_vars(["x", "y"])
         )
 
+    def test_from_series_multiindex(self):
+        # GH:3951
+        df = pd.DataFrame({"B": [1, 2, 3], "A": [4, 5, 6]})
+        df = df.rename_axis("num").rename_axis("alpha", axis=1)
+        actual = df.stack("alpha").to_xarray()
+        assert (actual.sel(alpha="B") == [1, 2, 3]).all()
+        assert (actual.sel(alpha="A") == [4, 5, 6]).all()
+
     @requires_sparse
     def test_from_series_sparse(self):
         import sparse
@@ -4524,7 +4532,7 @@ def test_argmax(self, x, minindex, maxindex, nanindex):
 
     def test_idxmin(self, x, minindex, maxindex, nanindex):
         ar0 = xr.DataArray(
-            x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs,
+            x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
         )
 
         # dim doesn't exist
@@ -4620,7 +4628,7 @@ def test_idxmin(self, x, minindex, maxindex, nanindex):
 
     def test_idxmax(self, x, minindex, maxindex, nanindex):
         ar0 = xr.DataArray(
-            x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs,
+            x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
         )
 
         # dim doesn't exist