From 4afefaf0441cbb6de94788dcbdc6236f5a71c08a Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 15 Jul 2021 20:43:49 -0700 Subject: [PATCH] fix multiIndex issues --- python/cudf/cudf/core/dataframe.py | 24 ++++++++++++++++------- python/cudf/cudf/core/frame.py | 4 ++++ python/cudf/cudf/tests/test_reshape.py | 27 +++++++++++++++++++++----- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index d6c7b40322f..e8d42b96b03 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2987,17 +2987,22 @@ class max_speed result = self else: result = self.copy() - if all(name is None for name in self.index.names): + + if not drop: if isinstance(self.index, cudf.MultiIndex): names = tuple( - f"level_{i}" for i, _ in enumerate(self.index.names) + name if name is not None else f"level_{i}" + for i, name in enumerate(self.index.names) ) else: - names = ("index",) - else: - names = self.index.names + if self.index.name is None: + if "index" in self._data.names: + names = ("level_0",) + else: + names = ("index",) + else: + names = (self.index.name,) - if not drop: index_columns = self.index._data.columns for name, index_column in zip( reversed(names), reversed(index_columns) @@ -7322,8 +7327,13 @@ def stack(self, level=-1, dropna=True): repeated_index = self.index.repeat(self.shape[1]) name_index = Frame({0: self._column_names}).tile(self.shape[0]) new_index = list(repeated_index._columns) + [name_index._columns[0]] + if isinstance(self._index, cudf.MultiIndex): + index_names = self._index.names + [None] + else: + index_names = [None] * len(new_index) new_index = cudf.core.multiindex.MultiIndex.from_frame( - DataFrame(dict(zip(range(0, len(new_index)), new_index))) + DataFrame(dict(zip(range(0, len(new_index)), new_index))), + names=index_names, ) # Collect datatypes and cast columns as that type diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 88a16b6fa69..3e8286c9f6b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1380,6 +1380,10 @@ def _drop_na_rows( ) ) result._copy_type_metadata(frame) + if self._index is not None: + result._index.name = self._index.name + if isinstance(self._index, cudf.MultiIndex): + result._index.names = self._index.names return result def _drop_na_columns(self, how="any", subset=None, thresh=None): diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 0161df7696c..b8f975f233e 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -103,16 +103,33 @@ def test_df_stack(nulls, num_cols, num_rows, dtype): gdf = cudf.from_pandas(pdf) got = gdf.stack() - expect = pdf.stack() - if {None} == set(expect.index.names): - expect.rename_axis( - list(range(0, len(expect.index.names))), inplace=True - ) assert_eq(expect, got) +def test_df_stack_reset_index(): + df = cudf.DataFrame( + { + "a": [1, 2, 3, 4], + "b": [10, 11, 12, 13], + "c": ["ab", "cd", None, "gh"], + } + ) + df = df.set_index(["a", "b"]) + pdf = df.to_pandas() + + expected = pdf.stack() + actual = df.stack() + + assert_eq(expected, actual) + + expected = expected.reset_index() + actual = actual.reset_index() + + assert_eq(expected, actual) + + @pytest.mark.parametrize("num_rows", [1, 2, 10, 1000]) @pytest.mark.parametrize("num_cols", [1, 2, 10]) @pytest.mark.parametrize(