Skip to content

Commit

Permalink
Fix MultiIndex issues
Browse files: browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Jul 16, 2021
1 parent 18f7c01 commit 4afefaf
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 12 deletions.
24 changes: 17 additions & 7 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2987,17 +2987,22 @@ class max_speed
result = self
else:
result = self.copy()
if all(name is None for name in self.index.names):

if not drop:
if isinstance(self.index, cudf.MultiIndex):
names = tuple(
f"level_{i}" for i, _ in enumerate(self.index.names)
name if name is not None else f"level_{i}"
for i, name in enumerate(self.index.names)
)
else:
names = ("index",)
else:
names = self.index.names
if self.index.name is None:
if "index" in self._data.names:
names = ("level_0",)
else:
names = ("index",)
else:
names = (self.index.name,)

if not drop:
index_columns = self.index._data.columns
for name, index_column in zip(
reversed(names), reversed(index_columns)
Expand Down Expand Up @@ -7322,8 +7327,13 @@ def stack(self, level=-1, dropna=True):
repeated_index = self.index.repeat(self.shape[1])
name_index = Frame({0: self._column_names}).tile(self.shape[0])
new_index = list(repeated_index._columns) + [name_index._columns[0]]
if isinstance(self._index, cudf.MultiIndex):
index_names = self._index.names + [None]
else:
index_names = [None] * len(new_index)
new_index = cudf.core.multiindex.MultiIndex.from_frame(
DataFrame(dict(zip(range(0, len(new_index)), new_index)))
DataFrame(dict(zip(range(0, len(new_index)), new_index))),
names=index_names,
)

# Collect datatypes and cast columns as that type
Expand Down
4 changes: 4 additions & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1380,6 +1380,10 @@ def _drop_na_rows(
)
)
result._copy_type_metadata(frame)
if self._index is not None:
result._index.name = self._index.name
if isinstance(self._index, cudf.MultiIndex):
result._index.names = self._index.names
return result

def _drop_na_columns(self, how="any", subset=None, thresh=None):
Expand Down
27 changes: 22 additions & 5 deletions python/cudf/cudf/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,33 @@ def test_df_stack(nulls, num_cols, num_rows, dtype):
gdf = cudf.from_pandas(pdf)

got = gdf.stack()

expect = pdf.stack()
if {None} == set(expect.index.names):
expect.rename_axis(
list(range(0, len(expect.index.names))), inplace=True
)

assert_eq(expect, got)


def test_df_stack_reset_index():
    """Check ``stack`` followed by ``reset_index`` on a two-level index.

    A cudf DataFrame indexed by columns ``a`` and ``b`` is converted to
    pandas; both are stacked and compared, then both stacked results have
    their index reset and are compared again.
    """
    gdf = cudf.DataFrame(
        {
            "a": [1, 2, 3, 4],
            "b": [10, 11, 12, 13],
            "c": ["ab", "cd", None, "gh"],
        }
    ).set_index(["a", "b"])
    pandas_frame = gdf.to_pandas()

    # First comparison: the stacked results themselves.
    stacked_pandas = pandas_frame.stack()
    stacked_cudf = gdf.stack()
    assert_eq(stacked_pandas, stacked_cudf)

    # Second comparison: the stacked results after moving the index levels
    # back into columns.
    assert_eq(stacked_pandas.reset_index(), stacked_cudf.reset_index())


@pytest.mark.parametrize("num_rows", [1, 2, 10, 1000])
@pytest.mark.parametrize("num_cols", [1, 2, 10])
@pytest.mark.parametrize(
Expand Down

0 comments on commit 4afefaf

Please sign in to comment.