Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Add Index.set_names api #6929

Merged
merged 8 commits into from
Dec 8, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

## New Features

- PR #6847 Add a cmake find module for cuFile in JNI code
- PR #6902 Implement `DataFrame.quantile` for `datetime` and `timedelta` data types
- PR #6929 Add `Index.set_names` api

## Improvements

Expand Down Expand Up @@ -50,7 +52,6 @@
- PR #6765 Cupy fallback for __array_function__ and __array_ufunc__ for cudf.Series
- PR #6817 Add support for scatter() on lists-of-struct columns
- PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64`
- PR #6847 Add a cmake find module for cuFile in JNI code
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved
- PR #6726 Support selecting different hash functions in hash_partition
- PR #6619 Improve Dockerfile
- PR #6831 Added parquet chunked writing ability for list columns
Expand Down
80 changes: 80 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,86 @@ def _clean_nulls_from_index(self):
else:
return self

@property
def nlevels(self):
    """
    Number of levels in this index.

    This implementation returns the constant ``1``; it does not inspect
    the index contents.
    """
    return 1

def _set_names(self, names, inplace=False):
if inplace:
idx = self
else:
idx = self.copy(deep=False)
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved

idx.names = names
if not inplace:
return idx

def set_names(self, names, level=None, inplace=False):
    """
    Set the name of this Index.

    A scalar ``names`` is wrapped in a one-element list before being
    handed to ``_set_names``.

    Parameters
    ----------
    names : label or list of label
        Name(s) to set.
    level : int, label or list of int or label, optional
        Only meaningful for a MultiIndex. For this index type it must be
        None.
    inplace : bool, default False
        Modifies the object directly, instead of creating a new Index.

    Returns
    -------
    Index
        The same type as the caller or None if inplace is True.

    Raises
    ------
    ValueError
        If ``level`` is not None.

    See Also
    --------
    cudf.core.index.Index.rename : Able to set new names without level.

    Examples
    --------
    >>> import cudf
    >>> idx = cudf.Index([1, 2, 3, 4])
    >>> idx
    Int64Index([1, 2, 3, 4], dtype='int64')
    >>> idx.set_names('quarter')
    Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')

    On a MultiIndex, names can also be set per level:

    >>> idx = cudf.MultiIndex.from_product([['python', 'cobra'],
    ...                                     [2018, 2019]])
    >>> idx.names
    FrozenList([None, None])
    >>> idx.set_names(['kind', 'year'], inplace=True)
    >>> idx.names
    FrozenList(['kind', 'year'])
    >>> idx.set_names('species', level=0, inplace=True)
    >>> idx.names
    FrozenList(['species', 'year'])
    """
    # Levels only exist on a MultiIndex; reject them up front.
    if level is not None:
        raise ValueError("Level must be None for non-MultiIndex")

    # Normalise a scalar name into the list form `_set_names` receives.
    new_names = names if is_list_like(names) else [names]
    return self._set_names(names=new_names, inplace=inplace)

def fillna(self, value, downcast=None):
"""
Fill null values with the specified value.
Expand Down
40 changes: 40 additions & 0 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,43 @@ def names(self, value):
assert len(value) == self.nlevels
self._names = pd.core.indexes.frozen.FrozenList(value)

def set_names(self, names, level=None, inplace=False):
    """
    Set the names of this MultiIndex, for all levels or selected ones.

    Parameters
    ----------
    names : label or list of label
        Name(s) to set. Must be list-like when ``level`` is None and the
        index has more than one level; must be a scalar when ``level``
        is a scalar.
    level : int, label or list of int or label, optional
        Level(s) to rename. Integers are positions (negative values
        count from the end); any other value is looked up among the
        current level names. None renames every level.
    inplace : bool, default False
        Modifies the object directly, instead of creating a new
        MultiIndex.

    Returns
    -------
    MultiIndex
        The renamed index, or None if ``inplace`` is True.

    Raises
    ------
    TypeError
        If ``names`` is list-like while ``level`` is a scalar, or if
        ``names`` is a scalar while ``level`` is None and the index has
        more than one level.
    ValueError
        If the number of names does not match the number of targeted
        levels, or if a level label matches more than one level name.
    KeyError
        If a level label is not among the current level names.
    IndexError
        If an integer level is out of range.
    """
    import numbers

    is_list_like = cudf.utils.dtypes.is_list_like
    names_is_list = is_list_like(names)

    if level is not None and not is_list_like(level) and names_is_list:
        raise TypeError(
            "Names must be a string when a single level is provided."
        )

    if not names_is_list and level is None and self.nlevels > 1:
        raise TypeError("Must pass list-like as `names`.")

    # Normalise both arguments to lists so they can be paired up.
    if not names_is_list:
        names = [names]
    if level is not None and not is_list_like(level):
        level = [level]

    if level is not None and len(names) != len(level):
        raise ValueError("Length of names must match length of level.")
    if level is None and len(names) != self.nlevels:
        raise ValueError(
            "Length of names must match number of levels in MultiIndex."
        )

    if level is not None:
        # Labels are resolved against the names as they were before this
        # call, so renaming one level cannot change how a later label in
        # the same call is looked up.
        current_names = list(self.names)
        updated_names = list(current_names)
        for lvl, new_name in zip(level, names):
            if not isinstance(lvl, numbers.Integral):
                if current_names.count(lvl) > 1:
                    raise ValueError(
                        f"The name {lvl} occurs multiple times, "
                        "use a level number"
                    )
                try:
                    lvl = current_names.index(lvl)
                except ValueError:
                    raise KeyError(f"Level {lvl} not found") from None
            # List indexing handles negative positions and raises
            # IndexError for positions that are out of range.
            updated_names[lvl] = new_name
        names = updated_names

    return self._set_names(names=names, inplace=inplace)

@classmethod
def _from_table(cls, table, names=None):
df = cudf.DataFrame(table._data)
Expand Down Expand Up @@ -444,6 +481,9 @@ def codes(self):

@property
def nlevels(self):
    """
    Integer number of levels in this MultiIndex.

    Read from the second entry of ``self._source_data.shape``.
    """
    source_shape = self._source_data.shape
    return source_shape[1]

@property
Expand Down
42 changes: 40 additions & 2 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
RangeIndex,
as_index,
)
from cudf.utils.utils import search_range

from cudf.tests.utils import (
FLOAT_TYPES,
NUMERIC_TYPES,
Expand All @@ -32,6 +30,7 @@
assert_eq,
assert_exceptions_equal,
)
from cudf.utils.utils import search_range


def test_df_set_index_from_series():
Expand Down Expand Up @@ -1759,3 +1758,42 @@ def test_index_rangeindex_get_item_slices(rge, sl):
gridx = cudf.RangeIndex(*rge)

assert_eq(pridx[sl], gridx[sl])


@pytest.mark.parametrize(
    "idx",
    [
        pd.Index([1, 2, 3]),
        pd.Index(["abc", "def", "ghi"]),
        pd.RangeIndex(0, 10, 1),
        pd.Index([0.324, 0.234, 1.3], name="abc"),
    ],
)
@pytest.mark.parametrize("names", [None, "a", "new name", ["another name"]])
@pytest.mark.parametrize("inplace", [True, False])
def test_index_set_names(idx, names, inplace):
    """Compare ``set_names`` on a pandas index and its cudf conversion."""
    pandas_idx = idx.copy()
    cudf_idx = cudf.from_pandas(idx)

    pandas_result = pandas_idx.set_names(names=names, inplace=inplace)
    cudf_result = cudf_idx.set_names(names=names, inplace=inplace)

    if inplace:
        # In-place calls are checked through the mutated indexes rather
        # than through the return values.
        assert_eq(pandas_idx, cudf_idx)
    else:
        assert_eq(pandas_result, cudf_result)


@pytest.mark.parametrize("idx", [pd.Index([1, 2, 3])])
@pytest.mark.parametrize("level", [1, [0]])
@pytest.mark.parametrize("names", [None, "a"])
def test_index_set_names_error(idx, level, names):
    """
    Call ``set_names`` with a ``level`` on a flat index in both libraries.

    The comparison is delegated to ``assert_exceptions_equal``
    (presumably it checks that both calls raise matching exceptions;
    confirm against its definition).
    """
    pandas_idx = idx.copy()
    cudf_idx = cudf.from_pandas(idx)

    # Each side gets its own kwargs dict, as in the original call.
    pandas_call = ([], {"names": names, "level": level})
    cudf_call = ([], {"names": names, "level": level})

    assert_exceptions_equal(
        lfunc=pandas_idx.set_names,
        rfunc=cudf_idx.set_names,
        lfunc_args_and_kwargs=pandas_call,
        rfunc_args_and_kwargs=cudf_call,
    )
73 changes: 73 additions & 0 deletions python/cudf/cudf/tests/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1340,3 +1340,76 @@ def test_multiIndex_argsort(pdi, ascending):
actual = gdi.argsort(ascending=ascending)

assert_eq(expected, actual)


@pytest.mark.parametrize(
    "idx", [pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]])]
)
@pytest.mark.parametrize(
    "names", [[None, None], ["a", None], ["new name", "another name"]]
)
@pytest.mark.parametrize("inplace", [True, False])
def test_multiindex_set_names(idx, names, inplace):
    """Compare renaming every level of a pandas and a cudf MultiIndex."""
    pandas_idx = idx.copy()
    cudf_idx = cudf.from_pandas(idx)

    pandas_result = pandas_idx.set_names(names=names, inplace=inplace)
    cudf_result = cudf_idx.set_names(names=names, inplace=inplace)

    if inplace:
        # In-place calls are checked through the mutated indexes rather
        # than through the return values.
        assert_eq(pandas_idx, cudf_idx)
    else:
        assert_eq(pandas_result, cudf_result)


@pytest.mark.parametrize(
    "idx",
    [
        pd.MultiIndex.from_product(
            [["python", "cobra"], [2018, 2019], ["aab", "bcd"]]
        ),
        pd.MultiIndex.from_product(
            [["python", "cobra"], [2018, 2019], ["aab", "bcd"]],
            names=["one", "two", "three"],
        ),
    ],
)
@pytest.mark.parametrize(
    "level, names",
    [
        (0, "abc"),
        (1, "xyz"),
        ([2, 1], ["a", "b"]),
        (None, ["a", "b", "c"]),
        (None, ["a", None, "c"]),
    ],
)
@pytest.mark.parametrize("inplace", [True, False])
def test_multiindex_set_names_level(idx, level, names, inplace):
    """Compare level-wise renaming of a pandas and a cudf MultiIndex."""
    pandas_idx = idx.copy()
    cudf_idx = cudf.from_pandas(idx)

    pandas_result = pandas_idx.set_names(
        names=names, level=level, inplace=inplace
    )
    cudf_result = cudf_idx.set_names(
        names=names, level=level, inplace=inplace
    )

    if inplace:
        # In-place calls are checked through the mutated indexes rather
        # than through the return values.
        assert_eq(pandas_idx, cudf_idx)
    else:
        assert_eq(pandas_result, cudf_result)


@pytest.mark.parametrize(
    "level, names", [(1, ["a"]), (None, "a"), ([1, 2], ["a"]), (None, ["a"])]
)
def test_multiindex_set_names_error(level, names):
    """
    Call ``set_names`` with mismatched ``level``/``names`` in both libraries.

    The comparison is delegated to ``assert_exceptions_equal``
    (presumably it checks that both calls raise matching exceptions;
    confirm against its definition).
    """
    pandas_idx = pd.MultiIndex.from_product(
        [["python", "cobra"], [2018, 2019], ["aab", "bcd"]]
    )
    cudf_idx = cudf.from_pandas(pandas_idx)

    # Each side gets its own kwargs dict, as in the original call.
    pandas_call = ([], {"names": names, "level": level})
    cudf_call = ([], {"names": names, "level": level})

    assert_exceptions_equal(
        lfunc=pandas_idx.set_names,
        rfunc=cudf_idx.set_names,
        lfunc_args_and_kwargs=pandas_call,
        rfunc_args_and_kwargs=cudf_call,
    )