Skip to content

Commit

Permalink
handle having nan values in one_hot_encoding.
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Dec 31, 2020
1 parent 28d18d6 commit 484074a
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 8 deletions.
9 changes: 8 additions & 1 deletion python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2448,7 +2448,14 @@ def one_hot_encoding(self, cats, dtype="float64"):

def encode(cat):
if cat is None:
return self.isnull()
if self.dtype.kind == "f":
# Need to ignore `np.nan` values in the case
# of a float column
return self.__class__(
libcudf.unary.is_null((self._column))
)
else:
return self.isnull()
elif np.issubdtype(type(cat), np.floating) and np.isnan(cat):
return self.__class__(libcudf.unary.is_nan(self._column))
else:
Expand Down
5 changes: 0 additions & 5 deletions python/cudf/cudf/tests/test_label_encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,3 @@ def test_label_encode_dtype(ncats, cat_dtype):
cats = s.unique().astype(s.dtype)
encoded_col = s.label_encoding(cats=cats)
np.testing.assert_equal(encoded_col.dtype, cat_dtype)


if __name__ == "__main__":
test_label_encode()
test_label_encode_drop_one()
18 changes: 16 additions & 2 deletions python/cudf/cudf/tests/test_onehot.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,5 +189,19 @@ def test_get_dummies_prefix_sep(prefix, prefix_sep):
utils.assert_eq(encoded_expected, encoded_actual)


if __name__ == "__main__":
test_onehot_random()
def test_get_dummies_with_nan():
    """Check ``get_dummies`` output for a column holding both NaN and null.

    The input series is built with ``nan_as_null=False`` from the values
    ``[1, 2, np.nan, None]``. With ``dummy_na=True`` the result is expected
    to carry one ``uint8`` indicator column per value, including distinct
    ``a_nan`` and ``a_null`` columns.
    """
    source_column = cudf.Series([1, 2, np.nan, None], nan_as_null=False)
    df = cudf.DataFrame({"a": source_column})

    # One indicator column per input row, each flagging exactly that row.
    indicator_columns = {
        "a_1.0": [1, 0, 0, 0],
        "a_2.0": [0, 1, 0, 0],
        "a_nan": [0, 0, 1, 0],
        "a_null": [0, 0, 0, 1],
    }
    expected = cudf.DataFrame(indicator_columns, dtype="uint8")

    actual = cudf.get_dummies(df, dummy_na=True, columns=["a"])

    utils.assert_eq(expected, actual)

0 comments on commit 484074a

Please sign in to comment.