diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 28fb10ea334..9602cf8d473 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2448,7 +2448,14 @@ def one_hot_encoding(self, cats, dtype="float64"): def encode(cat): if cat is None: - return self.isnull() + if self.dtype.kind == "f": + # Need to ignore `np.nan` values in case + # of a float column + return self.__class__( + libcudf.unary.is_null((self._column)) + ) + else: + return self.isnull() elif np.issubdtype(type(cat), np.floating) and np.isnan(cat): return self.__class__(libcudf.unary.is_nan(self._column)) else: diff --git a/python/cudf/cudf/tests/test_label_encode.py b/python/cudf/cudf/tests/test_label_encode.py index bb4921aac8c..29a787768f2 100644 --- a/python/cudf/cudf/tests/test_label_encode.py +++ b/python/cudf/cudf/tests/test_label_encode.py @@ -123,8 +123,3 @@ def test_label_encode_dtype(ncats, cat_dtype): cats = s.unique().astype(s.dtype) encoded_col = s.label_encoding(cats=cats) np.testing.assert_equal(encoded_col.dtype, cat_dtype) - - -if __name__ == "__main__": - test_label_encode() - test_label_encode_drop_one() diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py index 2c87508c3f5..8b5b0609f31 100644 --- a/python/cudf/cudf/tests/test_onehot.py +++ b/python/cudf/cudf/tests/test_onehot.py @@ -189,5 +189,19 @@ def test_get_dummies_prefix_sep(prefix, prefix_sep): utils.assert_eq(encoded_expected, encoded_actual) -if __name__ == "__main__": - test_onehot_random() +def test_get_dummies_with_nan(): + df = cudf.DataFrame( + {"a": cudf.Series([1, 2, np.nan, None], nan_as_null=False)} + ) + expected = cudf.DataFrame( + { + "a_1.0": [1, 0, 0, 0], + "a_2.0": [0, 1, 0, 0], + "a_nan": [0, 0, 1, 0], + "a_null": [0, 0, 0, 1], + }, + dtype="uint8", + ) + actual = cudf.get_dummies(df, dummy_na=True, columns=["a"]) + + utils.assert_eq(expected, actual)