rapidsai · rapids-bot · Jan 7, 2021 · Dec 31, 2020
@@ -2448,7 +2448,14 @@ def one_hot_encoding(self, cats, dtype="float64"):
 
         def encode(cat):
             if cat is None:
-                return self.isnull()
+                if self.dtype.kind == "f":
+                    # For a float column, match only true nulls here;
+                    # `np.nan` values are matched by the branch below.
+                    return self.__class__(
+                        libcudf.unary.is_null((self._column))
+                    )
+                else:
+                    return self.isnull()
             elif np.issubdtype(type(cat), np.floating) and np.isnan(cat):
                 return self.__class__(libcudf.unary.is_nan(self._column))
             else:

@@ -123,8 +123,3 @@ def test_label_encode_dtype(ncats, cat_dtype):
     cats = s.unique().astype(s.dtype)
     encoded_col = s.label_encoding(cats=cats)
     np.testing.assert_equal(encoded_col.dtype, cat_dtype)
-
-
-if __name__ == "__main__":
-    test_label_encode()
-    test_label_encode_drop_one()
@@ -189,5 +189,19 @@ def test_get_dummies_prefix_sep(prefix, prefix_sep):
     utils.assert_eq(encoded_expected, encoded_actual)
 
 
-if __name__ == "__main__":
-    test_onehot_random()
+def test_get_dummies_with_nan():
+    df = cudf.DataFrame(
+        {"a": cudf.Series([1, 2, np.nan, None], nan_as_null=False)}
+    )
+    expected = cudf.DataFrame(
+        {
+            "a_1.0": [1, 0, 0, 0],
+            "a_2.0": [0, 1, 0, 0],
+            "a_nan": [0, 0, 1, 0],
+            "a_null": [0, 0, 0, 1],
+        },
+        dtype="uint8",
+    )
+    actual = cudf.get_dummies(df, dummy_na=True, columns=["a"])
+
+    utils.assert_eq(expected, actual)