From 155f10f2584467c4db2e02fab0d0230159140a2b Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 11 Feb 2021 19:03:22 -0800 Subject: [PATCH 01/35] fix issues with updating to latest pandas --- python/cudf/cudf/core/_compat.py | 1 + python/cudf/cudf/core/column/categorical.py | 6 ++++ python/cudf/cudf/core/column/column.py | 4 +-- python/cudf/cudf/core/column/datetime.py | 33 ++++++++++++++------- python/cudf/cudf/core/column/timedelta.py | 15 ++++------ python/cudf/cudf/tests/test_dataframe.py | 8 ++++- python/cudf/cudf/tests/test_datetime.py | 10 +++++-- python/cudf/cudf/tests/test_reductions.py | 22 +++++++++++--- python/cudf/cudf/tests/test_replace.py | 29 ++++++++++++------ python/cudf/cudf/tests/test_string.py | 2 +- python/cudf/cudf/tests/test_timedelta.py | 14 ++------- 11 files changed, 93 insertions(+), 51 deletions(-) diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index e18a204eedb..de19acf9ba4 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -6,3 +6,4 @@ PANDAS_VERSION = version.parse(pd.__version__) PANDAS_GE_100 = PANDAS_VERSION >= version.parse("1.0") PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1") +PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2.0") diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 01c8dfb5f1b..a1d4a2668dd 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1204,6 +1204,12 @@ def fillna( raise ValueError(err_msg) from err else: fill_value = column.as_column(fill_value, nan_as_null=False) + if isinstance(fill_value, CategoricalColumn): + if self.dtype != fill_value.dtype: + raise ValueError( + "Cannot set a Categorical with another, " + "without identical categories" + ) # TODO: only required if fill_value has a subset of the # categories: fill_value = fill_value.cat()._set_categories( diff --git 
a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index d615a7cfae4..2344b785dbf 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1236,7 +1236,7 @@ def sum( def product( self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0 ): - raise TypeError(f"cannot perform prod with type {self.dtype}") + raise TypeError(f"cannot perform product with type {self.dtype}") def mean(self, skipna: bool = None, dtype: Dtype = None): raise TypeError(f"cannot perform mean with type {self.dtype}") @@ -1248,7 +1248,7 @@ def var(self, skipna: bool = None, ddof=1, dtype: Dtype = np.float64): raise TypeError(f"cannot perform var with type {self.dtype}") def kurtosis(self, skipna: bool = None): - raise TypeError(f"cannot perform kurt with type {self.dtype}") + raise TypeError(f"cannot perform kurtosis with type {self.dtype}") def skew(self, skipna: bool = None): raise TypeError(f"cannot perform skew with type {self.dtype}") diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 6029052c1d3..1d706687489 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -13,11 +13,17 @@ import cudf from cudf import _lib as libcudf from cudf._typing import DatetimeLikeScalar, Dtype, DtypeObj, ScalarLike +from cudf.core._compat import PANDAS_GE_120 from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, column, string from cudf.utils.dtypes import is_scalar from cudf.utils.utils import _fillna_natwise +if PANDAS_GE_120: + _guess_datetime_format = pd.core.tools.datetimes.guess_datetime_format +else: + _guess_datetime_format = pd.core.tools.datetimes._guess_datetime_format + # nanoseconds per time_unit _numpy_to_pandas_conversion = { "ns": 1, @@ -235,6 +241,19 @@ def mean(self, skipna=None, dtype=np.float64) -> ScalarLike: unit=self.time_unit, ) + def std( + self, skipna: bool = None, ddof: int = 
1, dtype: Dtype = np.float64 + ) -> pd.Timedelta: + return pd.Timedelta( + self.as_numerical.std(skipna=skipna, ddof=ddof, dtype=dtype) + * _numpy_to_pandas_conversion[self.time_unit], + ) + + def median(self, skipna: bool = None) -> pd.Timestamp: + return pd.Timestamp( + self.as_numerical.median(skipna=skipna), unit=self.time_unit + ) + def quantile( self, q: Union[float, Sequence[float]], interpolation: str, exact: bool ) -> ColumnBase: @@ -375,7 +394,7 @@ def infer_format(element: str, **kwargs) -> str: """ Infers datetime format from a string, also takes cares for `ms` and `ns` """ - fmt = pd.core.tools.datetimes._guess_datetime_format(element, **kwargs) + fmt = _guess_datetime_format(element, **kwargs) if fmt is not None: return fmt @@ -389,15 +408,11 @@ def infer_format(element: str, **kwargs) -> str: second_parts = re.split(r"(\D+)", element_parts[1], maxsplit=1) subsecond_fmt = ".%" + str(len(second_parts[0])) + "f" - first_part = pd.core.tools.datetimes._guess_datetime_format( - element_parts[0], **kwargs - ) + first_part = _guess_datetime_format(element_parts[0], **kwargs) # For the case where first_part is '00:00:03' if first_part is None: tmp = "1970-01-01 " + element_parts[0] - first_part = pd.core.tools.datetimes._guess_datetime_format( - tmp, **kwargs - ).split(" ", 1)[1] + first_part = _guess_datetime_format(tmp, **kwargs).split(" ", 1)[1] if first_part is None: raise ValueError("Unable to infer the timestamp format from the data") @@ -411,9 +426,7 @@ def infer_format(element: str, **kwargs) -> str: if len(second_part) > 1: # Only infer if second_parts is not an empty string. 
- second_part = pd.core.tools.datetimes._guess_datetime_format( - second_part, **kwargs - ) + second_part = _guess_datetime_format(second_part, **kwargs) else: second_part = "" diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 75509df4ec6..17222f16673 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -380,15 +380,12 @@ def quantile( def sum( self, skipna: bool = None, dtype: Dtype = None, min_count=0 ) -> pd.Timedelta: - if len(self) == 0: - return pd.Timedelta(None, unit=self.time_unit) - else: - return pd.Timedelta( - self.as_numerical.sum( - skipna=skipna, dtype=dtype, min_count=min_count - ), - unit=self.time_unit, - ) + return pd.Timedelta( + self.as_numerical.sum( + skipna=skipna, dtype=dtype, min_count=min_count + ), + unit=self.time_unit, + ) def std( self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64 diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index d8005911fcd..78105561729 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -72,7 +72,13 @@ def test_init_via_list_of_empty_tuples(rows): pdf = pd.DataFrame(data) gdf = gd.DataFrame(data) - assert_eq(pdf, gdf, check_like=True) + assert_eq( + pdf, + gdf, + check_like=True, + check_column_type=False, + check_index_type=False, + ) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 044c8bd5954..cffe640d1f9 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1182,7 +1182,7 @@ def test_datetime_stats(data, dtype, stat): assert_eq(expected, actual) -@pytest.mark.parametrize("op", ["max", "min"]) +@pytest.mark.parametrize("op", ["max", "min", "std", "median"]) @pytest.mark.parametrize( "data", [ @@ -1201,10 +1201,14 @@ def test_datetime_reductions(data, op, dtype): 
actual = getattr(sr, op)() expected = getattr(psr, op)() - if np.isnat(expected.to_numpy()) and np.isnat(actual): + if ( + expected is pd.NaT + and actual is pd.NaT + or (np.isnat(expected.to_numpy()) and np.isnat(actual)) + ): assert True else: - assert_eq(expected.to_numpy(), actual) + assert_eq(expected, actual) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index faf895b8f42..323f8c62892 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -3,6 +3,7 @@ from __future__ import division, print_function import random +import re from itertools import product import numpy as np @@ -166,15 +167,20 @@ def test_date_minmax(): @pytest.mark.parametrize( - "op", - ["sum", "product", "std", "var", "median", "kurt", "kurtosis", "skew"], + "op", ["sum", "product", "var", "kurt", "kurtosis", "skew"], ) def test_datetime_unsupported_reductions(op): gsr = cudf.Series([1, 2, 3, None], dtype="datetime64[ns]") psr = gsr.to_pandas() utils.assert_exceptions_equal( - lfunc=getattr(psr, op), rfunc=getattr(gsr, op), + lfunc=getattr(psr, op), + rfunc=getattr(gsr, op), + expected_error_message=re.escape( + "cannot perform " + + ("kurtosis" if op == "kurt" else op) + + " with type datetime64[ns]" + ), ) @@ -183,7 +189,15 @@ def test_timedelta_unsupported_reductions(op): gsr = cudf.Series([1, 2, 3, None], dtype="timedelta64[ns]") psr = gsr.to_pandas() - utils.assert_exceptions_equal(getattr(psr, op), getattr(gsr, op)) + utils.assert_exceptions_equal( + lfunc=getattr(psr, op), + rfunc=getattr(gsr, op), + expected_error_message=re.escape( + "cannot perform " + + ("kurtosis" if op == "kurt" else op) + + " with type timedelta64[ns]" + ), + ) @pytest.mark.parametrize("op", ["sum", "product", "std", "var"]) diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index f4713b19015..a7f4d1a527a 100644 --- 
a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -333,7 +333,7 @@ def test_fillna_method_numerical(data, container, data_dtype, method, inplace): @pytest.mark.parametrize( - "psr", + "psr_data", [ pd.Series(["a", "b", "a", None, "c", None], dtype="category"), pd.Series( @@ -373,8 +373,8 @@ def test_fillna_method_numerical(data, container, data_dtype, method, inplace): ], ) @pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_categorical(psr, fill_value, inplace): - +def test_fillna_categorical(psr_data, fill_value, inplace): + psr = psr_data.copy() gsr = Series.from_pandas(psr) if isinstance(fill_value, pd.Series): @@ -382,14 +382,25 @@ def test_fillna_categorical(psr, fill_value, inplace): else: fill_value_cudf = fill_value - expected = psr.fillna(fill_value, inplace=inplace) - got = gsr.fillna(fill_value_cudf, inplace=inplace) + if ( + isinstance(fill_value_cudf, cudf.Series) + and gsr.dtype != fill_value_cudf.dtype + ): + assert_exceptions_equal( + lfunc=psr.fillna, + rfunc=gsr.fillna, + lfunc_args_and_kwargs=([fill_value], {"inplace": inplace}), + rfunc_args_and_kwargs=([fill_value_cudf], {"inplace": inplace}), + ) + else: + expected = psr.fillna(fill_value, inplace=inplace) + got = gsr.fillna(fill_value_cudf, inplace=inplace) - if inplace: - expected = psr - got = gsr + if inplace: + expected = psr + got = gsr - assert_eq(expected, got) + assert_eq(expected, got) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 080420c8f75..f2748f5053c 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -2843,7 +2843,7 @@ def test_string_product(): lfunc=psr.product, rfunc=sr.product, expected_error_message=re.escape( - f"cannot perform prod with type {sr.dtype}" + f"cannot perform product with type {sr.dtype}" ), ) diff --git a/python/cudf/cudf/tests/test_timedelta.py 
b/python/cudf/cudf/tests/test_timedelta.py index 3b625a5ad85..0b886dcef9e 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -597,18 +597,8 @@ def test_timedelta_series_ops_with_cudf_scalars(data, cpu_scalar, dtype, op): [1000000, 200000, 3000000], [1000000, 200000, None], [], - pytest.param( - [None], - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/35644" - ), - ), - pytest.param( - [None, None, None, None, None], - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/35644" - ), - ), + [None], + [None, None, None, None, None], [12, 12, 22, 343, 4353534, 435342], np.array([10, 20, 30, None, 100]), cp.asarray([10, 20, 30, 100]), From 454ecf5c2d5f6e24d01433617e93cd305eed78a1 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 12 Feb 2021 12:32:57 -0800 Subject: [PATCH 02/35] remove xfails and fix issues --- python/cudf/cudf/core/column/string.py | 2 +- python/cudf/cudf/tests/test_dataframe.py | 20 +-- python/cudf/cudf/tests/test_dropna.py | 30 ++-- python/cudf/cudf/tests/test_index.py | 23 --- python/cudf/cudf/tests/test_json.py | 9 +- python/cudf/cudf/tests/test_replace.py | 12 +- python/cudf/cudf/tests/test_setitem.py | 2 +- python/cudf/cudf/tests/test_string.py | 177 ++++++++--------------- python/cudf/cudf/tests/test_timedelta.py | 28 +--- 9 files changed, 89 insertions(+), 214 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 21f504ea684..aa5172a9a89 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5189,7 +5189,7 @@ def _get_cols_list(parent_obj, others): ] return cols_list - elif others is not None: + elif others is not None and not isinstance(others, StringMethods): if ( parent_index is not None and isinstance(others, cudf.Series) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 
78105561729..a93796e46f7 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1860,6 +1860,9 @@ def test_dataframe_min_count_ops(data, ops, skipna, min_count): psr = pd.DataFrame(data) gsr = gd.DataFrame(data) + if psr.shape[0] * psr.shape[1] < min_count: + pytest.xfail("https://github.com/pandas-dev/pandas/issues/39738") + assert_eq( getattr(psr, ops)(skipna=skipna, min_count=min_count), getattr(gsr, ops)(skipna=skipna, min_count=min_count), @@ -4294,16 +4297,11 @@ def test_isin_dataframe(data, values): rfunc_args_and_kwargs=([values],), ) else: - try: - expected = pdf.isin(values) - except ValueError as e: - if str(e) == "Lengths must match.": - # xref https://github.com/pandas-dev/pandas/issues/34256 - pytest.xfail( - "https://github.com/pandas-dev/pandas/issues/34256" - ) + expected = pdf.isin(values) + if isinstance(values, (pd.DataFrame, pd.Series)): values = gd.from_pandas(values) + got = gdf.isin(values) assert_eq(got, expected) @@ -4907,17 +4905,13 @@ def test_rowwise_ops_datetime_dtypes_2(data, op, skipna): ], ) def test_rowwise_ops_datetime_dtypes_pdbug(data): - """ - Pandas bug: https://github.com/pandas-dev/pandas/issues/36907 - """ pdf = pd.DataFrame(data) gdf = gd.from_pandas(pdf) expected = pdf.max(axis=1, skipna=False) got = gdf.max(axis=1, skipna=False) - with pytest.raises(AssertionError, match="numpy array are different"): - assert_eq(got, expected) + assert_eq(got, expected) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index 08378361188..ddd569acf0d 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -40,14 +40,12 @@ def test_dropna_series(data, nulls, inplace): if gsr.null_count == len(gsr): check_dtype = False + expected = psr.dropna() + actual = gsr.dropna() + if inplace: - psr.dropna() - gsr.dropna() expected = psr actual = gsr - else: - expected = psr.dropna() - actual = 
gsr.dropna() assert_eq(expected, actual, check_dtype=check_dtype) @@ -71,14 +69,12 @@ def test_dropna_dataframe(data, how, axis, inplace): pdf = pd.DataFrame(data) gdf = cudf.from_pandas(pdf) + expected = pdf.dropna(axis=axis, how=how, inplace=inplace) + actual = gdf.dropna(axis=axis, how=how, inplace=inplace) + if inplace: - pdf.dropna(axis=axis, how=how, inplace=inplace) - gdf.dropna(axis=axis, how=how, inplace=inplace) expected = pdf actual = gdf - else: - expected = pdf.dropna(axis=axis, how=how, inplace=inplace) - actual = gdf.dropna(axis=axis, how=how, inplace=inplace) assert_eq(expected, actual) @@ -192,18 +188,14 @@ def test_dropna_thresh_cols(thresh, subset, inplace): ) gdf = cudf.from_pandas(pdf) + expected = pdf.dropna( + axis=1, thresh=thresh, subset=subset, inplace=inplace + ) + actual = gdf.dropna(axis=1, thresh=thresh, subset=subset, inplace=inplace) + if inplace: - pdf.dropna(axis=1, thresh=thresh, subset=subset, inplace=inplace) - gdf.dropna(axis=1, thresh=thresh, subset=subset, inplace=inplace) expected = pdf actual = gdf - else: - expected = pdf.dropna( - axis=1, thresh=thresh, subset=subset, inplace=inplace - ) - actual = gdf.dropna( - axis=1, thresh=thresh, subset=subset, inplace=inplace - ) assert_eq( expected, actual, diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index f806b0a912c..38d3bb0542d 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -798,14 +798,6 @@ def test_index_difference(data, other, sort): gd_data = cudf.core.index.as_index(data) gd_other = cudf.core.index.as_index(other) - if ( - gd_data.dtype.kind == "f" - and gd_other.dtype.kind != "f" - or (gd_data.dtype.kind != "f" and gd_other.dtype.kind == "f") - ): - pytest.xfail( - "Bug in Pandas: https://github.com/pandas-dev/pandas/issues/35217" - ) expected = pd_data.difference(pd_other, sort=sort) actual = gd_data.difference(gd_other, sort=sort) assert_eq(expected, actual) @@ -864,12 +856,6 @@ 
def test_index_equals(data, other): gd_data = cudf.core.index.as_index(data) gd_other = cudf.core.index.as_index(other) - if ( - gd_data.dtype.kind == "f" or gd_other.dtype.kind == "f" - ) and cudf.utils.dtypes.is_mixed_with_object_dtype(gd_data, gd_other): - pytest.xfail( - "Bug in Pandas: https://github.com/pandas-dev/pandas/issues/35217" - ) expected = pd_data.equals(pd_other) actual = gd_data.equals(gd_other) assert_eq(expected, actual) @@ -916,15 +902,6 @@ def test_index_categories_equal(data, other): gd_data = cudf.core.index.as_index(data).astype("category") gd_other = cudf.core.index.as_index(other) - if ( - gd_data.dtype.kind == "f" - and gd_other.dtype.kind != "f" - or (gd_data.dtype.kind != "f" and gd_other.dtype.kind == "f") - ): - pytest.xfail( - "Bug in Pandas: https://github.com/pandas-dev/pandas/issues/35217" - ) - expected = pd_data.equals(pd_other) actual = gd_data.equals(gd_other) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index e032309bdbd..fe365f4e120 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -133,14 +133,7 @@ def test_json_writer(tmpdir, pdf, gdf): assert os.path.exists(pdf_series_fname) assert os.path.exists(gdf_series_fname) - try: - # xref 'https://github.com/pandas-dev/pandas/pull/33373') - expect_series = pd.read_json(pdf_series_fname, typ="series") - except TypeError as e: - if str(e) == " is not convertible to datetime": - continue - else: - raise e + expect_series = pd.read_json(pdf_series_fname, typ="series") got_series = pd.read_json(gdf_series_fname, typ="series") assert_eq(expect_series, got_series) diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index a7f4d1a527a..e7baa4ee926 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -374,7 +374,7 @@ def test_fillna_method_numerical(data, container, data_dtype, method, 
inplace): ) @pytest.mark.parametrize("inplace", [True, False]) def test_fillna_categorical(psr_data, fill_value, inplace): - psr = psr_data.copy() + psr = psr_data.copy(deep=True) gsr = Series.from_pandas(psr) if isinstance(fill_value, pd.Series): @@ -404,7 +404,7 @@ def test_fillna_categorical(psr_data, fill_value, inplace): @pytest.mark.parametrize( - "psr", + "psr_data", [ pd.Series(pd.date_range("2010-01-01", "2020-01-10", freq="1y")), pd.Series(["2010-01-01", None, "2011-10-10"], dtype="datetime64[ns]"), @@ -486,7 +486,8 @@ def test_fillna_categorical(psr_data, fill_value, inplace): ], ) @pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_datetime(psr, fill_value, inplace): +def test_fillna_datetime(psr_data, fill_value, inplace): + psr = psr_data.copy(deep=True) gsr = cudf.from_pandas(psr) if isinstance(fill_value, pd.Series): @@ -645,7 +646,7 @@ def test_fillna_dataframe(df, value, inplace): @pytest.mark.parametrize( - "psr", + "ps_data", [ pd.Series(["a", "b", "c", "d"]), pd.Series([None] * 4, dtype="object"), @@ -666,7 +667,8 @@ def test_fillna_dataframe(df, value, inplace): ], ) @pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_string(psr, fill_value, inplace): +def test_fillna_string(ps_data, fill_value, inplace): + psr = ps_data.copy(deep=True) gsr = cudf.from_pandas(psr) if isinstance(fill_value, pd.Series): diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 1b628142939..abe641c1943 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -114,7 +114,7 @@ def test_series_set_item(psr, arg): ], ) def test_setitem_dataframe_series_inplace(df): - pdf = df + pdf = df.copy(deep=True) gdf = cudf.from_pandas(pdf) pdf["a"].replace(1, 500, inplace=True) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index f2748f5053c..997249e3140 100644 --- a/python/cudf/cudf/tests/test_string.py +++ 
b/python/cudf/cudf/tests/test_string.py @@ -340,18 +340,8 @@ def _cat_convert_seq_to_cudf(others): ("f", "g", "h", "i", "j"), pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pytest.param( - pd.Index(["f", "g", "h", "i", "j"]), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), - pytest.param( - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), ( np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), @@ -376,36 +366,26 @@ def _cat_convert_seq_to_cudf(others): pd.Series(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), ), - pytest.param( - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), - pytest.param( - [ - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ], - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", 
"i", "j"]), ), + [ + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ], [ pd.Series(["hello", "world", "abc", "xyz", "pqr"]), pd.Series(["abc", "xyz", "hello", "pqr", "world"]), @@ -507,18 +487,8 @@ def test_string_cat(ps_gs, others, sep, na_rep, index): ("f", "g", "h", "i", "j"), pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pytest.param( - pd.Index(["f", "g", "h", "i", "j"]), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), - pytest.param( - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), ( np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), @@ -531,36 +501,26 @@ def test_string_cat(ps_gs, others, sep, na_rep, index): pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["f", "g", "h", "i", "j"]), ], - pytest.param( - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), - pytest.param( - [ - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - 
pd.Index(["f", "g", "h", "i", "j"]), - ], - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), ), + [ + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ], [ pd.Series( ["hello", "world", "abc", "xyz", "pqr"], @@ -611,16 +571,7 @@ def test_string_index_str_cat(data, others, sep, na_rep, name): @pytest.mark.parametrize( - "data", - [ - pytest.param( - ["a", None, "c", None, "e"], - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/5862" - ), - ), - ["a", "b", "c", "d", "a"], - ], + "data", [["a", None, "c", None, "e"], ["a", "b", "c", "d", "a"]], ) @pytest.mark.parametrize( "others", @@ -628,18 +579,8 @@ def test_string_index_str_cat(data, others, sep, na_rep, name): None, ["f", "g", "h", "i", "j"], pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pytest.param( - pd.Index(["f", "g", "h", "i", "j"]), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), - pytest.param( - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), [ np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), @@ -732,16 +673,20 @@ def test_string_index_duplicate_str_cat(data, others, sep, na_rep, name): ) 
-@pytest.mark.xfail(raises=ValueError) -@pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"]) -@pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) -def test_string_cat_str(ps_gs, sep, na_rep): - ps, gs = ps_gs - - got = gs.str.cat(gs.str, sep=sep, na_rep=na_rep) - expect = ps.str.cat(ps.str, sep=sep, na_rep=na_rep) - - assert_eq(expect, got) +def test_string_cat_str_error(): + gs = cudf.Series(["a", "v", "s"]) + # https://github.com/pandas-dev/pandas/issues/28277 + # ability to pass StringMethods is being removed in future. + with pytest.raises( + TypeError, + match=re.escape( + "others must be Series, Index, DataFrame, np.ndarrary " + "or list-like (either containing only strings or " + "containing only objects of type Series/Index/" + "np.ndarray[1-dim])" + ), + ): + gs.str.cat(gs.str) @pytest.mark.xfail(raises=(NotImplementedError, AttributeError)) @@ -847,10 +792,6 @@ def test_string_upper(ps_gs): @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) @pytest.mark.parametrize("expand", [True, False, None]) def test_string_split(data, pat, n, expand): - - if data in (["a b", " c ", " d", "e ", "f"],) and pat is None: - pytest.xfail("None pattern split algorithm not implemented yet") - ps = pd.Series(data, dtype="str") gs = Series(data, dtype="str") diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 0b886dcef9e..85a4b6f70b6 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -434,19 +434,7 @@ def test_timedelta_dataframe_ops(df, op): ) @pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES) @pytest.mark.parametrize( - "op", - [ - "add", - "sub", - "truediv", - "mod", - pytest.param( - "floordiv", - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/35529" - ), - ), - ], + "op", ["add", "sub", "truediv", "mod", "floordiv"], ) def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op): gsr = 
cudf.Series(data=data, dtype=dtype) @@ -534,19 +522,7 @@ def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op): ) @pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES) @pytest.mark.parametrize( - "op", - [ - "add", - "sub", - "truediv", - "mod", - pytest.param( - "floordiv", - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/35529" - ), - ), - ], + "op", ["add", "sub", "truediv", "mod", "floordiv"], ) def test_timedelta_series_ops_with_cudf_scalars(data, cpu_scalar, dtype, op): gpu_scalar = cudf.Scalar(cpu_scalar) From 303c77da5e23032441a605654529af1793f77143 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 22 Feb 2021 11:03:02 -0800 Subject: [PATCH 03/35] fix isin and misc tests --- python/cudf/cudf/core/column/categorical.py | 46 +++++++++++++++++++++ python/cudf/cudf/core/column/column.py | 33 +++++++-------- python/cudf/cudf/core/column/datetime.py | 31 ++++++++++++++ python/cudf/cudf/core/column/numerical.py | 29 +++++++++++++ python/cudf/cudf/core/column/timedelta.py | 35 ++++++++++++++++ python/cudf/cudf/tests/test_csv.py | 1 - python/cudf/cudf/tests/test_dataframe.py | 22 ++++------ python/cudf/cudf/tests/test_datetime.py | 3 -- python/cudf/cudf/tests/test_index.py | 2 +- python/cudf/cudf/tests/test_indexing.py | 1 - python/cudf/cudf/tests/test_joining.py | 4 -- python/cudf/cudf/tests/test_numerical.py | 10 +---- python/cudf/cudf/tests/test_ops.py | 1 + python/cudf/cudf/tests/test_repr.py | 3 +- python/cudf/cudf/tests/test_reshape.py | 5 +-- python/cudf/cudf/tests/test_setitem.py | 19 ++++++--- python/cudf/cudf/tests/test_sorting.py | 5 ++- python/cudf/cudf/tests/test_timedelta.py | 3 ++ 18 files changed, 189 insertions(+), 64 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index a1d4a2668dd..7c4fa42b93a 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -9,6 +9,7 @@ Dict, 
Mapping, Optional, + Sequence, Tuple, Union, cast, @@ -867,6 +868,35 @@ def set_base_data(self, value): else: super().set_base_data(value) + def isin(self, values: Sequence) -> ColumnBase: + if cudf.utils.dtypes.is_scalar(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(values).__name__}]" + ) + + lhs = self + rhs = None + + try: + # We need to convert values to same type as self, + # hence passing dtype=self.dtype + rhs = cudf.core.column.as_column(values, dtype=self.dtype) + + if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: + return cudf.core.column.full(len(self), False, dtype="bool") + + # Short-circuit if rhs is all null. + if lhs.null_count == 0 and (rhs.null_count == len(rhs)): + return cudf.core.column.full(len(self), False, dtype="bool") + except ValueError: + # pandas functionally returns all False when cleansing via + # typecasting fails + return cudf.core.column.full(len(self), False, dtype="bool") + + res = lhs._obtain_isin_result(rhs) + return res + def set_base_mask(self, value: Optional[Buffer]): super().set_base_mask(value) self._codes = None @@ -936,6 +966,22 @@ def unary_operator(self, unaryop: str): ) def __setitem__(self, key, value): + if cudf.utils.dtypes.is_scalar(value): + new_values = [value] + else: + new_values = value + + to_add_categories = cudf.Index(new_values).difference(self.categories) + + if ( + len(to_add_categories) + and not to_add_categories.isna()._values.all() + ): + raise ValueError( + "Cannot setitem on a Categorical with a new " + "category, set the categories first" + ) + if cudf.utils.dtypes.is_scalar(value): value = self._encode(value) if value is not None else value else: diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 2344b785dbf..7daf8143338 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -855,9 +855,14 @@ def isin(self, values: 
Sequence) -> ColumnBase: rhs = None try: - # We need to convert values to same type as self, - # hence passing dtype=self.dtype - rhs = as_column(values, dtype=self.dtype) + rhs = as_column(values, nan_as_null=False) + if lhs.null_count == len(lhs): + lhs = lhs.astype(rhs.dtype) + elif rhs.null_count == len(rhs): + rhs = rhs.astype(lhs.dtype) + + if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: + return full(len(self), False, dtype="bool") # Short-circuit if rhs is all null. if lhs.null_count == 0 and (rhs.null_count == len(rhs)): @@ -867,28 +872,18 @@ def isin(self, values: Sequence) -> ColumnBase: # typecasting fails return full(len(self), False, dtype="bool") - # If categorical, combine categories first - if is_categorical_dtype(lhs): - lhs_cats = lhs.cat().categories._values - rhs_cats = rhs.cat().categories._values - - if not np.issubdtype(rhs_cats.dtype, lhs_cats.dtype): - # If they're not the same dtype, short-circuit if the values - # list doesn't have any nulls. If it does have nulls, make - # the values list a Categorical with a single null - if not rhs.has_nulls: - return full(len(self), False, dtype="bool") - rhs = as_column(pd.Categorical.from_codes([-1], categories=[])) - rhs = rhs.cat().set_categories(lhs_cats).astype(self.dtype) - - ldf = cudf.DataFrame({"x": lhs, "orig_order": arange(len(lhs))}) + res = lhs._obtain_isin_result(rhs) + + return res + + def _obtain_isin_result(self, rhs): + ldf = cudf.DataFrame({"x": self, "orig_order": arange(len(self))}) rdf = cudf.DataFrame( {"x": rhs, "bool": full(len(rhs), True, dtype="bool")} ) res = ldf.merge(rdf, on="x", how="left").sort_values(by="orig_order") res = res.drop_duplicates(subset="orig_order", ignore_index=True) res = res._data["bool"].fillna(False) - return res def as_mask(self) -> Buffer: diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 1d706687489..c72f8f641c4 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ 
b/python/cudf/cudf/core/column/datetime.py @@ -335,6 +335,37 @@ def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: def is_unique(self) -> bool: return self.as_numerical.is_unique + def isin(self, values: Sequence) -> ColumnBase: + if cudf.utils.dtypes.is_scalar(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(values).__name__}]" + ) + + lhs = self + rhs = None + + try: + rhs = cudf.core.column.as_column(values) + + if rhs.dtype.kind in {"f", "i", "u"}: + return cudf.core.column.full(len(self), False, dtype="bool") + rhs = rhs.astype(self.dtype) + + if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: + return cudf.core.column.full(len(self), False, dtype="bool") + + # Short-circuit if rhs is all null. + if lhs.null_count == 0 and (rhs.null_count == len(rhs)): + return cudf.core.column.full(len(self), False, dtype="bool") + except ValueError: + # pandas functionally returns all False when cleansing via + # typecasting fails + return cudf.core.column.full(len(self), False, dtype="bool") + + res = lhs._obtain_isin_result(rhs) + return res + def can_cast_safely(self, to_dtype: Dtype) -> bool: if np.issubdtype(to_dtype, np.datetime64): diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 0a8d93c913b..3ff03147583 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -248,6 +248,35 @@ def std( ) -> float: return self.reduce("std", skipna=skipna, dtype=dtype, ddof=ddof) + def isin(self, values: Sequence) -> ColumnBase: + if cudf.utils.dtypes.is_scalar(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(values).__name__}]" + ) + + lhs = self + rhs = None + + try: + rhs = as_column(values, nan_as_null=False) + if isinstance(rhs, NumericalColumn): + rhs = rhs.astype(dtype=self.dtype) + + if not (rhs.null_count 
== len(rhs)) and lhs.dtype != rhs.dtype: + return cudf.core.column.full(len(self), False, dtype="bool") + + # Short-circuit if rhs is all null. + if lhs.null_count == 0 and (rhs.null_count == len(rhs)): + return cudf.core.column.full(len(self), False, dtype="bool") + except ValueError: + # pandas functionally returns all False when cleansing via + # typecasting fails + return cudf.core.column.full(len(self), False, dtype="bool") + + res = lhs._obtain_isin_result(rhs) + return res + def sum_of_squares(self, dtype: Dtype = None) -> float: return libcudf.reduce.reduce("sum_of_squares", self, dtype=dtype) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 17222f16673..7107b66d26c 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -367,6 +367,41 @@ def median(self, skipna: bool = None) -> pd.Timedelta: self.as_numerical.median(skipna=skipna), unit=self.time_unit ) + def isin(self, values: Sequence) -> ColumnBase: + if cudf.utils.dtypes.is_scalar(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(values).__name__}]" + ) + + lhs = self + rhs = None + + try: + # We need to convert values to same type as self, + # hence passing dtype=self.dtype + rhs = cudf.core.column.as_column(values) + + if rhs.dtype.kind in {"f", "i", "u"}: + return cudf.core.column.full(len(self), False, dtype="bool") + + rhs = rhs.astype(self.dtype) + + if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: + return cudf.core.column.full(len(self), False, dtype="bool") + + # Short-circuit if rhs is all null. 
+ if lhs.null_count == 0 and (rhs.null_count == len(rhs)): + return cudf.core.column.full(len(self), False, dtype="bool") + except ValueError: + # pandas functionally returns all False when cleansing via + # typecasting fails + return cudf.core.column.full(len(self), False, dtype="bool") + + res = lhs._obtain_isin_result(rhs) + + return res + def quantile( self, q: Union[float, Sequence[float]], interpolation: str, exact: bool ) -> "column.ColumnBase": diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 23a950bb72d..31d502e4a23 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -1838,7 +1838,6 @@ def test_csv_reader_timedetla_dtypes(dtype): assert_eq(expected, actual) -@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/6719") @pytest.mark.parametrize( "dtype", sorted(list(cudf.utils.dtypes.DATETIME_TYPES)) ) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a93796e46f7..96e77bd6823 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1502,7 +1502,6 @@ def test_dataframe_cupy_array_wrong_index(): gd.DataFrame(d_ary, index="a") -@pytest.mark.xfail(reason="constructor does not coerce index inputs") def test_index_in_dataframe_constructor(): a = pd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) b = gd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) @@ -3990,10 +3989,12 @@ def test_value_counts(): def test_isin_numeric(data, values): index = np.random.randint(0, 100, len(data)) psr = pd.Series(data, index=index) - gsr = gd.Series.from_pandas(psr) + gsr = gd.Series.from_pandas(psr, nan_as_null=False) - got = gsr.isin(values) expected = psr.isin(values) + print(expected) + got = gsr.isin(values) + assert_eq(got, expected) @@ -4066,15 +4067,7 @@ def test_isin_datetime(data, values): ["this", "is"], [None, None, None], ["12", "14", "19"], - pytest.param( - [12, 14, 19], 
- marks=[ - pytest.mark.xfail( - reason="pandas's failure here seems like a bug " - "given the reverse succeeds" - ) - ], - ), + [12, 14, 19], ["is", "this", "is", "this", "is"], ], ) @@ -8094,9 +8087,8 @@ def test_agg_for_dataframes(data, aggs): pdf = pd.DataFrame(data) gdf = gd.DataFrame(data) - expect = pdf.agg(aggs) - got = gdf.agg(aggs) - + expect = pdf.agg(aggs).sort_index() + got = gdf.agg(aggs).sort_index() assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index cffe640d1f9..7e545022eb8 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -413,9 +413,6 @@ def test_datetime_to_arrow(dtype): def test_datetime_unique(data, nulls): psr = pd.Series(data) - print(data) - print(nulls) - if len(data) > 0: if nulls == "some": p = np.random.randint(0, len(data), 2) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 38d3bb0542d..9e401316e19 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -565,7 +565,7 @@ def test_empty_df_head_tail_index(n): ( pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]), pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) != "a", - "h", + "a", None, ), ( diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 5229881df25..b4558cec01f 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -973,7 +973,6 @@ def test_series_setitem_datetime(): assert_eq(psr, gsr) -@pytest.mark.xfail(reason="Pandas will coerce to object datatype here") def test_series_setitem_datetime_coerced(): psr = pd.Series(["2001", "2002", "2003"], dtype="datetime64[ns]") gsr = cudf.from_pandas(psr) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index d99897584ec..d7735f9029f 100644 --- 
a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -540,10 +540,6 @@ def test_empty_joins(how, left_empty, right_empty): assert len(expected) == len(result) -@pytest.mark.xfail( - reason="left_on/right_on produces undefined results with 0" - "index and is disabled" -) def test_merge_left_index_zero(): left = pd.DataFrame({"x": [1, 2, 3, 4, 5, 6]}, index=[0, 1, 2, 3, 4, 5]) right = pd.DataFrame( diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index a2afa9f0a97..17f73121b1c 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -87,9 +87,6 @@ def test_can_cast_safely_mixed_kind(): assert not data.can_cast_safely(to_dtype) -@pytest.mark.xfail( - reason="cuDF null <-> pd.NA compatibility not yet supported" -) def test_to_pandas_nullable_integer(): gsr_not_null = Series([1, 2, 3]) gsr_has_null = Series([1, 2, None]) @@ -98,12 +95,9 @@ def test_to_pandas_nullable_integer(): psr_has_null = pd.Series([1, 2, None], dtype="Int64") assert_eq(gsr_not_null.to_pandas(), psr_not_null) - assert_eq(gsr_has_null.to_pandas(), psr_has_null) + assert_eq(gsr_has_null.to_pandas(nullable=True), psr_has_null) -@pytest.mark.xfail( - reason="cuDF null <-> pd.NA compatibility not yet supported" -) def test_to_pandas_nullable_bool(): gsr_not_null = Series([True, False, True]) gsr_has_null = Series([True, False, None]) @@ -112,7 +106,7 @@ def test_to_pandas_nullable_bool(): psr_has_null = pd.Series([True, False, None], dtype="boolean") assert_eq(gsr_not_null.to_pandas(), psr_not_null) - assert_eq(gsr_has_null.to_pandas(), psr_has_null) + assert_eq(gsr_has_null.to_pandas(nullable=True), psr_has_null) def test_can_cast_safely_has_nulls(): diff --git a/python/cudf/cudf/tests/test_ops.py b/python/cudf/cudf/tests/test_ops.py index 888380bc559..981b0e833a0 100644 --- a/python/cudf/cudf/tests/test_ops.py +++ b/python/cudf/cudf/tests/test_ops.py @@ -27,6 +27,7 @@ def 
test_sqrt_integer(): def math_op_test( dtype, fn, nelem=128, test_df=False, positive_only=False, check_dtype=True ): + np.random.seed(0) randvals = gen_rand(dtype, nelem, positive_only=positive_only) h_series = pd.Series(randvals.astype(dtype)) d_series = cudf.Series(h_series) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 8c09dc91253..16c24d5afaa 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1169,8 +1169,7 @@ def test_timedelta_index_repr(index, expected_repr): def test_mulitIndex_repr(pmi, max_seq_items): pd.set_option("display.max_seq_items", max_seq_items) gmi = cudf.from_pandas(pmi) - print(gmi) - print(pmi) + assert gmi.__repr__() == pmi.__repr__() pd.reset_option("display.max_seq_items") diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 315762c931f..030cbe7977d 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -71,9 +71,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype): @pytest.mark.parametrize("num_cols", [1, 2, 10]) @pytest.mark.parametrize("num_rows", [1, 2, 1000]) @pytest.mark.parametrize( - "dtype", - list(NUMERIC_TYPES + DATETIME_TYPES) - + [pytest.param("str", marks=pytest.mark.xfail())], + "dtype", list(NUMERIC_TYPES + DATETIME_TYPES) + ["str"], ) @pytest.mark.parametrize("nulls", ["none", "some"]) def test_df_stack(nulls, num_cols, num_rows, dtype): @@ -102,7 +100,6 @@ def test_df_stack(nulls, num_cols, num_rows, dtype): ) assert_eq(expect, got) - pass @pytest.mark.parametrize("num_rows", [1, 2, 10, 1000]) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index abe641c1943..ba0509b16d4 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.tests.utils import 
assert_eq, assert_exceptions_equal @pytest.mark.parametrize("df", [pd.DataFrame({"a": [1, 2, 3]})]) @@ -19,10 +19,7 @@ def test_dataframe_setitem_bool_mask_scaler(df, arg, value): assert_eq(df, gdf) -# pandas incorrectly adds nulls with dataframes -# but works fine with scalers -@pytest.mark.xfail() -def test_dataframe_setitem_scaler_bool_inconsistency(): +def test_dataframe_setitem_scaler_bool(): df = pd.DataFrame({"a": [1, 2, 3]}) df[[True, False, True]] = pd.DataFrame({"a": [-1, -2]}) @@ -184,3 +181,15 @@ def test_column_set_equal_length_object_by_mask(): data[bool_col] = replace_data assert_eq(cudf.Series(data), cudf.Series([100, 0, 300, 1, 500])) + + +def test_categorical_setitem_invalid(): + ps = pd.Series([1, 2, 3], dtype="category") + gs = cudf.Series([1, 2, 3], dtype="category") + + assert_exceptions_equal( + lfunc=ps.__setitem__, + rfunc=gs.__setitem__, + lfunc_args_and_kwargs=([0, 5], {}), + rfunc_args_and_kwargs=([0, 5], {}), + ) diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index 8bab802d89c..7c4cfee3f75 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -140,7 +140,10 @@ def test_series_nsmallest(data, n): sr = Series(data) psr = pd.Series(data) assert_eq(sr.nsmallest(n), psr.nsmallest(n)) - assert_eq(sr.nsmallest(n, keep="last"), psr.nsmallest(n, keep="last")) + assert_eq( + sr.nsmallest(n, keep="last").sort_index(), + psr.nsmallest(n, keep="last").sort_index(), + ) assert_exceptions_equal( lfunc=psr.nsmallest, diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 85a4b6f70b6..df3d04d36ba 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -420,6 +420,7 @@ def test_timedelta_dataframe_ops(df, op): np.timedelta64(4, "s"), np.timedelta64(456, "D"), np.timedelta64(46, "h"), + # TODO: PREM FIX THIS pytest.param( np.timedelta64("nat"), marks=pytest.mark.xfail( 
@@ -508,6 +509,7 @@ def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op): datetime.timedelta(seconds=768), datetime.timedelta(microseconds=7), np.timedelta64(4, "s"), + # TODO: PREM Fix this pytest.param( np.timedelta64("nat"), marks=pytest.mark.xfail( @@ -772,6 +774,7 @@ def test_timedelta_datetime_index_ops_misc( "add", "sub", "truediv", + # TODO: PREM FIX THIS pytest.param( "floordiv", marks=pytest.mark.xfail( From 18d1fb39d3e44ccaa44dcfd5472a0dac66a306e8 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 22 Feb 2021 12:32:59 -0800 Subject: [PATCH 04/35] remove redundant code --- python/cudf/cudf/core/column/categorical.py | 33 +++--------------- python/cudf/cudf/core/column/column.py | 36 +++++++++++++------- python/cudf/cudf/core/column/datetime.py | 10 ++---- python/cudf/cudf/core/column/numerical.py | 37 ++++++--------------- python/cudf/cudf/core/column/timedelta.py | 12 ++----- 5 files changed, 46 insertions(+), 82 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 7c4fa42b93a..98c0f1dcbbd 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -9,7 +9,6 @@ Dict, Mapping, Optional, - Sequence, Tuple, Union, cast, @@ -868,34 +867,12 @@ def set_base_data(self, value): else: super().set_base_data(value) - def isin(self, values: Sequence) -> ColumnBase: - if cudf.utils.dtypes.is_scalar(values): - raise TypeError( - "only list-like objects are allowed to be passed " - f"to isin(), you passed a [{type(values).__name__}]" - ) - + def _process_values_for_isin(self, values): lhs = self - rhs = None - - try: - # We need to convert values to same type as self, - # hence passing dtype=self.dtype - rhs = cudf.core.column.as_column(values, dtype=self.dtype) - - if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: - return cudf.core.column.full(len(self), False, dtype="bool") - - # Short-circuit if rhs is all 
null. - if lhs.null_count == 0 and (rhs.null_count == len(rhs)): - return cudf.core.column.full(len(self), False, dtype="bool") - except ValueError: - # pandas functionally returns all False when cleansing via - # typecasting fails - return cudf.core.column.full(len(self), False, dtype="bool") - - res = lhs._obtain_isin_result(rhs) - return res + # We need to convert values to same type as self, + # hence passing dtype=self.dtype + rhs = cudf.core.column.as_column(values, dtype=self.dtype) + return lhs, rhs def set_base_mask(self, value: Optional[Buffer]): super().set_base_mask(value) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7daf8143338..a8aef7a4d35 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -855,18 +855,10 @@ def isin(self, values: Sequence) -> ColumnBase: rhs = None try: - rhs = as_column(values, nan_as_null=False) - if lhs.null_count == len(lhs): - lhs = lhs.astype(rhs.dtype) - elif rhs.null_count == len(rhs): - rhs = rhs.astype(lhs.dtype) - - if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: - return full(len(self), False, dtype="bool") - - # Short-circuit if rhs is all null. 
- if lhs.null_count == 0 and (rhs.null_count == len(rhs)): - return full(len(self), False, dtype="bool") + lhs, rhs = self._process_values_for_isin(values) + res = lhs._isin_earlystop(rhs) + if res is not None: + return res except ValueError: # pandas functionally returns all False when cleansing via # typecasting fails @@ -876,6 +868,26 @@ def isin(self, values: Sequence) -> ColumnBase: return res + def _process_values_for_isin(self, values): + lhs = self + rhs = as_column(values, nan_as_null=False) + if lhs.null_count == len(lhs): + lhs = lhs.astype(rhs.dtype) + elif rhs.null_count == len(rhs): + rhs = rhs.astype(lhs.dtype) + return lhs, rhs + + def _isin_earlystop(self, rhs): + if self.dtype != rhs.dtype: + if self.null_count and rhs.null_count: + return self.isna() + else: + return cudf.core.column.full(len(self), False, dtype="bool") + elif self.null_count == 0 and (rhs.null_count == len(rhs)): + return cudf.core.column.full(len(self), False, dtype="bool") + else: + return None + def _obtain_isin_result(self, rhs): ldf = cudf.DataFrame({"x": self, "orig_order": arange(len(self))}) rdf = cudf.DataFrame( diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index c72f8f641c4..865094d24b1 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -351,13 +351,9 @@ def isin(self, values: Sequence) -> ColumnBase: if rhs.dtype.kind in {"f", "i", "u"}: return cudf.core.column.full(len(self), False, dtype="bool") rhs = rhs.astype(self.dtype) - - if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: - return cudf.core.column.full(len(self), False, dtype="bool") - - # Short-circuit if rhs is all null. 
- if lhs.null_count == 0 and (rhs.null_count == len(rhs)): - return cudf.core.column.full(len(self), False, dtype="bool") + res = lhs._isin_earlystop(rhs) + if res is not None: + return res except ValueError: # pandas functionally returns all False when cleansing via # typecasting fails diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 3ff03147583..4e07dd531ae 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -248,34 +248,19 @@ def std( ) -> float: return self.reduce("std", skipna=skipna, dtype=dtype, ddof=ddof) - def isin(self, values: Sequence) -> ColumnBase: - if cudf.utils.dtypes.is_scalar(values): - raise TypeError( - "only list-like objects are allowed to be passed " - f"to isin(), you passed a [{type(values).__name__}]" - ) - + def _process_values_for_isin(self, values): lhs = self - rhs = None + rhs = as_column(values, nan_as_null=False) - try: - rhs = as_column(values, nan_as_null=False) - if isinstance(rhs, NumericalColumn): - rhs = rhs.astype(dtype=self.dtype) - - if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: - return cudf.core.column.full(len(self), False, dtype="bool") - - # Short-circuit if rhs is all null. 
- if lhs.null_count == 0 and (rhs.null_count == len(rhs)): - return cudf.core.column.full(len(self), False, dtype="bool") - except ValueError: - # pandas functionally returns all False when cleansing via - # typecasting fails - return cudf.core.column.full(len(self), False, dtype="bool") - - res = lhs._obtain_isin_result(rhs) - return res + if isinstance(rhs, NumericalColumn): + rhs = rhs.astype(dtype=self.dtype) + + if lhs.null_count == len(lhs): + lhs = lhs.astype(rhs.dtype) + elif rhs.null_count == len(rhs): + rhs = rhs.astype(lhs.dtype) + + return lhs, rhs def sum_of_squares(self, dtype: Dtype = None) -> float: return libcudf.reduce.reduce("sum_of_squares", self, dtype=dtype) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 7107b66d26c..99032185022 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -378,21 +378,15 @@ def isin(self, values: Sequence) -> ColumnBase: rhs = None try: - # We need to convert values to same type as self, - # hence passing dtype=self.dtype rhs = cudf.core.column.as_column(values) if rhs.dtype.kind in {"f", "i", "u"}: return cudf.core.column.full(len(self), False, dtype="bool") rhs = rhs.astype(self.dtype) - - if not (rhs.null_count == len(rhs)) and lhs.dtype != rhs.dtype: - return cudf.core.column.full(len(self), False, dtype="bool") - - # Short-circuit if rhs is all null. 
- if lhs.null_count == 0 and (rhs.null_count == len(rhs)): - return cudf.core.column.full(len(self), False, dtype="bool") + res = lhs._isin_earlystop(rhs) + if res is not None: + return res except ValueError: # pandas functionally returns all False when cleansing via # typecasting fails From 01afeceb2b4b9af57bdd63b9e50e8962b6afb767 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 22 Feb 2021 15:54:51 -0800 Subject: [PATCH 05/35] fix more issues --- python/cudf/cudf/core/column/timedelta.py | 2 +- python/cudf/cudf/tests/test_timedelta.py | 20 ++++---------------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 99032185022..dcffdd4b282 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -127,7 +127,7 @@ def _binary_op_floordiv( common_dtype = determine_out_dtype(self.dtype, rhs.dtype) lhs = lhs.astype(common_dtype).astype("float64") if isinstance(rhs, cudf.Scalar): - if rhs.is_valid: + if rhs.is_valid(): rhs = cudf.Scalar( np.timedelta64(rhs.value) .astype(common_dtype) diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index df3d04d36ba..90850ff0648 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -420,13 +420,7 @@ def test_timedelta_dataframe_ops(df, op): np.timedelta64(4, "s"), np.timedelta64(456, "D"), np.timedelta64(46, "h"), - # TODO: PREM FIX THIS - pytest.param( - np.timedelta64("nat"), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/35529" - ), - ), + np.timedelta64("nat"), np.timedelta64(1, "s"), np.timedelta64(1, "ms"), np.timedelta64(1, "us"), @@ -509,16 +503,11 @@ def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op): datetime.timedelta(seconds=768), datetime.timedelta(microseconds=7), np.timedelta64(4, "s"), - # TODO: PREM Fix this - 
pytest.param( - np.timedelta64("nat"), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/35529" - ), - ), + np.timedelta64("nat", "s"), np.timedelta64(1, "s"), np.timedelta64(1, "ms"), np.timedelta64(1, "us"), + np.timedelta64("nat", "ns"), np.timedelta64(1, "ns"), ], ) @@ -774,7 +763,6 @@ def test_timedelta_datetime_index_ops_misc( "add", "sub", "truediv", - # TODO: PREM FIX THIS pytest.param( "floordiv", marks=pytest.mark.xfail( @@ -841,7 +829,7 @@ def test_timedelta_index_ops_with_scalars(data, other_scalars, dtype, op): pytest.param( "floordiv", marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/35529" + reason="https://github.com/rapidsai/cudf/issues/5938" ), ), ], From c7c47b5d18e37ced9df9e1a0cba3d8a54d279057 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 22 Feb 2021 20:00:19 -0800 Subject: [PATCH 06/35] fix lots of deprecated warnings --- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/core/multiindex.py | 24 ++++- python/cudf/cudf/tests/test_categorical.py | 4 +- python/cudf/cudf/tests/test_concat.py | 4 +- python/cudf/cudf/tests/test_csv.py | 2 +- python/cudf/cudf/tests/test_dataframe.py | 94 +++++++++++-------- python/cudf/cudf/tests/test_datetime.py | 8 +- python/cudf/cudf/tests/test_dropna.py | 2 +- python/cudf/cudf/tests/test_duplicates.py | 2 +- python/cudf/cudf/tests/test_groupby.py | 6 +- python/cudf/cudf/tests/test_index.py | 4 +- python/cudf/cudf/tests/test_repr.py | 5 +- python/cudf/cudf/tests/test_rolling.py | 12 +-- python/cudf/cudf/tests/test_series.py | 13 ++- python/cudf/cudf/tests/test_stats.py | 6 +- python/cudf/cudf/utils/dtypes.py | 7 ++ python/dask_cudf/dask_cudf/tests/test_core.py | 6 +- 17 files changed, 125 insertions(+), 76 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 3e7e6625abe..697524dddd8 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -690,7 +690,7 
@@ def __getitem__(self, arg): elif can_convert_to_column(arg): mask = arg if is_list_like(mask): - mask = pd.Series(mask) + mask = pd.Series(mask, dtype=None if len(mask) else "float64") if mask.dtype == "bool": return self._apply_boolean_mask(mask) else: diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 4e82a1f72b0..f252ed1a9aa 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -15,6 +15,7 @@ import cudf from cudf import _lib as libcudf from cudf._typing import DataFrameOrSeries +from cudf.core._compat import PANDAS_GE_120 from cudf.core.column import column from cudf.core.frame import Frame from cudf.core.index import Index, as_index @@ -485,7 +486,28 @@ def __repr__(self): ) ) ) - preprocess = preprocess.to_pandas(nullable=True) + + if PANDAS_GE_120: + # TODO: Remove this whole `if` block, + # this is a workaround for the following issue: + # https://github.com/pandas-dev/pandas/issues/39984 + temp_df = preprocess._source_data + + preprocess_pdf = pd.DataFrame() + for col in temp_df.columns: + if temp_df[col].dtype.kind == "f": + preprocess_pdf[col] = temp_df[col].to_pandas( + nullable=False + ) + else: + preprocess_pdf[col] = temp_df[col].to_pandas( + nullable=True + ) + + preprocess_pdf.columns = preprocess.names + preprocess = pd.MultiIndex.from_frame(preprocess_pdf) + else: + preprocess = preprocess.to_pandas(nullable=True) preprocess.values[:] = tuples_list else: preprocess = preprocess.to_pandas(nullable=True) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 2d8130e6cb1..67fd07dfcd8 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -493,7 +493,7 @@ def test_categorical_dataframe_slice_copy(): pd.Series([1, 2, 3, 89], dtype="float64"), pd.Series([1, 2.5, 3.001, 89], dtype="float64"), pd.Series([None, None, None]), - pd.Series([]), + pd.Series([], 
dtype="float64"), ], ) @pytest.mark.parametrize( @@ -526,7 +526,7 @@ def test_categorical_typecast(data, cat_type): pd.Series([1, 2, 3, 89], dtype="float64"), pd.Series([1, 2.5, 3.001, 89], dtype="float64"), pd.Series([None, None, None]), - pd.Series([]), + pd.Series([], dtype="float64"), ], ) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index f8a7099f1bf..3739e226cc6 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -372,8 +372,8 @@ def test_concat_mixed_input(): [ [pd.Series([1, 2, 3]), pd.DataFrame({"a": [1, 2]})], [pd.Series([1, 2, 3]), pd.DataFrame({"a": []})], - [pd.Series([]), pd.DataFrame({"a": []})], - [pd.Series([]), pd.DataFrame({"a": [1, 2]})], + [pd.Series([], dtype="float64"), pd.DataFrame({"a": []})], + [pd.Series([], dtype="float64"), pd.DataFrame({"a": [1, 2]})], [pd.Series([1, 2, 3.0, 1.2], name="abc"), pd.DataFrame({"a": [1, 2]})], [ pd.Series( diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 31d502e4a23..d972d2ad11c 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -1815,7 +1815,7 @@ def test_csv_reader_dtypes(dtype): @pytest.mark.parametrize( - "dtype", ["Int64", "UInt32", {"a": "UInt64", "b": "float64", "c": "Int32"}] + "dtype", ["Int64", "UInt32", {"a": "UInt64", "b": "Float64", "c": "Int32"}] ) def test_csv_reader_nullable_dtypes(dtype): buf = "a,b,c\n1,10,111\n2,11,112\n3,12,113\n4,13,114\n" diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 96e77bd6823..ecd1f42f4a7 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1281,8 +1281,10 @@ def test_concat_different_column_dataframe(df1_d, df2_d): assert_eq(got, expect, check_dtype=False) -@pytest.mark.parametrize("ser_1", [pd.Series([1, 2, 3]), pd.Series([])]) 
-@pytest.mark.parametrize("ser_2", [pd.Series([])]) +@pytest.mark.parametrize( + "ser_1", [pd.Series([1, 2, 3]), pd.Series([], dtype="float64")] +) +@pytest.mark.parametrize("ser_2", [pd.Series([], dtype="float64")]) def test_concat_empty_series(ser_1, ser_2): got = gd.concat([gd.Series(ser_1), gd.Series(ser_2)]) expect = pd.concat([ser_1, ser_2]) @@ -1689,7 +1691,7 @@ def test_series_shape(): def test_series_shape_empty(): - ps = pd.Series() + ps = pd.Series(dtype="float64") cs = gd.Series([]) assert ps.shape == cs.shape @@ -2285,7 +2287,7 @@ def test_series_all_null(num_elements, null_type): data = [null_type] * num_elements # Typecast Pandas because None will return `object` dtype - expect = pd.Series(data).astype("float64") + expect = pd.Series(data, dtype="float64") got = gd.Series(data) assert_eq(expect, got) @@ -3245,7 +3247,7 @@ def test_ndim(): assert pdf.ndim == gdf.ndim assert pdf.x.ndim == gdf.x.ndim - s = pd.Series() + s = pd.Series(dtype="float64") gs = gd.Series() assert s.ndim == gs.ndim @@ -3486,7 +3488,7 @@ def test_as_column_types(): col = column.as_column(gd.Series([])) assert_eq(col.dtype, np.dtype("float64")) gds = gd.Series(col) - pds = pd.Series(pd.Series([])) + pds = pd.Series(pd.Series([], dtype="float64")) assert_eq(pds, gds) @@ -3521,7 +3523,7 @@ def test_as_column_types(): assert_eq(pds, gds) - pds = pd.Series([]) + pds = pd.Series([], dtype="float64") gds = gd.Series(column.as_column(pds)) assert_eq(pds, gds) @@ -3857,7 +3859,7 @@ def test_create_dataframe_column(): ], ) def test_series_values_host_property(data): - pds = pd.Series(data) + pds = pd.Series(data, dtype=None if len(data) else "float64") gds = gd.Series(data) np.testing.assert_array_equal(pds.values, gds.values_host) @@ -3880,7 +3882,7 @@ def test_series_values_host_property(data): ], ) def test_series_values_property(data): - pds = pd.Series(data) + pds = pd.Series(data, dtype=None if len(data) else "float64") gds = gd.Series(data) gds_vals = gds.values assert 
isinstance(gds_vals, cupy.ndarray) @@ -3988,11 +3990,10 @@ def test_value_counts(): ) def test_isin_numeric(data, values): index = np.random.randint(0, 100, len(data)) - psr = pd.Series(data, index=index) + psr = pd.Series(data, index=index, dtype=None if len(data) else "float64") gsr = gd.Series.from_pandas(psr, nan_as_null=False) expected = psr.isin(values) - print(expected) got = gsr.isin(values) assert_eq(got, expected) @@ -4043,7 +4044,7 @@ def test_isin_numeric(data, values): ], ) def test_isin_datetime(data, values): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "datetime64[ns]") gsr = gd.Series.from_pandas(psr) got = gsr.isin(values) @@ -4072,7 +4073,7 @@ def test_isin_datetime(data, values): ], ) def test_isin_string(data, values): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "float64") gsr = gd.Series.from_pandas(psr) got = gsr.isin(values) @@ -4101,7 +4102,7 @@ def test_isin_string(data, values): ], ) def test_isin_categorical(data, values): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "float64") gsr = gd.Series.from_pandas(psr) got = gsr.isin(values) @@ -4135,7 +4136,7 @@ def test_isin_categorical(data, values): ], ) def test_isin_index(data, values): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "float64") gsr = gd.Series.from_pandas(psr) got = gsr.index.isin(values) @@ -6671,10 +6672,10 @@ def test_dataframe_keys(df): ["abc", "def", "ghi", "xyz", "pqr", "abc"], index=[1, 2, 3, 4, 5, 10], ), - pd.Series(index=["a", "b", "c", "d", "e", "f"]), - pd.Series(index=[10, 11, 12]), - pd.Series(), - pd.Series([]), + pd.Series(index=["a", "b", "c", "d", "e", "f"], dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), + pd.Series(dtype="float64"), + pd.Series([], dtype="float64"), ], ) def test_series_keys(ps): @@ -7248,9 +7249,9 @@ def test_dataframe_size(df): @pytest.mark.parametrize( "ps", [ - pd.Series(), - 
pd.Series(index=[100, 10, 1, 0]), - pd.Series([]), + pd.Series(dtype="float64"), + pd.Series(index=[100, 10, 1, 0], dtype="float64"), + pd.Series([], dtype="float64"), pd.Series(["a", "b", "c", "d"]), pd.Series(["a", "b", "c", "d"], index=[0, 1, 10, 11]), ], @@ -7292,13 +7293,16 @@ def test_dataframe_init_with_columns(data, columns): "data, ignore_dtype", [ ([pd.Series([1, 2, 3])], False), - ([pd.Series(index=[1, 2, 3])], False), - ([pd.Series(name="empty series name")], False), - ([pd.Series([1]), pd.Series([]), pd.Series([3])], False), + ([pd.Series(index=[1, 2, 3], dtype="float64")], False), + ([pd.Series(name="empty series name", dtype="float64")], False), + ( + [pd.Series([1]), pd.Series([], dtype="float64"), pd.Series([3])], + False, + ), ( [ pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([]), + pd.Series([], dtype="float64"), pd.Series([3], name="series that is named"), ], False, @@ -7315,16 +7319,16 @@ def test_dataframe_init_with_columns(data, columns): ( [ pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([]), - pd.Series(index=[10, 11, 12]), + pd.Series([], dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), ], False, ), ( [ pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], name="abc"), - pd.Series(index=[10, 11, 12]), + pd.Series([], name="abc", dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), ], False, ), @@ -7357,17 +7361,21 @@ def test_dataframe_init_from_series_list(data, ignore_dtype, columns): "data, ignore_dtype, index", [ ([pd.Series([1, 2, 3])], False, ["a", "b", "c"]), - ([pd.Series(index=[1, 2, 3])], False, ["a", "b"]), - ([pd.Series(name="empty series name")], False, ["index1"]), + ([pd.Series(index=[1, 2, 3], dtype="float64")], False, ["a", "b"]), ( - [pd.Series([1]), pd.Series([]), pd.Series([3])], + [pd.Series(name="empty series name", dtype="float64")], + False, + ["index1"], + ), + ( + [pd.Series([1]), pd.Series([], dtype="float64"), pd.Series([3])], False, 
["0", "2", "1"], ), ( [ pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([]), + pd.Series([], dtype="float64"), pd.Series([3], name="series that is named"), ], False, @@ -7390,8 +7398,8 @@ def test_dataframe_init_from_series_list(data, ignore_dtype, columns): ( [ pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([]), - pd.Series(index=[10, 11, 12]), + pd.Series([], dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), ], False, ["a", "b", "c"], @@ -7399,8 +7407,8 @@ def test_dataframe_init_from_series_list(data, ignore_dtype, columns): ( [ pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], name="abc"), - pd.Series(index=[10, 11, 12]), + pd.Series([], name="abc", dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), ], False, ["a", "v", "z"], @@ -7440,7 +7448,7 @@ def test_dataframe_init_from_series_list_with_index( ( [ pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([]), + pd.Series([], dtype="float64"), pd.Series([3], name="series that is named"), ], ["_", "+"], @@ -7864,6 +7872,10 @@ def test_dataframe_error_equality(df1, df2, op): ], dtype="object", ), + "c": gd.Series( + [0.1, None, 0.2, None, 3, 4, 1000, None], + dtype="float64", + ), } ), pd.DataFrame( @@ -7885,6 +7897,10 @@ def test_dataframe_error_equality(df1, df2, op): ], dtype=pd.StringDtype(), ), + "c": pd.Series( + [0.1, None, 0.2, None, 3, 4, 1000, None], + dtype=pd.Float64Dtype(), + ), } ), ), diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 7e545022eb8..b59f76bd8bf 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -411,7 +411,7 @@ def test_datetime_to_arrow(dtype): "nulls", ["none", pytest.param("some", marks=pytest.mark.xfail)] ) def test_datetime_unique(data, nulls): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "datetime64[ns]") if len(data) > 0: if nulls == "some": @@ -435,7 
+435,7 @@ def test_datetime_unique(data, nulls): ) @pytest.mark.parametrize("nulls", ["none", "some"]) def test_datetime_nunique(data, nulls): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "datetime64[ns]") if len(data) > 0: if nulls == "some": @@ -537,7 +537,7 @@ def test_datetime_dataframe(): [ None, [], - pd.Series([]), + pd.Series([], dtype="float64"), pd.Index([]), pd.Series([1, 2, 3]), pd.Series([0, 1, -1]), @@ -670,7 +670,7 @@ def test_to_datetime_not_implemented(): [ 1, [], - pd.Series([]), + pd.Series([], dtype="float64"), pd.Index([]), pd.Series([1, 2, 3]), pd.Series([1, 2.4, 3]), diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index ddd569acf0d..0363534cdd5 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize("inplace", [True, False]) def test_dropna_series(data, nulls, inplace): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "float64") if len(data) > 0: if nulls == "one": diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index 29f1c31a1ee..9331fe5900f 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -56,7 +56,7 @@ def test_duplicated_with_misspelled_column_name(subset): ], ) def test_drop_duplicates_series(data, keep): - pds = Series(data) + pds = Series(data, dtype=None if len(data) else "float64") gds = cudf.from_pandas(pds) assert_df(pds.drop_duplicates(keep=keep), gds.drop_duplicates(keep=keep)) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 294443500a9..bc3af8581b5 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1496,7 +1496,8 @@ def test_groupby_apply_return_series_dataframe(cust_func): @pytest.mark.parametrize( - "pdf", [pd.DataFrame(), 
pd.DataFrame({"a": []}), pd.Series([])] + "pdf", + [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([], dtype="float64")], ) def test_groupby_no_keys(pdf): gdf = cudf.from_pandas(pdf) @@ -1509,7 +1510,8 @@ def test_groupby_no_keys(pdf): @pytest.mark.parametrize( - "pdf", [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([])] + "pdf", + [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([], dtype="float64")], ) def test_groupby_apply_no_keys(pdf): gdf = cudf.from_pandas(pdf) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 9e401316e19..b59e352ff87 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -960,7 +960,9 @@ def test_index_equal_misc(data, other): actual = gd_data.equals(np.array(gd_other)) assert_eq(expected, actual) - expected = pd_data.equals(pd.Series(pd_other)) + expected = pd_data.equals( + pd.Series(pd_other, dtype=None if len(pd_other) else "float64") + ) actual = gd_data.equals(cudf.Series(gd_other)) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 16c24d5afaa..a440c50d48f 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -158,7 +158,7 @@ def test_integer_dataframe(x): @settings(deadline=None) def test_integer_series(x): sr = cudf.Series(x) - ps = pd.Series(x) + ps = pd.Series(x, dtype=None if len(x) else "float64") assert sr.__repr__() == ps.__repr__() @@ -175,7 +175,7 @@ def test_float_dataframe(x): @settings(deadline=None) def test_float_series(x): sr = cudf.Series(x, nan_as_null=False) - ps = pd.Series(x) + ps = pd.Series(x, dtype=None if len(x) else "float64") assert sr.__repr__() == ps.__repr__() @@ -261,6 +261,7 @@ def test_generic_index(length, dtype): psr = pd.Series( range(length), index=np.random.randint(0, high=100, size=length).astype(dtype), + dtype="float64" if length == 0 else None, ) gsr = cudf.Series.from_pandas(psr) diff 
--git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index 1ae5bab0da4..794d3be889a 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -37,7 +37,7 @@ def test_rolling_series_basic(data, index, agg, nulls, center): elif nulls == "all": data = [np.nan] * len(data) - psr = pd.Series(data, index=index) + psr = pd.Series(data, index=index, dtype=None if len(data) else "float64") gsr = cudf.Series(psr) for window_size in range(1, len(data) + 1): for min_periods in range(1, window_size + 1): @@ -99,13 +99,7 @@ def test_rolling_dataframe_basic(data, agg, nulls, center): pytest.param("min"), pytest.param("max"), pytest.param("mean"), - pytest.param( - "count", # Does not follow similar conventions as - # with non-offset columns - marks=pytest.mark.xfail( - reason="Differs from pandas behaviour here" - ), - ), + pytest.param("count"), ], ) def test_rolling_with_offset(agg): @@ -218,7 +212,7 @@ def test_rolling_getitem_window(): @pytest.mark.parametrize("center", [True, False]) def test_rollling_series_numba_udf_basic(data, index, center): - psr = pd.Series(data, index=index) + psr = pd.Series(data, index=index, dtype=None if len(data) else "float64") gsr = cudf.from_pandas(psr) def some_func(A): diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index a19b88caf4c..3ffd7786057 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -28,7 +28,7 @@ def _series_na_data(): pd.Series([0, 1, 2, 3, 4]), pd.Series(["a", "b", "u", "h", "d"]), pd.Series([None, None, np.nan, None, np.inf, -np.inf]), - pd.Series([]), + pd.Series([], dtype="float64"), pd.Series( [pd.NaT, pd.Timestamp("1939-05-27"), pd.Timestamp("1940-04-25")] ), @@ -383,7 +383,7 @@ def test_series_tolist(data): [[], [None, None], ["a"], ["a", "b", "c"] * 500, [1.0, 2.0, 0.3] * 57], ) def test_series_size(data): - psr = pd.Series(data) + psr = pd.Series(data, 
dtype=None if len(data) else "float64") gsr = cudf.Series(data) assert_eq(psr.size, gsr.size) @@ -490,7 +490,7 @@ def test_series_factorize(data, na_sentinel): @pytest.mark.parametrize("normalize", [True, False]) @pytest.mark.parametrize("nulls", ["none", "some"]) def test_series_datetime_value_counts(data, nulls, normalize, dropna): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "datetime64[ns]") if len(data) > 0: if nulls == "one": @@ -733,7 +733,8 @@ def test_series_notnull_notna(ps, nan_as_null): "sr1", [pd.Series([10, 11, 12], index=["a", "b", "z"]), pd.Series(["a"])] ) @pytest.mark.parametrize( - "sr2", [pd.Series([]), pd.Series(["a", "a", "c", "z", "A"])] + "sr2", + [pd.Series([], dtype="float64"), pd.Series(["a", "a", "c", "z", "A"])], ) @pytest.mark.parametrize( "op", @@ -852,6 +853,10 @@ def test_series_memory_usage(): dtype=pd.StringDtype(), ), ), + ( + cudf.Series([1, 2, None, 10.2, None], dtype="float32",), + pd.Series([1, 2, None, 10.2, None], dtype=pd.Float32Dtype(),), + ), ], ) def test_series_to_pandas_nullable_dtypes(sr, expected_psr): diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index c06fdd4a48e..2f0b51ba377 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -204,7 +204,7 @@ def test_approx_quantiles_int(): @pytest.mark.parametrize("q", [[], 0.5, 1, 0.234, [0.345], [0.243, 0.5, 1]]) def test_misc_quantiles(data, q): - pdf_series = pd.Series(data) + pdf_series = pd.Series(data, dtype=None if len(data) else "float64") gdf_series = Series(data) expected = pdf_series.quantile(q) @@ -434,13 +434,13 @@ def test_df_corr(): ) @pytest.mark.parametrize("skipna", [True, False, None]) def test_nans_stats(data, ops, skipna): - psr = pd.Series(data) + psr = pd.Series(data, dtype=None if len(data) else "float64") gsr = Series(data) assert_eq( getattr(psr, ops)(skipna=skipna), getattr(gsr, ops)(skipna=skipna) ) - psr = pd.Series(data) + psr = 
pd.Series(data, dtype=None if len(data) else "float64") gsr = Series(data, nan_as_null=False) # Since there is no concept of `nan_as_null` in pandas, # nulls will be returned in the operations. So only diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 274285990a6..20c86b2a4b7 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -14,6 +14,7 @@ import cudf from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar +from cudf.core._compat import PANDAS_GE_120 _NA_REP = "" _np_pa_dtypes = { @@ -73,6 +74,12 @@ pd.StringDtype(): np.dtype("object"), } +if PANDAS_GE_120: + cudf_dtypes_to_pandas_dtypes[np.dtype("float32")] = pd.Float32Dtype() + cudf_dtypes_to_pandas_dtypes[np.dtype("float64")] = pd.Float64Dtype() + pandas_dtypes_to_cudf_dtypes[pd.Float32Dtype()] = np.dtype("float32") + pandas_dtypes_to_cudf_dtypes[pd.Float64Dtype()] = np.dtype("float64") + SIGNED_INTEGER_TYPES = {"int8", "int16", "int32", "int64"} UNSIGNED_TYPES = {"uint8", "uint16", "uint32", "uint64"} INTEGER_TYPES = SIGNED_INTEGER_TYPES | UNSIGNED_TYPES diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 10719794843..548aca53fd5 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -10,10 +10,10 @@ from dask.dataframe.core import make_meta, meta_nonempty from dask.utils import M -import cudf - import dask_cudf as dgd +import cudf + def test_from_cudf(): np.random.seed(0) @@ -658,7 +658,7 @@ def test_make_meta_backends(index): @pytest.mark.parametrize( "data", [ - pd.Series([]), + pd.Series([], dtype="float64"), pd.DataFrame({"abc": [], "xyz": []}), pd.Series([1, 2, 10, 11]), pd.DataFrame({"abc": [1, 2, 10, 11], "xyz": [100, 12, 120, 1]}), From aea33134d5fde46bb0cbeffde7bfb382b63241b3 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 23 Feb 2021 10:11:18 -0800 Subject: [PATCH 07/35] fix multiple warnings 
--- python/cudf/cudf/core/dataframe.py | 4 +- python/cudf/cudf/core/frame.py | 10 +- python/cudf/cudf/testing/testing.py | 230 +++++++++++++----- python/cudf/cudf/tests/test_dataframe.py | 10 +- python/cudf/cudf/tests/test_hdf.py | 16 +- python/dask_cudf/dask_cudf/tests/test_core.py | 2 +- .../dask_cudf/dask_cudf/tests/test_groupby.py | 8 +- .../dask_cudf/tests/test_reductions.py | 8 +- 8 files changed, 194 insertions(+), 94 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 697524dddd8..7912a20f740 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -585,7 +585,9 @@ def deserialize(cls, header, frames): def dtypes(self): """Return the dtypes in this object.""" return pd.Series( - [x.dtype for x in self._data.columns], index=self._data.names + [x.dtype for x in self._data.columns], + index=self._data.names, + dtype=None if len(self._data.names) else "float64", ) @property diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index e763a164003..cf956ec2654 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3,7 +3,6 @@ import copy import functools -import operator import warnings from collections import OrderedDict, abc as abc from typing import TYPE_CHECKING, Any, Dict, Tuple, TypeVar, overload @@ -27,7 +26,6 @@ min_scalar_type, ) - T = TypeVar("T", bound="Frame") if TYPE_CHECKING: @@ -340,9 +338,11 @@ def _concat( np.intersect1d, all_columns_list ) # get column names not present in all objs - non_intersecting_columns = ( - functools.reduce(operator.or_, (obj.columns for obj in objs)) - ^ intersecting_columns + union_of_columns = objs[0].columns + for obj in objs[1:]: + union_of_columns = union_of_columns.union(obj.columns) + non_intersecting_columns = union_of_columns.symmetric_difference( + intersecting_columns ) names = OrderedDict.fromkeys(intersecting_columns).keys() diff --git 
a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 2048e574acc..2e49ee0bc43 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -8,6 +8,7 @@ import pandas as pd import cudf +from cudf.core._compat import PANDAS_GE_110 from cudf.utils.dtypes import is_categorical_dtype @@ -91,6 +92,8 @@ def assert_column_equal( check_datetimelike_compat=False, check_categorical=True, check_category_order=True, + rtol=1e-05, + atol=1e-08, obj="ColumnBase", ): """ @@ -122,6 +125,10 @@ def assert_column_equal( Whether to compare internal Categorical exactly. check_category_order : bool, default True Whether to compare category order of internal Categoricals + rtol : float, default 1e-5 + Relative tolerance. Only used when `check_exact` is False. + atol : float, default 1e-8 + Absolute tolerance. Only used when `check_exact` is False. obj : str, default ‘ColumnBase’ Specify object name being compared, internally used to show appropriate assertion message. @@ -165,6 +172,8 @@ def assert_column_equal( exact=check_dtype, check_exact=True, check_categorical=False, + rtol=rtol, + atol=atol, ) assert_column_equal( left.codes, @@ -173,6 +182,8 @@ def assert_column_equal( check_exact=True, check_categorical=False, check_category_order=False, + rtol=rtol, + atol=atol, ) if left.ordered != right.ordered: @@ -220,6 +231,8 @@ def assert_index_equal( check_less_precise: Union[bool, int] = False, check_exact: bool = True, check_categorical: bool = True, + rtol: float = 1e-5, + atol: float = 1e-8, obj: str = "Index", ): """ @@ -247,6 +260,10 @@ def assert_index_equal( Whether to compare number exactly. check_categorical : bool, default True Whether to compare internal Categorical exactly. + rtol : float, default 1e-5 + Relative tolerance. Only used when `check_exact` is False. + atol : float, default 1e-8 + Absolute tolerance. Only used when `check_exact` is False. 
obj : str, default ‘Index’ Specify object name being compared, internally used to show appropriate assertion message. @@ -304,15 +321,27 @@ def assert_index_equal( llevel = cudf.Index(left._columns[level], name=left.names[level]) rlevel = cudf.Index(right._columns[level], name=right.names[level]) mul_obj = f"MultiIndex level [{level}]" - assert_index_equal( - llevel, - rlevel, - exact=check_exact, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - obj=mul_obj, - ) + if PANDAS_GE_110: + assert_index_equal( + llevel, + rlevel, + exact=check_exact, + check_names=check_names, + check_exact=check_exact, + rtol=rtol, + atol=atol, + obj=mul_obj, + ) + else: + assert_index_equal( + llevel, + rlevel, + exact=check_exact, + check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, + obj=mul_obj, + ) return assert_column_equal( @@ -343,6 +372,8 @@ def assert_series_equal( check_datetimelike_compat=False, check_categorical=True, check_category_order=True, + rtol=1e-5, + atol=1e-8, obj="Series", ): """ @@ -380,6 +411,10 @@ def assert_series_equal( Whether to compare internal Categorical exactly. check_category_order : bool, default True Whether to compare category order of internal Categoricals + rtol : float, default 1e-5 + Relative tolerance. Only used when `check_exact` is False. + atol : float, default 1e-8 + Absolute tolerance. Only used when `check_exact` is False. obj : str, default ‘Series’ Specify object name being compared, internally used to show appropriate assertion message. 
@@ -423,28 +458,55 @@ def assert_series_equal( raise_assert_detail(obj, "Series length are different", msg1, msg2) # index comparison - assert_index_equal( - left.index, - right.index, - exact=check_index_type, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_categorical=check_categorical, - obj=f"{obj}.index", - ) + if PANDAS_GE_110: + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) + else: + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, + check_categorical=check_categorical, + obj=f"{obj}.index", + ) - assert_column_equal( - left._column, - right._column, - check_dtype=check_dtype, - check_column_type=check_series_type, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_datetimelike_compat=check_datetimelike_compat, - check_categorical=check_categorical, - check_category_order=check_category_order, - ) + if PANDAS_GE_110: + assert_column_equal( + left._column, + right._column, + check_dtype=check_dtype, + check_column_type=check_series_type, + check_exact=check_exact, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + check_category_order=check_category_order, + rtol=rtol, + atol=atol, + ) + else: + assert_column_equal( + left._column, + right._column, + check_dtype=check_dtype, + check_column_type=check_series_type, + check_less_precise=check_less_precise, + check_exact=check_exact, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + check_category_order=check_category_order, + ) # metadata comparison if check_names and (left.name != right.name): @@ -460,13 +522,14 @@ def assert_frame_equal( 
check_index_type="equiv", check_column_type="equiv", check_frame_type=True, - check_less_precise=False, - by_blocks=False, check_names=True, + by_blocks=False, check_exact=False, check_datetimelike_compat=False, check_categorical=True, check_like=False, + rtol=1e-5, + atol=1e-8, obj="DataFrame", ): """ @@ -493,8 +556,6 @@ def assert_frame_equal( and similar to pandas. check_frame_type : bool, default True Whether to check the DataFrame class is identical. - check_less_precise : bool or int, default False - Not yet supported check_names : bool, default True Whether to check that the names attribute for both the index and column attributes of the DataFrame is identical. @@ -512,6 +573,10 @@ def assert_frame_equal( If True, ignore the order of index & columns. Note: index labels must match their respective rows (same as in columns) - same labels must be with the same data. + rtol : float, default 1e-5 + Relative tolerance. Only used when `check_exact` is False. + atol : float, default 1e-8 + Absolute tolerance. Only used when `check_exact` is False. obj : str, default ‘DataFrame’ Specify object name being compared, internally used to show appropriate assertion message. 
@@ -568,40 +633,73 @@ def assert_frame_equal( left, right = left.reindex(index=right.index), right right = right[list(left._data.names)] - if check_less_precise: - raise NotImplementedError("check_less_precise is not yet supported") - # index comparison - assert_index_equal( - left.index, - right.index, - exact=check_index_type, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_categorical=check_categorical, - obj=f"{obj}.index", - ) - - pd.testing.assert_index_equal( - left.columns, - right.columns, - exact=check_column_type, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_categorical=check_categorical, - obj=f"{obj}.columns", - ) + if PANDAS_GE_110: + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) + else: + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + obj=f"{obj}.index", + ) - for col in left.columns: - assert_column_equal( - left._data[col], - right._data[col], - check_dtype=check_dtype, - check_less_precise=check_less_precise, + if PANDAS_GE_110: + pd.testing.assert_index_equal( + left.columns, + right.columns, + exact=check_column_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f"{obj}.columns", + ) + else: + pd.testing.assert_index_equal( + left.columns, + right.columns, + exact=check_column_type, + check_names=check_names, check_exact=check_exact, - check_datetimelike_compat=check_datetimelike_compat, check_categorical=check_categorical, - obj=f'Column name="{col}"', + obj=f"{obj}.columns", ) + + for col in left.columns: + if PANDAS_GE_110: + assert_column_equal( + 
left._data[col], + right._data[col], + check_dtype=check_dtype, + check_exact=check_exact, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f'Column name="{col}"', + ) + else: + assert_column_equal( + left._data[col], + right._data[col], + check_dtype=check_dtype, + check_exact=check_exact, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + obj=f'Column name="{col}"', + ) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index ecd1f42f4a7..64fc6fb1ab9 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -792,7 +792,7 @@ def test_dataframe_to_string(): def test_dataframe_to_string_wide(monkeypatch): - monkeypatch.setenv("COLUMNS", 79) + monkeypatch.setenv("COLUMNS", "79") # Test basic df = gd.DataFrame() for i in range(100): @@ -3340,7 +3340,9 @@ def test_all(data): # Pandas treats `None` in object type columns as True for some reason, so # replacing with `False` if np.array(data).ndim <= 1: - pdata = pd.Series(data).replace([None], False) + pdata = pd.Series( + data, dtype=None if len(data) else "float64" + ).replace([None], False) gdata = gd.Series.from_pandas(pdata) else: pdata = pd.DataFrame(data, columns=["a", "b"]).replace([None], False) @@ -3393,7 +3395,7 @@ def test_all(data): @pytest.mark.parametrize("axis", [0, 1]) def test_any(data, axis): if np.array(data).ndim <= 1: - pdata = pd.Series(data) + pdata = pd.Series(data, dtype=None if len(data) else "float64") gdata = gd.Series.from_pandas(pdata) if axis == 1: @@ -4591,7 +4593,7 @@ def test_rowwise_ops(data, op, skipna): expected = getattr(pdf, op)(axis=1, skipna=skipna) got = getattr(gdf, op)(axis=1, skipna=skipna) - assert_eq(expected, got, check_less_precise=7) + assert_eq(expected, got, check_exact=False) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_hdf.py 
b/python/cudf/cudf/tests/test_hdf.py index d5b18a08281..f908d5f51f5 100644 --- a/python/cudf/cudf/tests/test_hdf.py +++ b/python/cudf/cudf/tests/test_hdf.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. import os from string import ascii_letters @@ -14,7 +14,7 @@ import tables # noqa F401 except ImportError: pytest.skip( - "PyTables is not installed and is required for HDF " "reading/writing", + "PyTables is not installed and is required for HDF reading/writing", allow_module_level=True, ) @@ -34,7 +34,7 @@ def pdf(request): nrows=nrows, ncols=ncols, data_gen_f=lambda r, c: r, r_idx_type="i" ) # Delete the name of the column index, and rename the row index - del test_pdf.columns.name + test_pdf.columns.name = None test_pdf.index.name = "test_index" # Cast all the column dtypes to objects, rename them, and then cast to @@ -94,14 +94,16 @@ def test_hdf_reader(hdf_files, columns): expect_df = pd.read_hdf(hdf_df_file, columns=columns) got_df = cudf.read_hdf(hdf_df_file, columns=columns) - assert_eq(expect_df, got_df, check_categorical=False) + assert_eq( + expect_df, got_df, check_categorical=False, check_index_type=False + ) for column in hdf_series.keys(): expect_series = pd.read_hdf(hdf_series[column]) got_series = cudf.read_hdf(hdf_series[column]) - assert_eq(expect_series, got_series) + assert_eq(expect_series, got_series, check_index_type=False) @pytest.mark.parametrize("format", ["fixed", "table"]) @@ -130,7 +132,7 @@ def test_hdf_writer(tmpdir, pdf, gdf, complib, format): expect = pd.read_hdf(pdf_df_fname) got = pd.read_hdf(gdf_df_fname) - assert_eq(expect, got) + assert_eq(expect, got, check_index_type=False) for column in pdf.columns: pdf_series_fname = tmpdir.join(column + "_" + "pdf_series.hdf") @@ -149,4 +151,4 @@ def test_hdf_writer(tmpdir, pdf, gdf, complib, format): expect_series = pd.read_hdf(pdf_series_fname) got_series = pd.read_hdf(gdf_series_fname) - assert_eq(expect_series, got_series) + 
assert_eq(expect_series, got_series, check_index_type=False) diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 548aca53fd5..ba1fb1882b1 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -717,7 +717,7 @@ def test_dataframe_describe(): ddf = dgd.from_cudf(df, npartitions=4) pddf = dd.from_pandas(pdf, npartitions=4) - dd.assert_eq(ddf.describe(), pddf.describe(), check_less_precise=3) + dd.assert_eq(ddf.describe(), pddf.describe(), check_exact=False) def test_index_map_partitions(): diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 42ca4702987..22ba604d682 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -125,12 +125,8 @@ def test_groupby_std(func): @pytest.mark.parametrize( "func", [ - pytest.param( - lambda df: df.groupby(["a", "b"]).x.sum(), marks=pytest.mark.xfail - ), - pytest.param( - lambda df: df.groupby(["a", "b"]).sum(), marks=pytest.mark.xfail - ), + pytest.param(lambda df: df.groupby(["a", "b"]).x.sum()), + pytest.param(lambda df: df.groupby(["a", "b"]).sum()), pytest.param( lambda df: df.groupby(["a", "b"]).agg({"x", "sum"}), marks=pytest.mark.xfail, diff --git a/python/dask_cudf/dask_cudf/tests/test_reductions.py b/python/dask_cudf/dask_cudf/tests/test_reductions.py index 61a7ae8af1c..b9f5df6e96f 100644 --- a/python/dask_cudf/dask_cudf/tests/test_reductions.py +++ b/python/dask_cudf/dask_cudf/tests/test_reductions.py @@ -2,12 +2,12 @@ import pandas as pd import pytest -import dask.dataframe as dd - -import cudf as gd +from dask import dataframe as dd import dask_cudf as dgd +import cudf as gd + def _make_random_frame(nelem, npartitions=2): df = pd.DataFrame( @@ -75,4 +75,4 @@ def test_rowwise_reductions(data, op): expected = getattr(pddf, op)(axis=1) got = getattr(pddf, op)(axis=1) - 
dd.assert_eq(expected.compute(), got.compute(), check_less_precise=7) + dd.assert_eq(expected.compute(), got.compute(), check_exact=False) From 9fdbfe79c2cbdb5cc7f40a038966dc7cb41e42cd Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 23 Feb 2021 10:15:32 -0800 Subject: [PATCH 08/35] unpin pandas --- conda/recipes/cudf/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index c5f7bd34c25..bf6519bfa4e 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -35,7 +35,7 @@ requirements: - protobuf - python - typing_extensions - - pandas >=1.0,<1.2.0dev0 + - pandas >=1.0 - cupy >7.1.0,<9.0.0a0 - numba >=0.49.0 - numpy From 27a782b96cb06f7f51db7adf07eab20d1a33d66e Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 23 Feb 2021 11:19:08 -0800 Subject: [PATCH 09/35] cleanup --- conda/environments/cudf_dev_cuda10.1.yml | 2 +- conda/environments/cudf_dev_cuda10.2.yml | 2 +- conda/environments/cudf_dev_cuda11.0.yml | 2 +- python/cudf/cudf/core/column/categorical.py | 5 ++++- python/cudf/cudf/core/column/column.py | 21 +++++++++++++++++---- python/cudf/cudf/core/column/numerical.py | 10 ++++++---- 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/conda/environments/cudf_dev_cuda10.1.yml b/conda/environments/cudf_dev_cuda10.1.yml index 69d729aea0c..d8655bea3aa 100644 --- a/conda/environments/cudf_dev_cuda10.1.yml +++ b/conda/environments/cudf_dev_cuda10.1.yml @@ -17,7 +17,7 @@ dependencies: - python>=3.6,<3.8 - numba>=0.49.0,!=0.51.0 - numpy - - pandas>=1.0,<1.2.0dev0 + - pandas>=1.0 - pyarrow=1.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 diff --git a/conda/environments/cudf_dev_cuda10.2.yml b/conda/environments/cudf_dev_cuda10.2.yml index 68c2ffc6aee..f6113921323 100644 --- a/conda/environments/cudf_dev_cuda10.2.yml +++ b/conda/environments/cudf_dev_cuda10.2.yml @@ -17,7 +17,7 @@ dependencies: - python>=3.6,<3.8 - numba>=0.49,!=0.51.0 - numpy - - 
pandas>=1.0,<1.2.0dev0 + - pandas>=1.0 - pyarrow=1.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 4070802e8a8..20481eea580 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -17,7 +17,7 @@ dependencies: - python>=3.6,<3.8 - numba>=0.49,!=0.51.0 - numpy - - pandas>=1.0,<1.2.0dev0 + - pandas>=1.0 - pyarrow=1.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 98c0f1dcbbd..99da0f9970c 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -9,6 +9,7 @@ Dict, Mapping, Optional, + Sequence, Tuple, Union, cast, @@ -867,7 +868,9 @@ def set_base_data(self, value): else: super().set_base_data(value) - def _process_values_for_isin(self, values): + def _process_values_for_isin( + self, values: Sequence + ) -> Tuple[ColumnBase, ColumnBase]: lhs = self # We need to convert values to same type as self, # hence passing dtype=self.dtype diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index c5b3965d98e..a47174a9f20 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -49,12 +49,12 @@ get_time_unit, is_categorical_dtype, is_decimal_dtype, + is_interval_dtype, is_list_dtype, is_numerical_dtype, is_scalar, is_string_dtype, is_struct_dtype, - is_interval_dtype, min_signed_type, min_unsigned_type, np_to_pa_dtype, @@ -876,7 +876,12 @@ def isin(self, values: Sequence) -> ColumnBase: return res - def _process_values_for_isin(self, values): + def _process_values_for_isin( + self, values: Sequence + ) -> Tuple[ColumnBase, ColumnBase]: + """ + Helper function for `isin` which pre-process `values` based on `self`. 
+ """ lhs = self rhs = as_column(values, nan_as_null=False) if lhs.null_count == len(lhs): @@ -885,7 +890,11 @@ def _process_values_for_isin(self, values): rhs = rhs.astype(lhs.dtype) return lhs, rhs - def _isin_earlystop(self, rhs): + def _isin_earlystop(self, rhs: ColumnBase) -> Union[ColumnBase, None]: + """ + Helper function for `isin` which determines possibility of + early-stopping or not. + """ if self.dtype != rhs.dtype: if self.null_count and rhs.null_count: return self.isna() @@ -896,7 +905,11 @@ def _isin_earlystop(self, rhs): else: return None - def _obtain_isin_result(self, rhs): + def _obtain_isin_result(self, rhs: ColumnBase) -> ColumnBase: + """ + Helper function for `isin` which merges `self` & `rhs` + to determine what values of `rhs` exist in `self`. + """ ldf = cudf.DataFrame({"x": self, "orig_order": arange(len(self))}) rdf = cudf.DataFrame( {"x": rhs, "bool": full(len(rhs), True, dtype="bool")} diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 4e07dd531ae..711815ba687 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -3,7 +3,7 @@ from __future__ import annotations from numbers import Number -from typing import Any, Callable, Sequence, Union, cast +from typing import Any, Callable, Sequence, Tuple, Union, cast import numpy as np import pandas as pd @@ -248,15 +248,17 @@ def std( ) -> float: return self.reduce("std", skipna=skipna, dtype=dtype, ddof=ddof) - def _process_values_for_isin(self, values): - lhs = self + def _process_values_for_isin( + self, values: Sequence + ) -> Tuple[ColumnBase, ColumnBase]: + lhs = cast("cudf.core.column.ColumnBase", self) rhs = as_column(values, nan_as_null=False) if isinstance(rhs, NumericalColumn): rhs = rhs.astype(dtype=self.dtype) if lhs.null_count == len(lhs): - lhs = lhs.astype(rhs.dtype) + lhs = cast("cudf.core.column.ColumnBase", lhs.astype(rhs.dtype)) elif rhs.null_count == len(rhs): rhs = 
rhs.astype(lhs.dtype) From 3cde2efd63273cf7c075ddd8e17dce093122fe5c Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 23 Feb 2021 11:19:52 -0800 Subject: [PATCH 10/35] cleanup --- python/cudf/cudf/core/column/numerical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 711815ba687..f9b695e9ce3 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -258,7 +258,7 @@ def _process_values_for_isin( rhs = rhs.astype(dtype=self.dtype) if lhs.null_count == len(lhs): - lhs = cast("cudf.core.column.ColumnBase", lhs.astype(rhs.dtype)) + lhs = lhs.astype(rhs.dtype) elif rhs.null_count == len(rhs): rhs = rhs.astype(lhs.dtype) From 9a3b51ac13c5e0a999ba6de2d77149e774e43ac3 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 23 Feb 2021 11:33:29 -0800 Subject: [PATCH 11/35] copyright --- conda/recipes/cudf/meta.yaml | 2 +- python/cudf/cudf/core/_compat.py | 2 +- python/cudf/cudf/core/column/column.py | 1 + python/cudf/cudf/core/column/datetime.py | 1 + python/cudf/cudf/core/column/timedelta.py | 1 + python/cudf/cudf/core/frame.py | 1 + python/cudf/cudf/core/multiindex.py | 3 ++- python/cudf/cudf/testing/testing.py | 2 +- python/cudf/cudf/tests/test_categorical.py | 2 +- python/cudf/cudf/tests/test_concat.py | 2 +- python/cudf/cudf/tests/test_datetime.py | 3 ++- python/cudf/cudf/tests/test_dropna.py | 3 ++- python/cudf/cudf/tests/test_duplicates.py | 2 +- python/cudf/cudf/tests/test_groupby.py | 2 +- python/cudf/cudf/tests/test_index.py | 2 +- python/cudf/cudf/tests/test_indexing.py | 2 ++ python/cudf/cudf/tests/test_json.py | 2 +- python/cudf/cudf/tests/test_numerical.py | 2 ++ python/cudf/cudf/tests/test_ops.py | 2 ++ python/cudf/cudf/tests/test_reductions.py | 2 +- python/cudf/cudf/tests/test_repr.py | 3 ++- python/cudf/cudf/tests/test_reshape.py | 2 ++ python/cudf/cudf/tests/test_rolling.py | 2 ++ 
python/cudf/cudf/tests/test_series.py | 1 + python/cudf/cudf/tests/test_setitem.py | 2 +- python/cudf/cudf/tests/test_sorting.py | 2 +- python/cudf/cudf/tests/test_stats.py | 2 +- python/cudf/cudf/tests/test_string.py | 3 ++- python/cudf/cudf/tests/test_timedelta.py | 1 + python/dask_cudf/dask_cudf/tests/test_core.py | 2 ++ python/dask_cudf/dask_cudf/tests/test_groupby.py | 2 ++ python/dask_cudf/dask_cudf/tests/test_reductions.py | 2 ++ 32 files changed, 45 insertions(+), 18 deletions(-) diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index bf6519bfa4e..58a1c2d4e8c 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index de19acf9ba4..e8b0259c142 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. import pandas as pd from packaging import version diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index a47174a9f20..28dd521b37c 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1,4 +1,5 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. + from __future__ import annotations import builtins diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 865094d24b1..d32b3c2f8e2 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -1,4 +1,5 @@ # Copyright (c) 2019-2021, NVIDIA CORPORATION. 
+ from __future__ import annotations import datetime as dt diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index dcffdd4b282..82ce1f5f7a0 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -1,4 +1,5 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. + from __future__ import annotations import datetime as dt diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index cf956ec2654..e8858936e83 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1,4 +1,5 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. + from __future__ import annotations import copy diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index f252ed1a9aa..19c5b827d50 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. + import itertools import numbers import pickle diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 2e49ee0bc43..2f9a78aab78 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. from __future__ import annotations diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 67fd07dfcd8..d4dca164992 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
import operator diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 3739e226cc6..d0e31a82b28 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. import re diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index b59f76bd8bf..1d313c9f464 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. + import datetime import datetime as dt import operator diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index 0363534cdd5..92e70543cbe 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. + import numpy as np import pandas as pd import pytest diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index 9331fe5900f..b4a45ed001b 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. import numpy as np import pytest diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index bc3af8581b5..3542a5af537 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
import itertools diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index b59e352ff87..adb6bb33763 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. """ Test related to Index diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index b4558cec01f..6e33b1421c8 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + from itertools import combinations import cupy diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index fe365f4e120..791598110df 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. import copy import itertools diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index 17f73121b1c..a70dd7f4024 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + import numpy as np import pandas as pd import pytest diff --git a/python/cudf/cudf/tests/test_ops.py b/python/cudf/cudf/tests/test_ops.py index 981b0e833a0..8cdef19d9ba 100644 --- a/python/cudf/cudf/tests/test_ops.py +++ b/python/cudf/cudf/tests/test_ops.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+ import numpy as np import pandas as pd import pytest diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index 323f8c62892..80a2e89bf46 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. from __future__ import division, print_function diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index a440c50d48f..1dd3a5c1c8c 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. + import textwrap import cupy as cp diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 030cbe7977d..5e90c2348e4 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + import re import numpy as np diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index 794d3be889a..c701e863c35 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + import math import numpy as np diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 3ffd7786057..b6210be62f3 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -1,4 +1,5 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+ import operator import re from string import ascii_letters, digits diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index ba0509b16d4..2d4791f541c 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. import numpy as np import pandas as pd diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index 7c4cfee3f75..e30194e9eda 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. from itertools import product diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 2f0b51ba377..e8483e44462 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. import re diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 997249e3140..f98f897ef72 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1,4 +1,5 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. + import re from contextlib import ExitStack as does_not_raise from sys import getsizeof diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 90850ff0648..d55bc533ba8 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -1,4 +1,5 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+ import datetime import operator import re diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index ba1fb1882b1..aebdb9fe5b9 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + import random import cupy as cp diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 22ba604d682..2bb80b85568 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + import numpy as np import pandas as pd import pytest diff --git a/python/dask_cudf/dask_cudf/tests/test_reductions.py b/python/dask_cudf/dask_cudf/tests/test_reductions.py index b9f5df6e96f..4da81e4f86c 100644 --- a/python/dask_cudf/dask_cudf/tests/test_reductions.py +++ b/python/dask_cudf/dask_cudf/tests/test_reductions.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+ import numpy as np import pandas as pd import pytest From 7a534b072c034a292232b96be8b443893f7c6d30 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 23 Feb 2021 16:19:37 -0800 Subject: [PATCH 12/35] pin pandas upper bound version --- conda/environments/cudf_dev_cuda10.1.yml | 2 +- conda/environments/cudf_dev_cuda10.2.yml | 2 +- conda/environments/cudf_dev_cuda11.0.yml | 2 +- conda/recipes/cudf/meta.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/environments/cudf_dev_cuda10.1.yml b/conda/environments/cudf_dev_cuda10.1.yml index d8655bea3aa..993f64261d4 100644 --- a/conda/environments/cudf_dev_cuda10.1.yml +++ b/conda/environments/cudf_dev_cuda10.1.yml @@ -17,7 +17,7 @@ dependencies: - python>=3.6,<3.8 - numba>=0.49.0,!=0.51.0 - numpy - - pandas>=1.0 + - pandas>=1.0,<1.3.0dev0 - pyarrow=1.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 diff --git a/conda/environments/cudf_dev_cuda10.2.yml b/conda/environments/cudf_dev_cuda10.2.yml index f6113921323..e69289c51da 100644 --- a/conda/environments/cudf_dev_cuda10.2.yml +++ b/conda/environments/cudf_dev_cuda10.2.yml @@ -17,7 +17,7 @@ dependencies: - python>=3.6,<3.8 - numba>=0.49,!=0.51.0 - numpy - - pandas>=1.0 + - pandas>=1.0,<1.3.0dev0 - pyarrow=1.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 20481eea580..aea87ba423c 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -17,7 +17,7 @@ dependencies: - python>=3.6,<3.8 - numba>=0.49,!=0.51.0 - numpy - - pandas>=1.0 + - pandas>=1.0,<1.3.0dev0 - pyarrow=1.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 58a1c2d4e8c..9afc7094f27 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -35,7 +35,7 @@ requirements: - protobuf - python - typing_extensions - - pandas >=1.0 + - pandas >=1.0,<1.3.0dev0 - cupy 
>7.1.0,<9.0.0a0 - numba >=0.49.0 - numpy From 81d9b5d853337d30ad4b7b25cef4538daccc2b00 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 23 Feb 2021 16:31:31 -0800 Subject: [PATCH 13/35] use only minor version --- python/cudf/cudf/core/_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index e8b0259c142..0fedfcabb46 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -6,4 +6,4 @@ PANDAS_VERSION = version.parse(pd.__version__) PANDAS_GE_100 = PANDAS_VERSION >= version.parse("1.0") PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1") -PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2.0") +PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2") From c5b83a21efc5e51fb550d114c559b5dfdfd0f9c0 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 08:33:04 -0800 Subject: [PATCH 14/35] use functools for finding union --- python/cudf/cudf/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index e8858936e83..dedefeaf9a2 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -339,9 +339,9 @@ def _concat( np.intersect1d, all_columns_list ) # get column names not present in all objs - union_of_columns = objs[0].columns - for obj in objs[1:]: - union_of_columns = union_of_columns.union(obj.columns) + union_of_columns = functools.reduce( + pd.Index.union, [obj.columns for obj in objs] + ) non_intersecting_columns = union_of_columns.symmetric_difference( intersecting_columns ) From 5e6855ddf80ee27b7c395276e1c132988776ffee Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 10:10:19 -0800 Subject: [PATCH 15/35] add utility for creating a pandas series and refactor imports in test_dataframe --- python/cudf/cudf/core/dataframe.py | 8 +- python/cudf/cudf/tests/test_dataframe.py | 1388 
+++++++++++---------- python/cudf/cudf/tests/test_datetime.py | 8 +- python/cudf/cudf/tests/test_dropna.py | 2 +- python/cudf/cudf/tests/test_duplicates.py | 2 +- python/cudf/cudf/tests/test_index.py | 2 +- python/cudf/cudf/tests/test_repr.py | 4 +- python/cudf/cudf/tests/test_rolling.py | 4 +- python/cudf/cudf/tests/test_series.py | 6 +- python/cudf/cudf/tests/test_stats.py | 7 +- python/cudf/cudf/utils/utils.py | 18 +- 11 files changed, 739 insertions(+), 710 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index cea22725002..a22fdf65f9f 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -584,10 +584,8 @@ def deserialize(cls, header, frames): @property def dtypes(self): """Return the dtypes in this object.""" - return pd.Series( - [x.dtype for x in self._data.columns], - index=self._data.names, - dtype=None if len(self._data.names) else "float64", + return cudf.utils.utils.create_pandas_series( + data=[x.dtype for x in self._data.columns], index=self._data.names, ) @property @@ -692,7 +690,7 @@ def __getitem__(self, arg): elif can_convert_to_column(arg): mask = arg if is_list_like(mask): - mask = pd.Series(mask, dtype=None if len(mask) else "float64") + mask = cudf.utils.utils.create_pandas_series(data=mask) if mask.dtype == "bool": return self._apply_boolean_mask(mask) else: diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index f95913e5cee..d792c62a247 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1,11 +1,14 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. 
import array as arr +import datetime import io import operator import random import re +import string import textwrap +from copy import copy import cupy import numpy as np @@ -14,7 +17,7 @@ import pytest from numba import cuda -import cudf as gd +import cudf from cudf.core._compat import PANDAS_GE_110 from cudf.core.column import column from cudf.tests import utils @@ -37,7 +40,7 @@ def test_init_via_list_of_tuples(): ] pdf = pd.DataFrame(data) - gdf = gd.DataFrame(data) + gdf = cudf.DataFrame(data) assert_eq(pdf, gdf) @@ -70,7 +73,7 @@ def test_init_via_list_of_empty_tuples(rows): data = [()] * rows pdf = pd.DataFrame(data) - gdf = gd.DataFrame(data) + gdf = cudf.DataFrame(data) assert_eq( pdf, @@ -109,15 +112,15 @@ def test_init_via_list_of_empty_tuples(rows): ) def test_init_from_series_align(dict_of_series): pdf = pd.DataFrame(dict_of_series) - gdf = gd.DataFrame(dict_of_series) + gdf = cudf.DataFrame(dict_of_series) assert_eq(pdf, gdf) for key in dict_of_series: if isinstance(dict_of_series[key], pd.Series): - dict_of_series[key] = gd.Series(dict_of_series[key]) + dict_of_series[key] = cudf.Series(dict_of_series[key]) - gdf = gd.DataFrame(dict_of_series) + gdf = cudf.DataFrame(dict_of_series) assert_eq(pdf, gdf) @@ -145,7 +148,7 @@ def test_init_from_series_align(dict_of_series): ) def test_init_from_series_align_nonunique(dict_of_series, expectation): with expectation: - gdf = gd.DataFrame(dict_of_series) + gdf = cudf.DataFrame(dict_of_series) if expectation == does_not_raise(): pdf = pd.DataFrame(dict_of_series) @@ -160,10 +163,10 @@ def test_init_unaligned_with_index(): }, index=[7, 8, 9], ) - gdf = gd.DataFrame( + gdf = cudf.DataFrame( { - "a": gd.Series([1.0, 2.0, 3.0], index=[4, 5, 6]), - "b": gd.Series([1.0, 2.0, 3.0], index=[1, 2, 3]), + "a": cudf.Series([1.0, 2.0, 3.0], index=[4, 5, 6]), + "b": cudf.Series([1.0, 2.0, 3.0], index=[1, 2, 3]), }, index=[7, 8, 9], ) @@ -174,7 +177,7 @@ def test_init_unaligned_with_index(): def test_series_basic(): # Make 
series from buffer a1 = np.arange(10, dtype=np.float64) - series = gd.Series(a1) + series = cudf.Series(a1) assert len(series) == 10 np.testing.assert_equal(series.to_array(), np.hstack([a1])) @@ -183,8 +186,8 @@ def test_series_from_cupy_scalars(): data = [0.1, 0.2, 0.3] data_np = np.array(data) data_cp = cupy.array(data) - s_np = gd.Series([data_np[0], data_np[2]]) - s_cp = gd.Series([data_cp[0], data_cp[2]]) + s_np = cudf.Series([data_np[0], data_np[2]]) + s_cp = cudf.Series([data_cp[0], data_cp[2]]) assert_eq(s_np, s_cp) @@ -196,7 +199,7 @@ def test_append_index(a, b): df["a"] = a df["b"] = b - gdf = gd.DataFrame() + gdf = cudf.DataFrame() gdf["a"] = a gdf["b"] = b @@ -218,17 +221,17 @@ def test_series_init_none(): # test for creating empty series # 1: without initializing - sr1 = gd.Series() + sr1 = cudf.Series() got = sr1.to_string() - print(got) + expect = "Series([], dtype: float64)" # values should match despite whitespace difference assert got.split() == expect.split() # 2: Using `None` as an initializer - sr2 = gd.Series(None) + sr2 = cudf.Series(None) got = sr2.to_string() - print(got) + expect = "Series([], dtype: float64)" # values should match despite whitespace difference assert got.split() == expect.split() @@ -236,7 +239,7 @@ def test_series_init_none(): def test_dataframe_basic(): np.random.seed(0) - df = gd.DataFrame() + df = cudf.DataFrame() # Populate with cuda memory df["keys"] = np.arange(10, dtype=np.float64) @@ -251,12 +254,12 @@ def test_dataframe_basic(): assert tuple(df.columns) == ("keys", "vals") # Make another dataframe - df2 = gd.DataFrame() + df2 = cudf.DataFrame() df2["keys"] = np.array([123], dtype=np.float64) df2["vals"] = np.array([321], dtype=np.float64) # Concat - df = gd.concat([df, df2]) + df = cudf.concat([df, df2]) assert len(df) == 11 hkeys = np.asarray(np.arange(10, dtype=np.float64).tolist() + [123]) @@ -270,21 +273,19 @@ def test_dataframe_basic(): expect = np.vstack([hkeys, hvals]).T - print(expect) - print(mat) 
np.testing.assert_equal(mat, expect) # test dataframe with tuple name - df_tup = gd.DataFrame() + df_tup = cudf.DataFrame() data = np.arange(10) df_tup[(1, "foobar")] = data np.testing.assert_equal(data, df_tup[(1, "foobar")].to_array()) - df = gd.DataFrame(pd.DataFrame({"a": [1, 2, 3], "c": ["a", "b", "c"]})) + df = cudf.DataFrame(pd.DataFrame({"a": [1, 2, 3], "c": ["a", "b", "c"]})) pdf = pd.DataFrame(pd.DataFrame({"a": [1, 2, 3], "c": ["a", "b", "c"]})) assert_eq(df, pdf) - gdf = gd.DataFrame({"id": [0, 1], "val": [None, None]}) + gdf = cudf.DataFrame({"id": [0, 1], "val": [None, None]}) gdf["val"] = gdf["val"].astype("int") assert gdf["val"].isnull().all() @@ -305,7 +306,7 @@ def test_dataframe_basic(): @pytest.mark.parametrize("inplace", [True, False]) def test_dataframe_drop_columns(pdf, columns, inplace): pdf = pdf.copy() - gdf = gd.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) expected = pdf.drop(columns=columns, inplace=inplace) actual = gdf.drop(columns=columns, inplace=inplace) @@ -333,7 +334,7 @@ def test_dataframe_drop_columns(pdf, columns, inplace): @pytest.mark.parametrize("inplace", [True, False]) def test_dataframe_drop_labels_axis_0(pdf, labels, inplace): pdf = pdf.copy() - gdf = gd.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) expected = pdf.drop(labels=labels, axis=0, inplace=inplace) actual = gdf.drop(labels=labels, axis=0, inplace=inplace) @@ -361,7 +362,7 @@ def test_dataframe_drop_labels_axis_0(pdf, labels, inplace): @pytest.mark.parametrize("inplace", [True, False]) def test_dataframe_drop_index(pdf, index, inplace): pdf = pdf.copy() - gdf = gd.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) expected = pdf.drop(index=index, inplace=inplace) actual = gdf.drop(index=index, inplace=inplace) @@ -426,7 +427,7 @@ def test_dataframe_drop_index(pdf, index, inplace): @pytest.mark.parametrize("inplace", [True, False]) def test_dataframe_drop_multiindex(pdf, index, level, inplace): pdf = pdf.copy() - gdf = gd.from_pandas(pdf) + gdf = 
cudf.from_pandas(pdf) expected = pdf.drop(index=index, inplace=inplace, level=level) actual = gdf.drop(index=index, inplace=inplace, level=level) @@ -453,7 +454,7 @@ def test_dataframe_drop_multiindex(pdf, index, level, inplace): @pytest.mark.parametrize("inplace", [True, False]) def test_dataframe_drop_labels_axis_1(pdf, labels, inplace): pdf = pdf.copy() - gdf = gd.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) expected = pdf.drop(labels=labels, axis=1, inplace=inplace) actual = gdf.drop(labels=labels, axis=1, inplace=inplace) @@ -466,7 +467,7 @@ def test_dataframe_drop_labels_axis_1(pdf, labels, inplace): def test_dataframe_drop_error(): - df = gd.DataFrame({"a": [1], "b": [2], "c": [3]}) + df = cudf.DataFrame({"a": [1], "b": [2], "c": [3]}) pdf = df.to_pandas() assert_exceptions_equal( @@ -511,7 +512,7 @@ def test_dataframe_drop_error(): def test_dataframe_drop_raises(): - df = gd.DataFrame( + df = cudf.DataFrame( {"a": [1, 2, 3], "c": [10, 20, 30]}, index=["x", "y", "z"] ) pdf = df.to_pandas() @@ -556,7 +557,7 @@ def test_dataframe_drop_raises(): def test_dataframe_column_add_drop_via_setitem(): - df = gd.DataFrame() + df = cudf.DataFrame() data = np.asarray(range(10)) df["a"] = data df["b"] = data @@ -573,7 +574,7 @@ def test_dataframe_column_set_via_attr(): data_0 = np.asarray([0, 2, 4, 5]) data_1 = np.asarray([1, 4, 2, 3]) data_2 = np.asarray([2, 0, 3, 0]) - df = gd.DataFrame({"a": data_0, "b": data_1, "c": data_2}) + df = cudf.DataFrame({"a": data_0, "b": data_1, "c": data_2}) for i in range(10): df.c = df.a @@ -586,7 +587,7 @@ def test_dataframe_column_set_via_attr(): def test_dataframe_column_drop_via_attr(): - df = gd.DataFrame({"a": []}) + df = cudf.DataFrame({"a": []}) with pytest.raises(AttributeError): del df.a @@ -597,7 +598,7 @@ def test_dataframe_column_drop_via_attr(): @pytest.mark.parametrize("axis", [0, "index"]) def test_dataframe_index_rename(axis): pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - gdf = 
gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) expect = pdf.rename(mapper={1: 5, 2: 6}, axis=axis) got = gdf.rename(mapper={1: 5, 2: 6}, axis=axis) @@ -621,7 +622,7 @@ def test_dataframe_index_rename(axis): def test_dataframe_MI_rename(): - gdf = gd.DataFrame( + gdf = cudf.DataFrame( {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)} ) gdg = gdf.groupby(["a", "b"]).count() @@ -636,7 +637,7 @@ def test_dataframe_MI_rename(): @pytest.mark.parametrize("axis", [1, "columns"]) def test_dataframe_column_rename(axis): pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) expect = pdf.rename(mapper=lambda name: 2 * name, axis=axis) got = gdf.rename(mapper=lambda name: 2 * name, axis=axis) @@ -659,7 +660,7 @@ def test_dataframe_pop(): pdf = pd.DataFrame( {"a": [1, 2, 3], "b": ["x", "y", "z"], "c": [7.0, 8.0, 9.0]} ) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) # Test non-existing column error with pytest.raises(KeyError) as raises: @@ -686,7 +687,7 @@ def test_dataframe_pop(): # check empty dataframe edge case empty_pdf = pd.DataFrame(columns=["a", "b"]) - empty_gdf = gd.DataFrame(columns=["a", "b"]) + empty_gdf = cudf.DataFrame(columns=["a", "b"]) pb = empty_pdf.pop("b") gb = empty_gdf.pop("b") assert len(pb) == len(gb) @@ -695,7 +696,7 @@ def test_dataframe_pop(): @pytest.mark.parametrize("nelem", [0, 3, 100, 1000]) def test_dataframe_astype(nelem): - df = gd.DataFrame() + df = cudf.DataFrame() data = np.asarray(range(nelem), dtype=np.int32) df["a"] = data assert df["a"].dtype is np.dtype(np.int32) @@ -706,7 +707,7 @@ def test_dataframe_astype(nelem): @pytest.mark.parametrize("nelem", [0, 100]) def test_index_astype(nelem): - df = gd.DataFrame() + df = cudf.DataFrame() data = np.asarray(range(nelem), dtype=np.int32) df["a"] = data assert df.index.dtype is np.dtype(np.int64) @@ -725,13 +726,15 @@ def 
test_dataframe_to_string(): pd.options.display.max_rows = 5 pd.options.display.max_columns = 8 # Test basic - df = gd.DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}) + df = cudf.DataFrame( + {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]} + ) string = str(df) - print(string) + assert string.splitlines()[-1] == "[6 rows x 2 columns]" # Test skipped columns - df = gd.DataFrame( + df = cudf.DataFrame( { "a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16], @@ -740,17 +743,19 @@ def test_dataframe_to_string(): } ) string = df.to_string() - print(string) + assert string.splitlines()[-1] == "[6 rows x 4 columns]" # Test masked - df = gd.DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}) + df = cudf.DataFrame( + {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]} + ) data = np.arange(6) - mask = np.zeros(1, dtype=gd.utils.utils.mask_dtype) + mask = np.zeros(1, dtype=cudf.utils.utils.mask_dtype) mask[0] = 0b00101101 - masked = gd.Series.from_masked_array(data, mask) + masked = cudf.Series.from_masked_array(data, mask) assert masked.null_count == 2 df["c"] = masked @@ -766,11 +771,11 @@ def test_dataframe_to_string(): # null position is correct for i in range(len(values)): if i not in validids: - assert values[i] is gd.NA + assert values[i] is cudf.NA pd.options.display.max_rows = 10 got = df.to_string() - print(got) + expect = """ a b c 0 1 11 0 @@ -787,12 +792,12 @@ def test_dataframe_to_string(): def test_dataframe_to_string_wide(monkeypatch): monkeypatch.setenv("COLUMNS", "79") # Test basic - df = gd.DataFrame() + df = cudf.DataFrame() for i in range(100): df["a{}".format(i)] = list(range(3)) pd.options.display.max_columns = 0 got = df.to_string() - print(got) + expect = """ a0 a1 a2 a3 a4 a5 a6 a7 ... a92 a93 a94 a95 a96 a97 a98 a99 0 0 0 0 0 0 0 0 0 ... 
0 0 0 0 0 0 0 0 @@ -806,9 +811,9 @@ def test_dataframe_to_string_wide(monkeypatch): def test_dataframe_empty_to_string(): # Test for printing empty dataframe - df = gd.DataFrame() + df = cudf.DataFrame() got = df.to_string() - print(got) + expect = "Empty DataFrame\nColumns: []\nIndex: []\n" # values should match despite whitespace difference assert got.split() == expect.split() @@ -816,11 +821,11 @@ def test_dataframe_empty_to_string(): def test_dataframe_emptycolumns_to_string(): # Test for printing dataframe having empty columns - df = gd.DataFrame() + df = cudf.DataFrame() df["a"] = [] df["b"] = [] got = df.to_string() - print(got) + expect = "Empty DataFrame\nColumns: [a, b]\nIndex: []\n" # values should match despite whitespace difference assert got.split() == expect.split() @@ -828,14 +833,12 @@ def test_dataframe_emptycolumns_to_string(): def test_dataframe_copy(): # Test for copying the dataframe using python copy pkg - from copy import copy - - df = gd.DataFrame() + df = cudf.DataFrame() df["a"] = [1, 2, 3] df2 = copy(df) df2["b"] = [4, 5, 6] got = df.to_string() - print(got) + expect = """ a 0 1 @@ -848,12 +851,12 @@ def test_dataframe_copy(): def test_dataframe_copy_shallow(): # Test for copy dataframe using class method - df = gd.DataFrame() + df = cudf.DataFrame() df["a"] = [1, 2, 3] df2 = df.copy() df2["b"] = [4, 2, 3] got = df.to_string() - print(got) + expect = """ a 0 1 @@ -868,7 +871,9 @@ def test_dataframe_dtypes(): dtypes = pd.Series( [np.int32, np.float32, np.float64], index=["c", "a", "b"] ) - df = gd.DataFrame({k: np.ones(10, dtype=v) for k, v in dtypes.iteritems()}) + df = cudf.DataFrame( + {k: np.ones(10, dtype=v) for k, v in dtypes.iteritems()} + ) assert df.dtypes.equals(dtypes) @@ -879,7 +884,7 @@ def test_dataframe_add_col_to_object_dataframe(): data = {k: v for (k, v) in zip(cols, [["a"] for _ in cols])} - gdf = gd.DataFrame(data) + gdf = cudf.DataFrame(data) gdf = gdf[:0] assert gdf.dtypes.equals(df.dtypes) @@ -892,7 +897,7 @@ def 
test_dataframe_add_col_to_object_dataframe(): def test_dataframe_dir_and_getattr(): - df = gd.DataFrame( + df = cudf.DataFrame( { "a": np.ones(10), "b": np.ones(10), @@ -914,13 +919,13 @@ def test_dataframe_dir_and_getattr(): @pytest.mark.parametrize("order", ["C", "F"]) def test_empty_dataframe_as_gpu_matrix(order): - df = gd.DataFrame() + df = cudf.DataFrame() # Check fully empty dataframe. mat = df.as_gpu_matrix(order=order).copy_to_host() assert mat.shape == (0, 0) - df = gd.DataFrame() + df = cudf.DataFrame() nelem = 123 for k in "abc": df[k] = np.random.random(nelem) @@ -932,7 +937,7 @@ def test_empty_dataframe_as_gpu_matrix(order): @pytest.mark.parametrize("order", ["C", "F"]) def test_dataframe_as_gpu_matrix(order): - df = gd.DataFrame() + df = cudf.DataFrame() nelem = 123 for k in "abcd": @@ -953,7 +958,7 @@ def test_dataframe_as_gpu_matrix(order): def test_dataframe_as_gpu_matrix_null_values(): - df = gd.DataFrame() + df = cudf.DataFrame() nelem = 123 na = -10000 @@ -989,7 +994,7 @@ def test_dataframe_append_empty(): "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], } ) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) gdf["newcol"] = 100 pdf["newcol"] = 100 @@ -1006,30 +1011,30 @@ def test_dataframe_setitem_from_masked_object(): np.random.shuffle(mask) ary[mask] = np.nan - test1_null = gd.Series(ary, nan_as_null=True) + test1_null = cudf.Series(ary, nan_as_null=True) assert test1_null.nullable assert test1_null.null_count == 20 - test1_nan = gd.Series(ary, nan_as_null=False) + test1_nan = cudf.Series(ary, nan_as_null=False) assert test1_nan.null_count == 0 - test2_null = gd.DataFrame.from_pandas( + test2_null = cudf.DataFrame.from_pandas( pd.DataFrame({"a": ary}), nan_as_null=True ) assert test2_null["a"].nullable assert test2_null["a"].null_count == 20 - test2_nan = gd.DataFrame.from_pandas( + test2_nan = cudf.DataFrame.from_pandas( pd.DataFrame({"a": ary}), nan_as_null=False ) assert test2_nan["a"].null_count == 0 gpu_ary = 
cupy.asarray(ary) - test3_null = gd.Series(gpu_ary, nan_as_null=True) + test3_null = cudf.Series(gpu_ary, nan_as_null=True) assert test3_null.nullable assert test3_null.null_count == 20 - test3_nan = gd.Series(gpu_ary, nan_as_null=False) + test3_nan = cudf.Series(gpu_ary, nan_as_null=False) assert test3_nan.null_count == 0 - test4 = gd.DataFrame() + test4 = cudf.DataFrame() lst = [1, 2, None, 4, 5, 6, None, 8, 9] test4["lst"] = lst assert test4["lst"].nullable @@ -1041,7 +1046,7 @@ def test_dataframe_append_to_empty(): pdf["a"] = [] pdf["b"] = [1, 2, 3] - gdf = gd.DataFrame() + gdf = cudf.DataFrame() gdf["a"] = [] gdf["b"] = [1, 2, 3] @@ -1049,7 +1054,7 @@ def test_dataframe_append_to_empty(): def test_dataframe_setitem_index_len1(): - gdf = gd.DataFrame() + gdf = cudf.DataFrame() gdf["a"] = [1] gdf["b"] = gdf.index._values @@ -1057,7 +1062,7 @@ def test_dataframe_setitem_index_len1(): def test_assign(): - gdf = gd.DataFrame({"x": [1, 2, 3]}) + gdf = cudf.DataFrame({"x": [1, 2, 3]}) gdf2 = gdf.assign(y=gdf.x + 1) assert list(gdf.columns) == ["x"] assert list(gdf2.columns) == ["x", "y"] @@ -1067,7 +1072,7 @@ def test_assign(): @pytest.mark.parametrize("nrows", [1, 8, 100, 1000]) def test_dataframe_hash_columns(nrows): - gdf = gd.DataFrame() + gdf = cudf.DataFrame() data = np.asarray(range(nrows)) data[0] = data[-1] # make first and last the same gdf["a"] = data @@ -1085,7 +1090,7 @@ def test_dataframe_hash_columns(nrows): out_one = cupy.asnumpy(gdf.hash_columns(["a"])) # First matches last assert out_one[0] == out_one[-1] - # Equivalent to the gd.Series.hash_values() + # Equivalent to the cudf.Series.hash_values() np.testing.assert_array_equal(cupy.asnumpy(gdf.a.hash_values()), out_one) @@ -1094,7 +1099,7 @@ def test_dataframe_hash_columns(nrows): @pytest.mark.parametrize("nkeys", [1, 2]) def test_dataframe_hash_partition(nrows, nparts, nkeys): np.random.seed(123) - gdf = gd.DataFrame() + gdf = cudf.DataFrame() keycols = [] for i in range(nkeys): keyname = 
"key{}".format(i) @@ -1108,7 +1113,7 @@ def test_dataframe_hash_partition(nrows, nparts, nkeys): # Must have correct number of partitions assert len(got) == nparts # All partitions must be DataFrame type - assert all(isinstance(p, gd.DataFrame) for p in got) + assert all(isinstance(p, cudf.DataFrame) for p in got) # Check that all partitions have unique keys part_unique_keys = set() for p in got: @@ -1123,7 +1128,7 @@ def test_dataframe_hash_partition(nrows, nparts, nkeys): @pytest.mark.parametrize("nrows", [3, 10, 50]) def test_dataframe_hash_partition_masked_value(nrows): - gdf = gd.DataFrame() + gdf = cudf.DataFrame() gdf["key"] = np.arange(nrows) gdf["val"] = np.arange(nrows) + 100 bitmask = utils.random_bitmask(nrows) @@ -1144,7 +1149,7 @@ def test_dataframe_hash_partition_masked_value(nrows): @pytest.mark.parametrize("nrows", [3, 10, 50]) def test_dataframe_hash_partition_masked_keys(nrows): - gdf = gd.DataFrame() + gdf = cudf.DataFrame() gdf["key"] = np.arange(nrows) gdf["val"] = np.arange(nrows) + 100 bitmask = utils.random_bitmask(nrows) @@ -1167,14 +1172,14 @@ def test_dataframe_hash_partition_masked_keys(nrows): @pytest.mark.parametrize("keep_index", [True, False]) def test_dataframe_hash_partition_keep_index(keep_index): - gdf = gd.DataFrame( + gdf = cudf.DataFrame( {"val": [1, 2, 3, 4], "key": [3, 2, 1, 4]}, index=[4, 3, 2, 1] ) - expected_df1 = gd.DataFrame( + expected_df1 = cudf.DataFrame( {"val": [1], "key": [3]}, index=[4] if keep_index else None ) - expected_df2 = gd.DataFrame( + expected_df2 = cudf.DataFrame( {"val": [2, 3, 4], "key": [2, 1, 4]}, index=[3, 2, 1] if keep_index else range(1, 4), ) @@ -1187,7 +1192,7 @@ def test_dataframe_hash_partition_keep_index(keep_index): def test_dataframe_hash_partition_empty(): - gdf = gd.DataFrame({"val": [1, 2], "key": [3, 2]}, index=["a", "b"]) + gdf = cudf.DataFrame({"val": [1, 2], "key": [3, 2]}, index=["a", "b"]) parts = gdf.iloc[:0].partition_by_hash(["key"], nparts=3) assert len(parts) == 3 for part 
in parts: @@ -1201,33 +1206,33 @@ def test_dataframe_concat_different_numerical_columns(dtype1, dtype2): df2 = pd.DataFrame(dict(x=pd.Series(np.arange(5)).astype(dtype2))) if dtype1 != dtype2 and "datetime" in dtype1 or "datetime" in dtype2: with pytest.raises(TypeError): - gd.concat([df1, df2]) + cudf.concat([df1, df2]) else: pres = pd.concat([df1, df2]) - gres = gd.concat([gd.from_pandas(df1), gd.from_pandas(df2)]) - assert_eq(gd.from_pandas(pres), gres) + gres = cudf.concat([cudf.from_pandas(df1), cudf.from_pandas(df2)]) + assert_eq(cudf.from_pandas(pres), gres) def test_dataframe_concat_different_column_types(): - df1 = gd.Series([42], dtype=np.float) - df2 = gd.Series(["a"], dtype="category") + df1 = cudf.Series([42], dtype=np.float) + df2 = cudf.Series(["a"], dtype="category") with pytest.raises(ValueError): - gd.concat([df1, df2]) + cudf.concat([df1, df2]) - df2 = gd.Series(["a string"]) + df2 = cudf.Series(["a string"]) with pytest.raises(TypeError): - gd.concat([df1, df2]) + cudf.concat([df1, df2]) @pytest.mark.parametrize( - "df_1", [gd.DataFrame({"a": [1, 2], "b": [1, 3]}), gd.DataFrame({})] + "df_1", [cudf.DataFrame({"a": [1, 2], "b": [1, 3]}), cudf.DataFrame({})] ) @pytest.mark.parametrize( - "df_2", [gd.DataFrame({"a": [], "b": []}), gd.DataFrame({})] + "df_2", [cudf.DataFrame({"a": [], "b": []}), cudf.DataFrame({})] ) def test_concat_empty_dataframe(df_1, df_2): - got = gd.concat([df_1, df_2]) + got = cudf.concat([df_1, df_2]) expect = pd.concat([df_1.to_pandas(), df_2.to_pandas()], sort=False) # ignoring dtypes as pandas upcasts int to float @@ -1254,8 +1259,8 @@ def test_concat_empty_dataframe(df_1, df_2): ], ) def test_concat_different_column_dataframe(df1_d, df2_d): - got = gd.concat( - [gd.DataFrame(df1_d), gd.DataFrame(df2_d), gd.DataFrame(df1_d)], + got = cudf.concat( + [cudf.DataFrame(df1_d), cudf.DataFrame(df2_d), cudf.DataFrame(df1_d)], sort=False, ) @@ -1279,7 +1284,7 @@ def test_concat_different_column_dataframe(df1_d, df2_d): ) 
@pytest.mark.parametrize("ser_2", [pd.Series([], dtype="float64")]) def test_concat_empty_series(ser_1, ser_2): - got = gd.concat([gd.Series(ser_1), gd.Series(ser_2)]) + got = cudf.concat([cudf.Series(ser_1), cudf.Series(ser_2)]) expect = pd.concat([ser_1, ser_2]) assert_eq(got, expect) @@ -1290,49 +1295,49 @@ def test_concat_with_axis(): df2 = pd.DataFrame(dict(a=np.arange(5), b=np.arange(5))) concat_df = pd.concat([df1, df2], axis=1) - cdf1 = gd.from_pandas(df1) - cdf2 = gd.from_pandas(df2) + cdf1 = cudf.from_pandas(df1) + cdf2 = cudf.from_pandas(df2) # concat only dataframes - concat_cdf = gd.concat([cdf1, cdf2], axis=1) + concat_cdf = cudf.concat([cdf1, cdf2], axis=1) assert_eq(concat_cdf, concat_df) # concat only series concat_s = pd.concat([df1.x, df1.y], axis=1) - cs1 = gd.Series.from_pandas(df1.x) - cs2 = gd.Series.from_pandas(df1.y) - concat_cdf_s = gd.concat([cs1, cs2], axis=1) + cs1 = cudf.Series.from_pandas(df1.x) + cs2 = cudf.Series.from_pandas(df1.y) + concat_cdf_s = cudf.concat([cs1, cs2], axis=1) assert_eq(concat_cdf_s, concat_s) # concat series and dataframes s3 = pd.Series(np.random.random(5)) - cs3 = gd.Series.from_pandas(s3) + cs3 = cudf.Series.from_pandas(s3) - concat_cdf_all = gd.concat([cdf1, cs3, cdf2], axis=1) + concat_cdf_all = cudf.concat([cdf1, cs3, cdf2], axis=1) concat_df_all = pd.concat([df1, s3, df2], axis=1) assert_eq(concat_cdf_all, concat_df_all) # concat manual multi index - midf1 = gd.from_pandas(df1) - midf1.index = gd.MultiIndex( + midf1 = cudf.from_pandas(df1) + midf1.index = cudf.MultiIndex( levels=[[0, 1, 2, 3], [0, 1]], codes=[[0, 1, 2, 3, 2], [0, 1, 0, 1, 0]] ) midf2 = midf1[2:] - midf2.index = gd.MultiIndex( + midf2.index = cudf.MultiIndex( levels=[[3, 4, 5], [2, 0]], codes=[[0, 1, 2], [1, 0, 1]] ) mipdf1 = midf1.to_pandas() mipdf2 = midf2.to_pandas() - assert_eq(gd.concat([midf1, midf2]), pd.concat([mipdf1, mipdf2])) - assert_eq(gd.concat([midf2, midf1]), pd.concat([mipdf2, mipdf1])) + assert_eq(cudf.concat([midf1, 
midf2]), pd.concat([mipdf1, mipdf2])) + assert_eq(cudf.concat([midf2, midf1]), pd.concat([mipdf2, mipdf1])) assert_eq( - gd.concat([midf1, midf2, midf1]), pd.concat([mipdf1, mipdf2, mipdf1]) + cudf.concat([midf1, midf2, midf1]), pd.concat([mipdf1, mipdf2, mipdf1]) ) # concat groupby multi index - gdf1 = gd.DataFrame( + gdf1 = cudf.DataFrame( { "x": np.random.randint(0, 10, 10), "y": np.random.randint(0, 10, 10), @@ -1346,8 +1351,8 @@ def test_concat_with_axis(): pdg1 = gdg1.to_pandas() pdg2 = gdg2.to_pandas() - assert_eq(gd.concat([gdg1, gdg2]), pd.concat([pdg1, pdg2])) - assert_eq(gd.concat([gdg2, gdg1]), pd.concat([pdg2, pdg1])) + assert_eq(cudf.concat([gdg1, gdg2]), pd.concat([pdg1, pdg2])) + assert_eq(cudf.concat([gdg2, gdg1]), pd.concat([pdg2, pdg1])) # series multi index concat gdgz1 = gdg1.z @@ -1355,15 +1360,15 @@ def test_concat_with_axis(): pdgz1 = gdgz1.to_pandas() pdgz2 = gdgz2.to_pandas() - assert_eq(gd.concat([gdgz1, gdgz2]), pd.concat([pdgz1, pdgz2])) - assert_eq(gd.concat([gdgz2, gdgz1]), pd.concat([pdgz2, pdgz1])) + assert_eq(cudf.concat([gdgz1, gdgz2]), pd.concat([pdgz1, pdgz2])) + assert_eq(cudf.concat([gdgz2, gdgz1]), pd.concat([pdgz2, pdgz1])) @pytest.mark.parametrize("nrows", [0, 3, 10, 100, 1000]) def test_nonmatching_index_setitem(nrows): np.random.seed(0) - gdf = gd.DataFrame() + gdf = cudf.DataFrame() gdf["a"] = np.random.randint(2147483647, size=nrows) gdf["b"] = np.random.randint(2147483647, size=nrows) gdf = gdf.set_index("b") @@ -1374,20 +1379,20 @@ def test_nonmatching_index_setitem(nrows): assert ( gdf["c"] .to_pandas() - .equals(gd.Series(test_values).set_index(gdf._index).to_pandas()) + .equals(cudf.Series(test_values).set_index(gdf._index).to_pandas()) ) def test_from_pandas(): df = pd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) - gdf = gd.DataFrame.from_pandas(df) - assert isinstance(gdf, gd.DataFrame) + gdf = cudf.DataFrame.from_pandas(df) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) s = df.x - gs = 
gd.Series.from_pandas(s) - assert isinstance(gs, gd.Series) + gs = cudf.Series.from_pandas(s) + assert isinstance(gs, cudf.Series) assert_eq(s, gs) @@ -1397,14 +1402,14 @@ def test_from_records(dtypes): h_ary = np.ndarray(shape=(10, 4), dtype=dtypes) rec_ary = h_ary.view(np.recarray) - gdf = gd.DataFrame.from_records(rec_ary, columns=["a", "b", "c", "d"]) + gdf = cudf.DataFrame.from_records(rec_ary, columns=["a", "b", "c", "d"]) df = pd.DataFrame.from_records(rec_ary, columns=["a", "b", "c", "d"]) - assert isinstance(gdf, gd.DataFrame) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) - gdf = gd.DataFrame.from_records(rec_ary) + gdf = cudf.DataFrame.from_records(rec_ary) df = pd.DataFrame.from_records(rec_ary) - assert isinstance(gdf, gd.DataFrame) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) @@ -1426,9 +1431,9 @@ def test_from_records_index(columns, index): [("Rex", 9, 81.0), ("Fido", 3, 27.0)], dtype=[("name", "U10"), ("age", "i4"), ("weight", "f4")], ) - gdf = gd.DataFrame.from_records(rec_ary, columns=columns, index=index) + gdf = cudf.DataFrame.from_records(rec_ary, columns=columns, index=index) df = pd.DataFrame.from_records(rec_ary, columns=columns, index=index) - assert isinstance(gdf, gd.DataFrame) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) @@ -1436,37 +1441,37 @@ def test_dataframe_construction_from_cupy_arrays(): h_ary = np.array([[1, 2, 3], [4, 5, 6]], np.int32) d_ary = cupy.asarray(h_ary) - gdf = gd.DataFrame(d_ary, columns=["a", "b", "c"]) + gdf = cudf.DataFrame(d_ary, columns=["a", "b", "c"]) df = pd.DataFrame(h_ary, columns=["a", "b", "c"]) - assert isinstance(gdf, gd.DataFrame) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) - gdf = gd.DataFrame(d_ary) + gdf = cudf.DataFrame(d_ary) df = pd.DataFrame(h_ary) - assert isinstance(gdf, gd.DataFrame) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) - gdf = gd.DataFrame(d_ary, index=["a", "b"]) + gdf = cudf.DataFrame(d_ary, index=["a", "b"]) 
df = pd.DataFrame(h_ary, index=["a", "b"]) - assert isinstance(gdf, gd.DataFrame) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) - gdf = gd.DataFrame(d_ary) + gdf = cudf.DataFrame(d_ary) gdf = gdf.set_index(keys=0, drop=False) df = pd.DataFrame(h_ary) df = df.set_index(keys=0, drop=False) - assert isinstance(gdf, gd.DataFrame) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) - gdf = gd.DataFrame(d_ary) + gdf = cudf.DataFrame(d_ary) gdf = gdf.set_index(keys=1, drop=False) df = pd.DataFrame(h_ary) df = df.set_index(keys=1, drop=False) - assert isinstance(gdf, gd.DataFrame) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) @@ -1476,7 +1481,7 @@ def test_dataframe_cupy_wrong_dimensions(): with pytest.raises( ValueError, match="records dimension expected 1 or 2 but found: 3" ): - gd.DataFrame(d_ary) + cudf.DataFrame(d_ary) def test_dataframe_cupy_array_wrong_index(): @@ -1487,19 +1492,19 @@ def test_dataframe_cupy_array_wrong_index(): match="Length mismatch: Expected axis has 2 elements, " "new values have 1 elements", ): - gd.DataFrame(d_ary, index=["a"]) + cudf.DataFrame(d_ary, index=["a"]) with pytest.raises( ValueError, match="Length mismatch: Expected axis has 2 elements, " "new values have 1 elements", ): - gd.DataFrame(d_ary, index="a") + cudf.DataFrame(d_ary, index="a") def test_index_in_dataframe_constructor(): a = pd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) - b = gd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) + b = cudf.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) assert_eq(a, b) assert_eq(a.loc[4:], b.loc[4:]) @@ -1520,14 +1525,14 @@ def test_from_arrow(nelem, data_type): padf = pa.Table.from_pandas( df, preserve_index=False ).replace_schema_metadata(None) - gdf = gd.DataFrame.from_arrow(padf) - assert isinstance(gdf, gd.DataFrame) + gdf = cudf.DataFrame.from_arrow(padf) + assert isinstance(gdf, cudf.DataFrame) assert_eq(df, gdf) s = pa.Array.from_pandas(df.a) - gs = gd.Series.from_arrow(s) - assert 
isinstance(gs, gd.Series) + gs = cudf.Series.from_arrow(s) + assert isinstance(gs, cudf.Series) # For some reason PyArrow to_pandas() converts to numpy array and has # better type compatibility @@ -1543,7 +1548,7 @@ def test_to_arrow(nelem, data_type): "b": np.random.randint(0, 1000, nelem).astype(data_type), } ) - gdf = gd.DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(df) pa_df = pa.Table.from_pandas( df, preserve_index=False @@ -1579,8 +1584,8 @@ def test_to_from_arrow_nulls(data_type): time_unit, _ = np.datetime_data(dtype) data_type = pa.timestamp(unit=time_unit) s1 = pa.array([1, None, 3, None, 5], type=data_type) - gs1 = gd.Series.from_arrow(s1) - assert isinstance(gs1, gd.Series) + gs1 = cudf.Series.from_arrow(s1) + assert isinstance(gs1, cudf.Series) # We have 64B padded buffers for nulls whereas Arrow returns a minimal # number of bytes, so only check the first byte in this case np.testing.assert_array_equal( @@ -1590,8 +1595,8 @@ def test_to_from_arrow_nulls(data_type): assert pa.Array.equals(s1, gs1.to_arrow()) s2 = pa.array([None, None, None, None, None], type=data_type) - gs2 = gd.Series.from_arrow(s2) - assert isinstance(gs2, gd.Series) + gs2 = cudf.Series.from_arrow(s2) + assert isinstance(gs2, cudf.Series) # We have 64B padded buffers for nulls whereas Arrow returns a minimal # number of bytes, so only check the first byte in this case np.testing.assert_array_equal( @@ -1604,7 +1609,7 @@ def test_to_from_arrow_nulls(data_type): def test_to_arrow_categorical(): df = pd.DataFrame() df["a"] = pd.Series(["a", "b", "c"], dtype="category") - gdf = gd.DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(df) pa_df = pa.Table.from_pandas( df, preserve_index=False @@ -1624,9 +1629,9 @@ def test_to_arrow_categorical(): def test_from_arrow_missing_categorical(): pd_cat = pd.Categorical(["a", "b", "c"], categories=["a", "b"]) pa_cat = pa.array(pd_cat, from_pandas=True) - gd_cat = gd.Series(pa_cat) + gd_cat = cudf.Series(pa_cat) - assert 
isinstance(gd_cat, gd.Series) + assert isinstance(gd_cat, cudf.Series) assert_eq( pd.Series(pa_cat.to_pandas()), # PyArrow returns a pd.Categorical gd_cat.to_pandas(), @@ -1636,9 +1641,9 @@ def test_from_arrow_missing_categorical(): def test_to_arrow_missing_categorical(): pd_cat = pd.Categorical(["a", "b", "c"], categories=["a", "b"]) pa_cat = pa.array(pd_cat, from_pandas=True) - gd_cat = gd.Series(pa_cat) + gd_cat = cudf.Series(pa_cat) - assert isinstance(gd_cat, gd.Series) + assert isinstance(gd_cat, cudf.Series) assert pa.Array.equals(pa_cat, gd_cat.to_arrow()) @@ -1651,14 +1656,12 @@ def test_from_scalar_typing(data_type): .astype("datetime64[ms]") ) elif data_type.startswith("datetime64"): - from datetime import date - - scalar = np.datetime64(date.today()).astype("datetime64[ms]") + scalar = np.datetime64(datetime.date.today()).astype("datetime64[ms]") data_type = "datetime64[ms]" else: scalar = np.dtype(data_type).type(np.random.randint(0, 5)) - gdf = gd.DataFrame() + gdf = cudf.DataFrame() gdf["a"] = [1, 2, 3, 4, 5] gdf["b"] = scalar assert gdf["b"].dtype == np.dtype(data_type) @@ -1671,35 +1674,35 @@ def test_from_python_array(data_type): data = memoryview(np_arr) data = arr.array(data.format, data) - gs = gd.Series(data) + gs = cudf.Series(data) np.testing.assert_equal(gs.to_array(), np_arr) def test_series_shape(): ps = pd.Series([1, 2, 3, 4]) - cs = gd.Series([1, 2, 3, 4]) + cs = cudf.Series([1, 2, 3, 4]) assert ps.shape == cs.shape def test_series_shape_empty(): ps = pd.Series(dtype="float64") - cs = gd.Series([]) + cs = cudf.Series([]) assert ps.shape == cs.shape def test_dataframe_shape(): pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]}) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) assert pdf.shape == gdf.shape def test_dataframe_shape_empty(): pdf = pd.DataFrame() - gdf = gd.DataFrame() + gdf = cudf.DataFrame() assert pdf.shape == gdf.shape @@ -1709,14 +1712,12 @@ def test_dataframe_shape_empty(): 
@pytest.mark.parametrize("dtype", dtypes) @pytest.mark.parametrize("nulls", ["none", "some", "all"]) def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): - pdf = pd.DataFrame() - from string import ascii_lowercase null_rep = np.nan if dtype in ["float32", "float64"] else None for i in range(num_cols): - colname = ascii_lowercase[i] + colname = string.ascii_lowercase[i] data = pd.Series(np.random.randint(0, 26, num_rows).astype(dtype)) if nulls == "some": idx = np.random.choice( @@ -1727,7 +1728,7 @@ def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): data[:] = null_rep pdf[colname] = data - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) got_function = gdf.transpose() got_property = gdf.T @@ -1742,15 +1743,14 @@ def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): @pytest.mark.parametrize("num_rows", [1, 2, 20]) def test_dataframe_transpose_category(num_cols, num_rows): pdf = pd.DataFrame() - from string import ascii_lowercase for i in range(num_cols): - colname = ascii_lowercase[i] - data = pd.Series(list(ascii_lowercase), dtype="category") + colname = string.ascii_lowercase[i] + data = pd.Series(list(string.ascii_lowercase), dtype="category") data = data.sample(num_rows, replace=True).reset_index(drop=True) pdf[colname] = data - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) got_function = gdf.transpose() got_property = gdf.T @@ -1762,7 +1762,7 @@ def test_dataframe_transpose_category(num_cols, num_rows): def test_generated_column(): - gdf = gd.DataFrame({"a": (i for i in range(5))}) + gdf = cudf.DataFrame({"a": (i for i in range(5))}) assert len(gdf) == 5 @@ -1773,7 +1773,7 @@ def pdf(): @pytest.fixture def gdf(pdf): - return gd.DataFrame.from_pandas(pdf) + return cudf.DataFrame.from_pandas(pdf) @pytest.mark.parametrize( @@ -1813,9 +1813,7 @@ def gdf(pdf): @pytest.mark.parametrize("skipna", [True, False, None]) def test_dataframe_reductions(data, func, skipna): pdf = 
pd.DataFrame(data=data) - print(func(pdf, skipna=skipna)) - gdf = gd.DataFrame.from_pandas(pdf) - print(func(gdf, skipna=skipna)) + gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(func(pdf, skipna=skipna), func(gdf, skipna=skipna)) @@ -1832,7 +1830,7 @@ def test_dataframe_reductions(data, func, skipna): @pytest.mark.parametrize("func", [lambda df: df.count()]) def test_dataframe_count_reduction(data, func): pdf = pd.DataFrame(data=data) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(func(pdf), func(gdf)) @@ -1852,7 +1850,7 @@ def test_dataframe_count_reduction(data, func): @pytest.mark.parametrize("min_count", [-10, -1, 0, 1, 2, 3, 10]) def test_dataframe_min_count_ops(data, ops, skipna, min_count): psr = pd.DataFrame(data) - gsr = gd.DataFrame(data) + gsr = cudf.DataFrame(data) if psr.shape[0] * psr.shape[1] < min_count: pytest.xfail("https://github.com/pandas-dev/pandas/issues/39738") @@ -1952,7 +1950,7 @@ def test_unary_operators(func, pdf, gdf): def test_is_monotonic(gdf): pdf = pd.DataFrame({"x": [1, 2, 3]}, index=[3, 1, 2]) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) assert not gdf.index.is_monotonic assert not gdf.index.is_monotonic_increasing assert not gdf.index.is_monotonic_decreasing @@ -1965,7 +1963,7 @@ def test_iter(pdf, gdf): def test_iteritems(gdf): for k, v in gdf.iteritems(): assert k in gdf.columns - assert isinstance(v, gd.Series) + assert isinstance(v, cudf.Series) assert_eq(v, gdf[k]) @@ -1977,7 +1975,7 @@ def test_quantile(q, numeric_only): pdf = pd.DataFrame( {"date": ts, "delta": td, "val": np.random.randn(len(ts))} ) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(pdf["date"].quantile(q), gdf["date"].quantile(q)) assert_eq(pdf["delta"].quantile(q), gdf["delta"].quantile(q)) @@ -1997,7 +1995,7 @@ def test_quantile(q, numeric_only): def test_empty_quantile(): pdf = pd.DataFrame({"x": []}) - df = gd.DataFrame({"x": []}) + df = 
cudf.DataFrame({"x": []}) actual = df.quantile() expected = pdf.quantile() @@ -2006,16 +2004,16 @@ def test_empty_quantile(): def test_from_pandas_function(pdf): - gdf = gd.from_pandas(pdf) - assert isinstance(gdf, gd.DataFrame) + gdf = cudf.from_pandas(pdf) + assert isinstance(gdf, cudf.DataFrame) assert_eq(pdf, gdf) - gdf = gd.from_pandas(pdf.x) - assert isinstance(gdf, gd.Series) + gdf = cudf.from_pandas(pdf.x) + assert isinstance(gdf, cudf.Series) assert_eq(pdf.x, gdf) with pytest.raises(TypeError): - gd.from_pandas(123) + cudf.from_pandas(123) @pytest.mark.parametrize("preserve_index", [True, False]) @@ -2030,7 +2028,7 @@ def test_arrow_pandas_compat(pdf, gdf, preserve_index): assert pa.Table.equals(pdf_arrow_table, gdf_arrow_table) - gdf2 = gd.DataFrame.from_arrow(pdf_arrow_table) + gdf2 = cudf.DataFrame.from_arrow(pdf_arrow_table) pdf2 = pdf_arrow_table.to_pandas() assert_eq(pdf2, gdf2) @@ -2043,11 +2041,11 @@ def test_series_hash_encode(nrows): # results in enc_with_name_arr and enc_arr to be same. # And there is no other better way to make hash return same value. # So using an integer name to get constant value back from hash. 
- s = gd.Series(data, name=1) + s = cudf.Series(data, name=1) num_features = 1000 encoded_series = s.hash_encode(num_features) - assert isinstance(encoded_series, gd.Series) + assert isinstance(encoded_series, cudf.Series) enc_arr = encoded_series.to_array() assert np.all(enc_arr >= 0) assert np.max(enc_arr) < num_features @@ -2063,10 +2061,10 @@ def test_cuda_array_interface(dtype): cupy_data = cupy.array(np_data) pd_data = pd.Series(np_data) - cudf_data = gd.Series(cupy_data) + cudf_data = cudf.Series(cupy_data) assert_eq(pd_data, cudf_data) - gdf = gd.DataFrame() + gdf = cudf.DataFrame() gdf["test"] = cupy_data pd_data.name = "test" assert_eq(pd_data, gdf["test"]) @@ -2083,7 +2081,7 @@ def test_from_arrow_chunked_arrays(nelem, nchunks, data_type): pa_chunk_array = pa.chunked_array(np_list_data) expect = pd.Series(pa_chunk_array.to_pandas()) - got = gd.Series(pa_chunk_array) + got = cudf.Series(pa_chunk_array) assert_eq(expect, got) @@ -2097,15 +2095,13 @@ def test_from_arrow_chunked_arrays(nelem, nchunks, data_type): ) expect = pa_table.to_pandas() - got = gd.DataFrame.from_arrow(pa_table) + got = cudf.DataFrame.from_arrow(pa_table) assert_eq(expect, got) @pytest.mark.skip(reason="Test was designed to be run in isolation") def test_gpu_memory_usage_with_boolmask(): - import cudf - ctx = cuda.current_context() def query_GPU_memory(note=""): @@ -2120,7 +2116,7 @@ def query_GPU_memory(note=""): colNames = ["col" + str(iCol) for iCol in range(nCols)] pandasDF = pd.DataFrame(data=dataNumpy, columns=colNames, dtype=np.float32) cudaDF = cudf.core.DataFrame.from_pandas(pandasDF) - boolmask = gd.Series(np.random.randint(1, 2, len(cudaDF)).astype("bool")) + boolmask = cudf.Series(np.random.randint(1, 2, len(cudaDF)).astype("bool")) memory_used = query_GPU_memory() cudaDF = cudaDF[boolmask] @@ -2163,8 +2159,8 @@ def test_dataframe_boolmask(mask_shape): pdf_mask = pd.DataFrame() for col in mask_shape[1]: pdf_mask[col] = np.random.randint(0, 2, mask_shape[0]) > 0 - gdf = 
gd.DataFrame.from_pandas(pdf) - gdf_mask = gd.DataFrame.from_pandas(pdf_mask) + gdf = cudf.DataFrame.from_pandas(pdf) + gdf_mask = cudf.DataFrame.from_pandas(pdf_mask) gdf = gdf[gdf_mask] pdf = pdf[pdf_mask] @@ -2180,7 +2176,7 @@ def test_dataframe_boolmask(mask_shape): [ [True, False, True], pytest.param( - gd.Series([True, False, True]), + cudf.Series([True, False, True]), marks=pytest.mark.xfail( reason="Pandas can't index a multiindex with a Series" ), @@ -2188,7 +2184,7 @@ def test_dataframe_boolmask(mask_shape): ], ) def test_dataframe_multiindex_boolmask(mask): - gdf = gd.DataFrame( + gdf = cudf.DataFrame( {"w": [3, 2, 1], "x": [1, 2, 3], "y": [0, 1, 0], "z": [1, 1, 1]} ) gdg = gdf.groupby(["w", "x"]).count() @@ -2200,7 +2196,7 @@ def test_dataframe_assignment(): pdf = pd.DataFrame() for col in "abc": pdf[col] = np.array([0, 1, 1, -2, 10]) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) gdf[gdf < 0] = 999 pdf[pdf < 0] = 999 assert_eq(gdf, pdf) @@ -2212,7 +2208,7 @@ def test_1row_arrow_table(): table = pa.Table.from_batches([batch]) expect = table.to_pandas() - got = gd.DataFrame.from_arrow(table) + got = cudf.DataFrame.from_arrow(table) assert_eq(expect, got) @@ -2221,7 +2217,7 @@ def test_arrow_handle_no_index_name(pdf, gdf): pdf_arrow = pa.Table.from_pandas(pdf) assert pa.Table.equals(pdf_arrow, gdf_arrow) - got = gd.DataFrame.from_arrow(gdf_arrow) + got = cudf.DataFrame.from_arrow(gdf_arrow) expect = pdf_arrow.to_pandas() assert_eq(expect, got) @@ -2234,9 +2230,9 @@ def test_arrow_handle_no_index_name(pdf, gdf): def test_series_digitize(num_rows, num_bins, right, dtype, series_bins): data = np.random.randint(0, 100, num_rows).astype(dtype) bins = np.unique(np.sort(np.random.randint(2, 95, num_bins).astype(dtype))) - s = gd.Series(data) + s = cudf.Series(data) if series_bins: - s_bins = gd.Series(bins) + s_bins = cudf.Series(bins) indices = s.digitize(s_bins, right) else: indices = s.digitize(bins, right) @@ -2246,8 +2242,8 @@ 
def test_series_digitize(num_rows, num_bins, right, dtype, series_bins): def test_series_digitize_invalid_bins(): - s = gd.Series(np.random.randint(0, 30, 80), dtype="int32") - bins = gd.Series([2, None, None, 50, 90], dtype="int32") + s = cudf.Series(np.random.randint(0, 30, 80), dtype="int32") + bins = cudf.Series([2, None, None, 50, 90], dtype="int32") with pytest.raises( ValueError, match="`bins` cannot contain null entries." @@ -2262,7 +2258,7 @@ def test_pandas_non_contiguious(): for col in df.columns: assert df[col].values.flags["C_CONTIGUOUS"] is False - gdf = gd.DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(df) assert_eq(gdf.to_pandas(), df) @@ -2281,7 +2277,7 @@ def test_series_all_null(num_elements, null_type): # Typecast Pandas because None will return `object` dtype expect = pd.Series(data, dtype="float64") - got = gd.Series(data) + got = cudf.Series(data) assert_eq(expect, got) @@ -2289,13 +2285,13 @@ def test_series_all_null(num_elements, null_type): @pytest.mark.parametrize("num_elements", [0, 2, 10, 100]) def test_series_all_valid_nan(num_elements): data = [np.nan] * num_elements - sr = gd.Series(data, nan_as_null=False) + sr = cudf.Series(data, nan_as_null=False) np.testing.assert_equal(sr.null_count, 0) def test_series_rename(): pds = pd.Series([1, 2, 3], name="asdf") - gds = gd.Series([1, 2, 3], name="asdf") + gds = cudf.Series([1, 2, 3], name="asdf") expect = pds.rename("new_name") got = gds.rename("new_name") @@ -2303,12 +2299,12 @@ def test_series_rename(): assert_eq(expect, got) pds = pd.Series(expect) - gds = gd.Series(got) + gds = cudf.Series(got) assert_eq(pds, gds) pds = pd.Series(expect, name="name name") - gds = gd.Series(got, name="name name") + gds = cudf.Series(got, name="name name") assert_eq(pds, gds) @@ -2329,7 +2325,7 @@ def check_frame_series_equality(left, right): check_index_equality(left, right) check_values_equality(left, right) - gdf = gd.DataFrame( + gdf = cudf.DataFrame( { "a": np.random.randint(0, 1000, 
nelem).astype(data_type), "b": np.random.randint(0, 1000, nelem).astype(data_type), @@ -2356,9 +2352,9 @@ def check_frame_series_equality(left, right): def test_tail_for_string(): - gdf = gd.DataFrame() - gdf["id"] = gd.Series(["a", "b"], dtype=np.object) - gdf["v"] = gd.Series([1, 2]) + gdf = cudf.DataFrame() + gdf["id"] = cudf.Series(["a", "b"], dtype=np.object) + gdf["v"] = cudf.Series([1, 2]) assert_eq(gdf.tail(3), gdf.to_pandas().tail(3)) @@ -2441,7 +2437,7 @@ def test_reset_index_inplace(pdf, gdf, drop): @pytest.mark.parametrize("append", [True, False]) @pytest.mark.parametrize("inplace", [True, False]) def test_set_index(data, index, drop, append, inplace): - gdf = gd.DataFrame(data) + gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() expected = pdf.set_index(index, inplace=inplace, drop=drop, append=append) @@ -2467,7 +2463,7 @@ def test_set_index(data, index, drop, append, inplace): @pytest.mark.parametrize("verify_integrity", [True]) @pytest.mark.xfail def test_set_index_verify_integrity(data, index, verify_integrity): - gdf = gd.DataFrame(data) + gdf = cudf.DataFrame(data) gdf.set_index(index, verify_integrity=verify_integrity) @@ -2486,7 +2482,7 @@ def test_set_index_multi(drop, nelem): } ) df["e"] = df["d"].astype("category") - gdf = gd.DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(df) assert_eq(gdf.set_index("a", drop=drop), gdf.set_index(["a"], drop=drop)) assert_eq( @@ -2507,7 +2503,7 @@ def test_set_index_multi(drop, nelem): def test_dataframe_reindex_0(copy): # TODO (ptaylor): pandas changes `int` dtype to `float64` # when reindexing and filling new label indices with NaN - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={ "a": "category", @@ -2524,7 +2520,7 @@ def test_dataframe_reindex_0(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_1(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": 
"category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2535,7 +2531,7 @@ def test_dataframe_reindex_1(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_2(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2549,7 +2545,7 @@ def test_dataframe_reindex_2(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_3(copy): columns = ["a", "b", "c", "d", "e"] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2563,7 +2559,7 @@ def test_dataframe_reindex_3(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_4(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2577,7 +2573,7 @@ def test_dataframe_reindex_4(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_5(copy): columns = ["a", "b", "c", "d", "e"] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2591,7 +2587,7 @@ def test_dataframe_reindex_5(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_6(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2605,7 +2601,7 @@ def test_dataframe_reindex_6(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_7(copy): columns = ["a", "b", "c", "d", "e"] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = 
gdf.to_pandas() @@ -2619,7 +2615,7 @@ def test_dataframe_reindex_7(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_8(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2633,7 +2629,7 @@ def test_dataframe_reindex_8(copy): @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_9(copy): columns = ["a", "b", "c", "d", "e"] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2648,7 +2644,7 @@ def test_dataframe_reindex_9(copy): def test_dataframe_reindex_10(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] columns = ["a", "b", "c", "d", "e"] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2668,7 +2664,7 @@ def test_dataframe_reindex_change_dtype(copy): kwargs = {} index = pd.date_range("12/29/2009", periods=10, freq="D") columns = ["a", "b", "c", "d", "e"] - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=6, dtypes={"a": "category", "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2684,7 +2680,7 @@ def test_dataframe_reindex_change_dtype(copy): @pytest.mark.parametrize("copy", [True, False]) def test_series_categorical_reindex(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = gd.datasets.randomdata(nrows=6, dtypes={"a": "category"}) + gdf = cudf.datasets.randomdata(nrows=6, dtypes={"a": "category"}) pdf = gdf.to_pandas() assert_eq(pdf["a"].reindex(copy=True), gdf["a"].reindex(copy=copy)) assert_eq( @@ -2699,7 +2695,7 @@ def test_series_categorical_reindex(copy): @pytest.mark.parametrize("copy", [True, False]) def test_series_float_reindex(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = gd.datasets.randomdata(nrows=6, dtypes={"c": float}) + gdf = 
cudf.datasets.randomdata(nrows=6, dtypes={"c": float}) pdf = gdf.to_pandas() assert_eq(pdf["c"].reindex(copy=True), gdf["c"].reindex(copy=copy)) assert_eq( @@ -2714,7 +2710,7 @@ def test_series_float_reindex(copy): @pytest.mark.parametrize("copy", [True, False]) def test_series_string_reindex(copy): index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = gd.datasets.randomdata(nrows=6, dtypes={"d": str}) + gdf = cudf.datasets.randomdata(nrows=6, dtypes={"d": str}) pdf = gdf.to_pandas() assert_eq(pdf["d"].reindex(copy=True), gdf["d"].reindex(copy=copy)) assert_eq( @@ -2743,7 +2739,7 @@ def test_to_frame(pdf, gdf): def test_dataframe_empty_sort_index(): pdf = pd.DataFrame({"x": []}) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) expect = pdf.sort_index() got = gdf.sort_index() @@ -2763,7 +2759,7 @@ def test_dataframe_sort_index( {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]}, index=[3.0, 1.0, np.nan], ) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) expected = pdf.sort_index( axis=axis, @@ -2819,7 +2815,7 @@ def test_dataframe_mulitindex_sort_index( "d": [1, 2, 8], } ).set_index(["b", "a", 1]) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) # ignore_index is supported in v.1.0 expected = pdf.sort_index( @@ -2857,7 +2853,7 @@ def test_dataframe_0_row_dtype(dtype): else: data = np.array([1, 2, 3, 4, 5], dtype=dtype) - expect = gd.DataFrame() + expect = cudf.DataFrame() expect["x"] = data expect["y"] = data got = expect.head(0) @@ -2865,7 +2861,7 @@ def test_dataframe_0_row_dtype(dtype): for col_name in got.columns: assert expect[col_name].dtype == got[col_name].dtype - expect = gd.Series(data) + expect = cudf.Series(data) got = expect.head(0) assert expect.dtype == got.dtype @@ -2876,7 +2872,7 @@ def test_series_list_nanasnull(nan_as_null): data = [1.0, 2.0, 3.0, np.nan, None] expect = pa.array(data, from_pandas=nan_as_null) - got = gd.Series(data, nan_as_null=nan_as_null).to_arrow() + 
got = cudf.Series(data, nan_as_null=nan_as_null).to_arrow() # Bug in Arrow 0.14.1 where NaNs aren't handled expect = expect.cast("int64", safe=False) @@ -2886,7 +2882,7 @@ def test_series_list_nanasnull(nan_as_null): def test_column_assignment(): - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=20, dtypes={"a": "category", "b": int, "c": float} ) new_cols = ["q", "r", "s"] @@ -2895,7 +2891,7 @@ def test_column_assignment(): def test_select_dtype(): - gdf = gd.datasets.randomdata( + gdf = cudf.datasets.randomdata( nrows=20, dtypes={"a": "category", "b": int, "c": float, "d": str} ) pdf = gdf.to_pandas() @@ -2953,7 +2949,9 @@ def test_select_dtype(): ), ) - gdf = gd.DataFrame({"A": [3, 4, 5], "C": [1, 2, 3], "D": ["a", "b", "c"]}) + gdf = cudf.DataFrame( + {"A": [3, 4, 5], "C": [1, 2, 3], "D": ["a", "b", "c"]} + ) pdf = gdf.to_pandas() assert_eq( pdf.select_dtypes(include=["object", "int", "category"]), @@ -2964,7 +2962,7 @@ def test_select_dtype(): gdf.select_dtypes(include=["object"], exclude=["category"]), ) - gdf = gd.DataFrame({"a": range(10), "b": range(10, 20)}) + gdf = cudf.DataFrame({"a": range(10), "b": range(10, 20)}) pdf = gdf.to_pandas() assert_eq( pdf.select_dtypes(include=["category"]), @@ -2998,8 +2996,8 @@ def test_select_dtype(): lfunc=pdf.select_dtypes, rfunc=gdf.select_dtypes, ) - gdf = gd.DataFrame( - {"a": gd.Series([], dtype="int"), "b": gd.Series([], dtype="str")} + gdf = cudf.DataFrame( + {"a": cudf.Series([], dtype="int"), "b": cudf.Series([], dtype="str")} ) pdf = gdf.to_pandas() assert_eq( @@ -3013,7 +3011,7 @@ def test_select_dtype(): def test_select_dtype_datetime(): - gdf = gd.datasets.timeseries( + gdf = cudf.datasets.timeseries( start="2000-01-01", end="2000-01-02", freq="3600s", dtypes={"x": int} ) gdf = gdf.reset_index() @@ -3031,7 +3029,7 @@ def test_select_dtype_datetime(): def test_select_dtype_datetime_with_frequency(): - gdf = gd.datasets.timeseries( + gdf = cudf.datasets.timeseries( start="2000-01-01", 
end="2000-01-02", freq="3600s", dtypes={"x": int} ) gdf = gdf.reset_index() @@ -3046,7 +3044,7 @@ def test_select_dtype_datetime_with_frequency(): def test_array_ufunc(): - gdf = gd.DataFrame({"x": [2, 3, 4.0], "y": [9.0, 2.5, 1.1]}) + gdf = cudf.DataFrame({"x": [2, 3, 4.0], "y": [9.0, 2.5, 1.1]}) pdf = gdf.to_pandas() assert_eq(np.sqrt(gdf), np.sqrt(pdf)) @@ -3056,7 +3054,7 @@ def test_array_ufunc(): @pytest.mark.parametrize("nan_value", [-5, -5.0, 0, 5, 5.0, None, "pandas"]) def test_series_to_gpu_array(nan_value): - s = gd.Series([0, 1, None, 3]) + s = cudf.Series([0, 1, None, 3]) np.testing.assert_array_equal( s.to_array(nan_value), s.to_gpu_array(nan_value).copy_to_host() ) @@ -3066,7 +3064,7 @@ def test_dataframe_describe_exclude(): np.random.seed(12) data_length = 10000 - df = gd.DataFrame() + df = cudf.DataFrame() df["x"] = np.random.normal(10, 1, data_length) df["x"] = df.x.astype("int64") df["y"] = np.random.normal(10, 1, data_length) @@ -3081,7 +3079,7 @@ def test_dataframe_describe_include(): np.random.seed(12) data_length = 10000 - df = gd.DataFrame() + df = cudf.DataFrame() df["x"] = np.random.normal(10, 1, data_length) df["x"] = df.x.astype("int64") df["y"] = np.random.normal(10, 1, data_length) @@ -3096,7 +3094,7 @@ def test_dataframe_describe_default(): np.random.seed(12) data_length = 10000 - df = gd.DataFrame() + df = cudf.DataFrame() df["x"] = np.random.normal(10, 1, data_length) df["y"] = np.random.normal(10, 1, data_length) pdf = df.to_pandas() @@ -3110,7 +3108,7 @@ def test_series_describe_include_all(): np.random.seed(12) data_length = 10000 - df = gd.DataFrame() + df = cudf.DataFrame() df["x"] = np.random.normal(10, 1, data_length) df["x"] = df.x.astype("int64") df["y"] = np.random.normal(10, 1, data_length) @@ -3134,7 +3132,7 @@ def test_dataframe_describe_percentiles(): data_length = 10000 sample_percentiles = [0.0, 0.1, 0.33, 0.84, 0.4, 0.99] - df = gd.DataFrame() + df = cudf.DataFrame() df["x"] = np.random.normal(10, 1, data_length) 
df["y"] = np.random.normal(10, 1, data_length) pdf = df.to_pandas() @@ -3148,7 +3146,7 @@ def test_get_numeric_data(): pdf = pd.DataFrame( {"x": [1, 2, 3], "y": [1.0, 2.0, 3.0], "z": ["a", "b", "c"]} ) - gdf = gd.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) assert_eq(pdf._get_numeric_data(), gdf._get_numeric_data()) @@ -3167,7 +3165,7 @@ def test_shift(dtype, period, data_empty): else: data = gen_rand(dtype, 100000) - gdf = gd.DataFrame({"a": gd.Series(data, dtype=dtype)}) + gdf = cudf.DataFrame({"a": cudf.Series(data, dtype=dtype)}) pdf = pd.DataFrame({"a": pd.Series(data, dtype=dtype)}) shifted_outcome = gdf.a.shift(period).fillna(0) @@ -3192,7 +3190,7 @@ def test_diff(dtype, period, data_empty): else: data = gen_rand(dtype, 100000) - gdf = gd.DataFrame({"a": gd.Series(data, dtype=dtype)}) + gdf = cudf.DataFrame({"a": cudf.Series(data, dtype=dtype)}) pdf = pd.DataFrame({"a": pd.Series(data, dtype=dtype)}) expected_outcome = pdf.a.diff(period) @@ -3208,7 +3206,7 @@ def test_diff(dtype, period, data_empty): @pytest.mark.parametrize("nan_as_null", [True, False, None]) def test_dataframe_isnull_isna(df, nan_as_null): - gdf = gd.DataFrame.from_pandas(df, nan_as_null=nan_as_null) + gdf = cudf.DataFrame.from_pandas(df, nan_as_null=nan_as_null) assert_eq(df.isnull(), gdf.isnull()) assert_eq(df.isna(), gdf.isna()) @@ -3223,7 +3221,7 @@ def test_dataframe_isnull_isna(df, nan_as_null): @pytest.mark.parametrize("nan_as_null", [True, False, None]) def test_dataframe_notna_notnull(df, nan_as_null): - gdf = gd.DataFrame.from_pandas(df, nan_as_null=nan_as_null) + gdf = cudf.DataFrame.from_pandas(df, nan_as_null=nan_as_null) assert_eq(df.notnull(), gdf.notnull()) assert_eq(df.notna(), gdf.notna()) @@ -3236,12 +3234,12 @@ def test_dataframe_notna_notnull(df, nan_as_null): def test_ndim(): pdf = pd.DataFrame({"x": range(5), "y": range(5, 10)}) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) assert pdf.ndim == gdf.ndim assert pdf.x.ndim == gdf.x.ndim 
s = pd.Series(dtype="float64") - gs = gd.Series() + gs = cudf.Series() assert s.ndim == gs.ndim @@ -3252,7 +3250,7 @@ def test_ndim(): 0, 5, pd.Series([1, 4, 3, -6], index=["w", "x", "y", "z"]), - gd.Series([-4, -2, 12], index=["x", "y", "z"]), + cudf.Series([-4, -2, 12], index=["x", "y", "z"]), {"w": -1, "x": 15, "y": 2}, ], ) @@ -3278,9 +3276,9 @@ def test_dataframe_round(decimals): "z": np.repeat([-0.6459412758761901], 10), } ) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) - if isinstance(decimals, gd.Series): + if isinstance(decimals, cudf.Series): pdecimals = decimals.to_pandas() else: pdecimals = decimals @@ -3333,13 +3331,13 @@ def test_all(data): # Pandas treats `None` in object type columns as True for some reason, so # replacing with `False` if np.array(data).ndim <= 1: - pdata = pd.Series( - data, dtype=None if len(data) else "float64" - ).replace([None], False) - gdata = gd.Series.from_pandas(pdata) + pdata = cudf.utils.utils.create_pandas_series(data=data).replace( + [None], False + ) + gdata = cudf.Series.from_pandas(pdata) else: pdata = pd.DataFrame(data, columns=["a", "b"]).replace([None], False) - gdata = gd.DataFrame.from_pandas(pdata) + gdata = cudf.DataFrame.from_pandas(pdata) # test bool_only if pdata["b"].dtype == "bool": @@ -3388,8 +3386,8 @@ def test_all(data): @pytest.mark.parametrize("axis", [0, 1]) def test_any(data, axis): if np.array(data).ndim <= 1: - pdata = pd.Series(data, dtype=None if len(data) else "float64") - gdata = gd.Series.from_pandas(pdata) + pdata = cudf.utils.utils.create_pandas_series(data=data) + gdata = cudf.Series.from_pandas(pdata) if axis == 1: with pytest.raises(NotImplementedError): @@ -3400,7 +3398,7 @@ def test_any(data, axis): assert_eq(got, expected) else: pdata = pd.DataFrame(data, columns=["a", "b"]) - gdata = gd.DataFrame.from_pandas(pdata) + gdata = cudf.DataFrame.from_pandas(pdata) # test bool_only if pdata["b"].dtype == "bool": @@ -3421,7 +3419,7 @@ def test_any(data, 
axis): @pytest.mark.parametrize("axis", [0, 1]) def test_empty_dataframe_any(axis): pdf = pd.DataFrame({}, columns=["a", "b"]) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) got = gdf.any(axis=axis) expected = pdf.any(axis=axis) assert_eq(got, expected, check_index_type=False) @@ -3432,7 +3430,7 @@ def test_dataframe_sizeof(indexed): rows = int(1e6) index = list(i for i in range(rows)) if indexed else None - gdf = gd.DataFrame({"A": [8] * rows, "B": [32] * rows}, index=index) + gdf = cudf.DataFrame({"A": [8] * rows, "B": [32] * rows}, index=index) for c in gdf._data.columns: assert gdf._index.__sizeof__() == gdf._index.__sizeof__() @@ -3449,19 +3447,19 @@ def test_dataframe_sizeof(indexed): @pytest.mark.parametrize("non_list_data", [123, "abc", "zyx", "rapids", 0.8]) def test_create_dataframe_cols_empty_data(a, b, misc_data, non_list_data): expected = pd.DataFrame({"a": a}) - actual = gd.DataFrame.from_pandas(expected) + actual = cudf.DataFrame.from_pandas(expected) expected["b"] = b actual["b"] = b assert_eq(actual, expected) expected = pd.DataFrame({"a": []}) - actual = gd.DataFrame.from_pandas(expected) + actual = cudf.DataFrame.from_pandas(expected) expected["b"] = misc_data actual["b"] = misc_data assert_eq(actual, expected) expected = pd.DataFrame({"a": a}) - actual = gd.DataFrame.from_pandas(expected) + actual = cudf.DataFrame.from_pandas(expected) expected["b"] = non_list_data actual["b"] = non_list_data assert_eq(actual, expected) @@ -3469,7 +3467,7 @@ def test_create_dataframe_cols_empty_data(a, b, misc_data, non_list_data): def test_empty_dataframe_describe(): pdf = pd.DataFrame({"a": [], "b": []}) - gdf = gd.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) expected = pdf.describe() actual = gdf.describe() @@ -3478,75 +3476,77 @@ def test_empty_dataframe_describe(): def test_as_column_types(): - from cudf.core.column import column - - col = column.as_column(gd.Series([])) + col = column.as_column(cudf.Series([])) 
assert_eq(col.dtype, np.dtype("float64")) - gds = gd.Series(col) + gds = cudf.Series(col) pds = pd.Series(pd.Series([], dtype="float64")) assert_eq(pds, gds) - col = column.as_column(gd.Series([]), dtype="float32") + col = column.as_column(cudf.Series([]), dtype="float32") assert_eq(col.dtype, np.dtype("float32")) - gds = gd.Series(col) + gds = cudf.Series(col) pds = pd.Series(pd.Series([], dtype="float32")) assert_eq(pds, gds) - col = column.as_column(gd.Series([]), dtype="str") + col = column.as_column(cudf.Series([]), dtype="str") assert_eq(col.dtype, np.dtype("object")) - gds = gd.Series(col) + gds = cudf.Series(col) pds = pd.Series(pd.Series([], dtype="str")) assert_eq(pds, gds) - col = column.as_column(gd.Series([]), dtype="object") + col = column.as_column(cudf.Series([]), dtype="object") assert_eq(col.dtype, np.dtype("object")) - gds = gd.Series(col) + gds = cudf.Series(col) pds = pd.Series(pd.Series([], dtype="object")) assert_eq(pds, gds) pds = pd.Series(np.array([1, 2, 3]), dtype="float32") - gds = gd.Series(column.as_column(np.array([1, 2, 3]), dtype="float32")) + gds = cudf.Series(column.as_column(np.array([1, 2, 3]), dtype="float32")) assert_eq(pds, gds) pds = pd.Series([1, 2, 3], dtype="float32") - gds = gd.Series([1, 2, 3], dtype="float32") + gds = cudf.Series([1, 2, 3], dtype="float32") assert_eq(pds, gds) pds = pd.Series([], dtype="float64") - gds = gd.Series(column.as_column(pds)) + gds = cudf.Series(column.as_column(pds)) assert_eq(pds, gds) pds = pd.Series([1, 2, 4], dtype="int64") - gds = gd.Series(column.as_column(gd.Series([1, 2, 4]), dtype="int64")) + gds = cudf.Series(column.as_column(cudf.Series([1, 2, 4]), dtype="int64")) assert_eq(pds, gds) pds = pd.Series([1.2, 18.0, 9.0], dtype="float32") - gds = gd.Series( - column.as_column(gd.Series([1.2, 18.0, 9.0]), dtype="float32") + gds = cudf.Series( + column.as_column(cudf.Series([1.2, 18.0, 9.0]), dtype="float32") ) assert_eq(pds, gds) pds = pd.Series([1.2, 18.0, 9.0], dtype="str") - gds = 
gd.Series(column.as_column(gd.Series([1.2, 18.0, 9.0]), dtype="str")) + gds = cudf.Series( + column.as_column(cudf.Series([1.2, 18.0, 9.0]), dtype="str") + ) assert_eq(pds, gds) pds = pd.Series(pd.Index(["1", "18", "9"]), dtype="int") - gds = gd.Series(gd.core.index.StringIndex(["1", "18", "9"]), dtype="int") + gds = cudf.Series( + cudf.core.index.StringIndex(["1", "18", "9"]), dtype="int" + ) assert_eq(pds, gds) def test_one_row_head(): - gdf = gd.DataFrame({"name": ["carl"], "score": [100]}, index=[123]) + gdf = cudf.DataFrame({"name": ["carl"], "score": [100]}, index=[123]) pdf = gdf.to_pandas() head_gdf = gdf.head() @@ -3559,7 +3559,7 @@ def test_one_row_head(): @pytest.mark.parametrize("as_dtype", NUMERIC_TYPES) def test_series_astype_numeric_to_numeric(dtype, as_dtype): psr = pd.Series([1, 2, 4, 3], dtype=dtype) - gsr = gd.from_pandas(psr) + gsr = cudf.from_pandas(psr) assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) @@ -3567,9 +3567,9 @@ def test_series_astype_numeric_to_numeric(dtype, as_dtype): @pytest.mark.parametrize("as_dtype", NUMERIC_TYPES) def test_series_astype_numeric_to_numeric_nulls(dtype, as_dtype): data = [1, 2, None, 3] - sr = gd.Series(data, dtype=dtype) + sr = cudf.Series(data, dtype=dtype) got = sr.astype(as_dtype) - expect = gd.Series([1, 2, None, 3], dtype=as_dtype) + expect = cudf.Series([1, 2, None, 3], dtype=as_dtype) assert_eq(expect, got) @@ -3587,7 +3587,7 @@ def test_series_astype_numeric_to_numeric_nulls(dtype, as_dtype): ) def test_series_astype_numeric_to_other(dtype, as_dtype): psr = pd.Series([1, 2, 3], dtype=dtype) - gsr = gd.from_pandas(psr) + gsr = cudf.from_pandas(psr) assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) @@ -3611,7 +3611,7 @@ def test_series_astype_string_to_other(as_dtype): else: data = ["1", "2", "3"] psr = pd.Series(data) - gsr = gd.from_pandas(psr) + gsr = cudf.from_pandas(psr) assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) @@ -3628,7 +3628,7 @@ def 
test_series_astype_string_to_other(as_dtype): def test_series_astype_datetime_to_other(as_dtype): data = ["2001-01-01", "2002-02-02", "2001-01-05"] psr = pd.Series(data) - gsr = gd.from_pandas(psr) + gsr = cudf.from_pandas(psr) assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) @@ -3644,7 +3644,7 @@ def test_series_astype_datetime_to_other(as_dtype): def test_series_astype_datetime_to_string(inp): dtype, expect = inp base_date = "2011-01-01" - sr = gd.Series([base_date], dtype=dtype) + sr = cudf.Series([base_date], dtype=dtype) got = sr.astype(str)[0] assert expect == got @@ -3669,19 +3669,19 @@ def test_series_astype_categorical_to_other(as_dtype): else: data = [1, 2, 3, 1] psr = pd.Series(data, dtype="category") - gsr = gd.from_pandas(psr) + gsr = cudf.from_pandas(psr) assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) @pytest.mark.parametrize("ordered", [True, False]) def test_series_astype_to_categorical_ordered(ordered): psr = pd.Series([1, 2, 3, 1], dtype="category") - gsr = gd.from_pandas(psr) + gsr = cudf.from_pandas(psr) ordered_dtype_pd = pd.CategoricalDtype( categories=[1, 2, 3], ordered=ordered ) - ordered_dtype_gd = gd.CategoricalDtype.from_pandas(ordered_dtype_pd) + ordered_dtype_gd = cudf.CategoricalDtype.from_pandas(ordered_dtype_pd) assert_eq( psr.astype("int32").astype(ordered_dtype_pd).astype("int32"), gsr.astype("int32").astype(ordered_dtype_gd).astype("int32"), @@ -3694,11 +3694,11 @@ def test_series_astype_cat_ordered_to_unordered(ordered): pd_to_dtype = pd.CategoricalDtype( categories=[1, 2, 3], ordered=not ordered ) - gd_dtype = gd.CategoricalDtype.from_pandas(pd_dtype) - gd_to_dtype = gd.CategoricalDtype.from_pandas(pd_to_dtype) + gd_dtype = cudf.CategoricalDtype.from_pandas(pd_dtype) + gd_to_dtype = cudf.CategoricalDtype.from_pandas(pd_to_dtype) psr = pd.Series([1, 2, 3], dtype=pd_dtype) - gsr = gd.Series([1, 2, 3], dtype=gd_dtype) + gsr = cudf.Series([1, 2, 3], dtype=gd_dtype) expect = psr.astype(pd_to_dtype) got = 
gsr.astype(gd_to_dtype) @@ -3710,62 +3710,63 @@ def test_series_astype_null_cases(): data = [1, 2, None, 3] # numerical to other - assert_eq(gd.Series(data, dtype="str"), gd.Series(data).astype("str")) + assert_eq(cudf.Series(data, dtype="str"), cudf.Series(data).astype("str")) assert_eq( - gd.Series(data, dtype="category"), gd.Series(data).astype("category") + cudf.Series(data, dtype="category"), + cudf.Series(data).astype("category"), ) assert_eq( - gd.Series(data, dtype="float32"), - gd.Series(data, dtype="int32").astype("float32"), + cudf.Series(data, dtype="float32"), + cudf.Series(data, dtype="int32").astype("float32"), ) assert_eq( - gd.Series(data, dtype="float32"), - gd.Series(data, dtype="uint32").astype("float32"), + cudf.Series(data, dtype="float32"), + cudf.Series(data, dtype="uint32").astype("float32"), ) assert_eq( - gd.Series(data, dtype="datetime64[ms]"), - gd.Series(data).astype("datetime64[ms]"), + cudf.Series(data, dtype="datetime64[ms]"), + cudf.Series(data).astype("datetime64[ms]"), ) # categorical to other assert_eq( - gd.Series(data, dtype="str"), - gd.Series(data, dtype="category").astype("str"), + cudf.Series(data, dtype="str"), + cudf.Series(data, dtype="category").astype("str"), ) assert_eq( - gd.Series(data, dtype="float32"), - gd.Series(data, dtype="category").astype("float32"), + cudf.Series(data, dtype="float32"), + cudf.Series(data, dtype="category").astype("float32"), ) assert_eq( - gd.Series(data, dtype="datetime64[ms]"), - gd.Series(data, dtype="category").astype("datetime64[ms]"), + cudf.Series(data, dtype="datetime64[ms]"), + cudf.Series(data, dtype="category").astype("datetime64[ms]"), ) # string to other assert_eq( - gd.Series([1, 2, None, 3], dtype="int32"), - gd.Series(["1", "2", None, "3"]).astype("int32"), + cudf.Series([1, 2, None, 3], dtype="int32"), + cudf.Series(["1", "2", None, "3"]).astype("int32"), ) assert_eq( - gd.Series( + cudf.Series( ["2001-01-01", "2001-02-01", None, "2001-03-01"], dtype="datetime64[ms]", ), 
- gd.Series(["2001-01-01", "2001-02-01", None, "2001-03-01"]).astype( + cudf.Series(["2001-01-01", "2001-02-01", None, "2001-03-01"]).astype( "datetime64[ms]" ), ) assert_eq( - gd.Series(["a", "b", "c", None], dtype="category").to_pandas(), - gd.Series(["a", "b", "c", None]).astype("category").to_pandas(), + cudf.Series(["a", "b", "c", None], dtype="category").to_pandas(), + cudf.Series(["a", "b", "c", None]).astype("category").to_pandas(), ) # datetime to other @@ -3776,20 +3777,21 @@ def test_series_astype_null_cases(): "2001-03-01 00:00:00.000000", ] assert_eq( - gd.Series(data), gd.Series(data, dtype="datetime64[us]").astype("str"), + cudf.Series(data), + cudf.Series(data, dtype="datetime64[us]").astype("str"), ) assert_eq( pd.Series(data, dtype="datetime64[ns]").astype("category"), - gd.from_pandas(pd.Series(data, dtype="datetime64[ns]")).astype( + cudf.from_pandas(pd.Series(data, dtype="datetime64[ns]")).astype( "category" ), ) def test_series_astype_null_categorical(): - sr = gd.Series([None, None, None], dtype="category") - expect = gd.Series([None, None, None], dtype="int32") + sr = cudf.Series([None, None, None], dtype="category") + expect = cudf.Series([None, None, None], dtype="int32") got = sr.astype("int32") assert_eq(expect, got) @@ -3813,19 +3815,19 @@ def test_series_astype_null_categorical(): ) def test_create_dataframe_from_list_like(data): pdf = pd.DataFrame(data, index=["count", "mean", "std", "min"]) - gdf = gd.DataFrame(data, index=["count", "mean", "std", "min"]) + gdf = cudf.DataFrame(data, index=["count", "mean", "std", "min"]) assert_eq(pdf, gdf) pdf = pd.DataFrame(data) - gdf = gd.DataFrame(data) + gdf = cudf.DataFrame(data) assert_eq(pdf, gdf) def test_create_dataframe_column(): pdf = pd.DataFrame(columns=["a", "b", "c"], index=["A", "Z", "X"]) - gdf = gd.DataFrame(columns=["a", "b", "c"], index=["A", "Z", "X"]) + gdf = cudf.DataFrame(columns=["a", "b", "c"], index=["A", "Z", "X"]) assert_eq(pdf, gdf) @@ -3834,7 +3836,7 @@ def 
test_create_dataframe_column(): columns=["a", "b", "c"], index=["A", "Z", "X"], ) - gdf = gd.DataFrame( + gdf = cudf.DataFrame( {"a": [1, 2, 3], "b": [2, 3, 5]}, columns=["a", "b", "c"], index=["A", "Z", "X"], @@ -3854,8 +3856,8 @@ def test_create_dataframe_column(): ], ) def test_series_values_host_property(data): - pds = pd.Series(data, dtype=None if len(data) else "float64") - gds = gd.Series(data) + pds = cudf.utils.utils.create_pandas_series(data=data) + gds = cudf.Series(data) np.testing.assert_array_equal(pds.values, gds.values_host) @@ -3877,8 +3879,8 @@ def test_series_values_host_property(data): ], ) def test_series_values_property(data): - pds = pd.Series(data, dtype=None if len(data) else "float64") - gds = gd.Series(data) + pds = cudf.utils.utils.create_pandas_series(data=data) + gds = cudf.Series(data) gds_vals = gds.values assert isinstance(gds_vals, cupy.ndarray) np.testing.assert_array_equal(gds_vals.get(), pds.values) @@ -3923,7 +3925,7 @@ def test_series_values_property(data): ) def test_df_values_property(data): pdf = pd.DataFrame.from_dict(data) - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) pmtr = pdf.values gmtr = gdf.values.get() @@ -3939,7 +3941,7 @@ def test_value_counts(): } ) - gdf = gd.DataFrame( + gdf = cudf.DataFrame( { "numeric": [1, 2, 3, 4, 5, 6, 1, 2, 4] * 10, "alpha": ["u", "h", "d", "a", "m", "u", "h", "d", "a"] * 10, @@ -3985,8 +3987,8 @@ def test_value_counts(): ) def test_isin_numeric(data, values): index = np.random.randint(0, 100, len(data)) - psr = pd.Series(data, index=index, dtype=None if len(data) else "float64") - gsr = gd.Series.from_pandas(psr, nan_as_null=False) + psr = cudf.utils.utils.create_pandas_series(data=data, index=index) + gsr = cudf.Series.from_pandas(psr, nan_as_null=False) expected = psr.isin(values) got = gsr.isin(values) @@ -4039,8 +4041,8 @@ def test_isin_numeric(data, values): ], ) def test_isin_datetime(data, values): - psr = pd.Series(data, dtype=None if len(data) 
else "datetime64[ns]") - gsr = gd.Series.from_pandas(psr) + psr = cudf.utils.utils.create_pandas_series(data=data) + gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) expected = psr.isin(values) @@ -4068,8 +4070,8 @@ def test_isin_datetime(data, values): ], ) def test_isin_string(data, values): - psr = pd.Series(data, dtype=None if len(data) else "float64") - gsr = gd.Series.from_pandas(psr) + psr = cudf.utils.utils.create_pandas_series(data=data) + gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) expected = psr.isin(values) @@ -4097,8 +4099,8 @@ def test_isin_string(data, values): ], ) def test_isin_categorical(data, values): - psr = pd.Series(data, dtype=None if len(data) else "float64") - gsr = gd.Series.from_pandas(psr) + psr = cudf.utils.utils.create_pandas_series(data=data) + gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) expected = psr.isin(values) @@ -4131,8 +4133,8 @@ def test_isin_categorical(data, values): ], ) def test_isin_index(data, values): - psr = pd.Series(data, dtype=None if len(data) else "float64") - gsr = gd.Series.from_pandas(psr) + psr = cudf.utils.utils.create_pandas_series(data=data) + gsr = cudf.Series.from_pandas(psr) got = gsr.index.isin(values) expected = psr.index.isin(values) @@ -4194,12 +4196,12 @@ def test_isin_index(data, values): ) def test_isin_multiindex(data, values, level, err): pmdx = data - gmdx = gd.from_pandas(data) + gmdx = cudf.from_pandas(data) if err is None: expected = pmdx.isin(values, level=level) if isinstance(values, pd.MultiIndex): - values = gd.from_pandas(values) + values = cudf.from_pandas(values) got = gmdx.isin(values, level=level) assert_eq(got, expected) @@ -4273,12 +4275,10 @@ def test_isin_multiindex(data, values, level, err): ], ) def test_isin_dataframe(data, values): - from cudf.utils.dtypes import is_scalar - pdf = data - gdf = gd.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) - if is_scalar(values): + if cudf.utils.dtypes.is_scalar(values): assert_exceptions_equal( 
lfunc=pdf.isin, rfunc=gdf.isin, @@ -4289,14 +4289,14 @@ def test_isin_dataframe(data, values): expected = pdf.isin(values) if isinstance(values, (pd.DataFrame, pd.Series)): - values = gd.from_pandas(values) + values = cudf.from_pandas(values) got = gdf.isin(values) assert_eq(got, expected) def test_constructor_properties(): - df = gd.DataFrame() + df = cudf.DataFrame() key1 = "a" key2 = "b" val1 = np.array([123], dtype=np.float64) @@ -4307,16 +4307,16 @@ def test_constructor_properties(): # Correct use of _constructor (for DataFrame) assert_eq(df, df._constructor({key1: val1, key2: val2})) - # Correct use of _constructor (for gd.Series) + # Correct use of _constructor (for cudf.Series) assert_eq(df[key1], df[key2]._constructor(val1, name=key1)) # Correct use of _constructor_sliced (for DataFrame) assert_eq(df[key1], df._constructor_sliced(val1, name=key1)) - # Correct use of _constructor_expanddim (for gd.Series) + # Correct use of _constructor_expanddim (for cudf.Series) assert_eq(df, df[key2]._constructor_expanddim({key1: val1, key2: val2})) - # Incorrect use of _constructor_sliced (Raises for gd.Series) + # Incorrect use of _constructor_sliced (Raises for cudf.Series) with pytest.raises(NotImplementedError): df[key1]._constructor_sliced @@ -4335,14 +4335,14 @@ def test_df_astype_numeric_to_all(dtype, as_dtype): elif "float" in dtype: data = [1.0, 2.0, None, 4.0, np.nan, -7.0] - gdf = gd.DataFrame() + gdf = cudf.DataFrame() - gdf["foo"] = gd.Series(data, dtype=dtype) - gdf["bar"] = gd.Series(data, dtype=dtype) + gdf["foo"] = cudf.Series(data, dtype=dtype) + gdf["bar"] = cudf.Series(data, dtype=dtype) - insert_data = gd.Series(data, dtype=dtype) + insert_data = cudf.Series(data, dtype=dtype) - expect = gd.DataFrame() + expect = cudf.DataFrame() expect["foo"] = insert_data.astype(as_dtype) expect["bar"] = insert_data.astype(as_dtype) @@ -4375,11 +4375,11 @@ def test_df_astype_string_to_other(as_dtype): elif "float" in as_dtype: data = [1.0, 2.0, 3.0, np.nan] - 
insert_data = gd.Series.from_pandas(pd.Series(data, dtype="str")) - expect_data = gd.Series(data, dtype=as_dtype) + insert_data = cudf.Series.from_pandas(pd.Series(data, dtype="str")) + expect_data = cudf.Series(data, dtype=as_dtype) - gdf = gd.DataFrame() - expect = gd.DataFrame() + gdf = cudf.DataFrame() + expect = cudf.DataFrame() gdf["foo"] = insert_data gdf["bar"] = insert_data @@ -4410,28 +4410,28 @@ def test_df_astype_datetime_to_other(as_dtype): None, ] - gdf = gd.DataFrame() - expect = gd.DataFrame() + gdf = cudf.DataFrame() + expect = cudf.DataFrame() - gdf["foo"] = gd.Series(data, dtype="datetime64[ms]") - gdf["bar"] = gd.Series(data, dtype="datetime64[ms]") + gdf["foo"] = cudf.Series(data, dtype="datetime64[ms]") + gdf["bar"] = cudf.Series(data, dtype="datetime64[ms]") if as_dtype == "int64": - expect["foo"] = gd.Series( + expect["foo"] = cudf.Series( [690595200000, 1102118400000, 1473724800000, None], dtype="int64" ) - expect["bar"] = gd.Series( + expect["bar"] = cudf.Series( [690595200000, 1102118400000, 1473724800000, None], dtype="int64" ) elif as_dtype == "str": - expect["foo"] = gd.Series(data, dtype="str") - expect["bar"] = gd.Series(data, dtype="str") + expect["foo"] = cudf.Series(data, dtype="str") + expect["bar"] = cudf.Series(data, dtype="str") elif as_dtype == "category": - expect["foo"] = gd.Series(gdf["foo"], dtype="category") - expect["bar"] = gd.Series(gdf["bar"], dtype="category") + expect["foo"] = cudf.Series(gdf["foo"], dtype="category") + expect["bar"] = cudf.Series(gdf["bar"], dtype="category") else: - expect["foo"] = gd.Series(data, dtype=as_dtype) - expect["bar"] = gd.Series(data, dtype=as_dtype) + expect["foo"] = cudf.Series(data, dtype=as_dtype) + expect["bar"] = cudf.Series(data, dtype=as_dtype) got = gdf.astype(as_dtype) @@ -4460,7 +4460,7 @@ def test_df_astype_categorical_to_other(as_dtype): pdf = pd.DataFrame() pdf["foo"] = psr pdf["bar"] = psr - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) 
assert_eq(pdf.astype(as_dtype), gdf.astype(as_dtype)) @@ -4470,12 +4470,12 @@ def test_df_astype_to_categorical_ordered(ordered): pdf = pd.DataFrame() pdf["foo"] = psr pdf["bar"] = psr - gdf = gd.DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) ordered_dtype_pd = pd.CategoricalDtype( categories=[1, 2, 3], ordered=ordered ) - ordered_dtype_gd = gd.CategoricalDtype.from_pandas(ordered_dtype_pd) + ordered_dtype_gd = cudf.CategoricalDtype.from_pandas(ordered_dtype_pd) assert_eq( pdf.astype(ordered_dtype_pd).astype("int32"), @@ -4489,7 +4489,7 @@ def test_df_astype_to_categorical_ordered(ordered): + [("category", {"ordered": True}), ("category", {"ordered": False})], ) def test_empty_df_astype(dtype, args): - df = gd.DataFrame() + df = cudf.DataFrame() kwargs = {} kwargs.update(args) assert_eq(df, df.astype(dtype=dtype, **kwargs)) @@ -4509,7 +4509,7 @@ def test_empty_df_astype(dtype, args): ], ) def test_series_astype_error_handling(errors): - sr = gd.Series(["random", "words"]) + sr = cudf.Series(["random", "words"]) got = sr.astype("datetime64", errors=errors) assert_eq(sr, got) @@ -4527,12 +4527,12 @@ def test_df_constructor_dtype(dtype): else: data = [1, 2, 3, None] - sr = gd.Series(data, dtype=dtype) + sr = cudf.Series(data, dtype=dtype) - expect = gd.DataFrame() + expect = cudf.DataFrame() expect["foo"] = sr expect["bar"] = sr - got = gd.DataFrame({"foo": data, "bar": data}, dtype=dtype) + got = cudf.DataFrame({"foo": data, "bar": data}, dtype=dtype) assert_eq(expect, got) @@ -4540,31 +4540,31 @@ def test_df_constructor_dtype(dtype): @pytest.mark.parametrize( "data", [ - gd.datasets.randomdata( + cudf.datasets.randomdata( nrows=10, dtypes={"a": "category", "b": int, "c": float, "d": int} ), - gd.datasets.randomdata( + cudf.datasets.randomdata( nrows=10, dtypes={"a": "category", "b": int, "c": float, "d": str} ), - gd.datasets.randomdata( + cudf.datasets.randomdata( nrows=10, dtypes={"a": bool, "b": int, "c": float, "d": str} ), - gd.DataFrame(), 
- gd.DataFrame({"a": [0, 1, 2], "b": [1, None, 3]}), - gd.DataFrame( + cudf.DataFrame(), + cudf.DataFrame({"a": [0, 1, 2], "b": [1, None, 3]}), + cudf.DataFrame( { "a": [1, 2, 3, 4], "b": [7, np.NaN, 9, 10], "c": [np.NaN, np.NaN, np.NaN, np.NaN], - "d": gd.Series([None, None, None, None], dtype="int64"), + "d": cudf.Series([None, None, None, None], dtype="int64"), "e": [100, None, 200, None], - "f": gd.Series([10, None, np.NaN, 11], nan_as_null=False), + "f": cudf.Series([10, None, np.NaN, 11], nan_as_null=False), } ), - gd.DataFrame( + cudf.DataFrame( { "a": [10, 11, 12, 13, 14, 15], - "b": gd.Series( + "b": cudf.Series( [10, None, np.NaN, 2234, None, np.NaN], nan_as_null=False ), } @@ -4593,18 +4593,18 @@ def test_rowwise_ops(data, op, skipna): "op", ["max", "min", "sum", "product", "mean", "var", "std"] ) def test_rowwise_ops_nullable_dtypes_all_null(op): - gdf = gd.DataFrame( + gdf = cudf.DataFrame( { "a": [1, 2, 3, 4], "b": [7, np.NaN, 9, 10], "c": [np.NaN, np.NaN, np.NaN, np.NaN], - "d": gd.Series([None, None, None, None], dtype="int64"), + "d": cudf.Series([None, None, None, None], dtype="int64"), "e": [100, None, 200, None], - "f": gd.Series([10, None, np.NaN, 11], nan_as_null=False), + "f": cudf.Series([10, None, np.NaN, 11], nan_as_null=False), } ) - expected = gd.Series([None, None, None, None], dtype="float64") + expected = cudf.Series([None, None, None, None], dtype="float64") if op in ("var", "std"): got = getattr(gdf, op)(axis=1, ddof=0, skipna=False) @@ -4620,7 +4620,7 @@ def test_rowwise_ops_nullable_dtypes_all_null(op): [ ( "max", - gd.Series( + cudf.Series( [10.0, None, np.NaN, 2234.0, None, np.NaN], dtype="float64", nan_as_null=False, @@ -4628,7 +4628,7 @@ def test_rowwise_ops_nullable_dtypes_all_null(op): ), ( "min", - gd.Series( + cudf.Series( [10.0, None, np.NaN, 13.0, None, np.NaN], dtype="float64", nan_as_null=False, @@ -4636,7 +4636,7 @@ def test_rowwise_ops_nullable_dtypes_all_null(op): ), ( "sum", - gd.Series( + cudf.Series( [20.0, None, 
np.NaN, 2247.0, None, np.NaN], dtype="float64", nan_as_null=False, @@ -4644,7 +4644,7 @@ def test_rowwise_ops_nullable_dtypes_all_null(op): ), ( "product", - gd.Series( + cudf.Series( [100.0, None, np.NaN, 29042.0, None, np.NaN], dtype="float64", nan_as_null=False, @@ -4652,7 +4652,7 @@ def test_rowwise_ops_nullable_dtypes_all_null(op): ), ( "mean", - gd.Series( + cudf.Series( [10.0, None, np.NaN, 1123.5, None, np.NaN], dtype="float64", nan_as_null=False, @@ -4660,7 +4660,7 @@ def test_rowwise_ops_nullable_dtypes_all_null(op): ), ( "var", - gd.Series( + cudf.Series( [0.0, None, np.NaN, 1233210.25, None, np.NaN], dtype="float64", nan_as_null=False, @@ -4668,7 +4668,7 @@ def test_rowwise_ops_nullable_dtypes_all_null(op): ), ( "std", - gd.Series( + cudf.Series( [0.0, None, np.NaN, 1110.5, None, np.NaN], dtype="float64", nan_as_null=False, @@ -4677,10 +4677,10 @@ def test_rowwise_ops_nullable_dtypes_all_null(op): ], ) def test_rowwise_ops_nullable_dtypes_partial_null(op, expected): - gdf = gd.DataFrame( + gdf = cudf.DataFrame( { "a": [10, 11, 12, 13, 14, 15], - "b": gd.Series( + "b": cudf.Series( [10, None, np.NaN, 2234, None, np.NaN], nan_as_null=False, ), } @@ -4698,38 +4698,44 @@ def test_rowwise_ops_nullable_dtypes_partial_null(op, expected): @pytest.mark.parametrize( "op,expected", [ - ("max", gd.Series([10, None, None, 2234, None, 453], dtype="int64",),), - ("min", gd.Series([10, None, None, 13, None, 15], dtype="int64",),), - ("sum", gd.Series([20, None, None, 2247, None, 468], dtype="int64",),), + ( + "max", + cudf.Series([10, None, None, 2234, None, 453], dtype="int64",), + ), + ("min", cudf.Series([10, None, None, 13, None, 15], dtype="int64",),), + ( + "sum", + cudf.Series([20, None, None, 2247, None, 468], dtype="int64",), + ), ( "product", - gd.Series([100, None, None, 29042, None, 6795], dtype="int64",), + cudf.Series([100, None, None, 29042, None, 6795], dtype="int64",), ), ( "mean", - gd.Series( + cudf.Series( [10.0, None, None, 1123.5, None, 234.0], 
dtype="float32", ), ), ( "var", - gd.Series( + cudf.Series( [0.0, None, None, 1233210.25, None, 47961.0], dtype="float32", ), ), ( "std", - gd.Series( + cudf.Series( [0.0, None, None, 1110.5, None, 219.0], dtype="float32", ), ), ], ) def test_rowwise_ops_nullable_int_dtypes(op, expected): - gdf = gd.DataFrame( + gdf = cudf.DataFrame( { "a": [10, 11, None, 13, None, 15], - "b": gd.Series( + "b": cudf.Series( [10, None, 323, 2234, None, 453], nan_as_null=False, ), } @@ -4748,62 +4754,62 @@ def test_rowwise_ops_nullable_int_dtypes(op, expected): "data", [ { - "t1": gd.Series( + "t1": cudf.Series( ["2020-08-01 09:00:00", "1920-05-01 10:30:00"], dtype=" 0: if nulls == "some": @@ -429,14 +429,14 @@ def test_datetime_unique(data, nulls): @pytest.mark.parametrize( "data", [ - [], + pd.Series([], dtype="datetime64[ns]"), pd.Series(pd.date_range("2010-01-01", "2010-02-01")), pd.Series([None, None], dtype="datetime64[ns]"), ], ) @pytest.mark.parametrize("nulls", ["none", "some"]) def test_datetime_nunique(data, nulls): - psr = pd.Series(data, dtype=None if len(data) else "datetime64[ns]") + psr = data.copy() if len(data) > 0: if nulls == "some": diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index 92e70543cbe..b354f6b2f8a 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -22,7 +22,7 @@ @pytest.mark.parametrize("inplace", [True, False]) def test_dropna_series(data, nulls, inplace): - psr = pd.Series(data, dtype=None if len(data) else "float64") + psr = cudf.utils.utils.create_pandas_series(data=data) if len(data) > 0: if nulls == "one": diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index b4a45ed001b..eb8fb1db46f 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -56,7 +56,7 @@ def test_duplicated_with_misspelled_column_name(subset): ], ) def test_drop_duplicates_series(data, 
keep): - pds = Series(data, dtype=None if len(data) else "float64") + pds = cudf.utils.utils.create_pandas_series(data) gds = cudf.from_pandas(pds) assert_df(pds.drop_duplicates(keep=keep), gds.drop_duplicates(keep=keep)) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index adb6bb33763..000bd87803d 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -961,7 +961,7 @@ def test_index_equal_misc(data, other): assert_eq(expected, actual) expected = pd_data.equals( - pd.Series(pd_other, dtype=None if len(pd_other) else "float64") + cudf.utils.utils.create_pandas_series(data=pd_other) ) actual = gd_data.equals(cudf.Series(gd_other)) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 1dd3a5c1c8c..9cf8b3ac239 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -159,7 +159,7 @@ def test_integer_dataframe(x): @settings(deadline=None) def test_integer_series(x): sr = cudf.Series(x) - ps = pd.Series(x, dtype=None if len(x) else "float64") + ps = cudf.utils.utils.create_pandas_series(data=x) assert sr.__repr__() == ps.__repr__() @@ -176,7 +176,7 @@ def test_float_dataframe(x): @settings(deadline=None) def test_float_series(x): sr = cudf.Series(x, nan_as_null=False) - ps = pd.Series(x, dtype=None if len(x) else "float64") + ps = cudf.utils.utils.create_pandas_series(data=x) assert sr.__repr__() == ps.__repr__() diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index c701e863c35..27236910ebb 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -39,7 +39,7 @@ def test_rolling_series_basic(data, index, agg, nulls, center): elif nulls == "all": data = [np.nan] * len(data) - psr = pd.Series(data, index=index, dtype=None if len(data) else "float64") + psr = 
cudf.utils.utils.create_pandas_series(data=data, index=index) gsr = cudf.Series(psr) for window_size in range(1, len(data) + 1): for min_periods in range(1, window_size + 1): @@ -214,7 +214,7 @@ def test_rolling_getitem_window(): @pytest.mark.parametrize("center", [True, False]) def test_rollling_series_numba_udf_basic(data, index, center): - psr = pd.Series(data, index=index, dtype=None if len(data) else "float64") + psr = cudf.utils.utils.create_pandas_series(data=data, index=index) gsr = cudf.from_pandas(psr) def some_func(A): diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index b6210be62f3..d62942c2364 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -384,7 +384,7 @@ def test_series_tolist(data): [[], [None, None], ["a"], ["a", "b", "c"] * 500, [1.0, 2.0, 0.3] * 57], ) def test_series_size(data): - psr = pd.Series(data, dtype=None if len(data) else "float64") + psr = cudf.utils.utils.create_pandas_series(data=data) gsr = cudf.Series(data) assert_eq(psr.size, gsr.size) @@ -482,7 +482,7 @@ def test_series_factorize(data, na_sentinel): @pytest.mark.parametrize( "data", [ - [], + pd.Series([], dtype="datetime64[ns]"), pd.Series(pd.date_range("2010-01-01", "2010-02-01")), pd.Series([None, None], dtype="datetime64[ns]"), ], @@ -491,7 +491,7 @@ def test_series_factorize(data, na_sentinel): @pytest.mark.parametrize("normalize", [True, False]) @pytest.mark.parametrize("nulls", ["none", "some"]) def test_series_datetime_value_counts(data, nulls, normalize, dropna): - psr = pd.Series(data, dtype=None if len(data) else "datetime64[ns]") + psr = data.copy() if len(data) > 0: if nulls == "one": diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index e8483e44462..1512c87d160 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -6,6 +6,7 @@ import pandas as pd import pytest +import cudf from cudf.core 
import Series from cudf.datasets import randomdata from cudf.tests.utils import assert_eq, assert_exceptions_equal @@ -204,7 +205,7 @@ def test_approx_quantiles_int(): @pytest.mark.parametrize("q", [[], 0.5, 1, 0.234, [0.345], [0.243, 0.5, 1]]) def test_misc_quantiles(data, q): - pdf_series = pd.Series(data, dtype=None if len(data) else "float64") + pdf_series = cudf.utils.utils.create_pandas_series(data=data) gdf_series = Series(data) expected = pdf_series.quantile(q) @@ -434,13 +435,13 @@ def test_df_corr(): ) @pytest.mark.parametrize("skipna", [True, False, None]) def test_nans_stats(data, ops, skipna): - psr = pd.Series(data, dtype=None if len(data) else "float64") + psr = cudf.utils.utils.create_pandas_series(data=data) gsr = Series(data) assert_eq( getattr(psr, ops)(skipna=skipna), getattr(gsr, ops)(skipna=skipna) ) - psr = pd.Series(data, dtype=None if len(data) else "float64") + psr = cudf.utils.utils.create_pandas_series(data=data) gsr = Series(data, nan_as_null=False) # Since there is no concept of `nan_as_null` in pandas, # nulls will be returned in the operations. So only diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 74622a8ceb2..b0a1aff4ada 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+ import functools from collections import OrderedDict from collections.abc import Sequence @@ -622,3 +623,18 @@ def _categorical_scalar_broadcast_to(cat_scalar, size): offset=codes.offset, ordered=ordered, ) + + +def create_pandas_series( + data=None, index=None, dtype=None, name=None, copy=False, fastpath=False +): + if (data is None or len(data) == 0) and dtype is None: + dtype = "float64" + return pd.Series( + data=data, + index=index, + dtype=dtype, + name=name, + copy=copy, + fastpath=fastpath, + ) From ea6173301efb8f04bac59b7a2d4893a62cb36a27 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 10:23:25 -0800 Subject: [PATCH 16/35] remove is_scalar check --- python/cudf/cudf/core/column/categorical.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 99da0f9970c..0649f82256e 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -946,12 +946,7 @@ def unary_operator(self, unaryop: str): ) def __setitem__(self, key, value): - if cudf.utils.dtypes.is_scalar(value): - new_values = [value] - else: - new_values = value - - to_add_categories = cudf.Index(new_values).difference(self.categories) + to_add_categories = cudf.Index(value).difference(self.categories) if ( len(to_add_categories) From d8ca966f426fc70175068fcd59667b10423edf47 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 12:35:40 -0800 Subject: [PATCH 17/35] version all pytest xfails --- python/cudf/cudf/tests/test_dataframe.py | 29 +++- python/cudf/cudf/tests/test_index.py | 32 ++++ python/cudf/cudf/tests/test_indexing.py | 6 +- python/cudf/cudf/tests/test_joining.py | 6 + python/cudf/cudf/tests/test_json.py | 14 +- python/cudf/cudf/tests/test_numerical.py | 9 ++ python/cudf/cudf/tests/test_reshape.py | 12 +- python/cudf/cudf/tests/test_setitem.py | 6 + python/cudf/cudf/tests/test_string.py | 149 
++++++++++++------ python/cudf/cudf/tests/test_timedelta.py | 47 +++++- .../dask_cudf/dask_cudf/tests/test_groupby.py | 15 +- 11 files changed, 267 insertions(+), 58 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index d792c62a247..b72b3338342 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -18,7 +18,7 @@ from numba import cuda import cudf -from cudf.core._compat import PANDAS_GE_110 +from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_120 from cudf.core.column import column from cudf.tests import utils from cudf.tests.utils import ( @@ -1852,7 +1852,7 @@ def test_dataframe_min_count_ops(data, ops, skipna, min_count): psr = pd.DataFrame(data) gsr = cudf.DataFrame(data) - if psr.shape[0] * psr.shape[1] < min_count: + if PANDAS_GE_120 and psr.shape[0] * psr.shape[1] < min_count: pytest.xfail("https://github.com/pandas-dev/pandas/issues/39738") assert_eq( @@ -4065,7 +4065,14 @@ def test_isin_datetime(data, values): ["this", "is"], [None, None, None], ["12", "14", "19"], - [12, 14, 19], + pytest.param( + [12, 14, 19], + marks=pytest.mark.xfail( + not PANDAS_GE_120, + reason="pandas's failure here seems like a bug(in < 1.2) " + "given the reverse succeeds", + ), + ), ["is", "this", "is", "this", "is"], ], ) @@ -4286,7 +4293,14 @@ def test_isin_dataframe(data, values): rfunc_args_and_kwargs=([values],), ) else: - expected = pdf.isin(values) + try: + expected = pdf.isin(values) + except ValueError as e: + if str(e) == "Lengths must match.": + pytest.xfail( + not PANDAS_GE_110, + "https://github.com/pandas-dev/pandas/issues/34256", + ) if isinstance(values, (pd.DataFrame, pd.Series)): values = cudf.from_pandas(values) @@ -4906,7 +4920,12 @@ def test_rowwise_ops_datetime_dtypes_pdbug(data): expected = pdf.max(axis=1, skipna=False) got = gdf.max(axis=1, skipna=False) - assert_eq(got, expected) + if PANDAS_GE_120: + assert_eq(got, expected) + else: + # 
PANDAS BUG: https://github.com/pandas-dev/pandas/issues/36907 + with pytest.raises(AssertionError, match="numpy array are different"): + assert_eq(got, expected) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 000bd87803d..127d198d61e 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -12,6 +12,7 @@ import cudf from cudf.core import DataFrame +from cudf.core._compat import PANDAS_GE_110 from cudf.core.index import ( CategoricalIndex, DatetimeIndex, @@ -798,6 +799,17 @@ def test_index_difference(data, other, sort): gd_data = cudf.core.index.as_index(data) gd_other = cudf.core.index.as_index(other) + if ( + gd_data.dtype.kind == "f" + and gd_other.dtype.kind != "f" + or (gd_data.dtype.kind != "f" and gd_other.dtype.kind == "f") + ): + pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="Bug in Pandas: " + "https://github.com/pandas-dev/pandas/issues/35217", + ) + expected = pd_data.difference(pd_other, sort=sort) actual = gd_data.difference(gd_other, sort=sort) assert_eq(expected, actual) @@ -856,6 +868,15 @@ def test_index_equals(data, other): gd_data = cudf.core.index.as_index(data) gd_other = cudf.core.index.as_index(other) + if ( + gd_data.dtype.kind == "f" or gd_other.dtype.kind == "f" + ) and cudf.utils.dtypes.is_mixed_with_object_dtype(gd_data, gd_other): + pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="Bug in Pandas: " + "https://github.com/pandas-dev/pandas/issues/35217", + ) + expected = pd_data.equals(pd_other) actual = gd_data.equals(gd_other) assert_eq(expected, actual) @@ -902,6 +923,17 @@ def test_index_categories_equal(data, other): gd_data = cudf.core.index.as_index(data).astype("category") gd_other = cudf.core.index.as_index(other) + if ( + gd_data.dtype.kind == "f" + and gd_other.dtype.kind != "f" + or (gd_data.dtype.kind != "f" and gd_other.dtype.kind == "f") + ): + pytest.mark.xfail( + condition=not 
PANDAS_GE_110, + reason="Bug in Pandas: " + "https://github.com/pandas-dev/pandas/issues/35217", + ) + expected = pd_data.equals(pd_other) actual = gd_data.equals(gd_other) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 6e33b1421c8..6921ac3fa35 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -9,7 +9,7 @@ import cudf from cudf import DataFrame, Series -from cudf.core._compat import PANDAS_GE_110 +from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_120 from cudf.tests import utils from cudf.tests.utils import INTEGER_TYPES, assert_eq, assert_exceptions_equal @@ -975,6 +975,10 @@ def test_series_setitem_datetime(): assert_eq(psr, gsr) +@pytest.mark.xfail( + condition=not PANDAS_GE_120, + reason="Pandas will coerce to object datatype here", +) def test_series_setitem_datetime_coerced(): psr = pd.Series(["2001", "2002", "2003"], dtype="datetime64[ns]") gsr = cudf.from_pandas(psr) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index d7735f9029f..f8af320eb84 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -6,6 +6,7 @@ import cudf from cudf.core import DataFrame, Series +from cudf.core._compat import PANDAS_GE_120 from cudf.core.dtypes import CategoricalDtype from cudf.tests.utils import ( INTEGER_TYPES, @@ -540,6 +541,11 @@ def test_empty_joins(how, left_empty, right_empty): assert len(expected) == len(result) +@pytest.mark.xfail( + condition=not PANDAS_GE_120, + reason="left_on/right_on produces undefined results with 0" + "index and is disabled", +) def test_merge_left_index_zero(): left = pd.DataFrame({"x": [1, 2, 3, 4, 5, 6]}, index=[0, 1, 2, 3, 4, 5]) right = pd.DataFrame( diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index 791598110df..e0a922f35fe 100644 --- 
a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -11,6 +11,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_110 from cudf.tests.utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq @@ -133,7 +134,18 @@ def test_json_writer(tmpdir, pdf, gdf): assert os.path.exists(pdf_series_fname) assert os.path.exists(gdf_series_fname) - expect_series = pd.read_json(pdf_series_fname, typ="series") + try: + # xref 'https://github.com/pandas-dev/pandas/pull/33373' + expect_series = pd.read_json(pdf_series_fname, typ="series") + except TypeError as e: + if ( + not PANDAS_GE_110 + and str(e) == " is not convertible to datetime" + ): + continue + else: + raise e + got_series = pd.read_json(gdf_series_fname, typ="series") assert_eq(expect_series, got_series) diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index a70dd7f4024..f4cdf619212 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -6,6 +6,7 @@ import cudf from cudf import Series +from cudf.core._compat import PANDAS_GE_100 from cudf.tests.utils import assert_eq @@ -89,6 +90,10 @@ def test_can_cast_safely_mixed_kind(): assert not data.can_cast_safely(to_dtype) +@pytest.mark.xfail( + condition=not PANDAS_GE_100, + reason="cuDF null <-> pd.NA compatibility not yet supported", +) def test_to_pandas_nullable_integer(): gsr_not_null = Series([1, 2, 3]) gsr_has_null = Series([1, 2, None]) @@ -100,6 +105,10 @@ def test_to_pandas_nullable_integer(): assert_eq(gsr_has_null.to_pandas(nullable=True), psr_has_null) +@pytest.mark.xfail( + condition=not PANDAS_GE_100, + reason="cuDF null <-> pd.NA compatibility not yet supported", +) def test_to_pandas_nullable_bool(): gsr_not_null = Series([True, False, True]) gsr_has_null = Series([True, False, None]) diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 5e90c2348e4..a8196c596f0 100644 --- 
a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -9,6 +9,7 @@ import cudf from cudf import melt as cudf_melt from cudf.core import DataFrame +from cudf.core._compat import PANDAS_GE_120 from cudf.tests.utils import ( ALL_TYPES, DATETIME_TYPES, @@ -73,7 +74,16 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype): @pytest.mark.parametrize("num_cols", [1, 2, 10]) @pytest.mark.parametrize("num_rows", [1, 2, 1000]) @pytest.mark.parametrize( - "dtype", list(NUMERIC_TYPES + DATETIME_TYPES) + ["str"], + "dtype", + list(NUMERIC_TYPES + DATETIME_TYPES) + + [ + pytest.param( + "str", + marks=pytest.mark.xfail( + condition=not PANDAS_GE_120, reason="pandas bug" + ), + ) + ], ) @pytest.mark.parametrize("nulls", ["none", "some"]) def test_df_stack(nulls, num_cols, num_rows, dtype): diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 2d4791f541c..57661511f5b 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -5,6 +5,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_120 from cudf.tests.utils import assert_eq, assert_exceptions_equal @@ -19,6 +20,11 @@ def test_dataframe_setitem_bool_mask_scaler(df, arg, value): assert_eq(df, gdf) +@pytest.mark.xfail( + condition=not PANDAS_GE_120, + reason="pandas incorrectly adds nulls with dataframes " + "but works fine with scalers", +) def test_dataframe_setitem_scaler_bool(): df = pd.DataFrame({"a": [1, 2, 3]}) df[[True, False, True]] = pd.DataFrame({"a": [-1, -2]}) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index f98f897ef72..13501d97405 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -13,6 +13,7 @@ import cudf from cudf import concat from cudf.core import DataFrame, Series +from cudf.core._compat import PANDAS_GE_110 from cudf.core.column.string import StringColumn from 
cudf.core.index import StringIndex, as_index from cudf.tests.utils import ( @@ -341,8 +342,20 @@ def _cat_convert_seq_to_cudf(others): ("f", "g", "h", "i", "j"), pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pd.Index(["f", "g", "h", "i", "j"]), - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + pytest.param( + pd.Index(["f", "g", "h", "i", "j"]), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), + ), + pytest.param( + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), + ), ( np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), @@ -367,26 +380,38 @@ def _cat_convert_seq_to_cudf(others): pd.Series(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), ), - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), + pytest.param( + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), + ), + pytest.param( + [ + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", 
"a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ], + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), ), - [ - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ], [ pd.Series(["hello", "world", "abc", "xyz", "pqr"]), pd.Series(["abc", "xyz", "hello", "pqr", "world"]), @@ -488,8 +513,20 @@ def test_string_cat(ps_gs, others, sep, na_rep, index): ("f", "g", "h", "i", "j"), pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pd.Index(["f", "g", "h", "i", "j"]), - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + pytest.param( + pd.Index(["f", "g", "h", "i", "j"]), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), + ), + pytest.param( + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), + ), ( np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), @@ -502,26 +539,38 @@ def test_string_cat(ps_gs, others, sep, na_rep, index): pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["f", "g", "h", "i", "j"]), ], - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), + pytest.param( + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + 
np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), + ), + pytest.param( + [ + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ], + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), ), - [ - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ], [ pd.Series( ["hello", "world", "abc", "xyz", "pqr"], @@ -580,8 +629,20 @@ def test_string_index_str_cat(data, others, sep, na_rep, name): None, ["f", "g", "h", "i", "j"], pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pd.Index(["f", "g", "h", "i", "j"]), - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + pytest.param( + pd.Index(["f", "g", "h", "i", "j"]), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), + ), + pytest.param( + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/33436", + ), + ), [ np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index d55bc533ba8..3efc30af01e 100644 --- 
a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -10,6 +10,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_120 from cudf.tests import utils as utils from cudf.tests.utils import assert_eq, assert_exceptions_equal @@ -421,7 +422,13 @@ def test_timedelta_dataframe_ops(df, op): np.timedelta64(4, "s"), np.timedelta64(456, "D"), np.timedelta64(46, "h"), - np.timedelta64("nat"), + pytest.param( + np.timedelta64("nat"), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_120, + reason="https://github.com/pandas-dev/pandas/issues/35529", + ), + ), np.timedelta64(1, "s"), np.timedelta64(1, "ms"), np.timedelta64(1, "us"), @@ -430,7 +437,20 @@ def test_timedelta_dataframe_ops(df, op): ) @pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES) @pytest.mark.parametrize( - "op", ["add", "sub", "truediv", "mod", "floordiv"], + "op", + [ + "add", + "sub", + "truediv", + "mod", + pytest.param( + "floordiv", + marks=pytest.mark.xfail( + condition=not PANDAS_GE_120, + reason="https://github.com/pandas-dev/pandas/issues/35529", + ), + ), + ], ) def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op): gsr = cudf.Series(data=data, dtype=dtype) @@ -504,7 +524,13 @@ def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op): datetime.timedelta(seconds=768), datetime.timedelta(microseconds=7), np.timedelta64(4, "s"), - np.timedelta64("nat", "s"), + pytest.param( + np.timedelta64("nat", "s"), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_120, + reason="https://github.com/pandas-dev/pandas/issues/35529", + ), + ), np.timedelta64(1, "s"), np.timedelta64(1, "ms"), np.timedelta64(1, "us"), @@ -514,7 +540,20 @@ def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op): ) @pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES) @pytest.mark.parametrize( - "op", ["add", "sub", "truediv", "mod", "floordiv"], + "op", + [ + "add", + "sub", + "truediv", + "mod", + 
pytest.param( + "floordiv", + marks=pytest.mark.xfail( + condition=not PANDAS_GE_120, + reason="https://github.com/pandas-dev/pandas/issues/35529", + ), + ), + ], ) def test_timedelta_series_ops_with_cudf_scalars(data, cpu_scalar, dtype, op): gpu_scalar = cudf.Scalar(cpu_scalar) diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 2bb80b85568..f8ed00beb4f 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -10,6 +10,7 @@ import dask_cudf import cudf +from cudf.core._compat import PANDAS_GE_120 @pytest.mark.parametrize("aggregation", ["sum", "mean", "count", "min", "max"]) @@ -127,8 +128,18 @@ def test_groupby_std(func): @pytest.mark.parametrize( "func", [ - pytest.param(lambda df: df.groupby(["a", "b"]).x.sum()), - pytest.param(lambda df: df.groupby(["a", "b"]).sum()), + pytest.param( + lambda df: df.groupby(["a", "b"]).x.sum(), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_120, reason="pandas bug" + ), + ), + pytest.param( + lambda df: df.groupby(["a", "b"]).sum(), + marks=pytest.mark.xfail( + condition=not PANDAS_GE_120, reason="pandas bug" + ), + ), pytest.param( lambda df: df.groupby(["a", "b"]).agg({"x", "sum"}), marks=pytest.mark.xfail, From 8d079f0375c37fd410198edb6eea4636cd097560 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 13:05:00 -0800 Subject: [PATCH 18/35] add check_order flag --- python/cudf/cudf/testing/testing.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 2f9a78aab78..c9b519ed1e9 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -231,6 +231,7 @@ def assert_index_equal( check_less_precise: Union[bool, int] = False, check_exact: bool = True, check_categorical: bool = True, + check_order: bool = True, rtol: float = 1e-5, atol: float = 1e-8, obj: 
str = "Index", @@ -260,6 +261,13 @@ def assert_index_equal( Whether to compare number exactly. check_categorical : bool, default True Whether to compare internal Categorical exactly. + check_order : bool, default True + Whether to compare the order of index entries as + well as their values. + If True, both indexes must contain the same elements, + in the same order. + If False, both indexes must contain the same elements, + but in any order. rtol : float, default 1e-5 Relative tolerance. Only used when `check_exact` is False. atol : float, default 1e-8 @@ -310,6 +318,11 @@ def assert_index_equal( obj, "lengths are different", f"{len(left)}", f"{len(right)}" ) + # If order doesn't matter then sort the index entries + if not check_order: + left = left.sort_values() + right = right.sort_values() + if isinstance(left, cudf.MultiIndex): if left.nlevels != right.nlevels: raise AssertionError( @@ -328,6 +341,7 @@ def assert_index_equal( exact=check_exact, check_names=check_names, check_exact=check_exact, + check_order=check_order, rtol=rtol, atol=atol, obj=mul_obj, From d8ff5349d51f6c27479b6651fa3f3dddc7b63c5b Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 13:22:22 -0800 Subject: [PATCH 19/35] remove version for cudf apis --- python/cudf/cudf/testing/testing.py | 140 ++++++++++------------------ 1 file changed, 48 insertions(+), 92 deletions(-) diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index c9b519ed1e9..ec1af0b7321 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -334,28 +334,18 @@ def assert_index_equal( llevel = cudf.Index(left._columns[level], name=left.names[level]) rlevel = cudf.Index(right._columns[level], name=right.names[level]) mul_obj = f"MultiIndex level [{level}]" - if PANDAS_GE_110: - assert_index_equal( - llevel, - rlevel, - exact=check_exact, - check_names=check_names, - check_exact=check_exact, - check_order=check_order, - rtol=rtol, - 
atol=atol, - obj=mul_obj, - ) - else: - assert_index_equal( - llevel, - rlevel, - exact=check_exact, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - obj=mul_obj, - ) + assert_index_equal( + llevel, + rlevel, + exact=check_exact, + check_names=check_names, + check_exact=check_exact, + check_less_precise=check_less_precise, + check_order=check_order, + rtol=rtol, + atol=atol, + obj=mul_obj, + ) return assert_column_equal( @@ -472,55 +462,32 @@ def assert_series_equal( raise_assert_detail(obj, "Series length are different", msg1, msg2) # index comparison - if PANDAS_GE_110: - assert_index_equal( - left.index, - right.index, - exact=check_index_type, - check_names=check_names, - check_exact=check_exact, - check_categorical=check_categorical, - rtol=rtol, - atol=atol, - obj=f"{obj}.index", - ) - else: - assert_index_equal( - left.index, - right.index, - exact=check_index_type, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_categorical=check_categorical, - obj=f"{obj}.index", - ) + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) - if PANDAS_GE_110: - assert_column_equal( - left._column, - right._column, - check_dtype=check_dtype, - check_column_type=check_series_type, - check_exact=check_exact, - check_datetimelike_compat=check_datetimelike_compat, - check_categorical=check_categorical, - check_category_order=check_category_order, - rtol=rtol, - atol=atol, - ) - else: - assert_column_equal( - left._column, - right._column, - check_dtype=check_dtype, - check_column_type=check_series_type, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_datetimelike_compat=check_datetimelike_compat, - check_categorical=check_categorical, - 
check_category_order=check_category_order, - ) + assert_column_equal( + left._column, + right._column, + check_dtype=check_dtype, + check_column_type=check_series_type, + check_less_precise=check_less_precise, + check_exact=check_exact, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + check_category_order=check_category_order, + rtol=rtol, + atol=atol, + ) # metadata comparison if check_names and (left.name != right.name): @@ -695,25 +662,14 @@ def assert_frame_equal( ) for col in left.columns: - if PANDAS_GE_110: - assert_column_equal( - left._data[col], - right._data[col], - check_dtype=check_dtype, - check_exact=check_exact, - check_datetimelike_compat=check_datetimelike_compat, - check_categorical=check_categorical, - rtol=rtol, - atol=atol, - obj=f'Column name="{col}"', - ) - else: - assert_column_equal( - left._data[col], - right._data[col], - check_dtype=check_dtype, - check_exact=check_exact, - check_datetimelike_compat=check_datetimelike_compat, - check_categorical=check_categorical, - obj=f'Column name="{col}"', - ) + assert_column_equal( + left._data[col], + right._data[col], + check_dtype=check_dtype, + check_exact=check_exact, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f'Column name="{col}"', + ) From a0637b9727f9a6cf9d8e26dbbd55826152b3bcc4 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 13:35:35 -0800 Subject: [PATCH 20/35] make importing cudf uniform in pytests --- python/cudf/cudf/tests/test_categorical.py | 102 +++++++++--------- .../dask_cudf/tests/test_reductions.py | 10 +- 2 files changed, 55 insertions(+), 57 deletions(-) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index d4dca164992..4be0475a4a3 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -6,10 +6,8 @@ import pandas as pd 
import pytest -import cudf as gd -from cudf.core import DataFrame, Series +import cudf from cudf.core._compat import PANDAS_GE_110 -from cudf.core.index import as_index from cudf.tests.utils import assert_eq, assert_exceptions_equal @@ -22,10 +20,10 @@ def pd_str_cat(): def test_categorical_basic(): cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) - cudf_cat = as_index(cat) + cudf_cat = cudf.Index(cat) pdsr = pd.Series(cat, index=["p", "q", "r", "s", "t"]) - sr = Series(cat, index=["p", "q", "r", "s", "t"]) + sr = cudf.Series(cat, index=["p", "q", "r", "s", "t"]) assert_eq(pdsr.cat.codes, sr.cat.codes, check_dtype=False) # Test attributes @@ -53,7 +51,7 @@ def test_categorical_integer(): pytest.xfail(reason="pandas >=1.1 required") cat = pd.Categorical(["a", "_", "_", "c", "a"], categories=["a", "b", "c"]) pdsr = pd.Series(cat) - sr = Series(cat) + sr = cudf.Series(cat) np.testing.assert_array_equal( cat.codes, sr.cat.codes.astype(cat.codes.dtype).fillna(-1).to_array() ) @@ -81,7 +79,7 @@ def test_categorical_compare_unordered(): cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) pdsr = pd.Series(cat) - sr = Series(cat) + sr = cudf.Series(cat) # test equal out = sr == sr @@ -112,12 +110,12 @@ def test_categorical_compare_ordered(): ["a", "a", "b", "c", "a"], categories=["a", "b", "c"], ordered=True ) pdsr1 = pd.Series(cat1) - sr1 = Series(cat1) + sr1 = cudf.Series(cat1) cat2 = pd.Categorical( ["a", "b", "a", "c", "b"], categories=["a", "b", "c"], ordered=True ) pdsr2 = pd.Series(cat2) - sr2 = Series(cat2) + sr2 = cudf.Series(cat2) # test equal out = sr1 == sr1 @@ -142,7 +140,7 @@ def test_categorical_compare_ordered(): def test_categorical_binary_add(): cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) pdsr = pd.Series(cat) - sr = Series(cat) + sr = cudf.Series(cat) assert_exceptions_equal( lfunc=operator.add, @@ -157,7 +155,7 @@ def test_categorical_binary_add(): def 
test_categorical_unary_ceil(): cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) pdsr = pd.Series(cat) - sr = Series(cat) + sr = cudf.Series(cat) assert_exceptions_equal( lfunc=getattr, @@ -176,7 +174,7 @@ def test_categorical_element_indexing(): """ cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) pdsr = pd.Series(cat) - sr = Series(cat) + sr = cudf.Series(cat) assert_eq(pdsr, sr) assert_eq(pdsr.cat.codes, sr.cat.codes, check_dtype=False) @@ -188,7 +186,7 @@ def test_categorical_masking(): """ cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) pdsr = pd.Series(cat) - sr = Series(cat) + sr = cudf.Series(cat) # check scalar comparison expect_matches = pdsr == "a" @@ -208,7 +206,7 @@ def test_categorical_masking(): def test_df_cat_set_index(): - df = DataFrame() + df = cudf.DataFrame() df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) df["b"] = np.arange(len(df)) got = df.set_index("a") @@ -220,7 +218,7 @@ def test_df_cat_set_index(): def test_df_cat_sort_index(): - df = DataFrame() + df = cudf.DataFrame() df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) df["b"] = np.arange(len(df)) @@ -231,7 +229,7 @@ def test_df_cat_sort_index(): def test_cat_series_binop_error(): - df = DataFrame() + df = cudf.DataFrame() df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) df["b"] = np.arange(len(df)) @@ -273,8 +271,8 @@ def test_categorical_unique(num_elements): ) # gdf - gdf = DataFrame() - gdf["a"] = Series.from_categorical(pd_cat) + gdf = cudf.DataFrame() + gdf["a"] = cudf.Series.from_categorical(pd_cat) gdf_unique_sorted = np.sort(gdf["a"].unique().to_pandas()) # pandas @@ -300,8 +298,8 @@ def test_categorical_unique_count(nelem): ) # gdf - gdf = DataFrame() - gdf["a"] = Series.from_categorical(pd_cat) + gdf = cudf.DataFrame() + gdf["a"] = cudf.Series.from_categorical(pd_cat) gdf_unique_count = gdf["a"].nunique() # pandas @@ -316,7 +314,7 @@ 
def test_categorical_unique_count(nelem): def test_categorical_empty(): cat = pd.Categorical([]) pdsr = pd.Series(cat) - sr = Series(cat) + sr = cudf.Series(cat) np.testing.assert_array_equal(cat.codes, sr.cat.codes.to_array()) # Test attributes @@ -331,7 +329,7 @@ def test_categorical_empty(): def test_categorical_set_categories(): cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) psr = pd.Series(cat) - sr = Series.from_categorical(cat) + sr = cudf.Series.from_categorical(cat) # adding category expect = psr.cat.set_categories(["a", "b", "c", "d"]) @@ -349,7 +347,7 @@ def test_categorical_set_categories_preserves_order(): # reassigning categories should preserve element ordering assert_eq( series.cat.set_categories([1, 2]), - Series(series).cat.set_categories([1, 2]), + cudf.Series(series).cat.set_categories([1, 2]), ) @@ -357,7 +355,7 @@ def test_categorical_set_categories_preserves_order(): def test_categorical_as_ordered(pd_str_cat, inplace): pd_sr = pd.Series(pd_str_cat.copy().set_ordered(False)) - cd_sr = gd.Series(pd_str_cat.copy().set_ordered(False)) + cd_sr = cudf.Series(pd_str_cat.copy().set_ordered(False)) assert cd_sr.cat.ordered is False assert cd_sr.cat.ordered == pd_sr.cat.ordered @@ -376,7 +374,7 @@ def test_categorical_as_ordered(pd_str_cat, inplace): def test_categorical_as_unordered(pd_str_cat, inplace): pd_sr = pd.Series(pd_str_cat.copy().set_ordered(True)) - cd_sr = gd.Series(pd_str_cat.copy().set_ordered(True)) + cd_sr = cudf.Series(pd_str_cat.copy().set_ordered(True)) assert cd_sr.cat.ordered is True assert cd_sr.cat.ordered == pd_sr.cat.ordered @@ -399,7 +397,7 @@ def test_categorical_reorder_categories( ): pd_sr = pd.Series(pd_str_cat.copy().set_ordered(from_ordered)) - cd_sr = gd.Series(pd_str_cat.copy().set_ordered(from_ordered)) + cd_sr = cudf.Series(pd_str_cat.copy().set_ordered(from_ordered)) assert_eq(pd_sr, cd_sr) @@ -421,7 +419,7 @@ def test_categorical_reorder_categories( def 
test_categorical_add_categories(pd_str_cat, inplace): pd_sr = pd.Series(pd_str_cat.copy()) - cd_sr = gd.Series(pd_str_cat.copy()) + cd_sr = cudf.Series(pd_str_cat.copy()) assert_eq(pd_sr, cd_sr) @@ -442,7 +440,7 @@ def test_categorical_add_categories(pd_str_cat, inplace): def test_categorical_remove_categories(pd_str_cat, inplace): pd_sr = pd.Series(pd_str_cat.copy()) - cd_sr = gd.Series(pd_str_cat.copy()) + cd_sr = cudf.Series(pd_str_cat.copy()) assert_eq(pd_sr, cd_sr) @@ -470,7 +468,7 @@ def test_categorical_remove_categories(pd_str_cat, inplace): def test_categorical_dataframe_slice_copy(): pdf = pd.DataFrame({"g": pd.Series(["a", "b", "z"], dtype="category")}) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) exp = pdf[1:].copy() gdf = gdf[1:].copy() @@ -511,7 +509,7 @@ def test_categorical_dataframe_slice_copy(): ) def test_categorical_typecast(data, cat_type): pd_data = data.copy() - gd_data = gd.from_pandas(data) + gd_data = cudf.from_pandas(data) assert_eq(pd_data.astype(cat_type), gd_data.astype(cat_type)) @@ -545,7 +543,7 @@ def test_categorical_typecast(data, cat_type): ) def test_categorical_set_categories_categoricals(data, new_categories): pd_data = data.copy().astype("category") - gd_data = gd.from_pandas(pd_data) + gd_data = cudf.from_pandas(pd_data) assert_eq( pd_data.cat.set_categories(new_categories=new_categories), @@ -557,7 +555,7 @@ def test_categorical_set_categories_categoricals(data, new_categories): new_categories=pd.Series(new_categories, dtype="category") ), gd_data.cat.set_categories( - new_categories=gd.Series(new_categories, dtype="category") + new_categories=cudf.Series(new_categories, dtype="category") ), ) @@ -590,14 +588,14 @@ def test_categorical_set_categories_categoricals(data, new_categories): ) def test_categorical_creation(data, dtype): expected = pd.Series(data, dtype=dtype) - got = gd.Series(data, dtype=dtype) + got = cudf.Series(data, dtype=dtype) assert_eq(expected, got) - got = gd.Series(data, 
dtype=gd.from_pandas(dtype)) + got = cudf.Series(data, dtype=cudf.from_pandas(dtype)) assert_eq(expected, got) expected = pd.Series(data, dtype="category") - got = gd.Series(data, dtype="category") + got = cudf.Series(data, dtype="category") assert_eq(expected, got) @@ -613,33 +611,33 @@ def test_categorical_creation(data, dtype): @pytest.mark.parametrize("ordered", [True, False]) def test_categorical_dtype(categories, ordered): expected = pd.CategoricalDtype(categories=categories, ordered=ordered) - got = gd.CategoricalDtype(categories=categories, ordered=ordered) + got = cudf.CategoricalDtype(categories=categories, ordered=ordered) assert_eq(expected, got) @pytest.mark.parametrize( ("data", "expected"), [ - (gd.Series([1]), np.uint8), - (gd.Series([1, None]), np.uint8), - (gd.Series(np.arange(np.iinfo(np.int8).max)), np.uint8), + (cudf.Series([1]), np.uint8), + (cudf.Series([1, None]), np.uint8), + (cudf.Series(np.arange(np.iinfo(np.int8).max)), np.uint8), ( - gd.Series(np.append(np.arange(np.iinfo(np.int8).max), [None])), + cudf.Series(np.append(np.arange(np.iinfo(np.int8).max), [None])), np.uint8, ), - (gd.Series(np.arange(np.iinfo(np.int16).max)), np.uint16), + (cudf.Series(np.arange(np.iinfo(np.int16).max)), np.uint16), ( - gd.Series(np.append(np.arange(np.iinfo(np.int16).max), [None])), + cudf.Series(np.append(np.arange(np.iinfo(np.int16).max), [None])), np.uint16, ), - (gd.Series(np.arange(np.iinfo(np.uint8).max)), np.uint8), + (cudf.Series(np.arange(np.iinfo(np.uint8).max)), np.uint8), ( - gd.Series(np.append(np.arange(np.iinfo(np.uint8).max), [None])), + cudf.Series(np.append(np.arange(np.iinfo(np.uint8).max), [None])), np.uint8, ), - (gd.Series(np.arange(np.iinfo(np.uint16).max)), np.uint16), + (cudf.Series(np.arange(np.iinfo(np.uint16).max)), np.uint16), ( - gd.Series(np.append(np.arange(np.iinfo(np.uint16).max), [None])), + cudf.Series(np.append(np.arange(np.iinfo(np.uint16).max), [None])), np.uint16, ), ], @@ -664,7 +662,7 @@ def 
test_astype_dtype(data, expected): ) def test_add_categories(data, add): pds = pd.Series(data, dtype="category") - gds = gd.Series(data, dtype="category") + gds = cudf.Series(data, dtype="category") expected = pds.cat.add_categories(add) actual = gds.cat.add_categories(add) @@ -692,7 +690,7 @@ def test_add_categories(data, add): ) def test_add_categories_error(data, add): pds = pd.Series(data, dtype="category") - gds = gd.Series(data, dtype="category") + gds = cudf.Series(data, dtype="category") assert_exceptions_equal( pds.cat.add_categories, @@ -704,12 +702,12 @@ def test_add_categories_error(data, add): def test_add_categories_mixed_error(): - gds = gd.Series(["a", "bd", "ef"], dtype="category") + gds = cudf.Series(["a", "bd", "ef"], dtype="category") with pytest.raises(TypeError): gds.cat.add_categories([1, 2, 3]) - gds = gd.Series([1, 2, 3], dtype="category") + gds = cudf.Series([1, 2, 3], dtype="category") with pytest.raises(TypeError): gds.cat.add_categories(["a", "bd", "ef"]) @@ -743,7 +741,7 @@ def test_add_categories_mixed_error(): def test_categorical_assignment(data, cat_dtype): pd_df = pd.DataFrame() pd_df["a"] = np.ones(len(data)) - cd_df = gd.from_pandas(pd_df) + cd_df = cudf.from_pandas(pd_df) pd_cat_series = pd.Series(data, dtype=cat_dtype) # assign categorical series @@ -757,7 +755,7 @@ def test_categorical_assignment(data, cat_dtype): # see issue: https://github.com/rapidsai/cudf/issues/2269 pd_df = pd.DataFrame() pd_df["a"] = np.ones(len(data)) - cd_df = gd.from_pandas(pd_df) + cd_df = cudf.from_pandas(pd_df) pd_categorical = pd.Categorical(data, dtype=cat_dtype) pd_df.assign(cat_col=pd_categorical) diff --git a/python/dask_cudf/dask_cudf/tests/test_reductions.py b/python/dask_cudf/dask_cudf/tests/test_reductions.py index 4da81e4f86c..030b7717fbc 100644 --- a/python/dask_cudf/dask_cudf/tests/test_reductions.py +++ b/python/dask_cudf/dask_cudf/tests/test_reductions.py @@ -8,7 +8,7 @@ import dask_cudf as dgd -import cudf as gd +import cudf def 
_make_random_frame(nelem, npartitions=2): @@ -18,7 +18,7 @@ def _make_random_frame(nelem, npartitions=2): "y": np.random.normal(size=nelem) + 1, } ) - gdf = gd.DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(df) dgf = dgd.from_cudf(gdf, npartitions=npartitions) return df, dgf @@ -49,15 +49,15 @@ def test_series_reduce(reducer): @pytest.mark.parametrize( "data", [ - gd.datasets.randomdata( + cudf.datasets.randomdata( nrows=10000, dtypes={"a": "category", "b": int, "c": float, "d": int}, ), - gd.datasets.randomdata( + cudf.datasets.randomdata( nrows=10000, dtypes={"a": "category", "b": int, "c": float, "d": str}, ), - gd.datasets.randomdata( + cudf.datasets.randomdata( nrows=10000, dtypes={"a": bool, "b": int, "c": float, "d": str} ), ], From b63ae03d30a7403ba43113ad3e89016ebe373371 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 14:51:46 -0800 Subject: [PATCH 21/35] refactor imports to be uniform and less confusing --- python/cudf/cudf/tests/test_categorical.py | 13 +- python/cudf/cudf/tests/test_duplicates.py | 6 +- python/cudf/cudf/tests/test_groupby.py | 10 +- python/cudf/cudf/tests/test_index.py | 17 +- python/cudf/cudf/tests/test_indexing.py | 71 +++--- python/cudf/cudf/tests/test_joining.py | 230 +++++++++--------- python/cudf/cudf/tests/test_numerical.py | 49 ++-- python/cudf/cudf/tests/test_reshape.py | 11 +- python/cudf/cudf/tests/test_sorting.py | 13 +- python/cudf/cudf/tests/test_stats.py | 114 +++++---- python/cudf/cudf/tests/test_string.py | 258 ++++++++++----------- 11 files changed, 393 insertions(+), 399 deletions(-) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 4be0475a4a3..9779fb786f6 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -1,6 +1,7 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. 
import operator +import string import numpy as np import pandas as pd @@ -259,13 +260,13 @@ def test_cat_series_binop_error(): @pytest.mark.parametrize("num_elements", [10, 100, 1000]) def test_categorical_unique(num_elements): - from string import ascii_letters, digits - # create categorical series np.random.seed(12) pd_cat = pd.Categorical( pd.Series( - np.random.choice(list(ascii_letters + digits), num_elements), + np.random.choice( + list(string.ascii_letters + string.digits), num_elements + ), dtype="category", ) ) @@ -286,13 +287,13 @@ def test_categorical_unique(num_elements): @pytest.mark.parametrize("nelem", [20, 50, 100]) def test_categorical_unique_count(nelem): - from string import ascii_letters, digits - # create categorical series np.random.seed(12) pd_cat = pd.Categorical( pd.Series( - np.random.choice(list(ascii_letters + digits), nelem), + np.random.choice( + list(string.ascii_letters + string.digits), nelem + ), dtype="category", ) ) diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index eb8fb1db46f..d429f658451 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -1,5 +1,8 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+import itertools as it +import random + import numpy as np import pytest from pandas import DataFrame, MultiIndex, Series, date_range @@ -277,9 +280,6 @@ def test_drop_duplicates_empty(df): @pytest.mark.parametrize("num_columns", [3, 4, 5]) def test_dataframe_drop_duplicates_numeric_method(num_columns): - import itertools as it - import random - comb = list(it.permutations(range(num_columns), num_columns)) shuf = list(comb) random.Random(num_columns).shuffle(shuf) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 3542a5af537..8011510d340 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1,10 +1,12 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. +import datetime import itertools import numpy as np import pandas as pd import pytest +from numba import cuda from numpy.testing import assert_array_equal import cudf @@ -284,8 +286,6 @@ def foo(df): def test_groupby_apply_grouped(): - from numba import cuda - np.random.seed(0) df = DataFrame() nelem = 20 @@ -732,12 +732,12 @@ def test_groupby_multi_agg_multi_groupby(): def test_groupby_datetime_multi_agg_multi_groupby(): - from datetime import datetime, timedelta - pdf = pd.DataFrame( { "a": pd.date_range( - datetime.now(), datetime.now() + timedelta(9), freq="D" + datetime.datetime.now(), + datetime.datetime.now() + datetime.timedelta(9), + freq="D", ), "b": np.random.randint(0, 5, 10), "c": np.random.randint(0, 5, 10), diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 127d198d61e..af25b48dd23 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -11,7 +11,6 @@ import pytest import cudf -from cudf.core import DataFrame from cudf.core._compat import PANDAS_GE_110 from cudf.core.index import ( CategoricalIndex, @@ -35,7 +34,7 @@ def test_df_set_index_from_series(): - df = DataFrame() + df = cudf.DataFrame() df["a"] = list(range(10)) 
df["b"] = list(range(0, 20, 2)) @@ -49,7 +48,7 @@ def test_df_set_index_from_series(): def test_df_set_index_from_name(): - df = DataFrame() + df = cudf.DataFrame() df["a"] = list(range(10)) df["b"] = list(range(0, 20, 2)) @@ -65,7 +64,7 @@ def test_df_set_index_from_name(): def test_df_slice_empty_index(): - df = DataFrame() + df = cudf.DataFrame() assert isinstance(df.index, RangeIndex) assert isinstance(df.index[:1], RangeIndex) with pytest.raises(IndexError): @@ -153,10 +152,10 @@ def test_categorical_index(): pdf = pd.DataFrame() pdf["a"] = [1, 2, 3] pdf["index"] = pd.Categorical(["a", "b", "c"]) - initial_df = DataFrame.from_pandas(pdf) + initial_df = cudf.from_pandas(pdf) pdf = pdf.set_index("index") - gdf1 = DataFrame.from_pandas(pdf) - gdf2 = DataFrame() + gdf1 = cudf.from_pandas(pdf) + gdf2 = cudf.DataFrame() gdf2["a"] = [1, 2, 3] gdf2["index"] = pd.Categorical(["a", "b", "c"]) assert_eq(initial_df.index, gdf2.index) @@ -273,7 +272,7 @@ def test_index_rename_preserves_arg(): def test_set_index_as_property(): - cdf = DataFrame() + cdf = cudf.DataFrame() col1 = np.arange(10) col2 = np.arange(0, 20, 2) cdf["a"] = col1 @@ -1419,7 +1418,7 @@ def test_multiindex_sample_basic(n, frac, replace, axis): "int": [1, 3, 5, 4, 2], }, ) - mul_index = cudf.Index(DataFrame.from_pandas(pdf)) + mul_index = cudf.Index(cudf.from_pandas(pdf)) random_state = 0 try: diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 6921ac3fa35..73a074c0376 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -8,7 +8,6 @@ import pytest import cudf -from cudf import DataFrame, Series from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_120 from cudf.tests import utils from cudf.tests.utils import INTEGER_TYPES, assert_eq, assert_exceptions_equal @@ -61,7 +60,11 @@ def pdf_gdf_multi(): pd.Series(range(3, 12)), pd.Series(range(0, 9, 2)), ), - (Series(range(12)), Series(range(3, 12)), Series(range(0, 
9, 2))), + ( + cudf.Series(range(12)), + cudf.Series(range(3, 12)), + cudf.Series(range(0, 9, 2)), + ), ( [i in range(12) for i in range(20)], [i in range(3, 12) for i in range(12)], @@ -98,7 +101,7 @@ def pdf_gdf_multi(): ) def test_series_indexing(i1, i2, i3): a1 = np.arange(20) - series = Series(a1) + series = cudf.Series(a1) # Indexing sr1 = series.iloc[i1] assert sr1.null_count == 0 @@ -125,7 +128,7 @@ def test_series_indexing_large_size(): gsr = cudf.Series(cupy.ones(n_elem)) gsr[0] = None got = gsr[gsr.isna()] - expect = Series([None], dtype="float64") + expect = cudf.Series([None], dtype="float64") assert_eq(expect, got) @@ -135,7 +138,7 @@ def test_series_indexing_large_size(): "arg", ["b", ["a", "c"], slice(1, 2, 1), [True, False, True]] ) def test_series_get_item(psr, arg): - gsr = Series.from_pandas(psr) + gsr = cudf.from_pandas(psr) expect = psr[arg] got = gsr[arg] @@ -144,7 +147,7 @@ def test_series_get_item(psr, arg): def test_dataframe_column_name_indexing(): - df = DataFrame() + df = cudf.DataFrame() data = np.asarray(range(10), dtype=np.int32) df["a"] = data df[1] = data @@ -161,7 +164,7 @@ def test_dataframe_column_name_indexing(): pdf["key2"] = np.random.randint(0, 3, nelem) pdf[1] = np.arange(1, 1 + nelem) pdf[2] = np.random.random(nelem) - df = DataFrame.from_pandas(pdf) + df = cudf.from_pandas(pdf) assert_eq(df[df.columns], df) assert_eq(df[df.columns[:1]], df[["key1"]]) @@ -174,7 +177,7 @@ def test_dataframe_column_name_indexing(): df = pd.DataFrame() for i in range(0, 10): df[i] = range(nelem) - gdf = DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(df) assert_eq(gdf, df) assert_eq(gdf[gdf.columns], gdf) @@ -182,7 +185,7 @@ def test_dataframe_column_name_indexing(): def test_dataframe_slicing(): - df = DataFrame() + df = cudf.DataFrame() size = 123 df["a"] = ha = np.random.randint(low=0, high=100, size=size).astype( np.int32 @@ -239,7 +242,7 @@ def test_dataframe_loc(scalar, step): } ) - df = DataFrame.from_pandas(pdf) + df = 
cudf.DataFrame.from_pandas(pdf) assert_eq(df.loc[:, ["a"]], pdf.loc[:, ["a"]]) @@ -311,7 +314,7 @@ def test_dataframe_loc(scalar, step): def test_dataframe_loc_duplicate_index_scalar(): pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5]}, index=[1, 2, 1, 4, 2]) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(pdf.loc[2], gdf.loc[2]) @@ -325,13 +328,13 @@ def test_dataframe_loc_mask(mask, arg): pdf = pd.DataFrame( {"a": ["a", "b", "c", "d", "e"], "b": ["f", "g", "h", "i", "j"]} ) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(pdf.loc[mask, arg], gdf.loc[mask, arg]) def test_dataframe_loc_outbound(): - df = DataFrame() + df = cudf.DataFrame() size = 10 df["a"] = ha = np.random.randint(low=0, high=100, size=size).astype( np.int32 @@ -347,7 +350,7 @@ def test_dataframe_loc_outbound(): def test_series_loc_numerical(): ps = pd.Series([1, 2, 3, 4, 5], index=[5, 6, 7, 8, 9]) - gs = Series.from_pandas(ps) + gs = cudf.Series.from_pandas(ps) assert_eq(ps.loc[5], gs.loc[5]) assert_eq(ps.loc[6], gs.loc[6]) @@ -365,7 +368,7 @@ def test_series_loc_numerical(): def test_series_loc_float_index(): ps = pd.Series([1, 2, 3, 4, 5], index=[5.43, 6.34, 7.34, 8.0, 9.1]) - gs = Series.from_pandas(ps) + gs = cudf.Series.from_pandas(ps) assert_eq(ps.loc[5.43], gs.loc[5.43]) assert_eq(ps.loc[8], gs.loc[8]) @@ -383,7 +386,7 @@ def test_series_loc_string(): ps = pd.Series( [1, 2, 3, 4, 5], index=["one", "two", "three", "four", "five"] ) - gs = Series.from_pandas(ps) + gs = cudf.Series.from_pandas(ps) assert_eq(ps.loc["one"], gs.loc["one"]) assert_eq(ps.loc["five"], gs.loc["five"]) @@ -406,7 +409,7 @@ def test_series_loc_datetime(): ps = pd.Series( [1, 2, 3, 4, 5], index=pd.date_range("20010101", "20010105") ) - gs = Series.from_pandas(ps) + gs = cudf.Series.from_pandas(ps) # a few different ways of specifying a datetime label: assert_eq(ps.loc["20010101"], gs.loc["20010101"]) @@ -467,7 +470,7 @@ def test_series_loc_categorical(): 
ps = pd.Series( [1, 2, 3, 4, 5], index=pd.Categorical(["a", "b", "c", "d", "e"]) ) - gs = Series.from_pandas(ps) + gs = cudf.Series.from_pandas(ps) assert_eq(ps.loc["a"], gs.loc["a"]) assert_eq(ps.loc["e"], gs.loc["e"]) @@ -531,12 +534,12 @@ def test_dataframe_series_loc_multiindex(obj): @pytest.mark.parametrize("nelem", [2, 5, 20, 100]) def test_series_iloc(nelem): - # create random series + # create random cudf.Series np.random.seed(12) ps = pd.Series(np.random.sample(nelem)) - # gpu series - gs = Series(ps) + # gpu cudf.Series + gs = cudf.Series(ps) # positive tests for indexing np.testing.assert_allclose(gs.iloc[-1 * nelem], ps.iloc[-1 * nelem]) @@ -567,7 +570,7 @@ def test_series_iloc(nelem): @pytest.mark.parametrize("nelem", [2, 5, 20, 100]) def test_dataframe_iloc(nelem): - gdf = DataFrame() + gdf = cudf.DataFrame() gdf["a"] = ha = np.random.randint(low=0, high=100, size=nelem).astype( np.int32 @@ -619,7 +622,7 @@ def test_dataframe_iloc(nelem): @pytest.mark.xfail(raises=AssertionError, reason="Series.index are different") def test_dataframe_iloc_tuple(): - gdf = DataFrame() + gdf = cudf.DataFrame() nelem = 123 gdf["a"] = ha = np.random.randint(low=0, high=100, size=nelem).astype( np.int32 @@ -641,7 +644,7 @@ def test_dataframe_iloc_tuple(): raises=IndexError, reason="positional indexers are out-of-bounds" ) def test_dataframe_iloc_index_error(): - gdf = DataFrame() + gdf = cudf.DataFrame() nelem = 123 gdf["a"] = ha = np.random.randint(low=0, high=100, size=nelem).astype( np.int32 @@ -662,7 +665,7 @@ def assert_col(g, p): @pytest.mark.parametrize("ntake", [0, 1, 10, 123, 122, 200]) def test_dataframe_take(ntake): np.random.seed(0) - df = DataFrame() + df = cudf.DataFrame() nelem = 123 df["ii"] = np.random.randint(0, 20, nelem) @@ -681,7 +684,7 @@ def test_dataframe_take(ntake): @pytest.mark.parametrize("ntake", [1, 2, 8, 9]) def test_dataframe_take_with_multiIndex(ntake): np.random.seed(0) - df = DataFrame( + df = cudf.DataFrame( index=cudf.MultiIndex( 
levels=[["lama", "cow", "falcon"], ["speed", "weight", "length"]], codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], @@ -707,7 +710,7 @@ def test_series_take(ntake, keep_index): nelem = 123 data = np.random.randint(0, 20, nelem) - sr = Series(data) + sr = cudf.Series(data) take_indices = np.random.randint(0, len(sr), ntake) @@ -725,7 +728,7 @@ def test_series_take(ntake, keep_index): def test_series_take_positional(): psr = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) - gsr = Series.from_pandas(psr) + gsr = cudf.Series.from_pandas(psr) take_indices = [1, 2, 0, 3] @@ -739,7 +742,7 @@ def test_series_take_positional(): @pytest.mark.parametrize("slice_start", [None, 0, 1, 3, 10, -10]) @pytest.mark.parametrize("slice_end", [None, 0, 1, 30, 50, -1]) def test_dataframe_masked_slicing(nelem, slice_start, slice_end): - gdf = DataFrame() + gdf = cudf.DataFrame() gdf["a"] = list(range(nelem)) gdf["b"] = list(range(nelem, 2 * nelem)) gdf["a"] = gdf["a"].set_mask(utils.random_bitmask(nelem)) @@ -756,13 +759,13 @@ def do_slice(x): def test_dataframe_boolean_mask_with_None(): pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]}) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) pdf_masked = pdf[[True, False, True, False]] gdf_masked = gdf[[True, False, True, False]] assert_eq(pdf_masked, gdf_masked) with pytest.raises(ValueError): - gdf[Series([True, False, None, False])] + gdf[cudf.Series([True, False, None, False])] @pytest.mark.parametrize("dtype", [int, float, str]) @@ -842,12 +845,12 @@ def test_dataframe_apply_boolean_mask(): "c": ["a", None, "b", "c"], } ) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(pdf[[True, False, True, False]], gdf[[True, False, True, False]]) """ -This test compares cudf and Pandas dataframe boolean indexing. +This test compares cudf and Pandas DataFrame boolean indexing. 
""" @@ -1161,7 +1164,7 @@ def test_sliced_indexing(): a = list(range(4, 4 + 150)) b = list(range(0, 0 + 150)) pdf = pd.DataFrame({"a": a, "b": b}) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) pdf = pdf.set_index("a") gdf = gdf.set_index("a") pidx = pdf.index[:75] diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index f8af320eb84..8692057aa58 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -5,7 +5,6 @@ import pytest import cudf -from cudf.core import DataFrame, Series from cudf.core._compat import PANDAS_GE_120 from cudf.core.dtypes import CategoricalDtype from cudf.tests.utils import ( @@ -72,7 +71,7 @@ def pd_odd_joins(left, right, join_type): @pytest.mark.parametrize("aa,bb,how,method", make_params()) def test_dataframe_join_how(aa, bb, how, method): - df = DataFrame() + df = cudf.DataFrame() df["a"] = aa df["b"] = bb @@ -133,8 +132,7 @@ def work_gdf(df): def _check_series(expect, got): magic = 0xDEADBEAF - # print("expect\n", expect) - # print("got\n", got.to_string(nrows=None)) + direct_equal = np.all(expect.values == got.to_array()) nanfilled_equal = np.all( expect.fillna(magic).values == got.fillna(magic).to_array() @@ -148,7 +146,7 @@ def _check_series(expect, got): def test_dataframe_join_suffix(): np.random.seed(0) - df = DataFrame() + df = cudf.DataFrame() for k in "abc": df[k] = np.random.randint(0, 5, 5) @@ -175,12 +173,12 @@ def test_dataframe_join_suffix(): def test_dataframe_join_cats(): - lhs = DataFrame() + lhs = cudf.DataFrame() lhs["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) lhs["b"] = bb = np.arange(len(lhs)) lhs = lhs.set_index("a") - rhs = DataFrame() + rhs = cudf.DataFrame() rhs["a"] = pd.Categorical(list("abcac"), categories=list("abc")) rhs["c"] = cc = np.arange(len(rhs)) rhs = rhs.set_index("a") @@ -243,8 +241,8 @@ def test_dataframe_join_mismatch_cats(how): pdf1["join_col"] = 
pdf1["join_col"].astype("category") pdf2["join_col"] = pdf2["join_col"].astype("category") - gdf1 = DataFrame.from_pandas(pdf1) - gdf2 = DataFrame.from_pandas(pdf2) + gdf1 = cudf.from_pandas(pdf1) + gdf2 = cudf.from_pandas(pdf2) gdf1 = gdf1.set_index("join_col") gdf2 = gdf2.set_index("join_col") @@ -274,13 +272,13 @@ def test_dataframe_merge_on(on): np.random.seed(0) # Make cuDF - df_left = DataFrame() + df_left = cudf.DataFrame() nelem = 500 df_left["key1"] = np.random.randint(0, 40, nelem) df_left["key2"] = np.random.randint(0, 50, nelem) df_left["left_val"] = np.arange(nelem) - df_right = DataFrame() + df_right = cudf.DataFrame() nelem = 500 df_right["key1"] = np.random.randint(0, 30, nelem) df_right["key2"] = np.random.randint(0, 50, nelem) @@ -340,13 +338,13 @@ def test_dataframe_merge_on_unknown_column(): np.random.seed(0) # Make cuDF - df_left = DataFrame() + df_left = cudf.DataFrame() nelem = 500 df_left["key1"] = np.random.randint(0, 40, nelem) df_left["key2"] = np.random.randint(0, 50, nelem) df_left["left_val"] = np.arange(nelem) - df_right = DataFrame() + df_right = cudf.DataFrame() nelem = 500 df_right["key1"] = np.random.randint(0, 30, nelem) df_right["key2"] = np.random.randint(0, 50, nelem) @@ -361,13 +359,13 @@ def test_dataframe_merge_no_common_column(): np.random.seed(0) # Make cuDF - df_left = DataFrame() + df_left = cudf.DataFrame() nelem = 500 df_left["key1"] = np.random.randint(0, 40, nelem) df_left["key2"] = np.random.randint(0, 50, nelem) df_left["left_val"] = np.arange(nelem) - df_right = DataFrame() + df_right = cudf.DataFrame() nelem = 500 df_right["key3"] = np.random.randint(0, 30, nelem) df_right["key4"] = np.random.randint(0, 50, nelem) @@ -379,18 +377,18 @@ def test_dataframe_merge_no_common_column(): def test_dataframe_empty_merge(): - gdf1 = DataFrame({"a": [], "b": []}) - gdf2 = DataFrame({"a": [], "c": []}) + gdf1 = cudf.DataFrame({"a": [], "b": []}) + gdf2 = cudf.DataFrame({"a": [], "c": []}) - expect = DataFrame({"a": [], "b": 
[], "c": []}) + expect = cudf.DataFrame({"a": [], "b": [], "c": []}) got = gdf1.merge(gdf2, how="left", on=["a"]) assert_eq(expect, got) def test_dataframe_merge_order(): - gdf1 = DataFrame() - gdf2 = DataFrame() + gdf1 = cudf.DataFrame() + gdf2 = cudf.DataFrame() gdf1["id"] = [10, 11] gdf1["timestamp"] = [1, 2] gdf1["a"] = [3, 4] @@ -458,8 +456,8 @@ def test_dataframe_pairs_of_triples(pairs, max, rows, how): pdf_left[left_column] = np.random.randint(0, max, rows) for right_column in pairs[1]: pdf_right[right_column] = np.random.randint(0, max, rows) - gdf_left = DataFrame.from_pandas(pdf_left) - gdf_right = DataFrame.from_pandas(pdf_right) + gdf_left = cudf.from_pandas(pdf_left) + gdf_right = cudf.from_pandas(pdf_right) if not set(pdf_left.columns).intersection(pdf_right.columns): with pytest.raises( pd.core.reshape.merge.MergeError, @@ -494,10 +492,6 @@ def test_dataframe_pairs_of_triples(pairs, max, rows, how): def test_safe_merging_with_left_empty(): - import numpy as np - import pandas as pd - - from cudf import DataFrame np.random.seed(0) @@ -508,8 +502,8 @@ def test_safe_merging_with_left_empty(): pdf_left[left_column] = np.random.randint(0, 10, 0) for right_column in pairs[1]: pdf_right[right_column] = np.random.randint(0, 10, 5) - gdf_left = DataFrame.from_pandas(pdf_left) - gdf_right = DataFrame.from_pandas(pdf_right) + gdf_left = cudf.from_pandas(pdf_left) + gdf_right = cudf.from_pandas(pdf_right) pdf_result = pdf_left.merge(pdf_right) gdf_result = gdf_left.merge(gdf_right) @@ -551,8 +545,8 @@ def test_merge_left_index_zero(): right = pd.DataFrame( {"y": [10, 20, 30, 6, 5, 4]}, index=[0, 1, 2, 3, 4, 6] ) - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) pd_merge = left.merge(right, left_on="x", right_on="y") gd_merge = gleft.merge(gright, left_on="x", right_on="y") @@ -573,8 +567,8 @@ def test_merge_left_right_index_left_right_on_zero_kwargs(kwargs): right = 
pd.DataFrame( {"y": [10, 20, 30, 6, 5, 4]}, index=[0, 1, 2, 3, 4, 6] ) - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) pd_merge = left.merge(right, **kwargs) gd_merge = gleft.merge(gright, **kwargs) assert_eq(pd_merge, gd_merge) @@ -594,8 +588,8 @@ def test_merge_left_right_index_left_right_on_kwargs(kwargs): right = pd.DataFrame( {"y": [10, 20, 30, 6, 5, 4]}, index=[1, 2, 3, 4, 5, 7] ) - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) pd_merge = left.merge(right, **kwargs) gd_merge = gleft.merge(gright, **kwargs) assert_eq(pd_merge, gd_merge) @@ -672,8 +666,8 @@ def test_merge_on_index_retained(): def test_merge_left_right_index_left_right_on_kwargs2(kwargs): left = pd.DataFrame({"x": [1, 2, 3]}, index=[10, 20, 30]) right = pd.DataFrame({"y": [10, 20, 30]}, index=[1, 2, 30]) - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) gd_merge = gleft.merge(gright, **kwargs) pd_merge = left.merge(right, **kwargs) if pd_merge.empty: @@ -707,8 +701,8 @@ def test_merge_sort(ons, hows): left.index = [6, 5, 4, 7, 5, 5, 5, 4, 4] right.index = [5, 4, 1, 9, 4, 3, 5, 4, 4] - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) gd_merge = gleft.merge(gright, **kwargs) pd_merge = left.merge(right, **kwargs) @@ -753,8 +747,8 @@ def test_merge_sort_on_indexes(kwargs): left.index = [6, 5, 4, 7, 5, 5, 5, 4, 4] right.index = [5, 4, 1, 9, 4, 3, 5, 4, 4] - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) gd_merge = gleft.merge(gright, **kwargs) if left_index and right_index: @@ -777,8 +771,8 @@ def 
test_join_datetimes_index(dtype): datetimes = pd.Series(pd.date_range("20010101", "20010102", freq="12h")) pdf_lhs = pd.DataFrame(index=[1, 0, 1, 2, 0, 0, 1]) pdf_rhs = pd.DataFrame({"d": datetimes}) - gdf_lhs = DataFrame.from_pandas(pdf_lhs) - gdf_rhs = DataFrame.from_pandas(pdf_rhs) + gdf_lhs = cudf.from_pandas(pdf_lhs) + gdf_rhs = cudf.from_pandas(pdf_rhs) gdf_rhs["d"] = gdf_rhs["d"].astype(dtype) @@ -793,8 +787,8 @@ def test_join_datetimes_index(dtype): def test_join_with_different_names(): left = pd.DataFrame({"a": [0, 1, 2.0, 3, 4, 5, 9]}) right = pd.DataFrame({"b": [12, 5, 3, 9.0, 5], "c": [1, 2, 3, 4, 5.0]}) - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) pd_merge = left.merge(right, how="outer", left_on=["a"], right_on=["b"]) gd_merge = gleft.merge(gright, how="outer", left_on=["a"], right_on=["b"]) assert_eq(pd_merge, gd_merge.sort_values(by=["a"]).reset_index(drop=True)) @@ -803,8 +797,8 @@ def test_join_with_different_names(): def test_join_same_name_different_order(): left = pd.DataFrame({"a": [0, 0], "b": [1, 2]}) right = pd.DataFrame({"a": [1, 2], "b": [0, 0]}) - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) pd_merge = left.merge(right, left_on=["a", "b"], right_on=["b", "a"]) gd_merge = gleft.merge(gright, left_on=["a", "b"], right_on=["b", "a"]) assert_eq( @@ -815,8 +809,8 @@ def test_join_same_name_different_order(): def test_join_empty_table_dtype(): left = pd.DataFrame({"a": []}) right = pd.DataFrame({"b": [12, 5, 3, 9.0, 5], "c": [1, 2, 3, 4, 5.0]}) - gleft = DataFrame.from_pandas(left) - gright = DataFrame.from_pandas(right) + gleft = cudf.from_pandas(left) + gright = cudf.from_pandas(right) pd_merge = left.merge(right, how="left", left_on=["a"], right_on=["b"]) gd_merge = gleft.merge(gright, how="left", left_on=["a"], right_on=["b"]) 
assert_eq(pd_merge["a"].dtype, gd_merge["a"].dtype) @@ -917,7 +911,7 @@ def test_join_multi(how, column_a, column_b, column_c): ) def test_merge_multi(kwargs): - left = DataFrame( + left = cudf.DataFrame( { "a": [1, 2, 3, 4, 3, 5, 6], "b": [1, 3, 5, 7, 5, 9, 0], @@ -925,7 +919,7 @@ def test_merge_multi(kwargs): "d": ["v", "w", "x", "y", "z", "1", "2"], } ) - right = DataFrame( + right = cudf.DataFrame( { "a": [0, 9, 3, 4, 3, 7, 8], "b": [2, 4, 5, 7, 5, 6, 8], @@ -981,19 +975,19 @@ def test_merge_multi(kwargs): def test_typecast_on_join_int_to_int(dtype_l, dtype_r): other_data = ["a", "b", "c"] - join_data_l = Series([1, 2, 3], dtype=dtype_l) - join_data_r = Series([1, 2, 4], dtype=dtype_r) + join_data_l = cudf.Series([1, 2, 3], dtype=dtype_l) + join_data_r = cudf.Series([1, 2, 4], dtype=dtype_r) - gdf_l = DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = DataFrame({"join_col": join_data_r, "B": other_data}) + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) exp_dtype = np.find_common_type([], [np.dtype(dtype_l), np.dtype(dtype_r)]) exp_join_data = [1, 2] exp_other_data = ["a", "b"] - exp_join_col = Series(exp_join_data, dtype=exp_dtype) + exp_join_col = cudf.Series(exp_join_data, dtype=exp_dtype) - expect = DataFrame( + expect = cudf.DataFrame( { "join_col": exp_join_col, "B_x": exp_other_data, @@ -1011,11 +1005,11 @@ def test_typecast_on_join_int_to_int(dtype_l, dtype_r): def test_typecast_on_join_float_to_float(dtype_l, dtype_r): other_data = ["a", "b", "c", "d", "e", "f"] - join_data_l = Series([1, 2, 3, 0.9, 4.5, 6], dtype=dtype_l) - join_data_r = Series([1, 2, 3, 0.9, 4.5, 7], dtype=dtype_r) + join_data_l = cudf.Series([1, 2, 3, 0.9, 4.5, 6], dtype=dtype_l) + join_data_r = cudf.Series([1, 2, 3, 0.9, 4.5, 7], dtype=dtype_r) - gdf_l = DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = DataFrame({"join_col": join_data_r, "B": other_data}) + gdf_l = 
cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) exp_dtype = np.find_common_type([], [np.dtype(dtype_l), np.dtype(dtype_r)]) @@ -1026,9 +1020,9 @@ def test_typecast_on_join_float_to_float(dtype_l, dtype_r): exp_join_data = [1, 2, 3, 0.9, 4.5] exp_other_data = ["a", "b", "c", "d", "e"] - exp_join_col = Series(exp_join_data, dtype=exp_dtype) + exp_join_col = cudf.Series(exp_join_data, dtype=exp_dtype) - expect = DataFrame( + expect = cudf.DataFrame( { "join_col": exp_join_col, "B_x": exp_other_data, @@ -1052,19 +1046,19 @@ def test_typecast_on_join_mixed_int_float(dtype_l, dtype_r): other_data = ["a", "b", "c", "d", "e", "f"] - join_data_l = Series([1, 2, 3, 0.9, 4.5, 6], dtype=dtype_l) - join_data_r = Series([1, 2, 3, 0.9, 4.5, 7], dtype=dtype_r) + join_data_l = cudf.Series([1, 2, 3, 0.9, 4.5, 6], dtype=dtype_l) + join_data_r = cudf.Series([1, 2, 3, 0.9, 4.5, 7], dtype=dtype_r) - gdf_l = DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = DataFrame({"join_col": join_data_r, "B": other_data}) + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) exp_dtype = np.find_common_type([], [np.dtype(dtype_l), np.dtype(dtype_r)]) exp_join_data = [1, 2, 3] exp_other_data = ["a", "b", "c"] - exp_join_col = Series(exp_join_data, dtype=exp_dtype) + exp_join_col = cudf.Series(exp_join_data, dtype=exp_dtype) - expect = DataFrame( + expect = cudf.DataFrame( { "join_col": exp_join_col, "B_x": exp_other_data, @@ -1081,18 +1075,18 @@ def test_typecast_on_join_no_float_round(): other_data = ["a", "b", "c", "d", "e"] - join_data_l = Series([1, 2, 3, 4, 5], dtype="int8") - join_data_r = Series([1, 2, 3, 4.01, 4.99], dtype="float32") + join_data_l = cudf.Series([1, 2, 3, 4, 5], dtype="int8") + join_data_r = cudf.Series([1, 2, 3, 4.01, 4.99], dtype="float32") - gdf_l = DataFrame({"join_col": join_data_l, "B": 
other_data}) - gdf_r = DataFrame({"join_col": join_data_r, "B": other_data}) + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) exp_join_data = [1, 2, 3, 4, 5] exp_Bx = ["a", "b", "c", "d", "e"] exp_By = ["a", "b", "c", None, None] - exp_join_col = Series(exp_join_data, dtype="float32") + exp_join_col = cudf.Series(exp_join_data, dtype="float32") - expect = DataFrame( + expect = cudf.DataFrame( {"join_col": exp_join_col, "B_x": exp_Bx, "B_y": exp_By} ) @@ -1145,23 +1139,23 @@ def test_typecast_on_join_overflow_unsafe(dtypes): ) def test_typecast_on_join_dt_to_dt(dtype_l, dtype_r): other_data = ["a", "b", "c", "d", "e"] - join_data_l = Series( + join_data_l = cudf.Series( ["1991-11-20", "1999-12-31", "2004-12-04", "2015-01-01", "2019-08-15"] ).astype(dtype_l) - join_data_r = Series( + join_data_r = cudf.Series( ["1991-11-20", "1999-12-31", "2004-12-04", "2015-01-01", "2019-08-16"] ).astype(dtype_r) - gdf_l = DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = DataFrame({"join_col": join_data_r, "B": other_data}) + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) exp_dtype = max(np.dtype(dtype_l), np.dtype(dtype_r)) exp_join_data = ["1991-11-20", "1999-12-31", "2004-12-04", "2015-01-01"] exp_other_data = ["a", "b", "c", "d"] - exp_join_col = Series(exp_join_data, dtype=exp_dtype) + exp_join_col = cudf.Series(exp_join_data, dtype=exp_dtype) - expect = DataFrame( + expect = cudf.DataFrame( { "join_col": exp_join_col, "B_x": exp_other_data, @@ -1183,21 +1177,21 @@ def test_typecast_on_join_categorical(dtype_l, dtype_r): pytest.skip("Can't determine which categorical to use") other_data = ["a", "b", "c", "d", "e"] - join_data_l = Series([1, 2, 3, 4, 5], dtype=dtype_l) - join_data_r = Series([1, 2, 3, 4, 6], dtype=dtype_r) + join_data_l = cudf.Series([1, 2, 3, 4, 5], dtype=dtype_l) + 
join_data_r = cudf.Series([1, 2, 3, 4, 6], dtype=dtype_r) if dtype_l == "category": exp_dtype = join_data_l.dtype.categories.dtype elif dtype_r == "category": exp_dtype = join_data_r.dtype.categories.dtype - gdf_l = DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = DataFrame({"join_col": join_data_r, "B": other_data}) + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) exp_join_data = [1, 2, 3, 4] exp_other_data = ["a", "b", "c", "d"] - exp_join_col = Series(exp_join_data, dtype=exp_dtype) + exp_join_col = cudf.Series(exp_join_data, dtype=exp_dtype) - expect = DataFrame( + expect = cudf.DataFrame( { "join_col": exp_join_col, "B_x": exp_other_data, @@ -1426,8 +1420,8 @@ def test_categorical_typecast_outer_one_cat(dtype): def test_index_join(lhs, rhs, how, level): l_pdf = pd.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) r_pdf = pd.DataFrame({"a": [1, 5, 4, 0], "b": [3, 9, 8, 4]}) - l_df = DataFrame.from_pandas(l_pdf) - r_df = DataFrame.from_pandas(r_pdf) + l_df = cudf.from_pandas(l_pdf) + r_df = cudf.from_pandas(r_pdf) p_lhs = l_pdf.set_index(lhs).index p_rhs = r_pdf.set_index(rhs).index g_lhs = l_df.set_index(lhs).index @@ -1454,8 +1448,8 @@ def test_index_join_corner_cases(): r_pdf = pd.DataFrame( {"a": [1, 5, 4, 0], "b": [3, 9, 8, 4], "c": [2, 3, 6, 0]} ) - l_df = DataFrame.from_pandas(l_pdf) - r_df = DataFrame.from_pandas(r_pdf) + l_df = cudf.from_pandas(l_pdf) + r_df = cudf.from_pandas(r_pdf) # Join when column name doesn't match with level lhs = ["a", "b"] @@ -1529,8 +1523,10 @@ def test_index_join_corner_cases(): def test_index_join_exception_cases(): - l_df = DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) - r_df = DataFrame({"a": [1, 5, 4, 0], "b": [3, 9, 8, 4], "c": [2, 3, 6, 0]}) + l_df = cudf.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) + r_df = cudf.DataFrame( + {"a": [1, 5, 4, 0], "b": [3, 9, 8, 4], "c": [2, 3, 6, 0]} + ) # Join between two 
MultiIndex lhs = ["a", "b"] @@ -1553,12 +1549,12 @@ def test_index_join_exception_cases(): def test_typecast_on_join_indexes(): - join_data_l = Series([1, 2, 3, 4, 5], dtype="int8") - join_data_r = Series([1, 2, 3, 4, 6], dtype="int32") + join_data_l = cudf.Series([1, 2, 3, 4, 5], dtype="int8") + join_data_r = cudf.Series([1, 2, 3, 4, 6], dtype="int32") other_data = ["a", "b", "c", "d", "e"] - gdf_l = DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = DataFrame({"join_col": join_data_r, "B": other_data}) + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) gdf_l = gdf_l.set_index("join_col") gdf_r = gdf_r.set_index("join_col") @@ -1566,7 +1562,7 @@ def test_typecast_on_join_indexes(): exp_join_data = [1, 2, 3, 4] exp_other_data = ["a", "b", "c", "d"] - expect = DataFrame( + expect = cudf.DataFrame( { "join_col": exp_join_data, "B_x": exp_other_data, @@ -1581,17 +1577,17 @@ def test_typecast_on_join_indexes(): def test_typecast_on_join_multiindices(): - join_data_l_0 = Series([1, 2, 3, 4, 5], dtype="int8") - join_data_l_1 = Series([2, 3, 4.1, 5.9, 6], dtype="float32") - join_data_l_2 = Series([7, 8, 9, 0, 1], dtype="float32") + join_data_l_0 = cudf.Series([1, 2, 3, 4, 5], dtype="int8") + join_data_l_1 = cudf.Series([2, 3, 4.1, 5.9, 6], dtype="float32") + join_data_l_2 = cudf.Series([7, 8, 9, 0, 1], dtype="float32") - join_data_r_0 = Series([1, 2, 3, 4, 5], dtype="int32") - join_data_r_1 = Series([2, 3, 4, 5, 6], dtype="int32") - join_data_r_2 = Series([7, 8, 9, 0, 0], dtype="float64") + join_data_r_0 = cudf.Series([1, 2, 3, 4, 5], dtype="int32") + join_data_r_1 = cudf.Series([2, 3, 4, 5, 6], dtype="int32") + join_data_r_2 = cudf.Series([7, 8, 9, 0, 0], dtype="float64") other_data = ["a", "b", "c", "d", "e"] - gdf_l = DataFrame( + gdf_l = cudf.DataFrame( { "join_col_0": join_data_l_0, "join_col_1": join_data_l_1, @@ -1599,7 +1595,7 @@ def 
test_typecast_on_join_multiindices(): "B": other_data, } ) - gdf_r = DataFrame( + gdf_r = cudf.DataFrame( { "join_col_0": join_data_r_0, "join_col_1": join_data_r_1, @@ -1611,12 +1607,12 @@ def test_typecast_on_join_multiindices(): gdf_l = gdf_l.set_index(["join_col_0", "join_col_1", "join_col_2"]) gdf_r = gdf_r.set_index(["join_col_0", "join_col_1", "join_col_2"]) - exp_join_data_0 = Series([1, 2], dtype="int32") - exp_join_data_1 = Series([2, 3], dtype="float64") - exp_join_data_2 = Series([7, 8], dtype="float64") - exp_other_data = Series(["a", "b"]) + exp_join_data_0 = cudf.Series([1, 2], dtype="int32") + exp_join_data_1 = cudf.Series([2, 3], dtype="float64") + exp_join_data_2 = cudf.Series([7, 8], dtype="float64") + exp_other_data = cudf.Series(["a", "b"]) - expect = DataFrame( + expect = cudf.DataFrame( { "join_col_0": exp_join_data_0, "join_col_1": exp_join_data_1, @@ -1632,12 +1628,12 @@ def test_typecast_on_join_multiindices(): def test_typecast_on_join_indexes_matching_categorical(): - join_data_l = Series(["a", "b", "c", "d", "e"], dtype="category") - join_data_r = Series(["a", "b", "c", "d", "e"], dtype="str") + join_data_l = cudf.Series(["a", "b", "c", "d", "e"], dtype="category") + join_data_r = cudf.Series(["a", "b", "c", "d", "e"], dtype="str") other_data = [1, 2, 3, 4, 5] - gdf_l = DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = DataFrame({"join_col": join_data_r, "B": other_data}) + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) gdf_l = gdf_l.set_index("join_col") gdf_r = gdf_r.set_index("join_col") @@ -1645,7 +1641,7 @@ def test_typecast_on_join_indexes_matching_categorical(): exp_join_data = ["a", "b", "c", "d", "e"] exp_other_data = [1, 2, 3, 4, 5] - expect = DataFrame( + expect = cudf.DataFrame( { "join_col": exp_join_data, "B_x": exp_other_data, @@ -1699,9 +1695,9 @@ def test_series_dataframe_mixed_merging(lhs, rhs, how, kwargs): check_lhs 
= lhs.copy() check_rhs = rhs.copy() - if isinstance(lhs, Series): + if isinstance(lhs, cudf.Series): check_lhs = lhs.to_frame() - if isinstance(rhs, Series): + if isinstance(rhs, cudf.Series): check_rhs = rhs.to_frame() expect = check_lhs.merge(check_rhs, how=how, **kwargs) diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index f4cdf619212..6d9bcda2c0b 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -5,87 +5,88 @@ import pytest import cudf -from cudf import Series from cudf.core._compat import PANDAS_GE_100 from cudf.tests.utils import assert_eq def test_can_cast_safely_same_kind(): # 'i' -> 'i' - data = Series([1, 2, 3], dtype="int32")._column + data = cudf.Series([1, 2, 3], dtype="int32")._column to_dtype = np.dtype("int64") assert data.can_cast_safely(to_dtype) - data = Series([1, 2, 3], dtype="int64")._column + data = cudf.Series([1, 2, 3], dtype="int64")._column to_dtype = np.dtype("int32") assert data.can_cast_safely(to_dtype) - data = Series([1, 2, 2 ** 31], dtype="int64")._column + data = cudf.Series([1, 2, 2 ** 31], dtype="int64")._column assert not data.can_cast_safely(to_dtype) # 'u' -> 'u' - data = Series([1, 2, 3], dtype="uint32")._column + data = cudf.Series([1, 2, 3], dtype="uint32")._column to_dtype = np.dtype("uint64") assert data.can_cast_safely(to_dtype) - data = Series([1, 2, 3], dtype="uint64")._column + data = cudf.Series([1, 2, 3], dtype="uint64")._column to_dtype = np.dtype("uint32") assert data.can_cast_safely(to_dtype) - data = Series([1, 2, 2 ** 33], dtype="uint64")._column + data = cudf.Series([1, 2, 2 ** 33], dtype="uint64")._column assert not data.can_cast_safely(to_dtype) # 'f' -> 'f' - data = Series([np.inf, 1.0], dtype="float64")._column + data = cudf.Series([np.inf, 1.0], dtype="float64")._column to_dtype = np.dtype("float32") assert data.can_cast_safely(to_dtype) - data = Series([np.finfo("float32").max * 2, 1.0], 
dtype="float64")._column + data = cudf.Series( + [np.finfo("float32").max * 2, 1.0], dtype="float64" + )._column to_dtype = np.dtype("float32") assert not data.can_cast_safely(to_dtype) def test_can_cast_safely_mixed_kind(): - data = Series([1, 2, 3], dtype="int32")._column + data = cudf.Series([1, 2, 3], dtype="int32")._column to_dtype = np.dtype("float32") assert data.can_cast_safely(to_dtype) # too big to fit into f32 exactly - data = Series([1, 2, 2 ** 24 + 1], dtype="int32")._column + data = cudf.Series([1, 2, 2 ** 24 + 1], dtype="int32")._column assert not data.can_cast_safely(to_dtype) - data = Series([1, 2, 3], dtype="uint32")._column + data = cudf.Series([1, 2, 3], dtype="uint32")._column to_dtype = np.dtype("float32") assert data.can_cast_safely(to_dtype) # too big to fit into f32 exactly - data = Series([1, 2, 2 ** 24 + 1], dtype="uint32")._column + data = cudf.Series([1, 2, 2 ** 24 + 1], dtype="uint32")._column assert not data.can_cast_safely(to_dtype) to_dtype = np.dtype("float64") assert data.can_cast_safely(to_dtype) - data = Series([1.0, 2.0, 3.0], dtype="float32")._column + data = cudf.Series([1.0, 2.0, 3.0], dtype="float32")._column to_dtype = np.dtype("int32") assert data.can_cast_safely(to_dtype) # not integer float - data = Series([1.0, 2.0, 3.5], dtype="float32")._column + data = cudf.Series([1.0, 2.0, 3.5], dtype="float32")._column assert not data.can_cast_safely(to_dtype) - data = Series([10.0, 11.0, 2000.0], dtype="float64")._column + data = cudf.Series([10.0, 11.0, 2000.0], dtype="float64")._column assert data.can_cast_safely(to_dtype) # float out of int range - data = Series([1.0, 2.0, 1.0 * (2 ** 31)], dtype="float32")._column + data = cudf.Series([1.0, 2.0, 1.0 * (2 ** 31)], dtype="float32")._column assert not data.can_cast_safely(to_dtype) # negative signed integers casting to unsigned integers - data = Series([-1, 0, 1], dtype="int32")._column + data = cudf.Series([-1, 0, 1], dtype="int32")._column to_dtype = np.dtype("uint32") assert 
not data.can_cast_safely(to_dtype) @@ -95,8 +96,8 @@ def test_can_cast_safely_mixed_kind(): reason="cuDF null <-> pd.NA compatibility not yet supported", ) def test_to_pandas_nullable_integer(): - gsr_not_null = Series([1, 2, 3]) - gsr_has_null = Series([1, 2, None]) + gsr_not_null = cudf.Series([1, 2, 3]) + gsr_has_null = cudf.Series([1, 2, None]) psr_not_null = pd.Series([1, 2, 3], dtype="int64") psr_has_null = pd.Series([1, 2, None], dtype="Int64") @@ -110,8 +111,8 @@ def test_to_pandas_nullable_integer(): reason="cuDF null <-> pd.NA compatibility not yet supported", ) def test_to_pandas_nullable_bool(): - gsr_not_null = Series([True, False, True]) - gsr_has_null = Series([True, False, None]) + gsr_not_null = cudf.Series([True, False, True]) + gsr_has_null = cudf.Series([True, False, None]) psr_not_null = pd.Series([True, False, True], dtype="bool") psr_has_null = pd.Series([True, False, None], dtype="boolean") @@ -121,12 +122,12 @@ def test_to_pandas_nullable_bool(): def test_can_cast_safely_has_nulls(): - data = Series([1, 2, 3, None], dtype="float32")._column + data = cudf.Series([1, 2, 3, None], dtype="float32")._column to_dtype = np.dtype("int64") assert data.can_cast_safely(to_dtype) - data = Series([1, 2, 3.1, None], dtype="float32")._column + data = cudf.Series([1, 2, 3.1, None], dtype="float32")._column assert not data.can_cast_safely(to_dtype) diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index a8196c596f0..b030924779d 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -8,7 +8,6 @@ import cudf from cudf import melt as cudf_melt -from cudf.core import DataFrame from cudf.core._compat import PANDAS_GE_120 from cudf.tests.utils import ( ALL_TYPES, @@ -56,7 +55,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype): pdf[colname] = data value_vars.append(colname) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) got = 
cudf_melt(frame=gdf, id_vars=id_vars, value_vars=value_vars) got_from_melt_method = gdf.melt(id_vars=id_vars, value_vars=value_vars) @@ -101,7 +100,7 @@ def test_df_stack(nulls, num_cols, num_rows, dtype): data[idx] = np.nan pdf[colname] = data - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) got = gdf.stack() @@ -137,7 +136,7 @@ def test_interleave_columns(nulls, num_cols, num_rows, dtype): data[idx] = np.nan pdf[colname] = data - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) if dtype == "category": with pytest.raises(ValueError): @@ -176,7 +175,7 @@ def test_tile(nulls, num_cols, num_rows, dtype, count): data[idx] = np.nan pdf[colname] = data - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) got = gdf.tile(count) expect = pd.DataFrame(pd.concat([pdf] * count)) @@ -356,7 +355,7 @@ def test_series_merge_sorted(nparts, key, na_position, ascending): ) def test_pivot_simple(index, column, data): pdf = pd.DataFrame({"index": index, "column": column, "data": data}) - gdf = cudf.DataFrame.from_pandas(pdf) + gdf = cudf.from_pandas(pdf) expect = pdf.pivot("index", "column") got = gdf.pivot("index", "column") diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index e30194e9eda..b90aebc33dc 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -1,5 +1,6 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. 
+import string from itertools import product import numpy as np @@ -225,14 +226,12 @@ def test_dataframe_multi_column( num_cols, num_rows, dtype, ascending, na_position ): - from string import ascii_lowercase - np.random.seed(0) - by = list(ascii_lowercase[:num_cols]) + by = list(string.ascii_lowercase[:num_cols]) pdf = pd.DataFrame() for i in range(5): - colname = ascii_lowercase[i] + colname = string.ascii_lowercase[i] data = np.random.randint(0, 26, num_rows).astype(dtype) pdf[colname] = data @@ -256,14 +255,12 @@ def test_dataframe_multi_column_nulls( num_cols, num_rows, dtype, nulls, ascending, na_position ): - from string import ascii_lowercase - np.random.seed(0) - by = list(ascii_lowercase[:num_cols]) + by = list(string.ascii_lowercase[:num_cols]) pdf = pd.DataFrame() for i in range(3): - colname = ascii_lowercase[i] + colname = string.ascii_lowercase[i] data = np.random.randint(0, 26, num_rows).astype(dtype) if nulls == "some": idx = np.array([], dtype="int64") diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 1512c87d160..1eae8ddbf1e 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -1,13 +1,13 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. 
import re +from concurrent.futures import ThreadPoolExecutor import numpy as np import pandas as pd import pytest import cudf -from cudf.core import Series from cudf.datasets import randomdata from cudf.tests.utils import assert_eq, assert_exceptions_equal @@ -32,7 +32,7 @@ def test_series_reductions(method, dtype, skipna): arr = arr.astype(dtype) if dtype in (np.float32, np.float64): arr[[2, 5, 14, 19, 50, 70]] = np.nan - sr = Series.from_masked_array(arr, Series(mask).as_mask()) + sr = cudf.Series.from_masked_array(arr, cudf.Series(mask).as_mask()) psr = sr.to_pandas() psr[~mask] = np.nan @@ -44,18 +44,16 @@ def call_test(sr, skipna): return fn(skipna=skipna) expect, got = call_test(psr, skipna=skipna), call_test(sr, skipna=skipna) - print(expect, got) + np.testing.assert_approx_equal(expect, got) @pytest.mark.parametrize("method", methods) def test_series_reductions_concurrency(method): - from concurrent.futures import ThreadPoolExecutor - e = ThreadPoolExecutor(10) np.random.seed(0) - srs = [Series(np.random.random(10000)) for _ in range(1)] + srs = [cudf.Series(np.random.random(10000)) for _ in range(1)] def call_test(sr): fn = getattr(sr, method) @@ -74,7 +72,7 @@ def f(sr): def test_series_std(ddof): np.random.seed(0) arr = np.random.random(100) - 0.5 - sr = Series(arr) + sr = cudf.Series(arr) pd = sr.to_pandas() got = sr.std(ddof=ddof) expect = pd.std(ddof=ddof) @@ -85,7 +83,7 @@ def test_series_unique(): for size in [10 ** x for x in range(5)]: arr = np.random.randint(low=-1, high=10, size=size) mask = arr != -1 - sr = Series.from_masked_array(arr, Series(mask).as_mask()) + sr = cudf.Series.from_masked_array(arr, cudf.Series(mask).as_mask()) assert set(arr[mask]) == set(sr.unique().to_array()) assert len(set(arr[mask])) == sr.nunique() @@ -97,13 +95,13 @@ def test_series_unique(): def test_series_nunique(nan_as_null, dropna): # We remove nulls as opposed to NaNs using the dropna parameter, # so to test against pandas we replace NaN with another discrete 
value - cudf_series = Series([1, 2, 2, 3, 3], nan_as_null=nan_as_null) + cudf_series = cudf.Series([1, 2, 2, 3, 3], nan_as_null=nan_as_null) pd_series = pd.Series([1, 2, 2, 3, 3]) expect = pd_series.nunique(dropna=dropna) got = cudf_series.nunique(dropna=dropna) assert expect == got - cudf_series = Series( + cudf_series = cudf.Series( [1.0, 2.0, 3.0, np.nan, None], nan_as_null=nan_as_null ) if nan_as_null is True: @@ -115,7 +113,7 @@ def test_series_nunique(nan_as_null, dropna): got = cudf_series.nunique(dropna=dropna) assert expect == got - cudf_series = Series([1.0, np.nan, np.nan], nan_as_null=nan_as_null) + cudf_series = cudf.Series([1.0, np.nan, np.nan], nan_as_null=nan_as_null) if nan_as_null is True: pd_series = pd.Series([1.0, np.nan, np.nan]) else: @@ -127,7 +125,7 @@ def test_series_nunique(nan_as_null, dropna): def test_series_scale(): arr = pd.Series(np.random.randint(low=-10, high=10, size=100)) - sr = Series(arr) + sr = cudf.Series(arr) vmin = arr.min() vmax = arr.max() @@ -143,7 +141,7 @@ def test_exact_quantiles(int_method): quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] df = pd.DataFrame(arr) - gdf_series = Series(arr) + gdf_series = cudf.Series(arr) q1 = gdf_series.quantile( quant_values, interpolation=int_method, exact=True @@ -162,7 +160,7 @@ def test_exact_quantiles_int(int_method): quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] df = pd.DataFrame(arr) - gdf_series = Series(arr) + gdf_series = cudf.Series(arr) q1 = gdf_series.quantile( quant_values, interpolation=int_method, exact=True @@ -180,7 +178,7 @@ def test_approx_quantiles(): arr = np.asarray([6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.01, 0.8, 5.7]) quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] - gdf_series = Series(arr) + gdf_series = cudf.Series(arr) pdf_series = pd.Series(arr) q1 = gdf_series.quantile(quant_values, exact=False) @@ -194,7 +192,7 @@ def test_approx_quantiles_int(): quant_values = [0.5] approx_results = [2] - gdf_series = Series(arr) + gdf_series = cudf.Series(arr) q1 = 
gdf_series.quantile(quant_values, exact=False) @@ -206,7 +204,7 @@ def test_approx_quantiles_int(): def test_misc_quantiles(data, q): pdf_series = cudf.utils.utils.create_pandas_series(data=data) - gdf_series = Series(data) + gdf_series = cudf.Series(data) expected = pdf_series.quantile(q) actual = gdf_series.quantile(q) @@ -216,17 +214,17 @@ def test_misc_quantiles(data, q): @pytest.mark.parametrize( "data", [ - Series(np.random.normal(-100, 100, 1000)), - Series(np.random.randint(-50, 50, 1000)), - Series(np.zeros(100)), - Series(np.repeat(np.nan, 100)), - Series(np.array([1.123, 2.343, np.nan, 0.0])), - Series( + cudf.Series(np.random.normal(-100, 100, 1000)), + cudf.Series(np.random.randint(-50, 50, 1000)), + cudf.Series(np.zeros(100)), + cudf.Series(np.repeat(np.nan, 100)), + cudf.Series(np.array([1.123, 2.343, np.nan, 0.0])), + cudf.Series( [5, 10, 53, None, np.nan, None, 12, 43, -423], nan_as_null=False ), - Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]), - Series([]), - Series([-3]), + cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]), + cudf.Series([]), + cudf.Series([-3]), randomdata( nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} ), @@ -257,17 +255,17 @@ def test_kurtosis(data, null_flag): @pytest.mark.parametrize( "data", [ - Series(np.random.normal(-100, 100, 1000)), - Series(np.random.randint(-50, 50, 1000)), - Series(np.zeros(100)), - Series(np.repeat(np.nan, 100)), - Series(np.array([1.123, 2.343, np.nan, 0.0])), - Series( + cudf.Series(np.random.normal(-100, 100, 1000)), + cudf.Series(np.random.randint(-50, 50, 1000)), + cudf.Series(np.zeros(100)), + cudf.Series(np.repeat(np.nan, 100)), + cudf.Series(np.array([1.123, 2.343, np.nan, 0.0])), + cudf.Series( [5, 10, 53, None, np.nan, None, 12, 43, -423], nan_as_null=False ), - Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]), - Series([]), - Series([-3]), + cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]), + 
cudf.Series([]), + cudf.Series([-3]), randomdata( nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} ), @@ -300,13 +298,13 @@ def test_series_median(dtype, num_na): mask = np.arange(100) >= num_na arr = arr.astype(dtype) - sr = Series.from_masked_array(arr, Series(mask).as_mask()) + sr = cudf.Series.from_masked_array(arr, cudf.Series(mask).as_mask()) arr2 = arr[mask] ps = pd.Series(arr2, dtype=dtype) actual = sr.median(skipna=True) desired = ps.median(skipna=True) - print(actual, desired) + np.testing.assert_approx_equal(actual, desired) # only for float until integer null supported convert to pandas in cudf @@ -326,10 +324,10 @@ def test_series_median(dtype, num_na): np.zeros(100), np.repeat(np.nan, 100), np.array([1.123, 2.343, np.nan, 0.0]), - Series([5, 10, 53, None, np.nan, None], nan_as_null=False), - Series([1.1, 2.32, 43.4], index=[0, 4, 3]), - Series([]), - Series([-3]), + cudf.Series([5, 10, 53, None, np.nan, None], nan_as_null=False), + cudf.Series([1.1, 2.32, 43.4], index=[0, 4, 3]), + cudf.Series([]), + cudf.Series([-3]), ], ) @pytest.mark.parametrize( @@ -340,13 +338,13 @@ def test_series_median(dtype, num_na): np.zeros(100), np.repeat(np.nan, 100), np.array([1.123, 2.343, np.nan, 0.0]), - Series([1.1, 2.32, 43.4], index=[0, 500, 4000]), - Series([5]), + cudf.Series([1.1, 2.32, 43.4], index=[0, 500, 4000]), + cudf.Series([5]), ], ) def test_cov1d(data1, data2): - gs1 = Series(data1) - gs2 = Series(data2) + gs1 = cudf.Series(data1) + gs2 = cudf.Series(data2) ps1 = gs1.to_pandas() ps2 = gs2.to_pandas() @@ -364,10 +362,10 @@ def test_cov1d(data1, data2): np.zeros(100), np.repeat(np.nan, 100), np.array([1.123, 2.343, np.nan, 0.0]), - Series([5, 10, 53, None, np.nan, None], nan_as_null=False), - Series([1.1032, 2.32, 43.4], index=[0, 4, 3]), - Series([]), - Series([-3]), + cudf.Series([5, 10, 53, None, np.nan, None], nan_as_null=False), + cudf.Series([1.1032, 2.32, 43.4], index=[0, 4, 3]), + cudf.Series([]), + cudf.Series([-3]), ], ) 
@pytest.mark.parametrize( @@ -378,13 +376,13 @@ def test_cov1d(data1, data2): np.zeros(100), np.repeat(np.nan, 100), np.array([1.123, 2.343, np.nan, 0.0]), - Series([1.1, 2.32, 43.4], index=[0, 500, 4000]), - Series([5]), + cudf.Series([1.1, 2.32, 43.4], index=[0, 500, 4000]), + cudf.Series([5]), ], ) def test_corr1d(data1, data2): - gs1 = Series(data1) - gs2 = Series(data2) + gs1 = cudf.Series(data1) + gs2 = cudf.Series(data2) ps1 = gs1.to_pandas() ps2 = gs2.to_pandas() @@ -436,13 +434,13 @@ def test_df_corr(): @pytest.mark.parametrize("skipna", [True, False, None]) def test_nans_stats(data, ops, skipna): psr = cudf.utils.utils.create_pandas_series(data=data) - gsr = Series(data) + gsr = cudf.Series(data) assert_eq( getattr(psr, ops)(skipna=skipna), getattr(gsr, ops)(skipna=skipna) ) psr = cudf.utils.utils.create_pandas_series(data=data) - gsr = Series(data, nan_as_null=False) + gsr = cudf.Series(data, nan_as_null=False) # Since there is no concept of `nan_as_null` in pandas, # nulls will be returned in the operations. 
So only # testing for `skipna=True` when `nan_as_null=False` @@ -462,7 +460,7 @@ def test_nans_stats(data, ops, skipna): @pytest.mark.parametrize("min_count", [-10, -1, 0, 1, 2, 3, 5, 10]) def test_min_count_ops(data, ops, skipna, min_count): psr = pd.Series(data) - gsr = Series(data) + gsr = cudf.Series(data) assert_eq( getattr(psr, ops)(skipna=skipna, min_count=min_count), @@ -473,8 +471,8 @@ def test_min_count_ops(data, ops, skipna, min_count): @pytest.mark.parametrize( "gsr", [ - Series([1, 2, 3, 4], dtype="datetime64[ns]"), - Series([1, 2, 3, 4], dtype="timedelta64[ns]"), + cudf.Series([1, 2, 3, 4], dtype="datetime64[ns]"), + cudf.Series([1, 2, 3, 4], dtype="timedelta64[ns]"), ], ) def test_cov_corr_invalid_dtypes(gsr): diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 13501d97405..a015f3387b4 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1,6 +1,7 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. 
import re +import urllib.parse from contextlib import ExitStack as does_not_raise from sys import getsizeof @@ -12,7 +13,6 @@ import cudf from cudf import concat -from cudf.core import DataFrame, Series from cudf.core._compat import PANDAS_GE_110 from cudf.core.column.string import StringColumn from cudf.core.index import StringIndex, as_index @@ -57,7 +57,7 @@ def index(request): @pytest.fixture def ps_gs(data, index): ps = pd.Series(data, index=index, dtype="str", name="nice name") - gs = Series(data, index=index, dtype="str", name="nice name") + gs = cudf.Series(data, index=index, dtype="str", name="nice name") return (ps, gs) @@ -65,7 +65,7 @@ def ps_gs(data, index): def test_string_ingest(construct): expect = ["a", "a", "b", "c", "a"] data = construct(expect) - got = Series(data) + got = cudf.Series(data) assert got.dtype == np.dtype("object") assert len(got) == 5 for idx, val in enumerate(expect): @@ -106,7 +106,7 @@ def test_string_get_item(ps_gs, item): ps, gs = ps_gs got = gs.iloc[item] - if isinstance(got, Series): + if isinstance(got, cudf.Series): got = got.to_arrow() if isinstance(item, cupy.ndarray): @@ -140,7 +140,7 @@ def test_string_bool_mask(ps_gs, item): ps, gs = ps_gs got = gs.iloc[item] - if isinstance(got, Series): + if isinstance(got, cudf.Series): got = got.to_arrow() if isinstance(item, cupy.ndarray): @@ -196,7 +196,7 @@ def test_string_astype(dtype): elif dtype == "str" or dtype == "object": data = ["ab", "cd", "ef", "gh", "ij"] ps = pd.Series(data) - gs = Series(data) + gs = cudf.Series(data) # Pandas str --> bool typecasting always returns True if there's a string if dtype.startswith("bool"): @@ -214,7 +214,7 @@ def test_string_astype(dtype): def test_string_empty_astype(dtype): data = [] ps = pd.Series(data, dtype="str") - gs = Series(data, dtype="str") + gs = cudf.Series(data, dtype="str") expect = ps.astype(dtype) got = gs.astype(dtype) @@ -245,7 +245,7 @@ def test_string_numeric_astype(dtype): if not dtype.startswith("datetime64"): 
ps = pd.Series(data, dtype=dtype) - gs = Series(data, dtype=dtype) + gs = cudf.Series(data, dtype=dtype) expect = pd.Series(ps.astype("str")) got = gs.astype("str") @@ -261,7 +261,7 @@ def test_string_empty_numeric_astype(dtype): ps = pd.Series(data, dtype="datetime64[ns]") else: ps = pd.Series(data, dtype=dtype) - gs = Series(data, dtype=dtype) + gs = cudf.Series(data, dtype=dtype) expect = ps.astype("str") got = gs.astype("str") @@ -276,8 +276,8 @@ def test_string_concat(): ps1 = pd.Series(data1, index=index) ps2 = pd.Series(data2, index=index) - gs1 = Series(data1, index=index) - gs2 = Series(data2, index=index) + gs1 = cudf.Series(data1, index=index) + gs2 = cudf.Series(data2, index=index) expect = pd.concat([ps1, ps2]) got = concat([gs1, gs2]) @@ -855,7 +855,7 @@ def test_string_upper(ps_gs): @pytest.mark.parametrize("expand", [True, False, None]) def test_string_split(data, pat, n, expand): ps = pd.Series(data, dtype="str") - gs = Series(data, dtype="str") + gs = cudf.Series(data, dtype="str") expect = ps.str.split(pat=pat, n=n, expand=expand) got = gs.str.split(pat=pat, n=n, expand=expand) @@ -877,10 +877,10 @@ def test_string_join_key(str_data, str_data_raise, num_keys, how): other_data = [1, 2, 3, 4, 5][: len(str_data)] pdf = pd.DataFrame() - gdf = DataFrame() + gdf = cudf.DataFrame() for i in range(num_keys): pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = Series(str_data, dtype="str") + gdf[i] = cudf.Series(str_data, dtype="str") pdf["a"] = other_data gdf["a"] = other_data @@ -920,18 +920,18 @@ def test_string_join_key_nulls(str_data_nulls): other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)] pdf = pd.DataFrame() - gdf = DataFrame() + gdf = cudf.DataFrame() pdf["key"] = pd.Series(str_data, dtype="str") - gdf["key"] = Series(str_data, dtype="str") + gdf["key"] = cudf.Series(str_data, dtype="str") pdf["vals"] = other_data gdf["vals"] = other_data pdf2 = pd.DataFrame() - gdf2 = DataFrame() + gdf2 = cudf.DataFrame() pdf2["key"] = 
pd.Series(str_data_nulls, dtype="str") - gdf2["key"] = Series(str_data_nulls, dtype="str") + gdf2["key"] = cudf.Series(str_data_nulls, dtype="str") pdf2["vals"] = pd.Series(other_data_nulls, dtype="int64") - gdf2["vals"] = Series(other_data_nulls, dtype="int64") + gdf2["vals"] = cudf.Series(other_data_nulls, dtype="int64") expect = pdf.merge(pdf2, on="key", how="left") got = gdf.merge(gdf2, on="key", how="left") @@ -955,10 +955,10 @@ def test_string_join_non_key(str_data, num_cols, how): other_data = [1, 2, 3, 4, 5][: len(str_data)] pdf = pd.DataFrame() - gdf = DataFrame() + gdf = cudf.DataFrame() for i in range(num_cols): pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = Series(str_data, dtype="str") + gdf[i] = cudf.Series(str_data, dtype="str") pdf["a"] = other_data gdf["a"] = other_data @@ -993,18 +993,18 @@ def test_string_join_non_key_nulls(str_data_nulls): other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)] pdf = pd.DataFrame() - gdf = DataFrame() + gdf = cudf.DataFrame() pdf["vals"] = pd.Series(str_data, dtype="str") - gdf["vals"] = Series(str_data, dtype="str") + gdf["vals"] = cudf.Series(str_data, dtype="str") pdf["key"] = other_data gdf["key"] = other_data pdf2 = pd.DataFrame() - gdf2 = DataFrame() + gdf2 = cudf.DataFrame() pdf2["vals"] = pd.Series(str_data_nulls, dtype="str") - gdf2["vals"] = Series(str_data_nulls, dtype="str") + gdf2["vals"] = cudf.Series(str_data_nulls, dtype="str") pdf2["key"] = pd.Series(other_data_nulls, dtype="int64") - gdf2["key"] = Series(other_data_nulls, dtype="int64") + gdf2["key"] = cudf.Series(other_data_nulls, dtype="int64") expect = pdf.merge(pdf2, on="key", how="left") got = gdf.merge(gdf2, on="key", how="left") @@ -1044,8 +1044,8 @@ def test_string_join_values_nulls(): left_pdf = pd.DataFrame(left_dict) right_pdf = pd.DataFrame(right_dict) - left_gdf = DataFrame.from_pandas(left_pdf) - right_gdf = DataFrame.from_pandas(right_pdf) + left_gdf = cudf.DataFrame.from_pandas(left_pdf) + right_gdf = 
cudf.DataFrame.from_pandas(right_pdf) expect = left_pdf.merge(right_pdf, how="left", on="b") got = left_gdf.merge(right_gdf, how="left", on="b") @@ -1064,10 +1064,10 @@ def test_string_groupby_key(str_data, num_keys): other_data = [1, 2, 3, 4, 5][: len(str_data)] pdf = pd.DataFrame() - gdf = DataFrame() + gdf = cudf.DataFrame() for i in range(num_keys): pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = Series(str_data, dtype="str") + gdf[i] = cudf.Series(str_data, dtype="str") pdf["a"] = other_data gdf["a"] = other_data @@ -1089,10 +1089,10 @@ def test_string_groupby_non_key(str_data, num_cols, agg): other_data = [1, 2, 3, 4, 5][: len(str_data)] pdf = pd.DataFrame() - gdf = DataFrame() + gdf = cudf.DataFrame() for i in range(num_cols): pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = Series(str_data, dtype="str") + gdf[i] = cudf.Series(str_data, dtype="str") pdf["a"] = other_data gdf["a"] = other_data @@ -1114,9 +1114,9 @@ def test_string_groupby_key_index(): other_data = [1, 2, 3, 4, 5] pdf = pd.DataFrame() - gdf = DataFrame() + gdf = cudf.DataFrame() pdf["a"] = pd.Series(str_data, dtype="str") - gdf["a"] = Series(str_data, dtype="str") + gdf["a"] = cudf.Series(str_data, dtype="str") pdf["b"] = other_data gdf["b"] = other_data @@ -1130,7 +1130,7 @@ def test_string_groupby_key_index(): def test_string_set_scalar(scalar): pdf = pd.DataFrame() pdf["a"] = [1, 2, 3, 4, 5] - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) pdf["b"] = "a" gdf["b"] = "a" @@ -1140,10 +1140,8 @@ def test_string_set_scalar(scalar): def test_string_index(): - from cudf.core.column import as_column - pdf = pd.DataFrame(np.random.rand(5, 5)) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) stringIndex = ["a", "b", "c", "d", "e"] pdf.index = stringIndex gdf.index = stringIndex @@ -1156,7 +1154,9 @@ def test_string_index(): pdf.index = stringIndex.to_pandas() gdf.index = stringIndex assert_eq(pdf, gdf) - stringIndex = 
as_index(as_column(["a", "b", "c", "d", "e"]), name="name") + stringIndex = cudf.Index( + cudf.core.column.as_column(["a", "b", "c", "d", "e"]), name="name" + ) pdf.index = stringIndex.to_pandas() gdf.index = stringIndex assert_eq(pdf, gdf) @@ -1174,7 +1174,7 @@ def test_string_index(): ) def test_string_unique(item): ps = pd.Series(item) - gs = Series(item) + gs = cudf.Series(item) # Pandas `unique` returns a numpy array pres = pd.Series(ps.unique()) # cudf returns sorted unique with `None` placed before other strings @@ -1184,12 +1184,12 @@ def test_string_unique(item): def test_string_slice(): - df = DataFrame({"a": ["hello", "world"]}) + df = cudf.DataFrame({"a": ["hello", "world"]}) pdf = pd.DataFrame({"a": ["hello", "world"]}) a_slice_got = df.a.str.slice(0, 2) a_slice_expected = pdf.a.str.slice(0, 2) - assert isinstance(a_slice_got, Series) + assert isinstance(a_slice_got, cudf.Series) assert_eq(a_slice_expected, a_slice_got) @@ -1199,8 +1199,8 @@ def test_string_equality(): ps1 = pd.Series(data1) ps2 = pd.Series(data2) - gs1 = Series(data1) - gs2 = Series(data2) + gs1 = cudf.Series(data1) + gs2 = cudf.Series(data2) expect = ps1 == ps2 got = gs1 == gs2 @@ -1213,7 +1213,7 @@ def test_string_equality(): assert_eq(expect, got.fillna(False)) ps1 = pd.Series(["a"]) - gs1 = Series(["a"]) + gs1 = cudf.Series(["a"]) expect = ps1 == "m" got = gs1 == "m" @@ -1237,7 +1237,7 @@ def test_string_equality(): ) def test_string_binary_op_add(lhs, rhs): pds = pd.Series(lhs) + pd.Series(rhs) - gds = Series(lhs) + Series(rhs) + gds = cudf.Series(lhs) + cudf.Series(rhs) assert_eq(pds, gds) @@ -1282,7 +1282,7 @@ def test_string_no_children_properties(): ) def test_string_get(string, index): pds = pd.Series(string) - gds = Series(string) + gds = cudf.Series(string) assert_eq( pds.str.get(index).fillna(""), gds.str.get(index).fillna(""), @@ -1305,7 +1305,7 @@ def test_string_get(string, index): ) def test_string_slice_str(string, number, diff): pds = pd.Series(string) - gds = 
Series(string) + gds = cudf.Series(string) assert_eq(pds.str.slice(start=number), gds.str.slice(start=number)) assert_eq(pds.str.slice(stop=number), gds.str.slice(stop=number)) @@ -1323,11 +1323,11 @@ def test_string_slice_str(string, number, diff): def test_string_slice_from(): - gs = Series(["hello world", "holy accéntéd", "batman", None, ""]) - d_starts = Series([2, 3, 0, -1, -1], dtype=np.int32) - d_stops = Series([-1, -1, 0, -1, -1], dtype=np.int32) + gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""]) + d_starts = cudf.Series([2, 3, 0, -1, -1], dtype=np.int32) + d_stops = cudf.Series([-1, -1, 0, -1, -1], dtype=np.int32) got = gs.str.slice_from(starts=d_starts._column, stops=d_stops._column) - expected = Series(["llo world", "y accéntéd", "", None, ""]) + expected = cudf.Series(["llo world", "y accéntéd", "", None, ""]) assert_eq(got, expected) @@ -1344,7 +1344,7 @@ def test_string_slice_from(): @pytest.mark.parametrize("repr", ["2", "!!"]) def test_string_slice_replace(string, number, diff, repr): pds = pd.Series(string) - gds = Series(string) + gds = cudf.Series(string) assert_eq( pds.str.slice_replace(start=number, repl=repr), @@ -1368,7 +1368,7 @@ def test_string_slice_replace(string, number, diff, repr): def test_string_insert(): - gs = Series(["hello world", "holy accéntéd", "batman", None, ""]) + gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""]) ps = pd.Series(["hello world", "holy accéntéd", "batman", None, ""]) @@ -1422,7 +1422,7 @@ def test_string_insert(): ) @pytest.mark.parametrize("data", _string_char_types_data) def test_string_char_types(type_op, data): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq(getattr(gs.str, type_op)(), getattr(ps.str, type_op)()) @@ -1438,8 +1438,8 @@ def test_string_filter_alphanum(): rs = rs + c expected.append(rs) - gs = Series(data) - assert_eq(gs.str.filter_alphanum(), Series(expected)) + gs = cudf.Series(data) + assert_eq(gs.str.filter_alphanum(), 
cudf.Series(expected)) expected = [] for st in data: @@ -1448,7 +1448,7 @@ def test_string_filter_alphanum(): if not str.isalnum(c): rs = rs + c expected.append(rs) - assert_eq(gs.str.filter_alphanum(keep=False), Series(expected)) + assert_eq(gs.str.filter_alphanum(keep=False), cudf.Series(expected)) expected = [] for st in data: @@ -1459,7 +1459,7 @@ def test_string_filter_alphanum(): else: rs = rs + "*" expected.append(rs) - assert_eq(gs.str.filter_alphanum("*"), Series(expected)) + assert_eq(gs.str.filter_alphanum("*"), cudf.Series(expected)) expected = [] for st in data: @@ -1470,7 +1470,7 @@ def test_string_filter_alphanum(): else: rs = rs + "*" expected.append(rs) - assert_eq(gs.str.filter_alphanum("*", keep=False), Series(expected)) + assert_eq(gs.str.filter_alphanum("*", keep=False), cudf.Series(expected)) @pytest.mark.parametrize( @@ -1489,7 +1489,7 @@ def test_string_filter_alphanum(): ], ) def test_string_char_case(case_op, data): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) s = gs.str @@ -1519,7 +1519,7 @@ def test_string_char_case(case_op, data): ], ) def test_strings_rpartition(data): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq(ps.str.rpartition(), gs.str.rpartition()) @@ -1538,7 +1538,7 @@ def test_strings_rpartition(data): ], ) def test_strings_partition(data): - gs = Series(data, name="str_name") + gs = cudf.Series(data, name="str_name") ps = pd.Series(data, name="str_name") assert_eq(ps.str.partition(), gs.str.partition()) @@ -1570,7 +1570,7 @@ def test_strings_partition(data): @pytest.mark.parametrize("n", [-1, 2, 1, 9]) @pytest.mark.parametrize("expand", [True, False, None]) def test_strings_rsplit(data, n, expand): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq( @@ -1606,7 +1606,7 @@ def test_strings_rsplit(data, n, expand): @pytest.mark.parametrize("n", [-1, 2, 1, 9]) @pytest.mark.parametrize("expand", [True, False, None]) def test_strings_split(data, n, expand): 
- gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq( @@ -1645,7 +1645,7 @@ def test_strings_split(data, n, expand): "to_strip", ["⅕", None, "123.", ".!? \n\t", "123.!? \n\t", " ", ".", ","] ) def test_strings_strip_tests(data, to_strip): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq(ps.str.strip(to_strip=to_strip), gs.str.strip(to_strip=to_strip)) @@ -1687,7 +1687,7 @@ def test_strings_strip_tests(data, to_strip): @pytest.mark.parametrize("width", [0, 1, 4, 9, 100]) @pytest.mark.parametrize("fillchar", ["⅕", "1", ".", "t", " ", ","]) def test_strings_filling_tests(data, width, fillchar): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq( @@ -1733,7 +1733,7 @@ def test_strings_filling_tests(data, width, fillchar): ) @pytest.mark.parametrize("width", [0, 1, 4, 6, 9, 100]) def test_strings_zfill_tests(data, width): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq(ps.str.zfill(width=width), gs.str.zfill(width=width)) @@ -1761,7 +1761,7 @@ def test_strings_zfill_tests(data, width): ) @pytest.mark.parametrize("fillchar", [" ", ".", "\n", "+", "\t"]) def test_strings_pad_tests(data, width, side, fillchar): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq( @@ -1791,7 +1791,7 @@ def test_strings_pad_tests(data, width, side, fillchar): ) @pytest.mark.parametrize("width", [1, 4, 8, 12, 100]) def test_string_wrap(data, width): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq( @@ -1835,7 +1835,7 @@ def test_string_wrap(data, width): ) @pytest.mark.parametrize("pat", ["a", " ", "\t", "another", "0", r"\$"]) def test_string_count(data, pat): - gs = Series(data) + gs = cudf.Series(data) ps = pd.Series(data) assert_eq(gs.str.count(pat=pat), ps.str.count(pat=pat), check_dtype=False) @@ -1844,7 +1844,7 @@ def test_string_count(data, pat): def test_string_findall(): ps = pd.Series(["Lion", "Monkey", "Rabbit"]) - gs = 
Series(["Lion", "Monkey", "Rabbit"]) + gs = cudf.Series(["Lion", "Monkey", "Rabbit"]) assert_eq(ps.str.findall("Monkey")[1][0], gs.str.findall("Monkey")[0][1]) assert_eq(ps.str.findall("on")[0][0], gs.str.findall("on")[0][0]) @@ -1855,21 +1855,21 @@ def test_string_findall(): def test_string_replace_multi(): ps = pd.Series(["hello", "goodbye"]) - gs = Series(["hello", "goodbye"]) + gs = cudf.Series(["hello", "goodbye"]) expect = ps.str.replace("e", "E").str.replace("o", "O") got = gs.str.replace(["e", "o"], ["E", "O"]) assert_eq(expect, got) ps = pd.Series(["foo", "fuz", np.nan]) - gs = Series.from_pandas(ps) + gs = cudf.Series.from_pandas(ps) expect = ps.str.replace("f.", "ba", regex=True) got = gs.str.replace(["f."], ["ba"], regex=True) assert_eq(expect, got) ps = pd.Series(["f.o", "fuz", np.nan]) - gs = Series.from_pandas(ps) + gs = cudf.Series.from_pandas(ps) expect = ps.str.replace("f.", "ba", regex=False) got = gs.str.replace(["f."], ["ba"], regex=False) @@ -1905,7 +1905,7 @@ def test_string_replace_with_backrefs(find, replace): "tést-string-again", ] ps = pd.Series(s) - gs = Series(s) + gs = cudf.Series(s) got = gs.str.replace_with_backrefs(find, replace) expected = ps.str.replace(find, replace, regex=True) assert_eq(got, expected) @@ -1918,7 +1918,7 @@ def test_string_replace_with_backrefs(find, replace): def test_string_table_view_creation(): data = ["hi"] * 25 + [None] * 2027 psr = pd.Series(data) - gsr = Series.from_pandas(psr) + gsr = cudf.Series.from_pandas(psr) expect = psr[:1] got = gsr[:1] @@ -1944,7 +1944,7 @@ def test_string_table_view_creation(): ) def test_string_starts_ends(data, pat): ps = pd.Series(data) - gs = Series(data) + gs = cudf.Series(data) assert_eq( ps.str.startswith(pat), gs.str.startswith(pat), check_dtype=False @@ -1981,7 +1981,7 @@ def test_string_starts_ends(data, pat): ], ) def test_string_starts_ends_list_like_pat(data, pat): - gs = Series(data) + gs = cudf.Series(data) starts_expected = [] ends_expected = [] @@ -2020,7 
+2020,7 @@ def test_string_starts_ends_list_like_pat(data, pat): ) def test_string_find(data, sub): ps = pd.Series(data) - gs = Series(data) + gs = cudf.Series(data) got = gs.str.find(sub) expect = ps.str.find(sub) @@ -2090,7 +2090,7 @@ def test_string_find(data, sub): ) def test_string_str_index(data, sub, er): ps = pd.Series(data) - gs = Series(data) + gs = cudf.Series(data) if er is None: assert_eq(ps.str.index(sub), gs.str.index(sub), check_dtype=False) @@ -2129,7 +2129,7 @@ def test_string_str_index(data, sub, er): ) def test_string_str_rindex(data, sub, er): ps = pd.Series(data) - gs = Series(data) + gs = cudf.Series(data) if er is None: assert_eq(ps.str.rindex(sub), gs.str.rindex(sub), check_dtype=False) @@ -2186,10 +2186,10 @@ def test_string_str_rindex(data, sub, er): ], ) def test_string_contains_multi(data, sub, expect): - gs = Series(data) - sub = Series(sub) + gs = cudf.Series(data) + sub = cudf.Series(sub) got = gs.str.contains(sub) - expect = Series(expect) + expect = cudf.Series(expect) assert_eq(expect, got, check_dtype=False) @@ -2209,7 +2209,7 @@ def test_string_contains_multi(data, sub, expect): @pytest.mark.parametrize("pat", ["", " ", "a", "abc", "cat", "$", "\n"]) def test_string_str_match(data, pat): ps = pd.Series(data) - gs = Series(data) + gs = cudf.Series(data) assert_eq(ps.str.match(pat), gs.str.match(pat)) assert_eq( @@ -2232,7 +2232,7 @@ def test_string_str_match(data, pat): ) def test_string_str_translate(data): ps = pd.Series(data) - gs = Series(data) + gs = cudf.Series(data) assert_eq( ps.str.translate(str.maketrans({"a": "z"})), @@ -2287,15 +2287,17 @@ def test_string_str_filter_characters(): "$1.50", "", ] - gs = Series(data) - expected = Series(["helloworld", "ABCD", "", "accnt", None, "150", ""]) + gs = cudf.Series(data) + expected = cudf.Series( + ["helloworld", "ABCD", "", "accnt", None, "150", ""] + ) filter = {"a": "z", "A": "Z", "0": "9"} assert_eq(expected, gs.str.filter_characters(filter)) - expected = Series([" ", 
"+++", "?!@#$%^&*()", "é", None, "$.", ""]) + expected = cudf.Series([" ", "+++", "?!@#$%^&*()", "é", None, "$.", ""]) assert_eq(expected, gs.str.filter_characters(filter, False)) - expected = Series( + expected = cudf.Series( ["hello world", "A B C D", " ", "acc nt", None, " 1 50", ""] ) assert_eq(expected, gs.str.filter_characters(filter, True, " ")) @@ -2314,7 +2316,7 @@ def test_string_str_code_points(): " 1234 ", "XYZ", ] - gs = Series(data) + gs = cudf.Series(data) expected = [ 97, 98, @@ -2354,7 +2356,7 @@ def test_string_str_code_points(): 89, 90, ] - expected = Series(expected) + expected = cudf.Series(expected) assert_eq(expected, gs.str.code_points(), check_dtype=False) @@ -2369,9 +2371,7 @@ def test_string_str_code_points(): ], ) def test_string_str_url_encode(data): - import urllib.parse - - gs = Series(data) + gs = cudf.Series(data) got = gs.str.url_encode() expected = pd.Series([urllib.parse.quote(url, safe="~") for url in data]) @@ -2389,9 +2389,7 @@ def test_string_str_url_encode(data): ], ) def test_string_str_decode_url(data): - import urllib.parse - - gs = Series(data) + gs = cudf.Series(data) got = gs.str.url_decode() expected = pd.Series([urllib.parse.unquote(url) for url in data]) @@ -2413,7 +2411,7 @@ def test_string_str_decode_url(data): @pytest.mark.parametrize("obj_type", [None, "str", "category"]) def test_string_typecast(data, obj_type, dtype): psr = pd.Series(data, dtype=obj_type) - gsr = Series(data, dtype=obj_type) + gsr = cudf.Series(data, dtype=obj_type) expect = psr.astype(dtype=dtype) actual = gsr.astype(dtype=dtype) @@ -2452,7 +2450,7 @@ def test_string_typecast(data, obj_type, dtype): @pytest.mark.parametrize("obj_type", [None, "str", "category"]) def test_string_typecast_error(data, obj_type, dtype): psr = pd.Series(data, dtype=obj_type) - gsr = Series(data, dtype=obj_type) + gsr = cudf.Series(data, dtype=obj_type) assert_exceptions_equal( lfunc=psr.astype, @@ -2474,23 +2472,23 @@ def test_string_typecast_error(data, obj_type, 
dtype): ) def test_string_hex_to_int(data): - gsr = Series(data) + gsr = cudf.Series(data) got = gsr.str.htoi() - expected = Series([263988422296292, 0, 281474976710655]) + expected = cudf.Series([263988422296292, 0, 281474976710655]) assert_eq(expected, got) def test_string_ishex(): - gsr = Series(["", None, "0x01a2b3c4d5e6f", "0789", "ABCDEF0"]) + gsr = cudf.Series(["", None, "0x01a2b3c4d5e6f", "0789", "ABCDEF0"]) got = gsr.str.ishex() - expected = Series([False, None, True, True, True]) + expected = cudf.Series([False, None, True, True, True]) assert_eq(expected, got) def test_string_istimestamp(): - gsr = Series( + gsr = cudf.Series( [ "", None, @@ -2508,7 +2506,7 @@ def test_string_istimestamp(): ] ) got = gsr.str.istimestamp(r"%Y%m%d %H%M%S.%f%p%z") - expected = Series( + expected = cudf.Series( [ False, None, @@ -2529,8 +2527,10 @@ def test_string_istimestamp(): def test_string_ip4_to_int(): - gsr = Series(["", None, "hello", "41.168.0.1", "127.0.0.1", "41.197.0.1"]) - expected = Series([0, None, 0, 698875905, 2130706433, 700776449]) + gsr = cudf.Series( + ["", None, "hello", "41.168.0.1", "127.0.0.1", "41.197.0.1"] + ) + expected = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]) got = gsr.str.ip2int() @@ -2538,18 +2538,18 @@ def test_string_ip4_to_int(): def test_string_int_to_ipv4(): - gsr = Series([0, None, 0, 698875905, 2130706433, 700776449]) - expected = Series( + gsr = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]) + expected = cudf.Series( ["0.0.0.0", None, "0.0.0.0", "41.168.0.1", "127.0.0.1", "41.197.0.1"] ) - got = Series(gsr._column.int2ip()) + got = cudf.Series(gsr._column.int2ip()) assert_eq(expected, got) def test_string_isipv4(): - gsr = Series( + gsr = cudf.Series( [ "", None, @@ -2565,7 +2565,7 @@ def test_string_isipv4(): ] ) got = gsr.str.isipv4() - expected = Series( + expected = cudf.Series( [ False, None, @@ -2587,7 +2587,7 @@ def test_string_isipv4(): "dtype", sorted(list(dtypeutils.NUMERIC_TYPES - {"int64", 
"uint64"})) ) def test_string_int_to_ipv4_dtype_fail(dtype): - gsr = Series([1, 2, 3, 4, 5]).astype(dtype) + gsr = cudf.Series([1, 2, 3, 4, 5]).astype(dtype) with pytest.raises(TypeError): gsr._column.int2ip() @@ -2618,7 +2618,7 @@ def test_string_int_to_ipv4_dtype_fail(dtype): ) def test_string_str_subscriptable(data, index): psr = pd.Series(data) - gsr = Series(data) + gsr = cudf.Series(data) assert_eq(psr.str[index], gsr.str[index]) @@ -2640,8 +2640,8 @@ def test_string_str_subscriptable(data, index): ], ) def test_string_str_byte_count(data, expected): - sr = Series(data) - expected = Series(expected, dtype="int32") + sr = cudf.Series(data) + expected = cudf.Series(expected, dtype="int32") actual = sr.str.byte_count() assert_eq(expected, actual) @@ -2689,8 +2689,8 @@ def test_string_str_byte_count(data, expected): ], ) def test_str_isinteger(data, expected): - sr = Series(data, dtype="str") - expected = Series(expected) + sr = cudf.Series(data, dtype="str") + expected = cudf.Series(expected) actual = sr.str.isinteger() assert_eq(expected, actual) @@ -2745,8 +2745,8 @@ def test_str_isinteger(data, expected): ], ) def test_str_isfloat(data, expected): - sr = Series(data, dtype="str") - expected = Series(expected) + sr = cudf.Series(data, dtype="str") + expected = cudf.Series(expected) actual = sr.str.isfloat() assert_eq(expected, actual) @@ -2776,7 +2776,7 @@ def test_str_isfloat(data, expected): ) def test_str_min(data): psr = pd.Series(data) - sr = Series(data) + sr = cudf.Series(data) assert_eq(psr.min(), sr.min()) @@ -2801,7 +2801,7 @@ def test_str_min(data): ) def test_str_max(data): psr = pd.Series(data) - sr = Series(data) + sr = cudf.Series(data) assert_eq(psr.max(), sr.max()) @@ -2826,13 +2826,13 @@ def test_str_max(data): ) def test_str_sum(data): psr = pd.Series(data) - sr = Series(data) + sr = cudf.Series(data) assert_eq(psr.sum(), sr.sum()) def test_str_mean(): - sr = Series(["a", "b", "c", "d", "e"]) + sr = cudf.Series(["a", "b", "c", "d", "e"]) 
with pytest.raises(TypeError): sr.mean() @@ -2840,7 +2840,7 @@ def test_str_mean(): def test_string_product(): psr = pd.Series(["1", "2", "3", "4", "5"]) - sr = Series(["1", "2", "3", "4", "5"]) + sr = cudf.Series(["1", "2", "3", "4", "5"]) assert_exceptions_equal( lfunc=psr.product, @@ -2853,7 +2853,7 @@ def test_string_product(): def test_string_var(): psr = pd.Series(["1", "2", "3", "4", "5"]) - sr = Series(["1", "2", "3", "4", "5"]) + sr = cudf.Series(["1", "2", "3", "4", "5"]) assert_exceptions_equal( lfunc=psr.var, rfunc=sr.var, compare_error_message=False @@ -2862,7 +2862,7 @@ def test_string_var(): def test_string_std(): psr = pd.Series(["1", "2", "3", "4", "5"]) - sr = Series(["1", "2", "3", "4", "5"]) + sr = cudf.Series(["1", "2", "3", "4", "5"]) assert_exceptions_equal( lfunc=psr.std, rfunc=sr.std, compare_error_message=False From c3c3e6826be007668c68bb47a687e283d80ffa24 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 15:03:44 -0800 Subject: [PATCH 22/35] remove versioning of cudf api call --- python/cudf/cudf/testing/testing.py | 33 ++++++++++------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index ec1af0b7321..bacab24a6f3 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -615,28 +615,17 @@ def assert_frame_equal( right = right[list(left._data.names)] # index comparison - if PANDAS_GE_110: - assert_index_equal( - left.index, - right.index, - exact=check_index_type, - check_names=check_names, - check_exact=check_exact, - check_categorical=check_categorical, - rtol=rtol, - atol=atol, - obj=f"{obj}.index", - ) - else: - assert_index_equal( - left.index, - right.index, - exact=check_index_type, - check_names=check_names, - check_exact=check_exact, - check_categorical=check_categorical, - obj=f"{obj}.index", - ) + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + 
check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) if PANDAS_GE_110: pd.testing.assert_index_equal( From 992b483ce33bd22cb18aec60a54253decc587707 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 24 Feb 2021 17:04:32 -0600 Subject: [PATCH 23/35] Update python/cudf/cudf/tests/test_setitem.py Co-authored-by: Keith Kraus --- python/cudf/cudf/tests/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 57661511f5b..fc885a13808 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -23,7 +23,7 @@ def test_dataframe_setitem_bool_mask_scaler(df, arg, value): @pytest.mark.xfail( condition=not PANDAS_GE_120, reason="pandas incorrectly adds nulls with dataframes " - "but works fine with scalers", + "but works fine with scalars", ) def test_dataframe_setitem_scaler_bool(): df = pd.DataFrame({"a": [1, 2, 3]}) From 355e1923280692544814a1da1e4f7427ed4143ff Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 15:26:18 -0800 Subject: [PATCH 24/35] remove double validation --- python/cudf/cudf/core/column/column.py | 6 ------ python/cudf/cudf/core/column/datetime.py | 6 ------ python/cudf/cudf/core/column/timedelta.py | 6 ------ 3 files changed, 18 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 28dd521b37c..e69459010be 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -854,12 +854,6 @@ def isin(self, values: Sequence) -> ColumnBase: TypeError If values is a string """ - if is_scalar(values): - raise TypeError( - "only list-like objects are allowed to be passed " - f"to isin(), you passed a [{type(values).__name__}]" - ) - lhs = self rhs = None diff --git a/python/cudf/cudf/core/column/datetime.py 
b/python/cudf/cudf/core/column/datetime.py index d32b3c2f8e2..638f339e757 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -337,12 +337,6 @@ def is_unique(self) -> bool: return self.as_numerical.is_unique def isin(self, values: Sequence) -> ColumnBase: - if cudf.utils.dtypes.is_scalar(values): - raise TypeError( - "only list-like objects are allowed to be passed " - f"to isin(), you passed a [{type(values).__name__}]" - ) - lhs = self rhs = None diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 82ce1f5f7a0..bd3e655bc79 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -369,12 +369,6 @@ def median(self, skipna: bool = None) -> pd.Timedelta: ) def isin(self, values: Sequence) -> ColumnBase: - if cudf.utils.dtypes.is_scalar(values): - raise TypeError( - "only list-like objects are allowed to be passed " - f"to isin(), you passed a [{type(values).__name__}]" - ) - lhs = self rhs = None From 8d06667fd70da9fe92742fdcc7e6c4bd1c576aec Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 24 Feb 2021 15:48:30 -0800 Subject: [PATCH 25/35] move datetime / duration isin logic to a common utility --- python/cudf/cudf/core/column/column.py | 4 --- python/cudf/cudf/core/column/datetime.py | 20 +---------- python/cudf/cudf/core/column/timedelta.py | 22 +----------- python/cudf/cudf/core/tools/datetimes.py | 43 ++++++++++++++++++++++- 4 files changed, 44 insertions(+), 45 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index e69459010be..02cd7407802 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -849,10 +849,6 @@ def isin(self, values: Sequence) -> ColumnBase: ------- result: Column Column of booleans indicating if each element is in values. 
- Raises - ------- - TypeError - If values is a string """ lhs = self rhs = None diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 638f339e757..7c5385b9bbf 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -337,25 +337,7 @@ def is_unique(self) -> bool: return self.as_numerical.is_unique def isin(self, values: Sequence) -> ColumnBase: - lhs = self - rhs = None - - try: - rhs = cudf.core.column.as_column(values) - - if rhs.dtype.kind in {"f", "i", "u"}: - return cudf.core.column.full(len(self), False, dtype="bool") - rhs = rhs.astype(self.dtype) - res = lhs._isin_earlystop(rhs) - if res is not None: - return res - except ValueError: - # pandas functionally returns all False when cleansing via - # typecasting fails - return cudf.core.column.full(len(self), False, dtype="bool") - - res = lhs._obtain_isin_result(rhs) - return res + return cudf.core.tools.datetimes._isin_datetimelike(self, values) def can_cast_safely(self, to_dtype: Dtype) -> bool: if np.issubdtype(to_dtype, np.datetime64): diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index bd3e655bc79..ac63192b692 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -369,27 +369,7 @@ def median(self, skipna: bool = None) -> pd.Timedelta: ) def isin(self, values: Sequence) -> ColumnBase: - lhs = self - rhs = None - - try: - rhs = cudf.core.column.as_column(values) - - if rhs.dtype.kind in {"f", "i", "u"}: - return cudf.core.column.full(len(self), False, dtype="bool") - - rhs = rhs.astype(self.dtype) - res = lhs._isin_earlystop(rhs) - if res is not None: - return res - except ValueError: - # pandas functionally returns all False when cleansing via - # typecasting fails - return cudf.core.column.full(len(self), False, dtype="bool") - - res = lhs._obtain_isin_result(rhs) - - return res + return 
cudf.core.tools.datetimes._isin_datetimelike(self, values) def quantile( self, q: Union[float, Sequence[float]], interpolation: str, exact: bool diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 206786fad42..4e5e4ce1987 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -1,6 +1,7 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. import warnings +from typing import Sequence, Union import numpy as np import pandas as pd @@ -497,3 +498,43 @@ def __setattr__(self, name, value): raise AttributeError("DateOffset objects are immutable.") else: object.__setattr__(self, name, value) + + +def _isin_datetimelike( + lhs: Union[column.TimeDeltaColumn, column.DatetimeColumn], values: Sequence +) -> column.ColumnBase: + """ + Check whether values are contained in the + DateTimeColumn or TimeDeltaColumn. + + Parameters + ---------- + lhs : TimeDeltaColumn or DatetimeColumn + Column to check whether the `values` exist in. + values : set or list-like + The sequence of values to test. Passing in a single string will + raise a TypeError. Instead, turn a single string into a list + of one element. + + Returns + ------- + result: Column + Column of booleans indicating if each element is in values. 
+ """ + rhs = None + try: + rhs = cudf.core.column.as_column(values) + + if rhs.dtype.kind in {"f", "i", "u"}: + return cudf.core.column.full(len(lhs), False, dtype="bool") + rhs = rhs.astype(lhs.dtype) + res = lhs._isin_earlystop(rhs) + if res is not None: + return res + except ValueError: + # pandas functionally returns all False when cleansing via + # typecasting fails + return cudf.core.column.full(len(lhs), False, dtype="bool") + + res = lhs._obtain_isin_result(rhs) + return res From dd842f311b5f1fa030bfb7af70a381ea6c913165 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 25 Feb 2021 09:18:42 -0800 Subject: [PATCH 26/35] add atol --- python/dask_cudf/dask_cudf/tests/test_core.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index aebdb9fe5b9..e2b77ba192e 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -12,10 +12,10 @@ from dask.dataframe.core import make_meta, meta_nonempty from dask.utils import M -import dask_cudf as dgd - import cudf +import dask_cudf as dgd + def test_from_cudf(): np.random.seed(0) @@ -719,7 +719,9 @@ def test_dataframe_describe(): ddf = dgd.from_cudf(df, npartitions=4) pddf = dd.from_pandas(pdf, npartitions=4) - dd.assert_eq(ddf.describe(), pddf.describe(), check_exact=False) + dd.assert_eq( + ddf.describe(), pddf.describe(), check_exact=False, atol=0.0001 + ) def test_index_map_partitions(): From 9fe44cd7ba9272e74c6880c2a39a2f416ab6fca1 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 25 Feb 2021 09:20:33 -0800 Subject: [PATCH 27/35] rename internal api --- python/cudf/cudf/core/dataframe.py | 4 +-- python/cudf/cudf/tests/test_dataframe.py | 18 +++++++------- python/cudf/cudf/tests/test_dropna.py | 2 +- python/cudf/cudf/tests/test_duplicates.py | 2 +- python/cudf/cudf/tests/test_index.py | 2 +- python/cudf/cudf/tests/test_repr.py | 4 +-- 
python/cudf/cudf/tests/test_rolling.py | 4 +-- python/cudf/cudf/tests/test_series.py | 2 +- python/cudf/cudf/tests/test_stats.py | 6 ++--- python/cudf/cudf/utils/utils.py | 30 ++++++++++++++++++++++- 10 files changed, 51 insertions(+), 23 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index a22fdf65f9f..2afbad2d0e7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -584,7 +584,7 @@ def deserialize(cls, header, frames): @property def dtypes(self): """Return the dtypes in this object.""" - return cudf.utils.utils.create_pandas_series( + return cudf.utils.utils._create_pandas_series( data=[x.dtype for x in self._data.columns], index=self._data.names, ) @@ -690,7 +690,7 @@ def __getitem__(self, arg): elif can_convert_to_column(arg): mask = arg if is_list_like(mask): - mask = cudf.utils.utils.create_pandas_series(data=mask) + mask = cudf.utils.utils._create_pandas_series(data=mask) if mask.dtype == "bool": return self._apply_boolean_mask(mask) else: diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index b72b3338342..a3bad0ab5a6 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3331,7 +3331,7 @@ def test_all(data): # Pandas treats `None` in object type columns as True for some reason, so # replacing with `False` if np.array(data).ndim <= 1: - pdata = cudf.utils.utils.create_pandas_series(data=data).replace( + pdata = cudf.utils.utils._create_pandas_series(data=data).replace( [None], False ) gdata = cudf.Series.from_pandas(pdata) @@ -3386,7 +3386,7 @@ def test_all(data): @pytest.mark.parametrize("axis", [0, 1]) def test_any(data, axis): if np.array(data).ndim <= 1: - pdata = cudf.utils.utils.create_pandas_series(data=data) + pdata = cudf.utils.utils._create_pandas_series(data=data) gdata = cudf.Series.from_pandas(pdata) if axis == 1: @@ -3856,7 +3856,7 @@ def 
test_create_dataframe_column(): ], ) def test_series_values_host_property(data): - pds = cudf.utils.utils.create_pandas_series(data=data) + pds = cudf.utils.utils._create_pandas_series(data=data) gds = cudf.Series(data) np.testing.assert_array_equal(pds.values, gds.values_host) @@ -3879,7 +3879,7 @@ def test_series_values_host_property(data): ], ) def test_series_values_property(data): - pds = cudf.utils.utils.create_pandas_series(data=data) + pds = cudf.utils.utils._create_pandas_series(data=data) gds = cudf.Series(data) gds_vals = gds.values assert isinstance(gds_vals, cupy.ndarray) @@ -3987,7 +3987,7 @@ def test_value_counts(): ) def test_isin_numeric(data, values): index = np.random.randint(0, 100, len(data)) - psr = cudf.utils.utils.create_pandas_series(data=data, index=index) + psr = cudf.utils.utils._create_pandas_series(data=data, index=index) gsr = cudf.Series.from_pandas(psr, nan_as_null=False) expected = psr.isin(values) @@ -4041,7 +4041,7 @@ def test_isin_numeric(data, values): ], ) def test_isin_datetime(data, values): - psr = cudf.utils.utils.create_pandas_series(data=data) + psr = cudf.utils.utils._create_pandas_series(data=data) gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) @@ -4077,7 +4077,7 @@ def test_isin_datetime(data, values): ], ) def test_isin_string(data, values): - psr = cudf.utils.utils.create_pandas_series(data=data) + psr = cudf.utils.utils._create_pandas_series(data=data) gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) @@ -4106,7 +4106,7 @@ def test_isin_string(data, values): ], ) def test_isin_categorical(data, values): - psr = cudf.utils.utils.create_pandas_series(data=data) + psr = cudf.utils.utils._create_pandas_series(data=data) gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) @@ -4140,7 +4140,7 @@ def test_isin_categorical(data, values): ], ) def test_isin_index(data, values): - psr = cudf.utils.utils.create_pandas_series(data=data) + psr = cudf.utils.utils._create_pandas_series(data=data) gsr = 
cudf.Series.from_pandas(psr) got = gsr.index.isin(values) diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index b354f6b2f8a..d01627309d6 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -22,7 +22,7 @@ @pytest.mark.parametrize("inplace", [True, False]) def test_dropna_series(data, nulls, inplace): - psr = cudf.utils.utils.create_pandas_series(data=data) + psr = cudf.utils.utils._create_pandas_series(data=data) if len(data) > 0: if nulls == "one": diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index d429f658451..f721b7a28e5 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -59,7 +59,7 @@ def test_duplicated_with_misspelled_column_name(subset): ], ) def test_drop_duplicates_series(data, keep): - pds = cudf.utils.utils.create_pandas_series(data) + pds = cudf.utils.utils._create_pandas_series(data) gds = cudf.from_pandas(pds) assert_df(pds.drop_duplicates(keep=keep), gds.drop_duplicates(keep=keep)) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index af25b48dd23..688efef555b 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -992,7 +992,7 @@ def test_index_equal_misc(data, other): assert_eq(expected, actual) expected = pd_data.equals( - cudf.utils.utils.create_pandas_series(data=pd_other) + cudf.utils.utils._create_pandas_series(data=pd_other) ) actual = gd_data.equals(cudf.Series(gd_other)) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 9cf8b3ac239..96cd3d23b57 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -159,7 +159,7 @@ def test_integer_dataframe(x): @settings(deadline=None) def test_integer_series(x): sr = cudf.Series(x) - ps = 
cudf.utils.utils.create_pandas_series(data=x) + ps = cudf.utils.utils._create_pandas_series(data=x) assert sr.__repr__() == ps.__repr__() @@ -176,7 +176,7 @@ def test_float_dataframe(x): @settings(deadline=None) def test_float_series(x): sr = cudf.Series(x, nan_as_null=False) - ps = cudf.utils.utils.create_pandas_series(data=x) + ps = cudf.utils.utils._create_pandas_series(data=x) assert sr.__repr__() == ps.__repr__() diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index 27236910ebb..fcc5591adda 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -39,7 +39,7 @@ def test_rolling_series_basic(data, index, agg, nulls, center): elif nulls == "all": data = [np.nan] * len(data) - psr = cudf.utils.utils.create_pandas_series(data=data, index=index) + psr = cudf.utils.utils._create_pandas_series(data=data, index=index) gsr = cudf.Series(psr) for window_size in range(1, len(data) + 1): for min_periods in range(1, window_size + 1): @@ -214,7 +214,7 @@ def test_rolling_getitem_window(): @pytest.mark.parametrize("center", [True, False]) def test_rollling_series_numba_udf_basic(data, index, center): - psr = cudf.utils.utils.create_pandas_series(data=data, index=index) + psr = cudf.utils.utils._create_pandas_series(data=data, index=index) gsr = cudf.from_pandas(psr) def some_func(A): diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index d62942c2364..ab9d3d91f73 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -384,7 +384,7 @@ def test_series_tolist(data): [[], [None, None], ["a"], ["a", "b", "c"] * 500, [1.0, 2.0, 0.3] * 57], ) def test_series_size(data): - psr = cudf.utils.utils.create_pandas_series(data=data) + psr = cudf.utils.utils._create_pandas_series(data=data) gsr = cudf.Series(data) assert_eq(psr.size, gsr.size) diff --git a/python/cudf/cudf/tests/test_stats.py 
b/python/cudf/cudf/tests/test_stats.py index 1eae8ddbf1e..4e07c974280 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -203,7 +203,7 @@ def test_approx_quantiles_int(): @pytest.mark.parametrize("q", [[], 0.5, 1, 0.234, [0.345], [0.243, 0.5, 1]]) def test_misc_quantiles(data, q): - pdf_series = cudf.utils.utils.create_pandas_series(data=data) + pdf_series = cudf.utils.utils._create_pandas_series(data=data) gdf_series = cudf.Series(data) expected = pdf_series.quantile(q) @@ -433,13 +433,13 @@ def test_df_corr(): ) @pytest.mark.parametrize("skipna", [True, False, None]) def test_nans_stats(data, ops, skipna): - psr = cudf.utils.utils.create_pandas_series(data=data) + psr = cudf.utils.utils._create_pandas_series(data=data) gsr = cudf.Series(data) assert_eq( getattr(psr, ops)(skipna=skipna), getattr(gsr, ops)(skipna=skipna) ) - psr = cudf.utils.utils.create_pandas_series(data=data) + psr = cudf.utils.utils._create_pandas_series(data=data) gsr = cudf.Series(data, nan_as_null=False) # Since there is no concept of `nan_as_null` in pandas, # nulls will be returned in the operations. So only diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index b0a1aff4ada..e8b8c53312a 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -625,9 +625,37 @@ def _categorical_scalar_broadcast_to(cat_scalar, size): ) -def create_pandas_series( +def _create_pandas_series( data=None, index=None, dtype=None, name=None, copy=False, fastpath=False ): + """ + Wrapper to create a Pandas Series. If the length of data is 0 and + dtype is not passed, this wrapper defaults the dtype to `float64`. + + Parameters + ---------- + data : array-like, Iterable, dict, or scalar value + Contains data stored in Series. If data is a dict, argument + order is maintained. + index : array-like or Index (1d) + Values must be hashable and have the same length as data. + Non-unique index values are allowed. 
Will default to + RangeIndex (0, 1, 2, …, n) if not provided. + If data is dict-like and index is None, then the keys + in the data are used as the index. If the index is not None, + the resulting Series is reindexed with the index values. + dtype : str, numpy.dtype, or ExtensionDtype, optional + Data type for the output Series. If not specified, this + will be inferred from data. See the user guide for more usages. + name : str, optional + The name to give to the Series. + copy : bool, default False + Copy input data. + + Returns + ------- + pd.Series + """ if (data is None or len(data) == 0) and dtype is None: dtype = "float64" return pd.Series( From da1a3a3dfc8fab3c942dcad015cb70175cb1b853 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 25 Feb 2021 17:01:11 -0800 Subject: [PATCH 28/35] fix categorical setitem and allow np.nan into categories --- python/cudf/cudf/core/column/categorical.py | 41 +++++++++++---- python/cudf/cudf/core/column/column.py | 12 +++-- python/cudf/cudf/core/index.py | 15 +++++- python/cudf/cudf/core/indexing.py | 6 ++- python/cudf/cudf/core/series.py | 10 +++- python/cudf/cudf/tests/test_categorical.py | 16 ++++++ python/cudf/cudf/tests/test_repr.py | 56 +++++++++++++++++++++ 7 files changed, 138 insertions(+), 18 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 0649f82256e..7a8a7d371f7 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -946,12 +946,14 @@ def unary_operator(self, unaryop: str): ) def __setitem__(self, key, value): - to_add_categories = cudf.Index(value).difference(self.categories) + if cudf.utils.dtypes.is_scalar( + value + ) and cudf._lib.scalar._is_null_host_scalar(value): + to_add_categories = [] + else: + to_add_categories = cudf.Index(value).difference(self.categories) - if ( - len(to_add_categories) - and not to_add_categories.isna()._values.all() - ): + if 
len(to_add_categories): raise ValueError( "Cannot setitem on a Categorical with a new " "category, set the categories first" @@ -1067,11 +1069,18 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: def to_pandas( self, index: ColumnLike = None, nullable: bool = False, **kwargs ) -> pd.Series: - signed_dtype = min_signed_type(len(self.categories)) - codes = self.cat().codes.astype(signed_dtype).fillna(-1).to_array() - categories = self.categories.to_pandas() + + if self.categories.isnull().any(): + col = self.copy(deep=True) + col[col.isnull()] = None + else: + col = self + + signed_dtype = min_signed_type(len(col.categories)) + codes = col.cat().codes.astype(signed_dtype).fillna(-1).to_array() + categories = col.categories.dropna(drop_nan=True).to_pandas() data = pd.Categorical.from_codes( - codes, categories=categories, ordered=self.ordered + codes, categories=categories, ordered=col.ordered ) return pd.Series(data, index=index) @@ -1201,6 +1210,20 @@ def find_and_replace( ordered=self.dtype.ordered, ) + def isnull(self) -> ColumnBase: + """Identify missing values in a Column. 
+ """ + result = libcudf.unary.is_null(self) + + if self.categories.dtype.kind == "f": + # Need to consider `np.nan` values incase + # of a float column + result = result | libcudf.unary.is_nan( + self.astype(self.categories.dtype) + ) + + return result + def fillna( self, fill_value: Any = None, method: Any = None, dtype: Dtype = None ) -> CategoricalColumn: diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 02cd7407802..1bad2c3a451 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1063,14 +1063,14 @@ def as_categorical_column(self, dtype, **kwargs) -> ColumnBase: # columns include null index in factorization; remove: if self.has_nulls: - cats = cats.dropna() + cats = cats._column.dropna(drop_nan=False) min_type = min_unsigned_type(len(cats), 8) labels = labels - 1 if np.dtype(min_type).itemsize < labels.dtype.itemsize: labels = labels.astype(min_type) return build_categorical_column( - categories=cats._column, + categories=cats, codes=labels._column, mask=self.mask, ordered=ordered, @@ -2077,9 +2077,11 @@ def _construct_array( arbitrary = cupy.asarray(arbitrary, dtype=dtype) except (TypeError, ValueError): native_dtype = dtype - if dtype is None and pd.api.types.infer_dtype(arbitrary) in ( - "mixed", - "mixed-integer", + if ( + dtype is None + and not cudf._lib.scalar._is_null_host_scalar(arbitrary) + and pd.api.types.infer_dtype(arbitrary) + in ("mixed", "mixed-integer",) ): native_dtype = "object" arbitrary = np.asarray( diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index e3899a403f1..88f3f8c4c89 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1993,7 +1993,20 @@ def __repr__(self): # utilize `Index.to_string` once it is implemented # related issue : https://github.com/pandas-dev/pandas/issues/35389 if isinstance(preprocess, CategoricalIndex): - output = preprocess.to_pandas().__repr__() + if 
preprocess.categories.dtype.kind == "f": + output = ( + preprocess.astype("str") + .to_pandas() + .astype("category") + .__repr__() + ) + break_idx = output.find("ordered=") + output = ( + output[:break_idx].replace("'", "") + output[break_idx:] + ) + else: + output = preprocess.to_pandas().__repr__() + output = output.replace("nan", cudf._NA_REP) elif preprocess._values.nullable: output = self._clean_nulls_from_index().to_pandas().__repr__() diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index 4d685408df3..cf372286b7e 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -95,8 +95,10 @@ def __setitem__(self, key, value): else: value = column.as_column(value) - if hasattr(value, "dtype") and pd.api.types.is_numeric_dtype( - value.dtype + if ( + not is_categorical_dtype(self._sr._column.dtype) + and hasattr(value, "dtype") + and pd.api.types.is_numeric_dtype(value.dtype) ): # normalize types if necessary: if not pd.api.types.is_integer(key): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 72e468002db..86045397d46 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1070,7 +1070,13 @@ def __repr__(self): else get_option("display.min_rows") ) show_dimensions = get_option("display.show_dimensions") - output = preprocess.to_pandas().to_string( + if preprocess._column.categories.dtype.kind == "f": + pd_series = ( + preprocess.astype("str").to_pandas().astype("category") + ) + else: + pd_series = preprocess.to_pandas() + output = pd_series.to_string( name=self.name, dtype=self.dtype, min_rows=min_rows, @@ -1085,6 +1091,8 @@ def __repr__(self): if isinstance(preprocess._column, cudf.core.column.CategoricalColumn): category_memory = lines[-1] + if preprocess._column.categories.dtype.kind == "f": + category_memory = category_memory.replace("'", "") lines = lines[:-1] if len(lines) > 1: if lines[-1].startswith("Name: "): diff --git 
a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 9779fb786f6..164e72048a7 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -762,3 +762,19 @@ def test_categorical_assignment(data, cat_dtype): pd_df.assign(cat_col=pd_categorical) cd_df.assign(cat_col=pd_categorical) assert_eq(pd_df, cd_df) + + +def test_categorical_allow_nan(): + gs = cudf.Series([1, 2, np.nan, 10, np.nan, None], nan_as_null=False) + gs = gs.astype("category") + expected_codes = cudf.Series([0, 1, 3, 2, 3, None], dtype="uint8") + assert_eq(expected_codes, gs.cat.codes) + + expected_categories = cudf.Index([1.0, 2.0, 10.0, np.nan], dtype="float64") + assert_eq(expected_categories, gs.cat.categories) + + actual_ps = gs.to_pandas() + expected_ps = pd.Series( + [1.0, 2.0, np.nan, 10.0, np.nan, np.nan], dtype="category" + ) + assert_eq(actual_ps, expected_ps) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 96cd3d23b57..729ee60a82a 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1417,3 +1417,59 @@ def test_mulitIndex_null_repr(gdi, expected_repr): actual_repr = gdi.__repr__() assert actual_repr.split() == expected_repr.split() + + +def test_categorical_series_with_nan_repr(): + series = cudf.Series( + [1, 2, np.nan, 10, np.nan, None], nan_as_null=False + ).astype("category") + + expected_repr = textwrap.dedent( + """ + 0 1.0 + 1 2.0 + 2 NaN + 3 10.0 + 4 NaN + 5 + dtype: category + Categories (4, object): [1.0, 10.0, 2.0, NaN] + """ + ) + + assert series.__repr__().split() == expected_repr.split() + + +def test_categorical_dataframe_with_nan_repr(): + series = cudf.Series( + [1, 2, np.nan, 10, np.nan, None], nan_as_null=False + ).astype("category") + df = cudf.DataFrame({"a": series}) + expected_repr = textwrap.dedent( + """ + a + 0 1.0 + 1 2.0 + 2 NaN + 3 10.0 + 4 NaN + 5 + """ + ) + + assert 
df.__repr__().split() == expected_repr.split() + + +def test_categorical_index_with_nan_repr(): + cat_index = cudf.Index( + cudf.Series( + [1, 2, np.nan, 10, np.nan, None], nan_as_null=False + ).astype("category") + ) + + expected_repr = ( + "CategoricalIndex([1.0, 2.0, NaN, 10.0, NaN, ], " + "categories=[1.0, 10.0, 2.0, NaN], ordered=False, dtype='category')" + ) + + assert cat_index.__repr__() == expected_repr From e70686f549507f7bfab7315c58ca58e37ccefa88 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 25 Feb 2021 17:10:15 -0800 Subject: [PATCH 29/35] add nan setitem test --- python/cudf/cudf/tests/test_categorical.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 164e72048a7..a117c15f14d 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -778,3 +778,15 @@ def test_categorical_allow_nan(): [1.0, 2.0, np.nan, 10.0, np.nan, np.nan], dtype="category" ) assert_eq(actual_ps, expected_ps) + + +def test_categorical_setitem_with_nan(): + gs = cudf.Series( + [1, 2, np.nan, 10, np.nan, None], nan_as_null=False + ).astype("category") + gs[[1, 3]] = np.nan + + expected_series = cudf.Series( + [1, np.nan, np.nan, np.nan, np.nan, None], nan_as_null=False + ).astype(gs.dtype) + assert_eq(gs, expected_series) From 39ba07a3996ebc48c816d7f62f331dd5a4025874 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 25 Feb 2021 20:52:40 -0800 Subject: [PATCH 30/35] make null checks and to_pandas code flow more efficient --- python/cudf/cudf/core/column/categorical.py | 51 ++++++++++++++++----- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 7a8a7d371f7..dc59727187c 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -949,11 +949,13 @@ def
__setitem__(self, key, value): if cudf.utils.dtypes.is_scalar( value ) and cudf._lib.scalar._is_null_host_scalar(value): - to_add_categories = [] + to_add_categories = 0 else: - to_add_categories = cudf.Index(value).difference(self.categories) + to_add_categories = len( + cudf.Index(value).difference(self.categories) + ) - if len(to_add_categories): + if to_add_categories > 0: raise ValueError( "Cannot setitem on a Categorical with a new " "category, set the categories first" @@ -1070,9 +1072,18 @@ def to_pandas( self, index: ColumnLike = None, nullable: bool = False, **kwargs ) -> pd.Series: - if self.categories.isnull().any(): - col = self.copy(deep=True) - col[col.isnull()] = None + if self.categories.dtype.kind == "f": + new_mask = bools_to_mask(self.notnull()) + col = column.build_categorical_column( + categories=self.dtype.categories._values, + codes=column.as_column( + self.codes.base_data, dtype=self.codes.dtype + ), + mask=new_mask, + ordered=self.dtype.ordered, + offset=self.offset, + size=self.size, + ) else: col = self @@ -1211,16 +1222,34 @@ def find_and_replace( ) def isnull(self) -> ColumnBase: - """Identify missing values in a Column. + """ + Identify missing values in a CategoricalColumn. """ result = libcudf.unary.is_null(self) if self.categories.dtype.kind == "f": # Need to consider `np.nan` values incase - # of a float column - result = result | libcudf.unary.is_nan( - self.astype(self.categories.dtype) - ) + # of an underlying float column + categories = libcudf.unary.is_nan(self.categories) + if categories.any(): + code = self._encode(np.nan) + result = result | (self.codes == cudf.Scalar(code)) + + return result + + def notnull(self) -> ColumnBase: + """ + Identify non-missing values in a CategoricalColumn. 
+ """ + result = libcudf.unary.is_valid(self) + + if self.categories.dtype.kind == "f": + # Need to consider `np.nan` values incase + # of an underlying float column + categories = libcudf.unary.is_nan(self.categories) + if categories.any(): + code = self._encode(np.nan) + result = result & (self.codes != cudf.Scalar(code)) return result From 2cc496dc1a0b43e20bb7a8943ad5939db7cc576c Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 26 Feb 2021 01:09:53 -0800 Subject: [PATCH 31/35] fix repr --- python/cudf/cudf/core/column/categorical.py | 9 +++------ python/cudf/cudf/core/dtypes.py | 5 ++++- python/cudf/cudf/core/series.py | 9 ++++++++- python/cudf/cudf/tests/test_repr.py | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index dc59727187c..c41a458f02b 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1075,14 +1075,11 @@ def to_pandas( if self.categories.dtype.kind == "f": new_mask = bools_to_mask(self.notnull()) col = column.build_categorical_column( - categories=self.dtype.categories._values, - codes=column.as_column( - self.codes.base_data, dtype=self.codes.dtype - ), + categories=self.categories, + codes=column.as_column(self.codes, dtype=self.codes.dtype), mask=new_mask, ordered=self.dtype.ordered, - offset=self.offset, - size=self.size, + size=self.codes.size, ) else: col = self diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index f11f3692faf..2205c1821cb 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -56,7 +56,10 @@ def to_pandas(self) -> pd.CategoricalDtype: if self.categories is None: categories = None else: - categories = self.categories.to_pandas() + if self._categories.dtype.kind == "f": + categories = self.categories.dropna().to_pandas() + else: + categories = self.categories.to_pandas() return 
pd.CategoricalDtype(categories=categories, ordered=self.ordered) def _init_categories(self, categories: Any): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 86045397d46..be03fb147ff 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1092,7 +1092,14 @@ def __repr__(self): if isinstance(preprocess._column, cudf.core.column.CategoricalColumn): category_memory = lines[-1] if preprocess._column.categories.dtype.kind == "f": - category_memory = category_memory.replace("'", "") + category_memory = category_memory.replace("'", "").split(": ") + category_memory = ( + category_memory[0].replace( + "object", preprocess._column.categories.dtype.name + ) + + ": " + + category_memory[1] + ) lines = lines[:-1] if len(lines) > 1: if lines[-1].startswith("Name: "): diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 729ee60a82a..66e09f61869 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1433,7 +1433,7 @@ def test_categorical_series_with_nan_repr(): 4 NaN 5 dtype: category - Categories (4, object): [1.0, 10.0, 2.0, NaN] + Categories (4, float64): [1.0, 10.0, 2.0, NaN] """ ) From 0bd3bba26303f8eb94ddcb467f8b8c47bdfeac29 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 26 Feb 2021 01:18:33 -0800 Subject: [PATCH 32/35] fix typo --- python/cudf/cudf/core/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 2205c1821cb..78437f73b1a 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -53,7 +53,7 @@ def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype": ) def to_pandas(self) -> pd.CategoricalDtype: - if self.categories is None: + if self._categories is None: categories = None else: if self._categories.dtype.kind == "f": From 3d44f5f0f3e76ada04508953cc6a37916a733a49 Mon Sep 17 
00:00:00 2001 From: galipremsagar Date: Fri, 26 Feb 2021 01:21:25 -0800 Subject: [PATCH 33/35] fix typo --- python/cudf/cudf/core/dtypes.py | 4 ++-- python/cudf/cudf/core/index.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 78437f73b1a..218cc457d7d 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -53,10 +53,10 @@ def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype": ) def to_pandas(self) -> pd.CategoricalDtype: - if self._categories is None: + if self.categories is None: categories = None else: - if self._categories.dtype.kind == "f": + if self.categories.dtype.kind == "f": categories = self.categories.dropna().to_pandas() else: categories = self.categories.to_pandas() diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 88f3f8c4c89..b9bdb70de1b 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -137,6 +137,10 @@ def __init__( def _values(self) -> ColumnBase: raise NotImplementedError + @property + def dtype(self): + raise NotImplementedError + def __getitem__(self, key): raise NotImplementedError() From c1c2d96f927af0f1081ac2925ad2d53203bb2161 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 26 Feb 2021 02:26:13 -0800 Subject: [PATCH 34/35] update index code --- python/cudf/cudf/core/dtypes.py | 4 +++- python/cudf/cudf/core/index.py | 4 ---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 218cc457d7d..8b7d54b6715 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -56,7 +56,9 @@ def to_pandas(self) -> pd.CategoricalDtype: if self.categories is None: categories = None else: - if self.categories.dtype.kind == "f": + if isinstance( + self.categories, (cudf.Float32Index, cudf.Float64Index) + ): categories = 
self.categories.dropna().to_pandas() else: categories = self.categories.to_pandas() diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index b9bdb70de1b..88f3f8c4c89 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -137,10 +137,6 @@ def __init__( def _values(self) -> ColumnBase: raise NotImplementedError - @property - def dtype(self): - raise NotImplementedError - def __getitem__(self, key): raise NotImplementedError() From ae1b8c688cd471c40eff60d366588795bfd101f3 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 26 Feb 2021 11:53:07 -0800 Subject: [PATCH 35/35] add packaging conda install --- conda/recipes/cudf/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 9afc7094f27..21eb017eb23 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -45,6 +45,7 @@ requirements: - fsspec>=0.6.0 - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - nvtx >=0.2.1 + - packaging test: requires: