From 19afce088d11b5ae8daeb0e9e37c6849c9522b82 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 12 Aug 2021 15:52:48 -0700
Subject: [PATCH 01/28] Change inheritance structure for Index types and get
 all index/multiindex tests passing.

---
 python/cudf/cudf/_typing.py                 |   1 +
 python/cudf/cudf/api/types.py               |   2 +-
 python/cudf/cudf/core/column/categorical.py |   4 +-
 python/cudf/cudf/core/column/methods.py     |   2 +-
 python/cudf/cudf/core/frame.py              |  15 +-
 python/cudf/cudf/core/index.py              | 222 +++++++++-----------
 python/cudf/cudf/core/multiindex.py         |  78 +------
 python/cudf/cudf/tests/test_index.py        |   4 +-
 8 files changed, 128 insertions(+), 200 deletions(-)

diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py
index 7eb0c7bdce4..f9d04b5ab54 100644
--- a/python/cudf/cudf/_typing.py
+++ b/python/cudf/cudf/_typing.py
@@ -29,3 +29,4 @@
 
 DataFrameOrSeries = Union["cudf.Series", "cudf.DataFrame"]
 SeriesOrIndex = Union["cudf.Series", "cudf.core.index.BaseIndex"]
+SeriesOrSingleColumnIndex = Union["cudf.Series", "cudf.core.index.Generic"]
diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index bf296e11178..10bbb620715 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -194,7 +194,7 @@ def wrapped_func(obj):
 
 
 def _union_categoricals(
-    to_union: List[Union[cudf.Series, cudf.Index]],
+    to_union: List[Union[cudf.Series, cudf.CategoricalIndex]],
     sort_categories: bool = False,
     ignore_order: bool = False,
 ):
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 7333ae119cd..76dd0683a5a 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -37,7 +37,7 @@
 )
 
 if TYPE_CHECKING:
-    from cudf._typing import SeriesOrIndex
+    from cudf._typing import SeriesOrIndex, SeriesOrSingleColumnIndex
     from cudf.core.column import (
         ColumnBase,
         DatetimeColumn,
@@ -104,7 +104,7 @@ class CategoricalAccessor(ColumnMethods):
 
     _column: CategoricalColumn
 
-    def __init__(self, parent: SeriesOrIndex):
+    def __init__(self, parent: SeriesOrSingleColumnIndex):
         if not is_categorical_dtype(parent.dtype):
             raise AttributeError(
                 "Can only use .cat accessor with a 'category' dtype"
diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py
index a587c58a49d..9bea94cfecb 100644
--- a/python/cudf/cudf/core/column/methods.py
+++ b/python/cudf/cudf/core/column/methods.py
@@ -8,7 +8,7 @@
 
 import cudf
 
-ParentType = Union["cudf.Series", "cudf.BaseIndex"]
+ParentType = Union["cudf.Series", "cudf.core.index.GenericIndex"]
 
 
 class ColumnMethods:
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index a80ee95dc34..1fd8e3ea751 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -6,7 +6,16 @@
 import functools
 import warnings
 from collections import abc
-from typing import Any, Dict, MutableMapping, Optional, Tuple, TypeVar, Union
+from typing import (
+    Any,
+    Dict,
+    MutableMapping,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+)
 
 import cupy
 import numpy as np
@@ -2352,7 +2361,9 @@ def _copy_type_metadata(
                 ) and not isinstance(
                     self._index, cudf.core.index.CategoricalIndex
                 ):
-                    self._index = cudf.Index(self._index._column)
+                    self._index = cudf.Index(
+                        cast(cudf.core.Index.NumericIndex, self._index)._column
+                    )
 
         return self
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 6be21ce74d2..4a7563325fb 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -64,7 +64,7 @@
 from cudf.utils.utils import cached_property, search_range
 
 
-class BaseIndex(SingleColumnFrame, Serializable):
+class BaseIndex(Serializable):
     """Base class for all cudf Index types."""
 
     dtype: DtypeObj
@@ -81,6 +81,35 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
     def _values(self) -> ColumnBase:
         raise NotImplementedError
 
+    @property
+    def values(self):
+        return self._values.values
+
+    def __iter__(self):
+        """
+        Iterating over a GPU object is not effecient and hence not supported.
+
+        Consider using ``.to_arrow()``, ``.to_pandas()`` or ``.values_host``
+        if you wish to iterate over the values.
+        """
+        cudf.utils.utils.raise_iteration_error(obj=self)
+
+    def _copy_type_metadata(
+        self, other: BaseIndex, include_index: bool = True
+    ) -> BaseIndex:
+        """
+        Copy type metadata from each column of `other` to the corresponding
+        column of `self`.
+        See `ColumnBase._with_type_metadata` for more information.
+        """
+        for name, col, other_col in zip(
+            self._data.keys(), self._data.values(), other._data.values()
+        ):
+            self._data.set_by_label(
+                name, col._with_type_metadata(other_col.dtype), validate=False
+            )
+        return self
+
     def __getitem__(self, key):
         raise NotImplementedError()
 
@@ -128,36 +157,6 @@ def serialize(self):
     def __contains__(self, item):
         return item in self._values
 
-    @annotate("INDEX_EQUALS", color="green", domain="cudf_python")
-    def equals(self, other, **kwargs):
-        """
-        Determine if two Index objects contain the same elements.
-
-        Returns
-        -------
-        out: bool
-            True if “other” is an Index and it has the same elements
-            as calling index; False otherwise.
-        """
-        if not isinstance(other, BaseIndex):
-            return False
-
-        check_types = False
-
-        self_is_categorical = isinstance(self, CategoricalIndex)
-        other_is_categorical = isinstance(other, CategoricalIndex)
-        if self_is_categorical and not other_is_categorical:
-            other = other.astype(self.dtype)
-            check_types = True
-        elif other_is_categorical and not self_is_categorical:
-            self = self.astype(other.dtype)
-            check_types = True
-
-        try:
-            return super().equals(other, check_types=check_types)
-        except TypeError:
-            return False
-
     def get_level_values(self, level):
         """
         Return an Index of values for requested level.
@@ -233,51 +232,6 @@ def names(self, values):
 
         self.name = values[0]
 
-    def dropna(self, how="any"):
-        """
-        Return an Index with null values removed.
-
-        Parameters
-        ----------
-            how : {‘any’, ‘all’}, default ‘any’
-                If the Index is a MultiIndex, drop the value when any or
-                all levels are NaN.
-
-        Returns
-        -------
-        valid : Index
-
-        Examples
-        --------
-        >>> import cudf
-        >>> index = cudf.Index(['a', None, 'b', 'c'])
-        >>> index
-        StringIndex(['a' None 'b' 'c'], dtype='object')
-        >>> index.dropna()
-        StringIndex(['a' 'b' 'c'], dtype='object')
-
-        Using `dropna` on a `MultiIndex`:
-
-        >>> midx = cudf.MultiIndex(
-        ...         levels=[[1, None, 4, None], [1, 2, 5]],
-        ...         codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]],
-        ...         names=["x", "y"],
-        ...     )
-        >>> midx
-        MultiIndex([(   1, 1),
-                    (   1, 5),
-                    (<NA>, 2),
-                    (   4, 2),
-                    (<NA>, 1)],
-                   names=['x', 'y'])
-        >>> midx.dropna()
-        MultiIndex([(1, 1),
-                    (1, 5),
-                    (4, 2)],
-                   names=['x', 'y'])
-        """
-        return super().dropna(how=how)
-
     def _clean_nulls_from_index(self):
         """
         Convert all na values(if any) in Index object
@@ -1052,36 +1006,6 @@ def isin(self, values):
 
         return self._values.isin(values).values
 
-    def where(self, cond, other=None):
-        """
-        Replace values where the condition is False.
-
-        Parameters
-        ----------
-        cond : bool array-like with the same length as self
-            Where cond is True, keep the original value.
-            Where False, replace with corresponding value from other.
-            Callables are not supported.
-        other: scalar, or array-like
-            Entries where cond is False are replaced with
-            corresponding value from other. Callables are not
-            supported. Default is None.
-
-        Returns
-        -------
-        Same type as caller
-
-        Examples
-        --------
-        >>> import cudf
-        >>> index = cudf.Index([4, 3, 2, 1, 0])
-        >>> index
-        Int64Index([4, 3, 2, 1, 0], dtype='int64')
-        >>> index.where(index > 2, 15)
-        Int64Index([4, 3, 15, 15, 15], dtype='int64')
-        """
-        return super().where(cond=cond, other=other)
-
     def memory_usage(self, deep=False):
         """
         Memory usage of the values.
@@ -1295,19 +1219,15 @@ def _from_data(
                     index_class_type = _dtype_to_index[values.dtype.type]
                 except KeyError:
                     index_class_type = GenericIndex
-                out = super(BaseIndex, index_class_type).__new__(
-                    index_class_type
-                )
+                out = index_class_type.__new__(index_class_type)
             elif isinstance(values, DatetimeColumn):
-                out = super(BaseIndex, DatetimeIndex).__new__(DatetimeIndex)
+                out = DatetimeIndex.__new__(DatetimeIndex)
             elif isinstance(values, TimeDeltaColumn):
-                out = super(BaseIndex, TimedeltaIndex).__new__(TimedeltaIndex)
+                out = TimedeltaIndex.__new__(TimedeltaIndex)
             elif isinstance(values, StringColumn):
-                out = super(BaseIndex, StringIndex).__new__(StringIndex)
+                out = StringIndex.__new__(StringIndex)
             elif isinstance(values, CategoricalColumn):
-                out = super(BaseIndex, CategoricalIndex).__new__(
-                    CategoricalIndex
-                )
+                out = CategoricalIndex.__new__(CategoricalIndex)
             out._data = data
             out._index = None
             return out
@@ -1509,7 +1429,7 @@ def equals(self, other):
                 other._step,
             ):
                 return True
-        return super().equals(other)
+        return cudf.Index._from_data(self._data).equals(other)
 
     def serialize(self):
         header = {}
@@ -1685,8 +1605,38 @@ def __mul__(self, other):
             )
         return super().__mul__(other)
 
+    def where(self, cond, other=None):
+        """
+        Replace values where the condition is False.
+
+        Parameters
+        ----------
+        cond : bool array-like with the same length as self
+            Where cond is True, keep the original value.
+            Where False, replace with corresponding value from other.
+            Callables are not supported.
+        other: scalar, or array-like
+            Entries where cond is False are replaced with
+            corresponding value from other. Callables are not
+            supported. Default is None.
 
-class GenericIndex(BaseIndex):
+        Returns
+        -------
+        Same type as caller
+
+        Examples
+        --------
+        >>> import cudf
+        >>> index = cudf.Index([4, 3, 2, 1, 0])
+        >>> index
+        Int64Index([4, 3, 2, 1, 0], dtype='int64')
+        >>> index.where(index > 2, 15)
+        Int64Index([4, 3, 15, 15, 15], dtype='int64')
+        """
+        return cudf.Index._from_data(self._data).where(cond=cond, other=other)
+
+
+class GenericIndex(BaseIndex, SingleColumnFrame):
     """
     An array of orderable values that represent the indices of another Column
 
@@ -1729,6 +1679,36 @@ def __init__(self, data, **kwargs):
     def _values(self):
         return self._column
 
+    @annotate("INDEX_EQUALS", color="green", domain="cudf_python")
+    def equals(self, other, **kwargs):
+        """
+        Determine if two Index objects contain the same elements.
+
+        Returns
+        -------
+        out: bool
+            True if “other” is an Index and it has the same elements
+            as calling index; False otherwise.
+        """
+        if not isinstance(other, BaseIndex):
+            return False
+
+        check_types = False
+
+        self_is_categorical = isinstance(self, CategoricalIndex)
+        other_is_categorical = isinstance(other, CategoricalIndex)
+        if self_is_categorical and not other_is_categorical:
+            other = other.astype(self.dtype)
+            check_types = True
+        elif other_is_categorical and not self_is_categorical:
+            self = self.astype(other.dtype)
+            check_types = True
+
+        try:
+            return super().equals(other, check_types=check_types)
+        except TypeError:
+            return False
+
     def copy(self, name=None, deep=False, dtype=None, names=None):
         """
         Make a copy of this object.
@@ -3190,6 +3170,14 @@ def __new__(
 
         return as_index(data, copy=copy, dtype=dtype, name=name, **kwargs)
 
+    @classmethod
+    def from_arrow(cls, obj):
+        try:
+            return cls(ColumnBase.from_arrow(obj))
+        except TypeError:
+            # Try interpreting object as a MultiIndex before failing.
+            return cudf.MultiIndex.from_arrow(obj)
+
 
 def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex:
     """
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index af6ac5f3dae..a968ed12a86 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -18,12 +18,12 @@
 from cudf._typing import DataFrameOrSeries
 from cudf.core._compat import PANDAS_GE_120
 from cudf.core.column import as_column, column
-from cudf.core.frame import SingleColumnFrame
+from cudf.core.frame import Frame
 from cudf.core.index import BaseIndex, as_index
 from cudf.utils.utils import _maybe_indices_to_slice
 
 
-class MultiIndex(BaseIndex):
+class MultiIndex(BaseIndex, Frame):
     """A multi-level or hierarchical index.
 
     Provides N-Dimensional indexing into Series and DataFrame objects.
@@ -201,11 +201,6 @@ def names(self, value):
             )
         self._names = pd.core.indexes.frozen.FrozenList(value)
 
-    @property
-    def _num_columns(self):
-        # MultiIndex is not a single-columned frame.
-        return super(SingleColumnFrame, self)._num_columns
-
     def rename(self, names, inplace=False):
         """
         Alter MultiIndex level names
@@ -293,13 +288,8 @@ def set_names(self, names, level=None, inplace=False):
 
         return self._set_names(names=names, inplace=inplace)
 
-    # TODO: This type ignore is indicating a real problem, which is that
-    # MultiIndex should not be inheriting from SingleColumnFrame, but fixing
-    # that will have to wait until we reshuffle the Index hierarchy.
     @classmethod
-    def _from_data(  # type: ignore
-        cls, data: Mapping, index=None
-    ) -> MultiIndex:
+    def _from_data(cls, data: Mapping, index=None) -> MultiIndex:
         return cls.from_frame(cudf.DataFrame._from_data(data))
 
     @property
@@ -545,68 +535,6 @@ def __repr__(self):
         data_output = "\n".join(lines)
         return output_prefix + data_output
 
-    @classmethod
-    def from_arrow(cls, table):
-        """
-        Convert PyArrow Table to MultiIndex
-
-        Parameters
-        ----------
-        table : PyArrow Table
-            PyArrow Object which has to be converted to MultiIndex
-
-        Returns
-        -------
-        cudf MultiIndex
-
-        Examples
-        --------
-        >>> import cudf
-        >>> import pyarrow as pa
-        >>> tbl = pa.table({"a":[1, 2, 3], "b":["a", "b", "c"]})
-        >>> cudf.MultiIndex.from_arrow(tbl)
-        MultiIndex([(1, 'a'),
-                    (2, 'b'),
-                    (3, 'c')],
-                   names=['a', 'b'])
-        """
-
-        return super(SingleColumnFrame, cls).from_arrow(table)
-
-    def to_arrow(self):
-        """Convert MultiIndex to PyArrow Table
-
-        Returns
-        -------
-        PyArrow Table
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame({"a":[1, 2, 3], "b":[2, 3, 4]})
-        >>> mindex = cudf.Index(df)
-        >>> mindex
-        MultiIndex([(1, 2),
-                    (2, 3),
-                    (3, 4)],
-                   names=['a', 'b'])
-        >>> mindex.to_arrow()
-        pyarrow.Table
-        a: int64
-        b: int64
-        >>> mindex.to_arrow()['a']
-        <pyarrow.lib.ChunkedArray object at 0x7f5c6b71fad0>
-        [
-            [
-                1,
-                2,
-                3
-            ]
-        ]
-        """
-
-        return super(SingleColumnFrame, self).to_arrow()
-
     @property
     def codes(self):
         """
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index f80bdec0ab5..134130406ae 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -672,11 +672,11 @@ def test_index_where(data, condition, other, error):
         else:
             assert_eq(
                 ps.where(ps_condition, other=ps_other)
-                .fillna(gs._columns[0].default_na_value())
+                .fillna(gs._values.default_na_value())
                 .values,
                 gs.where(gs_condition, other=gs_other)
                 .to_pandas()
-                .fillna(gs._columns[0].default_na_value())
+                .fillna(gs._values.default_na_value())
                 .values,
             )
     else:

From c61d176d68d8e9581796006c343f75facbfbd6c2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 12 Aug 2021 16:24:24 -0700
Subject: [PATCH 02/28] Redirect missing RangeIndex methods to Int64Index to
 recover all other expected behavior.

---
 python/cudf/cudf/core/index.py | 38 ++++++++--------------------------
 1 file changed, 9 insertions(+), 29 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 4a7563325fb..290014d9608 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1605,35 +1605,15 @@ def __mul__(self, other):
             )
         return super().__mul__(other)
 
-    def where(self, cond, other=None):
-        """
-        Replace values where the condition is False.
-
-        Parameters
-        ----------
-        cond : bool array-like with the same length as self
-            Where cond is True, keep the original value.
-            Where False, replace with corresponding value from other.
-            Callables are not supported.
-        other: scalar, or array-like
-            Entries where cond is False are replaced with
-            corresponding value from other. Callables are not
-            supported. Default is None.
-
-        Returns
-        -------
-        Same type as caller
-
-        Examples
-        --------
-        >>> import cudf
-        >>> index = cudf.Index([4, 3, 2, 1, 0])
-        >>> index
-        Int64Index([4, 3, 2, 1, 0], dtype='int64')
-        >>> index.where(index > 2, 15)
-        Int64Index([4, 3, 15, 15, 15], dtype='int64')
-        """
-        return cudf.Index._from_data(self._data).where(cond=cond, other=other)
+    def __getattr__(self, key):
+        # For methods that are not defined for RangeIndex we attempt to operate
+        # on the corresponding integer index if possible.
+        try:
+            return getattr(cudf.Index._from_data(self._data), key)
+        except AttributeError:
+            raise AttributeError(
+                f"'{type(self)}' object has no attribute {key}"
+            )
 
 
 class GenericIndex(BaseIndex, SingleColumnFrame):

From d11768dbd0d7577ec7362046ae909c5e66d68d79 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 12 Aug 2021 17:02:24 -0700
Subject: [PATCH 03/28] Move concat impl from BaseIndex to GenericIndex and
 change inheritance order so that Frame methods take precedence.

---
 python/cudf/cudf/_typing.py         |  4 +++-
 python/cudf/cudf/core/algorithms.py |  4 ++--
 python/cudf/cudf/core/dataframe.py  |  2 +-
 python/cudf/cudf/core/index.py      | 36 ++++++++++++++---------------
 python/cudf/cudf/core/multiindex.py |  2 +-
 python/cudf/cudf/core/reshape.py    |  2 +-
 python/cudf/cudf/core/series.py     |  6 +++--
 7 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py
index f9d04b5ab54..793a5d1d9e8 100644
--- a/python/cudf/cudf/_typing.py
+++ b/python/cudf/cudf/_typing.py
@@ -29,4 +29,6 @@
 
 DataFrameOrSeries = Union["cudf.Series", "cudf.DataFrame"]
 SeriesOrIndex = Union["cudf.Series", "cudf.core.index.BaseIndex"]
-SeriesOrSingleColumnIndex = Union["cudf.Series", "cudf.core.index.Generic"]
+SeriesOrSingleColumnIndex = Union[
+    "cudf.Series", "cudf.core.index.GenericIndex"
+]
diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 50ad592b54f..fa6c49284f0 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -6,8 +6,8 @@
 
 from cudf.core.column import as_column
 from cudf.core.frame import Frame
-from cudf.core.index import RangeIndex
-from cudf.core.series import Index, Series
+from cudf.core.index import Index, RangeIndex
+from cudf.core.series import Series
 
 
 def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 40af85e7aee..5797aa28d11 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7643,7 +7643,7 @@ def _get_union_of_indices(indexes):
     if len(indexes) == 1:
         return indexes[0]
     else:
-        merged_index = cudf.Index._concat(indexes)
+        merged_index = cudf.core.index.GenericIndex._concat(indexes)
         merged_index = merged_index.drop_duplicates()
         _, inds = merged_index._values.sort_by_values()
         return merged_index.take(inds)
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 290014d9608..84ff551b932 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -481,23 +481,6 @@ def gpu_values(self):
         """
         return self._values.data_array_view
 
-    @classmethod
-    def _concat(cls, objs):
-        if all(isinstance(obj, RangeIndex) for obj in objs):
-            result = _concat_range_index(objs)
-        else:
-            data = concat_columns([o._values for o in objs])
-            result = as_index(data)
-
-        names = {obj.name for obj in objs}
-        if len(names) == 1:
-            [name] = names
-        else:
-            name = None
-
-        result.name = name
-        return result
-
     def append(self, other):
         """
         Append a collection of Index options together.
@@ -1616,7 +1599,7 @@ def __getattr__(self, key):
             )
 
 
-class GenericIndex(BaseIndex, SingleColumnFrame):
+class GenericIndex(SingleColumnFrame, BaseIndex):
     """
     An array of orderable values that represent the indices of another Column
 
@@ -1659,6 +1642,23 @@ def __init__(self, data, **kwargs):
     def _values(self):
         return self._column
 
+    @classmethod
+    def _concat(cls, objs):
+        if all(isinstance(obj, RangeIndex) for obj in objs):
+            result = _concat_range_index(objs)
+        else:
+            data = concat_columns([o._values for o in objs])
+            result = as_index(data)
+
+        names = {obj.name for obj in objs}
+        if len(names) == 1:
+            [name] = names
+        else:
+            name = None
+
+        result.name = name
+        return result
+
     @annotate("INDEX_EQUALS", color="green", domain="cudf_python")
     def equals(self, other, **kwargs):
         """
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index a968ed12a86..16fb57542a9 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -23,7 +23,7 @@
 from cudf.utils.utils import _maybe_indices_to_slice
 
 
-class MultiIndex(BaseIndex, Frame):
+class MultiIndex(Frame, BaseIndex):
     """A multi-level or hierarchical index.
 
     Provides N-Dimensional indexing into Series and DataFrame objects.
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 1b8405af1a4..392a251dfc4 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -386,7 +386,7 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None):
     elif typ is cudf.MultiIndex:
         return cudf.MultiIndex._concat(objs)
     elif issubclass(typ, cudf.Index):
-        return cudf.Index._concat(objs)
+        return cudf.core.index.GenericIndex._concat(objs)
     else:
         raise TypeError(f"cannot concatenate object of type {typ}")
 
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 855ec8d3e67..8a92f15021a 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -39,7 +39,7 @@
 from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.frame import Frame, SingleColumnFrame, _drop_rows_by_labels
 from cudf.core.groupby.groupby import SeriesGroupBy
-from cudf.core.index import BaseIndex, Index, RangeIndex, as_index
+from cudf.core.index import BaseIndex, RangeIndex, as_index
 from cudf.core.indexing import _SeriesIlocIndexer, _SeriesLocIndexer
 from cudf.core.window import Rolling
 from cudf.utils import cudautils, docutils, ioutils
@@ -2375,7 +2375,9 @@ def _concat(cls, objs, axis=0, index=True):
             if isinstance(objs[0].index, cudf.MultiIndex):
                 index = cudf.MultiIndex._concat([o.index for o in objs])
             else:
-                index = Index._concat([o.index for o in objs])
+                index = cudf.core.index.GenericIndex._concat(
+                    [o.index for o in objs]
+                )
 
         names = {obj.name for obj in objs}
         if len(names) == 1:

From 74880f3c507a81e2da7a376094ec0dbc7761c1a3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 11:04:44 -0700
Subject: [PATCH 04/28] Fix mypy errors that aren't outright incompatibilities.

---
 python/cudf/cudf/_lib/table.pyx           | 10 +++++++++-
 python/cudf/cudf/core/_internals/where.py | 12 +++++-------
 python/cudf/cudf/core/dataframe.py        |  2 +-
 python/cudf/cudf/core/frame.py            |  2 +-
 python/cudf/cudf/core/index.py            | 13 +++++++++++--
 5 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx
index 2981a46a54a..6175df79927 100644
--- a/python/cudf/cudf/_lib/table.pyx
+++ b/python/cudf/cudf/_lib/table.pyx
@@ -34,9 +34,17 @@ cdef class Table:
         """
         if data is None:
             data = {}
-        self._data = cudf.core.column_accessor.ColumnAccessor(data)
+        self._column_accessor = cudf.core.column_accessor.ColumnAccessor(data)
         self._index = index
 
+    @property
+    def _data(self) -> cudf.core.column_accessor.ColumnAccessor:
+        return self._column_accessor
+
+    @_data.setter
+    def _data(self, value):
+        self._column_accessor = value
+
     @property
     def _num_columns(self):
         return len(self._data)
diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 0688283bc43..ea3c7bfb91f 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -10,7 +10,7 @@
 from cudf._typing import ColumnLike, ScalarLike
 from cudf.core.column import ColumnBase
 from cudf.core.dataframe import DataFrame
-from cudf.core.frame import Frame
+from cudf.core.frame import Frame, SingleColumnFrame
 from cudf.core.index import Index
 from cudf.core.series import Series
 
@@ -94,9 +94,9 @@ def _check_and_cast_columns_with_other(
 
 
 def _normalize_columns_and_scalars_type(
-    frame: Union[Series, Index, DataFrame], other: Any, inplace: bool = False,
+    frame: Frame, other: Any, inplace: bool = False,
 ) -> Tuple[
-    Union[Series, Index, DataFrame, ColumnLike], Any,
+    Union[Frame, ColumnLike], Any,
 ]:
     """
     Try to normalize the other's dtypes as per frame.
@@ -177,10 +177,7 @@ def _normalize_columns_and_scalars_type(
 
 
 def where(
-    frame: Union[Series, Index, DataFrame],
-    cond: Any,
-    other: Any = None,
-    inplace: bool = False,
+    frame: Frame, cond: Any, other: Any = None, inplace: bool = False,
 ) -> Optional[Union[Frame]]:
     """
     Replace values where the condition is False.
@@ -332,6 +329,7 @@ def where(
         return frame._mimic_inplace(out_df, inplace=inplace)
 
     else:
+        frame = cast(SingleColumnFrame, frame)
         if isinstance(other, DataFrame):
             raise NotImplementedError(
                 "cannot align with a higher dimensional Frame"
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 5797aa28d11..d9228242b4b 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -160,7 +160,7 @@ class DataFrame(Frame, Serializable, GetAttrGetItemMixin):
     3  3   0.3
     """
 
-    _PROTECTED_KEYS = frozenset(("_data", "_index"))
+    _PROTECTED_KEYS = frozenset(("_column_accessor", "_data", "_index"))
 
     @annotate("DATAFRAME_INIT", color="blue", domain="cudf_python")
     def __init__(self, data=None, index=None, columns=None, dtype=None):
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 1fd8e3ea751..e7888c92917 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2362,7 +2362,7 @@ def _copy_type_metadata(
                     self._index, cudf.core.index.CategoricalIndex
                 ):
                     self._index = cudf.Index(
-                        cast(cudf.core.Index.NumericIndex, self._index)._column
+                        cast(cudf.core.index.NumericIndex, self._index)._column
                     )
 
         return self
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 84ff551b932..d3e42ec2baa 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -81,6 +81,13 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
     def _values(self) -> ColumnBase:
         raise NotImplementedError
 
+    @property
+    def _data(self) -> cudf.core.column_accessor.ColumnAccessor:
+        raise NotImplementedError
+
+    def copy(self, deep: bool = True) -> BaseIndex:
+        raise NotImplementedError
+
     @property
     def values(self):
         return self._values.values
@@ -1199,7 +1206,9 @@ def _from_data(
 
             if isinstance(values, NumericalColumn):
                 try:
-                    index_class_type = _dtype_to_index[values.dtype.type]
+                    index_class_type: Type[GenericIndex] = _dtype_to_index[
+                        values.dtype.type
+                    ]
                 except KeyError:
                     index_class_type = GenericIndex
                 out = index_class_type.__new__(index_class_type)
@@ -3060,7 +3069,7 @@ def as_index(arbitrary, **kwargs) -> BaseIndex:
     )
 
 
-_dtype_to_index: Dict[Any, Type[BaseIndex]] = {
+_dtype_to_index: Dict[Any, Type[NumericIndex]] = {
     np.int8: Int8Index,
     np.int16: Int16Index,
     np.int32: Int32Index,

From 03eee422daa92c2ec1088f9921219e6d32aa70ac Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 11:37:16 -0700
Subject: [PATCH 05/28] Fix some more mypy errors.

---
 python/cudf/cudf/core/index.py      | 287 +++++++++++++++-------------
 python/cudf/cudf/core/multiindex.py |  10 +-
 2 files changed, 153 insertions(+), 144 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index d3e42ec2baa..5f72cff036a 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -64,6 +64,31 @@
 from cudf.utils.utils import cached_property, search_range
 
 
+def _lexsorted_equal_range(
+    idx: Union[GenericIndex, cudf.MultiIndex],
+    key_as_table: Table,
+    is_sorted: bool,
+) -> Tuple[int, int, Optional[ColumnBase]]:
+    """Get equal range for key in lexicographically sorted index. If index
+    is not sorted when called, a sort will take place and `sort_inds` is
+    returned. Otherwise `None` is returned in that position.
+    """
+    if not is_sorted:
+        sort_inds = idx._get_sorted_inds()
+        sort_vals = idx._gather(sort_inds)
+    else:
+        sort_inds = None
+        sort_vals = idx
+    lower_bound = search_sorted(
+        sort_vals, key_as_table, side="left"
+    ).element_indexing(0)
+    upper_bound = search_sorted(
+        sort_vals, key_as_table, side="right"
+    ).element_indexing(0)
+
+    return lower_bound, upper_bound, sort_inds
+
+
 class BaseIndex(Serializable):
     """Base class for all cudf Index types."""
 
@@ -92,6 +117,9 @@ def copy(self, deep: bool = True) -> BaseIndex:
     def values(self):
         return self._values.values
 
+    def get_loc(self, key, method=None, tolerance=None):
+        raise NotImplementedError
+
     def __iter__(self):
         """
         Iterating over a GPU object is not effecient and hence not supported.
@@ -1013,144 +1041,6 @@ def memory_usage(self, deep=False):
         """
         return self._values._memory_usage(deep=deep)
 
-    def get_loc(self, key, method=None, tolerance=None):
-        """Get integer location, slice or boolean mask for requested label.
-
-        Parameters
-        ----------
-        key : label
-        method : {None, 'pad'/'fill', 'backfill'/'bfill', 'nearest'}, optional
-            - default: exact matches only.
-            - pad / ffill: find the PREVIOUS index value if no exact match.
-            - backfill / bfill: use NEXT index value if no exact match.
-            - nearest: use the NEAREST index value if no exact match. Tied
-              distances are broken by preferring the larger index
-              value.
-        tolerance : int or float, optional
-            Maximum distance from index value for inexact matches. The value
-            of the index at the matching location must satisfy the equation
-            ``abs(index[loc] - key) <= tolerance``.
-
-        Returns
-        -------
-        int or slice or boolean mask
-            - If result is unique, return integer index
-            - If index is monotonic, loc is returned as a slice object
-            - Otherwise, a boolean mask is returned
-
-        Examples
-        --------
-        >>> unique_index = cudf.Index(list('abc'))
-        >>> unique_index.get_loc('b')
-        1
-        >>> monotonic_index = cudf.Index(list('abbc'))
-        >>> monotonic_index.get_loc('b')
-        slice(1, 3, None)
-        >>> non_monotonic_index = cudf.Index(list('abcb'))
-        >>> non_monotonic_index.get_loc('b')
-        array([False,  True, False,  True])
-        >>> numeric_unique_index = cudf.Index([1, 2, 3])
-        >>> numeric_unique_index.get_loc(3)
-        2
-        """
-        if tolerance is not None:
-            raise NotImplementedError(
-                "Parameter tolerance is unsupported yet."
-            )
-        if method not in {
-            None,
-            "ffill",
-            "bfill",
-            "pad",
-            "backfill",
-            "nearest",
-        }:
-            raise ValueError(
-                f"Invalid fill method. Expecting pad (ffill), backfill (bfill)"
-                f" or nearest. Got {method}"
-            )
-
-        is_sorted = (
-            self.is_monotonic_increasing or self.is_monotonic_decreasing
-        )
-
-        if not is_sorted and method is not None:
-            raise ValueError(
-                "index must be monotonic increasing or decreasing if `method`"
-                "is specified."
-            )
-
-        key_as_table = Table({"None": as_column(key, length=1)})
-        lower_bound, upper_bound, sort_inds = self._lexsorted_equal_range(
-            key_as_table, is_sorted
-        )
-
-        if lower_bound == upper_bound:
-            # Key not found, apply method
-            if method in ("pad", "ffill"):
-                if lower_bound == 0:
-                    raise KeyError(key)
-                return lower_bound - 1
-            elif method in ("backfill", "bfill"):
-                if lower_bound == self._data.nrows:
-                    raise KeyError(key)
-                return lower_bound
-            elif method == "nearest":
-                if lower_bound == self._data.nrows:
-                    return lower_bound - 1
-                elif lower_bound == 0:
-                    return 0
-                lower_val = self._column.element_indexing(lower_bound - 1)
-                upper_val = self._column.element_indexing(lower_bound)
-                return (
-                    lower_bound - 1
-                    if abs(lower_val - key) < abs(upper_val - key)
-                    else lower_bound
-                )
-            else:
-                raise KeyError(key)
-
-        if lower_bound + 1 == upper_bound:
-            # Search result is unique, return int.
-            return (
-                lower_bound
-                if is_sorted
-                else sort_inds.element_indexing(lower_bound)
-            )
-
-        if is_sorted:
-            # In monotonic index, lex search result is continuous. A slice for
-            # the range is returned.
-            return slice(lower_bound, upper_bound)
-
-        # Not sorted and not unique. Return a boolean mask
-        mask = cupy.full(self._data.nrows, False)
-        true_inds = sort_inds.slice(lower_bound, upper_bound).to_gpu_array()
-        mask[cupy.array(true_inds)] = True
-        return mask
-
-    def _lexsorted_equal_range(
-        self, key_as_table: Table, is_sorted: bool
-    ) -> Tuple[int, int, Optional[ColumnBase]]:
-        """Get equal range for key in lexicographically sorted index. If index
-        is not sorted when called, a sort will take place and `sort_inds` is
-        returned. Otherwise `None` is returned in that position.
-        """
-        if not is_sorted:
-            sort_inds = self._get_sorted_inds()
-            sort_vals = self._gather(sort_inds)
-        else:
-            sort_inds = None
-            sort_vals = self
-        lower_bound = search_sorted(
-            sort_vals, key_as_table, side="left"
-        ).element_indexing(0)
-        upper_bound = search_sorted(
-            sort_vals, key_as_table, side="right"
-        ).element_indexing(0)
-
-        return lower_bound, upper_bound, sort_inds
-
     @classmethod
     def from_pandas(cls, index, nan_as_null=None):
         """
@@ -1607,6 +1497,11 @@ def __getattr__(self, key):
                 f"'{type(self)}' object has no attribute {key}"
             )
 
+    def get_loc(self, key, method=None, tolerance=None):
+        return cudf.Index._from_data(self._data).get_loc(
+            key, method=method, tolerance=tolerance
+        )
+
 
 class GenericIndex(SingleColumnFrame, BaseIndex):
     """
@@ -1724,6 +1619,122 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
 
         return as_index(self._values.astype(dtype), name=name, copy=deep)
 
+    def get_loc(self, key, method=None, tolerance=None):
+        """Get integer location, slice or boolean mask for requested label.
+
+        Parameters
+        ----------
+        key : label
+        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
+            - default: exact matches only.
+            - pad / ffill: find the PREVIOUS index value if no exact match.
+            - backfill / bfill: use NEXT index value if no exact match.
+            - nearest: use the NEAREST index value if no exact match. Tied
+              distances are broken by preferring the larger index
+              value.
+        tolerance : int or float, optional
+            Maximum distance from index value for inexact matches. The value
+            of the index at the matching location must satisfy the equation
+            ``abs(index[loc] - key) <= tolerance``.
+
+        Returns
+        -------
+        int or slice or boolean mask
+            - If result is unique, return integer index
+            - If index is monotonic, loc is returned as a slice object
+            - Otherwise, a boolean mask is returned
+
+        Examples
+        --------
+        >>> unique_index = cudf.Index(list('abc'))
+        >>> unique_index.get_loc('b')
+        1
+        >>> monotonic_index = cudf.Index(list('abbc'))
+        >>> monotonic_index.get_loc('b')
+        slice(1, 3, None)
+        >>> non_monotonic_index = cudf.Index(list('abcb'))
+        >>> non_monotonic_index.get_loc('b')
+        array([False,  True, False,  True])
+        >>> numeric_unique_index = cudf.Index([1, 2, 3])
+        >>> numeric_unique_index.get_loc(3)
+        2
+        """
+        if tolerance is not None:
+            raise NotImplementedError(
+                "Parameter tolerance is unsupported yet."
+            )
+        if method not in {
+            None,
+            "ffill",
+            "bfill",
+            "pad",
+            "backfill",
+            "nearest",
+        }:
+            raise ValueError(
+                f"Invalid fill method. Expecting pad (ffill), backfill (bfill)"
+                f" or nearest. Got {method}"
+            )
+
+        is_sorted = (
+            self.is_monotonic_increasing or self.is_monotonic_decreasing
+        )
+
+        if not is_sorted and method is not None:
+            raise ValueError(
+                "index must be monotonic increasing or decreasing if `method`"
+                "is specified."
+            )
+
+        key_as_table = Table({"None": as_column(key, length=1)})
+        lower_bound, upper_bound, sort_inds = _lexsorted_equal_range(
+            self, key_as_table, is_sorted
+        )
+
+        if lower_bound == upper_bound:
+            # Key not found, apply method
+            if method in ("pad", "ffill"):
+                if lower_bound == 0:
+                    raise KeyError(key)
+                return lower_bound - 1
+            elif method in ("backfill", "bfill"):
+                if lower_bound == self._data.nrows:
+                    raise KeyError(key)
+                return lower_bound
+            elif method == "nearest":
+                if lower_bound == self._data.nrows:
+                    return lower_bound - 1
+                elif lower_bound == 0:
+                    return 0
+                lower_val = self._column.element_indexing(lower_bound - 1)
+                upper_val = self._column.element_indexing(lower_bound)
+                return (
+                    lower_bound - 1
+                    if abs(lower_val - key) < abs(upper_val - key)
+                    else lower_bound
+                )
+            else:
+                raise KeyError(key)
+
+        if lower_bound + 1 == upper_bound:
+            # Search result is unique, return int.
+            return (
+                lower_bound
+                if is_sorted
+                else sort_inds.element_indexing(lower_bound)
+            )
+
+        if is_sorted:
+            # In monotonic index, lex search result is continuous. A slice for
+            # the range is returned.
+            return slice(lower_bound, upper_bound)
+
+        # Not sorted and not unique. Return a boolean mask
+        mask = cupy.full(self._data.nrows, False)
+        true_inds = sort_inds.slice(lower_bound, upper_bound).to_gpu_array()
+        mask[cupy.array(true_inds)] = True
+        return mask
+
     def __sizeof__(self):
         return self._values.__sizeof__()
 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 16fb57542a9..6f1f5dfde4a 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -19,7 +19,7 @@
 from cudf.core._compat import PANDAS_GE_120
 from cudf.core.column import as_column, column
 from cudf.core.frame import Frame
-from cudf.core.index import BaseIndex, as_index
+from cudf.core.index import BaseIndex, _lexsorted_equal_range, as_index
 from cudf.utils.utils import _maybe_indices_to_slice
 
 
@@ -1781,11 +1781,9 @@ def get_loc(self, key, method=None, tolerance=None):
         partial_index = self.__class__._from_data(
             data=self._data.select_by_index(slice(key_as_table._num_columns))
         )
-        (
-            lower_bound,
-            upper_bound,
-            sort_inds,
-        ) = partial_index._lexsorted_equal_range(key_as_table, is_sorted)
+        (lower_bound, upper_bound, sort_inds,) = _lexsorted_equal_range(
+            partial_index, key_as_table, is_sorted
+        )
 
         if lower_bound == upper_bound:
             raise KeyError(key)

From 40dcfc4f71c2bb5a1de21bea8058b6f09e05578f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 11:58:19 -0700
Subject: [PATCH 06/28] Move _from_data from BaseIndex to Index.

---
 python/cudf/cudf/core/index.py      | 74 ++++++++++++++---------------
 python/cudf/cudf/core/multiindex.py |  1 +
 2 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 5f72cff036a..1f0a342ab15 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1079,43 +1079,6 @@ def from_pandas(cls, index, nan_as_null=None):
         ind.name = index.name
         return ind
 
-    @classmethod
-    def _from_data(
-        cls,
-        data: MutableMapping,
-        index: Optional[BaseIndex] = None,
-        name: Any = None,
-    ) -> BaseIndex:
-        assert index is None
-        if not isinstance(data, cudf.core.column_accessor.ColumnAccessor):
-            data = cudf.core.column_accessor.ColumnAccessor(data)
-        if len(data) == 0:
-            raise ValueError("Cannot construct Index from any empty Table")
-        if len(data) == 1:
-            values = next(iter(data.values()))
-
-            if isinstance(values, NumericalColumn):
-                try:
-                    index_class_type: Type[GenericIndex] = _dtype_to_index[
-                        values.dtype.type
-                    ]
-                except KeyError:
-                    index_class_type = GenericIndex
-                out = index_class_type.__new__(index_class_type)
-            elif isinstance(values, DatetimeColumn):
-                out = DatetimeIndex.__new__(DatetimeIndex)
-            elif isinstance(values, TimeDeltaColumn):
-                out = TimedeltaIndex.__new__(TimedeltaIndex)
-            elif isinstance(values, StringColumn):
-                out = StringIndex.__new__(StringIndex)
-            elif isinstance(values, CategoricalColumn):
-                out = CategoricalIndex.__new__(CategoricalIndex)
-            out._data = data
-            out._index = None
-            return out
-        else:
-            return cudf.MultiIndex._from_data(data)
-
     @property
     def _constructor_expanddim(self):
         return cudf.MultiIndex
@@ -3170,6 +3133,43 @@ def __new__(
 
         return as_index(data, copy=copy, dtype=dtype, name=name, **kwargs)
 
+    @classmethod
+    def _from_data(
+        cls,
+        data: MutableMapping,
+        index: Optional[BaseIndex] = None,
+        name: Any = None,
+    ) -> BaseIndex:
+        assert index is None
+        if not isinstance(data, cudf.core.column_accessor.ColumnAccessor):
+            data = cudf.core.column_accessor.ColumnAccessor(data)
+        if len(data) == 0:
+            raise ValueError("Cannot construct Index from any empty Table")
+        if len(data) == 1:
+            values = next(iter(data.values()))
+
+            if isinstance(values, NumericalColumn):
+                try:
+                    index_class_type: Type[GenericIndex] = _dtype_to_index[
+                        values.dtype.type
+                    ]
+                except KeyError:
+                    index_class_type = GenericIndex
+                out = index_class_type.__new__(index_class_type)
+            elif isinstance(values, DatetimeColumn):
+                out = DatetimeIndex.__new__(DatetimeIndex)
+            elif isinstance(values, TimeDeltaColumn):
+                out = TimedeltaIndex.__new__(TimedeltaIndex)
+            elif isinstance(values, StringColumn):
+                out = StringIndex.__new__(StringIndex)
+            elif isinstance(values, CategoricalColumn):
+                out = CategoricalIndex.__new__(CategoricalIndex)
+            out._data = data
+            out._index = None
+            return out
+        else:
+            return cudf.MultiIndex._from_data(data)
+
     @classmethod
     def from_arrow(cls, obj):
         try:
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 6f1f5dfde4a..4b595727252 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -290,6 +290,7 @@ def set_names(self, names, level=None, inplace=False):
 
     @classmethod
     def _from_data(cls, data: Mapping, index=None) -> MultiIndex:
+        assert index is None
         return cls.from_frame(cudf.DataFrame._from_data(data))
 
     @property

From de56376870746677520cb37b79ebdfa7ceb94ffe Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 12:10:56 -0700
Subject: [PATCH 07/28] Fix inconsistencies in _copy_type_metadata.

---
 python/cudf/cudf/core/frame.py |  2 +-
 python/cudf/cudf/core/index.py | 41 ++++++++++++++++++++--------------
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index e7888c92917..c27c9a6b349 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2350,7 +2350,7 @@ def _copy_type_metadata(
 
         if include_index:
             if self._index is not None and other._index is not None:
-                self._index._copy_type_metadata(other._index)
+                self._index._copy_type_metadata(other._index)  # type: ignore
                 # When other._index is a CategoricalIndex, the current index
                 # will be a NumericalIndex with an underlying CategoricalColumn
                 # (the above _copy_type_metadata call will have converted the
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 1f0a342ab15..d4a9a79c5cf 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -48,7 +48,7 @@
 from cudf.core.column.column import as_column, concat_columns
 from cudf.core.column.string import StringMethods as StringMethods
 from cudf.core.dtypes import IntervalDtype
-from cudf.core.frame import SingleColumnFrame
+from cudf.core.frame import Frame, SingleColumnFrame
 from cudf.utils import ioutils
 from cudf.utils.docutils import copy_docstring
 from cudf.utils.dtypes import (
@@ -129,22 +129,6 @@ def __iter__(self):
         """
         cudf.utils.utils.raise_iteration_error(obj=self)
 
-    def _copy_type_metadata(
-        self, other: BaseIndex, include_index: bool = True
-    ) -> BaseIndex:
-        """
-        Copy type metadata from each column of `other` to the corresponding
-        column of `self`.
-        See `ColumnBase._with_type_metadata` for more information.
-        """
-        for name, col, other_col in zip(
-            self._data.keys(), self._data.values(), other._data.values()
-        ):
-            self._data.set_by_label(
-                name, col._with_type_metadata(other_col.dtype), validate=False
-            )
-        return self
-
     def __getitem__(self, key):
         raise NotImplementedError()
 
@@ -1136,6 +1120,13 @@ def __init__(
         self._index = None
         self._name = name
 
+    def _copy_type_metadata(
+        self, other: Frame, include_index: bool = True
+    ) -> RangeIndex:
+        # There is no metadata to be copied for RangeIndex since it does not
+        # have an underlying column.
+        return self
+
     @property
     def name(self):
         """
@@ -1505,6 +1496,22 @@ def __init__(self, data, **kwargs):
         name = kwargs.get("name")
         super().__init__({name: data})
 
+    def _copy_type_metadata(
+        self, other: Frame, include_index: bool = True
+    ) -> GenericIndex:
+        """
+        Copy type metadata from each column of `other` to the corresponding
+        column of `self`.
+        See `ColumnBase._with_type_metadata` for more information.
+        """
+        for name, col, other_col in zip(
+            self._data.keys(), self._data.values(), other._data.values()
+        ):
+            self._data.set_by_label(
+                name, col._with_type_metadata(other_col.dtype), validate=False
+            )
+        return self
+
     @property
     def _values(self):
         return self._column

From 1f87727b7d98ef3c99da041291fc87145bf76d8a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 12:15:40 -0700
Subject: [PATCH 08/28] Move drop_duplicates to avoid type issues.

---
 python/cudf/cudf/core/index.py | 84 ++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 28 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index d4a9a79c5cf..0e15db00a16 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -132,34 +132,6 @@ def __iter__(self):
     def __getitem__(self, key):
         raise NotImplementedError()
 
-    def drop_duplicates(self, keep="first"):
-        """
-        Return Index with duplicate values removed
-
-        Parameters
-        ----------
-        keep : {‘first’, ‘last’, False}, default ‘first’
-            * ‘first’ : Drop duplicates except for the
-                first occurrence.
-            * ‘last’ : Drop duplicates except for the
-                last occurrence.
-            *  False : Drop all duplicates.
-
-        Returns
-        -------
-        deduplicated : Index
-
-        Examples
-        --------
-        >>> import cudf
-        >>> idx = cudf.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
-        >>> idx
-        StringIndex(['lama' 'cow' 'lama' 'beetle' 'lama' 'hippo'], dtype='object')
-        >>> idx.drop_duplicates()
-        StringIndex(['beetle' 'cow' 'hippo' 'lama'], dtype='object')
-        """  # noqa: E501
-        return super().drop_duplicates(keep=keep)
-
     def serialize(self):
         header = {}
         header["index_column"] = {}
@@ -1217,6 +1189,34 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
             start=self._start, stop=self._stop, step=self._step, name=name
         )
 
+    def drop_duplicates(self, keep="first"):
+        """
+        Return Index with duplicate values removed
+
+        Parameters
+        ----------
+        keep : {‘first’, ‘last’, False}, default ‘first’
+            * ‘first’ : Drop duplicates except for the
+                first occurrence.
+            * ‘last’ : Drop duplicates except for the
+                last occurrence.
+            *  False : Drop all duplicates.
+
+        Returns
+        -------
+        deduplicated : RangeIndex
+
+        Examples
+        --------
+        >>> import cudf
+        >>> idx = cudf.RangeIndex(0, 10)
+        >>> idx
+        RangeIndex(start=0, stop=10, step=1)
+        >>> idx.drop_duplicates()
+        RangeIndex(start=0, stop=10, step=1)
+        """  # noqa: E501
+        return self
+
     def __repr__(self):
         return (
             f"{self.__class__.__name__}(start={self._start}, stop={self._stop}"
@@ -1496,6 +1496,34 @@ def __init__(self, data, **kwargs):
         name = kwargs.get("name")
         super().__init__({name: data})
 
+    def drop_duplicates(self, keep="first"):
+        """
+        Return Index with duplicate values removed
+
+        Parameters
+        ----------
+        keep : {‘first’, ‘last’, False}, default ‘first’
+            * ‘first’ : Drop duplicates except for the
+                first occurrence.
+            * ‘last’ : Drop duplicates except for the
+                last occurrence.
+            *  False : Drop all duplicates.
+
+        Returns
+        -------
+        deduplicated : Index
+
+        Examples
+        --------
+        >>> import cudf
+        >>> idx = cudf.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
+        >>> idx
+        StringIndex(['lama' 'cow' 'lama' 'beetle' 'lama' 'hippo'], dtype='object')
+        >>> idx.drop_duplicates()
+        StringIndex(['beetle' 'cow' 'hippo' 'lama'], dtype='object')
+        """  # noqa: E501
+        return super().drop_duplicates(keep=keep)
+
     def _copy_type_metadata(
         self, other: Frame, include_index: bool = True
     ) -> GenericIndex:

From 54e22e9ed1eec2e5e22a3e971270dc6eebeaf592 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 12:21:56 -0700
Subject: [PATCH 09/28] Move Frame._concat to DataFrame._concat since it's a
 DataFrame-specific implementation.

---
 python/cudf/cudf/core/dataframe.py | 307 ++++++++++++++++++++++++++++-
 python/cudf/cudf/core/frame.py     | 291 +--------------------------
 2 files changed, 306 insertions(+), 292 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d9228242b4b..c84e9c379d3 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations, division
 
+import functools
 import inspect
 import itertools
 import numbers
@@ -25,10 +26,15 @@
 import cudf
 import cudf.core.common
 from cudf import _lib as libcudf
-from cudf.api.types import is_bool_dtype, is_dict_like
+from cudf.api.types import is_bool_dtype, is_dict_like, is_dtype_equal
 from cudf.core import column, reshape
 from cudf.core.abc import Serializable
-from cudf.core.column import as_column, column_empty
+from cudf.core.column import (
+    as_column,
+    build_categorical_column,
+    column_empty,
+    concat_columns,
+)
 from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.frame import Frame, _drop_rows_by_labels
 from cudf.core.groupby.groupby import DataFrameGroupBy
@@ -48,9 +54,11 @@
     is_datetime_dtype,
     is_list_dtype,
     is_list_like,
+    is_numerical_dtype,
     is_scalar,
     is_string_dtype,
     is_struct_dtype,
+    min_scalar_type,
     numeric_normalize_types,
 )
 from cudf.utils.utils import GetAttrGetItemMixin
@@ -1062,6 +1070,209 @@ def tail(self, n=5):
 
         return self.iloc[-n:]
 
+    @classmethod
+    @annotate("CONCAT", color="orange", domain="cudf_python")
+    def _concat(
+        cls, objs, axis=0, join="outer", ignore_index=False, sort=False
+    ):
+        # flag to indicate at least one empty input frame also has an index
+        empty_has_index = False
+        # length of output frame's RangeIndex if all input frames are empty,
+        # and at least one has an index
+        result_index_length = 0
+        # the number of empty input frames
+        num_empty_input_frames = 0
+
+        for i, obj in enumerate(objs):
+            # shallow-copy the input DFs in case the same DF instance
+            # is concatenated with itself
+            objs[i] = obj.copy(deep=False)
+
+            # If ignore_index is true, determine if
+            # all or some objs are empty (and have index).
+            # 1. If all objects are empty (and have index), we
+            # should set the index separately using RangeIndex.
+            # 2. If some objects are empty (and have index), we
+            # create empty columns later while populating `columns`
+            # variable. Detailed explanation of second case before
+            # allocation of `columns` variable below.
+            if ignore_index and obj.empty:
+                num_empty_input_frames += 1
+                result_index_length += len(obj)
+                empty_has_index = empty_has_index or len(obj) > 0
+
+        if join == "inner":
+            sets_of_column_names = [set(obj._column_names) for obj in objs]
+
+            intersecting_columns = functools.reduce(
+                set.intersection, sets_of_column_names
+            )
+            union_of_columns = functools.reduce(
+                set.union, sets_of_column_names
+            )
+            non_intersecting_columns = union_of_columns.symmetric_difference(
+                intersecting_columns
+            )
+
+            # Get an ordered list of the intersecting columns to preserve input
+            # order, which is promised by pandas for inner joins.
+            ordered_intersecting_columns = [
+                name
+                for obj in objs
+                for name in obj._column_names
+                if name in intersecting_columns
+            ]
+
+            names = dict.fromkeys(ordered_intersecting_columns).keys()
+
+            if axis == 0:
+                if ignore_index and (
+                    num_empty_input_frames > 0
+                    or len(intersecting_columns) == 0
+                ):
+                    # When ignore_index is True and there is at least
+                    # 1 empty dataframe, or no intersecting columns
+                    # are present, an empty dataframe needs to be
+                    # returned with just an Index.
+                    empty_has_index = True
+                    num_empty_input_frames = len(objs)
+                    result_index_length = sum(len(obj) for obj in objs)
+
+                # remove columns not present in all objs
+                for obj in objs:
+                    obj.drop(
+                        columns=non_intersecting_columns,
+                        inplace=True,
+                        errors="ignore",
+                    )
+        elif join == "outer":
+            # Get a list of the unique table column names
+            names = [name for f in objs for name in f._column_names]
+            names = dict.fromkeys(names).keys()
+
+        else:
+            raise ValueError(
+                "Only can inner (intersect) or outer (union) when joining"
+                "the other axis"
+            )
+
+        if sort:
+            try:
+                # Sorted always returns a list, but will fail to sort if names
+                # include different types that are not comparable.
+                names = sorted(names)
+            except TypeError:
+                names = list(names)
+        else:
+            names = list(names)
+
+        # Combine the index and table columns for each Frame into a list of
+        # [...index_cols, ...table_cols].
+        #
+        # If any of the input frames have a non-empty index, include these
+        # columns in the list of columns to concatenate, even if the input
+        # frames are empty and `ignore_index=True`.
+        columns = [
+            (
+                []
+                if (ignore_index and not empty_has_index)
+                else list(f._index._data.columns)
+            )
+            + [f._data[name] if name in f._data else None for name in names]
+            for f in objs
+        ]
+
+        # Get a list of the combined index and table column indices
+        indices = list(range(functools.reduce(max, map(len, columns))))
+        # The position of the first table column in each
+        # combined index + table columns list
+        first_data_column_position = len(indices) - len(names)
+
+        # Get the non-null columns and their dtypes
+        non_null_cols, dtypes = _get_non_null_cols_and_dtypes(indices, columns)
+
+        # Infer common dtypes between numeric columns
+        # and combine CategoricalColumn categories
+        categories = _find_common_dtypes_and_categories(non_null_cols, dtypes)
+
+        # Cast all columns to a common dtype, assign combined categories,
+        # and back-fill missing columns with all-null columns
+        _cast_cols_to_common_dtypes(indices, columns, dtypes, categories)
+
+        # Construct input tables with the index and data columns in the same
+        # order. This strips the given index/column names and replaces the
+        # names with their integer positions in the `cols` list
+        tables = []
+        for cols in columns:
+            table_index = None
+            if 1 == first_data_column_position:
+                table_index = cudf.core.index.as_index(cols[0])
+            elif first_data_column_position > 1:
+                table_index = libcudf.table.Table(
+                    data=dict(
+                        zip(
+                            indices[:first_data_column_position],
+                            cols[:first_data_column_position],
+                        )
+                    )
+                )
+            tables.append(
+                libcudf.table.Table(
+                    data=dict(
+                        zip(
+                            indices[first_data_column_position:],
+                            cols[first_data_column_position:],
+                        )
+                    ),
+                    index=table_index,
+                )
+            )
+
+        # Concatenate the Tables
+        out = cls._from_data(
+            *libcudf.concat.concat_tables(tables, ignore_index)
+        )
+
+        # If ignore_index is True, all input frames are empty, and at
+        # least one input frame has an index, assign a new RangeIndex
+        # to the result frame.
+        if empty_has_index and num_empty_input_frames == len(objs):
+            out._index = cudf.RangeIndex(result_index_length)
+        # Reassign the categories for any categorical table cols
+        _reassign_categories(
+            categories, out._data, indices[first_data_column_position:]
+        )
+
+        # Reassign the categories for any categorical index cols
+        if not isinstance(out._index, cudf.RangeIndex):
+            _reassign_categories(
+                categories,
+                out._index._data,
+                indices[:first_data_column_position],
+            )
+            if not isinstance(
+                out._index, cudf.MultiIndex
+            ) and is_categorical_dtype(out._index._values.dtype):
+                out = out.set_index(
+                    cudf.core.index.as_index(out.index._values)
+                )
+
+        # Reassign precision for any decimal cols
+        for name, col in out._data.items():
+            if isinstance(col, cudf.core.column.Decimal64Column):
+                col = col._with_type_metadata(tables[0]._data[name].dtype)
+
+        # Reassign index and column names
+        if isinstance(objs[0].columns, pd.MultiIndex):
+            out.columns = objs[0].columns
+        else:
+            out.columns = names
+        if not ignore_index:
+            out._index.name = objs[0]._index.name
+            out._index.names = objs[0]._index.names
+
+        return out
+
     def to_string(self):
         """
         Convert to string
@@ -7684,3 +7895,95 @@ def _drop_columns(df: DataFrame, columns: Iterable, errors: str):
                 pass
             else:
                 raise e
+
+
+# Create a dictionary of the common, non-null columns
+def _get_non_null_cols_and_dtypes(col_idxs, list_of_columns):
+    # A mapping of {idx: np.dtype}
+    dtypes = dict()
+    # A mapping of {idx: [...columns]}, where `[...columns]`
+    # is a list of columns with at least one valid value for each
+    # column name across all input frames
+    non_null_columns = dict()
+    for idx in col_idxs:
+        for cols in list_of_columns:
+            # Skip columns not in this frame
+            if idx >= len(cols) or cols[idx] is None:
+                continue
+            # Store the first dtype we find for a column, even if it's
+            # all-null. This ensures we always have at least one dtype
+            # for each name. This dtype will be overwritten later if a
+            # non-null Column with the same name is found.
+            if idx not in dtypes:
+                dtypes[idx] = cols[idx].dtype
+            if cols[idx].valid_count > 0:
+                if idx not in non_null_columns:
+                    non_null_columns[idx] = [cols[idx]]
+                else:
+                    non_null_columns[idx].append(cols[idx])
+    return non_null_columns, dtypes
+
+
+def _find_common_dtypes_and_categories(non_null_columns, dtypes):
+    # A mapping of {idx: categories}, where `categories` is a
+    # column of all the unique categorical values from each
+    # categorical column across all input frames
+    categories = dict()
+    for idx, cols in non_null_columns.items():
+        # default to the first non-null dtype
+        dtypes[idx] = cols[0].dtype
+        # If all the non-null dtypes are int/float, find a common dtype
+        if all(is_numerical_dtype(col.dtype) for col in cols):
+            dtypes[idx] = find_common_type([col.dtype for col in cols])
+        # If all categorical dtypes, combine the categories
+        elif all(
+            isinstance(col, cudf.core.column.CategoricalColumn) for col in cols
+        ):
+            # Combine and de-dupe the categories
+            categories[idx] = (
+                cudf.Series(concat_columns([col.categories for col in cols]))
+                .drop_duplicates(ignore_index=True)
+                ._column
+            )
+            # Set the column dtype to the codes' dtype. The categories
+            # will be re-assigned at the end
+            dtypes[idx] = min_scalar_type(len(categories[idx]))
+        # Otherwise raise an error if columns have different dtypes
+        elif not all(is_dtype_equal(c.dtype, dtypes[idx]) for c in cols):
+            raise ValueError("All columns must be the same type")
+    return categories
+
+
+def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories):
+    # Cast all columns to a common dtype, assign combined categories,
+    # and back-fill missing columns with all-null columns
+    for idx in col_idxs:
+        dtype = dtypes[idx]
+        for cols in list_of_columns:
+            # If column not in this df, fill with an all-null column
+            if idx >= len(cols) or cols[idx] is None:
+                n = len(next(x for x in cols if x is not None))
+                cols[idx] = column_empty(row_count=n, dtype=dtype, masked=True)
+            else:
+                # If column is categorical, rebase the codes with the
+                # combined categories, and cast the new codes to the
+                # min-scalar-sized dtype
+                if idx in categories:
+                    cols[idx] = (
+                        cols[idx]
+                        ._set_categories(categories[idx], is_unique=True,)
+                        .codes
+                    )
+                cols[idx] = cols[idx].astype(dtype)
+
+
+def _reassign_categories(categories, cols, col_idxs):
+    for name, idx in zip(cols, col_idxs):
+        if idx in categories:
+            cols[name] = build_categorical_column(
+                categories=categories[idx],
+                codes=as_column(cols[name].base_data, dtype=cols[name].dtype),
+                mask=cols[name].base_mask,
+                offset=cols[name].offset,
+                size=cols[name].size,
+            )
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index c27c9a6b349..6d8ecd2572f 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import copy
-import functools
 import warnings
 from collections import abc
 from typing import (
@@ -26,27 +25,22 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._typing import ColumnLike, DataFrameOrSeries
-from cudf.api.types import is_dict_like, is_dtype_equal, issubdtype
+from cudf.api.types import is_dict_like, issubdtype
 from cudf.core.column import (
     ColumnBase,
     as_column,
     build_categorical_column,
     column_empty,
-    concat_columns,
 )
 from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.join import merge
 from cudf.utils.dtypes import (
     _is_non_decimal_numeric_dtype,
     _is_scalar_or_zero_d_array,
-    find_common_type,
-    is_categorical_dtype,
     is_column_like,
     is_decimal_dtype,
     is_integer_dtype,
-    is_numerical_dtype,
     is_scalar,
-    min_scalar_type,
 )
 
 T = TypeVar("T", bound="Frame")
@@ -332,209 +326,6 @@ def copy(self: T, deep: bool = True) -> T:
 
         return new_frame
 
-    @classmethod
-    @annotate("CONCAT", color="orange", domain="cudf_python")
-    def _concat(
-        cls, objs, axis=0, join="outer", ignore_index=False, sort=False
-    ):
-        # flag to indicate at least one empty input frame also has an index
-        empty_has_index = False
-        # length of output frame's RangeIndex if all input frames are empty,
-        # and at least one has an index
-        result_index_length = 0
-        # the number of empty input frames
-        num_empty_input_frames = 0
-
-        for i, obj in enumerate(objs):
-            # shallow-copy the input DFs in case the same DF instance
-            # is concatenated with itself
-            objs[i] = obj.copy(deep=False)
-
-            # If ignore_index is true, determine if
-            # all or some objs are empty(and have index).
-            # 1. If all objects are empty(and have index), we
-            # should set the index separately using RangeIndex.
-            # 2. If some objects are empty(and have index), we
-            # create empty columns later while populating `columns`
-            # variable. Detailed explanation of second case before
-            # allocation of `columns` variable below.
-            if ignore_index and obj.empty:
-                num_empty_input_frames += 1
-                result_index_length += len(obj)
-                empty_has_index = empty_has_index or len(obj) > 0
-
-        if join == "inner":
-            sets_of_column_names = [set(obj._column_names) for obj in objs]
-
-            intersecting_columns = functools.reduce(
-                set.intersection, sets_of_column_names
-            )
-            union_of_columns = functools.reduce(
-                set.union, sets_of_column_names
-            )
-            non_intersecting_columns = union_of_columns.symmetric_difference(
-                intersecting_columns
-            )
-
-            # Get an ordered list of the intersecting columns to preserve input
-            # order, which is promised by pandas for inner joins.
-            ordered_intersecting_columns = [
-                name
-                for obj in objs
-                for name in obj._column_names
-                if name in intersecting_columns
-            ]
-
-            names = dict.fromkeys(ordered_intersecting_columns).keys()
-
-            if axis == 0:
-                if ignore_index and (
-                    num_empty_input_frames > 0
-                    or len(intersecting_columns) == 0
-                ):
-                    # When ignore_index is True and if there is
-                    # at least 1 empty dataframe and no
-                    # intersecting columns are present, an empty dataframe
-                    # needs to be returned just with an Index.
-                    empty_has_index = True
-                    num_empty_input_frames = len(objs)
-                    result_index_length = sum(len(obj) for obj in objs)
-
-                # remove columns not present in all objs
-                for obj in objs:
-                    obj.drop(
-                        columns=non_intersecting_columns,
-                        inplace=True,
-                        errors="ignore",
-                    )
-        elif join == "outer":
-            # Get a list of the unique table column names
-            names = [name for f in objs for name in f._column_names]
-            names = dict.fromkeys(names).keys()
-
-        else:
-            raise ValueError(
-                "Only can inner (intersect) or outer (union) when joining"
-                "the other axis"
-            )
-
-        if sort:
-            try:
-                # Sorted always returns a list, but will fail to sort if names
-                # include different types that are not comparable.
-                names = sorted(names)
-            except TypeError:
-                names = list(names)
-        else:
-            names = list(names)
-
-        # Combine the index and table columns for each Frame into a list of
-        # [...index_cols, ...table_cols].
-        #
-        # If any of the input frames have a non-empty index, include these
-        # columns in the list of columns to concatenate, even if the input
-        # frames are empty and `ignore_index=True`.
-        columns = [
-            (
-                []
-                if (ignore_index and not empty_has_index)
-                else list(f._index._data.columns)
-            )
-            + [f._data[name] if name in f._data else None for name in names]
-            for f in objs
-        ]
-
-        # Get a list of the combined index and table column indices
-        indices = list(range(functools.reduce(max, map(len, columns))))
-        # The position of the first table colum in each
-        # combined index + table columns list
-        first_data_column_position = len(indices) - len(names)
-
-        # Get the non-null columns and their dtypes
-        non_null_cols, dtypes = _get_non_null_cols_and_dtypes(indices, columns)
-
-        # Infer common dtypes between numeric columns
-        # and combine CategoricalColumn categories
-        categories = _find_common_dtypes_and_categories(non_null_cols, dtypes)
-
-        # Cast all columns to a common dtype, assign combined categories,
-        # and back-fill missing columns with all-null columns
-        _cast_cols_to_common_dtypes(indices, columns, dtypes, categories)
-
-        # Construct input tables with the index and data columns in the same
-        # order. This strips the given index/column names and replaces the
-        # names with their integer positions in the `cols` list
-        tables = []
-        for cols in columns:
-            table_index = None
-            if 1 == first_data_column_position:
-                table_index = cudf.core.index.as_index(cols[0])
-            elif first_data_column_position > 1:
-                table_index = libcudf.table.Table(
-                    data=dict(
-                        zip(
-                            indices[:first_data_column_position],
-                            cols[:first_data_column_position],
-                        )
-                    )
-                )
-            tables.append(
-                libcudf.table.Table(
-                    data=dict(
-                        zip(
-                            indices[first_data_column_position:],
-                            cols[first_data_column_position:],
-                        )
-                    ),
-                    index=table_index,
-                )
-            )
-
-        # Concatenate the Tables
-        out = cls._from_data(
-            *libcudf.concat.concat_tables(tables, ignore_index)
-        )
-
-        # If ignore_index is True, all input frames are empty, and at
-        # least one input frame has an index, assign a new RangeIndex
-        # to the result frame.
-        if empty_has_index and num_empty_input_frames == len(objs):
-            out._index = cudf.RangeIndex(result_index_length)
-        # Reassign the categories for any categorical table cols
-        _reassign_categories(
-            categories, out._data, indices[first_data_column_position:]
-        )
-
-        # Reassign the categories for any categorical index cols
-        if not isinstance(out._index, cudf.RangeIndex):
-            _reassign_categories(
-                categories,
-                out._index._data,
-                indices[:first_data_column_position],
-            )
-            if not isinstance(
-                out._index, cudf.MultiIndex
-            ) and is_categorical_dtype(out._index._values.dtype):
-                out = out.set_index(
-                    cudf.core.index.as_index(out.index._values)
-                )
-
-        # Reassign precision for any decimal cols
-        for name, col in out._data.items():
-            if isinstance(col, cudf.core.column.Decimal64Column):
-                col = col._with_type_metadata(tables[0]._data[name].dtype)
-
-        # Reassign index and column names
-        if isinstance(objs[0].columns, pd.MultiIndex):
-            out.columns = objs[0].columns
-        else:
-            out.columns = names
-        if not ignore_index:
-            out._index.name = objs[0]._index.name
-            out._index.names = objs[0]._index.names
-
-        return out
-
     def equals(self, other, **kwargs):
         """
         Test whether two objects contain the same elements.
@@ -4754,86 +4545,6 @@ def _get_replacement_values_for_columns(
     return all_na_columns, to_replace_columns, values_columns
 
 
-# Create a dictionary of the common, non-null columns
-def _get_non_null_cols_and_dtypes(col_idxs, list_of_columns):
-    # A mapping of {idx: np.dtype}
-    dtypes = dict()
-    # A mapping of {idx: [...columns]}, where `[...columns]`
-    # is a list of columns with at least one valid value for each
-    # column name across all input frames
-    non_null_columns = dict()
-    for idx in col_idxs:
-        for cols in list_of_columns:
-            # Skip columns not in this frame
-            if idx >= len(cols) or cols[idx] is None:
-                continue
-            # Store the first dtype we find for a column, even if it's
-            # all-null. This ensures we always have at least one dtype
-            # for each name. This dtype will be overwritten later if a
-            # non-null Column with the same name is found.
-            if idx not in dtypes:
-                dtypes[idx] = cols[idx].dtype
-            if cols[idx].valid_count > 0:
-                if idx not in non_null_columns:
-                    non_null_columns[idx] = [cols[idx]]
-                else:
-                    non_null_columns[idx].append(cols[idx])
-    return non_null_columns, dtypes
-
-
-def _find_common_dtypes_and_categories(non_null_columns, dtypes):
-    # A mapping of {idx: categories}, where `categories` is a
-    # column of all the unique categorical values from each
-    # categorical column across all input frames
-    categories = dict()
-    for idx, cols in non_null_columns.items():
-        # default to the first non-null dtype
-        dtypes[idx] = cols[0].dtype
-        # If all the non-null dtypes are int/float, find a common dtype
-        if all(is_numerical_dtype(col.dtype) for col in cols):
-            dtypes[idx] = find_common_type([col.dtype for col in cols])
-        # If all categorical dtypes, combine the categories
-        elif all(
-            isinstance(col, cudf.core.column.CategoricalColumn) for col in cols
-        ):
-            # Combine and de-dupe the categories
-            categories[idx] = (
-                cudf.Series(concat_columns([col.categories for col in cols]))
-                .drop_duplicates(ignore_index=True)
-                ._column
-            )
-            # Set the column dtype to the codes' dtype. The categories
-            # will be re-assigned at the end
-            dtypes[idx] = min_scalar_type(len(categories[idx]))
-        # Otherwise raise an error if columns have different dtypes
-        elif not all(is_dtype_equal(c.dtype, dtypes[idx]) for c in cols):
-            raise ValueError("All columns must be the same type")
-    return categories
-
-
-def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories):
-    # Cast all columns to a common dtype, assign combined categories,
-    # and back-fill missing columns with all-null columns
-    for idx in col_idxs:
-        dtype = dtypes[idx]
-        for cols in list_of_columns:
-            # If column not in this df, fill with an all-null column
-            if idx >= len(cols) or cols[idx] is None:
-                n = len(next(x for x in cols if x is not None))
-                cols[idx] = column_empty(row_count=n, dtype=dtype, masked=True)
-            else:
-                # If column is categorical, rebase the codes with the
-                # combined categories, and cast the new codes to the
-                # min-scalar-sized dtype
-                if idx in categories:
-                    cols[idx] = (
-                        cols[idx]
-                        ._set_categories(categories[idx], is_unique=True,)
-                        .codes
-                    )
-                cols[idx] = cols[idx].astype(dtype)
-
-
 def _reassign_categories(categories, cols, col_idxs):
     for name, idx in zip(cols, col_idxs):
         if idx in categories:

From fe4ab3b5dc32fd5c48e7bda858d05838f1dbeb20 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 12:30:26 -0700
Subject: [PATCH 10/28] Move iter method.

---
 python/cudf/cudf/core/frame.py      | 12 ------------
 python/cudf/cudf/core/index.py      |  9 ---------
 python/cudf/cudf/core/multiindex.py |  9 +++++++++
 3 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 6d8ecd2572f..726769a8b1e 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -4545,18 +4545,6 @@ def _get_replacement_values_for_columns(
     return all_na_columns, to_replace_columns, values_columns
 
 
-def _reassign_categories(categories, cols, col_idxs):
-    for name, idx in zip(cols, col_idxs):
-        if idx in categories:
-            cols[name] = build_categorical_column(
-                categories=categories[idx],
-                codes=as_column(cols[name].base_data, dtype=cols[name].dtype),
-                mask=cols[name].base_mask,
-                offset=cols[name].offset,
-                size=cols[name].size,
-            )
-
-
 def _is_series(obj):
     """
     Checks if the `obj` is of type `cudf.Series`
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 0e15db00a16..0a624298173 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -120,15 +120,6 @@ def values(self):
     def get_loc(self, key, method=None, tolerance=None):
         raise NotImplementedError
 
-    def __iter__(self):
-        """
-        Iterating over a GPU object is not effecient and hence not supported.
-
-        Consider using ``.to_arrow()``, ``.to_pandas()`` or ``.values_host``
-        if you wish to iterate over the values.
-        """
-        cudf.utils.utils.raise_iteration_error(obj=self)
-
     def __getitem__(self, key):
         raise NotImplementedError()
 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 4b595727252..0a4aafc3287 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -435,6 +435,15 @@ def deepcopy(self):
     def __copy__(self):
         return self.copy(deep=True)
 
+    def __iter__(self):
+        """
+        Iterating over a GPU object is not efficient and hence not supported.
+
+        Consider using ``.to_arrow()``, ``.to_pandas()`` or ``.values_host``
+        if you wish to iterate over the values.
+        """
+        cudf.utils.utils.raise_iteration_error(obj=self)
+
     def _popn(self, n):
         """ Returns a copy of this index without the left-most n values.
 

From feaafa7e135075c46cc8af44e92429dd3ea82ccc Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 13:46:19 -0700
Subject: [PATCH 11/28] Instantiate accessors explicitly in necessary classes.

---
 python/cudf/cudf/core/dataframe.py | 3 ++-
 python/cudf/cudf/core/frame.py     | 6 ------
 python/cudf/cudf/core/index.py     | 4 +++-
 python/cudf/cudf/core/series.py    | 4 +++-
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c84e9c379d3..14e0718fe75 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -11,7 +11,7 @@
 import warnings
 from collections import defaultdict
 from collections.abc import Iterable, Sequence
-from typing import Any, MutableMapping, Optional, TypeVar
+from typing import Any, MutableMapping, Optional, Set, TypeVar
 
 import cupy
 import numpy as np
@@ -169,6 +169,7 @@ class DataFrame(Frame, Serializable, GetAttrGetItemMixin):
     """
 
     _PROTECTED_KEYS = frozenset(("_column_accessor", "_data", "_index"))
+    _accessors: Set[Any] = set()
 
     @annotate("DATAFRAME_INIT", color="blue", domain="cudf_python")
     def __init__(self, data=None, index=None, columns=None, dtype=None):
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 726769a8b1e..5830b9624f2 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -60,12 +60,6 @@ class Frame(libcudf.table.Table):
 
     _data: "ColumnAccessor"
 
-    @classmethod
-    def __init_subclass__(cls):
-        # All subclasses contain a set _accessors that is used to hold custom
-        # accessors defined by user APIs (see cudf/api/extensions/accessor.py).
-        cls._accessors = set()
-
     @classmethod
     def _from_data(
         cls,
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 0a624298173..6b5ee7148e2 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -10,6 +10,7 @@
     List,
     MutableMapping,
     Optional,
+    Set,
     Tuple,
     Type,
     Union,
@@ -93,6 +94,7 @@ class BaseIndex(Serializable):
     """Base class for all cudf Index types."""
 
     dtype: DtypeObj
+    _accessors: Set[Any] = set()
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
 
@@ -1436,7 +1438,7 @@ def __getattr__(self, key):
         # For methods that are not defined for RangeIndex we attempt to operate
         # on the corresponding integer index if possible.
         try:
-            return getattr(cudf.Index._from_data(self._data), key)
+            return getattr(cudf.Int64Index._from_data(self._data), key)
         except AttributeError:
             raise AttributeError(
                 f"'{type(self)}' object has no attribute {key}"
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 8a92f15021a..703ec8f6b88 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -7,7 +7,7 @@
 from collections import abc as abc
 from numbers import Number
 from shutil import get_terminal_size
-from typing import Any, MutableMapping, Optional
+from typing import Any, MutableMapping, Optional, Set
 from uuid import uuid4
 
 import cupy
@@ -106,6 +106,8 @@ class Series(SingleColumnFrame, Serializable):
         If ``False``, leaves ``np.nan`` values as is.
     """
 
+    _accessors: Set[Any] = set()
+
     # The `constructor*` properties are used by `dask` (and `dask_cudf`)
     @property
     def _constructor(self):

From 535e1f0a575d1e0562aa31f224c6a9fcf1160034 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 15:35:30 -0700
Subject: [PATCH 12/28] Fix most errors.

---
 python/cudf/cudf/_lib/groupby.pyx           |  23 ++--
 python/cudf/cudf/_lib/utils.pyx             |   5 +-
 python/cudf/cudf/core/index.py              | 132 ++++++++++++++------
 python/cudf/cudf/core/join/_join_helpers.py |   6 +-
 python/cudf/cudf/core/multiindex.py         |  14 ++-
 5 files changed, 128 insertions(+), 52 deletions(-)

diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index ed9820300d8..a00f5140dcd 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -94,10 +94,12 @@ cdef class GroupBy:
         c_grouped_values = move(c_groups.values)
         c_group_offsets = c_groups.offsets
 
-        grouped_keys = cudf.Index._from_data(*data_from_unique_ptr(
-            move(c_grouped_keys),
-            column_names=range(c_grouped_keys.get()[0].num_columns())
-        ))
+        grouped_keys = cudf.core.index.GenericIndex._from_data(
+            *data_from_unique_ptr(
+                move(c_grouped_keys),
+                column_names=range(c_grouped_keys.get()[0].num_columns())
+            )
+        )
         grouped_values = data_from_unique_ptr(
             move(c_grouped_values),
             index_names=values._index_names,
@@ -216,7 +218,8 @@ cdef class GroupBy:
                     Column.from_unique_ptr(move(c_result.second[i].results[j]))
                 )
 
-        return result_data, cudf.Index._from_data(grouped_keys)
+        return result_data, cudf.core.index.GenericIndex._from_data(
+            grouped_keys)
 
     def shift(self, Table values, int periods, list fill_values):
         cdef table_view view = values.view()
@@ -241,10 +244,12 @@ cdef class GroupBy:
                 self.c_obj.get()[0].shift(view, offsets, c_fill_values)
             )
 
-        grouped_keys = cudf.Index._from_data(*data_from_unique_ptr(
-            move(c_result.first),
-            column_names=self.keys._column_names
-        ))
+        grouped_keys = cudf.core.index.GenericIndex._from_data(
+            *data_from_unique_ptr(
+                move(c_result.first),
+                column_names=self.keys._column_names
+            )
+        )
 
         shifted, _ = data_from_unique_ptr(
             move(c_result.second), column_names=values._column_names
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 81b62159b59..93cf72aa11b 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -251,7 +251,7 @@ cdef data_from_unique_ptr(
         # Frame factories we may want to look for a less dissonant approach
         # that does not impose performance penalties. The same applies to
         # data_from_table_view below.
-        cudf.Index._from_data(
+        cudf.core.index.GenericIndex._from_data(
             {
                 name: columns[i]
                 for i, name in enumerate(index_names)
@@ -301,7 +301,8 @@ cdef data_from_table_view(
                 )
             )
             column_idx += 1
-        index = cudf.Index._from_data(dict(zip(index_names, index_columns)))
+        index = cudf.core.index.GenericIndex._from_data(
+            dict(zip(index_names, index_columns)))
 
     # Construct the data dict
     cdef size_type source_column_idx = 0
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 6b5ee7148e2..d90aae82466 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1258,7 +1258,7 @@ def equals(self, other):
                 other._step,
             ):
                 return True
-        return cudf.Index._from_data(self._data).equals(other)
+        return cudf.Int64Index._from_data(self._data).equals(other)
 
     def serialize(self):
         header = {}
@@ -1445,11 +1445,65 @@ def __getattr__(self, key):
             )
 
     def get_loc(self, key, method=None, tolerance=None):
-        return cudf.Index._from_data(self._data).get_loc(
+        return cudf.Int64Index._from_data(self._data).get_loc(
             key, method=method, tolerance=tolerance
         )
 
 
+# Patch in all binops and unary ops, which bypass __getattr__ on the instance
+# and prevent the above overload from working.
+for binop in (
+    "__add__",
+    "__radd__",
+    "__sub__",
+    "__rsub__",
+    "__mul__",
+    "__rmul__",
+    "__mod__",
+    "__rmod__",
+    "__pow__",
+    "__rpow__",
+    "__floordiv__",
+    "__rfloordiv__",
+    "__truediv__",
+    "__rtruediv__",
+    "__and__",
+    "__or__",
+    "__xor__",
+    "__eq",
+    "__ne",
+    "__lt__",
+    "__le__",
+    "__gt__",
+    "__ge__",
+):
+    setattr(
+        RangeIndex,
+        binop,
+        (
+            lambda self, other: getattr(
+                cudf.Int64Index._from_data(self._data), binop
+            )(other)
+        ),
+    )
+
+
+for unaop in (
+    "__neg__",
+    "__pos__",
+    "__abs__",
+):
+    setattr(
+        RangeIndex,
+        binop,
+        (
+            lambda self: getattr(
+                cudf.Int64Index._from_data(self._data), binop
+            )()
+        ),
+    )
+
+
 class GenericIndex(SingleColumnFrame, BaseIndex):
     """
     An array of orderable values that represent the indices of another Column
@@ -1517,6 +1571,43 @@ def drop_duplicates(self, keep="first"):
         """  # noqa: E501
         return super().drop_duplicates(keep=keep)
 
+    @classmethod
+    def _from_data(
+        cls,
+        data: MutableMapping,
+        index: Optional[BaseIndex] = None,
+        name: Any = None,
+    ) -> BaseIndex:
+        assert index is None
+        if not isinstance(data, cudf.core.column_accessor.ColumnAccessor):
+            data = cudf.core.column_accessor.ColumnAccessor(data)
+        if len(data) == 0:
+            raise ValueError("Cannot construct Index from any empty Table")
+        if len(data) == 1:
+            values = next(iter(data.values()))
+
+            if isinstance(values, NumericalColumn):
+                try:
+                    index_class_type: Type[GenericIndex] = _dtype_to_index[
+                        values.dtype.type
+                    ]
+                except KeyError:
+                    index_class_type = GenericIndex
+                out = index_class_type.__new__(index_class_type)
+            elif isinstance(values, DatetimeColumn):
+                out = DatetimeIndex.__new__(DatetimeIndex)
+            elif isinstance(values, TimeDeltaColumn):
+                out = TimedeltaIndex.__new__(TimedeltaIndex)
+            elif isinstance(values, StringColumn):
+                out = StringIndex.__new__(StringIndex)
+            elif isinstance(values, CategoricalColumn):
+                out = CategoricalIndex.__new__(CategoricalIndex)
+            out._data = data
+            out._index = None
+            return out
+        else:
+            return cudf.MultiIndex._from_data(data)
+
     def _copy_type_metadata(
         self, other: Frame, include_index: bool = True
     ) -> GenericIndex:
@@ -3161,43 +3252,6 @@ def __new__(
 
         return as_index(data, copy=copy, dtype=dtype, name=name, **kwargs)
 
-    @classmethod
-    def _from_data(
-        cls,
-        data: MutableMapping,
-        index: Optional[BaseIndex] = None,
-        name: Any = None,
-    ) -> BaseIndex:
-        assert index is None
-        if not isinstance(data, cudf.core.column_accessor.ColumnAccessor):
-            data = cudf.core.column_accessor.ColumnAccessor(data)
-        if len(data) == 0:
-            raise ValueError("Cannot construct Index from any empty Table")
-        if len(data) == 1:
-            values = next(iter(data.values()))
-
-            if isinstance(values, NumericalColumn):
-                try:
-                    index_class_type: Type[GenericIndex] = _dtype_to_index[
-                        values.dtype.type
-                    ]
-                except KeyError:
-                    index_class_type = GenericIndex
-                out = index_class_type.__new__(index_class_type)
-            elif isinstance(values, DatetimeColumn):
-                out = DatetimeIndex.__new__(DatetimeIndex)
-            elif isinstance(values, TimeDeltaColumn):
-                out = TimedeltaIndex.__new__(TimedeltaIndex)
-            elif isinstance(values, StringColumn):
-                out = StringIndex.__new__(StringIndex)
-            elif isinstance(values, CategoricalColumn):
-                out = CategoricalIndex.__new__(CategoricalIndex)
-            out._data = data
-            out._index = None
-            return out
-        else:
-            return cudf.MultiIndex._from_data(data)
-
     @classmethod
     def from_arrow(cls, obj):
         try:
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 78fc7a863d6..96c48722ee6 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -70,7 +70,11 @@ def _frame_select_by_indexers(
         else:
             data.set_by_label(idx.name, idx.get(frame), validate=False)
 
-    result_index = cudf.Index._from_data(index_data) if index_data else None
+    result_index = (
+        cudf.core.index.GenericIndex._from_data(index_data)
+        if index_data
+        else None
+    )
     result = cudf.core.frame.Frame(data=data, index=result_index)
     return result
 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 0a4aafc3287..36422a2855c 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1347,7 +1347,7 @@ def _poplevels(self, level):
             popped_data[n] = self._data.pop(n)
 
         # construct the popped result
-        popped = cudf.Index._from_data(popped_data)
+        popped = cudf.core.index.GenericIndex._from_data(popped_data)
         popped.names = popped_names
 
         # update self
@@ -1486,6 +1486,18 @@ def is_unique(self):
             )
         return self._is_unique
 
+    @property
+    def is_monotonic(self):
+        """Return boolean if values in the object are monotonic_increasing.
+
+        This property is an alias for :attr:`is_monotonic_increasing`.
+
+        Returns
+        -------
+        bool
+        """
+        return self.is_monotonic_increasing
+
     @property
     def is_monotonic_increasing(self):
         """

From 40066c58eedc16e6dcbbca294c929f34227837f1 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 16:44:13 -0700
Subject: [PATCH 13/28] Force binding of binop arg.

---
 python/cudf/cudf/core/index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index d90aae82466..e015b150084 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1481,7 +1481,7 @@ def get_loc(self, key, method=None, tolerance=None):
         RangeIndex,
         binop,
         (
-            lambda self, other: getattr(
+            lambda self, other, binop=binop: getattr(
                 cudf.Int64Index._from_data(self._data), binop
             )(other)
         ),

From 8ad1d7a6a05457b2d4d80fa729a011e51ab42221 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 16:51:13 -0700
Subject: [PATCH 14/28] Fix typo in monkey-patched operator.

---
 python/cudf/cudf/core/index.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index e015b150084..069056bf45f 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1470,8 +1470,8 @@ def get_loc(self, key, method=None, tolerance=None):
     "__and__",
     "__or__",
     "__xor__",
-    "__eq",
-    "__ne",
+    "__eq__",
+    "__ne__",
     "__lt__",
     "__le__",
     "__gt__",

From 0eaca290541b7aeb89fb6f7e882dd190e7ac1fbc Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 17:00:10 -0700
Subject: [PATCH 15/28] Minor cleanup.

---
 python/cudf/cudf/core/index.py | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 069056bf45f..f4b671a3b37 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1480,27 +1480,19 @@ def get_loc(self, key, method=None, tolerance=None):
     setattr(
         RangeIndex,
         binop,
-        (
-            lambda self, other, binop=binop: getattr(
-                cudf.Int64Index._from_data(self._data), binop
-            )(other)
-        ),
+        lambda self, other, binop=binop: getattr(
+            cudf.Int64Index._from_data(self._data), binop
+        )(other),
     )
 
 
-for unaop in (
-    "__neg__",
-    "__pos__",
-    "__abs__",
-):
+for unaop in ("__neg__", "__pos__", "__abs__"):
     setattr(
         RangeIndex,
         binop,
-        (
-            lambda self: getattr(
-                cudf.Int64Index._from_data(self._data), binop
-            )()
-        ),
+        lambda self, binop=binop: getattr(
+            cudf.Int64Index._from_data(self._data), binop
+        )(),
     )
 
 

From de03b37c8505ee0eb6dd234a5fff1b03f61c2600 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 17:04:09 -0700
Subject: [PATCH 16/28] Standardize RangeIndex->Int64Index conversion.

---
 python/cudf/cudf/core/index.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index f4b671a3b37..f700a0961d3 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1434,18 +1434,23 @@ def __mul__(self, other):
             )
         return super().__mul__(other)
 
+    def _as_int64(self):
+        # Convert self to an Int64Index. This method is used to perform ops
+        # that are not defined directly on RangeIndex.
+        return cudf.Int64Index._from_data(self._data)
+
     def __getattr__(self, key):
         # For methods that are not defined for RangeIndex we attempt to operate
         # on the corresponding integer index if possible.
         try:
-            return getattr(cudf.Int64Index._from_data(self._data), key)
+            return getattr(self._as_int64(), key)
         except AttributeError:
             raise AttributeError(
                 f"'{type(self)}' object has no attribute {key}"
             )
 
     def get_loc(self, key, method=None, tolerance=None):
-        return cudf.Int64Index._from_data(self._data).get_loc(
+        return self._as_int64().get_loc(
             key, method=method, tolerance=tolerance
         )
 
@@ -1480,9 +1485,7 @@ def get_loc(self, key, method=None, tolerance=None):
     setattr(
         RangeIndex,
         binop,
-        lambda self, other, binop=binop: getattr(
-            cudf.Int64Index._from_data(self._data), binop
-        )(other),
+        lambda self, other, op=binop: getattr(self._as_int64(), op)(other),
     )
 
 
@@ -1490,9 +1493,7 @@ def get_loc(self, key, method=None, tolerance=None):
     setattr(
         RangeIndex,
-        binop,
-        lambda self, binop=binop: getattr(
-            cudf.Int64Index._from_data(self._data), binop
-        )(),
+        unaop,
+        lambda self, op=unaop: getattr(self._as_int64(), op)(),
     )
 
 

From e55e049d1db49e42cfbea36c988f55ea040d6976 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 13 Aug 2021 17:27:02 -0700
Subject: [PATCH 17/28] Add missing rmul operator.

---
 python/cudf/cudf/core/index.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index f700a0961d3..f8210c4f8c9 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1434,6 +1434,10 @@ def __mul__(self, other):
             )
         return super().__mul__(other)
 
+    def __rmul__(self, other):
+        # Multiplication is commutative.
+        return self.__mul__(other)
+
     def _as_int64(self):
         # Convert self to an Int64Index. This method is used to perform ops
         # that are not defined directly on RangeIndex.
@@ -1462,8 +1466,6 @@ def get_loc(self, key, method=None, tolerance=None):
     "__radd__",
     "__sub__",
     "__rsub__",
-    "__mul__",
-    "__rmul__",
     "__mod__",
     "__rmod__",
     "__pow__",

From 9c012eca0c651b072fd60fc1d035894adef26a93 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 16 Aug 2021 09:11:52 -0700
Subject: [PATCH 18/28] Standardize _from_data implementation and move the
 type-determining version out of the loop.

---
 python/cudf/cudf/_lib/groupby.pyx           |  6 +-
 python/cudf/cudf/_lib/utils.pyx             |  4 +-
 python/cudf/cudf/core/index.py              | 64 +++++++++------------
 python/cudf/cudf/core/join/_join_helpers.py |  4 +-
 python/cudf/cudf/core/multiindex.py         | 16 ++++--
 5 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index a00f5140dcd..b137739ef3d 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -94,7 +94,7 @@ cdef class GroupBy:
         c_grouped_values = move(c_groups.values)
         c_group_offsets = c_groups.offsets
 
-        grouped_keys = cudf.core.index.GenericIndex._from_data(
+        grouped_keys = cudf.core.index._index_from_data(
             *data_from_unique_ptr(
                 move(c_grouped_keys),
                 column_names=range(c_grouped_keys.get()[0].num_columns())
@@ -218,7 +218,7 @@ cdef class GroupBy:
                     Column.from_unique_ptr(move(c_result.second[i].results[j]))
                 )
 
-        return result_data, cudf.core.index.GenericIndex._from_data(
+        return result_data, cudf.core.index._index_from_data(
             grouped_keys)
 
     def shift(self, Table values, int periods, list fill_values):
@@ -244,7 +244,7 @@ cdef class GroupBy:
                 self.c_obj.get()[0].shift(view, offsets, c_fill_values)
             )
 
-        grouped_keys = cudf.core.index.GenericIndex._from_data(
+        grouped_keys = cudf.core.index._index_from_data(
             *data_from_unique_ptr(
                 move(c_result.first),
                 column_names=self.keys._column_names
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 93cf72aa11b..50fdaa85faa 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -251,7 +251,7 @@ cdef data_from_unique_ptr(
         # Frame factories we may want to look for a less dissonant approach
         # that does not impose performance penalties. The same applies to
         # data_from_table_view below.
-        cudf.core.index.GenericIndex._from_data(
+        cudf.core.index._index_from_data(
             {
                 name: columns[i]
                 for i, name in enumerate(index_names)
@@ -301,7 +301,7 @@ cdef data_from_table_view(
                 )
             )
             column_idx += 1
-        index = cudf.core.index.GenericIndex._from_data(
+        index = cudf.core.index._index_from_data(
             dict(zip(index_names, index_columns)))
 
     # Construct the data dict
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index f8210c4f8c9..b9cb0118c3d 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -90,6 +90,33 @@ def _lexsorted_equal_range(
     return lower_bound, upper_bound, sort_inds
 
 
+def _index_from_data(data: MutableMapping, name: Any = None):
+    """Construct an index of the appropriate type from some data."""
+    if len(data) == 0:
+        raise ValueError("Cannot construct Index from any empty Table")
+    if len(data) == 1:
+        values = next(iter(data.values()))
+
+        if isinstance(values, NumericalColumn):
+            try:
+                index_class_type: Type[
+                    Union[GenericIndex, cudf.MultiIndex]
+                ] = _dtype_to_index[values.dtype.type]
+            except KeyError:
+                index_class_type = GenericIndex
+        elif isinstance(values, DatetimeColumn):
+            index_class_type = DatetimeIndex
+        elif isinstance(values, TimeDeltaColumn):
+            index_class_type = TimedeltaIndex
+        elif isinstance(values, StringColumn):
+            index_class_type = StringIndex
+        elif isinstance(values, CategoricalColumn):
+            index_class_type = CategoricalIndex
+    else:
+        index_class_type = cudf.MultiIndex
+    return index_class_type._from_data(data, None, name)
+
+
 class BaseIndex(Serializable):
     """Base class for all cudf Index types."""
 
@@ -1566,43 +1593,6 @@ def drop_duplicates(self, keep="first"):
         """  # noqa: E501
         return super().drop_duplicates(keep=keep)
 
-    @classmethod
-    def _from_data(
-        cls,
-        data: MutableMapping,
-        index: Optional[BaseIndex] = None,
-        name: Any = None,
-    ) -> BaseIndex:
-        assert index is None
-        if not isinstance(data, cudf.core.column_accessor.ColumnAccessor):
-            data = cudf.core.column_accessor.ColumnAccessor(data)
-        if len(data) == 0:
-            raise ValueError("Cannot construct Index from any empty Table")
-        if len(data) == 1:
-            values = next(iter(data.values()))
-
-            if isinstance(values, NumericalColumn):
-                try:
-                    index_class_type: Type[GenericIndex] = _dtype_to_index[
-                        values.dtype.type
-                    ]
-                except KeyError:
-                    index_class_type = GenericIndex
-                out = index_class_type.__new__(index_class_type)
-            elif isinstance(values, DatetimeColumn):
-                out = DatetimeIndex.__new__(DatetimeIndex)
-            elif isinstance(values, TimeDeltaColumn):
-                out = TimedeltaIndex.__new__(TimedeltaIndex)
-            elif isinstance(values, StringColumn):
-                out = StringIndex.__new__(StringIndex)
-            elif isinstance(values, CategoricalColumn):
-                out = CategoricalIndex.__new__(CategoricalIndex)
-            out._data = data
-            out._index = None
-            return out
-        else:
-            return cudf.MultiIndex._from_data(data)
-
     def _copy_type_metadata(
         self, other: Frame, include_index: bool = True
     ) -> GenericIndex:
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 96c48722ee6..1d1f661779f 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -71,9 +71,7 @@ def _frame_select_by_indexers(
             data.set_by_label(idx.name, idx.get(frame), validate=False)
 
     result_index = (
-        cudf.core.index.GenericIndex._from_data(index_data)
-        if index_data
-        else None
+        cudf.core.index._index_from_data(index_data) if index_data else None
     )
     result = cudf.core.frame.Frame(data=data, index=result_index)
     return result
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 36422a2855c..8ee07e3a4d7 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -6,7 +6,7 @@
 import pickle
 import warnings
 from collections.abc import Sequence
-from typing import Any, List, Mapping, Tuple, Union
+from typing import Any, List, MutableMapping, Optional, Tuple, Union
 
 import cupy
 import numpy as np
@@ -289,9 +289,17 @@ def set_names(self, names, level=None, inplace=False):
         return self._set_names(names=names, inplace=inplace)
 
     @classmethod
-    def _from_data(cls, data: Mapping, index=None) -> MultiIndex:
+    def _from_data(
+        cls,
+        data: MutableMapping,
+        index: Optional[cudf.core.index.BaseIndex] = None,
+        name: Any = None,
+    ) -> MultiIndex:
         assert index is None
-        return cls.from_frame(cudf.DataFrame._from_data(data))
+        obj = cls.from_frame(cudf.DataFrame._from_data(data))
+        if name is not None:
+            obj.name = name
+        return obj
 
     @property
     def shape(self):
@@ -1347,7 +1355,7 @@ def _poplevels(self, level):
             popped_data[n] = self._data.pop(n)
 
         # construct the popped result
-        popped = cudf.core.index.GenericIndex._from_data(popped_data)
+        popped = cudf.core.index._index_from_data(popped_data)
         popped.names = popped_names
 
         # update self

From 23e3d7ccf9633fdb6678b2ba7070c74e32e4ce1f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 16 Aug 2021 10:17:53 -0700
Subject: [PATCH 19/28] Make as_index rely on _index_from_data when passed a
 column.

---
 python/cudf/cudf/core/index.py | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index b9cb0118c3d..3abfbfe88ef 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -112,6 +112,8 @@ def _index_from_data(data: MutableMapping, name: Any = None):
             index_class_type = StringIndex
         elif isinstance(values, CategoricalColumn):
             index_class_type = CategoricalIndex
+        elif isinstance(values, IntervalColumn):
+            index_class_type = IntervalIndex
     else:
         index_class_type = cudf.MultiIndex
     return index_class_type._from_data(data, None, name)
@@ -1684,7 +1686,8 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
         dtype = self.dtype if dtype is None else dtype
         name = self.name if name is None else name
 
-        return as_index(self._values.astype(dtype), name=name, copy=deep)
+        col = self._values.astype(dtype)
+        return _index_from_data({name: col.copy(True) if deep else col})
 
     def get_loc(self, key, method=None, tolerance=None):
         """Get integer location, slice or boolean mask for requested label.
@@ -3117,31 +3120,21 @@ def as_index(arbitrary, **kwargs) -> BaseIndex:
         idx = arbitrary.copy(deep=False)
         idx.rename(kwargs["name"], inplace=True)
         return idx
-    elif isinstance(arbitrary, NumericalColumn):
-        try:
-            return _dtype_to_index[arbitrary.dtype.type](arbitrary, **kwargs)
-        except KeyError:
-            return GenericIndex(arbitrary, **kwargs)
-    elif isinstance(arbitrary, StringColumn):
-        return StringIndex(arbitrary, **kwargs)
-    elif isinstance(arbitrary, DatetimeColumn):
-        return DatetimeIndex(arbitrary, **kwargs)
-    elif isinstance(arbitrary, TimeDeltaColumn):
-        return TimedeltaIndex(arbitrary, **kwargs)
-    elif isinstance(arbitrary, CategoricalColumn):
-        return CategoricalIndex(arbitrary, **kwargs)
-    elif isinstance(arbitrary, IntervalColumn):
-        return IntervalIndex(arbitrary, **kwargs)
+    elif isinstance(arbitrary, ColumnBase):
+        return _index_from_data({kwargs.get("name", None): arbitrary})
     elif isinstance(arbitrary, cudf.Series):
         return as_index(arbitrary._column, **kwargs)
-    elif isinstance(arbitrary, pd.RangeIndex):
-        return RangeIndex(start=arbitrary.start, stop=arbitrary.stop, **kwargs)
+    elif isinstance(arbitrary, (pd.RangeIndex, range)):
+        return RangeIndex(
+            start=arbitrary.start,
+            stop=arbitrary.stop,
+            step=arbitrary.step,
+            **kwargs,
+        )
     elif isinstance(arbitrary, pd.MultiIndex):
         return cudf.MultiIndex.from_pandas(arbitrary)
     elif isinstance(arbitrary, cudf.DataFrame):
         return cudf.MultiIndex(source_data=arbitrary)
-    elif isinstance(arbitrary, range):
-        return RangeIndex(arbitrary, **kwargs)
     return as_index(
         column.as_column(arbitrary, dtype=kwargs.get("dtype", None)), **kwargs
     )

From ab5124074795d1abeffe3e9e4a23c70f83ab05cb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 16 Aug 2021 10:23:29 -0700
Subject: [PATCH 20/28] Fix minor typo.

---
 python/cudf/cudf/core/index.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 3abfbfe88ef..e1b4a6ea4d3 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -3220,9 +3220,9 @@ def __new__(
         tupleize_cols=True,
         **kwargs,
     ):
-        assert cls is Index, (
-            "Index cannot be subclassed, extend BaseIndex " "instead."
-        )
+        assert (
+            cls is Index
+        ), "Index cannot be subclassed, extend BaseIndex instead."
         if tupleize_cols is not True:
             raise NotImplementedError(
                 "tupleize_cols != True is not yet supported"

From 899bd99b03ef44412b4a4d05d89a06a6c74d235e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 16 Aug 2021 14:53:57 -0700
Subject: [PATCH 21/28] Allow index binops to return different output than
 input data type and make RangeIndex.__mul__ convert to Int64Index when
 necessary.

---
 python/cudf/cudf/core/frame.py  | 51 ++++++++++++++++++---------------
 python/cudf/cudf/core/index.py  | 21 +++++++++++++-
 python/cudf/cudf/core/series.py | 10 +++++--
 3 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5830b9624f2..7b2dcecc1e0 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3230,6 +3230,26 @@ def _binaryop(
         *args,
         **kwargs,
     ) -> Frame:
+        """Perform a binary operation between two frames.
+
+        Parameters
+        ----------
+        other : Frame
+            The second operand.
+        fn : str
+            The operation to perform.
+        fill_value : Any, default None
+            The value to replace null values with. If ``None``, nulls are not
+            filled before the operation.
+        reflect : bool, default False
+            If ``True`` the operation is reflected (i.e whether to swap the
+            left and right operands).
+
+        Returns
+        -------
+        Frame
+            A new instance containing the result of the operation.
+        """
         raise NotImplementedError
 
     @classmethod
@@ -3256,8 +3276,8 @@ def _colwise_binop(
 
         Returns
         -------
-        Frame
-            A subclass of Frame constructed from the result of performing the
+        Dict[ColumnBase]
+            A dict of columns constructed from the result of performing the
             requested operation on the operands.
         """
 
@@ -4318,39 +4338,32 @@ def factorize(self, na_sentinel=-1):
         """
         return cudf.core.algorithms.factorize(self, na_sentinel=na_sentinel)
 
-    def _binaryop(
+    def _make_operands_for_binop(
         self,
         other: T,
-        fn: str,
         fill_value: Any = None,
         reflect: bool = False,
         *args,
         **kwargs,
-    ) -> SingleColumnFrame:
-        """Perform a binary operation between two single column frames.
+    ) -> Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]]:
+        """Generate the dictionary of operands used for a binary operation.
 
         Parameters
         ----------
         other : SingleColumnFrame
             The second operand.
-        fn : str
-            The operation
         fill_value : Any, default None
             The value to replace null values with. If ``None``, nulls are not
             filled before the operation.
         reflect : bool, default False
             If ``True`` the operation is reflected (i.e whether to swap the
             left and right operands).
-        lhs : SingleColumnFrame, default None
-            The left hand operand. If ``None``, self is used. This parameter
-            allows child classes to preprocess the inputs if necessary.
 
         Returns
         -------
-        SingleColumnFrame
-            A new instance containing the result of the operation.
+        Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]]
+            The operands to be passed to _colwise_binop.
         """
-
         # Get the appropriate name for output operations involving two objects
         # that are Series-like objects. The output shares the lhs's name unless
         # the rhs is a _differently_ named Series-like object.
@@ -4372,15 +4385,7 @@ def _binaryop(
             except Exception:
                 return NotImplemented
 
-        operands: Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]] = {
-            result_name: (self._column, other, reflect, fill_value)
-        }
-
-        return self._from_data(
-            data=type(self)._colwise_binop(operands, fn),
-            index=self._index,
-            name=result_name,
-        )
+        return {result_name: (self._column, other, reflect, fill_value)}
 
 
 def _get_replacement_values_for_columns(
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index e1b4a6ea4d3..f4ce3b54c45 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -13,6 +13,7 @@
     Set,
     Tuple,
     Type,
+    TypeVar,
     Union,
 )
 
@@ -64,6 +65,8 @@
 )
 from cudf.utils.utils import cached_property, search_range
 
+T = TypeVar("T", bound="Frame")
+
 
 def _lexsorted_equal_range(
     idx: Union[GenericIndex, cudf.MultiIndex],
@@ -1461,7 +1464,7 @@ def __mul__(self, other):
             return RangeIndex(
                 self.start * other, self.stop * other, self.step * other
             )
-        return super().__mul__(other)
+        return self._as_int64().__mul__(other)
 
     def __rmul__(self, other):
         # Multiplication is commutative.
@@ -1595,6 +1598,22 @@ def drop_duplicates(self, keep="first"):
         """  # noqa: E501
         return super().drop_duplicates(keep=keep)
 
+    def _binaryop(
+        self,
+        other: T,
+        fn: str,
+        fill_value: Any = None,
+        reflect: bool = False,
+        *args,
+        **kwargs,
+    ) -> SingleColumnFrame:
+        # Specialize binops to generate the appropriate output index type.
+        return _index_from_data(
+            data=self._colwise_binop(
+                self._make_operands_for_binop(other, fill_value, reflect), fn
+            ),
+        )
+
     def _copy_type_metadata(
         self, other: Frame, include_index: bool = True
     ) -> GenericIndex:
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 703ec8f6b88..51572e9fa87 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1336,6 +1336,7 @@ def _binaryop(
         *args,
         **kwargs,
     ):
+        # Specialize binops to align indices.
         if isinstance(other, SingleColumnFrame):
             if (
                 # TODO: The can_reindex logic also needs to be applied for
@@ -1358,8 +1359,13 @@ def _binaryop(
         else:
             lhs = self
 
-        # Note that we call the super on lhs, not self.
-        return super(Series, lhs)._binaryop(other, fn, fill_value, reflect)
+        return lhs._from_data(
+            data=lhs._colwise_binop(
+                lhs._make_operands_for_binop(other, fill_value, reflect, lhs),
+                fn,
+            ),
+            index=lhs._index,
+        )
 
     def add(self, other, fill_value=None, axis=0):
         """

From c4acc92d61e4061e44c6ac8811490ee5af31e3ea Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 17 Aug 2021 10:43:21 -0700
Subject: [PATCH 22/28] Forward NotImplemented through correctly.

---
 python/cudf/cudf/core/index.py  |  9 +++++----
 python/cudf/cudf/core/series.py | 13 +++++++------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index f4ce3b54c45..7d4a73fd794 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1608,10 +1608,11 @@ def _binaryop(
         **kwargs,
     ) -> SingleColumnFrame:
         # Specialize binops to generate the appropriate output index type.
-        return _index_from_data(
-            data=self._colwise_binop(
-                self._make_operands_for_binop(other, fill_value, reflect), fn
-            ),
+        operands = self._make_operands_for_binop(other, fill_value, reflect)
+        return (
+            _index_from_data(data=self._colwise_binop(operands, fn),)
+            if operands is not NotImplemented
+            else NotImplemented
         )
 
     def _copy_type_metadata(
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 51572e9fa87..601f29aa32f 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1359,12 +1359,13 @@ def _binaryop(
         else:
             lhs = self
 
-        return lhs._from_data(
-            data=lhs._colwise_binop(
-                lhs._make_operands_for_binop(other, fill_value, reflect, lhs),
-                fn,
-            ),
-            index=lhs._index,
+        operands = lhs._make_operands_for_binop(other, fill_value, reflect)
+        return (
+            lhs._from_data(
+                data=lhs._colwise_binop(operands, fn), index=lhs._index,
+            )
+            if operands is not NotImplemented
+            else NotImplemented
         )
 
     def add(self, other, fill_value=None, axis=0):

From 1e6d0e2693b09e55f9bc7f8fe5f4a3547f796c92 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 23 Aug 2021 17:21:04 -0700
Subject: [PATCH 23/28] Fix newly introduced use of Index._from_data from
 upstream merging.

---
 python/cudf/cudf/_lib/groupby.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index e427c321d2b..153b116cd33 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -188,7 +188,8 @@ cdef class GroupBy:
                     Column.from_unique_ptr(move(c_result.second[i].results[j]))
                 )
 
-        return result_data, cudf.Index._from_data(grouped_keys)
+        return result_data, cudf.core.index._index_from_data(
+            grouped_keys)
 
     def scan_internal(self, Table values, aggregations):
         from cudf.core.column_accessor import ColumnAccessor

From fd78b637f7265c0ce197f0c83d0d085a66d4f1ba Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 23 Aug 2021 17:22:51 -0700
Subject: [PATCH 24/28] Move BaseIndex into a separate module.

---
 python/cudf/cudf/core/_base_index.py | 966 +++++++++++++++++++++++++++
 python/cudf/cudf/core/index.py       | 959 +-------------------------
 2 files changed, 968 insertions(+), 957 deletions(-)
 create mode 100644 python/cudf/cudf/core/_base_index.py

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
new file mode 100644
index 00000000000..83f71ef27c1
--- /dev/null
+++ b/python/cudf/cudf/core/_base_index.py
@@ -0,0 +1,966 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from __future__ import annotations, division, print_function
+
+import pickle
+from typing import Any, Set
+
+import cupy
+import pandas as pd
+
+import cudf
+from cudf._typing import DtypeObj
+from cudf.api.types import is_dtype_equal, is_integer
+from cudf.core.abc import Serializable
+from cudf.core.column import ColumnBase, column
+from cudf.utils import ioutils
+from cudf.utils.dtypes import (
+    is_list_like,
+    is_mixed_with_object_dtype,
+    is_scalar,
+    numeric_normalize_types,
+)
+from cudf.utils.utils import cached_property
+
+
+class BaseIndex(Serializable):
+    """Base class for all cudf Index types."""
+
+    dtype: DtypeObj
+    _accessors: Set[Any] = set()
+
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+
+        if method == "__call__" and hasattr(cudf, ufunc.__name__):
+            func = getattr(cudf, ufunc.__name__)
+            return func(*inputs)
+        else:
+            return NotImplemented
+
+    @cached_property
+    def _values(self) -> ColumnBase:
+        raise NotImplementedError
+
+    @property
+    def _data(self) -> cudf.core.column_accessor.ColumnAccessor:
+        raise NotImplementedError
+
+    def copy(self, deep: bool = True) -> BaseIndex:
+        raise NotImplementedError
+
+    @property
+    def values(self):
+        return self._values.values
+
+    def get_loc(self, key, method=None, tolerance=None):
+        raise NotImplementedError
+
+    def __getitem__(self, key):
+        raise NotImplementedError()
+
+    def serialize(self):
+        header = {}
+        header["index_column"] = {}
+        # store metadata values of index separately
+        # Indexes: Numerical/DateTime/String are often GPU backed
+        header["index_column"], frames = self._values.serialize()
+
+        header["name"] = pickle.dumps(self.name)
+        header["dtype"] = pickle.dumps(self.dtype)
+        header["type-serialized"] = pickle.dumps(type(self))
+        header["frame_count"] = len(frames)
+        return header, frames
+
+    def __contains__(self, item):
+        return item in self._values
+
+    def get_level_values(self, level):
+        """
+        Return an Index of values for requested level.
+
+        This is primarily useful to get an individual level of values from a
+        MultiIndex, but is provided on Index as well for compatibility.
+
+        Parameters
+        ----------
+        level : int or str
+            It is either the integer position or the name of the level.
+
+        Returns
+        -------
+        Index
+            Calling object, as there is only one level in the Index.
+
+        See Also
+        --------
+        cudf.core.multiindex.MultiIndex.get_level_values : Get values for
+            a level of a MultiIndex.
+
+        Notes
+        -----
+        For Index, level should be 0, since there are no multiple levels.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> idx = cudf.Index(["a", "b", "c"])
+        >>> idx.get_level_values(0)
+        StringIndex(['a' 'b' 'c'], dtype='object')
+        """
+
+        if level == self.name:
+            return self
+        elif is_integer(level):
+            if level != 0:
+                raise IndexError(
+                    f"Cannot get level: {level} " f"for index with 1 level"
+                )
+            return self
+        else:
+            raise KeyError(f"Requested level with name {level} " "not found")
+
+    @classmethod
+    def deserialize(cls, header, frames):
+        h = header["index_column"]
+        idx_typ = pickle.loads(header["type-serialized"])
+        name = pickle.loads(header["name"])
+
+        col_typ = pickle.loads(h["type-serialized"])
+        index = col_typ.deserialize(h, frames[: header["frame_count"]])
+        return idx_typ(index, name=name)
+
+    @property
+    def names(self):
+        """
+        Returns a tuple containing the name of the Index.
+        """
+        return (self.name,)
+
+    @names.setter
+    def names(self, values):
+        if not is_list_like(values):
+            raise ValueError("Names must be a list-like")
+
+        num_values = len(values)
+        if num_values > 1:
+            raise ValueError(
+                "Length of new names must be 1, got %d" % num_values
+            )
+
+        self.name = values[0]
+
+    def _clean_nulls_from_index(self):
+        """
+        Convert all NA values (if any) in the Index object
+        to `<NA>` as a preprocessing step to `__repr__` methods.
+
+        This will involve changing type of Index object
+        to StringIndex but it is the responsibility of the `__repr__`
+        methods using this method to replace or handle representation
+        of the actual types correctly.
+        """
+        if self._values.has_nulls:
+            return cudf.Index(
+                self._values.astype("str").fillna(cudf._NA_REP), name=self.name
+            )
+        else:
+            return self
+
+    @property
+    def nlevels(self):
+        """
+        Number of levels.
+        """
+        return 1
+
+    def _set_names(self, names, inplace=False):
+        if inplace:
+            idx = self
+        else:
+            idx = self.copy(deep=False)
+
+        idx.names = names
+        if not inplace:
+            return idx
+
+    def set_names(self, names, level=None, inplace=False):
+        """
+        Set Index or MultiIndex name.
+        Able to set new names partially and by level.
+
+        Parameters
+        ----------
+        names : label or list of label
+            Name(s) to set.
+        level : int, label or list of int or label, optional
+            If the index is a MultiIndex, level(s) to set (None for all
+            levels). Otherwise level must be None.
+        inplace : bool, default False
+            Modifies the object directly, instead of creating a new Index or
+            MultiIndex.
+
+        Returns
+        -------
+        Index
+            The same type as the caller or None if inplace is True.
+
+        See Also
+        --------
+        cudf.Index.rename : Able to set new names without level.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> idx = cudf.Index([1, 2, 3, 4])
+        >>> idx
+        Int64Index([1, 2, 3, 4], dtype='int64')
+        >>> idx.set_names('quarter')
+        Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')
+        >>> idx = cudf.MultiIndex.from_product([['python', 'cobra'],
+        ... [2018, 2019]])
+        >>> idx
+        MultiIndex([('python', 2018),
+                    ('python', 2019),
+                    ( 'cobra', 2018),
+                    ( 'cobra', 2019)],
+                   )
+        >>> idx.names
+        FrozenList([None, None])
+        >>> idx.set_names(['kind', 'year'], inplace=True)
+        >>> idx.names
+        FrozenList(['kind', 'year'])
+        >>> idx.set_names('species', level=0, inplace=True)
+        >>> idx.names
+        FrozenList(['species', 'year'])
+        """
+        if level is not None:
+            raise ValueError("Level must be None for non-MultiIndex")
+
+        if not is_list_like(names):
+            names = [names]
+
+        return self._set_names(names=names, inplace=inplace)
+
+    def fillna(self, value, downcast=None):
+        """
+        Fill null values with the specified value.
+
+        Parameters
+        ----------
+        value : scalar
+            Scalar value to use to fill nulls. This value cannot be
+            list-like.
+
+        downcast : dict, default is None
+            This Parameter is currently NON-FUNCTIONAL.
+
+        Returns
+        -------
+        filled : Index
+
+        Examples
+        --------
+        >>> import cudf
+        >>> index = cudf.Index([1, 2, None, 4])
+        >>> index
+        Int64Index([1, 2, null, 4], dtype='int64')
+        >>> index.fillna(3)
+        Int64Index([1, 2, 3, 4], dtype='int64')
+        """
+        if downcast is not None:
+            raise NotImplementedError(
+                "`downcast` parameter is not yet supported"
+            )
+
+        return super().fillna(value=value)
+
+    def take(self, indices):
+        """Gather only the specific subset of indices
+
+        Parameters
+        ----------
+        indices: An array-like that maps to values contained in this Index.
+        """
+        return self[indices]
+
+    def argsort(self, ascending=True, **kwargs):
+        """
+        Return the integer indices that would sort the index.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            If True, returns the indices for ascending order.
+            If False, returns the indices for descending order.
+
+        Returns
+        -------
+        array : A cupy array containing Integer indices that
+            would sort the index if used as an indexer.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> index = cudf.Index([10, 100, 1, 1000])
+        >>> index
+        Int64Index([10, 100, 1, 1000], dtype='int64')
+        >>> index.argsort()
+        array([2, 0, 1, 3], dtype=int32)
+
+        The order of argsort can be reversed using
+        ``ascending`` parameter, by setting it to ``False``.
+        >>> index.argsort(ascending=False)
+        array([3, 1, 0, 2], dtype=int32)
+
+        ``argsort`` on a MultiIndex:
+
+        >>> index = cudf.MultiIndex(
+        ...      levels=[[1, 3, 4, -10], [1, 11, 5]],
+        ...      codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]],
+        ...      names=["x", "y"],
+        ... )
+        >>> index
+        MultiIndex([(  1,  1),
+                    (  1,  5),
+                    (  3, 11),
+                    (  4, 11),
+                    (-10,  1)],
+                   names=['x', 'y'])
+        >>> index.argsort()
+        array([4, 0, 1, 2, 3], dtype=int32)
+        >>> index.argsort(ascending=False)
+        array([3, 2, 1, 0, 4], dtype=int32)
+        """
+        indices = self._values.argsort(ascending=ascending, **kwargs)
+        return cupy.asarray(indices)
+
+    def to_frame(self, index=True, name=None):
+        """Create a DataFrame with a column containing this Index
+
+        Parameters
+        ----------
+        index : boolean, default True
+            Set the index of the returned DataFrame as the original Index
+        name : str, default None
+            Name to be used for the column
+
+        Returns
+        -------
+        DataFrame
+            cudf DataFrame
+        """
+
+        if name is not None:
+            col_name = name
+        elif self.name is None:
+            col_name = 0
+        else:
+            col_name = self.name
+        return cudf.DataFrame(
+            {col_name: self._values}, index=self if index else None
+        )
+
+    def any(self):
+        """
+        Return whether any element is True in the Index.
+        """
+        return self._values.any()
+
+    def to_pandas(self):
+        """
+        Convert to a Pandas Index.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> idx = cudf.Index([-3, 10, 15, 20])
+        >>> idx
+        Int64Index([-3, 10, 15, 20], dtype='int64')
+        >>> idx.to_pandas()
+        Int64Index([-3, 10, 15, 20], dtype='int64')
+        >>> type(idx.to_pandas())
+        <class 'pandas.core.indexes.numeric.Int64Index'>
+        >>> type(idx)
+        <class 'cudf.core.index.GenericIndex'>
+        """
+        return pd.Index(self._values.to_pandas(), name=self.name)
+
+    @ioutils.doc_to_dlpack()
+    def to_dlpack(self):
+        """{docstring}"""
+
+        return cudf.io.dlpack.to_dlpack(self)
+
+    @property
+    def gpu_values(self):
+        """
+        View the data as a numba device array object
+        """
+        return self._values.data_array_view
+
+    def append(self, other):
+        """
+        Append a collection of Index options together.
+
+        Parameters
+        ----------
+        other : Index or list/tuple of indices
+
+        Returns
+        -------
+        appended : Index
+
+        Examples
+        --------
+        >>> import cudf
+        >>> idx = cudf.Index([1, 2, 10, 100])
+        >>> idx
+        Int64Index([1, 2, 10, 100], dtype='int64')
+        >>> other = cudf.Index([200, 400, 50])
+        >>> other
+        Int64Index([200, 400, 50], dtype='int64')
+        >>> idx.append(other)
+        Int64Index([1, 2, 10, 100, 200, 400, 50], dtype='int64')
+
+        append accepts list of Index objects
+
+        >>> idx.append([other, other])
+        Int64Index([1, 2, 10, 100, 200, 400, 50, 200, 400, 50], dtype='int64')
+        """
+
+        if is_list_like(other):
+            to_concat = [self]
+            to_concat.extend(other)
+        else:
+            this = self
+            if len(other) == 0:
+                # short-circuit and return a copy
+                to_concat = [self]
+
+            other = cudf.Index(other)
+
+            if len(self) == 0:
+                to_concat = [other]
+
+            if len(self) and len(other):
+                if is_mixed_with_object_dtype(this, other):
+                    got_dtype = (
+                        other.dtype
+                        if this.dtype == cudf.dtype("object")
+                        else this.dtype
+                    )
+                    raise TypeError(
+                        f"cudf does not support appending an Index of "
+                        f"dtype `{cudf.dtype('object')}` with an Index "
+                        f"of dtype `{got_dtype}`, please type-cast "
+                        f"either one of them to same dtypes."
+                    )
+
+                if isinstance(self._values, cudf.core.column.NumericalColumn):
+                    if self.dtype != other.dtype:
+                        this, other = numeric_normalize_types(self, other)
+                to_concat = [this, other]
+
+        for obj in to_concat:
+            if not isinstance(obj, BaseIndex):
+                raise TypeError("all inputs must be Index")
+
+        return self._concat(to_concat)
+
+    def difference(self, other, sort=None):
+        """
+        Return a new Index with elements from the index that are not in
+        `other`.
+
+        This is the set difference of two Index objects.
+
+        Parameters
+        ----------
+        other : Index or array-like
+        sort : False or None, default None
+            Whether to sort the resulting index. By default, the
+            values are attempted to be sorted, but any TypeError from
+            incomparable elements is caught by cudf.
+
+            * None : Attempt to sort the result, but catch any TypeErrors
+              from comparing incomparable elements.
+            * False : Do not sort the result.
+
+        Returns
+        -------
+        difference : Index
+
+        Examples
+        --------
+        >>> import cudf
+        >>> idx1 = cudf.Index([2, 1, 3, 4])
+        >>> idx1
+        Int64Index([2, 1, 3, 4], dtype='int64')
+        >>> idx2 = cudf.Index([3, 4, 5, 6])
+        >>> idx2
+        Int64Index([3, 4, 5, 6], dtype='int64')
+        >>> idx1.difference(idx2)
+        Int64Index([1, 2], dtype='int64')
+        >>> idx1.difference(idx2, sort=False)
+        Int64Index([2, 1], dtype='int64')
+        """
+        if sort not in {None, False}:
+            raise ValueError(
+                f"The 'sort' keyword only takes the values "
+                f"of None or False; {sort} was passed."
+            )
+
+        other = cudf.Index(other)
+
+        if is_mixed_with_object_dtype(self, other):
+            difference = self.copy()
+        else:
+            difference = self.join(other, how="leftanti")
+            if self.dtype != other.dtype:
+                difference = difference.astype(self.dtype)
+
+        if sort is None:
+            return difference.sort_values()
+
+        return difference
+
+    def sort_values(self, return_indexer=False, ascending=True, key=None):
+        """
+        Return a sorted copy of the index, and optionally return the indices
+        that sorted the index itself.
+
+        Parameters
+        ----------
+        return_indexer : bool, default False
+            Should the indices that would sort the index be returned.
+        ascending : bool, default True
+            Should the index values be sorted in an ascending order.
+        key : None, optional
+            This parameter is NON-FUNCTIONAL.
+
+        Returns
+        -------
+        sorted_index : Index
+            Sorted copy of the index.
+        indexer : cupy.ndarray, optional
+            The indices that the index itself was sorted by.
+
+        See Also
+        --------
+        cudf.Series.sort_values : Sort values of a Series.
+        cudf.DataFrame.sort_values : Sort values in a DataFrame.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> idx = cudf.Index([10, 100, 1, 1000])
+        >>> idx
+        Int64Index([10, 100, 1, 1000], dtype='int64')
+
+        Sort values in ascending order (default behavior).
+
+        >>> idx.sort_values()
+        Int64Index([1, 10, 100, 1000], dtype='int64')
+
+        Sort values in descending order, and also get the indices `idx` was
+        sorted by.
+
+        >>> idx.sort_values(ascending=False, return_indexer=True)
+        (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2],
+                                                            dtype=int32))
+
+        Sorting values in a MultiIndex:
+
+        >>> midx = cudf.MultiIndex(
+        ...      levels=[[1, 3, 4, -10], [1, 11, 5]],
+        ...      codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]],
+        ...      names=["x", "y"],
+        ... )
+        >>> midx
+        MultiIndex([(  1,  1),
+                    (  1,  5),
+                    (  3, 11),
+                    (  4, 11),
+                    (-10,  1)],
+                   names=['x', 'y'])
+        >>> midx.sort_values()
+        MultiIndex([(-10,  1),
+                    (  1,  1),
+                    (  1,  5),
+                    (  3, 11),
+                    (  4, 11)],
+                   names=['x', 'y'])
+        >>> midx.sort_values(ascending=False)
+        MultiIndex([(  4, 11),
+                    (  3, 11),
+                    (  1,  5),
+                    (  1,  1),
+                    (-10,  1)],
+                   names=['x', 'y'])
+        """
+        if key is not None:
+            raise NotImplementedError("key parameter is not yet implemented.")
+
+        indices = self._values.argsort(ascending=ascending)
+        index_sorted = cudf.Index(self.take(indices), name=self.name)
+
+        if return_indexer:
+            return index_sorted, cupy.asarray(indices)
+        else:
+            return index_sorted
+
+    def unique(self):
+        """
+        Return unique values in the index.
+
+        Returns
+        -------
+        Index without duplicates
+        """
+        return cudf.Index(self._values.unique(), name=self.name)
+
+    def join(
+        self, other, how="left", level=None, return_indexers=False, sort=False
+    ):
+        """
+        Compute join_index and indexers to conform data structures
+        to the new index.
+
+        Parameters
+        ----------
+        other : Index.
+        how : {'left', 'right', 'inner', 'outer'}
+        return_indexers : bool, default False
+        sort : bool, default False
+            Sort the join keys lexicographically in the result Index. If False,
+            the order of the join keys depends on the join type (how keyword).
+
+        Returns: index
+
+        Examples
+        --------
+        >>> import cudf
+        >>> lhs = cudf.DataFrame(
+        ...     {"a":[2, 3, 1], "b":[3, 4, 2]}).set_index(['a', 'b']
+        ... ).index
+        >>> lhs
+        MultiIndex([(2, 3),
+                    (3, 4),
+                    (1, 2)],
+                   names=['a', 'b'])
+        >>> rhs = cudf.DataFrame({"a":[1, 4, 3]}).set_index('a').index
+        >>> rhs
+        Int64Index([1, 4, 3], dtype='int64', name='a')
+        >>> lhs.join(rhs, how='inner')
+        MultiIndex([(3, 4),
+                    (1, 2)],
+                   names=['a', 'b'])
+        """
+
+        if isinstance(self, cudf.MultiIndex) and isinstance(
+            other, cudf.MultiIndex
+        ):
+            raise TypeError(
+                "Join on level between two MultiIndex objects is ambiguous"
+            )
+
+        if level is not None and not is_scalar(level):
+            raise ValueError("level should be an int or a label only")
+
+        if isinstance(other, cudf.MultiIndex):
+            if how == "left":
+                how = "right"
+            elif how == "right":
+                how = "left"
+            rhs = self.copy(deep=False)
+            lhs = other.copy(deep=False)
+        else:
+            lhs = self.copy(deep=False)
+            rhs = other.copy(deep=False)
+
+        on = level
+        # In case of MultiIndex, it will be None as
+        # we don't need to update name
+        left_names = lhs.names
+        right_names = rhs.names
+        # There should be no `None` values in Joined indices,
+        # so essentially it would be `left/right` or 'inner'
+        # in case of MultiIndex
+        if isinstance(lhs, cudf.MultiIndex):
+            if level is not None and isinstance(level, int):
+                on = lhs._data.select_by_index(level).names[0]
+            right_names = (on,) or right_names
+            on = right_names[0]
+            if how == "outer":
+                how = "left"
+            elif how == "right":
+                how = "inner"
+        else:
+            # Both are normal indices
+            right_names = left_names
+            on = right_names[0]
+
+        lhs.names = left_names
+        rhs.names = right_names
+
+        output = lhs._merge(rhs, how=how, on=on, sort=sort)
+
+        return output
+
+    def rename(self, name, inplace=False):
+        """
+        Alter Index name.
+
+        Defaults to returning new index.
+
+        Parameters
+        ----------
+        name : label
+            Name(s) to set.
+
+        Returns
+        -------
+        Index
+
+        Examples
+        --------
+        >>> import cudf
+        >>> index = cudf.Index([1, 2, 3], name='one')
+        >>> index
+        Int64Index([1, 2, 3], dtype='int64', name='one')
+        >>> index.name
+        'one'
+        >>> renamed_index = index.rename('two')
+        >>> renamed_index
+        Int64Index([1, 2, 3], dtype='int64', name='two')
+        >>> renamed_index.name
+        'two'
+        """
+        if inplace is True:
+            self.name = name
+            return None
+        else:
+            out = self.copy(deep=False)
+            out.name = name
+            return out.copy(deep=True)
+
+    def astype(self, dtype, copy=False):
+        """
+        Create an Index with values cast to dtypes. The class of a new Index
+        is determined by dtype. When conversion is impossible, a ValueError
+        exception is raised.
+
+        Parameters
+        ----------
+        dtype : numpy dtype
+            Use a numpy.dtype to cast entire Index object to.
+        copy : bool, default False
+            By default, astype always returns a newly allocated object.
+            If copy is set to False and internal requirements on dtype are
+            satisfied, the original data is used to create a new Index
+            or the original Index is returned.
+
+        Returns
+        -------
+        Index
+            Index with values cast to specified dtype.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> index = cudf.Index([1, 2, 3])
+        >>> index
+        Int64Index([1, 2, 3], dtype='int64')
+        >>> index.astype('float64')
+        Float64Index([1.0, 2.0, 3.0], dtype='float64')
+        """
+        if is_dtype_equal(dtype, self.dtype):
+            return self.copy(deep=copy)
+
+        return cudf.Index(
+            self.copy(deep=copy)._values.astype(dtype), name=self.name
+        )
+
+    def to_array(self, fillna=None):
+        """Get a dense numpy array for the data.
+
+        Parameters
+        ----------
+        fillna : str or None
+            Defaults to None, which will skip null values.
+            If it equals "pandas", null values are filled with NaNs.
+            Non integral dtype is promoted to np.float64.
+
+        Notes
+        -----
+
+        If ``fillna`` is ``None``, null values are skipped.  Therefore, the
+        output size could be smaller.
+        """
+        return self._values.to_array(fillna=fillna)
+
+    def to_series(self, index=None, name=None):
+        """
+        Create a Series with both index and values equal to the index keys.
+        Useful with map for returning an indexer based on an index.
+
+        Parameters
+        ----------
+        index : Index, optional
+            Index of resulting Series. If None, defaults to original index.
+        name : str, optional
+            Name of resulting Series. If None, defaults to name of original
+            index.
+
+        Returns
+        -------
+        Series
+            The dtype will be based on the type of the Index values.
+        """
+        return cudf.Series(
+            self._values,
+            index=self.copy(deep=False) if index is None else index,
+            name=self.name if name is None else name,
+        )
+
+    def get_slice_bound(self, label, side, kind):
+        """
+        Calculate slice bound that corresponds to given label.
+        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
+        of given label.
+
+        Parameters
+        ----------
+        label : object
+        side : {'left', 'right'}
+        kind : {'ix', 'loc', 'getitem'}
+
+        Returns
+        -------
+        int
+            Index of label.
+        """
+        raise (NotImplementedError)
+
+    def __array_function__(self, func, types, args, kwargs):
+
+        # check if the function is implemented for the current type
+        cudf_index_module = type(self)
+        for submodule in func.__module__.split(".")[1:]:
+            # point cudf_index_module to the correct submodule
+            if hasattr(cudf_index_module, submodule):
+                cudf_index_module = getattr(cudf_index_module, submodule)
+            else:
+                return NotImplemented
+
+        fname = func.__name__
+
+        handled_types = [BaseIndex, cudf.Series]
+
+        # check if we don't handle any of the types (including subclasses)
+        for t in types:
+            if not any(
+                issubclass(t, handled_type) for handled_type in handled_types
+            ):
+                return NotImplemented
+
+        if hasattr(cudf_index_module, fname):
+            cudf_func = getattr(cudf_index_module, fname)
+            # Handle case if cudf_func is same as numpy function
+            if cudf_func is func:
+                return NotImplemented
+            else:
+                return cudf_func(*args, **kwargs)
+
+        else:
+            return NotImplemented
+
+    def isin(self, values):
+        """Return a boolean array where the index values are in values.
+
+        Compute boolean array of whether each index value is found in
+        the passed set of values. The length of the returned boolean
+        array matches the length of the index.
+
+        Parameters
+        ----------
+        values : set, list-like, Index
+            Sought values.
+
+        Returns
+        -------
+        is_contained : cupy array
+            CuPy array of boolean values.
+
+        Examples
+        --------
+        >>> idx = cudf.Index([1,2,3])
+        >>> idx
+        Int64Index([1, 2, 3], dtype='int64')
+
+        Check whether each index value is in a list of values.
+
+        >>> idx.isin([1, 4])
+        array([ True, False, False])
+        """
+
+        return self._values.isin(values).values
+
+    def memory_usage(self, deep=False):
+        """
+        Memory usage of the values.
+
+        Parameters
+        ----------
+            deep : bool
+                Introspect the data deeply,
+                interrogate `object` dtypes for system-level
+                memory consumption.
+
+        Returns
+        -------
+            bytes used
+        """
+        return self._values._memory_usage(deep=deep)
+
+    @classmethod
+    def from_pandas(cls, index, nan_as_null=None):
+        """
+        Convert from a Pandas Index.
+
+        Parameters
+        ----------
+        index : Pandas Index object
+            A Pandas Index object which has to be converted
+            to cuDF Index.
+        nan_as_null : bool, Default None
+            If ``None``/``True``, converts ``np.nan`` values
+            to ``null`` values.
+            If ``False``, leaves ``np.nan`` values as is.
+
+        Raises
+        ------
+        TypeError for invalid input type.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> data = [10, 20, 30, np.nan]
+        >>> pdi = pd.Index(data)
+        >>> cudf.Index.from_pandas(pdi)
+        Float64Index([10.0, 20.0, 30.0, <NA>], dtype='float64')
+        >>> cudf.Index.from_pandas(pdi, nan_as_null=False)
+        Float64Index([10.0, 20.0, 30.0, nan], dtype='float64')
+        """
+        if not isinstance(index, pd.Index):
+            raise TypeError("not a pandas.Index")
+
+        ind = cudf.Index(column.as_column(index, nan_as_null=nan_as_null))
+        ind.name = index.name
+        return ind
+
+    @property
+    def _constructor_expanddim(self):
+        return cudf.MultiIndex
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 7d4a73fd794..c0e0d6c025b 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -10,7 +10,6 @@
     List,
     MutableMapping,
     Optional,
-    Set,
     Tuple,
     Type,
     TypeVar,
@@ -28,14 +27,8 @@
 from cudf._lib.filling import sequence
 from cudf._lib.search import search_sorted
 from cudf._lib.table import Table
-from cudf._typing import DtypeObj
-from cudf.api.types import (
-    _is_scalar_or_zero_d_array,
-    is_dtype_equal,
-    is_integer,
-    is_string_dtype,
-)
-from cudf.core.abc import Serializable
+from cudf.api.types import _is_scalar_or_zero_d_array, is_string_dtype
+from cudf.core._base_index import BaseIndex
 from cudf.core.column import (
     CategoricalColumn,
     ColumnBase,
@@ -51,17 +44,12 @@
 from cudf.core.column.string import StringMethods as StringMethods
 from cudf.core.dtypes import IntervalDtype
 from cudf.core.frame import Frame, SingleColumnFrame
-from cudf.utils import ioutils
 from cudf.utils.docutils import copy_docstring
 from cudf.utils.dtypes import (
     _is_non_decimal_numeric_dtype,
     find_common_type,
     is_categorical_dtype,
     is_interval_dtype,
-    is_list_like,
-    is_mixed_with_object_dtype,
-    is_scalar,
-    numeric_normalize_types,
 )
 from cudf.utils.utils import cached_property, search_range
 
@@ -122,949 +110,6 @@ def _index_from_data(data: MutableMapping, name: Any = None):
     return index_class_type._from_data(data, None, name)
 
 
-class BaseIndex(Serializable):
-    """Base class for all cudf Index types."""
-
-    dtype: DtypeObj
-    _accessors: Set[Any] = set()
-
-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-
-        if method == "__call__" and hasattr(cudf, ufunc.__name__):
-            func = getattr(cudf, ufunc.__name__)
-            return func(*inputs)
-        else:
-            return NotImplemented
-
-    @cached_property
-    def _values(self) -> ColumnBase:
-        raise NotImplementedError
-
-    @property
-    def _data(self) -> cudf.core.column_accessor.ColumnAccessor:
-        raise NotImplementedError
-
-    def copy(self, deep: bool = True) -> BaseIndex:
-        raise NotImplementedError
-
-    @property
-    def values(self):
-        return self._values.values
-
-    def get_loc(self, key, method=None, tolerance=None):
-        raise NotImplementedError
-
-    def __getitem__(self, key):
-        raise NotImplementedError()
-
-    def serialize(self):
-        header = {}
-        header["index_column"] = {}
-        # store metadata values of index separately
-        # Indexes: Numerical/DateTime/String are often GPU backed
-        header["index_column"], frames = self._values.serialize()
-
-        header["name"] = pickle.dumps(self.name)
-        header["dtype"] = pickle.dumps(self.dtype)
-        header["type-serialized"] = pickle.dumps(type(self))
-        header["frame_count"] = len(frames)
-        return header, frames
-
-    def __contains__(self, item):
-        return item in self._values
-
-    def get_level_values(self, level):
-        """
-        Return an Index of values for requested level.
-
-        This is primarily useful to get an individual level of values from a
-        MultiIndex, but is provided on Index as well for compatibility.
-
-        Parameters
-        ----------
-        level : int or str
-            It is either the integer position or the name of the level.
-
-        Returns
-        -------
-        Index
-            Calling object, as there is only one level in the Index.
-
-        See Also
-        --------
-        cudf.core.multiindex.MultiIndex.get_level_values : Get values for
-            a level of a MultiIndex.
-
-        Notes
-        -----
-        For Index, level should be 0, since there are no multiple levels.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> idx = cudf.Index(["a", "b", "c"])
-        >>> idx.get_level_values(0)
-        StringIndex(['a' 'b' 'c'], dtype='object')
-        """
-
-        if level == self.name:
-            return self
-        elif is_integer(level):
-            if level != 0:
-                raise IndexError(
-                    f"Cannot get level: {level} " f"for index with 1 level"
-                )
-            return self
-        else:
-            raise KeyError(f"Requested level with name {level} " "not found")
-
-    @classmethod
-    def deserialize(cls, header, frames):
-        h = header["index_column"]
-        idx_typ = pickle.loads(header["type-serialized"])
-        name = pickle.loads(header["name"])
-
-        col_typ = pickle.loads(h["type-serialized"])
-        index = col_typ.deserialize(h, frames[: header["frame_count"]])
-        return idx_typ(index, name=name)
-
-    @property
-    def names(self):
-        """
-        Returns a tuple containing the name of the Index.
-        """
-        return (self.name,)
-
-    @names.setter
-    def names(self, values):
-        if not is_list_like(values):
-            raise ValueError("Names must be a list-like")
-
-        num_values = len(values)
-        if num_values > 1:
-            raise ValueError(
-                "Length of new names must be 1, got %d" % num_values
-            )
-
-        self.name = values[0]
-
-    def _clean_nulls_from_index(self):
-        """
-        Convert all na values(if any) in Index object
-        to `<NA>` as a preprocessing step to `__repr__` methods.
-
-        This will involve changing type of Index object
-        to StringIndex but it is the responsibility of the `__repr__`
-        methods using this method to replace or handle representation
-        of the actual types correctly.
-        """
-        if self._values.has_nulls:
-            return cudf.Index(
-                self._values.astype("str").fillna(cudf._NA_REP), name=self.name
-            )
-        else:
-            return self
-
-    @property
-    def nlevels(self):
-        """
-        Number of levels.
-        """
-        return 1
-
-    def _set_names(self, names, inplace=False):
-        if inplace:
-            idx = self
-        else:
-            idx = self.copy(deep=False)
-
-        idx.names = names
-        if not inplace:
-            return idx
-
-    def set_names(self, names, level=None, inplace=False):
-        """
-        Set Index or MultiIndex name.
-        Able to set new names partially and by level.
-
-        Parameters
-        ----------
-        names : label or list of label
-            Name(s) to set.
-        level : int, label or list of int or label, optional
-            If the index is a MultiIndex, level(s) to set (None for all
-            levels). Otherwise level must be None.
-        inplace : bool, default False
-            Modifies the object directly, instead of creating a new Index or
-            MultiIndex.
-
-        Returns
-        -------
-        Index
-            The same type as the caller or None if inplace is True.
-
-        See Also
-        --------
-        cudf.Index.rename : Able to set new names without level.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> idx = cudf.Index([1, 2, 3, 4])
-        >>> idx
-        Int64Index([1, 2, 3, 4], dtype='int64')
-        >>> idx.set_names('quarter')
-        Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')
-        >>> idx = cudf.MultiIndex.from_product([['python', 'cobra'],
-        ... [2018, 2019]])
-        >>> idx
-        MultiIndex([('python', 2018),
-                    ('python', 2019),
-                    ( 'cobra', 2018),
-                    ( 'cobra', 2019)],
-                   )
-        >>> idx.names
-        FrozenList([None, None])
-        >>> idx.set_names(['kind', 'year'], inplace=True)
-        >>> idx.names
-        FrozenList(['kind', 'year'])
-        >>> idx.set_names('species', level=0, inplace=True)
-        >>> idx.names
-        FrozenList(['species', 'year'])
-        """
-        if level is not None:
-            raise ValueError("Level must be None for non-MultiIndex")
-
-        if not is_list_like(names):
-            names = [names]
-
-        return self._set_names(names=names, inplace=inplace)
-
-    def fillna(self, value, downcast=None):
-        """
-        Fill null values with the specified value.
-
-        Parameters
-        ----------
-        value : scalar
-            Scalar value to use to fill nulls. This value cannot be a
-            list-likes.
-
-        downcast : dict, default is None
-            This Parameter is currently NON-FUNCTIONAL.
-
-        Returns
-        -------
-        filled : Index
-
-        Examples
-        --------
-        >>> import cudf
-        >>> index = cudf.Index([1, 2, None, 4])
-        >>> index
-        Int64Index([1, 2, null, 4], dtype='int64')
-        >>> index.fillna(3)
-        Int64Index([1, 2, 3, 4], dtype='int64')
-        """
-        if downcast is not None:
-            raise NotImplementedError(
-                "`downcast` parameter is not yet supported"
-            )
-
-        return super().fillna(value=value)
-
-    def take(self, indices):
-        """Gather only the specific subset of indices
-
-        Parameters
-        ----------
-        indices: An array-like that maps to values contained in this Index.
-        """
-        return self[indices]
-
-    def argsort(self, ascending=True, **kwargs):
-        """
-        Return the integer indices that would sort the index.
-
-        Parameters
-        ----------
-        ascending : bool, default True
-            If True, returns the indices for ascending order.
-            If False, returns the indices for descending order.
-
-        Returns
-        -------
-        array : A cupy array containing Integer indices that
-            would sort the index if used as an indexer.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> index = cudf.Index([10, 100, 1, 1000])
-        >>> index
-        Int64Index([10, 100, 1, 1000], dtype='int64')
-        >>> index.argsort()
-        array([2, 0, 1, 3], dtype=int32)
-
-        The order of argsort can be reversed using
-        ``ascending`` parameter, by setting it to ``False``.
-        >>> index.argsort(ascending=False)
-        array([3, 1, 0, 2], dtype=int32)
-
-        ``argsort`` on a MultiIndex:
-
-        >>> index = cudf.MultiIndex(
-        ...      levels=[[1, 3, 4, -10], [1, 11, 5]],
-        ...      codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]],
-        ...      names=["x", "y"],
-        ... )
-        >>> index
-        MultiIndex([(  1,  1),
-                    (  1,  5),
-                    (  3, 11),
-                    (  4, 11),
-                    (-10,  1)],
-                   names=['x', 'y'])
-        >>> index.argsort()
-        array([4, 0, 1, 2, 3], dtype=int32)
-        >>> index.argsort(ascending=False)
-        array([3, 2, 1, 0, 4], dtype=int32)
-        """
-        indices = self._values.argsort(ascending=ascending, **kwargs)
-        return cupy.asarray(indices)
-
-    def to_frame(self, index=True, name=None):
-        """Create a DataFrame with a column containing this Index
-
-        Parameters
-        ----------
-        index : boolean, default True
-            Set the index of the returned DataFrame as the original Index
-        name : str, default None
-            Name to be used for the column
-
-        Returns
-        -------
-        DataFrame
-            cudf DataFrame
-        """
-
-        if name is not None:
-            col_name = name
-        elif self.name is None:
-            col_name = 0
-        else:
-            col_name = self.name
-        return cudf.DataFrame(
-            {col_name: self._values}, index=self if index else None
-        )
-
-    def any(self):
-        """
-        Return whether any elements is True in Index.
-        """
-        return self._values.any()
-
-    def to_pandas(self):
-        """
-        Convert to a Pandas Index.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> idx = cudf.Index([-3, 10, 15, 20])
-        >>> idx
-        Int64Index([-3, 10, 15, 20], dtype='int64')
-        >>> idx.to_pandas()
-        Int64Index([-3, 10, 15, 20], dtype='int64')
-        >>> type(idx.to_pandas())
-        <class 'pandas.core.indexes.numeric.Int64Index'>
-        >>> type(idx)
-        <class 'cudf.core.index.GenericIndex'>
-        """
-        return pd.Index(self._values.to_pandas(), name=self.name)
-
-    @ioutils.doc_to_dlpack()
-    def to_dlpack(self):
-        """{docstring}"""
-
-        return cudf.io.dlpack.to_dlpack(self)
-
-    @property
-    def gpu_values(self):
-        """
-        View the data as a numba device array object
-        """
-        return self._values.data_array_view
-
-    def append(self, other):
-        """
-        Append a collection of Index options together.
-
-        Parameters
-        ----------
-        other : Index or list/tuple of indices
-
-        Returns
-        -------
-        appended : Index
-
-        Examples
-        --------
-        >>> import cudf
-        >>> idx = cudf.Index([1, 2, 10, 100])
-        >>> idx
-        Int64Index([1, 2, 10, 100], dtype='int64')
-        >>> other = cudf.Index([200, 400, 50])
-        >>> other
-        Int64Index([200, 400, 50], dtype='int64')
-        >>> idx.append(other)
-        Int64Index([1, 2, 10, 100, 200, 400, 50], dtype='int64')
-
-        append accepts list of Index objects
-
-        >>> idx.append([other, other])
-        Int64Index([1, 2, 10, 100, 200, 400, 50, 200, 400, 50], dtype='int64')
-        """
-
-        if is_list_like(other):
-            to_concat = [self]
-            to_concat.extend(other)
-        else:
-            this = self
-            if len(other) == 0:
-                # short-circuit and return a copy
-                to_concat = [self]
-
-            other = as_index(other)
-
-            if len(self) == 0:
-                to_concat = [other]
-
-            if len(self) and len(other):
-                if is_mixed_with_object_dtype(this, other):
-                    got_dtype = (
-                        other.dtype
-                        if this.dtype == cudf.dtype("object")
-                        else this.dtype
-                    )
-                    raise TypeError(
-                        f"cudf does not support appending an Index of "
-                        f"dtype `{cudf.dtype('object')}` with an Index "
-                        f"of dtype `{got_dtype}`, please type-cast "
-                        f"either one of them to same dtypes."
-                    )
-
-                if isinstance(self._values, cudf.core.column.NumericalColumn):
-                    if self.dtype != other.dtype:
-                        this, other = numeric_normalize_types(self, other)
-                to_concat = [this, other]
-
-        for obj in to_concat:
-            if not isinstance(obj, BaseIndex):
-                raise TypeError("all inputs must be Index")
-
-        return self._concat(to_concat)
-
-    def difference(self, other, sort=None):
-        """
-        Return a new Index with elements from the index that are not in
-        `other`.
-
-        This is the set difference of two Index objects.
-
-        Parameters
-        ----------
-        other : Index or array-like
-        sort : False or None, default None
-            Whether to sort the resulting index. By default, the
-            values are attempted to be sorted, but any TypeError from
-            incomparable elements is caught by cudf.
-
-            * None : Attempt to sort the result, but catch any TypeErrors
-              from comparing incomparable elements.
-            * False : Do not sort the result.
-
-        Returns
-        -------
-        difference : Index
-
-        Examples
-        --------
-        >>> import cudf
-        >>> idx1 = cudf.Index([2, 1, 3, 4])
-        >>> idx1
-        Int64Index([2, 1, 3, 4], dtype='int64')
-        >>> idx2 = cudf.Index([3, 4, 5, 6])
-        >>> idx2
-        Int64Index([3, 4, 5, 6], dtype='int64')
-        >>> idx1.difference(idx2)
-        Int64Index([1, 2], dtype='int64')
-        >>> idx1.difference(idx2, sort=False)
-        Int64Index([2, 1], dtype='int64')
-        """
-        if sort not in {None, False}:
-            raise ValueError(
-                f"The 'sort' keyword only takes the values "
-                f"of None or False; {sort} was passed."
-            )
-
-        other = as_index(other)
-
-        if is_mixed_with_object_dtype(self, other):
-            difference = self.copy()
-        else:
-            difference = self.join(other, how="leftanti")
-            if self.dtype != other.dtype:
-                difference = difference.astype(self.dtype)
-
-        if sort is None:
-            return difference.sort_values()
-
-        return difference
-
-    def sort_values(self, return_indexer=False, ascending=True, key=None):
-        """
-        Return a sorted copy of the index, and optionally return the indices
-        that sorted the index itself.
-
-        Parameters
-        ----------
-        return_indexer : bool, default False
-            Should the indices that would sort the index be returned.
-        ascending : bool, default True
-            Should the index values be sorted in an ascending order.
-        key : None, optional
-            This parameter is NON-FUNCTIONAL.
-
-        Returns
-        -------
-        sorted_index : Index
-            Sorted copy of the index.
-        indexer : cupy.ndarray, optional
-            The indices that the index itself was sorted by.
-
-        See Also
-        --------
-        cudf.Series.min : Sort values of a Series.
-        cudf.DataFrame.sort_values : Sort values in a DataFrame.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> idx = cudf.Index([10, 100, 1, 1000])
-        >>> idx
-        Int64Index([10, 100, 1, 1000], dtype='int64')
-
-        Sort values in ascending order (default behavior).
-
-        >>> idx.sort_values()
-        Int64Index([1, 10, 100, 1000], dtype='int64')
-
-        Sort values in descending order, and also get the indices `idx` was
-        sorted by.
-
-        >>> idx.sort_values(ascending=False, return_indexer=True)
-        (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2],
-                                                            dtype=int32))
-
-        Sorting values in a MultiIndex:
-
-        >>> midx = cudf.MultiIndex(
-        ...      levels=[[1, 3, 4, -10], [1, 11, 5]],
-        ...      codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]],
-        ...      names=["x", "y"],
-        ... )
-        >>> midx
-        MultiIndex([(  1,  1),
-                    (  1,  5),
-                    (  3, 11),
-                    (  4, 11),
-                    (-10,  1)],
-                   names=['x', 'y'])
-        >>> midx.sort_values()
-        MultiIndex([(-10,  1),
-                    (  1,  1),
-                    (  1,  5),
-                    (  3, 11),
-                    (  4, 11)],
-                   names=['x', 'y'])
-        >>> midx.sort_values(ascending=False)
-        MultiIndex([(  4, 11),
-                    (  3, 11),
-                    (  1,  5),
-                    (  1,  1),
-                    (-10,  1)],
-                   names=['x', 'y'])
-        """
-        if key is not None:
-            raise NotImplementedError("key parameter is not yet implemented.")
-
-        indices = self._values.argsort(ascending=ascending)
-        index_sorted = as_index(self.take(indices), name=self.name)
-
-        if return_indexer:
-            return index_sorted, cupy.asarray(indices)
-        else:
-            return index_sorted
-
-    def unique(self):
-        """
-        Return unique values in the index.
-
-        Returns
-        -------
-        Index without duplicates
-        """
-        return as_index(self._values.unique(), name=self.name)
-
-    def join(
-        self, other, how="left", level=None, return_indexers=False, sort=False
-    ):
-        """
-        Compute join_index and indexers to conform data structures
-        to the new index.
-
-        Parameters
-        ----------
-        other : Index.
-        how : {'left', 'right', 'inner', 'outer'}
-        return_indexers : bool, default False
-        sort : bool, default False
-            Sort the join keys lexicographically in the result Index. If False,
-            the order of the join keys depends on the join type (how keyword).
-
-        Returns: index
-
-        Examples
-        --------
-        >>> import cudf
-        >>> lhs = cudf.DataFrame(
-        ...     {"a":[2, 3, 1], "b":[3, 4, 2]}).set_index(['a', 'b']
-        ... ).index
-        >>> lhs
-        MultiIndex([(2, 3),
-                    (3, 4),
-                    (1, 2)],
-                   names=['a', 'b'])
-        >>> rhs = cudf.DataFrame({"a":[1, 4, 3]}).set_index('a').index
-        >>> rhs
-        Int64Index([1, 4, 3], dtype='int64', name='a')
-        >>> lhs.join(rhs, how='inner')
-        MultiIndex([(3, 4),
-                    (1, 2)],
-                   names=['a', 'b'])
-        """
-
-        if isinstance(self, cudf.MultiIndex) and isinstance(
-            other, cudf.MultiIndex
-        ):
-            raise TypeError(
-                "Join on level between two MultiIndex objects is ambiguous"
-            )
-
-        if level is not None and not is_scalar(level):
-            raise ValueError("level should be an int or a label only")
-
-        if isinstance(other, cudf.MultiIndex):
-            if how == "left":
-                how = "right"
-            elif how == "right":
-                how = "left"
-            rhs = self.copy(deep=False)
-            lhs = other.copy(deep=False)
-        else:
-            lhs = self.copy(deep=False)
-            rhs = other.copy(deep=False)
-
-        on = level
-        # In case of MultiIndex, it will be None as
-        # we don't need to update name
-        left_names = lhs.names
-        right_names = rhs.names
-        # There should be no `None` values in Joined indices,
-        # so essentially it would be `left/right` or 'inner'
-        # in case of MultiIndex
-        if isinstance(lhs, cudf.MultiIndex):
-            if level is not None and isinstance(level, int):
-                on = lhs._data.select_by_index(level).names[0]
-            right_names = (on,) or right_names
-            on = right_names[0]
-            if how == "outer":
-                how = "left"
-            elif how == "right":
-                how = "inner"
-        else:
-            # Both are nomal indices
-            right_names = left_names
-            on = right_names[0]
-
-        lhs.names = left_names
-        rhs.names = right_names
-
-        output = lhs._merge(rhs, how=how, on=on, sort=sort)
-
-        return output
-
-    def rename(self, name, inplace=False):
-        """
-        Alter Index name.
-
-        Defaults to returning new index.
-
-        Parameters
-        ----------
-        name : label
-            Name(s) to set.
-
-        Returns
-        -------
-        Index
-
-        Examples
-        --------
-        >>> import cudf
-        >>> index = cudf.Index([1, 2, 3], name='one')
-        >>> index
-        Int64Index([1, 2, 3], dtype='int64', name='one')
-        >>> index.name
-        'one'
-        >>> renamed_index = index.rename('two')
-        >>> renamed_index
-        Int64Index([1, 2, 3], dtype='int64', name='two')
-        >>> renamed_index.name
-        'two'
-        """
-        if inplace is True:
-            self.name = name
-            return None
-        else:
-            out = self.copy(deep=False)
-            out.name = name
-            return out.copy(deep=True)
-
-    def astype(self, dtype, copy=False):
-        """
-        Create an Index with values cast to dtypes. The class of a new Index
-        is determined by dtype. When conversion is impossible, a ValueError
-        exception is raised.
-
-        Parameters
-        ----------
-        dtype : numpy dtype
-            Use a numpy.dtype to cast entire Index object to.
-        copy : bool, default False
-            By default, astype always returns a newly allocated object.
-            If copy is set to False and internal requirements on dtype are
-            satisfied, the original data is used to create a new Index
-            or the original Index is returned.
-
-        Returns
-        -------
-        Index
-            Index with values cast to specified dtype.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> index = cudf.Index([1, 2, 3])
-        >>> index
-        Int64Index([1, 2, 3], dtype='int64')
-        >>> index.astype('float64')
-        Float64Index([1.0, 2.0, 3.0], dtype='float64')
-        """
-        if is_dtype_equal(dtype, self.dtype):
-            return self.copy(deep=copy)
-
-        return as_index(
-            self.copy(deep=copy)._values.astype(dtype), name=self.name
-        )
-
-    def to_array(self, fillna=None):
-        """Get a dense numpy array for the data.
-
-        Parameters
-        ----------
-        fillna : str or None
-            Defaults to None, which will skip null values.
-            If it equals "pandas", null values are filled with NaNs.
-            Non integral dtype is promoted to np.float64.
-
-        Notes
-        -----
-
-        if ``fillna`` is ``None``, null values are skipped.  Therefore, the
-        output size could be smaller.
-        """
-        return self._values.to_array(fillna=fillna)
-
-    def to_series(self, index=None, name=None):
-        """
-        Create a Series with both index and values equal to the index keys.
-        Useful with map for returning an indexer based on an index.
-
-        Parameters
-        ----------
-        index : Index, optional
-            Index of resulting Series. If None, defaults to original index.
-        name : str, optional
-            Dame of resulting Series. If None, defaults to name of original
-            index.
-
-        Returns
-        -------
-        Series
-            The dtype will be based on the type of the Index values.
-        """
-        return cudf.Series(
-            self._values,
-            index=self.copy(deep=False) if index is None else index,
-            name=self.name if name is None else name,
-        )
-
-    def get_slice_bound(self, label, side, kind):
-        """
-        Calculate slice bound that corresponds to given label.
-        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
-        of given label.
-
-        Parameters
-        ----------
-        label : object
-        side : {'left', 'right'}
-        kind : {'ix', 'loc', 'getitem'}
-
-        Returns
-        -------
-        int
-            Index of label.
-        """
-        raise (NotImplementedError)
-
-    def __array_function__(self, func, types, args, kwargs):
-
-        # check if the function is implemented for the current type
-        cudf_index_module = type(self)
-        for submodule in func.__module__.split(".")[1:]:
-            # point cudf_index_module to the correct submodule
-            if hasattr(cudf_index_module, submodule):
-                cudf_index_module = getattr(cudf_index_module, submodule)
-            else:
-                return NotImplemented
-
-        fname = func.__name__
-
-        handled_types = [Index, cudf.Series]
-
-        # check if  we don't handle any of the types (including sub-class)
-        for t in types:
-            if not any(
-                issubclass(t, handled_type) for handled_type in handled_types
-            ):
-                return NotImplemented
-
-        if hasattr(cudf_index_module, fname):
-            cudf_func = getattr(cudf_index_module, fname)
-            # Handle case if cudf_func is same as numpy function
-            if cudf_func is func:
-                return NotImplemented
-            else:
-                return cudf_func(*args, **kwargs)
-
-        else:
-            return NotImplemented
-
-    def isin(self, values):
-        """Return a boolean array where the index values are in values.
-
-        Compute boolean array of whether each index value is found in
-        the passed set of values. The length of the returned boolean
-        array matches the length of the index.
-
-        Parameters
-        ----------
-        values : set, list-like, Index
-            Sought values.
-
-        Returns
-        -------
-        is_contained : cupy array
-            CuPy array of boolean values.
-
-        Examples
-        --------
-        >>> idx = cudf.Index([1,2,3])
-        >>> idx
-        Int64Index([1, 2, 3], dtype='int64')
-
-        Check whether each index value in a list of values.
-
-        >>> idx.isin([1, 4])
-        array([ True, False, False])
-        """
-
-        return self._values.isin(values).values
-
-    def memory_usage(self, deep=False):
-        """
-        Memory usage of the values.
-
-        Parameters
-        ----------
-            deep : bool
-                Introspect the data deeply,
-                interrogate `object` dtypes for system-level
-                memory consumption.
-
-        Returns
-        -------
-            bytes used
-        """
-        return self._values._memory_usage(deep=deep)
-
-    @classmethod
-    def from_pandas(cls, index, nan_as_null=None):
-        """
-        Convert from a Pandas Index.
-
-        Parameters
-        ----------
-        index : Pandas Index object
-            A Pandas Index object which has to be converted
-            to cuDF Index.
-        nan_as_null : bool, Default None
-            If ``None``/``True``, converts ``np.nan`` values
-            to ``null`` values.
-            If ``False``, leaves ``np.nan`` values as is.
-
-        Raises
-        ------
-        TypeError for invalid input type.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> import pandas as pd
-        >>> import numpy as np
-        >>> data = [10, 20, 30, np.nan]
-        >>> pdi = pd.Index(data)
-        >>> cudf.Index.from_pandas(pdi)
-        Float64Index([10.0, 20.0, 30.0, <NA>], dtype='float64')
-        >>> cudf.Index.from_pandas(pdi, nan_as_null=False)
-        Float64Index([10.0, 20.0, 30.0, nan], dtype='float64')
-        """
-        if not isinstance(index, pd.Index):
-            raise TypeError("not a pandas.Index")
-
-        ind = as_index(column.as_column(index, nan_as_null=nan_as_null))
-        ind.name = index.name
-        return ind
-
-    @property
-    def _constructor_expanddim(self):
-        return cudf.MultiIndex
-
-
 class RangeIndex(BaseIndex):
     """
     Immutable Index implementing a monotonic integer range.

From ec2dfae39aac48f6408c0d3d21b9a881004c10bb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 27 Aug 2021 08:20:27 -0700
Subject: [PATCH 25/28] Remove _data property in favor of a simple type
 annotation in _BaseIndex.

---
 python/cudf/cudf/_lib/table.pyx      | 10 +---------
 python/cudf/cudf/core/_base_index.py |  6 ++----
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx
index 6175df79927..2981a46a54a 100644
--- a/python/cudf/cudf/_lib/table.pyx
+++ b/python/cudf/cudf/_lib/table.pyx
@@ -34,17 +34,9 @@ cdef class Table:
         """
         if data is None:
             data = {}
-        self._column_accessor = cudf.core.column_accessor.ColumnAccessor(data)
+        self._data = cudf.core.column_accessor.ColumnAccessor(data)
         self._index = index
 
-    @property
-    def _data(self) -> cudf.core.column_accessor.ColumnAccessor:
-        return self._column_accessor
-
-    @_data.setter
-    def _data(self, value):
-        self._column_accessor = value
-
     @property
     def _num_columns(self):
         return len(self._data)
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 83f71ef27c1..5f12cbaf21f 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -13,6 +13,7 @@
 from cudf.api.types import is_dtype_equal, is_integer
 from cudf.core.abc import Serializable
 from cudf.core.column import ColumnBase, column
+from cudf.core.column_accessor import ColumnAccessor
 from cudf.utils import ioutils
 from cudf.utils.dtypes import (
     is_list_like,
@@ -28,6 +29,7 @@ class BaseIndex(Serializable):
 
     dtype: DtypeObj
     _accessors: Set[Any] = set()
+    _data: ColumnAccessor
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
 
@@ -41,10 +43,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
     def _values(self) -> ColumnBase:
         raise NotImplementedError
 
-    @property
-    def _data(self) -> cudf.core.column_accessor.ColumnAccessor:
-        raise NotImplementedError
-
     def copy(self, deep: bool = True) -> BaseIndex:
         raise NotImplementedError
 

From 0c4ba67bd2b5c9ae8b105be765863eb7770fab2d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 27 Aug 2021 08:26:59 -0700
Subject: [PATCH 26/28] Remove redundant docstring.

---
 python/cudf/cudf/core/index.py | 27 +--------------------------
 1 file changed, 1 insertion(+), 26 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index c0e0d6c025b..f302db4e0fe 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -260,31 +260,6 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
         )
 
     def drop_duplicates(self, keep="first"):
-        """
-        Return Index with duplicate values removed
-
-        Parameters
-        ----------
-        keep : {‘first’, ‘last’, False}, default ‘first’
-            * ‘first’ : Drop duplicates except for the
-                first occurrence.
-            * ‘last’ : Drop duplicates except for the
-                last occurrence.
-            *  False : Drop all duplicates.
-
-        Returns
-        -------
-        deduplicated : RangeIndex
-
-        Examples
-        --------
-        >>> import cudf
-        >>> idx = cudf.RangeIndex(0, 10)
-        >>> idx
-        RangeIndex(start=0, stop=10, step=1)
-        >>> idx.drop_duplicates()
-        RangeIndex(start=0, stop=10, step=1)
-        """  # noqa: E501
         return self
 
     def __repr__(self):
@@ -630,7 +605,7 @@ def drop_duplicates(self, keep="first"):
 
         Returns
         -------
-        deduplicated : Index
+        Index
 
         Examples
         --------

From 5064dfefc939ebfc4c1b18810d95ebf086bc985d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 27 Aug 2021 13:30:12 -0700
Subject: [PATCH 27/28] Add test of RangeIndex.get_loc.

---
 python/cudf/cudf/tests/test_index.py | 29 ++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 134130406ae..cdb1bd124fd 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -2098,6 +2098,35 @@ def test_get_loc_single_unique_numeric(idx, key, method):
         assert_eq(expected, got)
 
 
+@pytest.mark.parametrize(
+    "idx", [pd.RangeIndex(3, 100, 4)],
+)
+@pytest.mark.parametrize("key", list(range(1, 110, 3)))
+@pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"])
+def test_get_loc_rangeindex(idx, key, method):
+    pi = idx
+    gi = cudf.from_pandas(pi)
+
+    if (
+        (key not in pi and method is None)
+        # ffill with a key before the first element raises KeyError
+        or (key < pi.start and method in "ffill")
+        # bfill with a key at or beyond stop raises KeyError
+        or (key >= pi.stop and method in "bfill")
+    ):
+        assert_exceptions_equal(
+            lfunc=pi.get_loc,
+            rfunc=gi.get_loc,
+            lfunc_args_and_kwargs=([], {"key": key, "method": method}),
+            rfunc_args_and_kwargs=([], {"key": key, "method": method}),
+        )
+    else:
+        expected = pi.get_loc(key, method=method)
+        got = gi.get_loc(key, method=method)
+
+        assert_eq(expected, got)
+
+
 @pytest.mark.parametrize(
     "idx",
     [

From 0d029f0c5ae0097762a52bed249e4474178c7ab2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 27 Aug 2021 14:09:56 -0700
Subject: [PATCH 28/28] Implement get_loc for RangeIndex.

---
 python/cudf/cudf/core/index.py       | 35 +++++++++++++++++++++++++---
 python/cudf/cudf/tests/test_index.py |  2 +-
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index f302db4e0fe..6b4b77fabc5 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations, division, print_function
 
+import math
 import pickle
 from numbers import Number
 from typing import (
@@ -506,9 +507,37 @@ def __getattr__(self, key):
             )
 
     def get_loc(self, key, method=None, tolerance=None):
-        return self._as_int64().get_loc(
-            key, method=method, tolerance=tolerance
-        )
+        # Fractional position of key relative to start, in units of step.
+        idx = (key - self._start) / self._step
+        idx_int_upper_bound = (self._stop - self._start) // self._step
+        if method is None:
+            if tolerance is not None:
+                raise ValueError(
+                    "tolerance argument only valid if using pad, "
+                    "backfill or nearest lookups"
+                )
+
+            if idx > idx_int_upper_bound or idx < 0:
+                raise KeyError(key)
+
+            idx_int = (key - self._start) // self._step
+            if idx_int != idx:
+                raise KeyError(key)
+            return idx_int
+
+        if (method == "ffill" and idx < 0) or (
+            method == "bfill" and idx > idx_int_upper_bound
+        ):
+            raise KeyError(key)
+
+        round_method = {
+            "ffill": math.floor,
+            "bfill": math.ceil,
+            "nearest": round,
+        }[method]
+        if tolerance is not None and (abs(idx) * self._step > tolerance):
+            raise KeyError(key)
+        return np.clip(round_method(idx), 0, idx_int_upper_bound, dtype=int)
 
 
 # Patch in all binops and unary ops, which bypass __getattr__ on the instance
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index cdb1bd124fd..29b39fbd195 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -2102,7 +2102,7 @@ def test_get_loc_single_unique_numeric(idx, key, method):
     "idx", [pd.RangeIndex(3, 100, 4)],
 )
 @pytest.mark.parametrize("key", list(range(1, 110, 3)))
-@pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"])
+@pytest.mark.parametrize("method", [None, "ffill"])
 def test_get_loc_rangeindex(idx, key, method):
     pi = idx
     gi = cudf.from_pandas(pi)