Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a common code path for 1d Frames #8115

Merged
merged 22 commits into from
May 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 3 additions & 18 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import warnings
from collections import defaultdict
from collections.abc import Iterable, Sequence
from typing import Any, Optional, Set, TypeVar
from typing import Any, Optional, TypeVar

import cupy
import numpy as np
Expand Down Expand Up @@ -554,6 +554,7 @@ def _align_input_series_indices(data, index):

return data, index

# The `constructor*` properties are used by `dask` (and `dask_cudf`)
@property
def _constructor(self):
return DataFrame
Expand Down Expand Up @@ -1456,7 +1457,7 @@ def _get_columns_by_label(self, labels, downcast=False):
new_data, index=self.index, name=labels
)
return out
out = self._constructor()._from_data(
out = self.__class__()._from_data(
new_data, index=self.index, columns=new_data.to_pandas_index()
)
return out
Expand Down Expand Up @@ -3158,20 +3159,6 @@ def take(self, positions, keep_index=True):
out.columns = self.columns
return out

def __copy__(self):
vyasr marked this conversation as resolved.
Show resolved Hide resolved
return self.copy(deep=True)

def __deepcopy__(self, memo=None):
"""
Parameters
----------
memo, default None
Standard signature. Unused
"""
if memo is None:
memo = {}
return self.copy(deep=True)

@annotate("INSERT", color="green", domain="cudf_python")
def insert(self, loc, name, value):
""" Add a column to DataFrame at the index specified by loc.
Expand Down Expand Up @@ -7769,8 +7756,6 @@ def explode(self, column, ignore_index=False):

return super()._explode(column, ignore_index)

_accessors = set() # type: Set[Any]


def from_pandas(obj, nan_as_null=None):
"""
Expand Down
213 changes: 212 additions & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ class Frame(libcudf.table.Table):

_data: "ColumnAccessor"

@classmethod
def __init_subclass__(cls):
# All subclasses contain a set _accessors that is used to hold custom
# accessors defined by user APIs (see cudf/api/extensions/accessor.py).
cls._accessors = set()

@classmethod
def _from_table(cls, table: Frame):
return cls(table._data, index=table._index)
Expand Down Expand Up @@ -608,7 +614,7 @@ def _get_columns_by_index(self, indices):

"""
data = self._data.select_by_index(indices)
return self._constructor(
return self.__class__(
data, columns=data.to_pandas_index(), index=self.index
)

Expand Down Expand Up @@ -3295,6 +3301,211 @@ def _reindex(
return self._mimic_inplace(result, inplace=inplace)


class SingleColumnFrame(Frame):
"""A one-dimensional frame.

Frames with only a single column share certain logic that is encoded in
this class.
"""

@property
def name(self):
"""The name of this object."""
return next(iter(self._data.names))

@name.setter
def name(self, value):
self._data[value] = self._data.pop(self.name)

@property
def ndim(self):
"""Dimension of the data (always 1)."""
return 1

@property
def shape(self):
"""Returns a tuple representing the dimensionality of the Index.
"""
return (len(self),)

def __iter__(self):
cudf.utils.utils.raise_iteration_error(obj=self)

def __len__(self):
return len(self._column)

def __bool__(self):
raise TypeError(
f"The truth value of a {type(self)} is ambiguous. Use "
"a.empty, a.bool(), a.item(), a.any() or a.all()."
)

@property
def _column(self):
return self._data[self.name]

@_column.setter
def _column(self, value):
self._data[self.name] = value

@property
def values(self):
"""
Return a CuPy representation of the data.

Returns
-------
out : cupy.ndarray
A device representation of the underlying data.

Examples
--------
>>> import cudf
>>> ser = cudf.Series([1, -10, 100, 20])
>>> ser.values
array([ 1, -10, 100, 20])
>>> type(ser.values)
<class 'cupy.core.core.ndarray'>
>>> index = cudf.Index([1, -10, 100, 20])
>>> index.values
array([ 1, -10, 100, 20])
>>> type(index.values)
<class 'cupy.core.core.ndarray'>
"""
return self._column.values

@property
def values_host(self):
"""
Return a NumPy representation of the data.

Returns
-------
out : numpy.ndarray
A host representation of the underlying data.

Examples
--------
>>> import cudf
>>> ser = cudf.Series([1, -10, 100, 20])
>>> ser.values_host
array([ 1, -10, 100, 20])
>>> type(ser.values_host)
<class 'numpy.ndarray'>
>>> index = cudf.Index([1, -10, 100, 20])
>>> index.values_host
array([ 1, -10, 100, 20])
>>> type(index.values_host)
<class 'numpy.ndarray'>
"""
return self._column.values_host

def tolist(self):

raise TypeError(
"cuDF does not support conversion to host memory "
"via the `tolist()` method. Consider using "
"`.to_arrow().to_pylist()` to construct a Python list."
)

to_list = tolist

def to_gpu_array(self, fillna=None):
"""Get a dense numba device array for the data.

Parameters
----------
fillna : str or None
See *fillna* in ``.to_array``.

Notes
-----

if ``fillna`` is ``None``, null values are skipped. Therefore, the
output size could be smaller.

Returns
-------
numba.DeviceNDArray

Examples
--------
>>> import cudf
>>> s = cudf.Series([10, 20, 30, 40, 50])
>>> s
0 10
1 20
2 30
3 40
4 50
dtype: int64
>>> s.to_gpu_array()
<numba.cuda.cudadrv.devicearray.DeviceNDArray object at 0x7f1840858890>
"""
return self._column.to_gpu_array(fillna=fillna)

@classmethod
def from_arrow(cls, array):
"""Create from PyArrow Array/ChunkedArray.

Parameters
----------
array : PyArrow Array/ChunkedArray
PyArrow Object which has to be converted.

Raises
------
TypeError for invalid input type.

Returns
-------
SingleColumnFrame

Examples
--------
>>> import cudf
>>> import pyarrow as pa
>>> cudf.Index.from_arrow(pa.array(["a", "b", None]))
StringIndex(['a' 'b' None], dtype='object')
>>> cudf.Series.from_arrow(pa.array(["a", "b", None]))
0 a
1 b
2 <NA>
dtype: object
"""
return cls(cudf.core.column.column.ColumnBase.from_arrow(array))

def to_arrow(self):
"""
Convert to a PyArrow Array.

Returns
-------
PyArrow Array

Examples
--------
>>> import cudf
>>> sr = cudf.Series(["a", "b", None])
>>> sr.to_arrow()
<pyarrow.lib.StringArray object at 0x7f796b0e7600>
[
"a",
"b",
null
]
>>> ind = cudf.Index(["a", "b", None])
>>> ind.to_arrow()
<pyarrow.lib.StringArray object at 0x7f796b0e7750>
[
"a",
"b",
null
]
"""
return self._column.to_arrow()


def _get_replacement_values_for_columns(
to_replace: Any, value: Any, columns_dtype_map: Dict[Any, Any]
) -> Tuple[Dict[Any, bool], Dict[Any, Any], Dict[Any, Any]]:
Expand Down
Loading