Skip to content

Commit

Permalink
Use _from_data instead of _from_columns for initialzing Frame (#14755)
Browse files Browse the repository at this point in the history
In the spirit of reducing redundant methods, `_from_columns` just calls `_from_data` (hoping to rename to `_from_mapping` or similar) so removing the need for `_from_columns`.

Hoping to do the same for the `_from_columns_like_self` in a follow up.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Michael Wang (https://github.com/isVoid)

URL: #14755
  • Loading branch information
mroeschke authored Jan 18, 2024
1 parent 734ca75 commit 70cdeec
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 64 deletions.
8 changes: 5 additions & 3 deletions python/cudf/cudf/core/_internals/timezones.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import os
import zoneinfo
Expand Down Expand Up @@ -89,8 +89,10 @@ def _read_tzfile_as_frame(tzdir, zone_name):
[np.timedelta64(0, "s")]
)

return DataFrame._from_columns(
transition_times_and_offsets, ["transition_times", "offsets"]
return DataFrame._from_data(
dict(
zip(["transition_times", "offsets"], transition_times_and_offsets)
)
)


Expand Down
14 changes: 2 additions & 12 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,17 +144,6 @@ def _from_data(cls, data: MutableMapping):
def _from_data_like_self(self, data: MutableMapping):
return self._from_data(data)

@classmethod
@_cudf_nvtx_annotate
def _from_columns(
cls,
columns: List[ColumnBase],
column_names: abc.Iterable[str],
):
"""Construct a `Frame` object from a list of columns."""
data = {name: columns[i] for i, name in enumerate(column_names)}
return cls._from_data(data)

@_cudf_nvtx_annotate
def _from_columns_like_self(
self,
Expand All @@ -169,7 +158,8 @@ def _from_columns_like_self(
"""
if column_names is None:
column_names = self._column_names
frame = self.__class__._from_columns(columns, column_names)
data = dict(zip(column_names, columns))
frame = self.__class__._from_data(data)
return frame._copy_type_metadata(self, override_dtypes=override_dtypes)

@_cudf_nvtx_annotate
Expand Down
22 changes: 16 additions & 6 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2111,9 +2111,13 @@ def diff(self, periods=1, axis=0):
def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries:
"""Internal implementation for `ffill` and `bfill`"""
values = self.grouping.values
result = self.obj._from_columns(
self._groupby.replace_nulls([*values._columns], method),
values._column_names,
result = self.obj._from_data(
dict(
zip(
values._column_names,
self._groupby.replace_nulls([*values._columns], method),
)
)
)
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)
Expand Down Expand Up @@ -2305,9 +2309,15 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
else:
fill_value = [fill_value] * len(values._data)

result = self.obj.__class__._from_columns(
self._groupby.shift([*values._columns], periods, fill_value)[0],
values._column_names,
result = self.obj.__class__._from_data(
dict(
zip(
values._column_names,
self._groupby.shift(
[*values._columns], periods, fill_value
)[0],
)
)
)
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,22 +800,22 @@ def sort_values(
@_cudf_nvtx_annotate
def _gather(self, gather_map, nullify=False, check_bounds=True):
gather_map = cudf.core.column.as_column(gather_map)
return _dtype_to_index[self.dtype.type]._from_columns(
[self._values.take(gather_map, nullify, check_bounds)], [self.name]
return _dtype_to_index[self.dtype.type]._from_data(
{self.name: self._values.take(gather_map, nullify, check_bounds)}
)

@_cudf_nvtx_annotate
def _apply_boolean_mask(self, boolean_mask):
return _dtype_to_index[self.dtype.type]._from_columns(
[self._values.apply_boolean_mask(boolean_mask)], [self.name]
return _dtype_to_index[self.dtype.type]._from_data(
{self.name: self._values.apply_boolean_mask(boolean_mask)}
)

def repeat(self, repeats, axis=None):
return self._as_int_index().repeat(repeats, axis)

def _split(self, splits):
return _dtype_to_index[self.dtype.type]._from_columns(
[self._as_int_index()._split(splits)], [self.name]
return _dtype_to_index[self.dtype.type]._from_data(
{self.name: self._as_int_index()._split(splits)}
)

def _binaryop(self, other, op: str):
Expand Down
51 changes: 17 additions & 34 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,19 +291,27 @@ def _from_data_like_self(self, data: MutableMapping):
out._data._level_names = self._data._level_names
return out

@classmethod
@_cudf_nvtx_annotate
def _from_columns(
cls,
def _from_columns_like_self(
self,
columns: List[ColumnBase],
column_names: List[str],
column_names: Optional[abc.Iterable[str]] = None,
index_names: Optional[List[str]] = None,
):
"""Construct a `Frame` object from a list of columns.
*,
override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
) -> Self:
"""Construct a `Frame` from a list of columns with metadata from self.
If `index_names` is set, the first `len(index_names)` columns are
used to construct the index of the frame.
If override_dtypes is provided then any non-None entry will be
used for the dtype of the matching column in preference to the
dtype of the column in self.
"""
if column_names is None:
column_names = self._column_names

data_columns = columns
index = None

Expand All @@ -316,36 +324,11 @@ def _from_columns(
else:
index.name = index_names[0]

out = super()._from_columns(data_columns, column_names)
data = dict(zip(column_names, data_columns))
frame = self.__class__._from_data(data)

if index is not None:
out._index = index

return out

@_cudf_nvtx_annotate
def _from_columns_like_self(
self,
columns: List[ColumnBase],
column_names: Optional[abc.Iterable[str]] = None,
index_names: Optional[List[str]] = None,
*,
override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
) -> Self:
"""Construct a `Frame` from a list of columns with metadata from self.
If `index_names` is set, the first `len(index_names)` columns are
used to construct the index of the frame.
If override_dtypes is provided then any non-None entry will be
used for the dtype of the matching column in preference to the
dtype of the column in self.
"""
if column_names is None:
column_names = self._column_names
frame = self.__class__._from_columns(
columns, column_names, index_names
)
frame._index = index
return frame._copy_type_metadata(
self,
include_index=bool(index_names),
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/io/dlpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ def from_dlpack(pycapsule_obj):
"""

columns = libdlpack.from_dlpack(pycapsule_obj)
column_names = range(len(columns))
data = dict(enumerate(columns))

if len(columns) == 1:
return cudf.Series._from_columns(columns, column_names=column_names)
return cudf.Series._from_data(data)
else:
return cudf.DataFrame._from_columns(columns, column_names=column_names)
return cudf.DataFrame._from_data(data)


@ioutils.doc_to_dlpack()
Expand Down

0 comments on commit 70cdeec

Please sign in to comment.