Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Miscellaneous column cleanup #9370

Merged
merged 17 commits into from
Oct 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 0 additions & 41 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -247,47 +247,6 @@ def scatter(object source, Column scatter_map, Column target_column,
return next(iter(data.values()))


def _reverse_column(Column source_column):
    """
    Apply ``cpp_copying.reverse`` to a view of ``source_column`` and wrap
    the resulting C++ column in a new ``Column``.
    """
    cdef column_view input_view = source_column.view()
    cdef unique_ptr[column] reversed_col

    # The C++ call does not touch Python objects, so it runs without the GIL.
    with nogil:
        reversed_col = move(cpp_copying.reverse(input_view))

    return Column.from_unique_ptr(move(reversed_col))


def _reverse_table(source_table):
    """
    Apply ``cpp_copying.reverse`` to a table view built from
    ``source_table`` and rebuild the Python-side data from the result.

    The output is produced by ``data_from_unique_ptr`` using the
    ``_column_names`` and ``_index_names`` of ``source_table``.
    """
    cdef table_view input_view = table_view_from_columns(source_table)
    cdef unique_ptr[table] reversed_tbl

    # The C++ call does not touch Python objects, so it runs without the GIL.
    with nogil:
        reversed_tbl = move(cpp_copying.reverse(input_view))

    return data_from_unique_ptr(
        move(reversed_tbl),
        column_names=source_table._column_names,
        index_names=source_table._index_names,
    )


def reverse(object source):
    """
    Reverse a column or a table.

    A ``Column`` argument is handled by ``_reverse_column``; anything else
    is forwarded to ``_reverse_table``.
    """
    if not isinstance(source, Column):
        return _reverse_table(source)
    return _reverse_column(source)


def column_empty_like(Column input_column):

cdef column_view input_column_view = input_column.view()
Expand Down
8 changes: 0 additions & 8 deletions python/cudf/cudf/_lib/cpp/copying.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,6 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
out_of_bounds_policy policy
) except +

# Table overload of `reverse`: takes a `table_view` by const reference and
# returns a `unique_ptr[table]`. `except +` tells Cython to translate a C++
# exception thrown by the call into a Python exception.
cdef unique_ptr[table] reverse (
const table_view& source_table
) except +

# Column overload of `reverse`: takes a `column_view` by const reference and
# returns a `unique_ptr[column]`. `except +` tells Cython to translate a C++
# exception thrown by the call into a Python exception.
cdef unique_ptr[column] reverse (
const column_view& source_column
) except +

cdef unique_ptr[column] shift(
const column_view& input,
size_type offset,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/transpose.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def transpose(source):
if cats is not None:
data= [
(name, cudf.core.column.column.build_categorical_column(
codes=cudf.core.column.column.as_column(
codes=cudf.core.column.column.build_column(
col.base_data, dtype=col.dtype),
mask=col.base_mask,
size=col.size,
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def where(
):
result = cudf.core.column.build_categorical_column(
categories=frame._data[column_name].categories,
codes=cudf.core.column.as_column(
codes=cudf.core.column.build_column(
result.base_data, dtype=result.dtype
),
mask=result.base_mask,
Expand Down Expand Up @@ -368,7 +368,7 @@ def where(
cudf.core.column.CategoricalColumn,
frame._data[frame.name],
).categories,
codes=cudf.core.column.as_column(
codes=cudf.core.column.build_column(
result.base_data, dtype=result.dtype
),
mask=result.base_mask,
Expand Down
36 changes: 21 additions & 15 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,9 @@ def deserialize(cls, header: dict, frames: list) -> CategoricalColumn:
data=None,
dtype=dtype,
mask=mask,
children=(column.as_column(data.base_data, dtype=data.dtype),),
children=(
column.build_column(data.base_data, dtype=data.dtype),
),
),
)

Expand Down Expand Up @@ -859,7 +861,7 @@ def slice(
codes = self.codes.slice(start, stop, stride)
return cudf.core.column.build_categorical_column(
categories=self.categories,
codes=cudf.core.column.as_column(
codes=cudf.core.column.build_column(
codes.base_data, dtype=codes.dtype
),
mask=codes.base_mask,
Expand Down Expand Up @@ -910,7 +912,7 @@ def sort_by_values(
codes, inds = self.as_numerical.sort_by_values(ascending, na_position)
col = column.build_categorical_column(
categories=self.dtype.categories._values,
codes=column.as_column(codes.base_data, dtype=codes.dtype),
codes=column.build_column(codes.base_data, dtype=codes.dtype),
mask=codes.base_mask,
size=codes.size,
ordered=self.dtype.ordered,
Expand Down Expand Up @@ -1001,7 +1003,7 @@ def unique(self) -> CategoricalColumn:
codes = self.as_numerical.unique()
return column.build_categorical_column(
categories=self.categories,
codes=column.as_column(codes.base_data, dtype=codes.dtype),
codes=column.build_column(codes.base_data, dtype=codes.dtype),
mask=codes.base_mask,
offset=codes.offset,
size=codes.size,
Expand Down Expand Up @@ -1044,7 +1046,7 @@ def find_and_replace(
df = cudf.DataFrame({"old": to_replace_col, "new": replacement_col})
df = df.drop_duplicates(subset=["old"], keep="last", ignore_index=True)
if df._data["old"].null_count == 1:
fill_value = df._data["new"][df._data["old"].isna()][0]
fill_value = df._data["new"][df._data["old"].isnull()][0]
if fill_value in self.categories:
replaced = self.fillna(fill_value)
else:
Expand All @@ -1060,7 +1062,7 @@ def find_and_replace(
else:
replaced = self
if df._data["new"].null_count > 0:
drop_values = df._data["old"][df._data["new"].isna()]
drop_values = df._data["old"][df._data["new"].isnull()]
cur_categories = replaced.categories
new_categories = cur_categories[
~cudf.Series(cur_categories.isin(drop_values))
Expand Down Expand Up @@ -1096,7 +1098,7 @@ def find_and_replace(
# those categories don't exist anymore
# Resetting the index creates a column 'index' that associates
# the original integers to the new labels
bmask = new_cats._data["cats"].notna()
bmask = new_cats._data["cats"].notnull()
new_cats = cudf.DataFrame(
{"cats": new_cats._data["cats"].apply_boolean_mask(bmask)}
).reset_index()
Expand All @@ -1123,7 +1125,7 @@ def find_and_replace(

return column.build_categorical_column(
categories=new_cats["cats"],
codes=column.as_column(output.base_data, dtype=output.dtype),
codes=column.build_column(output.base_data, dtype=output.dtype),
mask=output.base_mask,
offset=output.offset,
size=output.size,
Expand Down Expand Up @@ -1205,7 +1207,7 @@ def fillna(

result = column.build_categorical_column(
categories=self.dtype.categories._values,
codes=column.as_column(result.base_data, dtype=result.dtype),
codes=column.build_column(result.base_data, dtype=result.dtype),
offset=result.offset,
size=result.size,
mask=result.base_mask,
Expand Down Expand Up @@ -1301,7 +1303,7 @@ def copy(self, deep: bool = True) -> CategoricalColumn:

return column.build_categorical_column(
categories=copied_cat,
codes=column.as_column(
codes=column.build_column(
copied_col.base_data, dtype=copied_col.dtype
),
offset=copied_col.offset,
Expand All @@ -1312,7 +1314,7 @@ def copy(self, deep: bool = True) -> CategoricalColumn:
else:
return column.build_categorical_column(
categories=self.dtype.categories._values,
codes=column.as_column(
codes=column.build_column(
self.codes.base_data, dtype=self.codes.dtype
),
mask=self.base_mask,
Expand Down Expand Up @@ -1374,7 +1376,9 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn:

return column.build_categorical_column(
categories=column.as_column(cats),
codes=column.as_column(codes_col.base_data, dtype=codes_col.dtype),
codes=column.build_column(
codes_col.base_data, dtype=codes_col.dtype
),
mask=codes_col.base_mask,
size=codes_col.size,
offset=codes_col.offset,
Expand All @@ -1386,7 +1390,7 @@ def _with_type_metadata(
if isinstance(dtype, CategoricalDtype):
return column.build_categorical_column(
categories=dtype.categories._values,
codes=column.as_column(
codes=column.build_column(
self.codes.base_data, dtype=self.codes.dtype
),
mask=self.codes.base_mask,
Expand Down Expand Up @@ -1522,7 +1526,9 @@ def _set_categories(
# codes can't have masks, so take mask out before moving in
return column.build_categorical_column(
categories=new_cats,
codes=column.as_column(new_codes.base_data, dtype=new_codes.dtype),
codes=column.build_column(
new_codes.base_data, dtype=new_codes.dtype
),
mask=new_codes.base_mask,
size=new_codes.size,
offset=new_codes.offset,
Expand Down Expand Up @@ -1609,7 +1615,7 @@ def pandas_categorical_as_column(

return column.build_categorical_column(
categories=categorical.categories,
codes=column.as_column(codes.base_data, dtype=codes.dtype),
codes=column.build_column(codes.base_data, codes.dtype),
size=codes.size,
mask=mask,
ordered=categorical.ordered,
Expand Down
Loading