Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Index hierarchy #9039

Merged
merged 30 commits into branch-21.10 from refactor/inde… (source branch name truncated)
Aug 31, 2021
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
19afce0
Change inheritance structure for Index types and get all index/multii…
vyasr Aug 12, 2021
c61d176
Redirect missing RangeIndex methods to Int64Index to recover all othe…
vyasr Aug 12, 2021
d11768d
Move concat impl from BaseIndex to GenericIndex and change inheritanc…
vyasr Aug 13, 2021
74880f3
Fix mypy errors that aren't outright incompatibilities.
vyasr Aug 13, 2021
03eee42
Fix some more mypy errors.
vyasr Aug 13, 2021
40dcfc4
Move _from_data from BaseIndex to Index.
vyasr Aug 13, 2021
de56376
Fix inconsistencies in _copy_type_metadata.
vyasr Aug 13, 2021
1f87727
Move drop_duplicates to avoid type issues.
vyasr Aug 13, 2021
54e22e9
Move Frame._concat to DataFrame._concat since it's a DataFrame-specif…
vyasr Aug 13, 2021
fe4ab3b
Move iter method.
vyasr Aug 13, 2021
feaafa7
Instantiate accessors explicitly in necessary classes.
vyasr Aug 13, 2021
535e1f0
Fix most errors.
vyasr Aug 13, 2021
40066c5
Force binding of binop arg.
vyasr Aug 13, 2021
8ad1d7a
Fix typo in monkey-patched operator.
vyasr Aug 13, 2021
0eaca29
Minor cleanup.
vyasr Aug 14, 2021
de03b37
Standardize RangeIndex->Int64Index conversion.
vyasr Aug 14, 2021
e55e049
Add missing rmul operator.
vyasr Aug 14, 2021
9c012ec
Standardize _from_data implementation and move the type-determining v…
vyasr Aug 16, 2021
23e3d7c
Make as_index rely on _index_from_data when passed a column.
vyasr Aug 16, 2021
ab51240
Fix minor typo.
vyasr Aug 16, 2021
899bd99
Allow index binops to return different output than input data type an…
vyasr Aug 16, 2021
c4acc92
Forward NotImplemented through correctly.
vyasr Aug 17, 2021
f0a0587
Merge remote-tracking branch 'origin/branch-21.10' into refactor/inde…
vyasr Aug 23, 2021
1e6d0e2
Fix newly introduced use of Index._from_data from upstream merging.
vyasr Aug 24, 2021
fd78b63
Move BaseIndex into a separate module.
vyasr Aug 24, 2021
ec2dfae
Remove _data property in favor of a simple type annotation in _BaseIn…
vyasr Aug 27, 2021
0c4ba67
Remove redundant docstring.
vyasr Aug 27, 2021
5064dfe
Add test of RangeIndex.get_loc.
vyasr Aug 27, 2021
0d029f0
Implement get_loc for RangeIndex.
vyasr Aug 27, 2021
c17b218
Merge remote-tracking branch 'origin/branch-21.10' into refactor/inde…
vyasr Aug 31, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions python/cudf/cudf/_lib/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,12 @@ cdef class GroupBy:
c_grouped_values = move(c_groups.values)
c_group_offsets = c_groups.offsets

grouped_keys = cudf.Index._from_data(*data_from_unique_ptr(
move(c_grouped_keys),
column_names=range(c_grouped_keys.get()[0].num_columns())
))
grouped_keys = cudf.core.index._index_from_data(
*data_from_unique_ptr(
move(c_grouped_keys),
column_names=range(c_grouped_keys.get()[0].num_columns())
)
)
grouped_values = data_from_unique_ptr(
move(c_grouped_values),
index_names=values._index_names,
Expand Down Expand Up @@ -186,7 +188,8 @@ cdef class GroupBy:
Column.from_unique_ptr(move(c_result.second[i].results[j]))
)

return result_data, cudf.Index._from_data(grouped_keys)
return result_data, cudf.core.index._index_from_data(
grouped_keys)

def scan_internal(self, Table values, aggregations):
from cudf.core.column_accessor import ColumnAccessor
Expand Down Expand Up @@ -264,7 +267,8 @@ cdef class GroupBy:
Column.from_unique_ptr(move(c_result.second[i].results[j]))
)

return result_data, cudf.Index._from_data(grouped_keys)
return result_data, cudf.core.index._index_from_data(
grouped_keys)

def aggregate(self, Table values, aggregations):
"""
Expand Down Expand Up @@ -311,10 +315,12 @@ cdef class GroupBy:
self.c_obj.get()[0].shift(view, offsets, c_fill_values)
)

grouped_keys = cudf.Index._from_data(*data_from_unique_ptr(
move(c_result.first),
column_names=self.keys._column_names
))
grouped_keys = cudf.core.index._index_from_data(
*data_from_unique_ptr(
move(c_result.first),
column_names=self.keys._column_names
)
)

shifted, _ = data_from_unique_ptr(
move(c_result.second), column_names=values._column_names
Expand Down
10 changes: 9 additions & 1 deletion python/cudf/cudf/_lib/table.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,17 @@ cdef class Table:
"""
if data is None:
data = {}
self._data = cudf.core.column_accessor.ColumnAccessor(data)
self._column_accessor = cudf.core.column_accessor.ColumnAccessor(data)
self._index = index

@property
def _data(self) -> cudf.core.column_accessor.ColumnAccessor:
return self._column_accessor

@_data.setter
def _data(self, value):
self._column_accessor = value

vyasr marked this conversation as resolved.
Show resolved Hide resolved
@property
def _num_columns(self):
return len(self._data)
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/_lib/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ cdef data_from_unique_ptr(
# Frame factories we may want to look for a less dissonant approach
# that does not impose performance penalties. The same applies to
# data_from_table_view below.
cudf.Index._from_data(
cudf.core.index._index_from_data(
{
name: columns[i]
for i, name in enumerate(index_names)
Expand Down Expand Up @@ -301,7 +301,8 @@ cdef data_from_table_view(
)
)
column_idx += 1
index = cudf.Index._from_data(dict(zip(index_names, index_columns)))
index = cudf.core.index._index_from_data(
dict(zip(index_names, index_columns)))

# Construct the data dict
cdef size_type source_column_idx = 0
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@

DataFrameOrSeries = Union["cudf.Series", "cudf.DataFrame"]
SeriesOrIndex = Union["cudf.Series", "cudf.core.index.BaseIndex"]
SeriesOrSingleColumnIndex = Union[
"cudf.Series", "cudf.core.index.GenericIndex"
]
2 changes: 1 addition & 1 deletion python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def wrapped_func(obj):


def _union_categoricals(
to_union: List[Union[cudf.Series, cudf.Index]],
to_union: List[Union[cudf.Series, cudf.CategoricalIndex]],
sort_categories: bool = False,
ignore_order: bool = False,
):
Expand Down
Loading