Skip to content

Commit

Permalink
Enable transpose for string columns in cudf python (#9937)
Browse files Browse the repository at this point in the history
Fixes: #9930 

This PR enables transposing string columns in cudf python.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Christopher Harris (https://github.com/cwharris)

URL: #9937
  • Loading branch information
galipremsagar authored Jan 4, 2022
1 parent 36fa5f3 commit b1ae789
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
11 changes: 5 additions & 6 deletions python/cudf/cudf/_lib/transpose.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,19 @@ def transpose(source):
return source

cats = None
dtype = source._columns[0].dtype
columns = source._columns
dtype = columns[0].dtype

if is_categorical_dtype(dtype):
if any(not is_categorical_dtype(c.dtype) for c in source._columns):
if any(not is_categorical_dtype(c.dtype) for c in columns):
raise ValueError('Columns must all have the same dtype')
cats = list(c.categories for c in source._columns)
cats = list(c.categories for c in columns)
cats = cudf.core.column.concat_columns(cats).unique()
source = cudf.core.frame.Frame(index=source._index, data=[
(name, col._set_categories(cats, is_unique=True).codes)
for name, col in source._data.items()
])
elif dtype.kind in 'OU':
raise NotImplementedError('Cannot transpose string columns')
elif any(c.dtype != dtype for c in source._columns):
elif any(c.dtype != dtype for c in columns):
raise ValueError('Columns must all have the same dtype')

cdef pair[unique_ptr[column], table_view] c_result
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1769,7 +1769,7 @@ def test_dataframe_shape_empty():

@pytest.mark.parametrize("num_cols", [1, 2, 10])
@pytest.mark.parametrize("num_rows", [1, 2, 20])
@pytest.mark.parametrize("dtype", dtypes)
@pytest.mark.parametrize("dtype", dtypes + ["object"])
@pytest.mark.parametrize("nulls", ["none", "some", "all"])
def test_dataframe_transpose(nulls, num_cols, num_rows, dtype):
# In case of `bool` dtype: pandas <= 1.2.5 type-casts
Expand Down

0 comments on commit b1ae789

Please sign in to comment.