diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7bc036587af..f1c7d5f5a3a 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2037,6 +2037,8 @@ def as_column( np_type = None try: if dtype is not None: + if is_categorical_dtype(dtype) or is_interval_dtype(dtype): + raise TypeError if is_list_dtype(dtype): data = pa.array(arbitrary) if type(data) not in (pa.ListArray, pa.NullArray): @@ -2044,6 +2046,11 @@ def as_column( "Cannot create list column from given data" ) return as_column(data, nan_as_null=nan_as_null) + elif isinstance( + dtype, cudf.StructDtype + ) and not isinstance(dtype, cudf.IntervalDtype): + data = pa.array(arbitrary, type=dtype.to_arrow()) + return as_column(data, nan_as_null=nan_as_null) if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype): data = pa.array( arbitrary, @@ -2065,14 +2072,11 @@ def as_column( data ) dtype = pd.api.types.pandas_dtype(dtype) - if is_categorical_dtype(dtype) or is_interval_dtype(dtype): - raise TypeError + np_type = np.dtype(dtype).type + if np_type == np.bool_: + pa_type = pa.bool_() else: - np_type = np.dtype(dtype).type - if np_type == np.bool_: - pa_type = pa.bool_() - else: - pa_type = np_to_pa_dtype(np.dtype(dtype)) + pa_type = np_to_pa_dtype(np.dtype(dtype)) data = as_column( pa.array( arbitrary, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c02bf3d11a4..275a0d59fc3 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7477,6 +7477,25 @@ def to_dict(self, orient="dict", into=dict): "`.to_pandas().to_dict()` to construct a Python dictionary." ) + def to_struct(self, name=None): + """ + Return a struct Series composed of the columns of the DataFrame. + Note that no copies of the data are made. + + Parameters + ---------- + name: optional + Name of the resulting Series + """ + col = cudf.core.column.build_struct_column( + names=self._data.names, children=self._data.columns, size=len(self) + ) + return cudf.Series._from_data( + cudf.core.column_accessor.ColumnAccessor({name: col}), + index=self.index, + name=name, + ) + def keys(self): """ Get the columns. diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py index b38fd3a5b6c..8c226ef1680 100644 --- a/python/cudf/cudf/tests/test_struct.py +++ b/python/cudf/cudf/tests/test_struct.py @@ -164,3 +164,17 @@ def test_struct_scalar_host_construction(data): def test_struct_scalar_null(): slr = cudf.Scalar(cudf.NA, dtype=StructDtype) assert slr.device_value.value is cudf.NA + + +def test_dataframe_to_struct(): + df = cudf.DataFrame() + expect = cudf.Series(dtype=cudf.StructDtype({})) + got = df.to_struct() + assert_eq(expect, got) + + df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}) + expect = cudf.Series( + [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}, {"a": 3, "b": "z"}] + ) + got = df.to_struct() + assert_eq(expect, got)