Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Construct pylibcudf columns from objects supporting __cuda_array_interface__ #15615

62 changes: 62 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,42 @@ cdef class Column:
c_result = move(make_column_from_scalar(dereference(c_scalar), size))
return Column.from_libcudf(move(c_result))

@staticmethod
def from_cuda_array_interface_obj(object obj):
    """Create a Column from an object with a CUDA array interface.

    Parameters
    ----------
    obj : object
        The object with the CUDA array interface to create a column from.
        The interface must describe a one-dimensional, contiguous array.

    Returns
    -------
    Column
        A Column containing the data from the CUDA array interface.

    Raises
    ------
    ValueError
        If ``obj`` has no ``__cuda_array_interface__`` attribute, or if the
        interface describes data that is not one-dimensional or not
        contiguous.

    Notes
    -----
    Data is not copied when creating the column. The caller is
    responsible for ensuring the data is not mutated unexpectedly while the
    column is in use.
    """
    if not hasattr(obj, '__cuda_array_interface__'):
        raise ValueError("Object does not have a CUDA array interface")

    data = gpumemoryview(obj)
    iface = data.__cuda_array_interface__()
    data_type = _data_type_from_iface(iface)

    # The column wraps the buffer as a flat run of ``size`` elements, so the
    # interface has to describe exactly one dimension; taking shape[0] of a
    # multi-dimensional array would silently produce a nonsense result.
    shape = iface['shape']
    if len(shape) != 1:
        raise ValueError("Data must be one-dimensional")
    (size,) = shape

    # Strides are expressed in bytes; a missing or None entry means the
    # array is C-contiguous. With zero or one element the stride is
    # irrelevant, so it is only checked for longer arrays.
    strides = iface.get('strides')
    if strides is not None and size > 1:
        # The item size is the run of digits following the byte-order and
        # kind characters of the typestr, e.g. the 8 in '<M8[ns]'.
        itemsize = int(iface['typestr'][2:].partition('[')[0])
        if tuple(strides) != (itemsize,):
            raise ValueError("Data must be contiguous")

    # TODO: per review discussion, cuDF's Python buffer code has a similar
    # contiguity check; consider moving it to a Cython utility shared by
    # both places.

    # NOTE(review): the trailing arguments are presumably mask, null count,
    # offset and children -- confirm against Column.__init__.
    return Column(
        data_type,
        size,
        data,
        None,
        0,
        0,
        []
    )

cpdef DataType type(self):
    """The type of data in the column.

    Returns
    -------
    DataType
        The data type stored on this column (``self._data_type``).
    """
    return self._data_type
Expand Down Expand Up @@ -296,3 +332,29 @@ cdef class ListColumnView:
cpdef offsets(self):
    """The offsets column of the underlying list column.

    Returns the result of ``child(1)`` on the wrapped column.
    """
    return self._column.child(1)


def _data_type_from_iface(iface):
    """Determine the column DataType described by a CUDA array interface.

    Parameters
    ----------
    iface : dict
        A CUDA array interface dictionary. Only its ``'typestr'`` entry is
        read.

    Returns
    -------
    DataType
        The DataType whose type id corresponds to the interface's typestr.

    Raises
    ------
    ValueError
        If the typestr is big-endian or does not correspond to one of the
        supported fixed-width types.
    """
    typestr = iface['typestr']

    # The first character of a typestr is the byte order. Big-endian data
    # would be reinterpreted byte-swapped, so reject it instead of
    # silently discarding the marker.
    if typestr[:1] == '>':
        raise ValueError(
            f"Big-endian data is not supported (typestr {typestr!r})"
        )

    mapping = {
        'u1': type_id.UINT8,
        'u2': type_id.UINT16,
        'u4': type_id.UINT32,
        'u8': type_id.UINT64,
        'i1': type_id.INT8,
        'i2': type_id.INT16,
        'i4': type_id.INT32,
        'i8': type_id.INT64,
        'f4': type_id.FLOAT32,
        'f8': type_id.FLOAT64,
        'b1': type_id.BOOL8,
        'M8[s]': type_id.TIMESTAMP_SECONDS,
        'M8[ms]': type_id.TIMESTAMP_MILLISECONDS,
        'M8[us]': type_id.TIMESTAMP_MICROSECONDS,
        'M8[ns]': type_id.TIMESTAMP_NANOSECONDS,
        'm8[s]': type_id.DURATION_SECONDS,
        'm8[ms]': type_id.DURATION_MILLISECONDS,
        'm8[us]': type_id.DURATION_MICROSECONDS,
        'm8[ns]': type_id.DURATION_NANOSECONDS,
    }

    # Look the type up without the byte-order character. An unknown
    # typestr must fail here with a clear message rather than handing
    # None to the DataType constructor.
    try:
        tid = mapping[typestr[1:]]
    except KeyError:
        raise ValueError(
            f"Unsupported CUDA array interface typestr: {typestr!r}"
        ) from None
    return DataType(tid)
brandon-b-miller marked this conversation as resolved.
Show resolved Hide resolved
49 changes: 49 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/test_column_from_device.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pytest
from utils import assert_column_eq

import cudf
from cudf._lib import pylibcudf as plc

# Arrow types exercised by the CUDA array interface test: signed and
# unsigned integers, floats, bool, then timestamps and durations at each
# resolution. The order is kept stable because it determines the order of
# the parametrized test ids.
VALID_TYPES = [
    *(
        make_type()
        for make_type in (
            pa.int8,
            pa.int16,
            pa.int32,
            pa.int64,
            pa.uint8,
            pa.uint16,
            pa.uint32,
            pa.uint64,
            pa.float32,
            pa.float64,
            pa.bool_,
        )
    ),
    *(pa.timestamp(unit) for unit in ("s", "ms", "us", "ns")),
    *(pa.duration(unit) for unit in ("s", "ms", "us", "ns")),
]


@pytest.fixture(scope="module", params=VALID_TYPES, ids=repr)
def valid_type(request):
    """Provide each entry of VALID_TYPES in turn as a parametrized fixture."""
    arrow_type = request.param
    return arrow_type


@pytest.fixture(scope="module")
def valid_column(valid_type):
    """Build a three-element Arrow array of the parametrized type.

    Boolean columns are built from boolean literals because pyarrow does
    not convert Python integers to ``bool_`` when constructing an array;
    every other type in VALID_TYPES is built from the integers 1, 2, 3.
    """
    if pa.types.is_boolean(valid_type):
        return pa.array([True, False, True], type=valid_type)
    return pa.array([1, 2, 3], type=valid_type)


def test_from_cuda_array_interface(valid_column):
    """Check a Column built via the CUDA array interface matches its source.

    The Arrow array is wrapped in a ``cudf.Series``, which is then passed
    to ``Column.from_cuda_array_interface_obj``; the resulting column is
    compared against the original Arrow array.
    """
    device_series = cudf.Series(valid_column)
    got = plc.column.Column.from_cuda_array_interface_obj(device_series)

    assert_column_eq(got, valid_column)
Loading