Skip to content

Commit

Permalink
apacheGH-43728:[Python] ChunkedArray fails gracefully on non-cpu devices
Browse files Browse the repository at this point in the history
  • Loading branch information
danepitkin committed Sep 3, 2024
1 parent c455d6b commit 6c9dc29
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 0 deletions.
12 changes: 12 additions & 0 deletions cpp/src/arrow/chunked_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,22 @@ ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type)
type_(std::move(type)),
length_(0),
null_count_(0),
device_type_(DeviceAllocationType::kCPU),
chunk_resolver_{chunks_} {
if (type_ == nullptr) {
ARROW_CHECK_GT(chunks_.size(), 0)
<< "cannot construct ChunkedArray from empty vector and omitted type";
type_ = chunks_[0]->type();
}

if (chunks_.size() > 0) {
device_type_ = chunks[0]->device_type();
}

for (const auto& chunk : chunks_) {
length_ += chunk->length();
null_count_ += chunk->null_count();
DCHECK_EQ(device_type_, chunk->device_type());
}
}

Expand Down Expand Up @@ -106,6 +112,9 @@ bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) c
if (null_count_ != other.null_count()) {
return false;
}
if (device_type_ != other.device_type()) {
return false;
}
// We cannot toggle check_metadata here yet, so we don't check it
if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
return false;
Expand Down Expand Up @@ -161,6 +170,9 @@ bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
if (null_count_ != other.null_count()) {
return false;
}
if (device_type_ != other.device_type()) {
return false;
}
// We cannot toggle check_metadata here yet, so we don't check it
if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
return false;
Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/chunked_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "arrow/chunk_resolver.h"
#include "arrow/compare.h"
#include "arrow/device_allocation_type_set.h"
#include "arrow/device.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
Expand Down Expand Up @@ -190,11 +191,21 @@ class ARROW_EXPORT ChunkedArray {
/// \return Status
Status ValidateFull() const;

/// \brief Return the device_type that this chunked array's data is allocated
/// on.
///
/// This just delegates to calling device_type on the underlying ArrayData
/// object which backs this Array.
///
/// \return DeviceAllocationType
DeviceAllocationType device_type() const { return device_type_; }

protected:
ArrayVector chunks_;
std::shared_ptr<DataType> type_;
int64_t length_;
int64_t null_count_;
DeviceAllocationType device_type_;

private:
template <typename T, typename V>
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:

CResult[vector[shared_ptr[CChunkedArray]]] Flatten(CMemoryPool* pool)

CDeviceAllocationType device_type()

CStatus Validate() const
CStatus ValidateFull() const

Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,7 @@ cdef class ChunkedArray(_PandasConvertible):

cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
cdef getitem(self, int64_t i)
cdef void _assert_cpu(self) except *


cdef class _Tabular(_PandasConvertible):
Expand Down
22 changes: 22 additions & 0 deletions python/pyarrow/table.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1410,6 +1410,28 @@ cdef class ChunkedArray(_PandasConvertible):
self.init(c_chunked_array)
return self

@property
def device_type(self):
"""
The device type where the chunks in the ChunkedArray reside.
Returns
-------
DeviceAllocationType
"""
return _wrap_device_allocation_type(self.sp_chunked_array.get().device_type())

@property
def is_cpu(self):
"""
Whether the ChunkedArrays's chunks are CPU-accessible.
"""
return self.device_type == DeviceAllocationType.CPU

cdef void _assert_cpu(self) except *:
if self.sp_chunked_array.get().device_type() != CDeviceAllocationType_kCPU:
raise NotImplementedError("Implemented only for data on CPU device")


def chunked_array(arrays, type=None):
"""
Expand Down

0 comments on commit 6c9dc29

Please sign in to comment.