diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc index dd6aa51534fcb..a30ece2f5e43f 100644 --- a/cpp/src/arrow/chunked_array.cc +++ b/cpp/src/arrow/chunked_array.cc @@ -49,6 +49,7 @@ ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr type) type_(std::move(type)), length_(0), null_count_(0), + device_type_(DeviceAllocationType::kCPU), chunk_resolver_{chunks_} { if (type_ == nullptr) { ARROW_CHECK_GT(chunks_.size(), 0) @@ -56,9 +57,14 @@ ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr type) type_ = chunks_[0]->type(); } + if (chunks_.size() > 0) { + device_type_ = chunks[0]->device_type(); + } + for (const auto& chunk : chunks_) { length_ += chunk->length(); null_count_ += chunk->null_count(); + DCHECK_EQ(device_type_, chunk->device_type()); } } @@ -106,6 +112,9 @@ bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) c if (null_count_ != other.null_count()) { return false; } + if (device_type_ != other.device_type()) { + return false; + } // We cannot toggle check_metadata here yet, so we don't check it if (!type_->Equals(*other.type_, /*check_metadata=*/false)) { return false; @@ -161,6 +170,9 @@ bool ChunkedArray::ApproxEquals(const ChunkedArray& other, if (null_count_ != other.null_count()) { return false; } + if (device_type_ != other.device_type()) { + return false; + } // We cannot toggle check_metadata here yet, so we don't check it if (!type_->Equals(*other.type_, /*check_metadata=*/false)) { return false; diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index c65b6cb6e227f..8c9647271c5eb 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -26,6 +26,7 @@ #include "arrow/chunk_resolver.h" #include "arrow/compare.h" #include "arrow/device_allocation_type_set.h" +#include "arrow/device.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -190,11 +191,21 @@ class ARROW_EXPORT ChunkedArray { /// \return Status Status ValidateFull() const; + /// \brief Return the device_type that this chunked array's data is allocated + /// on. + /// + /// This just delegates to calling device_type on the underlying ArrayData + /// object which backs this Array. + /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const { return device_type_; } + protected: ArrayVector chunks_; std::shared_ptr type_; int64_t length_; int64_t null_count_; + DeviceAllocationType device_type_; private: template diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index c2346750a196f..8d6455c0c44c0 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -983,6 +983,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: CResult[vector[shared_ptr[CChunkedArray]]] Flatten(CMemoryPool* pool) + CDeviceAllocationType device_type() + CStatus Validate() const CStatus ValidateFull() const diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index 5c3d981c3adc7..8694c2230c156 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -513,6 +513,7 @@ cdef class ChunkedArray(_PandasConvertible): cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array) cdef getitem(self, int64_t i) + cdef void _assert_cpu(self) except * cdef class _Tabular(_PandasConvertible): diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index fff47373cb991..9d3af3446edf7 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -1410,6 +1410,28 @@ cdef class ChunkedArray(_PandasConvertible): self.init(c_chunked_array) return self + @property + def device_type(self): + """ + The device type where the chunks in the ChunkedArray reside. + + Returns + ------- + DeviceAllocationType + """ + return _wrap_device_allocation_type(self.sp_chunked_array.get().device_type()) + + @property + def is_cpu(self): + """ + Whether the ChunkedArrays's chunks are CPU-accessible. + """ + return self.device_type == DeviceAllocationType.CPU + + cdef void _assert_cpu(self) except *: + if self.sp_chunked_array.get().device_type() != CDeviceAllocationType_kCPU: + raise NotImplementedError("Implemented only for data on CPU device") + def chunked_array(arrays, type=None): """