diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc index bda6b17810299..ca74ffa06c820 100644 --- a/cpp/src/arrow/chunk_resolver.cc +++ b/cpp/src/arrow/chunk_resolver.cc @@ -26,7 +26,7 @@ #include "arrow/array.h" #include "arrow/record_batch.h" -namespace arrow::internal { +namespace arrow { namespace { template @@ -167,4 +167,4 @@ void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_i logical_index_vec, out_chunk_location_vec, chunk_hint); } -} // namespace arrow::internal +} // namespace arrow diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h index 4a5e27c05361f..f40514f980a8c 100644 --- a/cpp/src/arrow/chunk_resolver.h +++ b/cpp/src/arrow/chunk_resolver.h @@ -27,12 +27,12 @@ #include "arrow/type_fwd.h" #include "arrow/util/macros.h" -namespace arrow::internal { +namespace arrow { struct ChunkResolver; template -struct TypedChunkLocation { +struct ARROW_EXPORT TypedChunkLocation { /// \brief Index of the chunk in the array of chunks /// /// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used @@ -41,7 +41,7 @@ struct TypedChunkLocation { /// \brief Index of the value in the chunk /// - /// The value is UNDEFINED if chunk_index >= chunks.size() + /// The value is UNDEFINED if `chunk_index >= chunks.size()` IndexType index_in_chunk = 0; TypedChunkLocation() = default; @@ -61,7 +61,7 @@ using ChunkLocation = TypedChunkLocation; /// \brief An utility that incrementally resolves logical indices into /// physical indices in a chunked array. -struct ARROW_EXPORT ChunkResolver { +class ARROW_EXPORT ChunkResolver { private: /// \brief Array containing `chunks.size() + 1` offsets. /// @@ -75,8 +75,16 @@ struct ARROW_EXPORT ChunkResolver { mutable std::atomic cached_chunk_; public: + /// \brief Initialize from an `ArrayVector`. explicit ChunkResolver(const ArrayVector& chunks) noexcept; + + /// \brief Initialize from a vector of raw `Array` pointers. explicit ChunkResolver(const std::vector& chunks) noexcept; + + /// \brief Initialize from a `RecordBatchVector`. + /// + /// Because all `Array`s in a `RecordBatch` must have the same length, this + /// can be useful for iterating over multiple columns simultaneously. explicit ChunkResolver(const RecordBatchVector& batches) noexcept; /// \brief Construct a ChunkResolver from a vector of chunks.size() + 1 offsets. @@ -115,11 +123,11 @@ struct ARROW_EXPORT ChunkResolver { /// The returned ChunkLocation contains the chunk index and the within-chunk index /// equivalent to the logical index. /// - /// \pre index >= 0 - /// \post location.chunk_index in [0, chunks.size()] + /// \pre `index >= 0` + /// \post `location.chunk_index` in `[0, chunks.size()]` /// \param index The logical index to resolve /// \return ChunkLocation with a valid chunk_index if index is within - /// bounds, or with chunk_index == chunks.size() if logical index is + /// bounds, or with `chunk_index == chunks.size()` if logical index is /// `>= chunked_array.length()`. inline ChunkLocation Resolve(int64_t index) const { const auto cached_chunk = cached_chunk_.load(std::memory_order_relaxed); @@ -133,13 +141,13 @@ struct ARROW_EXPORT ChunkResolver { /// The returned ChunkLocation contains the chunk index and the within-chunk index /// equivalent to the logical index. /// - /// \pre index >= 0 - /// \post location.chunk_index in [0, chunks.size()] + /// \pre `index >= 0` + /// \post `location.chunk_index` in `[0, chunks.size()]` /// \param index The logical index to resolve /// \param hint ChunkLocation{} or the last ChunkLocation returned by /// this ChunkResolver. /// \return ChunkLocation with a valid chunk_index if index is within - /// bounds, or with chunk_index == chunks.size() if logical index is + /// bounds, or with `chunk_index == chunks.size()` if logical index is /// `>= chunked_array.length()`. inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) const { assert(hint.chunk_index < static_cast(offsets_.size())); @@ -281,4 +289,4 @@ struct ARROW_EXPORT ChunkResolver { } }; -} // namespace arrow::internal +} // namespace arrow diff --git a/cpp/src/arrow/chunk_resolver_benchmark.cc b/cpp/src/arrow/chunk_resolver_benchmark.cc index 0756de3fbe930..72b06d4e36fb1 100644 --- a/cpp/src/arrow/chunk_resolver_benchmark.cc +++ b/cpp/src/arrow/chunk_resolver_benchmark.cc @@ -28,8 +28,8 @@ namespace arrow { -using internal::ChunkResolver; -using internal::TypedChunkLocation; +using ChunkResolver; +using TypedChunkLocation; namespace { diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index c65b6cb6e227f..02bcd0f9026bc 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -199,7 +199,7 @@ class ARROW_EXPORT ChunkedArray { private: template friend class ::arrow::stl::ChunkedArrayIterator; - internal::ChunkResolver chunk_resolver_; + ChunkResolver chunk_resolver_; ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray); }; diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc index f98dde689c237..62aff78b48035 100644 --- a/cpp/src/arrow/chunked_array_test.cc +++ b/cpp/src/arrow/chunked_array_test.cc @@ -36,9 +36,9 @@ namespace arrow { -using internal::ChunkLocation; -using internal::ChunkResolver; -using internal::TypedChunkLocation; +using arrow::ChunkLocation; +using arrow::ChunkResolver; +using arrow::TypedChunkLocation; class TestChunkedArray : public ::testing::Test { protected: diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.h b/cpp/src/arrow/compute/kernels/chunked_internal.h index 2b72e0ab3109e..f7cb615f3ed81 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.h +++ b/cpp/src/arrow/compute/kernels/chunked_internal.h @@ -52,7 +52,7 @@ struct ResolvedChunk { class ChunkedArrayResolver { private: - ::arrow::internal::ChunkResolver resolver_; + ChunkResolver resolver_; std::vector chunks_; public: diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 8766ca3baac96..15fafa1f4d219 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -23,8 +23,8 @@ namespace arrow { +using arrow::ChunkLocation; using internal::checked_cast; -using internal::ChunkLocation; namespace compute { namespace internal { @@ -852,7 +852,7 @@ class TableSorter { const RecordBatchVector batches_; const SortOptions& options_; const NullPlacement null_placement_; - const ::arrow::internal::ChunkResolver left_resolver_, right_resolver_; + const ::arrow::ChunkResolver left_resolver_, right_resolver_; const std::vector sort_keys_; uint64_t* indices_begin_; uint64_t* indices_end_; diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index 564afb8c087d2..bee7f838a05da 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -749,9 +749,9 @@ struct ResolvedTableSortKey { order(order), null_count(null_count) {} - using LocationType = ::arrow::internal::ChunkLocation; + using LocationType = ::arrow::ChunkLocation; - ResolvedChunk GetChunk(::arrow::internal::ChunkLocation loc) const { + ResolvedChunk GetChunk(::arrow::ChunkLocation loc) const { return {chunks[loc.chunk_index], loc.index_in_chunk}; } diff --git a/cpp/src/arrow/stl_iterator.h b/cpp/src/arrow/stl_iterator.h index 5f2acfb071b29..577066cba0fcd 100644 --- a/cpp/src/arrow/stl_iterator.h +++ b/cpp/src/arrow/stl_iterator.h @@ -237,7 +237,7 @@ class ChunkedArrayIterator { } private: - arrow::internal::ChunkLocation GetChunkLocation(int64_t index) const { + arrow::ChunkLocation GetChunkLocation(int64_t index) const { assert(chunked_array_); return chunked_array_->chunk_resolver_.Resolve(index); } diff --git a/docs/source/cpp/api/array.rst b/docs/source/cpp/api/array.rst index a7e5d0cf07e0a..133a432884d3b 100644 --- a/docs/source/cpp/api/array.rst +++ b/docs/source/cpp/api/array.rst @@ -19,6 +19,9 @@ Arrays ====== +Base classes +============ + .. doxygenclass:: arrow::ArrayData :project: arrow_cpp :members: @@ -85,6 +88,17 @@ Chunked Arrays :project: arrow_cpp :members: +.. doxygenstruct:: arrow::ChunkLocation + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::TypedChunkLocation + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::ChunkResolver + :project: arrow_cpp + :members: Utilities ========= diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index bdaac0a9ce5d2..e90e06599be91 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -87,13 +87,13 @@ class ArrowAltrepData { const std::shared_ptr& chunked_array() { return chunked_array_; } - arrow::internal::ChunkLocation locate(int64_t index) { + arrow::ChunkLocation locate(int64_t index) { return resolver_.Resolve(index); } private: std::shared_ptr chunked_array_; - arrow::internal::ChunkResolver resolver_; + arrow::ChunkResolver resolver_; }; // the ChunkedArray that is being wrapped by the altrep object