Skip to content

Commit

Permalink
apacheGH-34535: [C++] Move ChunkResolver to the public API
Browse files Browse the repository at this point in the history
Co-authored-by: SChakravorti21
  • Loading branch information
anjakefala committed Oct 9, 2024
1 parent c50c4fa commit b71df1a
Show file tree
Hide file tree
Showing 11 changed files with 49 additions and 27 deletions.
4 changes: 2 additions & 2 deletions cpp/src/arrow/chunk_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#include "arrow/array.h"
#include "arrow/record_batch.h"

namespace arrow::internal {
namespace arrow {

namespace {
template <typename T>
Expand Down Expand Up @@ -167,4 +167,4 @@ void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_i
logical_index_vec, out_chunk_location_vec, chunk_hint);
}

} // namespace arrow::internal
} // namespace arrow
30 changes: 19 additions & 11 deletions cpp/src/arrow/chunk_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"

namespace arrow::internal {
namespace arrow {

struct ChunkResolver;

template <typename IndexType>
struct TypedChunkLocation {
struct ARROW_EXPORT TypedChunkLocation {
/// \brief Index of the chunk in the array of chunks
///
/// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used
Expand All @@ -41,7 +41,7 @@ struct TypedChunkLocation {

/// \brief Index of the value in the chunk
///
/// The value is UNDEFINED if chunk_index >= chunks.size()
/// The value is UNDEFINED if `chunk_index >= chunks.size()`
IndexType index_in_chunk = 0;

TypedChunkLocation() = default;
Expand All @@ -61,7 +61,7 @@ using ChunkLocation = TypedChunkLocation<int64_t>;

/// \brief A utility that incrementally resolves logical indices into
/// physical indices in a chunked array.
struct ARROW_EXPORT ChunkResolver {
class ARROW_EXPORT ChunkResolver {
private:
/// \brief Array containing `chunks.size() + 1` offsets.
///
Expand All @@ -75,8 +75,16 @@ struct ARROW_EXPORT ChunkResolver {
mutable std::atomic<int32_t> cached_chunk_;

public:
/// \brief Initialize from an `ArrayVector`.
explicit ChunkResolver(const ArrayVector& chunks) noexcept;

/// \brief Initialize from a vector of raw `Array` pointers.
explicit ChunkResolver(const std::vector<const Array*>& chunks) noexcept;

/// \brief Initialize from a `RecordBatchVector`.
///
/// Because all `Array`s in a `RecordBatch` must have the same length, this
/// can be useful for iterating over multiple columns simultaneously.
explicit ChunkResolver(const RecordBatchVector& batches) noexcept;

/// \brief Construct a ChunkResolver from a vector of chunks.size() + 1 offsets.
Expand Down Expand Up @@ -115,11 +123,11 @@ struct ARROW_EXPORT ChunkResolver {
/// The returned ChunkLocation contains the chunk index and the within-chunk index
/// equivalent to the logical index.
///
/// \pre index >= 0
/// \post location.chunk_index in [0, chunks.size()]
/// \pre `index >= 0`
/// \post `location.chunk_index` in `[0, chunks.size()]`
/// \param index The logical index to resolve
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
/// bounds, or with `chunk_index == chunks.size()` if logical index is
/// `>= chunked_array.length()`.
inline ChunkLocation Resolve(int64_t index) const {
const auto cached_chunk = cached_chunk_.load(std::memory_order_relaxed);
Expand All @@ -133,13 +141,13 @@ struct ARROW_EXPORT ChunkResolver {
/// The returned ChunkLocation contains the chunk index and the within-chunk index
/// equivalent to the logical index.
///
/// \pre index >= 0
/// \post location.chunk_index in [0, chunks.size()]
/// \pre `index >= 0`
/// \post `location.chunk_index` in `[0, chunks.size()]`
/// \param index The logical index to resolve
/// \param hint ChunkLocation{} or the last ChunkLocation returned by
/// this ChunkResolver.
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
/// bounds, or with `chunk_index == chunks.size()` if logical index is
/// `>= chunked_array.length()`.
inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) const {
assert(hint.chunk_index < static_cast<uint32_t>(offsets_.size()));
Expand Down Expand Up @@ -281,4 +289,4 @@ struct ARROW_EXPORT ChunkResolver {
}
};

} // namespace arrow::internal
} // namespace arrow
4 changes: 2 additions & 2 deletions cpp/src/arrow/chunk_resolver_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

namespace arrow {

using internal::ChunkResolver;
using internal::TypedChunkLocation;
using arrow::ChunkResolver;
using arrow::TypedChunkLocation;

namespace {

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/chunked_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ class ARROW_EXPORT ChunkedArray {
private:
template <typename T, typename V>
friend class ::arrow::stl::ChunkedArrayIterator;
internal::ChunkResolver chunk_resolver_;
ChunkResolver chunk_resolver_;
ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
};

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/chunked_array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@

namespace arrow {

using internal::ChunkLocation;
using internal::ChunkResolver;
using internal::TypedChunkLocation;
using arrow::ChunkLocation;
using arrow::ChunkResolver;
using arrow::TypedChunkLocation;

class TestChunkedArray : public ::testing::Test {
protected:
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/chunked_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct ResolvedChunk {

class ChunkedArrayResolver {
private:
::arrow::internal::ChunkResolver resolver_;
ChunkResolver resolver_;
std::vector<const Array*> chunks_;

public:
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_sort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

namespace arrow {

using arrow::ChunkLocation;
using internal::checked_cast;
using internal::ChunkLocation;

namespace compute {
namespace internal {
Expand Down Expand Up @@ -852,7 +852,7 @@ class TableSorter {
const RecordBatchVector batches_;
const SortOptions& options_;
const NullPlacement null_placement_;
const ::arrow::internal::ChunkResolver left_resolver_, right_resolver_;
const ::arrow::ChunkResolver left_resolver_, right_resolver_;
const std::vector<ResolvedSortKey> sort_keys_;
uint64_t* indices_begin_;
uint64_t* indices_end_;
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_sort_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -749,9 +749,9 @@ struct ResolvedTableSortKey {
order(order),
null_count(null_count) {}

using LocationType = ::arrow::internal::ChunkLocation;
using LocationType = ::arrow::ChunkLocation;

ResolvedChunk GetChunk(::arrow::internal::ChunkLocation loc) const {
ResolvedChunk GetChunk(::arrow::ChunkLocation loc) const {
return {chunks[loc.chunk_index], loc.index_in_chunk};
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/stl_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class ChunkedArrayIterator {
}

private:
arrow::internal::ChunkLocation GetChunkLocation(int64_t index) const {
arrow::ChunkLocation GetChunkLocation(int64_t index) const {
assert(chunked_array_);
return chunked_array_->chunk_resolver_.Resolve(index);
}
Expand Down
14 changes: 14 additions & 0 deletions docs/source/cpp/api/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
Arrays
======

Base classes
============

.. doxygenclass:: arrow::ArrayData
:project: arrow_cpp
:members:
Expand Down Expand Up @@ -85,6 +88,17 @@ Chunked Arrays
:project: arrow_cpp
:members:

.. doxygentypedef:: arrow::ChunkLocation
:project: arrow_cpp

.. doxygenstruct:: arrow::TypedChunkLocation
:project: arrow_cpp
:members:

.. doxygenclass:: arrow::ChunkResolver
:project: arrow_cpp
:members:

Utilities
=========
Expand Down
4 changes: 2 additions & 2 deletions r/src/altrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,13 @@ class ArrowAltrepData {

const std::shared_ptr<ChunkedArray>& chunked_array() { return chunked_array_; }

arrow::internal::ChunkLocation locate(int64_t index) {
arrow::ChunkLocation locate(int64_t index) {
return resolver_.Resolve(index);
}

private:
std::shared_ptr<ChunkedArray> chunked_array_;
arrow::internal::ChunkResolver resolver_;
arrow::ChunkResolver resolver_;
};

// the ChunkedArray that is being wrapped by the altrep object
Expand Down

0 comments on commit b71df1a

Please sign in to comment.