Skip to content

Commit

Permalink
apacheGH-36111: [C++] Refactor dict_internal.h to use Result (apache#…
Browse files Browse the repository at this point in the history
…37754)

### Rationale for this change

This change addresses apache#36111 , updating the method GetDictionaryArrayData in dict_internal.h to return a Result instead of a Status type. 

### What changes are included in this PR?

This is a narrow interpretation of the above issue, only changing the return type with minimal updates to the call sites or their return types. 

### Are these changes tested?

Yes. The C++ test suite was run on the `ninja-debug` build target using the command `ctest -j16 --output-on-failure`. All tests not requiring parquet data passed. (I was unsure how to setup those tests based on the Arrow development guidelines.) 

### Are there any user-facing changes?

No. The only changes should be library-internal. 

* Closes: apache#36111 

Authored-by: Chris Jordan-Squire <[email protected]>
Signed-off-by: Benjamin Kietzman <[email protected]>
  • Loading branch information
chrisjordansquire authored Sep 20, 2023
1 parent cda1e8f commit 868b9bd
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 38 deletions.
12 changes: 6 additions & 6 deletions cpp/src/arrow/array/array_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,9 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
*out_type = arrow::dictionary(index_type, value_type_);

// Build unified dictionary array
std::shared_ptr<ArrayData> data;
RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */, &data));
ARROW_ASSIGN_OR_RAISE(
auto data, DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */));
*out_dict = MakeArray(data);
return Status::OK();
}
Expand All @@ -299,9 +299,9 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
}

// Build unified dictionary array
std::shared_ptr<ArrayData> data;
RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */, &data));
ARROW_ASSIGN_OR_RAISE(
auto data, DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */));
*out_dict = MakeArray(data);
return Status::OK();
}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/array/builder_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,9 @@ class DictionaryMemoTable::DictionaryMemoTableImpl {
enable_if_memoize<T, Status> Visit(const T&) {
using ConcreteMemoTable = typename DictionaryTraits<T>::MemoTableType;
auto memo_table = checked_cast<ConcreteMemoTable*>(memo_table_);
return DictionaryTraits<T>::GetDictionaryArrayData(pool_, value_type_, *memo_table,
start_offset_, out_);
ARROW_ASSIGN_OR_RAISE(*out_, DictionaryTraits<T>::GetDictionaryArrayData(
pool_, value_type_, *memo_table, start_offset_));
return Status::OK();
}
};

Expand Down
47 changes: 19 additions & 28 deletions cpp/src/arrow/array/dict_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "arrow/array.h"
#include "arrow/buffer.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
Expand Down Expand Up @@ -63,11 +64,9 @@ struct DictionaryTraits<BooleanType> {
using T = BooleanType;
using MemoTableType = typename HashTraits<T>::MemoTableType;

static Status GetDictionaryArrayData(MemoryPool* pool,
const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table,
int64_t start_offset,
std::shared_ptr<ArrayData>* out) {
static Result<std::shared_ptr<ArrayData>> GetDictionaryArrayData(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table, int64_t start_offset) {
if (start_offset < 0) {
return Status::Invalid("invalid start_offset ", start_offset);
}
Expand All @@ -82,7 +81,9 @@ struct DictionaryTraits<BooleanType> {
: builder.Append(bool_values[i]));
}

return builder.FinishInternal(out);
std::shared_ptr<ArrayData> out;
RETURN_NOT_OK(builder.FinishInternal(&out));
return out;
}
}; // namespace internal

Expand All @@ -91,11 +92,9 @@ struct DictionaryTraits<T, enable_if_has_c_type<T>> {
using c_type = typename T::c_type;
using MemoTableType = typename HashTraits<T>::MemoTableType;

static Status GetDictionaryArrayData(MemoryPool* pool,
const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table,
int64_t start_offset,
std::shared_ptr<ArrayData>* out) {
static Result<std::shared_ptr<ArrayData>> GetDictionaryArrayData(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table, int64_t start_offset) {
auto dict_length = static_cast<int64_t>(memo_table.size()) - start_offset;
// This makes a copy, but we assume a dictionary array is usually small
// compared to the size of the dictionary-using array.
Expand All @@ -112,20 +111,17 @@ struct DictionaryTraits<T, enable_if_has_c_type<T>> {
RETURN_NOT_OK(
ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));

*out = ArrayData::Make(type, dict_length, {null_bitmap, dict_buffer}, null_count);
return Status::OK();
return ArrayData::Make(type, dict_length, {null_bitmap, dict_buffer}, null_count);
}
};

template <typename T>
struct DictionaryTraits<T, enable_if_base_binary<T>> {
using MemoTableType = typename HashTraits<T>::MemoTableType;

static Status GetDictionaryArrayData(MemoryPool* pool,
const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table,
int64_t start_offset,
std::shared_ptr<ArrayData>* out) {
static Result<std::shared_ptr<ArrayData>> GetDictionaryArrayData(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table, int64_t start_offset) {
using offset_type = typename T::offset_type;

// Create the offsets buffer
Expand All @@ -148,23 +144,19 @@ struct DictionaryTraits<T, enable_if_base_binary<T>> {
RETURN_NOT_OK(
ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));

*out = ArrayData::Make(type, dict_length,
return ArrayData::Make(type, dict_length,
{null_bitmap, std::move(dict_offsets), std::move(dict_data)},
null_count);

return Status::OK();
}
};

template <typename T>
struct DictionaryTraits<T, enable_if_fixed_size_binary<T>> {
using MemoTableType = typename HashTraits<T>::MemoTableType;

static Status GetDictionaryArrayData(MemoryPool* pool,
const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table,
int64_t start_offset,
std::shared_ptr<ArrayData>* out) {
static Result<std::shared_ptr<ArrayData>> GetDictionaryArrayData(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table, int64_t start_offset) {
const T& concrete_type = internal::checked_cast<const T&>(*type);

// Create the data buffer
Expand All @@ -182,9 +174,8 @@ struct DictionaryTraits<T, enable_if_fixed_size_binary<T>> {
RETURN_NOT_OK(
ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));

*out = ArrayData::Make(type, dict_length, {null_bitmap, std::move(dict_data)},
return ArrayData::Make(type, dict_length, {null_bitmap, std::move(dict_data)},
null_count);
return Status::OK();
}
};

Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_hash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,9 @@ class RegularHashKernel : public HashKernel {
Status FlushFinal(ExecResult* out) override { return action_.FlushFinal(out); }

Status GetDictionary(std::shared_ptr<ArrayData>* out) override {
return DictionaryTraits<Type>::GetDictionaryArrayData(pool_, type_, *memo_table_,
0 /* start_offset */, out);
ARROW_ASSIGN_OR_RAISE(*out, DictionaryTraits<Type>::GetDictionaryArrayData(
pool_, type_, *memo_table_, 0 /* start_offset */));
return Status::OK();
}

std::shared_ptr<DataType> value_type() const override { return type_; }
Expand Down

0 comments on commit 868b9bd

Please sign in to comment.