From a6ace685339a8f7fb786af66ef9328b7b3f7eb68 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 7 Feb 2023 08:52:28 -0800 Subject: [PATCH] GH-33971: [C++] Fix AdaptiveIntBuilder to always populate data buffer (#33994) If the AdaptiveIntBuilder was empty it would yield a null for the values buffer. The byte_size.h utilities were not expecting this which led to the error reported in the issue. Example: ``` >>> pa.array([], type=pa.dictionary(pa.int32(), pa.string())).buffers() [None, None] ``` * Closes: #33971 Authored-by: Weston Pace Signed-off-by: Weston Pace --- cpp/src/arrow/array/array_dict_test.cc | 12 ++++++++++++ cpp/src/arrow/array/array_test.cc | 14 +++++++++++++- cpp/src/arrow/array/builder_adaptive.cc | 8 +++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index bfa732f165f85..54cf3a73d6f3a 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -157,6 +157,18 @@ TYPED_TEST(TestDictionaryBuilder, MakeBuilder) { AssertArraysEqual(expected, *result); } +TYPED_TEST(TestDictionaryBuilder, Empty) { + DictionaryBuilder dictionary_builder; + ASSERT_OK_AND_ASSIGN(std::shared_ptr empty_arr, dictionary_builder.Finish()); + std::shared_ptr empty_dict_arr = + checked_pointer_cast(empty_arr); + // Ensure that the indices value buffer is initialized + ASSERT_NE(nullptr, empty_dict_arr->data()->buffers[1]); + // Ensure that the dictionary's value buffer is initialized + ASSERT_NE(nullptr, empty_dict_arr->dictionary()); + ASSERT_NE(nullptr, empty_dict_arr->dictionary()->data()->buffers[1]); +} + TYPED_TEST(TestDictionaryBuilder, ArrayConversion) { auto type = std::make_shared(); diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 3c4b65c551c12..71e125bd86703 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -2320,7 +2320,17 @@ class TestAdaptiveIntBuilder : public TestBuilder { builder_ = std::make_shared(pool_); } - void Done() { FinishAndCheckPadding(builder_.get(), &result_); } + void EnsureValuesBufferNotNull(const ArrayData& data) { + // The values buffer should be initialized + ASSERT_EQ(2, data.buffers.size()); + ASSERT_NE(nullptr, data.buffers[1]); + ASSERT_NE(nullptr, data.buffers[1]->data()); + } + + void Done() { + FinishAndCheckPadding(builder_.get(), &result_); + EnsureValuesBufferNotNull(*result_->data()); + } template void TestAppendValues() { @@ -2571,6 +2581,8 @@ TEST_F(TestAdaptiveIntBuilder, TestAppendEmptyValue) { AssertArraysEqual(*result_, *ArrayFromJSON(int8(), "[null, null, 0, 42, 0, 0]")); } +TEST_F(TestAdaptiveIntBuilder, Empty) { Done(); } + TEST(TestAdaptiveIntBuilderWithStartIntSize, TestReset) { auto builder = std::make_shared( static_cast(sizeof(int16_t)), default_memory_pool()); diff --git a/cpp/src/arrow/array/builder_adaptive.cc b/cpp/src/arrow/array/builder_adaptive.cc index f6255a564fccb..3012891cf7586 100644 --- a/cpp/src/arrow/array/builder_adaptive.cc +++ b/cpp/src/arrow/array/builder_adaptive.cc @@ -139,7 +139,13 @@ Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr* out) { RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get())); - *out = ArrayData::Make(type(), length_, {null_bitmap, data_}, null_count_); + std::shared_ptr values_buffer = data_; + if (!values_buffer) { + ARROW_ASSIGN_OR_RAISE(values_buffer, AllocateBuffer(0, pool_)); + } + + *out = ArrayData::Make(type(), length_, {null_bitmap, std::move(values_buffer)}, + null_count_); data_ = nullptr; capacity_ = length_ = null_count_ = 0;