GH-35521: make list scalar hashing independent of the validity bitmap.

ScalarHashImpl::ArrayHash now hashes the null bitmap (buffers[0]) only when
the array's null count is non-zero, so two arrays with no nulls hash the same
whether or not a bitmap buffer happens to be allocated. A typed test
(TestListScalar.TestHashing) pins that behaviour, and a stale commented-out
line is dropped from TestListScalar::SetUp.

NOTE(review): this patch was reconstructed from a copy whose line breaks and
angle-bracketed template arguments had been stripped. The restored arguments
(<T>, <ScalarType>, <DataType>, <Array>) are inferred from context; the
MakeListType call is left as found and may also need <T>. Confirm against the
upstream tree before applying.

diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index 1f467ad93b123..1e40266b3caef 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -153,9 +153,10 @@ struct ScalarHashImpl {
 
   Status ArrayHash(const ArrayData& a) {
     RETURN_NOT_OK(StdHash(a.length) & StdHash(a.GetNullCount()));
-    if (a.buffers[0] != nullptr) {
+    if (a.GetNullCount() != 0 && a.buffers[0] != nullptr) {
       // We can't visit values without unboxing the whole array, so only hash
-      // the null bitmap for now.
+      // the null bitmap for now. Only hash the null bitmap if the null count
+      // is not 0 to ensure hash consistency.
       RETURN_NOT_OK(BufferHash(*a.buffers[0]));
     }
     for (const auto& child : a.child_data) {
diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc
index 5c0c0589934ec..5d74eafcfcc0f 100644
--- a/cpp/src/arrow/scalar_test.cc
+++ b/cpp/src/arrow/scalar_test.cc
@@ -1064,7 +1064,6 @@ class TestListScalar : public ::testing::Test {
   using ScalarType = typename TypeTraits<T>::ScalarType;
 
   void SetUp() {
-    // type_ = std::make_shared<T>(int16());
     type_ = MakeListType(int16(), 3);
     value_ = ArrayFromJSON(int16(), "[1, 2, null]");
   }
@@ -1106,6 +1105,24 @@ class TestListScalar : public ::testing::Test {
     ASSERT_RAISES(Invalid, scalar.ValidateFull());
   }
 
+  void TestHashing() {
+    // GH-35521: the hash value of a non-null list scalar should not
+    // depend on the presence or absence of a null bitmap in the underlying
+    // list values.
+    ScalarType empty_bitmap_scalar(ArrayFromJSON(int16(), "[1, 2, 3]"));
+    ASSERT_OK(empty_bitmap_scalar.ValidateFull());
+    // Underlying list array doesn't have a null bitmap
+    ASSERT_EQ(empty_bitmap_scalar.value->data()->buffers[0], nullptr);
+
+    auto list_array = ArrayFromJSON(type_, "[[1, 2, 3], [4, 5, null]]");
+    ASSERT_OK_AND_ASSIGN(auto set_bitmap_scalar_uncasted, list_array->GetScalar(0));
+    auto set_bitmap_scalar = checked_pointer_cast<ScalarType>(set_bitmap_scalar_uncasted);
+    // Underlying list array has a null bitmap
+    ASSERT_NE(set_bitmap_scalar->value->data()->buffers[0], nullptr);
+    // ... yet it's hashing equal to the other scalar
+    ASSERT_EQ(empty_bitmap_scalar.hash(), set_bitmap_scalar->hash());
+  }
+
  protected:
   std::shared_ptr<DataType> type_;
   std::shared_ptr<Array> value_;
@@ -1119,6 +1136,8 @@ TYPED_TEST(TestListScalar, Basics) { this->TestBasics(); }
 
 TYPED_TEST(TestListScalar, ValidateErrors) { this->TestValidateErrors(); }
 
+TYPED_TEST(TestListScalar, TestHashing) { this->TestHashing(); }
+
 TEST(TestFixedSizeListScalar, ValidateErrors) {
   const auto ty = fixed_size_list(int16(), 3);
   FixedSizeListScalar scalar(ArrayFromJSON(int16(), "[1, 2, 5]"), ty);