Skip to content

Commit

Permalink
GH-35521: [C++] Hash null bitmap only if null count is 0 (#35522)
Browse files Browse the repository at this point in the history
### Are these changes tested?

I am new to contributing and am having a hard time creating a good test for this. The steps to reproduce this bug originally are too complicated for a simple test. I included my attempt at making a good test in the PR, but some help would be nice.
-->

### Are there any user-facing changes?
No.

**This PR contains a "Critical Fix".**
* Closes: #35521

Lead-authored-by: micah-white <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
micah-white and pitrou authored May 11, 2023
1 parent 401ae19 commit e7a885d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
5 changes: 3 additions & 2 deletions cpp/src/arrow/scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ struct ScalarHashImpl {

Status ArrayHash(const ArrayData& a) {
RETURN_NOT_OK(StdHash(a.length) & StdHash(a.GetNullCount()));
if (a.buffers[0] != nullptr) {
if (a.GetNullCount() != 0 && a.buffers[0] != nullptr) {
// We can't visit values without unboxing the whole array, so only hash
// the null bitmap for now.
// the null bitmap for now. Only hash the null bitmap if the null count
// is not 0 to ensure hash consistency.
RETURN_NOT_OK(BufferHash(*a.buffers[0]));
}
for (const auto& child : a.child_data) {
Expand Down
21 changes: 20 additions & 1 deletion cpp/src/arrow/scalar_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,6 @@ class TestListScalar : public ::testing::Test {
using ScalarType = typename TypeTraits<T>::ScalarType;

void SetUp() {
// type_ = std::make_shared<T>(int16());
type_ = MakeListType<T>(int16(), 3);
value_ = ArrayFromJSON(int16(), "[1, 2, null]");
}
Expand Down Expand Up @@ -1106,6 +1105,24 @@ class TestListScalar : public ::testing::Test {
ASSERT_RAISES(Invalid, scalar.ValidateFull());
}

void TestHashing() {
// GH-35521: the hash value of a non-null list scalar should not
// depend on the presence or absence of a null bitmap in the underlying
// list values.
ScalarType empty_bitmap_scalar(ArrayFromJSON(int16(), "[1, 2, 3]"));
ASSERT_OK(empty_bitmap_scalar.ValidateFull());
// Underlying list array doesn't have a null bitmap
ASSERT_EQ(empty_bitmap_scalar.value->data()->buffers[0], nullptr);

auto list_array = ArrayFromJSON(type_, "[[1, 2, 3], [4, 5, null]]");
ASSERT_OK_AND_ASSIGN(auto set_bitmap_scalar_uncasted, list_array->GetScalar(0));
auto set_bitmap_scalar = checked_pointer_cast<ScalarType>(set_bitmap_scalar_uncasted);
// Underlying list array has a null bitmap
ASSERT_NE(set_bitmap_scalar->value->data()->buffers[0], nullptr);
// ... yet it's hashing equal to the other scalar
ASSERT_EQ(empty_bitmap_scalar.hash(), set_bitmap_scalar->hash());
}

protected:
std::shared_ptr<DataType> type_;
std::shared_ptr<Array> value_;
Expand All @@ -1119,6 +1136,8 @@ TYPED_TEST(TestListScalar, Basics) { this->TestBasics(); }

TYPED_TEST(TestListScalar, ValidateErrors) { this->TestValidateErrors(); }

TYPED_TEST(TestListScalar, TestHashing) { this->TestHashing(); }

TEST(TestFixedSizeListScalar, ValidateErrors) {
const auto ty = fixed_size_list(int16(), 3);
FixedSizeListScalar scalar(ArrayFromJSON(int16(), "[1, 2, 5]"), ty);
Expand Down

0 comments on commit e7a885d

Please sign in to comment.