From 3154561890535eb6203e3e7fec79ce68c0ee5555 Mon Sep 17 00:00:00 2001 From: Chris Larsen Date: Thu, 19 Oct 2023 14:48:58 -0700 Subject: [PATCH] GH38366 [Java][Memory] Fix Murmur hash failing to hash on buffers less than 4 bytes --- .../arrow/memory/util/hash/MurmurHasher.java | 2 +- .../memory/util/hash/TestArrowBufHasher.java | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java index ea565dfca67e4..75fc3f0c45831 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java @@ -96,7 +96,7 @@ public static int hashCode(long address, long length, int seed) { if (index < length) { // process remaining data as a integer in little endian int intValue = 0; - for (int i = index - 1; i >= index; i--) { + for (long i = length - 1; i >= index; i--) { intValue <<= 8; intValue |= (MemoryUtil.UNSAFE.getByte(address + i) & 0x000000ff); index += 1; diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java index a8707e6ca93d7..3da0602bdfd9c 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java @@ -18,8 +18,10 @@ package org.apache.arrow.memory.util.hash; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; @@ -110,6 +112,26 @@ public void 
testHasherNegative() {
     }
   }
 
+  /** Checks that the 1- to 4-byte prefixes of two different buffers yield different hash codes. */
+  @Test
+  public void testHasherLessThanInt() {
+    try (ArrowBuf buf1 = allocator.buffer(4); ArrowBuf buf2 = allocator.buffer(4)) {
+      buf1.writeBytes("foo1".getBytes(StandardCharsets.UTF_8));
+      buf2.writeBytes("bar2".getBytes(StandardCharsets.UTF_8));
+      for (int i = 1; i <= 4; i++) {
+        verifyHashCodeNotEqual(buf1, 0, i, buf2, 0, i);
+      }
+    }
+  }
+
+  /** Asserts that the hasher returns different hash codes for the two given buffer regions. */
+  private void verifyHashCodeNotEqual(ArrowBuf buf1, int offset1, int length1,
+                                      ArrowBuf buf2, int offset2, int length2) {
+    int hashCode1 = hasher.hashCode(buf1, offset1, length1);
+    int hashCode2 = hasher.hashCode(buf2, offset2, length2);
+    assertNotEquals(hashCode1, hashCode2);
+  }
+
   @Parameterized.Parameters(name = "hasher = {0}")
   public static Collection getHasher() {
     return Arrays.asList(