Skip to content

Commit

Permalink
apacheGH-38366: [Java] Fix Murmur hash on buffers less than 4 bytes (a…
Browse files Browse the repository at this point in the history
…pache#38368)

### Rationale for this change

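The `MurmurHash` implementation caused hash collisions on small inputs: when a buffer's length was not a multiple of 4, the loop over the trailing bytes never executed, so those bytes were not mixed into the hash.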

### What changes are included in this PR?

Fix the iteration for small and tail values that are not 4 bytes in length.

### Are these changes tested?

Yes

### Are there any user-facing changes?
Unlikely, unless someone persisted hash values produced by the `MurmurHash` functions; hashes of inputs whose length is not a multiple of 4 bytes will now differ.

* Closes: apache#38366

Authored-by: Chris Larsen <[email protected]>
Signed-off-by: David Li <[email protected]>
  • Loading branch information
manolama authored Oct 20, 2023
1 parent a376e3c commit 4bbd48d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public static int hashCode(long address, long length, int seed) {
if (index < length) {
// process remaining data as an integer in little endian
int intValue = 0;
for (int i = index - 1; i >= index; i--) {
for (long i = length - 1; i >= index; i--) {
intValue <<= 8;
intValue |= (MemoryUtil.UNSAFE.getByte(address + i) & 0x000000ff);
index += 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
package org.apache.arrow.memory.util.hash;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;

Expand Down Expand Up @@ -110,6 +112,26 @@ public void testHasherNegative() {
}
}

/**
 * Checks that two buffers holding different content produce different hash codes
 * for every prefix length from 1 through 4 bytes.
 */
@Test
public void testHasherLessThanInt() {
  final byte[] firstContent = "foo1".getBytes(StandardCharsets.UTF_8);
  final byte[] secondContent = "bar2".getBytes(StandardCharsets.UTF_8);

  try (ArrowBuf first = allocator.buffer(4);
      ArrowBuf second = allocator.buffer(4)) {
    first.writeBytes(firstContent);
    second.writeBytes(secondContent);

    // Lengths 1..3 are shorter than an int; length 4 is exactly one int.
    int prefixLength = 1;
    while (prefixLength <= 4) {
      verifyHashCodeNotEqual(first, 0, prefixLength, second, 0, prefixLength);
      prefixLength++;
    }
  }
}

/**
 * Asserts that the hasher under test yields different hash codes for two buffer regions.
 *
 * @param buf1 first buffer
 * @param offset1 start offset of the region in {@code buf1}
 * @param length1 number of bytes of {@code buf1} to hash
 * @param buf2 second buffer
 * @param offset2 start offset of the region in {@code buf2}
 * @param length2 number of bytes of {@code buf2} to hash
 */
private void verifyHashCodeNotEqual(ArrowBuf buf1, int offset1, int length1,
    ArrowBuf buf2, int offset2, int length2) {
  // Honor the caller-supplied offsets rather than always hashing from position 0.
  int hashCode1 = hasher.hashCode(buf1, offset1, length1);
  int hashCode2 = hasher.hashCode(buf2, offset2, length2);
  assertNotEquals(hashCode1, hashCode2);
}

@Parameterized.Parameters(name = "hasher = {0}")
public static Collection<Object[]> getHasher() {
return Arrays.asList(
Expand Down

0 comments on commit 4bbd48d

Please sign in to comment.