From 30ae3cca5ca68bdcf8df4bcdf848ac3727ad62d4 Mon Sep 17 00:00:00 2001 From: bfaria Date: Tue, 14 Oct 2014 21:24:07 +0200 Subject: [PATCH 1/2] Make MurmurHash of Strings independent of default encoding When hashing Strings, MurmurHash simply calls the no-argument getBytes() method, which uses the platform's default encoding. This is not portable: the same String can hash to different values on JVMs with different default encodings. --- .../java/com/clearspring/analytics/hash/MurmurHash.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/clearspring/analytics/hash/MurmurHash.java b/src/main/java/com/clearspring/analytics/hash/MurmurHash.java index 400423393..df372a1f6 100644 --- a/src/main/java/com/clearspring/analytics/hash/MurmurHash.java +++ b/src/main/java/com/clearspring/analytics/hash/MurmurHash.java @@ -1,5 +1,7 @@ package com.clearspring.analytics.hash; +import com.google.common.base.Charsets; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this @@ -45,7 +47,7 @@ public static int hash(Object o) { return hashLong(Float.floatToRawIntBits((Float) o)); } if (o instanceof String) { - return hash(((String) o).getBytes()); + return hash(((String) o).getBytes(Charsets.UTF_8)); } if (o instanceof byte[]) { return hash((byte[]) o); @@ -136,7 +138,7 @@ public static long hash64(Object o) { if (o == null) { return 0l; } else if (o instanceof String) { - final byte[] bytes = ((String) o).getBytes(); + final byte[] bytes = ((String) o).getBytes(Charsets.UTF_8); return hash64(bytes, bytes.length); } else if (o instanceof byte[]) { final byte[] bytes = (byte[]) o; From 1a98453b57e7d974790667b8db06e98b90c08688 Mon Sep 17 00:00:00 2001 From: bfaria Date: Tue, 14 Oct 2014 21:45:27 +0200 Subject: [PATCH 2/2] Remove runtime dependency on Guava --- .../java/com/clearspring/analytics/hash/MurmurHash.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/clearspring/analytics/hash/MurmurHash.java 
b/src/main/java/com/clearspring/analytics/hash/MurmurHash.java index df372a1f6..6df397d1b 100644 --- a/src/main/java/com/clearspring/analytics/hash/MurmurHash.java +++ b/src/main/java/com/clearspring/analytics/hash/MurmurHash.java @@ -1,6 +1,7 @@ package com.clearspring.analytics.hash; -import com.google.common.base.Charsets; +import java.nio.charset.Charset; + /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -47,7 +48,7 @@ public static int hash(Object o) { return hashLong(Float.floatToRawIntBits((Float) o)); } if (o instanceof String) { - return hash(((String) o).getBytes(Charsets.UTF_8)); + return hash(((String) o).getBytes(Charset.forName("UTF8"))); } if (o instanceof byte[]) { return hash((byte[]) o); @@ -138,7 +139,7 @@ public static long hash64(Object o) { if (o == null) { return 0l; } else if (o instanceof String) { - final byte[] bytes = ((String) o).getBytes(Charsets.UTF_8); + final byte[] bytes = ((String) o).getBytes(Charset.forName("UTF8")); return hash64(bytes, bytes.length); } else if (o instanceof byte[]) { final byte[] bytes = (byte[]) o;