Skip to content

Commit

Permalink
Added an Implementation of MurmurHash2 (#18)
Browse files Browse the repository at this point in the history
This is an implementation of MurmurHash2,
which is identical to `std::hash`(at least until GCC 8.1).
This one is more performant on short strings, because:
  1. It could be inlined.
  2. It utilizes the loop unrolling trick.
  3. Besides, it works with the plain old raw bytes array!
  • Loading branch information
dutor authored Sep 22, 2018
1 parent b9b7995 commit d96fc92
Show file tree
Hide file tree
Showing 4 changed files with 398 additions and 0 deletions.
128 changes: 128 additions & 0 deletions common/base/MurmurHash2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#ifndef COMMON_BASE_MURMURHASH2_H_
#define COMMON_BASE_MURMURHASH2_H_

#include <string>
#include <cstring>
#include <memory>
#include <thread>
#include <type_traits>

namespace vesoft {

/**
* This is an implementation of MurmurHash2,
* which is identical to `std::hash` (at least until GCC 8.1).
* This one is more performant on short strings, because:
* 1. It could be inlined.
* 2. It utilizes the loop unrolling trick.
* Besides, it works with the plain old raw bytes array!
*/
class MurmurHash2 {
    // True when T is one of the three narrow character types; used to
    // select the string/bytes overloads below.
    template <typename T>
    static constexpr bool is_char_v = std::is_same<T, char>::value ||
                                      std::is_same<T, signed char>::value ||
                                      std::is_same<T, unsigned char>::value;

public:
    // std::string
    // Hashes the `length()` bytes of the string's content.
    size_t operator()(const std::string &str) const noexcept {
        return this->operator()(str.data(), str.length());
    }

    // null-terminated C-style string
    // The pointer is taken by reference, so array arguments (e.g. string
    // literals) do not decay into this overload and are handled by the
    // literal-string overload instead. As a consequence only lvalues of
    // type `const T *` bind here.
    template <typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T *&str) const noexcept {
        // strlen() only accepts `const char *`; the cast makes this overload
        // usable for `signed char` and `unsigned char` strings as well.
        return this->operator()(
            str, std::strlen(reinterpret_cast<const char*>(str)));
    }

    // raw bytes array
    // Hashes exactly `size` bytes starting at `str` with the 64-bit variant
    // of MurmurHash2 and a fixed seed. `str` may have any alignment.
    // Full 8-byte words are loaded in the host's native byte order.
    template <typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T *str, size_t size) const noexcept {
        const uint64_t seed = 0xc70f6907UL;
        const uint64_t m = 0xc6a4a7935bd1e995;
        const uint32_t r = 47;
        uint64_t h = seed ^ (size * m);

        const unsigned char *bytes = reinterpret_cast<const unsigned char*>(str);
        // One past the last complete 8-byte word.
        const unsigned char *end = bytes + (size & ~static_cast<size_t>(7));
        for (; bytes != end; bytes += sizeof(uint64_t)) {
            // Load through memcpy: dereferencing a `const uint64_t *` that
            // aliases a byte buffer is undefined behaviour (alignment and
            // strict aliasing). Compilers turn this into a plain load.
            uint64_t k;
            std::memcpy(&k, bytes, sizeof(k));

            k *= m;
            k ^= k >> r;
            k *= m;

            h ^= k;
            h *= m;
        }

        // Mix in the remaining 0..7 bytes; every case intentionally falls
        // through to the ones below it.
        switch (size & 7) {
            case 7:
                h ^= static_cast<uint64_t>(bytes[6]) << 48;
                // fallthrough
            case 6:
                h ^= static_cast<uint64_t>(bytes[5]) << 40;
                // fallthrough
            case 5:
                h ^= static_cast<uint64_t>(bytes[4]) << 32;
                // fallthrough
            case 4:
                h ^= static_cast<uint64_t>(bytes[3]) << 24;
                // fallthrough
            case 3:
                h ^= static_cast<uint64_t>(bytes[2]) << 16;
                // fallthrough
            case 2:
                h ^= static_cast<uint64_t>(bytes[1]) << 8;
                // fallthrough
            case 1:
                h ^= static_cast<uint64_t>(bytes[0]);
                h *= m;
        }

        // Final avalanche.
        h ^= h >> r;
        h *= m;
        h ^= h >> r;

        return h;
    }

    // std::thread::id
    // Delegates to the standard library's hash for thread ids.
    size_t operator()(std::thread::id id) const noexcept {
        return std::hash<std::thread::id>()(id);
    }

    // literal string(without decay)
    // Hashes the first N - 1 elements, i.e. the array is expected to end
    // with a terminator that is not part of the hashed content.
    template <size_t N, typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T (&str)[N]) const noexcept {
        return this->operator()(str, N - 1);
    }

    // integer
    // Integral keys hash to their own value converted to size_t.
    template <typename T>
    std::enable_if_t<std::is_integral<T>::value, size_t>
    operator()(T key) const noexcept {
        return static_cast<size_t>(key);
    }

    // pointers
    // Non-character pointers hash to their address; the pointee is not read.
    template <typename T>
    std::enable_if_t<!is_char_v<T>, size_t>
    operator()(const T *ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr);
    }

    // std::shared_ptr
    // Hashes the address of the managed object.
    template <typename T>
    size_t operator()(const std::shared_ptr<T> &ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr.get());
    }

    // std::unique_ptr
    // Hashes the address of the managed object.
    template <typename T, typename Deleter>
    size_t operator()(const std::unique_ptr<T, Deleter> &ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr.get());
    }
};

} // namespace vesoft

#endif // COMMON_BASE_MURMURHASH2_H_
26 changes: 26 additions & 0 deletions common/base/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,29 @@ target_link_libraries(
-pthread
)

# Unit-test executable for MurmurHash2, built from MurmurHash2Test.cpp and
# linked against gtest (gtest_main supplies the entry point) plus folly and
# the libraries listed with it.
add_executable(murmurhash2_test MurmurHash2Test.cpp)
target_link_libraries(
murmurhash2_test
gtest
gtest_main
folly
glog
gflags
double-conversion
dl
-pthread
)
# Register the executable as a test named murmurhash2_test.
add_test(NAME murmurhash2_test COMMAND murmurhash2_test)

# Benchmark executable comparing std::hash with MurmurHash2, built from
# HashBenchmark.cpp. It is only built here; no add_test() registers it.
add_executable(hash_bm HashBenchmark.cpp)
target_link_libraries(
hash_bm
follybenchmark
folly
glog
gflags
boost_regex
double-conversion
dl
-pthread
)
149 changes: 149 additions & 0 deletions common/base/test/HashBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#include <ctype.h>
#include <folly/Benchmark.h>
#include <folly/Random.h>
#include <cstdlib>
#include <functional>
#include <algorithm>
#include <thread>
#include "common/base/MurmurHash2.h"

using vesoft::MurmurHash2;

// Builds a string of `size` random characters drawn from the range
// '!' (0x21) through '~' (0x7E), both bounds included.
std::string makeString(size_t size) {
    constexpr unsigned kFirst = 0x21;  // '!'
    constexpr unsigned kLast = 0x7E;   // '~'
    // The `+ 1` makes the upper bound inclusive; without it '~' is never
    // generated.
    constexpr unsigned kSpan = kLast - kFirst + 1;

    std::string str(size, '\0');
    for (auto &c : str) {
        c = static_cast<char>(folly::Random::rand32() % kSpan + kFirst);
    }
    return str;
}

// Benchmark body for std::hash<std::string>.
// Hashes one random string of `size` characters 1,000,000 times per
// requested iteration and returns the number of hash calls performed.
size_t StdHashTest(size_t iters, size_t size) {
    constexpr size_t kOpsPerIter = 1000000UL;
    const size_t totalOps = kOpsPerIter * iters;

    const auto input = makeString(size);
    std::hash<std::string> hasher;
    for (size_t done = 0; done < totalOps; ++done) {
        auto digest = hasher(input);
        // Keep the compiler from discarding the otherwise unused result.
        folly::doNotOptimizeAway(digest);
    }

    return totalOps;
}

size_t MurmurHash2Test(size_t iters, size_t size) {
constexpr size_t ops = 1000000UL;

MurmurHash2 hash;
auto str = makeString(size);
auto i = 0UL;
while (i++ < ops * iters) {
auto hv = hash(str);
folly::doNotOptimizeAway(hv);
}

return iters * ops;
}

// Benchmark registrations, one pair per input length (1-10, 64, 256, 1024
// and 4096 bytes). In each pair StdHashTest is registered first and
// MurmurHash2Test follows as the "relative" entry, so it is reported as a
// percentage of the std::hash figure (see the recorded results at the end
// of this file). The last macro argument is passed to the benchmark
// function as `size`. BENCHMARK_DRAW_LINE() separates the pairs in the
// printed report.
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 1Byte, 1UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 1Byte, 1UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 2Byte, 2UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 2Byte, 2UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 3Byte, 3UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 3Byte, 3UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 4Byte, 4UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 4Byte, 4UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 5Byte, 5UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 5Byte, 5UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 6Byte, 6UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 6Byte, 6UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 7Byte, 7UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 7Byte, 7UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 8Byte, 8UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 8Byte, 8UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 9Byte, 9UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 9Byte, 9UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 10Byte, 10UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 10Byte, 10UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 64Byte, 64UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 64Byte, 64UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 256Byte, 256UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 256Byte, 256UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 1024Byte, 1024UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 1024Byte, 1024UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 4096Byte, 4096UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 4096Byte, 4096UL);

// Entry point: consumes the gflags command-line options, then runs every
// benchmark registered above.
int main(int argc, char **argv) {
    gflags::ParseCommandLineFlags(&argc, &argv, /* remove_flags = */ true);
    folly::runBenchmarks();
    return EXIT_SUCCESS;
}

/* Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz
============================================================================
common/base/test/HashBenchmark.cpp relative time/iter iters/s
============================================================================
StdHashTest(1Byte) 4.53ns 220.75M
MurmurHash2Test(1Byte) 293.53% 1.54ns 647.95M
----------------------------------------------------------------------------
StdHashTest(2Byte) 5.05ns 198.20M
MurmurHash2Test(2Byte) 271.92% 1.86ns 538.93M
----------------------------------------------------------------------------
StdHashTest(3Byte) 5.86ns 170.75M
MurmurHash2Test(3Byte) 278.04% 2.11ns 474.76M
----------------------------------------------------------------------------
StdHashTest(4Byte) 6.65ns 150.37M
MurmurHash2Test(4Byte) 291.51% 2.28ns 438.34M
----------------------------------------------------------------------------
StdHashTest(5Byte) 7.21ns 138.77M
MurmurHash2Test(5Byte) 270.39% 2.67ns 375.23M
----------------------------------------------------------------------------
StdHashTest(6Byte) 7.91ns 126.38M
MurmurHash2Test(6Byte) 264.52% 2.99ns 334.30M
----------------------------------------------------------------------------
StdHashTest(7Byte) 8.75ns 114.26M
MurmurHash2Test(7Byte) 258.92% 3.38ns 295.85M
----------------------------------------------------------------------------
StdHashTest(8Byte) 4.61ns 216.97M
MurmurHash2Test(8Byte) 173.35% 2.66ns 376.11M
----------------------------------------------------------------------------
StdHashTest(9Byte) 5.56ns 179.73M
MurmurHash2Test(9Byte) 187.64% 2.97ns 337.25M
----------------------------------------------------------------------------
StdHashTest(10Byte) 6.13ns 163.24M
MurmurHash2Test(10Byte) 196.97% 3.11ns 321.53M
----------------------------------------------------------------------------
StdHashTest(64Byte) 12.76ns 78.40M
MurmurHash2Test(64Byte) 117.30% 10.87ns 91.96M
----------------------------------------------------------------------------
StdHashTest(256Byte) 48.69ns 20.54M
MurmurHash2Test(256Byte) 108.87% 44.72ns 22.36M
----------------------------------------------------------------------------
StdHashTest(1024Byte) 204.19ns 4.90M
MurmurHash2Test(1024Byte) 98.92% 206.43ns 4.84M
----------------------------------------------------------------------------
StdHashTest(4096Byte) 825.29ns 1.21M
MurmurHash2Test(4096Byte) 98.29% 839.61ns 1.19M
============================================================================
*/
Loading

0 comments on commit d96fc92

Please sign in to comment.