Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added an Implementation of MurmurHash2 #18

Merged
merged 2 commits into from
Sep 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions common/base/MurmurHash2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#ifndef COMMON_BASE_MURMURHASH2_H_
#define COMMON_BASE_MURMURHASH2_H_

#include <string>
#include <cstring>
#include <memory>
#include <thread>
#include <type_traits>

namespace vesoft {

/**
* This is an implementation of MurmurHash2 that produces the same values as
* `std::hash' (at least up to GCC 8.1).
* It is faster than `std::hash' on short strings, because:
* 1. It can be inlined.
* 2. It uses the loop-unrolling trick.
* Besides, it also works on plain raw byte arrays!
*/
class MurmurHash2 {
    // True when T is one of the three narrow character types.
    template <typename T>
    static constexpr bool is_char_v = std::is_same<T, char>::value ||
                                      std::is_same<T, signed char>::value ||
                                      std::is_same<T, unsigned char>::value;

public:
    // std::string: hashes exactly str.length() bytes (embedded '\0' included).
    size_t operator()(const std::string &str) const noexcept {
        return this->operator()(str.data(), str.length());
    }

    // null-terminated C-style string.
    // The parameter is a reference to a pointer so that only pointer lvalues
    // select this overload; string literals (arrays) go to the array overload.
    template <typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T *&str) const noexcept {
        // strlen() only accepts `const char *'; the cast keeps this overload
        // usable for `signed char' and `unsigned char' strings as well.
        return this->operator()(str, std::strlen(reinterpret_cast<const char *>(str)));
    }

    // raw bytes array: hashes `size' bytes starting at `str'.
    // `str' does not need to be aligned; it may be null only when `size' is 0.
    template <typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T *str, size_t size) const noexcept {
        const uint64_t seed = 0xc70f6907UL;
        const uint64_t m = 0xc6a4a7935bd1e995ULL;
        const uint32_t r = 47;
        uint64_t h = seed ^ (size * m);

        const unsigned char *bytes = reinterpret_cast<const unsigned char *>(str);
        const unsigned char *end = bytes + (size & ~static_cast<size_t>(7));
        // Consume the input 8 bytes at a time.  The word is loaded through
        // memcpy() rather than by dereferencing a casted uint64_t pointer:
        // the latter is undefined behaviour for misaligned buffers and breaks
        // strict aliasing, while memcpy() yields the very same value.
        for (; bytes != end; bytes += sizeof(uint64_t)) {
            uint64_t k;
            std::memcpy(&k, bytes, sizeof(k));

            k *= m;
            k ^= k >> r;
            k *= m;

            h ^= k;
            h *= m;
        }

        // Mix in the remaining 0..7 bytes.  Every case deliberately falls
        // through to the next one, so that all the tail bytes get folded in.
        switch (size & 7) {
            case 7:
                h ^= static_cast<uint64_t>(bytes[6]) << 48;
                // fallthrough
            case 6:
                h ^= static_cast<uint64_t>(bytes[5]) << 40;
                // fallthrough
            case 5:
                h ^= static_cast<uint64_t>(bytes[4]) << 32;
                // fallthrough
            case 4:
                h ^= static_cast<uint64_t>(bytes[3]) << 24;
                // fallthrough
            case 3:
                h ^= static_cast<uint64_t>(bytes[2]) << 16;
                // fallthrough
            case 2:
                h ^= static_cast<uint64_t>(bytes[1]) << 8;
                // fallthrough
            case 1:
                h ^= static_cast<uint64_t>(bytes[0]);
                h *= m;
        }

        // Final avalanche
        h ^= h >> r;
        h *= m;
        h ^= h >> r;

        return h;
    }

    // std::thread::id: delegates to the standard hasher
    size_t operator()(std::thread::id id) const noexcept {
        return std::hash<std::thread::id>()(id);
    }

    // literal string(without decay): the last element of the array
    // (the terminating '\0' of a literal) is not hashed.
    template <size_t N, typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T (&str)[N]) const noexcept {
        return this->operator()(str, N - 1);
    }

    // integer: the value itself, converted to size_t
    template <typename T>
    std::enable_if_t<std::is_integral<T>::value, size_t>
    operator()(T key) const noexcept {
        return static_cast<size_t>(key);
    }

    // pointers(other than pointers to characters): the address itself
    template <typename T>
    std::enable_if_t<!is_char_v<T>, size_t>
    operator()(const T *ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr);
    }

    // std::shared_ptr: the address of the managed object
    template <typename T>
    size_t operator()(const std::shared_ptr<T> &ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr.get());
    }

    // std::unique_ptr: the address of the managed object
    template <typename T, typename Deleter>
    size_t operator()(const std::unique_ptr<T, Deleter> &ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr.get());
    }
};

} // namespace vesoft

#endif // COMMON_BASE_MURMURHASH2_H_
26 changes: 26 additions & 0 deletions common/base/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,29 @@ target_link_libraries(
-pthread
)

# Unit test executable for MurmurHash2, built from MurmurHash2Test.cpp.
# Links gtest/gtest_main along with folly, glog, gflags, double-conversion,
# dl and pthread support.
add_executable(murmurhash2_test MurmurHash2Test.cpp)
target_link_libraries(
murmurhash2_test
gtest
gtest_main
folly
glog
gflags
double-conversion
dl
-pthread
)
# Register the executable with CTest under the same name.
add_test(NAME murmurhash2_test COMMAND murmurhash2_test)

# Benchmark executable built from HashBenchmark.cpp.
# Links follybenchmark in addition to folly and its supporting libraries.
# NOTE(review): no add_test() is visible for this target in this hunk, so it
# is presumably meant to be run by hand - confirm.
add_executable(hash_bm HashBenchmark.cpp)
target_link_libraries(
hash_bm
follybenchmark
folly
glog
gflags
boost_regex
double-conversion
dl
-pthread
)
149 changes: 149 additions & 0 deletions common/base/test/HashBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#include <ctype.h>
#include <folly/Benchmark.h>
#include <folly/Random.h>
#include <cstdlib>
#include <functional>
#include <algorithm>
#include <thread>
#include "common/base/MurmurHash2.h"

using vesoft::MurmurHash2;

// Builds a string of `size' random printable, non-space ASCII characters,
// each one drawn from the inclusive range ['!' (0x21), '~' (0x7E)].
std::string makeString(size_t size) {
    constexpr unsigned kFirst = 0x21;  // '!'
    constexpr unsigned kLast = 0x7E;   // '~'

    std::string str;
    str.resize(size);
    for (auto &c : str) {
        // The `+ 1' makes the upper bound inclusive; without it, '~' could
        // never be generated.
        c = static_cast<char>(folly::Random::rand32() % (kLast - kFirst + 1) + kFirst);
    }
    return str;
}

// Benchmark body for std::hash<std::string>.
// Generates one random string of `size' bytes, then hashes it 1,000,000
// times for every requested iteration.
// Returns the total number of hash invocations performed.
size_t StdHashTest(size_t iters, size_t size) {
    constexpr size_t kOpsPerIter = 1000000UL;
    const size_t total = iters * kOpsPerIter;

    const auto input = makeString(size);
    std::hash<std::string> hasher;
    for (size_t done = 0; done < total; ++done) {
        auto digest = hasher(input);
        // Keep the compiler from discarding the otherwise unused result
        folly::doNotOptimizeAway(digest);
    }

    return total;
}

size_t MurmurHash2Test(size_t iters, size_t size) {
constexpr size_t ops = 1000000UL;

MurmurHash2 hash;
auto str = makeString(size);
auto i = 0UL;
while (i++ < ops * iters) {
auto hv = hash(str);
folly::doNotOptimizeAway(hv);
}

return iters * ops;
}

// Benchmark registrations, one pair per input size (1-10, 64, 256, 1024 and
// 4096 bytes).  In every pair StdHashTest is the baseline and MurmurHash2Test
// is registered as relative to it, with the same size passed to both.
// BENCHMARK_DRAW_LINE() separates the pairs in the printed report.
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 1Byte, 1UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 1Byte, 1UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 2Byte, 2UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 2Byte, 2UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 3Byte, 3UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 3Byte, 3UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 4Byte, 4UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 4Byte, 4UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 5Byte, 5UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 5Byte, 5UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 6Byte, 6UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 6Byte, 6UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 7Byte, 7UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 7Byte, 7UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 8Byte, 8UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 8Byte, 8UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 9Byte, 9UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 9Byte, 9UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 10Byte, 10UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 10Byte, 10UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 64Byte, 64UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 64Byte, 64UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 256Byte, 256UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 256Byte, 256UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 1024Byte, 1024UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 1024Byte, 1024UL);
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 4096Byte, 4096UL);
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 4096Byte, 4096UL);

// Entry point: lets gflags consume the command-line flags, then runs every
// benchmark registered in this file.
int main(int argc, char **argv) {
    // `true' asks gflags to strip the flags it recognizes from argv
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    folly::runBenchmarks();

    return 0;
}

/* Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz
============================================================================
common/base/test/HashBenchmark.cpp relative time/iter iters/s
============================================================================
StdHashTest(1Byte) 4.53ns 220.75M
MurmurHash2Test(1Byte) 293.53% 1.54ns 647.95M
----------------------------------------------------------------------------
StdHashTest(2Byte) 5.05ns 198.20M
MurmurHash2Test(2Byte) 271.92% 1.86ns 538.93M
----------------------------------------------------------------------------
StdHashTest(3Byte) 5.86ns 170.75M
MurmurHash2Test(3Byte) 278.04% 2.11ns 474.76M
----------------------------------------------------------------------------
StdHashTest(4Byte) 6.65ns 150.37M
MurmurHash2Test(4Byte) 291.51% 2.28ns 438.34M
----------------------------------------------------------------------------
StdHashTest(5Byte) 7.21ns 138.77M
MurmurHash2Test(5Byte) 270.39% 2.67ns 375.23M
----------------------------------------------------------------------------
StdHashTest(6Byte) 7.91ns 126.38M
MurmurHash2Test(6Byte) 264.52% 2.99ns 334.30M
----------------------------------------------------------------------------
StdHashTest(7Byte) 8.75ns 114.26M
MurmurHash2Test(7Byte) 258.92% 3.38ns 295.85M
----------------------------------------------------------------------------
StdHashTest(8Byte) 4.61ns 216.97M
MurmurHash2Test(8Byte) 173.35% 2.66ns 376.11M
----------------------------------------------------------------------------
StdHashTest(9Byte) 5.56ns 179.73M
MurmurHash2Test(9Byte) 187.64% 2.97ns 337.25M
----------------------------------------------------------------------------
StdHashTest(10Byte) 6.13ns 163.24M
MurmurHash2Test(10Byte) 196.97% 3.11ns 321.53M
----------------------------------------------------------------------------
StdHashTest(64Byte) 12.76ns 78.40M
MurmurHash2Test(64Byte) 117.30% 10.87ns 91.96M
----------------------------------------------------------------------------
StdHashTest(256Byte) 48.69ns 20.54M
MurmurHash2Test(256Byte) 108.87% 44.72ns 22.36M
----------------------------------------------------------------------------
StdHashTest(1024Byte) 204.19ns 4.90M
MurmurHash2Test(1024Byte) 98.92% 206.43ns 4.84M
----------------------------------------------------------------------------
StdHashTest(4096Byte) 825.29ns 1.21M
MurmurHash2Test(4096Byte) 98.29% 839.61ns 1.19M
============================================================================
*/
Loading