-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added an Implementation of MurmurHash2 (#18)
This is an implementation of MurmurHash2 that produces the same values as GCC's `std::hash` for strings (at least up to GCC 8.1). It is faster on short strings because (1) it can be inlined and (2) it uses the loop-unrolling trick. In addition, it works directly on plain raw byte arrays.
- Loading branch information
Showing
4 changed files
with
398 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved | ||
* | ||
* This source code is licensed under Apache 2.0 License | ||
* (found in the LICENSE.Apache file in the root directory) | ||
*/ | ||
#ifndef COMMON_BASE_MURMURHASH2_H_ | ||
#define COMMON_BASE_MURMURHASH2_H_ | ||
|
||
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <string>
#include <thread>
#include <type_traits>
|
||
namespace vesoft { | ||
|
||
/**
 * A stateless function object implementing the 64-bit MurmurHash2 variant.
 *
 * For byte sequences it is intended to be identical to `std::hash`
 * (at least until GCC 8.1). It is more performant on short strings, because:
 *  1. It could be inlined.
 *  2. It utilizes the loop unrolling trick for the trailing bytes.
 * Besides, it works with the plain old raw bytes array!
 *
 * Overloads for integers, raw pointers and smart pointers do not hash at all:
 * they return the value (or the address) converted to `size_t`.
 */
class MurmurHash2 {
    // True for the three character types that may be hashed as raw bytes.
    template <typename T>
    static constexpr bool is_char_v = std::is_same<T, char>::value ||
                                      std::is_same<T, signed char>::value ||
                                      std::is_same<T, unsigned char>::value;

public:
    // std::string: hashes exactly `str.length()` bytes (embedded NULs included).
    size_t operator()(const std::string &str) const noexcept {
        return this->operator()(str.data(), str.length());
    }

    // null-terminated C-style string.
    // The parameter is a reference to a non-const pointer on purpose: it only
    // binds to pointer lvalues, so string literals (arrays) are routed to the
    // array overload below instead of decaying to this one.
    template <typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T *&str) const noexcept {
        // strlen() only accepts `const char*`; the cast keeps the overload
        // usable for `signed char` and `unsigned char` strings as well.
        return this->operator()(str, std::strlen(reinterpret_cast<const char*>(str)));
    }

    // raw bytes array: hashes the `size` bytes starting at `str`.
    // `str` may have any alignment; it is never dereferenced when `size` is 0.
    template <typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T *str, size_t size) const noexcept {
        constexpr uint64_t seed = 0xc70f6907UL;
        constexpr uint64_t m = 0xc6a4a7935bd1e995ULL;
        constexpr uint32_t r = 47;
        uint64_t h = seed ^ (size * m);

        // Consume the input eight bytes at a time. The words are fetched with
        // memcpy rather than through a `uint64_t*` cast, because the buffer
        // is not guaranteed to be suitably aligned and such a cast would also
        // break the aliasing rules. Compilers turn this into a single load.
        const unsigned char *bytes = reinterpret_cast<const unsigned char*>(str);
        const unsigned char *const end = bytes + (size & ~static_cast<size_t>(7));
        for (; bytes != end; bytes += sizeof(uint64_t)) {
            uint64_t k;
            std::memcpy(&k, bytes, sizeof(k));

            k *= m;
            k ^= k >> r;
            k *= m;

            h ^= k;
            h *= m;
        }

        // Mix in the remaining 0..7 bytes. Every case deliberately falls
        // through to the next one (the unrolled equivalent of a byte loop).
        switch (size & 7) {
        case 7:
            h ^= static_cast<uint64_t>(bytes[6]) << 48;
            // fallthrough
        case 6:
            h ^= static_cast<uint64_t>(bytes[5]) << 40;
            // fallthrough
        case 5:
            h ^= static_cast<uint64_t>(bytes[4]) << 32;
            // fallthrough
        case 4:
            h ^= static_cast<uint64_t>(bytes[3]) << 24;
            // fallthrough
        case 3:
            h ^= static_cast<uint64_t>(bytes[2]) << 16;
            // fallthrough
        case 2:
            h ^= static_cast<uint64_t>(bytes[1]) << 8;
            // fallthrough
        case 1:
            h ^= static_cast<uint64_t>(bytes[0]);
            h *= m;
        }

        // Final avalanche.
        h ^= h >> r;
        h *= m;
        h ^= h >> r;

        return h;
    }

    // std::thread::id: delegates to the standard hasher.
    size_t operator()(std::thread::id id) const noexcept {
        return std::hash<std::thread::id>()(id);
    }

    // literal string(without decay): hashes the first N - 1 elements, i.e.
    // everything but the last one, which is the terminating NUL of a literal.
    template <size_t N, typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T (&str)[N]) const noexcept {
        return this->operator()(str, N - 1);
    }

    // integer: the value itself, converted to size_t.
    template <typename T>
    std::enable_if_t<std::is_integral<T>::value, size_t>
    operator()(T key) const noexcept {
        return static_cast<size_t>(key);
    }

    // pointers (to anything but a character type): the address itself.
    template <typename T>
    std::enable_if_t<!is_char_v<T>, size_t>
    operator() (const T *ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr);
    }

    // std::shared_ptr: the address of the managed object.
    template <typename T>
    size_t operator()(const std::shared_ptr<T> &ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr.get());
    }

    // std::unique_ptr: the address of the managed object.
    template <typename T, typename Deleter>
    size_t operator()(const std::unique_ptr<T, Deleter> &ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr.get());
    }
};
|
||
} // namespace vesoft | ||
|
||
#endif // COMMON_BASE_MURMURHASH2_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved | ||
* | ||
* This source code is licensed under Apache 2.0 License | ||
* (found in the LICENSE.Apache file in the root directory) | ||
*/ | ||
#include <ctype.h> | ||
#include <folly/Benchmark.h> | ||
#include <folly/Random.h> | ||
#include <cstdlib> | ||
#include <functional> | ||
#include <algorithm> | ||
#include <thread> | ||
#include "common/base/MurmurHash2.h" | ||
|
||
using vesoft::MurmurHash2; | ||
|
||
std::string makeString(size_t size) { | ||
std::string str; | ||
str.resize(size); | ||
for (auto &c : str) { | ||
c = folly::Random::rand32() % (0x7E/*~*/ - 0x21/*!*/) + 0x21; | ||
} | ||
return str; | ||
} | ||
|
||
size_t StdHashTest(size_t iters, size_t size) { | ||
constexpr size_t ops = 1000000UL; | ||
|
||
std::hash<std::string> hash; | ||
auto str = makeString(size); | ||
auto i = 0UL; | ||
while (i++ < ops * iters) { | ||
auto hv = hash(str); | ||
folly::doNotOptimizeAway(hv); | ||
} | ||
|
||
return iters * ops; | ||
} | ||
|
||
size_t MurmurHash2Test(size_t iters, size_t size) { | ||
constexpr size_t ops = 1000000UL; | ||
|
||
MurmurHash2 hash; | ||
auto str = makeString(size); | ||
auto i = 0UL; | ||
while (i++ < ops * iters) { | ||
auto hv = hash(str); | ||
folly::doNotOptimizeAway(hv); | ||
} | ||
|
||
return iters * ops; | ||
} | ||
|
||
// Benchmark registrations: for each input length (1-10, 64, 256, 1024 and | ||
// 4096 bytes) StdHashTest is registered as the baseline and MurmurHash2Test | ||
// as the entry reported relative to it; the last macro argument is the | ||
// string length passed to the test function as `size`. | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 1Byte, 1UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 1Byte, 1UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 2Byte, 2UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 2Byte, 2UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 3Byte, 3UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 3Byte, 3UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 4Byte, 4UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 4Byte, 4UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 5Byte, 5UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 5Byte, 5UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 6Byte, 6UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 6Byte, 6UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 7Byte, 7UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 7Byte, 7UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 8Byte, 8UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 8Byte, 8UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 9Byte, 9UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 9Byte, 9UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 10Byte, 10UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 10Byte, 10UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 64Byte, 64UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 64Byte, 64UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 256Byte, 256UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 256Byte, 256UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 1024Byte, 1024UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 1024Byte, 1024UL); | ||
BENCHMARK_DRAW_LINE(); | ||
BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 4096Byte, 4096UL); | ||
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 4096Byte, 4096UL); | ||
|
||
int | ||
main(int argc, char **argv) { | ||
gflags::ParseCommandLineFlags(&argc, &argv, true); | ||
folly::runBenchmarks(); | ||
return 0; | ||
} | ||
|
||
/* Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz | ||
============================================================================ | ||
common/base/test/HashBenchmark.cpp relative time/iter iters/s | ||
============================================================================ | ||
StdHashTest(1Byte) 4.53ns 220.75M | ||
MurmurHash2Test(1Byte) 293.53% 1.54ns 647.95M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(2Byte) 5.05ns 198.20M | ||
MurmurHash2Test(2Byte) 271.92% 1.86ns 538.93M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(3Byte) 5.86ns 170.75M | ||
MurmurHash2Test(3Byte) 278.04% 2.11ns 474.76M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(4Byte) 6.65ns 150.37M | ||
MurmurHash2Test(4Byte) 291.51% 2.28ns 438.34M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(5Byte) 7.21ns 138.77M | ||
MurmurHash2Test(5Byte) 270.39% 2.67ns 375.23M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(6Byte) 7.91ns 126.38M | ||
MurmurHash2Test(6Byte) 264.52% 2.99ns 334.30M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(7Byte) 8.75ns 114.26M | ||
MurmurHash2Test(7Byte) 258.92% 3.38ns 295.85M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(8Byte) 4.61ns 216.97M | ||
MurmurHash2Test(8Byte) 173.35% 2.66ns 376.11M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(9Byte) 5.56ns 179.73M | ||
MurmurHash2Test(9Byte) 187.64% 2.97ns 337.25M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(10Byte) 6.13ns 163.24M | ||
MurmurHash2Test(10Byte) 196.97% 3.11ns 321.53M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(64Byte) 12.76ns 78.40M | ||
MurmurHash2Test(64Byte) 117.30% 10.87ns 91.96M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(256Byte) 48.69ns 20.54M | ||
MurmurHash2Test(256Byte) 108.87% 44.72ns 22.36M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(1024Byte) 204.19ns 4.90M | ||
MurmurHash2Test(1024Byte) 98.92% 206.43ns 4.84M | ||
---------------------------------------------------------------------------- | ||
StdHashTest(4096Byte) 825.29ns 1.21M | ||
MurmurHash2Test(4096Byte) 98.29% 839.61ns 1.19M | ||
============================================================================ | ||
*/ |
Oops, something went wrong.