/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
 *
 * This source code is licensed under Apache 2.0 License
 *  (found in the LICENSE.Apache file in the root directory)
 */
// File: common/base/MurmurHash2.h
#ifndef COMMON_BASE_MURMURHASH2_H_
#define COMMON_BASE_MURMURHASH2_H_

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <string>
#include <thread>
#include <type_traits>

namespace vesoft {

/**
 * This is an implementation of MurmurHash2,
 * which is identical to `std::hash'(at least until GCC 8.1).
 * This one is more performant on short strings, because:
 *  1. It could be inlined.
 *  2. It utilizes the loop unrolling trick.
 * Besides, it works with the plain old raw bytes array!
 *
 * Notes:
 *  - 8-byte blocks are loaded in the host's native byte order, so hash
 *    values of byte sequences are not portable across endianness.
 *  - Integers and pointers are not mixed at all: their numeric value is
 *    returned as the hash.
 */
class MurmurHash2 {
    // True for the three narrow character types that may alias raw bytes.
    template <typename T>
    static constexpr bool is_char_v = std::is_same<T, char>::value ||
                                      std::is_same<T, signed char>::value ||
                                      std::is_same<T, unsigned char>::value;

public:
    // std::string: hashes exactly `str.length()' bytes (embedded NULs included).
    size_t operator()(const std::string &str) const noexcept {
        return this->operator()(str.data(), str.length());
    }

    // null-terminated C-style string
    // The parameter is a reference to a pointer on purpose: it binds only to
    // pointer lvalues of type `const T*', so string literals (arrays) are
    // routed to the array overload below instead of being ambiguous.
    template <typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T *&str) const noexcept {
        // strlen() only accepts `const char*'; the cast keeps this overload
        // usable for `signed char' and `unsigned char' strings as well.
        return this->operator()(
            str, std::strlen(reinterpret_cast<const char*>(str)));
    }

    // raw bytes array
    // Hashes `size' bytes starting at `str'. `str' may have any alignment;
    // it is never dereferenced when `size' is 0.
    template <typename T, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T *str, size_t size) const noexcept {
        constexpr uint64_t seed = 0xc70f6907UL;
        constexpr uint64_t m = 0xc6a4a7935bd1e995;
        constexpr uint32_t r = 47;

        const auto *bytes = reinterpret_cast<const unsigned char*>(str);
        const unsigned char *const blocksEnd =
            bytes + (size & ~static_cast<size_t>(7));

        uint64_t h = seed ^ (size * m);

        // Body: consume the input 8 bytes at a time.
        for (; bytes != blocksEnd; bytes += sizeof(uint64_t)) {
            // memcpy instead of dereferencing a casted `uint64_t*': the input
            // is not guaranteed to be 8-byte aligned, and reading chars
            // through a `uint64_t' lvalue violates strict aliasing.
            uint64_t k;
            std::memcpy(&k, bytes, sizeof(k));

            k *= m;
            k ^= k >> r;
            k *= m;

            h ^= k;
            h *= m;
        }

        // Tail: the remaining 1..7 bytes, unrolled. Every case deliberately
        // falls through to the next one.
        switch (size & 7) {
            case 7:
                h ^= static_cast<uint64_t>(bytes[6]) << 48;
                // fall through
            case 6:
                h ^= static_cast<uint64_t>(bytes[5]) << 40;
                // fall through
            case 5:
                h ^= static_cast<uint64_t>(bytes[4]) << 32;
                // fall through
            case 4:
                h ^= static_cast<uint64_t>(bytes[3]) << 24;
                // fall through
            case 3:
                h ^= static_cast<uint64_t>(bytes[2]) << 16;
                // fall through
            case 2:
                h ^= static_cast<uint64_t>(bytes[1]) << 8;
                // fall through
            case 1:
                h ^= static_cast<uint64_t>(bytes[0]);
                h *= m;
        }

        // Finalization mix.
        h ^= h >> r;
        h *= m;
        h ^= h >> r;

        return static_cast<size_t>(h);
    }

    // std::thread::id: delegates to the standard hasher.
    size_t operator()(std::thread::id id) const noexcept {
        return std::hash<std::thread::id>()(id);
    }

    // literal string(without decay)
    // The last array element is assumed to be the terminating NUL and is
    // excluded, i.e. exactly N - 1 bytes are hashed.
    template <typename T, size_t N, typename = std::enable_if_t<is_char_v<T>>>
    size_t operator()(const T (&str)[N]) const noexcept {
        return this->operator()(str, N - 1);
    }

    // integer: the value itself, converted to size_t.
    template <typename T>
    std::enable_if_t<std::is_integral<T>::value, size_t>
    operator()(T key) const noexcept {
        return static_cast<size_t>(key);
    }

    // pointers (other than character pointers): the address itself.
    template <typename T>
    std::enable_if_t<!is_char_v<T>, size_t>
    operator()(const T *ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr);
    }

    // std::shared_ptr: the address of the managed object.
    template <typename T>
    size_t operator()(const std::shared_ptr<T> &ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr.get());
    }

    // std::unique_ptr: the address of the managed object.
    template <typename T>
    size_t operator()(const std::unique_ptr<T> &ptr) const noexcept {
        return reinterpret_cast<size_t>(ptr.get());
    }
};

}   // namespace vesoft

#endif  // COMMON_BASE_MURMURHASH2_H_

/*
 * Build registration introduced together with this header
 * (additions to common/base/test/CMakeLists.txt):
 *
 *   add_executable(murmurhash2_test MurmurHash2Test.cpp)
 *   target_link_libraries(
 *       murmurhash2_test
 *       gtest
 *       gtest_main
 *       folly
 *       glog
 *       gflags
 *       double-conversion
 *       dl
 *       -pthread
 *   )
 *   add_test(NAME murmurhash2_test COMMAND murmurhash2_test)
 *
 *   add_executable(hash_bm HashBenchmark.cpp)
 *   target_link_libraries(
 *       hash_bm
 *       follybenchmark
 *       folly
 *       glog
 *       gflags
 *       boost_regex
 *       double-conversion
 *       dl
 *       -pthread
 *   )
 */
All rights reserved + * + * This source code is licensed under Apache 2.0 License + * (found in the LICENSE.Apache file in the root directory) + */ +#include +#include +#include +#include +#include +#include +#include +#include "common/base/MurmurHash2.h" + +using vesoft::MurmurHash2; + +std::string makeString(size_t size) { + std::string str; + str.resize(size); + for (auto &c : str) { + c = folly::Random::rand32() % (0x7E/*~*/ - 0x21/*!*/) + 0x21; + } + return str; +} + +size_t StdHashTest(size_t iters, size_t size) { + constexpr size_t ops = 1000000UL; + + std::hash hash; + auto str = makeString(size); + auto i = 0UL; + while (i++ < ops * iters) { + auto hv = hash(str); + folly::doNotOptimizeAway(hv); + } + + return iters * ops; +} + +size_t MurmurHash2Test(size_t iters, size_t size) { + constexpr size_t ops = 1000000UL; + + MurmurHash2 hash; + auto str = makeString(size); + auto i = 0UL; + while (i++ < ops * iters) { + auto hv = hash(str); + folly::doNotOptimizeAway(hv); + } + + return iters * ops; +} + +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 1Byte, 1UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 1Byte, 1UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 2Byte, 2UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 2Byte, 2UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 3Byte, 3UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 3Byte, 3UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 4Byte, 4UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 4Byte, 4UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 5Byte, 5UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 5Byte, 5UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 6Byte, 6UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 6Byte, 6UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 7Byte, 7UL); 
+BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 7Byte, 7UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 8Byte, 8UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 8Byte, 8UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 9Byte, 9UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 9Byte, 9UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 10Byte, 10UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 10Byte, 10UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 64Byte, 64UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 64Byte, 64UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 256Byte, 256UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 256Byte, 256UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 1024Byte, 1024UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 1024Byte, 1024UL); +BENCHMARK_DRAW_LINE(); +BENCHMARK_NAMED_PARAM_MULTI(StdHashTest, 4096Byte, 4096UL); +BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(MurmurHash2Test, 4096Byte, 4096UL); + +int +main(int argc, char **argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + folly::runBenchmarks(); + return 0; +} + +/* Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +============================================================================ +common/base/test/HashBenchmark.cpp relative time/iter iters/s +============================================================================ +StdHashTest(1Byte) 4.53ns 220.75M +MurmurHash2Test(1Byte) 293.53% 1.54ns 647.95M +---------------------------------------------------------------------------- +StdHashTest(2Byte) 5.05ns 198.20M +MurmurHash2Test(2Byte) 271.92% 1.86ns 538.93M +---------------------------------------------------------------------------- +StdHashTest(3Byte) 5.86ns 170.75M +MurmurHash2Test(3Byte) 278.04% 2.11ns 474.76M 
+---------------------------------------------------------------------------- +StdHashTest(4Byte) 6.65ns 150.37M +MurmurHash2Test(4Byte) 291.51% 2.28ns 438.34M +---------------------------------------------------------------------------- +StdHashTest(5Byte) 7.21ns 138.77M +MurmurHash2Test(5Byte) 270.39% 2.67ns 375.23M +---------------------------------------------------------------------------- +StdHashTest(6Byte) 7.91ns 126.38M +MurmurHash2Test(6Byte) 264.52% 2.99ns 334.30M +---------------------------------------------------------------------------- +StdHashTest(7Byte) 8.75ns 114.26M +MurmurHash2Test(7Byte) 258.92% 3.38ns 295.85M +---------------------------------------------------------------------------- +StdHashTest(8Byte) 4.61ns 216.97M +MurmurHash2Test(8Byte) 173.35% 2.66ns 376.11M +---------------------------------------------------------------------------- +StdHashTest(9Byte) 5.56ns 179.73M +MurmurHash2Test(9Byte) 187.64% 2.97ns 337.25M +---------------------------------------------------------------------------- +StdHashTest(10Byte) 6.13ns 163.24M +MurmurHash2Test(10Byte) 196.97% 3.11ns 321.53M +---------------------------------------------------------------------------- +StdHashTest(64Byte) 12.76ns 78.40M +MurmurHash2Test(64Byte) 117.30% 10.87ns 91.96M +---------------------------------------------------------------------------- +StdHashTest(256Byte) 48.69ns 20.54M +MurmurHash2Test(256Byte) 108.87% 44.72ns 22.36M +---------------------------------------------------------------------------- +StdHashTest(1024Byte) 204.19ns 4.90M +MurmurHash2Test(1024Byte) 98.92% 206.43ns 4.84M +---------------------------------------------------------------------------- +StdHashTest(4096Byte) 825.29ns 1.21M +MurmurHash2Test(4096Byte) 98.29% 839.61ns 1.19M +============================================================================ + */ diff --git a/common/base/test/MurmurHash2Test.cpp b/common/base/test/MurmurHash2Test.cpp new file mode 100644 index 
00000000000..d016899aef1 --- /dev/null +++ b/common/base/test/MurmurHash2Test.cpp @@ -0,0 +1,95 @@ +/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved + * + * This source code is licensed under Apache 2.0 License + * (found in the LICENSE.Apache file in the root directory) + */ + +#include +#include +#include "common/base/MurmurHash2.h" + +namespace vesoft { + +TEST(MurmurHash2, Basic) { + MurmurHash2 hash; + // string + { +#define LITERAL "Another one bites the dust" + const char *cstr = LITERAL; + std::string str = cstr; + auto hv1 = hash(LITERAL); + auto hv2 = hash(cstr); + auto hv3 = hash(str); + ASSERT_EQ(hv1, hv2); + ASSERT_EQ(hv2, hv3); + ASSERT_EQ(hv3, std::hash()(str)); + } + // integer + { + bool rand8 = folly::Random::rand64(); + unsigned char rand8_2 = folly::Random::rand64(); + int16_t rand16 = folly::Random::rand64(); + int32_t rand32 = folly::Random::rand64(); + int64_t rand64 = folly::Random::rand64(); + ASSERT_EQ(static_cast(rand8), hash(rand8)); + ASSERT_EQ(static_cast(rand8_2), hash(rand8_2)); + ASSERT_EQ(static_cast(rand16), hash(rand16)); + ASSERT_EQ(static_cast(rand32), hash(rand32)); + ASSERT_EQ(static_cast(rand64), hash(rand64)); + } + // pointer + { + { + auto *ptr = new MurmurHash2(); + ASSERT_EQ(reinterpret_cast(ptr), hash(ptr)); + delete ptr; + } + { + auto *ptr = new std::string(); + ASSERT_EQ(reinterpret_cast(ptr), hash(ptr)); + delete ptr; + } + { + auto *ptr = new int(); + ASSERT_EQ(reinterpret_cast(ptr), hash(ptr)); + delete ptr; + } + } + // shared_ptr + { + { + auto ptr = std::make_shared(); + ASSERT_EQ(reinterpret_cast(ptr.get()), hash(ptr)); + } + { + auto ptr = std::make_shared(); + ASSERT_EQ(reinterpret_cast(ptr.get()), hash(ptr)); + } + { + auto ptr = std::make_shared(); + ASSERT_EQ(reinterpret_cast(ptr.get()), hash(ptr)); + } + } + // unique_ptr + { + { + auto ptr = std::make_unique(); + ASSERT_EQ(reinterpret_cast(ptr.get()), hash(ptr)); + } + { + auto ptr = std::make_unique(); + 
ASSERT_EQ(reinterpret_cast(ptr.get()), hash(ptr)); + } + { + auto ptr = std::make_unique(); + ASSERT_EQ(reinterpret_cast(ptr.get()), hash(ptr)); + } + } + // std::thread::id + { + auto id = std::this_thread::get_id(); + ASSERT_EQ(std::hash()(id), hash(id)); + } +} + +} // namespace vesoft