Skip to content

Commit

Permalink
Switch hash to rapidhash.
Browse files Browse the repository at this point in the history
This is the currently fastest hash that passes SMHasher and does not
require special instructions (e.g. SIMD). Like emhash8, it is
liberally licensed (2-clause BSD), and we include the .h file directly.

For a no-op build of Chromium (Linux, Zen 2),
this reduces time spent from 4.62 to 4.22 seconds.
(NOTE: This is a more difficult measurement than the previous ones,
as it necessarily involves removing the entire build log and doing
a clean build. However, just switching the HashMap hash alone brings
it down to 4.47 seconds or so.)
  • Loading branch information
Steinar H. Gunderson authored and sesse committed Nov 21, 2024
1 parent c3e3fb9 commit 32e7e23
Show file tree
Hide file tree
Showing 5 changed files with 342 additions and 92 deletions.
55 changes: 3 additions & 52 deletions src/build_log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,63 +53,14 @@ using namespace std;
namespace {

// printf-style template for the header line of the build log; the %d slot
// holds the log format version (the tests in this change write it out
// literally, e.g. "# ninja log v7\n").
const char kFileSignature[] = "# ninja log v%d\n";
// NOTE(review): these two v6 values look like the removed side of a diff
// rendered without +/- markers; the same constants appear again further down
// with value 7 -- confirm against the real file.
const int kOldestSupportedVersion = 6;
const int kCurrentVersion = 6;

// 64bit MurmurHash2, by Austin Appleby
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
// 64-bit MurmurHash2 variant (MurmurHash64A, by Austin Appleby) with a fixed
// seed.
//
// key: start of the byte range to hash.
// len: number of bytes in the range.
// Returns the 64-bit hash of those bytes.
inline uint64_t MurmurHash64A(const void* key, size_t len) {
  const uint64_t kSeed = 0xDECAFBADDECAFBADull;
  const uint64_t kMul = BIG_CONSTANT(0xc6a4a7935bd1e995);
  const int kShift = 47;

  const unsigned char* const bytes = static_cast<const unsigned char*>(key);
  const size_t tail_len = len & 7;
  const size_t body_len = len - tail_len;

  uint64_t hash = kSeed ^ (len * kMul);

  // Mix in every complete 8-byte block. memcpy is used for the load so the
  // input does not have to be 8-byte aligned.
  for (size_t offset = 0; offset < body_len; offset += 8) {
    uint64_t block;
    memcpy(&block, bytes + offset, sizeof block);
    block *= kMul;
    block ^= block >> kShift;
    block *= kMul;
    hash = (hash ^ block) * kMul;
  }

  // Fold in the 1..7 leftover bytes: byte i lands at bit position 8*i. The
  // extra multiply only happens when there is a tail at all.
  if (tail_len != 0) {
    const unsigned char* const tail = bytes + body_len;
    for (size_t i = tail_len; i-- > 0;)
      hash ^= uint64_t(tail[i]) << (8 * i);
    hash *= kMul;
  }

  // Final avalanche.
  hash ^= hash >> kShift;
  hash *= kMul;
  hash ^= hash >> kShift;
  return hash;
}
#undef BIG_CONSTANT

// Build log format versions; this change raises both from 6 to 7 (the test
// fixtures' "# ninja log v6" headers become "# ninja log v7" accordingly).
// NOTE(review): presumably bumped because command hashes already stored in
// v6 logs came from MurmurHash64A and would not match rapidhash output --
// confirm.
const int kOldestSupportedVersion = 7;
const int kCurrentVersion = 7;

} // namespace

// static
// Returns a 64-bit hash of the bytes of `command` (command.str_, command.len_).
// NOTE(review): two return statements appear because this listing is a diff
// rendered without +/- markers: the MurmurHash64A line is the removed side and
// the rapidhash line is the added side. As literally written, the second
// return would be unreachable -- confirm against the real file.
uint64_t BuildLog::LogEntry::HashCommand(StringPiece command) {
return MurmurHash64A(command.str_, command.len_);
return rapidhash(command.str_, command.len_);
}

BuildLog::LogEntry::LogEntry(const string& output)
Expand Down
12 changes: 6 additions & 6 deletions src/build_log_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ TEST_F(BuildLogTest, FirstWriteAddsSignature) {

TEST_F(BuildLogTest, DoubleEntry) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command abc"));
fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n",
Expand Down Expand Up @@ -177,7 +177,7 @@ TEST_F(BuildLogTest, ObsoleteOldVersion) {

TEST_F(BuildLogTest, SpacesInOutput) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout with space\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command"));
fclose(f);
Expand All @@ -200,10 +200,10 @@ TEST_F(BuildLogTest, DuplicateVersionHeader) {
// build log on Windows. This shouldn't crash, and the second version header
// should be ignored.
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command"));
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "456\t789\t789\tout2\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command2"));
fclose(f);
Expand Down Expand Up @@ -252,7 +252,7 @@ struct TestDiskInterface : public DiskInterface {

TEST_F(BuildLogTest, Restat) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n"
fprintf(f, "# ninja log v7\n"
"1\t2\t3\tout\tcommand\n");
fclose(f);
std::string err;
Expand Down Expand Up @@ -280,7 +280,7 @@ TEST_F(BuildLogTest, VeryLongInputLine) {
// Ninja's build log buffer is currently 256kB. Lines longer than that are
// silently ignored, but don't affect parsing of other lines.
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout\tcommand start");
for (size_t i = 0; i < (512 << 10) / strlen(" more_command"); ++i)
fputs(" more_command", f);
Expand Down
37 changes: 3 additions & 34 deletions src/hash_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,40 +19,9 @@
#include <string.h>
#include "string_piece.h"
#include "util.h"
#include "third_party/emhash/hash_table8.hpp"

// MurmurHash2, by Austin Appleby
// 32-bit MurmurHash2 (by Austin Appleby) with a fixed seed.
//
// key: start of the byte range to hash.
// len: number of bytes in the range.
// Returns the hash of those bytes as an unsigned int.
static inline unsigned int MurmurHash2(const void* key, size_t len) {
  const unsigned int kSeed = 0xDECAFBAD;
  const unsigned int kMul = 0x5bd1e995;
  const int kShift = 24;

  const unsigned char* const bytes = static_cast<const unsigned char*>(key);
  const size_t tail_len = len % 4;
  const size_t body_len = len - tail_len;

  // The XOR is evaluated in size_t and then narrowed, as in the reference
  // code's implicit conversion.
  unsigned int hash = static_cast<unsigned int>(kSeed ^ len);

  // Mix in every complete 4-byte word. memcpy is used for the load so the
  // input does not have to be word-aligned.
  for (size_t offset = 0; offset < body_len; offset += 4) {
    unsigned int word;
    memcpy(&word, bytes + offset, sizeof word);
    word *= kMul;
    word ^= word >> kShift;
    word *= kMul;
    hash = (hash * kMul) ^ word;
  }

  // Fold in the 1..3 leftover bytes: byte i lands at bit position 8*i. The
  // extra multiply only happens when there is a tail at all.
  if (tail_len != 0) {
    const unsigned char* const tail = bytes + body_len;
    for (size_t i = tail_len; i-- > 0;)
      hash ^= static_cast<unsigned int>(tail[i]) << (8 * i);
    hash *= kMul;
  }

  // Final avalanche.
  hash ^= hash >> 13;
  hash *= kMul;
  hash ^= hash >> 15;
  return hash;
}
#include "third_party/emhash/hash_table8.hpp"
#include "third_party/rapidhash/rapidhash.h"

namespace std {
template<>
Expand All @@ -61,7 +30,7 @@ struct hash<StringPiece> {
typedef size_t result_type;

// Hashes the bytes of `key` (key.str_, key.len_) for use as a std::hash result.
// NOTE(review): two return statements appear because this listing is a diff
// rendered without +/- markers: the MurmurHash2 line is the removed side and
// the rapidhash line is the added side. As literally written, the second
// return would be unreachable -- confirm against the real file.
size_t operator()(StringPiece key) const {
return MurmurHash2(key.str_, key.len_);
return rapidhash(key.str_, key.len_);
}
};
}
Expand Down
7 changes: 7 additions & 0 deletions src/third_party/rapidhash/README.ninja
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Description: Very fast, high quality, platform-independent hashing algorithm.
Version: commit 4a6b2570e868536be84800353efd92c699f37d2c
URL: https://github.com/Nicoshev/rapidhash
Copyright: Copyright (C) 2024 Nicolas De Carli, Based on 'wyhash', by Wang Yi <[email protected]>
SPDX-License-Identifier: BSD-2-Clause
Local changes:
- Changed to UNIX line endings
Loading

0 comments on commit 32e7e23

Please sign in to comment.