diff --git a/BUILD b/BUILD
index 2974971889..6c4cbfa5a6 100644
--- a/BUILD
+++ b/BUILD
@@ -68,6 +68,7 @@ cc_library(
"upb/table.int.h",
"upb/upb.c",
"upb/upb.int.h",
+ "third_party/wyhash/wyhash.h",
],
hdrs = [
"upb/decode.h",
@@ -369,6 +370,7 @@ filegroup(
"upbc/**/*",
"upb/**/*",
"tests/**/*",
+ "third_party/**/*",
]),
visibility = ["//cmake:__pkg__"],
)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 0712167c8f..e23703fcfd 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -72,6 +72,7 @@ add_library(upb
../upb/table.int.h
../upb/upb.c
../upb/upb.int.h
+ ../third_party/wyhash/wyhash.h
../upb/decode.h
../upb/encode.h
../upb/upb.h
diff --git a/cmake/upb/json/parser.c b/cmake/upb/json/parser.c
index 75cacd4c1d..26996d8a59 100644
--- a/cmake/upb/json/parser.c
+++ b/cmake/upb/json/parser.c
@@ -3306,7 +3306,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);
- upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
+ upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
/* Build name_table */
diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc
index e8b3a63ed5..110dd2f9be 100644
--- a/tests/pb/test_decoder.cc
+++ b/tests/pb/test_decoder.cc
@@ -52,17 +52,6 @@
#define PRINT_FAILURE(expr) \
fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
fprintf(stderr, "expr: %s\n", #expr); \
- if (testhash) { \
- fprintf(stderr, "assertion failed running test %x.\n", testhash); \
- if (!filter_hash) { \
- fprintf(stderr, \
- "Run with the arg %x to run only this test. " \
- "(This will also turn on extra debugging output)\n", \
- testhash); \
- } \
- fprintf(stderr, "Failed at %02.2f%% through tests.\n", \
- (float)completed * 100 / total); \
- }
#define MAX_NESTING 64
@@ -467,17 +456,6 @@ upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena,
return ret;
}
-uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
- size_t seam2, bool may_skip) {
- uint32_t hash = upb_murmur_hash2(proto.c_str(), proto.size(), 0);
- if (expected_output)
- hash = upb_murmur_hash2(expected_output->c_str(), expected_output->size(), hash);
- hash = upb_murmur_hash2(&seam1, sizeof(seam1), hash);
- hash = upb_murmur_hash2(&seam2, sizeof(seam2), hash);
- hash = upb_murmur_hash2(&may_skip, sizeof(may_skip), hash);
- return hash;
-}
-
void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) {
// We can't have parsed more data than the decoder callback is telling us it
// parsed.
@@ -506,13 +484,11 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder,
env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
decoder.Reset();
- testhash = Hash(proto, expected_output, i, j, may_skip);
- if (filter_hash && testhash != filter_hash) return;
if (test_mode != COUNT_ONLY) {
output.clear();
if (filter_hash) {
- fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
+ fprintf(stderr, "RUNNING TEST CASE\n");
fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
PrintBinary(proto);
fprintf(stderr, "\n");
@@ -571,7 +547,6 @@ void run_decoder(const string& proto, const string* expected_output) {
}
}
}
- testhash = 0;
}
const static string thirty_byte_nop = cat(
@@ -871,23 +846,17 @@ void test_valid() {
// Empty protobuf where we never call PutString between
// StartString/EndString.
- // Randomly generated hash for this test, hope it doesn't conflict with others
- // by chance.
- const uint32_t emptyhash = 0x5709be8e;
- if (!filter_hash || filter_hash == testhash) {
- testhash = emptyhash;
- upb::Status status;
- upb::Arena arena;
- upb::Sink sink(global_handlers, &closures[0]);
- upb::pb::DecoderPtr decoder =
- CreateDecoder(&arena, global_method, sink, &status);
- output.clear();
- bool ok = upb::PutBuffer(std::string(), decoder.input());
- ASSERT(ok);
- ASSERT(status.ok());
- if (test_mode == ALL_HANDLERS) {
- ASSERT(output == string("<\n>\n"));
- }
+ upb::Status status;
+ upb::Arena arena;
+ upb::Sink sink(global_handlers, &closures[0]);
+ upb::pb::DecoderPtr decoder =
+ CreateDecoder(&arena, global_method, sink, &status);
+ output.clear();
+ bool ok = upb::PutBuffer(std::string(), decoder.input());
+ ASSERT(ok);
+ ASSERT(status.ok());
+ if (test_mode == ALL_HANDLERS) {
+ ASSERT(output == string("<\n>\n"));
}
test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE,
diff --git a/tests/test_table.cc b/tests/test_table.cc
index e19a74a50d..83b49f5c20 100644
--- a/tests/test_table.cc
+++ b/tests/test_table.cc
@@ -618,6 +618,16 @@ void test_delete() {
upb_inttable_uninit(&t);
}
+void test_init() {
+ for (int i = 0; i < 2048; i++) {
+ /* Tests that the size calculations in init() (lg2 size for target load)
+ * work for all expected sizes. */
+ upb_strtable t;
+ upb_strtable_init2(&t, UPB_CTYPE_BOOL, i, &upb_alloc_global);
+ upb_strtable_uninit(&t);
+ }
+}
+
extern "C" {
int run_tests(int argc, char *argv[]) {
diff --git a/third_party/wyhash/LICENSE b/third_party/wyhash/LICENSE
new file mode 100644
index 0000000000..471f09f4cf
--- /dev/null
+++ b/third_party/wyhash/LICENSE
@@ -0,0 +1,25 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to
+
diff --git a/third_party/wyhash/wyhash.h b/third_party/wyhash/wyhash.h
new file mode 100644
index 0000000000..1265169fd6
--- /dev/null
+++ b/third_party/wyhash/wyhash.h
@@ -0,0 +1,142 @@
+//Author: Wang Yi
+#ifndef wyhash_final_version
+#define wyhash_final_version
+//defines that change behavior
+#ifndef WYHASH_CONDOM
+#define WYHASH_CONDOM 1 //0: read 8 bytes before and after boudaries, dangerous but fastest. 1: normal valid behavior 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
+#endif
+#define WYHASH_32BIT_MUM 0 //faster on 32 bit system
+//includes
+#include
+#include
+#if defined(_MSC_VER) && defined(_M_X64)
+ #include
+ #pragma intrinsic(_umul128)
+#endif
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+ #define _likely_(x) __builtin_expect(x,1)
+ #define _unlikely_(x) __builtin_expect(x,0)
+#else
+ #define _likely_(x) (x)
+ #define _unlikely_(x) (x)
+#endif
+//mum function
+static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
+static inline void _wymum(uint64_t *A, uint64_t *B){
+#if(WYHASH_32BIT_MUM)
+ uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B;
+ #if(WYHASH_CONDOM>1)
+ *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
+ #else
+ *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
+ #endif
+#elif defined(__SIZEOF_INT128__)
+ __uint128_t r=*A; r*=*B;
+ #if(WYHASH_CONDOM>1)
+ *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
+ #else
+ *A=(uint64_t)r; *B=(uint64_t)(r>>64);
+ #endif
+#elif defined(_MSC_VER) && defined(_M_X64)
+ #if(WYHASH_CONDOM>1)
+ uint64_t a, b;
+ a=_umul128(*A,*B,&b);
+ *A^=a; *B^=b;
+ #else
+ *A=_umul128(*A,*B,B);
+ #endif
+#else
+ uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
+ uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c;
+ #if(WYHASH_CONDOM>1)
+ *A^=lo; *B^=hi;
+ #else
+ *A=lo; *B=hi;
+ #endif
+#endif
+}
+static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; }
+//read functions
+#ifndef WYHASH_LITTLE_ENDIAN
+ #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+ #define WYHASH_LITTLE_ENDIAN 1
+ #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ #define WYHASH_LITTLE_ENDIAN 0
+ #endif
+#endif
+#if (WYHASH_LITTLE_ENDIAN)
+static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
+static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
+static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
+#elif defined(_MSC_VER)
+static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
+static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
+#endif
+static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}
+//wyhash function
+static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
+#if(WYHASH_CONDOM>0)
+ uint64_t a, b;
+ if(_likely_(i<=8)){
+ if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }
+ else if (_likely_(i)){ a=_wyr3(p,i); b=0; }
+ else a=b=0;
+ }
+ else{ a=_wyr8(p); b=_wyr8(p+i-8); }
+ return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
+#else
+ #define oneshot_shift ((i<8)*((8-i)<<3))
+ return _wymix(secret[1]^len,_wymix((_wyr8(p)<>oneshot_shift)^seed));
+#endif
+}
+
+static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
+ if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i);
+ return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16);
+}
+
+static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){
+ const uint8_t *p=(const uint8_t *)key;
+ uint64_t i=len; seed^=*secret;
+ if(_unlikely_(i>64)){
+ uint64_t see1=seed;
+ do{
+ seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed);
+ see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1);
+ p+=64; i-=64;
+ }while(i>64);
+ seed^=see1;
+ }
+ return _wyfinish(p,len,seed,secret,i);
+}
+//utility functions
+const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full};
+static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);}
+static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);}
+static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;}
+static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;}
+static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; }
+
+static inline void make_secret(uint64_t seed, uint64_t *secret){
+ uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
+ for(size_t i=0;i<5;i++){
+ uint8_t ok;
+ do{
+ ok=1; secret[i]=0;
+ for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<ntof, name.data, name.size, v, ctx->alloc));
CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
- CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));
+ CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc));
return true;
}
@@ -1633,14 +1633,13 @@ static bool create_enumdef(
e->full_name = makefullname(ctx, prefix, name);
CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)));
- CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc));
+ values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
+ CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc));
CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
e->file = ctx->file;
e->defaultval = 0;
- values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
-
if (n == 0) {
upb_status_seterrf(ctx->status,
"enums must contain at least one value (%s)",
@@ -1690,7 +1689,7 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
const google_protobuf_FieldDescriptorProto *const *fields;
const google_protobuf_EnumDescriptorProto *const *enums;
const google_protobuf_DescriptorProto *const *msgs;
- size_t i, n;
+ size_t i, n_oneof, n_field, n;
upb_strview name;
name = google_protobuf_DescriptorProto_name(msg_proto);
@@ -1700,8 +1699,12 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
m->full_name = makefullname(ctx, prefix, name);
CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)));
+ oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
+ fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field);
+
CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
- CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));
+ CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field,
+ ctx->alloc));
m->file = ctx->file;
m->map_entry = false;
@@ -1720,17 +1723,15 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout));
}
- oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n);
m->oneof_count = 0;
- m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n);
- for (i = 0; i < n; i++) {
+ m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n_oneof);
+ for (i = 0; i < n_oneof; i++) {
CHK(create_oneofdef(ctx, m, oneofs[i]));
}
- fields = google_protobuf_DescriptorProto_field(msg_proto, &n);
m->field_count = 0;
- m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n);
- for (i = 0; i < n; i++) {
+ m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n_field);
+ for (i = 0; i < n_field; i++) {
CHK(create_fielddef(ctx, m->full_name, m, fields[i]));
}
@@ -2085,8 +2086,8 @@ upb_symtab *upb_symtab_new(void) {
s->arena = upb_arena_new();
alloc = upb_arena_alloc(s->arena);
- if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) ||
- !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) {
+ if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) ||
+ !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) {
upb_arena_free(s->arena);
upb_gfree(s);
s = NULL;
@@ -2148,7 +2149,7 @@ static const upb_filedef *_upb_symtab_addfile(
ctx.layouts = layouts;
ctx.status = status;
- ok = file && upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) &&
+ ok = file && upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, 8, ctx.tmp) &&
build_filedef(&ctx, file, file_proto) && upb_symtab_addtotabs(s, &ctx);
upb_arena_free(tmparena);
diff --git a/upb/json/parser.rl b/upb/json/parser.rl
index ed7eead408..a7d75ff158 100644
--- a/upb/json/parser.rl
+++ b/upb/json/parser.rl
@@ -2869,7 +2869,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);
- upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
+ upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
/* Build name_table */
diff --git a/upb/msg.c b/upb/msg.c
index 5f6d1ce170..baafe59293 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -130,7 +130,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
return NULL;
}
- upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a));
+ upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a));
map->key_size = key_size;
map->val_size = value_size;
diff --git a/upb/table.c b/upb/table.c
index 34a20530d8..4b1cf9b466 100644
--- a/upb/table.c
+++ b/upb/table.c
@@ -4,10 +4,12 @@
** Implementation is heavily inspired by Lua's ltable.c.
*/
-#include "upb/table.int.h"
-
#include
+#include "third_party/wyhash/wyhash.h"
+#include "upb/table.int.h"
+
+/* Must be last. */
#include "upb/port_def.inc"
#define UPB_MAXARRSIZE 16 /* 64k. */
@@ -87,11 +89,7 @@ static upb_tabent *mutable_entries(upb_table *t) {
}
static bool isfull(upb_table *t) {
- if (upb_table_size(t) == 0) {
- return true;
- } else {
- return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
- }
+ return t->count == t->max_count;
}
static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
@@ -100,6 +98,7 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
t->count = 0;
t->size_lg2 = size_lg2;
t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
+ t->max_count = upb_table_size(t) * MAX_LOAD;
bytes = upb_table_size(t) * sizeof(upb_tabent);
if (bytes > 0) {
t->entries = upb_malloc(a, bytes);
@@ -115,9 +114,17 @@ static void uninit(upb_table *t, upb_alloc *a) {
upb_free(a, mutable_entries(t));
}
-static upb_tabent *emptyent(upb_table *t) {
- upb_tabent *e = mutable_entries(t) + upb_table_size(t);
- while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
+static upb_tabent *emptyent(upb_table *t, upb_tabent *e) {
+ upb_tabent *begin = mutable_entries(t);
+ upb_tabent *end = begin + upb_table_size(t);
+ for (e = e + 1; e < end; e++) {
+ if (upb_tabent_isempty(e)) return e;
+ }
+ for (e = begin; e < end; e++) {
+ if (upb_tabent_isempty(e)) return e;
+ }
+ UPB_ASSERT(false);
+ return NULL;
}
static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
@@ -173,7 +180,7 @@ static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
our_e->next = NULL;
} else {
/* Collision. */
- upb_tabent *new_e = emptyent(t);
+ upb_tabent *new_e = emptyent(t, mainpos_e);
/* Head of collider's chain. */
upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
if (chain == mainpos_e) {
@@ -268,10 +275,14 @@ static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
return (uintptr_t)str;
}
+static uint32_t table_hash(const char *p, size_t n) {
+ return wyhash(p, n, 0, _wyp);
+}
+
static uint32_t strhash(upb_tabkey key) {
uint32_t len;
char *str = upb_tabstr(key, &len);
- return upb_murmur_hash2(str, len, 0);
+ return table_hash(str, len);
}
static bool streql(upb_tabkey k1, lookupkey_t k2) {
@@ -280,9 +291,14 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) {
return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
}
-bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
+bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype,
+ size_t expected_size, upb_alloc *a) {
UPB_UNUSED(ctype); /* TODO(haberman): rm */
- return init(&t->t, 2, a);
+ // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator.
+ size_t need_entries = (expected_size + 1) * 1204 / 1024;
+ UPB_ASSERT(need_entries >= expected_size * 0.85);
+ int size_lg2 = _upb_lg2ceil(need_entries);
+ return init(&t->t, size_lg2, a);
}
void upb_strtable_clear(upb_strtable *t) {
@@ -333,20 +349,20 @@ bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
tabkey = strcopy(key, a);
if (tabkey == 0) return false;
- hash = upb_murmur_hash2(key.str.str, key.str.len, 0);
+ hash = table_hash(key.str.str, key.str.len);
insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
return true;
}
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
upb_value *v) {
- uint32_t hash = upb_murmur_hash2(key, len, 0);
+ uint32_t hash = table_hash(key, len);
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
upb_value *val, upb_alloc *alloc) {
- uint32_t hash = upb_murmur_hash2(key, len, 0);
+ uint32_t hash = table_hash(key, len);
upb_tabkey tabkey;
if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
if (alloc) {
@@ -699,182 +715,3 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
return i1->t == i2->t && i1->index == i2->index &&
i1->array_part == i2->array_part;
}
-
-#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
-/* -----------------------------------------------------------------------------
- * MurmurHash2, by Austin Appleby (released as public domain).
- * Reformatted and C99-ified by Joshua Haberman.
- * Note - This code makes a few assumptions about how your machine behaves -
- * 1. We can read a 4-byte value from any address without crashing
- * 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
- * And it has a few limitations -
- * 1. It will not work incrementally.
- * 2. It will not produce the same results on little-endian and big-endian
- * machines. */
-uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) {
- /* 'm' and 'r' are mixing constants generated offline.
- * They're not really 'magic', they just happen to work well. */
- const uint32_t m = 0x5bd1e995;
- const int32_t r = 24;
-
- /* Initialize the hash to a 'random' value */
- uint32_t h = seed ^ len;
-
- /* Mix 4 bytes at a time into the hash */
- const uint8_t * data = (const uint8_t *)key;
- while(len >= 4) {
- uint32_t k;
- memcpy(&k, data, sizeof(k));
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h *= m;
- h ^= k;
-
- data += 4;
- len -= 4;
- }
-
- /* Handle the last few bytes of the input array */
- switch(len) {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0]; h *= m;
- };
-
- /* Do a few final mixes of the hash to ensure the last few
- * bytes are well-incorporated. */
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
-}
-
-#else /* !UPB_UNALIGNED_READS_OK */
-
-/* -----------------------------------------------------------------------------
- * MurmurHashAligned2, by Austin Appleby
- * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
- * on certain platforms.
- * Performance will be lower than MurmurHash2 */
-
-#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
-uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
- const uint32_t m = 0x5bd1e995;
- const int32_t r = 24;
- const uint8_t * data = (const uint8_t *)key;
- uint32_t h = (uint32_t)(seed ^ len);
- uint8_t align = (uintptr_t)data & 3;
-
- if(align && (len >= 4)) {
- /* Pre-load the temp registers */
- uint32_t t = 0, d = 0;
- int32_t sl;
- int32_t sr;
-
- switch(align) {
- case 1: t |= data[2] << 16; /* fallthrough */
- case 2: t |= data[1] << 8; /* fallthrough */
- case 3: t |= data[0];
- }
-
- t <<= (8 * align);
-
- data += 4-align;
- len -= 4-align;
-
- sl = 8 * (4-align);
- sr = 8 * align;
-
- /* Mix */
-
- while(len >= 4) {
- uint32_t k;
-
- d = *(uint32_t *)data;
- t = (t >> sr) | (d << sl);
-
- k = t;
-
- MIX(h,k,m);
-
- t = d;
-
- data += 4;
- len -= 4;
- }
-
- /* Handle leftover data in temp registers */
-
- d = 0;
-
- if(len >= align) {
- uint32_t k;
-
- switch(align) {
- case 3: d |= data[2] << 16; /* fallthrough */
- case 2: d |= data[1] << 8; /* fallthrough */
- case 1: d |= data[0]; /* fallthrough */
- }
-
- k = (t >> sr) | (d << sl);
- MIX(h,k,m);
-
- data += align;
- len -= align;
-
- /* ----------
- * Handle tail bytes */
-
- switch(len) {
- case 3: h ^= data[2] << 16; /* fallthrough */
- case 2: h ^= data[1] << 8; /* fallthrough */
- case 1: h ^= data[0]; h *= m; /* fallthrough */
- };
- } else {
- switch(len) {
- case 3: d |= data[2] << 16; /* fallthrough */
- case 2: d |= data[1] << 8; /* fallthrough */
- case 1: d |= data[0]; /* fallthrough */
- case 0: h ^= (t >> sr) | (d << sl); h *= m;
- }
- }
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
- } else {
- while(len >= 4) {
- uint32_t k = *(uint32_t *)data;
-
- MIX(h,k,m);
-
- data += 4;
- len -= 4;
- }
-
- /* ----------
- * Handle tail bytes */
-
- switch(len) {
- case 3: h ^= data[2] << 16; /* fallthrough */
- case 2: h ^= data[1] << 8; /* fallthrough */
- case 1: h ^= data[0]; h *= m;
- };
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
- }
-}
-#undef MIX
-
-#endif /* UPB_UNALIGNED_READS_OK */
diff --git a/upb/table.int.h b/upb/table.int.h
index 600637eef2..1e6c232cf8 100644
--- a/upb/table.int.h
+++ b/upb/table.int.h
@@ -171,7 +171,8 @@ typedef struct _upb_tabent {
typedef struct {
size_t count; /* Number of entries in the hash part. */
- size_t mask; /* Mask to turn hash value -> bucket. */
+ uint32_t mask; /* Mask to turn hash value -> bucket. */
+ uint32_t max_count; /* Max count before we hit our load limit. */
uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
/* Hash table entries.
@@ -230,7 +231,8 @@ UPB_INLINE bool upb_arrhas(upb_tabval key) {
/* Initialize and uninitialize a table, respectively. If memory allocation
* failed, false is returned that the table is uninitialized. */
bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
-bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a);
+bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype,
+ size_t expected_size, upb_alloc *a);
void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
@@ -239,7 +241,7 @@ UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
}
UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
- return upb_strtable_init2(table, ctype, &upb_alloc_global);
+ return upb_strtable_init2(table, ctype, 4, &upb_alloc_global);
}
UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {
diff --git a/upb/upb.h b/upb/upb.h
index 85205a5a76..f52e812478 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -313,6 +313,17 @@ UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) {
}
}
+UPB_INLINE int _upb_lg2ceil(int x) {
+ if (x == 0) return 0;
+#ifdef __GNUC__
+ return 32 - __builtin_clz(x - 1);
+#else
+ int lg2 = 0;
+ while (1 << lg2 < x) lg2++;
+ return lg2;
+#endif
+}
+
#include "upb/port_undef.inc"
#ifdef __cplusplus