Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimized the loading of descriptors #328

Merged
merged 7 commits into from
Oct 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ cc_library(
"upb/table.int.h",
"upb/upb.c",
"upb/upb.int.h",
"third_party/wyhash/wyhash.h",
],
hdrs = [
"upb/decode.h",
Expand Down Expand Up @@ -369,6 +370,7 @@ filegroup(
"upbc/**/*",
"upb/**/*",
"tests/**/*",
"third_party/**/*",
]),
visibility = ["//cmake:__pkg__"],
)
Expand Down
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ add_library(upb
../upb/table.int.h
../upb/upb.c
../upb/upb.int.h
../third_party/wyhash/wyhash.h
../upb/decode.h
../upb/encode.h
../upb/upb.h
Expand Down
2 changes: 1 addition & 1 deletion cmake/upb/json/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -3306,7 +3306,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);

upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);

/* Build name_table */

Expand Down
55 changes: 12 additions & 43 deletions tests/pb/test_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,6 @@
// Reports a failed assertion on stderr: the source location
// (__FILE__/__LINE__) and the stringified expression `expr`.
// When `testhash` is nonzero it additionally prints the hash of the test case
// that was running, a hint (only when `filter_hash` is unset) on how to re-run
// just that case, and how far through the run the failure happened
// (`completed` as a percentage of `total`).
// NOTE: this expands to several statements rather than a single one, so it
// must only be used where a statement sequence is acceptable (e.g. inside a
// braced block).
#define PRINT_FAILURE(expr) \
fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
fprintf(stderr, "expr: %s\n", #expr); \
if (testhash) { \
fprintf(stderr, "assertion failed running test %x.\n", testhash); \
if (!filter_hash) { \
fprintf(stderr, \
"Run with the arg %x to run only this test. " \
"(This will also turn on extra debugging output)\n", \
testhash); \
} \
fprintf(stderr, "Failed at %02.2f%% through tests.\n", \
(float)completed * 100 / total); \
}

#define MAX_NESTING 64

Expand Down Expand Up @@ -467,17 +456,6 @@ upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena,
return ret;
}

uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
size_t seam2, bool may_skip) {
uint32_t hash = upb_murmur_hash2(proto.c_str(), proto.size(), 0);
if (expected_output)
hash = upb_murmur_hash2(expected_output->c_str(), expected_output->size(), hash);
hash = upb_murmur_hash2(&seam1, sizeof(seam1), hash);
hash = upb_murmur_hash2(&seam2, sizeof(seam2), hash);
hash = upb_murmur_hash2(&may_skip, sizeof(may_skip), hash);
return hash;
}

void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) {
// We can't have parsed more data than the decoder callback is telling us it
// parsed.
Expand Down Expand Up @@ -506,13 +484,11 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder,
env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
decoder.Reset();

testhash = Hash(proto, expected_output, i, j, may_skip);
if (filter_hash && testhash != filter_hash) return;
if (test_mode != COUNT_ONLY) {
output.clear();

if (filter_hash) {
fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
fprintf(stderr, "RUNNING TEST CASE\n");
fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
PrintBinary(proto);
fprintf(stderr, "\n");
Expand Down Expand Up @@ -571,7 +547,6 @@ void run_decoder(const string& proto, const string* expected_output) {
}
}
}
testhash = 0;
}

const static string thirty_byte_nop = cat(
Expand Down Expand Up @@ -871,23 +846,17 @@ void test_valid() {
// Empty protobuf where we never call PutString between
// StartString/EndString.

// Randomly generated hash for this test, hope it doesn't conflict with others
// by chance.
const uint32_t emptyhash = 0x5709be8e;
if (!filter_hash || filter_hash == testhash) {
testhash = emptyhash;
upb::Status status;
upb::Arena arena;
upb::Sink sink(global_handlers, &closures[0]);
upb::pb::DecoderPtr decoder =
CreateDecoder(&arena, global_method, sink, &status);
output.clear();
bool ok = upb::PutBuffer(std::string(), decoder.input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
ASSERT(output == string("<\n>\n"));
}
upb::Status status;
upb::Arena arena;
upb::Sink sink(global_handlers, &closures[0]);
upb::pb::DecoderPtr decoder =
CreateDecoder(&arena, global_method, sink, &status);
output.clear();
bool ok = upb::PutBuffer(std::string(), decoder.input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
ASSERT(output == string("<\n>\n"));
}

test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE,
Expand Down
10 changes: 10 additions & 0 deletions tests/test_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,16 @@ void test_delete() {
upb_inttable_uninit(&t);
}

void test_init() {
for (int i = 0; i < 2048; i++) {
/* Tests that the size calculations in init() (lg2 size for target load)
* work for all expected sizes. */
upb_strtable t;
upb_strtable_init2(&t, UPB_CTYPE_BOOL, i, &upb_alloc_global);
upb_strtable_uninit(&t);
}
}

extern "C" {

int run_tests(int argc, char *argv[]) {
Expand Down
25 changes: 25 additions & 0 deletions third_party/wyhash/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <http://unlicense.org/>

142 changes: 142 additions & 0 deletions third_party/wyhash/wyhash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
//Author: Wang Yi <[email protected]>
#ifndef wyhash_final_version
#define wyhash_final_version
//defines that change behavior
//Safety level of the hash. The includer may define WYHASH_CONDOM before including this header to override the default of 1.
#ifndef WYHASH_CONDOM
#define WYHASH_CONDOM 1 //0: read 8 bytes before and after boundaries, dangerous but fastest. 1: normal valid behavior 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
#endif
#define WYHASH_32BIT_MUM 0 //faster on 32 bit system
//includes
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#define _likely_(x) __builtin_expect(x,1)
#define _unlikely_(x) __builtin_expect(x,0)
#else
#define _likely_(x) (x)
#define _unlikely_(x) (x)
#endif
//mum function
//Swaps the upper and lower 32-bit halves of x (i.e. rotates x by 32 bits).
static inline uint64_t _wyrot(uint64_t x) {
  const uint64_t upper_moved_down = x >> 32;
  const uint64_t lower_moved_up = x << 32;
  return lower_moved_up | upper_moved_down;
}
//"mum" step: multiplies *A by *B and writes the result back through both pointers.
//In the exact-product variants below, *A receives the low 64 bits and *B the high 64 bits of the 128-bit product.
//When WYHASH_CONDOM>1 the two result words are XORed into the incoming *A/*B instead of overwriting them.
static inline void _wymum(uint64_t *A, uint64_t *B){
#if(WYHASH_32BIT_MUM)
//Cheaper variant for 32-bit targets: combines the four 32x32 partial products with rotate/XOR rather than forming the exact 128-bit product.
uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B;
#if(WYHASH_CONDOM>1)
*A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
#else
*A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
#endif
#elif defined(__SIZEOF_INT128__)
//Compiler provides a native 128-bit integer: do the multiplication directly.
__uint128_t r=*A; r*=*B;
#if(WYHASH_CONDOM>1)
*A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
#else
*A=(uint64_t)r; *B=(uint64_t)(r>>64);
#endif
#elif defined(_MSC_VER) && defined(_M_X64)
//MSVC x64: _umul128 returns the low word and stores the high word through its third argument.
#if(WYHASH_CONDOM>1)
uint64_t a, b;
a=_umul128(*A,*B,&b);
*A^=a; *B^=b;
#else
*A=_umul128(*A,*B,B);
#endif
#else
//Portable fallback: schoolbook multiplication on 32-bit halves; c accumulates the carries out of the low word.
uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
#if(WYHASH_CONDOM>1)
*A^=lo; *B^=hi;
#else
*A=lo; *B=hi;
#endif
#endif
}
//Runs the _wymum step on (A, B) and returns the XOR of the two resulting words.
static inline uint64_t _wymix(uint64_t A, uint64_t B){
  uint64_t first=A;
  uint64_t second=B;
  _wymum(&first,&second);
  return first^second;
}
//read functions
//Endianness detection. The includer may predefine WYHASH_LITTLE_ENDIAN (1 or 0) to skip it.
//NOTE: if none of the macros tested below is available, WYHASH_LITTLE_ENDIAN stays undefined; the `#if (WYHASH_LITTLE_ENDIAN)` test then evaluates it as 0 and the byte-swapping readers are selected.
#ifndef WYHASH_LITTLE_ENDIAN
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 0
#endif
#endif
//_wyr8 / _wyr4 load 8 / 4 bytes starting at p via memcpy and return them as an integer.
//On non-little-endian builds the loaded value is byte-swapped before being returned.
//NOTE(review): _wyr4 loads into `unsigned`, which assumes a 32-bit unsigned int — confirm for exotic targets.
//NOTE: when the target is not little-endian and the compiler matches neither the GNU-style nor the MSVC branch, no reader is defined at all.
#if (WYHASH_LITTLE_ENDIAN)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
#elif defined(_MSC_VER)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
#endif
//Packs a 1..3 byte input into one value: the first byte in bits 16-23, the byte at index k/2 in bits 8-15 and the last byte (index k-1) in bits 0-7.
//k is the number of readable bytes at p; the callers visible in this header only pass k in 1..3.
static inline uint64_t _wyr3(const uint8_t *p, unsigned k) {
  const uint64_t first = p[0];
  const uint64_t middle = p[k>>1];
  const uint64_t last = p[k-1];
  return (first<<16)|(middle<<8)|last;
}
//wyhash function
//Hashes the final i (0..16) bytes at p and produces the finished hash value.
//len is the total key length and is mixed into the result; seed is the running state; secret[1] is the only secret word used here.
static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
#if(WYHASH_CONDOM>0)
  //Safe variant: never reads outside p[0..i). Both words default to zero (the empty-tail case).
  uint64_t head=0, tail=0;
  if(_likely_(i<=8)){
    if(_likely_(i>=4)){
      //4..8 bytes: two (possibly overlapping) 4-byte reads from the front and the back.
      head=_wyr4(p);
      tail=_wyr4(p+i-4);
    }
    else if (_likely_(i)){
      //1..3 bytes: packed by _wyr3; the second word stays zero.
      head=_wyr3(p,i);
    }
  }
  else{
    //9..16 bytes: two (possibly overlapping) 8-byte reads from the front and the back.
    head=_wyr8(p);
    tail=_wyr8(p+i-8);
  }
  const uint64_t mixed=_wymix(head^secret[1], tail^seed);
  return _wymix(secret[1]^len, mixed);
#else
  //Unsafe variant (WYHASH_CONDOM==0): always performs full 8-byte reads, which may touch memory outside p[0..i), and shifts away the surplus bytes.
#define oneshot_shift ((i<8)*((8-i)<<3))
  return _wymix(secret[1]^len,_wymix((_wyr8(p)<<oneshot_shift)^secret[1],(_wyr8(p+i-8)>>oneshot_shift)^seed));
#endif
}

//Consumes the remaining i bytes at p: every full 16-byte chunk beyond the last 16 bytes is folded into seed, then _wyfinish16 finishes the final 0..16 bytes.
//len is the total key length and is passed through unchanged.
static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
  while(_unlikely_(i>16)){
    seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed);
    p+=16;
    i-=16;
  }
  return _wyfinish16(p,len,seed,secret,i);
}

//Hashes len bytes starting at key.
//seed selects the hash instance; secret must point to at least five 64-bit words (secret[0..4] are read).
//Keys longer than 64 bytes are consumed in 64-byte blocks using two independent accumulators, which are merged before the tail is handed to _wyfinish.
static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){
  const uint8_t *cursor=(const uint8_t *)key;
  uint64_t remaining=len;
  seed^=secret[0];
  if(_unlikely_(remaining>64)){
    uint64_t lane=seed;
    while(remaining>64){
      //Both halves of each accumulator update use the accumulator's value from before this block.
      const uint64_t seed_lo=_wymix(_wyr8(cursor)^secret[1],_wyr8(cursor+8)^seed);
      const uint64_t seed_hi=_wymix(_wyr8(cursor+16)^secret[2],_wyr8(cursor+24)^seed);
      const uint64_t lane_lo=_wymix(_wyr8(cursor+32)^secret[3],_wyr8(cursor+40)^lane);
      const uint64_t lane_hi=_wymix(_wyr8(cursor+48)^secret[4],_wyr8(cursor+56)^lane);
      seed=seed_lo^seed_hi;
      lane=lane_lo^lane_hi;
      cursor+=64;
      remaining-=64;
    }
    seed^=lane;
  }
  return _wyfinish(cursor,len,seed,secret,remaining);
}
//utility functions
//Default secret words used by the utility functions below.
//Declared `static` so every translation unit that includes this header gets its own private copy: a plain `const` object at file scope has external linkage in C, so including the header from more than one C source file would otherwise produce duplicate-definition link errors.
static const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full};
//Hashes a pair of 64-bit values: both inputs are masked with _wyp[0]/_wyp[1], run through _wymum, masked again and finally mixed with _wymix.
static inline uint64_t wyhash64(uint64_t A, uint64_t B){
  A^=_wyp[0];
  B^=_wyp[1];
  _wymum(&A,&B);
  const uint64_t left=A^_wyp[0];
  const uint64_t right=B^_wyp[1];
  return _wymix(left,right);
}
//Pseudo-random generator step: advances *seed by _wyp[0] (the state is updated in place) and returns a mix of the new state.
static inline uint64_t wyrand(uint64_t *seed){
  *seed+=_wyp[0];
  const uint64_t state=*seed;
  return _wymix(state,state^_wyp[1]);
}
//Maps a 64-bit value to a double in [0,1): keeps the top 52 bits of r and scales them by 2^-52.
static inline double wy2u01(uint64_t r){
  const double scale=1.0/(1ull<<52);
  const uint64_t top_bits=r>>12;
  return top_bits*scale;
}
//Sums three 21-bit fields of r (bits 0-20, 21-41 and 42-62), scales the sum by 2^-20 and subtracts 3, giving a value in [-3,3).
//Presumably intended as a cheap approximately-Gaussian variate (sum of three uniform values) — verify against the upstream documentation.
static inline double wy2gau(uint64_t r){
  const uint64_t field_mask=0x1fffff;
  const double scale=1.0/(1ull<<20);
  const uint64_t total=(r&field_mask)+((r>>21)&field_mask)+((r>>42)&field_mask);
  return total*scale-3.0;
}
//Runs _wymum on (r, k) and returns the second result word.
//In the exact-product configurations of _wymum that word is the high 64 bits of r*k, i.e. a value in [0,k).
static inline uint64_t wy2u0k(uint64_t r, uint64_t k){
  uint64_t first=r;
  uint64_t second=k;
  _wymum(&first,&second);
  return second;
}

//Counts the set bits of x. Uses the compiler intrinsic where one is available and a portable loop everywhere else.
static inline unsigned _wypopcount64(uint64_t x){
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
  return (unsigned)__builtin_popcountll(x);
#elif defined(_MSC_VER) && defined(_M_X64)
  return (unsigned)_mm_popcnt_u64(x);
#else
  unsigned bits=0;
  while(x){ x&=x-1; bits++; } //clears the lowest set bit on each iteration
  return bits;
#endif
}

//Derives five secret words from seed and stores them in secret[0..4].
//Each word is assembled from eight bytes drawn (via wyrand) from a table of byte values, and a candidate is accepted only if
//  - it is odd,
//  - it differs from every previously accepted word in exactly 32 bit positions, and
//  - it has no odd divisor in [3, 2^32), which for a 64-bit value means it is prime.
//Rejected candidates are regenerated. The trial-division check can take up to ~2^31 iterations per candidate, so this is meant for one-off setup, not for hot paths.
static inline void make_secret(uint64_t seed, uint64_t *secret){
  //All 70 byte values that have exactly four bits set.
  uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
  for(size_t i=0;i<5;i++){
    uint8_t ok;
    do{
      ok=1; secret[i]=0;
      for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
      if(secret[i]%2==0){ ok=0; continue; }
      //The loop body is explicit so the following `if(!ok)continue;` can never be absorbed into this loop, whichever popcount implementation the preprocessor selects.
      for(size_t j=0;j<i;j++){
        if(_wypopcount64(secret[j]^secret[i])!=32){ ok=0; break; }
      }
      if(!ok)continue;
      for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; }
    }while(!ok);
  }
}
#endif
2 changes: 1 addition & 1 deletion tools/amalgamate.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def _process_include(self, line, outfile):
include = parse_include(line)
if not include:
return False
if not (include.startswith("upb") or include.startswith("google")):
if not (include.startswith("upb") or include.startswith("google") or include.startswith("third_party")):
return False
if include.endswith("hpp"):
# Skip, we don't support the amalgamation from C++.
Expand Down
Loading