From ced3c03f1409b015faf08bd5084f9bb5ea490245 Mon Sep 17 00:00:00 2001 From: Shai Zarka Date: Mon, 14 Oct 2024 13:31:51 +0000 Subject: [PATCH 1/6] improved hashing algorithm in luaS_newlstr Signed-off-by: Shai Zarka --- deps/lua/src/lstring.c | 52 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/deps/lua/src/lstring.c b/deps/lua/src/lstring.c index 6a825f7865..69aa1968e2 100644 --- a/deps/lua/src/lstring.c +++ b/deps/lua/src/lstring.c @@ -6,6 +6,7 @@ #include +#include #define lstring_c #define LUA_CORE @@ -71,14 +72,55 @@ static TString *newlstr (lua_State *L, const char *str, size_t l, return ts; } +uint32_t murmur32(const uint8_t* key, size_t len, uint32_t seed) { + static const uint32_t c1 = 0xcc9e2d51; + static const uint32_t c2 = 0x1b873593; + static const uint32_t r1 = 15; + static const uint32_t r2 = 13; + static const uint32_t m = 5; + static const uint32_t n = 0xe6546b64; + uint32_t hash = seed; // static seed + + const int nblocks = len / 4; + const uint32_t* blocks = (const uint32_t*) key; + for (int i = 0; i < nblocks; i++) { + uint32_t k = blocks[i]; + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + + hash ^= k; + hash = ((hash << r2) | (hash >> (32 - r2))) * m + n; + } + +const uint8_t* tail = (const uint8_t*) (key + nblocks * 4); + uint32_t k1 = 0; + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + case 2: + k1 ^= tail[1] << 8; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = (k1 << r1) | (k1 >> (32 - r1)); + k1 *= c2; + hash ^= k1; + } + + hash ^= len; + hash ^= (hash >> 16); + hash *= 0x85ebca6b; + hash ^= (hash >> 13); + hash *= 0xc2b2ae35; + hash ^= (hash >> 16); + + return hash; + } TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { GCObject *o; - unsigned int h = cast(unsigned int, l); /* seed */ - size_t step = 1; - size_t l1; - for (l1=l; l1>=step; l1-=step) /* compute hash */ - h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); + unsigned int h = murmur32((uint8_t *)str, l, (uint32_t)l); /* seed */ for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)]; o != NULL; o = o->gch.next) { From 3c6d8a22aabae3a2c264677ff64608b7314bccc3 Mon Sep 17 00:00:00 2001 From: zarkash-aws Date: Tue, 15 Oct 2024 16:36:42 +0200 Subject: [PATCH 2/6] Update deps/lua/src/lstring.c Co-authored-by: Madelyn Olson Signed-off-by: zarkash-aws --- deps/lua/src/lstring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/lua/src/lstring.c b/deps/lua/src/lstring.c index 69aa1968e2..47d06aa813 100644 --- a/deps/lua/src/lstring.c +++ b/deps/lua/src/lstring.c @@ -93,7 +93,7 @@ uint32_t murmur32(const uint8_t* key, size_t len, uint32_t seed) { hash = ((hash << r2) | (hash >> (32 - r2))) * m + n; } -const uint8_t* tail = (const uint8_t*) (key + nblocks * 4); + const uint8_t* tail = (const uint8_t*) (key + nblocks * 4); uint32_t k1 = 0; switch (len & 3) { case 3: From a39bb6b280f61c73b6dacc5e05b95a61372e9022 Mon Sep 17 00:00:00 2001 From: Shai Zarka Date: Tue, 15 Oct 2024 15:31:14 +0000 Subject: [PATCH 3/6] updated lua diffs to include commit ced3c03 Signed-off-by: Shai Zarka --- deps/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/deps/README.md b/deps/README.md index 8a04f04b00..94a1d4e0b0 100644 --- a/deps/README.md +++ b/deps/README.md @@ -94,6 +94,7 @@ and our version: 1. Makefile is modified to allow a different compiler than GCC. 2. We have the implementation source code, and directly link to the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`. 3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order to avoid direct bytecode execution. +4. In lstring.c, the luaS_newlstr function's hash calculation has been upgraded from a simple hash function to MurmurHash3, implemented within the same file, to enhance performance, particularly for operations involving large strings. Hdr_Histogram --- From c235d9376574d966ccf51dec4c479b4276537c32 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Tue, 15 Oct 2024 09:56:16 -0700 Subject: [PATCH 4/6] Update deps/lua/src/lstring.c Signed-off-by: Madelyn Olson --- deps/lua/src/lstring.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/deps/lua/src/lstring.c b/deps/lua/src/lstring.c index 47d06aa813..1d05cf16aa 100644 --- a/deps/lua/src/lstring.c +++ b/deps/lua/src/lstring.c @@ -94,19 +94,19 @@ uint32_t murmur32(const uint8_t* key, size_t len, uint32_t seed) { } const uint8_t* tail = (const uint8_t*) (key + nblocks * 4); - uint32_t k1 = 0; - switch (len & 3) { - case 3: - k1 ^= tail[2] << 16; - case 2: - k1 ^= tail[1] << 8; - case 1: - k1 ^= tail[0]; - k1 *= c1; - k1 = (k1 << r1) | (k1 >> (32 - r1)); - k1 *= c2; - hash ^= k1; - } + uint32_t k1 = 0; + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + case 2: + k1 ^= tail[1] << 8; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = (k1 << r1) | (k1 >> (32 - r1)); + k1 *= c2; + hash ^= k1; + } hash ^= len; hash ^= (hash >> 16); From ef6a2179cabe51a878e49eac7cd30b754c69c858 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Tue, 15 Oct 2024 10:10:47 -0700 Subject: [PATCH 5/6] Apply suggestions from code review Minor improvements. Signed-off-by: Madelyn Olson --- deps/README.md | 2 +- deps/lua/src/lstring.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deps/README.md b/deps/README.md index 94a1d4e0b0..b918b47456 100644 --- a/deps/README.md +++ b/deps/README.md @@ -94,7 +94,7 @@ and our version: 1. Makefile is modified to allow a different compiler than GCC. 2. We have the implementation source code, and directly link to the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`. 3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order to avoid direct bytecode execution. -4. In lstring.c, the luaS_newlstr function's hash calculation has been upgraded from a simple hash function to MurmurHash3, implemented within the same file, to enhance performance, particularly for operations involving large strings. +4. In `lstring.c`, the luaS_newlstr function's hash calculation has been upgraded from a simple hash function to MurmurHash3, implemented within the same file, to enhance performance, particularly for operations involving large strings. Hdr_Histogram --- diff --git a/deps/lua/src/lstring.c b/deps/lua/src/lstring.c index 1d05cf16aa..de67030b99 100644 --- a/deps/lua/src/lstring.c +++ b/deps/lua/src/lstring.c @@ -79,7 +79,7 @@ uint32_t murmur32(const uint8_t* key, size_t len, uint32_t seed) { static const uint32_t r2 = 13; static const uint32_t m = 5; static const uint32_t n = 0xe6546b64; - uint32_t hash = seed; // static seed + uint32_t hash = seed; const int nblocks = len / 4; const uint32_t* blocks = (const uint32_t*) key; From 4c749cef835d672f1f2e3bcdcebd743006b564d1 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Tue, 15 Oct 2024 14:56:50 -0700 Subject: [PATCH 6/6] Update deps/lua/src/lstring.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Viktor Söderqvist Signed-off-by: Madelyn Olson --- deps/lua/src/lstring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/lua/src/lstring.c b/deps/lua/src/lstring.c index de67030b99..043a7867c0 100644 --- a/deps/lua/src/lstring.c +++ b/deps/lua/src/lstring.c @@ -120,7 +120,7 @@ uint32_t murmur32(const uint8_t* key, size_t len, uint32_t seed) { TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { GCObject *o; - unsigned int h = murmur32((uint8_t *)str, l, (uint32_t)l); /* seed */ + unsigned int h = murmur32((uint8_t *)str, l, (uint32_t)l); for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)]; o != NULL; o = o->gch.next) {