From 88b6096d7f61d8010c2da359cbe99b3760887387 Mon Sep 17 00:00:00 2001 From: Igor Kamyshev Date: Wed, 21 Aug 2024 15:42:43 +0700 Subject: [PATCH] Decrease bundle size and key length in `cache` operator by replacing sha-1 by custom hashing function --- .changeset/rotten-plums-shave.md | 5 + README.md | 2 - apps/website/docs/recipes/cache.md | 4 +- packages/core/package.json | 2 +- .../core/src/cache/__test__/cache.test.ts | 4 +- packages/core/src/cache/key/key.ts | 4 +- .../core/src/cache/lib/__test__/hash.test.ts | 45 ++--- packages/core/src/cache/lib/hash.ts | 161 +----------------- 8 files changed, 33 insertions(+), 194 deletions(-) create mode 100644 .changeset/rotten-plums-shave.md diff --git a/.changeset/rotten-plums-shave.md b/.changeset/rotten-plums-shave.md new file mode 100644 index 000000000..01d46bcef --- /dev/null +++ b/.changeset/rotten-plums-shave.md @@ -0,0 +1,5 @@ +--- +"@farfetched/core": patch +--- + +Decrease bundle size and key length in `cache` operator by replacing SHA-1 with a custom hashing function diff --git a/README.md b/README.md index 8f5b145e0..75502c420 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,5 @@ Special thanks to all contributors and especially [Alexandr](https://github.com/ Some of external libraries were inlined to Farfetched due to bundle size and custom features requirements: - https://github.com/effector/patronum -- https://github.com/emn178/js-sha1/blob/master/tests/test.js -- http://www.movable-type.co.uk/scripts/sha1.html - https://github.com/smelukov/nano-equal - https://github.com/DirtyHairy/async-mutex diff --git a/apps/website/docs/recipes/cache.md b/apps/website/docs/recipes/cache.md index 25b079790..757a62b85 100644 --- a/apps/website/docs/recipes/cache.md +++ b/apps/website/docs/recipes/cache.md @@ -98,10 +98,10 @@ So, the key is a hash of the following data: - `params` of the particular call of the [_Query_](/api/primitives/query) - current values of all external 
[_Stores_](https://effector.dev/en/api/effector/store/) that affect [_Query_](/api/primitives/query) -To get short and unique key, we stringify all data, concatenate it and then hash it with [SHA-1](https://en.wikipedia.org/wiki/SHA-1). +To get a short and unique key, we stringify all data, concatenate it and then hash it with a [custom hash function](https://github.com/igorkamyshev/farfetched/blob/master/packages/core/src/cache/lib/hash.ts). :::tip -SHA-1 is a [cryptographically broken](https://blog.mozilla.org/security/2017/02/23/the-end-of-sha-1-on-the-public-web/), but we use it for key generation only, so it is safe to use it in this case. +It is not a cryptographic hash function, but we use it for key generation only, so it is safe to use it in this case. ::: ## Adapter replacement diff --git a/packages/core/package.json b/packages/core/package.json index 3f453bae1..071d80fc9 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -41,7 +41,7 @@ "size-limit": [ { "path": "./dist/core.js", - "limit": "16.32 kB" + "limit": "15.52 kB" } ] } diff --git a/packages/core/src/cache/__test__/cache.test.ts b/packages/core/src/cache/__test__/cache.test.ts index a3b02d700..b861a092d 100644 --- a/packages/core/src/cache/__test__/cache.test.ts +++ b/packages/core/src/cache/__test__/cache.test.ts @@ -8,7 +8,7 @@ import { Contract } from '../../contract/type'; import { createQuery } from '../../query/create_query'; import { inMemoryCache } from '../adapters/in_memory'; import { cache } from '../cache'; -import { sha1 } from '../lib/hash'; +import { hashCode } from '../lib/hash'; import { createJsonQuery } from '../../query/create_json_query'; import { declareParams } from '../../remote_operation/params'; import { unknownContract } from '../../contract/unknown_contract'; @@ -144,7 +144,7 @@ describe('cache', () => { await allSettled(adapter.set, { scope, params: { - key: sha1( + key: hashCode( query.$data.sid + /* params is undefined */ JSON.stringify(undefined) + /* 
sources is [] */ JSON.stringify([]) diff --git a/packages/core/src/cache/key/key.ts b/packages/core/src/cache/key/key.ts index c42462845..2f933d425 100644 --- a/packages/core/src/cache/key/key.ts +++ b/packages/core/src/cache/key/key.ts @@ -1,5 +1,5 @@ import { Query } from '../../query/type'; -import { sha1 } from '../lib/hash'; +import { hashCode } from '../lib/hash'; import { stableStringify } from '../lib/stable_stringify'; export function createHumanReadbleKey({ @@ -30,7 +30,7 @@ export function createKey({ try { const stableString = stableStringify({ params, sources, sid })!; - return sha1(stableString); + return hashCode(stableString); } catch (e: unknown) { return null; } diff --git a/packages/core/src/cache/lib/__test__/hash.test.ts b/packages/core/src/cache/lib/__test__/hash.test.ts index 726ce90cd..acaafbb99 100644 --- a/packages/core/src/cache/lib/__test__/hash.test.ts +++ b/packages/core/src/cache/lib/__test__/hash.test.ts @@ -1,54 +1,41 @@ import { describe, test, expect } from 'vitest'; -import { sha1 } from '../hash'; +import { hashCode } from '../hash'; -// Tests is copied from https://github.com/emn178/js-sha1/blob/master/tests/test.js -describe('sha1', () => { +describe('hashCode', () => { test.each([ - ['', 'da39a3ee5e6b4b0d3255bfef95601890afd80709'], - [ - 'The quick brown fox jumps over the lazy dog', - '2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', - ], - [ - 'The quick brown fox jumps over the lazy dog.', - '408d94384216f890ff7a0c3528e8bed1e0b01621', - ], + ['', '0'], + ['The quick brown fox jumps over the lazy dog', '-a2u5rh'], + ['The quick brown fox jumps over the lazy dog.', '-sbiqpx'], [ 'The MD5 message-digest algorithm is a widely used cryptographic hash function producing a 128-bit (16-byte) hash value, typically expressed in text format as a 32 digit hexadecimal number. 
MD5 has been utilized in a wide variety of cryptographic applications, and is also commonly used to verify data integrity.', - '8690faab7755408a03875895176fac318f14a699', + 'eamfrc', ], - ['中文', '7be2d2d20c106eee0836c9bc2b939890a78e8fb3'], - ['aécio', '9e4e5d978deced901d621475b03f1ded19e945bf'], + ['中文', 'dure'], + ['aécio', '1lixi9'], [ '訊息摘要演算法第五版(英語:Message-Digest Algorithm 5,縮寫為MD5),是當前電腦領域用於確保資訊傳輸完整一致而廣泛使用的雜湊演算法之一', - 'ad8aae581c915fe01c4964a5e8b322cae74ee5c5', + '-w5hlrk', ], [ '訊息摘要演算法第五版(英語:Message-Digest Algorithm 5,縮寫為MD5),是當前電腦領域用於確保資訊傳輸完整一致而廣泛使用的雜湊演算法之一(又譯雜湊演算法、摘要演算法等),主流程式語言普遍已有MD5的實作。', - '3a15ad3ce9efdd4bf982eaaaecdeda36a887a3f9', - ], - [ - '0123456780123456780123456780123456780123456780123456780', - '4cdeae78e8b7285aef73e0a15eec7d5b30f3f3e3', - ], - [ - '01234567801234567801234567801234567801234567801234567801', - 'e657e6bb6b5d0c2bf7e929451c14a5302589a60b', + '-bdhbwb', ], + ['0123456780123456780123456780123456780123456780123456780', 'ju78hc'], + ['01234567801234567801234567801234567801234567801234567801', '-o9s6tb'], [ '0123456780123456780123456780123456780123456780123456780123456780', - 'e7ad97591c1a99d54d80751d341899769884c75a', + 'vbp3lo', ], [ '01234567801234567801234567801234567801234567801234567801234567801234567', - '55a13698cdc010c0d16dab2f7dc10f43a713f12f', + '-qz9yo8', ], [ '012345678012345678012345678012345678012345678012345678012345678012345678', - '006575418c27b0158e55a6d261c46f86b33a496a', + 'fzrsw0', ], ])('"%s" -> "%s"', (source, hash) => { - expect(sha1(source)).toEqual(hash); + expect(hashCode(source)).toEqual(hash); }); }); diff --git a/packages/core/src/cache/lib/hash.ts b/packages/core/src/cache/lib/hash.ts index 27bc9bb02..4d773f045 100644 --- a/packages/core/src/cache/lib/hash.ts +++ b/packages/core/src/cache/lib/hash.ts @@ -1,157 +1,6 @@ -// Copied from http://www.movable-type.co.uk/scripts/sha1.html - -/** - * Generates SHA-1 hash of string - */ -export function sha1(source: string): string { - // convert string to UTF-8, 
as SHA only deals with byte-streams - let msg = encodeUTF8(source); - - // constants [§4.2.1] - const K = [0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6]; - - // PREPROCESSING - msg += String.fromCharCode(0x80); // add trailing '1' bit (+ 0's padding) to string [§5.1.1] - - // convert string msg into 512-bit/16-integer blocks arrays of ints [§5.2.1] - const l = msg.length / 4 + 2; // length (in 32-bit integers) of msg + ‘1’ + appended length - const N = Math.ceil(l / 16); // number of 16-integer-blocks required to hold 'l' ints - const M = new Array(N); - - for (let i = 0; i < N; i++) { - M[i] = new Array(16); - for (let j = 0; j < 16; j++) { - // encode 4 chars per integer, big-endian encoding - M[i][j] = - (msg.charCodeAt(i * 64 + j * 4) << 24) | - (msg.charCodeAt(i * 64 + j * 4 + 1) << 16) | - (msg.charCodeAt(i * 64 + j * 4 + 2) << 8) | - msg.charCodeAt(i * 64 + j * 4 + 3); - } // note running off the end of msg is ok 'cos bitwise ops on NaN return 0 - } - // add length (in bits) into final pair of 32-bit integers (big-endian) [§5.1.1] - // note: most significant word would be (len-1)*8 >>> 32, but since JS converts - // bitwise-op args to 32 bits, we need to simulate this by arithmetic operators - M[N - 1][14] = ((msg.length - 1) * 8) / Math.pow(2, 32); - M[N - 1][14] = Math.floor(M[N - 1][14]); - M[N - 1][15] = ((msg.length - 1) * 8) & 0xffffffff; - - // set initial hash value [§5.3.1] - let H0 = 0x67452301; - let H1 = 0xefcdab89; - let H2 = 0x98badcfe; - let H3 = 0x10325476; - let H4 = 0xc3d2e1f0; - - // HASH COMPUTATION [§6.1.2] - - const W = new Array(80); - let a: number, b: number, c: number, d: number, e: number; - for (let i = 0; i < N; i++) { - // 1 - prepare message schedule 'W' - for (let t = 0; t < 16; t++) W[t] = M[i][t]; - for (let t = 16; t < 80; t++) - W[t] = ROTL(W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16], 1); - - // 2 - initialise five working variables a, b, c, d, e with previous hash value - a = H0; - b = H1; - c = H2; - d = H3; - e = H4; 
- - // 3 - main loop - for (let t = 0; t < 80; t++) { - // seq for blocks of 'f' functions and 'K' constants - const s = Math.floor(t / 20) as 0 | 1 | 2 | 3; // it is safe to use 0 | 1 | 2 | 3 because of max value of t is 79 - const T = (ROTL(a, 5) + f(s, b, c, d) + e + K[s] + W[t]) & 0xffffffff; - e = d; - d = c; - c = ROTL(b, 30); - b = a; - a = T; - } - - // 4 - compute the new intermediate hash value - H0 = (H0 + a) & 0xffffffff; // note 'addition modulo 2^32' - H1 = (H1 + b) & 0xffffffff; - H2 = (H2 + c) & 0xffffffff; - H3 = (H3 + d) & 0xffffffff; - H4 = (H4 + e) & 0xffffffff; - } - - return ( - toHexString(H0) + - toHexString(H1) + - toHexString(H2) + - toHexString(H3) + - toHexString(H4) - ); -} - -// -// function 'f' [§4.1.1] -// -function f(s: 0 | 1 | 2 | 3, x: number, y: number, z: number): number { - switch (s) { - case 0: - return (x & y) ^ (~x & z); // Ch() - case 1: - return x ^ y ^ z; // Parity() - case 2: - return (x & y) ^ (x & z) ^ (y & z); // Maj() - case 3: - return x ^ y ^ z; // Parity() - } -} - -/** - * rotate left (circular left shift) value x by n positions [§3.2.5] - */ -function ROTL(x: number, n: number) { - return (x << n) | (x >>> (32 - n)); -} - -/** - * hexadecimal representation of a number - */ -function toHexString(n: number): string { - let s = ''; - let v; - for (let i = 7; i >= 0; i--) { - v = (n >>> (i * 4)) & 0xf; - s += v.toString(16); - } - return s; -} - -/** - * Encode multi-byte Unicode string into utf-8 multiple single-byte characters - * (BMP / basic multilingual plane only) - * - * Chars in range U+0080 - U+07FF are encoded in 2 chars, U+0800 - U+FFFF in 3 chars - * - * @param {String} unicodeString Unicode string to be encoded as UTF-8 - * @returns {String} encoded string - */ -function encodeUTF8(unicodeString: string): string { - return unicodeString - .replace( - /[\u0080-\u07ff]/g, // U+0080 - U+07FF => 2 bytes 110yyyyy, 10zzzzzz - (c) => { - const cc = c.charCodeAt(0); - return String.fromCharCode(0xc0 | (cc >> 
6), 0x80 | (cc & 0x3f)); - } - ) - .replace( - /[\u0800-\uffff]/g, // U+0800 - U+FFFF => 3 bytes 1110xxxx, 10yyyyyy, 10zzzzzz - function (c) { - const cc = c.charCodeAt(0); - return String.fromCharCode( - 0xe0 | (cc >> 12), - 0x80 | ((cc >> 6) & 0x3f), - 0x80 | (cc & 0x3f) - ); - } - ); +export function hashCode(s: string) { + let h = 0; + let i = 0; + while (i < s.length) h = ((h << 5) - h + s.charCodeAt(i++)) | 0; + return h.toString(36); }