From 310c2405cab88b91e7e1aa0fe1105c1a1926342d Mon Sep 17 00:00:00 2001 From: Miki Date: Fri, 7 Jun 2024 12:43:30 -0700 Subject: [PATCH] Use JSON11 for handling long numerals (#6915) (#6970) (cherry picked from commit 6a2074249c92ec204bb7edff72f93a641da1385e) Signed-off-by: Miki --- changelogs/fragments/6915.yml | 2 + package.json | 3 +- packages/osd-std/package.json | 1 + packages/osd-std/src/json.ts | 267 ++-------------------------------- yarn.lock | 14 +- 5 files changed, 26 insertions(+), 261 deletions(-) create mode 100644 changelogs/fragments/6915.yml diff --git a/changelogs/fragments/6915.yml b/changelogs/fragments/6915.yml new file mode 100644 index 000000000000..4944e5e6c32c --- /dev/null +++ b/changelogs/fragments/6915.yml @@ -0,0 +1,2 @@ +feat: +- Use JSON11 for handling long numerals ([#6915](https://github.com/opensearch-project/OpenSearch-Dashboards/pull/6915)) diff --git a/package.json b/package.json index 589426f08ab7..6e09cf84306d 100644 --- a/package.json +++ b/package.json @@ -98,6 +98,7 @@ "**/jest-config": "npm:@amoo-miki/jest-config@27.5.1", "**/jest-jasmine2": "npm:@amoo-miki/jest-jasmine2@27.5.1", "**/joi/hoek": "npm:@amoo-miki/hoek@6.1.3", + "**/json11": "^1.1.2", "**/json-schema": "^0.4.0", "**/kind-of": ">=6.0.3", "**/loader-utils": "^2.0.4", @@ -154,7 +155,7 @@ "@hapi/vision": "^6.1.0", "@hapi/wreck": "^17.1.0", "@opensearch-project/opensearch": "^1.1.0", - "@opensearch-project/opensearch-next": "npm:@opensearch-project/opensearch@^2.6.0", + "@opensearch-project/opensearch-next": "npm:@opensearch-project/opensearch@^2.9.0", "@opensearch/datemath": "5.0.3", "@osd/ace": "1.0.0", "@osd/analytics": "1.0.0", diff --git a/packages/osd-std/package.json b/packages/osd-std/package.json index 3b1bea69bf8d..d0d859217c08 100644 --- a/packages/osd-std/package.json +++ b/packages/osd-std/package.json @@ -8,6 +8,7 @@ "private": true, "sideEffects": false, "dependencies": { + "json11": "^1.1.2", "lodash": "^4.17.21" }, "devDependencies": { diff --git a/packages/osd-std/src/json.ts b/packages/osd-std/src/json.ts index d8bb27e1eb6a..4dcd3eb03e65 100644 --- a/packages/osd-std/src/json.ts +++ b/packages/osd-std/src/json.ts @@ -3,260 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -/* In JavaScript, a `Number` is a 64-bit floating-point value which can store 16 digits. However, the - * serializer and deserializer will need to cater to numeric values generated by other languages which - * can have up to 19 digits. Native JSON parser and stringifier, incapable of handling the extra - * digits, corrupt the values, making them unusable. - * - * To work around this limitation, the deserializer converts long sequences of digits into strings and - * marks them before applying the parser. During the parsing, string values that begin with the mark - * are converted to `BigInt` values. - * Similarly, during stringification, the serializer converts `BigInt` values to marked strings and - * when done, it replaces them with plain numerals. - * - * `Number.MAX_SAFE_INTEGER`, 9,007,199,254,740,991, is the largest number that the native methods can - * parse and stringify, and any numeral greater than that would need to be translated using the - * workaround; all 17-digits or longer and only tail-end of the 16-digits need translation. It would - * be unfair to all the 16-digit numbers if the translation applied to `\d{16,}` only to cover the - * less than 10%. Hence, a RegExp is created to only match numerals too long to be a number. - * - * To make the explanation simpler, let's assume that MAX_SAFE_INTEGER is 8921 which has 4 digits. - * Starting from the right, we take each digit onwards, `[-9]`: - * 1) 7922 - 7929: 792[2-9]\d{0} - * 2) 7930 - 7999: 79[3-9]\d{1} - * 9) 9 + 1 = 10 which results in a rollover; no need to do anything. - * 8) 9000 - 9999: [9-9]\d{3} - * Finally we add anything 5 digits or longer: `\d{5,} - * - * Note: A better solution would use AST but considering its performance penalty, RegExp is the next - * best thing. - */ -const maxIntAsString = String(Number.MAX_SAFE_INTEGER); -const maxIntLength = maxIntAsString.length; -// Sub-patterns for each digit -const longNumeralMatcherTokens = [`\\d{${maxIntAsString.length + 1},}`]; -for (let i = 0; i < maxIntLength; i++) { - if (maxIntAsString[i] !== '9') { - longNumeralMatcherTokens.push( - maxIntAsString.substring(0, i) + - `[${parseInt(maxIntAsString[i], 10) + 1}-9]` + - `\\d{${maxIntLength - i - 1}}` - ); - } -} - -/* The matcher that looks for `": , ...}` and `[..., , ...]` - * - * The pattern starts by looking for `":` not immediately preceded by a `\`. That should be - * followed by any of the numeric sub-patterns. A comma, end of an array, end of an object, or - * the end of the input are the only acceptable elements after it. - * - * Note: This RegExp can result in false-positive hits on the likes of `{"key": "[ ]"}` and - * those are cleaned out during parsing. - */ -const longNumeralMatcher = new RegExp( - `((?:\\[|,|(? { - // coverage:ignore-line - if (!length || length < 0) return []; - const choices = []; - const arr = markerChars; - const arrLength = arr.length; - const temp = Array(length); - - (function fill(pos, start) { - if (pos === length) return choices.push(temp.join('')); - - for (let i = start; i < arrLength; i++) { - temp[pos] = arr[i]; - fill(pos + 1, i); - } - })(0, 0); - - return choices; -}; - -/* Experiments with different combinations of various lengths, until one is found to not be in - * the input string. - */ -const getMarker = (text: string): { marker: string; length: number } => { - let marker; - let length = 0; - do { - length++; - getMarkerChoices(length).some((markerChoice) => { - if (text.indexOf(markerChoice) === -1) { - marker = markerChoice; - return true; - } - }); - } while (!marker); - - return { - marker, - length, - }; -}; - -const parseStringWithLongNumerals = ( - text: string, - reviver?: ((this: any, key: string, value: any) => any) | null -): any => { - const { marker, length } = getMarker(text); - - let hadException; - let obj; - let markedJSON = text.replace(longNumeralMatcher, `$1"${marker}$2"$3`); - const markedValueMatcher = new RegExp(`^${marker}-?\\d+$`); - - /* Convert marked values to BigInt values. - * The `startsWith` is purely for performance, to avoid running `test` if not needed. - */ - const convertMarkedValues = (val: any) => - typeof val === 'string' && val.startsWith(marker) && markedValueMatcher.test(val) - ? BigInt(val.substring(length)) - : val; - - /* For better performance, instead of testing for existence of `reviver` on each value, two almost - * identical functions are used. - */ - const parseMarkedText = reviver - ? (markedText: string) => - JSON.parse(markedText, function (key, val) { - return reviver.call(this, key, convertMarkedValues(val)); - }) - : (markedText: string) => JSON.parse(markedText, (key, val) => convertMarkedValues(val)); - - /* RegExp cannot replace AST and the process of marking adds quotes. So, any false-positive hit - * will make the JSON string unparseable. - * - * To find those instances, we try to parse and watch for the location of any errors. If an error - * is caused by the marking, we remove that single marking and try again. - */ - try { - do { - try { - hadException = false; - obj = parseMarkedText(markedJSON); - } catch (e) { - hadException = true; - /* There are two types of exception objects that can be raised: - * 1) a textual message with the position that we need to parse - * i. Unexpected [token|string ...] at position ... - * ii. Expected ',' or ... after ... in JSON at position ... - * iii. expected ',' or ... after ... in object at line ... column ... - * 2) a proper object with lineNumber and columnNumber which we can use - * Note: this might refer to the part of the code that threw the exception but - * we will try it anyway; the regex is specific enough to not produce - * false-positives. - */ - let { lineNumber, columnNumber } = e; - - if (typeof e?.message === 'string') { - /* Check for 1-i and 1-ii - * Finding "..."෴1111"..." inside a string value, the extra quotes throw a syntax error - * and the position points to " that is assumed to be the begining of a value. - */ - let match = e.message.match(/^(?:Un)?expected .*at position (\d+)(\D|$)/i); - if (match) { - lineNumber = 1; - // Add 1 to reach the marker - columnNumber = parseInt(match[1], 10) + 1; - } else { - /* Check for 1-iii - * Finding "...,"෴1111"..." inside a string value, the extra quotes throw a syntax error - * and the column number points to the marker after the " that is assumed to be terminating the - * value. - * PS: There are different versions of this error across browsers and platforms. - */ - // ToDo: Add functional tests for this path - match = e.message.match(/expected .*line (\d+) column (\d+)(\D|$)/i); - if (match) { - lineNumber = parseInt(match[1], 10); - columnNumber = parseInt(match[2], 10); - } - } - } - - if (lineNumber < 1 || columnNumber < 2) { - /* The problem is not with this replacement. - * Note: This will never happen because the outer parse would have already thrown. - */ - // coverage:ignore-line - throw e; - } - - /* We need to skip e.lineNumber - 1 number of `\n` occurrences. - * Then, we need to go to e.columnNumber - 2 to look for `"\d+"`; we need to `-1` to - * account for the quote but an additional `-1` is needed because columnNumber starts from 1. - */ - const re = new RegExp( - `^((?:.*\\n){${lineNumber - 1}}[^\\n]{${columnNumber - 2}})"${marker}(-?\\d+)"` - ); - if (!re.test(markedJSON)) { - /* The exception is not caused by adding the marker. - * Note: This will never happen because the outer parse would have already thrown. - */ - // coverage:ignore-line - throw e; - } - - // We have found a bad replacement; let's remove it. - markedJSON = markedJSON.replace(re, '$1$2'); - } - } while (hadException); - } catch (ex) { - // If parsing of marked `text` fails, fallback to parsing the original `text` - obj = JSON.parse(text, reviver || undefined); - } - - return obj; -}; - -const stringifyObjectWithBigInts = ( - obj: any, - candidate: string, - replacer?: ((this: any, key: string, value: any) => any) | null, - space?: string | number -): string => { - const { marker } = getMarker(candidate); - - /* The matcher that looks for "" - * Because we have made sure that `marker` was never present in the original object, we can - * carelessly assume every "" is due to our marking. - */ - const markedBigIntMatcher = new RegExp(`"${marker}(-?\\d+)"`, 'g'); - - /* Convert BigInt values to a string and mark them. - * Can't be bothered with Number values outside the safe range because they are already corrupted. - * - * For better performance, instead of testing for existence of `replacer` on each value, two almost - * identical functions are used. - */ - const addMarkerToBigInts = replacer - ? function (this: any, key: string, val: any) { - // replacer is called before marking because marking changes the type - const newVal = replacer.call(this, key, val); - return typeof newVal === 'bigint' ? `${marker}${newVal.toString()}` : newVal; - } - : (key: string, val: any) => (typeof val === 'bigint' ? `${marker}${val.toString()}` : val); - - return ( - JSON.stringify(obj, addMarkerToBigInts, space) - // Replace marked substrings with just the numerals - .replace(markedBigIntMatcher, '$1') - ); -}; +import JSON11 from 'json11'; export const stringify = ( obj: any, @@ -298,7 +45,14 @@ export const stringify = ( text = JSON.stringify(obj, checkForBigInts, space); if (!numeralsAreNumbers) { - text = stringifyObjectWithBigInts(obj, text, replacer, space); + const temp = JSON11.stringify(obj, { + replacer, + space, + withBigInt: false, + quote: '"', + quoteNames: true, + }); + if (temp) text = temp; } return text; @@ -344,7 +98,8 @@ export const parse = ( obj = JSON.parse(text, checkForLargeNumerals); if (!numeralsAreNumbers) { - obj = parseStringWithLongNumerals(text, reviver); + const temp = JSON11.parse(text, reviver, { withLongNumerals: true }); + if (temp) obj = temp; } return obj; diff --git a/yarn.lock b/yarn.lock index d6084909cf0e..349ded92399b 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2517,14 +2517,15 @@ mkdirp "^1.0.4" rimraf "^3.0.2" -"@opensearch-project/opensearch-next@npm:@opensearch-project/opensearch@^2.6.0": - version "2.6.0" - resolved "https://registry.yarnpkg.com/@opensearch-project/opensearch/-/opensearch-2.6.0.tgz#cbacb34f92aed04e98cabcdc0dc65eb495023880" - integrity sha512-zgDSa/qUpoEwA+Nxjtv0qtln63M+hS4SVO94R9XjwzJAoqsUiNMjjzF6D6Djq/xJMgCzIYjvBZ5vUlB8/kXwjQ== +"@opensearch-project/opensearch-next@npm:@opensearch-project/opensearch@^2.9.0": + version "2.9.0" + resolved "https://registry.yarnpkg.com/@opensearch-project/opensearch/-/opensearch-2.9.0.tgz#319b4d174540b6d000c31477a56618e5054c6fcb" + integrity sha512-BXPWSBME1rszZ8OvtBVQ9F6kLiZSENDSFPawbPa1fv0GouuQfWxkKSI9TcnfGLp869fgLTEIfeC5Qexd4RbAYw== dependencies: aws4 "^1.11.0" debug "^4.3.1" hpagent "^1.2.0" + json11 "^1.0.4" ms "^2.1.3" secure-json-parse "^2.4.0" @@ -11084,6 +11085,11 @@ json-stringify-safe@5.0.1, json-stringify-safe@^5.0.1: resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb" integrity sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA== +json11@^1.0.4, json11@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/json11/-/json11-1.1.2.tgz#35ffd3ee5073b0cc09ef826b0a0dc005ebef2b5b" + integrity sha512-5r1RHT1/Gr/jsI/XZZj/P6F11BKM8xvTaftRuiLkQI9Z2PFDukM82Ysxw8yDszb3NJP/NKnRlSGmhUdG99rlBw== + json5@^1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.2.tgz#63d98d60f21b313b77c4d6da18bfa69d80e1d593"