Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Manual backport 2.x] Use JSON11 for handling long numerals (#6915) #6970

Merged
merged 1 commit into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changelogs/fragments/6915.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
feat:
- Use JSON11 for handling long numerals ([#6915](https://github.com/opensearch-project/OpenSearch-Dashboards/pull/6915))
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
"**/jest-config": "npm:@amoo-miki/jest-config@27.5.1",
"**/jest-jasmine2": "npm:@amoo-miki/jest-jasmine2@27.5.1",
"**/joi/hoek": "npm:@amoo-miki/hoek@6.1.3",
"**/json11": "^1.1.2",
"**/json-schema": "^0.4.0",
"**/kind-of": ">=6.0.3",
"**/loader-utils": "^2.0.4",
Expand Down Expand Up @@ -154,7 +155,7 @@
"@hapi/vision": "^6.1.0",
"@hapi/wreck": "^17.1.0",
"@opensearch-project/opensearch": "^1.1.0",
"@opensearch-project/opensearch-next": "npm:@opensearch-project/opensearch@^2.6.0",
"@opensearch-project/opensearch-next": "npm:@opensearch-project/opensearch@^2.9.0",
"@opensearch/datemath": "5.0.3",
"@osd/ace": "1.0.0",
"@osd/analytics": "1.0.0",
Expand Down
1 change: 1 addition & 0 deletions packages/osd-std/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"private": true,
"sideEffects": false,
"dependencies": {
"json11": "^1.1.2",
"lodash": "^4.17.21"
},
"devDependencies": {
Expand Down
267 changes: 11 additions & 256 deletions packages/osd-std/src/json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,260 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

/* In JavaScript, a `Number` is a 64-bit floating-point value which can store 16 digits. However, the
* serializer and deserializer will need to cater to numeric values generated by other languages which
* can have up to 19 digits. Native JSON parser and stringifier, incapable of handling the extra
* digits, corrupt the values, making them unusable.
*
* To work around this limitation, the deserializer converts long sequences of digits into strings and
* marks them before applying the parser. During the parsing, string values that begin with the mark
* are converted to `BigInt` values.
* Similarly, during stringification, the serializer converts `BigInt` values to marked strings and
* when done, it replaces them with plain numerals.
*
* `Number.MAX_SAFE_INTEGER`, 9,007,199,254,740,991, is the largest number that the native methods can
* parse and stringify, and any numeral greater than that would need to be translated using the
* workaround; all 17-digits or longer and only tail-end of the 16-digits need translation. It would
* be unfair to all the 16-digit numbers if the translation applied to `\d{16,}` only to cover the
* less than 10%. Hence, a RegExp is created to only match numerals too long to be a number.
*
* To make the explanation simpler, let's assume that MAX_SAFE_INTEGER is 8921 which has 4 digits.
* Starting from the right, we take each digit onwards, `[<start>-9]`:
* 1) 7922 - 7929: 792[2-9]\d{0}
* 2) 7930 - 7999: 79[3-9]\d{1}
* 9) 9 + 1 = 10 which results in a rollover; no need to do anything.
* 8) 9000 - 9999: [9-9]\d{3}
* Finally we add anything 5 digits or longer: `\d{5,}
*
* Note: A better solution would use AST but considering its performance penalty, RegExp is the next
* best thing.
*/
const maxIntAsString = String(Number.MAX_SAFE_INTEGER);
const maxIntLength = maxIntAsString.length;
// Sub-patterns for each digit
const longNumeralMatcherTokens = [`\\d{${maxIntAsString.length + 1},}`];
for (let i = 0; i < maxIntLength; i++) {
if (maxIntAsString[i] !== '9') {
longNumeralMatcherTokens.push(
maxIntAsString.substring(0, i) +
`[${parseInt(maxIntAsString[i], 10) + 1}-9]` +
`\\d{${maxIntLength - i - 1}}`
);
}
}

/* The matcher that looks for `": <numerals>, ...}` and `[..., <numeral>, ...]`
*
* The pattern starts by looking for `":` not immediately preceded by a `\`. That should be
* followed by any of the numeric sub-patterns. A comma, end of an array, end of an object, or
* the end of the input are the only acceptable elements after it.
*
* Note: This RegExp can result in false-positive hits on the likes of `{"key": "[ <numeral> ]"}` and
* those are cleaned out during parsing.
*/
const longNumeralMatcher = new RegExp(
`((?:\\[|,|(?<!\\\\)"\\s*:)\\s*)(-?(?:${longNumeralMatcherTokens.join('|')}))(\\s*)(?=,|}|]|$)`,
'g'
);

/* The characters with a highly unlikely chance of occurrence in strings, alone or in combination.
* These will be combined in various orders and lengths, to find a specific "marker" that is not
* present in the JSON string.
*/
const markerChars = ['෴', '߷', '֍'];

/* Generates an array of all combinations of `markerChars` with the requested length. */
const getMarkerChoices = (length: number): string[] => {
// coverage:ignore-line
if (!length || length < 0) return [];
const choices = [];
const arr = markerChars;
const arrLength = arr.length;
const temp = Array(length);

(function fill(pos, start) {
if (pos === length) return choices.push(temp.join(''));

for (let i = start; i < arrLength; i++) {
temp[pos] = arr[i];
fill(pos + 1, i);
}
})(0, 0);

return choices;
};

/* Experiments with different combinations of various lengths, until one is found to not be in
* the input string.
*/
const getMarker = (text: string): { marker: string; length: number } => {
let marker;
let length = 0;
do {
length++;
getMarkerChoices(length).some((markerChoice) => {
if (text.indexOf(markerChoice) === -1) {
marker = markerChoice;
return true;
}
});
} while (!marker);

return {
marker,
length,
};
};

const parseStringWithLongNumerals = (
text: string,
reviver?: ((this: any, key: string, value: any) => any) | null
): any => {
const { marker, length } = getMarker(text);

let hadException;
let obj;
let markedJSON = text.replace(longNumeralMatcher, `$1"${marker}$2"$3`);
const markedValueMatcher = new RegExp(`^${marker}-?\\d+$`);

/* Convert marked values to BigInt values.
* The `startsWith` is purely for performance, to avoid running `test` if not needed.
*/
const convertMarkedValues = (val: any) =>
typeof val === 'string' && val.startsWith(marker) && markedValueMatcher.test(val)
? BigInt(val.substring(length))
: val;

/* For better performance, instead of testing for existence of `reviver` on each value, two almost
* identical functions are used.
*/
const parseMarkedText = reviver
? (markedText: string) =>
JSON.parse(markedText, function (key, val) {
return reviver.call(this, key, convertMarkedValues(val));
})
: (markedText: string) => JSON.parse(markedText, (key, val) => convertMarkedValues(val));

/* RegExp cannot replace AST and the process of marking adds quotes. So, any false-positive hit
* will make the JSON string unparseable.
*
* To find those instances, we try to parse and watch for the location of any errors. If an error
* is caused by the marking, we remove that single marking and try again.
*/
try {
do {
try {
hadException = false;
obj = parseMarkedText(markedJSON);
} catch (e) {
hadException = true;
/* There are two types of exception objects that can be raised:
* 1) a textual message with the position that we need to parse
* i. Unexpected [token|string ...] at position ...
* ii. Expected ',' or ... after ... in JSON at position ...
* iii. expected ',' or ... after ... in object at line ... column ...
* 2) a proper object with lineNumber and columnNumber which we can use
* Note: this might refer to the part of the code that threw the exception but
* we will try it anyway; the regex is specific enough to not produce
* false-positives.
*/
let { lineNumber, columnNumber } = e;

if (typeof e?.message === 'string') {
/* Check for 1-i and 1-ii
* Finding "..."෴1111"..." inside a string value, the extra quotes throw a syntax error
* and the position points to " that is assumed to be the begining of a value.
*/
let match = e.message.match(/^(?:Un)?expected .*at position (\d+)(\D|$)/i);
if (match) {
lineNumber = 1;
// Add 1 to reach the marker
columnNumber = parseInt(match[1], 10) + 1;
} else {
/* Check for 1-iii
* Finding "...,"෴1111"..." inside a string value, the extra quotes throw a syntax error
* and the column number points to the marker after the " that is assumed to be terminating the
* value.
* PS: There are different versions of this error across browsers and platforms.
*/
// ToDo: Add functional tests for this path
match = e.message.match(/expected .*line (\d+) column (\d+)(\D|$)/i);
if (match) {
lineNumber = parseInt(match[1], 10);
columnNumber = parseInt(match[2], 10);
}
}
}

if (lineNumber < 1 || columnNumber < 2) {
/* The problem is not with this replacement.
* Note: This will never happen because the outer parse would have already thrown.
*/
// coverage:ignore-line
throw e;
}

/* We need to skip e.lineNumber - 1 number of `\n` occurrences.
* Then, we need to go to e.columnNumber - 2 to look for `"<mark>\d+"`; we need to `-1` to
* account for the quote but an additional `-1` is needed because columnNumber starts from 1.
*/
const re = new RegExp(
`^((?:.*\\n){${lineNumber - 1}}[^\\n]{${columnNumber - 2}})"${marker}(-?\\d+)"`
);
if (!re.test(markedJSON)) {
/* The exception is not caused by adding the marker.
* Note: This will never happen because the outer parse would have already thrown.
*/
// coverage:ignore-line
throw e;
}

// We have found a bad replacement; let's remove it.
markedJSON = markedJSON.replace(re, '$1$2');
}
} while (hadException);
} catch (ex) {
// If parsing of marked `text` fails, fallback to parsing the original `text`
obj = JSON.parse(text, reviver || undefined);
}

return obj;
};

const stringifyObjectWithBigInts = (
obj: any,
candidate: string,
replacer?: ((this: any, key: string, value: any) => any) | null,
space?: string | number
): string => {
const { marker } = getMarker(candidate);

/* The matcher that looks for "<marker><numerals>"
* Because we have made sure that `marker` was never present in the original object, we can
* carelessly assume every "<marker><numerals>" is due to our marking.
*/
const markedBigIntMatcher = new RegExp(`"${marker}(-?\\d+)"`, 'g');

/* Convert BigInt values to a string and mark them.
* Can't be bothered with Number values outside the safe range because they are already corrupted.
*
* For better performance, instead of testing for existence of `replacer` on each value, two almost
* identical functions are used.
*/
const addMarkerToBigInts = replacer
? function (this: any, key: string, val: any) {
// replacer is called before marking because marking changes the type
const newVal = replacer.call(this, key, val);
return typeof newVal === 'bigint' ? `${marker}${newVal.toString()}` : newVal;
}
: (key: string, val: any) => (typeof val === 'bigint' ? `${marker}${val.toString()}` : val);

return (
JSON.stringify(obj, addMarkerToBigInts, space)
// Replace marked substrings with just the numerals
.replace(markedBigIntMatcher, '$1')
);
};
import JSON11 from 'json11';

export const stringify = (
obj: any,
Expand Down Expand Up @@ -298,7 +45,14 @@ export const stringify = (
text = JSON.stringify(obj, checkForBigInts, space);

if (!numeralsAreNumbers) {
text = stringifyObjectWithBigInts(obj, text, replacer, space);
const temp = JSON11.stringify(obj, {
replacer,
space,
withBigInt: false,
quote: '"',
quoteNames: true,
});
if (temp) text = temp;
}

return text;
Expand Down Expand Up @@ -344,7 +98,8 @@ export const parse = (
obj = JSON.parse(text, checkForLargeNumerals);

if (!numeralsAreNumbers) {
obj = parseStringWithLongNumerals(text, reviver);
const temp = JSON11.parse(text, reviver, { withLongNumerals: true });
if (temp) obj = temp;
}

return obj;
Expand Down
14 changes: 10 additions & 4 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2517,14 +2517,15 @@
mkdirp "^1.0.4"
rimraf "^3.0.2"

"@opensearch-project/opensearch-next@npm:@opensearch-project/opensearch@^2.6.0":
version "2.6.0"
resolved "https://registry.yarnpkg.com/@opensearch-project/opensearch/-/opensearch-2.6.0.tgz#cbacb34f92aed04e98cabcdc0dc65eb495023880"
integrity sha512-zgDSa/qUpoEwA+Nxjtv0qtln63M+hS4SVO94R9XjwzJAoqsUiNMjjzF6D6Djq/xJMgCzIYjvBZ5vUlB8/kXwjQ==
"@opensearch-project/opensearch-next@npm:@opensearch-project/opensearch@^2.9.0":
version "2.9.0"
resolved "https://registry.yarnpkg.com/@opensearch-project/opensearch/-/opensearch-2.9.0.tgz#319b4d174540b6d000c31477a56618e5054c6fcb"
integrity sha512-BXPWSBME1rszZ8OvtBVQ9F6kLiZSENDSFPawbPa1fv0GouuQfWxkKSI9TcnfGLp869fgLTEIfeC5Qexd4RbAYw==
dependencies:
aws4 "^1.11.0"
debug "^4.3.1"
hpagent "^1.2.0"
json11 "^1.0.4"
ms "^2.1.3"
secure-json-parse "^2.4.0"

Expand Down Expand Up @@ -11084,6 +11085,11 @@ json-stringify-safe@5.0.1, json-stringify-safe@^5.0.1:
resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb"
integrity sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==

json11@^1.0.4, json11@^1.1.2:
version "1.1.2"
resolved "https://registry.yarnpkg.com/json11/-/json11-1.1.2.tgz#35ffd3ee5073b0cc09ef826b0a0dc005ebef2b5b"
integrity sha512-5r1RHT1/Gr/jsI/XZZj/P6F11BKM8xvTaftRuiLkQI9Z2PFDukM82Ysxw8yDszb3NJP/NKnRlSGmhUdG99rlBw==

json5@^1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.2.tgz#63d98d60f21b313b77c4d6da18bfa69d80e1d593"
Expand Down
Loading