From 182e3c6fb3e2ad2dc48c48e42adecdc9f0e8bd48 Mon Sep 17 00:00:00 2001 From: pjanik Date: Thu, 5 Oct 2023 13:03:49 -0300 Subject: [PATCH] feat(formula): fix double quote string escaping, refactor char escaping related code, add/modify unit tests to include escaped chars [PT-186145125] --- v3/src/models/data/formula-manager.ts | 14 ++--- v3/src/models/data/formula-utils.test.ts | 65 +++++++++++++----------- v3/src/models/data/formula-utils.ts | 51 ++++++++++--------- 3 files changed, 70 insertions(+), 60 deletions(-) diff --git a/v3/src/models/data/formula-manager.ts b/v3/src/models/data/formula-manager.ts index 38af611965..73b343f474 100644 --- a/v3/src/models/data/formula-manager.ts +++ b/v3/src/models/data/formula-manager.ts @@ -347,23 +347,24 @@ export class FormulaManager { dataSet: {} } - const mapAttributeNames = (dataSet: IDataSet, prefix: string) => { + const mapAttributeNames = (dataSet: IDataSet, prefix: string, _useSafeSymbolNames: boolean) => { const result: Record = {} dataSet.attributes.forEach(attr => { - result[useSafeSymbolNames ? safeSymbolName(attr.name) : attr.name] = `${prefix}${attr.id}` + result[_useSafeSymbolNames ? safeSymbolName(attr.name) : attr.name] = `${prefix}${attr.id}` }) return result } displayNameMap.localNames = { - ...mapAttributeNames(localDataSet, LOCAL_ATTR), + ...mapAttributeNames(localDataSet, LOCAL_ATTR, useSafeSymbolNames), // caseIndex is a special name supported by formulas. It essentially behaves like a local data set attribute // that returns the current, 1-based index of the case in its collection group. caseIndex: `${LOCAL_ATTR}${CASE_INDEX_FAKE_ATTR_ID}` } this.globalValueManager?.globals.forEach(global => { - displayNameMap.localNames[safeSymbolName(global.name)] = `${GLOBAL_VALUE}${global.id}` + const key = useSafeSymbolNames ? safeSymbolName(global.name) : global.name + displayNameMap.localNames[key] = `${GLOBAL_VALUE}${global.id}` }) this.dataSets.forEach(dataSet => { @@ -371,8 +372,9 @@ export class FormulaManager { displayNameMap.dataSet[dataSet.name] = { id: dataSet.id, // No prefix is necessary for external attributes. They always need to be resolved manually by custom - // mathjs functions (like "lookupByIndex"). - attribute: mapAttributeNames(dataSet, "") + // mathjs functions (like "lookupByIndex"). Also, it's never necessary to use safe names, as these names + // are string constants, not a symbols, so MathJS will not care about special characters there. + attribute: mapAttributeNames(dataSet, "", false) } } }) diff --git a/v3/src/models/data/formula-utils.test.ts b/v3/src/models/data/formula-utils.test.ts index cccf470b58..e1c2a5b5a1 100644 --- a/v3/src/models/data/formula-utils.test.ts +++ b/v3/src/models/data/formula-utils.test.ts @@ -1,7 +1,7 @@ import { DisplayNameMap } from "./formula-types" import { safeSymbolName, customizeDisplayFormula, reverseDisplayNameMap, canonicalToDisplay, makeDisplayNamesSafe, - displayToCanonical, unescapeCharactersInSafeSymbolName, escapeCharactersInSafeSymbolName + displayToCanonical, unescapeBacktickString, escapeBacktickString, safeSymbolNameFromDisplayFormula } from "./formula-utils" const displayNameMapExample: DisplayNameMap = { @@ -22,8 +22,9 @@ const displayNameMapExample: DisplayNameMap = { "Roller Coaster": { "id": "DATA_ROLLER_COASTER", "attribute": { - "Park": "ATTR_PARK", - "Top Speed": "ATTR_TOP_SPEED", + // \, ', and " are added to test characters escaping + "Park\"": "ATTR_PARK", + "Top\\Speed'": "ATTR_TOP_SPEED", } } } @@ -51,44 +52,44 @@ describe("displayToCanonical", () => { describe("when attribute name includes special characters", () => { const testDisplayMap: DisplayNameMap = { localNames: { - [safeSymbolName("mean attribute 🙃")]: "LOCAL_ATTR_ATTR_MEAN", + [safeSymbolName("mean`attribute\\🙃")]: "LOCAL_ATTR_ATTR_MEAN", }, dataSet: {} } it("works as long as it's enclosed in backticks", () => { expect(displayToCanonical( - "mean(`mean attribute 🙃`) + 'mean'", testDisplayMap + "mean(`mean\\`attribute\\\\🙃`) + 'mean'", testDisplayMap )).toEqual('mean(LOCAL_ATTR_ATTR_MEAN) + "mean"') }) }) describe("when attribute name is provided as string constant (e.g. lookup functions)", () => { it("is still converted correctly and names with special characters are NOT enclosed in backticks", () => { expect(displayToCanonical( - "lookupByKey('Roller Coaster', 'Park', 'Top Speed', Order) * 2", displayNameMapExample + 'lookupByKey("Roller Coaster", "Park\\"", "Top\\\\Speed\'", Order) * 2', displayNameMapExample )).toEqual('lookupByKey("DATA_ROLLER_COASTER", "ATTR_PARK", "ATTR_TOP_SPEED", LOCAL_ATTR_ATTR_ORDER) * 2') }) }) }) -describe("unescapeCharactersInSafeSymbolName", () => { +describe("unescapeBacktickString", () => { it("converts escaped characters in safe symbol name to original characters", () => { - expect(unescapeCharactersInSafeSymbolName("Attribute\\`Test")).toEqual("Attribute`Test") - expect(unescapeCharactersInSafeSymbolName("Attribute\\\\Test")).toEqual("Attribute\\Test") + expect(unescapeBacktickString("Attribute\\`Test")).toEqual("Attribute`Test") + expect(unescapeBacktickString("Attribute\\\\Test")).toEqual("Attribute\\Test") }) - it("is's an inverse of escapeCharactersInSafeSymbolName", () => { + it("is's an inverse of escapeBacktickString", () => { const testString = "Attribute\\\\\\`Test" - expect(unescapeCharactersInSafeSymbolName(escapeCharactersInSafeSymbolName(testString))).toEqual(testString) + expect(unescapeBacktickString(escapeBacktickString(testString))).toEqual(testString) }) }) -describe("escapeCharactersInSafeSymbolName", () => { +describe("escapeBacktickString", () => { it("converts some characters in safe symbol name to escaped characters", () => { - expect(escapeCharactersInSafeSymbolName("Attribute`Test")).toEqual("Attribute\\`Test") - expect(escapeCharactersInSafeSymbolName("Attribute\\Test")).toEqual("Attribute\\\\Test") + expect(escapeBacktickString("Attribute`Test")).toEqual("Attribute\\`Test") + expect(escapeBacktickString("Attribute\\Test")).toEqual("Attribute\\\\Test") }) - it("is's an inverse of unescapeCharactersInSafeSymbolName", () => { + it("is's an inverse of unescapeBacktickString", () => { const testString = "Attribute`Test\\" - expect(unescapeCharactersInSafeSymbolName(escapeCharactersInSafeSymbolName(testString))).toEqual(testString) + expect(unescapeBacktickString(escapeBacktickString(testString))).toEqual(testString) }) }) @@ -105,18 +106,22 @@ describe("safeSymbolName", () => { expect(safeSymbolName("Attribute\\`Test")).toEqual("Attribute__Test") expect(safeSymbolName("Attribute\\\\\\`Test")).toEqual("Attribute____Test") }) - it("supports `unescape` option", () => { - expect(safeSymbolName("Attribute\\Test", true)).toEqual("Attribute_Test") - expect(safeSymbolName("Attribute\\\\Test", true)).toEqual("Attribute_Test") // \\ treated as one character - expect(safeSymbolName("Attribute\\`Test", true)).toEqual("Attribute_Test") // \` treated as one character - expect(safeSymbolName("Attribute\\\\\\`Test", true)).toEqual("Attribute__Test") +}) +describe("safeSymbolNameFromDisplayFormula", () => { + it("unescapes special characters and converts strings that are not parsable by Mathjs to valid symbol names", () => { + // \\ and \` treated as one character + expect(safeSymbolNameFromDisplayFormula("Attribute\\Test")).toEqual("Attribute_Test") + expect(safeSymbolNameFromDisplayFormula("Attribute\\\\Test")).toEqual("Attribute_Test") + expect(safeSymbolNameFromDisplayFormula("Attribute\\`Test")).toEqual("Attribute_Test") + expect(safeSymbolNameFromDisplayFormula("Attribute\\\\\\`Test")).toEqual("Attribute__Test") }) }) describe("makeDisplayNamesSafe", () => { it("replaces all the symbols enclosed between `` with safe symbol names", () => { expect(makeDisplayNamesSafe("mean(`Attribute Name`)")).toEqual("mean(Attribute_Name)") - expect(makeDisplayNamesSafe("`Attribute Name` + `Attribute\\`Name 2`")).toEqual("Attribute_Name + Attribute_Name_2") + // \` and \\ treated as one symbol - unescaping is done in safeSymbolNameFromDisplayFormula + expect(makeDisplayNamesSafe("`Attr\\\\Name` + `Attr\\`Name 2`")).toEqual("Attr_Name + Attr_Name_2") }) }) @@ -144,8 +149,8 @@ describe("reverseDisplayNameMap", () => { ATTR_LIFE_SPAN: "LifeSpan", ATTR_ORDER: "Order", DATA_ROLLER_COASTER: "Roller Coaster", - ATTR_PARK: "Park", - ATTR_TOP_SPEED: "Top Speed", + ATTR_PARK: "Park\"", + ATTR_TOP_SPEED: "Top\\Speed'", }) }) }) @@ -178,24 +183,24 @@ describe("canonicalToDisplay", () => { describe("when attribute name includes special characters", () => { const testDisplayMap: DisplayNameMap = { localNames: { - "new mean attribute 🙃": "LOCAL_ATTR_ATTR_MEAN", + "new\\mean`attribute 🙃": "LOCAL_ATTR_ATTR_MEAN", }, dataSet: {} } - it("is enclosed in backticks", () => { + it("is enclosed in backticks and special characters are escaped", () => { expect(canonicalToDisplay( "mean(LOCAL_ATTR_ATTR_MEAN) + 'mean'", "mean ( mean ) + 'mean'", reverseDisplayNameMap(testDisplayMap) - )).toEqual("mean ( `new mean attribute 🙃` ) + 'mean'") + )).toEqual("mean ( `new\\\\mean\\`attribute 🙃` ) + 'mean'") }) }) describe("when attribute name is provided as string constant (e.g. lookup functions)", () => { it("is still converted correctly and names with special characters are NOT enclosed in backticks", () => { expect(canonicalToDisplay( - "lookupByKey('DATA_ROLLER_COASTER', 'ATTR_PARK', 'ATTR_TOP_SPEED', LOCAL_ATTR_ATTR_ORDER) * 2", - "lookupByKey('Old Roller Coaster', 'Old Park', 'Old Top Speed', OldOrder) * 2", + 'lookupByKey("DATA_ROLLER_COASTER", "ATTR_PARK", "ATTR_TOP_SPEED", LOCAL_ATTR_ATTR_ORDER) * 2', + 'lookupByKey("Old Roller Coaster", "Old Park", "Old Top Speed", OldOrder) * 2', reverseDisplayNameMap(displayNameMapExample) - )).toEqual("lookupByKey('Roller Coaster', 'Park', 'Top Speed', Order) * 2") + )).toEqual('lookupByKey("Roller Coaster", "Park\\"", "Top\\\\Speed\'", Order) * 2') }) }) }) diff --git a/v3/src/models/data/formula-utils.ts b/v3/src/models/data/formula-utils.ts index e7ab028d5b..05249e812f 100644 --- a/v3/src/models/data/formula-utils.ts +++ b/v3/src/models/data/formula-utils.ts @@ -26,29 +26,34 @@ export const parseCanonicalSymbolName = (canonicalName: string): IFormulaDepende return undefined } -export const unescapeCharactersInSafeSymbolName = (name: string) => +export const unescapeBacktickString = (name: string) => name.replace(/\\`/g, "`").replace(/\\\\/g, "\\") -export const escapeCharactersInSafeSymbolName = (name: string) => +export const escapeBacktickString = (name: string) => name.replace(/\\/g, "\\\\").replace(/`/g, "\\`") -export const safeSymbolName = (name: string, unescape = false) => { - if (unescape) { - // Replace escaped backslash and backticks with a single character, so they're not replaced by two underscores. - name = unescapeCharactersInSafeSymbolName(name) - } - return name +export const escapeDoubleQuoteString = (constant: string) => + constant.replace(/\\/g, "\\\\").replace(/"/g, "\\\"") + +export const safeSymbolName = (name: string) => + name // Math.js does not allow to use symbols that start with a number, so we need to add a prefix. .replace(/^(\d+)/, '_$1') // We also need to escape all the symbols that are not allowed in Math.js. .replace(/[^a-zA-Z0-9_]/g, "_") -} + + +export const safeSymbolNameFromDisplayFormula = (name: string) => + // Replace escaped backslash and backticks in user-generated string with a single character, so they're not replaced + // by two underscores by safeSymbolName. + safeSymbolName(unescapeBacktickString(name)) + export const makeDisplayNamesSafe = (formula: string) => { // Names between `` are symbols that require special processing, as otherwise they could not be parsed by Mathjs, // eg. names with spaces or names that start with a number. Also, it's necessary to ignore escaped backticks. return formula - .replace(/(? safeSymbolName(match, true)) + .replace(/(? safeSymbolNameFromDisplayFormula(match)) } export const customizeDisplayFormula = (formula: string) => { @@ -79,30 +84,25 @@ export const canonicalToDisplay = (canonical: string, originalDisplay: string, c // function names and constants might be identical to the symbol name. E.g. 'mean(mean) + "mean"' is a valid formula // if there's attribute called "mean". If we process function names and constants, it'll be handled correctly. originalDisplay = makeDisplayNamesSafe(originalDisplay) // so it can be parsed by MathJS - const getNameFromId = (id: string, wrapInBackTicks: boolean) => { - let name = canonicalNameMap[id] - // Wrap in backticks if it's not a (MathJS) safe symbol name. - if (wrapInBackTicks && name && name !== safeSymbolName(name)) { - // Escape special characters in the name. It reverses the process done by safeSymbolName. - name = escapeCharactersInSafeSymbolName(name) - // Finally, wrap in backticks. - name = `\`${name}\`` - } - return name || id - } + const getNameFromId = (id: string) => canonicalNameMap[id] || id + // Wrap in backticks if it's not a (MathJS) safe symbol name. + const wrapInBackticksIfNecessary = (name: string) => name !== safeSymbolName(name) ? `\`${name}\`` : name + const namesToReplace: string[] = [] const newNames: string[] = [] parse(originalDisplay).traverse((node: MathNode, path: string, parent: MathNode) => { isNonFunctionSymbolNode(node, parent) && namesToReplace.push(node.name) - isConstantStringNode(node) && namesToReplace.push(node.value) + isConstantStringNode(node) && namesToReplace.push(escapeDoubleQuoteString(node.value)) isFunctionNode(node) && namesToReplace.push(node.fn.name) }) parse(canonical).traverse((node: MathNode, path: string, parent: MathNode) => { // Symbol with nonstandard characters need to be wrapped in backticks, while constants don't (as they're already // wrapped in string quotes). - isNonFunctionSymbolNode(node, parent) && newNames.push(getNameFromId(node.name, true)) - isConstantStringNode(node) && newNames.push(getNameFromId(node.value, false)) + isNonFunctionSymbolNode(node, parent) && newNames.push( + wrapInBackticksIfNecessary(escapeBacktickString(getNameFromId(node.name))) + ) + isConstantStringNode(node) && newNames.push(escapeDoubleQuoteString(getNameFromId(node.value))) isFunctionNode(node) && newNames.push(node.fn.name) }) @@ -141,6 +141,9 @@ export const displayToCanonical = (displayExpression: string, displayNameMap: Di // Note that parseArguments will modify args array in place, because we're passing canonicalizeWith option. typedFnRegistry[node.fn.name].canonicalize?.(node.args, displayNameMap) } + if (isConstantStringNode(node)) { + node.value = escapeDoubleQuoteString(node.value) + } } formulaTree.traverse(visitNode) return formulaTree.toString()