Skip to content

Commit

Permalink
Merge pull request #919 from concord-consortium/186145125-escape-doub…
Browse files Browse the repository at this point in the history
…le-quote
  • Loading branch information
pjanik authored Oct 5, 2023
2 parents b41cba6 + 182e3c6 commit 070740d
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 60 deletions.
14 changes: 8 additions & 6 deletions v3/src/models/data/formula-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -347,32 +347,34 @@ export class FormulaManager {
dataSet: {}
}

const mapAttributeNames = (dataSet: IDataSet, prefix: string) => {
const mapAttributeNames = (dataSet: IDataSet, prefix: string, _useSafeSymbolNames: boolean) => {
const result: Record<string, string> = {}
dataSet.attributes.forEach(attr => {
result[useSafeSymbolNames ? safeSymbolName(attr.name) : attr.name] = `${prefix}${attr.id}`
result[_useSafeSymbolNames ? safeSymbolName(attr.name) : attr.name] = `${prefix}${attr.id}`
})
return result
}

displayNameMap.localNames = {
...mapAttributeNames(localDataSet, LOCAL_ATTR),
...mapAttributeNames(localDataSet, LOCAL_ATTR, useSafeSymbolNames),
// caseIndex is a special name supported by formulas. It essentially behaves like a local data set attribute
// that returns the current, 1-based index of the case in its collection group.
caseIndex: `${LOCAL_ATTR}${CASE_INDEX_FAKE_ATTR_ID}`
}

this.globalValueManager?.globals.forEach(global => {
displayNameMap.localNames[safeSymbolName(global.name)] = `${GLOBAL_VALUE}${global.id}`
const key = useSafeSymbolNames ? safeSymbolName(global.name) : global.name
displayNameMap.localNames[key] = `${GLOBAL_VALUE}${global.id}`
})

this.dataSets.forEach(dataSet => {
if (dataSet.name) {
displayNameMap.dataSet[dataSet.name] = {
id: dataSet.id,
// No prefix is necessary for external attributes. They always need to be resolved manually by custom
// mathjs functions (like "lookupByIndex").
attribute: mapAttributeNames(dataSet, "")
// mathjs functions (like "lookupByIndex"). Also, it's never necessary to use safe names, as these names
// are string constants, not a symbols, so MathJS will not care about special characters there.
attribute: mapAttributeNames(dataSet, "", false)
}
}
})
Expand Down
65 changes: 35 additions & 30 deletions v3/src/models/data/formula-utils.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { DisplayNameMap } from "./formula-types"
import {
safeSymbolName, customizeDisplayFormula, reverseDisplayNameMap, canonicalToDisplay, makeDisplayNamesSafe,
displayToCanonical, unescapeCharactersInSafeSymbolName, escapeCharactersInSafeSymbolName
displayToCanonical, unescapeBacktickString, escapeBacktickString, safeSymbolNameFromDisplayFormula
} from "./formula-utils"

const displayNameMapExample: DisplayNameMap = {
Expand All @@ -22,8 +22,9 @@ const displayNameMapExample: DisplayNameMap = {
"Roller Coaster": {
"id": "DATA_ROLLER_COASTER",
"attribute": {
"Park": "ATTR_PARK",
"Top Speed": "ATTR_TOP_SPEED",
// \, ', and " are added to test characters escaping
"Park\"": "ATTR_PARK",
"Top\\Speed'": "ATTR_TOP_SPEED",
}
}
}
Expand Down Expand Up @@ -51,44 +52,44 @@ describe("displayToCanonical", () => {
describe("when attribute name includes special characters", () => {
const testDisplayMap: DisplayNameMap = {
localNames: {
[safeSymbolName("mean attribute 🙃")]: "LOCAL_ATTR_ATTR_MEAN",
[safeSymbolName("mean`attribute\\🙃")]: "LOCAL_ATTR_ATTR_MEAN",
},
dataSet: {}
}
it("works as long as it's enclosed in backticks", () => {
expect(displayToCanonical(
"mean(`mean attribute 🙃`) + 'mean'", testDisplayMap
"mean(`mean\\`attribute\\\\🙃`) + 'mean'", testDisplayMap
)).toEqual('mean(LOCAL_ATTR_ATTR_MEAN) + "mean"')
})
})
describe("when attribute name is provided as string constant (e.g. lookup functions)", () => {
it("is still converted correctly and names with special characters are NOT enclosed in backticks", () => {
expect(displayToCanonical(
"lookupByKey('Roller Coaster', 'Park', 'Top Speed', Order) * 2", displayNameMapExample
'lookupByKey("Roller Coaster", "Park\\"", "Top\\\\Speed\'", Order) * 2', displayNameMapExample
)).toEqual('lookupByKey("DATA_ROLLER_COASTER", "ATTR_PARK", "ATTR_TOP_SPEED", LOCAL_ATTR_ATTR_ORDER) * 2')
})
})
})

describe("unescapeCharactersInSafeSymbolName", () => {
describe("unescapeBacktickString", () => {
it("converts escaped characters in safe symbol name to original characters", () => {
expect(unescapeCharactersInSafeSymbolName("Attribute\\`Test")).toEqual("Attribute`Test")
expect(unescapeCharactersInSafeSymbolName("Attribute\\\\Test")).toEqual("Attribute\\Test")
expect(unescapeBacktickString("Attribute\\`Test")).toEqual("Attribute`Test")
expect(unescapeBacktickString("Attribute\\\\Test")).toEqual("Attribute\\Test")
})
it("is's an inverse of escapeCharactersInSafeSymbolName", () => {
it("is's an inverse of escapeBacktickString", () => {
const testString = "Attribute\\\\\\`Test"
expect(unescapeCharactersInSafeSymbolName(escapeCharactersInSafeSymbolName(testString))).toEqual(testString)
expect(unescapeBacktickString(escapeBacktickString(testString))).toEqual(testString)
})
})

describe("escapeCharactersInSafeSymbolName", () => {
describe("escapeBacktickString", () => {
it("converts some characters in safe symbol name to escaped characters", () => {
expect(escapeCharactersInSafeSymbolName("Attribute`Test")).toEqual("Attribute\\`Test")
expect(escapeCharactersInSafeSymbolName("Attribute\\Test")).toEqual("Attribute\\\\Test")
expect(escapeBacktickString("Attribute`Test")).toEqual("Attribute\\`Test")
expect(escapeBacktickString("Attribute\\Test")).toEqual("Attribute\\\\Test")
})
it("is's an inverse of unescapeCharactersInSafeSymbolName", () => {
it("is's an inverse of unescapeBacktickString", () => {
const testString = "Attribute`Test\\"
expect(unescapeCharactersInSafeSymbolName(escapeCharactersInSafeSymbolName(testString))).toEqual(testString)
expect(unescapeBacktickString(escapeBacktickString(testString))).toEqual(testString)
})
})

Expand All @@ -105,18 +106,22 @@ describe("safeSymbolName", () => {
expect(safeSymbolName("Attribute\\`Test")).toEqual("Attribute__Test")
expect(safeSymbolName("Attribute\\\\\\`Test")).toEqual("Attribute____Test")
})
it("supports `unescape` option", () => {
expect(safeSymbolName("Attribute\\Test", true)).toEqual("Attribute_Test")
expect(safeSymbolName("Attribute\\\\Test", true)).toEqual("Attribute_Test") // \\ treated as one character
expect(safeSymbolName("Attribute\\`Test", true)).toEqual("Attribute_Test") // \` treated as one character
expect(safeSymbolName("Attribute\\\\\\`Test", true)).toEqual("Attribute__Test")
})
describe("safeSymbolNameFromDisplayFormula", () => {
it("unescapes special characters and converts strings that are not parsable by Mathjs to valid symbol names", () => {
// \\ and \` treated as one character
expect(safeSymbolNameFromDisplayFormula("Attribute\\Test")).toEqual("Attribute_Test")
expect(safeSymbolNameFromDisplayFormula("Attribute\\\\Test")).toEqual("Attribute_Test")
expect(safeSymbolNameFromDisplayFormula("Attribute\\`Test")).toEqual("Attribute_Test")
expect(safeSymbolNameFromDisplayFormula("Attribute\\\\\\`Test")).toEqual("Attribute__Test")
})
})

describe("makeDisplayNamesSafe", () => {
it("replaces all the symbols enclosed between `` with safe symbol names", () => {
expect(makeDisplayNamesSafe("mean(`Attribute Name`)")).toEqual("mean(Attribute_Name)")
expect(makeDisplayNamesSafe("`Attribute Name` + `Attribute\\`Name 2`")).toEqual("Attribute_Name + Attribute_Name_2")
// \` and \\ treated as one symbol - unescaping is done in safeSymbolNameFromDisplayFormula
expect(makeDisplayNamesSafe("`Attr\\\\Name` + `Attr\\`Name 2`")).toEqual("Attr_Name + Attr_Name_2")
})
})

Expand Down Expand Up @@ -144,8 +149,8 @@ describe("reverseDisplayNameMap", () => {
ATTR_LIFE_SPAN: "LifeSpan",
ATTR_ORDER: "Order",
DATA_ROLLER_COASTER: "Roller Coaster",
ATTR_PARK: "Park",
ATTR_TOP_SPEED: "Top Speed",
ATTR_PARK: "Park\"",
ATTR_TOP_SPEED: "Top\\Speed'",
})
})
})
Expand Down Expand Up @@ -178,24 +183,24 @@ describe("canonicalToDisplay", () => {
describe("when attribute name includes special characters", () => {
const testDisplayMap: DisplayNameMap = {
localNames: {
"new mean attribute 🙃": "LOCAL_ATTR_ATTR_MEAN",
"new\\mean`attribute 🙃": "LOCAL_ATTR_ATTR_MEAN",
},
dataSet: {}
}
it("is enclosed in backticks", () => {
it("is enclosed in backticks and special characters are escaped", () => {
expect(canonicalToDisplay(
"mean(LOCAL_ATTR_ATTR_MEAN) + 'mean'",
"mean ( mean ) + 'mean'", reverseDisplayNameMap(testDisplayMap)
)).toEqual("mean ( `new mean attribute 🙃` ) + 'mean'")
)).toEqual("mean ( `new\\\\mean\\`attribute 🙃` ) + 'mean'")
})
})
describe("when attribute name is provided as string constant (e.g. lookup functions)", () => {
it("is still converted correctly and names with special characters are NOT enclosed in backticks", () => {
expect(canonicalToDisplay(
"lookupByKey('DATA_ROLLER_COASTER', 'ATTR_PARK', 'ATTR_TOP_SPEED', LOCAL_ATTR_ATTR_ORDER) * 2",
"lookupByKey('Old Roller Coaster', 'Old Park', 'Old Top Speed', OldOrder) * 2",
'lookupByKey("DATA_ROLLER_COASTER", "ATTR_PARK", "ATTR_TOP_SPEED", LOCAL_ATTR_ATTR_ORDER) * 2',
'lookupByKey("Old Roller Coaster", "Old Park", "Old Top Speed", OldOrder) * 2',
reverseDisplayNameMap(displayNameMapExample)
)).toEqual("lookupByKey('Roller Coaster', 'Park', 'Top Speed', Order) * 2")
)).toEqual('lookupByKey("Roller Coaster", "Park\\"", "Top\\\\Speed\'", Order) * 2')
})
})
})
51 changes: 27 additions & 24 deletions v3/src/models/data/formula-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,34 @@ export const parseCanonicalSymbolName = (canonicalName: string): IFormulaDepende
return undefined
}

export const unescapeCharactersInSafeSymbolName = (name: string) =>
export const unescapeBacktickString = (name: string) =>
name.replace(/\\`/g, "`").replace(/\\\\/g, "\\")

export const escapeCharactersInSafeSymbolName = (name: string) =>
export const escapeBacktickString = (name: string) =>
name.replace(/\\/g, "\\\\").replace(/`/g, "\\`")

export const safeSymbolName = (name: string, unescape = false) => {
if (unescape) {
// Replace escaped backslash and backticks with a single character, so they're not replaced by two underscores.
name = unescapeCharactersInSafeSymbolName(name)
}
return name
export const escapeDoubleQuoteString = (constant: string) =>
constant.replace(/\\/g, "\\\\").replace(/"/g, "\\\"")

export const safeSymbolName = (name: string) =>
name
// Math.js does not allow to use symbols that start with a number, so we need to add a prefix.
.replace(/^(\d+)/, '_$1')
// We also need to escape all the symbols that are not allowed in Math.js.
.replace(/[^a-zA-Z0-9_]/g, "_")
}


export const safeSymbolNameFromDisplayFormula = (name: string) =>
// Replace escaped backslash and backticks in user-generated string with a single character, so they're not replaced
// by two underscores by safeSymbolName.
safeSymbolName(unescapeBacktickString(name))


export const makeDisplayNamesSafe = (formula: string) => {
// Names between `` are symbols that require special processing, as otherwise they could not be parsed by Mathjs,
// eg. names with spaces or names that start with a number. Also, it's necessary to ignore escaped backticks.
return formula
.replace(/(?<!\\)`((?:[^`\\]|\\.)+)`/g, (_, match) => safeSymbolName(match, true))
.replace(/(?<!\\)`((?:[^`\\]|\\.)+)`/g, (_, match) => safeSymbolNameFromDisplayFormula(match))
}

export const customizeDisplayFormula = (formula: string) => {
Expand Down Expand Up @@ -79,30 +84,25 @@ export const canonicalToDisplay = (canonical: string, originalDisplay: string, c
// function names and constants might be identical to the symbol name. E.g. 'mean(mean) + "mean"' is a valid formula
// if there's attribute called "mean". If we process function names and constants, it'll be handled correctly.
originalDisplay = makeDisplayNamesSafe(originalDisplay) // so it can be parsed by MathJS
const getNameFromId = (id: string, wrapInBackTicks: boolean) => {
let name = canonicalNameMap[id]
// Wrap in backticks if it's not a (MathJS) safe symbol name.
if (wrapInBackTicks && name && name !== safeSymbolName(name)) {
// Escape special characters in the name. It reverses the process done by safeSymbolName.
name = escapeCharactersInSafeSymbolName(name)
// Finally, wrap in backticks.
name = `\`${name}\``
}
return name || id
}
const getNameFromId = (id: string) => canonicalNameMap[id] || id
// Wrap in backticks if it's not a (MathJS) safe symbol name.
const wrapInBackticksIfNecessary = (name: string) => name !== safeSymbolName(name) ? `\`${name}\`` : name

const namesToReplace: string[] = []
const newNames: string[] = []

parse(originalDisplay).traverse((node: MathNode, path: string, parent: MathNode) => {
isNonFunctionSymbolNode(node, parent) && namesToReplace.push(node.name)
isConstantStringNode(node) && namesToReplace.push(node.value)
isConstantStringNode(node) && namesToReplace.push(escapeDoubleQuoteString(node.value))
isFunctionNode(node) && namesToReplace.push(node.fn.name)
})
parse(canonical).traverse((node: MathNode, path: string, parent: MathNode) => {
// Symbol with nonstandard characters need to be wrapped in backticks, while constants don't (as they're already
// wrapped in string quotes).
isNonFunctionSymbolNode(node, parent) && newNames.push(getNameFromId(node.name, true))
isConstantStringNode(node) && newNames.push(getNameFromId(node.value, false))
isNonFunctionSymbolNode(node, parent) && newNames.push(
wrapInBackticksIfNecessary(escapeBacktickString(getNameFromId(node.name)))
)
isConstantStringNode(node) && newNames.push(escapeDoubleQuoteString(getNameFromId(node.value)))
isFunctionNode(node) && newNames.push(node.fn.name)
})

Expand Down Expand Up @@ -141,6 +141,9 @@ export const displayToCanonical = (displayExpression: string, displayNameMap: Di
// Note that parseArguments will modify args array in place, because we're passing canonicalizeWith option.
typedFnRegistry[node.fn.name].canonicalize?.(node.args, displayNameMap)
}
if (isConstantStringNode(node)) {
node.value = escapeDoubleQuoteString(node.value)
}
}
formulaTree.traverse(visitNode)
return formulaTree.toString()
Expand Down

0 comments on commit 070740d

Please sign in to comment.