diff --git a/lib/saxes.js b/lib/saxes.js index 5ff897e4..212e1e94 100644 --- a/lib/saxes.js +++ b/lib/saxes.js @@ -161,6 +161,10 @@ function isName(name) { return NAME_RE.test(name); } +const FORBIDDEN_START = 0; +const FORBIDDEN_BRACKET = 1; +const FORBIDDEN_BRACKET_BRACKET = 2; + /** * Data structure for an XML tag. * @@ -312,6 +316,7 @@ class SaxesParser { this.chunkPosition = 0; this.i = 0; this.trailingCR = false; + this.forbiddenState = FORBIDDEN_START; /** * A map of entity name to expansion. * @@ -801,33 +806,70 @@ class SaxesParser { // for performing the ]]> check. A previous version of this code, checked // ``this.text`` for the presence of ]]>. It simplified the code but was // very costly when character data contained a lot of entities to be parsed. + // + // Since we are using a specialized loop, we also keep track of the presence + // of ]]> in text data. The sequence ]]> is forbidden to appear as-is. + // const { chunk, limit, i: start } = this; + let { forbiddenState } = this; + let nonSpace = false; let c; + // eslint-disable-next-line no-labels, no-restricted-syntax + scanLoop: while (this.i < limit) { const code = this.getCode(); - if (code === LESS || code === AMP) { + switch (code) { + case LESS: + this.state = S_OPEN_WAKA; + c = code; + forbiddenState = FORBIDDEN_START; + // eslint-disable-next-line no-labels + break scanLoop; + case AMP: + this.state = S_ENTITY; + this.entityReturnState = S_TEXT; c = code; + forbiddenState = FORBIDDEN_START; + nonSpace = true; + // eslint-disable-next-line no-labels + break scanLoop; + case CLOSE_BRACKET: + switch (forbiddenState) { + case FORBIDDEN_START: + forbiddenState = FORBIDDEN_BRACKET; + break; + case FORBIDDEN_BRACKET: + forbiddenState = FORBIDDEN_BRACKET_BRACKET; + break; + case FORBIDDEN_BRACKET_BRACKET: + break; + default: + forbiddenState = FORBIDDEN_START; + } + nonSpace = true; + break; + case GREATER: + if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) { + this.fail("the string \"]]>\" is disallowed in char data."); + } + forbiddenState = FORBIDDEN_START; + nonSpace = true; break; + default: + forbiddenState = FORBIDDEN_START; + if (!isS(code)) { + nonSpace = true; + } } } + this.forbiddenState = forbiddenState; // This is faster than adding codepoints one by one. - const slice = chunk.substring(start, - c === undefined ? undefined : - (this.i - (c <= 0xFFFF ? 1 : 2))); - - // We test for the presence of ]]>, which is not allowed in CharData. We - // have to take into account edge conditions. - if (slice.includes("]]>") || - (slice[0] === ">" && this.text.endsWith("]]")) || - (slice.startsWith("]>") && this.text.endsWith("]"))) { - this.fail("the string \"]]>\" is disallowed in char data."); - } + this.text += chunk.substring(start, + c === undefined ? undefined : + (this.i - (c <= 0xFFFF ? 1 : 2))); - this.text += slice; - - if ((!this.sawRoot || this.closedRoot) && - (/\S/.test(slice) || c === AMP)) { + if (nonSpace && (!this.sawRoot || this.closedRoot)) { // We use the reportedTextBeforeRoot and reportedTextAfterRoot flags // to avoid reporting errors for every single character that is out of // place. @@ -841,17 +883,6 @@ class SaxesParser { this.reportedTextAfterRoot = true; } } - - switch (c) { - case LESS: - this.state = S_OPEN_WAKA; - break; - case AMP: - this.state = S_ENTITY; - this.entityReturnState = S_TEXT; - break; - default: - } } /** @private */ diff --git a/test/wrong-cdata-closure.js b/test/wrong-cdata-closure.js index 37d74889..d6813542 100644 --- a/test/wrong-cdata-closure.js +++ b/test/wrong-cdata-closure.js @@ -19,7 +19,7 @@ describe("wrong cdata closure", () => { isSelfClosing: false, }], ["error", - "undefined:1:23: the string \"]]>\" is disallowed in char data."], + "undefined:1:19: the string \"]]>\" is disallowed in char data."], ["text", "somethingx]]>moo"], ["closetag", { name: "span", @@ -83,7 +83,7 @@ describe("wrong cdata closure", () => { isSelfClosing: false, }], ["error", - "undefined:1:20: the string \"]]>\" is disallowed in char data."], + "undefined:1:19: the string \"]]>\" is disallowed in char data."], ["text", "somethingx]]>moo"], ["closetag", { name: "span",