Skip to content

Commit

Permalink
fix: check that the characters we read are valid char data
Browse files Browse the repository at this point in the history
This required using String.fromCodePoint and .codePointAt. Unfortunately, they
slow down processing quite a bit.
  • Loading branch information
lddubeau committed Jul 6, 2018
1 parent 4fd67a1 commit 7611a85
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 20 deletions.
37 changes: 19 additions & 18 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
const ENTITY_START_CHAR =
new RegExp(`^[${ED5.fragments.NAME_START_CHAR}#]$`, "u");

// This implementation works on strings, a single character at a time; as such,
// it cannot ever support astral-plane characters (10000-EFFFF) without a
// significant breaking change to either this parser or the JavaScript
// language. Implementation of an emoji-capable XML parser is left as an
// exercise for the reader.

const rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE };

const XML_ENTITIES = {
Expand Down Expand Up @@ -223,28 +217,29 @@ class SAXParser {
if (typeof chunk === "object") {
chunk = chunk.toString();
}

let i = 0;
let c = "";
// eslint-disable-next-line no-constant-condition
while (true) {
c = chunk[i++] || "";
const limit = chunk.length;
while (i < limit) {
let c = String.fromCodePoint(chunk.codePointAt(i));
i += c.length;
this.c = c;

if (!c) {
break;
}

if (this.trackPosition) {
this.position++;
this.position += c.length;
if (c === "\n") {
this.line++;
this.column = 0;
}
else {
this.column++;
this.column += c.length;
}
}

if (!CHAR.test(c)) {
this.fail("Invalid character");
}

switch (this.state) {
case S_BEGIN:
this.state = S_BEGIN_WHITESPACE;
Expand All @@ -269,8 +264,14 @@ class SAXParser {
case S_TEXT:
if (this.sawRoot && !this.closedRoot) {
const starti = i - 1;
while (c && c !== "<" && c !== "&") {
c = chunk[i++] || "";
while (i < limit && c !== "<" && c !== "&") {
c = String.fromCodePoint(chunk.codePointAt(i));
i += c.length;

if (!CHAR.test(c)) {
this.fail("Invalid character");
}

if (c && this.trackPosition) {
this.position++;
if (c === "\n") {
Expand Down
2 changes: 1 addition & 1 deletion test/issue-86.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ require(".").test({
],
[
"error",
"Unexpected end\nLine: 0\nColumn: 20\nChar: ",
"Unexpected end\nLine: 0\nColumn: 20\nChar: f",
],
],
opt: {},
Expand Down
2 changes: 1 addition & 1 deletion test/unclosed-root.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ require(".").test({
],
[
"error",
"Unclosed root tag\nLine: 0\nColumn: 6\nChar: ",
"Unclosed root tag\nLine: 0\nColumn: 6\nChar: >",
],
],
opt: {},
Expand Down

0 comments on commit 7611a85

Please sign in to comment.