Skip to content

Commit

Permalink
perf: improve text node checking speed
Browse files Browse the repository at this point in the history
We used to carve a fragment out of the node and check that, but it was costly
to do. We now keep track of an index instead.
  • Loading branch information
lddubeau committed Aug 15, 2018
1 parent 7cfa4e2 commit f270e8b
Showing 1 changed file with 19 additions and 23 deletions.
42 changes: 19 additions & 23 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ exports.EVENTS = [
];

const buffers = [
"comment", "openWakaBang", "textNode", "textFragments", "tagName", "doctype",
"piTarget", "piBody", "entity", "attribName", "attribValue", "cdata",
"xmlDeclName", "xmlDeclValue",
"comment", "openWakaBang", "textNode", "tagName", "doctype", "piTarget",
"piBody", "entity", "attribName", "attribValue", "cdata", "xmlDeclName",
"xmlDeclValue",
];

const NL = 0xA;
Expand Down Expand Up @@ -267,6 +267,10 @@ class SaxesParser {
this.requiredSeparator = undefined;
this.entityBufferName = undefined;
this.entityReturnState = undefined;
// This records the index before which we don't have to check for the
// presence of ]]>. The text before that index has been checked already,
// and should not be checked twice.
this.textNodeCheckedBefore = 0;

// namespaces form a prototype chain.
// it always points at the current tag,
Expand Down Expand Up @@ -626,27 +630,19 @@ class SaxesParser {
this.reportedTextBeforeRoot = true;
}
this.textNode = String.fromCodePoint(c);
this.textNodeCheckedBefore = 0;
this.state = S_TEXT;
this.xmlDeclPossible = false;
}
}

/** @private */
sText(chunkState) {
const start = this.textNode.length;
const c = this.captureWhile(chunkState,
cx => cx !== LESS && cx !== AMP,
"textNode");

const fragment = this.textNode.slice(start);

// textFragments is a buffer we use to check for the presence of a
// literal "]]>" in text nodes. We cannot do the check against textNode
// itself because textNode will contain resolved entities so "]]&gt;"
// would turn to "]]>" in textNode and raise a false error.
this.textFragments += fragment;

if (!this.inRoot && /\S/.test(fragment)) {
if (!this.inRoot && /\S/.test(this.textNode)) {
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
// to avoid reporting errors for every single character that is out of
// place.
Expand All @@ -661,20 +657,19 @@ class SaxesParser {
}
}

// We also have to check the end of textFragments because some cases may
// slip through otherwise. For instance, if client code writes
// char-by-char. Then fragment will never contain ]]> but instead we'll have
// 3 fragments one with "]", a second with "]" and a third with ">".
if (this.textFragments.includes("]]>")) {
this.textFragments = "";
if (this.textNode.includes("]]>", this.textNodeCheckedBefore)) {
this.fail("the string \"]]>\" is disallowed in char data.");
}

// We have to go back two spaces so that we can catch the case where on a
// previous write call, the textNode buffer ended on ``]]`` and we started
// with ``>`` this time around.
this.textNodeCheckedBefore = this.textNode.length - 2;

switch (c) {
case LESS:
this.state = S_OPEN_WAKA;
this.startTagPosition = this.position;
this.textFragments = "";
break;
case AMP:
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags to
Expand All @@ -692,9 +687,6 @@ class SaxesParser {
this.state = S_ENTITY;
this.entityBufferName = "textNode";
this.entityReturnState = S_TEXT;
// If we run into an entity, then necessarily we do not have a "]]>"
// literal. So we flush this.textFragments.
this.textFragments = "";
break;
default:
}
Expand Down Expand Up @@ -1472,6 +1464,9 @@ class SaxesParser {
}
else if (c === SEMICOLON) {
this[this.entityBufferName] += this.parseEntity();
if (this.entityBufferName === "textNode") {
this.textNodeCheckedBefore = this.textNode.length;
}
this.entity = "";
this.state = this.entityReturnState;
}
Expand Down Expand Up @@ -1522,6 +1517,7 @@ class SaxesParser {
this.ontext(this.textNode);
}
this.textNode = "";
this.textNodeCheckedBefore = 0;
}

/**
Expand Down

0 comments on commit f270e8b

Please sign in to comment.