Skip to content

Commit

Permalink
perf: remove skipWhitespace
Browse files Browse the repository at this point in the history
``skipWhitespace`` had a perverse effect on performance. In general, when
parsing a clean document, it faster to assume there are not extraneous spaces.
  • Loading branch information
lddubeau committed Aug 23, 2018
1 parent 04855d6 commit c8b7ae2
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 36 deletions.
25 changes: 25 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,31 @@ continually check whether it may emit non-error events and this would have a
measurable cost even when parsing well-formed documents. We decided always emit
the events anyway and diverge a bit from the XML specifications.

## Optimize for "Clean" XML

This is well-formed XML:

```xml
<foo a="1" b="2" c="3">something</foo>
```

This is also well-formed and represents the same document as the previous
example:

```xml
<foo a = "1"


b = "2"

c = "3" >something</foo >


```

We want both documents to be parsed without error by saxes but, when writing
code, optimize for the former rather than the later.

## Pay Attention to Performance

The more a PR harms performance,
Expand Down
53 changes: 17 additions & 36 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -656,29 +656,6 @@ class SaxesParser {
return undefined;
}

/**
* Skip whitespace characters.
*
* @private
*
* @param {ChunkState} chunkState The current chunk state.
*
* @return {string|undefined} The character that made the test fail, or
* ``undefined`` if we hit the end of the chunk.
*/
skipWhitespace(chunkState) {
const { limit } = chunkState;
while (chunkState.i < limit) {
const c = this.getCode(chunkState);
if (!isS(c)) {
return c;
}
}

return undefined;
}


// STATE HANDLERS

/** @private */
Expand Down Expand Up @@ -1084,31 +1061,31 @@ class SaxesParser {
}
break;
case S_XML_DECL_EQ:
c = this.skipWhitespace(chunkState);
c = this.getCode(chunkState);
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
this.state = S_PI_ENDING;
return;
}

if (c) {
if (c && !isS(c)) {
if (c !== EQUAL) {
this.fail("value required.");
}
this.xmlDeclState = S_XML_DECL_VALUE_START;
}
break;
case S_XML_DECL_VALUE_START:
c = this.skipWhitespace(chunkState);
c = this.getCode(chunkState);
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
this.state = S_PI_ENDING;
return;
}

if (c) {
if (c && !isS(c)) {
if (!isQuote(c)) {
this.fail("value must be quoted.");
this.q = SPACE;
Expand Down Expand Up @@ -1170,11 +1147,11 @@ class SaxesParser {
}
}
else if (this.piBody.length === 0) {
c = this.skipWhitespace(chunkState);
c = this.getCode(chunkState);
if (c === QUESTION) {
this.state = S_PI_ENDING;
}
else if (c) {
else if (c && !isS(c)) {
this.piBody = String.fromCodePoint(c);
}
}
Expand Down Expand Up @@ -1292,8 +1269,8 @@ class SaxesParser {

/** @private */
sAttrib(chunkState) {
const c = this.skipWhitespace(chunkState);
if (!c) {
const c = this.getCode(chunkState);
if (!c || isS(c)) {
return;
}
if (isNameStartChar(c)) {
Expand Down Expand Up @@ -1336,7 +1313,11 @@ class SaxesParser {

/** @private */
sAttribNameSawWhite(chunkState) {
const c = this.skipWhitespace(chunkState);
const c = this.getCode(chunkState);
if (isS(c)) {
return;
}

if (c === EQUAL) {
this.state = S_ATTRIB_VALUE;
}
Expand All @@ -1361,12 +1342,12 @@ class SaxesParser {

/** @private */
sAttribValue(chunkState) {
const c = this.skipWhitespace(chunkState);
const c = this.getCode(chunkState);
if (isQuote(c)) {
this.q = c;
this.state = S_ATTRIB_VALUE_QUOTED;
}
else if (c) {
else if (c && !isS(c)) {
this.fail("unquoted attribute value.");
this.state = S_ATTRIB_VALUE_UNQUOTED;
this.attribValue = String.fromCodePoint(c);
Expand Down Expand Up @@ -1470,11 +1451,11 @@ class SaxesParser {

/** @private */
sCloseTagSawWhite(chunkState) {
const c = this.skipWhitespace(chunkState);
const c = this.getCode(chunkState);
if (c === GREATER) {
this.closeTag();
}
else if (c) {
else if (c && !isS(c)) {
this.fail("disallowed character in closing tag.");
}
}
Expand Down

0 comments on commit c8b7ae2

Please sign in to comment.