Skip to content

Commit

Permalink
querystring: improve parse() performance
Browse files Browse the repository at this point in the history
PR-URL: #10874
Reviewed-By: James M Snell <[email protected]>
  • Loading branch information
mscdex authored and italoacasas committed Jan 30, 2017
1 parent d13aba8 commit 53421b1
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 63 deletions.
33 changes: 12 additions & 21 deletions benchmark/querystring/querystring-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,26 @@ var common = require('../common.js');
var querystring = require('querystring');
var v8 = require('v8');

var types = [
'noencode',
'multicharsep',
'encodemany',
'encodelast',
'multivalue',
'multivaluemany',
'manypairs'
];
// Benchmark query strings, keyed by the name of the case each one exercises.
// Key order matters: Object.keys(inputs) is passed as the benchmark's `type`
// option.
var inputs = {
// Plain key=value pairs with no percent-encoding.
noencode: 'foo=bar&baz=quux&xyzzy=thud',
// Pairs separated by runs of consecutive '&' characters.
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
// '%' characters that are not followed by two hex digits.
encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud',
// Several %XX escape sequences spread across keys and values.
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
// A single %XX escape sequence, at the very end of the string.
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
// The key `foo` appears three times, alongside one unique key.
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
// The key `foo` appears many times (string split across two lines).
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
// Twenty-six single-letter keys with no '=' and no value.
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
};

var bench = common.createBenchmark(main, {
type: types,
type: Object.keys(inputs),
n: [1e6],
});

function main(conf) {
var type = conf.type;
var n = conf.n | 0;

var inputs = {
noencode: 'foo=bar&baz=quux&xyzzy=thud',
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
};
var input = inputs[type];

// Force-optimize querystring.parse() so that the benchmark doesn't get
Expand Down
104 changes: 62 additions & 42 deletions lib/querystring.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
// Query String Utilities

'use strict';

const QueryString = exports;
// Public API of this module. Every value is a function declaration defined
// further down in this file; function declarations are hoisted, so they can be
// referenced here before their definitions appear.
const QueryString = module.exports = {
unescapeBuffer,
// `unescape()` is a JS global, so we need to use a different local name
unescape: qsUnescape,

// `escape()` is a JS global, so we need to use a different local name
escape: qsEscape,

// `encode` is an alias for the same function as `stringify`
stringify,
encode: stringify,

// `decode` is an alias for the same function as `parse`
parse,
decode: parse
};
const Buffer = require('buffer').Buffer;

// This constructor is used to store parsed query string values. Instantiating
Expand All @@ -13,7 +24,7 @@ ParsedQueryString.prototype = Object.create(null);


// a safe fast alternative to decodeURIComponent
QueryString.unescapeBuffer = function(s, decodeSpaces) {
function unescapeBuffer(s, decodeSpaces) {
var out = Buffer.allocUnsafe(s.length);
var state = 0;
var n, m, hexchar;
Expand Down Expand Up @@ -77,7 +88,7 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
// TODO support returning arbitrary buffers.

return out.slice(0, outIndex - 1);
};
}


function qsUnescape(s, decodeSpaces) {
Expand All @@ -87,13 +98,12 @@ function qsUnescape(s, decodeSpaces) {
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
}
}
QueryString.unescape = qsUnescape;


// Lookup table mapping each byte value (0-255) to its percent-encoded form:
// '%' followed by two upper-case hex digits, e.g. hexTable[10] === '%0A'.
var hexTable = new Array(256);
for (var i = 0; i < 256; ++i) {
  const hex = i.toString(16).toUpperCase();
  // Values below 16 produce a single hex digit and need a leading zero.
  hexTable[i] = '%' + (i < 16 ? '0' + hex : hex);
}
QueryString.escape = function(str) {
function qsEscape(str) {
// replaces encodeURIComponent
// http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4
if (typeof str !== 'string') {
Expand Down Expand Up @@ -164,20 +174,20 @@ QueryString.escape = function(str) {
if (lastPos < str.length)
return out + str.slice(lastPos);
return out;
};
}

var stringifyPrimitive = function(v) {
function stringifyPrimitive(v) {
if (typeof v === 'string')
return v;
if (typeof v === 'number' && isFinite(v))
return '' + v;
if (typeof v === 'boolean')
return v ? 'true' : 'false';
return '';
};
}


QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
function stringify(obj, sep, eq, options) {
sep = sep || '&';
eq = eq || '=';

Expand Down Expand Up @@ -215,34 +225,43 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
return fields;
}
return '';
};
}

// Parse a key/val string.
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
sep = sep || '&';
eq = eq || '=';
// Converts a string into an array of its UTF-16 code units, one entry per
// character position. Empty and single-character strings (the common case for
// separators such as '&' and '=') are handled without entering the loop.
function charCodes(str) {
  const len = str.length;
  switch (len) {
    case 0:
      return [];
    case 1:
      return [str.charCodeAt(0)];
  }
  const codes = [];
  for (let idx = 0; idx < len; idx += 1)
    codes.push(str.charCodeAt(idx));
  return codes;
}
const defSepCodes = [38]; // &
const defEqCodes = [61]; // =

// Parse a key/val string.
function parse(qs, sep, eq, options) {
const obj = new ParsedQueryString();

if (typeof qs !== 'string' || qs.length === 0) {
return obj;
}

if (typeof sep !== 'string')
sep += '';

const eqLen = eq.length;
const sepLen = sep.length;
var sepCodes = (!sep ? defSepCodes : charCodes(sep + ''));
var eqCodes = (!eq ? defEqCodes : charCodes(eq + ''));
const sepLen = sepCodes.length;
const eqLen = eqCodes.length;

var maxKeys = 1000;
var pairs = 1000;
if (options && typeof options.maxKeys === 'number') {
maxKeys = options.maxKeys;
// -1 is used in place of a value like Infinity for meaning
// "unlimited pairs" because of additional checks V8 (at least as of v5.4)
// has to do when using variables that contain values like Infinity. Since
// `pairs` is always decremented and checked explicitly for 0, -1 works
// effectively the same as Infinity, while providing a significant
// performance boost.
pairs = (options.maxKeys > 0 ? options.maxKeys : -1);
}

var pairs = Infinity;
if (maxKeys > 0)
pairs = maxKeys;

var decode = QueryString.unescape;
if (options && typeof options.decodeURIComponent === 'function') {
decode = options.decodeURIComponent;
Expand All @@ -262,7 +281,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
const code = qs.charCodeAt(i);

// Try matching key/value pair separator (e.g. '&')
if (code === sep.charCodeAt(sepIdx)) {
if (code === sepCodes[sepIdx]) {
if (++sepIdx === sepLen) {
// Key/value pair separator match!
const end = i - sepIdx + 1;
Expand All @@ -284,10 +303,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
keys[keys.length] = key;
} else {
const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it
// is ~15-20% faster with v8 4.7 and is safe to use because we are
// using it with values being created within this function
if (curValue instanceof Array)
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
Expand Down Expand Up @@ -322,7 +341,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {

// Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) {
if (code === eq.charCodeAt(eqIdx)) {
if (code === eqCodes[eqIdx]) {
if (++eqIdx === eqLen) {
// Key/value separator match!
const end = i - eqIdx + 1;
Expand Down Expand Up @@ -354,12 +373,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {

if (code === 43/*+*/) {
if (eqIdx < eqLen) {
if (i - lastPos > 0)
if (lastPos < i)
key += qs.slice(lastPos, i);
key += '%20';
keyEncoded = true;
} else {
if (i - lastPos > 0)
if (lastPos < i)
value += qs.slice(lastPos, i);
value += '%20';
valEncoded = true;
Expand All @@ -369,7 +388,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
}

// Check if we have leftover key or value data
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
if (lastPos < qs.length) {
if (eqIdx < eqLen)
key += qs.slice(lastPos);
Expand All @@ -387,22 +406,23 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
keys[keys.length] = key;
} else {
const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it
// is ~15-20% faster with v8 4.7 and is safe to use because we are
// using it with values being created within this function
if (curValue instanceof Array)
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}
}

return obj;
};
}


// v8 does not optimize functions with try-catch blocks, so we isolate them here
// to minimize the damage
// to minimize the damage (Note: no longer true as of V8 5.4 -- but still will
// not be inlined).
function decodeStr(s, decoder) {
try {
return decoder(s);
Expand Down

1 comment on commit 53421b1

@colelawrence
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very neat

Please sign in to comment.