diff --git a/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js b/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js index 86714df6c196a7..b4a80af4e5eabd 100644 --- a/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js +++ b/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js @@ -7,7 +7,7 @@ const inputs = require('../fixtures/url-inputs.js').searchParams; const bench = common.createBenchmark(main, { type: Object.keys(inputs), method: ['legacy', 'whatwg'], - n: [1e5] + n: [1e6] }); function useLegacy(n, input) { diff --git a/lib/internal/url.js b/lib/internal/url.js index 0e43364a792ae1..7a6ff227ed4191 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1,7 +1,11 @@ 'use strict'; const util = require('util'); -const { hexTable, StorageObject } = require('internal/querystring'); +const { + hexTable, + isHexTable, + StorageObject +} = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); const cannotBeBase = Symbol('cannot-be-base'); @@ -575,23 +579,106 @@ function initSearchParams(url, init) { url[searchParams] = []; return; } - url[searchParams] = getParamsFromObject(querystring.parse(init)); + url[searchParams] = parseParams(init); } -function getParamsFromObject(obj) { - const keys = Object.keys(obj); - const values = []; - for (var i = 0; i < keys.length; i++) { - const name = keys[i]; - const value = obj[name]; - if (Array.isArray(value)) { - for (const item of value) - values.push(name, item); - } else { - values.push(name, value); +// application/x-www-form-urlencoded parser +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-parser +function parseParams(qs) { + const out = []; + var pairStart = 0; + var lastPos = 0; + var seenSep = false; + var buf = ''; + var encoded = false; + var encodeCheck = 0; + var i; + for (i = 0; i < qs.length; ++i) { + const code = qs.charCodeAt(i); + + // Try matching key/value pair separator + if (code === 38/*&*/) { + if (pairStart === i) { + // We saw an empty substring between pair separators + lastPos = pairStart = i + 1; + continue; + } + + if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + // If `buf` is the key, add an empty value. + if (!seenSep) + out.push(''); + + seenSep = false; + buf = ''; + encoded = false; + encodeCheck = 0; + lastPos = pairStart = i + 1; + continue; + } + + // Try matching key/value separator (e.g. '=') if we haven't already + if (!seenSep && code === 61/*=*/) { + // Key/value separator match! + if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + seenSep = true; + buf = ''; + encoded = false; + encodeCheck = 0; + lastPos = i + 1; + continue; + } + + // Handle + and percent decoding. + if (code === 43/*+*/) { + if (lastPos < i) + buf += qs.slice(lastPos, i); + buf += ' '; + lastPos = i + 1; + } else if (!encoded) { + // Try to match an (valid) encoded byte (once) to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + } else if (encodeCheck > 0) { + // eslint-disable-next-line no-extra-boolean-cast + if (!!isHexTable[code]) { + if (++encodeCheck === 3) + encoded = true; + } else { + encodeCheck = 0; + } + } } } - return values; + + // Deal with any leftover key or value data + + // There is a trailing &. No more processing is needed. + if (pairStart === i) + return out; + + if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + // If `buf` is the key, add an empty value. + if (!seenSep) + out.push(''); + + return out; } // Adapted from querystring's implementation. diff --git a/test/fixtures/url-searchparams.js b/test/fixtures/url-searchparams.js new file mode 100644 index 00000000000000..3b186fc97bc38b --- /dev/null +++ b/test/fixtures/url-searchparams.js @@ -0,0 +1,68 @@ +module.exports = [ + ['', '', []], + [ + 'foo=918854443121279438895193', + 'foo=918854443121279438895193', + [['foo', '918854443121279438895193']] + ], + ['foo=bar', 'foo=bar', [['foo', 'bar']]], + ['foo=bar&foo=quux', 'foo=bar&foo=quux', [['foo', 'bar'], ['foo', 'quux']]], + ['foo=1&bar=2', 'foo=1&bar=2', [['foo', '1'], ['bar', '2']]], + [ + "my%20weird%20field=q1!2%22'w%245%267%2Fz8)%3F", + 'my+weird+field=q1%212%22%27w%245%267%2Fz8%29%3F', + [['my weird field', 'q1!2"\'w$5&7/z8)?']] + ], + ['foo%3Dbaz=bar', 'foo%3Dbaz=bar', [['foo=baz', 'bar']]], + ['foo=baz=bar', 'foo=baz%3Dbar', [['foo', 'baz=bar']]], + [ + 'str=foo&arr=1&somenull&arr=2&undef=&arr=3', + 'str=foo&arr=1&somenull=&arr=2&undef=&arr=3', + [ + ['str', 'foo'], + ['arr', '1'], + ['somenull', ''], + ['arr', '2'], + ['undef', ''], + ['arr', '3'] + ] + ], + [' foo = bar ', '+foo+=+bar+', [[' foo ', ' bar ']]], + ['foo=%zx', 'foo=%25zx', [['foo', '%zx']]], + ['foo=%EF%BF%BD', 'foo=%EF%BF%BD', [['foo', '\ufffd']]], + // See: https://github.com/joyent/node/issues/3058 + ['foo&bar=baz', 'foo=&bar=baz', [['foo', ''], ['bar', 'baz']]], + ['a=b&c&d=e', 'a=b&c=&d=e', [['a', 'b'], ['c', ''], ['d', 'e']]], + ['a=b&c=&d=e', 'a=b&c=&d=e', [['a', 'b'], ['c', ''], ['d', 'e']]], + ['a=b&=c&d=e', 'a=b&=c&d=e', [['a', 'b'], ['', 'c'], ['d', 'e']]], + ['a=b&=&d=e', 'a=b&=&d=e', [['a', 'b'], ['', ''], ['d', 'e']]], + ['&&foo=bar&&', 'foo=bar', [['foo', 'bar']]], + ['&', '', []], + ['&&&&', '', []], + ['&=&', '=', [['', '']]], + ['&=&=', '=&=', [['', ''], ['', '']]], + ['=', '=', [['', '']]], + ['+', '+=', [[' ', '']]], + ['+=', '+=', [[' ', '']]], + ['=+', '=+', [['', ' ']]], + ['+=&', '+=', [[' ', '']]], + ['a&&b', 'a=&b=', [['a', ''], ['b', '']]], + ['a=a&&b=b', 'a=a&b=b', [['a', 'a'], ['b', 'b']]], + ['&a', 'a=', [['a', '']]], + ['&=', '=', [['', '']]], + ['a&a&', 'a=&a=', [['a', ''], ['a', '']]], + ['a&a&a&', 'a=&a=&a=', [['a', ''], ['a', ''], ['a', '']]], + ['a&a&a&a&', 'a=&a=&a=&a=', [['a', ''], ['a', ''], ['a', ''], ['a', '']]], + ['a=&a=value&a=', 'a=&a=value&a=', [['a', ''], ['a', 'value'], ['a', '']]], + ['foo%20bar=baz%20quux', 'foo+bar=baz+quux', [['foo bar', 'baz quux']]], + ['+foo=+bar', '+foo=+bar', [[' foo', ' bar']]], + [ + // fake percent encoding + 'foo=%©ar&baz=%A©uux&xyzzy=%©ud', + 'foo=%25%C2%A9ar&baz=%25A%C2%A9uux&xyzzy=%25%C2%A9ud', + [['foo', '%©ar'], ['baz', '%A©uux'], ['xyzzy', '%©ud']] + ], + // always preserve order of key-value pairs + ['a=1&b=2&a=3', 'a=1&b=2&a=3', [['a', '1'], ['b', '2'], ['a', '3']]], + ['?a', '%3Fa=', [['?a', '']]] +]; diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js index 7d6df646407269..c7acb7d909d98c 100644 --- a/test/parallel/test-whatwg-url-searchparams.js +++ b/test/parallel/test-whatwg-url-searchparams.js @@ -1,8 +1,9 @@ 'use strict'; -require('../common'); +const common = require('../common'); const assert = require('assert'); -const URL = require('url').URL; +const path = require('path'); +const { URL, URLSearchParams } = require('url'); // Tests below are not from WPT. const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' + @@ -77,3 +78,27 @@ assert.throws(() => sp.forEach(1), m.search = '?a=a&b=b'; assert.strictEqual(sp.toString(), 'a=a&b=b'); + +const tests = require(path.join(common.fixturesDir, 'url-searchparams.js')); + +for (const [input, expected, parsed] of tests) { + if (input[0] !== '?') { + const sp = new URLSearchParams(input); + assert.strictEqual(String(sp), expected); + assert.deepStrictEqual(Array.from(sp), parsed); + + m.search = input; + assert.strictEqual(String(m.searchParams), expected); + assert.deepStrictEqual(Array.from(m.searchParams), parsed); + } + + { + const sp = new URLSearchParams(`?${input}`); + assert.strictEqual(String(sp), expected); + assert.deepStrictEqual(Array.from(sp), parsed); + + m.search = `?${input}`; + assert.strictEqual(String(m.searchParams), expected); + assert.deepStrictEqual(Array.from(m.searchParams), parsed); + } +}