From eaffcf9f1ac98247df44a4f8cf0a0f175b40183e Mon Sep 17 00:00:00 2001 From: James M Snell Date: Tue, 31 May 2016 11:52:19 -0700 Subject: [PATCH] url: adding WHATWG URL support Implements WHATWG URL support. Example: ``` var u = new url.URL('http://example.org'); ``` Currently passing all WHATWG url parsing tests and all but two of the setter tests. The two setter tests are intentionally skipped for now but will be revisited. --- benchmark/url/new-url-parse.js | 57 + lib/internal/url.js | 629 ++++++++ lib/url.js | 70 +- node.gyp | 2 + src/node_i18n.cc | 12 +- src/node_i18n.h | 7 + src/node_url.cc | 1406 +++++++++++++++++ src/node_url.h | 538 +++++++ test/fixtures/url-setter-tests.json | 1134 +++++++++++++ test/parallel/test-whatwg-url-parsing.js | 122 ++ test/parallel/test-whatwg-url-searchparams.js | 36 + test/parallel/test-whatwg-url-setters.js | 24 + 12 files changed, 3965 insertions(+), 72 deletions(-) create mode 100644 benchmark/url/new-url-parse.js create mode 100644 lib/internal/url.js create mode 100644 src/node_url.cc create mode 100644 src/node_url.h create mode 100644 test/fixtures/url-setter-tests.json create mode 100644 test/parallel/test-whatwg-url-parsing.js create mode 100644 test/parallel/test-whatwg-url-searchparams.js create mode 100644 test/parallel/test-whatwg-url-setters.js diff --git a/benchmark/url/new-url-parse.js b/benchmark/url/new-url-parse.js new file mode 100644 index 00000000000000..ef60e81847682b --- /dev/null +++ b/benchmark/url/new-url-parse.js @@ -0,0 +1,57 @@ +'use strict'; +const common = require('../common.js'); +const url = require('url'); +const v8 = require('v8'); + +const bench = common.createBenchmark(main, { + type: 'one two three four five'.split(' '), + method: ['old', 'new'], + n: [25e4] +}); + +function useOld(n, input) { + // Force-optimize url.parse() so that the benchmark doesn't get + // disrupted by the optimizer kicking in halfway through. 
+ url.parse(input); + v8.setFlagsFromString('--allow_natives_syntax'); + eval('%OptimizeFunctionOnNextCall(url.parse)'); + + bench.start(); + for (var i = 0; i < n; i += 1) + url.parse(input); + bench.end(n); +} + +function useNew(n, input) { + bench.start(); + for (var i = 0; i < n; i += 1) + new url.URL(input); + bench.end(n); +} + +function main(conf) { + const type = conf.type; + const n = conf.n | 0; + const method = conf.method; + + var inputs = { + one: 'http://nodejs.org/docs/latest/api/url.html#url_url_format_urlobj', + two: 'http://blog.nodejs.org/', + three: 'https://encrypted.google.com/search?q=url&q=site:npmjs.org&hl=en', + four: 'javascript:alert("node is awesome");', + //five: 'some.ran/dom/url.thing?oh=yes#whoo', + five: 'https://user:pass@example.com/', + }; + var input = inputs[type] || ''; + + switch (method) { + case 'old': + useOld(n, input); + break; + case 'new': + useNew(n, input); + break; + default: + throw new Error('Unknown method'); + } +} diff --git a/lib/internal/url.js b/lib/internal/url.js new file mode 100644 index 00000000000000..79b9e1cb0f46bb --- /dev/null +++ b/lib/internal/url.js @@ -0,0 +1,629 @@ +'use strict'; + +function getPunycode() { + try { + return process.binding('icu'); + } catch (err) { + return require('punycode'); + } +} +const punycode = getPunycode(); +const binding = process.binding('url'); +const context = Symbol('context'); +const cannotBeBase = Symbol('cannot-be-base'); +const special = Symbol('special'); +const searchParams = Symbol('query'); +const querystring = require('querystring'); + +const kScheme = Symbol('scheme'); +const kHost = Symbol('host'); +const kPort = Symbol('port'); +const kDomain = Symbol('domain'); + +function StorageObject() {} +StorageObject.prototype = Object.create(null); + +class OpaqueOrigin { + toString() { + return 'null'; + } + + get effectiveDomain() { + return this; + } +} + +class TupleOrigin { + constructor(scheme, host, port, domain) { + this[kScheme] = scheme; + 
this[kHost] = host; + this[kPort] = port; + this[kDomain] = domain; + } + + get scheme() { + return this[kScheme]; + } + + get host() { + return this[kHost]; + } + + get port() { + return this[kPort]; + } + + get domain() { + return this[kDomain]; + } + + get effectiveDomain() { + return this[kDomain] || this[kHost]; + } + + toString(unicode = false) { + var result = this.scheme; + result += '://'; + result += unicode ? URL.domainToUnicode(this.host) : this.host; + if (this.port !== undefined && this.port !== null) + result += `:${this.port}`; + return result; + } +} + +class URL { + constructor(input, base) { + if (base !== undefined && !(base instanceof URL)) + base = new URL(String(base)); + input = String(input); + const base_context = base ? base[context] : undefined; + this[context] = new StorageObject(); + binding.parse(input.trim(), -1, base_context, undefined, + (flags, protocol, username, password, + host, port, path, query, fragment) => { + if (flags & binding.URL_FLAGS_FAILED) + throw new TypeError('Invalid URL'); + this[context].flags = flags; + this[context].scheme = protocol; + this[context].username = username; + this[context].password = password; + this[context].port = port; + this[context].path = path; + this[context].query = query; + this[context].fragment = fragment; + this[context].host = host; + this[searchParams] = new URLSearchParams(this); + }); + } + + get origin() { + return URL.originFor(this).toString(true); + } + + get [special]() { + return (this[context].flags & binding.URL_FLAGS_SPECIAL) != 0; + } + + get [cannotBeBase]() { + return (this[context].flags & binding.URL_FLAGS_CANNOT_BE_BASE) != 0; + } + + get protocol() { + return this[context].scheme; + } + + get searchParams() { + return this[searchParams]; + } + + set protocol(scheme) { + scheme = String(scheme); + if (scheme.length === 0) + return; + binding.parse(scheme, + binding.kSchemeStart, + null, + this[context], + (flags, protocol, username, password, + host, port, path, 
query, fragment) => { + if (flags & binding.URL_FLAGS_FAILED) + return; + const newIsSpecial = (flags & binding.URL_FLAGS_SPECIAL) != 0; + if ((this[special] && !newIsSpecial) || + (!this[special] && newIsSpecial) || + (newIsSpecial && !this[special] && + this[context].host === undefined)) { + return; + } + if (newIsSpecial) { + this[context].flags |= binding.URL_FLAGS_SPECIAL; + } else { + this[context].flags &= ~binding.URL_FLAGS_SPECIAL; + } + if (protocol) { + this[context].scheme = protocol; + this[context].flags |= binding.URL_FLAGS_HAS_SCHEME; + } else { + this[context].flags &= ~binding.URL_FLAGS_HAS_SCHEME; + } + }); + } + + get username() { + return this[context].username || ''; + } + + set username(username) { + username = String(username); + if (!this.hostname) + return; + if (!username) { + this[context].username = null; + this[context].flags &= ~binding.URL_FLAGS_HAS_USERNAME; + return; + } + this[context].username = binding.encodeAuth(username); + this[context].flags |= binding.URL_FLAGS_HAS_USERNAME; + } + + get password() { + return this[context].password || ''; + } + + set password(password) { + password = String(password); + if (!this.hostname) + return; + if (!password) { + this[context].password = null; + this[context].flags &= ~binding.URL_FLAGS_HAS_PASSWORD; + return; + } + this[context].password = binding.encodeAuth(password); + this[context].flags |= binding.URL_FLAGS_HAS_PASSWORD; + } + + get host() { + var ret = this[context].host || ''; + if (this[context].port !== undefined) + ret += `:${this[context].port}`; + return ret; + } + + set host(host) { + host = String(host); + if (this[cannotBeBase] || + (this[special] && host.length === 0)) { + // Cannot set the host if cannot-be-base is set or + // scheme is special and host length is zero + return; + } + if (!host) { + this[context].host = null; + this[context].flags &= ~binding.URL_FLAGS_HAS_HOST; + return; + } + binding.parse(host, binding.kHost, null, this[context], + (flags, protocol, 
username, password, + host, port, path, query, fragment) => { + if (flags & binding.URL_FLAGS_FAILED) + return; + if (host) { + this[context].host = host; + this[context].flags |= binding.URL_FLAGS_HAS_HOST; + } else { + this[context].flags &= ~binding.URL_FLAGS_HAS_HOST; + } + if (port !== undefined) + this[context].port = port; + }); + } + + get hostname() { + return this[context].host || ''; + } + + set hostname(host) { + host = String(host); + if (this[cannotBeBase] || + (this[special] && host.length === 0)) { + // Cannot set the host if cannot-be-base is set or + // scheme is special and host length is zero + return; + } + if (!host) { + this[context].host = null; + this[context].flags &= ~binding.URL_FLAGS_HAS_HOST; + return; + } + binding.parse(host, + binding.kHostname, + null, + this[context], + (flags, protocol, username, password, + host, port, path, query, fragment) => { + if (flags & binding.URL_FLAGS_FAILED) + return; + if (host) { + this[context].host = host; + this[context].flags |= binding.URL_FLAGS_HAS_HOST; + } else { + this[context].flags &= ~binding.URL_FLAGS_HAS_HOST; + } + }); + } + + get port() { + const port = this[context].port; + return port === undefined ? '' : String(port); + } + + set port(port) { + if (!this[context].host || this[cannotBeBase] || this.protocol === 'file:') + return; + port = String(port); + if (port === '') { + // Currently, if port number is empty, left unchanged. + // TODO(jasnell): This might be changing in the spec + return; + } + binding.parse(port, binding.kPort, null, this[context], + (flags, protocol, username, password, + host, port, path, query, fragment) => { + if (flags & binding.URL_FLAGS_FAILED) + return; + this[context].port = port; + }); + } + + get pathname() { + if (this[cannotBeBase]) + return this[context].path[0]; + return this[context].path !== undefined ? 
+ `/${this[context].path.join('/')}` : ''; + } + + set pathname(path) { + if (this[cannotBeBase]) + return; + path = String(path); + binding.parse(path, + binding.kPathStart, + null, + this[context], + (flags, protocol, username, password, + host, port, path, query, fragment) => { + if (flags & binding.URL_FLAGS_FAILED) + return; + if (path) { + this[context].path = path; + this[context].flags |= binding.URL_FLAGS_HAS_PATH; + } else { + this[context].flags &= ~binding.URL_FLAGS_HAS_PATH; + } + }); + } + + get search() { + return !this[context].query ? '' : `?${this[context].query}`; + } + + set search(search) { + update(this, search); + this[searchParams][searchParams] = querystring.parse(this.search.slice(1)); + } + + get hash() { + return !this[context].fragment ? '' : `#${this[context].fragment}`; + } + + set hash(hash) { + hash = String(hash); + if (this.protocol === 'javascript:') + return; + if (!hash) { + this[context].fragment = null; + this[context].flags &= ~binding.URL_FLAGS_HAS_FRAGMENT; + return; + } + if (hash[0] === '#') hash = hash.slice(1); + this[context].fragment = ''; + binding.parse(hash, + binding.kFragment, + null, + this[context], + (flags, protocol, username, password, + host, port, path, query, fragment) => { + if (flags & binding.URL_FLAGS_FAILED) + return; + if (fragment) { + this[context].fragment = fragment; + this[context].flags |= binding.URL_FLAGS_HAS_FRAGMENT; + } else { + this[context].flags &= ~binding.URL_FLAGS_HAS_FRAGMENT; + } + }); + } + + get href() { + return this.toString(); + } + + toString(options) { + options = options || {}; + const fragment = + options.fragment !== undefined ? 
+ !!options.fragment : true; + const unicode = !!options.unicode; + var ret = ''; + if (this.protocol) + ret = this.protocol; + if (this[context].host !== undefined) { + ret += '//'; + const has_username = typeof this[context].username === 'string'; + const has_password = typeof this[context].password === 'string'; + if (has_username || has_password) { + if (has_username) + ret += this[context].username; + if (has_password) + ret += `:${this[context].password}`; + ret += '@'; + } + if (unicode) { + ret += punycode.toUnicode(this.hostname); + if (this[context].port !== undefined) + ret += `:${this.port}`; + } else { + ret += this.host; + } + } else if (this[context].scheme === 'file:') { + ret += '//'; + } + if (this.pathname) + ret += this.pathname; + if (typeof this[context].query === 'string') + ret += `?${this[context].query}`; + if (fragment && typeof this[context].fragment === 'string') + ret += `#${this[context].fragment}`; + return ret; + } + + inspect(depth, opts) { + var ret = 'URL {\n'; + ret += ` href: ${this.href}\n`; + if (this[context].scheme !== undefined) + ret += ` protocol: ${this.protocol}\n`; + if (this[context].username !== undefined) + ret += ` username: ${this.username}\n`; + if (this[context].password !== undefined) { + const pwd = opts.showHidden ? 
this[context].password : '--------'; + ret += ` password: ${pwd}\n`; + } + if (this[context].host !== undefined) + ret += ` hostname: ${this.hostname}\n`; + if (this[context].port !== undefined) + ret += ` port: ${this.port}\n`; + if (this[context].path !== undefined) + ret += ` pathname: ${this.pathname}\n`; + if (this[context].query !== undefined) + ret += ` search: ${this.search}\n`; + if (this[context].fragment !== undefined) + ret += ` hash: ${this.hash}\n`; + if (opts.showHidden) { + ret += ` cannot-be-base: ${this[cannotBeBase]}\n`; + ret += ` special: ${this[special]}\n`; + } + ret += '}'; + return ret; + } +} + +var hexTable = new Array(256); +for (var i = 0; i < 256; ++i) + hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); +function encodeAuth(str) { + // faster encodeURIComponent alternative for encoding auth uri components + var out = ''; + var lastPos = 0; + for (var i = 0; i < str.length; ++i) { + var c = str.charCodeAt(i); + + // These characters do not need escaping: + // ! - . _ ~ + // ' ( ) * : + // digits + // alpha (uppercase) + // alpha (lowercase) + if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E || + (c >= 0x27 && c <= 0x2A) || + (c >= 0x30 && c <= 0x3A) || + (c >= 0x41 && c <= 0x5A) || + (c >= 0x61 && c <= 0x7A)) { + continue; + } + + if (i - lastPos > 0) + out += str.slice(lastPos, i); + + lastPos = i + 1; + + // Other ASCII characters + if (c < 0x80) { + out += hexTable[c]; + continue; + } + + // Multi-byte characters ... 
+ if (c < 0x800) { + out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)]; + continue; + } + if (c < 0xD800 || c >= 0xE000) { + out += hexTable[0xE0 | (c >> 12)] + + hexTable[0x80 | ((c >> 6) & 0x3F)] + + hexTable[0x80 | (c & 0x3F)]; + continue; + } + // Surrogate pair + ++i; + var c2; + if (i < str.length) + c2 = str.charCodeAt(i) & 0x3FF; + else + c2 = 0; + c = 0x10000 + (((c & 0x3FF) << 10) | c2); + out += hexTable[0xF0 | (c >> 18)] + + hexTable[0x80 | ((c >> 12) & 0x3F)] + + hexTable[0x80 | ((c >> 6) & 0x3F)] + + hexTable[0x80 | (c & 0x3F)]; + } + if (lastPos === 0) + return str; + if (lastPos < str.length) + return out + str.slice(lastPos); + return out; +} + +function update(url, search) { + search = String(search); + if (!search) { + url[context].query = null; + url[context].flags &= ~binding.URL_FLAGS_HAS_QUERY; + return; + } + if (search[0] === '?') search = search.slice(1); + url[context].query = ''; + binding.parse(search, + binding.kQuery, + null, + url[context], + (flags, protocol, username, password, + host, port, path, query, fragment) => { + if (flags & binding.URL_FLAGS_FAILED) + return; + if (query) { + url[context].query = query; + url[context].flags |= binding.URL_FLAGS_HAS_QUERY; + } else { + url[context].flags &= ~binding.URL_FLAGS_HAS_QUERY; + } + }); +} + +class URLSearchParams { + constructor(url) { + this[context] = url; + this[searchParams] = querystring.parse(url[context].query || ''); + } + + append(name, value) { + const obj = this[searchParams]; + name = String(name); + value = String(value); + var existing = obj[name]; + if (existing === undefined) { + obj[name] = value; + } else if (Array.isArray(existing)) { + existing.push(value); + } else { + obj[name] = [existing, value]; + } + update(this[context], querystring.stringify(obj)); + } + + delete(name) { + const obj = this[searchParams]; + name = String(name); + delete obj[name]; + update(this[context], querystring.stringify(obj)); + } + + set(name, value) { + const obj = 
this[searchParams]; + name = String(name); + value = String(value); + obj[name] = value; + update(this[context], querystring.stringify(obj)); + } + + get(name) { + const obj = this[searchParams]; + name = String(name); + var value = obj[name]; + return Array.isArray(value) ? value[0] : value; + } + + getAll(name) { + const obj = this[searchParams]; + name = String(name); + var value = obj[name]; + return value === undefined ? [] : Array.isArray(value) ? value : [value]; + } + + has(name) { + const obj = this[searchParams]; + name = String(name); + return name in obj; + } + + *[Symbol.iterator]() { + const obj = this[searchParams]; + for (const name in obj) { + const value = obj[name]; + if (Array.isArray(value)) { + for (const item of value) + yield [name, item]; + } else { + yield [name, value]; + } + } + } + + toString() { + return querystring.stringify(this[searchParams]); + } +} + +URL.originFor = function(url) { + if (!(url instanceof URL)) + url = new URL(url); + var origin; + const protocol = url.protocol; + switch (protocol) { + case 'blob:': + if (url[context].path && url[context].path.length > 0) { + try { + return (new URL(url[context].path[0])).origin; + } catch (err) { + // fall through... 
do nothing + } + } + origin = new OpaqueOrigin(); + break; + case 'ftp:': + case 'gopher:': + case 'http:': + case 'https:': + case 'ws:': + case 'wss:': + case 'file': + origin = new TupleOrigin(protocol.slice(0, -1), + url[context].host, + url[context].port, + null); + break; + default: + origin = new OpaqueOrigin(); + } + return origin; +}; + +URL.domainToASCII = function(domain) { + return binding.domainToASCII(String(domain)); +}; +URL.domainToUnicode = function(domain) { + return binding.domainToUnicode(String(domain)); +}; + +exports.URL = URL; +exports.encodeAuth = encodeAuth; diff --git a/lib/url.js b/lib/url.js index d9357268728985..201ebfedcc5cda 100644 --- a/lib/url.js +++ b/lib/url.js @@ -10,10 +10,14 @@ function importPunycode() { const { toASCII } = importPunycode(); +const internalUrl = require('internal/url'); +const encodeAuth = internalUrl.encodeAuth; exports.parse = urlParse; exports.resolve = urlResolve; exports.resolveObject = urlResolveObject; exports.format = urlFormat; +exports.URL = internalUrl.URL; + exports.Url = Url; @@ -942,69 +946,3 @@ function spliceOne(list, index) { list[i] = list[k]; list.pop(); } - -var hexTable = new Array(256); -for (var i = 0; i < 256; ++i) - hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); -function encodeAuth(str) { - // faster encodeURIComponent alternative for encoding auth uri components - var out = ''; - var lastPos = 0; - for (var i = 0; i < str.length; ++i) { - var c = str.charCodeAt(i); - - // These characters do not need escaping: - // ! - . 
_ ~ - // ' ( ) * : - // digits - // alpha (uppercase) - // alpha (lowercase) - if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E || - (c >= 0x27 && c <= 0x2A) || - (c >= 0x30 && c <= 0x3A) || - (c >= 0x41 && c <= 0x5A) || - (c >= 0x61 && c <= 0x7A)) { - continue; - } - - if (i - lastPos > 0) - out += str.slice(lastPos, i); - - lastPos = i + 1; - - // Other ASCII characters - if (c < 0x80) { - out += hexTable[c]; - continue; - } - - // Multi-byte characters ... - if (c < 0x800) { - out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)]; - continue; - } - if (c < 0xD800 || c >= 0xE000) { - out += hexTable[0xE0 | (c >> 12)] + - hexTable[0x80 | ((c >> 6) & 0x3F)] + - hexTable[0x80 | (c & 0x3F)]; - continue; - } - // Surrogate pair - ++i; - var c2; - if (i < str.length) - c2 = str.charCodeAt(i) & 0x3FF; - else - c2 = 0; - c = 0x10000 + (((c & 0x3FF) << 10) | c2); - out += hexTable[0xF0 | (c >> 18)] + - hexTable[0x80 | ((c >> 12) & 0x3F)] + - hexTable[0x80 | ((c >> 6) & 0x3F)] + - hexTable[0x80 | (c & 0x3F)]; - } - if (lastPos === 0) - return str; - if (lastPos < str.length) - return out + str.slice(lastPos); - return out; -} diff --git a/node.gyp b/node.gyp index a998d8292613b1..fa98547e09b198 100644 --- a/node.gyp +++ b/node.gyp @@ -89,6 +89,7 @@ 'lib/internal/readline.js', 'lib/internal/repl.js', 'lib/internal/socket_list.js', + 'lib/internal/url.js', 'lib/internal/util.js', 'lib/internal/v8_prof_polyfill.js', 'lib/internal/v8_prof_processor.js', @@ -158,6 +159,7 @@ 'src/node_main.cc', 'src/node_os.cc', 'src/node_revert.cc', + 'src/node_url.cc', 'src/node_util.cc', 'src/node_v8.cc', 'src/node_stat_watcher.cc', diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 0f3b9b76e6959e..f89ae40a558b93 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -79,9 +79,9 @@ bool InitializeICUDirectory(const char* icu_data_path) { } } -static int32_t ToUnicode(MaybeStackBuffer* buf, - const char* input, - size_t length) { +int32_t 
ToUnicode(MaybeStackBuffer* buf, + const char* input, + size_t length) { UErrorCode status = U_ZERO_ERROR; uint32_t options = UIDNA_DEFAULT; options |= UIDNA_NONTRANSITIONAL_TO_UNICODE; @@ -113,9 +113,9 @@ static int32_t ToUnicode(MaybeStackBuffer* buf, return len; } -static int32_t ToASCII(MaybeStackBuffer* buf, - const char* input, - size_t length) { +int32_t ToASCII(MaybeStackBuffer* buf, + const char* input, + size_t length) { UErrorCode status = U_ZERO_ERROR; uint32_t options = UIDNA_DEFAULT; options |= UIDNA_NONTRANSITIONAL_TO_ASCII; diff --git a/src/node_i18n.h b/src/node_i18n.h index 31ad18fa47e960..21a579526ddc1a 100644 --- a/src/node_i18n.h +++ b/src/node_i18n.h @@ -15,6 +15,13 @@ namespace i18n { bool InitializeICUDirectory(const char* icu_data_path); +int32_t ToASCII(MaybeStackBuffer* buf, + const char* input, + size_t length); +int32_t ToUnicode(MaybeStackBuffer* buf, + const char* input, + size_t length); + } // namespace i18n } // namespace node diff --git a/src/node_url.cc b/src/node_url.cc new file mode 100644 index 00000000000000..f5b1a143f1bad5 --- /dev/null +++ b/src/node_url.cc @@ -0,0 +1,1406 @@ +#include "node_url.h" +#include "node.h" +#include "node_internals.h" +#include "env.h" +#include "env-inl.h" +#include "util.h" +#include "util-inl.h" +#include "v8.h" +#include "base-object.h" +#include "base-object-inl.h" +#include "node_i18n.h" + +#include +#include +#include +#include + +#if defined(NODE_HAVE_I18N_SUPPORT) +#include +#include +#endif + +namespace node { + +using v8::Array; +using v8::Context; +using v8::Function; +using v8::FunctionCallbackInfo; +using v8::HandleScope; +using v8::Integer; +using v8::Isolate; +using v8::Local; +using v8::Null; +using v8::Object; +using v8::String; +using v8::Undefined; +using v8::Value; + +#define GET(env, obj, name) \ + obj->Get(env->context(), \ + OneByteString(env->isolate(), name)).ToLocalChecked() + +#define GET_AND_SET(env, obj, name, data, flag) \ + { \ + Local val = GET(env, obj, #name); \ 
+ if (val->IsString()) { \ + Utf8Value value(env->isolate(), val.As()); \ + data->name = *value; \ + data->flags |= flag; \ + } \ + } + +#define CANNOT_BE_BASE() url.flags |= URL_FLAGS_CANNOT_BE_BASE; +#define INVALID_PARSE_STATE() url.flags |= URL_FLAGS_INVALID_PARSE_STATE; +#define SPECIAL() \ + { \ + url.flags |= URL_FLAGS_SPECIAL; \ + special = true; \ + } +#define TERMINATE() \ + { \ + url.flags |= URL_FLAGS_TERMINATED; \ + goto done; \ + } +#define FAILED() \ + { \ + url.flags |= URL_FLAGS_FAILED; \ + goto done; \ + } + +#define CHECK_FLAG(flags, name) (flags & URL_FLAGS_##name) /* NOLINT */ + +#define IS_CANNOT_BE_BASE(flags) CHECK_FLAG(flags, CANNOT_BE_BASE) +#define IS_FAILED(flags) CHECK_FLAG(flags, FAILED) + +#define DOES_HAVE_SCHEME(url) CHECK_FLAG(url.flags, HAS_SCHEME) +#define DOES_HAVE_USERNAME(url) CHECK_FLAG(url.flags, HAS_USERNAME) +#define DOES_HAVE_PASSWORD(url) CHECK_FLAG(url.flags, HAS_PASSWORD) +#define DOES_HAVE_HOST(url) CHECK_FLAG(url.flags, HAS_HOST) +#define DOES_HAVE_PATH(url) CHECK_FLAG(url.flags, HAS_PATH) +#define DOES_HAVE_QUERY(url) CHECK_FLAG(url.flags, HAS_QUERY) +#define DOES_HAVE_FRAGMENT(url) CHECK_FLAG(url.flags, HAS_FRAGMENT) + +#define SET_HAVE_SCHEME() url.flags |= URL_FLAGS_HAS_SCHEME; +#define SET_HAVE_USERNAME() url.flags |= URL_FLAGS_HAS_USERNAME; +#define SET_HAVE_PASSWORD() url.flags |= URL_FLAGS_HAS_PASSWORD; +#define SET_HAVE_HOST() url.flags |= URL_FLAGS_HAS_HOST; +#define SET_HAVE_PATH() url.flags |= URL_FLAGS_HAS_PATH; +#define SET_HAVE_QUERY() url.flags |= URL_FLAGS_HAS_QUERY; +#define SET_HAVE_FRAGMENT() url.flags |= URL_FLAGS_HAS_FRAGMENT; + +#define UTF8STRING(isolate, str) \ + String::NewFromUtf8(isolate, str.c_str(), v8::NewStringType::kNormal) \ + .ToLocalChecked() + +namespace url { + +#if defined(NODE_HAVE_I18N_SUPPORT) + static int ToUnicode(std::string* input, std::string* output) { + MaybeStackBuffer buf; + if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0) + return -1; + 
output->assign(*buf, buf.length()); + return 0; + } + + static int ToASCII(std::string* input, std::string* output) { + MaybeStackBuffer buf; + if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0) + return -1; + output->assign(*buf, buf.length()); + return 0; + } + + // Unfortunately there's not really a better way to do this. + // Iterate through each encoded codepoint and verify that + // it is a valid unicode codepoint. + static int IsValidUTF8(std::string* input) { + const char* p = input->c_str(); + int32_t len = input->length(); + for (int32_t i = 0; i < len;) { + UChar32 c; + U8_NEXT_UNSAFE(p, i, c); + if (!U_IS_UNICODE_CHAR(c)) + return -1; + } + return 0; + } +#else + // Intentional non-ops if ICU is not present. + static int ToUnicode(std::string* input, std::string* output) { + *output = *input; + return 0; + } + + static int ToASCII(std::string* input, std::string* output) { + *output = *input; + return 0; + } + + static int IsValidUTF8(std::string* input) { + return 0; + } +#endif + + static url_host_type ParseIPv6Host(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_FAILED; + for (unsigned n = 0; n < 8; n++) + host->value.ipv6[n] = 0; + uint16_t* piece_pointer = &host->value.ipv6[0]; + uint16_t* last_piece = piece_pointer + 8; + uint16_t* compress_pointer = nullptr; + const char* pointer = input; + const char* end = pointer + length; + unsigned value, len, swaps, dots; + char ch = pointer < end ? pointer[0] : kEOL; + if (ch == ':') { + if (length < 2 || pointer[1] != ':') + goto end; + pointer += 2; + ch = pointer < end ? pointer[0] : kEOL; + piece_pointer++; + compress_pointer = piece_pointer; + } + while (ch != kEOL) { + if (piece_pointer >= last_piece) + goto end; + if (ch == ':') { + if (compress_pointer != nullptr) + goto end; + pointer++; + ch = pointer < end ? 
pointer[0] : kEOL; + piece_pointer++; + compress_pointer = piece_pointer; + continue; + } + value = 0; + len = 0; + while (len < 4 && ASCII_HEX_DIGIT(ch)) { + value = value * 0x10 + hex2bin(ch); + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + len++; + } + switch (ch) { + case '.': + if (len == 0) + goto end; + pointer -= len; + ch = pointer < end ? pointer[0] : kEOL; + if (piece_pointer > last_piece - 2) + goto end; + dots = 0; + while (ch != kEOL) { + value = 0xffffffff; + if (!ASCII_DIGIT(ch)) + goto end; + while (ASCII_DIGIT(ch)) { + unsigned number = ch - '0'; + if (value == 0xffffffff) { + value = number; + } else if (value == 0) { + goto end; + } else { + value = value * 10 + number; + } + if (value > 255) + goto end; + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + } + if (dots < 3 && ch != '.') + goto end; + *piece_pointer = *piece_pointer * 0x100 + value; + if (dots & 0x1) + piece_pointer++; + if (ch != kEOL) { + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + } + if (dots == 3 && ch != kEOL) + goto end; + dots++; + } + continue; + case ':': + pointer++; + ch = pointer < end ? 
pointer[0] : kEOL; + if (ch == kEOL) + goto end; + break; + case kEOL: + break; + default: + goto end; + } + *piece_pointer = value; + piece_pointer++; + } + + if (compress_pointer != nullptr) { + swaps = piece_pointer - compress_pointer; + piece_pointer = last_piece - 1; + while (piece_pointer != &host->value.ipv6[0] && swaps > 0) { + uint16_t temp = *piece_pointer; + uint16_t* swap_piece = compress_pointer + swaps - 1; + *piece_pointer = *swap_piece; + *swap_piece = temp; + piece_pointer--; + swaps--; + } + } else if (compress_pointer == nullptr && + piece_pointer != last_piece) { + goto end; + } + type = HOST_TYPE_IPV6; + end: + host->type = type; + return type; + } + + static inline int ParseNumber(const char* start, const char* end) { + unsigned R = 10; + if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { + start += 2; + R = 16; + } + if (end - start == 0) { + return 0; + } else if (R == 10 && end - start > 1 && start[0] == '0') { + start++; + R = 8; + } + const char* p = start; + + while (p < end) { + const char ch = p[0]; + switch (R) { + case 8: + if (ch < '0' || ch > '7') + return -1; + break; + case 10: + if (!ASCII_DIGIT(ch)) + return -1; + break; + case 16: + if (!ASCII_HEX_DIGIT(ch)) + return -1; + break; + } + p++; + } + return strtol(start, NULL, R); + } + + static url_host_type ParseIPv4Host(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_DOMAIN; + const char* pointer = input; + const char* mark = input; + const char* end = pointer + length; + int parts = 0; + uint32_t val = 0; + unsigned numbers[4]; + if (length == 0) + goto end; + + while (pointer <= end) { + const char ch = pointer < end ? pointer[0] : kEOL; + const int remaining = end - pointer - 1; + if (ch == '.' 
|| ch == kEOL) { + if (++parts > 4 || pointer - mark == 0) + break; + int n = ParseNumber(mark, pointer); + if (n < 0) { + type = HOST_TYPE_DOMAIN; + goto end; + } + if (pointer - mark == 10) { + numbers[parts - 1] = n; + break; + } + if (n > 255) { + type = HOST_TYPE_FAILED; + goto end; + } + numbers[parts - 1] = n; + mark = pointer + 1; + if (ch == '.' && remaining == 0) + break; + } + pointer++; + } + + type = HOST_TYPE_IPV4; + if (parts > 0) { + val = numbers[parts - 1]; + for (int n = 0; n < parts - 1; n++) { + double b = 3-n; + val += numbers[n] * pow(256, b); + } + } + + host->value.ipv4 = val; + end: + host->type = type; + return type; + } + + static url_host_type ParseHost(url_host* host, + const char* input, + size_t length, + bool unicode = false) { + url_host_type type = HOST_TYPE_FAILED; + const char* pointer = input; + std::string decoded; + + if (length == 0) + goto end; + + if (pointer[0] == '[') { + if (pointer[length - 1] != ']') + goto end; + return ParseIPv6Host(host, ++pointer, length - 2); + } + + // First, we have to percent decode + if (PercentDecode(input, length, &decoded) < 0) + goto end; + + // If there are any invalid UTF8 byte sequences, we have to fail. + // Unfortunately this means iterating through the string and checking + // each decoded codepoint. + if (IsValidUTF8(&decoded) < 0) + goto end; + + // Then we have to punycode toASCII + if (ToASCII(&decoded, &decoded) < 0) + goto end; + + // If any of the following characters are still present, we have to fail + for (size_t n = 0; n < decoded.size(); n++) { + const char ch = decoded[n]; + if (ch == 0x00 || ch == 0x09 || ch == 0x0a || ch == 0x0d || + ch == 0x20 || ch == '#' || ch == '%' || ch == '/' || + ch == '?' 
|| ch == '@' || ch == '[' || ch == '\\' || + ch == ']') { + goto end; + } + } + + // Check to see if it's an IPv4 IP address + type = ParseIPv4Host(host, decoded.c_str(), decoded.length()); + if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED) + goto end; + + // If the unicode flag is set, run the result through punycode ToUnicode + if (unicode && ToUnicode(&decoded, &decoded) < 0) + goto end; + + // It's not an IPv4 or IPv6 address, it must be a domain + type = HOST_TYPE_DOMAIN; + host->value.domain = decoded; + + end: + host->type = type; + return type; + } + + // Locates the longest sequence of 0 segments in an IPv6 address + // in order to use the :: compression when serializing + static inline uint16_t* FindLongestZeroSequence(uint16_t* values, + size_t len) { + uint16_t* start = values; + uint16_t* end = start + len; + uint16_t* result = nullptr; + + uint16_t* current = nullptr; + unsigned counter = 0, longest = 1; + + while (start < end) { + if (*start == 0) { + if (current == nullptr) + current = start; + counter++; + } else { + if (counter > longest) { + longest = counter; + result = current; + } + counter = 0; + current = nullptr; + } + start++; + } + if (counter > longest) + result = current; + return result; + } + + static url_host_type WriteHost(url_host* host, std::string* dest) { + dest->clear(); + switch (host->type) { + case HOST_TYPE_DOMAIN: + *dest = host->value.domain; + break; + case HOST_TYPE_IPV4: { + dest->reserve(15); + uint32_t value = host->value.ipv4; + for (int n = 0; n < 4; n++) { + char buf[4]; + char* buffer = buf; + snprintf(buffer, sizeof(buf), "%d", value % 256); + dest->insert(0, buf); + if (n < 3) + dest->insert(0, 1, '.'); + value /= 256; + } + break; + } + case HOST_TYPE_IPV6: { + dest->reserve(41); + *dest+= '['; + uint16_t* start = &host->value.ipv6[0]; + uint16_t* compress_pointer = + FindLongestZeroSequence(start, 8); + for (int n = 0; n <= 7; n++) { + uint16_t* piece = &host->value.ipv6[n]; + if (compress_pointer == 
piece) { + *dest += n == 0 ? "::" : ":"; + while (*piece == 0 && n < 8) { + n++; + piece = &host->value.ipv6[n]; + } + if (n == 8) + break; + } + char buf[5]; + char* buffer = buf; + snprintf(buffer, sizeof(buf), "%x", *piece); + *dest += buf; + if (n < 7) + *dest += ':'; + } + *dest += ']'; + break; + } + case HOST_TYPE_FAILED: + break; + } + return host->type; + } + + static int ParseHost(std::string* input, + std::string* output, + bool unicode = false) { + if (input->length() == 0) + return 0; + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, input->c_str(), input->length(), unicode); + if (host.type == HOST_TYPE_FAILED) + return -1; + WriteHost(&host, output); + return 0; + } + + static inline void Copy(Isolate* isolate, + Local ary, + std::vector* vec) { + const int32_t len = ary->Length(); + if (len == 0) + return; // nothing to copy + vec->reserve(len); + for (int32_t n = 0; n < len; n++) { + Local val = ary->Get(n); + if (val->IsString()) { + Utf8Value value(isolate, val.As()); + vec->push_back(std::string(*value, value.length())); + } + } + } + + static inline Local Copy(Isolate* isolate, + std::vector vec) { + Local ary = Array::New(isolate, vec.size()); + for (size_t n = 0; n < vec.size(); n++) + ary->Set(n, UTF8STRING(isolate, vec[n])); + return ary; + } + + static inline void HarvestBase(Environment* env, + struct url_data* base, + Local base_obj) { + Local flags = GET(env, base_obj, "flags"); + if (flags->IsInt32()) + base->flags = flags->Int32Value(); + + GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME); + GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); + GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); + GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); + GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY); + GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT); + Local port = GET(env, base_obj, "port"); + if (port->IsInt32()) + base->port = 
port->Int32Value(); + Local path = GET(env, base_obj, "path"); + if (path->IsArray()) { + base->flags |= URL_FLAGS_HAS_PATH; + Copy(env->isolate(), path.As(), &(base->path)); + } + } + + static inline void HarvestContext(Environment* env, + struct url_data* context, + Local context_obj) { + Local flags = GET(env, context_obj, "flags"); + if (flags->IsInt32()) { + int32_t _flags = flags->Int32Value(); + if (_flags & URL_FLAGS_SPECIAL) + context->flags |= URL_FLAGS_SPECIAL; + if (_flags & URL_FLAGS_CANNOT_BE_BASE) + context->flags |= URL_FLAGS_CANNOT_BE_BASE; + } + Local scheme = GET(env, context_obj, "scheme"); + if (scheme->IsString()) { + Utf8Value value(env->isolate(), scheme); + context->scheme.assign(*value, value.length()); + } + Local port = GET(env, context_obj, "port"); + if (port->IsInt32()) + context->port = port->Int32Value(); + } + + // Single dot segment can be ".", "%2e", or "%2E" + static inline bool IsSingleDotSegment(std::string str) { + switch (str.size()) { + case 1: + return str == "."; + case 3: + return str[0] == '%' && + str[1] == '2' && + TO_LOWER(str[2]) == 'e'; + default: + return false; + } + } + + // Double dot segment can be: + // "..", ".%2e", ".%2E", "%2e.", "%2E.", + // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" + static inline bool IsDoubleDotSegment(std::string str) { + switch (str.size()) { + case 2: + return str == ".."; + case 4: + if (str[0] != '.' && str[0] != '%') + return false; + return ((str[0] == '.' 
&& + str[1] == '%' && + str[2] == '2' && + TO_LOWER(str[3]) == 'e') || + (str[0] == '%' && + str[1] == '2' && + TO_LOWER(str[2]) == 'e' && + str[3] == '.')); + case 6: + return (str[0] == '%' && + str[1] == '2' && + TO_LOWER(str[2]) == 'e' && + str[3] == '%' && + str[4] == '2' && + TO_LOWER(str[5]) == 'e'); + default: + return false; + } + } + + static void Parse(Environment* env, + Local recv, + const char* input, + const size_t len, + enum url_parse_state override, + Local base_obj, + Local context_obj, + Local cb) { + Isolate* isolate = env->isolate(); + Local context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const bool has_base = base_obj->IsObject(); + bool atflag = false; + bool sbflag = false; + bool uflag = false; + bool base_is_file = false; + int wskip = 0; + + struct url_data base; + struct url_data url; + if (context_obj->IsObject()) + HarvestContext(env, &url, context_obj); + if (has_base) + HarvestBase(env, &base, base_obj); + + std::string buffer; + url.scheme.reserve(len); + url.username.reserve(len); + url.password.reserve(len); + url.host.reserve(len); + url.path.reserve(len); + url.query.reserve(len); + url.fragment.reserve(len); + buffer.reserve(len); + + // Set the initial parse state. + const bool state_override = override != kUnknownState; + enum url_parse_state state = state_override ? override : kSchemeStart; + + const char* p = input; + const char* end = input + len; + + if (state < kSchemeStart || state > kFragment) { + INVALID_PARSE_STATE(); + goto done; + } + + while (p <= end) { + const char ch = p < end ? p[0] : kEOL; + + if (TAB_AND_NEWLINE(ch)) { + if (state == kAuthority) { + // It's necessary to keep track of how much whitespace + // is being ignored when in kAuthority state because of + // how the buffer is managed. 
TODO: See if there's a better + // way + wskip++; + } + p++; + continue; + } + + bool special = url.flags & URL_FLAGS_SPECIAL; + const bool special_back_slash = (special && ch == '\\'); + switch (state) { + case kSchemeStart: + if (ASCII_ALPHA(ch)) { + buffer += TO_LOWER(ch); + state = kScheme; + } else if (!state_override) { + state = kNoScheme; + continue; + } else { + TERMINATE() + } + break; + case kScheme: + if (SCHEME_CHAR(ch)) { + buffer += TO_LOWER(ch); + p++; + continue; + } else if (ch == ':' || (state_override && ch == kEOL)) { + buffer += ':'; + if (buffer.size() > 0) { + SET_HAVE_SCHEME() + url.scheme = buffer; + } + if (IsSpecial(url.scheme)) { + SPECIAL() + } else { + url.flags &= ~URL_FLAGS_SPECIAL; + } + if (state_override) + goto done; + buffer.clear(); + if (url.scheme == "file:") { + state = kFile; + } else if (special && + has_base && + DOES_HAVE_SCHEME(base) && + url.scheme == base.scheme) { + state = kSpecialRelativeOrAuthority; + } else if (special) { + state = kSpecialAuthoritySlashes; + } else if (p[1] == '/') { + state = kPathOrAuthority; + p++; + } else { + CANNOT_BE_BASE() + SET_HAVE_PATH() + url.path.push_back(""); + state = kCannotBeBase; + } + } else if (!state_override) { + buffer.clear(); + state = kNoScheme; + p = input; + continue; + } else { + TERMINATE() + } + break; + case kNoScheme: + if (!has_base || (IS_CANNOT_BE_BASE(base.flags) && ch != '#')) { + FAILED() + } else if (IS_CANNOT_BE_BASE(base.flags) && ch == '#') { + SET_HAVE_SCHEME() + url.scheme = base.scheme; + if (IsSpecial(url.scheme)) { + SPECIAL() + } else { + url.flags &= ~URL_FLAGS_SPECIAL; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + if (DOES_HAVE_FRAGMENT(base)) { + SET_HAVE_FRAGMENT() + url.fragment = base.fragment; + } + CANNOT_BE_BASE() + state = kFragment; + } else if (has_base && + DOES_HAVE_SCHEME(base) && + base.scheme != "file:") { + state 
= kRelative; + continue; + } else { + SET_HAVE_SCHEME() + url.scheme = "file:"; + SPECIAL() + state = kFile; + continue; + } + break; + case kSpecialRelativeOrAuthority: + if (ch == '/' && p[1] == '/') { + state = kSpecialAuthorityIgnoreSlashes; + p++; + } else { + state = kRelative; + continue; + } + break; + case kPathOrAuthority: + if (ch == '/') { + state = kAuthority; + } else { + state = kPath; + continue; + } + break; + case kRelative: + SET_HAVE_SCHEME() + url.scheme = base.scheme; + if (IsSpecial(url.scheme)) { + SPECIAL() + } else { + url.flags &= ~URL_FLAGS_SPECIAL; + } + switch (ch) { + case kEOL: + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + url.port = base.port; + break; + case '/': + state = kRelativeSlash; + break; + case '?': + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + url.port = base.port; + state = kQuery; + break; + case '#': + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + url.port = base.port; + state = kFragment; + break; + 
default: + if (special_back_slash) { + state = kRelativeSlash; + } else { + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + if (!url.path.empty()) + url.path.pop_back(); + } + url.port = base.port; + state = kPath; + continue; + } + } + break; + case kRelativeSlash: + if (ch == '/' || special_back_slash) { + state = kSpecialAuthorityIgnoreSlashes; + } else { + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + url.port = base.port; + state = kPath; + continue; + } + break; + case kSpecialAuthoritySlashes: + state = kSpecialAuthorityIgnoreSlashes; + if (ch == '/' && p[1] == '/') { + p++; + } else { + continue; + } + break; + case kSpecialAuthorityIgnoreSlashes: + if (ch != '/' && ch != '\\') { + state = kAuthority; + continue; + } + break; + case kAuthority: + if (ch == '@') { + if (atflag) { + buffer.reserve(buffer.size() + 3); + buffer.insert(0, "%40"); + } + atflag = true; + const size_t blen = buffer.size(); + if (blen > 0 && buffer[0] != ':') { + SET_HAVE_USERNAME() + } + for (size_t n = 0; n < blen; n++) { + const char bch = buffer[n]; + if (bch == ':') { + SET_HAVE_PASSWORD() + if (!uflag) { + uflag = true; + continue; + } + } + if (uflag) { + AppendOrEscape(&url.password, bch, UserinfoEncodeSet); + } else { + AppendOrEscape(&url.username, bch, UserinfoEncodeSet); + } + } + buffer.clear(); + } else if (ch == kEOL || + ch == '/' || + ch == '?' 
|| + ch == '#' || + special_back_slash) { + p -= buffer.size() + 1 + wskip; + buffer.clear(); + state = kHost; + } else { + buffer += ch; + } + break; + case kHost: + case kHostname: + if (ch == ':' && !sbflag) { + if (special && buffer.size() == 0) + FAILED() + SET_HAVE_HOST() + if (ParseHost(&buffer, &url.host) < 0) + FAILED() + buffer.clear(); + state = kPort; + if (override == kHostname) + TERMINATE() + } else if (ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + p--; + if (special && buffer.size() == 0) + FAILED() + SET_HAVE_HOST() + if (ParseHost(&buffer, &url.host) < 0) + FAILED() + buffer.clear(); + state = kPathStart; + if (state_override) + TERMINATE() + } else { + if (ch == '[') + sbflag = true; + if (ch == ']') + sbflag = false; + buffer += TO_LOWER(ch); + } + break; + case kPort: + if (ASCII_DIGIT(ch)) { + buffer += ch; + } else if (state_override || + ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + if (buffer.size() > 0) { + int port = 0; + for (size_t i = 0; i < buffer.size(); i++) + port = port * 10 + buffer[i] - '0'; + if (port >= 0 && port <= 0xffff) { + url.port = NormalizePort(url.scheme, port); + } else if (!state_override) { + FAILED() + } + buffer.clear(); + } + state = kPathStart; + continue; + } else { + FAILED(); + } + break; + case kFile: + base_is_file = ( + has_base && + DOES_HAVE_SCHEME(base) && + base.scheme == "file:"); + switch (ch) { + case kEOL: + if (base_is_file) { + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + } + break; + case '\\': + case '/': + state = kFileSlash; + break; + case '?': + if (base_is_file) { + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + 
SET_HAVE_QUERY() + state = kQuery; + } + break; + case '#': + if (base_is_file) { + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + state = kFragment; + } + break; + default: + if (base_is_file && + (!WINDOWS_DRIVE_LETTER(ch, p[1]) || + end - p == 1 || + (p[2] != '/' && + p[2] != '\\' && + p[2] != '?' && + p[2] != '#'))) { + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + if (!url.path.empty()) + url.path.pop_back(); + } + state = kPath; + continue; + } + break; + case kFileSlash: + if (ch == '/' || ch == '\\') { + state = kFileHost; + } else { + if (has_base && + DOES_HAVE_SCHEME(base) && + base.scheme == "file:" && + DOES_HAVE_PATH(base) && + base.path.size() > 0 && + NORMALIZED_WINDOWS_DRIVE_LETTER(base.path[0])) { + SET_HAVE_PATH() + url.path.push_back(base.path[0]); + } + state = kPath; + continue; + } + break; + case kFileHost: + if (ch == kEOL || + ch == '/' || + ch == '\\' || + ch == '?' || + ch == '#') { + if (buffer.size() == 2 && + WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { + state = kPath; + } else if (buffer.size() == 0) { + state = kPathStart; + } else { + if (buffer != "localhost") { + SET_HAVE_HOST() + if (ParseHost(&buffer, &url.host) < 0) + FAILED() + } + buffer.clear(); + state = kPathStart; + } + continue; + } else { + buffer += ch; + } + break; + case kPathStart: + state = kPath; + if (ch != '/' && !special_back_slash) + continue; + break; + case kPath: + if (ch == kEOL || + ch == '/' || + special_back_slash || + (!state_override && (ch == '?' 
|| ch == '#'))) { + if (IsDoubleDotSegment(buffer)) { + if (!url.path.empty()) + url.path.pop_back(); + if (ch != '/' && !special_back_slash) { + SET_HAVE_PATH() + url.path.push_back(""); + } + } else if (IsSingleDotSegment(buffer)) { + if (ch != '/' && !special_back_slash) { + SET_HAVE_PATH(); + url.path.push_back(""); + } + } else { + if (DOES_HAVE_SCHEME(url) && + url.scheme == "file:" && + url.path.empty() && + buffer.size() == 2 && + WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { + url.flags &= ~URL_FLAGS_HAS_HOST; + buffer[1] = ':'; + } + SET_HAVE_PATH() + std::string segment(buffer.c_str(), buffer.size()); + url.path.push_back(segment); + } + buffer.clear(); + if (ch == '?') { + SET_HAVE_QUERY() + state = kQuery; + } else if (ch == '#') { + state = kFragment; + } + } else { + if (ch == '%' && p[1] == '2' && TO_LOWER(p[2]) == 'e') { + buffer += '.'; + p += 2; + } else { + AppendOrEscape(&buffer, ch, DefaultEncodeSet); + } + } + break; + case kCannotBeBase: + switch (ch) { + case '?': + state = kQuery; + break; + case '#': + state = kFragment; + break; + default: + if (url.path.size() == 0) + url.path.push_back(""); + if (url.path.size() > 0 && ch != kEOL) + AppendOrEscape(&url.path[0], ch, SimpleEncodeSet); + } + break; + case kQuery: + if (ch == kEOL || (!state_override && ch == '#')) { + SET_HAVE_QUERY() + url.query = buffer; + buffer.clear(); + if (ch == '#') + state = kFragment; + } else { + AppendOrEscape(&buffer, ch, QueryEncodeSet); + } + break; + case kFragment: + switch (ch) { + case kEOL: + SET_HAVE_FRAGMENT() + url.fragment = buffer; + break; + case 0: + break; + default: + buffer += ch; + } + break; + default: + INVALID_PARSE_STATE() + goto done; + } + + p++; + } + + done: + + // Define the return value placeholders + const Local undef = Undefined(isolate); + Local argv[9] = { + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + + argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); + if 
(!IS_FAILED(url.flags)) { + if (DOES_HAVE_SCHEME(url)) + argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str()); + if (DOES_HAVE_USERNAME(url)) + argv[ARG_USERNAME] = UTF8STRING(isolate, url.username); + if (DOES_HAVE_PASSWORD(url)) + argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password); + if (DOES_HAVE_HOST(url)) + argv[ARG_HOST] = UTF8STRING(isolate, url.host); + if (DOES_HAVE_QUERY(url)) + argv[ARG_QUERY] = UTF8STRING(isolate, url.query); + if (DOES_HAVE_FRAGMENT(url)) + argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment); + if (url.port > -1) + argv[ARG_PORT] = Integer::New(isolate, url.port); + if (DOES_HAVE_PATH(url)) + argv[ARG_PATH] = Copy(isolate, url.path); + } + + cb->Call(context, recv, 9, argv); + } + + static void Parse(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 5); + CHECK(args[0]->IsString()); + CHECK(args[2]->IsUndefined() || + args[2]->IsNull() || + args[2]->IsObject()); + CHECK(args[3]->IsUndefined() || + args[3]->IsNull() || + args[3]->IsObject()); + CHECK(args[4]->IsFunction()); + Utf8Value input(env->isolate(), args[0]); + enum url_parse_state override = kUnknownState; + if (args[1]->IsNumber()) + override = (enum url_parse_state)(args[1]->Uint32Value()); + + Parse(env, args.This(), + *input, input.length(), + override, + args[2].As(), + args[3].As(), + args[4].As()); + } + + static void EncodeAuthSet(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + std::string output; + const size_t len = value.length(); + output.reserve(len); + for (size_t n = 0; n < len; n++) { + const char ch = (*value)[n]; + AppendOrEscape(&output, ch, UserinfoEncodeSet); + } + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + output.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); + } + + static void DomainToASCII(const 
FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, *value, value.length()); + if (host.type == HOST_TYPE_FAILED) { + args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + return; + } + std::string out; + WriteHost(&host, &out); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + out.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); + } + + static void DomainToUnicode(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, *value, value.length(), true); + if (host.type == HOST_TYPE_FAILED) { + args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + return; + } + std::string out; + WriteHost(&host, &out); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + out.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); + } + + static void Init(Local target, + Local unused, + Local context, + void* priv) { + Environment* env = Environment::GetCurrent(context); + env->SetMethod(target, "parse", Parse); + env->SetMethod(target, "encodeAuth", EncodeAuthSet); + env->SetMethod(target, "domainToASCII", DomainToASCII); + env->SetMethod(target, "domainToUnicode", DomainToUnicode); + +#define XX(name, _) NODE_DEFINE_CONSTANT(target, name); + FLAGS(XX) +#undef XX + +#define XX(name) NODE_DEFINE_CONSTANT(target, name); + ARGS(XX) + PARSESTATES(XX) +#undef XX + } +} // namespace url +} // namespace node + +NODE_MODULE_CONTEXT_AWARE_BUILTIN(url, node::url::Init) diff --git a/src/node_url.h b/src/node_url.h new file mode 100644 index 00000000000000..198c29938b7d22 --- /dev/null +++ b/src/node_url.h @@ -0,0 +1,538 @@ 
+#ifndef SRC_NODE_URL_H_ +#define SRC_NODE_URL_H_ + +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#include "node.h" +#include + +namespace node { +namespace url { + +#define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#define TAB_AND_NEWLINE(ch) \ + (ch == 0x09 || ch == 0x0a || ch == 0x0d) +#define ASCII_DIGIT(ch) \ + (ch >= 0x30 && ch <= 0x39) +#define ASCII_HEX_DIGIT(ch) \ + (ASCII_DIGIT(ch) || (ch >= 0x41 && ch <= 0x46) || (ch >= 0x61 && ch <= 0x66)) +#define ASCII_ALPHA(ch) \ + ((ch >= 0x41 && ch <= 0x5a) || (ch >= 0x61 && ch <= 0x7a)) +#define ASCII_ALPHANUMERIC(ch) \ + (ASCII_DIGIT(ch) || ASCII_ALPHA(ch)) +#define TO_LOWER(ch) \ + (ASCII_ALPHA(ch) ? (ch | 0x20) : ch) +#define SCHEME_CHAR(ch) \ + (ASCII_ALPHANUMERIC(ch) || ch == '+' || ch == '-' || ch == '.') +#define WINDOWS_DRIVE_LETTER(ch, next) \ + (ASCII_ALPHA(ch) && (next == ':' || next == '|')) +#define NORMALIZED_WINDOWS_DRIVE_LETTER(str) \ + (str.length() == 2 && \ + ASCII_ALPHA(str[0]) && \ + str[1] == ':') + +static const char* hex[256] = { + "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", + "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", + "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", + "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", + "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", + "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F", + "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37", + "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", + "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47", + "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F", + "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57", + "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F", + "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67", + "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F", + "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77", + "%78", "%79", "%7A", "%7B", 
"%7C", "%7D", "%7E", "%7F", + "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", + "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", + "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", + "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", + "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", + "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", + "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", + "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", + "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", + "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", + "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", + "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", + "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", + "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", + "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", + "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" +}; + +static const uint8_t SIMPLE_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t DEFAULT_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 
12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t USERINFO_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 
0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t QUERY_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 
| 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 
| 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +// Must return true if the character is to be percent-encoded +typedef bool (*must_escape_cb)(const unsigned char ch); + +// Appends ch to str. If test(ch) returns true, the ch will +// be percent-encoded then appended. +static inline void AppendOrEscape(std::string* str, + const unsigned char ch, + must_escape_cb test) { + if (test(ch)) + *str += hex[ch]; + else + *str += ch; +} + +static inline bool SimpleEncodeSet(const unsigned char ch) { + return BIT_AT(SIMPLE_ENCODE_SET, ch); +} + +static inline bool DefaultEncodeSet(const unsigned char ch) { + return BIT_AT(DEFAULT_ENCODE_SET, ch); +} + +static inline bool UserinfoEncodeSet(const unsigned char ch) { + return BIT_AT(USERINFO_ENCODE_SET, ch); +} + +static inline bool QueryEncodeSet(const unsigned char ch) { + return BIT_AT(QUERY_ENCODE_SET, ch); +} + +static inline unsigned hex2bin(const char ch) { + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'A' && ch <= 'F') + return 10 + (ch - 'A'); + if (ch >= 'a' && ch <= 'f') + return 10 + (ch - 'a'); + return static_cast(-1); +} + +static inline int PercentDecode(const char* input, + size_t len, + std::string* dest) { + if (len == 0) + return 0; + dest->reserve(len); + const char* pointer = input; + const char* end = input + len; + size_t remaining = pointer - end - 1; + while (pointer < end) { + const char ch = pointer[0]; + remaining = (end - pointer) + 1; + if (ch != '%' || remaining < 2 || + (ch == '%' && + (!ASCII_HEX_DIGIT(pointer[1]) || + !ASCII_HEX_DIGIT(pointer[2])))) { + *dest += ch; + pointer++; + continue; + } else { + unsigned a = hex2bin(pointer[1]); + unsigned b = hex2bin(pointer[2]); + char c = static_cast(a * 16 + b); + *dest += static_cast(c); + pointer += 3; + } + } + return 0; +} + +#define SPECIALS(XX) \ + XX("ftp:", 21) \ + XX("file:", -1) \ + 
XX("gopher:", 70) \ + XX("http:", 80) \ + XX("https:", 443) \ + XX("ws:", 80) \ + XX("wss:", 443) + +#define PARSESTATES(XX) \ + XX(kSchemeStart) \ + XX(kScheme) \ + XX(kNoScheme) \ + XX(kSpecialRelativeOrAuthority) \ + XX(kPathOrAuthority) \ + XX(kRelative) \ + XX(kRelativeSlash) \ + XX(kSpecialAuthoritySlashes) \ + XX(kSpecialAuthorityIgnoreSlashes) \ + XX(kAuthority) \ + XX(kHost) \ + XX(kHostname) \ + XX(kPort) \ + XX(kFile) \ + XX(kFileSlash) \ + XX(kFileHost) \ + XX(kPathStart) \ + XX(kPath) \ + XX(kCannotBeBase) \ + XX(kQuery) \ + XX(kFragment) + +#define FLAGS(XX) \ + XX(URL_FLAGS_NONE, 0) \ + XX(URL_FLAGS_FAILED, 0x01) \ + XX(URL_FLAGS_CANNOT_BE_BASE, 0x02) \ + XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ + XX(URL_FLAGS_TERMINATED, 0x08) \ + XX(URL_FLAGS_SPECIAL, 0x10) \ + XX(URL_FLAGS_HAS_SCHEME, 0x20) \ + XX(URL_FLAGS_HAS_USERNAME, 0x40) \ + XX(URL_FLAGS_HAS_PASSWORD, 0x80) \ + XX(URL_FLAGS_HAS_HOST, 0x100) \ + XX(URL_FLAGS_HAS_PATH, 0x200) \ + XX(URL_FLAGS_HAS_QUERY, 0x400) \ + XX(URL_FLAGS_HAS_FRAGMENT, 0x800) + +#define ARGS(XX) \ + XX(ARG_FLAGS) \ + XX(ARG_PROTOCOL) \ + XX(ARG_USERNAME) \ + XX(ARG_PASSWORD) \ + XX(ARG_HOST) \ + XX(ARG_PORT) \ + XX(ARG_PATH) \ + XX(ARG_QUERY) \ + XX(ARG_FRAGMENT) + +static const char kEOL = -1; + +enum url_parse_state { + kUnknownState = -1, +#define XX(name) name, + PARSESTATES(XX) +#undef XX +} url_parse_state; + +enum url_flags { +#define XX(name, val) name = val, + FLAGS(XX) +#undef XX +} url_flags; + +enum url_cb_args { +#define XX(name) name, + ARGS(XX) +#undef XX +} url_cb_args; + +static inline bool IsSpecial(std::string scheme) { +#define XX(name, _) if (scheme == name) return true; + SPECIALS(XX); +#undef XX + return false; +} + +static inline int NormalizePort(std::string scheme, int p) { +#define XX(name, port) if (scheme == name && p == port) return -1; + SPECIALS(XX); +#undef XX + return p; +} + +struct url_data { + int32_t flags = URL_FLAGS_NONE; + int port = -1; + std::string scheme; + std::string 
username; + std::string password; + std::string host; + std::string query; + std::string fragment; + std::vector path; +}; + +union url_host_value { + std::string domain; + uint32_t ipv4; + uint16_t ipv6[8]; + ~url_host_value() {} +}; + +enum url_host_type { + HOST_TYPE_FAILED = -1, + HOST_TYPE_DOMAIN = 0, + HOST_TYPE_IPV4 = 1, + HOST_TYPE_IPV6 = 2 +}; + +struct url_host { + url_host_value value; + enum url_host_type type; +}; +} // namespace url + +} // namespace node + +#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#endif // SRC_NODE_URL_H_ diff --git a/test/fixtures/url-setter-tests.json b/test/fixtures/url-setter-tests.json new file mode 100644 index 00000000000000..10d6895828b5f7 --- /dev/null +++ b/test/fixtures/url-setter-tests.json @@ -0,0 +1,1134 @@ +{ + "comment": [ + "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members", + "", + "This file contains a JSON object.", + "Other than 'comment', each key is an attribute of the `URL` interface", + "defined in WHATWG’s URL Standard.", + "The values are arrays of test case objects for that attribute.", + "", + "To run a test case for the attribute `attr`:", + "", + "* Create a new `URL` object with the value for the 'href' key", + " the constructor single parameter. (Without a base URL.)", + " This must not throw.", + "* Set the attribute `attr` to (invoke its setter with)", + " with the value of for 'new_value' key.", + "* The value for the 'expected' key is another object.", + " For each `key` / `value` pair of that object,", + " get the attribute `key` (invoke its getter).", + " The returned string must be equal to `value`.", + "", + "Note: the 'href' setter is already covered by urltestdata.json.", + "Source: https://github.com/w3c/web-platform-tests/tree/master/url" + ], + "protocol": [ + { + "comment": "The empty string is not a valid scheme. 
Setter leaves the URL unchanged.", + "href": "a://example.net", + "new_value": "", + "expected": { + "href": "a://example.net/", + "protocol": "a:" + } + }, + { + "href": "a://example.net", + "new_value": "b", + "expected": { + "href": "b://example.net/", + "protocol": "b:" + } + }, + { + "comment": "Upper-case ASCII is lower-cased", + "href": "a://example.net", + "new_value": "B", + "expected": { + "href": "b://example.net/", + "protocol": "b:" + } + }, + { + "comment": "Non-ASCII is rejected", + "href": "a://example.net", + "new_value": "é", + "expected": { + "href": "a://example.net/", + "protocol": "a:" + } + }, + { + "comment": "No leading digit", + "href": "a://example.net", + "new_value": "0b", + "expected": { + "href": "a://example.net/", + "protocol": "a:" + } + }, + { + "comment": "No leading punctuation", + "href": "a://example.net", + "new_value": "+b", + "expected": { + "href": "a://example.net/", + "protocol": "a:" + } + }, + { + "href": "a://example.net", + "new_value": "bC0+-.", + "expected": { + "href": "bc0+-.://example.net/", + "protocol": "bc0+-.:" + } + }, + { + "comment": "Only some punctuation is acceptable", + "href": "a://example.net", + "new_value": "b,c", + "expected": { + "href": "a://example.net/", + "protocol": "a:" + } + }, + { + "comment": "Non-ASCII is rejected", + "href": "a://example.net", + "new_value": "bé", + "expected": { + "href": "a://example.net/", + "protocol": "a:" + } + }, + { + "comment": "Can’t switch from special scheme to non-special. 
Note: this may change, see https://github.com/whatwg/url/issues/104", + "href": "http://example.net", + "new_value": "b", + "expected": { + "href": "http://example.net/", + "protocol": "http:" + } + }, + { + "comment": "Cannot-be-a-base URL doesn’t have a host, but URL in a special scheme must.", + "href": "mailto:me@example.net", + "new_value": "http", + "expected": { + "href": "mailto:me@example.net", + "protocol": "mailto:" + } + }, + { + "comment": "Can’t switch from non-special scheme to special. Note: this may change, see https://github.com/whatwg/url/issues/104", + "href": "ssh://me@example.net", + "new_value": "http", + "expected": { + "href": "ssh://me@example.net/", + "protocol": "ssh:" + } + }, + { + "comment": "Stuff after the first ':' is ignored", + "href": "http://example.net", + "new_value": "https:foo : bar", + "expected": { + "href": "https://example.net/", + "protocol": "https:" + } + }, + { + "comment": "Stuff after the first ':' is ignored", + "href": "data:text/html,

Test", + "new_value": "view-source+data:foo : bar", + "expected": { + "href": "view-source+data:text/html,

Test", + "protocol": "view-source+data:" + } + } + ], + "username": [ + { + "comment": "No host means no username", + "href": "file:///home/you/index.html", + "new_value": "me", + "expected": { + "href": "file:///home/you/index.html", + "username": "" + } + }, + { + "comment": "No host means no username", + "href": "unix:/run/foo.socket", + "new_value": "me", + "expected": { + "href": "unix:/run/foo.socket", + "username": "" + } + }, + { + "comment": "Cannot-be-a-base means no username", + "href": "mailto:you@example.net", + "new_value": "me", + "expected": { + "href": "mailto:you@example.net", + "username": "" + } + }, + { + "href": "http://example.net", + "new_value": "me", + "expected": { + "href": "http://me@example.net/", + "username": "me" + } + }, + { + "href": "http://:secret@example.net", + "new_value": "me", + "expected": { + "href": "http://me:secret@example.net/", + "username": "me" + } + }, + { + "href": "http://me@example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "username": "" + } + }, + { + "href": "http://me:secret@example.net", + "new_value": "", + "expected": { + "href": "http://:secret@example.net/", + "username": "" + } + }, + { + "comment": "UTF-8 percent encoding with the userinfo encode set.", + "href": "http://example.net", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "http://%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", + "username": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is.", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://%c3%89t%C3%A9@example.net/", + "username": "%c3%89t%C3%A9" + } + } + ], + "password": [ + { + "comment": "No host 
means no password", + "href": "file:///home/me/index.html", + "new_value": "secret", + "expected": { + "href": "file:///home/me/index.html", + "password": "" + } + }, + { + "comment": "No host means no password", + "href": "unix:/run/foo.socket", + "new_value": "secret", + "expected": { + "href": "unix:/run/foo.socket", + "password": "" + } + }, + { + "comment": "Cannot-be-a-base means no password", + "href": "mailto:me@example.net", + "new_value": "secret", + "expected": { + "href": "mailto:me@example.net", + "password": "" + } + }, + { + "href": "http://example.net", + "new_value": "secret", + "expected": { + "href": "http://:secret@example.net/", + "password": "secret" + } + }, + { + "href": "http://me@example.net", + "new_value": "secret", + "expected": { + "href": "http://me:secret@example.net/", + "password": "secret" + } + }, + { + "href": "http://:secret@example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "password": "" + } + }, + { + "href": "http://me:secret@example.net", + "new_value": "", + "expected": { + "href": "http://me@example.net/", + "password": "" + } + }, + { + "comment": "UTF-8 percent encoding with the userinfo encode set.", + "href": "http://example.net", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "http://:%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", + "password": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is.", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://:%c3%89t%C3%A9@example.net/", + "password": "%c3%89t%C3%A9" + } + } + ], + "host": [ + { + "comment": "Cannot-be-a-base means no host", + "href": "mailto:me@example.net", + 
"new_value": "example.com", + "expected": { + "href": "mailto:me@example.net", + "host": "" + } + }, + { + "comment": "Cannot-be-a-base means no password", + "href": "data:text/plain,Stuff", + "new_value": "example.net", + "expected": { + "href": "data:text/plain,Stuff", + "host": "" + } + }, + { + "href": "http://example.net", + "new_value": "example.com:8080", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port number is unchanged if not specified in the new value", + "href": "http://example.net:8080", + "new_value": "example.com", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113", + "href": "http://example.net:8080", + "new_value": "example.com:", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + + "comment": "The empty host is not valid for special schemes", + "href": "http://example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "host": "example.net" + } + }, + { + "comment": "The empty host is OK for non-special schemes", + "href": "view-source+http://example.net/foo", + "new_value": "", + "expected": { + "href": "view-source+http:///foo", + "host": "" + } + }, + { + "comment": "Path-only URLs can gain a host", + "href": "a:/foo", + "new_value": "example.net", + "expected": { + "href": "a://example.net/foo", + "host": "example.net" + } + }, + { + "comment": "IPv4 address syntax is normalized", + "href": "http://example.net", + "new_value": "0x7F000001:8080", + "expected": { + "href": "http://127.0.0.1:8080/", + "host": "127.0.0.1:8080", + "hostname": "127.0.0.1", + "port": "8080" + } + }, + { + "comment": 
"IPv6 address syntax is normalized", + "href": "http://example.net", + "new_value": "[::0:01]:2", + "expected": { + "href": "http://[::1]:2/", + "host": "[::1]:2", + "hostname": "[::1]", + "port": "2" + } + }, + { + "comment": "Default port number is removed", + "href": "http://example.net", + "new_value": "example.com:80", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Default port number is removed", + "href": "https://example.net", + "new_value": "example.com:443", + "expected": { + "href": "https://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Default port number is only removed for the relevant scheme", + "href": "https://example.net", + "new_value": "example.com:80", + "expected": { + "href": "https://example.com:80/", + "host": "example.com:80", + "hostname": "example.com", + "port": "80" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com/stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080/stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com?stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a ? 
delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080?stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com#stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080#stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com:8080\\stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "\\ is not a delimiter for non-special schemes, and it’s invalid in a domain", + "href": "view-source+http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "view-source+http://example.net/path", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "view-source+http://example.net/path", + "new_value": "example.com:8080stuff2", + "expected": { + "href": "view-source+http://example.com:8080/path", + "host": "example.com:8080", + "hostname": 
"example.com", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "example.com:8080stuff2", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "example.com:8080+2", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port numbers are 16 bit integers", + "href": "http://example.net/path", + "new_value": "example.com:65535", + "expected": { + "href": "http://example.com:65535/path", + "host": "example.com:65535", + "hostname": "example.com", + "port": "65535" + } + }, + { + "comment": "Port numbers are 16 bit integers, overflowing is an error. 
Hostname is still set, though.", + "href": "http://example.net/path", + "new_value": "example.com:65536", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + } + ], + "hostname": [ + { + "comment": "Cannot-be-a-base means no host", + "href": "mailto:me@example.net", + "new_value": "example.com", + "expected": { + "href": "mailto:me@example.net", + "host": "" + } + }, + { + "comment": "Cannot-be-a-base means no password", + "href": "data:text/plain,Stuff", + "new_value": "example.net", + "expected": { + "href": "data:text/plain,Stuff", + "host": "" + } + }, + { + "href": "http://example.net:8080", + "new_value": "example.com", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "The empty host is not valid for special schemes", + "href": "http://example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "host": "example.net" + } + }, + { + "comment": "The empty host is OK for non-special schemes", + "href": "view-source+http://example.net/foo", + "new_value": "", + "expected": { + "href": "view-source+http:///foo", + "host": "" + } + }, + { + "comment": "Path-only URLs can gain a host", + "href": "a:/foo", + "new_value": "example.net", + "expected": { + "href": "a://example.net/foo", + "host": "example.net" + } + }, + { + "comment": "IPv4 address syntax is normalized", + "href": "http://example.net:8080", + "new_value": "0x7F000001", + "expected": { + "href": "http://127.0.0.1:8080/", + "host": "127.0.0.1:8080", + "hostname": "127.0.0.1", + "port": "8080" + } + }, + { + "comment": "IPv6 address syntax is normalized", + "href": "http://example.net", + "new_value": "[::0:01]", + "expected": { + "href": "http://[::1]/", + "host": "[::1]", + "hostname": "[::1]", + "port": "" + } + }, + { + "comment": "Stuff after a : delimiter is ignored", + "href": 
"http://example.net/path", + "new_value": "example.com:8080", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a : delimiter is ignored", + "href": "http://example.net:8080/path", + "new_value": "example.com:", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com/stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com?stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com#stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "\\ is not a delimiter for non-special schemes, and it’s invalid in a domain", + "href": "view-source+http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "view-source+http://example.net/path", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + } + ], + "port": [ + { + "href": "http://example.net", + "new_value": "8080", + "expected": { + "href": "http://example.net:8080/", + "host": 
"example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113", + "href": "http://example.net:8080", + "new_value": "", + "expected": { + "href": "http://example.net:8080/", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Default port number is removed", + "href": "http://example.net:8080", + "new_value": "80", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Default port number is removed", + "href": "https://example.net:4433", + "new_value": "443", + "expected": { + "href": "https://example.net/", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Default port number is only removed for the relevant scheme", + "href": "https://example.net", + "new_value": "80", + "expected": { + "href": "https://example.net:80/", + "host": "example.net:80", + "hostname": "example.net", + "port": "80" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080/stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a ? 
delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080?stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080#stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "8080\\stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "view-source+http://example.net/path", + "new_value": "8080stuff2", + "expected": { + "href": "view-source+http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "8080stuff2", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "8080+2", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Port numbers are 16 bit integers", + "href": "http://example.net/path", + "new_value": "65535", + "expected": { + "href": "http://example.net:65535/path", + "host": "example.net:65535", + "hostname": "example.net", + "port": 
"65535" + } + }, + { + "comment": "Port numbers are 16 bit integers, overflowing is an error", + "href": "http://example.net:8080/path", + "new_value": "65536", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + } + ], + "pathname": [ + { + "comment": "Cannot-be-a-base don’t have a path", + "href": "mailto:me@example.net", + "new_value": "/foo", + "expected": { + "href": "mailto:me@example.net", + "pathname": "me@example.net" + } + }, + { + "href": "unix:/run/foo.socket?timeout=10", + "new_value": "/var/log/../run/bar.socket", + "expected": { + "href": "unix:/var/run/bar.socket?timeout=10", + "pathname": "/var/run/bar.socket" + } + }, + { + "href": "https://example.net#nav", + "new_value": "home", + "expected": { + "href": "https://example.net/home#nav", + "pathname": "/home" + } + }, + { + "href": "https://example.net#nav", + "new_value": "../home", + "expected": { + "href": "https://example.net/home#nav", + "pathname": "/home" + } + }, + { + "comment": "\\ is a segment delimiter for 'special' URLs", + "href": "http://example.net/home?lang=fr#nav", + "new_value": "\\a\\%2E\\b\\%2e.\\c", + "expected": { + "href": "http://example.net/a/c?lang=fr#nav", + "pathname": "/a/c" + } + }, + { + "comment": "\\ is *not* a segment delimiter for non-'special' URLs", + "href": "view-source+http://example.net/home?lang=fr#nav", + "new_value": "\\a\\%2E\\b\\%2e.\\c", + "expected": { + "href": "view-source+http://example.net/\\a\\.\\b\\..\\c?lang=fr#nav", + "pathname": "/\\a\\.\\b\\..\\c" + } + }, + { + "comment": "UTF-8 percent encoding with the default encode set. 
Tabs and newlines are removed.", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", + "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is, except %2E.", + "href": "http://example.net", + "new_value": "%2e%2E%c3%89té", + "expected": { + "href": "http://example.net/..%c3%89t%C3%A9", + "pathname": "/..%c3%89t%C3%A9" + } + } + ], + "search": [ + { + "href": "https://example.net#nav", + "new_value": "lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "?lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "??lang=fr", + "expected": { + "href": "https://example.net/??lang=fr#nav", + "search": "??lang=fr" + } + }, + { +"skip": "we do not pass this, but we do match chromes behavior", + "href": "https://example.net?lang=en-US#nav", + "new_value": "?", + "expected": { + "href": "https://example.net/?#nav", + "search": "" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "", + "expected": { + "href": "https://example.net/#nav", + "search": "" + } + }, + { + "href": "https://example.net?lang=en-US", + "new_value": "", + "expected": { + "href": "https://example.net/", + "search": "" + } + }, + { + "href": "https://example.net", + "new_value": "", + "expected": { + "href": "https://example.net/", + "search": "" + } + }, + { + 
"comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed.", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://example.net/?%c3%89t%C3%A9", + "search": "?%c3%89t%C3%A9" + } + } + ], + "hash": [ + { + "href": "https://example.net", + "new_value": "main", + "expected": { + "href": "https://example.net/#main", + "hash": "#main" + } + }, + { + "href": "https://example.net#nav", + "new_value": "main", + "expected": { + "href": "https://example.net/#main", + "hash": "#main" + } + }, + { + "href": "https://example.net?lang=en-US", + "new_value": "##nav", + "expected": { + "href": "https://example.net/?lang=en-US##nav", + "hash": "##nav" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "#main", + "expected": { + "href": "https://example.net/?lang=en-US#main", + "hash": "#main" + } + }, + { +"skip": "we do not pass this, but we do match chromes behavior", + "href": "https://example.net?lang=en-US#nav", + "new_value": "#", + "expected": { + "href": "https://example.net/?lang=en-US#", + "hash": "" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "", + "expected": { + "href": "https://example.net/?lang=en-US", + "hash": "" + } + }, + { + "comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/#\u0001\u001f 
!\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+                "hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé"
+            }
+        },
+        {
+            "comment": "Bytes already percent-encoded are left as-is",
+            "href": "http://example.net",
+            "new_value": "%c3%89té",
+            "expected": {
+                "href": "http://example.net/#%c3%89té",
+                "hash": "#%c3%89té"
+            }
+        }
+    ]
+}
diff --git a/test/parallel/test-whatwg-url-parsing.js b/test/parallel/test-whatwg-url-parsing.js
new file mode 100644
index 00000000000000..568bc821758dd5
--- /dev/null
+++ b/test/parallel/test-whatwg-url-parsing.js
@@ -0,0 +1,127 @@
+'use strict';
+
+// Runs every case in the url-tests.json fixture through the URL constructor,
+// then checks individual components of some additional URLs.
+
+const common = require('../common');
+const URL = require('url').URL;
+const path = require('path');
+const assert = require('assert');
+const tests = require(path.join(common.fixturesDir, 'url-tests.json'));
+
+for (const test of tests) {
+  // String entries are skipped; only object entries describe a test case.
+  if (typeof test === 'string')
+    continue;
+
+  if (test.failure) {
+    assert.throws(() => new URL(test.input, test.base), /Invalid URL/);
+  } else {
+    // An unexpected parse error propagates unchanged and fails the test.
+    const url = new URL(test.input, test.base);
+    assert.strictEqual(url.href, test.href);
+  }
+}
+
+const additional_tests = [
+  {
+    'url': 'tftp://foobar.com/someconfig;mode=netascii',
+    'protocol': 'tftp:',
+    'hostname': 'foobar.com',
+    'pathname': '/someconfig;mode=netascii'
+  },
+  {
+    'url': 'telnet://user:pass@foobar.com:23/',
+    'protocol': 'telnet:',
+    'username': 'user',
+    'password': 'pass',
+    'hostname': 'foobar.com',
+    'port': '23',
+    'pathname': '/'
+  },
+  {
+    'url': 'ut2004://10.10.10.10:7777/Index.ut2',
+    'protocol': 'ut2004:',
+    'hostname': '10.10.10.10',
+    'port': '7777',
+    'pathname': '/Index.ut2'
+  },
+  {
+    'url': 'redis://foo:bar@somehost:6379/0?baz=bam&qux=baz',
+    'protocol': 'redis:',
+    'username': 'foo',
+    'password': 'bar',
+    'hostname': 'somehost',
+    'port': '6379',
+    'pathname': '/0',
+    'search': '?baz=bam&qux=baz'
+  },
+  {
+    'url': 'rsync://foo@host:911/sup',
+    'protocol': 'rsync:',
+    'username': 'foo',
+    'hostname': 'host',
+    'port': '911',
+    'pathname': '/sup'
+  },
+  {
+    'url': 'git://github.com/foo/bar.git',
+    'protocol': 'git:',
+    'hostname': 'github.com',
+    'pathname': '/foo/bar.git'
+  },
+  {
+    'url': 'irc://myserver.com:6999/channel?passwd',
+    'protocol': 'irc:',
+    'hostname': 'myserver.com',
+    'port': '6999',
+    'pathname': '/channel',
+    'search': '?passwd'
+  },
+  {
+    'url': 'dns://fw.example.org:9999/foo.bar.org?type=TXT',
+    'protocol': 'dns:',
+    'hostname': 'fw.example.org',
+    'port': '9999',
+    'pathname': '/foo.bar.org',
+    'search': '?type=TXT'
+  },
+  {
+    'url': 'ldap://localhost:389/ou=People,o=JNDITutorial',
+    'protocol': 'ldap:',
+    'hostname': 'localhost',
+    'port': '389',
+    'pathname': '/ou=People,o=JNDITutorial'
+  },
+  {
+    'url': 'git+https://github.com/foo/bar',
+    'protocol': 'git+https:',
+    'hostname': 'github.com',
+    'pathname': '/foo/bar'
+  },
+  {
+    'url': 'urn:ietf:rfc:2648',
+    'protocol': 'urn:',
+    'pathname': 'ietf:rfc:2648'
+  },
+  {
+    'url': 'tag:joe@example.org,2001:foo/bar',
+    'protocol': 'tag:',
+    'pathname': 'joe@example.org,2001:foo/bar'
+  }
+];
+
+// Fixture keys that are compared against the URL property of the same name.
+// A key that an entry leaves out is not checked for that URL.
+const checked = [
+  'protocol', 'username', 'password', 'hostname', 'host', 'port', 'pathname',
+  'search', 'hash'
+];
+
+for (const test of additional_tests) {
+  const u = new URL(test.url);
+  for (const prop of checked) {
+    if (test[prop] !== undefined)
+      assert.strictEqual(u[prop], test[prop]);
+  }
+}
diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js
new file mode 100644
index 00000000000000..99e2e6a748c264
--- /dev/null
+++ b/test/parallel/test-whatwg-url-searchparams.js
@@ -0,0 +1,36 @@
+'use strict'; + +require('../common'); +const assert = require('assert'); +const URL = require('url').URL; + +const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%5Bobject%20Object%5D'; +const values = ['a', 1, true, undefined, null, {}]; + +const m = new URL('http://example.org'); +const sp = m.searchParams; + +assert(sp); +assert.strictEqual(sp.toString(), ''); +assert.strictEqual(m.search, ''); + +assert(!sp.has('a')); +values.forEach((i) => sp.set('a', i)); +assert(sp.has('a')); +assert.strictEqual(sp.get('a'), '[object Object]'); +sp.delete('a'); +assert(!sp.has('a')); +values.forEach((i) => sp.append('a', i)); +assert(sp.has('a')); +assert.strictEqual(sp.getAll('a').length, 6); +assert.strictEqual(sp.get('a'), 'a'); + +assert.strictEqual(sp.toString(), serialized); + +assert.strictEqual(m.search, `?${serialized}`); + +var key, val, n = 0; +for ([key, val] of sp) { + assert.strictEqual(key, 'a'); + assert.strictEqual(val, String(values[n++])); +} diff --git a/test/parallel/test-whatwg-url-setters.js b/test/parallel/test-whatwg-url-setters.js new file mode 100644 index 00000000000000..4c29ef098cc463 --- /dev/null +++ b/test/parallel/test-whatwg-url-setters.js @@ -0,0 +1,24 @@ +'use strict'; + +const common = require('../common'); +const path = require('path'); +const URL = require('url').URL; +const assert = require('assert'); +const attrs = require(path.join(common.fixturesDir, 'url-setter-tests.json')); + +for (const attr in attrs) { + if (attr === 'comment') + continue; + const tests = attrs[attr]; + var n = 0; + for (const test of tests) { + if (test.skip) continue; + n++; + const url = new URL(test.href); + url[attr] = test.new_value; + for (const test_attr in test.expected) { + assert.equal(test.expected[test_attr], url[test_attr], + `${n} ${attr} ${test_attr} ${test.href} ${test.comment}`); + } + } +}