From 7d759c19941948a63ea61474e0c7f78813e46a92 Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Sat, 14 Aug 2021 13:22:05 +0200 Subject: [PATCH 1/4] Update reference tests This commit updates the reference tests. Quite a few of the new tests are failing now. They need to be fixed. --- url/tests/data.rs | 51 +- url/tests/setters_tests.json | 240 +++++- url/tests/urltestdata.json | 1499 ++++++++++++++++++++++++++++++---- 3 files changed, 1594 insertions(+), 196 deletions(-) diff --git a/url/tests/data.rs b/url/tests/data.rs index b72c33306..f17620388 100644 --- a/url/tests/data.rs +++ b/url/tests/data.rs @@ -16,7 +16,7 @@ use url::{quirks, Url}; #[test] fn urltestdata() { - // Copied form https://github.com/w3c/web-platform-tests/blob/master/url/ + // Copied form https://github.com/web-platform-tests/wpt/blob/master/url/ let mut json = Value::from_str(include_str!("urltestdata.json")) .expect("JSON parse error in urltestdata.json"); @@ -26,25 +26,33 @@ fn urltestdata() { continue; // ignore comments } - let base = entry.take_string("base"); + let maybe_base = entry + .take_key("base") + .expect("missing base key") + .maybe_string(); let input = entry.take_string("input"); let failure = entry.take_key("failure").is_some(); - let base = match Url::parse(&base) { - Ok(base) => base, - Err(_) if failure => continue, - Err(message) => { - eprint_failure( - format!(" failed: error parsing base {:?}: {}", base, message), - &format!("parse base for {:?}", input), - None, - ); - passed = false; - continue; - } + let res = if let Some(base) = maybe_base { + let base = match Url::parse(&base) { + Ok(base) => base, + Err(_) if failure => continue, + Err(message) => { + eprint_failure( + format!(" failed: error parsing base {:?}: {}", base, message), + &format!("parse base for {:?}", input), + None, + ); + passed = false; + continue; + } + }; + base.join(&input) + } else { + Url::parse(&input) }; - let url = match (base.join(&input), failure) { + let url = match (res, failure) { (Ok(url), false) => url, (Err(_), true) => continue, (Err(message), false) => { @@ -153,6 +161,7 @@ fn check_invariants(url: &Url, name: &str, comment: Option<&str>) -> bool { trait JsonExt { fn take_key(&mut self, key: &str) -> Option; fn string(self) -> String; + fn maybe_string(self) -> Option; fn take_string(&mut self, key: &str) -> String; } @@ -162,10 +171,14 @@ impl JsonExt for Value { } fn string(self) -> String { - if let Value::String(s) = self { - s - } else { - panic!("Not a Value::String") + self.maybe_string().expect("") + } + + fn maybe_string(self) -> Option { + match self { + Value::String(s) => Some(s), + Value::Null => None, + _ => panic!("Not a Value::String or Value::Null"), } } diff --git a/url/tests/setters_tests.json b/url/tests/setters_tests.json index 4280032a2..3380c524e 100644 --- a/url/tests/setters_tests.json +++ b/url/tests/setters_tests.json @@ -1,6 +1,6 @@ { "comment": [ - "AS OF https://github.com/jsdom/whatwg-url/commit/35f04dfd3048cf6362f4398745bb13375c5020c2", + "AS OF https://github.com/web-platform-tests/wpt/commit/77d54aa9e0405f737987b59331f3584e3e1c26f9", "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members", "", "This file contains a JSON object.", @@ -120,11 +120,11 @@ } }, { - "href": "gopher://example.net:1234", + "href": "https://example.net:1234", "new_value": "file", "expected": { - "href": "gopher://example.net:1234", - "protocol": "gopher:" + "href": "https://example.net:1234/", + "protocol": "https:" } }, { @@ -146,7 +146,7 @@ }, { "href": "file:///test", - "new_value": "gopher", + "new_value": "https", "expected": { "href": "file:///test", "protocol": "file:" @@ -343,7 +343,7 @@ { "comment": "UTF-8 percent encoding with the userinfo encode set.", "href": "http://example.net", - "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~€Éé", "expected": { "href": "http://%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", "username": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" @@ -446,7 +446,7 @@ { "comment": "UTF-8 percent encoding with the userinfo encode set.", "href": "http://example.net", - "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~€Éé", "expected": { "href": "http://:%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", "password": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" @@ -499,7 +499,7 @@ }, { "href": "sc://x/", - "new_value": "\u0009", + "new_value": "\t", "expected": { "href": "sc:///", "host": "", @@ -508,7 +508,7 @@ }, { "href": "sc://x/", - "new_value": "\u000A", + "new_value": "\n", "expected": { "href": "sc:///", "host": "", @@ -517,7 +517,7 @@ }, { "href": "sc://x/", - "new_value": "\u000D", + "new_value": "\r", "expected": { "href": "sc:///", "host": "", @@ -696,7 +696,7 @@ "host": "[2001:db8::2]:4002", "hostname": "[2001:db8::2]", "port": "4002" - } + } }, { "comment": "Default port number is removed", @@ -962,6 +962,16 @@ "port": "" } }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, { "href": "sc://test@test/", "new_value": "", @@ -981,6 +991,26 @@ "hostname": "test", "port": "12" } + }, + { + "comment": "Leading / is not stripped", + "href": "http://example.com/", + "new_value": "///bad.com", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com" + } + }, + { + "comment": "Leading / is not stripped", + "href": "sc://example.com/", + "new_value": "///bad.com", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } } ], "hostname": [ @@ -996,7 +1026,7 @@ }, { "href": "sc://x/", - "new_value": "\u0009", + "new_value": "\t", "expected": { "href": "sc:///", "host": "", @@ -1005,7 +1035,7 @@ }, { "href": "sc://x/", - "new_value": "\u000A", + "new_value": "\n", "expected": { "href": "sc:///", "host": "", @@ -1014,7 +1044,7 @@ }, { "href": "sc://x/", - "new_value": "\u000D", + "new_value": "\r", "expected": { "href": "sc:///", "host": "", @@ -1144,24 +1174,24 @@ } }, { - "comment": "Stuff after a : delimiter is ignored", + "comment": ": delimiter invalidates entire value", "href": "http://example.net/path", "new_value": "example.com:8080", "expected": { - "href": "http://example.com/path", - "host": "example.com", - "hostname": "example.com", + "href": "http://example.net/path", + "host": "example.net", + "hostname": "example.net", "port": "" } }, { - "comment": "Stuff after a : delimiter is ignored", + "comment": ": delimiter invalidates entire value", "href": "http://example.net:8080/path", "new_value": "example.com:", "expected": { - "href": "http://example.com:8080/path", - "host": "example.com:8080", - "hostname": "example.com", + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", "port": "8080" } }, @@ -1286,6 +1316,16 @@ "port": "" } }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, { "href": "sc://test@test/", "new_value": "", @@ -1305,6 +1345,47 @@ "hostname": "test", "port": "12" } + }, + { + "comment": "Drop /. from path", + "href": "non-spec:/.//p", + "new_value": "h", + "expected": { + "href": "non-spec://h//p", + "host": "h", + "hostname": "h", + "pathname": "//p" + } + }, + { + "href": "non-spec:/.//p", + "new_value": "", + "expected": { + "href": "non-spec:////p", + "host": "", + "hostname": "", + "pathname": "//p" + } + }, + { + "comment": "Leading / is not stripped", + "href": "http://example.com/", + "new_value": "///bad.com", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com" + } + }, + { + "comment": "Leading / is not stripped", + "href": "sc://example.com/", + "new_value": "///bad.com", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } } ], "port": [ @@ -1531,6 +1612,51 @@ "pathname": "me@example.net" } }, + { + "comment": "Special URLs cannot have their paths erased", + "href": "file:///some/path", + "new_value": "", + "expected": { + "href": "file:///", + "pathname": "/" + } + }, + { + "comment": "Non-special URLs can have their paths erased", + "href": "foo://somehost/some/path", + "new_value": "", + "expected": { + "href": "foo://somehost", + "pathname": "" + } + }, + { + "comment": "Non-special URLs with an empty host can have their paths erased", + "href": "foo:///some/path", + "new_value": "", + "expected": { + "href": "foo://", + "pathname": "" + } + }, + { + "comment": "Path-only URLs cannot have their paths erased", + "href": "foo:/some/path", + "new_value": "", + "expected": { + "href": "foo:/", + "pathname": "/" + } + }, + { + "comment": "Path-only URLs always have an initial slash", + "href": "foo:/some/path", + "new_value": "test", + "expected": { + "href": "foo:/test", + "pathname": "/test" + } + }, { "href": "unix:/run/foo.socket?timeout=10", "new_value": "/var/log/../run/bar.socket", @@ -1576,7 +1702,7 @@ { "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed.", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~€Éé", "expected": { "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" @@ -1627,13 +1753,31 @@ "pathname": "/%23" } }, + { + "comment": "? doesn't mess up encoding", + "href": "http://example.net", + "new_value": "/?é", + "expected": { + "href": "http://example.net/%3F%C3%A9", + "pathname": "/%3F%C3%A9" + } + }, + { + "comment": "# doesn't mess up encoding", + "href": "http://example.net", + "new_value": "/#é", + "expected": { + "href": "http://example.net/%23%C3%A9", + "pathname": "/%23%C3%A9" + } + }, { "comment": "File URLs and (back)slashes", "href": "file://monkey/", "new_value": "\\\\", "expected": { - "href": "file://monkey/", - "pathname": "/" + "href": "file://monkey//", + "pathname": "//" } }, { @@ -1641,8 +1785,8 @@ "href": "file:///unicorn", "new_value": "//\\/", "expected": { - "href": "file:///", - "pathname": "/" + "href": "file://////", + "pathname": "////" } }, { @@ -1650,8 +1794,42 @@ "href": "file:///unicorn", "new_value": "//monkey/..//", "expected": { - "href": "file:///", - "pathname": "/" + "href": "file://///", + "pathname": "///" + } + }, + { + "comment": "Serialize /. in path", + "href": "non-spec:/", + "new_value": "/.//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "href": "non-spec:/", + "new_value": "/..//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "href": "non-spec:/", + "new_value": "//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "comment": "Drop /. from path", + "href": "non-spec:/.//", + "new_value": "p", + "expected": { + "href": "non-spec:/p", + "pathname": "/p" } } ], @@ -1723,7 +1901,7 @@ { "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed.", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~€Éé", "expected": { "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" @@ -1831,7 +2009,7 @@ { "comment": "Simple percent-encoding; tabs and newlines are removed", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~€Éé", "expected": { "href": "a:/#%00%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", "hash": "#%00%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" diff --git a/url/tests/urltestdata.json b/url/tests/urltestdata.json index 554e61914..361a65c7a 100644 --- a/url/tests/urltestdata.json +++ b/url/tests/urltestdata.json @@ -1,6 +1,6 @@ [ + "# AS OF https://github.com/web-platform-tests/wpt/commit/af915c5b7b8628a384f4f8ae132520602782e3bb", "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/segments.js", - "# AS OF https://github.com/jsdom/whatwg-url/commit/35f04dfd3048cf6362f4398745bb13375c5020c2", { "input": "http://example\t.\norg", "base": "http://example.org/foo/bar", @@ -540,6 +540,36 @@ "search": "", "hash": "" }, + { + "input": "\\x", + "base": "http://example.org/foo/bar", + "href": "http://example.org/x", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "\\\\x\\hello", + "base": "http://example.org/foo/bar", + "href": "http://x/hello", + "origin": "http://x", + "protocol": "http:", + "username": "", + "password": "", + "host": "x", + "hostname": "x", + "port": "", + "pathname": "/hello", + "search": "", + "hash": "" + }, { "input": "::", "base": "http://example.org/foo/bar", @@ -3157,7 +3187,8 @@ { "input": "http:/:@/www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http://user@/www.example.com", @@ -3167,12 +3198,14 @@ { "input": "http:@/www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http:/@/www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http://@/www.example.com", @@ -3182,17 +3215,20 @@ { "input": "https:@/www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http:a:b@/www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http:/a:b@/www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http://a:b@/www.example.com", @@ -3202,7 +3238,8 @@ { "input": "http::@/www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http:a:@www.example.com", @@ -3267,12 +3304,14 @@ { "input": "http:@:www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http:/@:www.example.com", "base": "about:blank", - "failure": true + "failure": true, + "inputCanBeRelative": true }, { "input": "http://@:www.example.com", @@ -3646,6 +3685,17 @@ "search": "?%EF%BF%BD", "hash": "#%EF%BF%BD" }, + "Domain is ASCII, but a label is invalid IDNA", + { + "input": "http://a.b.c.xn--pokxncvks", + "base": "about:blank", + "failure": true + }, + { + "input": "http://10.0.0.xn--pokxncvks", + "base": "about:blank", + "failure": true + }, "Test name prepping, fullwidth input should be converted to ASCII and NOT IDN-ized. This is 'Go' in fullwidth UTF-8/UTF-16.", { "input": "http://Go.com", @@ -3847,21 +3897,6 @@ "search": "", "hash": "" }, - { - "input": "http://0..0x300/", - "base": "about:blank", - "href": "http://0..0x300/", - "origin": "http://0..0x300", - "protocol": "http:", - "username": "", - "password": "", - "host": "0..0x300", - "hostname": "0..0x300", - "port": "", - "pathname": "/", - "search": "", - "hash": "" - }, "Broken IPv6", { "input": "http://[www.google.com]/", @@ -4619,7 +4654,7 @@ "search": "", "hash": "" }, - "# unknown scheme with non-URL characters in the path", + "# unknown scheme with non-URL characters", { "input": "wow:\uFFFF", "base": "about:blank", @@ -4635,6 +4670,7 @@ "search": "", "hash": "" }, + "NOTE: Uustream there is a test here that can not be parsed by serde because of lone surrogates", "Forbidden host code points", { "input": "http://a@[\\]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", + "origin": "null", + "password": "", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~" + }, + { + "input": "wss:// !\"$%&'()*+,-.;<=>@[]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", + "origin": "wss://host", + "password": "", + "pathname": "/", + "port": "", + "protocol": "wss:", + "search": "", + "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~" + }, + { + "input": "foo://joe: !\"$%&'()*+,-.:;<=>@[\\]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", + "origin": "null", + "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "joe" + }, + { + "input": "wss://joe: !\"$%&'()*+,-.:;<=>@[]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", + "origin": "wss://host", + "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~", + "pathname": "/", + "port": "", + "protocol": "wss:", + "search": "", + "username": "joe" + }, + { + "input": "foo://!\"$%&'()*+,-.;=_`{}~/", + "base": "about:blank", + "hash": "", + "host": "!\"$%&'()*+,-.;=_`{}~", + "hostname": "!\"$%&'()*+,-.;=_`{}~", + "href": "foo://!\"$%&'()*+,-.;=_`{}~/", + "origin": "null", + "password": "", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://!\"$&'()*+,-.;=_`{}~/", + "base": "about:blank", + "hash": "", + "host": "!\"$&'()*+,-.;=_`{}~", + "hostname": "!\"$&'()*+,-.;=_`{}~", + "href": "wss://!\"$&'()*+,-.;=_`{}~/", + "origin": "wss://!\"$&'()*+,-.;=_`{}~", + "password": "", + "pathname": "/", + "port": "", + "protocol": "wss:", + "search": "", + "username": "" + }, + { + "input": "foo://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", + "origin": "null", + "password": "", + "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", + "port": "", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://host/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", + "origin": "wss://host", + "password": "", + "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", + "port": "", + "protocol": "wss:", + "search": "", + "username": "" + }, + { + "input": "foo://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/dir/?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "origin": "null", + "password": "", + "pathname": "/dir/", + "port": "", + "protocol": "foo:", + "search": "?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "username": "" + }, + { + "input": "wss://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://host/dir/?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "origin": "wss://host", + "password": "", + "pathname": "/dir/", + "port": "", + "protocol": "wss:", + "search": "?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "username": "" + }, + { + "input": "foo://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "host": "host", + "hostname": "host", + "href": "foo://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "origin": "null", + "password": "", + "pathname": "/dir/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "host": "host", + "hostname": "host", + "href": "wss://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "origin": "wss://host", + "password": "", + "pathname": "/dir/", + "port": "", + "protocol": "wss:", + "search": "", + "username": "" + }, + "Ensure that input schemes are not ignored when resolving non-special URLs", + { + "input": "abc:rootless", + "base": "abc://host/path", + "hash": "", + "host": "", + "hostname": "", + "href": "abc:rootless", + "password": "", + "pathname": "rootless", + "port": "", + "protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:rootless", + "base": "abc:/path", + "hash": "", + "host": "", + "hostname": "", + "href": "abc:rootless", + "password": "", + "pathname": "rootless", + "port": "", + "protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:rootless", + "base": "abc:path", + "hash": "", + "host": "", + "hostname": "", + "href": "abc:rootless", + "password": "", + "pathname": "rootless", + "port": "", + "protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:/rooted", + "base": "abc://host/path", + "hash": "", + "host": "", + "hostname": "", + "href": "abc:/rooted", + "password": "", + "pathname": "/rooted", + "port": "", + "protocol": "abc:", + "search": "", + "username": "" + }, + "Empty query and fragment with blank should throw an error", + { + "input": "#", + "base": null, + "failure": true + }, + { + "input": "?", + "base": null, + "failure": true + }, + "Last component looks like a number, but not valid IPv4", + { + "input": "http://1.2.3.4.5", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://1.2.3.4.5.", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://0..0x300/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0..0x300./", + "base": "about:blank", + "failure": true + }, + { + "input": "http://256.256.256.256.256", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256.256.", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://1.2.3.08", + "base": "about:blank", + "failure": true + }, + { + "input": "http://1.2.3.08.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://1.2.3.09", + "base": "about:blank", + "failure": true + }, + { + "input": "http://09.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://09.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://01.2.3.4.5", + "base": "about:blank", + "failure": true + }, + { + "input": "http://01.2.3.4.5.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x100.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x100.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x1.2.3.4.5", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x1.2.3.4.5.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.1.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.1.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09..", + "base": "about:blank", + "hash": "", + "host": "foo.09..", + "hostname": "foo.09..", + "href": "http://foo.09../", + "password": "", + "pathname": "/", + "port": "", + "protocol": "http:", + "search": "", + "username": "" + }, + { + "input": "http://0999999999999999999/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0XFfFfFfFfFfFfFfFfFfAcE123", + "base": "about:blank", + "failure": true + }, + { + "input": "http://💩.123/", + "base": "about:blank", + "failure": true } ] From a3e61c4e6ac94b376d58f7be15749aed33b8e352 Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Sat, 14 Aug 2021 13:22:51 +0200 Subject: [PATCH 2/4] Forbid "|" in host name See https://github.com/whatwg/url/commit/40252530f93fe37f092be90583f82e9f337da1ab --- url/src/host.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/url/src/host.rs b/url/src/host.rs index 953743649..ee6cb710f 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -106,6 +106,7 @@ impl Host { | '\\' | ']' | '^' + | '|' ) }; From 5067e243eae81cb11319c08f2ef2c23126f0796b Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Sat, 14 Aug 2021 13:23:29 +0200 Subject: [PATCH 3/4] Quirks: set hostname with port is no-op See https://github.com/whatwg/url/commit/ec96993653a70d063843e0198694028c63348db4 --- url/src/parser.rs | 10 ++++++++-- url/src/quirks.rs | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/url/src/parser.rs b/url/src/parser.rs index caedb59c7..666e7b9ce 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -953,7 +953,7 @@ impl<'a> Parser<'a> { scheme_end: u32, scheme_type: SchemeType, ) -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { - let (host, remaining) = Parser::parse_host(input, scheme_type)?; + let (host, remaining) = Parser::parse_host(input, scheme_type, false)?; write!(&mut self.serialization, "{}", host).unwrap(); let host_end = to_u32(self.serialization.len())?; if let Host::Domain(h) = &host { @@ -983,6 +983,7 @@ impl<'a> Parser<'a> { pub fn parse_host( mut input: Input<'_>, scheme_type: SchemeType, + error_on_port: bool, ) -> ParseResult<(Host, Input<'_>)> { if scheme_type.is_file() { return Parser::get_file_host(input); @@ -996,7 +997,12 @@ impl<'a> Parser<'a> { let mut bytes = 0; for c in input_str.chars() { match c { - ':' if !inside_square_brackets => break, + ':' if !inside_square_brackets => { + if error_on_port { + return Err(ParseError::InvalidPort); + } + break; + } '\\' if scheme_type.is_special() => break, '/' | '?' | '#' => break, '\t' | '\n' | '\r' => { diff --git a/url/src/quirks.rs b/url/src/quirks.rs index 0dbc6eb44..16a0411e5 100644 --- a/url/src/quirks.rs +++ b/url/src/quirks.rs @@ -120,7 +120,7 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { return Ok(()); } - if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type) { + if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type, false) { host = h; opt_port = if let Some(remaining) = remaining.split_prefix(':') { if remaining.is_empty() { @@ -171,7 +171,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { return Ok(()); } - if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) { + if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type, true) { if let Host::Domain(h) = &host { if h.is_empty() { // Empty host on special not file url From 2c67355861a7a95564204f92961edd96f4f874a7 Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Thu, 16 Sep 2021 12:19:43 +0200 Subject: [PATCH 4/4] Update url/tests/data.rs Co-authored-by: Valentin Gosu --- url/tests/data.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/url/tests/data.rs b/url/tests/data.rs index f17620388..c7f6502a7 100644 --- a/url/tests/data.rs +++ b/url/tests/data.rs @@ -16,7 +16,7 @@ use url::{quirks, Url}; #[test] fn urltestdata() { - // Copied form https://github.com/web-platform-tests/wpt/blob/master/url/ + // Copied from https://github.com/web-platform-tests/wpt/blob/master/url/ let mut json = Value::from_str(include_str!("urltestdata.json")) .expect("JSON parse error in urltestdata.json");