From 501c6c503cbccd4f6457cf21f9b66402955a3725 Mon Sep 17 00:00:00 2001 From: Quentin Santos Date: Sat, 4 Mar 2023 15:32:41 +0100 Subject: [PATCH] Handle case where input starts with a slash --- url/src/parser.rs | 66 ++++++++++++++++++++------------------ url/tests/urltestdata.json | 1 - 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/url/src/parser.rs b/url/src/parser.rs index 29a3d9019..30b118256 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -819,37 +819,6 @@ impl<'a> Parser<'a> { self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input) } }; - // Special case for anarchist URL's with a leading empty fragment - let scheme_end = base_url.scheme_end as usize; - let mut path_start = base_url.path_start as usize; - if !base_url.has_authority() { - if path_start == scheme_end + 1 { - // The path starts right after the scheme_end - if self.serialization[path_start..].starts_with("//") { - // Case 1: The base URL did not have an empty fragment, but the resulting one does - // Insert the "/." prefix - self.serialization.insert_str(path_start, "/."); - path_start += 2; - } - } else { - assert_eq!(path_start, scheme_end + 3); - assert_eq!(&self.serialization[scheme_end..path_start], ":/."); - // The base URL has a "/." between the host and the path - assert_eq!( - self.serialization.as_bytes().get(path_start).copied(), - Some(b'/') - ); - if self.serialization.as_bytes().get(path_start + 1).copied() != Some(b'/') - { - // Case 2: The base URL had an empty fragment, but the resulting one does not - // Remove the "/." 
prefix - self.serialization - .replace_range(scheme_end..path_start, ":"); - path_start -= 2; - } - } - assert!(!self.serialization[scheme_end..].starts_with("://")); - } self.with_query_and_fragment( scheme_type, base_url.scheme_end, @@ -858,7 +827,7 @@ impl<'a> Parser<'a> { base_url.host_end, base_url.host, base_url.port, - to_u32(path_start)?, + base_url.path_start, remaining, ) } @@ -1397,6 +1366,39 @@ impl<'a> Parser<'a> { path_start: u32, remaining: Input<'_>, ) -> ParseResult { + // Special case for anarchist URLs with a leading empty path segment + let scheme_end = scheme_end as usize; + let mut path_start = path_start as usize; + if path_start == scheme_end + 1 { + // Anarchist URL + if self.serialization[path_start..].starts_with("//") { + // Case 1: The base URL did not have a leading empty path segment, but the resulting one does + // Insert the "/." prefix + self.serialization.insert_str(path_start, "/."); + path_start += 2; + } + assert!(!self.serialization[scheme_end..].starts_with("://")); + } else if path_start == scheme_end + 3 + && &self.serialization[scheme_end..path_start] == ":/." + { + // Anarchist URL with a leading empty path segment + // The base URL has a "/." between the scheme and the path + assert_eq!( + self.serialization.as_bytes().get(path_start).copied(), + Some(b'/') + ); + if self.serialization.as_bytes().get(path_start + 1).copied() != Some(b'/') { + // Case 2: The base URL had a leading empty path segment, but the resulting one does not + // Remove the "/." 
prefix + self.serialization + .replace_range(scheme_end..path_start, ":"); + path_start -= 2; + } + assert!(!self.serialization[scheme_end..].starts_with("://")); + } + let scheme_end = to_u32(scheme_end)?; + let path_start = to_u32(path_start)?; + let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; Ok(Url { diff --git a/url/tests/urltestdata.json b/url/tests/urltestdata.json index 5d2924bd7..53d036886 100644 --- a/url/tests/urltestdata.json +++ b/url/tests/urltestdata.json @@ -7586,7 +7586,6 @@ "search": "", "hash": "" }, - "skip next", { "input": "/..//path", "base": "non-spec:/p",