From 111b4f47cc643c84ded4de6cc4a24dd3f6b0796c Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Thu, 31 Oct 2024 15:54:38 +0100 Subject: [PATCH] Properly resolve dot segments in base paths Issue #44 --- /* Review notes (commentary only; the patch content below is unchanged). Purpose: when resolving a relative reference, the output is now rebuilt from the base piece by piece through four new helpers instead of copying a raw prefix of base.iri, so that dot segments present in the base path are resolved (Issue #44). append_base_scheme, append_base_authority and append_base_query each push one slice of base.iri and record the matching end position as self.output.len(); the removed code copied the base.positions values directly. append_base_path splits the base path on slashes: a .. segment calls remove_last_segment and suppresses the next separator, a . segment is skipped, and any other segment is pushed, preceded by a slash unless it is the first segment or a .. segment was handled since the last pushed segment. remove_last_segment becomes an associated function that takes the output buffer and the positions explicitly, and parse_relative_slash no longer takes a base argument. All four helpers call self.base.as_ref().unwrap(), so they panic when self.base is None; in this patch parse_relative only reaches them inside its self.base.is_some() branch. NOTE(review): a . segment that ends the base path is skipped without emitting the slash that precedes it, so a base path /a/b/. is appended as /a/b and resolving s against it appears to give /a/s. The unchanged context branch in the third src/lib.rs hunk that tests for a trailing slash-dot truncates only the final character and therefore keeps the slash, and RFC 3986 section 5.2.4 maps /a/b/. to /a/b/ - confirm and consider adding a test. NOTE(review): a leading . segment in a rootless base path still lets the next segment push its separator, which the added test pins as s:./bar/baz plus s giving s:/bar/s, while a leading .. segment suppresses it (s:../bar/baz plus s gives s:bar/s); RFC 3986 section 5.2.4 rule 2A would drop the ./ prefix - confirm this asymmetry is intended. */ src/lib.rs | 108 +++++++++++++++++++++++++++++++++------------------ tests/lib.rs | 44 +++++++++++++++++++++ 2 files changed, 115 insertions(+), 37 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 34a7934..c6dbd64 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1415,45 +1415,41 @@ impl<'a, O: OutputBuffer, const UNCHECKED: bool> IriParser<'a, O, UNCHECKED> { } fn parse_relative(&mut self) -> Result<(), IriParseError> { - if let Some(base) = self.base { + if self.base.is_some() { match self.input.front() { None => { - self.output.push_str(&base.iri[..base.positions.query_end]); - self.output_positions.scheme_end = base.positions.scheme_end; - self.output_positions.authority_end = base.positions.authority_end; - self.output_positions.path_end = base.positions.path_end; - self.output_positions.query_end = base.positions.query_end; + self.append_base_scheme(); + self.append_base_authority(); + self.append_base_path(); + self.append_base_query(); Ok(()) } Some('/') => { self.input.next(); - self.parse_relative_slash(&base) + self.parse_relative_slash() } Some('?') => { self.input.next(); - self.output.push_str(&base.iri[..base.positions.path_end]); + self.append_base_scheme(); + self.append_base_authority(); + self.append_base_path(); self.output.push('?'); - self.output_positions.scheme_end = base.positions.scheme_end; - self.output_positions.authority_end = base.positions.authority_end; - self.output_positions.path_end = base.positions.path_end; self.parse_query() } Some('#') => { self.input.next(); - self.output.push_str(&base.iri[..base.positions.query_end]); - self.output_positions.scheme_end = base.positions.scheme_end; - self.output_positions.authority_end = base.positions.authority_end; - self.output_positions.path_end = base.positions.path_end; - 
self.output_positions.query_end = base.positions.query_end; + self.append_base_scheme(); + self.append_base_authority(); + self.append_base_path(); + self.append_base_query(); self.output.push('#'); self.parse_fragment() } _ => { - self.output.push_str(&base.iri[..base.positions.path_end]); - self.output_positions.scheme_end = base.positions.scheme_end; - self.output_positions.authority_end = base.positions.authority_end; - self.output_positions.path_end = base.positions.path_end; - self.remove_last_segment(); + self.append_base_scheme(); + self.append_base_authority(); + self.append_base_path(); + Self::remove_last_segment(self.output, &mut self.output_positions); self.parse_relative_path() } } @@ -1482,20 +1478,17 @@ impl<'a, O: OutputBuffer, const UNCHECKED: bool> IriParser<'a, O, UNCHECKED> { self.parse_path() } - fn parse_relative_slash(&mut self, base: &IriRef<&'a str>) -> Result<(), IriParseError> { + fn parse_relative_slash(&mut self) -> Result<(), IriParseError> { if self.input.starts_with('/') { self.input.next(); - self.output.push_str(&base.iri[..base.positions.scheme_end]); - self.output_positions.scheme_end = base.positions.scheme_end; + self.append_base_scheme(); self.output.push('/'); self.output.push('/'); self.parse_authority() } else { - self.output - .push_str(&base.iri[..base.positions.authority_end]); + self.append_base_scheme(); + self.append_base_authority(); self.output.push('/'); - self.output_positions.scheme_end = base.positions.scheme_end; - self.output_positions.authority_end = base.positions.authority_end; self.parse_path() } } @@ -1646,7 +1639,7 @@ impl<'a, O: OutputBuffer, const UNCHECKED: bool> IriParser<'a, O, UNCHECKED> { None | Some('/') | Some('?') | Some('#') => { if self.output.as_str().ends_with("/..") { self.output.truncate(self.output.len() - 3); - self.remove_last_segment(); + Self::remove_last_segment(self.output, &mut self.output_positions); } else if self.output.as_str().ends_with("/.") { 
self.output.truncate(self.output.len() - 1); } else if c == Some('/') { @@ -1700,17 +1693,58 @@ impl<'a, O: OutputBuffer, const UNCHECKED: bool> IriParser<'a, O, UNCHECKED> { Ok(()) } - fn remove_last_segment(&mut self) { + fn append_base_scheme(&mut self) { + let base = self.base.as_ref().unwrap(); + self.output.push_str(&base.iri[..base.positions.scheme_end]); + self.output_positions.scheme_end = self.output.len(); + } + + fn append_base_authority(&mut self) { + let base = self.base.as_ref().unwrap(); + self.output + .push_str(&base.iri[base.positions.scheme_end..base.positions.authority_end]); + self.output_positions.authority_end = self.output.len(); + } + + fn append_base_path(&mut self) { + let base = self.base.as_ref().unwrap(); + let mut append_slash_allowed = true; + for (i, segment) in base.iri[base.positions.authority_end..base.positions.path_end] + .split('/') + .enumerate() + { + if segment == ".." { + Self::remove_last_segment(self.output, &mut self.output_positions); + append_slash_allowed = false; + } else if segment == "." 
{ + } else { + if i > 0 && append_slash_allowed { + self.output.push('/'); + } + append_slash_allowed = true; + self.output.push_str(segment); + } + } + self.output_positions.path_end = self.output.len(); + } + + fn append_base_query(&mut self) { + let base = self.base.as_ref().unwrap(); + self.output + .push_str(&base.iri[base.positions.path_end..base.positions.query_end]); + self.output_positions.query_end = self.output.len(); + } + + fn remove_last_segment(output: &mut O, output_positions: &mut IriElementsPositions) { if let Some(last_slash_position) = - self.output.as_str()[self.output_positions.authority_end..].rfind('/') + output.as_str()[output_positions.authority_end..].rfind('/') { - self.output - .truncate(last_slash_position + self.output_positions.authority_end); - self.output.push('/'); + output.truncate(last_slash_position + output_positions.authority_end); + output.push('/'); } else { - self.output.truncate(self.output_positions.authority_end); - if self.output_positions.authority_end > self.output_positions.scheme_end { - self.output.push('/'); + output.truncate(output_positions.authority_end); + if output_positions.authority_end > output_positions.scheme_end { + output.push('/'); } } } diff --git a/tests/lib.rs b/tests/lib.rs index be27713..c564405 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -554,6 +554,50 @@ fn test_resolve_relative_iri() { ("///lv2.h", "file:foo", "file:///lv2.h"), ("lv2.h", "file:foo", "file:lv2.h"), ("s", "http://example.com", "http://example.com/s"), + ( + "s", + "http://example.com/fo/./bar/baz", + "http://example.com/fo/bar/s", + ), + ( + "?s", + "http://example.com/fo/./bar/baz", + "http://example.com/fo/bar/baz?s", + ), + ( + "#s", + "http://example.com/fo/./bar/baz", + "http://example.com/fo/bar/baz#s", + ), + ( + "s", + "http://example.com/fo/../bar/baz", + "http://example.com/bar/s", + ), + ( + "?s", + "http://example.com/fo/../bar/baz", + "http://example.com/bar/baz?s", + ), + ( + "#s", + 
"http://example.com/fo/../bar/baz", + "http://example.com/bar/baz#s", + ), + ( + "s", + "http://example.com/../bar/baz", + "http://example.com/bar/s", + ), + ( + "s", + "http://example.com/./bar/baz", + "http://example.com/bar/s", + ), + ("s", "s:foo/../bar/baz", "s:bar/s"), + ("s", "s:../bar/baz", "s:bar/s"), + ("s", "s:foo/./bar/baz", "s:foo/bar/s"), + ("s", "s:./bar/baz", "s:/bar/s"), ]; for (relative, base, output) in examples {