From e476dd9eac137a06a0b101b221c5ee0c00c77487 Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Mon, 19 Aug 2024 15:28:49 +0100 Subject: [PATCH 1/7] Initial attempt at percent-decoding --- benches/matchit.rs | 3 +- benches/path_tree.rs | 3 +- examples/axum-fork/src/routing/path_router.rs | 12 +- examples/hyper/src/main.rs | 8 +- src/decode.rs | 39 ++++ src/errors.rs | 2 + src/errors/decode.rs | 16 ++ src/errors/insert.rs | 2 + src/errors/search.rs | 23 +++ src/lib.rs | 2 + src/path.rs | 19 ++ src/router.rs | 60 +++++- tests/common.rs | 5 +- tests/encoding.rs | 140 ++++++++++++++ tests/poem.rs | 6 +- tests/uncommon.rs | 175 +++++++++++++++--- 16 files changed, 469 insertions(+), 46 deletions(-) create mode 100644 src/decode.rs create mode 100644 src/errors/decode.rs create mode 100644 src/errors/search.rs create mode 100644 src/path.rs create mode 100644 tests/encoding.rs diff --git a/benches/matchit.rs b/benches/matchit.rs index 30e73ba5..ec99b3d8 100644 --- a/benches/matchit.rs +++ b/benches/matchit.rs @@ -19,7 +19,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let search = wayfind.search(route).unwrap(); + let mut path = wayfind::path::Path::new(route); + let search = wayfind.search(&mut path).unwrap().unwrap(); let _ = search .parameters .iter() diff --git a/benches/path_tree.rs b/benches/path_tree.rs index 9a25bfc4..a71fa9ea 100644 --- a/benches/path_tree.rs +++ b/benches/path_tree.rs @@ -19,7 +19,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let search = wayfind.search(path).unwrap(); + let mut path = wayfind::path::Path::new(path); + let search = wayfind.search(&mut path).unwrap().unwrap(); assert_eq!(search.data.value, index); let _ = search .parameters diff --git a/examples/axum-fork/src/routing/path_router.rs b/examples/axum-fork/src/routing/path_router.rs index 36cb94a0..9dd86fe9 100644 --- a/examples/axum-fork/src/routing/path_router.rs +++ b/examples/axum-fork/src/routing/path_router.rs @@ -3,7 +3,7 @@ use axum_core::response::IntoResponse; use std::{borrow::Cow, collections::HashMap, convert::Infallible, fmt, sync::Arc}; use tower_layer::Layer; use tower_service::Service; -use wayfind::{errors::insert::InsertError, node::search::Match, router::Router}; +use wayfind::{errors::insert::InsertError, node::search::Match, path::Path, router::Router}; use super::{ future::RouteFuture, not_found::NotFound, strip_prefix::StripPrefix, url_params, Endpoint, @@ -331,7 +331,8 @@ where } let path = req.uri().path().to_owned(); - let result = match self.node.matches(&path) { + let mut wayfind_path = Path::new(&path); + let result = match self.node.matches(&mut wayfind_path) { Some(match_) => { let id = match_.data.value; @@ -365,7 +366,8 @@ where } pub(super) fn replace_endpoint(&mut self, path: &str, endpoint: Endpoint) { - if let Some(match_) = self.node.matches(path) { + let mut wayfind_path = Path::new(path); + if let Some(match_) = self.node.matches(&mut wayfind_path) { let id = match_.data.value; self.routes.insert(id, endpoint); return; @@ -435,8 +437,8 @@ impl Node { Ok(()) } - fn matches<'n, 'p>(&'n self, path: &'p str) -> Option> { - self.inner.search(path) + fn matches<'n, 'p>(&'n self, path: &'p mut Path) -> Option> { + self.inner.search(path).expect("Failed to match!") } } diff --git a/examples/hyper/src/main.rs b/examples/hyper/src/main.rs index 95d3f776..72720bb6 100644 --- a/examples/hyper/src/main.rs +++ b/examples/hyper/src/main.rs @@ -15,7 +15,7 @@ use std::{ sync::Arc, }; use tokio::{net::TcpListener, task::JoinSet}; -use wayfind::{node::search::Parameter, router::Router}; +use wayfind::{node::search::Parameter, path::Path, router::Router}; type BoxFuture<'a> = Pin< Box< @@ -65,7 +65,11 @@ async fn main() -> Result<(), anyhow::Error> { let router = Arc::clone(&router); async move { let path = request.uri().path(); - let matches = router.search(path).expect("Failed to match!"); + let mut wayfind_path = Path::new(path); + let matches = router + .search(&mut wayfind_path) + .expect("Failed to match!") + .expect("Failed to match!"); let handler = &matches.data.value; let parameters = &matches.parameters; diff --git a/src/decode.rs b/src/decode.rs new file mode 100644 index 00000000..71f4df2f --- /dev/null +++ b/src/decode.rs @@ -0,0 +1,39 @@ +use crate::errors::decode::DecodeError; + +pub(crate) fn percent_decode(input: &[u8]) -> Result, DecodeError> { + if !input.contains(&b'%') { + return Ok(input.to_vec()); + } + + let mut output = Vec::with_capacity(input.len()); + + let mut i = 0; + let len = input.len(); + + while i < len { + if input[i] == b'%' && i + 2 < len { + let a = input[i + 1]; + let b = input[i + 2]; + output.push(decode_hex(a, b)?); + i += 3; + } else { + output.push(input[i]); + i += 1; + } + } + + Ok(output) +} + +#[allow(clippy::cast_possible_truncation)] +fn decode_hex(a: u8, b: u8) -> Result { + let a = (a as char) + .to_digit(16) + .ok_or(DecodeError::InvalidEncoding)?; + + let b = (b as char) + .to_digit(16) + .ok_or(DecodeError::InvalidEncoding)?; + + Ok((a as u8) << 4 | (b as u8)) +} diff --git a/src/errors.rs b/src/errors.rs index f1ba9d18..48a8c58a 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,4 +1,6 @@ pub mod constraint; +pub mod decode; pub mod delete; pub mod insert; pub mod route; +pub mod search; diff --git a/src/errors/decode.rs b/src/errors/decode.rs new file mode 100644 index 00000000..cc1e22ed --- /dev/null +++ b/src/errors/decode.rs @@ -0,0 +1,16 @@ +use std::{error::Error, fmt::Display}; + +#[derive(Debug, PartialEq, Eq)] +pub enum DecodeError { + InvalidEncoding, +} + +impl Error for DecodeError {} + +impl Display for DecodeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::InvalidEncoding => write!(f, "Invalid Encoding"), + } + } +} diff --git a/src/errors/insert.rs b/src/errors/insert.rs index e6f4b831..c4be0b45 100644 --- a/src/errors/insert.rs +++ b/src/errors/insert.rs @@ -4,6 +4,7 @@ use std::{error::Error, fmt::Display}; #[derive(Debug, PartialEq, Eq)] pub enum InsertError { RouteError(RouteError), + EncodedPath, DuplicatePath, UnknownConstraint, } @@ -14,6 +15,7 @@ impl Display for InsertError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::RouteError(error) => error.fmt(f), + Self::EncodedPath => write!(f, "Encoded Path"), Self::DuplicatePath => write!(f, "Duplicate Path"), Self::UnknownConstraint => write!(f, "Unknown Constraint"), } diff --git a/src/errors/search.rs b/src/errors/search.rs new file mode 100644 index 00000000..a3e5e543 --- /dev/null +++ b/src/errors/search.rs @@ -0,0 +1,23 @@ +use super::decode::DecodeError; +use std::{error::Error, fmt::Display}; + +#[derive(Debug, PartialEq, Eq)] +pub enum SearchError { + DecodeError(DecodeError), +} + +impl Error for SearchError {} + +impl Display for SearchError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::DecodeError(error) => error.fmt(f), + } + } +} + +impl From for SearchError { + fn from(error: DecodeError) -> Self { + Self::DecodeError(error) + } +} diff --git a/src/lib.rs b/src/lib.rs index b4325bbe..3eedeb24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,7 @@ pub mod constraints; +pub mod decode; pub mod errors; pub mod node; pub mod parts; +pub mod path; pub mod router; diff --git a/src/path.rs b/src/path.rs new file mode 100644 index 00000000..5dcb52df --- /dev/null +++ b/src/path.rs @@ -0,0 +1,19 @@ +use crate::{decode::percent_decode, errors::decode::DecodeError}; + +pub struct Path { + pub inner: Vec, +} + +impl Path { + #[must_use] + pub fn new(path: &str) -> Self { + Self { + inner: path.as_bytes().to_vec(), + } + } + + pub fn percent_decode(&mut self) -> Result<(), DecodeError> { + self.inner = percent_decode(&self.inner)?; + Ok(()) + } +} diff --git a/src/router.rs b/src/router.rs index 7f7b6dde..4b737574 100644 --- a/src/router.rs +++ b/src/router.rs @@ -1,8 +1,11 @@ use crate::{ constraints::Constraint, - errors::{constraint::ConstraintError, delete::DeleteError, insert::InsertError}, + errors::{ + constraint::ConstraintError, delete::DeleteError, insert::InsertError, search::SearchError, + }, node::{search::Match, Node, NodeData, NodeKind}, parts::{Part, Parts}, + path::Path, }; use smallvec::smallvec; use std::{ @@ -16,6 +19,7 @@ use std::{ pub struct Router { root: Node, constraints: HashMap, fn(&str) -> bool>, + percent_encoding: bool, } impl Router { @@ -37,6 +41,7 @@ impl Router { quick_dynamic: false, }, constraints: HashMap::new(), + percent_encoding: false, }; router.constraint::().unwrap(); @@ -74,7 +79,31 @@ impl Router { } } + pub fn percent_encoding(&mut self, enabled: bool) { + self.percent_encoding = enabled; + } + + // FIXME: This will likely match when it shouldn't. + // I guess the correct approach would be to percent-decode the input, and see if it's equal or not. + fn contains_percent_encoding(input: &str) -> bool { + let chars: Vec = input.chars().collect(); + for window in chars.windows(3) { + if let [a, b, c] = window { + if *a == '%' && b.is_ascii_hexdigit() && c.is_ascii_hexdigit() { + return true; + } + } + } + + false + } + pub fn insert(&mut self, route: &str, value: T) -> Result<(), InsertError> { + // Ensure this isn't already encoded. + if Self::contains_percent_encoding(route) { + return Err(InsertError::EncodedPath); + } + let path = Arc::from(route); let mut parts = Parts::new(route.as_bytes())?; @@ -102,16 +131,31 @@ impl Router { self.root.delete(&mut parts) } - pub fn search<'k, 'v>(&'k self, path: &'v str) -> Option> { + // FIXME: Let's try a 'Path' style approach to the returned value. + // Like what actix/ntex does. + // Maybe instead of returning anything, we just set something everything in Path? + // i.e. merge Match with Path? + pub fn search<'k, 'v>( + &'k self, + path: &'v mut Path, + ) -> Result>, SearchError> { + if self.percent_encoding { + path.percent_decode()?; + } + let mut parameters = smallvec![]; - let node = self + let Some(node) = self .root - .search(path.as_bytes(), &mut parameters, &self.constraints)?; + .search(&path.inner, &mut parameters, &self.constraints) + else { + return Ok(None); + }; + + let Some(data) = node.data.as_ref() else { + return Ok(None); + }; - Some(Match { - data: node.data.as_ref()?, - parameters, - }) + Ok(Some(Match { data, parameters })) } } diff --git a/tests/common.rs b/tests/common.rs index 1b97b8d7..af856f5b 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -1,6 +1,7 @@ use std::{fmt::Debug, sync::Arc}; use wayfind::{ node::search::{Match, Parameter}, + path::Path, router::Router, }; @@ -52,7 +53,9 @@ pub fn assert_router_match<'a, T: PartialEq + Debug>( input: &'a str, expected: Option>, ) { - let Some(Match { data, parameters }) = router.search(input) else { + let mut path = Path::new(input); + let Some(Match { data, parameters }) = router.search(&mut path).expect("Failed to match!") + else { assert!(expected.is_none(), "No match found for input: {input}"); return; }; diff --git a/tests/encoding.rs b/tests/encoding.rs new file mode 100644 index 00000000..e4bb3905 --- /dev/null +++ b/tests/encoding.rs @@ -0,0 +1,140 @@ +use std::error::Error; +use wayfind::router::Router; + +#[path = "./common.rs"] +mod common; + +fn router(percent_encoding: bool) -> Result, Box> { + let mut router = Router::new(); + router.percent_encoding(percent_encoding); + + router.insert("/hello@world", 1)?; + router.insert("/hello-world.com", 2)?; + router.insert("/hello world", 3)?; + router.insert("/こんにちは", 4)?; + router.insert("/50%", 5)?; + router.insert("/hello world@example.com", 6)?; + router.insert("/path/to/resource with spaces", 7)?; + router.insert("/encoded/slash", 8)?; + + Ok(router) +} + +#[test] +fn percent_encoding_disabled() -> Result<(), Box> { + let router = router(false)?; + + assert_router_matches!(router, { + "/hello%40world" => None + "/hello@world" => { + path: "/hello@world", + value: 1 + } + "/hello-world.com" => { + path: "/hello-world.com", + value: 2 + } + "/hello%20world" => None + "/hello world" => { + path: "/hello world", + value: 3 + } + "/%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF" => None + "/こんにちは" => { + path: "/こんにちは", + value: 4 + } + "/50%25" => None + "/50%" => { + path: "/50%", + value: 5 + } + "/hello%20world%40example.com" => None + "/hello world@example.com" => { + path: "/hello world@example.com", + value: 6 + } + "/path/to/resource%20with%20spaces" => None + "/path/to/resource with spaces" => { + path: "/path/to/resource with spaces", + value: 7 + } + "/encoded%2Fslash" => None + "/encoded/slash" => { + path: "/encoded/slash", + value: 8 + } + }); + + Ok(()) +} + +#[test] +fn percent_encoding_enabled() -> Result<(), Box> { + let router = router(true)?; + + assert_router_matches!(router, { + "/hello%40world" => { + path: "/hello@world", + value: 1 + } + "/hello@world" => { + path: "/hello@world", + value: 1 + } + "/hello-world.com" => { + path: "/hello-world.com", + value: 2 + } + "/hello%20world" => { + path: "/hello world", + value: 3 + } + "/hello world" => { + path: "/hello world", + value: 3 + } + "/%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF" => { + path: "/こんにちは", + value: 4 + } + "/こんにちは" => { + path: "/こんにちは", + value: 4 + } + "/50%25" => { + path: "/50%", + value: 5 + } + "/50%" => { + path: "/50%", + value: 5 + } + "/hello%20world%40example.com" => { + path: "/hello world@example.com", + value: 6 + } + "/hello world@example.com" => { + path: "/hello world@example.com", + value: 6 + } + "/path/to/resource%20with%20spaces" => { + path: "/path/to/resource with spaces", + value: 7 + } + "/path/to/resource with spaces" => { + path: "/path/to/resource with spaces", + value: 7 + } + "/encoded%2Fslash" => { + path: "/encoded/slash", + value: 8 + } + "/encoded/slash" => { + path: "/encoded/slash", + value: 8 + } + }); + + Ok(()) +} diff --git a/tests/poem.rs b/tests/poem.rs index 81f42998..c9d0d3ea 100644 --- a/tests/poem.rs +++ b/tests/poem.rs @@ -495,6 +495,7 @@ fn test_issue_275() -> Result<(), Box> { #[test] fn test_percent_decoded() -> Result<(), Box> { let mut router = Router::new(); + router.percent_encoding(true); router.insert("/a/{id}", 1)?; insta::assert_snapshot!(router, @r###" @@ -511,12 +512,11 @@ fn test_percent_decoded() -> Result<(), Box> { "id" => "abc" } } - // NOTE: Different behaviour: poem would decode to `你好` - "/a/%E4%BD%A0%E5%A5%BD" => { + "/a/你好" => { path: "/a/{id}", value: 1, params: { - "id" => "%E4%BD%A0%E5%A5%BD" + "id" => "你好" } } }); diff --git a/tests/uncommon.rs b/tests/uncommon.rs index fe214f9d..f6507aaa 100644 --- a/tests/uncommon.rs +++ b/tests/uncommon.rs @@ -6,9 +6,9 @@ use wayfind::router::Router; #[path = "./common.rs"] mod common; -#[test] -fn uncommon() -> Result<(), Box> { +fn router(percent_encoding: bool) -> Result, Box> { let mut router = Router::new(); + router.percent_encoding(percent_encoding); // Japanese (Konnichiwa) router.insert("/こんにちは", 0)?; @@ -47,14 +47,12 @@ fn uncommon() -> Result<(), Box> { // Unicode Control router.insert("/\u{0001}\u{0002}\u{0003}", 12)?; - // Punycode (müller.de) - router.insert("/xn--mller-kva.de", 13)?; - - // URL Encoded (😊) - router.insert("/%F0%9F%98%8A", 14)?; + Ok(router) +} - // Double URL Encoded (💀) - router.insert("/%25F0%259F%2592%2580", 15)?; +#[test] +fn uncommon() -> Result<(), Box> { + let router = router(false)?; assert_router_matches!(router, { // Japanese (Konnichiwa) @@ -144,28 +142,155 @@ fn uncommon() -> Result<(), Box> { value: 12 } "/123" => None + }); + + Ok(()) +} + +#[test] +fn uncommon_with_percent_encoding() -> Result<(), Box> { + let router = router(true)?; + + assert_router_matches!(router, { + // Japanese (Konnichiwa) + "/こんにちは" => { + path: "/こんにちは", + value: 0 + } + "/%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF" => { + path: "/こんにちは", + value: 0 + } + "/こんにちわ" => None + + // Russian (Privet) + "/привет" => { + path: "/привет", + value: 1 + } + "/%D0%BF%D1%80%D0%B8%D0%B2%D0%B5%D1%82" => { + path: "/привет", + value: 1 + } + "/привет!" => None + + // Chinese (Nǐ Hǎo) + "/你好" => { + path: "/你好", + value: 2 + } + "/%E4%BD%A0%E5%A5%BD" => { + path: "/你好", + value: 2 + } + "/你们好" => None + + // Arabic Numerals (full-width) + "/123" => { + path: "/123", + value: 3 + } + "/%EF%BC%91%EF%BC%92%EF%BC%93" => { + path: "/123", + value: 3 + } + "/123" => None + + // Null Byte + "/null\0byte" => { + path: "/null\0byte", + value: 4 + } + "/null%00byte" => { + path: "/null\0byte", + value: 4 + } + "/nullbyte" => None + + // Emoji + "/⚽️🏀🏈" => { + path: "/⚽️🏀🏈", + value: 5 + } + "/%E2%9A%BD%EF%B8%8F%F0%9F%8F%80%F0%9F%8F%88" => { + path: "/⚽️🏀🏈", + value: 5 + } + "/⚽️🏀" => None - // Punycode (müller.de) - "/xn--mller-kva.de" => { - path: "/xn--mller-kva.de", - value: 13 + // Unicode + "/♔♕♖♗♘♙" => { + path: "/♔♕♖♗♘♙", + value: 6 + } + "/%E2%99%94%E2%99%95%E2%99%96%E2%99%97%E2%99%98%E2%99%99" => { + path: "/♔♕♖♗♘♙", + value: 6 } - "/muller.de" => None + "/♔♕♖♗♘♟" => None - // URL Encoded (😊) - "/%F0%9F%98%8A" => { - path: "/%F0%9F%98%8A", - value: 14 + // Unicode Normalization + "/cafe\u{0301}" => { + path: "/cafe\u{0301}", + value: 7 + } + "/café" => { + path: "/café", + value: 8 } - "/😊" => None + "/cafe%CC%81" => { + path: "/cafe\u{0301}", + value: 7 + } + "/caf%C3%A9" => { + path: "/café", + value: 8 + } + "/cafe" => None - // Double URL Encoded (💀) - "/%25F0%259F%2592%2580" => { - path: "/%25F0%259F%2592%2580", - value: 15 + // Unicode Zero Width + "/abc\u{200B}123" => { + path: "/abc\u{200B}123", + value: 9 } - "/%F0%9F%92%80" => None - "/💀" => None + "/abc%E2%80%8B123" => { + path: "/abc\u{200B}123", + value: 9 + } + "/abc123" => None + + // Unicode Right to Left + "/hello\u{202E}dlrow" => { + path: "/hello\u{202E}dlrow", + value: 10 + } + "/hello%E2%80%AEdlrow" => { + path: "/hello\u{202E}dlrow", + value: 10 + } + "/helloworld" => None + + // Unicode Whitespace + "/\u{2000}\u{2001}\u{2002}" => { + path: "/\u{2000}\u{2001}\u{2002}", + value: 11 + } + "/%E2%80%80%E2%80%81%E2%80%82" => { + path: "/\u{2000}\u{2001}\u{2002}", + value: 11 + } + "/ " => None + + // Unicode Control + "/\u{0001}\u{0002}\u{0003}" => { + path: "/\u{0001}\u{0002}\u{0003}", + value: 12 + } + "/%01%02%03" => { + path: "/\u{0001}\u{0002}\u{0003}", + value: 12 + } + "/123" => None }); Ok(()) From dfe83cf4960522872f3157c5f2177a7be993b642 Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Mon, 19 Aug 2024 15:44:26 +0100 Subject: [PATCH 2/7] Optimize decoding --- src/path.rs | 23 +++++++++++++++++------ src/router.rs | 6 +----- tests/poem.rs | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/path.rs b/src/path.rs index 5dcb52df..5980be6e 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,19 +1,30 @@ use crate::{decode::percent_decode, errors::decode::DecodeError}; +use std::borrow::Cow; -pub struct Path { - pub inner: Vec, +pub struct Path<'a> { + original: Cow<'a, [u8]>, + decoded: Option>, } -impl Path { +impl<'a> Path<'a> { #[must_use] - pub fn new(path: &str) -> Self { + pub const fn new(path: &'a str) -> Self { Self { - inner: path.as_bytes().to_vec(), + original: Cow::Borrowed(path.as_bytes()), + decoded: None, } } pub fn percent_decode(&mut self) -> Result<(), DecodeError> { - self.inner = percent_decode(&self.inner)?; + if self.decoded.is_none() { + self.decoded = Some(percent_decode(&self.original)?); + } + Ok(()) } + + #[must_use] + pub fn as_bytes(&self) -> &[u8] { + self.decoded.as_deref().unwrap_or(&self.original) + } } diff --git a/src/router.rs b/src/router.rs index 4b737574..881c7bc5 100644 --- a/src/router.rs +++ b/src/router.rs @@ -131,10 +131,6 @@ impl Router { self.root.delete(&mut parts) } - // FIXME: Let's try a 'Path' style approach to the returned value. - // Like what actix/ntex does. - // Maybe instead of returning anything, we just set something everything in Path? - // i.e. merge Match with Path? pub fn search<'k, 'v>( &'k self, path: &'v mut Path, @@ -146,7 +142,7 @@ impl Router { let mut parameters = smallvec![]; let Some(node) = self .root - .search(&path.inner, &mut parameters, &self.constraints) + .search(path.as_bytes(), &mut parameters, &self.constraints) else { return Ok(None); }; diff --git a/tests/poem.rs b/tests/poem.rs index c9d0d3ea..ae0820d4 100644 --- a/tests/poem.rs +++ b/tests/poem.rs @@ -512,7 +512,7 @@ fn test_percent_decoded() -> Result<(), Box> { "id" => "abc" } } - "/a/你好" => { + "/a/%E4%BD%A0%E5%A5%BD" => { path: "/a/{id}", value: 1, params: { From 8068ad9d987c2207d567231f6ba2903794399546 Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Mon, 19 Aug 2024 16:31:16 +0100 Subject: [PATCH 3/7] Better lifetime names --- src/node/search.rs | 82 +++++++++++++++++++++++----------------------- src/path.rs | 15 ++++----- src/router.rs | 8 ++--- 3 files changed, 52 insertions(+), 53 deletions(-) diff --git a/src/node/search.rs b/src/node/search.rs index d006b307..21369dba 100644 --- a/src/node/search.rs +++ b/src/node/search.rs @@ -3,24 +3,24 @@ use smallvec::{smallvec, SmallVec}; use std::collections::HashMap; #[derive(Debug, Eq, PartialEq)] -pub struct Match<'k, 'v, T> { - pub data: &'k NodeData, - pub parameters: SmallVec<[Parameter<'k, 'v>; 4]>, +pub struct Match<'router, 'path, T> { + pub data: &'router NodeData, + pub parameters: SmallVec<[Parameter<'router, 'path>; 4]>, } #[derive(Clone, Debug, Eq, PartialEq)] -pub struct Parameter<'k, 'v> { - pub key: &'k str, - pub value: &'v str, +pub struct Parameter<'router, 'path> { + pub key: &'router str, + pub value: &'path str, } impl Node { - pub fn search<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + pub fn search<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { if path.is_empty() { return if self.data.is_some() { Some(self) @@ -48,12 +48,12 @@ impl Node { None } - fn search_static<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_static<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for static_child in &self.static_children { // NOTE: This was previously a "starts_with" call, but turns out this is much faster. if path.len() >= static_child.prefix.len() @@ -71,12 +71,12 @@ impl Node { None } - fn search_dynamic<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_dynamic<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { if self.quick_dynamic { self.search_dynamic_segment(path, parameters, constraints) } else { @@ -90,12 +90,12 @@ impl Node { // Path: `my.long.file.txt` // Name: `my.long.file` // Ext: `txt` - fn search_dynamic_inline<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_dynamic_inline<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for dynamic_child in &self.dynamic_children { let mut consumed = 0; @@ -138,12 +138,12 @@ impl Node { } // Doesn't support inline dynamic sections, e.g. `{name}.{extension}`, only `/{segment}/` - fn search_dynamic_segment<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_dynamic_segment<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for dynamic_child in &self.dynamic_children { let segment_end = path.iter().position(|&b| b == b'/').unwrap_or(path.len()); @@ -169,12 +169,12 @@ impl Node { None } - fn search_wildcard<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_wildcard<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for wildcard_child in &self.wildcard_children { let mut consumed = 0; let mut remaining_path = path; @@ -232,12 +232,12 @@ impl Node { None } - fn search_end_wildcard<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_end_wildcard<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for end_wildcard in &self.end_wildcard_children { if !Self::check_constraint(end_wildcard, path, constraints) { continue; diff --git a/src/path.rs b/src/path.rs index 5980be6e..5efa0c80 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,23 +1,22 @@ use crate::{decode::percent_decode, errors::decode::DecodeError}; -use std::borrow::Cow; -pub struct Path<'a> { - original: Cow<'a, [u8]>, +pub struct Path<'path> { + original: &'path [u8], decoded: Option>, } -impl<'a> Path<'a> { +impl<'path> Path<'path> { #[must_use] - pub const fn new(path: &'a str) -> Self { + pub const fn new(path: &'path str) -> Self { Self { - original: Cow::Borrowed(path.as_bytes()), + original: path.as_bytes(), decoded: None, } } pub fn percent_decode(&mut self) -> Result<(), DecodeError> { if self.decoded.is_none() { - self.decoded = Some(percent_decode(&self.original)?); + self.decoded = Some(percent_decode(self.original)?); } Ok(()) @@ -25,6 +24,6 @@ impl<'a> Path<'a> { #[must_use] pub fn as_bytes(&self) -> &[u8] { - self.decoded.as_deref().unwrap_or(&self.original) + self.decoded.as_deref().unwrap_or(self.original) } } diff --git a/src/router.rs b/src/router.rs index 881c7bc5..7c22b5ba 100644 --- a/src/router.rs +++ b/src/router.rs @@ -131,10 +131,10 @@ impl Router { self.root.delete(&mut parts) } - pub fn search<'k, 'v>( - &'k self, - path: &'v mut Path, - ) -> Result>, SearchError> { + pub fn search<'router, 'path>( + &'router self, + path: &'path mut Path, + ) -> Result>, SearchError> { if self.percent_encoding { path.percent_decode()?; } From 18e1b637950c1cffd92720a45c7ff3e88ae5bb1f Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Mon, 19 Aug 2024 19:06:30 +0100 Subject: [PATCH 4/7] Always perform percent-decoding --- Cargo.lock | 1 + Cargo.toml | 1 + benches/matchit.rs | 4 +- benches/path_tree.rs | 4 +- examples/axum-fork/src/routing/path_router.rs | 12 +- examples/hyper/src/main.rs | 7 +- src/decode.rs | 39 ------- src/errors.rs | 1 - src/errors/decode.rs | 12 +- src/errors/insert.rs | 10 +- src/errors/search.rs | 23 ---- src/lib.rs | 1 - src/path.rs | 28 ++--- src/router.rs | 52 ++------- tests/common.rs | 5 +- tests/encoding.rs | 61 +--------- tests/poem.rs | 1 - tests/uncommon.rs | 108 +----------------- 18 files changed, 57 insertions(+), 313 deletions(-) delete mode 100644 src/decode.rs delete mode 100644 src/errors/search.rs diff --git a/Cargo.lock b/Cargo.lock index 9f4f4371..1bfcc8d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2207,6 +2207,7 @@ dependencies = [ "matchit", "ntex-router", "path-tree", + "percent-encoding", "regex", "route-recognizer", "routefinder", diff --git a/Cargo.toml b/Cargo.toml index 05f7c4c5..b1373eeb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,6 +63,7 @@ workspace = true [dependencies] smallvec = "1.13" +percent-encoding = "2.3" [dev-dependencies] # Snapshots diff --git a/benches/matchit.rs b/benches/matchit.rs index ec99b3d8..d65c80c9 100644 --- a/benches/matchit.rs +++ b/benches/matchit.rs @@ -19,8 +19,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let mut path = wayfind::path::Path::new(route); - let search = wayfind.search(&mut path).unwrap().unwrap(); + let path = wayfind::path::Path::new(route).unwrap(); + let search = wayfind.search(&path).unwrap(); let _ = search .parameters .iter() diff --git a/benches/path_tree.rs b/benches/path_tree.rs index a71fa9ea..8e21ee29 100644 --- a/benches/path_tree.rs +++ b/benches/path_tree.rs @@ -19,8 +19,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let mut path = wayfind::path::Path::new(path); - let search = wayfind.search(&mut path).unwrap().unwrap(); + let path = wayfind::path::Path::new(path).unwrap(); + let search = wayfind.search(&path).unwrap(); assert_eq!(search.data.value, index); let _ = search .parameters diff --git a/examples/axum-fork/src/routing/path_router.rs b/examples/axum-fork/src/routing/path_router.rs index 9dd86fe9..e3554b77 100644 --- a/examples/axum-fork/src/routing/path_router.rs +++ b/examples/axum-fork/src/routing/path_router.rs @@ -331,8 +331,8 @@ where } let path = req.uri().path().to_owned(); - let mut wayfind_path = Path::new(&path); - let result = match self.node.matches(&mut wayfind_path) { + let wayfind_path = Path::new(&path).expect("Invalid path!"); + let result = match self.node.matches(&wayfind_path) { Some(match_) => { let id = match_.data.value; @@ -366,8 +366,8 @@ where } pub(super) fn replace_endpoint(&mut self, path: &str, endpoint: Endpoint) { - let mut wayfind_path = Path::new(path); - if let Some(match_) = self.node.matches(&mut wayfind_path) { + let wayfind_path = Path::new(path).expect("Invalid path!"); + if let Some(match_) = self.node.matches(&wayfind_path) { let id = match_.data.value; self.routes.insert(id, endpoint); return; @@ -437,8 +437,8 @@ impl Node { Ok(()) } - fn matches<'n, 'p>(&'n self, path: &'p mut Path) -> Option> { - self.inner.search(path).expect("Failed to match!") + fn matches<'n, 'p>(&'n self, path: &'p Path) -> Option> { + self.inner.search(path) } } diff --git a/examples/hyper/src/main.rs b/examples/hyper/src/main.rs index 72720bb6..cb28876b 100644 --- a/examples/hyper/src/main.rs +++ b/examples/hyper/src/main.rs @@ -65,11 +65,8 @@ async fn main() -> Result<(), anyhow::Error> { let router = Arc::clone(&router); async move { let path = request.uri().path(); - let mut wayfind_path = Path::new(path); - let matches = router - .search(&mut wayfind_path) - .expect("Failed to match!") - .expect("Failed to match!"); + let wayfind_path = Path::new(path).expect("Invalid path!"); + let matches = router.search(&wayfind_path).expect("Failed to match!"); let handler = &matches.data.value; let parameters = &matches.parameters; diff --git a/src/decode.rs b/src/decode.rs deleted file mode 100644 index 71f4df2f..00000000 --- a/src/decode.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::errors::decode::DecodeError; - -pub(crate) fn percent_decode(input: &[u8]) -> Result, DecodeError> { - if !input.contains(&b'%') { - return Ok(input.to_vec()); - } - - let mut output = Vec::with_capacity(input.len()); - - let mut i = 0; - let len = input.len(); - - while i < len { - if input[i] == b'%' && i + 2 < len { - let a = input[i + 1]; - let b = input[i + 2]; - output.push(decode_hex(a, b)?); - i += 3; - } else { - output.push(input[i]); - i += 1; - } - } - - Ok(output) -} - -#[allow(clippy::cast_possible_truncation)] -fn decode_hex(a: u8, b: u8) -> Result { - let a = (a as char) - .to_digit(16) - .ok_or(DecodeError::InvalidEncoding)?; - - let b = (b as char) - .to_digit(16) - .ok_or(DecodeError::InvalidEncoding)?; - - Ok((a as u8) << 4 | (b as u8)) -} diff --git a/src/errors.rs b/src/errors.rs index 48a8c58a..8f150938 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -3,4 +3,3 @@ pub mod decode; pub mod delete; pub mod insert; pub mod route; -pub mod search; diff --git a/src/errors/decode.rs b/src/errors/decode.rs index cc1e22ed..3b35339a 100644 --- a/src/errors/decode.rs +++ b/src/errors/decode.rs @@ -1,8 +1,8 @@ -use std::{error::Error, fmt::Display}; +use std::{error::Error, fmt::Display, str::Utf8Error}; #[derive(Debug, PartialEq, Eq)] pub enum DecodeError { - InvalidEncoding, + Utf8Error(Utf8Error), } impl Error for DecodeError {} @@ -10,7 +10,13 @@ impl Error for DecodeError {} impl Display for DecodeError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::InvalidEncoding => write!(f, "Invalid Encoding"), + Self::Utf8Error(error) => error.fmt(f), } } } + +impl From for DecodeError { + fn from(error: Utf8Error) -> Self { + Self::Utf8Error(error) + } +} diff --git a/src/errors/insert.rs b/src/errors/insert.rs index c4be0b45..f9e20a72 100644 --- a/src/errors/insert.rs +++ b/src/errors/insert.rs @@ -1,9 +1,10 @@ -use super::route::RouteError; +use super::{decode::DecodeError, route::RouteError}; use std::{error::Error, fmt::Display}; #[derive(Debug, PartialEq, Eq)] pub enum InsertError { RouteError(RouteError), + DecodeError(DecodeError), EncodedPath, DuplicatePath, UnknownConstraint, @@ -15,6 +16,7 @@ impl Display for InsertError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::RouteError(error) => error.fmt(f), + Self::DecodeError(error) => error.fmt(f), Self::EncodedPath => write!(f, "Encoded Path"), Self::DuplicatePath => write!(f, "Duplicate Path"), Self::UnknownConstraint => write!(f, "Unknown Constraint"), @@ -27,3 +29,9 @@ impl From for InsertError { Self::RouteError(error) } } + +impl From for InsertError { + fn from(error: DecodeError) -> Self { + Self::DecodeError(error) + } +} diff --git a/src/errors/search.rs b/src/errors/search.rs deleted file mode 100644 index a3e5e543..00000000 --- a/src/errors/search.rs +++ /dev/null @@ -1,23 +0,0 @@ -use super::decode::DecodeError; -use std::{error::Error, fmt::Display}; - -#[derive(Debug, PartialEq, Eq)] -pub enum SearchError { - DecodeError(DecodeError), -} - -impl Error for SearchError {} - -impl Display for SearchError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::DecodeError(error) => error.fmt(f), - } - } -} - -impl From for SearchError { - fn from(error: DecodeError) -> Self { - Self::DecodeError(error) - } -} diff --git a/src/lib.rs b/src/lib.rs index 3eedeb24..fb56f444 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,4 @@ pub mod constraints; -pub mod decode; pub mod errors; pub mod node; pub mod parts; diff --git a/src/path.rs b/src/path.rs index 5efa0c80..5a7f15c5 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,29 +1,19 @@ -use crate::{decode::percent_decode, errors::decode::DecodeError}; +use crate::errors::decode::DecodeError; +use std::borrow::Cow; pub struct Path<'path> { - original: &'path [u8], - decoded: Option>, + decoded: Cow<'path, str>, } impl<'path> Path<'path> { - #[must_use] - pub const fn new(path: &'path str) -> Self { - Self { - original: path.as_bytes(), - decoded: None, - } - } - - pub fn percent_decode(&mut self) -> Result<(), DecodeError> { - if self.decoded.is_none() { - self.decoded = Some(percent_decode(self.original)?); - } - - Ok(()) + pub fn new(path: &'path str) -> Result { + Ok(Self { + decoded: percent_encoding::percent_decode_str(path).decode_utf8()?, + }) } #[must_use] - pub fn as_bytes(&self) -> &[u8] { - self.decoded.as_deref().unwrap_or(self.original) + pub fn decoded_bytes(&'path self) -> &'path [u8] { + self.decoded.as_bytes() } } diff --git a/src/router.rs b/src/router.rs index 7c22b5ba..04ff924c 100644 --- a/src/router.rs +++ b/src/router.rs @@ -1,8 +1,6 @@ use crate::{ constraints::Constraint, - errors::{ - constraint::ConstraintError, delete::DeleteError, insert::InsertError, search::SearchError, - }, + errors::{constraint::ConstraintError, delete::DeleteError, insert::InsertError}, node::{search::Match, Node, NodeData, NodeKind}, parts::{Part, Parts}, path::Path, @@ -19,7 +17,6 @@ use std::{ pub struct Router { root: Node, constraints: HashMap, fn(&str) -> bool>, - percent_encoding: bool, } impl Router { @@ -41,7 +38,6 @@ impl Router { quick_dynamic: false, }, constraints: HashMap::new(), - percent_encoding: false, }; router.constraint::().unwrap(); @@ -79,28 +75,8 @@ impl Router { } } - pub fn percent_encoding(&mut self, enabled: bool) { - self.percent_encoding = enabled; - } - - // FIXME: This will likely match when it shouldn't. - // I guess the correct approach would be to percent-decode the input, and see if it's equal or not. - fn contains_percent_encoding(input: &str) -> bool { - let chars: Vec = input.chars().collect(); - for window in chars.windows(3) { - if let [a, b, c] = window { - if *a == '%' && b.is_ascii_hexdigit() && c.is_ascii_hexdigit() { - return true; - } - } - } - - false - } - pub fn insert(&mut self, route: &str, value: T) -> Result<(), InsertError> { - // Ensure this isn't already encoded. - if Self::contains_percent_encoding(route) { + if route.as_bytes() != Path::new(route)?.decoded_bytes() { return Err(InsertError::EncodedPath); } @@ -133,25 +109,17 @@ impl Router { pub fn search<'router, 'path>( &'router self, - path: &'path mut Path, - ) -> Result>, SearchError> { - if self.percent_encoding { - path.percent_decode()?; - } - + path: &'path Path, + ) -> Option> { let mut parameters = smallvec![]; - let Some(node) = self + let node = self .root - .search(path.as_bytes(), &mut parameters, &self.constraints) - else { - return Ok(None); - }; - - let Some(data) = node.data.as_ref() else { - return Ok(None); - }; + .search(path.decoded_bytes(), &mut parameters, &self.constraints)?; - Ok(Some(Match { data, parameters })) + Some(Match { + data: node.data.as_ref()?, + parameters, + }) } } diff --git a/tests/common.rs b/tests/common.rs index af856f5b..87eb177b 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -53,9 +53,8 @@ pub fn assert_router_match<'a, T: PartialEq + Debug>( input: &'a str, expected: Option>, ) { - let mut path = Path::new(input); - let Some(Match { data, parameters }) = router.search(&mut path).expect("Failed to match!") - else { + let path = Path::new(input).expect("Invalid path!"); + let Some(Match { data, parameters }) = router.search(&path) else { assert!(expected.is_none(), "No match found for input: {input}"); return; }; diff --git a/tests/encoding.rs b/tests/encoding.rs index e4bb3905..72b0b8cd 100644 --- a/tests/encoding.rs +++ b/tests/encoding.rs @@ -4,10 +4,9 @@ use wayfind::router::Router; #[path = "./common.rs"] mod common; -fn router(percent_encoding: bool) -> Result, Box> { +#[test] +fn percent_encoding() -> Result<(), Box> { let mut router = Router::new(); - router.percent_encoding(percent_encoding); - router.insert("/hello@world", 1)?; router.insert("/hello-world.com", 2)?; router.insert("/hello world", 3)?; @@ -17,62 +16,6 @@ fn router(percent_encoding: bool) -> Result, Box> { router.insert("/path/to/resource with spaces", 7)?; router.insert("/encoded/slash", 8)?; - Ok(router) -} - -#[test] -fn percent_encoding_disabled() -> Result<(), Box> { - let router = router(false)?; - - assert_router_matches!(router, { - "/hello%40world" => None - "/hello@world" => { - path: "/hello@world", - value: 1 - } - "/hello-world.com" => { - path: "/hello-world.com", - value: 2 - } - "/hello%20world" => None - "/hello world" => { - path: "/hello world", - value: 3 - } - "/%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF" => None - "/こんにちは" => { - path: "/こんにちは", - value: 4 - } - "/50%25" => None - "/50%" => { - path: "/50%", - value: 5 - } - "/hello%20world%40example.com" => None - "/hello world@example.com" => { - path: "/hello world@example.com", - value: 6 - } - "/path/to/resource%20with%20spaces" => None - "/path/to/resource with spaces" => { - path: "/path/to/resource with spaces", - value: 7 - } - "/encoded%2Fslash" => None - "/encoded/slash" => { - path: "/encoded/slash", - value: 8 - } - }); - - Ok(()) -} - -#[test] -fn percent_encoding_enabled() -> Result<(), Box> { - let router = router(true)?; - assert_router_matches!(router, { "/hello%40world" => { path: "/hello@world", diff --git a/tests/poem.rs b/tests/poem.rs index ae0820d4..9b612f7d 100644 --- a/tests/poem.rs +++ b/tests/poem.rs @@ -495,7 +495,6 @@ fn test_issue_275() -> Result<(), Box> { #[test] fn test_percent_decoded() -> Result<(), Box> { let mut router = Router::new(); - router.percent_encoding(true); router.insert("/a/{id}", 1)?; insta::assert_snapshot!(router, @r###" diff --git a/tests/uncommon.rs b/tests/uncommon.rs index f6507aaa..aa3bcad0 100644 --- a/tests/uncommon.rs +++ b/tests/uncommon.rs @@ -6,9 +6,9 @@ use wayfind::router::Router; #[path = "./common.rs"] mod common; -fn router(percent_encoding: bool) -> Result, Box> { +#[test] +fn uncommon() -> Result<(), Box> { let mut router = Router::new(); - router.percent_encoding(percent_encoding); // Japanese (Konnichiwa) router.insert("/こんにちは", 0)?; @@ -47,110 +47,6 @@ fn router(percent_encoding: bool) -> Result, Box> { // Unicode Control router.insert("/\u{0001}\u{0002}\u{0003}", 12)?; - Ok(router) -} - -#[test] -fn uncommon() -> Result<(), Box> { - let router = router(false)?; - - assert_router_matches!(router, { - // Japanese (Konnichiwa) - "/こんにちは" => { - path: "/こんにちは", - value: 0 - } - "/こんにちわ" => None - - // Russian (Privet) - "/привет" => { - path: "/привет", - value: 1 - } - "/привет!" => None - - // Chinese (Nǐ Hǎo) - "/你好" => { - path: "/你好", - value: 2 - } - "/你们好" => None - - // Arabic Numerals (full-width) - "/123" => { - path: "/123", - value: 3 - } - "/123" => None - - // Null Byte - "/null\0byte" => { - path: "/null\0byte", - value: 4 - } - "/nullbyte" => None - - // Emoji - "/⚽️🏀🏈" => { - path: "/⚽️🏀🏈", - value: 5 - } - "/⚽️🏀" => None - - // Unicode - "/♔♕♖♗♘♙" => { - path: "/♔♕♖♗♘♙", - value: 6 - } - "/♔♕♖♗♘♟" => None - - // Unicode Normalization - "/cafe\u{0301}" => { - path: "/cafe\u{0301}", - value: 7 - } - "/café" => { - path: "/café", - value: 8 - } - "/cafe" => None - - // Unicode Zero Width - "/abc\u{200B}123" => { - path: "/abc\u{200B}123", - value: 9 - } - "/abc123" => None - - // Unicode Right to Left - "/hello\u{202E}dlrow" => { - path: "/hello\u{202E}dlrow", - value: 10 - } - "/helloworld" => None - - // Unicode Whitespace - "/\u{2000}\u{2001}\u{2002}" => { - path: "/\u{2000}\u{2001}\u{2002}", - value: 11 - } - "/ " => None - - // Unicode Control - "/\u{0001}\u{0002}\u{0003}" => { - path: "/\u{0001}\u{0002}\u{0003}", - value: 12 - } - "/123" => None - }); - - Ok(()) -} - -#[test] -fn uncommon_with_percent_encoding() -> Result<(), Box> { - let router = router(true)?; - assert_router_matches!(router, { // Japanese (Konnichiwa) "/こんにちは" => { From 55e8da92b4871ac23ebd8e0179af08556376faaa Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Mon, 19 Aug 2024 20:12:33 +0100 Subject: [PATCH 5/7] Hand-rolled decoding --- Cargo.toml | 4 +++- src/decode.rs | 39 +++++++++++++++++++++++++++++++++++++++ src/errors/decode.rs | 12 +++--------- src/lib.rs | 1 + src/path.rs | 10 ++++++---- 5 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 src/decode.rs diff --git a/Cargo.toml b/Cargo.toml index b1373eeb..cc8b0d43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,13 +63,15 @@ workspace = true [dependencies] smallvec = "1.13" -percent-encoding = "2.3" [dev-dependencies] # Snapshots # NOTE: Keep in sync with `cargo-insta` Nix package. insta = "1.39.0" +# Encoding +percent-encoding = "2.3" + # Benchmarking criterion = { version = "0.5", features = ["html_reports"] } # NOTE: Keep in sync with `cargo-codspeed` Nix package. diff --git a/src/decode.rs b/src/decode.rs new file mode 100644 index 00000000..730c104c --- /dev/null +++ b/src/decode.rs @@ -0,0 +1,39 @@ +use crate::errors::decode::DecodeError; +use std::borrow::Cow; + +pub(crate) fn percent_decode(input: &[u8]) -> Result, DecodeError> { + if !input.contains(&b'%') { + return Ok(Cow::Borrowed(input)); + } + + let mut output = Vec::with_capacity(input.len()); + let mut i = 0; + let len = input.len(); + + while i < len { + if input[i] == b'%' && i + 2 < len { + let a = input[i + 1]; + let b = input[i + 2]; + output.push(decode_hex(a, b)?); + i += 3; + } else { + output.push(input[i]); + i += 1; + } + } + + Ok(Cow::Owned(output)) +} + +#[allow(clippy::cast_possible_truncation)] +fn decode_hex(a: u8, b: u8) -> Result { + let a = (a as char) + .to_digit(16) + .ok_or(DecodeError::InvalidEncoding)?; + + let b = (b as char) + .to_digit(16) + .ok_or(DecodeError::InvalidEncoding)?; + + Ok((a as u8) << 4 | (b as u8)) +} diff --git a/src/errors/decode.rs b/src/errors/decode.rs index 3b35339a..cc1e22ed 100644 --- a/src/errors/decode.rs +++ b/src/errors/decode.rs @@ -1,8 +1,8 @@ -use std::{error::Error, fmt::Display, str::Utf8Error}; +use std::{error::Error, fmt::Display}; #[derive(Debug, PartialEq, Eq)] pub enum DecodeError { - Utf8Error(Utf8Error), + InvalidEncoding, } impl Error for DecodeError {} @@ -10,13 +10,7 @@ impl Error for DecodeError {} impl Display for DecodeError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Utf8Error(error) => error.fmt(f), + Self::InvalidEncoding => write!(f, "Invalid Encoding"), } } } - -impl From for DecodeError { - fn from(error: Utf8Error) -> Self { - Self::Utf8Error(error) - } -} diff --git a/src/lib.rs b/src/lib.rs index fb56f444..3eedeb24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ pub mod constraints; +pub mod decode; pub mod errors; pub mod node; pub mod parts; diff --git a/src/path.rs b/src/path.rs index 5a7f15c5..e2f8fcf1 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,19 +1,21 @@ -use crate::errors::decode::DecodeError; +use crate::{decode::percent_decode, errors::decode::DecodeError}; use std::borrow::Cow; pub struct Path<'path> { - decoded: Cow<'path, str>, + _raw: &'path [u8], + decoded: Cow<'path, [u8]>, } impl<'path> Path<'path> { pub fn new(path: &'path str) -> Result { Ok(Self { - decoded: percent_encoding::percent_decode_str(path).decode_utf8()?, + _raw: path.as_bytes(), + decoded: percent_decode(path.as_bytes())?, }) } #[must_use] pub fn decoded_bytes(&'path self) -> &'path [u8] { - self.decoded.as_bytes() + &self.decoded } } From 4f5cece1cae6b7da81ef2c55cde889895975bb17 Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Mon, 19 Aug 2024 20:50:31 +0100 Subject: [PATCH 6/7] Ensure all routers percent-decode before matching --- Cargo.lock | 1 - Cargo.toml | 1 - README.md | 2 -- benches/matchit.rs | 43 +++++++++++++----------------------- benches/matchit_routes.rs | 6 +---- benches/path_tree.rs | 44 +++++++++++++------------------------ benches/path_tree_routes.rs | 6 +---- 7 files changed, 32 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1bfcc8d8..e11dc2bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2208,7 +2208,6 @@ dependencies = [ "ntex-router", "path-tree", "percent-encoding", - "regex", "route-recognizer", "routefinder", "smallvec", diff --git a/Cargo.toml b/Cargo.toml index cc8b0d43..7d85857d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,7 +82,6 @@ actix-router = "0.5.3" matchit = "0.8.3" ntex-router = "0.5.3" path-tree = "0.8.1" -regex = "1.10.6" route-recognizer = "0.3.1" routefinder = "0.5.4" xitca-router = "0.3.0" diff --git a/README.md b/README.md index d913c6b6..904a17e5 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,6 @@ In a router of 130 routes, benchmark matching 4 paths. | xitca-router | 415.02 ns | | ntex-router | 1.6291 µs | | route-recognizer | 4.3608 µs | -| regex | 4.5123 µs | | routefinder | 6.2077 µs | | actix-router | 20.722 µs | @@ -158,7 +157,6 @@ In a router of 320 routes, benchmark matching 80 paths. | ntex-router | 28.003 µs | | route-recognizer | 87.400 µs | | routefinder | 95.115 µs | -| regex | 117.12 µs | | actix-router | 176.11 µs | ## Inspirations diff --git a/benches/matchit.rs b/benches/matchit.rs index d65c80c9..6d854b62 100644 --- a/benches/matchit.rs +++ b/benches/matchit.rs @@ -5,6 +5,7 @@ use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion}; use matchit_routes::paths; +use percent_encoding::percent_decode; pub mod matchit_routes; @@ -39,7 +40,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let mut path = actix_router::Path::new(route); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let mut path = actix_router::Path::new(route.as_ref()); actix.recognize(&mut path).unwrap(); let _ = path .iter() @@ -57,7 +59,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let at = matchit.at(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let at = matchit.at(route.as_ref()).unwrap(); let _ = at .params .iter() @@ -76,7 +79,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let mut path = ntex_router::Path::new(route); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let mut path = ntex_router::Path::new(route.as_ref()); ntex.recognize(&mut path).unwrap(); let _ = path .iter() @@ -94,7 +98,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let route = path_tree.find(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let route = path_tree.find(route.as_ref()).unwrap(); let _ = route .1 .params_iter() @@ -104,27 +109,6 @@ fn benchmark(criterion: &mut Criterion) { }); }); - group.bench_function("matchit benchmarks/regex", |bencher| { - let regex_set = regex::RegexSet::new(routes!(regex)).unwrap(); - let regexes: Vec<_> = routes!(regex) - .into_iter() - .map(|pattern| regex::Regex::new(pattern).unwrap()) - .collect(); - - bencher.iter(|| { - for route in paths() { - let matches = regex_set.matches(route).into_iter().collect::>(); - let index = matches.first().unwrap(); - let captures = regexes[*index].captures(route).unwrap(); - let _ = regexes[*index] - .capture_names() - .flatten() - .filter_map(|name| captures.name(name).map(|m| (name, m.as_str()))) - .collect::>(); - } - }); - }); - group.bench_function("matchit benchmarks/route-recognizer", |bencher| { let mut route_recognizer = route_recognizer::Router::new(); for route in routes!(colon) { @@ -133,7 +117,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let recognize = route_recognizer.recognize(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let recognize = route_recognizer.recognize(route.as_ref()).unwrap(); let _ = recognize .params() .iter() @@ -151,7 +136,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let best_match = routefinder.best_match(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let best_match = routefinder.best_match(route.as_ref()).unwrap(); let _ = best_match .captures() .iter() @@ -169,7 +155,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let at = xitca.at(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let at = xitca.at(route.as_ref()).unwrap(); let _ = at .params .iter() diff --git a/benches/matchit_routes.rs b/benches/matchit_routes.rs index 6b8643f6..43e3f2b2 100644 --- a/benches/matchit_routes.rs +++ b/benches/matchit_routes.rs @@ -4,7 +4,7 @@ pub fn paths() -> impl IntoIterator { "/user/repos", "/repos/rust-lang/rust/stargazers", "/orgs/rust-lang/public_members/nikomatsakis", - "/repos/rust-lang/rust/releases/1.51.0", + "/repos/rust-lang/rust/releases/1%2E51%2E0", ] } @@ -18,10 +18,6 @@ macro_rules! routes { routes!(finish => "{p1}", "{p2}", "{p3}", "{p4}") }}; - (regex) => {{ - routes!(finish => "(?.*)", "(?.*)", "(?.*)", "(?.*)") - }}; - (finish => $p1:literal, $p2:literal, $p3:literal, $p4:literal) => {{ [ concat!("/authorizations"), diff --git a/benches/path_tree.rs b/benches/path_tree.rs index 8e21ee29..522f00e0 100644 --- a/benches/path_tree.rs +++ b/benches/path_tree.rs @@ -5,6 +5,7 @@ use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion}; use path_tree_routes::paths; +use percent_encoding::percent_decode; pub mod path_tree_routes; @@ -40,7 +41,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let mut path = actix_router::Path::new(path); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let mut path = actix_router::Path::new(path.as_ref()); let n = router.recognize(&mut path).unwrap(); assert_eq!(*n.0, index); let _ = path @@ -59,7 +61,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = matcher.at(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = matcher.at(path.as_ref()).unwrap(); assert_eq!(*n.value, index); let _ = n .params @@ -79,7 +82,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let mut path = ntex_router::Path::new(path); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let mut path = ntex_router::Path::new(path.as_ref()); let n = router.recognize(&mut path).unwrap(); assert_eq!(*n.0, index); let _ = path @@ -98,7 +102,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = tree.find(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = tree.find(path.as_ref()).unwrap(); assert_eq!(*n.0, index); let _ = n.1.params_iter() @@ -108,28 +113,6 @@ fn benchmark(criterion: &mut Criterion) { }); }); - group.bench_function("path-tree benchmarks/regex", |bencher| { - let regex_set = regex::RegexSet::new(routes!(regex)).unwrap(); - let regexes: Vec<_> = routes!(regex) - .into_iter() - .map(|pattern| regex::Regex::new(pattern).unwrap()) - .collect(); - - bencher.iter(|| { - for (index, path) in paths() { - let matches = regex_set.matches(path).into_iter().collect::>(); - assert!(matches.contains(&index)); - let i = matches.first().unwrap(); - let captures = regexes[*i].captures(path).unwrap(); - let _ = regexes[*i] - .capture_names() - .flatten() - .filter_map(|name| captures.name(name).map(|m| (name, m.as_str()))) - .collect::>(); - } - }); - }); - group.bench_function("path-tree benchmarks/route-recognizer", |bencher| { let mut router = route_recognizer::Router::::new(); for (index, route) in routes!(colon).iter().enumerate() { @@ -138,7 +121,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = router.recognize(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = router.recognize(path.as_ref()).unwrap(); assert_eq!(**n.handler(), index); let _ = n .params() @@ -157,7 +141,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = router.best_match(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = router.best_match(path.as_ref()).unwrap(); assert_eq!(*n, index); let _ = n .captures() @@ -176,7 +161,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = xitca.at(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = xitca.at(path.as_ref()).unwrap(); assert_eq!(*n.value, index); let _ = n .params diff --git a/benches/path_tree_routes.rs b/benches/path_tree_routes.rs index 7830d755..3aac7cbc 100644 --- a/benches/path_tree_routes.rs +++ b/benches/path_tree_routes.rs @@ -37,7 +37,7 @@ pub fn paths() -> impl IntoIterator { "/issues", "/legacy/issues/search/rust-lang/rust/987/1597", "/legacy/repos/search/1597", - "/legacy/user/email/rust@rust-lang.org", + "/legacy/user/email/rust%40rust-lang.org", "/legacy/user/search/1597", "/licenses", "/licenses/mit", @@ -96,10 +96,6 @@ macro_rules! routes { routes!(finish => "{p1}", "{p2}", "{p3}", "{p4}") }}; - (regex) => {{ - routes!(finish => "(?.*)", "(?.*)", "(?.*)", "(?.*)") - }}; - (finish => $p1:literal, $p2:literal, $p3:literal, $p4:literal) => {{ [ concat!("/app"), From 60a3fd66073b9556e8f3aa52ffa410b66cfebc67 Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Mon, 19 Aug 2024 21:01:22 +0100 Subject: [PATCH 7/7] Update README stats --- README.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 904a17e5..d3000853 100644 --- a/README.md +++ b/README.md @@ -134,30 +134,30 @@ Check out our [codspeed results](https://codspeed.io/DuskSystems/wayfind/benchma In a router of 130 routes, benchmark matching 4 paths. | Library | Time | -|-----------|------| -| wayfind | 210.33 ns | -| matchit | 310.24 ns | -| path-tree | 406.26 ns | -| xitca-router | 415.02 ns | -| ntex-router | 1.6291 µs | -| route-recognizer | 4.3608 µs | -| routefinder | 6.2077 µs | -| actix-router | 20.722 µs | +|---------|------| +| wayfind | 301.64 ns | +| matchit | 471.11 ns | +| xitca-router | 568.31 ns | +| path-tree | 586.17 ns | +| ntex-router | 1.7905 µs | +| route-recognizer | 4.5652 µs | +| routefinder | 6.6322 µs | +| actix-router | 21.162 µs | ### `path-tree` inspired benches In a router of 320 routes, benchmark matching 80 paths. | Library | Time | -|-----------|------| -| wayfind | 3.5117 µs | -| matchit | 6.8657 µs | -| path-tree | 7.5262 µs | -| xitca-router | 8.5490 µs | -| ntex-router | 28.003 µs | -| route-recognizer | 87.400 µs | -| routefinder | 95.115 µs | -| actix-router | 176.11 µs | +|---------|------| +| wayfind | 3.9211 µs | +| matchit | 8.9698 µs | +| path-tree | 9.5825 µs | +| xitca-router | 10.882 µs | +| ntex-router | 30.931 µs | +| route-recognizer | 90.966 µs | +| routefinder | 98.779 µs | +| actix-router | 178.40 µs | ## Inspirations