diff --git a/Cargo.lock b/Cargo.lock index 9f4f4371..e11dc2bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2207,7 +2207,7 @@ dependencies = [ "matchit", "ntex-router", "path-tree", - "regex", + "percent-encoding", "route-recognizer", "routefinder", "smallvec", diff --git a/Cargo.toml b/Cargo.toml index 05f7c4c5..7d85857d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,9 @@ smallvec = "1.13" # NOTE: Keep in sync with `cargo-insta` Nix package. insta = "1.39.0" +# Encoding +percent-encoding = "2.3" + # Benchmarking criterion = { version = "0.5", features = ["html_reports"] } # NOTE: Keep in sync with `cargo-codspeed` Nix package. @@ -79,7 +82,6 @@ actix-router = "0.5.3" matchit = "0.8.3" ntex-router = "0.5.3" path-tree = "0.8.1" -regex = "1.10.6" route-recognizer = "0.3.1" routefinder = "0.5.4" xitca-router = "0.3.0" diff --git a/README.md b/README.md index d913c6b6..d3000853 100644 --- a/README.md +++ b/README.md @@ -134,32 +134,30 @@ Check out our [codspeed results](https://codspeed.io/DuskSystems/wayfind/benchma In a router of 130 routes, benchmark matching 4 paths. | Library | Time | -|-----------|------| -| wayfind | 210.33 ns | -| matchit | 310.24 ns | -| path-tree | 406.26 ns | -| xitca-router | 415.02 ns | -| ntex-router | 1.6291 µs | -| route-recognizer | 4.3608 µs | -| regex | 4.5123 µs | -| routefinder | 6.2077 µs | -| actix-router | 20.722 µs | +|---------|------| +| wayfind | 301.64 ns | +| matchit | 471.11 ns | +| xitca-router | 568.31 ns | +| path-tree | 586.17 ns | +| ntex-router | 1.7905 µs | +| route-recognizer | 4.5652 µs | +| routefinder | 6.6322 µs | +| actix-router | 21.162 µs | ### `path-tree` inspired benches In a router of 320 routes, benchmark matching 80 paths. | Library | Time | -|-----------|------| -| wayfind | 3.5117 µs | -| matchit | 6.8657 µs | -| path-tree | 7.5262 µs | -| xitca-router | 8.5490 µs | -| ntex-router | 28.003 µs | -| route-recognizer | 87.400 µs | -| routefinder | 95.115 µs | -| regex | 117.12 µs | -| actix-router | 176.11 µs | +|---------|------| +| wayfind | 3.9211 µs | +| matchit | 8.9698 µs | +| path-tree | 9.5825 µs | +| xitca-router | 10.882 µs | +| ntex-router | 30.931 µs | +| route-recognizer | 90.966 µs | +| routefinder | 98.779 µs | +| actix-router | 178.40 µs | ## Inspirations diff --git a/benches/matchit.rs b/benches/matchit.rs index 30e73ba5..6d854b62 100644 --- a/benches/matchit.rs +++ b/benches/matchit.rs @@ -5,6 +5,7 @@ use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion}; use matchit_routes::paths; +use percent_encoding::percent_decode; pub mod matchit_routes; @@ -19,7 +20,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let search = wayfind.search(route).unwrap(); + let path = wayfind::path::Path::new(route).unwrap(); + let search = wayfind.search(&path).unwrap(); let _ = search .parameters .iter() @@ -38,7 +40,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let mut path = actix_router::Path::new(route); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let mut path = actix_router::Path::new(route.as_ref()); actix.recognize(&mut path).unwrap(); let _ = path .iter() @@ -56,7 +59,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let at = matchit.at(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let at = matchit.at(route.as_ref()).unwrap(); let _ = at .params .iter() @@ -75,7 +79,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let mut path = ntex_router::Path::new(route); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let mut path = ntex_router::Path::new(route.as_ref()); ntex.recognize(&mut path).unwrap(); let _ = path .iter() @@ -93,7 +98,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let route = path_tree.find(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let route = path_tree.find(route.as_ref()).unwrap(); let _ = route .1 .params_iter() @@ -103,27 +109,6 @@ fn benchmark(criterion: &mut Criterion) { }); }); - group.bench_function("matchit benchmarks/regex", |bencher| { - let regex_set = regex::RegexSet::new(routes!(regex)).unwrap(); - let regexes: Vec<_> = routes!(regex) - .into_iter() - .map(|pattern| regex::Regex::new(pattern).unwrap()) - .collect(); - - bencher.iter(|| { - for route in paths() { - let matches = regex_set.matches(route).into_iter().collect::>(); - let index = matches.first().unwrap(); - let captures = regexes[*index].captures(route).unwrap(); - let _ = regexes[*index] - .capture_names() - .flatten() - .filter_map(|name| captures.name(name).map(|m| (name, m.as_str()))) - .collect::>(); - } - }); - }); - group.bench_function("matchit benchmarks/route-recognizer", |bencher| { let mut route_recognizer = route_recognizer::Router::new(); for route in routes!(colon) { @@ -132,7 +117,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let recognize = route_recognizer.recognize(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let recognize = route_recognizer.recognize(route.as_ref()).unwrap(); let _ = recognize .params() .iter() @@ -150,7 +136,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let best_match = routefinder.best_match(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let best_match = routefinder.best_match(route.as_ref()).unwrap(); let _ = best_match .captures() .iter() @@ -168,7 +155,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for route in paths() { - let at = xitca.at(route).unwrap(); + let route = percent_decode(route.as_bytes()).decode_utf8().unwrap(); + let at = xitca.at(route.as_ref()).unwrap(); let _ = at .params .iter() diff --git a/benches/matchit_routes.rs b/benches/matchit_routes.rs index 6b8643f6..43e3f2b2 100644 --- a/benches/matchit_routes.rs +++ b/benches/matchit_routes.rs @@ -4,7 +4,7 @@ pub fn paths() -> impl IntoIterator { "/user/repos", "/repos/rust-lang/rust/stargazers", "/orgs/rust-lang/public_members/nikomatsakis", - "/repos/rust-lang/rust/releases/1.51.0", + "/repos/rust-lang/rust/releases/1%2E51%2E0", ] } @@ -18,10 +18,6 @@ macro_rules! routes { routes!(finish => "{p1}", "{p2}", "{p3}", "{p4}") }}; - (regex) => {{ - routes!(finish => "(?.*)", "(?.*)", "(?.*)", "(?.*)") - }}; - (finish => $p1:literal, $p2:literal, $p3:literal, $p4:literal) => {{ [ concat!("/authorizations"), diff --git a/benches/path_tree.rs b/benches/path_tree.rs index 9a25bfc4..522f00e0 100644 --- a/benches/path_tree.rs +++ b/benches/path_tree.rs @@ -5,6 +5,7 @@ use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion}; use path_tree_routes::paths; +use percent_encoding::percent_decode; pub mod path_tree_routes; @@ -19,7 +20,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let search = wayfind.search(path).unwrap(); + let path = wayfind::path::Path::new(path).unwrap(); + let search = wayfind.search(&path).unwrap(); assert_eq!(search.data.value, index); let _ = search .parameters @@ -39,7 +41,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let mut path = actix_router::Path::new(path); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let mut path = actix_router::Path::new(path.as_ref()); let n = router.recognize(&mut path).unwrap(); assert_eq!(*n.0, index); let _ = path @@ -58,7 +61,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = matcher.at(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = matcher.at(path.as_ref()).unwrap(); assert_eq!(*n.value, index); let _ = n .params @@ -78,7 +82,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let mut path = ntex_router::Path::new(path); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let mut path = ntex_router::Path::new(path.as_ref()); let n = router.recognize(&mut path).unwrap(); assert_eq!(*n.0, index); let _ = path @@ -97,7 +102,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = tree.find(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = tree.find(path.as_ref()).unwrap(); assert_eq!(*n.0, index); let _ = n.1.params_iter() @@ -107,28 +113,6 @@ fn benchmark(criterion: &mut Criterion) { }); }); - group.bench_function("path-tree benchmarks/regex", |bencher| { - let regex_set = regex::RegexSet::new(routes!(regex)).unwrap(); - let regexes: Vec<_> = routes!(regex) - .into_iter() - .map(|pattern| regex::Regex::new(pattern).unwrap()) - .collect(); - - bencher.iter(|| { - for (index, path) in paths() { - let matches = regex_set.matches(path).into_iter().collect::>(); - assert!(matches.contains(&index)); - let i = matches.first().unwrap(); - let captures = regexes[*i].captures(path).unwrap(); - let _ = regexes[*i] - .capture_names() - .flatten() - .filter_map(|name| captures.name(name).map(|m| (name, m.as_str()))) - .collect::>(); - } - }); - }); - group.bench_function("path-tree benchmarks/route-recognizer", |bencher| { let mut router = route_recognizer::Router::::new(); for (index, route) in routes!(colon).iter().enumerate() { @@ -137,7 +121,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = router.recognize(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = router.recognize(path.as_ref()).unwrap(); assert_eq!(**n.handler(), index); let _ = n .params() @@ -156,7 +141,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = router.best_match(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = router.best_match(path.as_ref()).unwrap(); assert_eq!(*n, index); let _ = n .captures() @@ -175,7 +161,8 @@ fn benchmark(criterion: &mut Criterion) { bencher.iter(|| { for (index, path) in paths() { - let n = xitca.at(path).unwrap(); + let path = percent_decode(path.as_bytes()).decode_utf8().unwrap(); + let n = xitca.at(path.as_ref()).unwrap(); assert_eq!(*n.value, index); let _ = n .params diff --git a/benches/path_tree_routes.rs b/benches/path_tree_routes.rs index 7830d755..3aac7cbc 100644 --- a/benches/path_tree_routes.rs +++ b/benches/path_tree_routes.rs @@ -37,7 +37,7 @@ pub fn paths() -> impl IntoIterator { "/issues", "/legacy/issues/search/rust-lang/rust/987/1597", "/legacy/repos/search/1597", - "/legacy/user/email/rust@rust-lang.org", + "/legacy/user/email/rust%40rust-lang.org", "/legacy/user/search/1597", "/licenses", "/licenses/mit", @@ -96,10 +96,6 @@ macro_rules! routes { routes!(finish => "{p1}", "{p2}", "{p3}", "{p4}") }}; - (regex) => {{ - routes!(finish => "(?.*)", "(?.*)", "(?.*)", "(?.*)") - }}; - (finish => $p1:literal, $p2:literal, $p3:literal, $p4:literal) => {{ [ concat!("/app"), diff --git a/examples/axum-fork/src/routing/path_router.rs b/examples/axum-fork/src/routing/path_router.rs index 36cb94a0..e3554b77 100644 --- a/examples/axum-fork/src/routing/path_router.rs +++ b/examples/axum-fork/src/routing/path_router.rs @@ -3,7 +3,7 @@ use axum_core::response::IntoResponse; use std::{borrow::Cow, collections::HashMap, convert::Infallible, fmt, sync::Arc}; use tower_layer::Layer; use tower_service::Service; -use wayfind::{errors::insert::InsertError, node::search::Match, router::Router}; +use wayfind::{errors::insert::InsertError, node::search::Match, path::Path, router::Router}; use super::{ future::RouteFuture, not_found::NotFound, strip_prefix::StripPrefix, url_params, Endpoint, @@ -331,7 +331,8 @@ where } let path = req.uri().path().to_owned(); - let result = match self.node.matches(&path) { + let wayfind_path = Path::new(&path).expect("Invalid path!"); + let result = match self.node.matches(&wayfind_path) { Some(match_) => { let id = match_.data.value; @@ -365,7 +366,8 @@ where } pub(super) fn replace_endpoint(&mut self, path: &str, endpoint: Endpoint) { - if let Some(match_) = self.node.matches(path) { + let wayfind_path = Path::new(path).expect("Invalid path!"); + if let Some(match_) = self.node.matches(&wayfind_path) { let id = match_.data.value; self.routes.insert(id, endpoint); return; @@ -435,7 +437,7 @@ impl Node { Ok(()) } - fn matches<'n, 'p>(&'n self, path: &'p str) -> Option> { + fn matches<'n, 'p>(&'n self, path: &'p Path) -> Option> { self.inner.search(path) } } diff --git a/examples/hyper/src/main.rs b/examples/hyper/src/main.rs index 95d3f776..cb28876b 100644 --- a/examples/hyper/src/main.rs +++ b/examples/hyper/src/main.rs @@ -15,7 +15,7 @@ use std::{ sync::Arc, }; use tokio::{net::TcpListener, task::JoinSet}; -use wayfind::{node::search::Parameter, router::Router}; +use wayfind::{node::search::Parameter, path::Path, router::Router}; type BoxFuture<'a> = Pin< Box< @@ -65,7 +65,8 @@ async fn main() -> Result<(), anyhow::Error> { let router = Arc::clone(&router); async move { let path = request.uri().path(); - let matches = router.search(path).expect("Failed to match!"); + let wayfind_path = Path::new(path).expect("Invalid path!"); + let matches = router.search(&wayfind_path).expect("Failed to match!"); let handler = &matches.data.value; let parameters = &matches.parameters; diff --git a/src/decode.rs b/src/decode.rs new file mode 100644 index 00000000..730c104c --- /dev/null +++ b/src/decode.rs @@ -0,0 +1,39 @@ +use crate::errors::decode::DecodeError; +use std::borrow::Cow; + +pub(crate) fn percent_decode(input: &[u8]) -> Result, DecodeError> { + if !input.contains(&b'%') { + return Ok(Cow::Borrowed(input)); + } + + let mut output = Vec::with_capacity(input.len()); + let mut i = 0; + let len = input.len(); + + while i < len { + if input[i] == b'%' && i + 2 < len { + let a = input[i + 1]; + let b = input[i + 2]; + output.push(decode_hex(a, b)?); + i += 3; + } else { + output.push(input[i]); + i += 1; + } + } + + Ok(Cow::Owned(output)) +} + +#[allow(clippy::cast_possible_truncation)] +fn decode_hex(a: u8, b: u8) -> Result { + let a = (a as char) + .to_digit(16) + .ok_or(DecodeError::InvalidEncoding)?; + + let b = (b as char) + .to_digit(16) + .ok_or(DecodeError::InvalidEncoding)?; + + Ok((a as u8) << 4 | (b as u8)) +} diff --git a/src/errors.rs b/src/errors.rs index f1ba9d18..8f150938 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,4 +1,5 @@ pub mod constraint; +pub mod decode; pub mod delete; pub mod insert; pub mod route; diff --git a/src/errors/decode.rs b/src/errors/decode.rs new file mode 100644 index 00000000..cc1e22ed --- /dev/null +++ b/src/errors/decode.rs @@ -0,0 +1,16 @@ +use std::{error::Error, fmt::Display}; + +#[derive(Debug, PartialEq, Eq)] +pub enum DecodeError { + InvalidEncoding, +} + +impl Error for DecodeError {} + +impl Display for DecodeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::InvalidEncoding => write!(f, "Invalid Encoding"), + } + } +} diff --git a/src/errors/insert.rs b/src/errors/insert.rs index e6f4b831..f9e20a72 100644 --- a/src/errors/insert.rs +++ b/src/errors/insert.rs @@ -1,9 +1,11 @@ -use super::route::RouteError; +use super::{decode::DecodeError, route::RouteError}; use std::{error::Error, fmt::Display}; #[derive(Debug, PartialEq, Eq)] pub enum InsertError { RouteError(RouteError), + DecodeError(DecodeError), + EncodedPath, DuplicatePath, UnknownConstraint, } @@ -14,6 +16,8 @@ impl Display for InsertError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::RouteError(error) => error.fmt(f), + Self::DecodeError(error) => error.fmt(f), + Self::EncodedPath => write!(f, "Encoded Path"), Self::DuplicatePath => write!(f, "Duplicate Path"), Self::UnknownConstraint => write!(f, "Unknown Constraint"), } @@ -25,3 +29,9 @@ impl From for InsertError { Self::RouteError(error) } } + +impl From for InsertError { + fn from(error: DecodeError) -> Self { + Self::DecodeError(error) + } +} diff --git a/src/lib.rs b/src/lib.rs index b4325bbe..3eedeb24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,7 @@ pub mod constraints; +pub mod decode; pub mod errors; pub mod node; pub mod parts; +pub mod path; pub mod router; diff --git a/src/node/search.rs b/src/node/search.rs index d006b307..21369dba 100644 --- a/src/node/search.rs +++ b/src/node/search.rs @@ -3,24 +3,24 @@ use smallvec::{smallvec, SmallVec}; use std::collections::HashMap; #[derive(Debug, Eq, PartialEq)] -pub struct Match<'k, 'v, T> { - pub data: &'k NodeData, - pub parameters: SmallVec<[Parameter<'k, 'v>; 4]>, +pub struct Match<'router, 'path, T> { + pub data: &'router NodeData, + pub parameters: SmallVec<[Parameter<'router, 'path>; 4]>, } #[derive(Clone, Debug, Eq, PartialEq)] -pub struct Parameter<'k, 'v> { - pub key: &'k str, - pub value: &'v str, +pub struct Parameter<'router, 'path> { + pub key: &'router str, + pub value: &'path str, } impl Node { - pub fn search<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + pub fn search<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { if path.is_empty() { return if self.data.is_some() { Some(self) @@ -48,12 +48,12 @@ impl Node { None } - fn search_static<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_static<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for static_child in &self.static_children { // NOTE: This was previously a "starts_with" call, but turns out this is much faster. if path.len() >= static_child.prefix.len() @@ -71,12 +71,12 @@ impl Node { None } - fn search_dynamic<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_dynamic<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { if self.quick_dynamic { self.search_dynamic_segment(path, parameters, constraints) } else { @@ -90,12 +90,12 @@ impl Node { // Path: `my.long.file.txt` // Name: `my.long.file` // Ext: `txt` - fn search_dynamic_inline<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_dynamic_inline<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for dynamic_child in &self.dynamic_children { let mut consumed = 0; @@ -138,12 +138,12 @@ impl Node { } // Doesn't support inline dynamic sections, e.g. `{name}.{extension}`, only `/{segment}/` - fn search_dynamic_segment<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_dynamic_segment<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for dynamic_child in &self.dynamic_children { let segment_end = path.iter().position(|&b| b == b'/').unwrap_or(path.len()); @@ -169,12 +169,12 @@ impl Node { None } - fn search_wildcard<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_wildcard<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for wildcard_child in &self.wildcard_children { let mut consumed = 0; let mut remaining_path = path; @@ -232,12 +232,12 @@ impl Node { None } - fn search_end_wildcard<'k, 'v>( - &'k self, - path: &'v [u8], - parameters: &mut SmallVec<[Parameter<'k, 'v>; 4]>, + fn search_end_wildcard<'router, 'path>( + &'router self, + path: &'path [u8], + parameters: &mut SmallVec<[Parameter<'router, 'path>; 4]>, constraints: &HashMap, fn(&str) -> bool>, - ) -> Option<&'k Self> { + ) -> Option<&'router Self> { for end_wildcard in &self.end_wildcard_children { if !Self::check_constraint(end_wildcard, path, constraints) { continue; diff --git a/src/path.rs b/src/path.rs new file mode 100644 index 00000000..e2f8fcf1 --- /dev/null +++ b/src/path.rs @@ -0,0 +1,21 @@ +use crate::{decode::percent_decode, errors::decode::DecodeError}; +use std::borrow::Cow; + +pub struct Path<'path> { + _raw: &'path [u8], + decoded: Cow<'path, [u8]>, +} + +impl<'path> Path<'path> { + pub fn new(path: &'path str) -> Result { + Ok(Self { + _raw: path.as_bytes(), + decoded: percent_decode(path.as_bytes())?, + }) + } + + #[must_use] + pub fn decoded_bytes(&'path self) -> &'path [u8] { + &self.decoded + } +} diff --git a/src/router.rs b/src/router.rs index 7f7b6dde..04ff924c 100644 --- a/src/router.rs +++ b/src/router.rs @@ -3,6 +3,7 @@ use crate::{ errors::{constraint::ConstraintError, delete::DeleteError, insert::InsertError}, node::{search::Match, Node, NodeData, NodeKind}, parts::{Part, Parts}, + path::Path, }; use smallvec::smallvec; use std::{ @@ -75,6 +76,10 @@ impl Router { } pub fn insert(&mut self, route: &str, value: T) -> Result<(), InsertError> { + if route.as_bytes() != Path::new(route)?.decoded_bytes() { + return Err(InsertError::EncodedPath); + } + let path = Arc::from(route); let mut parts = Parts::new(route.as_bytes())?; @@ -102,11 +107,14 @@ impl Router { self.root.delete(&mut parts) } - pub fn search<'k, 'v>(&'k self, path: &'v str) -> Option> { + pub fn search<'router, 'path>( + &'router self, + path: &'path Path, + ) -> Option> { let mut parameters = smallvec![]; let node = self .root - .search(path.as_bytes(), &mut parameters, &self.constraints)?; + .search(path.decoded_bytes(), &mut parameters, &self.constraints)?; Some(Match { data: node.data.as_ref()?, diff --git a/tests/common.rs b/tests/common.rs index 1b97b8d7..87eb177b 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -1,6 +1,7 @@ use std::{fmt::Debug, sync::Arc}; use wayfind::{ node::search::{Match, Parameter}, + path::Path, router::Router, }; @@ -52,7 +53,8 @@ pub fn assert_router_match<'a, T: PartialEq + Debug>( input: &'a str, expected: Option>, ) { - let Some(Match { data, parameters }) = router.search(input) else { + let path = Path::new(input).expect("Invalid path!"); + let Some(Match { data, parameters }) = router.search(&path) else { assert!(expected.is_none(), "No match found for input: {input}"); return; }; diff --git a/tests/encoding.rs b/tests/encoding.rs new file mode 100644 index 00000000..72b0b8cd --- /dev/null +++ b/tests/encoding.rs @@ -0,0 +1,83 @@ +use std::error::Error; +use wayfind::router::Router; + +#[path = "./common.rs"] +mod common; + +#[test] +fn percent_encoding() -> Result<(), Box> { + let mut router = Router::new(); + router.insert("/hello@world", 1)?; + router.insert("/hello-world.com", 2)?; + router.insert("/hello world", 3)?; + router.insert("/こんにちは", 4)?; + router.insert("/50%", 5)?; + router.insert("/hello world@example.com", 6)?; + router.insert("/path/to/resource with spaces", 7)?; + router.insert("/encoded/slash", 8)?; + + assert_router_matches!(router, { + "/hello%40world" => { + path: "/hello@world", + value: 1 + } + "/hello@world" => { + path: "/hello@world", + value: 1 + } + "/hello-world.com" => { + path: "/hello-world.com", + value: 2 + } + "/hello%20world" => { + path: "/hello world", + value: 3 + } + "/hello world" => { + path: "/hello world", + value: 3 + } + "/%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF" => { + path: "/こんにちは", + value: 4 + } + "/こんにちは" => { + path: "/こんにちは", + value: 4 + } + "/50%25" => { + path: "/50%", + value: 5 + } + "/50%" => { + path: "/50%", + value: 5 + } + "/hello%20world%40example.com" => { + path: "/hello world@example.com", + value: 6 + } + "/hello world@example.com" => { + path: "/hello world@example.com", + value: 6 + } + "/path/to/resource%20with%20spaces" => { + path: "/path/to/resource with spaces", + value: 7 + } + "/path/to/resource with spaces" => { + path: "/path/to/resource with spaces", + value: 7 + } + "/encoded%2Fslash" => { + path: "/encoded/slash", + value: 8 + } + "/encoded/slash" => { + path: "/encoded/slash", + value: 8 + } + }); + + Ok(()) +} diff --git a/tests/poem.rs b/tests/poem.rs index 81f42998..9b612f7d 100644 --- a/tests/poem.rs +++ b/tests/poem.rs @@ -511,12 +511,11 @@ fn test_percent_decoded() -> Result<(), Box> { "id" => "abc" } } - // NOTE: Different behaviour: poem would decode to `你好` "/a/%E4%BD%A0%E5%A5%BD" => { path: "/a/{id}", value: 1, params: { - "id" => "%E4%BD%A0%E5%A5%BD" + "id" => "你好" } } }); diff --git a/tests/uncommon.rs b/tests/uncommon.rs index fe214f9d..aa3bcad0 100644 --- a/tests/uncommon.rs +++ b/tests/uncommon.rs @@ -47,21 +47,16 @@ fn uncommon() -> Result<(), Box> { // Unicode Control router.insert("/\u{0001}\u{0002}\u{0003}", 12)?; - // Punycode (müller.de) - router.insert("/xn--mller-kva.de", 13)?; - - // URL Encoded (😊) - router.insert("/%F0%9F%98%8A", 14)?; - - // Double URL Encoded (💀) - router.insert("/%25F0%259F%2592%2580", 15)?; - assert_router_matches!(router, { // Japanese (Konnichiwa) "/こんにちは" => { path: "/こんにちは", value: 0 } + "/%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF" => { + path: "/こんにちは", + value: 0 + } "/こんにちわ" => None // Russian (Privet) @@ -69,6 +64,10 @@ fn uncommon() -> Result<(), Box> { path: "/привет", value: 1 } + "/%D0%BF%D1%80%D0%B8%D0%B2%D0%B5%D1%82" => { + path: "/привет", + value: 1 + } "/привет!" => None // Chinese (Nǐ Hǎo) @@ -76,6 +75,10 @@ fn uncommon() -> Result<(), Box> { path: "/你好", value: 2 } + "/%E4%BD%A0%E5%A5%BD" => { + path: "/你好", + value: 2 + } "/你们好" => None // Arabic Numerals (full-width) @@ -83,6 +86,10 @@ fn uncommon() -> Result<(), Box> { path: "/123", value: 3 } + "/%EF%BC%91%EF%BC%92%EF%BC%93" => { + path: "/123", + value: 3 + } "/123" => None // Null Byte @@ -90,6 +97,10 @@ fn uncommon() -> Result<(), Box> { path: "/null\0byte", value: 4 } + "/null%00byte" => { + path: "/null\0byte", + value: 4 + } "/nullbyte" => None // Emoji @@ -97,6 +108,10 @@ fn uncommon() -> Result<(), Box> { path: "/⚽️🏀🏈", value: 5 } + "/%E2%9A%BD%EF%B8%8F%F0%9F%8F%80%F0%9F%8F%88" => { + path: "/⚽️🏀🏈", + value: 5 + } "/⚽️🏀" => None // Unicode @@ -104,6 +119,10 @@ fn uncommon() -> Result<(), Box> { path: "/♔♕♖♗♘♙", value: 6 } + "/%E2%99%94%E2%99%95%E2%99%96%E2%99%97%E2%99%98%E2%99%99" => { + path: "/♔♕♖♗♘♙", + value: 6 + } "/♔♕♖♗♘♟" => None // Unicode Normalization @@ -115,6 +134,14 @@ fn uncommon() -> Result<(), Box> { path: "/café", value: 8 } + "/cafe%CC%81" => { + path: "/cafe\u{0301}", + value: 7 + } + "/caf%C3%A9" => { + path: "/café", + value: 8 + } "/cafe" => None // Unicode Zero Width @@ -122,6 +149,10 @@ fn uncommon() -> Result<(), Box> { path: "/abc\u{200B}123", value: 9 } + "/abc%E2%80%8B123" => { + path: "/abc\u{200B}123", + value: 9 + } "/abc123" => None // Unicode Right to Left @@ -129,6 +160,10 @@ fn uncommon() -> Result<(), Box> { path: "/hello\u{202E}dlrow", value: 10 } + "/hello%E2%80%AEdlrow" => { + path: "/hello\u{202E}dlrow", + value: 10 + } "/helloworld" => None // Unicode Whitespace @@ -136,6 +171,10 @@ fn uncommon() -> Result<(), Box> { path: "/\u{2000}\u{2001}\u{2002}", value: 11 } + "/%E2%80%80%E2%80%81%E2%80%82" => { + path: "/\u{2000}\u{2001}\u{2002}", + value: 11 + } "/ " => None // Unicode Control @@ -143,29 +182,11 @@ fn uncommon() -> Result<(), Box> { path: "/\u{0001}\u{0002}\u{0003}", value: 12 } - "/123" => None - - // Punycode (müller.de) - "/xn--mller-kva.de" => { - path: "/xn--mller-kva.de", - value: 13 - } - "/muller.de" => None - - // URL Encoded (😊) - "/%F0%9F%98%8A" => { - path: "/%F0%9F%98%8A", - value: 14 - } - "/😊" => None - - // Double URL Encoded (💀) - "/%25F0%259F%2592%2580" => { - path: "/%25F0%259F%2592%2580", - value: 15 + "/%01%02%03" => { + path: "/\u{0001}\u{0002}\u{0003}", + value: 12 } - "/%F0%9F%92%80" => None - "/💀" => None + "/123" => None }); Ok(())