diff --git a/Cargo.toml b/Cargo.toml index 4e2015fb3..260302169 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ travis-ci = { repository = "servo/rust-url" } appveyor = { repository = "servo/rust-url" } [workspace] -members = [".", "idna", "url_serde"] +members = [".", "idna", "percent_encoding", "url_serde"] [[test]] name = "unit" @@ -44,5 +44,6 @@ encoding = {version = "0.2", optional = true} heapsize = {version = ">=0.1.1, <0.5", optional = true} idna = { version = "0.1.0", path = "./idna" } matches = "0.1" +percent_encoding = { version = "1.0.0", path = "./percent_encoding" } rustc-serialize = {version = "0.3", optional = true} serde = {version = ">=0.6.1, <0.9", optional = true} diff --git a/percent_encoding/Cargo.toml b/percent_encoding/Cargo.toml new file mode 100644 index 000000000..e2e02baaa --- /dev/null +++ b/percent_encoding/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "percent_encoding" +version = "1.0.0" +authors = ["The rust-url developers"] +description = "Percent encoding and decoding" +repository = "https://github.com/servo/rust-url/" +license = "MIT/Apache-2.0" + +[lib] +doctest = false +test = false + +[dev-dependencies] +rustc-test = "0.1" +rustc-serialize = "0.3" + +[dependencies] diff --git a/src/percent_encoding.rs b/percent_encoding/src/lib.rs similarity index 94% rename from src/percent_encoding.rs rename to percent_encoding/src/lib.rs index 6767dd664..16d37ada6 100644 --- a/src/percent_encoding.rs +++ b/percent_encoding/src/lib.rs @@ -32,7 +32,6 @@ //! assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F"); //! 
``` -use encoding; use std::ascii::AsciiExt; use std::borrow::Cow; use std::fmt; @@ -70,8 +69,8 @@ pub trait EncodeSet: Clone { /// ======= /// /// ```rust -/// #[macro_use] extern crate url; -/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; +/// #[macro_use] extern crate percent_encoding; +/// use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; /// define_encode_set! { /// /// This encode set is used in the URL parser for query strings. /// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} @@ -88,7 +87,7 @@ macro_rules! define_encode_set { #[allow(non_camel_case_types)] pub struct $name; - impl $crate::percent_encoding::EncodeSet for $name { + impl $crate::EncodeSet for $name { #[inline] fn contains(&self, byte: u8) -> bool { match byte as char { @@ -419,6 +418,25 @@ impl<'a> PercentDecode<'a> { /// Invalid UTF-8 percent-encoded byte sequences will be replaced � U+FFFD, /// the replacement character. pub fn decode_utf8_lossy(self) -> Cow<'a, str> { - encoding::decode_utf8_lossy(self.clone().into()) + decode_utf8_lossy(self.clone().into()) } } + +fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> { + match input { + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + Cow::Owned(bytes) => { + let raw_utf8: *const [u8]; + match String::from_utf8_lossy(&bytes) { + Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(), + Cow::Owned(s) => return s.into(), + } + // from_utf8_lossy returned a borrow of `bytes` unchanged. + debug_assert!(raw_utf8 == &*bytes as *const [u8]); + // Reuse the existing `Vec` allocation. 
+ unsafe { String::from_utf8_unchecked(bytes) }.into() + } + } +} + + diff --git a/src/lib.rs b/src/lib.rs index b1e014de8..0a5170383 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -112,6 +112,7 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); #[cfg(feature="heapsize")] #[macro_use] extern crate heapsize; pub extern crate idna; +pub extern crate percent_encoding; use encoding::EncodingOverride; #[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; @@ -145,7 +146,6 @@ mod parser; mod slicing; pub mod form_urlencoded; -pub mod percent_encoding; pub mod quirks; /// A parsed URL record. @@ -2103,3 +2103,48 @@ impl<'a> Drop for UrlQuery<'a> { self.url.restore_already_parsed_fragment(self.fragment.take()) } } + + +/// Define a new struct +/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait, +/// for use in [`percent_encode()`](percent_encoding/fn.percent_encode.html) +/// and related functions. +/// +/// Parameters are characters to include in the set in addition to those of the base set. +/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set). +/// +/// Example +/// ======= +/// +/// ```rust +/// #[macro_use] extern crate url; +/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; +/// define_encode_set! { +/// /// This encode set is used in the URL parser for query strings. +/// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} +/// } +/// # fn main() { +/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(), "foo%20bar"); +/// # } +/// ``` +#[macro_export] +macro_rules! 
define_encode_set { + ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => { + $(#[$attr])* + #[derive(Copy, Clone)] + #[allow(non_camel_case_types)] + pub struct $name; + + impl $crate::percent_encoding::EncodeSet for $name { + #[inline] + fn contains(&self, byte: u8) -> bool { + match byte as char { + $( + $ch => true, + )* + _ => $base_set.contains(byte) + } + } + } + } +}