From df366f13a3d2c39e12a6f615b0d72673dd88673f Mon Sep 17 00:00:00 2001 From: Andy Gauge Date: Thu, 8 Jun 2017 12:45:19 -0700 Subject: [PATCH 1/4] Documentation and examples enhancements of percent encoding --- rust-url-todo | 14 ++++++ src/percent_encoding.rs | 109 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 rust-url-todo diff --git a/rust-url-todo b/rust-url-todo new file mode 100644 index 000000000..6aeefbf13 --- /dev/null +++ b/rust-url-todo @@ -0,0 +1,14 @@ +* standalone path parsing? +* Test setters + * Test trim C0/space + * Test remove tab & newline + + + +#[test] +fn test_path_segments() { + let mut url = Url::parse("http://example.net").unwrap(); + url.push_path_segment("foo").unwrap(); + url.extend_path_segments(&["bar", "b/az"]).unwrap(); + assert_eq!(url.as_str(), "http://example.net/foo"); +} diff --git a/src/percent_encoding.rs b/src/percent_encoding.rs index 854ad0e67..1986a88ca 100644 --- a/src/percent_encoding.rs +++ b/src/percent_encoding.rs @@ -6,6 +6,39 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +//! URLs use special characters to indicate the parts of the request. For example, a forward slash +//! indicates a path. In order for that character to exist outside of a path separator, that +//! character would need to be encoded. +//! +//! Percent encoding replaces reserved characters with the `%` escape character followed by hexadecimal +//! ASCII representation. For non-ASCII characters that are percent encoded, a UTF-8 byte sequence +//! becomes percent encoded. A simple example can be seen when the space literal is replaced with +//! `%20`. +//! +//! Percent encoding is further complicated by the fact that different parts of the URI have +//! different encoding requirements. In order to support the variety of encoding requirements, +//! `url::percent_encoding` includes encoding sets that are defined in [IETF RFC 3986][rfc] and +//! 
updated through the [Living Standard][living]. +//! +//! [`url::percent_encoding::EncodeSet`](trait.EncodeSet.html) Trait allows a sequence of bytes +//! to be converted to a percent encoded sequence of bytes stripped of particular reserved +//! characters. This trait is applied to the `*_ENCODE_SET` structs. If your application requires +//! custom set of Encoding, see [`define_encode_set!`](../macro.define_encode_set!.html) macro. +//! +//! # Examples +//! +//! ``` +//! extern crate url; +//! use url::percent_encoding::{utf8_percent_encode, QUERY_ENCODE_SET}; +//! +//! //prints "foo%20bar%3F" +//! # fn main() { +//! println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::()); +//! # } +//! ``` +//! [rfc]:https://tools.ietf.org/html/rfc3986 +//! [living]:https://url.spec.whatwg.org + use encoding; use std::ascii::AsciiExt; use std::borrow::Cow; @@ -77,6 +110,9 @@ macro_rules! define_encode_set { } /// This encode set is used for the path of cannot-be-a-base URLs. +/// +/// All ASCII characters less than hexadecimal 20 and greater than 7E are encoded. This includes +/// special characters such as line feed, carriage return, NULL, etc. #[derive(Copy, Clone, Debug)] #[allow(non_camel_case_types)] pub struct SIMPLE_ENCODE_SET; @@ -90,21 +126,39 @@ impl EncodeSet for SIMPLE_ENCODE_SET { define_encode_set! { /// This encode set is used in the URL parser for query strings. + /// + /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html), + /// space, double quote ("), hash (#), and inequality qualifiers (<), (>) are encoded. pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} } define_encode_set! { /// This encode set is used for path components. 
+ /// + /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html), + /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`), + /// question mark (?), and curly brackets ({), (}) are encoded. pub DEFAULT_ENCODE_SET = [QUERY_ENCODE_SET] | {'`', '?', '{', '}'} } define_encode_set! { /// This encode set is used for on '/'-separated path segment + /// + /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html), + /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`), + /// question mark (?), curly brackets ({), (}), percent sign (%), and forward slash (/) are + /// encoded. pub PATH_SEGMENT_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'%', '/'} } define_encode_set! { /// This encode set is used for username and password. + /// + /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html), + /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`), + /// question mark (?), curly brackets ({), (}), forward slash (/), colon (:), semi-colon (;), + /// equality (=), at (@), backslash (\\), square brackets ([), (]), caret (\^), and pipe (|) are + /// encoded. pub USERINFO_ENCODE_SET = [DEFAULT_ENCODE_SET] | { '/', ':', ';', '=', '@', '[', '\\', ']', '^', '|' } @@ -113,6 +167,21 @@ define_encode_set! { /// Return the percent-encoding of the given bytes. /// /// This is unconditional, unlike `percent_encode()` which uses an encode set. 
+/// +/// # Examples +/// +/// ``` +/// extern crate url; +/// use url::percent_encoding::percent_encode_byte; +/// +/// //prints %66%6F%6F%20%62%61%72 +/// # fn main() { +/// let sample = b"foo bar"; +/// for character in sample { +/// print!("{}", percent_encode_byte(*character)); +/// } +/// # } +/// ``` pub fn percent_encode_byte(byte: u8) -> &'static str { let index = usize::from(byte) * 3; &"\ @@ -146,6 +215,18 @@ pub fn percent_encode_byte(byte: u8) -> &'static str { /// that also implements `Display` and `Into>`. /// The latter returns `Cow::Borrowed` when none of the bytes in `input` /// are in the given encode set. +/// +/// # Examples +/// +/// ``` +/// extern crate url; +/// use url::percent_encoding::{percent_encode, DEFAULT_ENCODE_SET}; +/// +/// //prints foo%20bar%3F +/// # fn main() { +/// println!("{}", percent_encode(b"foo bar?", DEFAULT_ENCODE_SET).collect::()); +/// # } +/// ``` #[inline] pub fn percent_encode(input: &[u8], encode_set: E) -> PercentEncode { PercentEncode { @@ -157,6 +238,18 @@ pub fn percent_encode(input: &[u8], encode_set: E) -> PercentEncod /// Percent-encode the UTF-8 encoding of the given string. /// /// See `percent_encode()` for how to use the return value. +/// +/// # Examples +/// +/// ``` +/// extern crate url; +/// use url::percent_encoding::{utf8_percent_encode, QUERY_ENCODE_SET}; +/// +/// //prints "foo%20bar%3F" +/// # fn main() { +/// println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::()); +/// # } +/// ``` #[inline] pub fn utf8_percent_encode(input: &str, encode_set: E) -> PercentEncode { percent_encode(input.as_bytes(), encode_set) @@ -241,6 +334,22 @@ impl<'a, E: EncodeSet> From> for Cow<'a, str> { /// that also implements `Into>` /// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence) /// and has `decode_utf8()` and `decode_utf8_lossy()` methods. 
+/// +/// # Examples +/// +/// ``` +/// extern crate url; +/// use url::percent_encoding::percent_decode; +/// +/// //prints "foo bar?" +/// # fn run() -> Result<(), std::str::Utf8Error> { +/// println!("{}", percent_decode(b"foo%20bar%3F").decode_utf8()?); +/// # Ok( () ) +/// # } +/// # fn main() { +/// # run().unwrap(); +/// # } +/// ``` #[inline] pub fn percent_decode(input: &[u8]) -> PercentDecode { PercentDecode { From 512b98a6fc037a13c8cb449ae2d56003517cfa4b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 13 Jun 2017 17:32:37 +0200 Subject: [PATCH 2/4] URI -> URL, remove irrelevant reference to RFC 3986 --- src/percent_encoding.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/percent_encoding.rs b/src/percent_encoding.rs index 1986a88ca..7ac38dda9 100644 --- a/src/percent_encoding.rs +++ b/src/percent_encoding.rs @@ -15,10 +15,10 @@ //! becomes percent encoded. A simple example can be seen when the space literal is replaced with //! `%20`. //! -//! Percent encoding is further complicated by the fact that different parts of the URI have +//! Percent encoding is further complicated by the fact that different parts of an URL have //! different encoding requirements. In order to support the variety of encoding requirements, -//! `url::percent_encoding` includes encoding sets that are defined in [IETF RFC 3986][rfc] and -//! updated through the [Living Standard][living]. +//! `url::percent_encoding` includes different *encode sets*. +//! See [URL Standard](https://url.spec.whatwg.org/#percent-encoded-bytes) for details. //! //! [`url::percent_encoding::EncodeSet`](trait.EncodeSet.html) Trait allows a sequence of bytes //! to be converted to a percent encoded sequence of bytes stripped of particular reserved @@ -36,8 +36,6 @@ //! println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::()); //! # } //! ``` -//! [rfc]:https://tools.ietf.org/html/rfc3986 -//! 
[living]:https://url.spec.whatwg.org use encoding; use std::ascii::AsciiExt; From d02b1c3b45882f16c0ab768680431e314ceb15ab Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 13 Jun 2017 17:48:50 +0200 Subject: [PATCH 3/4] Docs: pretend encode set unit structs are constants --- src/percent_encoding.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/percent_encoding.rs b/src/percent_encoding.rs index 7ac38dda9..e9337b369 100644 --- a/src/percent_encoding.rs +++ b/src/percent_encoding.rs @@ -20,10 +20,9 @@ //! `url::percent_encoding` includes different *encode sets*. //! See [URL Standard](https://url.spec.whatwg.org/#percent-encoded-bytes) for details. //! -//! [`url::percent_encoding::EncodeSet`](trait.EncodeSet.html) Trait allows a sequence of bytes -//! to be converted to a percent encoded sequence of bytes stripped of particular reserved -//! characters. This trait is applied to the `*_ENCODE_SET` structs. If your application requires -//! custom set of Encoding, see [`define_encode_set!`](../macro.define_encode_set!.html) macro. +//! This module provides some `*_ENCODE_SET` constants. +//! If a different set is required, it can be created with +//! the [`define_encode_set!`](../macro.define_encode_set!.html) macro. //! //! # Examples //! From ea2f97f6ed3dc549559c38f020aa597bbecabf74 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 13 Jun 2017 17:49:28 +0200 Subject: [PATCH 4/4] Use assert_eq instead of println in doctests. Fix said doctests. --- src/percent_encoding.rs | 42 ++++++++--------------------------------- 1 file changed, 8 insertions(+), 34 deletions(-) diff --git a/src/percent_encoding.rs b/src/percent_encoding.rs index e9337b369..6767dd664 100644 --- a/src/percent_encoding.rs +++ b/src/percent_encoding.rs @@ -27,13 +27,9 @@ //! # Examples //! //! ``` -//! extern crate url; -//! use url::percent_encoding::{utf8_percent_encode, QUERY_ENCODE_SET}; +//! 
use url::percent_encoding::{utf8_percent_encode, DEFAULT_ENCODE_SET}; //! -//! //prints "foo%20bar%3F" -//! # fn main() { -//! println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::()); -//! # } +//! assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F"); //! ``` use encoding; @@ -168,16 +164,10 @@ define_encode_set! { /// # Examples /// /// ``` -/// extern crate url; /// use url::percent_encoding::percent_encode_byte; /// -/// //prints %66%6F%6F%20%62%61%72 -/// # fn main() { -/// let sample = b"foo bar"; -/// for character in sample { -/// print!("{}", percent_encode_byte(*character)); -/// } -/// # } +/// assert_eq!("foo bar".bytes().map(percent_encode_byte).collect::(), +/// "%66%6F%6F%20%62%61%72"); /// ``` pub fn percent_encode_byte(byte: u8) -> &'static str { let index = usize::from(byte) * 3; @@ -216,13 +206,9 @@ pub fn percent_encode_byte(byte: u8) -> &'static str { /// # Examples /// /// ``` -/// extern crate url; /// use url::percent_encoding::{percent_encode, DEFAULT_ENCODE_SET}; /// -/// //prints foo%20bar%3F -/// # fn main() { -/// println!("{}", percent_encode(b"foo bar?", DEFAULT_ENCODE_SET).collect::()); -/// # } +/// assert_eq!(percent_encode(b"foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F"); /// ``` #[inline] pub fn percent_encode(input: &[u8], encode_set: E) -> PercentEncode { @@ -239,13 +225,9 @@ pub fn percent_encode(input: &[u8], encode_set: E) -> PercentEncod /// # Examples /// /// ``` -/// extern crate url; -/// use url::percent_encoding::{utf8_percent_encode, QUERY_ENCODE_SET}; +/// use url::percent_encoding::{utf8_percent_encode, DEFAULT_ENCODE_SET}; /// -/// //prints "foo%20bar%3F" -/// # fn main() { -/// println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::()); -/// # } +/// assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F"); /// ``` #[inline] pub fn utf8_percent_encode(input: &str, encode_set: E) -> 
PercentEncode { @@ -335,17 +317,9 @@ impl<'a, E: EncodeSet> From> for Cow<'a, str> { /// # Examples /// /// ``` -/// extern crate url; /// use url::percent_encoding::percent_decode; /// -/// //prints "foo bar?" -/// # fn run() -> Result<(), std::str::Utf8Error> { -/// println!("{}", percent_decode(b"foo%20bar%3F").decode_utf8()?); -/// # Ok( () ) -/// # } -/// # fn main() { -/// # run().unwrap(); -/// # } +/// assert_eq!(percent_decode(b"foo%20bar%3F").decode_utf8().unwrap(), "foo bar?"); /// ``` #[inline] pub fn percent_decode(input: &[u8]) -> PercentDecode {