From 4c3448f356dcb04150db20dcef92da688d49a4d5 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Wed, 8 Feb 2017 17:43:48 -0500 Subject: [PATCH 1/3] Add equivalents of C's functions to AsciiExt. * `is_ascii_alphabetic` * `is_ascii_uppercase` * `is_ascii_lowercase` * `is_ascii_alphanumeric` * `is_ascii_digit` * `is_ascii_hexdigit` * `is_ascii_punctuation` * `is_ascii_graphic` * `is_ascii_whitespace` * `is_ascii_control` This addresses issue #39658. --- src/libstd/ascii.rs | 838 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 838 insertions(+) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 35c388ba076ce..cf965291fa2a6 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -184,6 +184,348 @@ pub trait AsciiExt { /// [`to_ascii_lowercase`]: #tymethod.to_ascii_lowercase #[stable(feature = "ascii", since = "1.9.0")] fn make_ascii_lowercase(&mut self); + + /// Checks if the value is an ASCII alphabetic character: + /// U+0041 'A' ... U+005A 'Z' or U+0061 'a' ... U+007A 'z'. + /// For strings, true if all characters in the string are + /// ASCII alphabetic. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_alphabetic()); + /// assert!(G.is_ascii_alphabetic()); + /// assert!(a.is_ascii_alphabetic()); + /// assert!(g.is_ascii_alphabetic()); + /// assert!(!zero.is_ascii_alphabetic()); + /// assert!(!percent.is_ascii_alphabetic()); + /// assert!(!space.is_ascii_alphabetic()); + /// assert!(!lf.is_ascii_alphabetic()); + /// assert!(!esc.is_ascii_alphabetic()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_alphabetic(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII uppercase character: + /// U+0041 'A' ... U+005A 'Z'. + /// For strings, true if all characters in the string are + /// ASCII uppercase. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_uppercase()); + /// assert!(G.is_ascii_uppercase()); + /// assert!(!a.is_ascii_uppercase()); + /// assert!(!g.is_ascii_uppercase()); + /// assert!(!zero.is_ascii_uppercase()); + /// assert!(!percent.is_ascii_uppercase()); + /// assert!(!space.is_ascii_uppercase()); + /// assert!(!lf.is_ascii_uppercase()); + /// assert!(!esc.is_ascii_uppercase()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_uppercase(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII lowercase character: + /// U+0061 'a' ... U+007A 'z'. + /// For strings, true if all characters in the string are + /// ASCII lowercase. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_lowercase()); + /// assert!(!G.is_ascii_lowercase()); + /// assert!(a.is_ascii_lowercase()); + /// assert!(g.is_ascii_lowercase()); + /// assert!(!zero.is_ascii_lowercase()); + /// assert!(!percent.is_ascii_lowercase()); + /// assert!(!space.is_ascii_lowercase()); + /// assert!(!lf.is_ascii_lowercase()); + /// assert!(!esc.is_ascii_lowercase()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_lowercase(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII alphanumeric character: + /// U+0041 'A' ... U+005A 'Z', U+0061 'a' ... U+007A 'z', or + /// U+0030 '0' ... U+0039 '9'. + /// For strings, true if all characters in the string are + /// ASCII alphanumeric. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_alphanumeric()); + /// assert!(G.is_ascii_alphanumeric()); + /// assert!(a.is_ascii_alphanumeric()); + /// assert!(g.is_ascii_alphanumeric()); + /// assert!(zero.is_ascii_alphanumeric()); + /// assert!(!percent.is_ascii_alphanumeric()); + /// assert!(!space.is_ascii_alphanumeric()); + /// assert!(!lf.is_ascii_alphanumeric()); + /// assert!(!esc.is_ascii_alphanumeric()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_alphanumeric(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII decimal digit: + /// U+0030 '0' ... U+0039 '9'. + /// For strings, true if all characters in the string are + /// ASCII digits. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_digit()); + /// assert!(!G.is_ascii_digit()); + /// assert!(!a.is_ascii_digit()); + /// assert!(!g.is_ascii_digit()); + /// assert!(zero.is_ascii_digit()); + /// assert!(!percent.is_ascii_digit()); + /// assert!(!space.is_ascii_digit()); + /// assert!(!lf.is_ascii_digit()); + /// assert!(!esc.is_ascii_digit()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_digit(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII hexadecimal digit: + /// U+0030 '0' ... U+0039 '9', U+0041 'A' ... U+0046 'F', or + /// U+0061 'a' ... U+0066 'f'. + /// For strings, true if all characters in the string are + /// ASCII hex digits. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_hexdigit()); + /// assert!(!G.is_ascii_hexdigit()); + /// assert!(a.is_ascii_hexdigit()); + /// assert!(!g.is_ascii_hexdigit()); + /// assert!(zero.is_ascii_hexdigit()); + /// assert!(!percent.is_ascii_hexdigit()); + /// assert!(!space.is_ascii_hexdigit()); + /// assert!(!lf.is_ascii_hexdigit()); + /// assert!(!esc.is_ascii_hexdigit()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_hexdigit(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII punctuation character: + /// U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /` + /// U+003A ... U+0040 `: ; < = > ? @` + /// U+005B ... U+0060 `[ \\ ] ^ _ \`` + /// U+007B ... U+007E `{ | } ~` + /// For strings, true if all characters in the string are + /// ASCII punctuation. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_punctuation()); + /// assert!(!G.is_ascii_punctuation()); + /// assert!(!a.is_ascii_punctuation()); + /// assert!(!g.is_ascii_punctuation()); + /// assert!(!zero.is_ascii_punctuation()); + /// assert!(percent.is_ascii_punctuation()); + /// assert!(!space.is_ascii_punctuation()); + /// assert!(!lf.is_ascii_punctuation()); + /// assert!(!esc.is_ascii_punctuation()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_punctuation(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII graphic character: + /// U+0021 '@' ... U+007E '~'. + /// For strings, true if all characters in the string are + /// ASCII punctuation. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_graphic()); + /// assert!(G.is_ascii_graphic()); + /// assert!(a.is_ascii_graphic()); + /// assert!(g.is_ascii_graphic()); + /// assert!(zero.is_ascii_graphic()); + /// assert!(percent.is_ascii_graphic()); + /// assert!(!space.is_ascii_graphic()); + /// assert!(!lf.is_ascii_graphic()); + /// assert!(!esc.is_ascii_graphic()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_graphic(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII whitespace character: + /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, + /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. + /// For strings, true if all characters in the string are + /// ASCII whitespace. + /// + /// Rust uses the WhatWG Infra Standard's [definition of ASCII + /// whitespace][infra-aw]. There are several other definitions in + /// wide use. For instance, [the POSIX locale][posix-ctype] + /// includes U+000B VERTICAL TAB as well as all the above + /// characters, but—from the very same specification—[the default + /// rule for "field splitting" in the Bourne shell][field-splitting] + /// considers *only* SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. + /// + /// If you are writing a program that will process an existing + /// file format, check what that format's definition of whitespace is + /// before using this function. + /// + /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace + /// [posix-ctype]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [field-splitting]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_whitespace()); + /// assert!(!G.is_ascii_whitespace()); + /// assert!(!a.is_ascii_whitespace()); + /// assert!(!g.is_ascii_whitespace()); + /// assert!(!zero.is_ascii_whitespace()); + /// assert!(!percent.is_ascii_whitespace()); + /// assert!(space.is_ascii_whitespace()); + /// assert!(lf.is_ascii_whitespace()); + /// assert!(!esc.is_ascii_whitespace()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_whitespace(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII control character: + /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE. + /// Note that most ASCII whitespace characters are control + /// characters, but SPACE is not. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_control()); + /// assert!(!G.is_ascii_control()); + /// assert!(!a.is_ascii_control()); + /// assert!(!g.is_ascii_control()); + /// assert!(!zero.is_ascii_control()); + /// assert!(!percent.is_ascii_control()); + /// assert!(!space.is_ascii_control()); + /// assert!(lf.is_ascii_control()); + /// assert!(esc.is_ascii_control()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_control(&self) -> bool { unimplemented!(); } } #[stable(feature = "rust1", since = "1.0.0")] @@ -225,6 +567,56 @@ impl AsciiExt for str { let me: &mut [u8] = unsafe { mem::transmute(self) }; me.make_ascii_lowercase() } + + #[inline] + fn is_ascii_alphabetic(&self) -> bool { + self.bytes().all(|b| b.is_ascii_alphabetic()) + } + + #[inline] + fn is_ascii_uppercase(&self) -> bool { + self.bytes().all(|b| b.is_ascii_uppercase()) + } + + #[inline] + fn is_ascii_lowercase(&self) -> bool { + self.bytes().all(|b| b.is_ascii_lowercase()) + } + + #[inline] + fn is_ascii_alphanumeric(&self) -> bool { + self.bytes().all(|b| b.is_ascii_alphanumeric()) + } + + #[inline] + fn is_ascii_digit(&self) -> bool { + self.bytes().all(|b| b.is_ascii_digit()) + } + + #[inline] + fn is_ascii_hexdigit(&self) -> bool { + self.bytes().all(|b| b.is_ascii_hexdigit()) + } + + #[inline] + fn is_ascii_punctuation(&self) -> bool { + self.bytes().all(|b| b.is_ascii_punctuation()) + } + + #[inline] + fn is_ascii_graphic(&self) -> bool { + self.bytes().all(|b| b.is_ascii_graphic()) + } + + #[inline] + fn is_ascii_whitespace(&self) -> bool { + self.bytes().all(|b| b.is_ascii_whitespace()) + } + + #[inline] + fn is_ascii_control(&self) -> bool { + self.bytes().all(|b| b.is_ascii_control()) + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -268,6 +660,56 @@ impl AsciiExt for [u8] { byte.make_ascii_lowercase(); } } + + #[inline] + fn is_ascii_alphabetic(&self) -> bool { + self.iter().all(|b| b.is_ascii_alphabetic()) + } + + #[inline] + fn is_ascii_uppercase(&self) -> bool { + self.iter().all(|b| b.is_ascii_uppercase()) + } + + #[inline] + fn is_ascii_lowercase(&self) -> bool { + self.iter().all(|b| b.is_ascii_lowercase()) + } + + #[inline] + fn is_ascii_alphanumeric(&self) -> bool { + self.iter().all(|b| b.is_ascii_alphanumeric()) + } + + #[inline] + fn is_ascii_digit(&self) -> bool { + self.iter().all(|b| b.is_ascii_digit()) + } + + #[inline] + fn is_ascii_hexdigit(&self) -> bool { + self.iter().all(|b| b.is_ascii_hexdigit()) + } + + #[inline] + fn is_ascii_punctuation(&self) -> bool { + self.iter().all(|b| b.is_ascii_punctuation()) + } + + #[inline] + fn is_ascii_graphic(&self) -> bool { + self.iter().all(|b| b.is_ascii_graphic()) + } + + #[inline] + fn is_ascii_whitespace(&self) -> bool { + self.iter().all(|b| b.is_ascii_whitespace()) + } + + #[inline] + fn is_ascii_control(&self) -> bool { + self.iter().all(|b| b.is_ascii_control()) + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -287,6 +729,96 @@ impl AsciiExt for u8 { fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); } #[inline] fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); } + + #[inline] + fn is_ascii_alphabetic(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + L|Lx|U|Ux => true, + _ => false + } + } + + #[inline] + fn is_ascii_uppercase(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + U|Ux => true, + _ => false + } + } + + #[inline] + fn is_ascii_lowercase(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + L|Lx => true, + _ => false + } + } + + #[inline] + fn is_ascii_alphanumeric(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D|L|Lx|U|Ux => true, + _ => false + } + } + + #[inline] + fn is_ascii_digit(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D => true, + _ => false + } + } + + #[inline] + fn is_ascii_hexdigit(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D|Lx|Ux => true, + _ => false + } + } + + #[inline] + fn is_ascii_punctuation(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + P => true, + _ => false + } + } + + #[inline] + fn is_ascii_graphic(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + Ux|U|Lx|L|D|P => true, + _ => false + } + } + + #[inline] + fn is_ascii_whitespace(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + Cw|W => true, + _ => false + } + } + + #[inline] + fn is_ascii_control(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + C|Cw => true, + _ => false + } + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -324,6 +856,56 @@ impl AsciiExt for char { fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); } #[inline] fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); } + + #[inline] + fn is_ascii_alphabetic(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_alphabetic() + } + + #[inline] + fn is_ascii_uppercase(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_uppercase() + } + + #[inline] + fn is_ascii_lowercase(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_lowercase() + } + + #[inline] + fn is_ascii_alphanumeric(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_alphanumeric() + } + + #[inline] + fn is_ascii_digit(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_digit() + } + + #[inline] + fn is_ascii_hexdigit(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_hexdigit() + } + + #[inline] + fn is_ascii_punctuation(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_punctuation() + } + + #[inline] + fn is_ascii_graphic(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_graphic() + } + + #[inline] + fn is_ascii_whitespace(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_whitespace() + } + + #[inline] + fn is_ascii_control(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_control() + } } /// An iterator over the escaped version of a byte, constructed via @@ -485,6 +1067,30 @@ static ASCII_UPPERCASE_MAP: [u8; 256] = [ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, ]; +enum AsciiCharacterClass { + C, // control + Cw, // control whitespace + W, // whitespace + D, // digit + L, // lowercase + Lx, // lowercase hex digit + U, // uppercase + Ux, // uppercase hex digit + P, // punctuation +} +use self::AsciiCharacterClass::*; + +static ASCII_CHARACTER_CLASS: [AsciiCharacterClass; 128] = [ +// _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _a _b _c _d _e _f + C, C, C, C, C, C, C, C, C, Cw,Cw,C, Cw,Cw,C, C, // 0_ + C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 1_ + W, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, // 2_ + D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, P, // 3_ + P, Ux,Ux,Ux,Ux,Ux,Ux,U, U, U, U, U, U, U, U, U, // 4_ + U, U, U, U, U, U, U, U, U, U, U, P, P, P, P, P, // 5_ + P, Lx,Lx,Lx,Lx,Lx,Lx,L, L, L, L, L, L, L, L, L, // 6_ + L, L, L, L, L, L, L, L, L, L, L, P, P, P, P, C, // 7_ +]; #[cfg(test)] mod tests { @@ -606,4 +1212,236 @@ mod tests { let x = "a".to_string(); x.eq_ignore_ascii_case("A"); } + + // Shorthands used by the is_ascii_* tests. + macro_rules! assert_all { + ($what:ident, $($str:tt),+) => {{ + $( + for b in $str.chars() { + if !b.$what() { + panic!("expected {}({}) but it isn't", + stringify!($what), b); + } + } + for b in $str.as_bytes().iter() { + if !b.$what() { + panic!("expected {}(0x{:02x})) but it isn't", + stringify!($what), b); + } + } + assert!($str.$what()); + assert!($str.as_bytes().$what()); + )+ + }}; + ($what:ident, $($str:tt),+,) => (assert_all!($what,$($str),+)) + } + macro_rules! assert_none { + ($what:ident, $($str:tt),+) => {{ + $( + for b in $str.chars() { + if b.$what() { + panic!("expected not-{}({}) but it is", + stringify!($what), b); + } + } + for b in $str.as_bytes().iter() { + if b.$what() { + panic!("expected not-{}(0x{:02x})) but it is", + stringify!($what), b); + } + } + )* + }}; + ($what:ident, $($str:tt),+,) => (assert_none!($what,$($str),+)) + } + + #[test] + fn test_is_ascii_alphabetic() { + assert_all!(is_ascii_alphabetic, + "", + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + ); + assert_none!(is_ascii_alphabetic, + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_uppercase() { + assert_all!(is_ascii_uppercase, + "", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + ); + assert_none!(is_ascii_uppercase, + "abcdefghijklmnopqrstuvwxyz", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_lowercase() { + assert_all!(is_ascii_lowercase, + "abcdefghijklmnopqrstuvwxyz", + ); + assert_none!(is_ascii_lowercase, + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_alphanumeric() { + assert_all!(is_ascii_alphanumeric, + "", + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + ); + assert_none!(is_ascii_alphanumeric, + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_digit() { + assert_all!(is_ascii_digit, + "", + "0123456789", + ); + assert_none!(is_ascii_digit, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_hexdigit() { + assert_all!(is_ascii_hexdigit, + "", + "0123456789", + "abcdefABCDEF", + ); + assert_none!(is_ascii_hexdigit, + "ghijklmnopqrstuvwxyz", + "GHIJKLMNOQPRSTUVWXYZ", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_punctuation() { + assert_all!(is_ascii_punctuation, + "", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ); + assert_none!(is_ascii_punctuation, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_graphic() { + assert_all!(is_ascii_graphic, + "", + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ); + assert_none!(is_ascii_graphic, + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_whitespace() { + assert_all!(is_ascii_whitespace, + "", + " \t\n\x0c\r", + ); + assert_none!(is_ascii_whitespace, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x0b\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_control() { + assert_all!(is_ascii_control, + "", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + assert_none!(is_ascii_control, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " ", + ); + } } From c2566f638aa065f14f296aaad0666492ae00a636 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Mon, 13 Feb 2017 11:46:29 -0500 Subject: [PATCH 2/3] Squeeze URL lines under 100 chars wide to make tidy happy. --- src/libstd/ascii.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index cf965291fa2a6..2a22e5e7a1136 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -454,19 +454,19 @@ pub trait AsciiExt { /// /// Rust uses the WhatWG Infra Standard's [definition of ASCII /// whitespace][infra-aw]. There are several other definitions in - /// wide use. For instance, [the POSIX locale][posix-ctype] - /// includes U+000B VERTICAL TAB as well as all the above - /// characters, but—from the very same specification—[the default - /// rule for "field splitting" in the Bourne shell][field-splitting] - /// considers *only* SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. + /// wide use. For instance, [the POSIX locale][pct] includes + /// U+000B VERTICAL TAB as well as all the above characters, + /// but—from the very same specification—[the default rule for + /// "field splitting" in the Bourne shell][bfs] considers *only* + /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. /// /// If you are writing a program that will process an existing /// file format, check what that format's definition of whitespace is /// before using this function. /// /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace - /// [posix-ctype]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 - /// [field-splitting]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 /// /// # Examples /// From 162240c744fa415602dcd56f08895b9583037717 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Mon, 13 Feb 2017 18:44:43 -0500 Subject: [PATCH 3/3] Add feature annotations to the doctests for ascii_ctype. --- src/libstd/ascii.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 2a22e5e7a1136..af21d6d906eb5 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -193,6 +193,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -225,6 +227,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -257,6 +261,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -290,6 +296,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -322,6 +330,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -355,6 +365,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -390,6 +402,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -422,6 +436,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -471,6 +487,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -503,6 +521,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G';