From abdeefdbcc96e0f270a4f74892589e1e6cb9b928 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 30 Dec 2014 12:04:12 +1100 Subject: [PATCH 1/7] Remove deprecated functionality from `char`. --- src/libunicode/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/libunicode/lib.rs b/src/libunicode/lib.rs index 72e9ce2bcafe6..1a619e57a9ce1 100644 --- a/src/libunicode/lib.rs +++ b/src/libunicode/lib.rs @@ -58,9 +58,7 @@ mod u_str; /// however the converse is not always true due to the above range limits /// and, as such, should be performed via the `from_u32` function.. pub mod char { - pub use core::char::{MAX, from_u32}; - pub use core::char::{from_digit}; - pub use core::char::Char; + pub use core::char::{MAX, from_u32, from_digit, Char}; pub use normalize::{decompose_canonical, decompose_compatible, compose}; From 8018293e0871645ad266b78864473d82a16d0c0f Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 30 Dec 2014 13:34:06 +1100 Subject: [PATCH 2/7] Switch encode_utf* to by-value self. --- src/libcore/char.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 3423e76ea6408..332c002451fe2 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -183,7 +183,7 @@ pub trait Char { /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. #[unstable = "pending trait organization"] - fn encode_utf8(&self, dst: &mut [u8]) -> Option; + fn encode_utf8(self, dst: &mut [u8]) -> Option; /// Encodes this character as UTF-16 into the provided `u16` buffer, /// and then returns the number of `u16`s written. @@ -191,7 +191,7 @@ pub trait Char { /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. #[unstable = "pending trait organization"] - fn encode_utf16(&self, dst: &mut [u16]) -> Option; + fn encode_utf16(self, dst: &mut [u16]) -> Option; } #[experimental = "trait is experimental"] @@ -260,9 +260,9 @@ impl Char for char { #[inline] #[unstable = "pending error conventions, trait organization"] - fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { + fn encode_utf8(self, dst: &mut [u8]) -> Option { // Marked #[inline] to allow llvm optimizing it away - let code = *self as u32; + let code = self as u32; if code < MAX_ONE_B && dst.len() >= 1 { dst[0] = code as u8; Some(1) @@ -288,9 +288,9 @@ impl Char for char { #[inline] #[unstable = "pending error conventions, trait organization"] - fn encode_utf16(&self, dst: &mut [u16]) -> Option { + fn encode_utf16(self, dst: &mut [u16]) -> Option { // Marked #[inline] to allow llvm optimizing it away - let mut ch = *self as u32; + let mut ch = self as u32; if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 { // The BMP falls through (assuming non-surrogate, as it should) dst[0] = ch as u16; From 01417f245cad2dc7dcafcf285ed6c1be163ac3a5 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 30 Dec 2014 13:36:24 +1100 Subject: [PATCH 3/7] Mark the contents of `char` stable. --- src/libcore/char.rs | 46 ++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 332c002451fe2..708b0bf863722 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -69,7 +69,7 @@ pub const MAX: char = '\u{10ffff}'; /// Converts from `u32` to a `char` #[inline] -#[unstable = "pending decisions about costructors for primitives"] +#[stable] pub fn from_u32(i: u32) -> Option { // catch out-of-bounds and surrogates if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { @@ -92,7 +92,7 @@ pub fn from_u32(i: u32) -> Option { /// Panics if given an `radix` > 36. /// #[inline] -#[unstable = "pending decisions about costructors for primitives"] +#[unstable = "pending integer conventions"] pub fn from_digit(num: uint, radix: uint) -> Option { if radix > 36 { panic!("from_digit: radix is too high (maximum 36)"); @@ -111,7 +111,7 @@ pub fn from_digit(num: uint, radix: uint) -> Option { } /// Basic `char` manipulations. -#[experimental = "trait organization may change"] +#[stable] pub trait Char { /// Checks if a `char` parses as a numeric digit in the given radix. /// @@ -126,7 +126,7 @@ pub trait Char { /// # Panics /// /// Panics if given a radix > 36. - #[unstable = "pending error conventions"] + #[unstable = "pending integer conventions"] fn is_digit(self, radix: uint) -> bool; /// Converts a character to the corresponding digit. @@ -140,7 +140,7 @@ pub trait Char { /// # Panics /// /// Panics if given a radix outside the range [0..36]. - #[unstable = "pending error conventions, trait organization"] + #[unstable = "pending integer conventions"] fn to_digit(self, radix: uint) -> Option; /// Returns an iterator that yields the hexadecimal Unicode escape @@ -149,7 +149,7 @@ pub trait Char { /// All characters are escaped with Rust syntax of the form `\\u{NNNN}` /// where `NNNN` is the shortest hexadecimal representation of the code /// point. - #[unstable = "pending error conventions, trait organization"] + #[stable] fn escape_unicode(self) -> EscapeUnicode; /// Returns an iterator that yields the 'default' ASCII and @@ -164,17 +164,17 @@ pub trait Char { /// escaped. /// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. - #[unstable = "pending error conventions, trait organization"] + #[stable] fn escape_default(self) -> EscapeDefault; /// Returns the amount of bytes this character would need if encoded in /// UTF-8. - #[unstable = "pending trait organization"] + #[stable] fn len_utf8(self) -> uint; /// Returns the amount of bytes this character would need if encoded in /// UTF-16. - #[unstable = "pending trait organization"] + #[stable] fn len_utf16(self) -> uint; /// Encodes this character as UTF-8 into the provided byte buffer, @@ -182,7 +182,7 @@ pub trait Char { /// /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. - #[unstable = "pending trait organization"] + #[stable] fn encode_utf8(self, dst: &mut [u8]) -> Option; /// Encodes this character as UTF-16 into the provided `u16` buffer, @@ -190,13 +190,13 @@ pub trait Char { /// /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. - #[unstable = "pending trait organization"] + #[stable] fn encode_utf16(self, dst: &mut [u16]) -> Option; } -#[experimental = "trait is experimental"] -impl Char for char { - #[unstable = "pending trait organization"] +#[stable] +impl CharExt for char { + #[unstable = "pending integer conventions"] fn is_digit(self, radix: uint) -> bool { match self.to_digit(radix) { Some(_) => true, @@ -204,7 +204,7 @@ impl Char for char { } } - #[unstable = "pending trait organization"] + #[unstable = "pending integer conventions"] fn to_digit(self, radix: uint) -> Option { if radix > 36 { panic!("to_digit: radix is too high (maximum 36)"); @@ -219,12 +219,12 @@ impl Char for char { else { None } } - #[unstable = "pending error conventions, trait organization"] + #[stable] fn escape_unicode(self) -> EscapeUnicode { EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash } } - #[unstable = "pending error conventions, trait organization"] + #[stable] fn escape_default(self) -> EscapeDefault { let init_state = match self { '\t' => EscapeDefaultState::Backslash('t'), @@ -240,7 +240,7 @@ impl Char for char { } #[inline] - #[unstable = "pending trait organization"] + #[stable] fn len_utf8(self) -> uint { let code = self as u32; match () { @@ -252,14 +252,14 @@ impl Char for char { } #[inline] - #[unstable = "pending trait organization"] + #[stable] fn len_utf16(self) -> uint { let ch = self as u32; if (ch & 0xFFFF_u32) == ch { 1 } else { 2 } } #[inline] - #[unstable = "pending error conventions, trait organization"] + #[unstable = "pending decision about Iterator/Writer/Reader"] fn encode_utf8(self, dst: &mut [u8]) -> Option { // Marked #[inline] to allow llvm optimizing it away let code = self as u32; @@ -287,7 +287,7 @@ impl Char for char { } #[inline] - #[unstable = "pending error conventions, trait organization"] + #[unstable = "pending decision about Iterator/Writer/Reader"] fn encode_utf16(self, dst: &mut [u16]) -> Option { // Marked #[inline] to allow llvm optimizing it away let mut ch = self as u32; @@ -310,6 +310,7 @@ impl Char for char { /// An iterator over the characters that represent a `char`, as escaped by /// Rust's unicode escaping rules. #[derive(Clone)] +#[stable] pub struct EscapeUnicode { c: char, state: EscapeUnicodeState @@ -325,6 +326,7 @@ enum EscapeUnicodeState { Done, } +#[stable] impl Iterator for EscapeUnicode { type Item = char; @@ -370,6 +372,7 @@ impl Iterator for EscapeUnicode { /// An iterator over the characters that represent a `char`, escaped /// for maximum portability. #[derive(Clone)] +#[stable] pub struct EscapeDefault { state: EscapeDefaultState } @@ -382,6 +385,7 @@ enum EscapeDefaultState { Unicode(EscapeUnicode), } +#[stable] impl Iterator for EscapeDefault { type Item = char; From 19120209d8e532514203d16a2cff0ad3b44de3bb Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 30 Dec 2014 13:53:20 +1100 Subject: [PATCH 4/7] Rename `core::char::Char` to `CharExt` to match prelude guidelines. Imports may need to be updated so this is a [breaking-change] --- src/libcollections/lib.rs | 2 +- src/libcollections/str.rs | 2 +- src/libcore/char.rs | 2 +- src/libcore/fmt/float.rs | 2 +- src/libcore/fmt/mod.rs | 6 +++--- src/libcore/num/mod.rs | 2 +- src/libcore/prelude.rs | 2 +- src/librustdoc/clean/mod.rs | 1 - src/libstd/io/mod.rs | 2 +- src/libstd/prelude/v1.rs | 2 +- src/libunicode/lib.rs | 2 +- 11 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index 944b224fed854..bf3b35b4f6817 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -103,7 +103,7 @@ mod std { mod prelude { // from core. pub use core::borrow::IntoCow; - pub use core::char::Char; + pub use core::char::CharExt; pub use core::clone::Clone; pub use core::cmp::{PartialEq, Eq, PartialOrd, Ord}; pub use core::cmp::Ordering::{Less, Equal, Greater}; diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index ed6a957d2acfa..ecf17820d2d8f 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -55,7 +55,7 @@ use self::RecompositionState::*; use self::DecompositionType::*; use core::borrow::{BorrowFrom, ToOwned}; -use core::char::Char; +use core::char::CharExt; use core::clone::Clone; use core::iter::AdditiveIterator; use core::iter::{range, Iterator, IteratorExt}; diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 708b0bf863722..ce530ae1d32e5 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -112,7 +112,7 @@ pub fn from_digit(num: uint, radix: uint) -> Option { /// Basic `char` manipulations. #[stable] -pub trait Char { +pub trait CharExt { /// Checks if a `char` parses as a numeric digit in the given radix. /// /// Compared to `is_numeric()`, this function only recognizes the characters diff --git a/src/libcore/fmt/float.rs b/src/libcore/fmt/float.rs index 9e62226220c0e..f63242b4f859a 100644 --- a/src/libcore/fmt/float.rs +++ b/src/libcore/fmt/float.rs @@ -15,7 +15,7 @@ pub use self::SignificantDigits::*; pub use self::SignFormat::*; use char; -use char::Char; +use char::CharExt; use fmt; use iter::{IteratorExt, range}; use num::{cast, Float, ToPrimitive}; diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index f49f87ff329f0..102836f8d3024 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -388,7 +388,7 @@ impl<'a> Formatter<'a> { prefix: &str, buf: &str) -> Result { - use char::Char; + use char::CharExt; use fmt::rt::{FlagAlternate, FlagSignPlus, FlagSignAwareZeroPad}; let mut width = buf.len(); @@ -504,7 +504,7 @@ impl<'a> Formatter<'a> { fn with_padding(&mut self, padding: uint, default: rt::Alignment, f: F) -> Result where F: FnOnce(&mut Formatter) -> Result, { - use char::Char; + use char::CharExt; let align = match self.align { rt::AlignUnknown => default, _ => self.align @@ -613,7 +613,7 @@ impl Show for str { impl Show for char { fn fmt(&self, f: &mut Formatter) -> Result { - use char::Char; + use char::CharExt; let mut utf8 = [0u8; 4]; let amt = self.encode_utf8(&mut utf8).unwrap_or(0); diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 6c3b153c00057..426c858d408ad 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -15,7 +15,7 @@ #![stable] #![allow(missing_docs)] -use char::Char; +use char::CharExt; use clone::Clone; use cmp::{PartialEq, Eq}; use cmp::{PartialOrd, Ord}; diff --git a/src/libcore/prelude.rs b/src/libcore/prelude.rs index 64f13a8f123a8..d4aca1bb73c23 100644 --- a/src/libcore/prelude.rs +++ b/src/libcore/prelude.rs @@ -38,7 +38,7 @@ pub use mem::drop; // Reexported types and traits -pub use char::Char; +pub use char::CharExt; pub use clone::Clone; pub use cmp::{PartialEq, PartialOrd, Eq, Ord}; pub use iter::{Extend, IteratorExt}; diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs index f4d0bb79d88d6..5f6c62f1d7a68 100644 --- a/src/librustdoc/clean/mod.rs +++ b/src/librustdoc/clean/mod.rs @@ -50,7 +50,6 @@ use rustc::session::config; use std::rc::Rc; use std::u32; use std::str::Str as StrTrait; // Conflicts with Str variant -use std::char::Char as CharTrait; // Conflicts with Char variant use std::path::Path as FsPath; // Conflicts with Path struct use core::DocContext; diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index ae401a04a96f0..51bf206f70e9f 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -225,7 +225,7 @@ pub use self::FileMode::*; pub use self::FileAccess::*; pub use self::IoErrorKind::*; -use char::Char; +use char::CharExt; use clone::Clone; use default::Default; use error::{FromError, Error}; diff --git a/src/libstd/prelude/v1.rs b/src/libstd/prelude/v1.rs index a122cb81b8c42..eda20fc7d603f 100644 --- a/src/libstd/prelude/v1.rs +++ b/src/libstd/prelude/v1.rs @@ -22,7 +22,7 @@ // Reexported types and traits #[stable] #[doc(no_inline)] pub use boxed::Box; -#[stable] #[doc(no_inline)] pub use char::{Char, UnicodeChar}; +#[stable] #[doc(no_inline)] pub use char::{CharExt, UnicodeChar}; #[stable] #[doc(no_inline)] pub use clone::Clone; #[stable] #[doc(no_inline)] pub use cmp::{PartialEq, PartialOrd, Eq, Ord}; #[stable] #[doc(no_inline)] pub use iter::CloneIteratorExt; diff --git a/src/libunicode/lib.rs b/src/libunicode/lib.rs index 1a619e57a9ce1..170700fb4d5fe 100644 --- a/src/libunicode/lib.rs +++ b/src/libunicode/lib.rs @@ -58,7 +58,7 @@ mod u_str; /// however the converse is not always true due to the above range limits /// and, as such, should be performed via the `from_u32` function.. pub mod char { - pub use core::char::{MAX, from_u32, from_digit, Char}; + pub use core::char::{MAX, from_u32, from_digit, CharExt}; pub use normalize::{decompose_canonical, decompose_compatible, compose}; From 0302d379776fca82d3eb693046239dc66998f691 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 30 Dec 2014 13:58:31 +1100 Subject: [PATCH 5/7] Merge `UnicodeChar` and `CharExt`. This "reexports" all the functionality of `core::char::CharExt` as methods on `unicode::u_char::UnicodeChar` (renamed to `CharExt`). Imports may need to be updated (one now just imports `unicode::CharExt`, or `std::char::CharExt` rather than two traits from either), so this is a [breaking-change] --- src/libcollections/lib.rs | 3 +- src/libstd/io/mod.rs | 1 - src/libstd/num/strconv.rs | 2 +- src/libstd/path/windows.rs | 2 +- src/libstd/prelude/v1.rs | 2 +- src/libunicode/lib.rs | 10 ++-- src/libunicode/tables.rs | 2 +- src/libunicode/u_char.rs | 107 +++++++++++++++++++++++++++++++++++-- src/libunicode/u_str.rs | 6 +-- 9 files changed, 116 insertions(+), 19 deletions(-) diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index bf3b35b4f6817..db2367950387b 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -103,7 +103,6 @@ mod std { mod prelude { // from core. pub use core::borrow::IntoCow; - pub use core::char::CharExt; pub use core::clone::Clone; pub use core::cmp::{PartialEq, Eq, PartialOrd, Ord}; pub use core::cmp::Ordering::{Less, Equal, Greater}; @@ -127,7 +126,7 @@ mod prelude { // from other crates. pub use alloc::boxed::Box; - pub use unicode::char::UnicodeChar; + pub use unicode::char::CharExt; // from collections. pub use slice::SliceConcatExt; diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 51bf206f70e9f..3fa0b5645c528 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -248,7 +248,6 @@ use str; use string::String; use uint; use unicode; -use unicode::char::UnicodeChar; use vec::Vec; // Reexports diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs index 20dd70f0faab2..67fe599ecd6bf 100644 --- a/src/libstd/num/strconv.rs +++ b/src/libstd/num/strconv.rs @@ -16,7 +16,7 @@ use self::ExponentFormat::*; use self::SignificantDigits::*; use self::SignFormat::*; -use char::{self, Char}; +use char::{self, CharExt}; use num::{self, Int, Float, ToPrimitive}; use num::FpCategory as Fp; use ops::FnMut; diff --git a/src/libstd/path/windows.rs b/src/libstd/path/windows.rs index aae8d6cadefb2..cf8bc0e6242b3 100644 --- a/src/libstd/path/windows.rs +++ b/src/libstd/path/windows.rs @@ -16,6 +16,7 @@ use self::PathPrefix::*; use ascii::AsciiExt; use c_str::{CString, ToCStr}; +use char::CharExt; use clone::Clone; use cmp::{PartialEq, Eq, PartialOrd, Ord, Ordering}; use hash; @@ -28,7 +29,6 @@ use option::Option::{Some, None}; use slice::{SliceExt, SliceConcatExt}; use str::{SplitTerminator, FromStr, StrExt}; use string::{String, ToString}; -use unicode::char::UnicodeChar; use vec::Vec; use super::{contains_nul, BytesContainer, GenericPath, GenericPathUnsafe}; diff --git a/src/libstd/prelude/v1.rs b/src/libstd/prelude/v1.rs index eda20fc7d603f..f6bdcd53dff24 100644 --- a/src/libstd/prelude/v1.rs +++ b/src/libstd/prelude/v1.rs @@ -22,7 +22,7 @@ // Reexported types and traits #[stable] #[doc(no_inline)] pub use boxed::Box; -#[stable] #[doc(no_inline)] pub use char::{CharExt, UnicodeChar}; +#[stable] #[doc(no_inline)] pub use char::CharExt; #[stable] #[doc(no_inline)] pub use clone::Clone; #[stable] #[doc(no_inline)] pub use cmp::{PartialEq, PartialOrd, Eq, Ord}; #[stable] #[doc(no_inline)] pub use iter::CloneIteratorExt; diff --git a/src/libunicode/lib.rs b/src/libunicode/lib.rs index 170700fb4d5fe..a3884d0c86e1d 100644 --- a/src/libunicode/lib.rs +++ b/src/libunicode/lib.rs @@ -44,9 +44,9 @@ mod u_str; // re-export char so that std et al see it correctly /// Character manipulation (`char` type, Unicode Scalar Value) /// -/// This module provides the `Char` and `UnicodeChar` traits, as well as their -/// implementation for the primitive `char` type, in order to allow basic character -/// manipulation. +/// This module provides the `CharExt` trait, as well as its +/// implementation for the primitive `char` type, in order to allow +/// basic character manipulation. /// /// A `char` actually represents a /// *[Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)*, @@ -58,14 +58,14 @@ mod u_str; /// however the converse is not always true due to the above range limits /// and, as such, should be performed via the `from_u32` function.. pub mod char { - pub use core::char::{MAX, from_u32, from_digit, CharExt}; + pub use core::char::{MAX, from_u32, from_digit}; pub use normalize::{decompose_canonical, decompose_compatible, compose}; pub use tables::normalization::canonical_combining_class; pub use tables::UNICODE_VERSION; - pub use u_char::UnicodeChar; + pub use u_char::CharExt; } pub mod str { diff --git a/src/libunicode/tables.rs b/src/libunicode/tables.rs index e3550810010b5..c755ea9318402 100644 --- a/src/libunicode/tables.rs +++ b/src/libunicode/tables.rs @@ -13,7 +13,7 @@ #![allow(missing_docs, non_upper_case_globals, non_snake_case)] /// The version of [Unicode](http://www.unicode.org/) -/// that the `UnicodeChar` and `UnicodeStrPrelude` traits are based on. +/// that the unicode parts of `CharExt` and `UnicodeStrPrelude` traits are based on. pub const UNICODE_VERSION: (uint, uint, uint) = (7, 0, 0); fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index 9c356801604dd..c1abfd4e18998 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -8,17 +8,99 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Unicode-intensive `char` methods. +//! Unicode-intensive `char` methods along with the `core` methods. //! //! These methods implement functionality for `char` that requires knowledge of //! Unicode definitions, including normalization, categorization, and display information. +use core::char; +use core::char::CharExt as C; use core::option::Option; use tables::{derived_property, property, general_category, conversions, charwidth}; -/// Useful functions for Unicode characters. +/// Functionality for manipulating `char`. #[experimental = "pending prelude organization"] -pub trait UnicodeChar { +pub trait CharExt { + /// Checks if a `char` parses as a numeric digit in the given radix. + /// + /// Compared to `is_numeric()`, this function only recognizes the characters + /// `0-9`, `a-z` and `A-Z`. + /// + /// # Return value + /// + /// Returns `true` if `c` is a valid digit under `radix`, and `false` + /// otherwise. + /// + /// # Panics + /// + /// Panics if given a radix > 36. + #[unstable = "pending integer conventions"] + fn is_digit(self, radix: uint) -> bool; + + /// Converts a character to the corresponding digit. + /// + /// # Return value + /// + /// If `c` is between '0' and '9', the corresponding value between 0 and + /// 9. If `c` is 'a' or 'A', 10. If `c` is 'b' or 'B', 11, etc. Returns + /// none if the character does not refer to a digit in the given radix. + /// + /// # Panics + /// + /// Panics if given a radix outside the range [0..36]. + #[unstable = "pending integer conventions"] + fn to_digit(self, radix: uint) -> Option; + + /// Returns an iterator that yields the hexadecimal Unicode escape + /// of a character, as `char`s. + /// + /// All characters are escaped with Rust syntax of the form `\\u{NNNN}` + /// where `NNNN` is the shortest hexadecimal representation of the code + /// point. + #[stable] + fn escape_unicode(self) -> char::EscapeUnicode; + + /// Returns an iterator that yields the 'default' ASCII and + /// C++11-like literal escape of a character, as `char`s. + /// + /// The default is chosen with a bias toward producing literals that are + /// legal in a variety of languages, including C++11 and similar C-family + /// languages. The exact rules are: + /// + /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively. + /// * Single-quote, double-quote and backslash chars are backslash- + /// escaped. + /// * Any other chars in the range [0x20,0x7e] are not escaped. + /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. + #[stable] + fn escape_default(self) -> char::EscapeDefault; + + /// Returns the amount of bytes this character would need if encoded in + /// UTF-8. + #[stable] + fn len_utf8(self) -> uint; + + /// Returns the amount of bytes this character would need if encoded in + /// UTF-16. + #[stable] + fn len_utf16(self) -> uint; + + /// Encodes this character as UTF-8 into the provided byte buffer, + /// and then returns the number of bytes written. + /// + /// If the buffer is not large enough, nothing will be written into it + /// and a `None` will be returned. + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf8(self, dst: &mut [u8]) -> Option; + + /// Encodes this character as UTF-16 into the provided `u16` buffer, + /// and then returns the number of `u16`s written. + /// + /// If the buffer is not large enough, nothing will be written into it + /// and a `None` will be returned. + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf16(self, dst: &mut [u16]) -> Option; + /// Returns whether the specified character is considered a Unicode /// alphabetic code point. fn is_alphabetic(self) -> bool; @@ -118,7 +200,24 @@ pub trait UnicodeChar { } #[experimental = "pending prelude organization"] -impl UnicodeChar for char { +impl CharExt for char { + #[unstable = "pending integer conventions"] + fn is_digit(self, radix: uint) -> bool { C::is_digit(self, radix) } + #[unstable = "pending integer conventions"] + fn to_digit(self, radix: uint) -> Option { C::to_digit(self, radix) } + #[stable] + fn escape_unicode(self) -> char::EscapeUnicode { C::escape_unicode(self) } + #[stable] + fn escape_default(self) -> char::EscapeDefault { C::escape_default(self) } + #[stable] + fn len_utf8(self) -> uint { C::len_utf8(self) } + #[stable] + fn len_utf16(self) -> uint { C::len_utf16(self) } + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf8(self, dst: &mut [u8]) -> Option { C::encode_utf8(self, dst) } + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf16(self, dst: &mut [u16]) -> Option { C::encode_utf16(self, dst) } + fn is_alphabetic(self) -> bool { match self { 'a' ... 'z' | 'A' ... 'Z' => true, diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs index 1b0c4171134ed..90949437774f1 100644 --- a/src/libunicode/u_str.rs +++ b/src/libunicode/u_str.rs @@ -13,7 +13,7 @@ //! Unicode-intensive string manipulations. //! //! This module provides functionality to `str` that requires the Unicode methods provided by the -//! UnicodeChar trait. +//! unicode parts of the CharExt trait. use self::GraphemeState::*; use core::prelude::*; @@ -26,7 +26,7 @@ use core::num::Int; use core::slice; use core::str::Split; -use u_char::UnicodeChar; +use u_char::CharExt as UCharExt; // conflicts with core::prelude::CharExt use tables::grapheme::GraphemeCat; /// An iterator over the words of a string, separated by a sequence of whitespace @@ -529,7 +529,7 @@ impl Iterator for Utf16Encoder where I: Iterator { let mut buf = [0u16; 2]; self.chars.next().map(|ch| { - let n = ch.encode_utf16(buf.as_mut_slice()).unwrap_or(0); + let n = CharExt::encode_utf16(ch, buf.as_mut_slice()).unwrap_or(0); if n == 2 { self.extra = buf[1]; } buf[0] }) From e670fb484b89dfbdfbee9b91497db61303882d8b Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 30 Dec 2014 14:14:01 +1100 Subject: [PATCH 6/7] Apply explicit stabilities to unicode parts of CharExt. --- src/libunicode/u_char.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index c1abfd4e18998..5693c222de123 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -19,7 +19,7 @@ use core::option::Option; use tables::{derived_property, property, general_category, conversions, charwidth}; /// Functionality for manipulating `char`. -#[experimental = "pending prelude organization"] +#[stable] pub trait CharExt { /// Checks if a `char` parses as a numeric digit in the given radix. /// @@ -103,6 +103,7 @@ pub trait CharExt { /// Returns whether the specified character is considered a Unicode /// alphabetic code point. + #[stable] fn is_alphabetic(self) -> bool; /// Returns whether the specified character satisfies the 'XID_Start' @@ -111,6 +112,7 @@ pub trait CharExt { /// 'XID_Start' is a Unicode Derived Property specified in /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to ID_Start but modified for closure under NFKx. + #[experimental = "mainly needed for compiler internals"] fn is_xid_start(self) -> bool; /// Returns whether the specified `char` satisfies the 'XID_Continue' @@ -119,38 +121,45 @@ pub trait CharExt { /// 'XID_Continue' is a Unicode Derived Property specified in /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to 'ID_Continue' but modified for closure under NFKx. + #[experimental = "mainly needed for compiler internals"] fn is_xid_continue(self) -> bool; /// Indicates whether a character is in lowercase. /// /// This is defined according to the terms of the Unicode Derived Core /// Property `Lowercase`. + #[stable] fn is_lowercase(self) -> bool; /// Indicates whether a character is in uppercase. /// /// This is defined according to the terms of the Unicode Derived Core /// Property `Uppercase`. + #[stable] fn is_uppercase(self) -> bool; /// Indicates whether a character is whitespace. /// /// Whitespace is defined in terms of the Unicode Property `White_Space`. + #[stable] fn is_whitespace(self) -> bool; /// Indicates whether a character is alphanumeric. /// /// Alphanumericness is defined in terms of the Unicode General Categories /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'. + #[stable] fn is_alphanumeric(self) -> bool; /// Indicates whether a character is a control code point. /// /// Control code points are defined in terms of the Unicode General /// Category `Cc`. + #[stable] fn is_control(self) -> bool; /// Indicates whether the character is numeric (Nd, Nl, or No). + #[stable] fn is_numeric(self) -> bool; /// Converts a character to its lowercase equivalent. @@ -162,6 +171,7 @@ pub trait CharExt { /// /// Returns the lowercase equivalent of the character, or the character /// itself if no conversion is possible. + #[experimental = "pending case transformation decisions"] fn to_lowercase(self) -> char; /// Converts a character to its uppercase equivalent. @@ -184,6 +194,7 @@ pub trait CharExt { /// [`SpecialCasing`.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt /// /// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992 + #[experimental = "pending case transformation decisions"] fn to_uppercase(self) -> char; /// Returns this character's displayed width in columns, or `None` if it is a @@ -199,7 +210,7 @@ pub trait CharExt { fn width(self, is_cjk: bool) -> Option; } -#[experimental = "pending prelude organization"] +#[stable] impl CharExt for char { #[unstable = "pending integer conventions"] fn is_digit(self, radix: uint) -> bool { C::is_digit(self, radix) } @@ -218,6 +229,7 @@ impl CharExt for char { #[unstable = "pending decision about Iterator/Writer/Reader"] fn encode_utf16(self, dst: &mut [u16]) -> Option { C::encode_utf16(self, dst) } + #[stable] fn is_alphabetic(self) -> bool { match self { 'a' ... 'z' | 'A' ... 'Z' => true, @@ -226,10 +238,13 @@ impl CharExt for char { } } + #[experimental = "mainly needed for compiler internals"] fn is_xid_start(self) -> bool { derived_property::XID_Start(self) } + #[experimental = "mainly needed for compiler internals"] fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) } + #[stable] fn is_lowercase(self) -> bool { match self { 'a' ... 'z' => true, @@ -238,6 +253,7 @@ impl CharExt for char { } } + #[stable] fn is_uppercase(self) -> bool { match self { 'A' ... 'Z' => true, @@ -246,6 +262,7 @@ impl CharExt for char { } } + #[stable] fn is_whitespace(self) -> bool { match self { ' ' | '\x09' ... '\x0d' => true, @@ -254,12 +271,15 @@ impl CharExt for char { } } + #[stable] fn is_alphanumeric(self) -> bool { self.is_alphabetic() || self.is_numeric() } + #[stable] fn is_control(self) -> bool { general_category::Cc(self) } + #[stable] fn is_numeric(self) -> bool { match self { '0' ... '9' => true, @@ -268,8 +288,10 @@ impl CharExt for char { } } + #[experimental = "pending case transformation decisions"] fn to_lowercase(self) -> char { conversions::to_lower(self) } + #[experimental = "pending case transformation decisions"] fn to_uppercase(self) -> char { conversions::to_upper(self) } #[experimental = "needs expert opinion. is_cjk flag stands out as ugly"] From 990a79f097e8e74308bfec6d72dcdbb769a7973b Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sun, 4 Jan 2015 01:19:03 +1100 Subject: [PATCH 7/7] char: small tweak since `is_some` > equivalent `match`. --- src/libcore/char.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index ce530ae1d32e5..291b7f2ece445 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -198,10 +198,7 @@ pub trait CharExt { impl CharExt for char { #[unstable = "pending integer conventions"] fn is_digit(self, radix: uint) -> bool { - match self.to_digit(radix) { - Some(_) => true, - None => false, - } + self.to_digit(radix).is_some() } #[unstable = "pending integer conventions"]