Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic implementation of the string pattern API #22466

Merged
merged 9 commits into from
Feb 23, 2015
2 changes: 1 addition & 1 deletion src/compiletest/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
fn parse_expected(last_nonfollow_error: Option<uint>,
line_num: uint,
line: &str) -> Option<(WhichLine, ExpectedError)> {
let start = match line.find_str("//~") { Some(i) => i, None => return None };
let start = match line.find("//~") { Some(i) => i, None => return None };
let (follow, adjusts) = if line.char_at(start + 3) == '|' {
(true, 0)
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/compiletest/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ fn parse_name_directive(line: &str, directive: &str) -> bool {
pub fn parse_name_value_directive(line: &str, directive: &str)
-> Option<String> {
let keycolon = format!("{}:", directive);
match line.find_str(&keycolon) {
match line.find(&keycolon) {
Some(colon) => {
let value = line[(colon + keycolon.len()) .. line.len()].to_string();
debug!("{}: {}", directive, value);
Expand Down
4 changes: 2 additions & 2 deletions src/compiletest/runtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
check_lines.iter().map(|s| {
s
.trim()
.split_str("[...]")
.split("[...]")
.map(|x| x.to_string())
.collect()
}).collect();
Expand All @@ -866,7 +866,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
None
}
} else {
rest.find_str(frag)
rest.find(frag)
};
match found {
None => {
Expand Down
91 changes: 35 additions & 56 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ pub use core::str::{SplitN, RSplitN};
pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
pub use core::str::{from_utf8_unchecked, from_c_str, ParseBoolError};
pub use unicode::str::{Words, Graphemes, GraphemeIndices};
pub use core::str::Pattern;
pub use core::str::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};

/*
Section: Creating a string
Expand Down Expand Up @@ -530,7 +532,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("bananas".contains("nana"));
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn contains(&self, pat: &str) -> bool {
fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::contains(&self[..], pat)
}

Expand All @@ -545,9 +547,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// ```rust
/// assert!("hello".contains_char('e'));
/// ```
#[unstable(feature = "collections",
reason = "might get removed in favour of a more generic contains()")]
fn contains_char<P: CharEq>(&self, pat: P) -> bool {
#[unstable(feature = "collections")]
#[deprecated(since = "1.0.0", reason = "use `contains()` with a char")]
fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::contains_char(&self[..], pat)
}

Expand Down Expand Up @@ -603,7 +605,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec![""]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn split<P: CharEq>(&self, pat: P) -> Split<P> {
fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
core_str::StrExt::split(&self[..], pat)
}

Expand All @@ -630,7 +632,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec![""]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
core_str::StrExt::splitn(&self[..], count, pat)
}

Expand Down Expand Up @@ -658,8 +660,8 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
/// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
/// ```
#[unstable(feature = "collections", reason = "might get removed")]
fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
#[stable(feature = "rust1", since = "1.0.0")]
fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
core_str::StrExt::split_terminator(&self[..], pat)
}

Expand All @@ -680,7 +682,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> {
core_str::StrExt::rsplitn(&self[..], count, pat)
}

Expand All @@ -706,7 +708,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// ```
#[unstable(feature = "collections",
reason = "might have its iterator type changed")]
fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
// NB: Right now MatchIndices yields `(usize, usize)`,
// but it would be more consistent and useful to return `(usize, &str)`
fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
core_str::StrExt::match_indices(&self[..], pat)
}

Expand All @@ -721,9 +725,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
/// assert_eq!(v, vec!["1", "", "2"]);
/// ```
#[unstable(feature = "collections",
reason = "might get removed in the future in favor of a more generic split()")]
fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
#[unstable(feature = "collections")]
#[deprecated(since = "1.0.0", reason = "use `split()` with a `&str`")]
fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P> {
core_str::StrExt::split_str(&self[..], pat)
}

Expand Down Expand Up @@ -825,7 +829,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("banana".starts_with("ba"));
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn starts_with(&self, pat: &str) -> bool {
fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::starts_with(&self[..], pat)
}

Expand All @@ -837,7 +841,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("banana".ends_with("nana"));
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn ends_with(&self, pat: &str) -> bool {
fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::ends_with(&self[..], pat)
}

Expand All @@ -857,7 +863,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: DoubleEndedSearcher<'a>
{
core_str::StrExt::trim_matches(&self[..], pat)
}

Expand All @@ -877,7 +885,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
core_str::StrExt::trim_left_matches(&self[..], pat)
}

Expand All @@ -897,7 +905,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::trim_right_matches(&self[..], pat)
}

Expand Down Expand Up @@ -1074,7 +1084,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.find(x), None);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn find<P: CharEq>(&self, pat: P) -> Option<usize> {
fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
core_str::StrExt::find(&self[..], pat)
}

Expand Down Expand Up @@ -1102,7 +1112,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.rfind(x), None);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn rfind<P: CharEq>(&self, pat: P) -> Option<usize> {
fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::rfind(&self[..], pat)
}

Expand All @@ -1125,9 +1137,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.find_str("老虎 L"), Some(6));
/// assert_eq!(s.find_str("muffin man"), None);
/// ```
#[unstable(feature = "collections",
reason = "might get removed in favor of a more generic find in the future")]
fn find_str(&self, needle: &str) -> Option<usize> {
#[unstable(feature = "collections")]
#[deprecated(since = "1.0.0", reason = "use `find()` with a `&str`")]
fn find_str<'a, P: Pattern<'a>>(&'a self, needle: P) -> Option<usize> {
core_str::StrExt::find_str(&self[..], needle)
}

Expand Down Expand Up @@ -2887,22 +2899,6 @@ mod bench {
b.iter(|| assert_eq!(s.split('V').count(), 3));
}

#[bench]
fn split_unicode_not_ascii(b: &mut Bencher) {
struct NotAscii(char);
impl CharEq for NotAscii {
fn matches(&mut self, c: char) -> bool {
let NotAscii(cc) = *self;
cc == c
}
fn only_ascii(&self) -> bool { false }
}
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
}


#[bench]
fn split_ascii(b: &mut Bencher) {
let s = "Mary had a little lamb, Little lamb, little-lamb.";
Expand All @@ -2911,23 +2907,6 @@ mod bench {
b.iter(|| assert_eq!(s.split(' ').count(), len));
}

#[bench]
fn split_not_ascii(b: &mut Bencher) {
struct NotAscii(char);
impl CharEq for NotAscii {
#[inline]
fn matches(&mut self, c: char) -> bool {
let NotAscii(cc) = *self;
cc == c
}
fn only_ascii(&self) -> bool { false }
}
let s = "Mary had a little lamb, Little lamb, little-lamb.";
let len = s.split(' ').count();

b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
}

#[bench]
fn split_extern_fn(b: &mut Bencher) {
let s = "Mary had a little lamb, Little lamb, little-lamb.";
Expand Down
27 changes: 15 additions & 12 deletions src/libcore/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ use option::Option;
use slice::SliceExt;

// UTF-8 ranges and tags for encoding characters
static TAG_CONT: u8 = 0b1000_0000u8;
static TAG_TWO_B: u8 = 0b1100_0000u8;
static TAG_THREE_B: u8 = 0b1110_0000u8;
static TAG_FOUR_B: u8 = 0b1111_0000u8;
static MAX_ONE_B: u32 = 0x80u32;
static MAX_TWO_B: u32 = 0x800u32;
static MAX_THREE_B: u32 = 0x10000u32;
const TAG_CONT: u8 = 0b1000_0000u8;
const TAG_TWO_B: u8 = 0b1100_0000u8;
const TAG_THREE_B: u8 = 0b1110_0000u8;
const TAG_FOUR_B: u8 = 0b1111_0000u8;
const MAX_ONE_B: u32 = 0x80u32;
const MAX_TWO_B: u32 = 0x800u32;
const MAX_THREE_B: u32 = 0x10000u32;

/*
Lu Uppercase_Letter an uppercase letter
Expand Down Expand Up @@ -398,11 +398,14 @@ impl CharExt for char {
#[stable(feature = "rust1", since = "1.0.0")]
fn len_utf8(self) -> usize {
let code = self as u32;
match () {
_ if code < MAX_ONE_B => 1,
_ if code < MAX_TWO_B => 2,
_ if code < MAX_THREE_B => 3,
_ => 4,
if code < MAX_ONE_B {
1
} else if code < MAX_TWO_B {
2
} else if code < MAX_THREE_B {
3
} else {
4
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/libcore/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,8 @@ macro_rules! iterator {
fn next(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks
unsafe {
::intrinsics::assume(!self.ptr.is_null());
::intrinsics::assume(!self.end.is_null());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this duplicating the work of #21886 as well? (maybe leave to #21886?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I talked with dotdash about it - it's not exactly duplication, because the assumes are at a different location, and depending on how the code is inlined there might be cases where one of the two changes does not apply. Also, giving LLVM multiple optimization hints can't really hurt.

However, currently these hints are unneeded for my code anyway, because the optimized codepaths that depended on them did not end up in the current iteration of this PR, so if you'd rather have them removed for now I can do that too.

if self.ptr == self.end {
None
} else {
Expand Down Expand Up @@ -693,6 +695,8 @@ macro_rules! iterator {
fn next_back(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks
unsafe {
::intrinsics::assume(!self.ptr.is_null());
::intrinsics::assume(!self.end.is_null());
if self.end == self.ptr {
None
} else {
Expand Down
Loading