From 13fbed44b3a2d04c206d2d7a182477fcc3107446 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 14 Feb 2023 22:34:37 +0100 Subject: [PATCH] core: move Pattern et al to core::pattern module (RFC 2295) Pattern is no longer str-specific, so move it from core::str::pattern module to a new core::pattern module. This introduces no changes in behaviour or implementation. Just moves stuff around and adjusts documentation. Issue: https://github.com/rust-lang/rust/issues/49802 --- library/alloc/src/str.rs | 4 +- library/alloc/src/string.rs | 4 +- library/alloc/tests/str.rs | 8 +- library/core/src/lib.rs | 1 + library/core/src/pattern.rs | 349 ++++++++++++++++++++++++++++++ library/core/src/str/iter.rs | 3 +- library/core/src/str/mod.rs | 4 +- library/core/src/str/pattern.rs | 367 +++----------------------------- library/core/tests/pattern.rs | 2 +- library/std/src/lib.rs | 2 + 10 files changed, 389 insertions(+), 355 deletions(-) create mode 100644 library/core/src/pattern.rs diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs index c1447fdc967f7..27d14d4c63b6e 100644 --- a/library/alloc/src/str.rs +++ b/library/alloc/src/str.rs @@ -11,7 +11,7 @@ use core::borrow::{Borrow, BorrowMut}; use core::iter::FusedIterator; use core::mem; use core::ptr; -use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; +use core::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; use core::unicode::conversions; use crate::borrow::ToOwned; @@ -20,8 +20,6 @@ use crate::slice::{Concat, Join, SliceIndex}; use crate::string::String; use crate::vec::Vec; -#[stable(feature = "rust1", since = "1.0.0")] -pub use core::str::pattern; #[stable(feature = "encode_utf16", since = "1.8.0")] pub use core::str::EncodeUtf16; #[stable(feature = "split_ascii_whitespace", since = "1.34.0")] diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index 9d41049d1a81e..ea4da19bce11a 100644 --- a/library/alloc/src/string.rs +++ 
b/library/alloc/src/string.rs @@ -57,7 +57,7 @@ use core::ops::Bound::{Excluded, Included, Unbounded}; use core::ops::{self, Index, IndexMut, Range, RangeBounds}; use core::ptr; use core::slice; -use core::str::pattern::Pattern; +use core::pattern::Pattern; #[cfg(not(no_global_oom_handling))] use core::str::Utf8Chunks; @@ -1373,7 +1373,7 @@ impl String { where P: for<'x> Pattern<&'x str>, { - use core::str::pattern::Searcher; + use core::pattern::Searcher; let rejections = { let mut searcher = pat.into_searcher(self); diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index cc2110f5673a0..7ba183edc77f7 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1856,14 +1856,14 @@ fn test_repeat() { } mod pattern { - use std::str::pattern::SearchStep::{self, Done, Match, Reject}; - use std::str::pattern::{Pattern, ReverseSearcher, Searcher}; + use core::pattern::SearchStep::{self, Done, Match, Reject}; + use core::pattern::{Pattern, ReverseSearcher, Searcher}; macro_rules! make_test { ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => { #[allow(unused_imports)] mod $name { - use std::str::pattern::SearchStep::{Match, Reject}; + use core::pattern::SearchStep::{Match, Reject}; use super::{cmp_search_to_vec}; #[test] fn fwd() { @@ -2139,7 +2139,7 @@ generate_iterator_test! { #[test] fn different_str_pattern_forwarding_lifetimes() { - use std::str::pattern::Pattern; + use core::pattern::Pattern; fn foo<'a, P>(p: P) where diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index dc0702c467a4e..14cfd8a119497 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -357,6 +357,7 @@ pub mod sync; pub mod fmt; pub mod hash; +pub mod pattern; pub mod slice; pub mod str; pub mod time; diff --git a/library/core/src/pattern.rs b/library/core/src/pattern.rs new file mode 100644 index 0000000000000..225c6d2dba3ef --- /dev/null +++ b/library/core/src/pattern.rs @@ -0,0 +1,349 @@ +//! The Pattern API. +//! +//! 
The Pattern API provides a generic mechanism for using different pattern +//! types when searching through different objects. +//! +//! For more details, see the traits [`Pattern`], [`Haystack`], [`Searcher`], +//! [`ReverseSearcher`] and [`DoubleEndedSearcher`]. Although this API is +//! unstable, it is exposed via stable APIs on the [`str`] type. +//! +//! # Examples +//! +//! [`Pattern`] is [implemented][pattern-impls] in the stable API for +//! [`&str`][`str`], [`char`], slices of [`char`], and functions and closures +//! implementing `FnMut(char) -> bool`. +//! +//! ``` +//! let s = "Can you find a needle in a haystack?"; +//! +//! // &str pattern +//! assert_eq!(s.find("you"), Some(4)); +//! // char pattern +//! assert_eq!(s.find('n'), Some(2)); +//! // array of chars pattern +//! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u']), Some(1)); +//! // slice of chars pattern +//! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u'][..]), Some(1)); +//! // closure pattern +//! assert_eq!(s.find(|c: char| c.is_ascii_punctuation()), Some(35)); +//! ``` +//! +//! [pattern-impls]: Pattern#implementors + +#![unstable( + feature = "pattern", + reason = "API not fully fleshed out and ready to be stabilized", + issue = "27721" +)] + +/// A pattern which can be matched against a [`Haystack`]. +/// +/// A `Pattern` expresses that the implementing type can be used as a pattern +/// for searching in a `H`. +/// +/// For example, character `'a'` and string `"aa"` are patterns that would match +/// at index `1` in the string `"baaaab"`. +/// +/// The trait itself acts as a builder for an associated +/// [`Searcher`] type, which does the actual work of finding +/// occurrences of the pattern in a string. +/// +/// Depending on the type of the pattern, the behaviour of methods like +/// [`str::find`] and [`str::contains`] can change. The table below describes +/// some of those behaviours. 
+/// +/// | Pattern type | Match condition | +/// |--------------------------|-------------------------------------------| +/// | `&str` | is substring | +/// | `char` | is contained in string | +/// | `&[char]` | any char in slice is contained in string | +/// | `F: FnMut(char) -> bool` | `F` returns `true` for a char in string | +/// | `&&str` | is substring | +/// | `&String` | is substring | +/// +/// # Examples +/// +/// ``` +/// // &str +/// assert_eq!("abaaa".find("ba"), Some(1)); +/// assert_eq!("abaaa".find("bac"), None); +/// +/// // char +/// assert_eq!("abaaa".find('a'), Some(0)); +/// assert_eq!("abaaa".find('b'), Some(1)); +/// assert_eq!("abaaa".find('c'), None); +/// +/// // &[char; N] +/// assert_eq!("ab".find(&['b', 'a']), Some(0)); +/// assert_eq!("abaaa".find(&['a', 'z']), Some(0)); +/// assert_eq!("abaaa".find(&['c', 'd']), None); +/// +/// // &[char] +/// assert_eq!("ab".find(&['b', 'a'][..]), Some(0)); +/// assert_eq!("abaaa".find(&['a', 'z'][..]), Some(0)); +/// assert_eq!("abaaa".find(&['c', 'd'][..]), None); +/// +/// // FnMut(char) -> bool +/// assert_eq!("abcdef_z".find(|ch| ch > 'd' && ch < 'y'), Some(4)); +/// assert_eq!("abcddd_z".find(|ch| ch > 'd' && ch < 'y'), None); +/// ``` +pub trait Pattern: Sized { + /// Associated searcher for this pattern + type Searcher: Searcher; + + /// Constructs the associated searcher from + /// `self` and the `haystack` to search in. 
+ fn into_searcher(self, haystack: H) -> Self::Searcher; + + /// Checks whether the pattern matches anywhere in the haystack + fn is_contained_in(self, haystack: H) -> bool { + self.into_searcher(haystack).next_match().is_some() + } + + /// Checks whether the pattern matches at the front of the haystack + fn is_prefix_of(self, haystack: H) -> bool { + matches!( + self.into_searcher(haystack).next(), + SearchStep::Match(start, _) if start == haystack.cursor_at_front() + ) + } + + /// Checks whether the pattern matches at the back of the haystack + fn is_suffix_of(self, haystack: H) -> bool + where Self::Searcher: ReverseSearcher { + matches!( + self.into_searcher(haystack).next_back(), + SearchStep::Match(_, end) if end == haystack.cursor_at_back() + ) + } + + /// Removes the pattern from the front of haystack, if it matches. + fn strip_prefix_of(self, haystack: H) -> Option { + if let SearchStep::Match(start, end) = self.into_searcher(haystack).next() { + // This cannot be debug_assert_eq because `H::Cursor` isn’t required to be Debug. + debug_assert!(start == haystack.cursor_at_front(), + "The first search step from Searcher \ + must include the first character"); + // SAFETY: `Searcher` is known to return valid indices. + Some(unsafe { haystack.split_at_cursor_unchecked(end) }.1) + } else { + None + } + } + + /// Removes the pattern from the back of haystack, if it matches. + fn strip_suffix_of(self, haystack: H) -> Option + where Self::Searcher: ReverseSearcher { + if let SearchStep::Match(start, end) = self.into_searcher(haystack).next_back() { + // This cannot be debug_assert_eq because `H::Cursor` isn’t required to be Debug. + debug_assert!(end == haystack.cursor_at_back(), + "The first search step from ReverseSearcher \ + must include the last character"); + // SAFETY: `Searcher` is known to return valid indices. + Some(unsafe { haystack.split_at_cursor_unchecked(start) }.0) + } else { + None + } + } +} + + +/// A type which can be searched in using a [`Pattern`]. 
+/// +/// The trait is used in combination with [`Pattern`] trait to express a pattern +/// that can be used to search for elements in given haystack. +pub trait Haystack: Sized + Copy { + /// A cursor representing position in the haystack or its end. + type Cursor: Copy + PartialOrd; + + /// Returns cursor pointing at the beginning of the haystack. + fn cursor_at_front(&self) -> Self::Cursor; + + /// Returns cursor pointing at the end of the haystack. + fn cursor_at_back(&self) -> Self::Cursor; + + /// Splits haystack into two at given cursor position. + /// + /// Note that splitting a haystack isn’t guaranteed to preserve total + /// length. That is, each separate part’s length may be longer than length + /// of the original haystack. This property is preserved for `&str` and + /// `&[T]` haystacks but not for `&OsStr`. + unsafe fn split_at_cursor_unchecked(self, cursor: Self::Cursor) -> (Self, Self); +} + + +/// Result of calling [`Searcher::next()`] or [`ReverseSearcher::next_back()`]. +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum SearchStep { + /// Expresses that a match of the pattern has been found at + /// `haystack[a..b]`. + Match(T, T), + /// Expresses that `haystack[a..b]` has been rejected as a possible match + /// of the pattern. + /// + /// Note that there might be more than one `Reject` between two `Match`es, + /// there is no requirement for them to be combined into one. + Reject(T, T), + /// Expresses that every byte of the haystack has been visited, ending + /// the iteration. + Done, +} + +/// A searcher for a string pattern. +/// +/// This trait provides methods for searching for non-overlapping +/// matches of a pattern starting from the front (left) of a string. +/// +/// It will be implemented by associated `Searcher` +/// types of the [`Pattern`] trait. +/// +/// The trait is marked unsafe because the indices returned by the +/// [`next()`][Searcher::next] methods are required to lie on valid utf8 +/// boundaries in the haystack. 
This enables consumers of this trait to +/// slice the haystack without additional runtime checks. +pub unsafe trait Searcher { + /// Getter for the underlying haystack to be searched in. + /// + /// Will always return the same haystack. + fn haystack(&self) -> H; + + /// Performs the next search step starting from the front. + /// + /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` matches + /// the pattern. + /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` can + /// not match the pattern, even partially. + /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack has + /// been visited. + /// + /// The stream of [`Match`][SearchStep::Match] and + /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done] + /// will contain index ranges that are adjacent, non-overlapping, + /// covering the whole haystack, and lying on utf8 boundaries. + /// + /// A [`Match`][SearchStep::Match] result needs to contain the whole matched + /// pattern, however [`Reject`][SearchStep::Reject] results may be split up + /// into arbitrarily many adjacent fragments. Both ranges may have zero length. + /// + /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"` + /// might produce the stream + /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`. + fn next(&mut self) -> SearchStep; + + /// Finds the next [`Match`][SearchStep::Match] result. See [`next()`][Searcher::next]. + /// + /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges + /// of this and [`next_reject`][Searcher::next_reject] will overlap. This will return + /// `(start_match, end_match)`, where start_match is the index of where + /// the match begins, and end_match is the index after the end of the match. 
+ fn next_match(&mut self) -> Option<(H::Cursor, H::Cursor)> { + loop { + match self.next() { + SearchStep::Match(a, b) => return Some((a, b)), + SearchStep::Done => return None, + _ => continue, + } + } + } + + /// Finds the next [`Reject`][SearchStep::Reject] result. See [`next()`][Searcher::next] + /// and [`next_match()`][Searcher::next_match]. + /// + /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges + /// of this and [`next_match`][Searcher::next_match] will overlap. + fn next_reject(&mut self) -> Option<(H::Cursor, H::Cursor)> { + loop { + match self.next() { + SearchStep::Reject(a, b) => return Some((a, b)), + SearchStep::Done => return None, + _ => continue, + } + } + } +} + +/// A reverse searcher for a string pattern. +/// +/// This trait provides methods for searching for non-overlapping +/// matches of a pattern starting from the back (right) of a string. +/// +/// It will be implemented by associated [`Searcher`] +/// types of the [`Pattern`] trait if the pattern supports searching +/// for it from the back. +/// +/// The index ranges returned by this trait are not required +/// to exactly match those of the forward search in reverse. +/// +/// For the reason why this trait is marked unsafe, see the +/// parent trait [`Searcher`]. +pub unsafe trait ReverseSearcher: Searcher { + /// Performs the next search step starting from the back. + /// + /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` + /// matches the pattern. + /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` + /// can not match the pattern, even partially. 
+ /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack + /// has been visited. + /// + /// The stream of [`Match`][SearchStep::Match] and + /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done] + /// will contain index ranges that are adjacent, non-overlapping, + /// covering the whole haystack, and lying on utf8 boundaries. + /// + /// A [`Match`][SearchStep::Match] result needs to contain the whole matched + /// pattern, however [`Reject`][SearchStep::Reject] results may be split up + /// into arbitrarily many adjacent fragments. Both ranges may have zero length. + /// + /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"` + /// might produce the stream + /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`. + fn next_back(&mut self) -> SearchStep; + + /// Finds the next [`Match`][SearchStep::Match] result. + /// See [`next_back()`][ReverseSearcher::next_back]. + fn next_match_back(&mut self) -> Option<(H::Cursor, H::Cursor)> { + loop { + match self.next_back() { + SearchStep::Match(a, b) => return Some((a, b)), + SearchStep::Done => return None, + _ => continue, + } + } + } + + /// Finds the next [`Reject`][SearchStep::Reject] result. + /// See [`next_back()`][ReverseSearcher::next_back]. + fn next_reject_back(&mut self) -> Option<(H::Cursor, H::Cursor)> { + loop { + match self.next_back() { + SearchStep::Reject(a, b) => return Some((a, b)), + SearchStep::Done => return None, + _ => continue, + } + } + } +} + +/// A marker trait to express that a [`ReverseSearcher`] +/// can be used for a [`DoubleEndedIterator`] implementation. +/// +/// For this, the impl of [`Searcher`] and [`ReverseSearcher`] need +/// to follow these conditions: +/// +/// - All results of `next()` need to be identical +/// to the results of `next_back()` in reverse order. +/// - `next()` and `next_back()` need to behave as +/// the two ends of a range of values, that is they +/// can not "walk past each other". 
+/// +/// # Examples +/// +/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a +/// [`char`] only requires looking at one at a time, which behaves the same +/// from both ends. +/// +/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because +/// the pattern `"aa"` in the haystack `"aaa"` matches as either +/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched. +pub trait DoubleEndedSearcher: ReverseSearcher {} diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index bdb1f1d297af6..b323a0709530b 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -7,11 +7,10 @@ use crate::iter::{Copied, Filter, FusedIterator, Map, TrustedLen}; use crate::iter::{TrustedRandomAccess, TrustedRandomAccessNoCoerce}; use crate::ops::Try; use crate::option; +use crate::pattern::{DoubleEndedSearcher, ReverseSearcher, Pattern, Searcher}; use crate::slice::{self, Split as SliceSplit}; use super::from_utf8_unchecked; -use super::pattern::Pattern; -use super::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher}; use super::validations::{next_code_point, next_code_point_reverse}; use super::LinesAnyMap; use super::{BytesIsNotEmpty, UnsafeBytesToStr}; diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index d2ad655b42af8..02bb1de1c522d 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -13,11 +13,9 @@ mod iter; mod traits; mod validations; -use self::pattern::Pattern; -use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher}; - use crate::char::{self, EscapeDebugExtArgs}; use crate::mem; +use crate::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; use crate::slice::{self, SliceIndex}; pub mod pattern; diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index b891fd9a4d337..f12f0c77f2207 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -1,36 +1,47 @@ -//! 
The string Pattern API. +//! [The Pattern API] implementation for searching in `&str`. //! -//! The Pattern API provides a generic mechanism for using different pattern -//! types when searching through a string. +//! The implementation provides generic mechanism for using different pattern +//! types when searching through a string. Although this API is unstable, it is +//! exposed via stable APIs on the [`str`] type. //! -//! For more details, see the traits [`Pattern`], [`Searcher`], -//! [`ReverseSearcher`], and [`DoubleEndedSearcher`]. +//! Depending on the type of the pattern, the behaviour of methods like +//! [`str::find`] and [`str::contains`] can change. The table below describes +//! some of those behaviours. //! -//! Although this API is unstable, it is exposed via stable APIs on the -//! [`str`] type. +//! | Pattern type | Match condition | +//! |--------------------------|-------------------------------------------| +//! | `&str` | is substring | +//! | `char` | is contained in string | +//! | `&[char]` | any char in slice is contained in string | +//! | `F: FnMut(char) -> bool` | `F` returns `true` for a char in string | +//! | `&&str` | is substring | +//! | `&String` | is substring | //! //! # Examples //! -//! [`Pattern`] is [implemented][pattern-impls] in the stable API for -//! [`&str`][`str`], [`char`], slices of [`char`], and functions and closures -//! implementing `FnMut(char) -> bool`. -//! //! ``` //! let s = "Can you find a needle in a haystack?"; //! //! // &str pattern //! assert_eq!(s.find("you"), Some(4)); +//! assert_eq!(s.find("thou"), None); +//! //! // char pattern //! assert_eq!(s.find('n'), Some(2)); -//! // array of chars pattern +//! assert_eq!(s.find('N'), None); +//! +//! // Array of chars pattern and slices thereof //! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u']), Some(1)); -//! // slice of chars pattern //! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u'][..]), Some(1)); -//! // closure pattern +//! 
assert_eq!(s.find(&['q', 'v', 'x']), None); +//! +//! // Predicate closure //! assert_eq!(s.find(|c: char| c.is_ascii_punctuation()), Some(35)); +//! assert_eq!(s.find(|c: char| c.is_lowercase()), Some(1)); +//! assert_eq!(s.find(|c: char| !c.is_ascii()), None); //! ``` //! -//! [pattern-impls]: Pattern#implementors +//! [The Pattern API]: crate::pattern #![unstable( feature = "pattern", @@ -41,333 +52,9 @@ use crate::cmp; use crate::cmp::Ordering; use crate::fmt; +use crate::pattern::{DoubleEndedSearcher, Haystack, Pattern, ReverseSearcher, Searcher, SearchStep}; use crate::slice::memchr; -// Pattern - -/// A pattern which can be matched against a [`Haystack`]. -/// -/// A `Pattern` expresses that the implementing type can be used as a pattern -/// for searching in a `H`. -/// -/// For example, character `'a'` and string `"aa"` are patterns that would match -/// at index `1` in the string `"baaaab"`. -/// -/// The trait itself acts as a builder for an associated -/// [`Searcher`] type, which does the actual work of finding -/// occurrences of the pattern in a string. -/// -/// Depending on the type of the pattern, the behaviour of methods like -/// [`str::find`] and [`str::contains`] can change. The table below describes -/// some of those behaviours. 
-/// -/// | Pattern type | Match condition | -/// |--------------------------|-------------------------------------------| -/// | `&str` | is substring | -/// | `char` | is contained in string | -/// | `&[char]` | any char in slice is contained in string | -/// | `F: FnMut(char) -> bool` | `F` returns `true` for a char in string | -/// | `&&str` | is substring | -/// | `&String` | is substring | -/// -/// # Examples -/// -/// ``` -/// // &str -/// assert_eq!("abaaa".find("ba"), Some(1)); -/// assert_eq!("abaaa".find("bac"), None); -/// -/// // char -/// assert_eq!("abaaa".find('a'), Some(0)); -/// assert_eq!("abaaa".find('b'), Some(1)); -/// assert_eq!("abaaa".find('c'), None); -/// -/// // &[char; N] -/// assert_eq!("ab".find(&['b', 'a']), Some(0)); -/// assert_eq!("abaaa".find(&['a', 'z']), Some(0)); -/// assert_eq!("abaaa".find(&['c', 'd']), None); -/// -/// // &[char] -/// assert_eq!("ab".find(&['b', 'a'][..]), Some(0)); -/// assert_eq!("abaaa".find(&['a', 'z'][..]), Some(0)); -/// assert_eq!("abaaa".find(&['c', 'd'][..]), None); -/// -/// // FnMut(char) -> bool -/// assert_eq!("abcdef_z".find(|ch| ch > 'd' && ch < 'y'), Some(4)); -/// assert_eq!("abcddd_z".find(|ch| ch > 'd' && ch < 'y'), None); -/// ``` -pub trait Pattern: Sized { - /// Associated searcher for this pattern - type Searcher: Searcher; - - /// Constructs the associated searcher from - /// `self` and the `haystack` to search in. 
- fn into_searcher(self, haystack: H) -> Self::Searcher; - - /// Checks whether the pattern matches anywhere in the haystack - #[inline] - fn is_contained_in(self, haystack: H) -> bool { - self.into_searcher(haystack).next_match().is_some() - } - - /// Checks whether the pattern matches at the front of the haystack - #[inline] - fn is_prefix_of(self, haystack: H) -> bool { - matches!( - self.into_searcher(haystack).next(), - SearchStep::Match(start, _) if start == haystack.cursor_at_front() - ) - } - - /// Checks whether the pattern matches at the back of the haystack - #[inline] - fn is_suffix_of(self, haystack: H) -> bool - where Self::Searcher: ReverseSearcher { - matches!( - self.into_searcher(haystack).next_back(), - SearchStep::Match(_, end) if end == haystack.cursor_at_back() - ) - } - - /// Removes the pattern from the front of haystack, if it matches. - #[inline] - fn strip_prefix_of(self, haystack: H) -> Option { - if let SearchStep::Match(start, end) = self.into_searcher(haystack).next() { - // This cannot be debug_assert_eq because StartCursor isn’t Debug. - debug_assert!(start == haystack.cursor_at_front(), - "The first search step from Searcher \ - must include the first character"); - // SAFETY: `Searcher` is known to return valid indices. - Some(unsafe { haystack.split_at_cursor_unchecked(end) }.1) - } else { - None - } - } - - /// Removes the pattern from the back of haystack, if it matches. - #[inline] - fn strip_suffix_of(self, haystack: H) -> Option - where Self::Searcher: ReverseSearcher { - if let SearchStep::Match(start, end) = self.into_searcher(haystack).next_back() { - // This cannot be debug_assert_eq because StartCursor isn’t Debug. - debug_assert!(end == haystack.cursor_at_back(), - "The first search step from ReverseSearcher \ - must include the last character"); - // SAFETY: `Searcher` is known to return valid indices. 
- Some(unsafe { haystack.split_at_cursor_unchecked(start) }.0) - } else { - None - } - } -} - -// Haystack - -/// A type which can be searched in using a [`Pattern`]. -/// -/// The trait is used in combination with [`Pattern`] trait to express a pattern -/// that can be used to search for elements in given haystack. -pub trait Haystack: Sized + Copy { - /// A cursor representing position in the haystack or its end. - type Cursor: Copy + PartialOrd; - - /// Returns cursor pointing at the beginning of the haystack. - fn cursor_at_front(&self) -> Self::Cursor; - - /// Returns cursor pointing at the end of the haystack. - fn cursor_at_back(&self) -> Self::Cursor; - - /// Splits haystack into two at given cursor position. - /// - /// Note that splitting a haystack isn’t guaranteed to preserve total - /// length. That is, each separate part’s length may be longer than length - /// of the original haystack. This property is preserved for `&str` and - /// `&[T]` haystacks but not for `&OsStr`. - unsafe fn split_at_cursor_unchecked(self, cursor: Self::Cursor) -> (Self, Self); -} - -// Searcher - -/// Result of calling [`Searcher::next()`] or [`ReverseSearcher::next_back()`]. -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub enum SearchStep { - /// Expresses that a match of the pattern has been found at - /// `haystack[a..b]`. - Match(T, T), - /// Expresses that `haystack[a..b]` has been rejected as a possible match - /// of the pattern. - /// - /// Note that there might be more than one `Reject` between two `Match`es, - /// there is no requirement for them to be combined into one. - Reject(T, T), - /// Expresses that every byte of the haystack has been visited, ending - /// the iteration. - Done, -} - -/// A searcher for a string pattern. -/// -/// This trait provides methods for searching for non-overlapping -/// matches of a pattern starting from the front (left) of a string. -/// -/// It will be implemented by associated `Searcher` -/// types of the [`Pattern`] trait. 
-/// -/// The trait is marked unsafe because the indices returned by the -/// [`next()`][Searcher::next] methods are required to lie on valid utf8 -/// boundaries in the haystack. This enables consumers of this trait to -/// slice the haystack without additional runtime checks. -pub unsafe trait Searcher { - /// Getter for the underlying string to be searched in - /// - /// Will always return the same [`&str`][str]. - fn haystack(&self) -> H; - - /// Performs the next search step starting from the front. - /// - /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` matches - /// the pattern. - /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` can - /// not match the pattern, even partially. - /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack has - /// been visited. - /// - /// The stream of [`Match`][SearchStep::Match] and - /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done] - /// will contain index ranges that are adjacent, non-overlapping, - /// covering the whole haystack, and laying on utf8 boundaries. - /// - /// A [`Match`][SearchStep::Match] result needs to contain the whole matched - /// pattern, however [`Reject`][SearchStep::Reject] results may be split up - /// into arbitrary many adjacent fragments. Both ranges may have zero length. - /// - /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"` - /// might produce the stream - /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]` - fn next(&mut self) -> SearchStep; - - /// Finds the next [`Match`][SearchStep::Match] result. See [`next()`][Searcher::next]. - /// - /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges - /// of this and [`next_reject`][Searcher::next_reject] will overlap. This will return - /// `(start_match, end_match)`, where start_match is the index of where - /// the match begins, and end_match is the index after the end of the match. 
- #[inline] - fn next_match(&mut self) -> Option<(H::Cursor, H::Cursor)> { - loop { - match self.next() { - SearchStep::Match(a, b) => return Some((a, b)), - SearchStep::Done => return None, - _ => continue, - } - } - } - - /// Finds the next [`Reject`][SearchStep::Reject] result. See [`next()`][Searcher::next] - /// and [`next_match()`][Searcher::next_match]. - /// - /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges - /// of this and [`next_match`][Searcher::next_match] will overlap. - #[inline] - fn next_reject(&mut self) -> Option<(H::Cursor, H::Cursor)> { - loop { - match self.next() { - SearchStep::Reject(a, b) => return Some((a, b)), - SearchStep::Done => return None, - _ => continue, - } - } - } -} - -/// A reverse searcher for a string pattern. -/// -/// This trait provides methods for searching for non-overlapping -/// matches of a pattern starting from the back (right) of a string. -/// -/// It will be implemented by associated [`Searcher`] -/// types of the [`Pattern`] trait if the pattern supports searching -/// for it from the back. -/// -/// The index ranges returned by this trait are not required -/// to exactly match those of the forward search in reverse. -/// -/// For the reason why this trait is marked unsafe, see the -/// parent trait [`Searcher`]. -pub unsafe trait ReverseSearcher: Searcher { - /// Performs the next search step starting from the back. - /// - /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` - /// matches the pattern. - /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` - /// can not match the pattern, even partially. 
- /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack - /// has been visited - /// - /// The stream of [`Match`][SearchStep::Match] and - /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done] - /// will contain index ranges that are adjacent, non-overlapping, - /// covering the whole haystack, and laying on utf8 boundaries. - /// - /// A [`Match`][SearchStep::Match] result needs to contain the whole matched - /// pattern, however [`Reject`][SearchStep::Reject] results may be split up - /// into arbitrary many adjacent fragments. Both ranges may have zero length. - /// - /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"` - /// might produce the stream - /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`. - fn next_back(&mut self) -> SearchStep; - - /// Finds the next [`Match`][SearchStep::Match] result. - /// See [`next_back()`][ReverseSearcher::next_back]. - #[inline] - fn next_match_back(&mut self) -> Option<(H::Cursor, H::Cursor)> { - loop { - match self.next_back() { - SearchStep::Match(a, b) => return Some((a, b)), - SearchStep::Done => return None, - _ => continue, - } - } - } - - /// Finds the next [`Reject`][SearchStep::Reject] result. - /// See [`next_back()`][ReverseSearcher::next_back]. - #[inline] - fn next_reject_back(&mut self) -> Option<(H::Cursor, H::Cursor)> { - loop { - match self.next_back() { - SearchStep::Reject(a, b) => return Some((a, b)), - SearchStep::Done => return None, - _ => continue, - } - } - } -} - -/// A marker trait to express that a [`ReverseSearcher`] -/// can be used for a [`DoubleEndedIterator`] implementation. -/// -/// For this, the impl of [`Searcher`] and [`ReverseSearcher`] need -/// to follow these conditions: -/// -/// - All results of `next()` need to be identical -/// to the results of `next_back()` in reverse order. 
-/// - `next()` and `next_back()` need to behave as -/// the two ends of a range of values, that is they -/// can not "walk past each other". -/// -/// # Examples -/// -/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a -/// [`char`] only requires looking at one at a time, which behaves the same -/// from both ends. -/// -/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because -/// the pattern `"aa"` in the haystack `"aaa"` matches as either -/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched. -pub trait DoubleEndedSearcher: ReverseSearcher {} - ///////////////////////////////////////////////////////////////////////////// // Impl for Haystack ///////////////////////////////////////////////////////////////////////////// diff --git a/library/core/tests/pattern.rs b/library/core/tests/pattern.rs index d4bec996d89a1..0e943bd80ec7f 100644 --- a/library/core/tests/pattern.rs +++ b/library/core/tests/pattern.rs @@ -1,4 +1,4 @@ -use std::str::pattern::*; +use std::pattern::*; // This macro makes it easier to write // tests that do a series of iterations diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 363a266717467..e247185e1bf65 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -478,6 +478,8 @@ pub use core::mem; pub use core::ops; #[stable(feature = "rust1", since = "1.0.0")] pub use core::option; +#[unstable(feature = "pattern", issue = "27721")] +pub use core::pattern; #[stable(feature = "pin", since = "1.33.0")] pub use core::pin; #[stable(feature = "rust1", since = "1.0.0")]