diff --git a/Cargo.toml b/Cargo.toml index 99f0a41..c7e6df2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "matchers" -version = "0.1.0" +version = "0.2.0" authors = ["Eliza Weisman "] edition = "2018" license = "MIT" @@ -18,4 +18,4 @@ keywords = ["regex", "match", "pattern", "streaming"] maintenance = { status = "experimental" } [dependencies] -regex-automata = "0.1" +regex-automata = { version = "0.4", default-features = false, features = ["syntax", "dfa-build", "dfa-search"] } diff --git a/src/lib.rs b/src/lib.rs index 2720a1a..b045a9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,38 +24,33 @@ //! //! [`regex`]: https://crates.io/crates/regex //! [`regex-automata`]: https://crates.io/crates/regex-automata -//! [syntax]: https://docs.rs/regex-automata/0.1.7/regex_automata/#syntax +//! [syntax]: https://docs.rs/regex-automata/0.4.3/regex_automata/#syntax -use regex_automata::{dense, DenseDFA, SparseDFA, StateID, DFA}; -use std::{fmt, io, marker::PhantomData, str::FromStr}; +use std::{fmt, io, str::FromStr}; -pub use regex_automata::Error; +pub use regex_automata::dfa::dense::BuildError; +use regex_automata::dfa::dense::DFA; +use regex_automata::dfa::Automaton; +use regex_automata::util::primitives::StateID; +use regex_automata::Anchored; /// A compiled match pattern that can match multipe inputs, or return a /// [`Matcher`] that matches a single input. /// /// [`Matcher`]: ../struct.Matcher.html #[derive(Debug, Clone)] -pub struct Pattern, S>> -where - S: StateID, - A: DFA, -{ +pub struct Pattern>> { automaton: A, + anchored: bool, } /// A reference to a [`Pattern`] that matches a single input. /// /// [`Pattern`]: ../struct.Pattern.html #[derive(Debug, Clone)] -pub struct Matcher<'a, S = usize, A = DenseDFA<&'a [S], S>> -where - S: StateID, - A: DFA, -{ +pub struct Matcher>> { automaton: A, - state: S, - _lt: PhantomData<&'a ()>, + state: StateID, } // === impl Pattern === @@ -86,9 +81,12 @@ impl Pattern { /// // sequence when it's followed by non-matching characters: /// assert!(pattern.display_matches(&"hello world! aaaaab")); /// ``` - pub fn new(pattern: &str) -> Result { - let automaton = DenseDFA::new(pattern)?; - Ok(Pattern { automaton }) + pub fn new(pattern: &str) -> Result { + let automaton = DFA::new(pattern)?; + Ok(Pattern { + automaton, + anchored: false, + }) } /// Returns a new `Pattern` anchored at the beginning of the input stream, @@ -120,25 +118,40 @@ impl Pattern { /// .expect("regex is not invalid"); /// assert!(pattern2.display_matches(&"hello world! aaaaab")); /// ``` - pub fn new_anchored(pattern: &str) -> Result { - let automaton = dense::Builder::new().anchored(true).build(pattern)?; - Ok(Pattern { automaton }) + pub fn new_anchored(pattern: &str) -> Result { + let automaton = DFA::new(pattern)?; + Ok(Pattern { + automaton, + anchored: true, + }) } } impl FromStr for Pattern { - type Err = Error; + type Err = BuildError; fn from_str(s: &str) -> Result { Self::new(s) } } -impl Pattern -where - S: StateID, - A: DFA, - Self: for<'a> ToMatcher<'a, S>, -{ +impl Pattern { + /// Obtains a `matcher` for this pattern. + /// + /// This conversion is useful when wanting to incrementally feed input (via + /// `io::Write`/`fmt::Write` to a matcher). Otherwise, the convenience methods on Pattern + /// suffice. + pub fn matcher(&self) -> Matcher<&'_ A> { + let config = regex_automata::util::start::Config::new().anchored(if self.anchored { + Anchored::Yes + } else { + Anchored::No + }); + Matcher { + automaton: &self.automaton, + state: self.automaton.start_state(&config).unwrap(), + } + } + /// Returns `true` if this pattern matches the given string. #[inline] pub fn matches(&self, s: &impl AsRef) -> bool { @@ -220,35 +233,24 @@ where // === impl Matcher === -impl<'a, S, A> Matcher<'a, S, A> +impl Matcher where - S: StateID, - A: DFA, + A: Automaton, { - fn new(automaton: A) -> Self { - let state = automaton.start_state(); - Self { - automaton, - state, - _lt: PhantomData, - } - } - #[inline] fn advance(&mut self, input: u8) { - self.state = unsafe { - // It's safe to call `next_state_unchecked` since the matcher may - // only be constructed by a `Pattern`, which, in turn,can only be - // constructed with a valid DFA. - self.automaton.next_state_unchecked(self.state, input) - }; + // It's safe to call `next_state_unchecked` since the matcher may + // only be constructed by a `Pattern`, which, in turn, can only be + // constructed with a valid DFA. + self.state = unsafe { self.automaton.next_state_unchecked(self.state, input) }; } /// Returns `true` if this `Matcher` has matched any input that has been /// provided. #[inline] pub fn is_matched(&self) -> bool { - self.automaton.is_match_state(self.state) + let eoi_state = self.automaton.next_eoi_state(self.state); + self.automaton.is_match_state(eoi_state) } /// Returns `true` if this pattern matches the formatted output of the given @@ -293,11 +295,7 @@ where } } -impl<'a, S, A> fmt::Write for Matcher<'a, S, A> -where - S: StateID, - A: DFA, -{ +impl fmt::Write for Matcher { fn write_str(&mut self, s: &str) -> fmt::Result { for &byte in s.as_bytes() { self.advance(byte); @@ -309,11 +307,7 @@ where } } -impl<'a, S, A> io::Write for Matcher<'a, S, A> -where - S: StateID, - A: DFA, -{ +impl io::Write for Matcher { fn write(&mut self, bytes: &[u8]) -> Result { let mut i = 0; for &byte in bytes { @@ -331,43 +325,6 @@ where } } -pub trait ToMatcher<'a, S> -where - Self: crate::sealed::Sealed, - S: StateID + 'a, -{ - type Automaton: DFA; - fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton>; -} - -impl crate::sealed::Sealed for Pattern, S>> where S: StateID {} - -impl<'a, S> ToMatcher<'a, S> for Pattern, S>> -where - S: StateID + 'a, -{ - type Automaton = DenseDFA<&'a [S], S>; - fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> { - Matcher::new(self.automaton.as_ref()) - } -} - -impl<'a, S> ToMatcher<'a, S> for Pattern, S>> -where - S: StateID + 'a, -{ - type Automaton = SparseDFA<&'a [u8], S>; - fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> { - Matcher::new(self.automaton.as_ref()) - } -} - -impl crate::sealed::Sealed for Pattern, S>> where S: StateID {} - -mod sealed { - pub trait Sealed {} -} - #[cfg(test)] mod test { use super::*; @@ -409,7 +366,7 @@ mod test { } } - fn test_debug_matches(new_pattern: impl Fn(&str) -> Result) { + fn test_debug_matches(new_pattern: impl Fn(&str) -> Result) { let pat = new_pattern("hello world").unwrap(); assert!(pat.debug_matches(&Str::hello_world())); @@ -420,7 +377,7 @@ mod test { assert_eq!(pat.debug_matches(&Str::hello_world()), false); } - fn test_display_matches(new_pattern: impl Fn(&str) -> Result) { + fn test_display_matches(new_pattern: impl Fn(&str) -> Result) { let pat = new_pattern("hello world").unwrap(); assert!(pat.display_matches(&Str::hello_world())); @@ -431,7 +388,7 @@ mod test { assert_eq!(pat.display_matches(&Str::hello_world()), false); } - fn test_reader_matches(new_pattern: impl Fn(&str) -> Result) { + fn test_reader_matches(new_pattern: impl Fn(&str) -> Result) { let pat = new_pattern("hello world").unwrap(); assert!(pat .read_matches(Str::hello_world().to_reader()) @@ -450,7 +407,7 @@ mod test { ); } - fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result) { + fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result) { let pat = new_pattern("a+b").unwrap(); assert!(pat.debug_matches(&Str::new("ab"))); assert!(pat.debug_matches(&Str::new("aaaab")));