Skip to content

Commit

Permalink
Update to regex-automata v0.4
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark-Simulacrum committed Jan 6, 2024
1 parent 6e5f38d commit e2ac72a
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 101 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "matchers"
version = "0.1.0"
version = "0.2.0"
authors = ["Eliza Weisman <[email protected]>"]
edition = "2018"
license = "MIT"
Expand All @@ -18,4 +18,4 @@ keywords = ["regex", "match", "pattern", "streaming"]
maintenance = { status = "experimental" }

[dependencies]
regex-automata = "0.1"
regex-automata = { version = "0.4", default-features = false, features = ["syntax", "dfa-build", "dfa-search"] }
155 changes: 56 additions & 99 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,38 +24,33 @@
//!
//! [`regex`]: https://crates.io/crates/regex
//! [`regex-automata`]: https://crates.io/crates/regex-automata
//! [syntax]: https://docs.rs/regex-automata/0.1.7/regex_automata/#syntax
//! [syntax]: https://docs.rs/regex-automata/0.4.3/regex_automata/#syntax
use regex_automata::{dense, DenseDFA, SparseDFA, StateID, DFA};
use std::{fmt, io, marker::PhantomData, str::FromStr};
use std::{fmt, io, str::FromStr};

pub use regex_automata::Error;
pub use regex_automata::dfa::dense::BuildError;
use regex_automata::dfa::dense::DFA;
use regex_automata::dfa::Automaton;
use regex_automata::util::primitives::StateID;
use regex_automata::Anchored;

/// A compiled match pattern that can match multiple inputs, or return a
/// [`Matcher`] that matches a single input.
///
/// [`Matcher`]: ../struct.Matcher.html
#[derive(Debug, Clone)]
pub struct Pattern<S = usize, A = DenseDFA<Vec<S>, S>>
where
S: StateID,
A: DFA<ID = S>,
{
pub struct Pattern<A = DFA<Vec<u32>>> {
automaton: A,
anchored: bool,
}

/// A reference to a [`Pattern`] that matches a single input.
///
/// [`Pattern`]: ../struct.Pattern.html
#[derive(Debug, Clone)]
pub struct Matcher<'a, S = usize, A = DenseDFA<&'a [S], S>>
where
S: StateID,
A: DFA<ID = S>,
{
pub struct Matcher<A = DFA<Vec<u32>>> {
automaton: A,
state: S,
_lt: PhantomData<&'a ()>,
state: StateID,
}

// === impl Pattern ===
Expand Down Expand Up @@ -86,9 +81,12 @@ impl Pattern {
/// // sequence when it's followed by non-matching characters:
/// assert!(pattern.display_matches(&"hello world! aaaaab"));
/// ```
pub fn new(pattern: &str) -> Result<Self, Error> {
let automaton = DenseDFA::new(pattern)?;
Ok(Pattern { automaton })
pub fn new(pattern: &str) -> Result<Self, BuildError> {
let automaton = DFA::new(pattern)?;
Ok(Pattern {
automaton,
anchored: false,
})
}

/// Returns a new `Pattern` anchored at the beginning of the input stream,
Expand Down Expand Up @@ -120,25 +118,40 @@ impl Pattern {
/// .expect("regex is not invalid");
/// assert!(pattern2.display_matches(&"hello world! aaaaab"));
/// ```
pub fn new_anchored(pattern: &str) -> Result<Self, Error> {
let automaton = dense::Builder::new().anchored(true).build(pattern)?;
Ok(Pattern { automaton })
pub fn new_anchored(pattern: &str) -> Result<Self, BuildError> {
let automaton = DFA::new(pattern)?;
Ok(Pattern {
automaton,
anchored: true,
})
}
}

impl FromStr for Pattern {
type Err = Error;
type Err = BuildError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}

impl<S, A> Pattern<S, A>
where
S: StateID,
A: DFA<ID = S>,
Self: for<'a> ToMatcher<'a, S>,
{
impl<A: Automaton> Pattern<A> {
/// Creates a [`Matcher`] that borrows this pattern's automaton.
///
/// Use this when input needs to be fed incrementally, by writing to the
/// returned matcher through `io::Write` or `fmt::Write`. For one-shot
/// checks, the convenience methods on `Pattern` are sufficient.
///
/// The matcher begins in the automaton's start state for this pattern's
/// anchoring mode.
///
/// # Panics
///
/// Panics if the automaton fails to produce a start state for the
/// requested configuration.
pub fn matcher(&self) -> Matcher<&'_ A> {
    // Translate the stored flag into the search mode the automaton expects.
    let mode = if self.anchored {
        Anchored::Yes
    } else {
        Anchored::No
    };
    let start_config = regex_automata::util::start::Config::new().anchored(mode);
    let state = self.automaton.start_state(&start_config).unwrap();
    Matcher {
        automaton: &self.automaton,
        state,
    }
}

/// Returns `true` if this pattern matches the given string.
#[inline]
pub fn matches(&self, s: &impl AsRef<str>) -> bool {
Expand Down Expand Up @@ -220,35 +233,24 @@ where

// === impl Matcher ===

impl<'a, S, A> Matcher<'a, S, A>
impl<A> Matcher<A>
where
S: StateID,
A: DFA<ID = S>,
A: Automaton,
{
fn new(automaton: A) -> Self {
let state = automaton.start_state();
Self {
automaton,
state,
_lt: PhantomData,
}
}

#[inline]
fn advance(&mut self, input: u8) {
self.state = unsafe {
// It's safe to call `next_state_unchecked` since the matcher may
// only be constructed by a `Pattern`, which, in turn,can only be
// constructed with a valid DFA.
self.automaton.next_state_unchecked(self.state, input)
};
// It's safe to call `next_state_unchecked` since the matcher may
// only be constructed by a `Pattern`, which, in turn, can only be
// constructed with a valid DFA.
self.state = unsafe { self.automaton.next_state_unchecked(self.state, input) };
}

/// Returns `true` if this `Matcher` has matched any input that has been
/// provided.
#[inline]
pub fn is_matched(&self) -> bool {
self.automaton.is_match_state(self.state)
let eoi_state = self.automaton.next_eoi_state(self.state);
self.automaton.is_match_state(eoi_state)
}

/// Returns `true` if this pattern matches the formatted output of the given
Expand Down Expand Up @@ -293,11 +295,7 @@ where
}
}

impl<'a, S, A> fmt::Write for Matcher<'a, S, A>
where
S: StateID,
A: DFA<ID = S>,
{
impl<A: Automaton> fmt::Write for Matcher<A> {
fn write_str(&mut self, s: &str) -> fmt::Result {
for &byte in s.as_bytes() {
self.advance(byte);
Expand All @@ -309,11 +307,7 @@ where
}
}

impl<'a, S, A> io::Write for Matcher<'a, S, A>
where
S: StateID,
A: DFA<ID = S>,
{
impl<A: Automaton> io::Write for Matcher<A> {
fn write(&mut self, bytes: &[u8]) -> Result<usize, io::Error> {
let mut i = 0;
for &byte in bytes {
Expand All @@ -331,43 +325,6 @@ where
}
}

pub trait ToMatcher<'a, S>
where
Self: crate::sealed::Sealed,
S: StateID + 'a,
{
type Automaton: DFA<ID = S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton>;
}

impl<S> crate::sealed::Sealed for Pattern<S, DenseDFA<Vec<S>, S>> where S: StateID {}

impl<'a, S> ToMatcher<'a, S> for Pattern<S, DenseDFA<Vec<S>, S>>
where
S: StateID + 'a,
{
type Automaton = DenseDFA<&'a [S], S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> {
Matcher::new(self.automaton.as_ref())
}
}

impl<'a, S> ToMatcher<'a, S> for Pattern<S, SparseDFA<Vec<u8>, S>>
where
S: StateID + 'a,
{
type Automaton = SparseDFA<&'a [u8], S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> {
Matcher::new(self.automaton.as_ref())
}
}

impl<S> crate::sealed::Sealed for Pattern<S, SparseDFA<Vec<u8>, S>> where S: StateID {}

mod sealed {
pub trait Sealed {}
}

#[cfg(test)]
mod test {
use super::*;
Expand Down Expand Up @@ -409,7 +366,7 @@ mod test {
}
}

fn test_debug_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_debug_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat.debug_matches(&Str::hello_world()));

Expand All @@ -420,7 +377,7 @@ mod test {
assert_eq!(pat.debug_matches(&Str::hello_world()), false);
}

fn test_display_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_display_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat.display_matches(&Str::hello_world()));

Expand All @@ -431,7 +388,7 @@ mod test {
assert_eq!(pat.display_matches(&Str::hello_world()), false);
}

fn test_reader_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_reader_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat
.read_matches(Str::hello_world().to_reader())
Expand All @@ -450,7 +407,7 @@ mod test {
);
}

fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("a+b").unwrap();
assert!(pat.debug_matches(&Str::new("ab")));
assert!(pat.debug_matches(&Str::new("aaaab")));
Expand Down

0 comments on commit e2ac72a

Please sign in to comment.