Skip to content

Commit

Permalink
Update to regex-automata v0.4 (#5)
Browse files Browse the repository at this point in the history
This updates to use regex-automata v0.4, bumping the crate to 0.2 since
regex-automata is a public dependency of the crate. All tests in this
crate continue to pass.

This removes the `ToMatcher` trait in favor of directly using the
upstream Automaton trait, it didn't seem like that trait was really
being used. As of the current PR, there's no way to construct Patterns
with the sparse DFA (this seems to have been true of the prior version
of the crate too), but all the functionality is generic over
[regex_automata::Automaton](https://docs.rs/regex-automata/latest/regex_automata/dfa/trait.Automaton.html),
so just adding constructs for the lazy DFA would be enough to make it
possible to use.

This also reduces the enabled features on regex-automata to not include
unicode features. It's not obvious that downstream crates (in particular
tracing-subscriber) want to pay the binary size cost for having those
enabled in env-filter regexes, and this leaves the choice up to them.

Closes #3
  • Loading branch information
Mark-Simulacrum authored Jul 1, 2024
1 parent 6e5f38d commit 92217ad
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 102 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,12 @@ jobs:
- name: Run tests
uses: actions-rs/cargo@v1
with:
command: build
command: test
- name: Run tests (unicode)
uses: actions-rs/cargo@v1
with:
command: test
args: --features unicode

clippy_check:
runs-on: ubuntu-latest
Expand Down
7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "matchers"
version = "0.1.0"
version = "0.2.0"
authors = ["Eliza Weisman <[email protected]>"]
edition = "2018"
license = "MIT"
Expand All @@ -18,4 +18,7 @@ keywords = ["regex", "match", "pattern", "streaming"]
maintenance = { status = "experimental" }

[dependencies]
regex-automata = "0.1"
regex-automata = { version = "0.4", default-features = false, features = ["syntax", "dfa-build", "dfa-search"] }

[features]
unicode = ["regex-automata/unicode"]
151 changes: 52 additions & 99 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,38 +24,33 @@
//!
//! [`regex`]: https://crates.io/crates/regex
//! [`regex-automata`]: https://crates.io/crates/regex-automata
//! [syntax]: https://docs.rs/regex-automata/0.1.7/regex_automata/#syntax
//! [syntax]: https://docs.rs/regex-automata/0.4.3/regex_automata/#syntax
use regex_automata::{dense, DenseDFA, SparseDFA, StateID, DFA};
use std::{fmt, io, marker::PhantomData, str::FromStr};
use std::{fmt, io, str::FromStr};

pub use regex_automata::Error;
pub use regex_automata::dfa::dense::BuildError;
use regex_automata::dfa::dense::DFA;
use regex_automata::dfa::Automaton;
use regex_automata::util::primitives::StateID;
use regex_automata::Anchored;

/// A compiled match pattern that can match multipe inputs, or return a
/// [`Matcher`] that matches a single input.
///
/// [`Matcher`]: ../struct.Matcher.html
#[derive(Debug, Clone)]
pub struct Pattern<S = usize, A = DenseDFA<Vec<S>, S>>
where
S: StateID,
A: DFA<ID = S>,
{
pub struct Pattern<A = DFA<Vec<u32>>> {
automaton: A,
anchored: Anchored,
}

/// A reference to a [`Pattern`] that matches a single input.
///
/// [`Pattern`]: ../struct.Pattern.html
#[derive(Debug, Clone)]
pub struct Matcher<'a, S = usize, A = DenseDFA<&'a [S], S>>
where
S: StateID,
A: DFA<ID = S>,
{
pub struct Matcher<A = DFA<Vec<u32>>> {
automaton: A,
state: S,
_lt: PhantomData<&'a ()>,
state: StateID,
}

// === impl Pattern ===
Expand Down Expand Up @@ -86,9 +81,12 @@ impl Pattern {
/// // sequence when it's followed by non-matching characters:
/// assert!(pattern.display_matches(&"hello world! aaaaab"));
/// ```
pub fn new(pattern: &str) -> Result<Self, Error> {
let automaton = DenseDFA::new(pattern)?;
Ok(Pattern { automaton })
pub fn new(pattern: &str) -> Result<Self, BuildError> {

Check warning on line 84 in src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

the `Err`-variant returned from this function is very large

warning: the `Err`-variant returned from this function is very large --> src/lib.rs:84:34 | 84 | pub fn new(pattern: &str) -> Result<Self, BuildError> { | ^^^^^^^^^^^^^^^^^^^^^^^^ the `Err`-variant is at least 128 bytes | = help: try reducing the size of `regex_automata::dfa::dense::BuildError`, for example by boxing large elements or replacing it with `Box<regex_automata::dfa::dense::BuildError>` = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#result_large_err = note: `#[warn(clippy::result_large_err)]` on by default
let automaton = DFA::new(pattern)?;
Ok(Pattern {
automaton,
anchored: Anchored::No,
})
}

/// Returns a new `Pattern` anchored at the beginning of the input stream,
Expand Down Expand Up @@ -120,25 +118,36 @@ impl Pattern {
/// .expect("regex is not invalid");
/// assert!(pattern2.display_matches(&"hello world! aaaaab"));
/// ```
pub fn new_anchored(pattern: &str) -> Result<Self, Error> {
let automaton = dense::Builder::new().anchored(true).build(pattern)?;
Ok(Pattern { automaton })
pub fn new_anchored(pattern: &str) -> Result<Self, BuildError> {

Check warning on line 121 in src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

the `Err`-variant returned from this function is very large

warning: the `Err`-variant returned from this function is very large --> src/lib.rs:121:43 | 121 | pub fn new_anchored(pattern: &str) -> Result<Self, BuildError> { | ^^^^^^^^^^^^^^^^^^^^^^^^ the `Err`-variant is at least 128 bytes | = help: try reducing the size of `regex_automata::dfa::dense::BuildError`, for example by boxing large elements or replacing it with `Box<regex_automata::dfa::dense::BuildError>` = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#result_large_err
let automaton = DFA::new(pattern)?;
Ok(Pattern {
automaton,
anchored: Anchored::Yes,
})
}
}

impl FromStr for Pattern {
type Err = Error;
type Err = BuildError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}

impl<S, A> Pattern<S, A>
where
S: StateID,
A: DFA<ID = S>,
Self: for<'a> ToMatcher<'a, S>,
{
impl<A: Automaton> Pattern<A> {
/// Obtains a `matcher` for this pattern.
///
/// This conversion is useful when wanting to incrementally feed input (via
/// `io::Write`/`fmt::Write` to a matcher). Otherwise, the convenience methods on Pattern
/// suffice.
pub fn matcher(&self) -> Matcher<&'_ A> {
let config = regex_automata::util::start::Config::new().anchored(self.anchored);
Matcher {
automaton: &self.automaton,
state: self.automaton.start_state(&config).unwrap(),
}
}

/// Returns `true` if this pattern matches the given string.
#[inline]
pub fn matches(&self, s: &impl AsRef<str>) -> bool {
Expand Down Expand Up @@ -220,35 +229,24 @@ where

// === impl Matcher ===

impl<'a, S, A> Matcher<'a, S, A>
impl<A> Matcher<A>
where
S: StateID,
A: DFA<ID = S>,
A: Automaton,
{
fn new(automaton: A) -> Self {
let state = automaton.start_state();
Self {
automaton,
state,
_lt: PhantomData,
}
}

#[inline]
fn advance(&mut self, input: u8) {
self.state = unsafe {
// It's safe to call `next_state_unchecked` since the matcher may
// only be constructed by a `Pattern`, which, in turn,can only be
// constructed with a valid DFA.
self.automaton.next_state_unchecked(self.state, input)
};
// It's safe to call `next_state_unchecked` since the matcher may
// only be constructed by a `Pattern`, which, in turn, can only be
// constructed with a valid DFA.
self.state = unsafe { self.automaton.next_state_unchecked(self.state, input) };
}

/// Returns `true` if this `Matcher` has matched any input that has been
/// provided.
#[inline]
pub fn is_matched(&self) -> bool {
self.automaton.is_match_state(self.state)
let eoi_state = self.automaton.next_eoi_state(self.state);
self.automaton.is_match_state(eoi_state)
}

/// Returns `true` if this pattern matches the formatted output of the given
Expand Down Expand Up @@ -293,11 +291,7 @@ where
}
}

impl<'a, S, A> fmt::Write for Matcher<'a, S, A>
where
S: StateID,
A: DFA<ID = S>,
{
impl<A: Automaton> fmt::Write for Matcher<A> {
fn write_str(&mut self, s: &str) -> fmt::Result {
for &byte in s.as_bytes() {
self.advance(byte);
Expand All @@ -309,11 +303,7 @@ where
}
}

impl<'a, S, A> io::Write for Matcher<'a, S, A>
where
S: StateID,
A: DFA<ID = S>,
{
impl<A: Automaton> io::Write for Matcher<A> {
fn write(&mut self, bytes: &[u8]) -> Result<usize, io::Error> {
let mut i = 0;
for &byte in bytes {
Expand All @@ -331,43 +321,6 @@ where
}
}

pub trait ToMatcher<'a, S>
where
Self: crate::sealed::Sealed,
S: StateID + 'a,
{
type Automaton: DFA<ID = S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton>;
}

impl<S> crate::sealed::Sealed for Pattern<S, DenseDFA<Vec<S>, S>> where S: StateID {}

impl<'a, S> ToMatcher<'a, S> for Pattern<S, DenseDFA<Vec<S>, S>>
where
S: StateID + 'a,
{
type Automaton = DenseDFA<&'a [S], S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> {
Matcher::new(self.automaton.as_ref())
}
}

impl<'a, S> ToMatcher<'a, S> for Pattern<S, SparseDFA<Vec<u8>, S>>
where
S: StateID + 'a,
{
type Automaton = SparseDFA<&'a [u8], S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> {
Matcher::new(self.automaton.as_ref())
}
}

impl<S> crate::sealed::Sealed for Pattern<S, SparseDFA<Vec<u8>, S>> where S: StateID {}

mod sealed {
pub trait Sealed {}
}

#[cfg(test)]
mod test {
use super::*;
Expand Down Expand Up @@ -409,7 +362,7 @@ mod test {
}
}

fn test_debug_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_debug_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat.debug_matches(&Str::hello_world()));

Expand All @@ -420,7 +373,7 @@ mod test {
assert_eq!(pat.debug_matches(&Str::hello_world()), false);
}

fn test_display_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_display_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat.display_matches(&Str::hello_world()));

Expand All @@ -431,7 +384,7 @@ mod test {
assert_eq!(pat.display_matches(&Str::hello_world()), false);
}

fn test_reader_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_reader_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat
.read_matches(Str::hello_world().to_reader())
Expand All @@ -450,7 +403,7 @@ mod test {
);
}

fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("a+b").unwrap();
assert!(pat.debug_matches(&Str::new("ab")));
assert!(pat.debug_matches(&Str::new("aaaab")));
Expand Down

0 comments on commit 92217ad

Please sign in to comment.