Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to regex-automata v0.4 #5

Merged
merged 1 commit into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,12 @@ jobs:
- name: Run tests
uses: actions-rs/cargo@v1
with:
command: build
command: test
- name: Run tests (unicode)
uses: actions-rs/cargo@v1
with:
command: test
args: --features unicode

clippy_check:
runs-on: ubuntu-latest
Expand Down
7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "matchers"
version = "0.1.0"
version = "0.2.0"
authors = ["Eliza Weisman <[email protected]>"]
edition = "2018"
license = "MIT"
Expand All @@ -18,4 +18,7 @@ keywords = ["regex", "match", "pattern", "streaming"]
maintenance = { status = "experimental" }

[dependencies]
regex-automata = "0.1"
regex-automata = { version = "0.4", default-features = false, features = ["syntax", "dfa-build", "dfa-search"] }
hawkw marked this conversation as resolved.
Show resolved Hide resolved

[features]
unicode = ["regex-automata/unicode"]
151 changes: 52 additions & 99 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,38 +24,33 @@
//!
//! [`regex`]: https://crates.io/crates/regex
//! [`regex-automata`]: https://crates.io/crates/regex-automata
//! [syntax]: https://docs.rs/regex-automata/0.1.7/regex_automata/#syntax
//! [syntax]: https://docs.rs/regex-automata/0.4.3/regex_automata/#syntax

use regex_automata::{dense, DenseDFA, SparseDFA, StateID, DFA};
use std::{fmt, io, marker::PhantomData, str::FromStr};
use std::{fmt, io, str::FromStr};

pub use regex_automata::Error;
pub use regex_automata::dfa::dense::BuildError;
use regex_automata::dfa::dense::DFA;
use regex_automata::dfa::Automaton;
use regex_automata::util::primitives::StateID;
use regex_automata::Anchored;

/// A compiled match pattern that can match multipe inputs, or return a
/// [`Matcher`] that matches a single input.
///
/// [`Matcher`]: ../struct.Matcher.html
#[derive(Debug, Clone)]
pub struct Pattern<S = usize, A = DenseDFA<Vec<S>, S>>
where
S: StateID,
A: DFA<ID = S>,
{
pub struct Pattern<A = DFA<Vec<u32>>> {
automaton: A,
anchored: Anchored,
}

/// A reference to a [`Pattern`] that matches a single input.
///
/// [`Pattern`]: ../struct.Pattern.html
#[derive(Debug, Clone)]
pub struct Matcher<'a, S = usize, A = DenseDFA<&'a [S], S>>
where
S: StateID,
A: DFA<ID = S>,
{
pub struct Matcher<A = DFA<Vec<u32>>> {
automaton: A,
state: S,
_lt: PhantomData<&'a ()>,
state: StateID,
}

// === impl Pattern ===
Expand Down Expand Up @@ -86,9 +81,12 @@ impl Pattern {
/// // sequence when it's followed by non-matching characters:
/// assert!(pattern.display_matches(&"hello world! aaaaab"));
/// ```
pub fn new(pattern: &str) -> Result<Self, Error> {
let automaton = DenseDFA::new(pattern)?;
Ok(Pattern { automaton })
pub fn new(pattern: &str) -> Result<Self, BuildError> {
let automaton = DFA::new(pattern)?;
Ok(Pattern {
automaton,
anchored: Anchored::No,
})
}

/// Returns a new `Pattern` anchored at the beginning of the input stream,
Expand Down Expand Up @@ -120,25 +118,36 @@ impl Pattern {
/// .expect("regex is not invalid");
/// assert!(pattern2.display_matches(&"hello world! aaaaab"));
/// ```
pub fn new_anchored(pattern: &str) -> Result<Self, Error> {
let automaton = dense::Builder::new().anchored(true).build(pattern)?;
Ok(Pattern { automaton })
pub fn new_anchored(pattern: &str) -> Result<Self, BuildError> {
let automaton = DFA::new(pattern)?;
Ok(Pattern {
automaton,
anchored: Anchored::Yes,
})
}
}

impl FromStr for Pattern {
type Err = Error;
type Err = BuildError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}

impl<S, A> Pattern<S, A>
where
S: StateID,
A: DFA<ID = S>,
Self: for<'a> ToMatcher<'a, S>,
{
impl<A: Automaton> Pattern<A> {
/// Obtains a `matcher` for this pattern.
///
/// This conversion is useful when wanting to incrementally feed input (via
/// `io::Write`/`fmt::Write` to a matcher). Otherwise, the convenience methods on Pattern
/// suffice.
pub fn matcher(&self) -> Matcher<&'_ A> {
let config = regex_automata::util::start::Config::new().anchored(self.anchored);
Matcher {
automaton: &self.automaton,
state: self.automaton.start_state(&config).unwrap(),
}
}

/// Returns `true` if this pattern matches the given string.
#[inline]
pub fn matches(&self, s: &impl AsRef<str>) -> bool {
Expand Down Expand Up @@ -220,35 +229,24 @@ where

// === impl Matcher ===

impl<'a, S, A> Matcher<'a, S, A>
impl<A> Matcher<A>
where
S: StateID,
A: DFA<ID = S>,
A: Automaton,
{
fn new(automaton: A) -> Self {
let state = automaton.start_state();
Self {
automaton,
state,
_lt: PhantomData,
}
}

#[inline]
fn advance(&mut self, input: u8) {
self.state = unsafe {
// It's safe to call `next_state_unchecked` since the matcher may
// only be constructed by a `Pattern`, which, in turn,can only be
// constructed with a valid DFA.
self.automaton.next_state_unchecked(self.state, input)
};
// It's safe to call `next_state_unchecked` since the matcher may
// only be constructed by a `Pattern`, which, in turn, can only be
// constructed with a valid DFA.
self.state = unsafe { self.automaton.next_state_unchecked(self.state, input) };
}

/// Returns `true` if this `Matcher` has matched any input that has been
/// provided.
#[inline]
pub fn is_matched(&self) -> bool {
self.automaton.is_match_state(self.state)
let eoi_state = self.automaton.next_eoi_state(self.state);
self.automaton.is_match_state(eoi_state)
}

/// Returns `true` if this pattern matches the formatted output of the given
Expand Down Expand Up @@ -293,11 +291,7 @@ where
}
}

impl<'a, S, A> fmt::Write for Matcher<'a, S, A>
where
S: StateID,
A: DFA<ID = S>,
{
impl<A: Automaton> fmt::Write for Matcher<A> {
fn write_str(&mut self, s: &str) -> fmt::Result {
for &byte in s.as_bytes() {
self.advance(byte);
Expand All @@ -309,11 +303,7 @@ where
}
}

impl<'a, S, A> io::Write for Matcher<'a, S, A>
where
S: StateID,
A: DFA<ID = S>,
{
impl<A: Automaton> io::Write for Matcher<A> {
fn write(&mut self, bytes: &[u8]) -> Result<usize, io::Error> {
let mut i = 0;
for &byte in bytes {
Expand All @@ -331,43 +321,6 @@ where
}
}

pub trait ToMatcher<'a, S>
where
Self: crate::sealed::Sealed,
S: StateID + 'a,
{
type Automaton: DFA<ID = S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton>;
}

impl<S> crate::sealed::Sealed for Pattern<S, DenseDFA<Vec<S>, S>> where S: StateID {}

impl<'a, S> ToMatcher<'a, S> for Pattern<S, DenseDFA<Vec<S>, S>>
where
S: StateID + 'a,
{
type Automaton = DenseDFA<&'a [S], S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> {
Matcher::new(self.automaton.as_ref())
}
}

impl<'a, S> ToMatcher<'a, S> for Pattern<S, SparseDFA<Vec<u8>, S>>
where
S: StateID + 'a,
{
type Automaton = SparseDFA<&'a [u8], S>;
fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> {
Matcher::new(self.automaton.as_ref())
}
}

impl<S> crate::sealed::Sealed for Pattern<S, SparseDFA<Vec<u8>, S>> where S: StateID {}

mod sealed {
pub trait Sealed {}
}

#[cfg(test)]
mod test {
use super::*;
Expand Down Expand Up @@ -409,7 +362,7 @@ mod test {
}
}

fn test_debug_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_debug_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat.debug_matches(&Str::hello_world()));

Expand All @@ -420,7 +373,7 @@ mod test {
assert_eq!(pat.debug_matches(&Str::hello_world()), false);
}

fn test_display_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_display_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat.display_matches(&Str::hello_world()));

Expand All @@ -431,7 +384,7 @@ mod test {
assert_eq!(pat.display_matches(&Str::hello_world()), false);
}

fn test_reader_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_reader_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("hello world").unwrap();
assert!(pat
.read_matches(Str::hello_world().to_reader())
Expand All @@ -450,7 +403,7 @@ mod test {
);
}

fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
let pat = new_pattern("a+b").unwrap();
assert!(pat.debug_matches(&Str::new("ab")));
assert!(pat.debug_matches(&Str::new("aaaab")));
Expand Down