Skip to content

Commit

Permalink
impl: switch to aho-corasick 1.0
Browse files Browse the repository at this point in the history
This is a transitional commit that will need to be updated once
aho-corasick 1.0 is actually released. Its purpose is to make the
regex crate, the "old" regex crate and regex-automata all agree on the
same version of aho-corasick while it is still in development.
  • Loading branch information
BurntSushi committed Feb 28, 2023
1 parent 6c64620 commit 564d3f6
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 15 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,9 @@ pattern = []

# For very fast prefix literal matching.
[dependencies.aho-corasick]
version = "0.7.18"
version = "0.7.20"
optional = true
git = "https://github.com/BurntSushi/aho-corasick"

# For skipping along search text quickly when a leading byte is known.
[dependencies.memchr]
Expand Down
11 changes: 5 additions & 6 deletions src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::panic::AssertUnwindSafe;
use std::sync::Arc;

#[cfg(feature = "perf-literal")]
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use aho_corasick::{AhoCorasick, MatchKind};
use regex_syntax::hir::literal;
use regex_syntax::hir::{Hir, Look};
use regex_syntax::ParserBuilder;
Expand Down Expand Up @@ -98,7 +98,7 @@ struct ExecReadOnly {
/// if we were to exhaust the ID space, we probably would have long
/// surpassed the compilation size limit.
#[cfg(feature = "perf-literal")]
ac: Option<AhoCorasick<u32>>,
ac: Option<AhoCorasick>,
/// match_type encodes as much upfront knowledge about how we're going to
/// execute a search as possible.
match_type: MatchType,
Expand Down Expand Up @@ -392,7 +392,7 @@ impl ExecBuilder {
}

#[cfg(feature = "perf-literal")]
fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick<u32>> {
fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick> {
if parsed.exprs.len() != 1 {
return None;
}
Expand All @@ -406,10 +406,9 @@ impl ExecBuilder {
return None;
}
Some(
AhoCorasickBuilder::new()
AhoCorasick::builder()
.match_kind(MatchKind::LeftmostFirst)
.auto_configure(&lits)
.build_with_size::<u32, _, _>(&lits)
.build(&lits)
// This should never happen because we'd long exceed the
// compilation limit for regexes first.
.expect("AC automaton too big"),
Expand Down
16 changes: 8 additions & 8 deletions src/literal/imp.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::mem;

use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder};
use aho_corasick::{self, packed, AhoCorasick};
use memchr::{memchr, memchr2, memchr3, memmem};
use regex_syntax::hir::literal::{Literal, Seq};

Expand All @@ -26,7 +26,7 @@ enum Matcher {
/// A single substring, using vector accelerated routines when available.
Memmem(Memmem),
/// An Aho-Corasick automaton.
AC { ac: AhoCorasick<u32>, lits: Vec<Literal> },
AC { ac: AhoCorasick, lits: Vec<Literal> },
/// A packed multiple substring searcher, using SIMD.
///
/// Note that Aho-Corasick will actually use this packed searcher
Expand Down Expand Up @@ -149,7 +149,7 @@ impl LiteralSearcher {
Empty => 0,
Bytes(ref sset) => sset.dense.len(),
Memmem(_) => 1,
AC { ref ac, .. } => ac.pattern_count(),
AC { ref ac, .. } => ac.patterns_len(),
Packed { ref lits, .. } => lits.len(),
}
}
Expand All @@ -161,8 +161,8 @@ impl LiteralSearcher {
Empty => 0,
Bytes(ref sset) => sset.approximate_size(),
Memmem(ref single) => single.approximate_size(),
AC { ref ac, .. } => ac.heap_bytes(),
Packed { ref s, .. } => s.heap_bytes(),
AC { ref ac, .. } => ac.memory_usage(),
Packed { ref s, .. } => s.memory_usage(),
}
}
}
Expand Down Expand Up @@ -212,10 +212,10 @@ impl Matcher {
return Matcher::Packed { s, lits: lits.to_owned() };
}
}
let ac = AhoCorasickBuilder::new()
let ac = AhoCorasick::builder()
.match_kind(aho_corasick::MatchKind::LeftmostFirst)
.dfa(true)
.build_with_size::<u32, _, _>(&pats)
.kind(aho_corasick::AhoCorasickKind::DFA)
.build(&pats)
.unwrap();
Matcher::AC { ac, lits: lits.to_owned() }
}
Expand Down

0 comments on commit 564d3f6

Please sign in to comment.