diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java index c0e4af238b1bd..a738a35411655 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java @@ -8,6 +8,8 @@ import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.apache.lucene.util.automaton.MinimizationOperations; +import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.common.cache.Cache; import org.elasticsearch.common.cache.CacheBuilder; @@ -19,11 +21,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.concurrent.ExecutionException; +import java.util.function.Function; import java.util.function.Predicate; -import static org.apache.lucene.util.automaton.MinimizationOperations.minimize; import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES; import static org.apache.lucene.util.automaton.Operations.concatenate; import static org.apache.lucene.util.automaton.Operations.intersection; @@ -84,10 +88,82 @@ public static Automaton patterns(Collection patterns) { } private static Automaton buildAutomaton(Collection patterns) { - List automata = new ArrayList<>(patterns.size()); - for (String pattern : patterns) { - final Automaton patternAutomaton = pattern(pattern); - automata.add(patternAutomaton); + if (patterns.size() == 1) { + return minimize(pattern(patterns.iterator().next())); + } + + final Function, Automaton> build = strings -> { + List automata = new ArrayList<>(strings.size()); + for (String pattern : strings) { + final Automaton patternAutomaton = pattern(pattern); + automata.add(patternAutomaton); + } + return unionAndMinimize(automata); + }; + + // We originally just compiled each automaton separately and then unioned them all. + // However, that approach can be quite slow, and very memory intensive. + // It is far more efficient if + // 1. we strip leading/trailing "*" + // 2. union the automaton produced from the remaining text + // 3. append/prepend MatchAnyString automatons as appropriate + // That is: + // - `MATCH_ALL + (bullseye|daredevil) + MATCH_ALL` + // can be determinized more efficiently than + // - `(MATCH_ALL + bullseye + MATCH_ALL)|(MATCH_ALL + daredevil + MATCH_ALL)` + + final Set prefix = new HashSet<>(); + final Set infix = new HashSet<>(); + final Set suffix = new HashSet<>(); + final Set misc = new HashSet<>(); + + for (String p : patterns) { + if (p.length() <= 1) { + // Single character strings (like "x" or "*"), or stray empty strings + misc.add(p); + continue; + } + + final char first = p.charAt(0); + final char last = p.charAt(p.length() - 1); + if (first == '/') { + // regex ("/something/") + misc.add(p); + } else if (first == '*') { + if (last == '*') { + // *something* + infix.add(p.substring(1, p.length() - 1)); + } else { + // *something + suffix.add(p.substring(1)); + } + } else if (last == '*' && p.indexOf('*') != p.length() - 1) { + // some*thing* + // For simple prefix patterns ("something*") it's more efficient to do a single pass + // Lucene can efficiently determinize automata that share a trailing MATCH_ANY accept state, + // If we were to handle them here, we would run 2 minimize operations (one for the union of strings, + // then another after concatenating MATCH_ANY), which is substantially slower. + // However, that's not true if the string has an embedded '*' in it - in that case it is more efficient to determinize + // the set of prefixes (with the embedded MATCH_ANY) and then concatenate another MATCH_ANY and minimize. + prefix.add(p.substring(0, p.length() - 1)); + } else { + // something* / some*thing / some?thing / etc + misc.add(p); + } + } + + final List automata = new ArrayList<>(); + if (prefix.isEmpty() == false) { + automata.add(Operations.concatenate(build.apply(prefix), Automata.makeAnyString())); + } + if (suffix.isEmpty() == false) { + automata.add(Operations.concatenate(Automata.makeAnyString(), build.apply(suffix))); + } + if (infix.isEmpty() == false) { + automata.add(Operations.concatenate(Arrays.asList(Automata.makeAnyString(), build.apply(infix), Automata.makeAnyString()))); + } + if (misc.isEmpty() == false) { + automata.add(build.apply(misc)); } return unionAndMinimize(automata); } @@ -172,18 +248,22 @@ static Automaton wildcard(String text) { } public static Automaton unionAndMinimize(Collection automata) { - Automaton res = union(automata); - return minimize(res, maxDeterminizedStates); + Automaton res = automata.size() == 1 ? automata.iterator().next() : union(automata); + return minimize(res); } public static Automaton minusAndMinimize(Automaton a1, Automaton a2) { Automaton res = minus(a1, a2, maxDeterminizedStates); - return minimize(res, maxDeterminizedStates); + return minimize(res); } public static Automaton intersectAndMinimize(Automaton a1, Automaton a2) { Automaton res = intersection(a1, a2); - return minimize(res, maxDeterminizedStates); + return minimize(res); + } + + private static Automaton minimize(Automaton automaton) { + return MinimizationOperations.minimize(automaton, maxDeterminizedStates); } public static Predicate predicate(String... patterns) { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/StringMatcher.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/StringMatcher.java index a1b6877a9f191..051a6e84a751e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/StringMatcher.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/StringMatcher.java @@ -162,7 +162,7 @@ private static Predicate buildAutomataPredicate(Collection patte if (description.length() > 80) { description = Strings.cleanTruncate(description, 80) + "..."; } - throw new ElasticsearchSecurityException("The set patterns [{}] is too complex to evaluate", e, description); + throw new ElasticsearchSecurityException("The set of patterns [{}] is too complex to evaluate", e, description); } } }