From 85f16a7610561c22a5e2ccb046eb6fab6a250bfe Mon Sep 17 00:00:00 2001 From: markharwood Date: Tue, 3 Aug 2021 15:56:11 +0100 Subject: [PATCH 1/7] Remove use of BooleanQuery to prevent query caching framework from running contained verification query clauses across whole index at great expense. The new ApproximationAndVerificationQuery provides a replacement wrapper that plays better with caching logic Closes #75848 --- .../ApproximationAndVerificationQuery.java | 250 ++++++++++++++++++ .../wildcard/mapper/WildcardFieldMapper.java | 27 +- .../mapper/WildcardFieldMapperTests.java | 17 +- 3 files changed, 261 insertions(+), 33 deletions(-) create mode 100644 x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/ApproximationAndVerificationQuery.java diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/ApproximationAndVerificationQuery.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/ApproximationAndVerificationQuery.java new file mode 100644 index 0000000000000..a12ffa680c5ea --- /dev/null +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/ApproximationAndVerificationQuery.java @@ -0,0 +1,250 @@ +package org.elasticsearch.xpack.wildcard.mapper; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.ConjunctionDISI; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Matches; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.Scorable.ChildScorable; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +/** + * A wrapper for two query clauses, one a cheap approximation and the other a + * more expensive verification. (The same principle as chainsaw and fine sandpaper, + * one quickly removes the bulk of what's needed but the other is required for + * producing a final finish). + * Wrapping the clauses in a BooleanQuery as two "musts" rather than using this class + * also works but there's an important disadvantage - the caching framework harvests + * the internals of BooleanQueries, trying to find expensive clauses to cache. + * Because verification clauses (the fine sandpaper) only ever get used in conjunction + * with the results from a rough approximation we want to prevent them being used by + * the query caching logic on the whole index (the equivalent of turning a large tree + * into a food bowl only using fine sandpaper). + * Equally, the approximation query provides inconclusive results that need further + * treatment, so this clause would never be used in isolation (you don't make a food bowl + * using just a chainsaw). + * + * This ApproximationAndVerificationQuery class provides + * an opaque wrapper whose results can be cached as a whole but whose component parts + * cannot be visible to the caching framework. + */ +public final class ApproximationAndVerificationQuery extends Query { + + private final Query approxQuery, verifyQuery; + + /** + * Create an {@link ApproximationAndVerificationQuery}. + * @param approxQuery a query that is quick to run across many documents but is only approximate + * @param verifyQuery a query used to verify matches (slowly) on individual matches from approxQuery + */ + public ApproximationAndVerificationQuery(Query approxQuery, Query verifyQuery) { + this.approxQuery = approxQuery; + this.verifyQuery = verifyQuery; + } + + public Query getApproximationQuery() { + return approxQuery; + } + + public Query getVerificationQuery() { + return verifyQuery; + } + + @Override + public String toString(String field) { + return verifyQuery.toString(field); + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + ApproximationAndVerificationQuery that = (ApproximationAndVerificationQuery) obj; + return approxQuery.equals(that.approxQuery) && verifyQuery.equals(that.verifyQuery); + } + + @Override + public int hashCode() { + int h = classHash(); + h = 31 * h + approxQuery.hashCode(); + h = 31 * h + verifyQuery.hashCode(); + return h; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query approxRewrite = approxQuery.rewrite(reader); + Query verifyRewrite = verifyQuery.rewrite(reader); + if (approxQuery != approxRewrite || verifyQuery != verifyRewrite) { + return new ApproximationAndVerificationQuery(approxRewrite, verifyRewrite); + } + return this; + } + + @Override + public void visit(QueryVisitor visitor) { + QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, this); + approxQuery.visit(v); + verifyQuery.visit(v); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight approxWeight = approxQuery.createWeight(searcher, scoreMode, boost); + final Weight verifyWeight = verifyQuery.createWeight(searcher, scoreMode, boost); + + class ApproxAndVerifyWeight extends Weight { + + protected ApproxAndVerifyWeight(Query query) { + super(query); + } + + @Override + public Matches matches(LeafReaderContext context, int doc) throws IOException { + return verifyWeight.matches(context, doc); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + return verifyWeight.explain(context, doc); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + final ScorerSupplier approxScorerSupplier = approxWeight.scorerSupplier(context); + final ScorerSupplier verifyScorerSupplier = verifyWeight.scorerSupplier(context); + if (approxScorerSupplier == null || verifyScorerSupplier == null) { + return null; + } + return new ScorerSupplier() { + + @Override + public Scorer get(long leadCost) throws IOException { + return new ConjunctionScorer( + ApproxAndVerifyWeight.this, + approxScorerSupplier.get(leadCost), + verifyScorerSupplier.get(leadCost) + ); + + } + + @Override + public long cost() { + return approxScorerSupplier.cost(); + } + + /** Scorer for conjunction of the two queries */ + class ConjunctionScorer extends Scorer { + + final DocIdSetIterator disi; + Scorer verifyScorer; + Scorer approxScorer; + private DocIdSetIterator approxDisi; + private DocIdSetIterator verifyDisi; + + ConjunctionScorer(Weight weight, Scorer approxScorer, Scorer verifyScorer) throws IOException { + super(weight); + this.approxDisi = approxScorer.iterator(); + this.verifyDisi = verifyScorer.iterator(); + this.disi = ConjunctionDISI.intersectIterators(List.of(approxDisi, verifyDisi)); + this.approxScorer = approxScorer; + this.verifyScorer = verifyScorer; + } + + @Override + public TwoPhaseIterator twoPhaseIterator() { + return new TwoPhaseIterator(approxDisi) { + + @Override + public boolean matches() throws IOException { + + if (verifyDisi.docID() < approxDisi.docID()) { + verifyDisi.advance(approxDisi.docID()); + } + return verifyDisi.docID() == approxDisi.docID(); + + } + + @Override + public float matchCost() { + return 1000f; + } + }; + } + + @Override + public DocIdSetIterator iterator() { + return disi; + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public float score() throws IOException { + return approxScorer.score() + verifyScorer.score(); + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + + // For profiling purposes + @Override + public Collection getChildren() { + return List.of(new ChildScorable(approxScorer, "MUST"), new ChildScorable(verifyScorer, "MUST")); + } + + } + + }; + + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); + if (scorerSupplier == null) { + return null; + } + return scorerSupplier.get(Long.MAX_VALUE); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + // Neither of the contained queries is intended to be cached on their own but their conjunction is. + return true; + } + + @Override + public void extractTerms(Set terms) { + verifyWeight.extractTerms(terms); + approxWeight.extractTerms(terms); + } + + } + + return new ApproxAndVerifyWeight(this); + } + +} + diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 16632e42a84b8..a5ec882c50fef 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -334,10 +334,7 @@ public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean if (clauseCount > 0) { // We can accelerate execution with the ngram query BooleanQuery approxQuery = rewritten.build(); - BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder(); - verifyingBuilder.add(new BooleanClause(approxQuery, Occur.MUST)); - verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST)); - return new ConstantScoreQuery(verifyingBuilder.build()); + return new ConstantScoreQuery(new ApproximationAndVerificationQuery(approxQuery, verifyingQuery)); } else if (numWildcardChars == 0 || numWildcardStrings > 0) { // We have no concrete characters and we're not a pure length query e.g. ??? return new DocValuesFieldExistsQuery(name()); @@ -374,10 +371,7 @@ public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxD } // We can accelerate execution with the ngram query - BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder(); - verifyingBuilder.add(new BooleanClause(approxNgramQuery, Occur.MUST)); - verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST)); - return verifyingBuilder.build(); + return new ApproximationAndVerificationQuery(approxNgramQuery, verifyingQuery); } // Convert a regular expression to a simplified query consisting of BooleanQuery and TermQuery objects @@ -750,11 +744,7 @@ public Query rangeQuery( if (accelerationQuery == null) { return slowQuery; } - - BooleanQuery.Builder qBuilder = new BooleanQuery.Builder(); - qBuilder.add(accelerationQuery, Occur.MUST); - qBuilder.add(slowQuery, Occur.MUST); - return qBuilder.build(); + return new ApproximationAndVerificationQuery(accelerationQuery, slowQuery); } @Override @@ -768,7 +758,6 @@ public Query fuzzyQuery( ) { String searchTerm = BytesRefs.toString(value); try { - BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); //The approximation query can have a prefix and any number of ngrams. BooleanQuery.Builder approxBuilder = new BooleanQuery.Builder(); @@ -824,9 +813,6 @@ public Query fuzzyQuery( } BooleanQuery ngramQ = approxBuilder.build(); - if (ngramQ.clauses().size()>0) { - bqBuilder.add(ngramQ, Occur.MUST); - } // Verification query FuzzyQuery fq = new FuzzyQuery( @@ -836,9 +822,12 @@ public Query fuzzyQuery( maxExpansions, transpositions ); - bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, fq.getAutomata().automaton), Occur.MUST); + Query verificationQuery = new AutomatonQueryOnBinaryDv(name(), searchTerm, fq.getAutomata().automaton); + if (ngramQ.clauses().size() == 0) { + return verificationQuery; + } - return bqBuilder.build(); + return new ApproximationAndVerificationQuery(ngramQ, verificationQuery); } catch (IOException ioe) { throw new ElasticsearchParseException("Error parsing wildcard field fuzzy string [" + searchTerm + "]"); } diff --git a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java index d67fda41a651f..3b94b53f1421b 100644 --- a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java +++ b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java @@ -575,7 +575,7 @@ public void testWildcardAcceleration() throws IOException, ParseException { String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR); Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT); testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString); - assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof BooleanQuery); + assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof ApproximationAndVerificationQuery); } // TODO All these expressions have no acceleration at all and could be improved @@ -778,19 +778,8 @@ private Query unwrapAnyConstantScore(Query q) { } void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException { - BooleanQuery cq = (BooleanQuery) unwrapAnyConstantScore(combinedQuery); - assert cq.clauses().size() == 2; - Query approximationQuery = null; - boolean verifyQueryFound = false; - for (BooleanClause booleanClause : cq.clauses()) { - Query q = booleanClause.getQuery(); - if (q instanceof AutomatonQueryOnBinaryDv) { - verifyQueryFound = true; - } else { - approximationQuery = q; - } - } - assert verifyQueryFound; + ApproximationAndVerificationQuery cq = (ApproximationAndVerificationQuery) unwrapAnyConstantScore(combinedQuery); + Query approximationQuery = cq.getApproximationQuery(); String message = "regex: "+ regex +"\nactual query: " + formatQuery(approximationQuery) + "\nexpected query: " + formatQuery(expectedAccelerationQuery) + "\n"; From 1d29eb47a585c6dc1bb7794511f291375312c099 Mon Sep 17 00:00:00 2001 From: markharwood Date: Tue, 3 Aug 2021 17:11:28 +0100 Subject: [PATCH 2/7] Tidied imports --- .../xpack/wildcard/mapper/WildcardFieldMapperTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java index 3b94b53f1421b..7509245f7ebdd 100644 --- a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java +++ b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java @@ -19,7 +19,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; From c47b265fa059aa4e98cc199f8fdd6c480e05ea3e Mon Sep 17 00:00:00 2001 From: markharwood Date: Wed, 4 Aug 2021 10:46:29 +0100 Subject: [PATCH 3/7] WIP --- .../BinaryDvConfirmedAutomatonQuery.java | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java new file mode 100644 index 0000000000000..0f7f01e87362a --- /dev/null +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java @@ -0,0 +1,126 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.wildcard.mapper; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.ByteRunAutomaton; + +import java.io.IOException; +import java.util.Objects; + +/** + * Query that runs an Automaton across all binary doc values. + * Expensive to run so normally used in conjunction with more selective query clauses. + */ +public class BinaryDvConfirmedAutomatonQuery extends Query { + + private final String field; + private final String matchPattern; + private final ByteRunAutomaton bytesMatcher; + private final Query approxQuery; + + public BinaryDvConfirmedAutomatonQuery(Query approximation, String field, String matchPattern, Automaton automaton) { + this.approxQuery = approximation; + this.field = field; + this.matchPattern = matchPattern; + bytesMatcher = new ByteRunAutomaton(automaton); + } + + private BinaryDvConfirmedAutomatonQuery(Query approximation, String field, String matchPattern, ByteRunAutomaton bytesMatcher) { + this.approxQuery = approximation; + this.field = field; + this.matchPattern = matchPattern; + this.bytesMatcher = bytesMatcher; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query approxRewrite = approxQuery.rewrite(reader); + if (approxQuery != approxRewrite ) { + return new BinaryDvConfirmedAutomatonQuery(approxRewrite, field, matchPattern, bytesMatcher); + } + return this; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + ByteArrayDataInput badi = new ByteArrayDataInput(); + final BinaryDocValues values = DocValues.getBinary(context.reader(), field); + TwoPhaseIterator twoPhase = new TwoPhaseIterator(values) { + @Override + public boolean matches() throws IOException { + BytesRef arrayOfValues = values.binaryValue(); + badi.reset(arrayOfValues.bytes); + badi.setPosition(arrayOfValues.offset); + + int size = badi.readVInt(); + for (int i=0; i< size; i++) { + int valLength = badi.readVInt(); + if (bytesMatcher.run(arrayOfValues.bytes, badi.getPosition(), valLength)) { + return true; + } + badi.skipBytes(valLength); + } + return false; + } + + @Override + public float matchCost() { + // TODO: how can we compute this? + return 1000f; + } + }; + return new ConstantScoreScorer(this, score(), scoreMode, twoPhase); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return true; + } + }; + } + @Override + public String toString(String field) { + return field+":"+matchPattern; + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + BinaryDvConfirmedAutomatonQuery other = (BinaryDvConfirmedAutomatonQuery) obj; + return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern) + && Objects.equals(bytesMatcher, other.bytesMatcher) + && Objects.equals(approxQuery, other.approxQuery); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), field, matchPattern, bytesMatcher, approxQuery); + } + +} From c00e579842144d5e4f281a6fca2fc52bf70f355d Mon Sep 17 00:00:00 2001 From: markharwood Date: Wed, 4 Aug 2021 15:04:20 +0100 Subject: [PATCH 4/7] Replaced AutomatonQueryOnBinaryDv with new BinaryDVConfirmedAutomatonQuery which contains both the approximation (ngram index) query for acceleration and the automaton DV query. --- .../ApproximationAndVerificationQuery.java | 250 ------------------ .../mapper/AutomatonQueryOnBinaryDv.java | 106 -------- .../BinaryDvConfirmedAutomatonQuery.java | 27 +- .../wildcard/mapper/WildcardFieldMapper.java | 22 +- .../mapper/WildcardFieldMapperTests.java | 16 +- 5 files changed, 43 insertions(+), 378 deletions(-) delete mode 100644 x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/ApproximationAndVerificationQuery.java delete mode 100644 x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/ApproximationAndVerificationQuery.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/ApproximationAndVerificationQuery.java deleted file mode 100644 index a12ffa680c5ea..0000000000000 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/ApproximationAndVerificationQuery.java +++ /dev/null @@ -1,250 +0,0 @@ -package org.elasticsearch.xpack.wildcard.mapper; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.ConjunctionDISI; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Matches; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryVisitor; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.ScorerSupplier; -import org.apache.lucene.search.TwoPhaseIterator; -import org.apache.lucene.search.Weight; -import org.apache.lucene.search.Scorable.ChildScorable; - -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Set; - -/** - * A wrapper for two query clauses, one a cheap approximation and the other a - * more expensive verification. (The same principle as chainsaw and fine sandpaper, - * one quickly removes the bulk of what's needed but the other is required for - * producing a final finish). - * Wrapping the clauses in a BooleanQuery as two "musts" rather than using this class - * also works but there's an important disadvantage - the caching framework harvests - * the internals of BooleanQueries, trying to find expensive clauses to cache. - * Because verification clauses (the fine sandpaper) only ever get used in conjunction - * with the results from a rough approximation we want to prevent them being used by - * the query caching logic on the whole index (the equivalent of turning a large tree - * into a food bowl only using fine sandpaper). - * Equally, the approximation query provides inconclusive results that need further - * treatment, so this clause would never be used in isolation (you don't make a food bowl - * using just a chainsaw). - * - * This ApproximationAndVerificationQuery class provides - * an opaque wrapper whose results can be cached as a whole but whose component parts - * cannot be visible to the caching framework. - */ -public final class ApproximationAndVerificationQuery extends Query { - - private final Query approxQuery, verifyQuery; - - /** - * Create an {@link ApproximationAndVerificationQuery}. - * @param approxQuery a query that is quick to run across many documents but is only approximate - * @param verifyQuery a query used to verify matches (slowly) on individual matches from approxQuery - */ - public ApproximationAndVerificationQuery(Query approxQuery, Query verifyQuery) { - this.approxQuery = approxQuery; - this.verifyQuery = verifyQuery; - } - - public Query getApproximationQuery() { - return approxQuery; - } - - public Query getVerificationQuery() { - return verifyQuery; - } - - @Override - public String toString(String field) { - return verifyQuery.toString(field); - } - - @Override - public boolean equals(Object obj) { - if (sameClassAs(obj) == false) { - return false; - } - ApproximationAndVerificationQuery that = (ApproximationAndVerificationQuery) obj; - return approxQuery.equals(that.approxQuery) && verifyQuery.equals(that.verifyQuery); - } - - @Override - public int hashCode() { - int h = classHash(); - h = 31 * h + approxQuery.hashCode(); - h = 31 * h + verifyQuery.hashCode(); - return h; - } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - Query approxRewrite = approxQuery.rewrite(reader); - Query verifyRewrite = verifyQuery.rewrite(reader); - if (approxQuery != approxRewrite || verifyQuery != verifyRewrite) { - return new ApproximationAndVerificationQuery(approxRewrite, verifyRewrite); - } - return this; - } - - @Override - public void visit(QueryVisitor visitor) { - QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, this); - approxQuery.visit(v); - verifyQuery.visit(v); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight approxWeight = approxQuery.createWeight(searcher, scoreMode, boost); - final Weight verifyWeight = verifyQuery.createWeight(searcher, scoreMode, boost); - - class ApproxAndVerifyWeight extends Weight { - - protected ApproxAndVerifyWeight(Query query) { - super(query); - } - - @Override - public Matches matches(LeafReaderContext context, int doc) throws IOException { - return verifyWeight.matches(context, doc); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - return verifyWeight.explain(context, doc); - } - - @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - final ScorerSupplier approxScorerSupplier = approxWeight.scorerSupplier(context); - final ScorerSupplier verifyScorerSupplier = verifyWeight.scorerSupplier(context); - if (approxScorerSupplier == null || verifyScorerSupplier == null) { - return null; - } - return new ScorerSupplier() { - - @Override - public Scorer get(long leadCost) throws IOException { - return new ConjunctionScorer( - ApproxAndVerifyWeight.this, - approxScorerSupplier.get(leadCost), - verifyScorerSupplier.get(leadCost) - ); - - } - - @Override - public long cost() { - return approxScorerSupplier.cost(); - } - - /** Scorer for conjunction of the two queries */ - class ConjunctionScorer extends Scorer { - - final DocIdSetIterator disi; - Scorer verifyScorer; - Scorer approxScorer; - private DocIdSetIterator approxDisi; - private DocIdSetIterator verifyDisi; - - ConjunctionScorer(Weight weight, Scorer approxScorer, Scorer verifyScorer) throws IOException { - super(weight); - this.approxDisi = approxScorer.iterator(); - this.verifyDisi = verifyScorer.iterator(); - this.disi = ConjunctionDISI.intersectIterators(List.of(approxDisi, verifyDisi)); - this.approxScorer = approxScorer; - this.verifyScorer = verifyScorer; - } - - @Override - public TwoPhaseIterator twoPhaseIterator() { - return new TwoPhaseIterator(approxDisi) { - - @Override - public boolean matches() throws IOException { - - if (verifyDisi.docID() < approxDisi.docID()) { - verifyDisi.advance(approxDisi.docID()); - } - return verifyDisi.docID() == approxDisi.docID(); - - } - - @Override - public float matchCost() { - return 1000f; - } - }; - } - - @Override - public DocIdSetIterator iterator() { - return disi; - } - - @Override - public int docID() { - return disi.docID(); - } - - @Override - public float score() throws IOException { - return approxScorer.score() + verifyScorer.score(); - } - - @Override - public float getMaxScore(int upTo) throws IOException { - return Float.POSITIVE_INFINITY; - } - - // For profiling purposes - @Override - public Collection getChildren() { - return List.of(new ChildScorable(approxScorer, "MUST"), new ChildScorable(verifyScorer, "MUST")); - } - - } - - }; - - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); - if (scorerSupplier == null) { - return null; - } - return scorerSupplier.get(Long.MAX_VALUE); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - // Neither of the contained queries is intended to be cached on their own but their conjunction is. - return true; - } - - @Override - public void extractTerms(Set terms) { - verifyWeight.extractTerms(terms); - approxWeight.extractTerms(terms); - } - - } - - return new ApproxAndVerifyWeight(this); - } - -} - diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java deleted file mode 100644 index 6dbc342372905..0000000000000 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.wildcard.mapper; - -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.TwoPhaseIterator; -import org.apache.lucene.search.Weight; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.ByteRunAutomaton; - -import java.io.IOException; -import java.util.Objects; - -/** - * Query that runs an Automaton across all binary doc values. - * Expensive to run so normally used in conjunction with more selective query clauses. - */ -public class AutomatonQueryOnBinaryDv extends Query { - - private final String field; - private final String matchPattern; - private final ByteRunAutomaton bytesMatcher; - - public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton) { - this.field = field; - this.matchPattern = matchPattern; - bytesMatcher = new ByteRunAutomaton(automaton); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new ConstantScoreWeight(this, boost) { - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ByteArrayDataInput badi = new ByteArrayDataInput(); - final BinaryDocValues values = DocValues.getBinary(context.reader(), field); - TwoPhaseIterator twoPhase = new TwoPhaseIterator(values) { - @Override - public boolean matches() throws IOException { - BytesRef arrayOfValues = values.binaryValue(); - badi.reset(arrayOfValues.bytes); - badi.setPosition(arrayOfValues.offset); - - int size = badi.readVInt(); - for (int i=0; i< size; i++) { - int valLength = badi.readVInt(); - if (bytesMatcher.run(arrayOfValues.bytes, badi.getPosition(), valLength)) { - return true; - } - badi.skipBytes(valLength); - } - return false; - } - - @Override - public float matchCost() { - // TODO: how can we compute this? - return 1000f; - } - }; - return new ConstantScoreScorer(this, score(), scoreMode, twoPhase); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return true; - } - }; - } - @Override - public String toString(String field) { - return field+":"+matchPattern; - } - - @Override - public boolean equals(Object obj) { - if (sameClassAs(obj) == false) { - return false; - } - AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj; - return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern) - && Objects.equals(bytesMatcher, other.bytesMatcher); - } - - @Override - public int hashCode() { - return Objects.hash(classHash(), field, matchPattern, bytesMatcher); - } - -} diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java index 0f7f01e87362a..ab79edbe29533 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java @@ -13,7 +13,9 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; @@ -28,8 +30,8 @@ import java.util.Objects; /** - * Query that runs an Automaton across all binary doc values. - * Expensive to run so normally used in conjunction with more selective query clauses. + * Query that runs an Automaton across all binary doc values (but only for docs that also + * match a provided approximation query which is key to getting good performance). */ public class BinaryDvConfirmedAutomatonQuery extends Query { @@ -44,6 +46,9 @@ public BinaryDvConfirmedAutomatonQuery(Query approximation, String field, String this.matchPattern = matchPattern; bytesMatcher = new ByteRunAutomaton(automaton); } + public BinaryDvConfirmedAutomatonQuery(String field, String matchPattern, Automaton automaton) { + this(new MatchAllDocsQuery(), field, matchPattern, automaton); + } private BinaryDvConfirmedAutomatonQuery(Query approximation, String field, String matchPattern, ByteRunAutomaton bytesMatcher) { this.approxQuery = approximation; @@ -63,15 +68,27 @@ public Query rewrite(IndexReader reader) throws IOException { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight approxWeight = approxQuery.createWeight(searcher, scoreMode, boost); + return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { ByteArrayDataInput badi = new ByteArrayDataInput(); final BinaryDocValues values = DocValues.getBinary(context.reader(), field); - TwoPhaseIterator twoPhase = new TwoPhaseIterator(values) { + Scorer approxScorer = approxWeight.scorer(context); + if (approxScorer == null) { + // No matches to be had + return null; + } + DocIdSetIterator approxDisi = approxScorer.iterator(); + TwoPhaseIterator twoPhase = new TwoPhaseIterator(approxDisi) { @Override public boolean matches() throws IOException { + if (values.advanceExact(approxDisi.docID()) == false) + { + return false; + } BytesRef arrayOfValues = values.binaryValue(); badi.reset(arrayOfValues.bytes); badi.setPosition(arrayOfValues.offset); @@ -122,5 +139,9 @@ public boolean equals(Object obj) { public int hashCode() { return Objects.hash(classHash(), field, matchPattern, bytesMatcher, approxQuery); } + + Query getApproximationQuery() { + return approxQuery; + } } diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index a5ec882c50fef..36319a90d5d5c 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -330,16 +330,17 @@ public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean Automaton automaton = caseInsensitive ? AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE) : WildcardQuery.toAutomaton(new Term(name(), wildcardPattern)); - AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton); if (clauseCount > 0) { // We can accelerate execution with the ngram query BooleanQuery approxQuery = rewritten.build(); - return new ConstantScoreQuery(new ApproximationAndVerificationQuery(approxQuery, verifyingQuery)); + return new ConstantScoreQuery( + new BinaryDvConfirmedAutomatonQuery(approxQuery, name(), wildcardPattern, automaton)); } else if (numWildcardChars == 0 || numWildcardStrings > 0) { // We have no concrete characters and we're not a pure length query e.g. ??? return new DocValuesFieldExistsQuery(name()); } - return verifyingQuery; + return new ConstantScoreQuery( + new BinaryDvConfirmedAutomatonQuery(name(), wildcardPattern, automaton)); } @@ -362,16 +363,15 @@ public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxD } RegExp regex = new RegExp(value, syntaxFlags, matchFlags); Automaton automaton = regex.toAutomaton(maxDeterminizedStates); - AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, automaton); // MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single // clause which we can't accelerate at all and needs verification. Example would be ".." if (approxNgramQuery instanceof MatchAllButRequireVerificationQuery) { - return verifyingQuery; + return new BinaryDvConfirmedAutomatonQuery(name(), value, automaton); } // We can accelerate execution with the ngram query - return new ApproximationAndVerificationQuery(approxNgramQuery, verifyingQuery); + return new BinaryDvConfirmedAutomatonQuery(approxNgramQuery, name(), value, automaton); } // Convert a regular expression to a simplified query consisting of BooleanQuery and TermQuery objects @@ -739,12 +739,11 @@ public Query rangeQuery( } } Automaton automaton = TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper); - AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, automaton); if (accelerationQuery == null) { - return slowQuery; + return new BinaryDvConfirmedAutomatonQuery(name(), lower + "-" + upper, automaton); } - return new ApproximationAndVerificationQuery(accelerationQuery, slowQuery); + return new BinaryDvConfirmedAutomatonQuery(accelerationQuery, name(), lower + "-" + upper, automaton); } @Override @@ -822,12 +821,11 @@ public Query fuzzyQuery( maxExpansions, transpositions ); - Query verificationQuery = new AutomatonQueryOnBinaryDv(name(), searchTerm, fq.getAutomata().automaton); if (ngramQ.clauses().size() == 0) { - return verificationQuery; + return new BinaryDvConfirmedAutomatonQuery(name(), searchTerm, fq.getAutomata().automaton); } - return new ApproximationAndVerificationQuery(ngramQ, verificationQuery); + return new BinaryDvConfirmedAutomatonQuery(ngramQ, name(), searchTerm, fq.getAutomata().automaton); } catch (IOException ioe) { throw new ElasticsearchParseException("Error parsing wildcard field fuzzy string [" + searchTerm + "]"); } diff --git a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java index 7509245f7ebdd..1ddb6b1a7fb4e 100644 --- a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java +++ b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java @@ -521,8 +521,9 @@ public void testRegexAcceleration() throws IOException, ParseException { String matchAllButVerifyTests[]= { "..", "(a)?","(a|b){0,3}", "((foo)?|(foo|bar)?)", "@&~(abc.+)", "aaa.+&.+bbb"}; for (String regex : matchAllButVerifyTests) { Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 0, 20000, null, MOCK_CONTEXT); + BinaryDvConfirmedAutomatonQuery q = (BinaryDvConfirmedAutomatonQuery)wildcardFieldQuery; assertTrue(regex +" was not a pure verify query " +formatQuery(wildcardFieldQuery), - wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv); + q.getApproximationQuery() instanceof MatchAllDocsQuery); } @@ -574,7 +575,7 @@ public void testWildcardAcceleration() throws IOException, ParseException { String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR); Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT); testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString); - assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof ApproximationAndVerificationQuery); + assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof BinaryDvConfirmedAutomatonQuery); } // TODO All these expressions have no acceleration at all and could be improved @@ -582,9 +583,10 @@ public void testWildcardAcceleration() throws IOException, ParseException { for (String pattern : slowPatterns) { Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT); wildcardFieldQuery = unwrapAnyConstantScore(wildcardFieldQuery); + BinaryDvConfirmedAutomatonQuery q = (BinaryDvConfirmedAutomatonQuery) wildcardFieldQuery; assertTrue( pattern + " was not as slow as we assumed " + formatQuery(wildcardFieldQuery), - wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv + q.getApproximationQuery() instanceof MatchAllDocsQuery ); } @@ -598,14 +600,14 @@ public void testQueryCachingEquality() throws IOException, ParseException { new Term("field", pattern), Integer.MAX_VALUE ); - AutomatonQueryOnBinaryDv csQ = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton); - AutomatonQueryOnBinaryDv ciQ = new AutomatonQueryOnBinaryDv("field", pattern, caseInSensitiveAutomaton); + BinaryDvConfirmedAutomatonQuery csQ = new BinaryDvConfirmedAutomatonQuery("field", pattern, caseSensitiveAutomaton); + BinaryDvConfirmedAutomatonQuery ciQ = new BinaryDvConfirmedAutomatonQuery("field", pattern, caseInSensitiveAutomaton); assertNotEquals(csQ, ciQ); assertNotEquals(csQ.hashCode(), ciQ.hashCode()); // Same query should be equal Automaton caseSensitiveAutomaton2 = WildcardQuery.toAutomaton(new Term("field", pattern)); - AutomatonQueryOnBinaryDv csQ2 = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton2); + BinaryDvConfirmedAutomatonQuery csQ2 = new BinaryDvConfirmedAutomatonQuery("field", pattern, caseSensitiveAutomaton2); assertEquals(csQ, csQ2); assertEquals(csQ.hashCode(), csQ2.hashCode()); } @@ -777,7 +779,7 @@ private Query unwrapAnyConstantScore(Query q) { } void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException { - ApproximationAndVerificationQuery cq = (ApproximationAndVerificationQuery) unwrapAnyConstantScore(combinedQuery); + BinaryDvConfirmedAutomatonQuery cq = (BinaryDvConfirmedAutomatonQuery) unwrapAnyConstantScore(combinedQuery); Query approximationQuery = cq.getApproximationQuery(); String message = "regex: "+ regex +"\nactual query: " + formatQuery(approximationQuery) + From deab87b2050bb05236af4e1ea49a50ae1b21f0c8 Mon Sep 17 00:00:00 2001 From: markharwood Date: Thu, 5 Aug 2021 14:16:16 +0100 Subject: [PATCH 5/7] Addressing review comments --- .../mapper/BinaryDvConfirmedAutomatonQuery.java | 8 +++----- .../xpack/wildcard/mapper/WildcardFieldMapper.java | 14 +++++++------- .../wildcard/mapper/WildcardFieldMapperTests.java | 9 ++++++--- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java index ab79edbe29533..222d3ee7315f3 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java @@ -15,7 +15,6 @@ import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; @@ -46,9 +45,6 @@ public BinaryDvConfirmedAutomatonQuery(Query approximation, String field, String this.matchPattern = matchPattern; bytesMatcher = new ByteRunAutomaton(automaton); } - public BinaryDvConfirmedAutomatonQuery(String field, String matchPattern, Automaton automaton) { - this(new MatchAllDocsQuery(), field, matchPattern, automaton); - } private BinaryDvConfirmedAutomatonQuery(Query approximation, String field, String matchPattern, ByteRunAutomaton bytesMatcher) { this.approxQuery = approximation; @@ -86,7 +82,9 @@ public Scorer scorer(LeafReaderContext context) throws IOException { @Override public boolean matches() throws IOException { if (values.advanceExact(approxDisi.docID()) == false) - { + { + // Bug if we have an indexed value but no doc value. + assert false; return false; } BytesRef arrayOfValues = values.binaryValue(); diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 36319a90d5d5c..4cf488c790e2b 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -333,14 +333,12 @@ public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean if (clauseCount > 0) { // We can accelerate execution with the ngram query BooleanQuery approxQuery = rewritten.build(); - return new ConstantScoreQuery( - new BinaryDvConfirmedAutomatonQuery(approxQuery, name(), wildcardPattern, automaton)); + return new BinaryDvConfirmedAutomatonQuery(approxQuery, name(), wildcardPattern, automaton); } else if (numWildcardChars == 0 || numWildcardStrings > 0) { // We have no concrete characters and we're not a pure length query e.g. ??? return new DocValuesFieldExistsQuery(name()); } - return new ConstantScoreQuery( - new BinaryDvConfirmedAutomatonQuery(name(), wildcardPattern, automaton)); + return new BinaryDvConfirmedAutomatonQuery(new MatchAllDocsQuery(), name(), wildcardPattern, automaton); } @@ -367,7 +365,7 @@ public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxD // MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single // clause which we can't accelerate at all and needs verification. Example would be ".." if (approxNgramQuery instanceof MatchAllButRequireVerificationQuery) { - return new BinaryDvConfirmedAutomatonQuery(name(), value, automaton); + return new BinaryDvConfirmedAutomatonQuery(new MatchAllDocsQuery(), name(), value, automaton); } // We can accelerate execution with the ngram query @@ -741,7 +739,8 @@ public Query rangeQuery( Automaton automaton = TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper); if (accelerationQuery == null) { - return new BinaryDvConfirmedAutomatonQuery(name(), lower + "-" + upper, automaton); + return new BinaryDvConfirmedAutomatonQuery(new MatchAllDocsQuery(), + name(), lower + "-" + upper, automaton); } return new BinaryDvConfirmedAutomatonQuery(accelerationQuery, name(), lower + "-" + upper, automaton); } @@ -822,7 +821,8 @@ public Query fuzzyQuery( transpositions ); if (ngramQ.clauses().size() == 0) { - return new BinaryDvConfirmedAutomatonQuery(name(), searchTerm, fq.getAutomata().automaton); + return new BinaryDvConfirmedAutomatonQuery(new MatchAllDocsQuery(), + name(), searchTerm, fq.getAutomata().automaton); } return new BinaryDvConfirmedAutomatonQuery(ngramQ, name(), searchTerm, fq.getAutomata().automaton); diff --git a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java index 1ddb6b1a7fb4e..7407cd210ab30 100644 --- a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java +++ b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java @@ -600,14 +600,17 @@ public void testQueryCachingEquality() throws IOException, ParseException { new Term("field", pattern), Integer.MAX_VALUE ); - BinaryDvConfirmedAutomatonQuery csQ = new BinaryDvConfirmedAutomatonQuery("field", pattern, caseSensitiveAutomaton); - BinaryDvConfirmedAutomatonQuery ciQ = new BinaryDvConfirmedAutomatonQuery("field", pattern, caseInSensitiveAutomaton); + BinaryDvConfirmedAutomatonQuery csQ = new BinaryDvConfirmedAutomatonQuery(new MatchAllDocsQuery(), + "field", pattern, caseSensitiveAutomaton); + BinaryDvConfirmedAutomatonQuery ciQ = new BinaryDvConfirmedAutomatonQuery(new MatchAllDocsQuery(), + "field", pattern, caseInSensitiveAutomaton); assertNotEquals(csQ, ciQ); assertNotEquals(csQ.hashCode(), ciQ.hashCode()); // Same query should be equal Automaton caseSensitiveAutomaton2 = WildcardQuery.toAutomaton(new Term("field", pattern)); - BinaryDvConfirmedAutomatonQuery csQ2 = new BinaryDvConfirmedAutomatonQuery("field", pattern, caseSensitiveAutomaton2); + BinaryDvConfirmedAutomatonQuery csQ2 = new BinaryDvConfirmedAutomatonQuery(new MatchAllDocsQuery(), + "field", pattern, caseSensitiveAutomaton2); assertEquals(csQ, csQ2); assertEquals(csQ.hashCode(), csQ2.hashCode()); } From 80262fd35d1acc27f80b254cea6c2d669dd90880 Mon Sep 17 00:00:00 2001 From: markharwood Date: Thu, 5 Aug 2021 15:26:05 +0100 Subject: [PATCH 6/7] Fix assertion --- .../wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java index 222d3ee7315f3..d2b8cabbcd3d8 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java @@ -15,6 +15,7 @@ import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; @@ -83,8 +84,8 @@ public Scorer scorer(LeafReaderContext context) throws IOException { public boolean matches() throws IOException { if (values.advanceExact(approxDisi.docID()) == false) { - // Bug if we have an indexed value but no doc value. - assert false; + // Bug if we have an indexed value (i.e an approxQuery) but no doc value. + assert approxQuery instanceof MatchAllDocsQuery == false; return false; } BytesRef arrayOfValues = values.binaryValue(); From 4ce38630aef38ff61c2402bf25ef22b4d45f99c6 Mon Sep 17 00:00:00 2001 From: markharwood Date: Thu, 5 Aug 2021 15:49:40 +0100 Subject: [PATCH 7/7] Assertion logic was wrong way round --- .../xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java index d2b8cabbcd3d8..e4a634f473488 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedAutomatonQuery.java @@ -85,7 +85,7 @@ public boolean matches() throws IOException { if (values.advanceExact(approxDisi.docID()) == false) { // Bug if we have an indexed value (i.e an approxQuery) but no doc value. - assert approxQuery instanceof MatchAllDocsQuery == false; + assert approxQuery instanceof MatchAllDocsQuery; return false; } BytesRef arrayOfValues = values.binaryValue();