diff --git a/server/build.gradle b/server/build.gradle index 80ef95163e7fe..b4fb3f5a02ac7 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -311,10 +311,6 @@ tasks.named('splitPackagesAudit').configure { 'org.elasticsearch.cli.EnvironmentAwareCommand', 'org.elasticsearch.cli.KeyStoreAwareCommand', 'org.elasticsearch.cli.LoggingAwareCommand', - 'org.elasticsearch.cli.LoggingAwareMultiCommand', - - // these should be temporary, query needs package private access to TermScorer though - 'org.apache.lucene.search.XCombinedFieldQuery', - 'org.apache.lucene.search.XMultiNormsLeafSimScorer' + 'org.elasticsearch.cli.LoggingAwareMultiCommand' } diff --git a/server/src/main/java/org/apache/lucene/search/XCombinedFieldQuery.java b/server/src/main/java/org/apache/lucene/search/XCombinedFieldQuery.java deleted file mode 100644 index 76c1ffbeb1024..0000000000000 --- a/server/src/main/java/org/apache/lucene/search/XCombinedFieldQuery.java +++ /dev/null @@ -1,508 +0,0 @@ -/* @notice - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications copyright (C) 2020 Elasticsearch B.V. - */ -package org.apache.lucene.search; - -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermState; -import org.apache.lucene.index.TermStates; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.BM25Similarity; -import org.apache.lucene.search.similarities.DFRSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.SimilarityBase; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.SmallFloat; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.TreeMap; - -/** - * - * Copy of {@link CombinedFieldQuery} that contains a fix for LUCENE-9999. - * TODO: remove once LUCENE-9999 is fixed and integrated - * - * A {@link Query} that treats multiple fields as a single stream and scores terms as if you had - * indexed them as a single term in a single field. - * - *

The query works as follows: - * - *

    - *
  1. Given a list of fields and weights, it pretends there is a synthetic combined field where - * all terms have been indexed. It computes new term and collection statistics for this - * combined field. - *
  2. It uses a disjunction iterator and {@link IndexSearcher#getSimilarity} to score documents. - *
- * - *

In order for a similarity to be compatible, {@link Similarity#computeNorm} must be additive: - * the norm of the combined field is the sum of norms for each individual field. The norms must also - * be encoded using {@link SmallFloat#intToByte4}. These requirements hold for all similarities that - * compute norms the same way as {@link SimilarityBase#computeNorm}, which includes {@link - * BM25Similarity} and {@link DFRSimilarity}. Per-field similarities are not supported. - * - *

The query also requires that either all fields or no fields have norms enabled. Having only - * some fields with norms enabled can result in errors. - * - *

The scoring is based on BM25F's simple formula described in: - * http://www.staff.city.ac.uk/~sb317/papers/foundations_bm25_review.pdf. This query implements the - * same approach but allows other similarities besides {@link - * org.apache.lucene.search.similarities.BM25Similarity}. - * - */ -public final class XCombinedFieldQuery extends Query implements Accountable { - private static final long BASE_RAM_BYTES = - RamUsageEstimator.shallowSizeOfInstance(XCombinedFieldQuery.class); - - /** A builder for {@link XCombinedFieldQuery}. */ - public static class Builder { - private final Map fieldAndWeights = new HashMap<>(); - private final Set termsSet = new HashSet<>(); - - /** - * Adds a field to this builder. - * - * @param field The field name. - */ - public Builder addField(String field) { - return addField(field, 1f); - } - - /** - * Adds a field to this builder. - * - * @param field The field name. - * @param weight The weight associated to this field. - */ - public Builder addField(String field, float weight) { - if (weight < 1) { - throw new IllegalArgumentException("weight must be greater or equal to 1"); - } - fieldAndWeights.put(field, new FieldAndWeight(field, weight)); - return this; - } - - /** Adds a term to this builder. */ - public Builder addTerm(BytesRef term) { - if (termsSet.size() > BooleanQuery.getMaxClauseCount()) { - throw new BooleanQuery.TooManyClauses(); - } - termsSet.add(term); - return this; - } - - /** Builds the {@link XCombinedFieldQuery}. */ - public XCombinedFieldQuery build() { - int size = fieldAndWeights.size() * termsSet.size(); - if (size > BooleanQuery.getMaxClauseCount()) { - throw new BooleanQuery.TooManyClauses(); - } - BytesRef[] terms = termsSet.toArray(new BytesRef[0]); - return new XCombinedFieldQuery(new TreeMap<>(fieldAndWeights), terms); - } - } - - static class FieldAndWeight { - final String field; - final float weight; - - FieldAndWeight(String field, float weight) { - this.field = field; - this.weight = weight; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - FieldAndWeight that = (FieldAndWeight) o; - return Float.compare(that.weight, weight) == 0 && Objects.equals(field, that.field); - } - - @Override - public int hashCode() { - return Objects.hash(field, weight); - } - } - - // sorted map for fields. - private final TreeMap fieldAndWeights; - // array of terms, sorted. - private final BytesRef terms[]; - // array of terms per field, sorted - private final Term fieldTerms[]; - - private final long ramBytesUsed; - - private XCombinedFieldQuery(TreeMap fieldAndWeights, BytesRef[] terms) { - this.fieldAndWeights = fieldAndWeights; - this.terms = terms; - int numFieldTerms = fieldAndWeights.size() * terms.length; - if (numFieldTerms > BooleanQuery.getMaxClauseCount()) { - throw new BooleanQuery.TooManyClauses(); - } - this.fieldTerms = new Term[numFieldTerms]; - Arrays.sort(terms); - int pos = 0; - for (String field : fieldAndWeights.keySet()) { - for (BytesRef term : terms) { - fieldTerms[pos++] = new Term(field, term); - } - } - - this.ramBytesUsed = - BASE_RAM_BYTES - + RamUsageEstimator.sizeOfObject(fieldAndWeights) - + RamUsageEstimator.sizeOfObject(fieldTerms) - + RamUsageEstimator.sizeOfObject(terms); - } - - public List getTerms() { - return Collections.unmodifiableList(Arrays.asList(fieldTerms)); - } - - @Override - public String toString(String field) { - StringBuilder builder = new StringBuilder("CombinedFieldQuery(("); - int pos = 0; - for (FieldAndWeight fieldWeight : fieldAndWeights.values()) { - if (pos++ != 0) { - builder.append(" "); - } - builder.append(fieldWeight.field); - if (fieldWeight.weight != 1f) { - builder.append("^"); - builder.append(fieldWeight.weight); - } - } - builder.append(")("); - pos = 0; - for (BytesRef term : terms) { - if (pos++ != 0) { - builder.append(" "); - } - builder.append(term.utf8ToString()); - } - builder.append("))"); - return builder.toString(); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (sameClassAs(o) == false) return false; - XCombinedFieldQuery that = (XCombinedFieldQuery) o; - return Objects.equals(fieldAndWeights, that.fieldAndWeights) && Arrays.equals(terms, that.terms); - } - - @Override - public int hashCode() { - int result = classHash(); - result = 31 * result + Objects.hash(fieldAndWeights); - result = 31 * result + Arrays.hashCode(terms); - return result; - } - - @Override - public long ramBytesUsed() { - return ramBytesUsed; - } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - // optimize zero and single field cases - if (terms.length == 0) { - return new BooleanQuery.Builder().build(); - } - return this; - } - - @Override - public void visit(QueryVisitor visitor) { - Term[] selectedTerms = - Arrays.stream(fieldTerms).filter(t -> visitor.acceptField(t.field())).toArray(Term[]::new); - if (selectedTerms.length > 0) { - QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.SHOULD, this); - v.consumeTerms(this, selectedTerms); - } - } - - private BooleanQuery rewriteToBoolean() { - // rewrite to a simple disjunction if the score is not needed. - BooleanQuery.Builder bq = new BooleanQuery.Builder(); - for (Term term : fieldTerms) { - bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD); - } - return bq.build(); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - validateConsistentNorms(searcher.getIndexReader()); - if (scoreMode.needsScores()) { - return new CombinedFieldWeight(this, searcher, scoreMode, boost); - } else { - // rewrite to a simple disjunction if the score is not needed. - Query bq = rewriteToBoolean(); - return searcher.rewrite(bq).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); - } - } - - private void validateConsistentNorms(IndexReader reader) { - boolean allFieldsHaveNorms = true; - boolean noFieldsHaveNorms = true; - - for (LeafReaderContext context : reader.leaves()) { - FieldInfos fieldInfos = context.reader().getFieldInfos(); - for (String field : fieldAndWeights.keySet()) { - FieldInfo fieldInfo = fieldInfos.fieldInfo(field); - if (fieldInfo != null) { - allFieldsHaveNorms &= fieldInfo.hasNorms(); - noFieldsHaveNorms &= fieldInfo.omitsNorms(); - } - } - } - - if (allFieldsHaveNorms == false && noFieldsHaveNorms == false) { - throw new IllegalArgumentException( - getClass().getSimpleName() - + " requires norms to be consistent across fields: some fields cannot " - + " have norms enabled, while others have norms disabled"); - } - } - - class CombinedFieldWeight extends Weight { - private final IndexSearcher searcher; - private final TermStates termStates[]; - private final Similarity.SimScorer simWeight; - - CombinedFieldWeight(Query query, IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - super(query); - assert scoreMode.needsScores(); - this.searcher = searcher; - long docFreq = 0; - long totalTermFreq = 0; - termStates = new TermStates[fieldTerms.length]; - for (int i = 0; i < termStates.length; i++) { - FieldAndWeight field = fieldAndWeights.get(fieldTerms[i].field()); - TermStates ts = TermStates.build(searcher.getTopReaderContext(), fieldTerms[i], true); - termStates[i] = ts; - if (ts.docFreq() > 0) { - TermStatistics termStats = - searcher.termStatistics(fieldTerms[i], ts.docFreq(), ts.totalTermFreq()); - docFreq = Math.max(termStats.docFreq(), docFreq); - totalTermFreq += (double) field.weight * termStats.totalTermFreq(); - } - } - if (docFreq > 0) { - CollectionStatistics pseudoCollectionStats = mergeCollectionStatistics(searcher); - TermStatistics pseudoTermStatistics = - new TermStatistics(new BytesRef("pseudo_term"), docFreq, Math.max(1, totalTermFreq)); - this.simWeight = - searcher.getSimilarity().scorer(boost, pseudoCollectionStats, pseudoTermStatistics); - } else { - this.simWeight = null; - } - } - - private CollectionStatistics mergeCollectionStatistics(IndexSearcher searcher) - throws IOException { - long maxDoc = searcher.getIndexReader().maxDoc(); - long docCount = 0; - long sumTotalTermFreq = 0; - long sumDocFreq = 0; - for (FieldAndWeight fieldWeight : fieldAndWeights.values()) { - CollectionStatistics collectionStats = searcher.collectionStatistics(fieldWeight.field); - if (collectionStats != null) { - docCount = Math.max(collectionStats.docCount(), docCount); - sumDocFreq = Math.max(collectionStats.sumDocFreq(), sumDocFreq); - sumTotalTermFreq += (double) fieldWeight.weight * collectionStats.sumTotalTermFreq(); - } - } - - return new CollectionStatistics( - "pseudo_field", maxDoc, docCount, sumTotalTermFreq, sumDocFreq); - } - - @Override - public void extractTerms(Set termSet) { - termSet.addAll(Arrays.asList(fieldTerms)); - } - - @Override - public Matches matches(LeafReaderContext context, int doc) throws IOException { - Weight weight = - searcher.rewrite(rewriteToBoolean()).createWeight(searcher, ScoreMode.COMPLETE, 1f); - return weight.matches(context, doc); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); - if (scorer != null) { - int newDoc = scorer.iterator().advance(doc); - if (newDoc == doc) { - final float freq; - if (scorer instanceof CombinedFieldScorer) { - freq = ((CombinedFieldScorer) scorer).freq(); - } else { - assert scorer instanceof TermScorer; - freq = ((TermScorer) scorer).freq(); - } - final XMultiNormsLeafSimScorer docScorer = - new XMultiNormsLeafSimScorer( - simWeight, context.reader(), fieldAndWeights.values(), true); - Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq); - Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); - return Explanation.match( - scoreExplanation.getValue(), - "weight(" + getQuery() + " in " + doc + "), result of:", - scoreExplanation); - } - } - return Explanation.noMatch("no matching term"); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - List iterators = new ArrayList<>(); - List fields = new ArrayList<>(); - for (int i = 0; i < fieldTerms.length; i++) { - TermState state = termStates[i].get(context); - if (state != null) { - TermsEnum termsEnum = context.reader().terms(fieldTerms[i].field()).iterator(); - termsEnum.seekExact(fieldTerms[i].bytes(), state); - PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS); - iterators.add(postingsEnum); - fields.add(fieldAndWeights.get(fieldTerms[i].field())); - } - } - - if (iterators.isEmpty()) { - return null; - } - - // we must optimize this case (term not in segment), disjunctions require >= 2 subs - if (iterators.size() == 1) { - final LeafSimScorer scoringSimScorer = - new LeafSimScorer(simWeight, context.reader(), fields.get(0).field, true); - return new TermScorer(this, iterators.get(0), scoringSimScorer); - } - final XMultiNormsLeafSimScorer scoringSimScorer = - new XMultiNormsLeafSimScorer(simWeight, context.reader(), fields, true); - LeafSimScorer nonScoringSimScorer = - new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false); - // we use termscorers + disjunction as an impl detail - DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size()); - for (int i = 0; i < iterators.size(); i++) { - float weight = fields.get(i).weight; - queue.add( - new WeightedDisiWrapper( - new TermScorer(this, iterators.get(i), nonScoringSimScorer), weight)); - } - // Even though it is called approximation, it is accurate since none of - // the sub iterators are two-phase iterators. - DocIdSetIterator iterator = new DisjunctionDISIApproximation(queue); - return new CombinedFieldScorer(this, queue, iterator, scoringSimScorer); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return false; - } - } - - private static class WeightedDisiWrapper extends DisiWrapper { - final float weight; - - WeightedDisiWrapper(Scorer scorer, float weight) { - super(scorer); - this.weight = weight; - } - - float freq() throws IOException { - return weight * ((PostingsEnum) iterator).freq(); - } - } - - private static class CombinedFieldScorer extends Scorer { - private final DisiPriorityQueue queue; - private final DocIdSetIterator iterator; - private final XMultiNormsLeafSimScorer simScorer; - - CombinedFieldScorer( - Weight weight, - DisiPriorityQueue queue, - DocIdSetIterator iterator, - XMultiNormsLeafSimScorer simScorer) { - super(weight); - this.queue = queue; - this.iterator = iterator; - this.simScorer = simScorer; - } - - @Override - public int docID() { - return iterator.docID(); - } - - float freq() throws IOException { - DisiWrapper w = queue.topList(); - float freq = ((WeightedDisiWrapper) w).freq(); - for (w = w.next; w != null; w = w.next) { - freq += ((WeightedDisiWrapper) w).freq(); - if (freq < 0) { // overflow - return Integer.MAX_VALUE; - } - } - return freq; - } - - @Override - public float score() throws IOException { - return simScorer.score(iterator.docID(), freq()); - } - - @Override - public DocIdSetIterator iterator() { - return iterator; - } - - @Override - public float getMaxScore(int upTo) throws IOException { - return Float.POSITIVE_INFINITY; - } - } -} diff --git a/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java b/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java deleted file mode 100644 index 0a4fdf0ff4a14..0000000000000 --- a/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java +++ /dev/null @@ -1,171 +0,0 @@ -/* @notice - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications copyright (C) 2020 Elasticsearch B.V. - */ -package org.apache.lucene.search; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.XCombinedFieldQuery.FieldAndWeight; -import org.apache.lucene.search.similarities.Similarity.SimScorer; -import org.apache.lucene.util.SmallFloat; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Objects; - -/** - * Copy of {@link MultiNormsLeafSimScorer} that contains a fix for LUCENE-9999. - * TODO: remove once LUCENE-9999 is fixed and integrated - * - *

For all fields, norms must be encoded using {@link SmallFloat#intToByte4}. This scorer also - * requires that either all fields or no fields have norms enabled. Having only some fields with - * norms enabled can result in errors or undefined behavior. - */ -final class XMultiNormsLeafSimScorer { - /** Cache of decoded norms. */ - private static final float[] LENGTH_TABLE = new float[256]; - - static { - for (int i = 0; i < 256; i++) { - LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i); - } - } - - private final SimScorer scorer; - private final NumericDocValues norms; - - /** Sole constructor: Score documents of {@code reader} with {@code scorer}. */ - XMultiNormsLeafSimScorer( - SimScorer scorer, - LeafReader reader, - Collection normFields, - boolean needsScores) - throws IOException { - this.scorer = Objects.requireNonNull(scorer); - if (needsScores) { - final List normsList = new ArrayList<>(); - final List weightList = new ArrayList<>(); - for (FieldAndWeight field : normFields) { - NumericDocValues norms = reader.getNormValues(field.field); - if (norms != null) { - normsList.add(norms); - weightList.add(field.weight); - } - } - - if (normsList.isEmpty()) { - norms = null; - } else if (normsList.size() == 1) { - norms = normsList.get(0); - } else { - final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]); - final float[] weightArr = new float[normsList.size()]; - for (int i = 0; i < weightList.size(); i++) { - weightArr[i] = weightList.get(i); - } - norms = new MultiFieldNormValues(normsArr, weightArr); - } - } else { - norms = null; - } - } - - private long getNormValue(int doc) throws IOException { - if (norms != null) { - boolean found = norms.advanceExact(doc); - assert found; - return norms.longValue(); - } else { - return 1L; // default norm - } - } - - /** - * Score the provided document assuming the given term document frequency. This method must be - * called on non-decreasing sequences of doc ids. - * - * @see SimScorer#score(float, long) - */ - public float score(int doc, float freq) throws IOException { - return scorer.score(freq, getNormValue(doc)); - } - - /** - * Explain the score for the provided document assuming the given term document frequency. This - * method must be called on non-decreasing sequences of doc ids. - * - * @see SimScorer#explain(Explanation, long) - */ - public Explanation explain(int doc, Explanation freqExpl) throws IOException { - return scorer.explain(freqExpl, getNormValue(doc)); - } - - private static class MultiFieldNormValues extends NumericDocValues { - private final NumericDocValues[] normsArr; - private final float[] weightArr; - private long current; - private int docID = -1; - - MultiFieldNormValues(NumericDocValues[] normsArr, float[] weightArr) { - this.normsArr = normsArr; - this.weightArr = weightArr; - } - - @Override - public long longValue() { - return current; - } - - @Override - public boolean advanceExact(int target) throws IOException { - float normValue = 0; - boolean found = false; - for (int i = 0; i < normsArr.length; i++) { - if (normsArr[i].advanceExact(target)) { - normValue += - weightArr[i] * LENGTH_TABLE[Byte.toUnsignedInt((byte) normsArr[i].longValue())]; - found = true; - } - } - current = SmallFloat.intToByte4(Math.round(normValue)); - return found; - } - - @Override - public int docID() { - return docID; - } - - @Override - public int nextDoc() { - throw new UnsupportedOperationException(); - } - - @Override - public int advance(int target) { - throw new UnsupportedOperationException(); - } - - @Override - public long cost() { - throw new UnsupportedOperationException(); - } - } -} diff --git a/server/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java b/server/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java index dd12837fce8c0..e80158a97b83a 100644 --- a/server/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java +++ b/server/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java @@ -12,13 +12,13 @@ import org.apache.lucene.index.Term; import org.apache.lucene.queries.BlendedTermQuery; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.CombinedFieldQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.XCombinedFieldQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; @@ -75,8 +75,8 @@ protected void flatten(Query sourceQuery, IndexReader reader, Collection for (Term term : synQuery.getTerms()) { flatten(new TermQuery(term), reader, flatQueries, boost); } - } else if (sourceQuery instanceof XCombinedFieldQuery) { - XCombinedFieldQuery combinedFieldQuery = (XCombinedFieldQuery) sourceQuery; + } else if (sourceQuery instanceof CombinedFieldQuery) { + CombinedFieldQuery combinedFieldQuery = (CombinedFieldQuery) sourceQuery; for (Term term : combinedFieldQuery.getTerms()) { flatten(new TermQuery(term), reader, flatQueries, boost); } diff --git a/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java index 3c8922321f49e..bfb8955f24bd0 100644 --- a/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java @@ -16,20 +16,20 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostAttribute; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.CombinedFieldQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.XCombinedFieldQuery; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.QueryBuilder; +import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.ObjectParser.ValueType; -import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MappedFieldType; @@ -395,7 +395,7 @@ public Query createPhraseQuery(String field, String queryText, int phraseSlop) { @Override protected Query newSynonymQuery(TermAndBoost[] terms) { - XCombinedFieldQuery.Builder query = new XCombinedFieldQuery.Builder(); + CombinedFieldQuery.Builder query = new CombinedFieldQuery.Builder(); for (TermAndBoost termAndBoost : terms) { assert termAndBoost.boost == BoostAttribute.DEFAULT_BOOST; BytesRef bytes = termAndBoost.term.bytes(); diff --git a/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java b/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java deleted file mode 100644 index 7b98109c9984e..0000000000000 --- a/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java +++ /dev/null @@ -1,211 +0,0 @@ -/* @notice - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications copyright (C) 2020 Elasticsearch B.V. - */ - -package org.apache.lucene.search; - -import com.carrotsearch.randomizedtesting.generators.RandomPicks; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.MultiReader; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.similarities.BM25Similarity; -import org.apache.lucene.search.similarities.BooleanSimilarity; -import org.apache.lucene.search.similarities.ClassicSimilarity; -import org.apache.lucene.search.similarities.LMDirichletSimilarity; -import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.MMapDirectory; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase; - -import java.io.IOException; -import java.util.Arrays; - -/** - * Test for @link {@link XCombinedFieldQuery} - * TODO remove once LUCENE 9999 is fixed and integrated and we remove our copy of the query - * - */ -public class XCombinedFieldQueryTests extends LuceneTestCase { - - public void testRewrite() throws IOException { - IndexReader reader = new MultiReader(); - IndexSearcher searcher = new IndexSearcher(reader); - - BooleanQuery query = new BooleanQuery.Builder() - .add(new XCombinedFieldQuery.Builder() - .addField("field1") - .addField("field2") - .addTerm(new BytesRef("value")) - .build(), BooleanClause.Occur.SHOULD) - .add(new XCombinedFieldQuery.Builder() - .addField("field3") - .addField("field4") - .addTerm(new BytesRef("value")) - .build(), BooleanClause.Occur.SHOULD) - .build(); - assertEquals(query, searcher.rewrite(query)); - - query = new BooleanQuery.Builder() - .add(new XCombinedFieldQuery.Builder() - .addField("field1", 2.0f) - .addField("field2") - .addTerm(new BytesRef("value")) - .build(), BooleanClause.Occur.SHOULD) - .add(new XCombinedFieldQuery.Builder() - .addField("field1", 1.3f) - .addField("field2") - .addTerm(new BytesRef("value")) - .build(), BooleanClause.Occur.SHOULD) - .build(); - assertEquals(query, searcher.rewrite(query)); - } - - public void testNormsDisabled() throws IOException { - Directory dir = newDirectory(); - Similarity similarity = randomCompatibleSimilarity(); - - IndexWriterConfig iwc = new IndexWriterConfig(); - iwc.setSimilarity(similarity); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - - Document doc = new Document(); - doc.add(new StringField("a", "value", Store.NO)); - doc.add(new StringField("b", "value", Store.NO)); - doc.add(new TextField("c", "value", Store.NO)); - w.addDocument(doc); - w.commit(); - - doc = new Document(); - doc.add(new StringField("a", "value", Store.NO)); - doc.add(new TextField("c", "value", Store.NO)); - w.addDocument(doc); - - IndexReader reader = w.getReader(); - IndexSearcher searcher = newSearcher(reader); - - Similarity searchSimilarity = randomCompatibleSimilarity(); - searcher.setSimilarity(searchSimilarity); - TopScoreDocCollector collector = TopScoreDocCollector.create(10, null, 10); - - XCombinedFieldQuery query = - new XCombinedFieldQuery.Builder() - .addField("a", 1.0f) - .addField("b", 1.0f) - .addTerm(new BytesRef("value")) - .build(); - searcher.search(query, collector); - TopDocs topDocs = collector.topDocs(); - assertEquals(new TotalHits(2, TotalHits.Relation.EQUAL_TO), topDocs.totalHits); - - TopScoreDocCollector invalidCollector = TopScoreDocCollector.create(10, null, 10); - XCombinedFieldQuery invalidQuery = - new XCombinedFieldQuery.Builder() - .addField("b", 1.0f) - .addField("c", 1.0f) - .addTerm(new BytesRef("value")) - .build(); - IllegalArgumentException e = - expectThrows( - IllegalArgumentException.class, () -> searcher.search(invalidQuery, invalidCollector)); - assertTrue(e.getMessage().contains("requires norms to be consistent across fields")); - - reader.close(); - w.close(); - dir.close(); - } - - public void testCopyFieldWithMissingFields() throws IOException { - Directory dir = new MMapDirectory(createTempDir()); - Similarity similarity = randomCompatibleSimilarity(); - - IndexWriterConfig iwc = new IndexWriterConfig(); - iwc.setSimilarity(similarity); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - - int boost1 = Math.max(1, random().nextInt(5)); - int boost2 = Math.max(1, random().nextInt(5)); - int numMatch = atLeast(10); - for (int i = 0; i < numMatch; i++) { - Document doc = new Document(); - int freqA = random().nextInt(5) + 1; - for (int j = 0; j < freqA; j++) { - doc.add(new TextField("a", "foo", Store.NO)); - } - - // Choose frequencies such that sometimes we don't add field B - int freqB = random().nextInt(3); - for (int j = 0; j < freqB; j++) { - doc.add(new TextField("b", "foo", Store.NO)); - } - - int freqAB = freqA * boost1 + freqB * boost2; - for (int j = 0; j < freqAB; j++) { - doc.add(new TextField("ab", "foo", Store.NO)); - } - - w.addDocument(doc); - } - - IndexReader reader = w.getReader(); - IndexSearcher searcher = newSearcher(reader); - searcher.setSimilarity(similarity); - XCombinedFieldQuery query = new XCombinedFieldQuery.Builder().addField("a", boost1) - .addField("b", boost2) - .addTerm(new BytesRef("foo")) - .build(); - - checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo"))); - - reader.close(); - w.close(); - dir.close(); - } - - private void checkExpectedHits(IndexSearcher searcher, int numHits, Query firstQuery, Query secondQuery) throws IOException { - TopScoreDocCollector firstCollector = TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE); - searcher.search(firstQuery, firstCollector); - TopDocs firstTopDocs = firstCollector.topDocs(); - assertEquals(numHits, firstTopDocs.totalHits.value); - TopScoreDocCollector secondCollector = TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE); - searcher.search(secondQuery, secondCollector); - TopDocs secondTopDocs = secondCollector.topDocs(); - CheckHits.checkEqual(firstQuery, secondTopDocs.scoreDocs, firstTopDocs.scoreDocs); - } - - private static Similarity randomCompatibleSimilarity() { - return RandomPicks.randomFrom( - random(), - Arrays.asList( - new BM25Similarity(), - new BooleanSimilarity(), - new ClassicSimilarity(), - new LMDirichletSimilarity(), - new LMJelinekMercerSimilarity(0.1f) - ) - ); - } -} diff --git a/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilderTests.java index 789b214ba71e2..ca0f7d6ae2316 100644 --- a/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilderTests.java @@ -9,11 +9,11 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CombinedFieldQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.XCombinedFieldQuery; import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; @@ -64,7 +64,7 @@ protected void doAssertLuceneQuery(CombinedFieldsQueryBuilder queryBuilder, Quer instanceOf(TermQuery.class), instanceOf(MatchAllDocsQuery.class), instanceOf(MatchNoDocsQuery.class), - instanceOf(XCombinedFieldQuery.class) + instanceOf(CombinedFieldQuery.class) ))); } diff --git a/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryParsingTests.java b/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryParsingTests.java index 8161b9917368d..e8d508a6e1ed0 100644 --- a/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryParsingTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryParsingTests.java @@ -16,10 +16,10 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.CombinedFieldQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.XCombinedFieldQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; @@ -116,8 +116,8 @@ public void testWildcardFieldPattern() throws Exception { BooleanQuery booleanQuery = (BooleanQuery) query; assertThat(booleanQuery.clauses().size(), equalTo(2)); - assertThat(booleanQuery.clauses().get(0).getQuery(), instanceOf(XCombinedFieldQuery.class)); - assertThat(booleanQuery.clauses().get(1).getQuery(), instanceOf(XCombinedFieldQuery.class)); + assertThat(booleanQuery.clauses().get(0).getQuery(), instanceOf(CombinedFieldQuery.class)); + assertThat(booleanQuery.clauses().get(1).getQuery(), instanceOf(CombinedFieldQuery.class)); } public void testOperator() throws Exception { @@ -150,7 +150,7 @@ public void testQueryBoost() throws IOException { BoostQuery boostQuery = (BoostQuery) query; assertThat(boostQuery.getBoost(), equalTo(2.0f)); - assertThat(boostQuery.getQuery(), instanceOf(XCombinedFieldQuery.class)); + assertThat(boostQuery.getQuery(), instanceOf(CombinedFieldQuery.class)); } public void testInconsistentAnalyzers() { @@ -214,13 +214,13 @@ public void testCombinedFieldsWithSynonyms() throws IOException { .toQuery(context); Query expected = new BooleanQuery.Builder() - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("synonym1") .addField("synonym2") .addTerm(new BytesRef("dog")) .addTerm(new BytesRef("dogs")) .build(), BooleanClause.Occur.MUST) - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("synonym1") .addField("synonym2") .addTerm(new BytesRef("cats")) @@ -247,13 +247,13 @@ public void testSynonymsPhrase() throws IOException { .add(new Term("synonym2", "pig")) .build(), BooleanClause.Occur.SHOULD) .build(), BooleanClause.Occur.SHOULD) - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("synonym1") .addField("synonym2") .addTerm(new BytesRef("cavy")) .build(), BooleanClause.Occur.SHOULD) .build(), BooleanClause.Occur.MUST) - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("synonym1") .addField("synonym2") .addTerm(new BytesRef("cats")) @@ -272,24 +272,24 @@ public void testDisabledSynonymsPhrase() throws IOException { Query expected = new BooleanQuery.Builder() .add(new BooleanQuery.Builder() .add(new BooleanQuery.Builder() - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("synonym1") .addField("synonym2") .addTerm(new BytesRef("guinea")) .build(), BooleanClause.Occur.MUST) - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("synonym1") .addField("synonym2") .addTerm(new BytesRef("pig")) .build(), BooleanClause.Occur.MUST) .build(), BooleanClause.Occur.SHOULD) - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("synonym1") .addField("synonym2") .addTerm(new BytesRef("cavy")) .build(), BooleanClause.Occur.SHOULD) .build(), BooleanClause.Occur.MUST) - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("synonym1") .addField("synonym2") .addTerm(new BytesRef("cats")) @@ -312,8 +312,8 @@ public void testStopwords() throws Exception { .zeroTermsQuery(zeroTermsQuery) .toQuery(context); Query expected = new BooleanQuery.Builder() - .add(new XCombinedFieldQuery.Builder().addField("stopwords1").addTerm(quickTerm).build(), BooleanClause.Occur.SHOULD) - .add(new XCombinedFieldQuery.Builder().addField("stopwords1").addTerm(foxTerm).build(), BooleanClause.Occur.SHOULD) + .add(new CombinedFieldQuery.Builder().addField("stopwords1").addTerm(quickTerm).build(), BooleanClause.Occur.SHOULD) + .add(new CombinedFieldQuery.Builder().addField("stopwords1").addTerm(foxTerm).build(), BooleanClause.Occur.SHOULD) .build(); assertEquals(expected, query); @@ -323,12 +323,12 @@ public void testStopwords() throws Exception { .zeroTermsQuery(zeroTermsQuery) .toQuery(context); expected = new BooleanQuery.Builder() - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("stopwords1") .addField("stopwords2") .addTerm(quickTerm) .build(), BooleanClause.Occur.SHOULD) - .add(new XCombinedFieldQuery.Builder() + .add(new CombinedFieldQuery.Builder() .addField("stopwords1") .addField("stopwords2") .addTerm(foxTerm)