diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java index dc51afe5d420d..3d0f26e8cc130 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java @@ -9,9 +9,7 @@ package org.elasticsearch.index.mapper.extras; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; @@ -300,19 +298,23 @@ public RuntimePhraseScorer scorer(LeafReaderContext context) throws IOException @Override public Matches matches(LeafReaderContext context, int doc) throws IOException { - FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field); - if (fi == null) { + var terms = context.reader().terms(field); + if (terms == null) { return null; } - // Some highlighters will already have reindexed the source with positions and offsets, + // Some highlighters will already have re-indexed the source with positions and offsets, // so rather than doing it again we check to see if this data is available on the // current context and if so delegate directly to the inner query - if (fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0) { + if (terms.hasOffsets()) { Weight innerWeight = in.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1); return innerWeight.matches(context, doc); } RuntimePhraseScorer scorer = scorer(context); - if (scorer == null || scorer.iterator().advance(doc) != doc) { + if (scorer == null) { + return null; + } + final TwoPhaseIterator twoPhase = 
scorer.twoPhaseIterator(); + if (twoPhase.approximation().advance(doc) != doc || scorer.twoPhaseIterator().matches() == false) { return null; } return scorer.matches(); @@ -321,13 +323,14 @@ public Matches matches(LeafReaderContext context, int doc) throws IOException { } private class RuntimePhraseScorer extends Scorer { - private final LeafSimScorer scorer; private final CheckedIntFunction<List<Object>, IOException> valueFetcher; private final String field; private final Query query; private final TwoPhaseIterator twoPhase; + private final MemoryIndexEntry cacheEntry = new MemoryIndexEntry(); + private int doc = -1; private float freq; @@ -357,7 +360,6 @@ public float matchCost() { // Defaults to a high-ish value so that it likely runs last. return 10_000f; } - }; } @@ -394,35 +396,35 @@ private float freq() throws IOException { return freq; } - private float computeFreq() throws IOException { - MemoryIndex index = new MemoryIndex(); - index.setSimilarity(FREQ_SIMILARITY); - List<Object> values = valueFetcher.apply(docID()); - float frequency = 0; - for (Object value : values) { - if (value == null) { - continue; + private MemoryIndex getOrCreateMemoryIndex() throws IOException { + if (cacheEntry.docID != docID()) { + cacheEntry.docID = docID(); + cacheEntry.memoryIndex = new MemoryIndex(true, false); + cacheEntry.memoryIndex.setSimilarity(FREQ_SIMILARITY); + List<Object> values = valueFetcher.apply(docID()); + for (Object value : values) { + if (value == null) { + continue; + } + cacheEntry.memoryIndex.addField(field, value.toString(), indexAnalyzer); } - index.addField(field, value.toString(), indexAnalyzer); - frequency += index.search(query); - index.reset(); } - return frequency; + return cacheEntry.memoryIndex; + } + + private float computeFreq() throws IOException { + return getOrCreateMemoryIndex().search(query); } private Matches matches() throws IOException { - MemoryIndex index = new MemoryIndex(true, false); - List<Object> values = valueFetcher.apply(docID()); - for (Object value : 
values) { - if (value == null) { - continue; - } - index.addField(field, value.toString(), indexAnalyzer); - } - IndexSearcher searcher = index.createSearcher(); + IndexSearcher searcher = getOrCreateMemoryIndex().createSearcher(); Weight w = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1); return w.matches(searcher.getLeafContexts().get(0), 0); } } + private static class MemoryIndexEntry { + private int docID = -1; + private MemoryIndex memoryIndex; + } } diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java index 2b8d5870cb8aa..81e1dd7099860 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java @@ -49,13 +49,19 @@ import java.io.IOException; import java.util.Collections; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; public class SourceConfirmedTextQueryTests extends ESTestCase { + private static final AtomicInteger sourceFetchCount = new AtomicInteger(); private static final IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> SOURCE_FETCHER_PROVIDER = - context -> docID -> Collections.singletonList(context.reader().document(docID).get("body")); + context -> docID -> { + sourceFetchCount.incrementAndGet(); + return Collections.singletonList(context.reader().document(docID).get("body")); + }; public void testTerm() throws Exception { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { @@ -440,11 +446,11 @@ public void testEmptyIndex() throws Exception { } public void testMatches() throws Exception { - 
checkMatches(new TermQuery(new Term("body", "d")), "a b c d e", new int[] { 3, 3 }); - checkMatches(new PhraseQuery("body", "b", "c"), "a b c d c b c a", new int[] { 1, 2, 5, 6 }); + checkMatches(new TermQuery(new Term("body", "d")), "a b c d e", new int[] { 3, 3 }, false); + checkMatches(new PhraseQuery("body", "b", "c"), "a b c d c b c a", new int[] { 1, 2, 5, 6 }, true); } - private static void checkMatches(Query query, String inputDoc, int[] expectedMatches) throws IOException { + private static void checkMatches(Query query, String inputDoc, int[] expectedMatches, boolean expectedFetch) throws IOException { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { Document doc = new Document(); doc.add(new TextField("body", "xxxxxnomatchxxxx", Store.YES)); @@ -464,30 +470,48 @@ private static void checkMatches(Query query, String inputDoc, int[] expectedMat Query sourceConfirmedQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); try (IndexReader ir = DirectoryReader.open(w)) { - - IndexSearcher searcher = new IndexSearcher(ir); - TopDocs td = searcher.search( - sourceConfirmedQuery, - 3, - new Sort(KeywordField.newSortField("sort", false, SortedSetSelector.Type.MAX)) - ); - - Weight weight = searcher.createWeight(searcher.rewrite(sourceConfirmedQuery), ScoreMode.COMPLETE_NO_SCORES, 1); - - int firstDoc = td.scoreDocs[0].doc; - LeafReaderContext firstCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(firstDoc, searcher.getLeafContexts())); - checkMatches(weight, firstCtx, firstDoc - firstCtx.docBase, expectedMatches, 0); - - int secondDoc = td.scoreDocs[1].doc; - LeafReaderContext secondCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(secondDoc, searcher.getLeafContexts())); - checkMatches(weight, secondCtx, secondDoc - secondCtx.docBase, expectedMatches, 1); - + { + IndexSearcher searcher = new IndexSearcher(ir); + TopDocs td = 
searcher.search( + sourceConfirmedQuery, + 3, + new Sort(KeywordField.newSortField("sort", false, SortedSetSelector.Type.MAX)) + ); + + Weight weight = searcher.createWeight(searcher.rewrite(sourceConfirmedQuery), ScoreMode.COMPLETE_NO_SCORES, 1); + + int firstDoc = td.scoreDocs[0].doc; + LeafReaderContext firstCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(firstDoc, searcher.getLeafContexts())); + checkMatches(weight, firstCtx, firstDoc - firstCtx.docBase, expectedMatches, 0, expectedFetch); + + int secondDoc = td.scoreDocs[1].doc; + LeafReaderContext secondCtx = searcher.getLeafContexts() + .get(ReaderUtil.subIndex(secondDoc, searcher.getLeafContexts())); + checkMatches(weight, secondCtx, secondDoc - secondCtx.docBase, expectedMatches, 1, expectedFetch); + } + + { + IndexSearcher searcher = new IndexSearcher(ir); + TopDocs td = searcher.search(KeywordField.newExactQuery("sort", "0"), 1); + + Weight weight = searcher.createWeight(searcher.rewrite(sourceConfirmedQuery), ScoreMode.COMPLETE_NO_SCORES, 1); + int firstDoc = td.scoreDocs[0].doc; + LeafReaderContext firstCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(firstDoc, searcher.getLeafContexts())); + checkMatches(weight, firstCtx, firstDoc - firstCtx.docBase, new int[0], 0, false); + } } } } - private static void checkMatches(Weight w, LeafReaderContext ctx, int doc, int[] expectedMatches, int offset) throws IOException { + private static void checkMatches(Weight w, LeafReaderContext ctx, int doc, int[] expectedMatches, int offset, boolean expectedFetch) + throws IOException { + int count = sourceFetchCount.get(); Matches matches = w.matches(ctx, doc); + if (expectedMatches.length == 0) { + assertNull(matches); + assertThat(sourceFetchCount.get() - count, equalTo(expectedFetch ? 
1 : 0)); + return; + } assertNotNull(matches); MatchesIterator mi = matches.getMatches("body"); int i = 0; @@ -498,6 +522,7 @@ private static void checkMatches(Weight w, LeafReaderContext ctx, int doc, int[] i += 2; } assertEquals(expectedMatches.length, i); + assertThat(sourceFetchCount.get() - count, equalTo(expectedFetch ? 1 : 0)); } }