elastic · jimczi · Mar 6, 2024 · Mar 5, 2024
diff --git a/...-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java b/...-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java
@@ -9,9 +9,7 @@
 package org.elasticsearch.index.mapper.extras;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermStates;
@@ -300,19 +298,23 @@ public RuntimePhraseScorer scorer(LeafReaderContext context) throws IOException
 
             @Override
             public Matches matches(LeafReaderContext context, int doc) throws IOException {
-                FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
-                if (fi == null) {
+                var terms = context.reader().terms(field);
+                if (terms == null) {
                     return null;
                 }
-                // Some highlighters will already have reindexed the source with positions and offsets,
+                // Some highlighters will already have re-indexed the source with positions and offsets,
                 // so rather than doing it again we check to see if this data is available on the
                 // current context and if so delegate directly to the inner query
-                if (fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0) {
+                if (terms.hasOffsets()) {
                     Weight innerWeight = in.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1);
                     return innerWeight.matches(context, doc);
                 }
                 RuntimePhraseScorer scorer = scorer(context);
-                if (scorer == null || scorer.iterator().advance(doc) != doc) {
+                if (scorer == null) {
+                    return null;
+                }
+                final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
+                if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
                     return null;
                 }
                 return scorer.matches();
@@ -321,13 +323,14 @@ public Matches matches(LeafReaderContext context, int doc) throws IOException {
     }
 
     private class RuntimePhraseScorer extends Scorer {
-
         private final LeafSimScorer scorer;
         private final CheckedIntFunction<List<Object>, IOException> valueFetcher;
         private final String field;
         private final Query query;
         private final TwoPhaseIterator twoPhase;
 
+        private final MemoryIndexEntry cacheEntry = new MemoryIndexEntry();
+
         private int doc = -1;
         private float freq;
 
@@ -357,7 +360,6 @@ public float matchCost() {
                     // Defaults to a high-ish value so that it likely runs last.
                     return 10_000f;
                 }
-
             };
         }
 
@@ -394,35 +396,45 @@ private float freq() throws IOException {
             return freq;
         }
 
-        private float computeFreq() throws IOException {
-            MemoryIndex index = new MemoryIndex();
-            index.setSimilarity(FREQ_SIMILARITY);
-            List<Object> values = valueFetcher.apply(docID());
-            float frequency = 0;
-            for (Object value : values) {
-                if (value == null) {
-                    continue;
+        /**
+         * Returns the memory index built from the values fetched for the current doc. The index is
+         * rebuilt only when {@link #docID()} differs from the cached doc, so {@code computeFreq()}
+         * and {@code matches()} share one fetch per doc.
+         */
+        private MemoryIndex getOrCreateMemoryIndex() throws IOException {
+            final int currentDoc = docID();
+            if (cacheEntry.docID != currentDoc) {
+                final MemoryIndex index = new MemoryIndex(true, false);
+                index.setSimilarity(FREQ_SIMILARITY);
+                List<Object> values = valueFetcher.apply(currentDoc);
+                for (Object value : values) {
+                    if (value == null) {
+                        continue;
+                    }
+                    index.addField(field, value.toString(), indexAnalyzer);
                 }
-                index.addField(field, value.toString(), indexAnalyzer);
-                frequency += index.search(query);
-                index.reset();
+                // Publish the entry only once the index is fully built, so that an exception thrown
+                // while fetching or indexing the values cannot leave a partial index cached for this doc.
+                cacheEntry.memoryIndex = index;
+                cacheEntry.docID = currentDoc;
             }
-            return frequency;
+            return cacheEntry.memoryIndex;
+        }
+
+        private float computeFreq() throws IOException {
+            return getOrCreateMemoryIndex().search(query);
         }
 
         private Matches matches() throws IOException {
-            MemoryIndex index = new MemoryIndex(true, false);
-            List<Object> values = valueFetcher.apply(docID());
-            for (Object value : values) {
-                if (value == null) {
-                    continue;
-                }
-                index.addField(field, value.toString(), indexAnalyzer);
-            }
-            IndexSearcher searcher = index.createSearcher();
+            IndexSearcher searcher = getOrCreateMemoryIndex().createSearcher();
             Weight w = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
             return w.matches(searcher.getLeafContexts().get(0), 0);
         }
     }
 
+    /** Holds the {@link MemoryIndex} most recently built by a scorer, together with the doc it was built for. */
+    private static class MemoryIndexEntry {
+        private int docID = -1;
+        private MemoryIndex memoryIndex;
+    }
 }
diff --git a/...as/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java b/...as/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java
@@ -49,13 +49,19 @@
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
 
+import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 
 public class SourceConfirmedTextQueryTests extends ESTestCase {
 
+    private static final AtomicInteger sourceFetchCount = new AtomicInteger();
     private static final IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> SOURCE_FETCHER_PROVIDER =
-        context -> docID -> Collections.<Object>singletonList(context.reader().document(docID).get("body"));
+        context -> docID -> {
+            sourceFetchCount.incrementAndGet();
+            return Collections.<Object>singletonList(context.reader().document(docID).get("body"));
+        };
 
     public void testTerm() throws Exception {
         try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
@@ -440,11 +446,11 @@ public void testEmptyIndex() throws Exception {
     }
 
     public void testMatches() throws Exception {
-        checkMatches(new TermQuery(new Term("body", "d")), "a b c d e", new int[] { 3, 3 });
-        checkMatches(new PhraseQuery("body", "b", "c"), "a b c d c b c a", new int[] { 1, 2, 5, 6 });
+        checkMatches(new TermQuery(new Term("body", "d")), "a b c d e", new int[] { 3, 3 }, false);
+        checkMatches(new PhraseQuery("body", "b", "c"), "a b c d c b c a", new int[] { 1, 2, 5, 6 }, true);
     }
 
-    private static void checkMatches(Query query, String inputDoc, int[] expectedMatches) throws IOException {
+    private static void checkMatches(Query query, String inputDoc, int[] expectedMatches, boolean expectedFetch) throws IOException {
         try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
             Document doc = new Document();
             doc.add(new TextField("body", "xxxxxnomatchxxxx", Store.YES));
@@ -464,30 +470,48 @@ private static void checkMatches(Query query, String inputDoc, int[] expectedMat
             Query sourceConfirmedQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER);
 
             try (IndexReader ir = DirectoryReader.open(w)) {
-
-                IndexSearcher searcher = new IndexSearcher(ir);
-                TopDocs td = searcher.search(
-                    sourceConfirmedQuery,
-                    3,
-                    new Sort(KeywordField.newSortField("sort", false, SortedSetSelector.Type.MAX))
-                );
-
-                Weight weight = searcher.createWeight(searcher.rewrite(sourceConfirmedQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
-
-                int firstDoc = td.scoreDocs[0].doc;
-                LeafReaderContext firstCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(firstDoc, searcher.getLeafContexts()));
-                checkMatches(weight, firstCtx, firstDoc - firstCtx.docBase, expectedMatches, 0);
-
-                int secondDoc = td.scoreDocs[1].doc;
-                LeafReaderContext secondCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(secondDoc, searcher.getLeafContexts()));
-                checkMatches(weight, secondCtx, secondDoc - secondCtx.docBase, expectedMatches, 1);
-
+                {
+                    IndexSearcher searcher = new IndexSearcher(ir);
+                    TopDocs td = searcher.search(
+                        sourceConfirmedQuery,
+                        3,
+                        new Sort(KeywordField.newSortField("sort", false, SortedSetSelector.Type.MAX))
+                    );
+
+                    Weight weight = searcher.createWeight(searcher.rewrite(sourceConfirmedQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
+
+                    int firstDoc = td.scoreDocs[0].doc;
+                    LeafReaderContext firstCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(firstDoc, searcher.getLeafContexts()));
+                    checkMatches(weight, firstCtx, firstDoc - firstCtx.docBase, expectedMatches, 0, expectedFetch);
+
+                    int secondDoc = td.scoreDocs[1].doc;
+                    LeafReaderContext secondCtx = searcher.getLeafContexts()
+                        .get(ReaderUtil.subIndex(secondDoc, searcher.getLeafContexts()));
+                    checkMatches(weight, secondCtx, secondDoc - secondCtx.docBase, expectedMatches, 1, expectedFetch);
+                }
+
+                {
+                    IndexSearcher searcher = new IndexSearcher(ir);
+                    TopDocs td = searcher.search(KeywordField.newExactQuery("sort", "0"), 1);
+
+                    Weight weight = searcher.createWeight(searcher.rewrite(sourceConfirmedQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
+                    int firstDoc = td.scoreDocs[0].doc;
+                    LeafReaderContext firstCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(firstDoc, searcher.getLeafContexts()));
+                    checkMatches(weight, firstCtx, firstDoc - firstCtx.docBase, new int[0], 0, false);
+                }
             }
         }
     }
 
-    private static void checkMatches(Weight w, LeafReaderContext ctx, int doc, int[] expectedMatches, int offset) throws IOException {
+    private static void checkMatches(Weight w, LeafReaderContext ctx, int doc, int[] expectedMatches, int offset, boolean expectedFetch)
+        throws IOException {
+        int count = sourceFetchCount.get();
         Matches matches = w.matches(ctx, doc);
+        if (expectedMatches.length == 0) {
+            assertNull(matches);
+            assertThat(sourceFetchCount.get() - count, equalTo(expectedFetch ? 1 : 0));
+            return;
+        }
         assertNotNull(matches);
         MatchesIterator mi = matches.getMatches("body");
         int i = 0;
@@ -498,6 +522,7 @@ private static void checkMatches(Weight w, LeafReaderContext ctx, int doc, int[]
             i += 2;
         }
         assertEquals(expectedMatches.length, i);
+        assertThat(sourceFetchCount.get() - count, equalTo(expectedFetch ? 1 : 0));
     }
 
 }