diff --git a/docs/changelog/92026.yaml b/docs/changelog/92026.yaml new file mode 100644 index 0000000000000..631daf0235b8b --- /dev/null +++ b/docs/changelog/92026.yaml @@ -0,0 +1,5 @@ +pr: 92026 +summary: Avoid doing I/O when fetching min and max for keyword fields +area: Search +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java index 965e0b97f4564..1bf5ffa615a57 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java @@ -173,7 +173,7 @@ static class ExitableTerms extends FilterLeafReader.FilterTerms { private final QueryCancellation queryCancellation; - private ExitableTerms(Terms terms, QueryCancellation queryCancellation) { + ExitableTerms(Terms terms, QueryCancellation queryCancellation) { super(terms); this.queryCancellation = queryCancellation; } @@ -187,6 +187,16 @@ public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throw public TermsEnum iterator() throws IOException { return new ExitableTermsEnum(in.iterator(), queryCancellation); } + + @Override + public BytesRef getMin() throws IOException { + return in.getMin(); + } + + @Override + public BytesRef getMax() throws IOException { + return in.getMax(); + } } /** diff --git a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java index 5372f0b3abcde..af8c53a04a832 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java @@ -90,11 +90,11 @@ public interface FieldUsageNotifier { void onKnnVectorsUsed(String field); } - public static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader { + static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader { private final FieldUsageNotifier notifier; - public FieldUsageTrackingLeafReader(LeafReader in, FieldUsageNotifier notifier) { + FieldUsageTrackingLeafReader(LeafReader in, FieldUsageNotifier notifier) { super(in); this.notifier = notifier; } @@ -248,7 +248,7 @@ public void close() throws IOException { } } - private class FieldUsageTrackingTerms extends FilterTerms { + class FieldUsageTrackingTerms extends FilterTerms { private final String field; @@ -286,6 +286,16 @@ public long getSumTotalTermFreq() throws IOException { public long getSumDocFreq() throws IOException { return in.getSumDocFreq(); } + + @Override + public BytesRef getMin() throws IOException { + return in.getMin(); + } + + @Override + public BytesRef getMax() throws IOException { + return in.getMax(); + } } private class FieldUsageTrackingTermsEnum extends FilterTermsEnum { diff --git a/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java b/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java index 23a951e20ab73..34ea932da0408 100644 --- a/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java +++ b/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java @@ -16,6 +16,7 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FilterDirectoryReader; +import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -24,6 +25,7 @@ import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.BulkScorer; @@ -271,6 +273,43 @@ public void onRemoval(ShardId shardId, Accountable accountable) { IOUtils.close(reader, w, dir); } + public void testExitableTermsMinAndMax() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null)); + Document doc = new Document(); + StringField fooField = new StringField("foo", "bar", Field.Store.NO); + doc.add(fooField); + w.addDocument(doc); + w.flush(); + + DirectoryReader directoryReader = DirectoryReader.open(w); + for (LeafReaderContext lfc : directoryReader.leaves()) { + Terms terms = lfc.reader().terms("foo"); + FilterLeafReader.FilterTerms filterTerms = new ExitableTerms(terms, new ExitableDirectoryReader.QueryCancellation() { + @Override + public boolean isEnabled() { + return false; + } + + @Override + public void checkCancelled() { + + } + }) { + @Override + public TermsEnum iterator() { + fail("Retrieving min and max should retrieve values from block tree instead of iterating"); + return null; + } + }; + assertEquals("bar", filterTerms.getMin().utf8ToString()); + assertEquals("bar", filterTerms.getMax().utf8ToString()); + } + w.close(); + directoryReader.close(); + dir.close(); + } + private SparseFixedBitSet query(LeafReaderContext leaf, String field, String value) throws IOException { SparseFixedBitSet sparseFixedBitSet = new SparseFixedBitSet(leaf.reader().maxDoc()); TermsEnum tenum = leaf.reader().terms(field).iterator(); diff --git a/server/src/test/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReaderTests.java b/server/src/test/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReaderTests.java new file mode 100644 index 0000000000000..f17cbf83aa4e3 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReaderTests.java @@ -0,0 +1,115 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.search.internal; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.Directory; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; + +public class FieldUsageTrackingDirectoryReaderTests extends ESTestCase { + + public void testTermsMinAndMax() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null)); + Document doc = new Document(); + StringField fooField = new StringField("foo", "bar", Field.Store.NO); + doc.add(fooField); + w.addDocument(doc); + w.flush(); + + DirectoryReader directoryReader = DirectoryReader.open(w); + for (LeafReaderContext lrc : directoryReader.leaves()) { + FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader leafReader = + new FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader(lrc.reader(), new TestFieldUsageNotifier()); + FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader.FieldUsageTrackingTerms terms = + leafReader.new FieldUsageTrackingTerms("foo", lrc.reader().terms("foo")) { + @Override + public TermsEnum iterator() { + fail("Retrieving min and max should retrieve values from block tree instead of iterating"); + return null; + } + }; + assertEquals("bar", terms.getMin().utf8ToString()); + assertEquals("bar", terms.getMax().utf8ToString()); + } + w.close(); + directoryReader.close(); + dir.close(); + } + + private static class TestFieldUsageNotifier implements FieldUsageTrackingDirectoryReader.FieldUsageNotifier { + @Override + public void onTermsUsed(String field) { + + } + + @Override + public void onPostingsUsed(String field) { + + } + + @Override + public void onTermFrequenciesUsed(String field) { + + } + + @Override + public void onPositionsUsed(String field) { + + } + + @Override + public void onOffsetsUsed(String field) { + + } + + @Override + public void onDocValuesUsed(String field) { + + } + + @Override + public void onStoredFieldsUsed(String field) { + + } + + @Override + public void onNormsUsed(String field) { + + } + + @Override + public void onPayloadsUsed(String field) { + + } + + @Override + public void onPointsUsed(String field) { + + } + + @Override + public void onTermVectorsUsed(String field) { + + } + + @Override + public void onKnnVectorsUsed(String field) { + + } + } +}