-
Notifications
You must be signed in to change notification settings - Fork 24.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Avoid doing I/O when fetching min and max for keyword fields (#92026)
Whenever sorting on a date, numeric or keyword field (as primary sort), the can_match phase retrieves the min and max values for the field and sorts the shards (ascending or descending, depending on the sort order) so that they are queried in that order. This allows incremental results to be exposed in that same order when using async search, and enables optimizations built on top of this behaviour (#51852). For fields with points we call `getMinPackedValue` and `getMaxPackedValue`, while for keyword fields we call `Terms#getMin` and `Terms#getMax`. Elasticsearch uses `FilterTerms` implementations to cancel queries as well as to track field usage. Such filter implementations should delegate their `getMin` and `getMax` calls to the wrapped `Terms` instance, which leverages the min and max cached by the block tree. Otherwise, min and max are always retrieved from the index, which does I/O and slows the can_match phase down.
- Loading branch information
Showing
5 changed files
with
183 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 92026 | ||
summary: Avoid doing I/O when fetching min and max for keyword fields | ||
area: Search | ||
type: bug | ||
issues: [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
...c/test/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReaderTests.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.search.internal; | ||
|
||
import org.apache.lucene.document.Document; | ||
import org.apache.lucene.document.Field; | ||
import org.apache.lucene.document.StringField; | ||
import org.apache.lucene.index.DirectoryReader; | ||
import org.apache.lucene.index.IndexWriter; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.TermsEnum; | ||
import org.apache.lucene.store.Directory; | ||
import org.elasticsearch.test.ESTestCase; | ||
|
||
import java.io.IOException; | ||
|
||
public class FieldUsageTrackingDirectoryReaderTests extends ESTestCase { | ||
|
||
public void testTermsMinAndMax() throws IOException { | ||
Directory dir = newDirectory(); | ||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null)); | ||
Document doc = new Document(); | ||
StringField fooField = new StringField("foo", "bar", Field.Store.NO); | ||
doc.add(fooField); | ||
w.addDocument(doc); | ||
w.flush(); | ||
|
||
DirectoryReader directoryReader = DirectoryReader.open(w); | ||
for (LeafReaderContext lrc : directoryReader.leaves()) { | ||
FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader leafReader = | ||
new FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader(lrc.reader(), new TestFieldUsageNotifier()); | ||
FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader.FieldUsageTrackingTerms terms = | ||
leafReader.new FieldUsageTrackingTerms("foo", lrc.reader().terms("foo")) { | ||
@Override | ||
public TermsEnum iterator() { | ||
fail("Retrieving min and max should retrieve values from block tree instead of iterating"); | ||
return null; | ||
} | ||
}; | ||
assertEquals("bar", terms.getMin().utf8ToString()); | ||
assertEquals("bar", terms.getMax().utf8ToString()); | ||
} | ||
w.close(); | ||
directoryReader.close(); | ||
dir.close(); | ||
} | ||
|
||
private static class TestFieldUsageNotifier implements FieldUsageTrackingDirectoryReader.FieldUsageNotifier { | ||
@Override | ||
public void onTermsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onPostingsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onTermFrequenciesUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onPositionsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onOffsetsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onDocValuesUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onStoredFieldsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onNormsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onPayloadsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onPointsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onTermVectorsUsed(String field) { | ||
|
||
} | ||
|
||
@Override | ||
public void onKnnVectorsUsed(String field) { | ||
|
||
} | ||
} | ||
} |