[7.17] Avoid doing I/O when fetching min and max for keyword fields (#92026) #92865

Merged: 1 commit, Jan 12, 2023
5 changes: 5 additions & 0 deletions docs/changelog/92026.yaml
@@ -0,0 +1,5 @@
pr: 92026
summary: Avoid doing I/O when fetching min and max for keyword fields
area: Search
type: bug
issues: []
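
For context: the fix makes the Terms wrappers in org.elasticsearch.search.internal delegate getMin() and getMax() to the wrapped Terms, which (for the default block-tree terms dictionary) can return its first and last term without creating a TermsEnum. Below is a minimal sketch of the per-segment read path this optimizes, using plain Lucene APIs; the class MinMaxExample, its method name, and the field name are illustrative and not taken from this PR.

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

class MinMaxExample {
    // Illustrative sketch (not code from this PR): read the smallest and largest
    // term of a keyword-style field for each segment. With the overrides added
    // below, wrapped readers answer getMin()/getMax() by delegating to the
    // underlying terms dictionary instead of iterating terms.
    static void printMinAndMax(Directory directory, String field) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            for (LeafReaderContext ctx : reader.leaves()) {
                Terms terms = ctx.reader().terms(field);
                if (terms != null) {
                    BytesRef min = terms.getMin(); // first term in sorted order
                    BytesRef max = terms.getMax(); // last term in sorted order
                    System.out.println(field + ": [" + min.utf8ToString() + ", " + max.utf8ToString() + "]");
                }
            }
        }
    }
}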
@@ -127,7 +127,7 @@ static class ExitableTerms extends FilterLeafReader.FilterTerms {

private final QueryCancellation queryCancellation;

-private ExitableTerms(Terms terms, QueryCancellation queryCancellation) {
+ExitableTerms(Terms terms, QueryCancellation queryCancellation) {
super(terms);
this.queryCancellation = queryCancellation;
}
@@ -141,6 +141,16 @@ public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throw
public TermsEnum iterator() throws IOException {
return new ExitableTermsEnum(in.iterator(), queryCancellation);
}

@Override
public BytesRef getMin() throws IOException {
return in.getMin();
}

@Override
public BytesRef getMax() throws IOException {
return in.getMax();
}
}

/**
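Why the overrides above are needed: FilterLeafReader.FilterTerms does not forward getMin() and getMax(), so without them ExitableTerms falls back to the defaults inherited from org.apache.lucene.index.Terms, which go through iterator(). The sketch below paraphrases that fallback for getMin(); it is an approximation for illustration, not Lucene's actual implementation.

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

class IterationFallbackSketch {
    // Approximation of an iteration-based getMin() fallback: opening the TermsEnum
    // can read terms-dictionary blocks from disk. Delegating to the wrapped Terms,
    // as the overrides above do, avoids creating the TermsEnum at all.
    static BytesRef minViaIteration(Terms terms) throws IOException {
        TermsEnum termsEnum = terms.iterator(); // may perform I/O
        return termsEnum.next();                // terms are sorted, so the first term is the minimum
    }
}
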
@@ -85,11 +85,11 @@ public interface FieldUsageNotifier {
void onTermVectorsUsed(String field);
}

-public static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader {
+static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader {

private final FieldUsageNotifier notifier;

-public FieldUsageTrackingLeafReader(LeafReader in, FieldUsageNotifier notifier) {
+FieldUsageTrackingLeafReader(LeafReader in, FieldUsageNotifier notifier) {
super(in);
this.notifier = notifier;
}
@@ -230,7 +230,7 @@ public long ramBytesUsed() {
}
}

-private class FieldUsageTrackingTerms extends FilterTerms {
+class FieldUsageTrackingTerms extends FilterTerms {

private final String field;

@@ -268,6 +268,16 @@ public long getSumTotalTermFreq() throws IOException {
public long getSumDocFreq() throws IOException {
return in.getSumDocFreq();
}

@Override
public BytesRef getMin() throws IOException {
return in.getMin();
}

@Override
public BytesRef getMax() throws IOException {
return in.getMax();
}
}

private class FieldUsageTrackingTermsEnum extends FilterTermsEnum {
@@ -16,6 +16,7 @@
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@@ -24,6 +25,7 @@
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.BulkScorer;
@@ -270,6 +272,43 @@ public void onRemoval(ShardId shardId, Accountable accountable) {
IOUtils.close(reader, w, dir);
}

public void testExitableTermsMinAndMax() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
StringField fooField = new StringField("foo", "bar", Field.Store.NO);
doc.add(fooField);
w.addDocument(doc);
w.flush();

DirectoryReader directoryReader = DirectoryReader.open(w);
for (LeafReaderContext lfc : directoryReader.leaves()) {
Terms terms = lfc.reader().terms("foo");
FilterLeafReader.FilterTerms filterTerms = new ExitableTerms(terms, new ExitableDirectoryReader.QueryCancellation() {
@Override
public boolean isEnabled() {
return false;
}

@Override
public void checkCancelled() {

}
}) {
@Override
public TermsEnum iterator() {
fail("Retrieving min and max should retrieve values from block tree instead of iterating");
return null;
}
};
assertEquals("bar", filterTerms.getMin().utf8ToString());
assertEquals("bar", filterTerms.getMax().utf8ToString());
}
w.close();
directoryReader.close();
dir.close();
}

private SparseFixedBitSet query(LeafReaderContext leaf, String field, String value) throws IOException {
SparseFixedBitSet sparseFixedBitSet = new SparseFixedBitSet(leaf.reader().maxDoc());
TermsEnum tenum = leaf.reader().terms(field).iterator();
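Note on the test above: the anonymous ExitableTerms subclass overrides iterator() to fail, so the assertions on getMin() and getMax() pass only if the values come from the wrapped Terms without a TermsEnum ever being created. The new FieldUsageTrackingDirectoryReaderTests added below uses the same approach for FieldUsageTrackingTerms.
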
@@ -0,0 +1,110 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.internal;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.elasticsearch.test.ESTestCase;

import java.io.IOException;

public class FieldUsageTrackingDirectoryReaderTests extends ESTestCase {

public void testTermsMinAndMax() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
StringField fooField = new StringField("foo", "bar", Field.Store.NO);
doc.add(fooField);
w.addDocument(doc);
w.flush();

DirectoryReader directoryReader = DirectoryReader.open(w);
for (LeafReaderContext lrc : directoryReader.leaves()) {
FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader leafReader =
new FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader(lrc.reader(), new TestFieldUsageNotifier());
FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader.FieldUsageTrackingTerms terms =
leafReader.new FieldUsageTrackingTerms("foo", lrc.reader().terms("foo")) {
@Override
public TermsEnum iterator() {
fail("Retrieving min and max should retrieve values from block tree instead of iterating");
return null;
}
};
assertEquals("bar", terms.getMin().utf8ToString());
assertEquals("bar", terms.getMax().utf8ToString());
}
w.close();
directoryReader.close();
dir.close();
}

private static class TestFieldUsageNotifier implements FieldUsageTrackingDirectoryReader.FieldUsageNotifier {
@Override
public void onTermsUsed(String field) {

}

@Override
public void onPostingsUsed(String field) {

}

@Override
public void onTermFrequenciesUsed(String field) {

}

@Override
public void onPositionsUsed(String field) {

}

@Override
public void onOffsetsUsed(String field) {

}

@Override
public void onDocValuesUsed(String field) {

}

@Override
public void onStoredFieldsUsed(String field) {

}

@Override
public void onNormsUsed(String field) {

}

@Override
public void onPayloadsUsed(String field) {

}

@Override
public void onPointsUsed(String field) {

}

@Override
public void onTermVectorsUsed(String field) {

}
}
}