From fd300cffcf67785da2a1330dc19597ec5333ec1c Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 20 Nov 2023 09:44:04 -0500 Subject: [PATCH] ESQL: Load more than one field at once (#102192) This modifies ESQL to load a list of fields at one time which is especially effective when loading from stored fields or _source because it allows visiting the stored fields one time. Part of #101322 --- .../operator/ValuesSourceReaderBenchmark.java | 197 +++- docs/changelog/102192.yaml | 5 + .../extras/MatchOnlyTextFieldMapper.java | 4 +- .../mapper/extras/ScaledFloatFieldMapper.java | 6 +- .../index/mapper/BlockDocValuesReader.java | 551 +++++----- .../index/mapper/BlockLoader.java | 304 +++++- ...BlockLoaderStoredFieldsFromLeafLoader.java | 54 + .../index/mapper/BlockSourceReader.java | 366 ++++--- .../index/mapper/BlockStoredFieldsReader.java | 156 +-- .../index/mapper/BooleanFieldMapper.java | 4 +- .../BooleanScriptBlockDocValuesReader.java | 34 +- .../index/mapper/BooleanScriptFieldType.java | 2 +- .../index/mapper/DateFieldMapper.java | 4 +- .../DateScriptBlockDocValuesReader.java | 35 +- .../index/mapper/DateScriptFieldType.java | 2 +- .../DoubleScriptBlockDocValuesReader.java | 35 +- .../index/mapper/DoubleScriptFieldType.java | 2 +- .../index/mapper/IndexFieldMapper.java | 37 +- .../index/mapper/IpFieldMapper.java | 2 +- .../mapper/IpScriptBlockDocValuesReader.java | 35 +- .../index/mapper/IpScriptFieldType.java | 2 +- .../index/mapper/KeywordFieldMapper.java | 6 +- .../KeywordScriptBlockDocValuesReader.java | 35 +- .../index/mapper/KeywordScriptFieldType.java | 2 +- .../LongScriptBlockDocValuesReader.java | 35 +- .../index/mapper/LongScriptFieldType.java | 2 +- .../index/mapper/NumberFieldMapper.java | 30 +- .../index/mapper/ProvidedIdFieldMapper.java | 2 +- .../index/mapper/TextFieldMapper.java | 17 +- .../mapper/TsidExtractingIdFieldMapper.java | 2 +- .../index/mapper/VersionFieldMapper.java | 2 +- .../search/fetch/StoredFieldsSpec.java | 3 + 
.../mapper/BooleanScriptFieldTypeTests.java | 4 +- .../mapper/DateScriptFieldTypeTests.java | 7 +- .../mapper/DoubleScriptFieldTypeTests.java | 4 +- .../index/mapper/IpScriptFieldTypeTests.java | 4 +- .../mapper/KeywordScriptFieldTypeTests.java | 7 +- .../mapper/LongScriptFieldTypeTests.java | 4 +- .../index/mapper/TextFieldMapperTests.java | 6 + .../AbstractScriptFieldTypeTestCase.java | 15 +- .../index/mapper/MapperTestCase.java | 41 +- .../elasticsearch/index/mapper/TestBlock.java | 202 ++-- .../compute/lucene/BlockReaderFactories.java | 46 +- .../lucene/ValuesSourceReaderOperator.java | 313 ++++-- .../operator/OrdinalsGroupingOperator.java | 37 +- .../elasticsearch/compute/OperatorTests.java | 6 +- .../ValuesSourceReaderOperatorTests.java | 954 ++++++++++++++++-- .../resources/rest-api-spec/test/20_aggs.yml | 112 +- .../rest-api-spec/test/50_index_patterns.yml | 30 +- .../xpack/esql/action/EsqlActionTaskIT.java | 5 +- .../esql/enrich/EnrichLookupService.java | 13 +- .../planner/EsPhysicalOperationProviders.java | 19 +- .../mapper/ConstantKeywordFieldMapper.java | 40 +- .../ConstantKeywordFieldMapperTests.java | 34 +- .../unsignedlong/UnsignedLongFieldMapper.java | 6 +- .../VersionStringFieldMapper.java | 2 +- .../wildcard/mapper/WildcardFieldMapper.java | 4 +- 57 files changed, 2727 insertions(+), 1161 deletions(-) create mode 100644 docs/changelog/102192.yaml create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/BlockLoaderStoredFieldsFromLeafLoader.java diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java index 9fa876a00c35c..40edc0b8b9b7f 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java @@ -8,8 +8,11 @@ 
package org.elasticsearch.benchmark.compute.operator; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -19,6 +22,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; @@ -30,14 +34,16 @@ import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.LongVector; import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.lucene.BlockReaderFactories; import org.elasticsearch.compute.lucene.LuceneSourceOperator; import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator; import org.elasticsearch.compute.operator.topn.TopNOperator; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.search.lookup.SearchLookup; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -56,7 +62,9 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.PrimitiveIterator; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.IntStream; @@ -93,18 +101,113 @@ public class 
ValuesSourceReaderBenchmark { } } - private static BlockLoader blockLoader(String name) { + private static List fields(String name) { return switch (name) { - case "long" -> numericBlockLoader(name, NumberFieldMapper.NumberType.LONG); - case "int" -> numericBlockLoader(name, NumberFieldMapper.NumberType.INTEGER); - case "double" -> numericBlockLoader(name, NumberFieldMapper.NumberType.DOUBLE); - case "keyword" -> new KeywordFieldMapper.KeywordFieldType(name).blockLoader(null); - default -> throw new IllegalArgumentException("can't read [" + name + "]"); + case "3_stored_keywords" -> List.of( + new ValuesSourceReaderOperator.FieldInfo("keyword_1", List.of(blockLoader("stored_keyword_1"))), + new ValuesSourceReaderOperator.FieldInfo("keyword_2", List.of(blockLoader("stored_keyword_2"))), + new ValuesSourceReaderOperator.FieldInfo("keyword_3", List.of(blockLoader("stored_keyword_3"))) + ); + default -> List.of(new ValuesSourceReaderOperator.FieldInfo(name, List.of(blockLoader(name)))); }; } - private static BlockLoader numericBlockLoader(String name, NumberFieldMapper.NumberType numberType) { - return new NumberFieldMapper.NumberFieldType(name, numberType).blockLoader(null); + enum Where { + DOC_VALUES, + SOURCE, + STORED; + } + + private static BlockLoader blockLoader(String name) { + Where where = Where.DOC_VALUES; + if (name.startsWith("stored_")) { + name = name.substring("stored_".length()); + where = Where.STORED; + } else if (name.startsWith("source_")) { + name = name.substring("source_".length()); + where = Where.SOURCE; + } + switch (name) { + case "long": + return numericBlockLoader(name, where, NumberFieldMapper.NumberType.LONG); + case "int": + return numericBlockLoader(name, where, NumberFieldMapper.NumberType.INTEGER); + case "double": + return numericBlockLoader(name, where, NumberFieldMapper.NumberType.DOUBLE); + case "keyword": + name = "keyword_1"; + } + if (name.startsWith("keyword")) { + boolean syntheticSource = false; + FieldType ft = new 
FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE); + switch (where) { + case DOC_VALUES: + break; + case SOURCE: + ft.setDocValuesType(DocValuesType.NONE); + break; + case STORED: + ft.setStored(true); + ft.setDocValuesType(DocValuesType.NONE); + syntheticSource = true; + break; + } + ft.freeze(); + return new KeywordFieldMapper.KeywordFieldType( + name, + ft, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE), + syntheticSource + ).blockLoader(new MappedFieldType.BlockLoaderContext() { + @Override + public String indexName() { + return "benchmark"; + } + + @Override + public SearchLookup lookup() { + throw new UnsupportedOperationException(); + } + + @Override + public Set sourcePaths(String name) { + return Set.of(name); + } + }); + } + throw new IllegalArgumentException("can't read [" + name + "]"); + } + + private static BlockLoader numericBlockLoader(String name, Where where, NumberFieldMapper.NumberType numberType) { + boolean stored = false; + boolean docValues = true; + switch (where) { + case DOC_VALUES: + break; + case SOURCE: + stored = true; + docValues = false; + break; + case STORED: + throw new UnsupportedOperationException(); + } + return new NumberFieldMapper.NumberFieldType( + name, + numberType, + true, + stored, + docValues, + true, + null, + Map.of(), + null, + false, + null, + null + ).blockLoader(null); } /** @@ -122,7 +225,7 @@ private static BlockLoader numericBlockLoader(String name, NumberFieldMapper.Num @Param({ "in_order", "shuffled", "shuffled_singles" }) public String layout; - @Param({ "long", "int", "double", "keyword" }) + @Param({ "long", "int", "double", "keyword", "stored_keyword", "3_stored_keywords" }) public String name; private Directory directory; @@ -134,9 +237,9 @@ private static BlockLoader numericBlockLoader(String name, NumberFieldMapper.Num public void benchmark() { 
ValuesSourceReaderOperator op = new ValuesSourceReaderOperator( BlockFactory.getNonBreakingInstance(), - List.of(BlockReaderFactories.loaderToFactory(reader, blockLoader(name))), - 0, - name + fields(name), + List.of(reader), + 0 ); long sum = 0; for (Page page : pages) { @@ -160,7 +263,7 @@ public void benchmark() { sum += (long) values.getDouble(p); } } - case "keyword" -> { + case "keyword", "stored_keyword" -> { BytesRef scratch = new BytesRef(); BytesRefVector values = op.getOutput().getBlock(1).asVector(); for (int p = 0; p < values.getPositionCount(); p++) { @@ -170,21 +273,59 @@ public void benchmark() { sum += Integer.parseInt(r.utf8ToString()); } } + case "3_stored_keywords" -> { + BytesRef scratch = new BytesRef(); + Page out = op.getOutput(); + for (BytesRefVector values : new BytesRefVector[] { + out.getBlock(1).asVector(), + out.getBlock(2).asVector(), + out.getBlock(3).asVector() }) { + + for (int p = 0; p < values.getPositionCount(); p++) { + BytesRef r = values.getBytesRef(p, scratch); + r.offset++; + r.length--; + sum += Integer.parseInt(r.utf8ToString()); + } + } + } } } - long expected; - if (name.equals("keyword")) { - expected = 0; - for (int i = 0; i < INDEX_SIZE; i++) { - expected += i % 1000; - } - } else { - expected = INDEX_SIZE; - expected = expected * (expected - 1) / 2; + long expected = 0; + switch (name) { + case "keyword", "stored_keyword": + for (int i = 0; i < INDEX_SIZE; i++) { + expected += i % 1000; + } + break; + case "3_stored_keywords": + for (int i = 0; i < INDEX_SIZE; i++) { + expected += 3 * (i % 1000); + } + break; + default: + expected = INDEX_SIZE; + expected = expected * (expected - 1) / 2; } if (expected != sum) { throw new AssertionError("[" + layout + "][" + name + "] expected [" + expected + "] but was [" + sum + "]"); } + boolean foundStoredFieldLoader = false; + ValuesSourceReaderOperator.Status status = (ValuesSourceReaderOperator.Status) op.status(); + for (Map.Entry e : status.readersBuilt().entrySet()) { + 
if (e.getKey().indexOf("stored_fields") >= 0) { + foundStoredFieldLoader = true; + } + } + if (name.indexOf("stored") >= 0) { + if (foundStoredFieldLoader == false) { + throw new AssertionError("expected to use a stored field loader but only had: " + status.readersBuilt()); + } + } else { + if (foundStoredFieldLoader) { + throw new AssertionError("expected not to use a stored field loader but only had: " + status.readersBuilt()); + } + } } @Setup @@ -195,15 +336,23 @@ public void setup() throws IOException { private void setupIndex() throws IOException { directory = new ByteBuffersDirectory(); + FieldType keywordFieldType = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE); + keywordFieldType.setStored(true); + keywordFieldType.freeze(); try (IndexWriter iw = new IndexWriter(directory, new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { for (int i = 0; i < INDEX_SIZE; i++) { String c = Character.toString('a' - ((i % 1000) % 26) + 26); iw.addDocument( List.of( new NumericDocValuesField("long", i), + new StoredField("long", i), new NumericDocValuesField("int", i), + new StoredField("int", i), new NumericDocValuesField("double", NumericUtils.doubleToSortableLong(i)), - new KeywordFieldMapper.KeywordField("keyword", new BytesRef(c + i % 1000), KeywordFieldMapper.Defaults.FIELD_TYPE) + new StoredField("double", (double) i), + new KeywordFieldMapper.KeywordField("keyword_1", new BytesRef(c + i % 1000), keywordFieldType), + new KeywordFieldMapper.KeywordField("keyword_2", new BytesRef(c + i % 1000), keywordFieldType), + new KeywordFieldMapper.KeywordField("keyword_3", new BytesRef(c + i % 1000), keywordFieldType) ) ); if (i % COMMIT_INTERVAL == 0) { diff --git a/docs/changelog/102192.yaml b/docs/changelog/102192.yaml new file mode 100644 index 0000000000000..531aa943c9e36 --- /dev/null +++ b/docs/changelog/102192.yaml @@ -0,0 +1,5 @@ +pr: 102192 +summary: "ESQL: Load more than one field at once" +area: ES|QL +type: enhancement +issues: [] diff --git 
a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index ee04346591009..161cb1674a7b9 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -324,9 +324,9 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (textFieldType.isSyntheticSource()) { - return BlockStoredFieldsReader.bytesRefsFromStrings(storedFieldNameForSyntheticSource()); + return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(storedFieldNameForSyntheticSource()); } - return BlockSourceReader.bytesRefs(SourceValueFetcher.toString(blContext.sourcePaths(name()))); + return new BlockSourceReader.BytesRefsBlockLoader(SourceValueFetcher.toString(blContext.sourcePaths(name()))); } @Override diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java index abed23621d5e9..b35fb09c2d053 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java @@ -310,13 +310,13 @@ public Query rangeQuery( public BlockLoader blockLoader(BlockLoaderContext blContext) { if (indexMode == IndexMode.TIME_SERIES && metricType == TimeSeriesParams.MetricType.COUNTER) { // Counters are not supported by ESQL so we load them in null - return BlockDocValuesReader.nulls(); + return BlockLoader.CONSTANT_NULLS; } if (hasDocValues()) { double scalingFactorInverse = 1d / 
scalingFactor; - return BlockDocValuesReader.doubles(name(), l -> l * scalingFactorInverse); + return new BlockDocValuesReader.DoublesBlockLoader(name(), l -> l * scalingFactorInverse); } - return BlockSourceReader.doubles(sourceValueFetcher(blContext.sourcePaths(name()))); + return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher(blContext.sourcePaths(name()))); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 90a295e5a25f2..6e572eceeafc4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -8,6 +8,7 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; @@ -15,171 +16,94 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.UnicodeUtil; -import org.elasticsearch.core.CheckedFunction; -import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.common.io.stream.ByteArrayStreamInput; +import org.elasticsearch.index.mapper.BlockLoader.BlockFactory; import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder; import org.elasticsearch.index.mapper.BlockLoader.Builder; -import org.elasticsearch.index.mapper.BlockLoader.BuilderFactory; import org.elasticsearch.index.mapper.BlockLoader.BytesRefBuilder; import org.elasticsearch.index.mapper.BlockLoader.Docs; import org.elasticsearch.index.mapper.BlockLoader.DoubleBuilder; import org.elasticsearch.index.mapper.BlockLoader.IntBuilder; import org.elasticsearch.index.mapper.BlockLoader.LongBuilder; +import 
org.elasticsearch.search.fetch.StoredFieldsSpec; import java.io.IOException; /** * A reader that supports reading doc-values from a Lucene segment in Block fashion. */ -public abstract class BlockDocValuesReader { - public interface Factory { - BlockDocValuesReader build(int segment) throws IOException; - - boolean supportsOrdinals(); - - SortedSetDocValues ordinals(int segment) throws IOException; - } - - protected final Thread creationThread; +public abstract class BlockDocValuesReader implements BlockLoader.AllReader { + private final Thread creationThread; public BlockDocValuesReader() { this.creationThread = Thread.currentThread(); } - /** - * Returns the current doc that this reader is on. - */ - public abstract int docID(); - - /** - * The {@link BlockLoader.Builder} for data of this type. - */ - public abstract Builder builder(BuilderFactory factory, int expectedCount); - - /** - * Reads the values of the given documents specified in the input block - */ - public abstract BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException; - - /** - * Reads the values of the given document into the builder - */ - public abstract void readValuesFromSingleDoc(int docId, Builder builder) throws IOException; + protected abstract int docId(); /** * Checks if the reader can be used to read a range documents starting with the given docID by the current thread. 
*/ - public static boolean canReuse(BlockDocValuesReader reader, int startingDocID) { - return reader != null && reader.creationThread == Thread.currentThread() && reader.docID() <= startingDocID; + @Override + public final boolean canReuse(int startingDocID) { + return creationThread == Thread.currentThread() && docId() <= startingDocID; } - public static BlockLoader booleans(String fieldName) { - return context -> { - SortedNumericDocValues docValues = DocValues.getSortedNumeric(context.reader(), fieldName); - NumericDocValues singleton = DocValues.unwrapSingleton(docValues); - if (singleton != null) { - return new SingletonBooleans(singleton); - } - return new Booleans(docValues); - }; - } + @Override + public abstract String toString(); - public static BlockLoader bytesRefsFromOrds(String fieldName) { - return new BlockLoader() { - @Override - public BlockDocValuesReader reader(LeafReaderContext context) throws IOException { - SortedSetDocValues docValues = ordinals(context); - SortedDocValues singleton = DocValues.unwrapSingleton(docValues); - if (singleton != null) { - return new SingletonOrdinals(singleton); - } - return new Ordinals(docValues); - } + public abstract static class DocValuesBlockLoader implements BlockLoader { + public abstract AllReader reader(LeafReaderContext context) throws IOException; - @Override - public boolean supportsOrdinals() { - return true; - } + @Override + public final ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException { + return reader(context); + } - @Override - public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { - return DocValues.getSortedSet(context.reader(), fieldName); - } - }; - } + @Override + public final RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { + return reader(context); + } - /** - * Load {@link BytesRef} values from doc values. 
Prefer {@link #bytesRefsFromOrds} if - * doc values are indexed with ordinals because that's generally much faster. It's - * possible to use this with field data, but generally should be avoided because field - * data has higher per invocation overhead. - */ - public static BlockLoader bytesRefsFromDocValues(CheckedFunction fieldData) { - return context -> new Bytes(fieldData.apply(context)); - } + @Override + public final StoredFieldsSpec rowStrideStoredFieldSpec() { + return StoredFieldsSpec.NO_REQUIREMENTS; + } - /** - * Convert from the stored {@link long} into the {@link double} to load. - * Sadly, this will go megamorphic pretty quickly and slow us down, - * but it gets the job done for now. - */ - public interface ToDouble { - double convert(long v); - } + @Override + public boolean supportsOrdinals() { + return false; + } - /** - * Load {@code double} values from doc values. - */ - public static BlockLoader doubles(String fieldName, ToDouble toDouble) { - return context -> { - SortedNumericDocValues docValues = DocValues.getSortedNumeric(context.reader(), fieldName); - NumericDocValues singleton = DocValues.unwrapSingleton(docValues); - if (singleton != null) { - return new SingletonDoubles(singleton, toDouble); - } - return new Doubles(docValues, toDouble); - }; + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { + throw new UnsupportedOperationException(); + } } - /** - * Load {@code int} values from doc values. 
- */ - public static BlockLoader ints(String fieldName) { - return context -> { - SortedNumericDocValues docValues = DocValues.getSortedNumeric(context.reader(), fieldName); - NumericDocValues singleton = DocValues.unwrapSingleton(docValues); - if (singleton != null) { - return new SingletonInts(singleton); - } - return new Ints(docValues); - }; - } + public static class LongsBlockLoader extends DocValuesBlockLoader { + private final String fieldName; - /** - * Load a block of {@code long}s from doc values. - */ - public static BlockLoader longs(String fieldName) { - return context -> { + public LongsBlockLoader(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.longs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { SortedNumericDocValues docValues = DocValues.getSortedNumeric(context.reader(), fieldName); NumericDocValues singleton = DocValues.unwrapSingleton(docValues); if (singleton != null) { return new SingletonLongs(singleton); } return new Longs(docValues); - }; - } - - /** - * Load blocks with only null. 
- */ - public static BlockLoader nulls() { - return context -> new Nulls(); + } } - @Override - public abstract String toString(); - private static class SingletonLongs extends BlockDocValuesReader { private final NumericDocValues numericDocValues; @@ -188,13 +112,8 @@ private static class SingletonLongs extends BlockDocValuesReader { } @Override - public BlockLoader.LongBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.longsFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.LongBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count())) { int lastDoc = -1; for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); @@ -213,7 +132,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { BlockLoader.LongBuilder blockBuilder = (BlockLoader.LongBuilder) builder; if (numericDocValues.advanceExact(docId)) { blockBuilder.appendLong(numericDocValues.longValue()); @@ -223,13 +142,13 @@ public void readValuesFromSingleDoc(int docId, Builder builder) throws IOExcepti } @Override - public int docID() { + public int docId() { return numericDocValues.docID(); } @Override public String toString() { - return "SingletonLongs"; + return "BlockDocValuesReader.SingletonLongs"; } } @@ -242,13 +161,8 @@ private static class Longs extends BlockDocValuesReader { } @Override - public BlockLoader.LongBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.longsFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block 
readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.LongBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count())) { for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); if (doc < this.docID) { @@ -261,7 +175,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { read(docId, (LongBuilder) builder); } @@ -284,14 +198,37 @@ private void read(int doc, LongBuilder builder) throws IOException { } @Override - public int docID() { + public int docId() { // There is a .docID on the numericDocValues but it is often not implemented. return docID; } @Override public String toString() { - return "Longs"; + return "BlockDocValuesReader.Longs"; + } + } + + public static class IntsBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + + public IntsBlockLoader(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.ints(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + SortedNumericDocValues docValues = DocValues.getSortedNumeric(context.reader(), fieldName); + NumericDocValues singleton = DocValues.unwrapSingleton(docValues); + if (singleton != null) { + return new SingletonInts(singleton); + } + return new Ints(docValues); } } @@ -303,13 +240,8 @@ private static class SingletonInts extends BlockDocValuesReader { } @Override - public IntBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.intsFromDocValues(expectedCount); - } - - @Override - 
public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.IntBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.IntBuilder builder = factory.intsFromDocValues(docs.count())) { int lastDoc = -1; for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); @@ -328,7 +260,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { IntBuilder blockBuilder = (IntBuilder) builder; if (numericDocValues.advanceExact(docId)) { blockBuilder.appendInt(Math.toIntExact(numericDocValues.longValue())); @@ -338,13 +270,13 @@ public void readValuesFromSingleDoc(int docId, Builder builder) throws IOExcepti } @Override - public int docID() { + public int docId() { return numericDocValues.docID(); } @Override public String toString() { - return "SingletonInts"; + return "BlockDocValuesReader.SingletonInts"; } } @@ -357,13 +289,8 @@ private static class Ints extends BlockDocValuesReader { } @Override - public IntBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.intsFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.IntBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.IntBuilder builder = factory.intsFromDocValues(docs.count())) { for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); if (doc < this.docID) { @@ -376,7 +303,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void 
readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { read(docId, (IntBuilder) builder); } @@ -399,14 +326,48 @@ private void read(int doc, IntBuilder builder) throws IOException { } @Override - public int docID() { - // There is a .docID on on the numericDocValues but it is often not implemented. + public int docId() { + // There is a .docID on the numericDocValues but it is often not implemented. return docID; } @Override public String toString() { - return "Ints"; + return "BlockDocValuesReader.Ints"; + } + } + + /** + * Convert from the stored {@code long} into the {@code double} to load. + * Sadly, this will go megamorphic pretty quickly and slow us down, + * but it gets the job done for now. + */ + public interface ToDouble { + double convert(long v); + } + + public static class DoublesBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + private final ToDouble toDouble; + + public DoublesBlockLoader(String fieldName, ToDouble toDouble) { + this.fieldName = fieldName; + this.toDouble = toDouble; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.doubles(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + SortedNumericDocValues docValues = DocValues.getSortedNumeric(context.reader(), fieldName); + NumericDocValues singleton = DocValues.unwrapSingleton(docValues); + if (singleton != null) { + return new SingletonDoubles(singleton, toDouble); + } + return new Doubles(docValues, toDouble); } } @@ -421,13 +382,8 @@ private static class SingletonDoubles extends BlockDocValuesReader { } @Override - public DoubleBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.doublesFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs
docs) throws IOException { - try (BlockLoader.DoubleBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.DoubleBuilder builder = factory.doublesFromDocValues(docs.count())) { int lastDoc = -1; for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); @@ -447,7 +403,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { this.docID = docId; DoubleBuilder blockBuilder = (DoubleBuilder) builder; if (docValues.advanceExact(this.docID)) { @@ -458,13 +414,13 @@ public void readValuesFromSingleDoc(int docId, Builder builder) throws IOExcepti } @Override - public int docID() { + public int docId() { return docID; } @Override public String toString() { - return "SingletonDoubles"; + return "BlockDocValuesReader.SingletonDoubles"; } } @@ -479,13 +435,8 @@ private static class Doubles extends BlockDocValuesReader { } @Override - public DoubleBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.doublesFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.DoubleBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.DoubleBuilder builder = factory.doublesFromDocValues(docs.count())) { for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); if (doc < this.docID) { @@ -498,7 +449,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, 
BlockLoader.StoredFields storedFields, Builder builder) throws IOException { read(docId, (DoubleBuilder) builder); } @@ -521,13 +472,46 @@ private void read(int doc, DoubleBuilder builder) throws IOException { } @Override - public int docID() { + public int docId() { return docID; } @Override public String toString() { - return "Doubles"; + return "BlockDocValuesReader.Doubles"; + } + } + + public static class BytesRefsFromOrdsBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + + public BytesRefsFromOrdsBlockLoader(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public BytesRefBuilder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + SortedSetDocValues docValues = ordinals(context); + SortedDocValues singleton = DocValues.unwrapSingleton(docValues); + if (singleton != null) { + return new SingletonOrdinals(singleton); + } + return new Ordinals(docValues); + } + + @Override + public boolean supportsOrdinals() { + return true; + } + + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { + return DocValues.getSortedSet(context.reader(), fieldName); } } @@ -539,12 +523,7 @@ private static class SingletonOrdinals extends BlockDocValuesReader { } @Override - public BytesRefBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.bytesRefsFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { try (BlockLoader.SingletonOrdinalsBuilder builder = factory.singletonOrdinalsBuilder(ordinals, docs.count())) { for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); @@ -562,8 +541,8 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) 
throws IO } @Override - public void readValuesFromSingleDoc(int doc, Builder builder) throws IOException { - if (ordinals.advanceExact(doc)) { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { + if (ordinals.advanceExact(docId)) { ((BytesRefBuilder) builder).appendBytesRef(ordinals.lookupOrd(ordinals.ordValue())); } else { builder.appendNull(); @@ -571,13 +550,13 @@ public void readValuesFromSingleDoc(int doc, Builder builder) throws IOException } @Override - public int docID() { + public int docId() { return ordinals.docID(); } @Override public String toString() { - return "SingletonOrdinals"; + return "BlockDocValuesReader.SingletonOrdinals"; } } @@ -589,13 +568,8 @@ private static class Ordinals extends BlockDocValuesReader { } @Override - public BytesRefBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.bytesRefsFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BytesRefBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BytesRefBuilder builder = factory.bytesRefsFromDocValues(docs.count())) { for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); if (doc < ordinals.docID()) { @@ -608,12 +582,12 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int doc, Builder builder) throws IOException { - read(doc, (BytesRefBuilder) builder); + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { + read(docId, (BytesRefBuilder) builder); } - private void read(int doc, BytesRefBuilder builder) throws IOException { - if (false == ordinals.advanceExact(doc)) { + private void read(int docId, BytesRefBuilder builder) throws IOException { + if (false == 
ordinals.advanceExact(docId)) { builder.appendNull(); return; } @@ -630,32 +604,52 @@ private void read(int doc, BytesRefBuilder builder) throws IOException { } @Override - public int docID() { + public int docId() { return ordinals.docID(); } @Override public String toString() { - return "Ordinals"; + return "BlockDocValuesReader.Ordinals"; } } - private static class Bytes extends BlockDocValuesReader { - private final SortedBinaryDocValues docValues; - private int docID = -1; + public static class BytesRefsFromBinaryBlockLoader extends DocValuesBlockLoader { + private final String fieldName; - Bytes(SortedBinaryDocValues docValues) { - this.docValues = docValues; + public BytesRefsFromBinaryBlockLoader(String fieldName) { + this.fieldName = fieldName; } @Override - public BytesRefBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.bytesRefsFromDocValues(expectedCount); + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); } @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.BytesRefBuilder builder = builder(factory, docs.count())) { + public AllReader reader(LeafReaderContext context) throws IOException { + BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName); + if (docValues == null) { + return new ConstantNullsReader(); + } + return new BytesRefsFromBinary(docValues); + } + } + + private static class BytesRefsFromBinary extends BlockDocValuesReader { + private final BinaryDocValues docValues; + private final ByteArrayStreamInput in = new ByteArrayStreamInput(); + private final BytesRef scratch = new BytesRef(); + + private int docID = -1; + + BytesRefsFromBinary(BinaryDocValues docValues) { + this.docValues = docValues; + } + + @Override + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.BytesRefBuilder builder = 
factory.bytesRefs(docs.count())) { for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); if (doc < docID) { @@ -668,7 +662,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { read(docId, (BytesRefBuilder) builder); } @@ -678,27 +672,59 @@ private void read(int doc, BytesRefBuilder builder) throws IOException { builder.appendNull(); return; } - int count = docValues.docValueCount(); + BytesRef bytes = docValues.binaryValue(); + assert bytes.length > 0; + in.reset(bytes.bytes, bytes.offset, bytes.length); + int count = in.readVInt(); + scratch.bytes = bytes.bytes; + if (count == 1) { - // TODO read ords in ascending order. Buffers and stuff. - builder.appendBytesRef(docValues.nextValue()); + scratch.length = in.readVInt(); + scratch.offset = in.getPosition(); + builder.appendBytesRef(scratch); return; } builder.beginPositionEntry(); for (int v = 0; v < count; v++) { - builder.appendBytesRef(docValues.nextValue()); + scratch.length = in.readVInt(); + scratch.offset = in.getPosition(); + in.setPosition(scratch.offset + scratch.length); + builder.appendBytesRef(scratch); } builder.endPositionEntry(); } @Override - public int docID() { + public int docId() { return docID; } @Override public String toString() { - return "Bytes"; + return "BlockDocValuesReader.Bytes"; + } + } + + public static class BooleansBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + + public BooleansBlockLoader(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public BooleanBuilder builder(BlockFactory factory, int expectedCount) { + return factory.booleans(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + SortedNumericDocValues docValues = 
DocValues.getSortedNumeric(context.reader(), fieldName); + NumericDocValues singleton = DocValues.unwrapSingleton(docValues); + if (singleton != null) { + return new SingletonBooleans(singleton); + } + return new Booleans(docValues); } } @@ -710,13 +736,8 @@ private static class SingletonBooleans extends BlockDocValuesReader { } @Override - public BooleanBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.booleansFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.BooleanBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.BooleanBuilder builder = factory.booleansFromDocValues(docs.count())) { int lastDoc = -1; for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); @@ -735,7 +756,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { BooleanBuilder blockBuilder = (BooleanBuilder) builder; if (numericDocValues.advanceExact(docId)) { blockBuilder.appendBoolean(numericDocValues.longValue() != 0); @@ -745,13 +766,13 @@ public void readValuesFromSingleDoc(int docId, Builder builder) throws IOExcepti } @Override - public int docID() { + public int docId() { return numericDocValues.docID(); } @Override public String toString() { - return "SingletonBooleans"; + return "BlockDocValuesReader.SingletonBooleans"; } } @@ -764,13 +785,8 @@ private static class Booleans extends BlockDocValuesReader { } @Override - public BooleanBuilder builder(BuilderFactory factory, int expectedCount) { - return factory.booleansFromDocValues(expectedCount); - } - - @Override - public BlockLoader.Block 
readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.BooleanBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.BooleanBuilder builder = factory.booleansFromDocValues(docs.count())) { for (int i = 0; i < docs.count(); i++) { int doc = docs.get(i); if (doc < this.docID) { @@ -783,7 +799,7 @@ public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IO } @Override - public void readValuesFromSingleDoc(int docId, Builder builder) throws IOException { + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { read(docId, (BooleanBuilder) builder); } @@ -806,61 +822,14 @@ private void read(int doc, BooleanBuilder builder) throws IOException { } @Override - public int docID() { + public int docId() { // There is a .docID on the numericDocValues but it is often not implemented. return docID; } @Override public String toString() { - return "Booleans"; - } - } - - private static class Nulls extends BlockDocValuesReader { - private int docID = -1; - - @Override - public BlockLoader.Builder builder(BuilderFactory factory, int expectedCount) { - return factory.nulls(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BuilderFactory factory, Docs docs) throws IOException { - try (BlockLoader.Builder builder = builder(factory, docs.count())) { - for (int i = 0; i < docs.count(); i++) { - builder.appendNull(); - } - return builder.build(); - } - } - - @Override - public void readValuesFromSingleDoc(int docId, Builder builder) { - this.docID = docId; - builder.appendNull(); - } - - @Override - public int docID() { - return docID; - } - - @Override - public String toString() { - return "Nulls"; - } - } - - /** - * Convert a {@link String} into a utf-8 {@link BytesRef}. 
- */ - protected static BytesRef toBytesRef(BytesRef scratch, String v) { - int len = UnicodeUtil.maxUTF8Length(v.length()); - if (scratch.bytes.length < len) { - scratch.bytes = new byte[len]; + return "BlockDocValuesReader.Booleans"; } - scratch.length = UnicodeUtil.UTF16toUTF8(v, 0, v.length(), scratch.bytes); - return scratch; } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java index af53ab42d35d9..a8f3b919f33cc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java @@ -13,8 +13,12 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; import org.elasticsearch.core.Releasable; +import org.elasticsearch.search.fetch.StoredFieldsSpec; +import org.elasticsearch.search.lookup.Source; import java.io.IOException; +import java.util.List; +import java.util.Map; /** * Interface for loading data in a block shape. Instances of this class @@ -22,26 +26,292 @@ */ public interface BlockLoader { /** - * Build a {@link LeafReaderContext leaf} level reader. + * The {@link BlockLoader.Builder} for data of this type. Called when + * loading from a multi-segment or unsorted block. */ - BlockDocValuesReader reader(LeafReaderContext context) throws IOException; + Builder builder(BlockFactory factory, int expectedCount); + + interface Reader { + /** + * Checks if the reader can be used to read a range of documents starting with the given docID by the current thread. + */ + boolean canReuse(int startingDocID); + } + + interface ColumnAtATimeReader extends Reader { + /** + * Reads the values of all documents in {@code docs}. + */ + BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException; + } + + interface RowStrideReader extends Reader { + /** + * Reads the values of the given document into the builder.
+ */ + void read(int docId, StoredFields storedFields, Builder builder) throws IOException; + } + + interface AllReader extends ColumnAtATimeReader, RowStrideReader {} + + interface StoredFields { + Source source(); + + /** + * @return the ID for the current document + */ + String id(); + + /** + * @return the routing path for the current document + */ + String routing(); + + /** + * @return stored fields for the current document + */ + Map<String, List<Object>> storedFields(); + } + + ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException; + + RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException; + + StoredFieldsSpec rowStrideStoredFieldSpec(); /** * Does this loader support loading bytes via calling {@link #ordinals}. */ - default boolean supportsOrdinals() { - return false; - } + boolean supportsOrdinals(); /** * Load ordinals for the provided context. */ - default SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { - throw new IllegalStateException("ordinals not supported"); + SortedSetDocValues ordinals(LeafReaderContext context) throws IOException; + + /** + * Load blocks with only null.
+ */ + BlockLoader CONSTANT_NULLS = new BlockLoader() { + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.nulls(expectedCount); + } + + @Override + public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) { + return new ConstantNullsReader(); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) { + return new ConstantNullsReader(); + } + + @Override + public StoredFieldsSpec rowStrideStoredFieldSpec() { + return StoredFieldsSpec.NO_REQUIREMENTS; + } + + @Override + public boolean supportsOrdinals() { + return false; + } + + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "ConstantNull"; + } + }; + + /** + * Implementation of {@link ColumnAtATimeReader} and {@link RowStrideReader} that always + * loads {@code null}. + */ + class ConstantNullsReader implements AllReader { + @Override + public Block read(BlockFactory factory, Docs docs) throws IOException { + return factory.constantNulls(docs.count()); + } + + @Override + public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { + builder.appendNull(); + } + + @Override + public boolean canReuse(int startingDocID) { + return true; + } + + @Override + public String toString() { + return "constant_nulls"; + } } /** - * A list of documents to load. + * Load blocks with only {@code value}. 
+ */ + static BlockLoader constantBytes(BytesRef value) { + return new BlockLoader() { + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) { + return new ColumnAtATimeReader() { + @Override + public Block read(BlockFactory factory, Docs docs) { + return factory.constantBytes(value, docs.count()); + } + + @Override + public boolean canReuse(int startingDocID) { + return true; + } + + @Override + public String toString() { + return "constant[" + value + "]"; + } + }; + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) { + return new RowStrideReader() { + @Override + public void read(int docId, StoredFields storedFields, Builder builder) { + ((BlockLoader.BytesRefBuilder) builder).appendBytesRef(value); + } + + @Override + public boolean canReuse(int startingDocID) { + return true; + } + + @Override + public String toString() { + return "constant[" + value + "]"; + } + }; + } + + @Override + public StoredFieldsSpec rowStrideStoredFieldSpec() { + return StoredFieldsSpec.NO_REQUIREMENTS; + } + + @Override + public boolean supportsOrdinals() { + return false; + } + + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "ConstantBytes[" + value + "]"; + } + }; + } + + abstract class Delegating implements BlockLoader { + protected final BlockLoader delegate; + + protected Delegating(BlockLoader delegate) { + this.delegate = delegate; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return delegate.builder(factory, expectedCount); + } + + @Override + public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException { + ColumnAtATimeReader reader = delegate.columnAtATimeReader(context); + if 
(reader == null) { + return null; + } + return new ColumnAtATimeReader() { + @Override + public Block read(BlockFactory factory, Docs docs) throws IOException { + return reader.read(factory, docs); + } + + @Override + public boolean canReuse(int startingDocID) { + return reader.canReuse(startingDocID); + } + + @Override + public String toString() { + return "Delegating[to=" + delegatingTo() + ", impl=" + reader + "]"; + } + }; + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { + RowStrideReader reader = delegate.rowStrideReader(context); + if (reader == null) { + return null; + } + return new RowStrideReader() { + @Override + public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { + reader.read(docId, storedFields, builder); + } + + @Override + public boolean canReuse(int startingDocID) { + return reader.canReuse(startingDocID); + } + + @Override + public String toString() { + return "Delegating[to=" + delegatingTo() + ", impl=" + reader + "]"; + } + }; + } + + @Override + public StoredFieldsSpec rowStrideStoredFieldSpec() { + return delegate.rowStrideStoredFieldSpec(); + } + + @Override + public boolean supportsOrdinals() { + return delegate.supportsOrdinals(); + } + + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { + return delegate.ordinals(context); + } + + protected abstract String delegatingTo(); + + @Override + public final String toString() { + return "Delegating[to=" + delegatingTo() + ", impl=" + delegate + "]"; + } + } + + /** + * A list of documents to load. Documents are always in non-decreasing order. */ interface Docs { int count(); @@ -55,7 +325,7 @@ interface Docs { * production code. That implementation sits in the "compute" project. The is * also a test implementation, but there may be no more other implementations. 
*/ - interface BuilderFactory { + interface BlockFactory { /** * Build a builder to load booleans as loaded from doc values. Doc values * load booleans deduplicated and in sorted order. @@ -112,11 +382,21 @@ interface BuilderFactory { LongBuilder longs(int expectedCount); /** - * Build a builder that can only load null values. - * TODO this should return a block directly instead of a builder + * Build a builder to load only {@code null}s. */ Builder nulls(int expectedCount); + /** + * Build a block that contains only {@code null}. + */ + Block constantNulls(int size); + + /** + * Build a block that contains {@code value} repeated + * {@code size} times. + */ + Block constantBytes(BytesRef value, int size); + /** * Build a reader for reading keyword ordinals. */ @@ -129,7 +409,7 @@ interface BuilderFactory { * Marker interface for block results. The compute engine has a fleshed * out implementation. */ - interface Block {} + interface Block extends Releasable {} /** * A builder for typed values. For each document you may either call diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoaderStoredFieldsFromLeafLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoaderStoredFieldsFromLeafLoader.java new file mode 100644 index 0000000000000..8b1b794f1df55 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoaderStoredFieldsFromLeafLoader.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; +import org.elasticsearch.search.lookup.Source; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +public class BlockLoaderStoredFieldsFromLeafLoader implements BlockLoader.StoredFields { + private final LeafStoredFieldLoader loader; + private final boolean loadSource; + private Source source; + + public BlockLoaderStoredFieldsFromLeafLoader(LeafStoredFieldLoader loader, boolean loadSource) { + this.loader = loader; + this.loadSource = loadSource; + } + + public void advanceTo(int doc) throws IOException { + loader.advanceTo(doc); + if (loadSource) { + source = Source.fromBytes(loader.source()); + } + } + + @Override + public Source source() { + return source; + } + + @Override + public String id() { + return loader.id(); + } + + @Override + public String routing() { + return loader.routing(); + } + + @Override + public Map<String, List<Object>> storedFields() { + return loader.storedFields(); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java index 1261a3612d3cb..289b28949cdab 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java @@ -8,172 +8,32 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; -import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; -import org.elasticsearch.search.lookup.Source; +import org.apache.lucene.util.UnicodeUtil; +import org.elasticsearch.search.fetch.StoredFieldsSpec; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Set; /**
* Loads values from {@code _source}. This whole process is very slow and cast-tastic, * so it doesn't really try to avoid megamorphic invocations. It's just going to be * slow. - * - * Note that this extends {@link BlockDocValuesReader} because it pretends to load - * doc values because, for now, ESQL only knows how to load things in a doc values - * order. */ -public abstract class BlockSourceReader extends BlockDocValuesReader { - /** - * Read {@code boolean}s from {@code _source}. - */ - public static BlockLoader booleans(ValueFetcher fetcher) { - StoredFieldLoader loader = StoredFieldLoader.create(true, Set.of()); - return context -> new BlockSourceReader(fetcher, loader.getLoader(context, null)) { - @Override - public BlockLoader.Builder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.booleans(expectedCount); - } - - @Override - protected void append(BlockLoader.Builder builder, Object v) { - ((BlockLoader.BooleanBuilder) builder).appendBoolean((Boolean) v); - } - - @Override - public String toString() { - return "SourceBooleans"; - } - }; - } - - /** - * Read {@link BytesRef}s from {@code _source}. - */ - public static BlockLoader bytesRefs(ValueFetcher fetcher) { - StoredFieldLoader loader = StoredFieldLoader.create(true, Set.of()); - return context -> new BlockSourceReader(fetcher, loader.getLoader(context, null)) { - BytesRef scratch = new BytesRef(); - - @Override - public BlockLoader.Builder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.bytesRefs(expectedCount); - } - - @Override - protected void append(BlockLoader.Builder builder, Object v) { - ((BlockLoader.BytesRefBuilder) builder).appendBytesRef(toBytesRef(scratch, (String) v)); - } - - @Override - public String toString() { - return "SourceBytes"; - } - }; - } - - /** - * Read {@code double}s from {@code _source}. 
- */ - public static BlockLoader doubles(ValueFetcher fetcher) { - StoredFieldLoader loader = StoredFieldLoader.create(true, Set.of()); - return context -> new BlockSourceReader(fetcher, loader.getLoader(context, null)) { - @Override - public BlockLoader.Builder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.doubles(expectedCount); - } - - @Override - protected void append(BlockLoader.Builder builder, Object v) { - ((BlockLoader.DoubleBuilder) builder).appendDouble(((Number) v).doubleValue()); - } - - @Override - public String toString() { - return "SourceDoubles"; - } - }; - } - - /** - * Read {@code int}s from {@code _source}. - */ - public static BlockLoader ints(ValueFetcher fetcher) { - StoredFieldLoader loader = StoredFieldLoader.create(true, Set.of()); - return context -> new BlockSourceReader(fetcher, loader.getLoader(context, null)) { - @Override - public BlockLoader.Builder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.ints(expectedCount); - } - - @Override - protected void append(BlockLoader.Builder builder, Object v) { - ((BlockLoader.IntBuilder) builder).appendInt(((Number) v).intValue()); - } - - @Override - public String toString() { - return "SourceInts"; - } - }; - } - - /** - * Read {@code long}s from {@code _source}. 
- */ - public static BlockLoader longs(ValueFetcher fetcher) { - StoredFieldLoader loader = StoredFieldLoader.create(true, Set.of()); - return context -> new BlockSourceReader(fetcher, loader.getLoader(context, null)) { - @Override - public BlockLoader.Builder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.longs(expectedCount); - } - - @Override - protected void append(BlockLoader.Builder builder, Object v) { - ((BlockLoader.LongBuilder) builder).appendLong(((Number) v).longValue()); - } - - @Override - public String toString() { - return "SourceLongs"; - } - }; - } - +public abstract class BlockSourceReader implements BlockLoader.RowStrideReader { private final ValueFetcher fetcher; - private final LeafStoredFieldLoader loader; private final List ignoredValues = new ArrayList<>(); - private int docID = -1; - BlockSourceReader(ValueFetcher fetcher, LeafStoredFieldLoader loader) { + BlockSourceReader(ValueFetcher fetcher) { this.fetcher = fetcher; - this.loader = loader; - } - - @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) throws IOException { - try (BlockLoader.Builder builder = builder(factory, docs.count())) { - for (int i = 0; i < docs.count(); i++) { - int doc = docs.get(i); - if (doc < this.docID) { - throw new IllegalStateException("docs within same block must be in order"); - } - readValuesFromSingleDoc(doc, builder); - } - return builder.build(); - } } @Override - public void readValuesFromSingleDoc(int doc, BlockLoader.Builder builder) throws IOException { - this.docID = doc; - loader.advanceTo(doc); - List values = fetcher.fetchValues(Source.fromBytes(loader.source()), doc, ignoredValues); + public final void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + List values = fetcher.fetchValues(storedFields.source(), docId, ignoredValues); ignoredValues.clear(); // TODO do something with these? 
if (values == null) { builder.appendNull(); @@ -193,7 +53,213 @@ public void readValuesFromSingleDoc(int doc, BlockLoader.Builder builder) throws protected abstract void append(BlockLoader.Builder builder, Object v); @Override - public int docID() { - return docID; + public boolean canReuse(int startingDocID) { + return true; + } + + private abstract static class SourceBlockLoader implements BlockLoader { + @Override + public final ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException { + return null; + } + + @Override + public final StoredFieldsSpec rowStrideStoredFieldSpec() { + return StoredFieldsSpec.NEEDS_SOURCE; + } + + @Override + public final boolean supportsOrdinals() { + return false; + } + + @Override + public final SortedSetDocValues ordinals(LeafReaderContext context) { + throw new UnsupportedOperationException(); + } + } + + public static class BooleansBlockLoader extends SourceBlockLoader { + private final ValueFetcher fetcher; + + public BooleansBlockLoader(ValueFetcher fetcher) { + this.fetcher = fetcher; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.booleans(expectedCount); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) { + return new Booleans(fetcher); + } + } + + private static class Booleans extends BlockSourceReader { + Booleans(ValueFetcher fetcher) { + super(fetcher); + } + + @Override + protected void append(BlockLoader.Builder builder, Object v) { + ((BlockLoader.BooleanBuilder) builder).appendBoolean((Boolean) v); + } + + @Override + public String toString() { + return "BlockSourceReader.Booleans"; + } + } + + public static class BytesRefsBlockLoader extends SourceBlockLoader { + private final ValueFetcher fetcher; + + public BytesRefsBlockLoader(ValueFetcher fetcher) { + this.fetcher = fetcher; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return 
factory.bytesRefs(expectedCount); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) { + return new BytesRefs(fetcher); + } + } + + private static class BytesRefs extends BlockSourceReader { + BytesRef scratch = new BytesRef(); + + BytesRefs(ValueFetcher fetcher) { + super(fetcher); + } + + @Override + protected void append(BlockLoader.Builder builder, Object v) { + ((BlockLoader.BytesRefBuilder) builder).appendBytesRef(toBytesRef(scratch, (String) v)); + } + + @Override + public String toString() { + return "BlockSourceReader.Bytes"; + } + } + + public static class DoublesBlockLoader extends SourceBlockLoader { + private final ValueFetcher fetcher; + + public DoublesBlockLoader(ValueFetcher fetcher) { + this.fetcher = fetcher; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.doubles(expectedCount); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) { + return new Doubles(fetcher); + } + } + + private static class Doubles extends BlockSourceReader { + Doubles(ValueFetcher fetcher) { + super(fetcher); + } + + @Override + protected void append(BlockLoader.Builder builder, Object v) { + ((BlockLoader.DoubleBuilder) builder).appendDouble(((Number) v).doubleValue()); + } + + @Override + public String toString() { + return "BlockSourceReader.Doubles"; + } + } + + public static class IntsBlockLoader extends SourceBlockLoader { + private final ValueFetcher fetcher; + + public IntsBlockLoader(ValueFetcher fetcher) { + this.fetcher = fetcher; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.ints(expectedCount); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) { + return new Ints(fetcher); + } + } + + private static class Ints extends BlockSourceReader { + Ints(ValueFetcher fetcher) { + super(fetcher); + } + + @Override + protected void append(BlockLoader.Builder 
builder, Object v) { + ((BlockLoader.IntBuilder) builder).appendInt(((Number) v).intValue()); + } + + @Override + public String toString() { + return "BlockSourceReader.Ints"; + } + } + + public static class LongsBlockLoader extends SourceBlockLoader { + private final ValueFetcher fetcher; + + public LongsBlockLoader(ValueFetcher fetcher) { + this.fetcher = fetcher; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.longs(expectedCount); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) { + return new Longs(fetcher); + } + } + + private static class Longs extends BlockSourceReader { + Longs(ValueFetcher fetcher) { + super(fetcher); + } + + @Override + protected void append(BlockLoader.Builder builder, Object v) { + ((BlockLoader.LongBuilder) builder).appendLong(((Number) v).longValue()); + } + + @Override + public String toString() { + return "BlockSourceReader.Longs"; + } + } + + /** + * Convert a {@link String} into a utf-8 {@link BytesRef}. 
+ */ + static BytesRef toBytesRef(BytesRef scratch, String v) { + int len = UnicodeUtil.maxUTF8Length(v.length()); + if (scratch.bytes.length < len) { + scratch.bytes = new byte[len]; + } + scratch.length = UnicodeUtil.UTF16toUTF8(v, 0, v.length(), scratch.bytes); + return scratch; } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockStoredFieldsReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockStoredFieldsReader.java index 5984482fd9441..043ca38b1c78b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockStoredFieldsReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockStoredFieldsReader.java @@ -9,10 +9,11 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; -import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.BlockLoader.BytesRefBuilder; +import org.elasticsearch.search.fetch.StoredFieldsSpec; import java.io.IOException; import java.util.List; @@ -27,86 +28,101 @@ * doc values because, for now, ESQL only knows how to load things in a doc values * order. 
*/ -public abstract class BlockStoredFieldsReader extends BlockDocValuesReader { - public static BlockLoader bytesRefsFromBytesRefs(String field) { - StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(field)); - return context -> new Bytes(loader.getLoader(context, null), field) { - @Override - protected BytesRef toBytesRef(Object v) { - return (BytesRef) v; - } - }; +public abstract class BlockStoredFieldsReader implements BlockLoader.RowStrideReader { + @Override + public boolean canReuse(int startingDocID) { + return true; } - public static BlockLoader bytesRefsFromStrings(String field) { - StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(field)); - return context -> new Bytes(loader.getLoader(context, null), field) { - private final BytesRef scratch = new BytesRef(); + private abstract static class StoredFieldsBlockLoader implements BlockLoader { + protected final String field; - @Override - protected BytesRef toBytesRef(Object v) { - return toBytesRef(scratch, (String) v); - } - }; - } + StoredFieldsBlockLoader(String field) { + this.field = field; + } - public static BlockLoader id() { - StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(IdFieldMapper.NAME)); - return context -> new Id(loader.getLoader(context, null)); - } + @Override + public final ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException { + return null; + } - private final LeafStoredFieldLoader loader; - private int docID = -1; + @Override + public final StoredFieldsSpec rowStrideStoredFieldSpec() { + return new StoredFieldsSpec(false, false, Set.of(field)); + } - protected BlockStoredFieldsReader(LeafStoredFieldLoader loader) { - this.loader = loader; - } + @Override + public final boolean supportsOrdinals() { + return false; + } - @Override - public final BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) throws IOException { - try (BlockLoader.Builder builder = 
builder(factory, docs.count())) { - for (int i = 0; i < docs.count(); i++) { - readValuesFromSingleDoc(docs.get(i), builder); - } - return builder.build(); + @Override + public final SortedSetDocValues ordinals(LeafReaderContext context) { + throw new UnsupportedOperationException(); } } - @Override - public final void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) throws IOException { - if (docId < this.docID) { - throw new IllegalStateException("docs within same block must be in order"); + /** + * Load {@link BytesRef} blocks from stored {@link BytesRef}s. + */ + public static class BytesFromBytesRefsBlockLoader extends StoredFieldsBlockLoader { + public BytesFromBytesRefsBlockLoader(String field) { + super(field); + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { + return new Bytes(field) { + @Override + protected BytesRef toBytesRef(Object v) { + return (BytesRef) v; + } + }; } - this.docID = docId; - loader.advanceTo(docId); - read(loader, builder); } - protected abstract void read(LeafStoredFieldLoader loader, BlockLoader.Builder builder) throws IOException; + /** + * Load {@link BytesRef} blocks from stored {@link String}s. 
+ */ + public static class BytesFromStringsBlockLoader extends StoredFieldsBlockLoader { + public BytesFromStringsBlockLoader(String field) { + super(field); + } - @Override - public final int docID() { - return docID; + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { + return new Bytes(field) { + private final BytesRef scratch = new BytesRef(); + + @Override + protected BytesRef toBytesRef(Object v) { + return BlockSourceReader.toBytesRef(scratch, (String) v); + } + }; + } } private abstract static class Bytes extends BlockStoredFieldsReader { private final String field; - Bytes(LeafStoredFieldLoader loader, String field) { - super(loader); + Bytes(String field) { this.field = field; } - @Override - public BytesRefBuilder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.bytesRefs(expectedCount); - } - protected abstract BytesRef toBytesRef(Object v); @Override - protected void read(LeafStoredFieldLoader loader, BlockLoader.Builder builder) throws IOException { - List values = loader.storedFields().get(field); + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + List values = storedFields.storedFields().get(field); if (values == null) { builder.appendNull(); return; @@ -128,21 +144,31 @@ public String toString() { } } - private static class Id extends BlockStoredFieldsReader { - private final BytesRef scratch = new BytesRef(); - - Id(LeafStoredFieldLoader loader) { - super(loader); + /** + * Load {@link BytesRef} blocks from stored {@link String}s. 
+ */ + public static class IdBlockLoader extends StoredFieldsBlockLoader { + public IdBlockLoader() { + super(IdFieldMapper.NAME); } @Override - public BlockLoader.BytesRefBuilder builder(BlockLoader.BuilderFactory factory, int expectedCount) { + public Builder builder(BlockFactory factory, int expectedCount) { return factory.bytesRefs(expectedCount); } @Override - protected void read(LeafStoredFieldLoader loader, BlockLoader.Builder builder) throws IOException { - ((BytesRefBuilder) builder).appendBytesRef(toBytesRef(scratch, loader.id())); + public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { + return new Id(); + } + } + + private static class Id extends BlockStoredFieldsReader { + private final BytesRef scratch = new BytesRef(); + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + ((BytesRefBuilder) builder).appendBytesRef(BlockSourceReader.toBytesRef(scratch, storedFields.id())); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java index a5793df3b82e0..7f175982dc28e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java @@ -257,9 +257,9 @@ public Boolean valueForDisplay(Object value) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (hasDocValues()) { - return BlockDocValuesReader.booleans(name()); + return new BlockDocValuesReader.BooleansBlockLoader(name()); } - return BlockSourceReader.booleans(sourceValueFetcher(blContext.sourcePaths(name()))); + return new BlockSourceReader.BooleansBlockLoader(sourceValueFetcher(blContext.sourcePaths(name()))); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptBlockDocValuesReader.java 
b/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptBlockDocValuesReader.java index b59df56791fbe..953e13dc69eb0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptBlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptBlockDocValuesReader.java @@ -8,14 +8,31 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.script.BooleanFieldScript; +import java.io.IOException; + /** * {@link BlockDocValuesReader} implementation for {@code boolean} scripts. */ public class BooleanScriptBlockDocValuesReader extends BlockDocValuesReader { - public static BlockLoader blockLoader(BooleanFieldScript.LeafFactory factory) { - return context -> new BooleanScriptBlockDocValuesReader(factory.newInstance(context)); + static class BooleanScriptBlockLoader extends DocValuesBlockLoader { + private final BooleanFieldScript.LeafFactory factory; + + BooleanScriptBlockLoader(BooleanFieldScript.LeafFactory factory) { + this.factory = factory; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.booleans(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + return new BooleanScriptBlockDocValuesReader(factory.newInstance(context)); + } } private final BooleanFieldScript script; @@ -26,19 +43,14 @@ public static BlockLoader blockLoader(BooleanFieldScript.LeafFactory factory) { } @Override - public int docID() { + public int docId() { return docId; } @Override - public BlockLoader.BooleanBuilder builder(BlockLoader.BuilderFactory factory, int expectedCount) { + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { // Note that we don't emit falses before trues so we conform to the doc values contract and can use booleansFromDocValues - return factory.booleansFromDocValues(expectedCount); - } - 
- @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) { - try (BlockLoader.BooleanBuilder builder = builder(factory, docs.count())) { + try (BlockLoader.BooleanBuilder builder = factory.booleans(docs.count())) { for (int i = 0; i < docs.count(); i++) { read(docs.get(i), builder); } @@ -47,7 +59,7 @@ public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoa } @Override - public void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) { + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { this.docId = docId; read(docId, (BlockLoader.BooleanBuilder) builder); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptFieldType.java index 6e3876644567f..749bb279cfed4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptFieldType.java @@ -112,7 +112,7 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return BooleanScriptBlockDocValuesReader.blockLoader(leafFactory(blContext.lookup())); + return new BooleanScriptBlockDocValuesReader.BooleanScriptBlockLoader(leafFactory(blContext.lookup())); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index 9d12fc6910d66..e90bea103c4cb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -775,9 +775,9 @@ public Function pointReaderIfPossible() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (hasDocValues()) { 
- return BlockDocValuesReader.longs(name()); + return new BlockDocValuesReader.LongsBlockLoader(name()); } - return BlockSourceReader.longs(sourceValueFetcher(blContext.sourcePaths(name()))); + return new BlockSourceReader.LongsBlockLoader(sourceValueFetcher(blContext.sourcePaths(name()))); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateScriptBlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/DateScriptBlockDocValuesReader.java index ad630a71870a4..a5303f27573eb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateScriptBlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateScriptBlockDocValuesReader.java @@ -8,14 +8,31 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.script.DateFieldScript; +import java.io.IOException; + /** * {@link BlockDocValuesReader} implementation for date scripts. */ public class DateScriptBlockDocValuesReader extends BlockDocValuesReader { - public static BlockLoader blockLoader(DateFieldScript.LeafFactory factory) { - return context -> new DateScriptBlockDocValuesReader(factory.newInstance(context)); + static class DateScriptBlockLoader extends DocValuesBlockLoader { + private final DateFieldScript.LeafFactory factory; + + DateScriptBlockLoader(DateFieldScript.LeafFactory factory) { + this.factory = factory; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.longs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + return new DateScriptBlockDocValuesReader(factory.newInstance(context)); + } } private final DateFieldScript script; @@ -26,18 +43,14 @@ public static BlockLoader blockLoader(DateFieldScript.LeafFactory factory) { } @Override - public int docID() { + public int docId() { return docId; } @Override - public BlockLoader.LongBuilder 
builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.longs(expectedCount); // Note that we don't pre-sort our output so we can't use longsFromDocValues - } - - @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) { - try (BlockLoader.LongBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + // Note that we don't sort the values, so we can't use factory.longsFromDocValues + try (BlockLoader.LongBuilder builder = factory.longs(docs.count())) { for (int i = 0; i < docs.count(); i++) { read(docs.get(i), builder); } @@ -46,7 +59,7 @@ public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoa } @Override - public void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) { + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { this.docId = docId; read(docId, (BlockLoader.LongBuilder) builder); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/DateScriptFieldType.java index 8252d571dce68..238f7488f6b54 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateScriptFieldType.java @@ -181,7 +181,7 @@ public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return DateScriptBlockDocValuesReader.blockLoader(leafFactory(blContext.lookup())); + return new DateScriptBlockDocValuesReader.DateScriptBlockLoader(leafFactory(blContext.lookup())); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptBlockDocValuesReader.java
b/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptBlockDocValuesReader.java index 4e317a3ed11cb..a98f5ff661a78 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptBlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptBlockDocValuesReader.java @@ -8,14 +8,31 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.script.DoubleFieldScript; +import java.io.IOException; + /** * {@link BlockDocValuesReader} implementation for {@code double} scripts. */ public class DoubleScriptBlockDocValuesReader extends BlockDocValuesReader { - public static BlockLoader blockLoader(DoubleFieldScript.LeafFactory factory) { - return context -> new DoubleScriptBlockDocValuesReader(factory.newInstance(context)); + static class DoubleScriptBlockLoader extends DocValuesBlockLoader { + private final DoubleFieldScript.LeafFactory factory; + + DoubleScriptBlockLoader(DoubleFieldScript.LeafFactory factory) { + this.factory = factory; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.doubles(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + return new DoubleScriptBlockDocValuesReader(factory.newInstance(context)); + } } private final DoubleFieldScript script; @@ -26,18 +43,14 @@ public static BlockLoader blockLoader(DoubleFieldScript.LeafFactory factory) { } @Override - public int docID() { + public int docId() { return docId; } @Override - public BlockLoader.DoubleBuilder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.doubles(expectedCount); // Note that we don't pre-sort our output so we can't use doublesFromDocValues - } - - @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) { - try (BlockLoader.DoubleBuilder builder = builder(factory, 
docs.count())) { + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + // Note that we don't sort the values, so we can't use factory.doublesFromDocValues + try (BlockLoader.DoubleBuilder builder = factory.doubles(docs.count())) { for (int i = 0; i < docs.count(); i++) { read(docs.get(i), builder); } @@ -46,7 +59,7 @@ public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoa } @Override - public void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) { + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { this.docId = docId; read(docId, (BlockLoader.DoubleBuilder) builder); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java index ef5c112ef212a..c3f7e782c219a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java @@ -107,7 +107,7 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return DoubleScriptBlockDocValuesReader.blockLoader(leafFactory(blContext.lookup())); + return new DoubleScriptBlockDocValuesReader.DoubleScriptBlockLoader(leafFactory(blContext.lookup())); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IndexFieldMapper.java
blockLoader(BlockLoaderContext blContext) { - // TODO build a constant block directly - BytesRef bytes = new BytesRef(blContext.indexName()); - return context -> new BlockDocValuesReader() { - private int docId; - - @Override - public int docID() { - return docId; - } - - @Override - public BlockLoader.BytesRefBuilder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.bytesRefs(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) { - try (BlockLoader.BytesRefBuilder builder = builder(factory, docs.count())) { - for (int i = 0; i < docs.count(); i++) { - builder.appendBytesRef(bytes); - } - return builder.build(); - } - } - - @Override - public void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) { - this.docId = docId; - ((BlockLoader.BytesRefBuilder) builder).appendBytesRef(bytes); - } - - @Override - public String toString() { - return "Index"; - } - }; + return BlockLoader.constantBytes(new BytesRef(blContext.indexName())); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java index 80fd384f15fb7..56a50c2dee0aa 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java @@ -408,7 +408,7 @@ public static Query rangeQuery( @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (hasDocValues()) { - return BlockDocValuesReader.bytesRefsFromOrds(name()); + return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name()); } return null; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpScriptBlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/IpScriptBlockDocValuesReader.java index 23229a6533cdb..ff063555ff05d 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/IpScriptBlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpScriptBlockDocValuesReader.java @@ -8,14 +8,31 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.script.IpFieldScript; +import java.io.IOException; + /** * {@link BlockDocValuesReader} implementation for keyword scripts. */ public class IpScriptBlockDocValuesReader extends BlockDocValuesReader { - public static BlockLoader blockLoader(IpFieldScript.LeafFactory factory) { - return context -> new IpScriptBlockDocValuesReader(factory.newInstance(context)); + static class IpScriptBlockLoader extends DocValuesBlockLoader { + private final IpFieldScript.LeafFactory factory; + + IpScriptBlockLoader(IpFieldScript.LeafFactory factory) { + this.factory = factory; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + return new IpScriptBlockDocValuesReader(factory.newInstance(context)); + } } private final IpFieldScript script; @@ -26,18 +43,14 @@ public static BlockLoader blockLoader(IpFieldScript.LeafFactory factory) { } @Override - public int docID() { + public int docId() { return docId; } @Override - public BlockLoader.BytesRefBuilder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.bytesRefs(expectedCount); // Note that we don't pre-sort our output so we can't use bytesRefsFromDocValues - } - - @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) { - try (BlockLoader.BytesRefBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + // Note that we don't pre-sort our output so we can't use 
bytesRefsFromDocValues + try (BlockLoader.BytesRefBuilder builder = factory.bytesRefs(docs.count())) { for (int i = 0; i < docs.count(); i++) { read(docs.get(i), builder); } @@ -46,7 +59,7 @@ public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoa } @Override - public void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) { + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { this.docId = docId; read(docId, (BlockLoader.BytesRefBuilder) builder); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/IpScriptFieldType.java index 0e56b30e2d5d9..4a64184d5d164 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpScriptFieldType.java @@ -211,6 +211,6 @@ private Query cidrQuery(String term, SearchExecutionContext context) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return IpScriptBlockDocValuesReader.blockLoader(leafFactory(blContext.lookup())); + return new IpScriptBlockDocValuesReader.IpScriptBlockLoader(leafFactory(blContext.lookup())); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index f15bb0069570f..caac5b7f3bfe0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -580,7 +580,7 @@ NamedAnalyzer normalizer() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (hasDocValues()) { - return BlockDocValuesReader.bytesRefsFromOrds(name()); + return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name()); } if (isSyntheticSource) { if (false == isStored()) { @@ -590,9 +590,9 @@ 
public BlockLoader blockLoader(BlockLoaderContext blContext) { + "] is only supported in synthetic _source index if it creates doc values or stored fields" ); } - return BlockStoredFieldsReader.bytesRefsFromBytesRefs(name()); + return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name()); } - return BlockSourceReader.bytesRefs(sourceValueFetcher(blContext.sourcePaths(name()))); + return new BlockSourceReader.BytesRefsBlockLoader(sourceValueFetcher(blContext.sourcePaths(name()))); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptBlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptBlockDocValuesReader.java index 6afbcae50d31f..df5ba51755c2a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptBlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptBlockDocValuesReader.java @@ -8,15 +8,32 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.script.StringFieldScript; +import java.io.IOException; + /** * {@link BlockDocValuesReader} implementation for keyword scripts. 
*/ public class KeywordScriptBlockDocValuesReader extends BlockDocValuesReader { - public static BlockLoader blockLoader(StringFieldScript.LeafFactory factory) { - return context -> new KeywordScriptBlockDocValuesReader(factory.newInstance(context)); + static class KeywordScriptBlockLoader extends DocValuesBlockLoader { + private final StringFieldScript.LeafFactory factory; + + KeywordScriptBlockLoader(StringFieldScript.LeafFactory factory) { + this.factory = factory; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + return new KeywordScriptBlockDocValuesReader(factory.newInstance(context)); + } } private final BytesRefBuilder bytesBuild = new BytesRefBuilder(); @@ -28,18 +45,14 @@ public static BlockLoader blockLoader(StringFieldScript.LeafFactory factory) { } @Override - public int docID() { + public int docId() { return docId; } @Override - public BlockLoader.BytesRefBuilder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.bytesRefs(expectedCount); // Note that we don't pre-sort our output so we can't use bytesRefsFromDocValues - } - - @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) { - try (BlockLoader.BytesRefBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + // Note that we don't pre-sort our output so we can't use bytesRefsFromDocValues + try (BlockLoader.BytesRefBuilder builder = factory.bytesRefs(docs.count())) { for (int i = 0; i < docs.count(); i++) { read(docs.get(i), builder); } @@ -48,7 +61,7 @@ public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoa } @Override - public void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) { + public void 
read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { this.docId = docId; read(docId, (BlockLoader.BytesRefBuilder) builder); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java index 879a28d4c76c8..188f0ae508fcc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java @@ -112,7 +112,7 @@ public Object valueForDisplay(Object value) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return KeywordScriptBlockDocValuesReader.blockLoader(leafFactory(blContext.lookup())); + return new KeywordScriptBlockDocValuesReader.KeywordScriptBlockLoader(leafFactory(blContext.lookup())); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptBlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptBlockDocValuesReader.java index 91c099cd2813b..73ad359147571 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptBlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptBlockDocValuesReader.java @@ -8,14 +8,31 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.script.LongFieldScript; +import java.io.IOException; + /** * {@link BlockDocValuesReader} implementation for {@code long} scripts. 
*/ public class LongScriptBlockDocValuesReader extends BlockDocValuesReader { - public static BlockLoader blockLoader(LongFieldScript.LeafFactory factory) { - return context -> new LongScriptBlockDocValuesReader(factory.newInstance(context)); + static class LongScriptBlockLoader extends DocValuesBlockLoader { + private final LongFieldScript.LeafFactory factory; + + LongScriptBlockLoader(LongFieldScript.LeafFactory factory) { + this.factory = factory; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.longs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + return new LongScriptBlockDocValuesReader(factory.newInstance(context)); + } } private final LongFieldScript script; @@ -26,18 +43,14 @@ public static BlockLoader blockLoader(LongFieldScript.LeafFactory factory) { } @Override - public int docID() { + public int docId() { return docId; } @Override - public BlockLoader.LongBuilder builder(BlockLoader.BuilderFactory factory, int expectedCount) { - return factory.longs(expectedCount); // Note that we don't pre-sort our output so we can't use longsFromDocValues - } - - @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) { - try (BlockLoader.LongBuilder builder = builder(factory, docs.count())) { + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + // Note that we don't pre-sort our output so we can't use longsFromDocValues + try (BlockLoader.LongBuilder builder = factory.longs(docs.count())) { for (int i = 0; i < docs.count(); i++) { read(docs.get(i), builder); } @@ -46,7 +59,7 @@ public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoa } @Override - public void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) { + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) 
throws IOException { this.docId = docId; read(docId, (BlockLoader.LongBuilder) builder); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java index f89babe32d0a9..f099ee3625922 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java @@ -107,7 +107,7 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return LongScriptBlockDocValuesReader.blockLoader(leafFactory(blContext.lookup())); + return new LongScriptBlockDocValuesReader.LongScriptBlockLoader(leafFactory(blContext.lookup())); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index 84e9e84fb8ceb..091e3c61764b0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -440,12 +440,12 @@ protected void writeValue(XContentBuilder b, long value) throws IOException { @Override BlockLoader blockLoaderFromDocValues(String fieldName) { - return BlockDocValuesReader.doubles(fieldName, l -> HalfFloatPoint.sortableShortToHalfFloat((short) l)); + return new BlockDocValuesReader.DoublesBlockLoader(fieldName, l -> HalfFloatPoint.sortableShortToHalfFloat((short) l)); } @Override BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher) { - return BlockSourceReader.doubles(sourceValueFetcher); + return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher); } }, FLOAT("float", NumericType.FLOAT) { @@ -602,12 +602,12 @@ protected void writeValue(XContentBuilder b, long value) throws IOException { @Override BlockLoader 
blockLoaderFromDocValues(String fieldName) { - return BlockDocValuesReader.doubles(fieldName, l -> NumericUtils.sortableIntToFloat((int) l)); + return new BlockDocValuesReader.DoublesBlockLoader(fieldName, l -> NumericUtils.sortableIntToFloat((int) l)); } @Override BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher) { - return BlockSourceReader.doubles(sourceValueFetcher); + return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher); } }, DOUBLE("double", NumericType.DOUBLE) { @@ -742,12 +742,12 @@ protected void writeValue(XContentBuilder b, long value) throws IOException { @Override BlockLoader blockLoaderFromDocValues(String fieldName) { - return BlockDocValuesReader.doubles(fieldName, NumericUtils::sortableLongToDouble); + return new BlockDocValuesReader.DoublesBlockLoader(fieldName, NumericUtils::sortableLongToDouble); } @Override BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher) { - return BlockSourceReader.doubles(sourceValueFetcher); + return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher); } }, BYTE("byte", NumericType.BYTE) { @@ -845,12 +845,12 @@ SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String @Override BlockLoader blockLoaderFromDocValues(String fieldName) { - return BlockDocValuesReader.ints(fieldName); + return new BlockDocValuesReader.IntsBlockLoader(fieldName); } @Override BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher) { - return BlockSourceReader.ints(sourceValueFetcher); + return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher); } }, SHORT("short", NumericType.SHORT) { @@ -944,12 +944,12 @@ SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String @Override BlockLoader blockLoaderFromDocValues(String fieldName) { - return BlockDocValuesReader.ints(fieldName); + return new BlockDocValuesReader.IntsBlockLoader(fieldName); } @Override BlockLoader blockLoaderFromSource(SourceValueFetcher 
sourceValueFetcher) { - return BlockSourceReader.ints(sourceValueFetcher); + return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher); } }, INTEGER("integer", NumericType.INT) { @@ -1111,12 +1111,12 @@ SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String @Override BlockLoader blockLoaderFromDocValues(String fieldName) { - return BlockDocValuesReader.ints(fieldName); + return new BlockDocValuesReader.IntsBlockLoader(fieldName); } @Override BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher) { - return BlockSourceReader.ints(sourceValueFetcher); + return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher); } }, LONG("long", NumericType.LONG) { @@ -1248,12 +1248,12 @@ SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String @Override BlockLoader blockLoaderFromDocValues(String fieldName) { - return BlockDocValuesReader.longs(fieldName); + return new BlockDocValuesReader.LongsBlockLoader(fieldName); } @Override BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher) { - return BlockSourceReader.longs(sourceValueFetcher); + return new BlockSourceReader.LongsBlockLoader(sourceValueFetcher); } }; @@ -1656,7 +1656,7 @@ public Function pointReaderIfPossible() { public BlockLoader blockLoader(BlockLoaderContext blContext) { if (indexMode == IndexMode.TIME_SERIES && metricType == TimeSeriesParams.MetricType.COUNTER) { // Counters are not supported by ESQL so we load them in null - return BlockDocValuesReader.nulls(); + return BlockLoader.CONSTANT_NULLS; } if (hasDocValues()) { return type.blockLoaderFromDocValues(name()); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ProvidedIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ProvidedIdFieldMapper.java index f681d54ebbead..d8a4177ee3211 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ProvidedIdFieldMapper.java +++ 
b/server/src/main/java/org/elasticsearch/index/mapper/ProvidedIdFieldMapper.java @@ -119,7 +119,7 @@ public Query termsQuery(Collection values, SearchExecutionContext context) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return BlockStoredFieldsReader.id(); + return new BlockStoredFieldsReader.IdBlockLoader(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 5a0d9c7c0cf79..420f92cfbf847 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -678,7 +678,7 @@ public TextFieldType( super(name, indexed, stored, false, tsi, meta); fielddata = false; this.isSyntheticSource = isSyntheticSource; - this.syntheticSourceDelegate = syntheticSourceDelegate; + this.syntheticSourceDelegate = syntheticSourceDelegate; // TODO rename to "exactDelegate" or something this.eagerGlobalOrdinals = eagerGlobalOrdinals; this.indexPhrases = indexPhrases; } @@ -939,11 +939,16 @@ public boolean isAggregatable() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (syntheticSourceDelegate != null) { - return syntheticSourceDelegate.blockLoader(blContext); + return new BlockLoader.Delegating(syntheticSourceDelegate.blockLoader(blContext)) { + @Override + protected String delegatingTo() { + return syntheticSourceDelegate.name(); + } + }; } if (isSyntheticSource) { if (isStored()) { - return BlockStoredFieldsReader.bytesRefsFromStrings(name()); + return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name()); } /* * We *shouldn't fall to this exception. 
The mapping should be @@ -957,7 +962,7 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) { + "] is not supported because synthetic _source is enabled and we don't have a way to load the fields" ); } - return BlockSourceReader.bytesRefs(SourceValueFetcher.toString(blContext.sourcePaths(name()))); + return new BlockSourceReader.BytesRefsBlockLoader(SourceValueFetcher.toString(blContext.sourcePaths(name()))); } @Override @@ -1034,6 +1039,10 @@ protected BytesRef storedToBytesRef(Object stored) { public boolean isSyntheticSource() { return isSyntheticSource; } + + KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate() { + return syntheticSourceDelegate; + } } public static class ConstantScoreTextFieldType extends TextFieldType { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java index 9d43ef398feac..9245e78602eb7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java @@ -89,7 +89,7 @@ public Query termsQuery(Collection values, SearchExecutionContext context) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return BlockStoredFieldsReader.id(); + return new BlockStoredFieldsReader.IdBlockLoader(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java index 54a44dd55caa4..8f69f6afe47db 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java @@ -56,7 +56,7 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format) @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - return 
BlockDocValuesReader.longs(name()); + return new BlockDocValuesReader.LongsBlockLoader(name()); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/fetch/StoredFieldsSpec.java b/server/src/main/java/org/elasticsearch/search/fetch/StoredFieldsSpec.java index 48aea98887ff0..87cbf9b1d6b85 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/StoredFieldsSpec.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/StoredFieldsSpec.java @@ -38,6 +38,9 @@ public boolean noRequirements() { * Combine these stored field requirements with those from another StoredFieldsSpec */ public StoredFieldsSpec merge(StoredFieldsSpec other) { + if (this == other) { + return this; + } Set mergedFields = new HashSet<>(this.requiredStoredFields); mergedFields.addAll(other.requiredStoredFields); return new StoredFieldsSpec( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/BooleanScriptFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/BooleanScriptFieldTypeTests.java index 8d5a47f08c663..d8f063ece35c0 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/BooleanScriptFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/BooleanScriptFieldTypeTests.java @@ -417,8 +417,8 @@ public void testBlockLoader() throws IOException { try (DirectoryReader reader = iw.getReader()) { BooleanScriptFieldType fieldType = build("xor_param", Map.of("param", false), OnScriptError.FAIL); List expected = List.of(false, true); - assertThat(blockLoaderReadValues(reader, fieldType), equalTo(expected)); - assertThat(blockLoaderReadValuesFromSingleDoc(reader, fieldType), equalTo(expected)); + assertThat(blockLoaderReadValuesFromColumnAtATimeReader(reader, fieldType), equalTo(expected)); + assertThat(blockLoaderReadValuesFromRowStrideReader(reader, fieldType), equalTo(expected)); } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateScriptFieldTypeTests.java 
b/server/src/test/java/org/elasticsearch/index/mapper/DateScriptFieldTypeTests.java index d1652b9f57716..eb3daf472ea2e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateScriptFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateScriptFieldTypeTests.java @@ -477,8 +477,11 @@ public void testBlockLoader() throws IOException { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{\"timestamp\": [1595432181355]}")))); try (DirectoryReader reader = iw.getReader()) { DateScriptFieldType fieldType = build("add_days", Map.of("days", 1), OnScriptError.FAIL); - assertThat(blockLoaderReadValues(reader, fieldType), equalTo(List.of(1595518581354L, 1595518581355L))); - assertThat(blockLoaderReadValuesFromSingleDoc(reader, fieldType), equalTo(List.of(1595518581354L, 1595518581355L))); + assertThat( + blockLoaderReadValuesFromColumnAtATimeReader(reader, fieldType), + equalTo(List.of(1595518581354L, 1595518581355L)) + ); + assertThat(blockLoaderReadValuesFromRowStrideReader(reader, fieldType), equalTo(List.of(1595518581354L, 1595518581355L))); } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DoubleScriptFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DoubleScriptFieldTypeTests.java index 0f05dad8098f4..d37e42e04edca 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DoubleScriptFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DoubleScriptFieldTypeTests.java @@ -236,8 +236,8 @@ public void testBlockLoader() throws IOException { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{\"foo\": [2]}")))); try (DirectoryReader reader = iw.getReader()) { DoubleScriptFieldType fieldType = build("add_param", Map.of("param", 1), OnScriptError.FAIL); - assertThat(blockLoaderReadValues(reader, fieldType), equalTo(List.of(2d, 3d))); - assertThat(blockLoaderReadValuesFromSingleDoc(reader, fieldType), equalTo(List.of(2d, 3d))); + 
assertThat(blockLoaderReadValuesFromColumnAtATimeReader(reader, fieldType), equalTo(List.of(2d, 3d))); + assertThat(blockLoaderReadValuesFromRowStrideReader(reader, fieldType), equalTo(List.of(2d, 3d))); } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpScriptFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpScriptFieldTypeTests.java index 56ca5f3dae89f..cd19bb50b842c 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IpScriptFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpScriptFieldTypeTests.java @@ -256,8 +256,8 @@ public void testBlockLoader() throws IOException { new BytesRef(InetAddressPoint.encode(InetAddresses.forString("192.168.0.1"))), new BytesRef(InetAddressPoint.encode(InetAddresses.forString("192.168.1.1"))) ); - assertThat(blockLoaderReadValues(reader, fieldType), equalTo(expected)); - assertThat(blockLoaderReadValuesFromSingleDoc(reader, fieldType), equalTo(expected)); + assertThat(blockLoaderReadValuesFromColumnAtATimeReader(reader, fieldType), equalTo(expected)); + assertThat(blockLoaderReadValuesFromRowStrideReader(reader, fieldType), equalTo(expected)); } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordScriptFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordScriptFieldTypeTests.java index 65f4c2e3ea6eb..ce705f2e9ae8b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordScriptFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordScriptFieldTypeTests.java @@ -382,9 +382,12 @@ public void testBlockLoader() throws IOException { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{\"foo\": [2]}")))); try (DirectoryReader reader = iw.getReader()) { KeywordScriptFieldType fieldType = build("append_param", Map.of("param", "-Suffix"), OnScriptError.FAIL); - assertThat(blockLoaderReadValues(reader, fieldType), equalTo(List.of(new 
BytesRef("1-Suffix"), new BytesRef("2-Suffix")))); assertThat( - blockLoaderReadValuesFromSingleDoc(reader, fieldType), + blockLoaderReadValuesFromColumnAtATimeReader(reader, fieldType), + equalTo(List.of(new BytesRef("1-Suffix"), new BytesRef("2-Suffix"))) + ); + assertThat( + blockLoaderReadValuesFromRowStrideReader(reader, fieldType), equalTo(List.of(new BytesRef("1-Suffix"), new BytesRef("2-Suffix"))) ); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/LongScriptFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/LongScriptFieldTypeTests.java index 1688cab24af3e..fd20b6c71e984 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/LongScriptFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/LongScriptFieldTypeTests.java @@ -269,8 +269,8 @@ public void testBlockLoader() throws IOException { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{\"foo\": [2]}")))); try (DirectoryReader reader = iw.getReader()) { LongScriptFieldType fieldType = build("add_param", Map.of("param", 1), OnScriptError.FAIL); - assertThat(blockLoaderReadValues(reader, fieldType), equalTo(List.of(2L, 3L))); - assertThat(blockLoaderReadValuesFromSingleDoc(reader, fieldType), equalTo(List.of(2L, 3L))); + assertThat(blockLoaderReadValuesFromColumnAtATimeReader(reader, fieldType), equalTo(List.of(2L, 3L))); + assertThat(blockLoaderReadValuesFromRowStrideReader(reader, fieldType), equalTo(List.of(2L, 3L))); } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index bbfeaaa8b9d69..b2a729d6868d2 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -1324,4 +1324,10 @@ public void testEmpty() throws Exception { assertFalse(dv.advanceExact(3)); }); } + + @Override + 
protected boolean supportsColumnAtATimeReader(MappedFieldType ft) { + TextFieldMapper.TextFieldType text = (TextFieldType) ft; + return text.syntheticSourceDelegate() != null && text.syntheticSourceDelegate().hasDocValues(); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java index 56ad35bee83d5..7eb2511f58206 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java @@ -383,11 +383,12 @@ public final void testCacheable() throws IOException { } } - protected final List blockLoaderReadValues(DirectoryReader reader, MappedFieldType fieldType) throws IOException { + protected final List blockLoaderReadValuesFromColumnAtATimeReader(DirectoryReader reader, MappedFieldType fieldType) + throws IOException { BlockLoader loader = fieldType.blockLoader(blContext()); List all = new ArrayList<>(); for (LeafReaderContext ctx : reader.leaves()) { - TestBlock block = (TestBlock) loader.reader(ctx).readValues(TestBlock.FACTORY, TestBlock.docs(ctx)); + TestBlock block = (TestBlock) loader.columnAtATimeReader(ctx).read(TestBlock.FACTORY, TestBlock.docs(ctx)); for (int i = 0; i < block.size(); i++) { all.add(block.get(i)); } @@ -395,15 +396,17 @@ protected final List blockLoaderReadValues(DirectoryReader reader, Mappe return all; } - protected final List blockLoaderReadValuesFromSingleDoc(DirectoryReader reader, MappedFieldType fieldType) throws IOException { + protected final List blockLoaderReadValuesFromRowStrideReader(DirectoryReader reader, MappedFieldType fieldType) + throws IOException { BlockLoader loader = fieldType.blockLoader(blContext()); List all = new ArrayList<>(); for (LeafReaderContext ctx : reader.leaves()) { - BlockDocValuesReader blockReader = 
loader.reader(ctx); - TestBlock block = (TestBlock) blockReader.builder(TestBlock.FACTORY, ctx.reader().numDocs()); + BlockLoader.RowStrideReader blockReader = loader.rowStrideReader(ctx); + BlockLoader.Builder builder = loader.builder(TestBlock.FACTORY, ctx.reader().numDocs()); for (int i = 0; i < ctx.reader().numDocs(); i++) { - blockReader.readValuesFromSingleDoc(i, block); + blockReader.read(i, null, builder); } + TestBlock block = (TestBlock) builder.build(); for (int i = 0; i < block.size(); i++) { all.add(block.get(i)); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index e34072fbf1668..d68324ff902e2 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -31,7 +31,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.CheckedConsumer; -import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; @@ -1240,19 +1239,19 @@ public final void testSyntheticEmptyListNoDocValuesLoader() throws IOException { assertNoDocValueLoader(b -> b.startArray("field").endArray()); } - public final void testBlockLoaderReadValues() throws IOException { - testBlockLoader(blockReader -> (TestBlock) blockReader.readValues(TestBlock.FACTORY, TestBlock.docs(0))); + public final void testBlockLoaderFromColumnReader() throws IOException { + testBlockLoader(true); } - public final void testBlockLoaderReadValuesFromSingleDoc() throws IOException { - testBlockLoader(blockReader -> { - TestBlock block = (TestBlock) blockReader.builder(TestBlock.FACTORY, 1); - blockReader.readValuesFromSingleDoc(0, block); - return block; - }); + 
public final void testBlockLoaderFromRowStrideReader() throws IOException { + testBlockLoader(false); + } + + protected boolean supportsColumnAtATimeReader(MappedFieldType ft) { + return ft.hasDocValues(); } - private void testBlockLoader(CheckedFunction body) throws IOException { + private void testBlockLoader(boolean columnReader) throws IOException { SyntheticSourceExample example = syntheticSourceSupport(false).example(5); MapperService mapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("field"); @@ -1289,7 +1288,25 @@ public Set sourcePaths(String name) { iw.addDocument(doc); iw.close(); try (DirectoryReader reader = DirectoryReader.open(directory)) { - TestBlock block = body.apply(loader.reader(reader.leaves().get(0))); + LeafReaderContext ctx = reader.leaves().get(0); + TestBlock block; + if (columnReader) { + if (supportsColumnAtATimeReader(mapper.fieldType("field"))) { + block = (TestBlock) loader.columnAtATimeReader(ctx).read(TestBlock.FACTORY, TestBlock.docs(0)); + } else { + assertNull(loader.columnAtATimeReader(ctx)); + return; + } + } else { + BlockLoaderStoredFieldsFromLeafLoader storedFieldsLoader = new BlockLoaderStoredFieldsFromLeafLoader( + StoredFieldLoader.fromSpec(loader.rowStrideStoredFieldSpec()).getLoader(ctx, null), + loader.rowStrideStoredFieldSpec().requiresSource() + ); + storedFieldsLoader.advanceTo(0); + BlockLoader.Builder builder = loader.builder(TestBlock.FACTORY, 1); + loader.rowStrideReader(ctx).read(0, storedFieldsLoader, builder); + block = (TestBlock) builder.build(); + } Object inBlock = block.get(0); if (inBlock != null) { if (inBlock instanceof List l) { @@ -1319,7 +1336,7 @@ public Set sourcePaths(String name) { } /** - * Matcher for {@link #testBlockLoaderReadValues} and {@link #testBlockLoaderReadValuesFromSingleDoc}. + * Matcher for {@link #testBlockLoaderFromColumnReader} and {@link #testBlockLoaderFromRowStrideReader}. 
*/ protected Matcher blockItemMatcher(Object expected) { return equalTo(expected); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java index 298acb9519532..30dece5767b61 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java @@ -11,7 +11,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.core.Nullable; import java.io.IOException; import java.io.UncheckedIOException; @@ -21,74 +20,130 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -public class TestBlock - implements - BlockLoader.BooleanBuilder, - BlockLoader.BytesRefBuilder, - BlockLoader.DoubleBuilder, - BlockLoader.IntBuilder, - BlockLoader.LongBuilder, - BlockLoader.SingletonOrdinalsBuilder, - BlockLoader.Block { - public static BlockLoader.BuilderFactory FACTORY = new BlockLoader.BuilderFactory() { +public class TestBlock implements BlockLoader.Block { + public static BlockLoader.BlockFactory FACTORY = new BlockLoader.BlockFactory() { @Override public BlockLoader.BooleanBuilder booleansFromDocValues(int expectedCount) { - return new TestBlock(null); + return booleans(expectedCount); } @Override public BlockLoader.BooleanBuilder booleans(int expectedCount) { - return new TestBlock(null); + class BooleansBuilder extends TestBlock.Builder implements BlockLoader.BooleanBuilder { + @Override + public BooleansBuilder appendBoolean(boolean value) { + add(value); + return this; + } + } + return new BooleansBuilder(); } @Override public BlockLoader.BytesRefBuilder bytesRefsFromDocValues(int expectedCount) { - return new TestBlock(null); + return bytesRefs(expectedCount); } @Override public BlockLoader.BytesRefBuilder bytesRefs(int 
expectedCount) { - return new TestBlock(null); + class BytesRefsBuilder extends TestBlock.Builder implements BlockLoader.BytesRefBuilder { + @Override + public BytesRefsBuilder appendBytesRef(BytesRef value) { + add(BytesRef.deepCopyOf(value)); + return this; + } + } + return new BytesRefsBuilder(); } @Override public BlockLoader.DoubleBuilder doublesFromDocValues(int expectedCount) { - return new TestBlock(null); + return doubles(expectedCount); } @Override public BlockLoader.DoubleBuilder doubles(int expectedCount) { - return new TestBlock(null); + class DoublesBuilder extends TestBlock.Builder implements BlockLoader.DoubleBuilder { + @Override + public DoublesBuilder appendDouble(double value) { + add(value); + return this; + } + } + return new DoublesBuilder(); } @Override public BlockLoader.IntBuilder intsFromDocValues(int expectedCount) { - return new TestBlock(null); + return ints(expectedCount); } @Override public BlockLoader.IntBuilder ints(int expectedCount) { - return new TestBlock(null); + class IntsBuilder extends TestBlock.Builder implements BlockLoader.IntBuilder { + @Override + public IntsBuilder appendInt(int value) { + add(value); + return this; + } + } + return new IntsBuilder(); } @Override public BlockLoader.LongBuilder longsFromDocValues(int expectedCount) { - return new TestBlock(null); + return longs(expectedCount); } @Override public BlockLoader.LongBuilder longs(int expectedCount) { - return new TestBlock(null); + class LongsBuilder extends TestBlock.Builder implements BlockLoader.LongBuilder { + @Override + public LongsBuilder appendLong(long value) { + add(value); + return this; + } + } + return new LongsBuilder(); } @Override public BlockLoader.Builder nulls(int expectedCount) { - return new TestBlock(null); + return longs(expectedCount); + } + + @Override + public BlockLoader.Block constantNulls(int size) { + BlockLoader.LongBuilder builder = longs(size); + for (int i = 0; i < size; i++) { + builder.appendNull(); + } + return 
builder.build(); + } + + @Override + public BlockLoader.Block constantBytes(BytesRef value, int size) { + BlockLoader.BytesRefBuilder builder = bytesRefs(size); + for (int i = 0; i < size; i++) { + builder.appendBytesRef(value); + } + return builder.build(); } @Override public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) { - return new TestBlock(ordinals); + class SingletonOrdsBuilder extends TestBlock.Builder implements BlockLoader.SingletonOrdinalsBuilder { + @Override + public SingletonOrdsBuilder appendOrd(int value) { + try { + add(ordinals.lookupOrd(value)); + return this; + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } + return new SingletonOrdsBuilder(); } }; @@ -120,13 +175,10 @@ public int get(int i) { }; } - private final SortedDocValues sortedDocValues; - private final List values = new ArrayList<>(); - - private List currentPosition = null; + private final List values; - private TestBlock(@Nullable SortedDocValues sortedDocValues) { - this.sortedDocValues = sortedDocValues; + private TestBlock(List values) { + this.values = values; } public Object get(int i) { @@ -138,73 +190,49 @@ public int size() { } @Override - public TestBlock appendNull() { - assertNull(currentPosition); - values.add(null); - return this; - } - - @Override - public TestBlock beginPositionEntry() { - assertNull(currentPosition); - currentPosition = new ArrayList<>(); - values.add(currentPosition); - return this; - } - - @Override - public TestBlock endPositionEntry() { - assertNotNull(currentPosition); - currentPosition = null; - return this; - } - - @Override - public TestBlock appendBoolean(boolean value) { - return add(value); + public void close() { + // TODO assert that we close the test blocks } - @Override - public TestBlock appendBytesRef(BytesRef value) { - return add(BytesRef.deepCopyOf(value)); - } + private abstract static class Builder implements BlockLoader.Builder { + private final 
List values = new ArrayList<>(); - @Override - public TestBlock appendDouble(double value) { - return add(value); - } + private List currentPosition = null; - @Override - public TestBlock appendInt(int value) { - return add(value); - } + @Override + public Builder appendNull() { + assertNull(currentPosition); + values.add(null); + return this; + } - @Override - public TestBlock appendLong(long value) { - return add(value); - } + @Override + public Builder beginPositionEntry() { + assertNull(currentPosition); + currentPosition = new ArrayList<>(); + values.add(currentPosition); + return this; + } - @Override - public TestBlock appendOrd(int value) { - try { - return add(sortedDocValues.lookupOrd(value)); - } catch (IOException e) { - throw new UncheckedIOException(e); + @Override + public Builder endPositionEntry() { + assertNotNull(currentPosition); + currentPosition = null; + return this; } - } - @Override - public TestBlock build() { - return this; - } + protected void add(Object value) { + (currentPosition == null ? values : currentPosition).add(value); + } - private TestBlock add(Object value) { - (currentPosition == null ? 
values : currentPosition).add(value); - return this; - } + @Override + public TestBlock build() { + return new TestBlock(values); + } - @Override - public void close() { - // TODO assert that we close the test blocks + @Override + public void close() { + // TODO assert that we close the test block builders + } } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/BlockReaderFactories.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/BlockReaderFactories.java index a0d08bc798fbb..a730931208663 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/BlockReaderFactories.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/BlockReaderFactories.java @@ -7,17 +7,13 @@ package org.elasticsearch.compute.lucene; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.SortedSetDocValues; import org.elasticsearch.common.logging.HeaderWarning; -import org.elasticsearch.index.mapper.BlockDocValuesReader; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.lookup.SearchLookup; -import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Set; @@ -36,23 +32,19 @@ private BlockReaderFactories() {} * @param asUnsupportedSource should the field be loaded as "unsupported"? 
* These will always have {@code null} values */ - public static List factories( - List searchContexts, - String fieldName, - boolean asUnsupportedSource - ) { - List factories = new ArrayList<>(searchContexts.size()); + public static List loaders(List searchContexts, String fieldName, boolean asUnsupportedSource) { + List loaders = new ArrayList<>(searchContexts.size()); for (SearchContext searchContext : searchContexts) { SearchExecutionContext ctx = searchContext.getSearchExecutionContext(); if (asUnsupportedSource) { - factories.add(loaderToFactory(ctx.getIndexReader(), BlockDocValuesReader.nulls())); + loaders.add(BlockLoader.CONSTANT_NULLS); continue; } MappedFieldType fieldType = ctx.getFieldType(fieldName); if (fieldType == null) { // the field does not exist in this context - factories.add(loaderToFactory(ctx.getIndexReader(), BlockDocValuesReader.nulls())); + loaders.add(BlockLoader.CONSTANT_NULLS); continue; } BlockLoader loader = fieldType.blockLoader(new MappedFieldType.BlockLoaderContext() { @@ -73,36 +65,12 @@ public Set sourcePaths(String name) { }); if (loader == null) { HeaderWarning.addWarning("Field [{}] cannot be retrieved, it is unsupported or not indexed; returning null", fieldName); - factories.add(loaderToFactory(ctx.getIndexReader(), BlockDocValuesReader.nulls())); + loaders.add(BlockLoader.CONSTANT_NULLS); continue; } - factories.add(loaderToFactory(ctx.getIndexReader(), loader)); + loaders.add(loader); } - return factories; - } - - /** - * Converts a {@link BlockLoader}, something defined in core elasticsearch at - * the field level, into a {@link BlockDocValuesReader.Factory} which can be - * used inside ESQL. 
- */ - public static BlockDocValuesReader.Factory loaderToFactory(IndexReader reader, BlockLoader loader) { - return new BlockDocValuesReader.Factory() { - @Override - public BlockDocValuesReader build(int segment) throws IOException { - return loader.reader(reader.leaves().get(segment)); - } - - @Override - public boolean supportsOrdinals() { - return loader.supportsOrdinals(); - } - - @Override - public SortedSetDocValues ordinals(int segment) throws IOException { - return loader.ordinals(reader.leaves().get(segment)); - } - }; + return loaders; } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java index 61c1bd9730e02..8d7a9df523c3d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java @@ -7,13 +7,17 @@ package org.elasticsearch.compute.lucene; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.DocBlock; import org.elasticsearch.compute.data.DocVector; import org.elasticsearch.compute.data.ElementType; @@ -23,75 +27,69 @@ import org.elasticsearch.compute.operator.AbstractPageMappingOperator; import org.elasticsearch.compute.operator.DriverContext; import 
org.elasticsearch.compute.operator.Operator; -import org.elasticsearch.index.mapper.BlockDocValuesReader; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.BlockLoader; -import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.index.mapper.BlockLoaderStoredFieldsFromLeafLoader; +import org.elasticsearch.search.fetch.StoredFieldsSpec; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.io.UncheckedIOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.TreeMap; +import java.util.stream.Collectors; /** * Operator that extracts doc_values from a Lucene index out of pages that have been produced by {@link LuceneSourceOperator} - * and outputs them to a new column. The operator leverages the {@link ValuesSource} infrastructure for extracting - * field values. This allows for a more uniform way of extracting data compared to deciding the correct doc_values - * loader for different field types. + * and outputs them to a new column. */ public class ValuesSourceReaderOperator extends AbstractPageMappingOperator { /** - * Creates a new extractor that uses ValuesSources load data - * @param sources the value source, type and index readers to use for extraction + * Creates a factory for {@link ValuesSourceReaderOperator}. 
+ * @param fields fields to load * @param docChannel the channel containing the shard, leaf/segment and doc id - * @param field the lucene field being loaded */ - public record ValuesSourceReaderOperatorFactory(List sources, int docChannel, String field) - implements - OperatorFactory { + public record Factory(List fields, List readers, int docChannel) implements OperatorFactory { @Override public Operator get(DriverContext driverContext) { - return new ValuesSourceReaderOperator(driverContext.blockFactory(), sources, docChannel, field); + return new ValuesSourceReaderOperator(driverContext.blockFactory(), fields, readers, docChannel); } @Override public String describe() { - return "ValuesSourceReaderOperator[field = " + field + "]"; + return "ValuesSourceReaderOperator[field = " + fields.stream().map(f -> f.name).collect(Collectors.joining(", ")) + "]"; } } - /** - * A list, one entry per shard, of factories for {@link BlockDocValuesReader}s - * which perform the actual reading. - */ - private final List factories; + private final List fields; + private final List readers; private final int docChannel; - private final String field; private final ComputeBlockLoaderFactory blockFactory; - private BlockDocValuesReader lastReader; - private int lastShard = -1; - private int lastSegment = -1; - private final Map readersBuilt = new TreeMap<>(); + /** + * Configuration for a field to load. + * + * {@code blockLoaders} is a list, one entry per shard, of + * {@link BlockLoader}s which load the actual blocks. 
+ */ + public record FieldInfo(String name, List blockLoaders) {} + /** * Creates a new extractor - * @param factories builds {@link BlockDocValuesReader} + * @param fields fields to load * @param docChannel the channel containing the shard, leaf/segment and doc id - * @param field the lucene field being loaded */ - public ValuesSourceReaderOperator( - BlockFactory blockFactory, - List factories, - int docChannel, - String field - ) { - this.factories = factories; + public ValuesSourceReaderOperator(BlockFactory blockFactory, List fields, List readers, int docChannel) { + this.fields = fields.stream().map(f -> new FieldWork(f)).toList(); + this.readers = readers; this.docChannel = docChannel; - this.field = field; this.blockFactory = new ComputeBlockLoaderFactory(blockFactory); } @@ -99,21 +97,31 @@ public ValuesSourceReaderOperator( protected Page process(Page page) { DocVector docVector = page.getBlock(docChannel).asVector(); + Block[] blocks = new Block[fields.size()]; + boolean success = false; try { if (docVector.singleSegmentNonDecreasing()) { - return page.appendBlock(loadFromSingleLeaf(docVector)); + loadFromSingleLeaf(blocks, docVector); + } else { + loadFromManyLeaves(blocks, docVector); } - return page.appendBlock(loadFromManyLeaves(docVector)); + success = true; } catch (IOException e) { throw new UncheckedIOException(e); + } finally { + if (success == false) { + Releasables.closeExpectNoException(blocks); + } } + return page.appendBlocks(blocks); } - private Block loadFromSingleLeaf(DocVector docVector) throws IOException { - setupReader(docVector.shards().getInt(0), docVector.segments().getInt(0), docVector.docs().getInt(0)); - return ((Block) lastReader.readValues(blockFactory, new BlockLoader.Docs() { - private final IntVector docs = docVector.docs(); - + private void loadFromSingleLeaf(Block[] blocks, DocVector docVector) throws IOException { + int shard = docVector.shards().getInt(0); + int segment = docVector.segments().getInt(0); + int firstDoc 
= docVector.docs().getInt(0); + IntVector docs = docVector.docs(); + BlockLoader.Docs loaderDocs = new BlockLoader.Docs() { @Override public int count() { return docs.getPositionCount(); @@ -123,44 +131,209 @@ public int count() { public int get(int i) { return docs.getInt(i); } - })); + }; + StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.NO_REQUIREMENTS; + List rowStrideReaders = new ArrayList<>(fields.size()); + try { + for (int b = 0; b < fields.size(); b++) { + FieldWork field = fields.get(b); + BlockLoader.ColumnAtATimeReader columnAtATime = field.columnAtATime.reader(shard, segment, firstDoc); + if (columnAtATime != null) { + blocks[b] = (Block) columnAtATime.read(blockFactory, loaderDocs); + } else { + BlockLoader.RowStrideReader rowStride = field.rowStride.reader(shard, segment, firstDoc); + rowStrideReaders.add( + new RowStrideReaderWork( + rowStride, + (Block.Builder) field.info.blockLoaders.get(shard).builder(blockFactory, docs.getPositionCount()), + b + ) + ); + storedFieldsSpec = storedFieldsSpec.merge(field.info.blockLoaders.get(shard).rowStrideStoredFieldSpec()); + } + } + + if (rowStrideReaders.isEmpty()) { + return; + } + if (storedFieldsSpec.equals(StoredFieldsSpec.NO_REQUIREMENTS)) { + throw new IllegalStateException( + "found row stride readers [" + rowStrideReaders + "] without stored fields [" + storedFieldsSpec + "]" + ); + } + BlockLoaderStoredFieldsFromLeafLoader storedFields = new BlockLoaderStoredFieldsFromLeafLoader( + // TODO enable the optimization by passing non-null to docs if correct + StoredFieldLoader.fromSpec(storedFieldsSpec).getLoader(ctx(shard, segment), null), + storedFieldsSpec.requiresSource() + ); + trackStoredFields(storedFieldsSpec); // TODO when optimization is enabled add it to tracking + for (int p = 0; p < docs.getPositionCount(); p++) { + int doc = docs.getInt(p); + if (storedFields != null) { + storedFields.advanceTo(doc); + } + for (int r = 0; r < rowStrideReaders.size(); r++) { + RowStrideReaderWork work = 
rowStrideReaders.get(r); + work.reader.read(doc, storedFields, work.builder); + } + } + for (int r = 0; r < rowStrideReaders.size(); r++) { + RowStrideReaderWork work = rowStrideReaders.get(r); + blocks[work.offset] = work.builder.build(); + } + } finally { + Releasables.close(rowStrideReaders); + } } - private Block loadFromManyLeaves(DocVector docVector) throws IOException { + private void loadFromManyLeaves(Block[] blocks, DocVector docVector) throws IOException { + IntVector shards = docVector.shards(); + IntVector segments = docVector.segments(); + IntVector docs = docVector.docs(); + Block.Builder[] builders = new Block.Builder[blocks.length]; int[] forwards = docVector.shardSegmentDocMapForwards(); - int doc = docVector.docs().getInt(forwards[0]); - setupReader(docVector.shards().getInt(forwards[0]), docVector.segments().getInt(forwards[0]), doc); - try (BlockLoader.Builder builder = lastReader.builder(blockFactory, forwards.length)) { - lastReader.readValuesFromSingleDoc(doc, builder); - for (int i = 1; i < forwards.length; i++) { - int shard = docVector.shards().getInt(forwards[i]); - int segment = docVector.segments().getInt(forwards[i]); - doc = docVector.docs().getInt(forwards[i]); - if (segment != lastSegment || shard != lastShard) { - setupReader(shard, segment, doc); + try { + for (int b = 0; b < fields.size(); b++) { + FieldWork field = fields.get(b); + builders[b] = builderFromFirstNonNull(field, docs.getPositionCount()); + } + int lastShard = -1; + int lastSegment = -1; + BlockLoaderStoredFieldsFromLeafLoader storedFields = null; + for (int i = 0; i < forwards.length; i++) { + int p = forwards[i]; + int shard = shards.getInt(p); + int segment = segments.getInt(p); + int doc = docs.getInt(p); + if (shard != lastShard || segment != lastSegment) { + lastShard = shard; + lastSegment = segment; + StoredFieldsSpec storedFieldsSpec = storedFieldsSpecForShard(shard); + storedFields = new BlockLoaderStoredFieldsFromLeafLoader( + 
StoredFieldLoader.fromSpec(storedFieldsSpec).getLoader(ctx(shard, segment), null), + storedFieldsSpec.requiresSource() + ); + if (false == storedFieldsSpec.equals(StoredFieldsSpec.NO_REQUIREMENTS)) { + trackStoredFields(storedFieldsSpec); + } + } + storedFields.advanceTo(doc); + for (int r = 0; r < blocks.length; r++) { + fields.get(r).rowStride.reader(shard, segment, doc).read(doc, storedFields, builders[r]); } - lastReader.readValuesFromSingleDoc(doc, builder); } - try (Block orig = ((Block.Builder) builder).build()) { - return orig.filter(docVector.shardSegmentDocMapBackwards()); + for (int r = 0; r < blocks.length; r++) { + try (Block orig = builders[r].build()) { + blocks[r] = orig.filter(docVector.shardSegmentDocMapBackwards()); + } } + } finally { + Releasables.closeExpectNoException(builders); } } - private void setupReader(int shard, int segment, int doc) throws IOException { - if (lastSegment == segment && lastShard == shard && BlockDocValuesReader.canReuse(lastReader, doc)) { - return; + private void trackStoredFields(StoredFieldsSpec spec) { + readersBuilt.merge( + "stored_fields[" + "requires_source:" + spec.requiresSource() + ", fields:" + spec.requiredStoredFields().size() + "]", + 1, + (prev, one) -> prev + one + ); + } + + /** + * Returns a builder from the first non-{@link BlockLoader#CONSTANT_NULLS} loader + * in the list. If they are all the null loader then returns the builder made by the first one. + */ + private Block.Builder builderFromFirstNonNull(FieldWork field, int positionCount) { + for (BlockLoader loader : field.info.blockLoaders) { + if (loader != BlockLoader.CONSTANT_NULLS) { + return (Block.Builder) loader.builder(blockFactory, positionCount); + } } + // All null, just let the first loader build the null block builder. 
+ return (Block.Builder) field.info.blockLoaders.get(0).builder(blockFactory, positionCount); + } - lastReader = factories.get(shard).build(segment); - lastShard = shard; - lastSegment = segment; - readersBuilt.compute(lastReader.toString(), (k, v) -> v == null ? 1 : v + 1); + private StoredFieldsSpec storedFieldsSpecForShard(int shard) { + StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.NO_REQUIREMENTS; + for (int b = 0; b < fields.size(); b++) { + FieldWork field = fields.get(b); + storedFieldsSpec = storedFieldsSpec.merge(field.info.blockLoaders.get(shard).rowStrideStoredFieldSpec()); + } + return storedFieldsSpec; + } + + private class FieldWork { + final FieldInfo info; + final GuardedReader columnAtATime = new GuardedReader<>() { + @Override + BlockLoader.ColumnAtATimeReader build(BlockLoader loader, LeafReaderContext ctx) throws IOException { + return loader.columnAtATimeReader(ctx); + } + + @Override + String type() { + return "column_at_a_time"; + } + }; + + final GuardedReader rowStride = new GuardedReader<>() { + @Override + BlockLoader.RowStrideReader build(BlockLoader loader, LeafReaderContext ctx) throws IOException { + return loader.rowStrideReader(ctx); + } + + @Override + String type() { + return "row_stride"; + } + }; + + FieldWork(FieldInfo info) { + this.info = info; + } + + private abstract class GuardedReader { + private int lastShard = -1; + private int lastSegment = -1; + V lastReader; + + V reader(int shard, int segment, int startingDocId) throws IOException { + if (lastShard == shard && lastSegment == segment) { + if (lastReader == null) { + return null; + } + if (lastReader.canReuse(startingDocId)) { + return lastReader; + } + } + lastShard = shard; + lastSegment = segment; + lastReader = build(info.blockLoaders.get(shard), ctx(shard, segment)); + readersBuilt.merge(info.name + ":" + type() + ":" + lastReader, 1, (prev, one) -> prev + one); + return lastReader; + } + + abstract V build(BlockLoader loader, LeafReaderContext ctx) 
throws IOException; + + abstract String type(); + } + } + + private record RowStrideReaderWork(BlockLoader.RowStrideReader reader, Block.Builder builder, int offset) implements Releasable { + @Override + public void close() { + builder.close(); + } + } + + private LeafReaderContext ctx(int shard, int segment) { + return readers.get(shard).leaves().get(segment); } @Override public String toString() { - return "ValuesSourceReaderOperator[field = " + field + "]"; + return "ValuesSourceReaderOperator[field = " + fields.stream().map(f -> f.info.name).collect(Collectors.joining(", ")) + "]"; } @Override @@ -233,7 +406,7 @@ public String toString() { } } - private static class ComputeBlockLoaderFactory implements BlockLoader.BuilderFactory { + private static class ComputeBlockLoaderFactory implements BlockLoader.BlockFactory { private final BlockFactory factory; private ComputeBlockLoaderFactory(BlockFactory factory) { @@ -295,9 +468,21 @@ public BlockLoader.Builder nulls(int expectedCount) { return ElementType.NULL.newBlockBuilder(expectedCount, factory); } + @Override + public Block constantNulls(int size) { + return factory.newConstantNullBlock(size); + } + + @Override + public BytesRefBlock constantBytes(BytesRef value, int size) { + return factory.newConstantBytesRefBlockWith(value, size); + } + @Override public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) { return new SingletonOrdinalsBuilder(factory, ordinals, count); } } + + // TODO tests that mix source loaded fields and doc values in the same block } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/OrdinalsGroupingOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/OrdinalsGroupingOperator.java index 07494f97cfd6d..2e1cbf9a1135d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/OrdinalsGroupingOperator.java +++ 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/OrdinalsGroupingOperator.java @@ -7,6 +7,7 @@ package org.elasticsearch.compute.operator; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -32,7 +33,7 @@ import org.elasticsearch.compute.operator.HashAggregationOperator.GroupSpec; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; -import org.elasticsearch.index.mapper.BlockDocValuesReader; +import org.elasticsearch.index.mapper.BlockLoader; import java.io.IOException; import java.io.UncheckedIOException; @@ -52,7 +53,8 @@ */ public class OrdinalsGroupingOperator implements Operator { public record OrdinalsGroupingOperatorFactory( - List readerFactories, + List blockLoaders, + List readers, ElementType groupingElementType, int docChannel, String groupingField, @@ -64,7 +66,8 @@ public record OrdinalsGroupingOperatorFactory( @Override public Operator get(DriverContext driverContext) { return new OrdinalsGroupingOperator( - readerFactories, + blockLoaders, + readers, groupingElementType, docChannel, groupingField, @@ -81,7 +84,8 @@ public String describe() { } } - private final List readerFactories; + private final List blockLoaders; + private final List readers; private final int docChannel; private final String groupingField; @@ -99,7 +103,8 @@ public String describe() { private ValuesAggregator valuesAggregator; public OrdinalsGroupingOperator( - List readerFactories, + List blockLoaders, + List readers, ElementType groupingElementType, int docChannel, String groupingField, @@ -109,7 +114,8 @@ public OrdinalsGroupingOperator( DriverContext driverContext ) { Objects.requireNonNull(aggregatorFactories); - this.readerFactories = readerFactories; + this.blockLoaders = blockLoaders; + this.readers = readers; this.groupingElementType = groupingElementType; this.docChannel = 
docChannel; this.groupingField = groupingField; @@ -131,10 +137,10 @@ public void addInput(Page page) { requireNonNull(page, "page is null"); DocVector docVector = page.getBlock(docChannel).asVector(); final int shardIndex = docVector.shards().getInt(0); - final var readerFactory = readerFactories.get(shardIndex); + final var blockLoader = blockLoaders.get(shardIndex); boolean pagePassed = false; try { - if (docVector.singleSegmentNonDecreasing() && readerFactory.supportsOrdinals()) { + if (docVector.singleSegmentNonDecreasing() && blockLoader.supportsOrdinals()) { final IntVector segmentIndexVector = docVector.segments(); assert segmentIndexVector.isConstant(); final OrdinalSegmentAggregator ordinalAggregator = this.ordinalAggregators.computeIfAbsent( @@ -144,7 +150,7 @@ public void addInput(Page page) { return new OrdinalSegmentAggregator( driverContext.blockFactory(), this::createGroupingAggregators, - () -> readerFactory.ordinals(k.segmentIndex), + () -> blockLoader.ordinals(readers.get(k.shardIndex).leaves().get(k.segmentIndex)), bigArrays ); } catch (IOException e) { @@ -158,7 +164,8 @@ public void addInput(Page page) { if (valuesAggregator == null) { int channelIndex = page.getBlockCount(); // extractor will append a new block at the end valuesAggregator = new ValuesAggregator( - readerFactories, + blockLoaders, + readers, groupingElementType, docChannel, groupingField, @@ -458,7 +465,8 @@ private static class ValuesAggregator implements Releasable { private final HashAggregationOperator aggregator; ValuesAggregator( - List factories, + List blockLoaders, + List readers, ElementType groupingElementType, int docChannel, String groupingField, @@ -467,7 +475,12 @@ private static class ValuesAggregator implements Releasable { int maxPageSize, DriverContext driverContext ) { - this.extractor = new ValuesSourceReaderOperator(BlockFactory.getNonBreakingInstance(), factories, docChannel, groupingField); + this.extractor = new ValuesSourceReaderOperator( + 
BlockFactory.getNonBreakingInstance(), + List.of(new ValuesSourceReaderOperator.FieldInfo(groupingField, blockLoaders)), + readers, + docChannel + ); this.aggregator = new HashAggregationOperator( aggregatorFactories, () -> BlockHash.build(List.of(new GroupSpec(channelIndex, groupingElementType)), driverContext, maxPageSize, false), diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java index b45f597553e1b..d9730d3f602c7 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java @@ -45,7 +45,6 @@ import org.elasticsearch.compute.data.IntVector; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.lucene.BlockReaderFactories; import org.elasticsearch.compute.lucene.DataPartitioning; import org.elasticsearch.compute.lucene.LuceneOperator; import org.elasticsearch.compute.lucene.LuceneSourceOperator; @@ -230,9 +229,8 @@ public String toString() { } }, new OrdinalsGroupingOperator( - List.of( - BlockReaderFactories.loaderToFactory(reader, new KeywordFieldMapper.KeywordFieldType("g").blockLoader(null)) - ), + List.of(new KeywordFieldMapper.KeywordFieldType("g").blockLoader(null)), + List.of(reader), ElementType.BYTES_REF, 0, gField, diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java index 269a478560bac..76810dbf2e3bc 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java @@ 
-9,9 +9,12 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.NoMergePolicy; @@ -23,6 +26,8 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.compute.data.Block; @@ -46,20 +51,41 @@ import org.elasticsearch.compute.operator.PageConsumerOperator; import org.elasticsearch.compute.operator.SourceOperator; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.BooleanFieldMapper; +import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.ProvidedIdFieldMapper; +import org.elasticsearch.index.mapper.SourceFieldMapper; +import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper; +import org.elasticsearch.search.lookup.SearchLookup; +import org.elasticsearch.xcontent.XContentBuilder; +import 
org.elasticsearch.xcontent.json.JsonXContent; +import org.hamcrest.Matcher; import org.junit.After; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.stream.IntStream; import static org.elasticsearch.compute.lucene.LuceneSourceOperatorTests.mockSearchContext; +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; /** * Tests for {@link ValuesSourceReaderOperator}. Turns off {@link HandleLimitFS} @@ -86,19 +112,48 @@ public void closeIndex() throws IOException { @Override protected Operator.OperatorFactory simple(BigArrays bigArrays) { - return factory(reader, new NumberFieldMapper.NumberFieldType("long", NumberFieldMapper.NumberType.LONG)); + if (reader == null) { + // Init a reader if one hasn't been built, so things don't blow up + try { + initIndex(100); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return factory(reader, docValuesNumberField("long", NumberFieldMapper.NumberType.LONG)); } static Operator.OperatorFactory factory(IndexReader reader, MappedFieldType ft) { - return new ValuesSourceReaderOperator.ValuesSourceReaderOperatorFactory( - List.of(BlockReaderFactories.loaderToFactory(reader, ft.blockLoader(null))), - 0, - ft.name() + return factory(reader, ft.name(), ft.blockLoader(null)); + } + + static Operator.OperatorFactory factory(IndexReader reader, String name, BlockLoader loader) { + return new ValuesSourceReaderOperator.Factory( + List.of(new ValuesSourceReaderOperator.FieldInfo(name, List.of(loader))), + List.of(reader), + 0 ); } @Override protected SourceOperator simpleInput(BlockFactory blockFactory, 
int size) { + try { + initIndex(size); + } catch (IOException e) { + throw new RuntimeException(e); + } + var luceneFactory = new LuceneSourceOperator.Factory( + List.of(mockSearchContext(reader)), + ctx -> new MatchAllDocsQuery(), + randomFrom(DataPartitioning.values()), + randomIntBetween(1, 10), + randomPageSize(), + LuceneOperator.NO_LIMIT + ); + return luceneFactory.get(driverContext()); + } + + private void initIndex(int size) throws IOException { // The test wants more than one segment. We shoot for about 10. int commitEvery = Math.max(1, size / 10); try ( @@ -110,40 +165,68 @@ protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { ) { for (int d = 0; d < size; d++) { List doc = new ArrayList<>(); + doc.add(IdFieldMapper.standardIdField("id")); doc.add(new SortedNumericDocValuesField("key", d)); + doc.add(new SortedNumericDocValuesField("int", d)); + doc.add(new SortedNumericDocValuesField("short", (short) d)); + doc.add(new SortedNumericDocValuesField("byte", (byte) d)); doc.add(new SortedNumericDocValuesField("long", d)); doc.add( new KeywordFieldMapper.KeywordField("kwd", new BytesRef(Integer.toString(d)), KeywordFieldMapper.Defaults.FIELD_TYPE) ); + doc.add(new StoredField("stored_kwd", new BytesRef(Integer.toString(d)))); + doc.add(new StoredField("stored_text", Integer.toString(d))); doc.add(new SortedNumericDocValuesField("bool", d % 2 == 0 ? 1 : 0)); doc.add(new SortedNumericDocValuesField("double", NumericUtils.doubleToSortableLong(d / 123_456d))); for (int v = 0; v <= d % 3; v++) { - doc.add( - new KeywordFieldMapper.KeywordField("mv_kwd", new BytesRef(PREFIX[v] + d), KeywordFieldMapper.Defaults.FIELD_TYPE) - ); doc.add(new SortedNumericDocValuesField("mv_bool", v % 2 == 0 ? 
1 : 0)); - doc.add(new SortedNumericDocValuesField("mv_key", 1_000 * d + v)); + doc.add(new SortedNumericDocValuesField("mv_int", 1_000 * d + v)); + doc.add(new SortedNumericDocValuesField("mv_short", (short) (2_000 * d + v))); + doc.add(new SortedNumericDocValuesField("mv_byte", (byte) (3_000 * d + v))); doc.add(new SortedNumericDocValuesField("mv_long", -1_000 * d + v)); doc.add(new SortedNumericDocValuesField("mv_double", NumericUtils.doubleToSortableLong(d / 123_456d + v))); + doc.add( + new KeywordFieldMapper.KeywordField("mv_kwd", new BytesRef(PREFIX[v] + d), KeywordFieldMapper.Defaults.FIELD_TYPE) + ); + doc.add(new StoredField("mv_stored_kwd", new BytesRef(PREFIX[v] + d))); + doc.add(new StoredField("mv_stored_text", PREFIX[v] + d)); + } + XContentBuilder source = JsonXContent.contentBuilder(); + source.startObject(); + source.field("source_kwd", Integer.toString(d)); + source.startArray("mv_source_kwd"); + for (int v = 0; v <= d % 3; v++) { + source.value(PREFIX[v] + d); + } + source.endArray(); + source.field("source_text", Integer.toString(d)); + source.startArray("mv_source_text"); + for (int v = 0; v <= d % 3; v++) { + source.value(PREFIX[v] + d); } + source.endArray(); + source.field("source_long", (long) d); + source.startArray("mv_source_long"); + for (int v = 0; v <= d % 3; v++) { + source.value((long) (-1_000 * d + v)); + } + source.endArray(); + source.field("source_int", d); + source.startArray("mv_source_int"); + for (int v = 0; v <= d % 3; v++) { + source.value(1_000 * d + v); + } + source.endArray(); + + source.endObject(); + doc.add(new StoredField(SourceFieldMapper.NAME, BytesReference.bytes(source).toBytesRef())); writer.addDocument(doc); if (d % commitEvery == 0) { writer.commit(); } } reader = writer.getReader(); - } catch (IOException e) { - throw new RuntimeException(e); } - var luceneFactory = new LuceneSourceOperator.Factory( - List.of(mockSearchContext(reader)), - ctx -> new MatchAllDocsQuery(), - 
randomFrom(DataPartitioning.values()), - randomIntBetween(1, 10), - randomPageSize(), - LuceneOperator.NO_LIMIT - ); - return luceneFactory.get(driverContext()); } @Override @@ -184,7 +267,8 @@ public void testLoadAll() { DriverContext driverContext = driverContext(); loadSimpleAndAssert( driverContext, - CannedSourceOperator.collectPages(simpleInput(driverContext.blockFactory(), between(100, 5000))) + CannedSourceOperator.collectPages(simpleInput(driverContext.blockFactory(), between(100, 5000))), + Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING ); } @@ -196,13 +280,18 @@ public void testLoadAllInOnePage() { CannedSourceOperator.mergePages( CannedSourceOperator.collectPages(simpleInput(driverContext.blockFactory(), between(100, 5000))) ) - ) + ), + Block.MvOrdering.UNORDERED ); } public void testEmpty() { DriverContext driverContext = driverContext(); - loadSimpleAndAssert(driverContext, CannedSourceOperator.collectPages(simpleInput(driverContext.blockFactory(), 0))); + loadSimpleAndAssert( + driverContext, + CannedSourceOperator.collectPages(simpleInput(driverContext.blockFactory(), 0)), + Block.MvOrdering.UNORDERED + ); } public void testLoadAllInOnePageShuffled() { @@ -219,99 +308,681 @@ public void testLoadAllInOnePageShuffled() { shuffledBlocks[b] = source.getBlock(b).filter(shuffleArray); } source = new Page(shuffledBlocks); - loadSimpleAndAssert(driverContext, List.of(source)); - } - - private void loadSimpleAndAssert(DriverContext driverContext, List input) { - List operators = List.of( - factory(reader, new NumberFieldMapper.NumberFieldType("key", NumberFieldMapper.NumberType.INTEGER)).get(driverContext), - factory(reader, new NumberFieldMapper.NumberFieldType("long", NumberFieldMapper.NumberType.LONG)).get(driverContext), - factory(reader, new KeywordFieldMapper.KeywordFieldType("kwd")).get(driverContext), - factory(reader, new KeywordFieldMapper.KeywordFieldType("mv_kwd")).get(driverContext), - factory(reader, new 
BooleanFieldMapper.BooleanFieldType("bool")).get(driverContext), - factory(reader, new BooleanFieldMapper.BooleanFieldType("mv_bool")).get(driverContext), - factory(reader, new NumberFieldMapper.NumberFieldType("mv_key", NumberFieldMapper.NumberType.INTEGER)).get(driverContext), - factory(reader, new NumberFieldMapper.NumberFieldType("mv_long", NumberFieldMapper.NumberType.LONG)).get(driverContext), - factory(reader, new NumberFieldMapper.NumberFieldType("double", NumberFieldMapper.NumberType.DOUBLE)).get(driverContext), - factory(reader, new NumberFieldMapper.NumberFieldType("mv_double", NumberFieldMapper.NumberType.DOUBLE)).get(driverContext) + loadSimpleAndAssert(driverContext, List.of(source), Block.MvOrdering.UNORDERED); + } + + private static ValuesSourceReaderOperator.FieldInfo fieldInfo(MappedFieldType ft) { + return new ValuesSourceReaderOperator.FieldInfo(ft.name(), List.of(ft.blockLoader(new MappedFieldType.BlockLoaderContext() { + @Override + public String indexName() { + return "test_index"; + } + + @Override + public SearchLookup lookup() { + throw new UnsupportedOperationException(); + } + + @Override + public Set sourcePaths(String name) { + return Set.of(name); + } + }))); + } + + private void loadSimpleAndAssert(DriverContext driverContext, List input, Block.MvOrdering docValuesMvOrdering) { + List cases = infoAndChecksForEachType(docValuesMvOrdering); + + List operators = new ArrayList<>(); + operators.add( + new ValuesSourceReaderOperator.Factory( + List.of(fieldInfo(docValuesNumberField("key", NumberFieldMapper.NumberType.INTEGER))), + List.of(reader), + 0 + ).get(driverContext) ); + List tests = new ArrayList<>(); + while (cases.isEmpty() == false) { + List b = randomNonEmptySubsetOf(cases); + cases.removeAll(b); + tests.addAll(b); + operators.add( + new ValuesSourceReaderOperator.Factory(b.stream().map(i -> i.info).toList(), List.of(reader), 0).get(driverContext) + ); + } List results = drive(operators, input.iterator(), driverContext); 
assertThat(results, hasSize(input.size())); - for (Page p : results) { - assertThat(p.getBlockCount(), equalTo(11)); - IntVector keys = p.getBlock(1).asVector(); - LongVector longs = p.getBlock(2).asVector(); - BytesRefVector keywords = p.getBlock(3).asVector(); - BytesRefBlock mvKeywords = p.getBlock(4); - BooleanVector bools = p.getBlock(5).asVector(); - BooleanBlock mvBools = p.getBlock(6); - IntBlock mvInts = p.getBlock(7); - LongBlock mvLongs = p.getBlock(8); - DoubleVector doubles = p.getBlock(9).asVector(); - DoubleBlock mvDoubles = p.getBlock(10); - - for (int i = 0; i < p.getPositionCount(); i++) { - int key = keys.getInt(i); - assertThat(longs.getLong(i), equalTo((long) key)); - assertThat(keywords.getBytesRef(i, new BytesRef()).utf8ToString(), equalTo(Integer.toString(key))); - - assertThat(mvKeywords.getValueCount(i), equalTo(key % 3 + 1)); - int offset = mvKeywords.getFirstValueIndex(i); - for (int v = 0; v <= key % 3; v++) { - assertThat(mvKeywords.getBytesRef(offset + v, new BytesRef()).utf8ToString(), equalTo(PREFIX[v] + key)); - } - if (key % 3 > 0) { - assertThat(mvKeywords.mvOrdering(), equalTo(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING)); + for (Page page : results) { + assertThat(page.getBlockCount(), equalTo(tests.size() + 2 /* one for doc and one for keys */)); + IntVector keys = page.getBlock(1).asVector(); + for (int p = 0; p < page.getPositionCount(); p++) { + int key = keys.getInt(p); + for (int i = 0; i < tests.size(); i++) { + try { + tests.get(i).checkResults.check(page.getBlock(2 + i), p, key); + } catch (AssertionError e) { + throw new AssertionError("error checking " + tests.get(i).info.name() + "[" + p + "]: " + e.getMessage(), e); + } } + } + } + for (Operator op : operators) { + assertThat(((ValuesSourceReaderOperator) op).status().pagesProcessed(), equalTo(input.size())); + } + assertDriverContext(driverContext); + } - assertThat(bools.getBoolean(i), equalTo(key % 2 == 0)); - assertThat(mvBools.getValueCount(i), 
equalTo(key % 3 + 1)); - offset = mvBools.getFirstValueIndex(i); - for (int v = 0; v <= key % 3; v++) { - assertThat(mvBools.getBoolean(offset + v), equalTo(BOOLEANS[key % 3][v])); - } - if (key % 3 > 0) { - assertThat(mvBools.mvOrdering(), equalTo(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING)); - } + interface CheckResults { + void check(Block block, int position, int key); + } - assertThat(mvInts.getValueCount(i), equalTo(key % 3 + 1)); - offset = mvInts.getFirstValueIndex(i); - for (int v = 0; v <= key % 3; v++) { - assertThat(mvInts.getInt(offset + v), equalTo(1_000 * key + v)); - } - if (key % 3 > 0) { - assertThat(mvInts.mvOrdering(), equalTo(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING)); - } + interface CheckReaders { + void check(boolean forcedRowByRow, int pageCount, int segmentCount, Map readersBuilt); + } - assertThat(mvLongs.getValueCount(i), equalTo(key % 3 + 1)); - offset = mvLongs.getFirstValueIndex(i); - for (int v = 0; v <= key % 3; v++) { - assertThat(mvLongs.getLong(offset + v), equalTo(-1_000L * key + v)); - } - if (key % 3 > 0) { - assertThat(mvLongs.mvOrdering(), equalTo(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING)); - } + record FieldCase(ValuesSourceReaderOperator.FieldInfo info, CheckResults checkResults, CheckReaders checkReaders) { + FieldCase(MappedFieldType ft, CheckResults checkResults, CheckReaders checkReaders) { + this(fieldInfo(ft), checkResults, checkReaders); + } + } + + /** + * Asserts that {@link ValuesSourceReaderOperator#status} claims that only + * the expected readers are built after loading singleton pages. + */ + // @Repeat(iterations = 100) + public void testLoadAllStatus() { + DriverContext driverContext = driverContext(); + testLoadAllStatus(false); + } + + /** + * Asserts that {@link ValuesSourceReaderOperator#status} claims that only + * the expected readers are built after loading non-singleton pages. 
+ */ + // @Repeat(iterations = 100) + public void testLoadAllStatusAllInOnePage() { + testLoadAllStatus(true); + } + + private void testLoadAllStatus(boolean allInOnePage) { + DriverContext driverContext = driverContext(); + List input = CannedSourceOperator.collectPages(simpleInput(driverContext.blockFactory(), between(100, 5000))); + List cases = infoAndChecksForEachType(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING); + // Build one operator for each field, so we get a unique map to assert on + List operators = cases.stream() + .map(i -> new ValuesSourceReaderOperator.Factory(List.of(i.info), List.of(reader), 0).get(driverContext)) + .toList(); + if (allInOnePage) { + input = List.of(CannedSourceOperator.mergePages(input)); + } + drive(operators, input.iterator(), driverContext); + for (int i = 0; i < cases.size(); i++) { + ValuesSourceReaderOperator.Status status = (ValuesSourceReaderOperator.Status) operators.get(i).status(); + assertThat(status.pagesProcessed(), equalTo(input.size())); + FieldCase fc = cases.get(i); + fc.checkReaders.check(allInOnePage, input.size(), reader.leaves().size(), status.readersBuilt()); + } + } + + private List infoAndChecksForEachType(Block.MvOrdering docValuesMvOrdering) { + Checks checks = new Checks(docValuesMvOrdering); + List r = new ArrayList<>(); + r.add( + new FieldCase(docValuesNumberField("long", NumberFieldMapper.NumberType.LONG), checks::longs, StatusChecks::longsFromDocValues) + ); + r.add( + new FieldCase( + docValuesNumberField("mv_long", NumberFieldMapper.NumberType.LONG), + checks::mvLongsFromDocValues, + StatusChecks::mvLongsFromDocValues + ) + ); + r.add( + new FieldCase(sourceNumberField("source_long", NumberFieldMapper.NumberType.LONG), checks::longs, StatusChecks::longsFromSource) + ); + r.add( + new FieldCase( + sourceNumberField("mv_source_long", NumberFieldMapper.NumberType.LONG), + checks::mvLongsUnordered, + StatusChecks::mvLongsFromSource + ) + ); + r.add( + new FieldCase(docValuesNumberField("int", 
NumberFieldMapper.NumberType.INTEGER), checks::ints, StatusChecks::intsFromDocValues) + ); + r.add( + new FieldCase( + docValuesNumberField("mv_int", NumberFieldMapper.NumberType.INTEGER), + checks::mvIntsFromDocValues, + StatusChecks::mvIntsFromDocValues + ) + ); + r.add( + new FieldCase(sourceNumberField("source_int", NumberFieldMapper.NumberType.INTEGER), checks::ints, StatusChecks::intsFromSource) + ); + r.add( + new FieldCase( + sourceNumberField("mv_source_int", NumberFieldMapper.NumberType.INTEGER), + checks::mvIntsUnordered, + StatusChecks::mvIntsFromSource + ) + ); + r.add( + new FieldCase( + docValuesNumberField("short", NumberFieldMapper.NumberType.SHORT), + checks::shorts, + StatusChecks::shortsFromDocValues + ) + ); + r.add( + new FieldCase( + docValuesNumberField("mv_short", NumberFieldMapper.NumberType.SHORT), + checks::mvShorts, + StatusChecks::mvShortsFromDocValues + ) + ); + r.add( + new FieldCase(docValuesNumberField("byte", NumberFieldMapper.NumberType.BYTE), checks::bytes, StatusChecks::bytesFromDocValues) + ); + r.add( + new FieldCase( + docValuesNumberField("mv_byte", NumberFieldMapper.NumberType.BYTE), + checks::mvBytes, + StatusChecks::mvBytesFromDocValues + ) + ); + r.add( + new FieldCase( + docValuesNumberField("double", NumberFieldMapper.NumberType.DOUBLE), + checks::doubles, + StatusChecks::doublesFromDocValues + ) + ); + r.add( + new FieldCase( + docValuesNumberField("mv_double", NumberFieldMapper.NumberType.DOUBLE), + checks::mvDoubles, + StatusChecks::mvDoublesFromDocValues + ) + ); + r.add(new FieldCase(new BooleanFieldMapper.BooleanFieldType("bool"), checks::bools, StatusChecks::boolFromDocValues)); + r.add(new FieldCase(new BooleanFieldMapper.BooleanFieldType("mv_bool"), checks::mvBools, StatusChecks::mvBoolFromDocValues)); + r.add(new FieldCase(new KeywordFieldMapper.KeywordFieldType("kwd"), checks::strings, StatusChecks::keywordsFromDocValues)); + r.add( + new FieldCase( + new KeywordFieldMapper.KeywordFieldType("mv_kwd"), + 
checks::mvStringsFromDocValues, + StatusChecks::mvKeywordsFromDocValues + ) + ); + r.add(new FieldCase(storedKeywordField("stored_kwd"), checks::strings, StatusChecks::keywordsFromStored)); + r.add(new FieldCase(storedKeywordField("mv_stored_kwd"), checks::mvStringsUnordered, StatusChecks::mvKeywordsFromStored)); + r.add(new FieldCase(sourceKeywordField("source_kwd"), checks::strings, StatusChecks::keywordsFromSource)); + r.add(new FieldCase(sourceKeywordField("mv_source_kwd"), checks::mvStringsUnordered, StatusChecks::mvKeywordsFromSource)); + r.add(new FieldCase(new TextFieldMapper.TextFieldType("source_text", false), checks::strings, StatusChecks::textFromSource)); + r.add( + new FieldCase( + new TextFieldMapper.TextFieldType("mv_source_text", false), + checks::mvStringsUnordered, + StatusChecks::mvTextFromSource + ) + ); + r.add(new FieldCase(storedTextField("stored_text"), checks::strings, StatusChecks::textFromStored)); + r.add(new FieldCase(storedTextField("mv_stored_text"), checks::mvStringsUnordered, StatusChecks::mvTextFromStored)); + r.add( + new FieldCase( + textFieldWithDelegate("text_with_delegate", new KeywordFieldMapper.KeywordFieldType("kwd")), + checks::strings, + StatusChecks::textWithDelegate + ) + ); + r.add( + new FieldCase( + textFieldWithDelegate("mv_text_with_delegate", new KeywordFieldMapper.KeywordFieldType("mv_kwd")), + checks::mvStringsFromDocValues, + StatusChecks::mvTextWithDelegate + ) + ); + r.add(new FieldCase(new ProvidedIdFieldMapper(() -> false).fieldType(), checks::ids, StatusChecks::id)); + r.add(new FieldCase(TsidExtractingIdFieldMapper.INSTANCE.fieldType(), checks::ids, StatusChecks::id)); + r.add( + new FieldCase( + new ValuesSourceReaderOperator.FieldInfo("constant_bytes", List.of(BlockLoader.constantBytes(new BytesRef("foo")))), + checks::constantBytes, + StatusChecks::constantBytes + ) + ); + r.add( + new FieldCase( + new ValuesSourceReaderOperator.FieldInfo("null", List.of(BlockLoader.CONSTANT_NULLS)), + 
checks::constantNulls, + StatusChecks::constantNulls + ) + ); + Collections.shuffle(r, random()); + return r; + } + + record Checks(Block.MvOrdering docValuesMvOrdering) { + void longs(Block block, int position, int key) { + LongVector longs = ((LongBlock) block).asVector(); + assertThat(longs.getLong(position), equalTo((long) key)); + } + + void ints(Block block, int position, int key) { + IntVector ints = ((IntBlock) block).asVector(); + assertThat(ints.getInt(position), equalTo(key)); + } + + void shorts(Block block, int position, int key) { + IntVector ints = ((IntBlock) block).asVector(); + assertThat(ints.getInt(position), equalTo((int) (short) key)); + } + + void bytes(Block block, int position, int key) { + IntVector ints = ((IntBlock) block).asVector(); + assertThat(ints.getInt(position), equalTo((int) (byte) key)); + } + + void doubles(Block block, int position, int key) { + DoubleVector doubles = ((DoubleBlock) block).asVector(); + assertThat(doubles.getDouble(position), equalTo(key / 123_456d)); + } + + void strings(Block block, int position, int key) { + BytesRefVector keywords = ((BytesRefBlock) block).asVector(); + assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo(Integer.toString(key))); + } + + void bools(Block block, int position, int key) { + BooleanVector bools = ((BooleanBlock) block).asVector(); + assertThat(bools.getBoolean(position), equalTo(key % 2 == 0)); + } + + void ids(Block block, int position, int key) { + BytesRefVector ids = ((BytesRefBlock) block).asVector(); + assertThat(ids.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo("id")); + } + + void constantBytes(Block block, int position, int key) { + BytesRefVector keywords = ((BytesRefBlock) block).asVector(); + assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo("foo")); + } + + void constantNulls(Block block, int position, int key) { + assertTrue(block.areAllValuesNull()); + 
assertTrue(block.isNull(position)); + } + + void mvLongsFromDocValues(Block block, int position, int key) { + mvLongs(block, position, key, docValuesMvOrdering); + } + + void mvLongsUnordered(Block block, int position, int key) { + mvLongs(block, position, key, Block.MvOrdering.UNORDERED); + } + + private void mvLongs(Block block, int position, int key, Block.MvOrdering expectedMv) { + LongBlock longs = (LongBlock) block; + assertThat(longs.getValueCount(position), equalTo(key % 3 + 1)); + int offset = longs.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(longs.getLong(offset + v), equalTo(-1_000L * key + v)); + } + if (key % 3 > 0) { + assertThat(longs.mvOrdering(), equalTo(expectedMv)); + } + } + + void mvIntsFromDocValues(Block block, int position, int key) { + mvInts(block, position, key, docValuesMvOrdering); + } + + void mvIntsUnordered(Block block, int position, int key) { + mvInts(block, position, key, Block.MvOrdering.UNORDERED); + } + + private void mvInts(Block block, int position, int key, Block.MvOrdering expectedMv) { + IntBlock ints = (IntBlock) block; + assertThat(ints.getValueCount(position), equalTo(key % 3 + 1)); + int offset = ints.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(ints.getInt(offset + v), equalTo(1_000 * key + v)); + } + if (key % 3 > 0) { + assertThat(ints.mvOrdering(), equalTo(expectedMv)); + } + } + + void mvShorts(Block block, int position, int key) { + IntBlock ints = (IntBlock) block; + assertThat(ints.getValueCount(position), equalTo(key % 3 + 1)); + int offset = ints.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(ints.getInt(offset + v), equalTo((int) (short) (2_000 * key + v))); + } + if (key % 3 > 0) { + assertThat(ints.mvOrdering(), equalTo(docValuesMvOrdering)); + } + } + + void mvBytes(Block block, int position, int key) { + IntBlock ints = (IntBlock) block; + assertThat(ints.getValueCount(position), equalTo(key % 3 
+ 1)); + int offset = ints.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(ints.getInt(offset + v), equalTo((int) (byte) (3_000 * key + v))); + } + if (key % 3 > 0) { + assertThat(ints.mvOrdering(), equalTo(docValuesMvOrdering)); + } + } + + void mvDoubles(Block block, int position, int key) { + DoubleBlock doubles = (DoubleBlock) block; + int offset = doubles.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(doubles.getDouble(offset + v), equalTo(key / 123_456d + v)); + } + if (key % 3 > 0) { + assertThat(doubles.mvOrdering(), equalTo(docValuesMvOrdering)); + } + } + + void mvStringsFromDocValues(Block block, int position, int key) { + mvStrings(block, position, key, docValuesMvOrdering); + } + + void mvStringsUnordered(Block block, int position, int key) { + mvStrings(block, position, key, Block.MvOrdering.UNORDERED); + } + + void mvStrings(Block block, int position, int key, Block.MvOrdering expectedMv) { + BytesRefBlock text = (BytesRefBlock) block; + assertThat(text.getValueCount(position), equalTo(key % 3 + 1)); + int offset = text.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(text.getBytesRef(offset + v, new BytesRef()).utf8ToString(), equalTo(PREFIX[v] + key)); + } + if (key % 3 > 0) { + assertThat(text.mvOrdering(), equalTo(expectedMv)); + } + } + + void mvBools(Block block, int position, int key) { + BooleanBlock bools = (BooleanBlock) block; + assertThat(bools.getValueCount(position), equalTo(key % 3 + 1)); + int offset = bools.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(bools.getBoolean(offset + v), equalTo(BOOLEANS[key % 3][v])); + } + if (key % 3 > 0) { + assertThat(bools.mvOrdering(), equalTo(docValuesMvOrdering)); + } + } + } + + class StatusChecks { + static void longsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("long", "Longs", forcedRowByRow, pageCount, 
segmentCount, readers); + } + + static void longsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("source_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void intsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("int", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void intsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("source_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void shortsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("short", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void bytesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("byte", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void doublesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("double", "Doubles", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void boolFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("bool", "Booleans", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void keywordsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("kwd", "Ordinals", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void keywordsFromStored(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + stored("stored_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void keywordsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("source_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void textFromSource(boolean 
forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("source_text", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void textFromStored(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + stored("stored_text", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvLongsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvLongsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("mv_source_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvIntsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvIntsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("mv_source_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvShortsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_short", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvBytesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_byte", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvDoublesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_double", "Doubles", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvBoolFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_bool", "Booleans", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvKeywordsFromDocValues(boolean forcedRowByRow, int 
pageCount, int segmentCount, Map readers) { + mvDocValues("mv_kwd", "Ordinals", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvKeywordsFromStored(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + stored("mv_stored_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvKeywordsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("mv_source_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvTextFromStored(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + stored("mv_stored_text", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvTextFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("mv_source_text", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void textWithDelegate(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap( + readers, + matchesMap().entry( + "text_with_delegate:row_stride:Delegating[to=kwd, impl=BlockDocValuesReader.SingletonOrdinals]", + segmentCount + ) + ); + } else { + assertMap( + readers, + matchesMap().entry( + "text_with_delegate:column_at_a_time:Delegating[to=kwd, impl=BlockDocValuesReader.SingletonOrdinals]", + lessThanOrEqualTo(pageCount) + ) + ); + } + } + + static void mvTextWithDelegate(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap( + readers, + matchesMap().entry( + "mv_text_with_delegate:row_stride:Delegating[to=mv_kwd, impl=BlockDocValuesReader.SingletonOrdinals]", + lessThanOrEqualTo(segmentCount) + ) + .entry( + "mv_text_with_delegate:row_stride:Delegating[to=mv_kwd, impl=BlockDocValuesReader.Ordinals]", + lessThanOrEqualTo(segmentCount) + ) + ); + } else { + assertMap( + readers, + matchesMap().entry( + 
"mv_text_with_delegate:column_at_a_time:Delegating[to=mv_kwd, impl=BlockDocValuesReader.SingletonOrdinals]", + lessThanOrEqualTo(pageCount) + ) + .entry( + "mv_text_with_delegate:column_at_a_time:Delegating[to=mv_kwd, impl=BlockDocValuesReader.Ordinals]", + lessThanOrEqualTo(pageCount) + ) + ); + } + } + + private static void docValues( + String name, + String type, + boolean forcedRowByRow, + int pageCount, + int segmentCount, + Map readers + ) { + if (forcedRowByRow) { + assertMap( + readers, + matchesMap().entry(name + ":row_stride:BlockDocValuesReader.Singleton" + type, lessThanOrEqualTo(segmentCount)) + ); + } else { + assertMap( + readers, + matchesMap().entry(name + ":column_at_a_time:BlockDocValuesReader.Singleton" + type, lessThanOrEqualTo(pageCount)) + ); + } + } - assertThat(doubles.getDouble(i), equalTo(key / 123_456d)); - offset = mvDoubles.getFirstValueIndex(i); - for (int v = 0; v <= key % 3; v++) { - assertThat(mvDoubles.getDouble(offset + v), equalTo(key / 123_456d + v)); + private static void mvDocValues( + String name, + String type, + boolean forcedRowByRow, + int pageCount, + int segmentCount, + Map readers + ) { + if (forcedRowByRow) { + Integer singletons = (Integer) readers.remove(name + ":row_stride:BlockDocValuesReader.Singleton" + type); + if (singletons != null) { + segmentCount -= singletons; } - if (key % 3 > 0) { - assertThat(mvDoubles.mvOrdering(), equalTo(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING)); + assertMap(readers, matchesMap().entry(name + ":row_stride:BlockDocValuesReader." + type, segmentCount)); + } else { + Integer singletons = (Integer) readers.remove(name + ":column_at_a_time:BlockDocValuesReader.Singleton" + type); + if (singletons != null) { + pageCount -= singletons; } + assertMap( + readers, + matchesMap().entry(name + ":column_at_a_time:BlockDocValuesReader." 
+ type, lessThanOrEqualTo(pageCount)) + ); } } - for (Operator op : operators) { - assertThat(((ValuesSourceReaderOperator) op).status().pagesProcessed(), equalTo(input.size())); + + static void id(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + stored("_id", "Id", forcedRowByRow, pageCount, segmentCount, readers); + } + + private static void source(String name, String type, boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + Matcher count; + if (forcedRowByRow) { + count = equalTo(segmentCount); + } else { + count = lessThanOrEqualTo(pageCount); + Integer columnAttempts = (Integer) readers.remove(name + ":column_at_a_time:null"); + assertThat(columnAttempts, not(nullValue())); + } + assertMap( + readers, + matchesMap().entry(name + ":row_stride:BlockSourceReader." + type, count) + .entry("stored_fields[requires_source:true, fields:0]", count) + ); + } + + private static void stored(String name, String type, boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + Matcher count; + if (forcedRowByRow) { + count = equalTo(segmentCount); + } else { + count = lessThanOrEqualTo(pageCount); + Integer columnAttempts = (Integer) readers.remove(name + ":column_at_a_time:null"); + assertThat(columnAttempts, not(nullValue())); + } + assertMap( + readers, + matchesMap().entry(name + ":row_stride:BlockStoredFieldsReader." 
+ type, count) + .entry("stored_fields[requires_source:false, fields:1]", count) + ); + } + + static void constantBytes(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap(readers, matchesMap().entry("constant_bytes:row_stride:constant[[66 6f 6f]]", segmentCount)); + } else { + assertMap( + readers, + matchesMap().entry("constant_bytes:column_at_a_time:constant[[66 6f 6f]]", lessThanOrEqualTo(pageCount)) + ); + } + } + + static void constantNulls(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap(readers, matchesMap().entry("null:row_stride:constant_nulls", segmentCount)); + } else { + assertMap(readers, matchesMap().entry("null:column_at_a_time:constant_nulls", lessThanOrEqualTo(pageCount))); + } } - assertDriverContext(driverContext); } - public void testValuesSourceReaderOperatorWithNulls() throws IOException { - MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("i", NumberFieldMapper.NumberType.INTEGER); - MappedFieldType longFt = new NumberFieldMapper.NumberFieldType("j", NumberFieldMapper.NumberType.LONG); - MappedFieldType doubleFt = new NumberFieldMapper.NumberFieldType("d", NumberFieldMapper.NumberType.DOUBLE); + public void testWithNulls() throws IOException { + MappedFieldType intFt = docValuesNumberField("i", NumberFieldMapper.NumberType.INTEGER); + MappedFieldType longFt = docValuesNumberField("j", NumberFieldMapper.NumberType.LONG); + MappedFieldType doubleFt = docValuesNumberField("d", NumberFieldMapper.NumberType.DOUBLE); MappedFieldType kwFt = new KeywordFieldMapper.KeywordFieldType("kw"); NumericDocValuesField intField = new NumericDocValuesField(intFt.name(), 0); @@ -384,4 +1055,85 @@ public void testValuesSourceReaderOperatorWithNulls() throws IOException { } assertDriverContext(driverContext); } + + private NumberFieldMapper.NumberFieldType docValuesNumberField(String name, NumberFieldMapper.NumberType type) { + return 
new NumberFieldMapper.NumberFieldType(name, type); + } + + private NumberFieldMapper.NumberFieldType sourceNumberField(String name, NumberFieldMapper.NumberType type) { + return new NumberFieldMapper.NumberFieldType( + name, + type, + randomBoolean(), + false, + false, + randomBoolean(), + null, + Map.of(), + null, + false, + null, + randomFrom(IndexMode.values()) + ); + } + + private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { + FieldType ft = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE); + ft.setDocValuesType(DocValuesType.NONE); + ft.setStored(true); + ft.freeze(); + return new KeywordFieldMapper.KeywordFieldType( + name, + ft, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), + true // TODO randomize - load from stored keyword fields if stored even in synthetic source + ); + } + + private KeywordFieldMapper.KeywordFieldType sourceKeywordField(String name) { + FieldType ft = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE); + ft.setDocValuesType(DocValuesType.NONE); + ft.setStored(false); + ft.freeze(); + return new KeywordFieldMapper.KeywordFieldType( + name, + ft, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), + false + ); + } + + private TextFieldMapper.TextFieldType storedTextField(String name) { + return new TextFieldMapper.TextFieldType( + name, + false, + true, + new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + true, // TODO randomize - if the field is stored we should load from the stored field even if there is source + null, + Map.of(), + false, + false + ); + } + + private TextFieldMapper.TextFieldType textFieldWithDelegate(String name, KeywordFieldMapper.KeywordFieldType delegate) { + return new 
TextFieldMapper.TextFieldType( + name, + false, + false, + new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + randomBoolean(), + delegate, + Map.of(), + false, + false + ); + } } diff --git a/x-pack/plugin/esql/qa/server/single-node/src/yamlRestTest/resources/rest-api-spec/test/20_aggs.yml b/x-pack/plugin/esql/qa/server/single-node/src/yamlRestTest/resources/rest-api-spec/test/20_aggs.yml index 1087bd5ce06eb..e94cb6ccd8e3c 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/yamlRestTest/resources/rest-api-spec/test/20_aggs.yml +++ b/x-pack/plugin/esql/qa/server/single-node/src/yamlRestTest/resources/rest-api-spec/test/20_aggs.yml @@ -22,91 +22,93 @@ setup: type: long color: type: keyword + text: + type: text - do: bulk: index: "test" refresh: true body: - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275187, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275187, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275188, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275188, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275189, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275189, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275190, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275190, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275191, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275191, "color": "red", "text": 
"rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275192, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275192, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275193, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275193, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275194, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275194, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275195, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275195, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275196, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275196, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275197, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275197, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275198, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275198, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275199, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275199, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275200, 
"color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275200, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275201, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275201, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275202, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275202, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275203, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275203, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275204, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275204, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275205, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275205, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275206, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275206, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275207, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275207, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275208, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275208, "color": "blue", 
"text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275209, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275209, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275210, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275210, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275211, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275211, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275212, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275212, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275213, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275213, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275214, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275214, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275215, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275215, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275216, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275216, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 
1674835275217, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275217, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275218, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275218, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275219, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275219, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275220, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275220, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275221, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275221, "color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275222, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275222, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275223, "color": "red" } + - { "data": 1, "count": 40, "data_d": 1, "count_d": 40, "time": 1674835275223, "color": "red", "text": "rr red" } - { "index": { } } - - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275224, "color": "blue" } + - { "data": 2, "count": 42, "data_d": 2, "count_d": 42, "time": 1674835275224, "color": "blue", "text": "bb blue" } - { "index": { } } - - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275225, "color": "green" } + - { "data": 1, "count": 44, "data_d": 1, "count_d": 44, "time": 1674835275225, 
"color": "green", "text": "gg green" } - { "index": { } } - - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275226, "color": "red" } + - { "data": 2, "count": 46, "data_d": 2, "count_d": 46, "time": 1674835275226, "color": "red", "text": "rr red" } --- "Test From": @@ -127,8 +129,10 @@ setup: - match: {columns.3.type: "long"} - match: {columns.4.name: "data_d"} - match: {columns.4.type: "double"} - - match: {columns.5.name: "time"} - - match: {columns.5.type: "long"} + - match: {columns.5.name: "text"} + - match: {columns.5.type: "text"} + - match: {columns.6.name: "time"} + - match: {columns.6.type: "long"} - length: {values: 40} --- @@ -429,11 +433,11 @@ setup: body: query: 'from test | eval nullsum = count_d + null | sort nullsum | limit 1' - - length: {columns: 7} + - length: {columns: 8} - length: {values: 1} - - match: {columns.6.name: "nullsum"} - - match: {columns.6.type: "double"} - - match: {values.0.6: null} + - match: {columns.7.name: "nullsum"} + - match: {columns.7.type: "double"} + - match: {values.0.7: null} --- "Test Eval Row With Null": @@ -501,3 +505,19 @@ setup: - match: {values.0.2: null} - match: {values.0.3: null} +--- +grouping on text: + - do: + warnings: + - "No limit defined, adding default limit of [500]" + esql.query: + body: + query: 'FROM test | STATS med=median(count) BY text | SORT med' + columnar: true + + - match: {columns.0.name: "med"} + - match: {columns.0.type: "double"} + - match: {columns.1.name: "text"} + - match: {columns.1.type: "text"} + - match: {values.0: [42.0, 43.0, 44.0]} + - match: {values.1: ["bb blue", "rr red", "gg green"]} diff --git a/x-pack/plugin/esql/qa/server/single-node/src/yamlRestTest/resources/rest-api-spec/test/50_index_patterns.yml b/x-pack/plugin/esql/qa/server/single-node/src/yamlRestTest/resources/rest-api-spec/test/50_index_patterns.yml index 5fceeee2f6e57..2098b9ee60d1e 100644 --- 
a/x-pack/plugin/esql/qa/server/single-node/src/yamlRestTest/resources/rest-api-spec/test/50_index_patterns.yml +++ b/x-pack/plugin/esql/qa/server/single-node/src/yamlRestTest/resources/rest-api-spec/test/50_index_patterns.yml @@ -28,9 +28,9 @@ disjoint_mappings: index: test1 refresh: true body: - - { "index": {} } - - { "message1": "foo1"} - - { "index": {} } + - { "index": { } } + - { "message1": "foo1" } + - { "index": { } } - { "message1": "foo2" } - do: @@ -38,9 +38,9 @@ disjoint_mappings: index: test2 refresh: true body: - - { "index": {} } + - { "index": { } } - { "message2": 1 } - - { "index": {} } + - { "index": { } } - { "message2": 2 } - do: @@ -315,9 +315,9 @@ same_name_different_type: index: test1 refresh: true body: - - { "index": {} } - - { "message": "foo1"} - - { "index": {} } + - { "index": { } } + - { "message": "foo1" } + - { "index": { } } - { "message": "foo2" } - do: @@ -325,9 +325,9 @@ same_name_different_type: index: test2 refresh: true body: - - { "index": {} } + - { "index": { } } - { "message": 1 } - - { "index": {} } + - { "index": { } } - { "message": 2 } - do: @@ -367,9 +367,9 @@ same_name_different_type_same_family: index: test1 refresh: true body: - - { "index": {} } - - { "message": "foo1"} - - { "index": {} } + - { "index": { } } + - { "message": "foo1" } + - { "index": { } } - { "message": "foo2" } - do: @@ -377,9 +377,9 @@ same_name_different_type_same_family: index: test2 refresh: true body: - - { "index": {} } + - { "index": { } } - { "message": "foo3" } - - { "index": {} } + - { "index": { } } - { "message": "foo4" } - do: diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java index edaf9d91e9771..402ae2722d7ca 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java +++ 
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java @@ -177,7 +177,10 @@ public void testTaskContents() throws Exception { } if (o.operator().equals("ValuesSourceReaderOperator[field = pause_me]")) { ValuesSourceReaderOperator.Status oStatus = (ValuesSourceReaderOperator.Status) o.status(); - assertMap(oStatus.readersBuilt(), matchesMap().entry("ScriptLongs", greaterThanOrEqualTo(1))); + assertMap( + oStatus.readersBuilt(), + matchesMap().entry("pause_me:column_at_a_time:ScriptLongs", greaterThanOrEqualTo(1)) + ); assertThat(oStatus.pagesProcessed(), greaterThanOrEqualTo(1)); valuesSourceReaders++; continue; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java index 384563cb815a4..bad7dd00d6c18 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java @@ -257,21 +257,28 @@ private void doLookup( }; List intermediateOperators = new ArrayList<>(extractFields.size() + 2); final ElementType[] mergingTypes = new ElementType[extractFields.size()]; - // extract-field operators + + // load the fields + List fields = new ArrayList<>(extractFields.size()); for (int i = 0; i < extractFields.size(); i++) { NamedExpression extractField = extractFields.get(i); final ElementType elementType = LocalExecutionPlanner.toElementType(extractField.dataType()); mergingTypes[i] = elementType; - var sources = BlockReaderFactories.factories( + var loaders = BlockReaderFactories.loaders( List.of(searchContext), extractField instanceof Alias a ? 
((NamedExpression) a.child()).name() : extractField.name(), EsqlDataTypes.isUnsupported(extractField.dataType()) ); - intermediateOperators.add(new ValuesSourceReaderOperator(blockFactory, sources, 0, extractField.name())); + fields.add(new ValuesSourceReaderOperator.FieldInfo(extractField.name(), loaders)); } + intermediateOperators.add( + new ValuesSourceReaderOperator(blockFactory, fields, List.of(searchContext.searcher().getIndexReader()), 0) + ); + // drop docs block intermediateOperators.add(droppingBlockOperator(extractFields.size() + 2, 0)); boolean singleLeaf = searchContext.searcher().getLeafContexts().size() == 1; + // merging field-values by position final int[] mergingChannels = IntStream.range(0, extractFields.size()).map(i -> i + 1).toArray(); intermediateOperators.add( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index f73ab716cb534..1dddee5ed54ea 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.planner; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; @@ -19,7 +20,7 @@ import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator; import org.elasticsearch.compute.operator.Operator; import org.elasticsearch.compute.operator.OrdinalsGroupingOperator; -import org.elasticsearch.index.mapper.BlockDocValuesReader; +import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.NestedLookup; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ 
-75,16 +76,17 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi DataType dataType = attr.dataType(); String fieldName = attr.name(); - List factories = BlockReaderFactories.factories( - searchContexts, - fieldName, - EsqlDataTypes.isUnsupported(dataType) - ); + List loaders = BlockReaderFactories.loaders(searchContexts, fieldName, EsqlDataTypes.isUnsupported(dataType)); + List readers = searchContexts.stream().map(s -> s.searcher().getIndexReader()).toList(); int docChannel = previousLayout.get(sourceAttr.id()).channel(); op = op.with( - new ValuesSourceReaderOperator.ValuesSourceReaderOperatorFactory(factories, docChannel, fieldName), + new ValuesSourceReaderOperator.Factory( + List.of(new ValuesSourceReaderOperator.FieldInfo(fieldName, loaders)), + readers, + docChannel + ), layout.build() ); } @@ -173,7 +175,8 @@ public final Operator.OperatorFactory ordinalGroupingOperatorFactory( // The grouping-by values are ready, let's group on them directly. // Costin: why are they ready and not already exposed in the layout? 
return new OrdinalsGroupingOperator.OrdinalsGroupingOperatorFactory( - BlockReaderFactories.factories(searchContexts, attrSource.name(), EsqlDataTypes.isUnsupported(attrSource.dataType())), + BlockReaderFactories.loaders(searchContexts, attrSource.name(), EsqlDataTypes.isUnsupported(attrSource.dataType())), + searchContexts.stream().map(s -> s.searcher().getIndexReader()).toList(), groupElementType, docChannel, attrSource.name(), diff --git a/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java b/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java index 6c8462c9e4948..ebe25ea1da1d9 100644 --- a/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java +++ b/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java @@ -30,7 +30,6 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.ConstantIndexFieldData; -import org.elasticsearch.index.mapper.BlockDocValuesReader; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.ConstantFieldType; import org.elasticsearch.index.mapper.DocumentParserContext; @@ -137,45 +136,10 @@ public String familyTypeName() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - // TODO build a constant block directly if (value == null) { - return BlockDocValuesReader.nulls(); + return BlockLoader.CONSTANT_NULLS; } - BytesRef bytes = new BytesRef(value); - return context -> new BlockDocValuesReader() { - private int docId; - - @Override - public int docID() { - return docId; - } - - @Override - public BlockLoader.BytesRefBuilder builder(BlockLoader.BuilderFactory factory, int 
expectedCount) { - return factory.bytesRefs(expectedCount); - } - - @Override - public BlockLoader.Block readValues(BlockLoader.BuilderFactory factory, BlockLoader.Docs docs) { - try (BlockLoader.BytesRefBuilder builder = builder(factory, docs.count())) { - for (int i = 0; i < docs.count(); i++) { - builder.appendBytesRef(bytes); - } - return builder.build(); - } - } - - @Override - public void readValuesFromSingleDoc(int docId, BlockLoader.Builder builder) { - this.docId = docId; - ((BlockLoader.BytesRefBuilder) builder).appendBytesRef(bytes); - } - - @Override - public String toString() { - return "ConstantKeyword"; - } - }; + return BlockLoader.constantBytes(new BytesRef(value)); } @Override diff --git a/x-pack/plugin/mapper-constant-keyword/src/test/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java b/x-pack/plugin/mapper-constant-keyword/src/test/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java index aaa28e28b72c9..87db404a40142 100644 --- a/x-pack/plugin/mapper-constant-keyword/src/test/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java +++ b/x-pack/plugin/mapper-constant-keyword/src/test/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java @@ -16,8 +16,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.CheckedFunction; -import org.elasticsearch.index.mapper.BlockDocValuesReader; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; @@ -229,24 +227,7 @@ protected boolean allowsNullValues() { * for newly created indices that haven't received any documents that * contain the field. 
*/ - public void testNullValueBlockLoaderReadValues() throws IOException { - testNullBlockLoader(blockReader -> (TestBlock) blockReader.readValues(TestBlock.FACTORY, TestBlock.docs(0))); - } - - /** - * Test loading blocks when there is no defined value. This is allowed - * for newly created indices that haven't received any documents that - * contain the field. - */ - public void testNullValueBlockLoaderReadValuesFromSingleDoc() throws IOException { - testNullBlockLoader(blockReader -> { - TestBlock block = (TestBlock) blockReader.builder(TestBlock.FACTORY, 1); - blockReader.readValuesFromSingleDoc(0, block); - return block; - }); - } - - private void testNullBlockLoader(CheckedFunction body) throws IOException { + public void testNullValueBlockLoader() throws IOException { MapperService mapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("field"); b.field("type", "constant_keyword"); @@ -274,7 +255,18 @@ public Set sourcePaths(String name) { iw.addDocument(doc); iw.close(); try (DirectoryReader reader = DirectoryReader.open(directory)) { - TestBlock block = body.apply(loader.reader(reader.leaves().get(0))); + TestBlock block = (TestBlock) loader.columnAtATimeReader(reader.leaves().get(0)) + .read(TestBlock.FACTORY, new BlockLoader.Docs() { + @Override + public int count() { + return 1; + } + + @Override + public int get(int i) { + return 0; + } + }); assertThat(block.get(0), nullValue()); } } diff --git a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java index 90c055f3e77bb..97ffd50d5b8c3 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java +++ b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java @@ -319,12 +319,12 @@ public 
Query rangeQuery( public BlockLoader blockLoader(BlockLoaderContext blContext) { if (indexMode == IndexMode.TIME_SERIES && metricType == TimeSeriesParams.MetricType.COUNTER) { // Counters are not supported by ESQL so we load them in null - return BlockDocValuesReader.nulls(); + return BlockLoader.CONSTANT_NULLS; } if (hasDocValues()) { - return BlockDocValuesReader.longs(name()); + return new BlockDocValuesReader.LongsBlockLoader(name()); } - return BlockSourceReader.longs(new SourceValueFetcher(blContext.sourcePaths(name()), nullValueFormatted) { + return new BlockSourceReader.LongsBlockLoader(new SourceValueFetcher(blContext.sourcePaths(name()), nullValueFormatted) { @Override protected Object parseSourceValue(Object value) { if (value.equals("")) { diff --git a/x-pack/plugin/mapper-version/src/main/java/org/elasticsearch/xpack/versionfield/VersionStringFieldMapper.java b/x-pack/plugin/mapper-version/src/main/java/org/elasticsearch/xpack/versionfield/VersionStringFieldMapper.java index f4fb83fd9a91c..1ed63bb17e201 100644 --- a/x-pack/plugin/mapper-version/src/main/java/org/elasticsearch/xpack/versionfield/VersionStringFieldMapper.java +++ b/x-pack/plugin/mapper-version/src/main/java/org/elasticsearch/xpack/versionfield/VersionStringFieldMapper.java @@ -295,7 +295,7 @@ protected BytesRef indexedValueForSearch(Object value) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { failIfNoDocValues(); - return BlockDocValuesReader.bytesRefsFromOrds(name()); + return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name()); } @Override diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 480704b89ca60..1954e291b1a7f 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ 
b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -855,9 +855,7 @@ public Query termsQuery(Collection values, SearchExecutionContext context) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (hasDocValues()) { - // TODO it'd almost certainly be faster to drop directly to doc values like we do with keyword but this'll do for now - IndexFieldData fd = new StringBinaryIndexFieldData(name(), CoreValuesSourceType.KEYWORD, null); - return BlockDocValuesReader.bytesRefsFromDocValues(context -> fd.load(context).getBytesValues()); + return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name()); } return null; }