From f400839f54e2eabec7fb8356ba20ea02cc6a315d Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Thu, 14 Nov 2024 15:31:22 +0100 Subject: [PATCH] [ESQL] Adding a Lucene min/max operator (#113785) This operator only optimises the computation of the min/max value if the field contains a BKD tree, no deletes and we are visiting all documents for the segment. Otherwise it computes the value iterating on a tight loop. --- .../compute/lucene/LuceneMaxFactory.java | 146 ++++++++++++ .../compute/lucene/LuceneMinFactory.java | 146 ++++++++++++ .../compute/lucene/LuceneMinMaxOperator.java | 179 +++++++++++++++ .../lucene/LuceneMaxDoubleOperatorTests.java | 88 ++++++++ .../lucene/LuceneMaxFloatOperatorTests.java | 88 ++++++++ .../lucene/LuceneMaxIntOperatorTests.java | 87 ++++++++ .../lucene/LuceneMaxLongOperatorTests.java | 87 ++++++++ .../lucene/LuceneMaxOperatorTestCase.java | 210 ++++++++++++++++++ .../lucene/LuceneMinDoubleOperatorTests.java | 88 ++++++++ .../lucene/LuceneMinFloatOperatorTests.java | 89 ++++++++ .../lucene/LuceneMinIntegerOperatorTests.java | 87 ++++++++ .../lucene/LuceneMinLongOperatorTests.java | 87 ++++++++ .../lucene/LuceneMinOperatorTestCase.java | 210 ++++++++++++++++++ 13 files changed, 1592 insertions(+) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMaxFactory.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMinFactory.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMinMaxOperator.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxDoubleOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxFloatOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxIntOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxLongOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxOperatorTestCase.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinDoubleOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinFloatOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinIntegerOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinLongOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinOperatorTestCase.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMaxFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMaxFactory.java new file mode 100644 index 0000000000000..ba7de22b1b821 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMaxFactory.java @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.SourceOperator; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; +import java.util.List; +import java.util.function.Function; + +/** + * Factory that generates an operator that finds the max value of a field using the {@link LuceneMinMaxOperator}. + */ +public final class LuceneMaxFactory extends LuceneOperator.Factory { + + public enum NumberType implements LuceneMinMaxOperator.NumberType { + INTEGER { + @Override + public Block buildResult(BlockFactory blockFactory, long result, int pageSize) { + return blockFactory.newConstantIntBlockWith(Math.toIntExact(result), pageSize); + } + + @Override + public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) { + return blockFactory.newConstantIntBlockWith(Integer.MIN_VALUE, pageSize); + } + + @Override + long bytesToLong(byte[] bytes) { + return NumericUtils.sortableBytesToInt(bytes, 0); + } + }, + FLOAT { + @Override + public Block buildResult(BlockFactory blockFactory, long result, int pageSize) { + return blockFactory.newConstantFloatBlockWith(NumericUtils.sortableIntToFloat(Math.toIntExact(result)), pageSize); + } + + @Override + public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) { + return blockFactory.newConstantFloatBlockWith(-Float.MAX_VALUE, pageSize); + } + + @Override + long bytesToLong(byte[] bytes) { + return NumericUtils.sortableBytesToInt(bytes, 0); + } + }, + LONG { + @Override + public Block buildResult(BlockFactory blockFactory, long result, int pageSize) { + return blockFactory.newConstantLongBlockWith(result, pageSize); + } + + @Override + public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) { + return blockFactory.newConstantLongBlockWith(Long.MIN_VALUE, pageSize); + } + + @Override + long bytesToLong(byte[] bytes) { + return NumericUtils.sortableBytesToLong(bytes, 0); + } + }, + DOUBLE { + @Override + public Block buildResult(BlockFactory blockFactory, long result, int pageSize) { + return blockFactory.newConstantDoubleBlockWith(NumericUtils.sortableLongToDouble(result), pageSize); + } + + @Override + public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) { + return blockFactory.newConstantDoubleBlockWith(-Double.MAX_VALUE, pageSize); + } + + @Override + long bytesToLong(byte[] bytes) { + return NumericUtils.sortableBytesToLong(bytes, 0); + } + }; + + public final NumericDocValues multiValueMode(SortedNumericDocValues sortedNumericDocValues) { + return MultiValueMode.MAX.select(sortedNumericDocValues); + } + + public final long fromPointValues(PointValues pointValues) throws IOException { + return bytesToLong(pointValues.getMaxPackedValue()); + } + + public final long evaluate(long value1, long value2) { + return Math.max(value1, value2); + } + + abstract long bytesToLong(byte[] bytes); + } + + private final String fieldName; + private final NumberType numberType; + + public LuceneMaxFactory( + List contexts, + Function queryFunction, + DataPartitioning dataPartitioning, + int taskConcurrency, + String fieldName, + NumberType numberType, + int limit + ) { + super(contexts, queryFunction, dataPartitioning, taskConcurrency, limit, ScoreMode.COMPLETE_NO_SCORES); + this.fieldName = fieldName; + this.numberType = numberType; + } + + @Override + public SourceOperator get(DriverContext driverContext) { + return new LuceneMinMaxOperator(driverContext.blockFactory(), sliceQueue, fieldName, numberType, limit, Long.MIN_VALUE); + } + + @Override + public String describe() { + return "LuceneMaxOperator[type = " + + numberType.name() + + ", dataPartitioning = " + + dataPartitioning + + ", fieldName = " + + fieldName + + ", limit = " + + limit + + "]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMinFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMinFactory.java new file mode 100644 index 0000000000000..e3c6c8310373d --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMinFactory.java @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.SourceOperator; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; +import java.util.List; +import java.util.function.Function; + +/** + * Factory that generates an operator that finds the min value of a field using the {@link LuceneMinMaxOperator}. + */ +public final class LuceneMinFactory extends LuceneOperator.Factory { + + public enum NumberType implements LuceneMinMaxOperator.NumberType { + INTEGER { + @Override + public Block buildResult(BlockFactory blockFactory, long result, int pageSize) { + return blockFactory.newConstantIntBlockWith(Math.toIntExact(result), pageSize); + } + + @Override + public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) { + return blockFactory.newConstantIntBlockWith(Integer.MAX_VALUE, pageSize); + } + + @Override + long bytesToLong(byte[] bytes) { + return NumericUtils.sortableBytesToInt(bytes, 0); + } + }, + FLOAT { + @Override + public Block buildResult(BlockFactory blockFactory, long result, int pageSize) { + return blockFactory.newConstantFloatBlockWith(NumericUtils.sortableIntToFloat(Math.toIntExact(result)), pageSize); + } + + @Override + public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) { + return blockFactory.newConstantFloatBlockWith(Float.POSITIVE_INFINITY, pageSize); + } + + @Override + long bytesToLong(byte[] bytes) { + return NumericUtils.sortableBytesToInt(bytes, 0); + } + }, + LONG { + @Override + public Block buildResult(BlockFactory blockFactory, long result, int pageSize) { + return blockFactory.newConstantLongBlockWith(result, pageSize); + } + + @Override + public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) { + return blockFactory.newConstantLongBlockWith(Long.MAX_VALUE, pageSize); + } + + @Override + long bytesToLong(byte[] bytes) { + return NumericUtils.sortableBytesToLong(bytes, 0); + } + }, + DOUBLE { + @Override + public Block buildResult(BlockFactory blockFactory, long result, int pageSize) { + return blockFactory.newConstantDoubleBlockWith(NumericUtils.sortableLongToDouble(result), pageSize); + } + + @Override + public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) { + return blockFactory.newConstantDoubleBlockWith(Double.POSITIVE_INFINITY, pageSize); + } + + @Override + long bytesToLong(byte[] bytes) { + return NumericUtils.sortableBytesToLong(bytes, 0); + } + }; + + public final NumericDocValues multiValueMode(SortedNumericDocValues sortedNumericDocValues) { + return MultiValueMode.MIN.select(sortedNumericDocValues); + } + + public final long fromPointValues(PointValues pointValues) throws IOException { + return bytesToLong(pointValues.getMinPackedValue()); + } + + public final long evaluate(long value1, long value2) { + return Math.min(value1, value2); + } + + abstract long bytesToLong(byte[] bytes); + } + + private final String fieldName; + private final NumberType numberType; + + public LuceneMinFactory( + List contexts, + Function queryFunction, + DataPartitioning dataPartitioning, + int taskConcurrency, + String fieldName, + NumberType numberType, + int limit + ) { + super(contexts, queryFunction, dataPartitioning, taskConcurrency, limit, ScoreMode.COMPLETE_NO_SCORES); + this.fieldName = fieldName; + this.numberType = numberType; + } + + @Override + public SourceOperator get(DriverContext driverContext) { + return new LuceneMinMaxOperator(driverContext.blockFactory(), sliceQueue, fieldName, numberType, limit, Long.MAX_VALUE); + } + + @Override + public String describe() { + return "LuceneMinOperator[type = " + + numberType.name() + + ", dataPartitioning = " + + dataPartitioning + + ", fieldName = " + + fieldName + + ", limit = " + + limit + + "]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMinMaxOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMinMaxOperator.java new file mode 100644 index 0000000000000..c41c31345df4e --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneMinMaxOperator.java @@ -0,0 +1,179 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorable; +import org.apache.lucene.util.Bits; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; + +/** + * Operator that finds the min or max value of a field using Lucene searches + * and returns always one entry that mimics the min/max aggregation internal state: + * 1. the min/max with a type depending on the {@link NumberType} (The initial value if no doc is seen) + * 2. a bool flag (seen) that is true if at least one document has been matched, otherwise false + *

+ * It works for fields that index data using lucene {@link PointValues} and/or {@link SortedNumericDocValues}. + * It assumes that {@link SortedNumericDocValues} are always present. + */ +final class LuceneMinMaxOperator extends LuceneOperator { + + sealed interface NumberType permits LuceneMinFactory.NumberType, LuceneMaxFactory.NumberType { + + /** Extract the competitive value from the {@link PointValues} */ + long fromPointValues(PointValues pointValues) throws IOException; + + /** Wraps the provided {@link SortedNumericDocValues} with a {@link MultiValueMode} */ + NumericDocValues multiValueMode(SortedNumericDocValues sortedNumericDocValues); + + /** Return the competitive value between {@code value1} and {@code value2} */ + long evaluate(long value1, long value2); + + /** Build the corresponding block */ + Block buildResult(BlockFactory blockFactory, long result, int pageSize); + + /** Build the corresponding block */ + Block buildEmptyResult(BlockFactory blockFactory, int pageSize); + } + + private static final int PAGE_SIZE = 1; + + private boolean seen = false; + private int remainingDocs; + private long result; + + private final NumberType numberType; + + private final String fieldName; + + LuceneMinMaxOperator( + BlockFactory blockFactory, + LuceneSliceQueue sliceQueue, + String fieldName, + NumberType numberType, + int limit, + long initialResult + ) { + super(blockFactory, PAGE_SIZE, sliceQueue); + this.remainingDocs = limit; + this.numberType = numberType; + this.fieldName = fieldName; + this.result = initialResult; + } + + @Override + public boolean isFinished() { + return doneCollecting || remainingDocs == 0; + } + + @Override + public void finish() { + doneCollecting = true; + } + + @Override + public Page getCheckedOutput() throws IOException { + if (isFinished()) { + assert remainingDocs <= 0 : remainingDocs; + return null; + } + final long start = System.nanoTime(); + try { + final LuceneScorer scorer = getCurrentOrLoadNextScorer(); + // no scorer means no more docs + if (scorer == null) { + remainingDocs = 0; + } else { + final LeafReader reader = scorer.leafReaderContext().reader(); + final Query query = scorer.weight().getQuery(); + if (query == null || query instanceof MatchAllDocsQuery) { + final PointValues pointValues = reader.getPointValues(fieldName); + // only apply shortcut if we are visiting all documents, otherwise we need to trigger the search + // on doc values as that's the order they are visited without push down. + if (pointValues != null && pointValues.getDocCount() >= remainingDocs) { + final Bits liveDocs = reader.getLiveDocs(); + if (liveDocs == null) { + // In data partitioning, we might have got the same segment previous + // to this but with a different document range. And we're totally ignoring that range. + // We're just reading the min/max from the segment. That's sneaky, but it makes sense. + // And if we get another slice in the same segment we may as well skip it - + // we've already looked. + if (scorer.position() == 0) { + seen = true; + result = numberType.evaluate(result, numberType.fromPointValues(pointValues)); + if (remainingDocs != NO_LIMIT) { + remainingDocs -= pointValues.getDocCount(); + } + } + scorer.markAsDone(); + } + } + } + if (scorer.isDone() == false) { + // could not apply shortcut, trigger the search + final NumericDocValues values = numberType.multiValueMode(reader.getSortedNumericDocValues(fieldName)); + final LeafCollector leafCollector = new LeafCollector() { + @Override + public void setScorer(Scorable scorer) {} + + @Override + public void collect(int doc) throws IOException { + assert remainingDocs > 0; + remainingDocs--; + if (values.advanceExact(doc)) { + seen = true; + result = numberType.evaluate(result, values.longValue()); + } + } + }; + scorer.scoreNextRange(leafCollector, reader.getLiveDocs(), remainingDocs); + } + } + + Page page = null; + // emit only one page + if (remainingDocs <= 0 && pagesEmitted == 0) { + pagesEmitted++; + Block result = null; + BooleanBlock seen = null; + try { + result = this.seen + ? numberType.buildResult(blockFactory, this.result, PAGE_SIZE) + : numberType.buildEmptyResult(blockFactory, PAGE_SIZE); + seen = blockFactory.newConstantBooleanBlockWith(this.seen, PAGE_SIZE); + page = new Page(PAGE_SIZE, result, seen); + } finally { + if (page == null) { + Releasables.closeExpectNoException(result, seen); + } + } + } + return page; + } finally { + processingNanos += System.nanoTime() - start; + } + } + + @Override + protected void describe(StringBuilder sb) { + sb.append(", remainingDocs=").append(remainingDocs); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxDoubleOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxDoubleOperatorTests.java new file mode 100644 index 0000000000000..4cb113457b23f --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxDoubleOperatorTests.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.aggregation.MaxDoubleAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class LuceneMaxDoubleOperatorTests extends LuceneMaxOperatorTestCase { + + @Override + public LuceneMaxFactory.NumberType getNumberType() { + return LuceneMaxFactory.NumberType.DOUBLE; + } + + @Override + protected NumberTypeTest getNumberTypeTest() { + return new NumberTypeTest() { + + double max = -Double.MAX_VALUE; + + @Override + public IndexableField newPointField() { + return new DoubleField(FIELD_NAME, newValue(), randomFrom(Field.Store.values())); + } + + @Override + public IndexableField newDocValuesField() { + return new SortedNumericDocValuesField(FIELD_NAME, NumericUtils.doubleToSortableLong(newValue())); + } + + private double newValue() { + final double value = randomDoubleBetween(-Double.MAX_VALUE, Double.MAX_VALUE, true); + max = Math.max(max, value); + return value; + } + + @Override + public void assertPage(Page page) { + assertThat(page.getBlock(0), instanceOf(DoubleBlock.class)); + final DoubleBlock db = page.getBlock(0); + assertThat(page.getBlock(1), instanceOf(BooleanBlock.class)); + final BooleanBlock bb = page.getBlock(1); + if (bb.getBoolean(0) == false) { + assertThat(db.getDouble(0), equalTo(-Double.MAX_VALUE)); + } else { + assertThat(db.getDouble(0), lessThanOrEqualTo(max)); + } + } + + @Override + public AggregatorFunction newAggregatorFunction(DriverContext context) { + return new MaxDoubleAggregatorFunctionSupplier(List.of(0, 1)).aggregator(context); + } + + @Override + public void assertMaxValue(Block block, boolean exactResult) { + assertThat(block, instanceOf(DoubleBlock.class)); + final DoubleBlock db = (DoubleBlock) block; + if (exactResult) { + assertThat(db.getDouble(0), equalTo(max)); + } else { + assertThat(db.getDouble(0), lessThanOrEqualTo(max)); + } + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxFloatOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxFloatOperatorTests.java new file mode 100644 index 0000000000000..4a009a2d84c66 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxFloatOperatorTests.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.aggregation.MaxFloatAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class LuceneMaxFloatOperatorTests extends LuceneMaxOperatorTestCase { + + @Override + public LuceneMaxFactory.NumberType getNumberType() { + return LuceneMaxFactory.NumberType.FLOAT; + } + + @Override + protected NumberTypeTest getNumberTypeTest() { + return new NumberTypeTest() { + + float max = -Float.MAX_VALUE; + + @Override + public IndexableField newPointField() { + return new FloatField(FIELD_NAME, newValue(), randomFrom(Field.Store.values())); + } + + private float newValue() { + final float value = randomFloatBetween(-Float.MAX_VALUE, Float.MAX_VALUE, true); + max = Math.max(max, value); + return value; + } + + @Override + public IndexableField newDocValuesField() { + return new SortedNumericDocValuesField(FIELD_NAME, NumericUtils.floatToSortableInt(newValue())); + } + + @Override + public void assertPage(Page page) { + assertThat(page.getBlock(0), instanceOf(FloatBlock.class)); + final FloatBlock db = page.getBlock(0); + assertThat(page.getBlock(1), instanceOf(BooleanBlock.class)); + final BooleanBlock bb = page.getBlock(1); + if (bb.getBoolean(0) == false) { + assertThat(db.getFloat(0), equalTo(-Float.MAX_VALUE)); + } else { + assertThat(db.getFloat(0), lessThanOrEqualTo(max)); + } + } + + @Override + public AggregatorFunction newAggregatorFunction(DriverContext context) { + return new MaxFloatAggregatorFunctionSupplier(List.of(0, 1)).aggregator(context); + } + + @Override + public void assertMaxValue(Block block, boolean exactResult) { + assertThat(block, instanceOf(FloatBlock.class)); + final FloatBlock fb = (FloatBlock) block; + if (exactResult) { + assertThat(fb.getFloat(0), equalTo(max)); + } else { + assertThat(fb.getFloat(0), lessThanOrEqualTo(max)); + } + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxIntOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxIntOperatorTests.java new file mode 100644 index 0000000000000..a6118481ca43d --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxIntOperatorTests.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.aggregation.MaxIntAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class LuceneMaxIntOperatorTests extends LuceneMaxOperatorTestCase { + + @Override + public LuceneMaxFactory.NumberType getNumberType() { + return LuceneMaxFactory.NumberType.INTEGER; + } + + @Override + protected NumberTypeTest getNumberTypeTest() { + return new NumberTypeTest() { + + int max = Integer.MIN_VALUE; + + @Override + public IndexableField newPointField() { + return new IntField(FIELD_NAME, newValue(), randomFrom(Field.Store.values())); + } + + private int newValue() { + final int value = randomInt(); + max = Math.max(max, value); + return value; + } + + @Override + public IndexableField newDocValuesField() { + return new SortedNumericDocValuesField(FIELD_NAME, newValue()); + } + + @Override + public void assertPage(Page page) { + assertThat(page.getBlock(0), instanceOf(IntBlock.class)); + final IntBlock db = page.getBlock(0); + assertThat(page.getBlock(1), instanceOf(BooleanBlock.class)); + final BooleanBlock bb = page.getBlock(1); + if (bb.getBoolean(0) == false) { + assertThat(db.getInt(0), equalTo(Integer.MIN_VALUE)); + } else { + assertThat(db.getInt(0), lessThanOrEqualTo(max)); + } + } + + @Override + public AggregatorFunction newAggregatorFunction(DriverContext context) { + return new MaxIntAggregatorFunctionSupplier(List.of(0, 1)).aggregator(context); + } + + @Override + public void assertMaxValue(Block block, boolean exactResult) { + assertThat(block, instanceOf(IntBlock.class)); + final IntBlock ib = (IntBlock) block; + if (exactResult) { + assertThat(ib.getInt(0), equalTo(max)); + } else { + assertThat(ib.getInt(0), lessThanOrEqualTo(max)); + } + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxLongOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxLongOperatorTests.java new file mode 100644 index 0000000000000..894c8e862123e --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxLongOperatorTests.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.aggregation.MaxLongAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class LuceneMaxLongOperatorTests extends LuceneMaxOperatorTestCase { + + @Override + public LuceneMaxFactory.NumberType getNumberType() { + return LuceneMaxFactory.NumberType.LONG; + } + + @Override + protected NumberTypeTest getNumberTypeTest() { + return new NumberTypeTest() { + + long max = Long.MIN_VALUE; + + @Override + public IndexableField newPointField() { + return new LongField(FIELD_NAME, newValue(), randomFrom(Field.Store.values())); + } + + @Override + public IndexableField newDocValuesField() { + return new SortedNumericDocValuesField(FIELD_NAME, newValue()); + } + + private long newValue() { + final long value = randomLong(); + max = Math.max(max, value); + return value; + } + + @Override + public void assertPage(Page page) { + assertThat(page.getBlock(0), instanceOf(LongBlock.class)); + final LongBlock db = page.getBlock(0); + assertThat(page.getBlock(1), instanceOf(BooleanBlock.class)); + final BooleanBlock bb = page.getBlock(1); + if (bb.getBoolean(0) == false) { + assertThat(db.getLong(0), equalTo(Long.MIN_VALUE)); + } else { + assertThat(db.getLong(0), lessThanOrEqualTo(max)); + } + } + + @Override + public AggregatorFunction newAggregatorFunction(DriverContext context) { + return new MaxLongAggregatorFunctionSupplier(List.of(0, 1)).aggregator(context); + } + + @Override + public void assertMaxValue(Block block, boolean exactResult) { + assertThat(block, instanceOf(LongBlock.class)); + final LongBlock lb = (LongBlock) block; + if (exactResult) { + assertThat(lb.getLong(0), equalTo(max)); + } else { + assertThat(lb.getLong(0), lessThanOrEqualTo(max)); + } + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxOperatorTestCase.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxOperatorTestCase.java new file mode 100644 index 0000000000000..f5214dccbd00c --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMaxOperatorTestCase.java @@ -0,0 +1,210 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.elasticsearch.common.breaker.CircuitBreakingException; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.AnyOperatorTestCase; +import org.elasticsearch.compute.operator.Driver; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.OperatorTestCase; +import org.elasticsearch.compute.operator.TestResultPageSinkOperator; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.indices.CrankyCircuitBreakerService; +import org.hamcrest.Matcher; +import org.junit.After; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.matchesRegex; + +public abstract class LuceneMaxOperatorTestCase extends AnyOperatorTestCase { + + protected interface NumberTypeTest { + + IndexableField newPointField(); + + IndexableField newDocValuesField(); + + void assertPage(Page page); + + AggregatorFunction newAggregatorFunction(DriverContext context); + + void assertMaxValue(Block block, boolean exactResult); + + } + + protected abstract NumberTypeTest getNumberTypeTest(); + + protected abstract LuceneMaxFactory.NumberType getNumberType(); + + protected static final String FIELD_NAME = "field"; + private final Directory directory = newDirectory(); + private IndexReader reader; + + @After + public void closeIndex() throws IOException { + IOUtils.close(reader, directory); + } + + @Override + protected LuceneMaxFactory simple() { + return simple(getNumberTypeTest(), randomFrom(DataPartitioning.values()), between(1, 10_000), 100); + } + + private LuceneMaxFactory simple(NumberTypeTest numberTypeTest, DataPartitioning dataPartitioning, int numDocs, int limit) { + final boolean enableShortcut = randomBoolean(); + final boolean enableMultiValue = randomBoolean(); + final int commitEvery = Math.max(1, numDocs / 10); + try ( + RandomIndexWriter writer = new RandomIndexWriter( + random(), + directory, + newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE) + ) + ) { + + for (int d = 0; d < numDocs; d++) { + final var numValues = enableMultiValue ? randomIntBetween(1, 5) : 1; + final var doc = new Document(); + for (int i = 0; i < numValues; i++) { + if (enableShortcut) { + doc.add(numberTypeTest.newPointField()); + } else { + doc.add(numberTypeTest.newDocValuesField()); + } + } + writer.addDocument(doc); + if (d % commitEvery == 0) { + writer.commit(); + } + } + reader = writer.getReader(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + final ShardContext ctx = new LuceneSourceOperatorTests.MockShardContext(reader, 0); + final Query query; + if (enableShortcut && randomBoolean()) { + query = new MatchAllDocsQuery(); + } else { + query = SortedNumericDocValuesField.newSlowRangeQuery(FIELD_NAME, Long.MIN_VALUE, Long.MAX_VALUE); + } + return new LuceneMaxFactory(List.of(ctx), c -> query, dataPartitioning, between(1, 8), FIELD_NAME, getNumberType(), limit); + } + + public void testSimple() { + testSimple(this::driverContext); + } + + public void testSimpleWithCranky() { + try { + testSimple(this::crankyDriverContext); + logger.info("cranky didn't break"); + } catch (CircuitBreakingException e) { + logger.info("broken", e); + assertThat(e.getMessage(), equalTo(CrankyCircuitBreakerService.ERROR_MESSAGE)); + } + } + + private void testSimple(Supplier contexts) { + int size = between(1_000, 20_000); + int limit = randomBoolean() ? between(10, size) : Integer.MAX_VALUE; + testMax(contexts, size, limit); + } + + public void testEmpty() { + testEmpty(this::driverContext); + } + + public void testEmptyWithCranky() { + try { + testEmpty(this::crankyDriverContext); + logger.info("cranky didn't break"); + } catch (CircuitBreakingException e) { + logger.info("broken", e); + assertThat(e.getMessage(), equalTo(CrankyCircuitBreakerService.ERROR_MESSAGE)); + } + } + + private void testEmpty(Supplier contexts) { + int limit = randomBoolean() ? between(10, 10000) : Integer.MAX_VALUE; + testMax(contexts, 0, limit); + } + + private void testMax(Supplier contexts, int size, int limit) { + DataPartitioning dataPartitioning = randomFrom(DataPartitioning.values()); + NumberTypeTest numberTypeTest = getNumberTypeTest(); + LuceneMaxFactory factory = simple(numberTypeTest, dataPartitioning, size, limit); + List results = new CopyOnWriteArrayList<>(); + List drivers = new ArrayList<>(); + int taskConcurrency = between(1, 8); + for (int i = 0; i < taskConcurrency; i++) { + DriverContext ctx = contexts.get(); + drivers.add(new Driver(ctx, factory.get(ctx), List.of(), new TestResultPageSinkOperator(results::add), () -> {})); + } + OperatorTestCase.runDriver(drivers); + assertThat(results.size(), lessThanOrEqualTo(taskConcurrency)); + + try (AggregatorFunction aggregatorFunction = numberTypeTest.newAggregatorFunction(contexts.get())) { + for (Page page : results) { + assertThat(page.getPositionCount(), is(1)); // one row + assertThat(page.getBlockCount(), is(2)); // two blocks + numberTypeTest.assertPage(page); + aggregatorFunction.addIntermediateInput(page); + } + + final Block[] result = new Block[1]; + try { + aggregatorFunction.evaluateFinal(result, 0, contexts.get()); + if (result[0].areAllValuesNull() == false) { + boolean exactResult = size <= limit; + numberTypeTest.assertMaxValue(result[0], exactResult); + } + } finally { + Releasables.close(result); + } + } + } + + @Override + protected final Matcher expectedToStringOfSimple() { + return matchesRegex("LuceneMinMaxOperator\\[maxPageSize = \\d+, remainingDocs=100]"); + } + + @Override + protected final Matcher expectedDescriptionOfSimple() { + return matchesRegex( + "LuceneMaxOperator\\[type = " + + getNumberType().name() + + ", dataPartitioning = (DOC|SHARD|SEGMENT), fieldName = " + + FIELD_NAME + + ", limit = 100]" + ); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinDoubleOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinDoubleOperatorTests.java new file mode 100644 index 0000000000000..5fef2d4897030 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinDoubleOperatorTests.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.aggregation.MinDoubleAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.instanceOf; + +public class LuceneMinDoubleOperatorTests extends LuceneMinOperatorTestCase { + + @Override + public LuceneMinFactory.NumberType getNumberType() { + return LuceneMinFactory.NumberType.DOUBLE; + } + + @Override + protected NumberTypeTest getNumberTypeTest() { + return new NumberTypeTest() { + + double min = Double.MAX_VALUE; + + @Override + public IndexableField newPointField() { + return new DoubleField(FIELD_NAME, newValue(), randomFrom(Field.Store.values())); + } + + @Override + public IndexableField newDocValuesField() { + return new SortedNumericDocValuesField(FIELD_NAME, NumericUtils.doubleToSortableLong(newValue())); + } + + private double newValue() { + final double value = randomDoubleBetween(-Double.MAX_VALUE, Double.MAX_VALUE, true); + min = Math.min(min, value); + return value; + } + + @Override + public void assertPage(Page page) { + assertThat(page.getBlock(0), instanceOf(DoubleBlock.class)); + final DoubleBlock db = page.getBlock(0); + assertThat(page.getBlock(1), instanceOf(BooleanBlock.class)); + final BooleanBlock bb = page.getBlock(1); + if (bb.getBoolean(0) == false) { + assertThat(db.getDouble(0), equalTo(Double.POSITIVE_INFINITY)); + } else { + assertThat(db.getDouble(0), greaterThanOrEqualTo(min)); + } + } + + @Override + public AggregatorFunction newAggregatorFunction(DriverContext context) { + return new MinDoubleAggregatorFunctionSupplier(List.of(0, 1)).aggregator(context); + } + + @Override + public void assertMinValue(Block block, boolean exactResult) { + assertThat(block, instanceOf(DoubleBlock.class)); + final DoubleBlock db = (DoubleBlock) block; + if (exactResult) { + assertThat(db.getDouble(0), equalTo(min)); + } else { + assertThat(db.getDouble(0), greaterThanOrEqualTo(min)); + } + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinFloatOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinFloatOperatorTests.java new file mode 100644 index 0000000000000..41c8751c08a96 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinFloatOperatorTests.java @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.aggregation.MinFloatAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.instanceOf; + +public class LuceneMinFloatOperatorTests extends LuceneMinOperatorTestCase { + + @Override + public LuceneMinFactory.NumberType getNumberType() { + return LuceneMinFactory.NumberType.FLOAT; + } + + @Override + protected NumberTypeTest getNumberTypeTest() { + return new NumberTypeTest() { + + float min = Float.MAX_VALUE; + + @Override + public IndexableField newPointField() { + return new FloatField(FIELD_NAME, newValue(), randomFrom(Field.Store.values())); + } + + @Override + public IndexableField newDocValuesField() { + return new SortedNumericDocValuesField(FIELD_NAME, NumericUtils.floatToSortableInt(newValue())); + } + + private float newValue() { + final float value = randomFloatBetween(-Float.MAX_VALUE, Float.MAX_VALUE, true); + min = Math.min(min, value); + return value; + } + + @Override + public void assertPage(Page page) { + assertThat(page.getBlock(0), instanceOf(FloatBlock.class)); + final FloatBlock db = page.getBlock(0); + assertThat(page.getBlock(1), instanceOf(BooleanBlock.class)); + final BooleanBlock bb = page.getBlock(1); + final float v = db.getFloat(0); + if (bb.getBoolean(0) == false) { + assertThat(db.getFloat(0), equalTo(Float.POSITIVE_INFINITY)); + } else { + assertThat(db.getFloat(0), greaterThanOrEqualTo(min)); + } + } + + @Override + public AggregatorFunction newAggregatorFunction(DriverContext context) { + return new MinFloatAggregatorFunctionSupplier(List.of(0, 1)).aggregator(context); + } + + @Override + public void assertMinValue(Block block, boolean exactResult) { + assertThat(block, instanceOf(FloatBlock.class)); + final FloatBlock fb = (FloatBlock) block; + if (exactResult) { + assertThat(fb.getFloat(0), equalTo(min)); + } else { + assertThat(fb.getFloat(0), greaterThanOrEqualTo(min)); + } + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinIntegerOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinIntegerOperatorTests.java new file mode 100644 index 0000000000000..5d2c867f4f660 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinIntegerOperatorTests.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.aggregation.MinIntAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.instanceOf; + +public class LuceneMinIntegerOperatorTests extends LuceneMinOperatorTestCase { + + @Override + public LuceneMinFactory.NumberType getNumberType() { + return LuceneMinFactory.NumberType.INTEGER; + } + + @Override + protected NumberTypeTest getNumberTypeTest() { + return new NumberTypeTest() { + + int min = Integer.MAX_VALUE; + + @Override + public IndexableField newPointField() { + return new IntField(FIELD_NAME, newValue(), randomFrom(Field.Store.values())); + } + + @Override + public IndexableField newDocValuesField() { + return new SortedNumericDocValuesField(FIELD_NAME, newValue()); + } + + private int newValue() { + final int value = randomInt(); + min = Math.min(min, value); + return value; + } + + @Override + public void assertPage(Page page) { + assertThat(page.getBlock(0), instanceOf(IntBlock.class)); + IntBlock db = page.getBlock(0); + assertThat(page.getBlock(1), instanceOf(BooleanBlock.class)); + final BooleanBlock bb = page.getBlock(1); + if (bb.getBoolean(0) == false) { + assertThat(db.getInt(0), equalTo(Integer.MAX_VALUE)); + } else { + assertThat(db.getInt(0), greaterThanOrEqualTo(min)); + } + } + + @Override + public AggregatorFunction newAggregatorFunction(DriverContext context) { + return new MinIntAggregatorFunctionSupplier(List.of(0, 1)).aggregator(context); + } + + @Override + public void assertMinValue(Block block, boolean exactResult) { + assertThat(block, instanceOf(IntBlock.class)); + final IntBlock ib = (IntBlock) block; + if (exactResult) { + assertThat(ib.getInt(0), equalTo(min)); + } else { + assertThat(ib.getInt(0), greaterThanOrEqualTo(min)); + } + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinLongOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinLongOperatorTests.java new file mode 100644 index 0000000000000..15c34f5853ae2 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinLongOperatorTests.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.aggregation.MinLongAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.instanceOf; + +public class LuceneMinLongOperatorTests extends LuceneMinOperatorTestCase { + + @Override + public LuceneMinFactory.NumberType getNumberType() { + return LuceneMinFactory.NumberType.LONG; + } + + @Override + protected NumberTypeTest getNumberTypeTest() { + return new NumberTypeTest() { + + long min = Long.MAX_VALUE; + + @Override + public IndexableField newPointField() { + return new LongField(FIELD_NAME, newValue(), randomFrom(Field.Store.values())); + } + + @Override + public IndexableField newDocValuesField() { + return new SortedNumericDocValuesField(FIELD_NAME, newValue()); + } + + private long newValue() { + final long value = randomLong(); + min = Math.min(min, value); + return value; + } + + @Override + public void assertPage(Page page) { + assertThat(page.getBlock(0), instanceOf(LongBlock.class)); + final LongBlock db = page.getBlock(0); + assertThat(page.getBlock(1), instanceOf(BooleanBlock.class)); + final BooleanBlock bb = page.getBlock(1); + if (bb.getBoolean(0) == false) { + assertThat(db.getLong(0), equalTo(Long.MAX_VALUE)); + } else { + assertThat(db.getLong(0), greaterThanOrEqualTo(min)); + } + } + + @Override + public AggregatorFunction newAggregatorFunction(DriverContext context) { + return new MinLongAggregatorFunctionSupplier(List.of(0, 1)).aggregator(context); + } + + @Override + public void assertMinValue(Block block, boolean exactResult) { + assertThat(block, instanceOf(LongBlock.class)); + final LongBlock lb = (LongBlock) block; + if (exactResult) { + assertThat(lb.getLong(0), equalTo(min)); + } else { + assertThat(lb.getLong(0), greaterThanOrEqualTo(min)); + } + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinOperatorTestCase.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinOperatorTestCase.java new file mode 100644 index 0000000000000..493512bd83bec --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneMinOperatorTestCase.java @@ -0,0 +1,210 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.elasticsearch.common.breaker.CircuitBreakingException; +import org.elasticsearch.compute.aggregation.AggregatorFunction; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.AnyOperatorTestCase; +import org.elasticsearch.compute.operator.Driver; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.OperatorTestCase; +import org.elasticsearch.compute.operator.TestResultPageSinkOperator; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.indices.CrankyCircuitBreakerService; +import org.hamcrest.Matcher; +import org.junit.After; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.matchesRegex; + +public abstract class LuceneMinOperatorTestCase extends AnyOperatorTestCase { + + protected interface NumberTypeTest { + + IndexableField newPointField(); + + IndexableField newDocValuesField(); + + void assertPage(Page page); + + AggregatorFunction newAggregatorFunction(DriverContext context); + + void assertMinValue(Block block, boolean exactResult); + + } + + protected abstract NumberTypeTest getNumberTypeTest(); + + protected abstract LuceneMinFactory.NumberType getNumberType(); + + protected static final String FIELD_NAME = "field"; + private final Directory directory = newDirectory(); + private IndexReader reader; + + @After + public void closeIndex() throws IOException { + IOUtils.close(reader, directory); + } + + @Override + protected LuceneMinFactory simple() { + return simple(getNumberTypeTest(), randomFrom(DataPartitioning.values()), between(1, 10_000), 100); + } + + private LuceneMinFactory simple(NumberTypeTest numberTypeTest, DataPartitioning dataPartitioning, int numDocs, int limit) { + final boolean enableShortcut = randomBoolean(); + final boolean enableMultiValue = randomBoolean(); + final int commitEvery = Math.max(1, numDocs / 10); + try ( + RandomIndexWriter writer = new RandomIndexWriter( + random(), + directory, + newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE) + ) + ) { + + for (int d = 0; d < numDocs; d++) { + final var numValues = enableMultiValue ? randomIntBetween(1, 5) : 1; + final var doc = new Document(); + for (int i = 0; i < numValues; i++) { + if (enableShortcut) { + doc.add(numberTypeTest.newPointField()); + } else { + doc.add(numberTypeTest.newDocValuesField()); + } + } + writer.addDocument(doc); + if (d % commitEvery == 0) { + writer.commit(); + } + } + reader = writer.getReader(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + final ShardContext ctx = new LuceneSourceOperatorTests.MockShardContext(reader, 0); + final Query query; + if (enableShortcut && randomBoolean()) { + query = new MatchAllDocsQuery(); + } else { + query = SortedNumericDocValuesField.newSlowRangeQuery(FIELD_NAME, Long.MIN_VALUE, Long.MAX_VALUE); + } + return new LuceneMinFactory(List.of(ctx), c -> query, dataPartitioning, between(1, 8), FIELD_NAME, getNumberType(), limit); + } + + public void testSimple() { + testSimple(this::driverContext); + } + + public void testSimpleWithCranky() { + try { + testSimple(this::crankyDriverContext); + logger.info("cranky didn't break"); + } catch (CircuitBreakingException e) { + logger.info("broken", e); + assertThat(e.getMessage(), equalTo(CrankyCircuitBreakerService.ERROR_MESSAGE)); + } + } + + private void testSimple(Supplier contexts) { + int size = between(1_000, 20_000); + int limit = randomBoolean() ? between(10, size) : Integer.MAX_VALUE; + testMin(contexts, size, limit); + } + + public void testEmpty() { + testEmpty(this::driverContext); + } + + public void testEmptyWithCranky() { + try { + testEmpty(this::crankyDriverContext); + logger.info("cranky didn't break"); + } catch (CircuitBreakingException e) { + logger.info("broken", e); + assertThat(e.getMessage(), equalTo(CrankyCircuitBreakerService.ERROR_MESSAGE)); + } + } + + private void testEmpty(Supplier contexts) { + int limit = randomBoolean() ? between(10, 10000) : Integer.MAX_VALUE; + testMin(contexts, 0, limit); + } + + private void testMin(Supplier contexts, int size, int limit) { + DataPartitioning dataPartitioning = randomFrom(DataPartitioning.values()); + NumberTypeTest numberTypeTest = getNumberTypeTest(); + LuceneMinFactory factory = simple(numberTypeTest, dataPartitioning, size, limit); + List results = new CopyOnWriteArrayList<>(); + List drivers = new ArrayList<>(); + int taskConcurrency = between(1, 8); + for (int i = 0; i < taskConcurrency; i++) { + DriverContext ctx = contexts.get(); + drivers.add(new Driver(ctx, factory.get(ctx), List.of(), new TestResultPageSinkOperator(results::add), () -> {})); + } + OperatorTestCase.runDriver(drivers); + assertThat(results.size(), lessThanOrEqualTo(taskConcurrency)); + + try (AggregatorFunction aggregatorFunction = numberTypeTest.newAggregatorFunction(contexts.get())) { + for (Page page : results) { + assertThat(page.getPositionCount(), is(1)); // one row + assertThat(page.getBlockCount(), is(2)); // two blocks + numberTypeTest.assertPage(page); + aggregatorFunction.addIntermediateInput(page); + } + + final Block[] result = new Block[1]; + try { + aggregatorFunction.evaluateFinal(result, 0, contexts.get()); + if (result[0].areAllValuesNull() == false) { + boolean exactResult = size <= limit; + numberTypeTest.assertMinValue(result[0], exactResult); + } + } finally { + Releasables.close(result); + } + } + } + + @Override + protected final Matcher expectedToStringOfSimple() { + return matchesRegex("LuceneMinMaxOperator\\[maxPageSize = \\d+, remainingDocs=100]"); + } + + @Override + protected final Matcher expectedDescriptionOfSimple() { + return matchesRegex( + "LuceneMinOperator\\[type = " + + getNumberType().name() + + ", dataPartitioning = (DOC|SHARD|SEGMENT), fieldName = " + + FIELD_NAME + + ", limit = 100]" + ); + } +}