From 9ce03adac2ed291f552d6279b8e4159ffeb57d14 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Wed, 4 Dec 2024 17:09:50 +0200 Subject: [PATCH 01/18] ESQL: ST_EXTENT_AGG binary extent optimization --- .../mapper/LegacyGeoShapeFieldMapper.java | 13 +- .../AbstractShapeGeometryFieldMapper.java | 82 +++++++ .../index/mapper/MappedFieldType.java | 20 +- ...AbstractShapeGeometryFieldMapperTests.java | 93 ++++++++ .../index/mapper/TextFieldMapperTests.java | 2 +- .../index/mapper/MapperTestCase.java | 19 +- .../test/hamcrest}/RectangleMatcher.java | 32 ++- .../WellKnownBinaryBytesRefMatcher.java | 14 +- .../aggregation/spatial/PointType.java | 63 ++---- .../spatial/SpatialExtentGroupingState.java | 16 +- ...entGroupingStateWrappedLongitudeState.java | 4 +- .../spatial/SpatialExtentState.java | 12 +- ...atialExtentStateWrappedLongitudeState.java | 4 +- .../xpack/esql/EsqlTestUtils.java | 2 +- .../function/aggregate/SpatialCentroid.java | 4 +- .../function/aggregate/SpatialExtent.java | 4 +- .../optimizer/LocalPhysicalPlanOptimizer.java | 9 +- .../local/SpatialShapeBoundExtraction.java | 100 +++++++++ .../esql/plan/physical/FieldExtractExec.java | 52 +++-- .../aggregate/SpatialExtentTests.java | 19 +- .../optimizer/PhysicalPlanOptimizerTests.java | 200 ++++++++++++++++-- .../TestPhysicalOperationProviders.java | 5 +- .../GeoShapeWithDocValuesFieldMapper.java | 12 +- .../index/mapper/ShapeFieldMapper.java | 10 + 24 files changed, 644 insertions(+), 147 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java rename {x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression => test/framework/src/main/java/org/elasticsearch/test/hamcrest}/RectangleMatcher.java (60%) rename {x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression => test/framework/src/main/java/org/elasticsearch/test/hamcrest}/WellKnownBinaryBytesRefMatcher.java (69%) create mode 100644 
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java diff --git a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java index 1616d2727bf8a..506918b12fe96 100644 --- a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java +++ b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java @@ -46,6 +46,7 @@ import org.elasticsearch.legacygeo.builders.ShapeBuilder; import org.elasticsearch.legacygeo.parsers.ShapeParser; import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import org.locationtech.spatial4j.shape.Point; @@ -401,7 +402,6 @@ public void parse( } public static final class GeoShapeFieldType extends AbstractShapeGeometryFieldType> implements GeoShapeQueryable { - private String tree = Defaults.TREE; private SpatialStrategy strategy = Defaults.STRATEGY; private boolean pointsOnly = Defaults.POINTS_ONLY; @@ -530,6 +530,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) { protected Function>, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. 
+ return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } private final IndexVersion indexCreatedVersion; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java index 02a3ae11524e3..4b0542f7f7b03 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java @@ -8,9 +8,18 @@ */ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.lucene.spatial.GeometryDocValueReader; +import java.io.IOException; +import java.nio.ByteOrder; import java.util.Map; import java.util.function.Function; @@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) { // we don't support null value fors shapes return nullValue; } + + @Override + public BlockLoader blockLoader(BlockLoaderContext blContext) { + return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported() + ? 
new BoundsBlockLoader(name(), coordinateEncoder()) + : blockLoaderFromSource(blContext); + } + + protected abstract boolean isBoundsExtractionSupported(); + + protected abstract CoordinateEncoder coordinateEncoder(); + + // Visible for testing + static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader { + private final String fieldName; + private final CoordinateEncoder encoder; + + BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) { + this.fieldName = fieldName; + this.encoder = encoder; + } + + @Override + public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException { + return new BlockLoader.AllReader() { + @Override + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + try (var builder = factory.bytesRefs(docs.count())) { + for (int i = 0; i < docs.count(); i++) { + read(binaryDocValues, docs.get(i), reader, builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + read(binaryDocValues, docId, reader, (BytesRefBuilder) builder); + } + + private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder) + throws IOException { + binaryDocValues.advanceExact(doc); + reader.reset(binaryDocValues.binaryValue()); + var extent = reader.getExtent(); + // This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to + // preserve its properties as a WKB shape, only to convert it back to ints when we compute the + // aggregation. An obvious optimization would be to avoid this back-and-forth conversion. 
+ var rectangle = new Rectangle( + encoder.decodeX(extent.minX()), + encoder.decodeX(extent.maxX()), + encoder.decodeY(extent.maxY()), + encoder.decodeY(extent.minY()) + ); + builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN))); + } + + @Override + public boolean canReuse(int startingDocID) { + return true; + } + }; + } + + @Override + public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + } } protected Explicit coerce; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 35722be20b9be..20d23ab97ac26 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -676,11 +676,27 @@ public enum FieldExtractPreference { /** * Load the field from doc-values into a BlockLoader supporting doc-values. */ - DOC_VALUES, + DOC_VALUES(true), + /** Loads the field by extracting the extent from the binary encoded representation */ + EXTRACT_SPATIAL_BOUNDS(false), /** * No preference. Leave the choice of where to load the field from up to the FieldType. */ - NONE + NONE(false); + + private final boolean isColumnReader; + + FieldExtractPreference(boolean isColumnReader) { + this.isColumnReader = isColumnReader; + } + + public static FieldExtractPreference forColumnReader(boolean columnReader) { + return columnReader ? 
DOC_VALUES : NONE; + } + + public boolean isColumnReader() { + return isColumnReader; + } } /** diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java new file mode 100644 index 0000000000000..8ca412405a062 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geo.GeometryTestUtils; +import org.elasticsearch.geo.ShapeTestUtils; +import org.elasticsearch.geometry.Geometry; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; +import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField; +import org.elasticsearch.lucene.spatial.CartesianShapeIndexer; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import 
org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; + +import java.io.IOException; +import java.util.Optional; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.IntStream; + +public class AbstractShapeGeometryFieldMapperTests extends ESTestCase { + // TODO handle geo as well, this is actually bugged, since extracting the result ignores minneg etc. + public void testCartesianBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.CARTESIAN, + () -> ShapeTestUtils.randomGeometryWithoutCircle(0, false), + field -> new CartesianShapeIndexer(field), + SpatialEnvelopeVisitor::visitCartesian + ); + } + + // TODO when we turn this optimization on for geo, handle this as well. + public void ignoreTestGeoBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.GEO, + () -> GeometryTestUtils.randomGeometryWithoutCircle(0, false), + field -> new GeoShapeIndexer(Orientation.RIGHT, field), + g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP) + ); + } + + private void testBoundsBlockLoaderAux( + CoordinateEncoder encoder, + Supplier generator, + Function indexerFactory, + Function> visitor + ) throws IOException { + var geometries = IntStream.range(0, 20).mapToObj(i -> ShapeTestUtils.randomGeometryWithoutCircle(0, false)).toList(); + var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder); + try (Directory directory = newDirectory()) { + try (var iw = new RandomIndexWriter(random(), directory)) { + for (Geometry geometry : geometries) { + var shape = new BinaryShapeDocValuesField("field", encoder); + shape.add(indexerFactory.apply("field").indexShape(geometry), geometry); + var doc = new Document(); + doc.add(shape); + iw.addDocument(doc); + } + } + var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray(); + try (DirectoryReader reader = 
DirectoryReader.open(directory)) { + LeafReaderContext ctx = reader.leaves().get(0); + TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices)); + for (int i = 0; i < indices.length; i++) { + var idx = indices[i]; + Rectangle r = visitor.apply(geometries.get(idx)).get(); + assertThat( + Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometries.get(idx)), + (BytesRef) block.get(i), + WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder)) + ); + } + } + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 32cbcfc2441a1..9675638b2b394 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -1355,6 +1355,6 @@ private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSo MapperService mapper = syntheticSource ? 
createSytheticSourceMapperService(mapping) : createMapperService(mapping); BlockReaderSupport blockReaderSupport = getSupportedReaders(mapper, "field.sub"); var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); + testBlockLoader(MappedFieldType.FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index 2da2c5a08c177..f6b2420ce0b03 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.fielddata.LeafFieldData; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.index.translog.Translog; @@ -87,8 +88,6 @@ import java.util.stream.IntStream; import static java.util.stream.Collectors.toList; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE; import static org.elasticsearch.test.MapMatcher.assertMap; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -1420,7 +1419,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str this(columnAtATimeReader, true, mapper, loaderFieldName); } - private BlockLoader getBlockLoader(boolean columnReader) { + private BlockLoader 
getBlockLoader(FieldExtractPreference fieldExtractPreference) { SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null); return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override @@ -1434,8 +1433,8 @@ public IndexSettings indexSettings() { } @Override - public MappedFieldType.FieldExtractPreference fieldExtractPreference() { - return columnReader ? DOC_VALUES : NONE; + public FieldExtractPreference fieldExtractPreference() { + return fieldExtractPreference; } @Override @@ -1484,16 +1483,20 @@ private void testBlockLoader(boolean syntheticSource, boolean columnReader) thro ); } var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); + testBlockLoader(FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); } protected final void testBlockLoader( - boolean columnReader, + FieldExtractPreference fieldExtractPreference, SyntheticSourceExample example, BlockReaderSupport blockReaderSupport, SourceLoader sourceLoader ) throws IOException { - BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader); + var columnReader = switch (fieldExtractPreference) { + case DOC_VALUES -> true; + case NONE, EXTRACT_SPATIAL_BOUNDS -> false; + }; + BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference); Function valuesConvert = loadBlockExpected(blockReaderSupport, columnReader); if (valuesConvert == null) { assertNull(loader); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java similarity index 60% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java rename to 
test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java index 48fbc9c8e0378..2d55b439bd1b7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java @@ -1,14 +1,16 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; -import org.elasticsearch.compute.aggregation.spatial.PointType; import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.hamcrest.Description; import org.hamcrest.Matchers; import org.hamcrest.TypeSafeMatcher; @@ -19,23 +21,31 @@ */ public class RectangleMatcher extends TypeSafeMatcher { private final Rectangle r; - private final PointType pointType; + private final CoordinateEncoder coordinateEncoder; private final double error; - public static TypeSafeMatcher closeTo(Rectangle r, double error, PointType pointType) { - return new RectangleMatcher(r, error, pointType); + public static TypeSafeMatcher closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) { + return new RectangleMatcher(r, error, coordinateEncoder); } - private RectangleMatcher(Rectangle r, double error, PointType pointType) { + private RectangleMatcher(Rectangle r, double error, 
CoordinateEncoder coordinateEncoder) { this.r = r; - this.pointType = pointType; + this.coordinateEncoder = coordinateEncoder; this.error = error; } + /** + * Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints. + */ + public static TypeSafeMatcher closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) { + var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY()); + return closeTo(normalized, v, encoder); + } + @Override protected boolean matchesSafely(Rectangle other) { // For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe. - boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX(); + boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX(); boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX()) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX())) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java similarity index 69% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java rename to test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java index 535bb820458cd..809f2862c208c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java @@ -1,11 +1,13 @@ /* * Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; import org.apache.lucene.util.BytesRef; import org.elasticsearch.geometry.Geometry; @@ -23,6 +25,10 @@ public WellKnownBinaryBytesRefMatcher(Matcher matcher) { this.matcher = matcher; } + public static Matcher encodes(TypeSafeMatcher matcher) { + return new WellKnownBinaryBytesRefMatcher(matcher); + } + @Override public boolean matchesSafely(BytesRef bytesRef) { return matcher.matches(fromBytesRef(bytesRef)); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java index 5395ca0b85163..fb45f869c4133 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java @@ -7,12 +7,11 @@ package org.elasticsearch.compute.aggregation.spatial; -import org.apache.lucene.geo.GeoEncodingUtils; -import org.apache.lucene.geo.XYEncodingUtils; import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import 
org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.util.Optional; @@ -23,26 +22,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitGeo(geo, WrapLongitude.WRAP); } - @Override - public double decodeX(int encoded) { - return GeoEncodingUtils.decodeLongitude(encoded); - } - - @Override - public double decodeY(int encoded) { - return GeoEncodingUtils.decodeLatitude(encoded); - } - - @Override - public int encodeX(double decoded) { - return GeoEncodingUtils.encodeLongitude(decoded); - } - - @Override - public int encodeY(double decoded) { - return GeoEncodingUtils.encodeLatitude(decoded); - } - // Geo encodes the longitude in the lower 32 bits and the latitude in the upper 32 bits. @Override public int extractX(long encoded) { @@ -53,6 +32,11 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.GEO; + } }, CARTESIAN { @Override @@ -60,26 +44,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitCartesian(geo); } - @Override - public double decodeX(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public double decodeY(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public int encodeX(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - - @Override - public int encodeY(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - @Override public int extractX(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); @@ -89,19 +53,18 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractSecond(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.CARTESIAN; + } 
}; public abstract Optional computeEnvelope(Geometry geo); - public abstract double decodeX(int encoded); - - public abstract double decodeY(int encoded); - - public abstract int encodeX(double decoded); - - public abstract int encodeY(double decoded); - public abstract int extractX(long encoded); public abstract int extractY(long encoded); + + public abstract CoordinateEncoder encoder(); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java index 9ce0ccdda0ff5..cb765e4d6757e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java @@ -72,10 +72,10 @@ public void add(int groupId, Geometry geometry) { .ifPresent( r -> add( groupId, - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -122,10 +122,10 @@ public Block toBlock(IntVector selected, DriverContext driverContext) { new BytesRef( WellKnownBinary.toWKB( new Rectangle( - pointType.decodeX(minXs.get(si)), - pointType.decodeX(maxXs.get(si)), - pointType.decodeY(maxYs.get(si)), - pointType.decodeY(minYs.get(si)) + pointType.encoder().decodeX(minXs.get(si)), + pointType.encoder().decodeX(maxXs.get(si)), + pointType.encoder().decodeY(maxYs.get(si)), + pointType.encoder().decodeY(minYs.get(si)) ), ByteOrder.LITTLE_ENDIAN ) diff --git 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java index 3dd7a6d4acde2..41bc50abcf6bc 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java @@ -91,8 +91,8 @@ public void add(int groupId, Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java index 0eea9b79f73ea..3dc150d1702a2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java @@ -14,6 +14,7 @@ import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.nio.ByteOrder; @@ -46,10 +47,10 @@ public void add(Geometry geo) { pointType.computeEnvelope(geo) .ifPresent( r 
-> add( - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -74,8 +75,9 @@ public Block toBlock(DriverContext driverContext) { } private byte[] toWKB() { + CoordinateEncoder encoder = pointType.encoder(); return WellKnownBinary.toWKB( - new Rectangle(pointType.decodeX(minX), pointType.decodeX(maxX), pointType.decodeY(maxY), pointType.decodeY(minY)), + new Rectangle(encoder.decodeX(minX), encoder.decodeX(maxX), encoder.decodeY(maxY), encoder.decodeY(minY)), ByteOrder.LITTLE_ENDIAN ); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java index 99200d2ed99f5..0d6163636fcde 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java @@ -53,8 +53,8 @@ public void add(Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java 
b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 18ce9d7e3e057..77dde5e875080 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -390,7 +390,7 @@ public static LogicalPlan localSource(BlockFactory blockFactory, List } public static T as(Object node, Class type) { - Assert.assertThat(node, instanceOf(type)); + Assert.assertThat("Unexpected type: " + node.getClass(), node, instanceOf(type)); return type.cast(node); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java index 84915d024ea82..54c05cf1bad52 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java @@ -103,11 +103,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (type) { case DataType.GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new 
SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; default -> throw EsqlIllegalArgumentException.illegalDataType(type); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java index 5cc1701faf13a..34e5c9d68fc86 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java @@ -104,11 +104,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (field().dataType()) { case DataType.GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; // Shapes don't differentiate between source and doc values. 
case DataType.GEO_SHAPE -> new SpatialExtentGeoShapeAggregatorFunctionSupplier(inputChannels); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index 1eaade043658b..eb148952e0a26 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushTopNToSource; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.ReplaceSourceAttributes; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialDocValuesExtraction; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialShapeBoundExtraction; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; import org.elasticsearch.xpack.esql.rule.Rule; @@ -73,7 +74,13 @@ protected List> rules(boolean optimizeForEsSource) { var pushdown = new Batch("Push to ES", esSourceRules.toArray(Rule[]::new)); // add the field extraction in just one pass // add it at the end after all the other rules have ran - var fieldExtraction = new Batch<>("Field extraction", Limiter.ONCE, new InsertFieldExtraction(), new SpatialDocValuesExtraction()); + var fieldExtraction = new Batch<>( + "Field extraction", + Limiter.ONCE, + new InsertFieldExtraction(), + new SpatialDocValuesExtraction(), + new SpatialShapeBoundExtraction() + ); return asList(pushdown, fieldExtraction); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java new file mode 100644 index 0000000000000..6949602aed0a4 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java @@ -0,0 +1,100 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; + +import org.elasticsearch.lucene.spatial.GeometryDocValueWriter; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialExtent; +import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; +import org.elasticsearch.xpack.esql.optimizer.PhysicalOptimizerRules.ParameterizedOptimizerRule; +import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; +import org.elasticsearch.xpack.esql.plan.physical.EvalExec; +import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; +import org.elasticsearch.xpack.esql.plan.physical.FilterExec; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; + +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** 
+ * This rule is responsible for marking spatial shape fields whose extent can be extracted from the binary representation encoded by + * {@link GeometryDocValueWriter}. + * This is a very specific optimization that is only used in the context of ST_EXTENT_AGG aggregations. + * Normally spatial fields are extracted from source values because this maintains original precision, but is very slow. + * Simply extracting the spatial bounds from the binary encoding loses both precision and geometry topological information for shapes. + * For this reason we only consider extracting the extent under very specific conditions: + *
    + *
  • The spatial data is of type GEO_SHAPE or CARTESIAN_SHAPE.
  • + *
  • The spatial data is consumed directly by an ST_EXTENT_AGG.
  • + *
  • The spatial data is not consumed by any other operation. While this is stricter than necessary, + * it is a good enough approximation for now.
  • + *
+ */ +public class SpatialShapeBoundExtraction extends ParameterizedOptimizerRule { + @Override + protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerContext ctx) { + var foundAttributes = new HashSet(); + + return aggregate.transformDown(UnaryExec.class, exec -> { + switch (exec) { + case AggregateExec agg -> { + List aggregateFunctions = agg.aggregates() + .stream() + .flatMap(e -> SpatialShapeBoundExtraction.extractAggregateFunction(e).stream()) + .toList(); + List spatialExtents = aggregateFunctions.stream() + .filter(SpatialExtent.class::isInstance) + .map(SpatialExtent.class::cast) + .toList(); + List nonSpatialExtents = aggregateFunctions.stream() + .filter(a -> a instanceof SpatialExtent == false) + .toList(); + // While we currently do not have any non-extent aggregations which apply to shapes, we might have them in the future. + Set fieldsAppearingInNonSpatialExtents = nonSpatialExtents.stream() + .flatMap(af -> af.references().stream()) + .filter(FieldAttribute.class::isInstance) + .map(f -> ((FieldAttribute) f).field()) + .collect(Collectors.toSet()); + spatialExtents.stream() + .map(SpatialExtent::field) + .filter(FieldAttribute.class::isInstance) + .map(FieldAttribute.class::cast) + .filter(f -> isShape(f.field().getDataType()) && fieldsAppearingInNonSpatialExtents.contains(f.field()) == false) + .forEach(foundAttributes::add); + } + case EvalExec evalExec -> foundAttributes.removeAll(evalExec.references()); + case FilterExec filterExec -> foundAttributes.removeAll(filterExec.condition().references()); + case FieldExtractExec fieldExtractExec -> { + foundAttributes.retainAll(fieldExtractExec.attributesToExtract()); + return fieldExtractExec.withBoundAttributes(foundAttributes); + } + default -> { // Do nothing + } + } + return exec; + }); + } + + private static boolean isShape(DataType dataType) { + return dataType == DataType.GEO_SHAPE || dataType == DataType.CARTESIAN_SHAPE; + } + + private static Optional 
extractAggregateFunction(NamedExpression expr) { + return expr instanceof Alias as && as.child() instanceof AggregateFunction af ? Optional.of(af) : Optional.empty(); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java index ec996c5c84064..0fddfb652afff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java @@ -7,9 +7,11 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -31,9 +33,10 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { ); private final List attributesToExtract; - private final Attribute sourceAttribute; + private final @Nullable Attribute sourceAttribute; + /** - * Attributes that many be extracted as doc values even if that makes them + * Attributes that may be extracted as doc values even if that makes them * less accurate. This is mostly used for geo fields which lose a lot of * precision in their doc values, but in some cases doc values provides * enough precision to do the job. @@ -43,17 +46,32 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { */ private final Set docValuesAttributes; + /** + * Attributes of a shape whose extent can be extracted directly from the encoded geometry. + *

+ * This is never serialized between nodes and only used locally. + *

+ */ + private final Set boundAttributes; + private List lazyOutput; public FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract) { - this(source, child, attributesToExtract, Set.of()); + this(source, child, attributesToExtract, Set.of(), Set.of()); } - private FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract, Set docValuesAttributes) { + private FieldExtractExec( + Source source, + PhysicalPlan child, + List attributesToExtract, + Set docValuesAttributes, + Set boundAttributes + ) { super(source, child); this.attributesToExtract = attributesToExtract; this.sourceAttribute = extractSourceAttributesFrom(child); this.docValuesAttributes = docValuesAttributes; + this.boundAttributes = boundAttributes; } private FieldExtractExec(StreamInput in) throws IOException { @@ -78,7 +96,7 @@ public String getWriteableName() { return ENTRY.name; } - public static Attribute extractSourceAttributesFrom(PhysicalPlan plan) { + public static @Nullable Attribute extractSourceAttributesFrom(PhysicalPlan plan) { for (Attribute attribute : plan.outputSet()) { if (EsQueryExec.isSourceAttribute(attribute)) { return attribute; @@ -99,18 +117,22 @@ protected NodeInfo info() { @Override public UnaryExec replaceChild(PhysicalPlan newChild) { - return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes, boundAttributes); } public FieldExtractExec withDocValuesAttributes(Set docValuesAttributes) { - return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); + } + + public FieldExtractExec withBoundAttributes(Set boundAttributes) { + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); } public List attributesToExtract() { return 
attributesToExtract; } - public Attribute sourceAttribute() { + public @Nullable Attribute sourceAttribute() { return sourceAttribute; } @@ -118,8 +140,8 @@ public Set docValuesAttributes() { return docValuesAttributes; } - public boolean hasDocValuesAttribute(Attribute attr) { - return docValuesAttributes.contains(attr); + public Set boundAttributes() { + return boundAttributes; } @Override @@ -142,7 +164,7 @@ public PhysicalPlan estimateRowSize(State state) { @Override public int hashCode() { - return Objects.hash(attributesToExtract, docValuesAttributes, child()); + return Objects.hash(attributesToExtract, docValuesAttributes, boundAttributes, child()); } @Override @@ -158,12 +180,18 @@ public boolean equals(Object obj) { FieldExtractExec other = (FieldExtractExec) obj; return Objects.equals(attributesToExtract, other.attributesToExtract) && Objects.equals(docValuesAttributes, other.docValuesAttributes) + && Objects.equals(boundAttributes, other.boundAttributes) && Objects.equals(child(), other.child()); } @Override public String nodeString() { - return nodeName() + NodeUtils.limitedToString(attributesToExtract) + "<" + NodeUtils.limitedToString(docValuesAttributes) + ">"; + return Strings.format( + "%s<%s,%s>", + nodeName() + NodeUtils.limitedToString(attributesToExtract), + docValuesAttributes, + boundAttributes + ); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java index a1faa537ba052..225e10f99c853 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java @@ -17,11 +17,11 @@ import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import 
org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.RectangleMatcher; -import org.elasticsearch.xpack.esql.expression.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; import org.elasticsearch.xpack.esql.expression.function.FunctionName; import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; @@ -82,20 +82,7 @@ private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier List.of(fieldTypedData), "SpatialExtent[field=Attribute[channel=0]]", expectedType, - new WellKnownBinaryBytesRefMatcher<>( - RectangleMatcher.closeTo( - new Rectangle( - // Since we use integers locally which are later decoded to doubles, all computation is effectively done using - // floats, not doubles. 
- (float) result.getMinX(), - (float) result.getMaxX(), - (float) result.getMaxY(), - (float) result.getMinY() - ), - 1e-3, - pointType - ) - ) + new WellKnownBinaryBytesRefMatcher<>(RectangleMatcher.closeToFloat(result, 1e-3, pointType.encoder())) ); }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 964dd4642d7c2..5d1b36b756bac 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -164,12 +164,14 @@ import static org.elasticsearch.xpack.esql.core.expression.function.scalar.FunctionTestUtils.l; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; import static org.elasticsearch.xpack.esql.parser.ExpressionBuilder.MAX_EXPRESSION_DEPTH; import static org.elasticsearch.xpack.esql.parser.LogicalPlanBuilder.MAX_QUERY_DEPTH; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasItem; @@ -199,6 +201,7 @@ public class PhysicalPlanOptimizerTests extends ESTestCase { private TestDataSource testData; private int allFieldRowSize; // TODO: Move this into testDataSource so tests that load other indexes can also assert on this private TestDataSource airports; + private TestDataSource airportsCityBoundaries; private TestDataSource 
airportsNoDocValues; // Test when spatial field is indexed but has no doc values private TestDataSource airportsNotIndexed; // Test when spatial field has doc values but is not indexed private TestDataSource airportsNotIndexedNorDocValues; // Test when spatial field is neither indexed nor has doc-values @@ -250,6 +253,13 @@ public void init() { // Some tests use data from the airports and countries indexes, so we load that here, and use it in the plan(q, airports) function. this.airports = makeTestDataSource("airports", "mapping-airports.json", functionRegistry, enrichResolution); + this.airportsCityBoundaries = makeTestDataSource( + "airports_city_boundaries", + "mapping-airport_city_boundaries.json", + functionRegistry, + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.DOC_VALUES, "city_boundary") + ); this.airportsNoDocValues = makeTestDataSource( "airports-no-doc-values", "mapping-airports_no_doc_values.json", @@ -2908,24 +2918,23 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ - * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] - * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],FINAL,...] 
+ * \_ExchangeExec[[...]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[..]] + * \_EsRelation[airports-no-doc-values][abbrev{f}#8, city{f}#14, city_location{f}#15, count..]]] * * After local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ - * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] - * \_FieldExtractExec[location{f}#48][location{f}#48] - * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ - * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],INITIAL,...] + * \_FilterExec[ISNOTNULL(location{f}#12)] + * \_FieldExtractExec[location{f}#12] + * \_EsQueryExec[airports-no-doc-values], indexMode[standard], query[][_doc{f}#59], limit[], sort[] estimatedRowSize[25] * * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] *

@@ -2965,6 +2974,151 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { } } + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ + * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] + * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] + *

+ * Also note that the type converting function is removed when it does not actually convert the type, + * ensuring that ReferenceAttributes are not created for the same field, and the optimization can still work. + */ + public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { + for (String query : new String[] { "from airports_city_boundaries | stats extent = st_extent_agg(city_boundary)", }) { + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use extent extraction + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses extent extraction + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not using doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is using a specific + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertChildIsExtractedAsBounds(agg, GEO_SHAPE); + } + } + + // This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. 
+ public void testSpatialTypesAndStatsExtentOfShapesNegativeCases() { + for (String query : new String[] { """ + FROM airports_city_boundaries | \ + EVAL prefix = SUBSTRING(TO_STRING(city_boundary), 5) | \ + STATS extent = ST_EXTENT_AGG(city_boundary) BY prefix""", """ + FROM airports_city_boundaries \ + | WHERE STARTS_WITH(TO_STRING(city_boundary), "MULTIPOLYGON") \ + | STATS extent = ST_EXTENT_AGG(city_boundary)""", }) { + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); + assertChildIsExtractedAsDocValues(exec, withDocValues, GEO_SHAPE); + } + } + + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#13,true[BOOLEAN]) AS extent, SPATIALCENTROID(city_location{f}#12,true[BOOLEA + * N]) AS centroid],...] + * \_ExchangeExec[[..]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[...]] + * \_EsRelation[airports_city_boundaries][abbrev{f}#8, airport{f}#9, city{f}#11, city_boundar..] 
+ * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + */ + public void testMixedSpatialBoundsAndPointsExtracted() { + var query = """ + FROM airports_city_boundaries \ + | STATS extent = ST_EXTENT_AGG(city_boundary), centroid = ST_CENTROID_AGG(city_location)"""; + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses both doc-values and bounds extraction + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not field-optimized. 
+ assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, withDocValues); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is field optimized. + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var fieldExtractExec = as(agg.child(), FieldExtractExec.class); + assertThat(fieldExtractExec.boundAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_boundary"))); + assertThat(fieldExtractExec.docValuesAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_location"))); + } + /** * This test does not have real index fields, and therefor asserts that doc-values field extraction does NOT occur. * Before local optimizations: @@ -6912,12 +7066,23 @@ private EsQueryExec assertChildIsGeoPointExtract(UnaryExec parent, boolean useDo } private EsQueryExec assertChildIsExtractedAsDocValues(UnaryExec parent, boolean useDocValues, DataType dataType) { + // TODO(gal) why is this OK To vacuously true? var extract = as(parent.child(), FieldExtractExec.class); + assertThat(extract.boundAttributes(), is(empty())); assertTrue( "Expect field attribute to be extracted as " + (useDocValues ? 
"doc-values" : "source"), extract.attributesToExtract() .stream() - .allMatch(attr -> extract.hasDocValuesAttribute(attr) == useDocValues && attr.dataType() == dataType) + .allMatch(attr -> extract.docValuesAttributes().contains(attr) == useDocValues && attr.dataType() == dataType) + ); + return source(extract.child()); + } + + private static EsQueryExec assertChildIsExtractedAsBounds(UnaryExec parent, DataType dataType) { + var extract = as(parent.child(), FieldExtractExec.class); + assertTrue( + "Expect field attribute to be extracted as bounds", + extract.attributesToExtract().stream().allMatch(attr -> extract.boundAttributes().contains(attr) && attr.dataType() == dataType) ); return source(extract.child()); } @@ -6978,13 +7143,14 @@ private static QueryBuilder findQueryBuilder(BoolQueryBuilder booleanQuery, Stri } private void assertFieldExtractionWithDocValues(FieldExtractExec extract, DataType dataType, String... fieldNames) { + var docValuesAttributes = extract.docValuesAttributes(); extract.attributesToExtract().forEach(attr -> { String name = attr.name(); if (asList(fieldNames).contains(name)) { - assertThat("Expected field '" + name + "' to use doc-values", extract.hasDocValuesAttribute(attr), equalTo(true)); + assertThat("Expected field '" + name + "' to use doc-values", docValuesAttributes.contains(attr), equalTo(true)); assertThat("Expected field '" + name + "' to have data type " + dataType, attr.dataType(), equalTo(dataType)); } else { - assertThat("Expected field '" + name + "' to NOT use doc-values", extract.hasDocValuesAttribute(attr), equalTo(false)); + assertThat("Expected field '" + name + "' to NOT use doc-values", docValuesAttributes.contains(attr), equalTo(false)); } }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index e91fc6e49312d..78512636b57e9 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -86,7 +86,10 @@ public PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fieldExt for (Attribute attr : fieldExtractExec.attributesToExtract()) { layout.append(attr); op = op.with( - new TestFieldExtractOperatorFactory(attr, PlannerUtils.extractPreference(fieldExtractExec.hasDocValuesAttribute(attr))), + new TestFieldExtractOperatorFactory( + attr, + PlannerUtils.extractPreference(fieldExtractExec.docValuesAttributes().contains(attr)) + ), layout.build() ); } diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java index 23505eda493af..224abd2002455 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java @@ -208,7 +208,6 @@ public GeoShapeWithDocValuesFieldMapper build(MapperBuilderContext context) { } public static final class GeoShapeWithDocValuesFieldType extends AbstractShapeGeometryFieldType implements GeoShapeQueryable { - private final GeoFormatterFactory geoFormatterFactory; private final FieldValues scriptValues; @@ -298,6 +297,17 @@ public List parseStoredValues(List storedValues) { protected Function, List> getFormatter(String format) { return geoFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. 
+ return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } public static class TypeParser implements Mapper.TypeParser { diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java index e5d5354327f5a..2d586ac8eb86a 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java @@ -184,6 +184,16 @@ public String typeName() { protected Function, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + return true; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.CARTESIAN; + } } private final Builder builder; From a5197797bcfeffb849aee6df511f8a93e7ba3c49 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Wed, 4 Dec 2024 17:09:50 +0200 Subject: [PATCH 02/18] ESQL: ST_EXTENT_AGG binary extent optimization --- .../mapper/LegacyGeoShapeFieldMapper.java | 13 +- .../AbstractShapeGeometryFieldMapper.java | 82 +++++++ .../index/mapper/MappedFieldType.java | 20 +- ...AbstractShapeGeometryFieldMapperTests.java | 93 ++++++++ .../index/mapper/TextFieldMapperTests.java | 2 +- .../index/mapper/MapperTestCase.java | 19 +- .../test/hamcrest}/RectangleMatcher.java | 32 ++- .../WellKnownBinaryBytesRefMatcher.java | 14 +- .../aggregation/spatial/PointType.java | 63 ++---- .../spatial/SpatialExtentGroupingState.java | 16 +- ...entGroupingStateWrappedLongitudeState.java | 4 +- .../spatial/SpatialExtentState.java | 12 +- ...atialExtentStateWrappedLongitudeState.java | 4 +- .../xpack/esql/EsqlTestUtils.java | 2 +- 
.../function/aggregate/SpatialCentroid.java | 4 +- .../function/aggregate/SpatialExtent.java | 4 +- .../optimizer/LocalPhysicalPlanOptimizer.java | 9 +- .../local/SpatialShapeBoundExtraction.java | 100 +++++++++ .../esql/plan/physical/FieldExtractExec.java | 52 +++-- .../aggregate/SpatialExtentTests.java | 19 +- .../optimizer/PhysicalPlanOptimizerTests.java | 200 ++++++++++++++++-- .../TestPhysicalOperationProviders.java | 5 +- .../GeoShapeWithDocValuesFieldMapper.java | 12 +- .../index/mapper/ShapeFieldMapper.java | 10 + 24 files changed, 644 insertions(+), 147 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java rename {x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression => test/framework/src/main/java/org/elasticsearch/test/hamcrest}/RectangleMatcher.java (60%) rename {x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression => test/framework/src/main/java/org/elasticsearch/test/hamcrest}/WellKnownBinaryBytesRefMatcher.java (69%) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java diff --git a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java index 1616d2727bf8a..506918b12fe96 100644 --- a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java +++ b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java @@ -46,6 +46,7 @@ import org.elasticsearch.legacygeo.builders.ShapeBuilder; import org.elasticsearch.legacygeo.parsers.ShapeParser; import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.elasticsearch.xcontent.XContentBuilder; import 
org.elasticsearch.xcontent.XContentParser; import org.locationtech.spatial4j.shape.Point; @@ -401,7 +402,6 @@ public void parse( } public static final class GeoShapeFieldType extends AbstractShapeGeometryFieldType> implements GeoShapeQueryable { - private String tree = Defaults.TREE; private SpatialStrategy strategy = Defaults.STRATEGY; private boolean pointsOnly = Defaults.POINTS_ONLY; @@ -530,6 +530,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) { protected Function>, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. + return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } private final IndexVersion indexCreatedVersion; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java index 02a3ae11524e3..4b0542f7f7b03 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java @@ -8,9 +8,18 @@ */ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.lucene.spatial.GeometryDocValueReader; +import java.io.IOException; +import java.nio.ByteOrder; import java.util.Map; import java.util.function.Function; @@ 
-69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) { // we don't support null value fors shapes return nullValue; } + + @Override + public BlockLoader blockLoader(BlockLoaderContext blContext) { + return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported() + ? new BoundsBlockLoader(name(), coordinateEncoder()) + : blockLoaderFromSource(blContext); + } + + protected abstract boolean isBoundsExtractionSupported(); + + protected abstract CoordinateEncoder coordinateEncoder(); + + // Visible for testing + static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader { + private final String fieldName; + private final CoordinateEncoder encoder; + + BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) { + this.fieldName = fieldName; + this.encoder = encoder; + } + + @Override + public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException { + return new BlockLoader.AllReader() { + @Override + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + try (var builder = factory.bytesRefs(docs.count())) { + for (int i = 0; i < docs.count(); i++) { + read(binaryDocValues, docs.get(i), reader, builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + read(binaryDocValues, docId, reader, (BytesRefBuilder) builder); + } + + private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder) + throws IOException { + binaryDocValues.advanceExact(doc); + reader.reset(binaryDocValues.binaryValue()); + var 
extent = reader.getExtent(); + // This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to + // preserve its properties as a WKB shape, only to convert it back to ints when we compute the + // aggregation. An obvious optimization would be to avoid this back-and-forth conversion. + var rectangle = new Rectangle( + encoder.decodeX(extent.minX()), + encoder.decodeX(extent.maxX()), + encoder.decodeY(extent.maxY()), + encoder.decodeY(extent.minY()) + ); + builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN))); + } + + @Override + public boolean canReuse(int startingDocID) { + return true; + } + }; + } + + @Override + public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + } } protected Explicit coerce; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 35722be20b9be..20d23ab97ac26 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -676,11 +676,27 @@ public enum FieldExtractPreference { /** * Load the field from doc-values into a BlockLoader supporting doc-values. */ - DOC_VALUES, + DOC_VALUES(true), + /** Loads the field by extracting the extent from the binary encoded representation */ + EXTRACT_SPATIAL_BOUNDS(false), /** * No preference. Leave the choice of where to load the field from up to the FieldType. */ - NONE + NONE(false); + + private final boolean isColumnReader; + + FieldExtractPreference(boolean isColumnReader) { + this.isColumnReader = isColumnReader; + } + + public static FieldExtractPreference forColumnReader(boolean columnReader) { + return columnReader ? 
DOC_VALUES : NONE; + } + + public boolean isColumnReader() { + return isColumnReader; + } } /** diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java new file mode 100644 index 0000000000000..8ca412405a062 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geo.GeometryTestUtils; +import org.elasticsearch.geo.ShapeTestUtils; +import org.elasticsearch.geometry.Geometry; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; +import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField; +import org.elasticsearch.lucene.spatial.CartesianShapeIndexer; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import 
org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; + +import java.io.IOException; +import java.util.Optional; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.IntStream; + +public class AbstractShapeGeometryFieldMapperTests extends ESTestCase { + // TODO handle geo as well, this is actually bugged, since extracting the result ignores minneg etc. + public void testCartesianBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.CARTESIAN, + () -> ShapeTestUtils.randomGeometryWithoutCircle(0, false), + field -> new CartesianShapeIndexer(field), + SpatialEnvelopeVisitor::visitCartesian + ); + } + + // TODO when we turn this optimization on for geo, handle this as well. + public void ignoreTestGeoBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.GEO, + () -> GeometryTestUtils.randomGeometryWithoutCircle(0, false), + field -> new GeoShapeIndexer(Orientation.RIGHT, field), + g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP) + ); + } + + private void testBoundsBlockLoaderAux( + CoordinateEncoder encoder, + Supplier generator, + Function indexerFactory, + Function> visitor + ) throws IOException { + var geometries = IntStream.range(0, 20).mapToObj(i -> ShapeTestUtils.randomGeometryWithoutCircle(0, false)).toList(); + var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder); + try (Directory directory = newDirectory()) { + try (var iw = new RandomIndexWriter(random(), directory)) { + for (Geometry geometry : geometries) { + var shape = new BinaryShapeDocValuesField("field", encoder); + shape.add(indexerFactory.apply("field").indexShape(geometry), geometry); + var doc = new Document(); + doc.add(shape); + iw.addDocument(doc); + } + } + var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray(); + try (DirectoryReader reader = 
DirectoryReader.open(directory)) { + LeafReaderContext ctx = reader.leaves().get(0); + TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices)); + for (int i = 0; i < indices.length; i++) { + var idx = indices[i]; + Rectangle r = visitor.apply(geometries.get(idx)).get(); + assertThat( + Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometries.get(idx)), + (BytesRef) block.get(i), + WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder)) + ); + } + } + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 32cbcfc2441a1..9675638b2b394 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -1355,6 +1355,6 @@ private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSo MapperService mapper = syntheticSource ? 
createSytheticSourceMapperService(mapping) : createMapperService(mapping); BlockReaderSupport blockReaderSupport = getSupportedReaders(mapper, "field.sub"); var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); + testBlockLoader(MappedFieldType.FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index 2da2c5a08c177..f6b2420ce0b03 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.fielddata.LeafFieldData; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.index.translog.Translog; @@ -87,8 +88,6 @@ import java.util.stream.IntStream; import static java.util.stream.Collectors.toList; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE; import static org.elasticsearch.test.MapMatcher.assertMap; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -1420,7 +1419,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str this(columnAtATimeReader, true, mapper, loaderFieldName); } - private BlockLoader getBlockLoader(boolean columnReader) { + private BlockLoader 
getBlockLoader(FieldExtractPreference fieldExtractPreference) { SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null); return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override @@ -1434,8 +1433,8 @@ public IndexSettings indexSettings() { } @Override - public MappedFieldType.FieldExtractPreference fieldExtractPreference() { - return columnReader ? DOC_VALUES : NONE; + public FieldExtractPreference fieldExtractPreference() { + return fieldExtractPreference; } @Override @@ -1484,16 +1483,20 @@ private void testBlockLoader(boolean syntheticSource, boolean columnReader) thro ); } var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); + testBlockLoader(FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); } protected final void testBlockLoader( - boolean columnReader, + FieldExtractPreference fieldExtractPreference, SyntheticSourceExample example, BlockReaderSupport blockReaderSupport, SourceLoader sourceLoader ) throws IOException { - BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader); + var columnReader = switch (fieldExtractPreference) { + case DOC_VALUES -> true; + case NONE, EXTRACT_SPATIAL_BOUNDS -> false; + }; + BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference); Function valuesConvert = loadBlockExpected(blockReaderSupport, columnReader); if (valuesConvert == null) { assertNull(loader); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java similarity index 60% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java rename to 
test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java index 48fbc9c8e0378..2d55b439bd1b7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java @@ -1,14 +1,16 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; -import org.elasticsearch.compute.aggregation.spatial.PointType; import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.hamcrest.Description; import org.hamcrest.Matchers; import org.hamcrest.TypeSafeMatcher; @@ -19,23 +21,31 @@ */ public class RectangleMatcher extends TypeSafeMatcher { private final Rectangle r; - private final PointType pointType; + private final CoordinateEncoder coordinateEncoder; private final double error; - public static TypeSafeMatcher closeTo(Rectangle r, double error, PointType pointType) { - return new RectangleMatcher(r, error, pointType); + public static TypeSafeMatcher closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) { + return new RectangleMatcher(r, error, coordinateEncoder); } - private RectangleMatcher(Rectangle r, double error, PointType pointType) { + private RectangleMatcher(Rectangle r, double error, 
CoordinateEncoder coordinateEncoder) { this.r = r; - this.pointType = pointType; + this.coordinateEncoder = coordinateEncoder; this.error = error; } + /** + * Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints. + */ + public static TypeSafeMatcher closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) { + var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY()); + return closeTo(normalized, v, encoder); + } + @Override protected boolean matchesSafely(Rectangle other) { // For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe. - boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX(); + boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX(); boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX()) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX())) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java similarity index 69% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java rename to test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java index 535bb820458cd..809f2862c208c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java @@ -1,11 +1,13 @@ /* * Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; import org.apache.lucene.util.BytesRef; import org.elasticsearch.geometry.Geometry; @@ -23,6 +25,10 @@ public WellKnownBinaryBytesRefMatcher(Matcher matcher) { this.matcher = matcher; } + public static Matcher encodes(TypeSafeMatcher matcher) { + return new WellKnownBinaryBytesRefMatcher(matcher); + } + @Override public boolean matchesSafely(BytesRef bytesRef) { return matcher.matches(fromBytesRef(bytesRef)); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java index 5395ca0b85163..fb45f869c4133 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java @@ -7,12 +7,11 @@ package org.elasticsearch.compute.aggregation.spatial; -import org.apache.lucene.geo.GeoEncodingUtils; -import org.apache.lucene.geo.XYEncodingUtils; import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import 
org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.util.Optional; @@ -23,26 +22,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitGeo(geo, WrapLongitude.WRAP); } - @Override - public double decodeX(int encoded) { - return GeoEncodingUtils.decodeLongitude(encoded); - } - - @Override - public double decodeY(int encoded) { - return GeoEncodingUtils.decodeLatitude(encoded); - } - - @Override - public int encodeX(double decoded) { - return GeoEncodingUtils.encodeLongitude(decoded); - } - - @Override - public int encodeY(double decoded) { - return GeoEncodingUtils.encodeLatitude(decoded); - } - // Geo encodes the longitude in the lower 32 bits and the latitude in the upper 32 bits. @Override public int extractX(long encoded) { @@ -53,6 +32,11 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.GEO; + } }, CARTESIAN { @Override @@ -60,26 +44,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitCartesian(geo); } - @Override - public double decodeX(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public double decodeY(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public int encodeX(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - - @Override - public int encodeY(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - @Override public int extractX(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); @@ -89,19 +53,18 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractSecond(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.CARTESIAN; + } 
}; public abstract Optional computeEnvelope(Geometry geo); - public abstract double decodeX(int encoded); - - public abstract double decodeY(int encoded); - - public abstract int encodeX(double decoded); - - public abstract int encodeY(double decoded); - public abstract int extractX(long encoded); public abstract int extractY(long encoded); + + public abstract CoordinateEncoder encoder(); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java index 9ce0ccdda0ff5..cb765e4d6757e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java @@ -72,10 +72,10 @@ public void add(int groupId, Geometry geometry) { .ifPresent( r -> add( groupId, - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -122,10 +122,10 @@ public Block toBlock(IntVector selected, DriverContext driverContext) { new BytesRef( WellKnownBinary.toWKB( new Rectangle( - pointType.decodeX(minXs.get(si)), - pointType.decodeX(maxXs.get(si)), - pointType.decodeY(maxYs.get(si)), - pointType.decodeY(minYs.get(si)) + pointType.encoder().decodeX(minXs.get(si)), + pointType.encoder().decodeX(maxXs.get(si)), + pointType.encoder().decodeY(maxYs.get(si)), + pointType.encoder().decodeY(minYs.get(si)) ), ByteOrder.LITTLE_ENDIAN ) diff --git 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java index 3dd7a6d4acde2..41bc50abcf6bc 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java @@ -91,8 +91,8 @@ public void add(int groupId, Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java index 0eea9b79f73ea..3dc150d1702a2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java @@ -14,6 +14,7 @@ import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.nio.ByteOrder; @@ -46,10 +47,10 @@ public void add(Geometry geo) { pointType.computeEnvelope(geo) .ifPresent( r 
-> add( - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -74,8 +75,9 @@ public Block toBlock(DriverContext driverContext) { } private byte[] toWKB() { + CoordinateEncoder encoder = pointType.encoder(); return WellKnownBinary.toWKB( - new Rectangle(pointType.decodeX(minX), pointType.decodeX(maxX), pointType.decodeY(maxY), pointType.decodeY(minY)), + new Rectangle(encoder.decodeX(minX), encoder.decodeX(maxX), encoder.decodeY(maxY), encoder.decodeY(minY)), ByteOrder.LITTLE_ENDIAN ); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java index 99200d2ed99f5..0d6163636fcde 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java @@ -53,8 +53,8 @@ public void add(Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java 
b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 18ce9d7e3e057..77dde5e875080 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -390,7 +390,7 @@ public static LogicalPlan localSource(BlockFactory blockFactory, List } public static T as(Object node, Class type) { - Assert.assertThat(node, instanceOf(type)); + Assert.assertThat("Unexpected type: " + node.getClass(), node, instanceOf(type)); return type.cast(node); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java index 84915d024ea82..54c05cf1bad52 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java @@ -103,11 +103,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (type) { case DataType.GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new 
SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; default -> throw EsqlIllegalArgumentException.illegalDataType(type); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java index 5cc1701faf13a..34e5c9d68fc86 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java @@ -104,11 +104,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (field().dataType()) { case DataType.GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; // Shapes don't differentiate between source and doc values. 
case DataType.GEO_SHAPE -> new SpatialExtentGeoShapeAggregatorFunctionSupplier(inputChannels); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index 1eaade043658b..eb148952e0a26 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushTopNToSource; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.ReplaceSourceAttributes; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialDocValuesExtraction; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialShapeBoundExtraction; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; import org.elasticsearch.xpack.esql.rule.Rule; @@ -73,7 +74,13 @@ protected List> rules(boolean optimizeForEsSource) { var pushdown = new Batch("Push to ES", esSourceRules.toArray(Rule[]::new)); // add the field extraction in just one pass // add it at the end after all the other rules have ran - var fieldExtraction = new Batch<>("Field extraction", Limiter.ONCE, new InsertFieldExtraction(), new SpatialDocValuesExtraction()); + var fieldExtraction = new Batch<>( + "Field extraction", + Limiter.ONCE, + new InsertFieldExtraction(), + new SpatialDocValuesExtraction(), + new SpatialShapeBoundExtraction() + ); return asList(pushdown, fieldExtraction); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java new file mode 100644 index 0000000000000..6949602aed0a4 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java @@ -0,0 +1,100 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; + +import org.elasticsearch.lucene.spatial.GeometryDocValueWriter; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialExtent; +import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; +import org.elasticsearch.xpack.esql.optimizer.PhysicalOptimizerRules.ParameterizedOptimizerRule; +import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; +import org.elasticsearch.xpack.esql.plan.physical.EvalExec; +import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; +import org.elasticsearch.xpack.esql.plan.physical.FilterExec; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; + +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** 
+ * This rule is responsible for marking spatial shape fields whose extent can be extracted from the binary representation encoded by + * {@link GeometryDocValueWriter}. + * This is a very specific optimization that is only used in the context of ST_EXTENT_AGG aggregations. + * Normally spatial fields are extracted from source values because this maintains original precision, but is very slow. + * Simply extracting the spatial bounds from the binary encoding loses both precision and geometry topological information for shapes. + * For this reason we only consider extracting the extent under very specific conditions: + *
    + *
  • The spatial data is of type GEO_SHAPE or CARTESIAN_SHAPE.
  • + *
  • The spatial data is consumed directly by an ST_EXTENT_AGG.
  • + *
  • The spatial data is not consumed by any other operation. While this is stricter than necessary, + * it is a good enough approximation for now.
  • + *
+ */ +public class SpatialShapeBoundExtraction extends ParameterizedOptimizerRule { + @Override + protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerContext ctx) { + var foundAttributes = new HashSet(); + + return aggregate.transformDown(UnaryExec.class, exec -> { + switch (exec) { + case AggregateExec agg -> { + List aggregateFunctions = agg.aggregates() + .stream() + .flatMap(e -> SpatialShapeBoundExtraction.extractAggregateFunction(e).stream()) + .toList(); + List spatialExtents = aggregateFunctions.stream() + .filter(SpatialExtent.class::isInstance) + .map(SpatialExtent.class::cast) + .toList(); + List nonSpatialExtents = aggregateFunctions.stream() + .filter(a -> a instanceof SpatialExtent == false) + .toList(); + // While we currently do not have any non-extent aggregations which apply to shapes, we might have them in the future. + Set fieldsAppearingInNonSpatialExtents = nonSpatialExtents.stream() + .flatMap(af -> af.references().stream()) + .filter(FieldAttribute.class::isInstance) + .map(f -> ((FieldAttribute) f).field()) + .collect(Collectors.toSet()); + spatialExtents.stream() + .map(SpatialExtent::field) + .filter(FieldAttribute.class::isInstance) + .map(FieldAttribute.class::cast) + .filter(f -> isShape(f.field().getDataType()) && fieldsAppearingInNonSpatialExtents.contains(f.field()) == false) + .forEach(foundAttributes::add); + } + case EvalExec evalExec -> foundAttributes.removeAll(evalExec.references()); + case FilterExec filterExec -> foundAttributes.removeAll(filterExec.condition().references()); + case FieldExtractExec fieldExtractExec -> { + foundAttributes.retainAll(fieldExtractExec.attributesToExtract()); + return fieldExtractExec.withBoundAttributes(foundAttributes); + } + default -> { // Do nothing + } + } + return exec; + }); + } + + private static boolean isShape(DataType dataType) { + return dataType == DataType.GEO_SHAPE || dataType == DataType.CARTESIAN_SHAPE; + } + + private static Optional 
extractAggregateFunction(NamedExpression expr) { + return expr instanceof Alias as && as.child() instanceof AggregateFunction af ? Optional.of(af) : Optional.empty(); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java index ec996c5c84064..0fddfb652afff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java @@ -7,9 +7,11 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -31,9 +33,10 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { ); private final List attributesToExtract; - private final Attribute sourceAttribute; + private final @Nullable Attribute sourceAttribute; + /** - * Attributes that many be extracted as doc values even if that makes them + * Attributes that may be extracted as doc values even if that makes them * less accurate. This is mostly used for geo fields which lose a lot of * precision in their doc values, but in some cases doc values provides * enough precision to do the job. @@ -43,17 +46,32 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { */ private final Set docValuesAttributes; + /** + * Attributes of a shape whose extent can be extracted directly from the encoded geometry. + *

+ * This is never serialized between nodes and only used locally. + *

+ */ + private final Set boundAttributes; + private List lazyOutput; public FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract) { - this(source, child, attributesToExtract, Set.of()); + this(source, child, attributesToExtract, Set.of(), Set.of()); } - private FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract, Set docValuesAttributes) { + private FieldExtractExec( + Source source, + PhysicalPlan child, + List attributesToExtract, + Set docValuesAttributes, + Set boundAttributes + ) { super(source, child); this.attributesToExtract = attributesToExtract; this.sourceAttribute = extractSourceAttributesFrom(child); this.docValuesAttributes = docValuesAttributes; + this.boundAttributes = boundAttributes; } private FieldExtractExec(StreamInput in) throws IOException { @@ -78,7 +96,7 @@ public String getWriteableName() { return ENTRY.name; } - public static Attribute extractSourceAttributesFrom(PhysicalPlan plan) { + public static @Nullable Attribute extractSourceAttributesFrom(PhysicalPlan plan) { for (Attribute attribute : plan.outputSet()) { if (EsQueryExec.isSourceAttribute(attribute)) { return attribute; @@ -99,18 +117,22 @@ protected NodeInfo info() { @Override public UnaryExec replaceChild(PhysicalPlan newChild) { - return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes, boundAttributes); } public FieldExtractExec withDocValuesAttributes(Set docValuesAttributes) { - return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); + } + + public FieldExtractExec withBoundAttributes(Set boundAttributes) { + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); } public List attributesToExtract() { return 
attributesToExtract; } - public Attribute sourceAttribute() { + public @Nullable Attribute sourceAttribute() { return sourceAttribute; } @@ -118,8 +140,8 @@ public Set docValuesAttributes() { return docValuesAttributes; } - public boolean hasDocValuesAttribute(Attribute attr) { - return docValuesAttributes.contains(attr); + public Set boundAttributes() { + return boundAttributes; } @Override @@ -142,7 +164,7 @@ public PhysicalPlan estimateRowSize(State state) { @Override public int hashCode() { - return Objects.hash(attributesToExtract, docValuesAttributes, child()); + return Objects.hash(attributesToExtract, docValuesAttributes, boundAttributes, child()); } @Override @@ -158,12 +180,18 @@ public boolean equals(Object obj) { FieldExtractExec other = (FieldExtractExec) obj; return Objects.equals(attributesToExtract, other.attributesToExtract) && Objects.equals(docValuesAttributes, other.docValuesAttributes) + && Objects.equals(boundAttributes, other.boundAttributes) && Objects.equals(child(), other.child()); } @Override public String nodeString() { - return nodeName() + NodeUtils.limitedToString(attributesToExtract) + "<" + NodeUtils.limitedToString(docValuesAttributes) + ">"; + return Strings.format( + "%s<%s,%s>", + nodeName() + NodeUtils.limitedToString(attributesToExtract), + docValuesAttributes, + boundAttributes + ); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java index a1faa537ba052..225e10f99c853 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java @@ -17,11 +17,11 @@ import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import 
org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.RectangleMatcher; -import org.elasticsearch.xpack.esql.expression.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; import org.elasticsearch.xpack.esql.expression.function.FunctionName; import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; @@ -82,20 +82,7 @@ private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier List.of(fieldTypedData), "SpatialExtent[field=Attribute[channel=0]]", expectedType, - new WellKnownBinaryBytesRefMatcher<>( - RectangleMatcher.closeTo( - new Rectangle( - // Since we use integers locally which are later decoded to doubles, all computation is effectively done using - // floats, not doubles. 
- (float) result.getMinX(), - (float) result.getMaxX(), - (float) result.getMaxY(), - (float) result.getMinY() - ), - 1e-3, - pointType - ) - ) + new WellKnownBinaryBytesRefMatcher<>(RectangleMatcher.closeToFloat(result, 1e-3, pointType.encoder())) ); }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 964dd4642d7c2..5d1b36b756bac 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -164,12 +164,14 @@ import static org.elasticsearch.xpack.esql.core.expression.function.scalar.FunctionTestUtils.l; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; import static org.elasticsearch.xpack.esql.parser.ExpressionBuilder.MAX_EXPRESSION_DEPTH; import static org.elasticsearch.xpack.esql.parser.LogicalPlanBuilder.MAX_QUERY_DEPTH; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasItem; @@ -199,6 +201,7 @@ public class PhysicalPlanOptimizerTests extends ESTestCase { private TestDataSource testData; private int allFieldRowSize; // TODO: Move this into testDataSource so tests that load other indexes can also assert on this private TestDataSource airports; + private TestDataSource airportsCityBoundaries; private TestDataSource 
airportsNoDocValues; // Test when spatial field is indexed but has no doc values private TestDataSource airportsNotIndexed; // Test when spatial field has doc values but is not indexed private TestDataSource airportsNotIndexedNorDocValues; // Test when spatial field is neither indexed nor has doc-values @@ -250,6 +253,13 @@ public void init() { // Some tests use data from the airports and countries indexes, so we load that here, and use it in the plan(q, airports) function. this.airports = makeTestDataSource("airports", "mapping-airports.json", functionRegistry, enrichResolution); + this.airportsCityBoundaries = makeTestDataSource( + "airports_city_boundaries", + "mapping-airport_city_boundaries.json", + functionRegistry, + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.DOC_VALUES, "city_boundary") + ); this.airportsNoDocValues = makeTestDataSource( "airports-no-doc-values", "mapping-airports_no_doc_values.json", @@ -2908,24 +2918,23 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ - * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] - * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],FINAL,...] 
+ * \_ExchangeExec[[...]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[..]] + * \_EsRelation[airports-no-doc-values][abbrev{f}#8, city{f}#14, city_location{f}#15, count..]]] * * After local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ - * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] - * \_FieldExtractExec[location{f}#48][location{f}#48] - * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ - * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],INITIAL,...] + * \_FilterExec[ISNOTNULL(location{f}#12)] + * \_FieldExtractExec[location{f}#12] + * \_EsQueryExec[airports-no-doc-values], indexMode[standard], query[][_doc{f}#59], limit[], sort[] estimatedRowSize[25] * * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] *

@@ -2965,6 +2974,151 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { } } + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ + * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] + * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] + *

+ * Also note that the type converting function is removed when it does not actually convert the type, + * ensuring that ReferenceAttributes are not created for the same field, and the optimization can still work. + */ + public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { + for (String query : new String[] { "from airports_city_boundaries | stats extent = st_extent_agg(city_boundary)", }) { + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use extent extraction + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses extent extraction + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not using doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is using a specific + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertChildIsExtractedAsBounds(agg, GEO_SHAPE); + } + } + + // This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. 
+ public void testSpatialTypesAndStatsExtentOfShapesNegativeCases() { + for (String query : new String[] { """ + FROM airports_city_boundaries | \ + EVAL prefix = SUBSTRING(TO_STRING(city_boundary), 5) | \ + STATS extent = ST_EXTENT_AGG(city_boundary) BY prefix""", """ + FROM airports_city_boundaries \ + | WHERE STARTS_WITH(TO_STRING(city_boundary), "MULTIPOLYGON") \ + | STATS extent = ST_EXTENT_AGG(city_boundary)""", }) { + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); + assertChildIsExtractedAsDocValues(exec, withDocValues, GEO_SHAPE); + } + } + + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#13,true[BOOLEAN]) AS extent, SPATIALCENTROID(city_location{f}#12,true[BOOLEA + * N]) AS centroid],...] + * \_ExchangeExec[[..]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[...]] + * \_EsRelation[airports_city_boundaries][abbrev{f}#8, airport{f}#9, city{f}#11, city_boundar..] 
+ * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + */ + public void testMixedSpatialBoundsAndPointsExtracted() { + var query = """ + FROM airports_city_boundaries \ + | STATS extent = ST_EXTENT_AGG(city_boundary), centroid = ST_CENTROID_AGG(city_location)"""; + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses both doc-values and bounds extraction + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not field-optimized. 
+ assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, withDocValues); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is field optimized. + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var fieldExtractExec = as(agg.child(), FieldExtractExec.class); + assertThat(fieldExtractExec.boundAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_boundary"))); + assertThat(fieldExtractExec.docValuesAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_location"))); + } + /** * This test does not have real index fields, and therefor asserts that doc-values field extraction does NOT occur. * Before local optimizations: @@ -6912,12 +7066,23 @@ private EsQueryExec assertChildIsGeoPointExtract(UnaryExec parent, boolean useDo } private EsQueryExec assertChildIsExtractedAsDocValues(UnaryExec parent, boolean useDocValues, DataType dataType) { + // TODO(gal) why is this OK To vacuously true? var extract = as(parent.child(), FieldExtractExec.class); + assertThat(extract.boundAttributes(), is(empty())); assertTrue( "Expect field attribute to be extracted as " + (useDocValues ? 
"doc-values" : "source"), extract.attributesToExtract() .stream() - .allMatch(attr -> extract.hasDocValuesAttribute(attr) == useDocValues && attr.dataType() == dataType) + .allMatch(attr -> extract.docValuesAttributes().contains(attr) == useDocValues && attr.dataType() == dataType) + ); + return source(extract.child()); + } + + private static EsQueryExec assertChildIsExtractedAsBounds(UnaryExec parent, DataType dataType) { + var extract = as(parent.child(), FieldExtractExec.class); + assertTrue( + "Expect field attribute to be extracted as bounds", + extract.attributesToExtract().stream().allMatch(attr -> extract.boundAttributes().contains(attr) && attr.dataType() == dataType) ); return source(extract.child()); } @@ -6978,13 +7143,14 @@ private static QueryBuilder findQueryBuilder(BoolQueryBuilder booleanQuery, Stri } private void assertFieldExtractionWithDocValues(FieldExtractExec extract, DataType dataType, String... fieldNames) { + var docValuesAttributes = extract.docValuesAttributes(); extract.attributesToExtract().forEach(attr -> { String name = attr.name(); if (asList(fieldNames).contains(name)) { - assertThat("Expected field '" + name + "' to use doc-values", extract.hasDocValuesAttribute(attr), equalTo(true)); + assertThat("Expected field '" + name + "' to use doc-values", docValuesAttributes.contains(attr), equalTo(true)); assertThat("Expected field '" + name + "' to have data type " + dataType, attr.dataType(), equalTo(dataType)); } else { - assertThat("Expected field '" + name + "' to NOT use doc-values", extract.hasDocValuesAttribute(attr), equalTo(false)); + assertThat("Expected field '" + name + "' to NOT use doc-values", docValuesAttributes.contains(attr), equalTo(false)); } }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index e91fc6e49312d..78512636b57e9 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -86,7 +86,10 @@ public PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fieldExt for (Attribute attr : fieldExtractExec.attributesToExtract()) { layout.append(attr); op = op.with( - new TestFieldExtractOperatorFactory(attr, PlannerUtils.extractPreference(fieldExtractExec.hasDocValuesAttribute(attr))), + new TestFieldExtractOperatorFactory( + attr, + PlannerUtils.extractPreference(fieldExtractExec.docValuesAttributes().contains(attr)) + ), layout.build() ); } diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java index 23505eda493af..224abd2002455 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java @@ -208,7 +208,6 @@ public GeoShapeWithDocValuesFieldMapper build(MapperBuilderContext context) { } public static final class GeoShapeWithDocValuesFieldType extends AbstractShapeGeometryFieldType implements GeoShapeQueryable { - private final GeoFormatterFactory geoFormatterFactory; private final FieldValues scriptValues; @@ -298,6 +297,17 @@ public List parseStoredValues(List storedValues) { protected Function, List> getFormatter(String format) { return geoFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. 
+ return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } public static class TypeParser implements Mapper.TypeParser { diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java index e5d5354327f5a..2d586ac8eb86a 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java @@ -184,6 +184,16 @@ public String typeName() { protected Function, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + return true; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.CARTESIAN; + } } private final Builder builder; From d0c329f7d2bbf6bdc6c6e4cd0cddfa71eea419f2 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 17 Dec 2024 14:26:53 +0200 Subject: [PATCH 03/18] Update docs/changelog/118802.yaml --- docs/changelog/118802.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/118802.yaml diff --git a/docs/changelog/118802.yaml b/docs/changelog/118802.yaml new file mode 100644 index 0000000000000..f0309e5af9548 --- /dev/null +++ b/docs/changelog/118802.yaml @@ -0,0 +1,5 @@ +pr: 118802 +summary: ST_EXTENT_AGG binary extent optimization +area: "ES|QL, Geo" +type: enhancement +issues: [] From 905521907ad478199af6a4251686f92e1c6ef9a7 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 17 Dec 2024 14:28:59 +0200 Subject: [PATCH 04/18] Manually modify changelog --- docs/changelog/118802.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/118802.yaml 
b/docs/changelog/118802.yaml index f0309e5af9548..52e1d0ba2a09a 100644 --- a/docs/changelog/118802.yaml +++ b/docs/changelog/118802.yaml @@ -1,5 +1,5 @@ pr: 118802 summary: ST_EXTENT_AGG binary extent optimization -area: "ES|QL, Geo" +area: "ES|QL" type: enhancement issues: [] From e61f7a83df05ba0b82da0e4b2eea98ed6df606ce Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 17 Dec 2024 14:32:17 +0200 Subject: [PATCH 05/18] Fix TODO --- .../index/mapper/AbstractShapeGeometryFieldMapperTests.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java index 8ca412405a062..9a4cbcebd9d27 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -36,7 +36,6 @@ import java.util.stream.IntStream; public class AbstractShapeGeometryFieldMapperTests extends ESTestCase { - // TODO handle geo as well, this is actually bugged, since extracting the result ignores minneg etc. public void testCartesianBoundsBlockLoader() throws IOException { testBoundsBlockLoaderAux( CoordinateEncoder.CARTESIAN, @@ -46,7 +45,7 @@ public void testCartesianBoundsBlockLoader() throws IOException { ); } - // TODO when we turn this optimization on for geo, handle this as well. + // TODO when we turn this optimization on for geo, this test should pass. 
public void ignoreTestGeoBoundsBlockLoader() throws IOException { testBoundsBlockLoaderAux( CoordinateEncoder.GEO, From 753c8d701befe8cacf6d1bdc28c1f11528d43604 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 17 Dec 2024 14:37:33 +0200 Subject: [PATCH 06/18] Small refactors --- .../index/mapper/AbstractShapeGeometryFieldMapperTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java index 9a4cbcebd9d27..ea75ff9df94a6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -40,7 +40,7 @@ public void testCartesianBoundsBlockLoader() throws IOException { testBoundsBlockLoaderAux( CoordinateEncoder.CARTESIAN, () -> ShapeTestUtils.randomGeometryWithoutCircle(0, false), - field -> new CartesianShapeIndexer(field), + CartesianShapeIndexer::new, SpatialEnvelopeVisitor::visitCartesian ); } From f736839d6ff0b7fd442dc958b3144e95070b7795 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Wed, 4 Dec 2024 17:09:50 +0200 Subject: [PATCH 07/18] ESQL: ST_EXTENT_AGG binary extent optimization --- .../mapper/LegacyGeoShapeFieldMapper.java | 13 +- .../AbstractShapeGeometryFieldMapper.java | 82 +++++++ .../index/mapper/MappedFieldType.java | 20 +- ...AbstractShapeGeometryFieldMapperTests.java | 93 ++++++++ .../index/mapper/TextFieldMapperTests.java | 2 +- .../index/mapper/MapperTestCase.java | 19 +- .../test/hamcrest}/RectangleMatcher.java | 32 ++- .../WellKnownBinaryBytesRefMatcher.java | 14 +- .../aggregation/spatial/PointType.java | 63 ++---- .../spatial/SpatialExtentGroupingState.java | 16 +- ...entGroupingStateWrappedLongitudeState.java | 4 +- .../spatial/SpatialExtentState.java | 12 +- 
...atialExtentStateWrappedLongitudeState.java | 4 +- .../xpack/esql/EsqlTestUtils.java | 2 +- .../function/aggregate/SpatialCentroid.java | 4 +- .../function/aggregate/SpatialExtent.java | 4 +- .../optimizer/LocalPhysicalPlanOptimizer.java | 9 +- .../local/SpatialShapeBoundExtraction.java | 100 +++++++++ .../esql/plan/physical/FieldExtractExec.java | 52 +++-- .../aggregate/SpatialExtentTests.java | 19 +- .../optimizer/PhysicalPlanOptimizerTests.java | 200 ++++++++++++++++-- .../TestPhysicalOperationProviders.java | 5 +- .../GeoShapeWithDocValuesFieldMapper.java | 12 +- .../index/mapper/ShapeFieldMapper.java | 10 + 24 files changed, 644 insertions(+), 147 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java rename {x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression => test/framework/src/main/java/org/elasticsearch/test/hamcrest}/RectangleMatcher.java (60%) rename {x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression => test/framework/src/main/java/org/elasticsearch/test/hamcrest}/WellKnownBinaryBytesRefMatcher.java (69%) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java diff --git a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java index 1616d2727bf8a..506918b12fe96 100644 --- a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java +++ b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java @@ -46,6 +46,7 @@ import org.elasticsearch.legacygeo.builders.ShapeBuilder; import org.elasticsearch.legacygeo.parsers.ShapeParser; import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor; +import 
org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import org.locationtech.spatial4j.shape.Point; @@ -401,7 +402,6 @@ public void parse( } public static final class GeoShapeFieldType extends AbstractShapeGeometryFieldType> implements GeoShapeQueryable { - private String tree = Defaults.TREE; private SpatialStrategy strategy = Defaults.STRATEGY; private boolean pointsOnly = Defaults.POINTS_ONLY; @@ -530,6 +530,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) { protected Function>, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. + return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } private final IndexVersion indexCreatedVersion; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java index 02a3ae11524e3..4b0542f7f7b03 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java @@ -8,9 +8,18 @@ */ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.lucene.spatial.GeometryDocValueReader; +import 
java.io.IOException; +import java.nio.ByteOrder; import java.util.Map; import java.util.function.Function; @@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) { // we don't support null value fors shapes return nullValue; } + + @Override + public BlockLoader blockLoader(BlockLoaderContext blContext) { + return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported() + ? new BoundsBlockLoader(name(), coordinateEncoder()) + : blockLoaderFromSource(blContext); + } + + protected abstract boolean isBoundsExtractionSupported(); + + protected abstract CoordinateEncoder coordinateEncoder(); + + // Visible for testing + static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader { + private final String fieldName; + private final CoordinateEncoder encoder; + + BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) { + this.fieldName = fieldName; + this.encoder = encoder; + } + + @Override + public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException { + return new BlockLoader.AllReader() { + @Override + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + try (var builder = factory.bytesRefs(docs.count())) { + for (int i = 0; i < docs.count(); i++) { + read(binaryDocValues, docs.get(i), reader, builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + read(binaryDocValues, docId, reader, (BytesRefBuilder) builder); + } + + private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder) + 
throws IOException { + binaryDocValues.advanceExact(doc); + reader.reset(binaryDocValues.binaryValue()); + var extent = reader.getExtent(); + // This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to + // preserve its properties as a WKB shape, only to convert it back to ints when we compute the + // aggregation. An obvious optimization would be to avoid this back-and-forth conversion. + var rectangle = new Rectangle( + encoder.decodeX(extent.minX()), + encoder.decodeX(extent.maxX()), + encoder.decodeY(extent.maxY()), + encoder.decodeY(extent.minY()) + ); + builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN))); + } + + @Override + public boolean canReuse(int startingDocID) { + return true; + } + }; + } + + @Override + public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + } } protected Explicit coerce; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 35722be20b9be..20d23ab97ac26 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -676,11 +676,27 @@ public enum FieldExtractPreference { /** * Load the field from doc-values into a BlockLoader supporting doc-values. */ - DOC_VALUES, + DOC_VALUES(true), + /** Loads the field by extracting the extent from the binary encoded representation */ + EXTRACT_SPATIAL_BOUNDS(false), /** * No preference. Leave the choice of where to load the field from up to the FieldType. */ - NONE + NONE(false); + + private final boolean isColumnReader; + + FieldExtractPreference(boolean isColumnReader) { + this.isColumnReader = isColumnReader; + } + + public static FieldExtractPreference forColumnReader(boolean columnReader) { + return columnReader ? 
DOC_VALUES : NONE; + } + + public boolean isColumnReader() { + return isColumnReader; + } } /** diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java new file mode 100644 index 0000000000000..8ca412405a062 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geo.GeometryTestUtils; +import org.elasticsearch.geo.ShapeTestUtils; +import org.elasticsearch.geometry.Geometry; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; +import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField; +import org.elasticsearch.lucene.spatial.CartesianShapeIndexer; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import 
org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; + +import java.io.IOException; +import java.util.Optional; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.IntStream; + +public class AbstractShapeGeometryFieldMapperTests extends ESTestCase { + // TODO handle geo as well, this is actually bugged, since extracting the result ignores minneg etc. + public void testCartesianBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.CARTESIAN, + () -> ShapeTestUtils.randomGeometryWithoutCircle(0, false), + field -> new CartesianShapeIndexer(field), + SpatialEnvelopeVisitor::visitCartesian + ); + } + + // TODO when we turn this optimization on for geo, handle this as well. + public void ignoreTestGeoBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.GEO, + () -> GeometryTestUtils.randomGeometryWithoutCircle(0, false), + field -> new GeoShapeIndexer(Orientation.RIGHT, field), + g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP) + ); + } + + private void testBoundsBlockLoaderAux( + CoordinateEncoder encoder, + Supplier generator, + Function indexerFactory, + Function> visitor + ) throws IOException { + var geometries = IntStream.range(0, 20).mapToObj(i -> ShapeTestUtils.randomGeometryWithoutCircle(0, false)).toList(); + var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder); + try (Directory directory = newDirectory()) { + try (var iw = new RandomIndexWriter(random(), directory)) { + for (Geometry geometry : geometries) { + var shape = new BinaryShapeDocValuesField("field", encoder); + shape.add(indexerFactory.apply("field").indexShape(geometry), geometry); + var doc = new Document(); + doc.add(shape); + iw.addDocument(doc); + } + } + var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray(); + try (DirectoryReader reader = 
DirectoryReader.open(directory)) { + LeafReaderContext ctx = reader.leaves().get(0); + TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices)); + for (int i = 0; i < indices.length; i++) { + var idx = indices[i]; + Rectangle r = visitor.apply(geometries.get(idx)).get(); + assertThat( + Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometries.get(idx)), + (BytesRef) block.get(i), + WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder)) + ); + } + } + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 32cbcfc2441a1..9675638b2b394 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -1355,6 +1355,6 @@ private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSo MapperService mapper = syntheticSource ? 
createSytheticSourceMapperService(mapping) : createMapperService(mapping); BlockReaderSupport blockReaderSupport = getSupportedReaders(mapper, "field.sub"); var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); + testBlockLoader(MappedFieldType.FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index 2da2c5a08c177..f6b2420ce0b03 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.fielddata.LeafFieldData; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.index.translog.Translog; @@ -87,8 +88,6 @@ import java.util.stream.IntStream; import static java.util.stream.Collectors.toList; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE; import static org.elasticsearch.test.MapMatcher.assertMap; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -1420,7 +1419,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str this(columnAtATimeReader, true, mapper, loaderFieldName); } - private BlockLoader getBlockLoader(boolean columnReader) { + private BlockLoader 
getBlockLoader(FieldExtractPreference fieldExtractPreference) { SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null); return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override @@ -1434,8 +1433,8 @@ public IndexSettings indexSettings() { } @Override - public MappedFieldType.FieldExtractPreference fieldExtractPreference() { - return columnReader ? DOC_VALUES : NONE; + public FieldExtractPreference fieldExtractPreference() { + return fieldExtractPreference; } @Override @@ -1484,16 +1483,20 @@ private void testBlockLoader(boolean syntheticSource, boolean columnReader) thro ); } var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); + testBlockLoader(FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); } protected final void testBlockLoader( - boolean columnReader, + FieldExtractPreference fieldExtractPreference, SyntheticSourceExample example, BlockReaderSupport blockReaderSupport, SourceLoader sourceLoader ) throws IOException { - BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader); + var columnReader = switch (fieldExtractPreference) { + case DOC_VALUES -> true; + case NONE, EXTRACT_SPATIAL_BOUNDS -> false; + }; + BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference); Function valuesConvert = loadBlockExpected(blockReaderSupport, columnReader); if (valuesConvert == null) { assertNull(loader); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java similarity index 60% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java rename to 
test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java index 48fbc9c8e0378..2d55b439bd1b7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java @@ -1,14 +1,16 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; -import org.elasticsearch.compute.aggregation.spatial.PointType; import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.hamcrest.Description; import org.hamcrest.Matchers; import org.hamcrest.TypeSafeMatcher; @@ -19,23 +21,31 @@ */ public class RectangleMatcher extends TypeSafeMatcher { private final Rectangle r; - private final PointType pointType; + private final CoordinateEncoder coordinateEncoder; private final double error; - public static TypeSafeMatcher closeTo(Rectangle r, double error, PointType pointType) { - return new RectangleMatcher(r, error, pointType); + public static TypeSafeMatcher closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) { + return new RectangleMatcher(r, error, coordinateEncoder); } - private RectangleMatcher(Rectangle r, double error, PointType pointType) { + private RectangleMatcher(Rectangle r, double error, 
CoordinateEncoder coordinateEncoder) { this.r = r; - this.pointType = pointType; + this.coordinateEncoder = coordinateEncoder; this.error = error; } + /** + * Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints. + */ + public static TypeSafeMatcher closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) { + var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY()); + return closeTo(normalized, v, encoder); + } + @Override protected boolean matchesSafely(Rectangle other) { // For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe. - boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX(); + boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX(); boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX()) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX())) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java similarity index 69% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java rename to test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java index 535bb820458cd..809f2862c208c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java @@ -1,11 +1,13 @@ /* * Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; import org.apache.lucene.util.BytesRef; import org.elasticsearch.geometry.Geometry; @@ -23,6 +25,10 @@ public WellKnownBinaryBytesRefMatcher(Matcher matcher) { this.matcher = matcher; } + public static Matcher encodes(TypeSafeMatcher matcher) { + return new WellKnownBinaryBytesRefMatcher(matcher); + } + @Override public boolean matchesSafely(BytesRef bytesRef) { return matcher.matches(fromBytesRef(bytesRef)); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java index 5395ca0b85163..fb45f869c4133 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java @@ -7,12 +7,11 @@ package org.elasticsearch.compute.aggregation.spatial; -import org.apache.lucene.geo.GeoEncodingUtils; -import org.apache.lucene.geo.XYEncodingUtils; import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import 
org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.util.Optional; @@ -23,26 +22,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitGeo(geo, WrapLongitude.WRAP); } - @Override - public double decodeX(int encoded) { - return GeoEncodingUtils.decodeLongitude(encoded); - } - - @Override - public double decodeY(int encoded) { - return GeoEncodingUtils.decodeLatitude(encoded); - } - - @Override - public int encodeX(double decoded) { - return GeoEncodingUtils.encodeLongitude(decoded); - } - - @Override - public int encodeY(double decoded) { - return GeoEncodingUtils.encodeLatitude(decoded); - } - // Geo encodes the longitude in the lower 32 bits and the latitude in the upper 32 bits. @Override public int extractX(long encoded) { @@ -53,6 +32,11 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.GEO; + } }, CARTESIAN { @Override @@ -60,26 +44,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitCartesian(geo); } - @Override - public double decodeX(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public double decodeY(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public int encodeX(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - - @Override - public int encodeY(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - @Override public int extractX(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); @@ -89,19 +53,18 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractSecond(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.CARTESIAN; + } 
}; public abstract Optional computeEnvelope(Geometry geo); - public abstract double decodeX(int encoded); - - public abstract double decodeY(int encoded); - - public abstract int encodeX(double decoded); - - public abstract int encodeY(double decoded); - public abstract int extractX(long encoded); public abstract int extractY(long encoded); + + public abstract CoordinateEncoder encoder(); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java index 9ce0ccdda0ff5..cb765e4d6757e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java @@ -72,10 +72,10 @@ public void add(int groupId, Geometry geometry) { .ifPresent( r -> add( groupId, - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -122,10 +122,10 @@ public Block toBlock(IntVector selected, DriverContext driverContext) { new BytesRef( WellKnownBinary.toWKB( new Rectangle( - pointType.decodeX(minXs.get(si)), - pointType.decodeX(maxXs.get(si)), - pointType.decodeY(maxYs.get(si)), - pointType.decodeY(minYs.get(si)) + pointType.encoder().decodeX(minXs.get(si)), + pointType.encoder().decodeX(maxXs.get(si)), + pointType.encoder().decodeY(maxYs.get(si)), + pointType.encoder().decodeY(minYs.get(si)) ), ByteOrder.LITTLE_ENDIAN ) diff --git 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java index 3dd7a6d4acde2..41bc50abcf6bc 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java @@ -91,8 +91,8 @@ public void add(int groupId, Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java index 0eea9b79f73ea..3dc150d1702a2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java @@ -14,6 +14,7 @@ import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.nio.ByteOrder; @@ -46,10 +47,10 @@ public void add(Geometry geo) { pointType.computeEnvelope(geo) .ifPresent( r 
-> add( - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -74,8 +75,9 @@ public Block toBlock(DriverContext driverContext) { } private byte[] toWKB() { + CoordinateEncoder encoder = pointType.encoder(); return WellKnownBinary.toWKB( - new Rectangle(pointType.decodeX(minX), pointType.decodeX(maxX), pointType.decodeY(maxY), pointType.decodeY(minY)), + new Rectangle(encoder.decodeX(minX), encoder.decodeX(maxX), encoder.decodeY(maxY), encoder.decodeY(minY)), ByteOrder.LITTLE_ENDIAN ); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java index 99200d2ed99f5..0d6163636fcde 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java @@ -53,8 +53,8 @@ public void add(Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java 
b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 18ce9d7e3e057..77dde5e875080 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -390,7 +390,7 @@ public static LogicalPlan localSource(BlockFactory blockFactory, List } public static T as(Object node, Class type) { - Assert.assertThat(node, instanceOf(type)); + Assert.assertThat("Unexpected type: " + node.getClass(), node, instanceOf(type)); return type.cast(node); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java index 84915d024ea82..54c05cf1bad52 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java @@ -103,11 +103,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (type) { case DataType.GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new 
SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; default -> throw EsqlIllegalArgumentException.illegalDataType(type); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java index 5cc1701faf13a..34e5c9d68fc86 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java @@ -104,11 +104,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (field().dataType()) { case DataType.GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; // Shapes don't differentiate between source and doc values. 
case DataType.GEO_SHAPE -> new SpatialExtentGeoShapeAggregatorFunctionSupplier(inputChannels); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index 1eaade043658b..eb148952e0a26 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushTopNToSource; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.ReplaceSourceAttributes; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialDocValuesExtraction; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialShapeBoundExtraction; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; import org.elasticsearch.xpack.esql.rule.Rule; @@ -73,7 +74,13 @@ protected List> rules(boolean optimizeForEsSource) { var pushdown = new Batch("Push to ES", esSourceRules.toArray(Rule[]::new)); // add the field extraction in just one pass // add it at the end after all the other rules have ran - var fieldExtraction = new Batch<>("Field extraction", Limiter.ONCE, new InsertFieldExtraction(), new SpatialDocValuesExtraction()); + var fieldExtraction = new Batch<>( + "Field extraction", + Limiter.ONCE, + new InsertFieldExtraction(), + new SpatialDocValuesExtraction(), + new SpatialShapeBoundExtraction() + ); return asList(pushdown, fieldExtraction); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java new file mode 100644 index 0000000000000..6949602aed0a4 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java @@ -0,0 +1,100 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; + +import org.elasticsearch.lucene.spatial.GeometryDocValueWriter; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialExtent; +import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; +import org.elasticsearch.xpack.esql.optimizer.PhysicalOptimizerRules.ParameterizedOptimizerRule; +import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; +import org.elasticsearch.xpack.esql.plan.physical.EvalExec; +import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; +import org.elasticsearch.xpack.esql.plan.physical.FilterExec; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; + +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** 
+ * This rule is responsible for marking spatial shape fields whose extent can be extracted from the binary representation encoded by
+ * {@link GeometryDocValueWriter}.
+ * This is a very specific optimization that is only used in the context of ST_EXTENT_AGG aggregations.
+ * Normally spatial fields are extracted from source values because this maintains original precision, but is very slow.
+ * Simply extracting the spatial bounds from the binary encoding loses both precision and geometry topological information for shapes.
+ * For this reason we only consider extracting the extent under very specific conditions:
+ * <ul>
+ *     <li>The spatial data is of type GEO_SHAPE or CARTESIAN_SHAPE.</li>
+ *     <li>The spatial data is consumed directly by an ST_EXTENT_AGG.</li>
+ *     <li>The spatial data is not consumed by any other operation. While this is stricter than necessary,
+ *     it is a good enough approximation for now.</li>
+ * </ul>
+ */ +public class SpatialShapeBoundExtraction extends ParameterizedOptimizerRule { + @Override + protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerContext ctx) { + var foundAttributes = new HashSet(); + + return aggregate.transformDown(UnaryExec.class, exec -> { + switch (exec) { + case AggregateExec agg -> { + List aggregateFunctions = agg.aggregates() + .stream() + .flatMap(e -> SpatialShapeBoundExtraction.extractAggregateFunction(e).stream()) + .toList(); + List spatialExtents = aggregateFunctions.stream() + .filter(SpatialExtent.class::isInstance) + .map(SpatialExtent.class::cast) + .toList(); + List nonSpatialExtents = aggregateFunctions.stream() + .filter(a -> a instanceof SpatialExtent == false) + .toList(); + // While we currently do not have any non-extent aggregations which apply to shapes, we might have them in the future. + Set fieldsAppearingInNonSpatialExtents = nonSpatialExtents.stream() + .flatMap(af -> af.references().stream()) + .filter(FieldAttribute.class::isInstance) + .map(f -> ((FieldAttribute) f).field()) + .collect(Collectors.toSet()); + spatialExtents.stream() + .map(SpatialExtent::field) + .filter(FieldAttribute.class::isInstance) + .map(FieldAttribute.class::cast) + .filter(f -> isShape(f.field().getDataType()) && fieldsAppearingInNonSpatialExtents.contains(f.field()) == false) + .forEach(foundAttributes::add); + } + case EvalExec evalExec -> foundAttributes.removeAll(evalExec.references()); + case FilterExec filterExec -> foundAttributes.removeAll(filterExec.condition().references()); + case FieldExtractExec fieldExtractExec -> { + foundAttributes.retainAll(fieldExtractExec.attributesToExtract()); + return fieldExtractExec.withBoundAttributes(foundAttributes); + } + default -> { // Do nothing + } + } + return exec; + }); + } + + private static boolean isShape(DataType dataType) { + return dataType == DataType.GEO_SHAPE || dataType == DataType.CARTESIAN_SHAPE; + } + + private static Optional 
extractAggregateFunction(NamedExpression expr) { + return expr instanceof Alias as && as.child() instanceof AggregateFunction af ? Optional.of(af) : Optional.empty(); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java index ec996c5c84064..0fddfb652afff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java @@ -7,9 +7,11 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -31,9 +33,10 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { ); private final List attributesToExtract; - private final Attribute sourceAttribute; + private final @Nullable Attribute sourceAttribute; + /** - * Attributes that many be extracted as doc values even if that makes them + * Attributes that may be extracted as doc values even if that makes them * less accurate. This is mostly used for geo fields which lose a lot of * precision in their doc values, but in some cases doc values provides * enough precision to do the job. @@ -43,17 +46,32 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { */ private final Set docValuesAttributes; + /** + * Attributes of a shape whose extent can be extracted directly from the encoded geometry. + *

+ * This is never serialized between nodes and only used locally. + *

+ */ + private final Set boundAttributes; + private List lazyOutput; public FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract) { - this(source, child, attributesToExtract, Set.of()); + this(source, child, attributesToExtract, Set.of(), Set.of()); } - private FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract, Set docValuesAttributes) { + private FieldExtractExec( + Source source, + PhysicalPlan child, + List attributesToExtract, + Set docValuesAttributes, + Set boundAttributes + ) { super(source, child); this.attributesToExtract = attributesToExtract; this.sourceAttribute = extractSourceAttributesFrom(child); this.docValuesAttributes = docValuesAttributes; + this.boundAttributes = boundAttributes; } private FieldExtractExec(StreamInput in) throws IOException { @@ -78,7 +96,7 @@ public String getWriteableName() { return ENTRY.name; } - public static Attribute extractSourceAttributesFrom(PhysicalPlan plan) { + public static @Nullable Attribute extractSourceAttributesFrom(PhysicalPlan plan) { for (Attribute attribute : plan.outputSet()) { if (EsQueryExec.isSourceAttribute(attribute)) { return attribute; @@ -99,18 +117,22 @@ protected NodeInfo info() { @Override public UnaryExec replaceChild(PhysicalPlan newChild) { - return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes, boundAttributes); } public FieldExtractExec withDocValuesAttributes(Set docValuesAttributes) { - return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); + } + + public FieldExtractExec withBoundAttributes(Set boundAttributes) { + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); } public List attributesToExtract() { return 
attributesToExtract; } - public Attribute sourceAttribute() { + public @Nullable Attribute sourceAttribute() { return sourceAttribute; } @@ -118,8 +140,8 @@ public Set docValuesAttributes() { return docValuesAttributes; } - public boolean hasDocValuesAttribute(Attribute attr) { - return docValuesAttributes.contains(attr); + public Set boundAttributes() { + return boundAttributes; } @Override @@ -142,7 +164,7 @@ public PhysicalPlan estimateRowSize(State state) { @Override public int hashCode() { - return Objects.hash(attributesToExtract, docValuesAttributes, child()); + return Objects.hash(attributesToExtract, docValuesAttributes, boundAttributes, child()); } @Override @@ -158,12 +180,18 @@ public boolean equals(Object obj) { FieldExtractExec other = (FieldExtractExec) obj; return Objects.equals(attributesToExtract, other.attributesToExtract) && Objects.equals(docValuesAttributes, other.docValuesAttributes) + && Objects.equals(boundAttributes, other.boundAttributes) && Objects.equals(child(), other.child()); } @Override public String nodeString() { - return nodeName() + NodeUtils.limitedToString(attributesToExtract) + "<" + NodeUtils.limitedToString(docValuesAttributes) + ">"; + return Strings.format( + "%s<%s,%s>", + nodeName() + NodeUtils.limitedToString(attributesToExtract), + docValuesAttributes, + boundAttributes + ); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java index a1faa537ba052..225e10f99c853 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java @@ -17,11 +17,11 @@ import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import 
org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.RectangleMatcher; -import org.elasticsearch.xpack.esql.expression.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; import org.elasticsearch.xpack.esql.expression.function.FunctionName; import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; @@ -82,20 +82,7 @@ private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier List.of(fieldTypedData), "SpatialExtent[field=Attribute[channel=0]]", expectedType, - new WellKnownBinaryBytesRefMatcher<>( - RectangleMatcher.closeTo( - new Rectangle( - // Since we use integers locally which are later decoded to doubles, all computation is effectively done using - // floats, not doubles. 
- (float) result.getMinX(), - (float) result.getMaxX(), - (float) result.getMaxY(), - (float) result.getMinY() - ), - 1e-3, - pointType - ) - ) + new WellKnownBinaryBytesRefMatcher<>(RectangleMatcher.closeToFloat(result, 1e-3, pointType.encoder())) ); }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 964dd4642d7c2..5d1b36b756bac 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -164,12 +164,14 @@ import static org.elasticsearch.xpack.esql.core.expression.function.scalar.FunctionTestUtils.l; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; import static org.elasticsearch.xpack.esql.parser.ExpressionBuilder.MAX_EXPRESSION_DEPTH; import static org.elasticsearch.xpack.esql.parser.LogicalPlanBuilder.MAX_QUERY_DEPTH; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasItem; @@ -199,6 +201,7 @@ public class PhysicalPlanOptimizerTests extends ESTestCase { private TestDataSource testData; private int allFieldRowSize; // TODO: Move this into testDataSource so tests that load other indexes can also assert on this private TestDataSource airports; + private TestDataSource airportsCityBoundaries; private TestDataSource 
airportsNoDocValues; // Test when spatial field is indexed but has no doc values private TestDataSource airportsNotIndexed; // Test when spatial field has doc values but is not indexed private TestDataSource airportsNotIndexedNorDocValues; // Test when spatial field is neither indexed nor has doc-values @@ -250,6 +253,13 @@ public void init() { // Some tests use data from the airports and countries indexes, so we load that here, and use it in the plan(q, airports) function. this.airports = makeTestDataSource("airports", "mapping-airports.json", functionRegistry, enrichResolution); + this.airportsCityBoundaries = makeTestDataSource( + "airports_city_boundaries", + "mapping-airport_city_boundaries.json", + functionRegistry, + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.DOC_VALUES, "city_boundary") + ); this.airportsNoDocValues = makeTestDataSource( "airports-no-doc-values", "mapping-airports_no_doc_values.json", @@ -2908,24 +2918,23 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ - * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] - * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],FINAL,...] 
+ * \_ExchangeExec[[...]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[..]] + * \_EsRelation[airports-no-doc-values][abbrev{f}#8, city{f}#14, city_location{f}#15, count..]]] * * After local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ - * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] - * \_FieldExtractExec[location{f}#48][location{f}#48] - * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ - * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],INITIAL,...] + * \_FilterExec[ISNOTNULL(location{f}#12)] + * \_FieldExtractExec[location{f}#12] + * \_EsQueryExec[airports-no-doc-values], indexMode[standard], query[][_doc{f}#59], limit[], sort[] estimatedRowSize[25] * * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] *

@@ -2965,6 +2974,151 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { } } + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ + * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] + * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] + *

+ * Also note that the type converting function is removed when it does not actually convert the type, + * ensuring that ReferenceAttributes are not created for the same field, and the optimization can still work. + */ + public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { + for (String query : new String[] { "from airports_city_boundaries | stats extent = st_extent_agg(city_boundary)", }) { + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use extent extraction + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses extent extraction + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not using doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is using a specific + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertChildIsExtractedAsBounds(agg, GEO_SHAPE); + } + } + + // This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. 
+ public void testSpatialTypesAndStatsExtentOfShapesNegativeCases() { + for (String query : new String[] { """ + FROM airports_city_boundaries | \ + EVAL prefix = SUBSTRING(TO_STRING(city_boundary), 5) | \ + STATS extent = ST_EXTENT_AGG(city_boundary) BY prefix""", """ + FROM airports_city_boundaries \ + | WHERE STARTS_WITH(TO_STRING(city_boundary), "MULTIPOLYGON") \ + | STATS extent = ST_EXTENT_AGG(city_boundary)""", }) { + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); + assertChildIsExtractedAsDocValues(exec, withDocValues, GEO_SHAPE); + } + } + + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#13,true[BOOLEAN]) AS extent, SPATIALCENTROID(city_location{f}#12,true[BOOLEA + * N]) AS centroid],...] + * \_ExchangeExec[[..]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[...]] + * \_EsRelation[airports_city_boundaries][abbrev{f}#8, airport{f}#9, city{f}#11, city_boundar..] 
+ * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + */ + public void testMixedSpatialBoundsAndPointsExtracted() { + var query = """ + FROM airports_city_boundaries \ + | STATS extent = ST_EXTENT_AGG(city_boundary), centroid = ST_CENTROID_AGG(city_location)"""; + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses both doc-values and bounds extraction + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not field-optimized. 
+ assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, withDocValues); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is field optimized. + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var fieldExtractExec = as(agg.child(), FieldExtractExec.class); + assertThat(fieldExtractExec.boundAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_boundary"))); + assertThat(fieldExtractExec.docValuesAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_location"))); + } + /** * This test does not have real index fields, and therefor asserts that doc-values field extraction does NOT occur. * Before local optimizations: @@ -6912,12 +7066,23 @@ private EsQueryExec assertChildIsGeoPointExtract(UnaryExec parent, boolean useDo } private EsQueryExec assertChildIsExtractedAsDocValues(UnaryExec parent, boolean useDocValues, DataType dataType) { + // TODO(gal) why is this OK To vacuously true? var extract = as(parent.child(), FieldExtractExec.class); + assertThat(extract.boundAttributes(), is(empty())); assertTrue( "Expect field attribute to be extracted as " + (useDocValues ? 
"doc-values" : "source"), extract.attributesToExtract() .stream() - .allMatch(attr -> extract.hasDocValuesAttribute(attr) == useDocValues && attr.dataType() == dataType) + .allMatch(attr -> extract.docValuesAttributes().contains(attr) == useDocValues && attr.dataType() == dataType) + ); + return source(extract.child()); + } + + private static EsQueryExec assertChildIsExtractedAsBounds(UnaryExec parent, DataType dataType) { + var extract = as(parent.child(), FieldExtractExec.class); + assertTrue( + "Expect field attribute to be extracted as bounds", + extract.attributesToExtract().stream().allMatch(attr -> extract.boundAttributes().contains(attr) && attr.dataType() == dataType) ); return source(extract.child()); } @@ -6978,13 +7143,14 @@ private static QueryBuilder findQueryBuilder(BoolQueryBuilder booleanQuery, Stri } private void assertFieldExtractionWithDocValues(FieldExtractExec extract, DataType dataType, String... fieldNames) { + var docValuesAttributes = extract.docValuesAttributes(); extract.attributesToExtract().forEach(attr -> { String name = attr.name(); if (asList(fieldNames).contains(name)) { - assertThat("Expected field '" + name + "' to use doc-values", extract.hasDocValuesAttribute(attr), equalTo(true)); + assertThat("Expected field '" + name + "' to use doc-values", docValuesAttributes.contains(attr), equalTo(true)); assertThat("Expected field '" + name + "' to have data type " + dataType, attr.dataType(), equalTo(dataType)); } else { - assertThat("Expected field '" + name + "' to NOT use doc-values", extract.hasDocValuesAttribute(attr), equalTo(false)); + assertThat("Expected field '" + name + "' to NOT use doc-values", docValuesAttributes.contains(attr), equalTo(false)); } }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index e91fc6e49312d..78512636b57e9 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -86,7 +86,10 @@ public PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fieldExt for (Attribute attr : fieldExtractExec.attributesToExtract()) { layout.append(attr); op = op.with( - new TestFieldExtractOperatorFactory(attr, PlannerUtils.extractPreference(fieldExtractExec.hasDocValuesAttribute(attr))), + new TestFieldExtractOperatorFactory( + attr, + PlannerUtils.extractPreference(fieldExtractExec.docValuesAttributes().contains(attr)) + ), layout.build() ); } diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java index 23505eda493af..224abd2002455 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java @@ -208,7 +208,6 @@ public GeoShapeWithDocValuesFieldMapper build(MapperBuilderContext context) { } public static final class GeoShapeWithDocValuesFieldType extends AbstractShapeGeometryFieldType implements GeoShapeQueryable { - private final GeoFormatterFactory geoFormatterFactory; private final FieldValues scriptValues; @@ -298,6 +297,17 @@ public List parseStoredValues(List storedValues) { protected Function, List> getFormatter(String format) { return geoFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. 
+ return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } public static class TypeParser implements Mapper.TypeParser { diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java index e5d5354327f5a..2d586ac8eb86a 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java @@ -184,6 +184,16 @@ public String typeName() { protected Function, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + return true; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.CARTESIAN; + } } private final Builder builder; From b6ba18afd64c811a4294f037b8b338a69af64fd0 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 17 Dec 2024 14:26:53 +0200 Subject: [PATCH 08/18] Update docs/changelog/118802.yaml --- docs/changelog/118802.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/118802.yaml diff --git a/docs/changelog/118802.yaml b/docs/changelog/118802.yaml new file mode 100644 index 0000000000000..f0309e5af9548 --- /dev/null +++ b/docs/changelog/118802.yaml @@ -0,0 +1,5 @@ +pr: 118802 +summary: ST_EXTENT_AGG binary extent optimization +area: "ES|QL, Geo" +type: enhancement +issues: [] From 3f0b58fbc0167c7f1234782c01881ed984b72679 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 17 Dec 2024 14:28:59 +0200 Subject: [PATCH 09/18] Manually modify changelog --- docs/changelog/118802.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/118802.yaml 
b/docs/changelog/118802.yaml index f0309e5af9548..52e1d0ba2a09a 100644 --- a/docs/changelog/118802.yaml +++ b/docs/changelog/118802.yaml @@ -1,5 +1,5 @@ pr: 118802 summary: ST_EXTENT_AGG binary extent optimization -area: "ES|QL, Geo" +area: "ES|QL" type: enhancement issues: [] From 47966cfdd2db04f47a0fea02641bd8b746ba97e6 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 17 Dec 2024 14:32:17 +0200 Subject: [PATCH 10/18] Fix TODO --- .../index/mapper/AbstractShapeGeometryFieldMapperTests.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java index 8ca412405a062..9a4cbcebd9d27 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -36,7 +36,6 @@ import java.util.stream.IntStream; public class AbstractShapeGeometryFieldMapperTests extends ESTestCase { - // TODO handle geo as well, this is actually bugged, since extracting the result ignores minneg etc. public void testCartesianBoundsBlockLoader() throws IOException { testBoundsBlockLoaderAux( CoordinateEncoder.CARTESIAN, @@ -46,7 +45,7 @@ public void testCartesianBoundsBlockLoader() throws IOException { ); } - // TODO when we turn this optimization on for geo, handle this as well. + // TODO when we turn this optimization on for geo, this test should pass. 
public void ignoreTestGeoBoundsBlockLoader() throws IOException { testBoundsBlockLoaderAux( CoordinateEncoder.GEO, From 4104176c82ae0b067b6481ca1b5913f945662142 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Wed, 18 Dec 2024 17:47:04 +0100 Subject: [PATCH 11/18] Better changelog message --- docs/changelog/118802.yaml | 2 +- .../legacygeo/mapper/LegacyGeoShapeFieldMapper.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changelog/118802.yaml b/docs/changelog/118802.yaml index 52e1d0ba2a09a..600c4b6a1e203 100644 --- a/docs/changelog/118802.yaml +++ b/docs/changelog/118802.yaml @@ -1,5 +1,5 @@ pr: 118802 -summary: ST_EXTENT_AGG binary extent optimization +summary: ST_EXTENT_AGG optimize envelope extraction from doc-values for cartesian_shape area: "ES|QL" type: enhancement issues: [] diff --git a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java index 506918b12fe96..b0634f0f1332f 100644 --- a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java +++ b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java @@ -402,6 +402,7 @@ public void parse( } public static final class GeoShapeFieldType extends AbstractShapeGeometryFieldType> implements GeoShapeQueryable { + private String tree = Defaults.TREE; private SpatialStrategy strategy = Defaults.STRATEGY; private boolean pointsOnly = Defaults.POINTS_ONLY; From 05116c0bf0bcdff9a3b1f245f54de0c5499cf55e Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Wed, 18 Dec 2024 23:04:36 +0100 Subject: [PATCH 12/18] Reverted remove line --- .../spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java 
b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java index 224abd2002455..67d25556a2aa7 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java @@ -208,6 +208,7 @@ public GeoShapeWithDocValuesFieldMapper build(MapperBuilderContext context) { } public static final class GeoShapeWithDocValuesFieldType extends AbstractShapeGeometryFieldType implements GeoShapeQueryable { + private final GeoFormatterFactory geoFormatterFactory; private final FieldValues scriptValues; From 72e3e346eab5867a563089aeb4ab8ea13ffaaa99 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 19 Dec 2024 19:48:35 +0200 Subject: [PATCH 13/18] Review fixes --- ...AbstractShapeGeometryFieldMapperTests.java | 9 +- .../xpack/esql/CsvTestsDataLoader.java | 3 + .../xpack/esql/EsqlTestUtils.java | 2 +- ...cartesian_multipolygons_no_doc_values.json | 15 + .../src/main/resources/spatial.csv-spec | 9 + .../optimizer/LocalPhysicalPlanOptimizer.java | 4 +- .../local/SpatialDocValuesExtraction.java | 4 + ...java => SpatialShapeBoundsExtraction.java} | 18 +- .../esql/plan/physical/FieldExtractExec.java | 34 +- .../planner/EsPhysicalOperationProviders.java | 3 +- .../optimizer/PhysicalPlanOptimizerTests.java | 352 ++++++++++-------- 11 files changed, 272 insertions(+), 181 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-cartesian_multipolygons_no_doc_values.json rename x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/{SpatialShapeBoundExtraction.java => SpatialShapeBoundsExtraction.java} (83%) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java 
b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java index ea75ff9df94a6..bd58f4d443d34 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -61,7 +61,7 @@ private void testBoundsBlockLoaderAux( Function indexerFactory, Function> visitor ) throws IOException { - var geometries = IntStream.range(0, 20).mapToObj(i -> ShapeTestUtils.randomGeometryWithoutCircle(0, false)).toList(); + var geometries = IntStream.range(0, 20).mapToObj(i -> generator.get()).toList(); var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder); try (Directory directory = newDirectory()) { try (var iw = new RandomIndexWriter(random(), directory)) { @@ -79,9 +79,12 @@ private void testBoundsBlockLoaderAux( TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices)); for (int i = 0; i < indices.length; i++) { var idx = indices[i]; - Rectangle r = visitor.apply(geometries.get(idx)).get(); + var geometry = geometries.get(idx); + var geoString = geometry.toString(); + var geometryString = geoString.length() > 200 ? geoString.substring(0, 200) + "..." 
: geoString; + Rectangle r = visitor.apply(geometry).get(); assertThat( - Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometries.get(idx)), + Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometryString), (BytesRef) block.get(i), WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder)) ); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 8e81d14b4dfd7..2273b8a0c1f1c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -99,6 +99,8 @@ public class CsvTestsDataLoader { private static final TestsDataset COUNTRIES_BBOX_WEB = new TestsDataset("countries_bbox_web"); private static final TestsDataset AIRPORT_CITY_BOUNDARIES = new TestsDataset("airport_city_boundaries"); private static final TestsDataset CARTESIAN_MULTIPOLYGONS = new TestsDataset("cartesian_multipolygons"); + private static final TestsDataset CARTESIAN_MULTIPOLYGONS_NO_DOC_VALUES = new TestsDataset("cartesian_multipolygons_no_doc_values") + .withData("cartesian_multipolygons.csv"); private static final TestsDataset MULTIVALUE_GEOMETRIES = new TestsDataset("multivalue_geometries"); private static final TestsDataset MULTIVALUE_POINTS = new TestsDataset("multivalue_points"); private static final TestsDataset DISTANCES = new TestsDataset("distances"); @@ -142,6 +144,7 @@ public class CsvTestsDataLoader { Map.entry(COUNTRIES_BBOX_WEB.indexName, COUNTRIES_BBOX_WEB), Map.entry(AIRPORT_CITY_BOUNDARIES.indexName, AIRPORT_CITY_BOUNDARIES), Map.entry(CARTESIAN_MULTIPOLYGONS.indexName, CARTESIAN_MULTIPOLYGONS), + Map.entry(CARTESIAN_MULTIPOLYGONS_NO_DOC_VALUES.indexName, 
CARTESIAN_MULTIPOLYGONS_NO_DOC_VALUES), Map.entry(MULTIVALUE_GEOMETRIES.indexName, MULTIVALUE_GEOMETRIES), Map.entry(MULTIVALUE_POINTS.indexName, MULTIVALUE_POINTS), Map.entry(DATE_NANOS.indexName, DATE_NANOS), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 32988ce5b3f00..66fd7d3ee5eb5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -393,7 +393,7 @@ public static LogicalPlan localSource(BlockFactory blockFactory, List } public static T as(Object node, Class type) { - Assert.assertThat("Unexpected type: " + node.getClass(), node, instanceOf(type)); + Assert.assertThat(node, instanceOf(type)); return type.cast(node); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-cartesian_multipolygons_no_doc_values.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-cartesian_multipolygons_no_doc_values.json new file mode 100644 index 0000000000000..fb271c4975462 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-cartesian_multipolygons_no_doc_values.json @@ -0,0 +1,15 @@ +{ + "properties": { + "id": { + "type": "long" + }, + "name": { + "type": "keyword" + }, + "shape": { + "type": "shape", + "index": true, + "doc_values": false + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index 8694c973448e9..a7aae79ee496d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -1797,6 +1797,15 @@ extent:cartesian_shape BBOX (0.0, 3.0, 3.0, 0.0) ; 
+stExtentCartesianShapesNoDocValues +required_capability: st_extent_agg +FROM cartesian_multipolygons_no_doc_values | STATS extent = ST_EXTENT_AGG(shape) +; + +extent:cartesian_shape +BBOX (0.0, 3.0, 3.0, 0.0) +; + ############################################### # Tests for ST_INTERSECTS on CARTESIAN_POINT type diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index eb148952e0a26..a865f784137ad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -17,7 +17,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushTopNToSource; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.ReplaceSourceAttributes; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialDocValuesExtraction; -import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialShapeBoundExtraction; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialShapeBoundsExtraction; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; import org.elasticsearch.xpack.esql.rule.Rule; @@ -79,7 +79,7 @@ protected List> rules(boolean optimizeForEsSource) { Limiter.ONCE, new InsertFieldExtraction(), new SpatialDocValuesExtraction(), - new SpatialShapeBoundExtraction() + new SpatialShapeBoundsExtraction() ); return asList(pushdown, fieldExtraction); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java 
index 0f1c32e94f867..e2a72cd20ffc3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialAggregateFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.BinarySpatialFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialRelatesFunction; @@ -166,6 +167,9 @@ private boolean allowedForDocValues( if (stats.hasDocValues(fieldAttribute.fieldName()) == false) { return false; } + if (fieldAttribute.dataType() == DataType.GEO_SHAPE || fieldAttribute.dataType() == DataType.CARTESIAN_SHAPE) { + return false; + } var candidateDocValuesAttributes = new HashSet<>(foundAttributes); candidateDocValuesAttributes.add(fieldAttribute); var spatialRelatesAttributes = new HashSet(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java similarity index 83% rename from x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java index 6949602aed0a4..ec79014827a99 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java @@ -41,11 +41,12 @@ *
    *
  • The spatial data is of type GEO_SHAPE or CARTESIAN_SHAPE.
  • *
  • The spatial data is consumed directly by an ST_EXTENT_AGG.
  • - *
  • The spatial is not consumed by any other operation. While is this is stricter than necessary, - * it is a good enough approximation for now.
  • + *
  • The spatial data is not consumed by any other operation. While is this is stricter than necessary, + * it is a good enough approximation for now. For example, an aggregation like {@code count} shouldn't stop this optimization, + * not a check like {@code isNotNull}.
  • *
*/ -public class SpatialShapeBoundExtraction extends ParameterizedOptimizerRule { +public class SpatialShapeBoundsExtraction extends ParameterizedOptimizerRule { @Override protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerContext ctx) { var foundAttributes = new HashSet(); @@ -55,7 +56,7 @@ protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerConte case AggregateExec agg -> { List aggregateFunctions = agg.aggregates() .stream() - .flatMap(e -> SpatialShapeBoundExtraction.extractAggregateFunction(e).stream()) + .flatMap(e -> SpatialShapeBoundsExtraction.extractAggregateFunction(e).stream()) .toList(); List spatialExtents = aggregateFunctions.stream() .filter(SpatialExtent.class::isInstance) @@ -74,14 +75,19 @@ protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerConte .map(SpatialExtent::field) .filter(FieldAttribute.class::isInstance) .map(FieldAttribute.class::cast) - .filter(f -> isShape(f.field().getDataType()) && fieldsAppearingInNonSpatialExtents.contains(f.field()) == false) + .filter( + f -> isShape(f.field().getDataType()) + && fieldsAppearingInNonSpatialExtents.contains(f.field()) == false + && ctx.searchStats().hasDocValues(f.fieldName()) + ) .forEach(foundAttributes::add); } case EvalExec evalExec -> foundAttributes.removeAll(evalExec.references()); case FilterExec filterExec -> foundAttributes.removeAll(filterExec.condition().references()); case FieldExtractExec fieldExtractExec -> { foundAttributes.retainAll(fieldExtractExec.attributesToExtract()); - return fieldExtractExec.withBoundAttributes(foundAttributes); + return fieldExtractExec.withBoundsAttributes(foundAttributes) + .withDocValuesAttributes(fieldExtractExec.docValuesAttributes()); } default -> { // Do nothing } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java index 
0fddfb652afff..1f0990a924ed4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -52,7 +53,7 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { * This is never serialized between nodes and only used locally. *

*/ - private final Set boundAttributes; + private final Set boundsAttributes; private List lazyOutput; @@ -65,13 +66,13 @@ private FieldExtractExec( PhysicalPlan child, List attributesToExtract, Set docValuesAttributes, - Set boundAttributes + Set boundsAttributes ) { super(source, child); this.attributesToExtract = attributesToExtract; this.sourceAttribute = extractSourceAttributesFrom(child); this.docValuesAttributes = docValuesAttributes; - this.boundAttributes = boundAttributes; + this.boundsAttributes = boundsAttributes; } private FieldExtractExec(StreamInput in) throws IOException { @@ -117,15 +118,15 @@ protected NodeInfo info() { @Override public UnaryExec replaceChild(PhysicalPlan newChild) { - return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes, boundAttributes); + return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes, boundsAttributes); } public FieldExtractExec withDocValuesAttributes(Set docValuesAttributes) { - return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundsAttributes); } - public FieldExtractExec withBoundAttributes(Set boundAttributes) { - return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); + public FieldExtractExec withBoundsAttributes(Set boundsAttributes) { + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundsAttributes); } public List attributesToExtract() { @@ -140,8 +141,8 @@ public Set docValuesAttributes() { return docValuesAttributes; } - public Set boundAttributes() { - return boundAttributes; + public Set boundsAttributes() { + return boundsAttributes; } @Override @@ -164,7 +165,7 @@ public PhysicalPlan estimateRowSize(State state) { @Override public int hashCode() { - return Objects.hash(attributesToExtract, 
docValuesAttributes, boundAttributes, child()); + return Objects.hash(attributesToExtract, docValuesAttributes, boundsAttributes, child()); } @Override @@ -180,7 +181,7 @@ public boolean equals(Object obj) { FieldExtractExec other = (FieldExtractExec) obj; return Objects.equals(attributesToExtract, other.attributesToExtract) && Objects.equals(docValuesAttributes, other.docValuesAttributes) - && Objects.equals(boundAttributes, other.boundAttributes) + && Objects.equals(boundsAttributes, other.boundsAttributes) && Objects.equals(child(), other.child()); } @@ -190,8 +191,17 @@ public String nodeString() { "%s<%s,%s>", nodeName() + NodeUtils.limitedToString(attributesToExtract), docValuesAttributes, - boundAttributes + boundsAttributes ); } + public MappedFieldType.FieldExtractPreference fieldExtractPreference(Attribute attr) { + if (boundsAttributes.contains(attr)) { + return MappedFieldType.FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS; + } + if (docValuesAttributes.contains(attr)) { + return MappedFieldType.FieldExtractPreference.DOC_VALUES; + } + return MappedFieldType.FieldExtractPreference.NONE; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 17468f7afec1b..6d9cf38d34517 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -114,12 +114,11 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi .toList(); List fields = new ArrayList<>(); int docChannel = source.layout.get(sourceAttr.id()).channel(); - var docValuesAttrs = fieldExtractExec.docValuesAttributes(); for (Attribute attr : fieldExtractExec.attributesToExtract()) { layout.append(attr); var unionTypes = 
findUnionTypes(attr); DataType dataType = attr.dataType(); - MappedFieldType.FieldExtractPreference fieldExtractPreference = PlannerUtils.extractPreference(docValuesAttrs.contains(attr)); + MappedFieldType.FieldExtractPreference fieldExtractPreference = fieldExtractExec.fieldExtractPreference(attr); ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference); // Do not use the field attribute name, this can deviate from the field name for union types. String fieldName = attr instanceof FieldAttribute fa ? fa.fieldName() : attr.name(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index ee0f1761b0fcd..b69f977439470 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -163,6 +163,7 @@ import static org.elasticsearch.xpack.esql.core.expression.Expressions.names; import static org.elasticsearch.xpack.esql.core.expression.function.scalar.FunctionTestUtils.l; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; import static org.elasticsearch.xpack.esql.parser.ExpressionBuilder.MAX_EXPRESSION_DEPTH; @@ -206,6 +207,8 @@ public class PhysicalPlanOptimizerTests extends ESTestCase { private TestDataSource airportsNotIndexed; // Test when spatial field has doc values but is not indexed private TestDataSource airportsNotIndexedNorDocValues; // Test when spatial field is neither indexed nor has doc-values private TestDataSource airportsWeb; // Cartesian 
point field tests + private TestDataSource cartesianMultipolygons; // cartesian_shape field tests + private TestDataSource cartesianMultipolygonsNoDocValues; // cartesian_shape field tests but has no doc values private TestDataSource countriesBbox; // geo_shape field tests private TestDataSource countriesBboxWeb; // cartesian_shape field tests @@ -257,8 +260,7 @@ public void init() { "airports_city_boundaries", "mapping-airport_city_boundaries.json", functionRegistry, - enrichResolution, - new TestConfigurableSearchStats().exclude(Config.DOC_VALUES, "city_boundary") + enrichResolution ); this.airportsNoDocValues = makeTestDataSource( "airports-no-doc-values", @@ -282,6 +284,19 @@ public void init() { new TestConfigurableSearchStats().exclude(Config.INDEXED, "location").exclude(Config.DOC_VALUES, "location") ); this.airportsWeb = makeTestDataSource("airports_web", "mapping-airports_web.json", functionRegistry, enrichResolution); + this.cartesianMultipolygons = makeTestDataSource( + "cartesian_multipolygons", + "mapping-cartesian_multipolygons.json", + functionRegistry, + enrichResolution + ); + this.cartesianMultipolygonsNoDocValues = makeTestDataSource( + "cartesian_multipolygons_no_doc_values", + "mapping-cartesian_multipolygons_no_doc_values.json", + functionRegistry, + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.DOC_VALUES, "shape") + ); this.countriesBbox = makeTestDataSource("countriesBbox", "mapping-countries_bbox.json", functionRegistry, enrichResolution); this.countriesBboxWeb = makeTestDataSource( "countriesBboxWeb", @@ -2825,12 +2840,13 @@ public void testSpatialTypesAndStatsCentroidUseDocValues() { "from airports | eval location = to_geopoint(location) | stats centroid = st_centroid_agg(location)" }) { for (boolean withDocValues : new boolean[] { false, true }) { var testData = withDocValues ? airports : airportsNoDocValues; + var fieldExtractPreference = withDocValues ? 
FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -2842,12 +2858,12 @@ public void testSpatialTypesAndStatsCentroidUseDocValues() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, withDocValues); - assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), withDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, fieldExtractPreference); + assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), fieldExtractPreference); } } } @@ -2886,13 +2902,14 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { "from airports | stats extent = st_extent_agg(to_geopoint(location))", "from airports | eval location = to_geopoint(location) | stats extent = st_extent_agg(location)" }) { for (boolean withDocValues : new boolean[] { false, true }) { + var fieldExtractPreference = withDocValues ? 
FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var testData = withDocValues ? airports : airportsNoDocValues; var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -2904,12 +2921,12 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, withDocValues); - assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), withDocValues); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, fieldExtractPreference); + assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), fieldExtractPreference); } } } @@ -2946,13 +2963,14 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { "from airports | stats extent = st_extent_agg(location), centroid = st_centroid_agg(location)", "from airports | stats extent = st_extent_agg(location), centroid = st_centroid_agg(city_location)", }) { for (boolean withDocValues : new boolean[] { false, true }) { + var fieldExtractPreference = withDocValues ? 
FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var testData = withDocValues ? airports : airportsNoDocValues; var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -2964,12 +2982,12 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, withDocValues); - assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), withDocValues); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, fieldExtractPreference); + assertChildIsGeoPointExtract(withDocValues ? 
agg : as(agg.child(), FilterExec.class), fieldExtractPreference); } } } @@ -2978,58 +2996,56 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ - * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] - * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[$$extent$minNegX{r}#11, $$extent$minPosX{r + * }#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],null] + * \_ExchangeExec[[$$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY + * {r}#15, $$extent$minY{r}#16],true] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[<> + * Aggregate[STANDARD,[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent]] + * \_EsRelation[airports_city_boundaries][abbrev{f}#5, airport{f}#6, city{f}#8, city_boundary..]<>]] * * After local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ - * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] - * 
\_FieldExtractExec[location{f}#48][location{f}#48] - * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ - * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[$$extent$minNegX{r}#11, $$extent$minPosX{r + * }#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],200] + * \_ExchangeExec[[$$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY + * {r}#15, $$extent$minY{r}#16],true] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],INITIAL,[$$extent$minNegX{r}#30, $$extent$minPosX + * {r}#31, $$extent$maxNegX{r}#32, $$extent$maxPosX{r}#33, $$extent$maxY{r}#34, $$extent$minY{r}#35],200] + * \_FieldExtractExec[city_boundary{f}#10]<[],[city_boundary{f}#10]> + * \_EsQueryExec[airports_city_boundaries], indexMode[standard], query[{"exists":{"field":"city_boundary","boost":1.0}}][ + * _doc{f}#36], limit[], sort[] estimatedRowSize[204] * - * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] - *

- * Also note that the type converting function is removed when it does not actually convert the type, - * ensuring that ReferenceAttributes are not created for the same field, and the optimization can still work. */ public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { - for (String query : new String[] { "from airports_city_boundaries | stats extent = st_extent_agg(city_boundary)", }) { - var withDocValues = false; - var testData = airportsCityBoundaries; - var plan = physicalPlan(query, testData); + var query = "FROM airports_city_boundaries | STATS extent = ST_EXTENT_AGG(city_boundary)"; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); - var limit = as(plan, LimitExec.class); - var agg = as(limit.child(), AggregateExec.class); - // Before optimization the aggregation does not use extent extraction - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use extent extraction + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); - var exchange = as(agg.child(), ExchangeExec.class); - var fragment = as(exchange.child(), FragmentExec.class); - var fAgg = as(fragment.fragment(), Aggregate.class); - as(fAgg.child(), EsRelation.class); + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); - // Now optimize the plan and assert the aggregation uses extent extraction - var optimized = optimizedPlan(plan, testData.stats); - limit = as(optimized, LimitExec.class); - agg = as(limit.child(), AggregateExec.class); - // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, 
withDocValues); - exchange = as(agg.child(), ExchangeExec.class); - agg = as(exchange.child(), AggregateExec.class); - // below the exchange (in data node) the aggregation is using a specific - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); - assertChildIsExtractedAsBounds(agg, GEO_SHAPE); - } + // Now optimize the plan and assert the aggregation uses extent extraction + System.out.println(plan); + var optimized = optimizedPlan(plan, testData.stats); + System.out.println(optimized); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not using doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is using a specific + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + assertChildIsExtractedAs(agg, FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS, GEO_SHAPE); } // This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. 
@@ -3040,27 +3056,49 @@ public void testSpatialTypesAndStatsExtentOfShapesNegativeCases() { STATS extent = ST_EXTENT_AGG(city_boundary) BY prefix""", """ FROM airports_city_boundaries \ | WHERE STARTS_WITH(TO_STRING(city_boundary), "MULTIPOLYGON") \ - | STATS extent = ST_EXTENT_AGG(city_boundary)""", }) { - var withDocValues = false; + | STATS extent = ST_EXTENT_AGG(city_boundary)""" }) { var testData = airportsCityBoundaries; var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); var optimized = optimizedPlan(plan, testData.stats); limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); - assertChildIsExtractedAsDocValues(exec, withDocValues, GEO_SHAPE); + assertChildIsExtractedAs(exec, FieldExtractPreference.NONE, GEO_SHAPE); } } + // This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. 
+ public void testSpatialTypesAndStatsExtentOfShapesNegativeCaseNoDocValues() { + var query = """ + FROM cartesian_multipolygons_no_doc_values \ + | STATS extent = ST_EXTENT_AGG(shape)"""; + var testData = cartesianMultipolygonsNoDocValues; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); + + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); + var exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); + var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); + assertChildIsExtractedAs(exec, FieldExtractPreference.NONE, CARTESIAN_SHAPE); + } + /** * Before local optimizations: * @@ -3088,15 +3126,14 @@ public void testMixedSpatialBoundsAndPointsExtracted() { var query = """ FROM airports_city_boundaries \ | STATS extent = ST_EXTENT_AGG(city_boundary), centroid = ST_CENTROID_AGG(city_location)"""; - var withDocValues = false; var testData = airportsCityBoundaries; var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var 
exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3108,14 +3145,14 @@ public void testMixedSpatialBoundsAndPointsExtracted() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not field-optimized. - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, withDocValues); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is field optimized. - assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); var fieldExtractExec = as(agg.child(), FieldExtractExec.class); - assertThat(fieldExtractExec.boundAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_boundary"))); + assertThat(fieldExtractExec.boundsAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_boundary"))); assertThat(fieldExtractExec.docValuesAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_location"))); } @@ -3150,11 +3187,11 @@ public void testSpatialTypesAndStatsUseDocValuesNestedLiteral() { var agg = as(limit.child(), AggregateExec.class); assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = 
as(agg.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var eval = as(agg.child(), EvalExec.class); as(eval.child(), LocalSourceExec.class); @@ -3164,11 +3201,11 @@ public void testSpatialTypesAndStatsUseDocValuesNestedLiteral() { agg = as(limit.child(), AggregateExec.class); assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); eval = as(agg.child(), EvalExec.class); as(eval.child(), LocalSourceExec.class); } @@ -3205,7 +3242,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregations() { assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3218,14 +3255,14 @@ public void 
testSpatialTypesAndStatsUseDocValuesMultiAggregations() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); - assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); } /** @@ -3266,8 +3303,8 @@ public void testSpatialTypesAndStatsUseDocValuesMultiSpatialAggregations() { assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3280,16 +3317,16 @@ public void testSpatialTypesAndStatsUseDocValuesMultiSpatialAggregations() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values 
assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, true); - assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, true); - assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); } /** @@ -3327,7 +3364,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsFiltered() { assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3342,14 +3379,14 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsFiltered() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values 
assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); - var source = assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + var source = assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); var qb = as(source.query(), SingleValueQuery.Builder.class); assertThat("Expected predicate to be passed to Lucene query", qb.source().text(), equalTo("scalerank == 9")); } @@ -3381,6 +3418,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsFiltered() { public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { for (boolean useDocValues : new boolean[] { false }) { var testData = useDocValues ? airports : airportsNoDocValues; + var fieldExtractPreference = useDocValues ? 
FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var plan = this.physicalPlan(""" FROM airports | STATS centroid=ST_CENTROID_AGG(location), count=COUNT() BY scalerank @@ -3393,7 +3431,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { assertThat(att.name(), equalTo("scalerank")); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3408,7 +3446,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { assertThat(att.name(), equalTo("scalerank")); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); @@ -3416,8 +3454,8 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { assertThat(att.name(), equalTo("scalerank")); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, useDocValues); - assertChildIsGeoPointExtract(agg, useDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, fieldExtractPreference); + assertChildIsGeoPointExtract(agg, fieldExtractPreference); } } @@ -3458,19 +3496,19 @@ public void 
testSpatialTypesAndStatsUseDocValuesMultiAggregationsGroupedAggregat assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); assertAggregation(agg, "count", Sum.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); assertAggregation(agg, "count", Sum.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("One grouping in aggregation", agg.groupings().size(), equalTo(1)); var att = as(agg.groupings().get(0), Attribute.class); assertThat(att.name(), equalTo("scalerank")); assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3484,19 +3522,19 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGroupedAggregat assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); assertAggregation(agg, "count", Sum.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), 
AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); assertAggregation(agg, "count", Sum.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("One grouping in aggregation", agg.groupings().size(), equalTo(1)); att = as(agg.groupings().get(0), Attribute.class); assertThat(att.name(), equalTo("scalerank")); assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("One grouping in aggregation", agg.groupings().size(), equalTo(1)); @@ -3505,8 +3543,8 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGroupedAggregat // below the exchange (in data node) the aggregation is using doc-values assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); - assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); } /** @@ -3546,7 +3584,7 @@ public void testEnrichBeforeSpatialAggregationSupportsDocValues() { var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, 
false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3562,16 +3600,16 @@ public void testEnrichBeforeSpatialAggregationSupportsDocValues() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); var enrichExec = as(agg.child(), EnrichExec.class); assertThat(enrichExec.mode(), equalTo(Enrich.Mode.ANY)); assertThat(enrichExec.concreteIndices(), equalTo(Map.of("", "airport_city_boundaries"))); assertThat(enrichExec.enrichFields().size(), equalTo(3)); - assertChildIsGeoPointExtract(enrichExec, true); + assertChildIsGeoPointExtract(enrichExec, FieldExtractPreference.DOC_VALUES); } /** @@ -3871,7 +3909,7 @@ public void testPushDownSpatialRelatesStringToSourceAndUseDocValuesForCentroid() assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), false); + assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), 
FragmentExec.class); var fAgg = as(fragment.fragment(), Aggregate.class); @@ -3884,15 +3922,15 @@ public void testPushDownSpatialRelatesStringToSourceAndUseDocValuesForCentroid() agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), false); + assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), true); + assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), FieldExtractPreference.DOC_VALUES); if (test.canPushToSource) { - var source = assertChildIsExtractedAsDocValues(agg, true, test.locationType()); + var source = assertChildIsExtractedAs(agg, FieldExtractPreference.DOC_VALUES, test.locationType()); var condition = as(source.query(), SpatialRelatesQuery.ShapeQueryBuilder.class); assertThat("Geometry field name: " + test.predicate(), condition.fieldName(), equalTo("location")); assertThat("Spatial relationship: " + test.predicate(), condition.relation(), equalTo(test.relationship())); @@ -3958,6 +3996,7 @@ public void testPushSpatialIntersectsStringToSourceAndUseDocValuesForCentroid() for (boolean isIndexed : new boolean[] { true, false }) { for (boolean useDocValues : new boolean[] { true, false }) { + var fieldExtractPreference = useDocValues ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var testData = useDocValues ? (isIndexed ? airports : airportsNotIndexed) : (isIndexed ? 
airportsNoDocValues : airportsNotIndexedNorDocValues); @@ -3967,7 +4006,7 @@ public void testPushSpatialIntersectsStringToSourceAndUseDocValuesForCentroid() assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3981,15 +4020,15 @@ public void testPushSpatialIntersectsStringToSourceAndUseDocValuesForCentroid() agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, useDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, fieldExtractPreference); if (isIndexed) { - var source = assertChildIsGeoPointExtract(agg, useDocValues); + var source = assertChildIsGeoPointExtract(agg, fieldExtractPreference); // Query is pushed to lucene if field is indexed (and does not require doc-values or isAggregatable) var condition = as(source.query(), SpatialRelatesQuery.ShapeQueryBuilder.class); assertThat("Geometry field name", condition.fieldName(), equalTo("location")); @@ -4080,7 +4119,7 @@ AND 
ST_INTERSECTS(TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))"), l assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -4096,14 +4135,14 @@ AND ST_INTERSECTS(TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))"), l agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); - var source = assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + var source = assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); var booleanQuery = as(source.query(), BoolQueryBuilder.class); assertThat("Expected boolean query of three predicates", booleanQuery.must().size(), equalTo(3)); var condition = as(booleanQuery.must().get(1), SpatialRelatesQuery.ShapeQueryBuilder.class); @@ -4154,8 +4193,8 @@ public void testIntersectsOnTwoPointFieldAndBothCentroidUsesDocValues() { assertThat("No groupings 
in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -4169,15 +4208,15 @@ public void testIntersectsOnTwoPointFieldAndBothCentroidUsesDocValues() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, true); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var filterExec = as(agg.child(), FilterExec.class); var 
extract = as(filterExec.child(), FieldExtractExec.class); assertFieldExtractionWithDocValues(extract, GEO_POINT, "location"); @@ -4202,7 +4241,7 @@ public void testIntersectsOnTwoPointFieldAndOneCentroidUsesDocValues() { // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); var aggFieldName = findSingleAggregation(agg, "location", "city_location"); - assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -4216,13 +4255,13 @@ public void testIntersectsOnTwoPointFieldAndOneCentroidUsesDocValues() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, true); + assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); var filterExec = as(agg.child(), FilterExec.class); var extract = as(filterExec.child(), FieldExtractExec.class); assertFieldExtractionWithDocValues(extract, GEO_POINT, aggFieldName); @@ -4244,8 +4283,8 @@ AND ST_INTERSECTS(city_location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 1 assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // 
Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -4261,15 +4300,15 @@ AND ST_INTERSECTS(city_location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 1 agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, true); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, true); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); var extract = as(agg.child(), FieldExtractExec.class); assertFieldExtractionWithDocValues(extract, 
GEO_POINT, "location", "city_location"); var source = source(extract.child()); @@ -4833,7 +4872,7 @@ public void testPushSpatialDistanceEvalWithSimpleStatsToSource() { var evalExec = as(aggExec2.child(), EvalExec.class); var stDistance = as(evalExec.fields().get(0).child(), StDistance.class); assertThat("Expect distance function to expect doc-values", stDistance.leftDocValues(), is(false)); - var source = assertChildIsGeoPointExtract(evalExec, false); + var source = assertChildIsGeoPointExtract(evalExec, FieldExtractPreference.NONE); // No sort is pushed down assertThat(source.limit(), nullValue()); @@ -4948,7 +4987,7 @@ public void testPushSpatialDistanceEvalWithStatsToSource() { var evalExec = as(aggExec2.child(), EvalExec.class); var stDistance = as(evalExec.fields().get(0).child(), StDistance.class); assertThat("Expect distance function to expect doc-values", stDistance.leftDocValues(), is(true)); - var source = assertChildIsGeoPointExtract(evalExec, true); + var source = assertChildIsGeoPointExtract(evalExec, FieldExtractPreference.DOC_VALUES); // No sort is pushed down assertThat(source.limit(), nullValue()); @@ -7061,28 +7100,35 @@ private static void assertFilterCondition( assertThat("Expected filter value", value.value(), equalTo(expected)); } - private EsQueryExec assertChildIsGeoPointExtract(UnaryExec parent, boolean useDocValues) { - return assertChildIsExtractedAsDocValues(parent, useDocValues, GEO_POINT); + private EsQueryExec assertChildIsGeoPointExtract(UnaryExec parent, FieldExtractPreference fieldExtractPreference) { + return assertChildIsExtractedAs(parent, fieldExtractPreference, GEO_POINT); } - private EsQueryExec assertChildIsExtractedAsDocValues(UnaryExec parent, boolean useDocValues, DataType dataType) { - // TODO(gal) why is this OK To vacuously true? 
+ private static EsQueryExec assertChildIsExtractedAs( + UnaryExec parent, + FieldExtractPreference fieldExtractPreference, + DataType dataType + ) { var extract = as(parent.child(), FieldExtractExec.class); - assertThat(extract.boundAttributes(), is(empty())); + switch (fieldExtractPreference) { + case NONE -> { + assertThat(extract.docValuesAttributes(), is(empty())); + assertThat(extract.boundsAttributes(), is(empty())); + } + case DOC_VALUES -> { + assertThat(extract.docValuesAttributes(), is(not(empty()))); + assertThat(extract.boundsAttributes(), is(empty())); + } + case EXTRACT_SPATIAL_BOUNDS -> { + assertThat(extract.docValuesAttributes(), is(empty())); + assertThat(extract.boundsAttributes(), is(not(empty()))); + } + } assertTrue( - "Expect field attribute to be extracted as " + (useDocValues ? "doc-values" : "source"), + "Expect field attribute to be extracted as " + fieldExtractPreference, extract.attributesToExtract() .stream() - .allMatch(attr -> extract.docValuesAttributes().contains(attr) == useDocValues && attr.dataType() == dataType) - ); - return source(extract.child()); - } - - private static EsQueryExec assertChildIsExtractedAsBounds(UnaryExec parent, DataType dataType) { - var extract = as(parent.child(), FieldExtractExec.class); - assertTrue( - "Expect field attribute to be extracted as bounds", - extract.attributesToExtract().stream().allMatch(attr -> extract.boundAttributes().contains(attr) && attr.dataType() == dataType) + .allMatch(attr -> extract.fieldExtractPreference(attr) == fieldExtractPreference && attr.dataType() == dataType) ); return source(extract.child()); } @@ -7092,16 +7138,12 @@ private static void assertAggregation( String aliasName, Class aggClass, DataType fieldType, - boolean useDocValues + FieldExtractPreference fieldExtractPreference ) { var aggFunc = assertAggregation(plan, aliasName, aggClass); var aggField = as(aggFunc.field(), Attribute.class); var spatialAgg = as(aggFunc, SpatialAggregateFunction.class); - 
assertThat( - "Expected spatial aggregation to use doc-values", - spatialAgg.fieldExtractPreference(), - equalTo(useDocValues ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE) - ); + assertThat(spatialAgg.fieldExtractPreference(), equalTo(fieldExtractPreference)); assertThat("", aggField.dataType(), equalTo(fieldType)); } From 58a491a5c4ec2319c48618b192f0facecddf5812 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 19 Dec 2024 20:54:58 +0200 Subject: [PATCH 14/18] Fix test docs, revert MapperTestCase changes --- .../index/mapper/TextFieldMapperTests.java | 2 +- .../index/mapper/MapperTestCase.java | 10 +--- .../optimizer/PhysicalPlanOptimizerTests.java | 55 +++++++++---------- 3 files changed, 29 insertions(+), 38 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 9675638b2b394..32cbcfc2441a1 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -1355,6 +1355,6 @@ private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSo MapperService mapper = syntheticSource ? 
createSytheticSourceMapperService(mapping) : createMapperService(mapping); BlockReaderSupport blockReaderSupport = getSupportedReaders(mapper, "field.sub"); var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(MappedFieldType.FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); + testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index ccbf3cbf8d9f3..de9c8c9a2ef76 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -1483,20 +1483,16 @@ private void testBlockLoader(boolean syntheticSource, boolean columnReader) thro ); } var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); + testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); } protected final void testBlockLoader( - FieldExtractPreference fieldExtractPreference, + boolean columnReader, SyntheticSourceExample example, BlockReaderSupport blockReaderSupport, SourceLoader sourceLoader ) throws IOException { - var columnReader = switch (fieldExtractPreference) { - case DOC_VALUES, EXTRACT_SPATIAL_BOUNDS -> true; - case NONE -> false; - }; - BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference); + BlockLoader loader = blockReaderSupport.getBlockLoader(FieldExtractPreference.forColumnReader(columnReader)); Function valuesConvert = loadBlockExpected(blockReaderSupport, columnReader); if (valuesConvert == null) { assertNull(loader); diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index b69f977439470..533b5855d4f25 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -2935,23 +2935,25 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen - * troid],FINAL,...] - * \_ExchangeExec[[...]] - * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[..]] - * \_EsRelation[airports-no-doc-values][abbrev{f}#8, city{f}#14, city_location{f}#15, count..]]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#70,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#70,true[BOOLEAN]) AS cen + * troid],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ + * Aggregate[STANDARD,[],[SPATIALEXTENT(location{f}#70,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#70,true[BOOLEAN] + * ) AS centroid]] + * \_EsRelation[airports][abbrev{f}#66, city{f}#72, city_location{f}#73, coun..]]] * * After local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen - * troid],FINAL,[...]] - * \_ExchangeExec[[...]] - * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen - * troid],INITIAL,...] 
- * \_FilterExec[ISNOTNULL(location{f}#12)] - * \_FieldExtractExec[location{f}#12] - * \_EsQueryExec[airports-no-doc-values], indexMode[standard], query[][_doc{f}#59], limit[], sort[] estimatedRowSize[25] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#70,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#70,true[BOOLEAN]) AS cen + * troid],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#70,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#70,true[BOOLEAN]) AS cen + * troid],INITIAL,[...]] + * \_FieldExtractExec[location{f}#70][location{f}#70],[] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#117], limit[], sort[] estimatedRowSize[25] * * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] *

@@ -2996,26 +2998,21 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[$$extent$minNegX{r}#11, $$extent$minPosX{r - * }#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],null] - * \_ExchangeExec[[$$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY - * {r}#15, $$extent$minY{r}#16],true] - * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[<> + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ * Aggregate[STANDARD,[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent]] - * \_EsRelation[airports_city_boundaries][abbrev{f}#5, airport{f}#6, city{f}#8, city_boundary..]<>]] + * \_EsRelation[airports_city_boundaries][abbrev{f}#5, airport{f}#6, city{f}#8, city_boundary..]]] * * After local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[$$extent$minNegX{r}#11, $$extent$minPosX{r - * }#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],200] - * \_ExchangeExec[[$$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY - * {r}#15, $$extent$minY{r}#16],true] - * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],INITIAL,[$$extent$minNegX{r}#30, $$extent$minPosX - * {r}#31, $$extent$maxNegX{r}#32, $$extent$maxPosX{r}#33, $$extent$maxY{r}#34, $$extent$minY{r}#35],200] - * \_FieldExtractExec[city_boundary{f}#10]<[],[city_boundary{f}#10]> - * \_EsQueryExec[airports_city_boundaries], indexMode[standard], 
query[{"exists":{"field":"city_boundary","boost":1.0}}][ - * _doc{f}#36], limit[], sort[] estimatedRowSize[204] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],INITIAL,[...]] + * \_FieldExtractExec[city_boundary{f}#10][],[city_boundary{f}#10] + * \_EsQueryExec[airports_city_boundaries], indexMode[standard], + * query[{"exists":{"field":"city_boundary","boost":1.0}}][_doc{f}#36], limit[], sort[] estimatedRowSize[204] * */ public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { @@ -3034,9 +3031,7 @@ public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { as(fAgg.child(), EsRelation.class); // Now optimize the plan and assert the aggregation uses extent extraction - System.out.println(plan); var optimized = optimizedPlan(plan, testData.stats); - System.out.println(optimized); limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values From c6875bc15ab11a0263f1e1ec84ab271d3a7eae9e Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 19 Dec 2024 20:58:12 +0200 Subject: [PATCH 15/18] Inline forColumnReader --- .../java/org/elasticsearch/index/mapper/MappedFieldType.java | 3 --- .../java/org/elasticsearch/index/mapper/MapperTestCase.java | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 67e12cb9c6f84..a53ed94da0954 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -684,9 +684,6 @@ public enum FieldExtractPreference { */ NONE; - public static FieldExtractPreference 
forColumnReader(boolean columnReader) { - return columnReader ? DOC_VALUES : NONE; - } } /** diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index de9c8c9a2ef76..bce88a2a8d2f7 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -1492,7 +1492,9 @@ protected final void testBlockLoader( BlockReaderSupport blockReaderSupport, SourceLoader sourceLoader ) throws IOException { - BlockLoader loader = blockReaderSupport.getBlockLoader(FieldExtractPreference.forColumnReader(columnReader)); + // EXTRACT_SPATIAL_BOUNDS is not currently supported in this test path. + var fieldExtractPreference = columnReader ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; + BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference); Function valuesConvert = loadBlockExpected(blockReaderSupport, columnReader); if (valuesConvert == null) { assertNull(loader); From 7aa67341ddf2906d173912f66584c82fbbb6ef60 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 20 Dec 2024 00:18:41 +0100 Subject: [PATCH 16/18] Added missing test for bounds extraction from cartesian_shape doc-values And made a few updates to comments and javadocs --- .../mapper/LegacyGeoShapeFieldMapper.java | 1 + .../index/mapper/MappedFieldType.java | 4 +- .../local/SpatialDocValuesExtraction.java | 3 + .../local/SpatialShapeBoundsExtraction.java | 3 +- .../esql/plan/physical/FieldExtractExec.java | 6 +- .../optimizer/PhysicalPlanOptimizerTests.java | 125 ++++++++++-------- .../GeoShapeWithDocValuesFieldMapper.java | 1 + 7 files changed, 80 insertions(+), 63 deletions(-) diff --git a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java 
b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java index 506918b12fe96..b0634f0f1332f 100644 --- a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java +++ b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java @@ -402,6 +402,7 @@ public void parse( } public static final class GeoShapeFieldType extends AbstractShapeGeometryFieldType> implements GeoShapeQueryable { + private String tree = Defaults.TREE; private SpatialStrategy strategy = Defaults.STRATEGY; private boolean pointsOnly = Defaults.POINTS_ONLY; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 67e12cb9c6f84..3e17231ee717e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -677,7 +677,9 @@ public enum FieldExtractPreference { * Load the field from doc-values into a BlockLoader supporting doc-values. */ DOC_VALUES, - /** Loads the field by extracting the extent from the binary encoded representation */ + /** + * Loads the field by extracting the extent from the binary encoded representation + */ EXTRACT_SPATIAL_BOUNDS, /** * No preference. Leave the choice of where to load the field from up to the FieldType. 
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java index e2a72cd20ffc3..f66ed5c8e4ec1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java @@ -157,6 +157,9 @@ private boolean foundField(Expression expression, Set foundAttri /** * This function disallows the use of more than one field for doc-values extraction in the same spatial relation function. * This is because comparing two doc-values fields is not supported in the current implementation. + * This also rejects fields that do not have doc-values in the field mapping, as well as rejecting geo_shape and cartesian_shape + * because we do not yet support full doc-values extraction for non-point geometries. We do have aggregations that support + * shapes, and to prevent them triggering this rule on non-point geometries we have to explicitly disallow them here. 
*/ private boolean allowedForDocValues( FieldAttribute fieldAttribute, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java index ec79014827a99..e4bf966acee63 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java @@ -86,8 +86,7 @@ protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerConte case FilterExec filterExec -> foundAttributes.removeAll(filterExec.condition().references()); case FieldExtractExec fieldExtractExec -> { foundAttributes.retainAll(fieldExtractExec.attributesToExtract()); - return fieldExtractExec.withBoundsAttributes(foundAttributes) - .withDocValuesAttributes(fieldExtractExec.docValuesAttributes()); + exec = fieldExtractExec.withBoundsAttributes(foundAttributes); } default -> { // Do nothing } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java index 1f0990a924ed4..e9783a241f0b9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java @@ -48,7 +48,7 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { private final Set docValuesAttributes; /** - * Attributes of a shape whose extent can be extracted directly from the encoded geometry. + * Attributes of a shape whose extent can be extracted directly from the doc-values encoded geometry. *

* This is never serialized between nodes and only used locally. *

@@ -81,7 +81,7 @@ private FieldExtractExec(StreamInput in) throws IOException { in.readNamedWriteable(PhysicalPlan.class), in.readNamedWriteableCollectionAsList(Attribute.class) ); - // docValueAttributes are only used on the data node and never serialized. + // docValueAttributes and boundsAttributes are only used on the data node and never serialized. } @Override @@ -89,7 +89,7 @@ public void writeTo(StreamOutput out) throws IOException { Source.EMPTY.writeTo(out); out.writeNamedWriteable(child()); out.writeNamedWriteableCollection(attributesToExtract()); - // docValueAttributes are only used on the data node and never serialized. + // docValueAttributes and boundsAttributes are only used on the data node and never serialized. } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 4660a3add0337..a625ab6b2640a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -210,11 +210,11 @@ public class PhysicalPlanOptimizerTests extends ESTestCase { private TestDataSource testData; private int allFieldRowSize; // TODO: Move this into testDataSource so tests that load other indexes can also assert on this private TestDataSource airports; - private TestDataSource airportsCityBoundaries; private TestDataSource airportsNoDocValues; // Test when spatial field is indexed but has no doc values private TestDataSource airportsNotIndexed; // Test when spatial field has doc values but is not indexed private TestDataSource airportsNotIndexedNorDocValues; // Test when spatial field is neither indexed nor has doc-values private TestDataSource airportsWeb; // Cartesian point field tests + private TestDataSource 
airportsCityBoundaries; private TestDataSource cartesianMultipolygons; // cartesian_shape field tests private TestDataSource cartesianMultipolygonsNoDocValues; // cartesian_shape field tests but has no doc values private TestDataSource countriesBbox; // geo_shape field tests @@ -264,12 +264,6 @@ public void init() { // Some tests use data from the airports and countries indexes, so we load that here, and use it in the plan(q, airports) function. this.airports = makeTestDataSource("airports", "mapping-airports.json", functionRegistry, enrichResolution); - this.airportsCityBoundaries = makeTestDataSource( - "airports_city_boundaries", - "mapping-airport_city_boundaries.json", - functionRegistry, - enrichResolution - ); this.airportsNoDocValues = makeTestDataSource( "airports-no-doc-values", "mapping-airports_no_doc_values.json", @@ -292,6 +286,12 @@ public void init() { new TestConfigurableSearchStats().exclude(Config.INDEXED, "location").exclude(Config.DOC_VALUES, "location") ); this.airportsWeb = makeTestDataSource("airports_web", "mapping-airports_web.json", functionRegistry, enrichResolution); + this.airportsCityBoundaries = makeTestDataSource( + "airports_city_boundaries", + "mapping-airport_city_boundaries.json", + functionRegistry, + enrichResolution + ); this.cartesianMultipolygons = makeTestDataSource( "cartesian_multipolygons", "mapping-cartesian_multipolygons.json", @@ -3001,32 +3001,25 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { } /** - * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[$$extent$minNegX{r}#11, $$extent$minPosX{r - * }#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],null] - * \_ExchangeExec[[$$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY - * {r}#15, $$extent$minY{r}#16],true] - * \_FragmentExec[filter=null, 
estimatedRowSize=0, reducer=[], fragment=[<> - * Aggregate[STANDARD,[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent]] - * \_EsRelation[airports_city_boundaries][abbrev{f}#5, airport{f}#6, city{f}#8, city_boundary..]<>]] - * - * After local optimizations: - * - * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[$$extent$minNegX{r}#11, $$extent$minPosX{r - * }#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],200] - * \_ExchangeExec[[$$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, $$extent$maxPosX{r}#14, $$extent$maxY - * {r}#15, $$extent$minY{r}#16],true] - * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],INITIAL,[$$extent$minNegX{r}#30, $$extent$minPosX - * {r}#31, $$extent$maxNegX{r}#32, $$extent$maxPosX{r}#33, $$extent$maxY{r}#34, $$extent$minY{r}#35],200] - * \_FieldExtractExec[city_boundary{f}#10]<[],[city_boundary{f}#10]> - * \_EsQueryExec[airports_city_boundaries], indexMode[standard], query[{"exists":{"field":"city_boundary","boost":1.0}}][ - * _doc{f}#36], limit[], sort[] estimatedRowSize[204] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[ + * $$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, + * $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],200] + * \_ExchangeExec[[ + * $$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, + * $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],true] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],INITIAL,[ + * $$extent$minNegX{r}#30, $$extent$minPosX{r}#31, $$extent$maxNegX{r}#32, + * $$extent$maxPosX{r}#33, $$extent$maxY{r}#34, $$extent$minY{r}#35],200] + * \_FieldExtractExec[city_boundary{f}#10][],[city_boundary{f}#10] + * \_EsQueryExec[airports_city_boundaries], indexMode[standard], 
query[ + * {"exists":{"field":"city_boundary","boost":1.0}} + * ][_doc{f}#36], limit[], sort[] estimatedRowSize[204] * */ - public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { + public void testSpatialTypesAndStatsExtentOfGeoShapeDoesNotUseBinaryExtraction() { + // TODO: When we get geo_shape working with bounds extraction from doc-values, change the name of this test var query = "FROM airports_city_boundaries | STATS extent = ST_EXTENT_AGG(city_boundary)"; var testData = airportsCityBoundaries; var plan = physicalPlan(query, testData); @@ -3042,9 +3035,7 @@ public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { as(fAgg.child(), EsRelation.class); // Now optimize the plan and assert the aggregation uses extent extraction - System.out.println(plan); var optimized = optimizedPlan(plan, testData.stats); - System.out.println(optimized); limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values @@ -3056,14 +3047,18 @@ public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { assertChildIsExtractedAs(agg, FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS, GEO_SHAPE); } - // This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. + /** + * This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. + * TODO: Currently this tests nothing, because geo_shape is not supported anyway for bounds extraction, + * but it should be updated when it is supported. 
+ */ public void testSpatialTypesAndStatsExtentOfShapesNegativeCases() { for (String query : new String[] { """ - FROM airports_city_boundaries | \ - EVAL prefix = SUBSTRING(TO_STRING(city_boundary), 5) | \ - STATS extent = ST_EXTENT_AGG(city_boundary) BY prefix""", """ - FROM airports_city_boundaries \ - | WHERE STARTS_WITH(TO_STRING(city_boundary), "MULTIPOLYGON") \ + FROM airports_city_boundaries + | EVAL prefix = SUBSTRING(TO_STRING(city_boundary), 5) + | STATS extent = ST_EXTENT_AGG(city_boundary) BY prefix""", """ + FROM airports_city_boundaries + | WHERE STARTS_WITH(TO_STRING(city_boundary), "MULTIPOLYGON") | STATS extent = ST_EXTENT_AGG(city_boundary)""" }) { var testData = airportsCityBoundaries; var plan = physicalPlan(query, testData); @@ -3084,27 +3079,32 @@ public void testSpatialTypesAndStatsExtentOfShapesNegativeCases() { } } - // This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. - public void testSpatialTypesAndStatsExtentOfShapesNegativeCaseNoDocValues() { - var query = """ - FROM cartesian_multipolygons_no_doc_values \ - | STATS extent = ST_EXTENT_AGG(shape)"""; - var testData = cartesianMultipolygonsNoDocValues; - var plan = physicalPlan(query, testData); + /** + * Test cartesian_shape bounds extraction occurs when the shape has doc-values and not otherwise. + */ + public void testSpatialTypesAndStatsExtentOfCartesianShapesWithAndWithoutDocValues() { + for (boolean hasDocValues : new boolean[] { true, false }) { + var query = """ + FROM cartesian_multipolygons \ + | STATS extent = ST_EXTENT_AGG(shape)"""; + var testData = hasDocValues ? cartesianMultipolygons : cartesianMultipolygonsNoDocValues; + var fieldExtractPreference = hasDocValues ? 
FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS : FieldExtractPreference.NONE; + var plan = physicalPlan(query, testData); - var limit = as(plan, LimitExec.class); - var agg = as(limit.child(), AggregateExec.class); - assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); - var optimized = optimizedPlan(plan, testData.stats); - limit = as(optimized, LimitExec.class); - agg = as(limit.child(), AggregateExec.class); - assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); - var exchange = as(agg.child(), ExchangeExec.class); - agg = as(exchange.child(), AggregateExec.class); - assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); - var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); - assertChildIsExtractedAs(exec, FieldExtractPreference.NONE, CARTESIAN_SHAPE); + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); + var exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertAggregation(agg, "extent", "hasDocValues:" + hasDocValues, SpatialExtent.class, CARTESIAN_SHAPE, fieldExtractPreference); + var exec = agg.child() instanceof FieldExtractExec ? 
agg : as(agg.child(), UnaryExec.class); + assertChildIsExtractedAs(exec, FieldExtractPreference.NONE, CARTESIAN_SHAPE); + } } /** @@ -7440,12 +7440,23 @@ private static void assertAggregation( Class aggClass, DataType fieldType, FieldExtractPreference fieldExtractPreference + ) { + assertAggregation(plan, aliasName, "Aggregation with fieldExtractPreference", aggClass, fieldType, fieldExtractPreference); + } + + private static void assertAggregation( + PhysicalPlan plan, + String aliasName, + String reason, + Class aggClass, + DataType fieldType, + FieldExtractPreference fieldExtractPreference ) { var aggFunc = assertAggregation(plan, aliasName, aggClass); var aggField = as(aggFunc.field(), Attribute.class); var spatialAgg = as(aggFunc, SpatialAggregateFunction.class); assertThat(spatialAgg.fieldExtractPreference(), equalTo(fieldExtractPreference)); - assertThat("", aggField.dataType(), equalTo(fieldType)); + assertThat(reason, aggField.dataType(), equalTo(fieldType)); } private static AggregateFunction assertAggregation(PhysicalPlan plan, String aliasName, Class aggClass) { diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java index 224abd2002455..67d25556a2aa7 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java @@ -208,6 +208,7 @@ public GeoShapeWithDocValuesFieldMapper build(MapperBuilderContext context) { } public static final class GeoShapeWithDocValuesFieldType extends AbstractShapeGeometryFieldType implements GeoShapeQueryable { + private final GeoFormatterFactory geoFormatterFactory; private final FieldValues scriptValues; From 
ebfd723e816f2ce218219bfb2e6875ea45732c6d Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 20 Dec 2024 13:46:30 +0100 Subject: [PATCH 17/18] Fixed the test that was testing the wrong thing --- .../esql/optimizer/PhysicalPlanOptimizerTests.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 8d2067f7ccffe..5ab45c8c5f383 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -3100,12 +3100,22 @@ public void testSpatialTypesAndStatsExtentOfCartesianShapesWithAndWithoutDocValu var optimized = optimizedPlan(plan, testData.stats); limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); + // For cartesian_shape extraction, we extract bounds from doc-values directly into a BBOX encoded as BytesRef, + // so the aggregation does not need to know about it. assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); - assertAggregation(agg, "extent", "hasDocValues:" + hasDocValues, SpatialExtent.class, CARTESIAN_SHAPE, fieldExtractPreference); + assertAggregation( + agg, + "extent", + "hasDocValues:" + hasDocValues, + SpatialExtent.class, + CARTESIAN_SHAPE, + FieldExtractPreference.NONE + ); var exec = agg.child() instanceof FieldExtractExec ? 
agg : as(agg.child(), UnaryExec.class); - assertChildIsExtractedAs(exec, FieldExtractPreference.NONE, CARTESIAN_SHAPE); + // For cartesian_shape, the bounds extraction is done in the FieldExtractExec, so it does need to know about this + assertChildIsExtractedAs(exec, fieldExtractPreference, CARTESIAN_SHAPE); } } From a9d602790ac419350d1bc0a7579abd34acb5a880 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 20 Dec 2024 14:47:46 +0100 Subject: [PATCH 18/18] Added bounds extracting with grouping tests and fixed bug with multiple extractions When there are multiple FieldExtractExec nodes, it is incorrect to allow the modification of the foundAttributes, but should take a copy, and modify the copy. --- .../src/main/resources/spatial.csv-spec | 32 +++++++++++++++++++ .../local/SpatialShapeBoundsExtraction.java | 7 ++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index a7aae79ee496d..689c36197ee70 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -1797,6 +1797,22 @@ extent:cartesian_shape BBOX (0.0, 3.0, 3.0, 0.0) ; +stExtentCartesianShapesGrouping +required_capability: st_extent_agg + +FROM cartesian_multipolygons +| EVAL key = SUBSTRING(name,1,3) +| STATS extent = ST_EXTENT_AGG(shape), count = COUNT() BY key +| KEEP count, key, extent +| SORT count DESC, key ASC +; + +count:long | key:keyword | extent:cartesian_shape +8 | Bot | BBOX (0.0, 3.0, 1.0, 0.0) +8 | Top | BBOX (0.0, 3.0, 3.0, 2.0) +4 | Fou | BBOX (0.0, 3.0, 3.0, 0.0) +; + stExtentCartesianShapesNoDocValues required_capability: st_extent_agg FROM cartesian_multipolygons_no_doc_values | STATS extent = ST_EXTENT_AGG(shape) @@ -1806,6 +1822,22 @@ extent:cartesian_shape BBOX (0.0, 3.0, 3.0, 0.0) ; 
+stExtentCartesianShapesGroupingNoDocValues +required_capability: st_extent_agg + +FROM cartesian_multipolygons_no_doc_values +| EVAL key = SUBSTRING(name,1,3) +| STATS extent = ST_EXTENT_AGG(shape), count = COUNT() BY key +| KEEP count, key, extent +| SORT count DESC, key ASC +; + +count:long | key:keyword | extent:cartesian_shape +8 | Bot | BBOX (0.0, 3.0, 1.0, 0.0) +8 | Top | BBOX (0.0, 3.0, 3.0, 2.0) +4 | Fou | BBOX (0.0, 3.0, 3.0, 0.0) +; + ############################################### # Tests for ST_INTERSECTS on CARTESIAN_POINT type diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java index e4bf966acee63..f6f087064a02f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java @@ -85,8 +85,11 @@ protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerConte case EvalExec evalExec -> foundAttributes.removeAll(evalExec.references()); case FilterExec filterExec -> foundAttributes.removeAll(filterExec.condition().references()); case FieldExtractExec fieldExtractExec -> { - foundAttributes.retainAll(fieldExtractExec.attributesToExtract()); - exec = fieldExtractExec.withBoundsAttributes(foundAttributes); + var boundsAttributes = new HashSet<>(foundAttributes); + boundsAttributes.retainAll(fieldExtractExec.attributesToExtract()); + if (boundsAttributes.isEmpty() == false) { + exec = fieldExtractExec.withBoundsAttributes(boundsAttributes); + } } default -> { // Do nothing }