Skip to content

Commit

Permalink
ESQL: ST_EXTENT_AGG optimize envelope extraction from doc-values for …
Browse files Browse the repository at this point in the history
…Cartesian_shape (elastic#118802)

When we cartesian shapes to Lucene, we encode additional information in the binary, including the extent, so we can read the extent directly from the binary encoding instead of (re-)computing it per shape, and replace the (possibly very complicated) shape with a rectangle. At the moment, this is only done for Cartesian shapes, since for Geo shapes, we need to take dateline wrapping into account, which means it can't be directly encoded as a rectangle. We will deal with geo shapes in a future PR.
  • Loading branch information
GalLalouche authored Dec 20, 2024
1 parent 3ab612f commit ef6372a
Show file tree
Hide file tree
Showing 28 changed files with 872 additions and 231 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/118802.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 118802
summary: ST_EXTENT_AGG optimize envelope extraction from doc-values for cartesian_shape
area: "ES|QL"
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.elasticsearch.legacygeo.builders.ShapeBuilder;
import org.elasticsearch.legacygeo.parsers.ShapeParser;
import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.locationtech.spatial4j.shape.Point;
Expand Down Expand Up @@ -530,6 +531,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) {
protected Function<List<ShapeBuilder<?, ?, ?>>, List<Object>> getFormatter(String format) {
return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry);
}

@Override
protected boolean isBoundsExtractionSupported() {
// Extracting bounds for geo shapes is not implemented yet.
return false;
}

@Override
protected CoordinateEncoder coordinateEncoder() {
return CoordinateEncoder.GEO;
}
}

private final IndexVersion indexCreatedVersion;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,18 @@
*/
package org.elasticsearch.index.mapper;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.Orientation;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.WellKnownBinary;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.lucene.spatial.GeometryDocValueReader;

import java.io.IOException;
import java.nio.ByteOrder;
import java.util.Map;
import java.util.function.Function;

Expand Down Expand Up @@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) {
// we don't support null value fors shapes
return nullValue;
}

@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported()
? new BoundsBlockLoader(name(), coordinateEncoder())
: blockLoaderFromSource(blContext);
}

protected abstract boolean isBoundsExtractionSupported();

protected abstract CoordinateEncoder coordinateEncoder();

// Visible for testing
static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
private final String fieldName;
private final CoordinateEncoder encoder;

BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) {
this.fieldName = fieldName;
this.encoder = encoder;
}

@Override
public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException {
return new BlockLoader.AllReader() {
@Override
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException {
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
var reader = new GeometryDocValueReader();
try (var builder = factory.bytesRefs(docs.count())) {
for (int i = 0; i < docs.count(); i++) {
read(binaryDocValues, docs.get(i), reader, builder);
}
return builder.build();
}
}

@Override
public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException {
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
var reader = new GeometryDocValueReader();
read(binaryDocValues, docId, reader, (BytesRefBuilder) builder);
}

private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder)
throws IOException {
binaryDocValues.advanceExact(doc);
reader.reset(binaryDocValues.binaryValue());
var extent = reader.getExtent();
// This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to
// preserve its properties as a WKB shape, only to convert it back to ints when we compute the
// aggregation. An obvious optimization would be to avoid this back-and-forth conversion.
var rectangle = new Rectangle(
encoder.decodeX(extent.minX()),
encoder.decodeX(extent.maxX()),
encoder.decodeY(extent.maxY()),
encoder.decodeY(extent.minY())
);
builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN)));
}

@Override
public boolean canReuse(int startingDocID) {
return true;
}
};
}

@Override
public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) {
return factory.bytesRefs(expectedCount);
}
}
}

protected Explicit<Boolean> coerce;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -677,10 +677,15 @@ public enum FieldExtractPreference {
* Load the field from doc-values into a BlockLoader supporting doc-values.
*/
DOC_VALUES,
/**
* Loads the field by extracting the extent from the binary encoded representation
*/
EXTRACT_SPATIAL_BOUNDS,
/**
* No preference. Leave the choice of where to load the field from up to the FieldType.
*/
NONE
NONE;

}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.geo.Orientation;
import org.elasticsearch.geo.GeometryTestUtils;
import org.elasticsearch.geo.ShapeTestUtils;
import org.elasticsearch.geometry.Geometry;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor;
import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField;
import org.elasticsearch.lucene.spatial.CartesianShapeIndexer;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.hamcrest.RectangleMatcher;
import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher;

import java.io.IOException;
import java.util.Optional;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.IntStream;

public class AbstractShapeGeometryFieldMapperTests extends ESTestCase {
public void testCartesianBoundsBlockLoader() throws IOException {
testBoundsBlockLoaderAux(
CoordinateEncoder.CARTESIAN,
() -> ShapeTestUtils.randomGeometryWithoutCircle(0, false),
CartesianShapeIndexer::new,
SpatialEnvelopeVisitor::visitCartesian
);
}

// TODO when we turn this optimization on for geo, this test should pass.
public void ignoreTestGeoBoundsBlockLoader() throws IOException {
testBoundsBlockLoaderAux(
CoordinateEncoder.GEO,
() -> GeometryTestUtils.randomGeometryWithoutCircle(0, false),
field -> new GeoShapeIndexer(Orientation.RIGHT, field),
g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP)
);
}

private void testBoundsBlockLoaderAux(
CoordinateEncoder encoder,
Supplier<Geometry> generator,
Function<String, ShapeIndexer> indexerFactory,
Function<Geometry, Optional<Rectangle>> visitor
) throws IOException {
var geometries = IntStream.range(0, 20).mapToObj(i -> generator.get()).toList();
var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder);
try (Directory directory = newDirectory()) {
try (var iw = new RandomIndexWriter(random(), directory)) {
for (Geometry geometry : geometries) {
var shape = new BinaryShapeDocValuesField("field", encoder);
shape.add(indexerFactory.apply("field").indexShape(geometry), geometry);
var doc = new Document();
doc.add(shape);
iw.addDocument(doc);
}
}
var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray();
try (DirectoryReader reader = DirectoryReader.open(directory)) {
LeafReaderContext ctx = reader.leaves().get(0);
TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices));
for (int i = 0; i < indices.length; i++) {
var idx = indices[i];
var geometry = geometries.get(idx);
var geoString = geometry.toString();
var geometryString = geoString.length() > 200 ? geoString.substring(0, 200) + "..." : geoString;
Rectangle r = visitor.apply(geometry).get();
assertThat(
Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometryString),
(BytesRef) block.get(i),
WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder))
);
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.elasticsearch.index.fielddata.LeafFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.index.translog.Translog;
Expand Down Expand Up @@ -87,8 +88,6 @@
import java.util.stream.IntStream;

import static java.util.stream.Collectors.toList;
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE;
import static org.elasticsearch.test.MapMatcher.assertMap;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.contains;
Expand Down Expand Up @@ -1420,7 +1419,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str
this(columnAtATimeReader, true, mapper, loaderFieldName);
}

private BlockLoader getBlockLoader(boolean columnReader) {
private BlockLoader getBlockLoader(FieldExtractPreference fieldExtractPreference) {
SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null);
return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() {
@Override
Expand All @@ -1434,8 +1433,8 @@ public IndexSettings indexSettings() {
}

@Override
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
return columnReader ? DOC_VALUES : NONE;
public FieldExtractPreference fieldExtractPreference() {
return fieldExtractPreference;
}

@Override
Expand Down Expand Up @@ -1493,7 +1492,9 @@ protected final void testBlockLoader(
BlockReaderSupport blockReaderSupport,
SourceLoader sourceLoader
) throws IOException {
BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader);
// EXTRACT_SPATIAL_BOUNDS is not currently supported in this test path.
var fieldExtractPreference = columnReader ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE;
BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference);
Function<Object, Object> valuesConvert = loadBlockExpected(blockReaderSupport, columnReader);
if (valuesConvert == null) {
assertNull(loader);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xpack.esql.expression;
package org.elasticsearch.test.hamcrest;

import org.elasticsearch.compute.aggregation.spatial.PointType;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.hamcrest.Description;
import org.hamcrest.Matchers;
import org.hamcrest.TypeSafeMatcher;
Expand All @@ -19,23 +21,31 @@
*/
public class RectangleMatcher extends TypeSafeMatcher<Rectangle> {
private final Rectangle r;
private final PointType pointType;
private final CoordinateEncoder coordinateEncoder;
private final double error;

public static TypeSafeMatcher<Rectangle> closeTo(Rectangle r, double error, PointType pointType) {
return new RectangleMatcher(r, error, pointType);
public static TypeSafeMatcher<Rectangle> closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) {
return new RectangleMatcher(r, error, coordinateEncoder);
}

private RectangleMatcher(Rectangle r, double error, PointType pointType) {
private RectangleMatcher(Rectangle r, double error, CoordinateEncoder coordinateEncoder) {
this.r = r;
this.pointType = pointType;
this.coordinateEncoder = coordinateEncoder;
this.error = error;
}

/**
* Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints.
*/
public static TypeSafeMatcher<Rectangle> closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) {
var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY());
return closeTo(normalized, v, encoder);
}

@Override
protected boolean matchesSafely(Rectangle other) {
// For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe.
boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX();
boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX();
boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX())
|| (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX()))
|| (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xpack.esql.expression;
package org.elasticsearch.test.hamcrest;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.geometry.Geometry;
Expand All @@ -23,6 +25,10 @@ public WellKnownBinaryBytesRefMatcher(Matcher<G> matcher) {
this.matcher = matcher;
}

public static <G extends Geometry> Matcher<BytesRef> encodes(TypeSafeMatcher<G> matcher) {
return new WellKnownBinaryBytesRefMatcher<G>(matcher);
}

@Override
public boolean matchesSafely(BytesRef bytesRef) {
return matcher.matches(fromBytesRef(bytesRef));
Expand Down
Loading

0 comments on commit ef6372a

Please sign in to comment.