From d187fcb9de218c634c827f44fdc44947eec198ae Mon Sep 17 00:00:00 2001 From: Nikita Glashenko Date: Fri, 12 Jul 2019 20:44:59 +0500 Subject: [PATCH] Support WKT point conversion to geo_point type (#44107) This PR adds support for parsing geo_point values from WKT POINT format. Also, a few minor bugs in geo_point parsing were fixed. Closes #41821 --- .../mapping/types/geo-point.asciidoc | 14 ++++- .../elasticsearch/common/geo/GeoPoint.java | 51 +++++++++++++++-- .../elasticsearch/common/geo/GeoUtils.java | 40 ++------------ .../index/mapper/GeoPointFieldMapper.java | 55 +++++-------------- .../mapper/GeoPointFieldMapperTests.java | 17 ++++++ .../search/geo/GeoPointParsingTests.java | 16 ++++++ 6 files changed, 110 insertions(+), 83 deletions(-) diff --git a/docs/reference/mapping/types/geo-point.asciidoc b/docs/reference/mapping/types/geo-point.asciidoc index 51e137fbc33b6..f890cf92f89d2 100644 --- a/docs/reference/mapping/types/geo-point.asciidoc +++ b/docs/reference/mapping/types/geo-point.asciidoc @@ -11,7 +11,7 @@ Fields of type `geo_point` accept latitude-longitude pairs, which can be used: * to integrate distance into a document's <>. * to <> documents by distance. -There are four ways that a geo-point may be specified, as demonstrated below: +There are five ways that a geo-point may be specified, as demonstrated below: [source,js] -------------------------------------------------- @@ -53,10 +53,16 @@ PUT my_index/_doc/4 "location": [ -71.34, 41.12 ] <4> } +PUT my_index/_doc/5 +{ + "text": "Geo-point as a WKT POINT primitive", + "location" : "POINT (-71.34 41.12)" <5> +} + GET my_index/_search { "query": { - "geo_bounding_box": { <5> + "geo_bounding_box": { <6> "location": { "top_left": { "lat": 42, @@ -76,7 +82,9 @@ GET my_index/_search <2> Geo-point expressed as a string with the format: `"lat,lon"`. <3> Geo-point expressed as a geohash. <4> Geo-point expressed as an array with the format: [ `lon`, `lat`] -<5> A geo-bounding box query which finds all geo-points that fall inside the box. +<5> Geo-point expressed as a http://docs.opengeospatial.org/is/12-063r5/12-063r5.html[Well-Known Text] +POINT with the format: `"POINT(lon lat)"` +<6> A geo-bounding box query which finds all geo-points that fall inside the box. [IMPORTANT] .Geo-points expressed as an array or string diff --git a/server/src/main/java/org/elasticsearch/common/geo/GeoPoint.java b/server/src/main/java/org/elasticsearch/common/geo/GeoPoint.java index b4039fdbd2825..1043c9115e8ef 100644 --- a/server/src/main/java/org/elasticsearch/common/geo/GeoPoint.java +++ b/server/src/main/java/org/elasticsearch/common/geo/GeoPoint.java @@ -25,13 +25,21 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.geo.GeoUtils.EffectivePoint; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.geo.geometry.Geometry; +import org.elasticsearch.geo.geometry.Point; +import org.elasticsearch.geo.geometry.Rectangle; +import org.elasticsearch.geo.geometry.ShapeType; +import org.elasticsearch.geo.utils.GeographyValidator; import org.elasticsearch.geo.utils.Geohash; +import org.elasticsearch.geo.utils.WellKnownText; import java.io.IOException; import java.util.Arrays; +import java.util.Locale; import static org.elasticsearch.index.mapper.GeoPointFieldMapper.Names.IGNORE_Z_VALUE; @@ -79,14 +87,16 @@ public GeoPoint resetLon(double lon) { } public GeoPoint resetFromString(String value) { - return resetFromString(value, false); + return resetFromString(value, false, EffectivePoint.BOTTOM_LEFT); } - public GeoPoint resetFromString(String value, final boolean ignoreZValue) { - if (value.contains(",")) { + public GeoPoint resetFromString(String value, final boolean ignoreZValue, EffectivePoint effectivePoint) { + if (value.toLowerCase(Locale.ROOT).contains("point")) { + return resetFromWKT(value, ignoreZValue); + } else if (value.contains(",")) { return resetFromCoordinates(value, ignoreZValue); } - return resetFromGeoHash(value); + return parseGeoHash(value, effectivePoint); } @@ -114,6 +124,39 @@ public GeoPoint resetFromCoordinates(String value, final boolean ignoreZValue) { return reset(lat, lon); } + private GeoPoint resetFromWKT(String value, boolean ignoreZValue) { + Geometry geometry; + try { + geometry = new WellKnownText(false, new GeographyValidator(ignoreZValue)) + .fromWKT(value); + } catch (Exception e) { + throw new ElasticsearchParseException("Invalid WKT format", e); + } + if (geometry.type() != ShapeType.POINT) { + throw new ElasticsearchParseException("[geo_point] supports only POINT among WKT primitives, " + + "but found " + geometry.type()); + } + Point point = (Point) geometry; + return reset(point.getLat(), point.getLon()); + } + + GeoPoint parseGeoHash(String geohash, EffectivePoint effectivePoint) { + if (effectivePoint == EffectivePoint.BOTTOM_LEFT) { + return resetFromGeoHash(geohash); + } else { + Rectangle rectangle = Geohash.toBoundingBox(geohash); + switch (effectivePoint) { + case TOP_LEFT: + return reset(rectangle.getMaxLat(), rectangle.getMinLon()); + case TOP_RIGHT: + return reset(rectangle.getMaxLat(), rectangle.getMaxLon()); + case BOTTOM_RIGHT: + return reset(rectangle.getMinLat(), rectangle.getMaxLon()); + default: + throw new IllegalArgumentException("Unsupported effective point " + effectivePoint); + } + } + } public GeoPoint resetFromIndexHash(long hash) { lon = Geohash.decodeLongitude(hash); diff --git a/server/src/main/java/org/elasticsearch/common/geo/GeoUtils.java b/server/src/main/java/org/elasticsearch/common/geo/GeoUtils.java index 33e4140a08e13..7e7cbac051f60 100644 --- a/server/src/main/java/org/elasticsearch/common/geo/GeoUtils.java +++ b/server/src/main/java/org/elasticsearch/common/geo/GeoUtils.java @@ -31,8 +31,6 @@ import org.elasticsearch.common.xcontent.XContentSubParser; import org.elasticsearch.common.xcontent.support.MapXContentParser; import org.elasticsearch.common.xcontent.support.XContentMapValues; -import org.elasticsearch.geo.geometry.Rectangle; -import org.elasticsearch.geo.utils.Geohash; import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.GeoPointValues; import org.elasticsearch.index.fielddata.MultiGeoPointValues; @@ -476,7 +474,7 @@ public static GeoPoint parseGeoPoint(XContentParser parser, GeoPoint point, fina if(!Double.isNaN(lat) || !Double.isNaN(lon)) { throw new ElasticsearchParseException("field must be either lat/lon or geohash"); } else { - return parseGeoHash(point, geohash, effectivePoint); + return point.parseGeoHash(geohash, effectivePoint); } } else if (numberFormatException != null) { throw new ElasticsearchParseException("[{}] and [{}] must be valid double values", numberFormatException, LATITUDE, @@ -499,8 +497,10 @@ public static GeoPoint parseGeoPoint(XContentParser parser, GeoPoint point, fina lon = subParser.doubleValue(); } else if (element == 2) { lat = subParser.doubleValue(); - } else { + } else if (element == 3) { GeoPoint.assertZValue(ignoreZValue, subParser.doubleValue()); + } else { + throw new ElasticsearchParseException("[geo_point] field type does not accept > 3 dimensions"); } } else { throw new ElasticsearchParseException("numeric value expected"); @@ -510,35 +510,12 @@ public static GeoPoint parseGeoPoint(XContentParser parser, GeoPoint point, fina return point.reset(lat, lon); } else if(parser.currentToken() == Token.VALUE_STRING) { String val = parser.text(); - if (val.contains(",")) { - return point.resetFromString(val, ignoreZValue); - } else { - return parseGeoHash(point, val, effectivePoint); - } - + return point.resetFromString(val, ignoreZValue, effectivePoint); } else { throw new ElasticsearchParseException("geo_point expected"); } } - private static GeoPoint parseGeoHash(GeoPoint point, String geohash, EffectivePoint effectivePoint) { - if (effectivePoint == EffectivePoint.BOTTOM_LEFT) { - return point.resetFromGeoHash(geohash); - } else { - Rectangle rectangle = Geohash.toBoundingBox(geohash); - switch (effectivePoint) { - case TOP_LEFT: - return point.reset(rectangle.getMaxLat(), rectangle.getMinLon()); - case TOP_RIGHT: - return point.reset(rectangle.getMaxLat(), rectangle.getMaxLon()); - case BOTTOM_RIGHT: - return point.reset(rectangle.getMinLat(), rectangle.getMaxLon()); - default: - throw new IllegalArgumentException("Unsupported effective point " + effectivePoint); - } - } - } - /** * Parse a {@link GeoPoint} from a string. The string must have one of the following forms: * @@ -552,12 +529,7 @@ private static GeoPoint parseGeoHash(GeoPoint point, String geohash, EffectivePo */ public static GeoPoint parseFromString(String val) { GeoPoint point = new GeoPoint(); - boolean ignoreZValue = false; - if (val.contains(",")) { - return point.resetFromString(val, ignoreZValue); - } else { - return parseGeoHash(point, val, EffectivePoint.BOTTOM_LEFT); - } + return point.resetFromString(val, false, EffectivePoint.BOTTOM_LEFT); } /** diff --git a/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java index 1621f60b9b784..bc3a4b08e273e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java @@ -301,38 +301,23 @@ public void parse(ParseContext context) throws IOException { XContentParser.Token token = context.parser().currentToken(); if (token == XContentParser.Token.START_ARRAY) { token = context.parser().nextToken(); - if (token == XContentParser.Token.START_ARRAY) { - // its an array of array of lon/lat [ [1.2, 1.3], [1.4, 1.5] ] - while (token != XContentParser.Token.END_ARRAY) { - parseGeoPointIgnoringMalformed(context, sparse); - token = context.parser().nextToken(); + if (token == XContentParser.Token.VALUE_NUMBER) { + double lon = context.parser().doubleValue(); + context.parser().nextToken(); + double lat = context.parser().doubleValue(); + token = context.parser().nextToken(); + if (token == XContentParser.Token.VALUE_NUMBER) { + GeoPoint.assertZValue(ignoreZValue.value(), context.parser().doubleValue()); + } else if (token != XContentParser.Token.END_ARRAY) { + throw new ElasticsearchParseException("[{}] field type does not accept > 3 dimensions", CONTENT_TYPE); } + parse(context, sparse.reset(lat, lon)); } else { - // its an array of other possible values - if (token == XContentParser.Token.VALUE_NUMBER) { - double lon = context.parser().doubleValue(); - context.parser().nextToken(); - double lat = context.parser().doubleValue(); + while (token != XContentParser.Token.END_ARRAY) { + parseGeoPointIgnoringMalformed(context, sparse); token = context.parser().nextToken(); - if (token == XContentParser.Token.VALUE_NUMBER) { - GeoPoint.assertZValue(ignoreZValue.value(), context.parser().doubleValue()); - } else if (token != XContentParser.Token.END_ARRAY) { - throw new ElasticsearchParseException("[{}] field type does not accept > 3 dimensions", CONTENT_TYPE); - } - parse(context, sparse.reset(lat, lon)); - } else { - while (token != XContentParser.Token.END_ARRAY) { - if (token == XContentParser.Token.VALUE_STRING) { - parseGeoPointStringIgnoringMalformed(context, sparse); - } else { - parseGeoPointIgnoringMalformed(context, sparse); - } - token = context.parser().nextToken(); - } } } - } else if (token == XContentParser.Token.VALUE_STRING) { - parseGeoPointStringIgnoringMalformed(context, sparse); } else if (token == XContentParser.Token.VALUE_NULL) { if (fieldType.nullValue() != null) { parse(context, (GeoPoint) fieldType.nullValue()); @@ -353,21 +338,7 @@ public void parse(ParseContext context) throws IOException { */ private void parseGeoPointIgnoringMalformed(ParseContext context, GeoPoint sparse) throws IOException { try { - parse(context, GeoUtils.parseGeoPoint(context.parser(), sparse)); - } catch (ElasticsearchParseException e) { - if (ignoreMalformed.value() == false) { - throw e; - } - context.addIgnoredField(fieldType.name()); - } - } - - /** - * Parses geopoint represented as a string and ignores malformed geopoints if needed - */ - private void parseGeoPointStringIgnoringMalformed(ParseContext context, GeoPoint sparse) throws IOException { - try { - parse(context, sparse.resetFromString(context.parser().text(), ignoreZValue.value())); + parse(context, GeoUtils.parseGeoPoint(context.parser(), sparse, ignoreZValue.value())); } catch (ElasticsearchParseException e) { if (ignoreMalformed.value() == false) { throw e; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldMapperTests.java index d80d51320403e..27da7cbe0592a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldMapperTests.java @@ -75,6 +75,23 @@ public void testGeoHashValue() throws Exception { assertThat(doc.rootDoc().getField("point"), notNullValue()); } + public void testWKT() throws Exception { + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("point").field("type", "geo_point"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("type", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "type", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject() + .field("point", "POINT (2 3)") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("point"), notNullValue()); + } + public void testLatLonValuesStored() throws Exception { XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("point").field("type", "geo_point"); diff --git a/server/src/test/java/org/elasticsearch/index/search/geo/GeoPointParsingTests.java b/server/src/test/java/org/elasticsearch/index/search/geo/GeoPointParsingTests.java index 9af651119e642..0d7f5f5fef68e 100644 --- a/server/src/test/java/org/elasticsearch/index/search/geo/GeoPointParsingTests.java +++ b/server/src/test/java/org/elasticsearch/index/search/geo/GeoPointParsingTests.java @@ -57,6 +57,22 @@ public void testGeoPointReset() throws IOException { assertPointsEqual(point.reset(0, 0), point2.reset(0, 0)); assertPointsEqual(point.resetFromString(Double.toString(lat) + ", " + Double.toHexString(lon)), point2.reset(lat, lon)); assertPointsEqual(point.reset(0, 0), point2.reset(0, 0)); + assertPointsEqual(point.resetFromString("POINT(" + lon + " " + lat + ")"), point2.reset(lat, lon)); + } + + public void testParseWktInvalid() { + GeoPoint point = new GeoPoint(0, 0); + Exception e = expectThrows( + ElasticsearchParseException.class, + () -> point.resetFromString("NOT A POINT(1 2)") + ); + assertEquals("Invalid WKT format", e.getMessage()); + + Exception e2 = expectThrows( + ElasticsearchParseException.class, + () -> point.resetFromString("MULTIPOINT(1 2, 3 4)") + ); + assertEquals("[geo_point] supports only POINT among WKT primitives, but found MULTIPOINT", e2.getMessage()); } public void testEqualsHashCodeContract() {