Skip to content

Commit

Permalink
SQL: Improve null handling in Geo Functions (#40708)
Browse files Browse the repository at this point in the history
ST_Distance function now returns null instead of throwing an error
when one of the arguments is null. It also brings the handling of
arrays when a single item is expected in line with the rest of the
data types and fixes the handling of geo_points when docvalues
are not available.

Relates to #29872
  • Loading branch information
imotov authored Apr 10, 2019
1 parent ee64cc9 commit d7647c8
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 70 deletions.
30 changes: 15 additions & 15 deletions x-pack/plugin/sql/qa/src/main/resources/geo/geosql-bulk.json
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
{"index":{"_id": "1"}}
{"region": "Americas", "city": "Mountain View", "location": {"lat":"37.386483", "lon":"-122.083843"}, "shape": "POINT (-122.083843 37.386483)", "region_point": "POINT(-105.2551 54.5260)"}
{"region": "Americas", "city": "Mountain View", "location": {"lat":"37.386483", "lon":"-122.083843"}, "location_no_dv": {"lat":"37.386483", "lon":"-122.083843"}, "shape": "POINT (-122.083843 37.386483)", "region_point": "POINT(-105.2551 54.5260)"}
{"index":{"_id": "2"}}
{"region": "Americas", "city": "Chicago", "location": [-87.637874, 41.888783], "shape": {"type" : "point", "coordinates" : [-87.637874, 41.888783]}, "region_point": "POINT(-105.2551 54.5260)"}
{"region": "Americas", "city": "Chicago", "location": [-87.637874, 41.888783], "location_no_dv": [-87.637874, 41.888783], "shape": {"type" : "point", "coordinates" : [-87.637874, 41.888783]}, "region_point": "POINT(-105.2551 54.5260)"}
{"index":{"_id": "3"}}
{"region": "Americas", "city": "New York", "location": "40.745171,-73.990027", "shape": "POINT (-73.990027 40.745171)", "region_point": "POINT(-105.2551 54.5260)"}
{"region": "Americas", "city": "New York", "location": "40.745171,-73.990027", "location_no_dv": "40.745171,-73.990027", "shape": "POINT (-73.990027 40.745171)", "region_point": "POINT(-105.2551 54.5260)"}
{"index":{"_id": "4"}}
{"region": "Americas", "city": "San Francisco", "location": "37.789541,-122.394228", "shape": "POINT (-122.394228 37.789541)", "region_point": "POINT(-105.2551 54.5260)"}
{"region": "Americas", "city": "San Francisco", "location": "37.789541,-122.394228", "location_no_dv": "37.789541,-122.394228", "shape": "POINT (-122.394228 37.789541)", "region_point": "POINT(-105.2551 54.5260)"}
{"index":{"_id": "5"}}
{"region": "Americas", "city": "Phoenix", "location": "33.376242,-111.973505", "shape": "POINT (-111.973505 33.376242)", "region_point": "POINT(-105.2551 54.5260)"}
{"region": "Americas", "city": "Phoenix", "location": "33.376242,-111.973505", "location_no_dv": "33.376242,-111.973505", "shape": "POINT (-111.973505 33.376242)", "region_point": "POINT(-105.2551 54.5260)"}
{"index":{"_id": "6"}}
{"region": "Europe", "city": "Amsterdam", "location": "52.347557,4.850312", "shape": "POINT (4.850312 52.347557)", "region_point": "POINT(15.2551 54.5260)"}
{"region": "Europe", "city": "Amsterdam", "location": "52.347557,4.850312", "location_no_dv": "52.347557,4.850312", "shape": "POINT (4.850312 52.347557)", "region_point": "POINT(15.2551 54.5260)"}
{"index":{"_id": "7"}}
{"region": "Europe", "city": "Berlin", "location": "52.486701,13.390889", "shape": "POINT (13.390889 52.486701)", "region_point": "POINT(15.2551 54.5260)"}
{"region": "Europe", "city": "Berlin", "location": "52.486701,13.390889", "location_no_dv": "52.486701,13.390889", "shape": "POINT (13.390889 52.486701)", "region_point": "POINT(15.2551 54.5260)"}
{"index":{"_id": "8"}}
{"region": "Europe", "city": "Munich", "location": "48.146321,11.537505", "shape": "POINT (11.537505 48.146321)", "region_point": "POINT(15.2551 54.5260)"}
{"region": "Europe", "city": "Munich", "location": "48.146321,11.537505", "location_no_dv": "48.146321,11.537505", "shape": "POINT (11.537505 48.146321)", "region_point": "POINT(15.2551 54.5260)"}
{"index":{"_id": "9"}}
{"region": "Europe", "city": "London", "location": "51.510871,-0.121672", "shape": "POINT (-0.121672 51.510871)", "region_point": "POINT(15.2551 54.5260)"}
{"region": "Europe", "city": "London", "location": "51.510871,-0.121672", "location_no_dv": "51.510871,-0.121672", "shape": "POINT (-0.121672 51.510871)", "region_point": "POINT(15.2551 54.5260)"}
{"index":{"_id": "10"}}
{"region": "Europe", "city": "Paris", "location": "48.845538,2.351773", "shape": "POINT (2.351773 48.845538)", "region_point": "POINT(15.2551 54.5260)"}
{"region": "Europe", "city": "Paris", "location": "48.845538,2.351773", "location_no_dv": "48.845538,2.351773", "shape": "POINT (2.351773 48.845538)", "region_point": "POINT(15.2551 54.5260)"}
{"index":{"_id": "11"}}
{"region": "Asia", "city": "Singapore", "location": "1.295868,103.855535", "shape": "POINT (103.855535 1.295868)", "region_point": "POINT(100.6197 34.0479)"}
{"region": "Asia", "city": "Singapore", "location": "1.295868,103.855535", "location_no_dv": "1.295868,103.855535", "shape": "POINT (103.855535 1.295868)", "region_point": "POINT(100.6197 34.0479)"}
{"index":{"_id": "12"}}
{"region": "Asia", "city": "Hong Kong", "location": "22.281397,114.183925", "shape": "POINT (114.183925 22.281397)", "region_point": "POINT(100.6197 34.0479)"}
{"region": "Asia", "city": "Hong Kong", "location": "22.281397,114.183925", "location_no_dv": "22.281397,114.183925", "shape": "POINT (114.183925 22.281397)", "region_point": "POINT(100.6197 34.0479)"}
{"index":{"_id": "13"}}
{"region": "Asia", "city": "Seoul", "location": "37.509132,127.060851", "shape": "POINT (127.060851 37.509132)", "region_point": "POINT(100.6197 34.0479)"}
{"region": "Asia", "city": "Seoul", "location": "37.509132,127.060851", "location_no_dv": "37.509132,127.060851", "shape": "POINT (127.060851 37.509132)", "region_point": "POINT(100.6197 34.0479)"}
{"index":{"_id": "14"}}
{"region": "Asia", "city": "Tokyo", "location": "35.669616,139.76402225", "shape": "POINT (139.76402225 35.669616)", "region_point": "POINT(100.6197 34.0479)"}
{"region": "Asia", "city": "Tokyo", "location": "35.669616,139.76402225", "location_no_dv": "35.669616,139.76402225", "shape": "POINT (139.76402225 35.669616)", "region_point": "POINT(100.6197 34.0479)"}
{"index":{"_id": "15"}}
{"region": "Asia", "city": "Sydney", "location": "-33.863385,151.208629", "shape": "POINT (151.208629 -33.863385)", "region_point": "POINT(100.6197 34.0479)"}
{"region": "Asia", "city": "Sydney", "location": "-33.863385,151.208629", "location_no_dv": "-33.863385,151.208629", "shape": "POINT (151.208629 -33.863385)", "region_point": "POINT(100.6197 34.0479)"}



49 changes: 31 additions & 18 deletions x-pack/plugin/sql/qa/src/main/resources/geo/geosql.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ DESCRIBE "geo";
column:s | type:s | mapping:s
city | VARCHAR | keyword
location | GEOMETRY | geo_point
location_no_dv | GEOMETRY | geo_point
region | VARCHAR | keyword
region_point | VARCHAR | keyword
shape | GEOMETRY | geo_shape
Expand All @@ -26,24 +27,24 @@ shape | GEOMETRY | geo_shape
// TODO: For now we just get geopoint formatted as is and we also need to convert it to STRING to work with CSV

selectAllPointsAsStrings
SELECT city, CAST(location AS STRING) location, CAST(shape AS STRING) shape, region FROM "geo" ORDER BY "city";

city:s | location:s | shape:s | region:s
Amsterdam |point (4.850311987102032 52.347556999884546) |point (4.850312 52.347557) |Europe
Berlin |point (13.390888944268227 52.48670099303126) |point (13.390889 52.486701) |Europe
Chicago |point (-87.63787407428026 41.888782968744636) |point (-87.637874 41.888783) |Americas
Hong Kong |point (114.18392493389547 22.28139698971063) |point (114.183925 22.281397) |Asia
London |point (-0.12167204171419144 51.51087098289281)|point (-0.121672 51.510871) |Europe
Mountain View |point (-122.08384302444756 37.38648299127817) |point (-122.083843 37.386483) |Americas
Munich |point (11.537504978477955 48.14632098656148) |point (11.537505 48.146321) |Europe
New York |point (-73.9900270756334 40.74517097789794) |point (-73.990027 40.745171) |Americas
Paris |point (2.3517729341983795 48.84553796611726) |point (2.351773 48.845538) |Europe
Phoenix |point (-111.97350500151515 33.37624196894467) |point (-111.973505 33.376242) |Americas
San Francisco |point (-122.39422800019383 37.789540970698) |point (-122.394228 37.789541) |Americas
Seoul |point (127.06085099838674 37.50913198571652) |point (127.060851 37.509132) |Asia
Singapore |point (103.8555349688977 1.2958679627627134) |point (103.855535 1.295868) |Asia
Sydney |point (151.20862897485495 -33.863385021686554)|point (151.208629 -33.863385) |Asia
Tokyo |point (139.76402222178876 35.66961596254259) |point (139.76402225 35.669616)|Asia
SELECT city, CAST(location AS STRING) location, CAST(location_no_dv AS STRING) location_no_dv, CAST(shape AS STRING) shape, region FROM "geo" ORDER BY "city";

city:s | location:s | location_no_dv:s | shape:s | region:s
Amsterdam |point (4.850311987102032 52.347556999884546) |point (4.850312 52.347557) |point (4.850312 52.347557) |Europe
Berlin |point (13.390888944268227 52.48670099303126) |point (13.390889 52.486701) |point (13.390889 52.486701) |Europe
Chicago |point (-87.63787407428026 41.888782968744636) |point (-87.637874 41.888783) |point (-87.637874 41.888783) |Americas
Hong Kong |point (114.18392493389547 22.28139698971063) |point (114.183925 22.281397) |point (114.183925 22.281397) |Asia
London |point (-0.12167204171419144 51.51087098289281)|point (-0.121672 51.510871) |point (-0.121672 51.510871) |Europe
Mountain View |point (-122.08384302444756 37.38648299127817) |point (-122.083843 37.386483) |point (-122.083843 37.386483) |Americas
Munich |point (11.537504978477955 48.14632098656148) |point (11.537505 48.146321) |point (11.537505 48.146321) |Europe
New York |point (-73.9900270756334 40.74517097789794) |point (-73.990027 40.745171) |point (-73.990027 40.745171) |Americas
Paris |point (2.3517729341983795 48.84553796611726) |point (2.351773 48.845538) |point (2.351773 48.845538) |Europe
Phoenix |point (-111.97350500151515 33.37624196894467) |point (-111.973505 33.376242) |point (-111.973505 33.376242) |Americas
San Francisco |point (-122.39422800019383 37.789540970698) |point (-122.394228 37.789541) |point (-122.394228 37.789541) |Americas
Seoul |point (127.06085099838674 37.50913198571652) |point (127.060851 37.509132) |point (127.060851 37.509132) |Asia
Singapore |point (103.8555349688977 1.2958679627627134) |point (103.855535 1.295868) |point (103.855535 1.295868) |Asia
Sydney |point (151.20862897485495 -33.863385021686554)|point (151.208629 -33.863385) |point (151.208629 -33.863385) |Asia
Tokyo |point (139.76402222178876 35.66961596254259) |point (139.76402225 35.669616)|point (139.76402225 35.669616)|Asia
;

// TODO: Both shape and location contain the same data for now, we should change it later to make things more interesting
Expand Down Expand Up @@ -201,3 +202,15 @@ SELECT COUNT(*) count, FIRST(region) region FROM geo GROUP BY FLOOR(ST_Distance(
3 |Asia
2 |Asia
;

selectWktToSqlOfNull
SELECT ST_ASWKT(ST_WktToSql(NULL)) shape;
shape:s
null
;

selectWktToSqlOfNull
SELECT ST_Distance(ST_WktToSql(NULL), ST_WktToSQL('POINT (-71 42)')) shape;
shape:d
null
;
4 changes: 4 additions & 0 deletions x-pack/plugin/sql/qa/src/main/resources/geo/geosql.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
"location": {
"type": "geo_point"
},
"location_no_dv": {
"type": "geo_point",
"doc_values": "false"
},
"shape": {
"type": "geo_shape"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,43 +127,34 @@ private Object unwrapMultiValue(Object values) {
if (values == null) {
return null;
}
if (dataType == DataType.GEO_POINT) {
Object value;
if (values instanceof List && ((List<?>) values).size() == 1) {
value = ((List<?>) values).get(0);
if (values instanceof List) {
List<?> list = (List<?>) values;
if (list.isEmpty()) {
return null;
} else {
value = values;
// let's make sure first that we are not dealing with a geo_point represented as an array
if (isGeoPointArray(list) == false) {
if (list.size() == 1 || arrayLeniency) {
return unwrapMultiValue(list.get(0));
} else {
throw new SqlIllegalArgumentException("Arrays (returned by [{}]) are not supported", fieldName);
}
}
}
}
if (dataType == DataType.GEO_POINT) {
try {
GeoPoint geoPoint = GeoUtils.parseGeoPoint(value, true);
GeoPoint geoPoint = GeoUtils.parseGeoPoint(values, true);
return new GeoShape(geoPoint.lon(), geoPoint.lat());
} catch (ElasticsearchParseException ex) {
throw new SqlIllegalArgumentException("Cannot parse geo_point value (returned by [{}])", fieldName);
throw new SqlIllegalArgumentException("Cannot parse geo_point value [{}] (returned by [{}])", values, fieldName);
}
}
if (dataType == DataType.GEO_SHAPE) {
Object value;
if (values instanceof List && ((List<?>) values).size() == 1) {
value = ((List<?>) values).get(0);
} else {
value = values;
}
try {
return new GeoShape(value);
return new GeoShape(values);
} catch (IOException ex) {
throw new SqlIllegalArgumentException("Cannot read geo_shape value (returned by [{}])", fieldName);
}
}
if (values instanceof List) {
List<?> list = (List<?>) values;
if (list.isEmpty()) {
return null;
} else {
if (arrayLeniency || list.size() == 1) {
return unwrapMultiValue(list.get(0));
} else {
throw new SqlIllegalArgumentException("Arrays (returned by [{}]) are not supported", fieldName);
}
throw new SqlIllegalArgumentException("Cannot read geo_shape value [{}] (returned by [{}])", values, fieldName);
}
}
if (values instanceof Map) {
Expand All @@ -180,6 +171,17 @@ private Object unwrapMultiValue(Object values) {
throw new SqlIllegalArgumentException("Type {} (returned by [{}]) is not supported", values.getClass().getSimpleName(), fieldName);
}

/**
 * Decides whether {@code list} looks like a single geo_point written in array notation,
 * as opposed to a multi-valued field.
 *
 * <p>The answer is {@code true} only when this extractor's data type is
 * {@link DataType#GEO_POINT}, the list holds between one and three entries, and the
 * first entry is a {@link Number}. Only the first entry is inspected; the remaining
 * entries are not type-checked here.
 *
 * @param list the candidate value list
 * @return {@code true} if the list should be treated as one geo_point, {@code false} otherwise
 */
private boolean isGeoPointArray(List<?> list) {
    // The point is expected as [lon lat] or [lon lat alt]; anything longer than three
    // entries is rejected. NOTE(review): a one-element list also passes the size test
    // even though the expected formats have two or three entries - confirm this is intended.
    return dataType == DataType.GEO_POINT
        && list.size() >= 1
        && list.size() <= 3
        && list.get(0) instanceof Number;
}

@SuppressWarnings({ "unchecked", "rawtypes" })
Object extractFromSource(Map<String, Object> map) {
Object value = null;
Expand All @@ -204,7 +206,9 @@ Object extractFromSource(Map<String, Object> map) {

if (node instanceof List) {
List listOfValues = (List) node;
if (listOfValues.size() == 1 || arrayLeniency) {
// we can only do this optimization until the last element of our path since geo points are using arrays
// and we don't want to blindly ignore the second element of array if arrayLeniency is enabled
if ((i < path.length - 1) && (listOfValues.size() == 1 || arrayLeniency)) {
// this is a List with a size of 1 e.g.: {"a" : [{"b" : "value"}]} meaning the JSON is a list with one element
// or a list of values with one element e.g.: {"a": {"b" : ["value"]}}
// in case of being lenient about arrays, just extract the first value in the array
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ protected Object doProcess(Object left, Object right) {
return process(left, right);
}

public static double process(Object source1, Object source2) {
public static Double process(Object source1, Object source2) {
if (source1 == null || source2 == null) {
return null;
}

if (source1 instanceof GeoShape == false) {
throw new SqlIllegalArgumentException("A geo_point or geo_shape with type point is required; received [{}]", source1);
}
Expand Down
Loading

0 comments on commit d7647c8

Please sign in to comment.