diff --git a/README.md b/README.md index 7d8558b6ac..3c7a12eda9 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,20 @@ Please refer to the [SQL Language Reference Manual](./docs/user/index.rst), [Pip Recently we have been actively improving our query engine primarily for better correctness and extensibility. The new enhanced query engine has been already supporting the new released Piped Processing Language query processing behind the scene. Meanwhile, the integration with SQL language is also under way. To try out the power of the new query engine with SQL, simply run the command to enable it by [plugin setting](https://github.com/opendistro-for-elasticsearch/sql/blob/develop/docs/user/admin/settings.rst#opendistro-sql-engine-new-enabled). In future release, this will be enabled by default and nothing required to do from your side. Please stay tuned for updates on our progress and its new exciting features. +Here is a documentation list with features only available in this improved SQL query engine. Please follow the instruction above to enable it before trying out example queries in these docs: + +* [Identifiers](./docs/user/general/identifiers.rst): support for identifier names with special characters +* [Data types](./docs/user/general/datatypes.rst): new data types such as date time and interval +* [Expressions](./docs/user/dql/expressions.rst): new expression system that can represent and evaluate complex expressions +* [SQL functions](./docs/user/dql/functions.rst): many more string and date functions added +* [Basic queries](./docs/user/dql/basics.rst) + * Ordering by Aggregate Functions section + * NULLS FIRST/LAST in section Specifying Order for Null +* [Aggregations](./docs/user/dql/aggregations.rst): aggregation over expression and more other features +* [Complex queries](./docs/user/dql/complex.rst) + * Improvement on Subqueries in FROM clause +* [Window functions](./docs/user/dql/window.rst): ranking window function support + ## Setup diff --git a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/analysis/Analyzer.java b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/analysis/Analyzer.java index d5ddb0ef98..9736ec99f0 100644 --- a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/analysis/Analyzer.java @@ -15,6 +15,10 @@ package com.amazon.opendistroforelasticsearch.sql.analysis; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder.NULL_LAST; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder.ASC; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder.DESC; import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.STRUCT; import com.amazon.opendistroforelasticsearch.sql.analysis.symbol.Namespace; @@ -71,6 +75,7 @@ import com.google.common.collect.ImmutableSet; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -316,12 +321,9 @@ public LogicalPlan visitSort(Sort node, AnalysisContext context) { node.getSortList().stream() .map( sortField -> { - // the first options is {"asc": "true/false"} - Boolean asc = (Boolean) sortField.getFieldArgs().get(0).getValue().getValue(); Expression expression = optimizer.optimize( expressionAnalyzer.analyze(sortField.getField(), context), context); - return ImmutablePair.of( - asc ? SortOption.DEFAULT_ASC : SortOption.DEFAULT_DESC, expression); + return ImmutablePair.of(analyzeSortOption(sortField.getFieldArgs()), expression); }) .collect(Collectors.toList()); @@ -379,4 +381,20 @@ public LogicalPlan visitValues(Values node, AnalysisContext context) { return new LogicalValues(valueExprs); } + /** + * The first argument is always "asc", others are optional. + * Given nullFirst argument, use its value. Otherwise just use DEFAULT_ASC/DESC. + */ + private SortOption analyzeSortOption(List fieldArgs) { + Boolean asc = (Boolean) fieldArgs.get(0).getValue().getValue(); + Optional nullFirst = fieldArgs.stream() + .filter(option -> "nullFirst".equals(option.getArgName())).findFirst(); + + if (nullFirst.isPresent()) { + Boolean isNullFirst = (Boolean) nullFirst.get().getValue().getValue(); + return new SortOption((asc ? ASC : DESC), (isNullFirst ? NULL_FIRST : NULL_LAST)); + } + return asc ? SortOption.DEFAULT_ASC : SortOption.DEFAULT_DESC; + } + } diff --git a/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/analysis/AnalyzerTest.java index a7115c3d71..cef8e0c4d1 100644 --- a/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/analysis/AnalyzerTest.java @@ -28,7 +28,10 @@ import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.relation; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.unresolvedArg; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.unresolvedArgList; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOption; import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder; import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.integerValue; import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.DOUBLE; import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.INTEGER; @@ -39,13 +42,12 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL; +import com.amazon.opendistroforelasticsearch.sql.ast.expression.Argument; import com.amazon.opendistroforelasticsearch.sql.ast.tree.RareTopN.CommandType; -import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort; import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException; import com.amazon.opendistroforelasticsearch.sql.expression.DSL; import com.amazon.opendistroforelasticsearch.sql.expression.config.ExpressionConfig; import com.amazon.opendistroforelasticsearch.sql.expression.window.WindowDefinition; -import com.amazon.opendistroforelasticsearch.sql.planner.logical.LogicalPlan; import com.amazon.opendistroforelasticsearch.sql.planner.logical.LogicalPlanDSL; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -298,7 +300,7 @@ public void sort_with_aggregator() { ImmutableList.of(DSL.named("string_value", DSL.ref("string_value", STRING)))), 0, // Aggregator in Sort AST node is replaced with reference by expression optimizer - Pair.of(Sort.SortOption.DEFAULT_ASC, DSL.ref("avg(integer_value)", DOUBLE))), + Pair.of(SortOption.DEFAULT_ASC, DSL.ref("avg(integer_value)", DOUBLE))), DSL.named("string_value", DSL.ref("string_value", STRING))), AstDSL.project( AstDSL.sort( @@ -319,6 +321,49 @@ public void sort_with_aggregator() { AstDSL.alias("string_value", qualifiedName("string_value")))); } + @SuppressWarnings("unchecked") + @Test + public void sort_with_options() { + ImmutableMap argOptions = + ImmutableMap.builder() + .put(new Argument[]{argument("asc", booleanLiteral(true))}, + new SortOption(SortOrder.ASC, NullOrder.NULL_FIRST)) + .put(new Argument[]{argument("asc", booleanLiteral(false))}, + new SortOption(SortOrder.DESC, NullOrder.NULL_LAST)) + .put(new Argument[]{ + argument("asc", booleanLiteral(true)), + argument("nullFirst", booleanLiteral(true))}, + new SortOption(SortOrder.ASC, NullOrder.NULL_FIRST)) + .put(new Argument[]{ + argument("asc", booleanLiteral(true)), + argument("nullFirst", booleanLiteral(false))}, + new SortOption(SortOrder.ASC, NullOrder.NULL_LAST)) + .put(new Argument[]{ + argument("asc", booleanLiteral(false)), + argument("nullFirst", booleanLiteral(true))}, + new SortOption(SortOrder.DESC, NullOrder.NULL_FIRST)) + .put(new Argument[]{ + argument("asc", booleanLiteral(false)), + argument("nullFirst", booleanLiteral(false))}, + new SortOption(SortOrder.DESC, NullOrder.NULL_LAST)) + .build(); + + argOptions.forEach((args, expectOption) -> + assertAnalyzeEqual( + LogicalPlanDSL.project( + LogicalPlanDSL.sort( + LogicalPlanDSL.relation("test"), + 0, + Pair.of(expectOption, DSL.ref("integer_value", INTEGER))), + DSL.named("string_value", DSL.ref("string_value", STRING))), + AstDSL.project( + AstDSL.sort( + AstDSL.relation("test"), + ImmutableList.of(argument("count", intLiteral(0))), + field(qualifiedName("integer_value"), args)), + AstDSL.alias("string_value", qualifiedName("string_value"))))); + } + @SuppressWarnings("unchecked") @Test public void window_function() { diff --git a/docs/user/dql/basics.rst b/docs/user/dql/basics.rst index 443cce0284..d80b2981f0 100644 --- a/docs/user/dql/basics.rst +++ b/docs/user/dql/basics.rst @@ -25,7 +25,7 @@ The syntax of ``SELECT`` statement is as follows:: [WHERE predicates] [GROUP BY expression [, ...] [HAVING predicates]] - [ORDER BY expression [IS [NOT] NULL] [ASC | DESC] [, ...]] + [ORDER BY expression [ASC | DESC] [NULLS {FIRST | LAST}] [, ...]] [LIMIT [offset, ] size] Although multiple query statements to execute in batch is not supported, ending with semicolon ``;`` is still allowed. For example, you can run ``SELECT * FROM accounts;`` without issue. This is useful to support queries generated by other tool, such as Microsoft Excel or BI tool. @@ -929,6 +929,42 @@ Result set: | Quility| +--------+ +Note that the example above is essentially sorting on a predicate expression. In this case, nulls are put first because it's evaluated to false (0), though all the rest are evaluated to true and still in random order. If you want to specify order for both nulls and non-nulls, ``NULLS FIRST`` or ``NULLS LAST`` in SQL standard can help. Basically, it allows you to specify an independent order for nulls along with ``ASC`` or ``DESC`` keyword:: + + od> SELECT employer FROM accounts ORDER BY employer ASC NULLS LAST; + fetched rows / total rows = 4/4 + +------------+ + | employer | + |------------| + | Netagy | + | Pyrami | + | Quility | + | null | + +------------+ + +The sorting rule can be summarized as follows: + +- Without ``NULLS`` clause + + - ``ASC``: sort non-nulls in ascending order and put nulls first + - ``DESC``: sort non-nulls in descending order and put nulls last + +- With ``NULLS`` clause: just use the nulls order given + +Here is another example for sort in descending order without ``NULLS`` clause:: + + od> SELECT employer FROM accounts ORDER BY employer DESC; + fetched rows / total rows = 4/4 + +------------+ + | employer | + |------------| + | Quility | + | Pyrami | + | Netagy | + | null | + +------------+ + + Example 3: Ordering by Aggregate Functions ------------------------------------------ diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/runner/connection/ESConnection.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/runner/connection/ESConnection.java index f6ca6c52cb..0dda024153 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/runner/connection/ESConnection.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/runner/connection/ESConnection.java @@ -116,7 +116,9 @@ private String buildBulkBody(String[] columnNames, List batch) { for (Object[] fieldValues : batch) { JSONObject json = new JSONObject(); for (int i = 0; i < columnNames.length; i++) { - json.put(columnNames[i], fieldValues[i]); + if (fieldValues[i] != null) { + json.put(columnNames[i], fieldValues[i]); + } } body.append("{\"index\":{}}\n"). diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/runner/connection/JDBCConnection.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/runner/connection/JDBCConnection.java index 299a0631fd..36d5860ddf 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/runner/connection/JDBCConnection.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/runner/connection/JDBCConnection.java @@ -158,12 +158,21 @@ private String parseColumnNameAndTypesInSchemaJson(String schema) { private String getValueList(Object[] fieldValues) { return Arrays.stream(fieldValues). - map(String::valueOf). - map(val -> val.replace(SINGLE_QUOTE, DOUBLE_QUOTE)). - map(val -> SINGLE_QUOTE + val + SINGLE_QUOTE). + map(this::convertValueObjectToString). collect(joining(",")); } + private String convertValueObjectToString(Object value) { + if (value == null) { + return "NULL"; + } + + String str = String.valueOf(value); + str = str.replace(SINGLE_QUOTE, DOUBLE_QUOTE); + str = SINGLE_QUOTE + str + SINGLE_QUOTE; + return str; + } + private void populateMetaData(ResultSet resultSet, DBResult result) throws SQLException { ResultSetMetaData metaData = resultSet.getMetaData(); for (int i = 1; i <= metaData.getColumnCount(); i++) { @@ -181,7 +190,8 @@ private void populateData(ResultSet resultSet, DBResult result) throws SQLExcept while (resultSet.next()) { Row row = new Row(); for (int i = 1; i <= result.columnSize(); i++) { - row.add(resultSet.getObject(i)); + Object value = resultSet.getObject(i); + row.add(resultSet.wasNull() ? null : value); } result.addRow(row); } diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/ESConnectionTest.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/ESConnectionTest.java index 16a24375ff..8b17dd6793 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/ESConnectionTest.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/ESConnectionTest.java @@ -87,6 +87,23 @@ public void testInsertData() throws IOException { ); } + @Test + public void testInsertNullData() throws IOException { + conn.insert("test", new String[] {"name", "age"}, + Arrays.asList(new Object[] {null, 30}, new Object[] {"Hank", null})); + + Request actual = captureActualArg(); + assertEquals("POST", actual.getMethod()); + assertEquals("/test/_bulk?refresh=true", actual.getEndpoint()); + assertEquals( + "{\"index\":{}}\n" + + "{\"age\":30}\n" + + "{\"index\":{}}\n" + + "{\"name\":\"Hank\"}\n", + getBody(actual) + ); + } + @Test public void testDropTable() throws IOException { conn.drop("test"); diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/JDBCConnectionTest.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/JDBCConnectionTest.java index 057ebdb795..bdc36f5663 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/JDBCConnectionTest.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/JDBCConnectionTest.java @@ -108,6 +108,27 @@ public void testInsertData() throws SQLException { ); } + @Test + public void testInsertNullData() throws SQLException { + conn.insert("test", new String[] {"name", "age"}, + Arrays.asList( + new Object[] {"John", null}, + new Object[] {null, 25}, + new Object[] {"Hank", 30})); + + ArgumentCaptor argCap = ArgumentCaptor.forClass(String.class); + verify(statement, times(3)).addBatch(argCap.capture()); + List actual = argCap.getAllValues(); + + assertEquals( + Arrays.asList( + "INSERT INTO test(name,age) VALUES ('John',NULL)", + "INSERT INTO test(name,age) VALUES (NULL,'25')", + "INSERT INTO test(name,age) VALUES ('Hank','30')" + ), actual + ); + } + @Test public void testSelectQuery() throws SQLException { ResultSetMetaData metaData = mockMetaData(ImmutableMap.of("name", "VARCHAR", "age", "INT")); diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/TestDataSetTest.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/TestDataSetTest.java index 6077e18f47..2d08741ab2 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/TestDataSetTest.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/tests/TestDataSetTest.java @@ -108,4 +108,33 @@ public void testDataSetWithEscapedComma() { ); } + @Test + public void testDataSetWithNullData() { + String mappings = + "{\n" + + " \"mappings\": {\n" + + " \"properties\": {\n" + + " \"field1\": {\n" + + " \"type\": \"text\"\n" + + " },\n" + + " \"field2\": {\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " }\n" + + " }\n" + + "}"; + + TestDataSet dataSet = new TestDataSet("test", mappings, + "field1,field2\n,123\nworld,\n,"); + assertThat( + dataSet.getDataRows(), + contains( + new Object[] {"field1", "field2"}, + new Object[] {null, 123}, + new Object[] {"world", null}, + new Object[] {null, null} + ) + ); + } + } diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/testset/TestDataSet.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/testset/TestDataSet.java index 8d3cfc1d6a..f7ad418e7b 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/testset/TestDataSet.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/correctness/testset/TestDataSet.java @@ -114,6 +114,10 @@ private Object[] convertStringArrayToObjectArray(JSONObject types, String[] colu } private Object convertStringToObject(String type, String str) { + if (str.isEmpty()) { + return null; + } + switch (type.toLowerCase()) { case "text": case "keyword": diff --git a/integ-test/src/test/resources/correctness/bugfixes/277.txt b/integ-test/src/test/resources/correctness/bugfixes/277.txt index bf793e2af7..368a6c2d89 100644 --- a/integ-test/src/test/resources/correctness/bugfixes/277.txt +++ b/integ-test/src/test/resources/correctness/bugfixes/277.txt @@ -2,7 +2,6 @@ SELECT COUNT(FlightNum) FROM kibana_sample_data_flights GROUP BY FlightDelay ORD SELECT COUNT(FlightNum) AS cnt FROM kibana_sample_data_flights GROUP BY FlightDelay ORDER BY cnt SELECT COUNT(FlightNum) FROM kibana_sample_data_flights GROUP BY FlightDelay ORDER BY 1 SELECT OriginWeather, AVG(FlightTimeMin) FROM kibana_sample_data_flights GROUP BY OriginWeather ORDER BY AVG(FlightTimeMin) -SELECT OriginWeather, AVG(FlightTimeMin) FROM kibana_sample_data_flights GROUP BY OriginWeather ORDER BY SUM(FlightDelayMin) SELECT OriginWeather, AVG(FlightTimeMin), SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginWeather ORDER BY AVG(FlightTimeMin), SUM(FlightDelayMin) SELECT OriginWeather, AVG(FlightTimeMin), SUM(FlightDelayMin) AS s FROM kibana_sample_data_flights GROUP BY OriginWeather ORDER BY AVG(FlightTimeMin), s SELECT OriginWeather, AVG(FlightTimeMin) AS a, SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginWeather ORDER BY a, SUM(FlightDelayMin) diff --git a/integ-test/src/test/resources/correctness/kibana_sample_data_flights.csv b/integ-test/src/test/resources/correctness/kibana_sample_data_flights.csv index 94efedcafb..ccd28457de 100644 --- a/integ-test/src/test/resources/correctness/kibana_sample_data_flights.csv +++ b/integ-test/src/test/resources/correctness/kibana_sample_data_flights.csv @@ -19,3 +19,4 @@ HVWAL6J,Comodoro Arturo Merino Benitez International Airport,false,7292.72928960 7ORM12S,Leonardo da Vinci___Fiumicino Airport,false,160.39074208529965,23.46580713004768,Sunny,0,118.37483602607261,Kibana Airlines,0,IT-62,No Delay,PI05,Pisa International Airport,0.39109678550079463,false,258.1238784305245,Rome,Sunny,IT,IT,IT-52,RM11,Pisa,2019-12-23 03:54:12 2P36OEP,New Chitose Airport,true,5340.290617241973,941.1970552595557,Cloudy,0,705.7149863531135,Kibana Airlines,225,SE-BD,Late Aircraft Delay,VIE,Vienna International Airport,15.686617587659262,false,8594.364663114668,Chitose / Tomakomai,Rain,JP,AT,AT-9,CTS,Vienna,2019-12-23 09:41:52 HLNZHCX,Verona Villafranca Airport,false,0,0,Sunny,0,172.3790782673846,ES-Air,0,IT-34,No Delay,VR10,Verona Villafranca Airport,0,false,0,Verona,Sunny,IT,IT,IT-34,VR10,Verona,2019-12-23 19:34:51 +HLNNULL,Verona Villafranca Airport,,,0,,0,172.3790782673846,ES-Air,0,IT-34,No Delay,VR10,Verona Villafranca Airport,0,false,0,Verona,Sunny,IT,IT,IT-34,VR10,Verona, diff --git a/integ-test/src/test/resources/correctness/queries/orderby.txt b/integ-test/src/test/resources/correctness/queries/orderby.txt index bba7ea4a40..c4e3262e91 100644 --- a/integ-test/src/test/resources/correctness/queries/orderby.txt +++ b/integ-test/src/test/resources/correctness/queries/orderby.txt @@ -11,3 +11,15 @@ SELECT FlightDelay, OriginWeather FROM kibana_sample_data_flights GROUP BY Fligh SELECT FlightDelay, OriginWeather FROM kibana_sample_data_flights GROUP BY FlightDelay, OriginWeather ORDER BY FlightDelay DESC, OriginWeather SELECT FlightDelay, OriginWeather FROM kibana_sample_data_flights GROUP BY FlightDelay, OriginWeather ORDER BY FlightDelay, OriginWeather DESC SELECT FlightDelay, OriginWeather FROM kibana_sample_data_flights GROUP BY FlightDelay, OriginWeather ORDER BY FlightDelay DESC, OriginWeather DESC +SELECT FlightNum, DistanceMiles FROM kibana_sample_data_flights ORDER BY DistanceMiles NULLS FIRST +SELECT FlightNum, DistanceMiles FROM kibana_sample_data_flights ORDER BY DistanceMiles NULLS LAST +SELECT FlightNum, DistanceMiles FROM kibana_sample_data_flights ORDER BY DistanceMiles ASC NULLS LAST +SELECT FlightNum, DistanceMiles FROM kibana_sample_data_flights ORDER BY DistanceMiles DESC NULLS FIRST +SELECT FlightNum, DistanceMiles, FlightDelay FROM kibana_sample_data_flights ORDER BY DistanceMiles, FlightDelay NULLS FIRST +SELECT FlightNum, DistanceMiles, FlightDelay FROM kibana_sample_data_flights ORDER BY DistanceMiles, FlightDelay NULLS LAST +SELECT FlightNum, DistanceMiles, FlightDelay FROM kibana_sample_data_flights ORDER BY DistanceMiles, FlightDelay ASC NULLS LAST +SELECT FlightNum, DistanceMiles, FlightDelay FROM kibana_sample_data_flights ORDER BY DistanceMiles, FlightDelay DESC NULLS FIRST +SELECT FlightNum, DistanceMiles, FlightDelay FROM kibana_sample_data_flights ORDER BY DistanceMiles NULLS LAST, FlightDelay NULLS FIRST +SELECT FlightNum, DistanceMiles, FlightDelay FROM kibana_sample_data_flights ORDER BY DistanceMiles NULLS FIRST, FlightDelay NULLS LAST +SELECT FlightNum, DistanceMiles, FlightDelay FROM kibana_sample_data_flights ORDER BY DistanceMiles DESC NULLS FIRST, FlightDelay ASC NULLS LAST +SELECT FlightNum, DistanceMiles, FlightDelay FROM kibana_sample_data_flights ORDER BY DistanceMiles ASC NULLS LAST, FlightDelay DESC NULLS FIRST \ No newline at end of file diff --git a/sql/src/main/antlr/OpenDistroSQLLexer.g4 b/sql/src/main/antlr/OpenDistroSQLLexer.g4 index 3aace09834..90514571b4 100644 --- a/sql/src/main/antlr/OpenDistroSQLLexer.g4 +++ b/sql/src/main/antlr/OpenDistroSQLLexer.g4 @@ -61,6 +61,7 @@ ELSE: 'ELSE'; EXISTS: 'EXISTS'; FALSE: 'FALSE'; FLOAT: 'FLOAT'; +FIRST: 'FIRST'; FROM: 'FROM'; GROUP: 'GROUP'; HAVING: 'HAVING'; @@ -69,6 +70,7 @@ INNER: 'INNER'; INT: 'INT'; IS: 'IS'; JOIN: 'JOIN'; +LAST: 'LAST'; LEFT: 'LEFT'; LIKE: 'LIKE'; LIMIT: 'LIMIT'; @@ -78,6 +80,7 @@ NATURAL: 'NATURAL'; MISSING_LITERAL: 'MISSING'; NOT: 'NOT'; NULL_LITERAL: 'NULL'; +NULLS: 'NULLS'; ON: 'ON'; OR: 'OR'; ORDER: 'ORDER'; diff --git a/sql/src/main/antlr/OpenDistroSQLParser.g4 b/sql/src/main/antlr/OpenDistroSQLParser.g4 index 34114b4839..5c5055adf6 100644 --- a/sql/src/main/antlr/OpenDistroSQLParser.g4 +++ b/sql/src/main/antlr/OpenDistroSQLParser.g4 @@ -113,10 +113,9 @@ orderByClause ; orderByElement - : expression order=(ASC | DESC)? + : expression order=(ASC | DESC)? (NULLS (FIRST | LAST))? ; - // Window Function's Details windowFunction : function=rankingWindowFunction overClause diff --git a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstSortBuilder.java b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstSortBuilder.java index 880c8a9bb8..ddff91eac3 100644 --- a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstSortBuilder.java +++ b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstSortBuilder.java @@ -16,8 +16,10 @@ package com.amazon.opendistroforelasticsearch.sql.sql.parser; -import static com.amazon.opendistroforelasticsearch.sql.ast.expression.DataType.BOOLEAN; +import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.booleanLiteral; import static com.amazon.opendistroforelasticsearch.sql.ast.expression.DataType.INTEGER; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder.DESC; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.OrderByClauseContext; import com.amazon.opendistroforelasticsearch.sql.ast.expression.Argument; @@ -25,6 +27,9 @@ import com.amazon.opendistroforelasticsearch.sql.ast.expression.Literal; import com.amazon.opendistroforelasticsearch.sql.ast.expression.UnresolvedExpression; import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOption; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder; import com.amazon.opendistroforelasticsearch.sql.ast.tree.UnresolvedPlan; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor; import com.amazon.opendistroforelasticsearch.sql.sql.parser.context.QuerySpecification; @@ -54,21 +59,30 @@ public UnresolvedPlan visitOrderByClause(OrderByClauseContext ctx) { private List createSortFields() { List fields = new ArrayList<>(); List items = querySpec.getOrderByItems(); - List options = querySpec.getOrderByOptions(); + List options = querySpec.getOrderByOptions(); for (int i = 0; i < items.size(); i++) { fields.add( new Field( querySpec.replaceIfAliasOrOrdinal(items.get(i)), - createSortArgument(options.get(i)))); + createSortArguments(options.get(i)))); } return fields; } - private List createSortArgument(String option) { - return ImmutableList.of( - new Argument( - "asc", - new Literal("ASC".equalsIgnoreCase(option), BOOLEAN))); + /** + * Argument "asc" is required. + * Argument "nullFirst" is optional and determined by Analyzer later if absent. + */ + private List createSortArguments(SortOption option) { + SortOrder sortOrder = option.getSortOrder(); + NullOrder nullOrder = option.getNullOrder(); + ImmutableList.Builder args = ImmutableList.builder(); + args.add(new Argument("asc", booleanLiteral(sortOrder != DESC))); // handle both null and ASC + + if (nullOrder != null) { + args.add(new Argument("nullFirst", booleanLiteral(nullOrder == NULL_FIRST))); + } + return args.build(); } } diff --git a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java index e8bed01604..c715f061b1 100644 --- a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java +++ b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java @@ -27,6 +27,9 @@ import com.amazon.opendistroforelasticsearch.sql.ast.expression.Literal; import com.amazon.opendistroforelasticsearch.sql.ast.expression.QualifiedName; import com.amazon.opendistroforelasticsearch.sql.ast.expression.UnresolvedExpression; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOption; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder; import com.amazon.opendistroforelasticsearch.sql.common.utils.StringUtils; import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.AggregateFunctionCallContext; @@ -42,7 +45,9 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; +import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.TerminalNode; /** * Query specification domain that collects basic info for a simple query. @@ -90,7 +95,7 @@ public class QuerySpecification { * Items in ORDER BY clause that may be different forms as above and its options. */ private final List orderByItems = new ArrayList<>(); - private final List orderByOptions = new ArrayList<>(); + private final List orderByOptions = new ArrayList<>(); /** * Collect all query information in the parse tree excluding info in sub-query). @@ -197,7 +202,10 @@ public Void visitGroupByElement(GroupByElementContext ctx) { @Override public Void visitOrderByElement(OrderByElementContext ctx) { orderByItems.add(visitAstExpression(ctx.expression())); - orderByOptions.add((ctx.order == null) ? "ASC" : ctx.order.getText()); + orderByOptions.add( + new SortOption( + visitSortOrder(ctx.order), + visitNullOrderClause(ctx.FIRST(), ctx.LAST()))); return super.visitOrderByElement(ctx); } @@ -207,6 +215,23 @@ public Void visitAggregateFunctionCall(AggregateFunctionCallContext ctx) { return super.visitAggregateFunctionCall(ctx); } + private SortOrder visitSortOrder(Token ctx) { + if (ctx == null) { + return null; + } + return SortOrder.valueOf(ctx.getText()); + } + + private NullOrder visitNullOrderClause(TerminalNode first, TerminalNode last) { + if (first != null) { + return NullOrder.NULL_FIRST; + } else if (last != null) { + return NullOrder.NULL_LAST; + } else { + return null; + } + } + private UnresolvedExpression visitAstExpression(ParseTree tree) { return expressionBuilder.visit(tree); } diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java index 1abde91751..0227fc7b4e 100644 --- a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -134,4 +134,14 @@ public void canNotParseAggregateFunctionWithWrongArgument() { assertThrows(SyntaxCheckException.class, () -> parser.parse("SELECT AVG(a,b,c) FROM test")); } + @Test + public void canParseOrderByClause() { + assertNotNull(parser.parse("SELECT name, age FROM test ORDER BY name, age")); + assertNotNull(parser.parse("SELECT name, age FROM test ORDER BY name ASC, age DESC")); + assertNotNull(parser.parse( + "SELECT name, age FROM test ORDER BY name NULLS LAST, age NULLS FIRST")); + assertNotNull(parser.parse( + "SELECT name, age FROM test ORDER BY name ASC NULLS FIRST, age DESC NULLS LAST")); + } + } \ No newline at end of file diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java index 1ee7421e78..4165dc1dcb 100644 --- a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java @@ -350,8 +350,9 @@ public void can_build_order_by_field_name() { sort( relation("test"), ImmutableList.of(argument("count", intLiteral(0))), - field("name", argument("asc", booleanLiteral(true)))), - alias("name", qualifiedName("name"))), + field("name", + argument("asc", booleanLiteral(true)))), + alias("name", qualifiedName("name"))), buildAST("SELECT name FROM test ORDER BY name")); } @@ -365,7 +366,7 @@ public void can_build_order_by_function() { field( function("ABS", qualifiedName("name")), argument("asc", booleanLiteral(true)))), - alias("name", qualifiedName("name"))), + alias("name", qualifiedName("name"))), buildAST("SELECT name FROM test ORDER BY ABS(name)")); } @@ -377,7 +378,7 @@ public void can_build_order_by_alias() { relation("test"), ImmutableList.of(argument("count", intLiteral(0))), field("name", argument("asc", booleanLiteral(true)))), - alias("name", qualifiedName("name"), "n")), + alias("name", qualifiedName("name"), "n")), buildAST("SELECT name AS n FROM test ORDER BY n ASC")); } @@ -389,7 +390,7 @@ public void can_build_order_by_ordinal() { relation("test"), ImmutableList.of(argument("count", intLiteral(0))), field("name", argument("asc", booleanLiteral(false)))), - alias("name", qualifiedName("name"))), + alias("name", qualifiedName("name"))), buildAST("SELECT name FROM test ORDER BY 1 DESC")); } @@ -401,12 +402,26 @@ public void can_build_order_by_multiple_field_names() { relation("test"), ImmutableList.of(argument("count", intLiteral(0))), field("name", argument("asc", booleanLiteral(true))), - field("age", argument("asc", booleanLiteral(false)))), - alias("name", qualifiedName("name")), + field("age", argument("asc", booleanLiteral(false)))), + alias("name", qualifiedName("name")), alias("age", qualifiedName("age"))), buildAST("SELECT name, age FROM test ORDER BY name, age DESC")); } + @Test + public void can_build_order_by_null_option() { + assertEquals( + project( + sort( + relation("test"), + ImmutableList.of(argument("count", intLiteral(0))), + field("name", + argument("asc", booleanLiteral(true)), + argument("nullFirst", booleanLiteral(false)))), + alias("name", qualifiedName("name"))), + buildAST("SELECT name FROM test ORDER BY name NULLS LAST")); + } + @Test public void can_build_from_subquery() { assertEquals( @@ -415,18 +430,18 @@ public void can_build_from_subquery() { relationSubquery( project( relation("test"), - alias("firstname", qualifiedName("firstname"), "first"), - alias("lastname", qualifiedName("lastname"), "last") + alias("firstname", qualifiedName("firstname"), "firstName"), + alias("lastname", qualifiedName("lastname"), "lastName") ), "a" ), function(">", qualifiedName("age"), intLiteral(20)) ), - alias("a.first", qualifiedName("a", "first")), - alias("last", qualifiedName("last"))), + alias("a.firstName", qualifiedName("a", "firstName")), + alias("lastName", qualifiedName("lastName"))), buildAST( - "SELECT a.first, last FROM (" - + "SELECT firstname AS first, lastname AS last FROM test" + "SELECT a.firstName, lastName FROM (" + + "SELECT firstname AS firstName, lastname AS lastName FROM test" + ") AS a where age > 20" ) ); diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstSortBuilderTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstSortBuilderTest.java index f055e62c9b..40cf6d7b46 100644 --- a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstSortBuilderTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstSortBuilderTest.java @@ -21,17 +21,25 @@ import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.field; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.intLiteral; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.qualifiedName; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder.NULL_LAST; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder.ASC; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder.DESC; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.AdditionalAnswers.returnsFirstArg; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.when; +import com.amazon.opendistroforelasticsearch.sql.ast.expression.Argument; import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOption; import com.amazon.opendistroforelasticsearch.sql.ast.tree.UnresolvedPlan; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.OrderByClauseContext; import com.amazon.opendistroforelasticsearch.sql.sql.parser.context.QuerySpecification; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.util.List; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; @@ -56,15 +64,36 @@ class AstSortBuilderTest { void can_build_sort_node() { doAnswer(returnsFirstArg()).when(querySpec).replaceIfAliasOrOrdinal(any()); when(querySpec.getOrderByItems()).thenReturn(ImmutableList.of(qualifiedName("name"))); - when(querySpec.getOrderByOptions()).thenReturn(ImmutableList.of("ASC")); - AstSortBuilder sortBuilder = new AstSortBuilder(querySpec); - assertEquals( - new Sort( - child, // has to mock and attach child otherwise Guava ImmutableList NPE in getChild() - ImmutableList.of(argument("count", intLiteral(0))), - ImmutableList.of(field("name", argument("asc", booleanLiteral(true))))), - sortBuilder.visitOrderByClause(orderByClause).attach(child)); + ImmutableMap> expects = + ImmutableMap.>builder() + .put(new SortOption(null, null), + ImmutableList.of(argument("asc", booleanLiteral(true)))) + .put(new SortOption(ASC, null), + ImmutableList.of(argument("asc", booleanLiteral(true)))) + .put(new SortOption(DESC, null), + ImmutableList.of(argument("asc", booleanLiteral(false)))) + .put(new SortOption(null, NULL_LAST), + ImmutableList.of( + argument("asc", booleanLiteral(true)), + argument("nullFirst", booleanLiteral(false)))) + .put(new SortOption(DESC, NULL_FIRST), + ImmutableList.of( + argument("asc", booleanLiteral(false)), + argument("nullFirst", booleanLiteral(true)))) + .build(); + + expects.forEach((option, expect) -> { + when(querySpec.getOrderByOptions()).thenReturn(ImmutableList.of(option)); + + AstSortBuilder sortBuilder = new AstSortBuilder(querySpec); + assertEquals( + new Sort( + child, // has to mock and attach child otherwise Guava ImmutableList NPE in getChild() + ImmutableList.of(argument("count", intLiteral(0))), + ImmutableList.of(field("name", expect))), + sortBuilder.visitOrderByClause(orderByClause).attach(child)); + }); } } \ No newline at end of file diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecificationTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecificationTest.java index 3f449ce5e1..9ccee1aecc 100644 --- a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecificationTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecificationTest.java @@ -20,8 +20,11 @@ import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.alias; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.function; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.qualifiedName; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.NullOrder; +import static com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOrder; import static org.junit.jupiter.api.Assertions.assertEquals; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Sort.SortOption; import com.amazon.opendistroforelasticsearch.sql.common.antlr.CaseInsensitiveCharStream; import com.amazon.opendistroforelasticsearch.sql.common.antlr.SyntaxAnalysisErrorListener; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLLexer; @@ -107,6 +110,21 @@ void should_deduplicate_same_aggregators() { querySpec.getAggregators()); } + @Test + void can_collect_sort_options_in_order_by_clause() { + assertEquals( + ImmutableList.of(new SortOption(null, null)), + collect("SELECT name FROM test ORDER BY name").getOrderByOptions()); + + assertEquals( + ImmutableList.of(new SortOption(SortOrder.ASC, NullOrder.NULL_LAST)), + collect("SELECT name FROM test ORDER BY name ASC NULLS LAST").getOrderByOptions()); + + assertEquals( + ImmutableList.of(new SortOption(SortOrder.DESC, NullOrder.NULL_FIRST)), + collect("SELECT name FROM test ORDER BY name DESC NULLS FIRST").getOrderByOptions()); + } + private QuerySpecification collect(String query) { QuerySpecification querySpec = new QuerySpecification(); querySpec.collect(parse(query), query);