diff --git a/docs/user/dql/basics.rst b/docs/user/dql/basics.rst index d80b2981f0..4866e11684 100644 --- a/docs/user/dql/basics.rst +++ b/docs/user/dql/basics.rst @@ -20,7 +20,7 @@ Syntax The syntax of ``SELECT`` statement is as follows:: - SELECT [DISTINCT] (* | expression) [[AS] alias] [, ...] + SELECT [ALL | DISTINCT] (* | expression) [[AS] alias] [, ...] FROM index_name [WHERE predicates] [GROUP BY expression [, ...] @@ -199,7 +199,7 @@ Result set: Example 4: Selecting Distinct Fields ------------------------------------ -``DISTINCT`` is useful when you want to de-duplicate and get unique field value. You can provide one or more field names. +By default, ``SELECT ALL`` takes effect to return all rows. ``DISTINCT`` is useful when you want to de-duplicate and get unique field value. You can provide one or more field names ('DISTINCT *' is not supported yet). SQL query:: @@ -255,6 +255,17 @@ Result set: | 36| +---+ +In fact your can use any expression in a ``DISTINCT`` clause as follows:: + + od> SELECT DISTINCT SUBSTRING(lastname, 1, 1) FROM accounts; + fetched rows / total rows = 3/3 + +-----------------------------+ + | SUBSTRING(lastname, 1, 1) | + |-----------------------------| + | A | + | B | + | D | + +-----------------------------+ FROM ==== diff --git a/integ-test/src/test/resources/correctness/queries/select.txt b/integ-test/src/test/resources/correctness/queries/select.txt index fb94183fd1..9aef8669a9 100644 --- a/integ-test/src/test/resources/correctness/queries/select.txt +++ b/integ-test/src/test/resources/correctness/queries/select.txt @@ -20,3 +20,8 @@ SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE AvgTicketPr SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE ABS(AvgTicketPrice * -2) > 1000 SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE Carrier LIKE 'JetBeat_' SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE Carrier LIKE '%Air%' +SELECT ALL OriginWeather FROM kibana_sample_data_flights +SELECT DISTINCT OriginWeather FROM kibana_sample_data_flights +SELECT DISTINCT OriginWeather, FlightDelay FROM kibana_sample_data_flights +SELECT DISTINCT SUBSTRING(OriginWeather, 1, 1) AS origin FROM kibana_sample_data_flights +SELECT DISTINCT SUBSTRING(OriginWeather, 1, 1) AS origin, FlightDelay FROM kibana_sample_data_flights diff --git a/sql/src/main/antlr/OpenDistroSQLParser.g4 b/sql/src/main/antlr/OpenDistroSQLParser.g4 index 5c5055adf6..894b240011 100644 --- a/sql/src/main/antlr/OpenDistroSQLParser.g4 +++ b/sql/src/main/antlr/OpenDistroSQLParser.g4 @@ -64,7 +64,11 @@ querySpecification ; selectClause - : SELECT selectElements + : SELECT selectSpec? selectElements + ; + +selectSpec + : (ALL | DISTINCT) ; selectElements diff --git a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstAggregationBuilder.java b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstAggregationBuilder.java index e09914ab89..40f4d734f4 100644 --- a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstAggregationBuilder.java +++ b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstAggregationBuilder.java @@ -27,7 +27,6 @@ import com.amazon.opendistroforelasticsearch.sql.ast.tree.UnresolvedPlan; import com.amazon.opendistroforelasticsearch.sql.common.utils.StringUtils; import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException; -import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.GroupByClauseContext; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor; import com.amazon.opendistroforelasticsearch.sql.sql.parser.context.QuerySpecification; import java.util.ArrayList; @@ -42,7 +41,7 @@ * AST aggregation builder that builds AST aggregation node for the following scenarios: * * 1. Explicit GROUP BY - * 1.1 Group by column name or scalar expression: + * 1.1 Group by column name or scalar expression (SELECT DISTINCT equivalent): * SELECT ABS(age) FROM test GROUP BY ABS(age) * 1.2 Group by alias in SELECT AS clause: * SELECT state AS s FROM test GROUP BY s @@ -77,18 +76,17 @@ public class AstAggregationBuilder extends OpenDistroSQLParserBaseVisitor groupByItems = replaceGroupByItemIfAliasOrOrdinal(); return new Aggregation( new ArrayList<>(querySpec.getAggregators()), diff --git a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java index 68f34da90e..6927e2b712 100644 --- a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java +++ b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java @@ -18,6 +18,7 @@ import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.GroupByElementContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.OrderByElementContext; +import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectClauseContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectElementContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SubqueryAsRelationContext; import static com.amazon.opendistroforelasticsearch.sql.sql.parser.ParserUtils.getTextInQuery; @@ -34,6 +35,7 @@ import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.AggregateFunctionCallContext; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.QuerySpecificationContext; +import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectSpecContext; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor; import com.amazon.opendistroforelasticsearch.sql.sql.parser.AstExpressionBuilder; import java.util.ArrayList; @@ -181,6 +183,17 @@ public Void visitSubqueryAsRelation(SubqueryAsRelationContext ctx) { return null; } + @Override + public Void visitSelectClause(SelectClauseContext ctx) { + super.visitSelectClause(ctx); + + // SELECT DISTINCT is an equivalent and special form of GROUP BY + if (isDistinct(ctx.selectSpec())) { + groupByItems.addAll(selectItems); + } + return null; + } + @Override public Void visitSelectElement(SelectElementContext ctx) { UnresolvedExpression expr = visitAstExpression(ctx.expression()); @@ -215,6 +228,10 @@ public Void visitAggregateFunctionCall(AggregateFunctionCallContext ctx) { return super.visitAggregateFunctionCall(ctx); } + private boolean isDistinct(SelectSpecContext ctx) { + return (ctx != null) && (ctx.DISTINCT() != null); + } + private SortOrder visitSortOrder(Token ctx) { if (ctx == null) { return null; diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java index 0227fc7b4e..0b9a391d5e 100644 --- a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -116,6 +116,12 @@ public void canParseGroupByClause() { assertNotNull(parser.parse("SELECT ABS(balance) FROM test GROUP BY 1")); } + @Test + public void canParseDistinctClause() { + assertNotNull(parser.parse("SELECT DISTINCT name FROM test")); + assertNotNull(parser.parse("SELECT DISTINCT name, balance FROM test")); + } + @Test public void canParseCaseStatement() { assertNotNull(parser.parse("SELECT CASE WHEN age > 30 THEN 'age1' ELSE 'age2' END FROM test")); diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java index 8f373a613a..89e9be4eb4 100644 --- a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java @@ -402,6 +402,54 @@ public void can_build_order_by_multiple_field_names() { buildAST("SELECT name, age FROM test ORDER BY name, age DESC")); } + @Test + public void can_build_select_distinct_clause() { + assertEquals( + project( + agg( + relation("test"), + emptyList(), + emptyList(), + ImmutableList.of( + alias("name", qualifiedName("name")), + alias("age", qualifiedName("age"))), + emptyList()), + alias("name", qualifiedName("name")), + alias("age", qualifiedName("age"))), + buildAST("SELECT DISTINCT name, age FROM test")); + } + + @Test + public void can_build_select_distinct_clause_with_function() { + assertEquals( + project( + agg( + relation("test"), + emptyList(), + emptyList(), + ImmutableList.of( + alias("SUBSTRING(name, 1, 2)", + function( + "SUBSTRING", + qualifiedName("name"), + intLiteral(1), intLiteral(2)))), + emptyList()), + alias("SUBSTRING(name, 1, 2)", + function( + "SUBSTRING", + qualifiedName("name"), + intLiteral(1), intLiteral(2)))), + buildAST("SELECT DISTINCT SUBSTRING(name, 1, 2) FROM test")); + } + + @Test + public void can_build_select_all_clause() { + assertEquals( + buildAST("SELECT name, age FROM test"), + buildAST("SELECT ALL name, age FROM test") + ); + } + @Test public void can_build_order_by_null_option() { assertEquals(