Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
Support SELECT DISTINCT in new SQL engine (#833)
Browse files Browse the repository at this point in the history
* Change grammar

* Add UT and AST builder

* Pass jacoco

* Pass jacoco

* Add comparison test

* Add doctest

* Change doc

* Prepare PR

* Add doc for distinct * limitation
  • Loading branch information
dai-chen authored Dec 8, 2020
1 parent 9f65c67 commit 0cb3240
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 9 deletions.
15 changes: 13 additions & 2 deletions docs/user/dql/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Syntax

The syntax of ``SELECT`` statement is as follows::

SELECT [DISTINCT] (* | expression) [[AS] alias] [, ...]
SELECT [ALL | DISTINCT] (* | expression) [[AS] alias] [, ...]
FROM index_name
[WHERE predicates]
[GROUP BY expression [, ...]
Expand Down Expand Up @@ -199,7 +199,7 @@ Result set:
Example 4: Selecting Distinct Fields
------------------------------------

``DISTINCT`` is useful when you want to de-duplicate and get unique field value. You can provide one or more field names.
By default, ``SELECT ALL`` takes effect to return all rows. ``DISTINCT`` is useful when you want to de-duplicate and get unique field value. You can provide one or more field names ('DISTINCT *' is not supported yet).
SQL query::

Expand Down Expand Up @@ -255,6 +255,17 @@ Result set:
| 36|
+---+

In fact your can use any expression in a ``DISTINCT`` clause as follows::

od> SELECT DISTINCT SUBSTRING(lastname, 1, 1) FROM accounts;
fetched rows / total rows = 3/3
+-----------------------------+
| SUBSTRING(lastname, 1, 1) |
|-----------------------------|
| A |
| B |
| D |
+-----------------------------+

FROM
====
Expand Down
5 changes: 5 additions & 0 deletions integ-test/src/test/resources/correctness/queries/select.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,8 @@ SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE AvgTicketPr
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE ABS(AvgTicketPrice * -2) > 1000
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE Carrier LIKE 'JetBeat_'
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE Carrier LIKE '%Air%'
SELECT ALL OriginWeather FROM kibana_sample_data_flights
SELECT DISTINCT OriginWeather FROM kibana_sample_data_flights
SELECT DISTINCT OriginWeather, FlightDelay FROM kibana_sample_data_flights
SELECT DISTINCT SUBSTRING(OriginWeather, 1, 1) AS origin FROM kibana_sample_data_flights
SELECT DISTINCT SUBSTRING(OriginWeather, 1, 1) AS origin, FlightDelay FROM kibana_sample_data_flights
6 changes: 5 additions & 1 deletion sql/src/main/antlr/OpenDistroSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ querySpecification
;

selectClause
: SELECT selectElements
: SELECT selectSpec? selectElements
;

selectSpec
: (ALL | DISTINCT)
;

selectElements
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import com.amazon.opendistroforelasticsearch.sql.ast.tree.UnresolvedPlan;
import com.amazon.opendistroforelasticsearch.sql.common.utils.StringUtils;
import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException;
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.GroupByClauseContext;
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor;
import com.amazon.opendistroforelasticsearch.sql.sql.parser.context.QuerySpecification;
import java.util.ArrayList;
Expand All @@ -42,7 +41,7 @@
* AST aggregation builder that builds AST aggregation node for the following scenarios:
*
* 1. Explicit GROUP BY
* 1.1 Group by column name or scalar expression:
* 1.1 Group by column name or scalar expression (SELECT DISTINCT equivalent):
* SELECT ABS(age) FROM test GROUP BY ABS(age)
* 1.2 Group by alias in SELECT AS clause:
* SELECT state AS s FROM test GROUP BY s
Expand Down Expand Up @@ -77,18 +76,17 @@ public class AstAggregationBuilder extends OpenDistroSQLParserBaseVisitor<Unreso

@Override
public UnresolvedPlan visit(ParseTree groupByClause) {
if (groupByClause == null) {
if (querySpec.getGroupByItems().isEmpty()) {
if (isAggregatorNotFoundAnywhere()) {
// Simple select query without GROUP BY and aggregate function in SELECT
return null;
}
return buildImplicitAggregation();
}
return super.visit(groupByClause);
return buildExplicitAggregation();
}

@Override
public UnresolvedPlan visitGroupByClause(GroupByClauseContext ctx) {
private UnresolvedPlan buildExplicitAggregation() {
List<UnresolvedExpression> groupByItems = replaceGroupByItemIfAliasOrOrdinal();
return new Aggregation(
new ArrayList<>(querySpec.getAggregators()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.GroupByElementContext;
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.OrderByElementContext;
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectClauseContext;
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectElementContext;
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SubqueryAsRelationContext;
import static com.amazon.opendistroforelasticsearch.sql.sql.parser.ParserUtils.getTextInQuery;
Expand All @@ -34,6 +35,7 @@
import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException;
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.AggregateFunctionCallContext;
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.QuerySpecificationContext;
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectSpecContext;
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor;
import com.amazon.opendistroforelasticsearch.sql.sql.parser.AstExpressionBuilder;
import java.util.ArrayList;
Expand Down Expand Up @@ -181,6 +183,17 @@ public Void visitSubqueryAsRelation(SubqueryAsRelationContext ctx) {
return null;
}

@Override
public Void visitSelectClause(SelectClauseContext ctx) {
super.visitSelectClause(ctx);

// SELECT DISTINCT is an equivalent and special form of GROUP BY
if (isDistinct(ctx.selectSpec())) {
groupByItems.addAll(selectItems);
}
return null;
}

@Override
public Void visitSelectElement(SelectElementContext ctx) {
UnresolvedExpression expr = visitAstExpression(ctx.expression());
Expand Down Expand Up @@ -215,6 +228,10 @@ public Void visitAggregateFunctionCall(AggregateFunctionCallContext ctx) {
return super.visitAggregateFunctionCall(ctx);
}

private boolean isDistinct(SelectSpecContext ctx) {
return (ctx != null) && (ctx.DISTINCT() != null);
}

private SortOrder visitSortOrder(Token ctx) {
if (ctx == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ public void canParseGroupByClause() {
assertNotNull(parser.parse("SELECT ABS(balance) FROM test GROUP BY 1"));
}

@Test
public void canParseDistinctClause() {
assertNotNull(parser.parse("SELECT DISTINCT name FROM test"));
assertNotNull(parser.parse("SELECT DISTINCT name, balance FROM test"));
}

@Test
public void canParseCaseStatement() {
assertNotNull(parser.parse("SELECT CASE WHEN age > 30 THEN 'age1' ELSE 'age2' END FROM test"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,54 @@ public void can_build_order_by_multiple_field_names() {
buildAST("SELECT name, age FROM test ORDER BY name, age DESC"));
}

@Test
public void can_build_select_distinct_clause() {
assertEquals(
project(
agg(
relation("test"),
emptyList(),
emptyList(),
ImmutableList.of(
alias("name", qualifiedName("name")),
alias("age", qualifiedName("age"))),
emptyList()),
alias("name", qualifiedName("name")),
alias("age", qualifiedName("age"))),
buildAST("SELECT DISTINCT name, age FROM test"));
}

@Test
public void can_build_select_distinct_clause_with_function() {
assertEquals(
project(
agg(
relation("test"),
emptyList(),
emptyList(),
ImmutableList.of(
alias("SUBSTRING(name, 1, 2)",
function(
"SUBSTRING",
qualifiedName("name"),
intLiteral(1), intLiteral(2)))),
emptyList()),
alias("SUBSTRING(name, 1, 2)",
function(
"SUBSTRING",
qualifiedName("name"),
intLiteral(1), intLiteral(2)))),
buildAST("SELECT DISTINCT SUBSTRING(name, 1, 2) FROM test"));
}

@Test
public void can_build_select_all_clause() {
assertEquals(
buildAST("SELECT name, age FROM test"),
buildAST("SELECT ALL name, age FROM test")
);
}

@Test
public void can_build_order_by_null_option() {
assertEquals(
Expand Down

0 comments on commit 0cb3240

Please sign in to comment.