Skip to content

Commit

Permalink
[SPARK-21129][SQL] Arguments of SQL function call should not be named…
Browse files Browse the repository at this point in the history
… expressions

### What changes were proposed in this pull request?

Function arguments should not be named expressions. Allowing them can cause two issues:
- Misleading error message
- Unexpected query results when the column name is `distinct`, which is not a reserved word in our parser.

```
spark-sql> select count(distinct c1, distinct c2) from t1;
Error in query: cannot resolve '`distinct`' given input columns: [c1, c2]; line 1 pos 26;
'Project [unresolvedalias('count(c1#30, 'distinct), None)]
+- SubqueryAlias t1
   +- CatalogRelation `default`.`t1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#30, c2#31]
```

After the fix, the error message becomes:
```
spark-sql> select count(distinct c1, distinct c2) from t1;
Error in query:
extraneous input 'c2' expecting {')', ',', '.', '[', 'OR', 'AND', 'IN', NOT, 'BETWEEN', 'LIKE', RLIKE, 'IS', EQ, '<=>', '<>', '!=', '<', LTE, '>', GTE, '+', '-', '*', '/', '%', 'DIV', '&', '|', '||', '^'}(line 1, pos 35)

== SQL ==
select count(distinct c1, distinct c2) from t1
-----------------------------------^^^
```

### How was this patch tested?
Added a test case to the parser suite.

Author: Xiao Li <[email protected]>
Author: gatorsmile <[email protected]>

Closes #18338 from gatorsmile/parserDistinctAggFunc.

(cherry picked from commit eed9c4e)
Signed-off-by: gatorsmile <[email protected]>
  • Loading branch information
gatorsmile committed Jun 30, 2017
1 parent 8b08fd0 commit 29a0be2
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -552,14 +552,15 @@ primaryExpression
| CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
| CAST '(' expression AS dataType ')' #cast
| STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct
| FIRST '(' expression (IGNORE NULLS)? ')' #first
| LAST '(' expression (IGNORE NULLS)? ')' #last
| constant #constantDefault
| ASTERISK #star
| qualifiedName '.' ASTERISK #star
| '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
| '(' query ')' #subqueryExpression
| qualifiedName '(' (setQuantifier? namedExpression (',' namedExpression)*)? ')'
| qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
(OVER windowSpec)? #functionCall
| value=primaryExpression '[' index=valueExpression ']' #subscript
| identifier #columnReference
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ package object dsl {
case Seq() => UnresolvedStar(None)
case target => UnresolvedStar(Option(target))
}
/**
 * DSL shortcut that wraps the given expressions in a [[CreateNamedStruct]].
 *
 * NOTE(review): the arguments are presumably alternating name/value expressions
 * (e.g. `namedStruct(NamePlaceholder, 'a, NamePlaceholder, 'b)`) -- confirm against
 * [[CreateNamedStruct]].
 */
def namedStruct(e: Expression*): Expression = {
  CreateNamedStruct(e)
}

def callFunction[T, U](
func: T => U,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,13 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
Cast(expression(ctx.expression), visitSparkDataType(ctx.dataType))
}

/**
 * Build a [[CreateStruct]] expression from a `STRUCT(...)` parse node.
 *
 * Every parsed argument of the call is converted to a Catalyst expression, in order,
 * and the resulting sequence is handed to [[CreateStruct]].
 */
override def visitStruct(ctx: StructContext): Expression = withOrigin(ctx) {
  // `ctx.argument` is a Java list produced by the generated parser.
  val structArguments = ctx.argument.asScala.map(expression)
  CreateStruct(structArguments)
}

/**
* Create a [[First]] expression.
*/
Expand All @@ -1056,7 +1063,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
// Create the function call.
val name = ctx.qualifiedName.getText
val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
val arguments = ctx.namedExpression().asScala.map(expression) match {
val arguments = ctx.argument.asScala.map(expression) match {
case Seq(UnresolvedStar(None))
if name.toLowerCase(Locale.ROOT) == "count" && !isDistinct =>
// Transform COUNT(*) into COUNT(1).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ class ExpressionParserSuite extends PlanTest {
assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b))
assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b))
assertEqual("`select`(all a, b)", 'select.function('a, 'b))
assertEqual("foo(a as x, b as e)", 'foo.function('a as 'x, 'b as 'e))
intercept("foo(a x)", "extraneous input 'x'")
}

test("window function expressions") {
Expand Down Expand Up @@ -325,7 +325,9 @@ class ExpressionParserSuite extends PlanTest {
assertEqual("a.b", UnresolvedAttribute("a.b"))
assertEqual("`select`.b", UnresolvedAttribute("select.b"))
assertEqual("(a + b).b", ('a + 'b).getField("b")) // This will fail analysis.
assertEqual("struct(a, b).b", 'struct.function('a, 'b).getField("b"))
assertEqual(
"struct(a, b).b",
namedStruct(NamePlaceholder, 'a, NamePlaceholder, 'b).getField("b"))
}

test("reference") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,12 @@ class PlanParserSuite extends PlanTest {
assertEqual(s"$sql grouping sets((a, b), (a), ())",
GroupingSets(Seq(Seq('a, 'b), Seq('a), Seq()), Seq('a, 'b), table("d"),
Seq('a, 'b, 'sum.function('c).as("c"))))

val m = intercept[ParseException] {
parsePlan("SELECT a, b, count(distinct a, distinct b) as c FROM d GROUP BY a, b")
}.getMessage
assert(m.contains("extraneous input 'b'"))

}

test("limit") {
Expand Down
7 changes: 7 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/struct.sql
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@ SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x;

-- Prepend a column to a struct
SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x;

-- Select a column from a struct
SELECT ID, STRUCT(ST.*).C NST FROM tbl_x;
SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x;

-- Select an alias from a struct
SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x;
32 changes: 31 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/struct.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 6
-- Number of queries: 9


-- !query 0
Expand Down Expand Up @@ -58,3 +58,33 @@ struct<ID:int,NST:struct<AA:string,C:string,D:string>>
1 {"AA":"1","C":"gamma","D":"delta"}
2 {"AA":"2","C":"epsilon","D":"eta"}
3 {"AA":"3","C":"theta","D":"iota"}


-- !query 6
SELECT ID, STRUCT(ST.*).C NST FROM tbl_x
-- !query 6 schema
struct<ID:int,NST:string>
-- !query 6 output
1 gamma
2 epsilon
3 theta


-- !query 7
SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x
-- !query 7 schema
struct<ID:int,NST:string>
-- !query 7 output
1 delta
2 eta
3 iota


-- !query 8
SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x
-- !query 8 schema
struct<ID:int,named_struct(STC, ST.C AS `C` AS `STC`, STD, ST.D AS `D` AS `STD`).STD:string>
-- !query 8 output
1 delta
2 eta
3 iota

0 comments on commit 29a0be2

Please sign in to comment.