From 452cfb5259e2942364aeede944cccaeda7d19a24 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 4 Sep 2015 16:53:57 +0800 Subject: [PATCH 1/4] Support aggregation expressions in Order By. --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 591747b45c376..c36ccb5e09336 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -561,7 +561,7 @@ class Analyzer( } case sort @ Sort(sortOrder, global, aggregate: Aggregate) - if aggregate.resolved && !sort.resolved => + if aggregate.resolved => // Try resolving the ordering as though it is in the aggregate clause. try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0ef25fe0faef0..b5cb2c2fff2f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1519,6 +1519,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { |ORDER BY sum(b) + 1 """.stripMargin), Row("4", 3) :: Row("1", 7) :: Row("3", 11) :: Row("2", 15) :: Nil) + + Seq("1" -> 3, "2" -> 7, "2" -> 8, "3" -> 5, "3" -> 6, "3" -> 2, "4" -> 1, "4" -> 2, + "4" -> 3, "4" -> 4).toDF("a", "b").registerTempTable("orderByData2") + + checkAnswer( + sql( + """ + |SELECT a, count(*) + |FROM orderByData2 + |GROUP BY a + |ORDER BY count(*) + """.stripMargin), + Row("1", 1) :: Row("2", 2) :: Row("3", 3) :: Row("4", 4) :: Nil) } test("SPARK-7952: fix the equality check between boolean and numeric types") { From e65f4dbf1fa00d786ba7ebf0f302861965d6d5cc Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 5 Sep 2015 00:02:32 +0800 Subject: [PATCH 2/4] For comment. --- .../spark/sql/catalyst/analysis/Analyzer.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c36ccb5e09336..02f34cbf58ad0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -598,9 +598,15 @@ class Analyzer( } } - Project(aggregate.output, - Sort(evaluatedOrderings, global, - aggregate.copy(aggregateExpressions = originalAggExprs ++ needsPushDown))) + // Since we don't rely on sort.resolved as the stop condition for this rule, + // we need to check this and prevent applying this rule multiple times + if (sortOrder == evaluatedOrderings) { + sort + } else { + Project(aggregate.output, + Sort(evaluatedOrderings, global, + aggregate.copy(aggregateExpressions = originalAggExprs ++ needsPushDown))) + } } catch { // Attempting to resolve in the aggregate can result in ambiguity. When this happens, // just return the original plan. From 8f73c40dedf1cbf711d3cf05e4f50c376932c8f6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 5 Sep 2015 12:37:01 +0800 Subject: [PATCH 3/4] Remove orderByData2. --- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b5cb2c2fff2f6..5528331d13619 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1520,18 +1520,15 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { """.stripMargin), Row("4", 3) :: Row("1", 7) :: Row("3", 11) :: Row("2", 15) :: Nil) - Seq("1" -> 3, "2" -> 7, "2" -> 8, "3" -> 5, "3" -> 6, "3" -> 2, "4" -> 1, "4" -> 2, - "4" -> 3, "4" -> 4).toDF("a", "b").registerTempTable("orderByData2") - checkAnswer( sql( """ - |SELECT a, count(*) - |FROM orderByData2 + |SELECT count(*) + |FROM orderByData |GROUP BY a |ORDER BY count(*) """.stripMargin), - Row("1", 1) :: Row("2", 2) :: Row("3", 3) :: Row("4", 4) :: Nil) + Row(2) :: Row(2) :: Row(2) :: Row(2) :: Nil) } test("SPARK-7952: fix the equality check between boolean and numeric types") { From 046a8cb880bd5418bdd9372772742e61320f4d43 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 9 Sep 2015 12:11:17 +0800 Subject: [PATCH 4/4] Add more test. --- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 9b7d3e9d0f731..c321ae661fd26 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1538,6 +1538,16 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { |ORDER BY count(*) """.stripMargin), Row(2) :: Row(2) :: Row(2) :: Row(2) :: Nil) + + checkAnswer( + sql( + """ + |SELECT a + |FROM orderByData + |GROUP BY a + |ORDER BY a, count(*), sum(b) + """.stripMargin), + Row("1") :: Row("2") :: Row("3") :: Row("4") :: Nil) } test("SPARK-7952: fix the equality check between boolean and numeric types") {