From 56084cc2b9bd246ed84dc009d2d5b2b426d0f595 Mon Sep 17 00:00:00 2001
From: William Benton <willb@redhat.com>
Date: Wed, 18 Jun 2014 15:20:41 -0500
Subject: [PATCH 1/6] Add support for HAVING clauses in Hive queries.

---
 .../org/apache/spark/sql/hive/HiveQl.scala    | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index ec653efcc8c58..67ab17dd1fc20 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -480,6 +480,7 @@ private[hive] object HiveQl {
             whereClause ::
             groupByClause ::
             orderByClause ::
+            havingClause ::
             sortByClause ::
             clusterByClause ::
             distributeByClause ::
@@ -494,6 +495,7 @@ private[hive] object HiveQl {
               "TOK_WHERE",
               "TOK_GROUPBY",
               "TOK_ORDERBY",
+              "TOK_HAVING",
               "TOK_SORTBY",
               "TOK_CLUSTERBY",
               "TOK_DISTRIBUTEBY",
@@ -576,21 +578,26 @@ private[hive] object HiveQl {
         val withDistinct =
           if (selectDistinctClause.isDefined) Distinct(withProject) else withProject
 
+        val withHaving = havingClause.map { h => 
+          val Seq(havingExpr) = h.getChildren.toSeq
+          Filter(nodeToExpr(havingExpr), withDistinct)
+        }.getOrElse(withDistinct)
+
         val withSort =
           (orderByClause, sortByClause, distributeByClause, clusterByClause) match {
             case (Some(totalOrdering), None, None, None) =>
-              Sort(totalOrdering.getChildren.map(nodeToSortOrder), withDistinct)
+              Sort(totalOrdering.getChildren.map(nodeToSortOrder), withHaving)
             case (None, Some(perPartitionOrdering), None, None) =>
-              SortPartitions(perPartitionOrdering.getChildren.map(nodeToSortOrder), withDistinct)
+              SortPartitions(perPartitionOrdering.getChildren.map(nodeToSortOrder), withHaving)
             case (None, None, Some(partitionExprs), None) =>
-              Repartition(partitionExprs.getChildren.map(nodeToExpr), withDistinct)
+              Repartition(partitionExprs.getChildren.map(nodeToExpr), withHaving)
             case (None, Some(perPartitionOrdering), Some(partitionExprs), None) =>
               SortPartitions(perPartitionOrdering.getChildren.map(nodeToSortOrder),
-                Repartition(partitionExprs.getChildren.map(nodeToExpr), withDistinct))
+                Repartition(partitionExprs.getChildren.map(nodeToExpr), withHaving))
             case (None, None, None, Some(clusterExprs)) =>
               SortPartitions(clusterExprs.getChildren.map(nodeToExpr).map(SortOrder(_, Ascending)),
-                Repartition(clusterExprs.getChildren.map(nodeToExpr), withDistinct))
-            case (None, None, None, None) => withDistinct
+                Repartition(clusterExprs.getChildren.map(nodeToExpr), withHaving))
+            case (None, None, None, None) => withHaving
             case _ => sys.error("Unsupported set of ordering / distribution clauses.")
           }
 

From 942428e090b2dd45f51d98a501dccfb971ca8910 Mon Sep 17 00:00:00 2001
From: William Benton <willb@redhat.com>
Date: Thu, 19 Jun 2014 13:59:54 -0500
Subject: [PATCH 2/6] Added test coverage for SPARK-2180.

This is a simple test for HAVING clauses.
---
 .../sql/hive/execution/HiveQuerySuite.scala    | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 9f5cf282f7c48..577a4436b4308 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -224,6 +224,21 @@ class HiveQuerySuite extends HiveComparisonTest {
     TestHive.reset()
   }
 
+  test("SPARK-2180: HAVING support in GROUP BY clauses") {
+    val fixture = List(("foo", 2), ("bar", 1), ("foo", 4), ("bar", 3))
+      .zipWithIndex.map {case Pair(Pair(value, attr), key) => HavingRow(key, value, attr)}
+    
+    TestHive.sparkContext.parallelize(fixture).registerAsTable("having_test")
+    
+    val results = hql("SELECT value, max(attr) AS attr FROM having_test GROUP BY value HAVING attr > 3")
+      .collect()
+      .map(x => Pair(x.getString(0), x.getInt(1)))
+    
+    assert(results === Array(Pair("foo", 4)))
+    
+    TestHive.reset()
+  }
+
   test("Query Hive native command execution result") {
     val tableName = "test_native_commands"
 
@@ -441,3 +456,6 @@ class HiveQuerySuite extends HiveComparisonTest {
   // since they modify /clear stuff.
 
 }
+
+// for SPARK-2180 test
+case class HavingRow(key: Int, value: String, attr: Int)

From b880bef264431f6ab71032b6bd8b63a521f4cb84 Mon Sep 17 00:00:00 2001
From: William Benton <willb@redhat.com>
Date: Thu, 19 Jun 2014 15:59:08 -0500
Subject: [PATCH 3/6] Added semantic error for HAVING without GROUP BY

---
 .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 67ab17dd1fc20..fe05757753b1f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -204,6 +204,9 @@ private[hive] object HiveQl {
   class ParseException(sql: String, cause: Throwable)
     extends Exception(s"Failed to parse: $sql", cause)
 
+  class SemanticException(msg: String)
+    extends Exception(msg)
+
   /**
    * Returns the AST for the given SQL string.
    */
@@ -579,6 +582,11 @@ private[hive] object HiveQl {
           if (selectDistinctClause.isDefined) Distinct(withProject) else withProject
 
         val withHaving = havingClause.map { h => 
+
+          if (groupByClause == None) {
+            throw new SemanticException("Error in semantic analysis: HAVING specified without GROUP BY")
+          }
+
           val Seq(havingExpr) = h.getChildren.toSeq
           Filter(nodeToExpr(havingExpr), withDistinct)
         }.getOrElse(withDistinct)

From 18387f108bcd7c754979db0207329d5437b86136 Mon Sep 17 00:00:00 2001
From: William Benton <willb@redhat.com>
Date: Thu, 19 Jun 2014 16:13:09 -0500
Subject: [PATCH 4/6] Add test for HAVING without GROUP BY

---
 .../apache/spark/sql/hive/execution/HiveQuerySuite.scala  | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 577a4436b4308..92d7027e89371 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -224,7 +224,7 @@ class HiveQuerySuite extends HiveComparisonTest {
     TestHive.reset()
   }
 
-  test("SPARK-2180: HAVING support in GROUP BY clauses") {
+  test("SPARK-2180: HAVING support in GROUP BY clauses (positive)") {
     val fixture = List(("foo", 2), ("bar", 1), ("foo", 4), ("bar", 3))
       .zipWithIndex.map {case Pair(Pair(value, attr), key) => HavingRow(key, value, attr)}
     
@@ -239,6 +239,12 @@ class HiveQuerySuite extends HiveComparisonTest {
     TestHive.reset()
   }
 
+  test("SPARK-2180:  HAVING without GROUP BY raises exception") {
+    intercept[Exception] {
+      hql("SELECT value, attr FROM having_test HAVING attr > 3")
+    }
+  }
+
   test("Query Hive native command execution result") {
     val tableName = "test_native_commands"
 

From 83f1340c70e4588fe2df3492138877c3453f64b3 Mon Sep 17 00:00:00 2001
From: William Benton <willb@redhat.com>
Date: Thu, 19 Jun 2014 16:37:32 -0500
Subject: [PATCH 5/6] scalastyle fixes

---
 .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index fe05757753b1f..6901acff5c644 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -205,7 +205,7 @@ private[hive] object HiveQl {
     extends Exception(s"Failed to parse: $sql", cause)
 
   class SemanticException(msg: String)
-    extends Exception(msg)
+    extends Exception(s"Error in semantic analysis: $msg")
 
   /**
    * Returns the AST for the given SQL string.
@@ -584,7 +584,7 @@ private[hive] object HiveQl {
         val withHaving = havingClause.map { h => 
 
           if (groupByClause == None) {
-            throw new SemanticException("Error in semantic analysis: HAVING specified without GROUP BY")
+            throw new SemanticException("HAVING specified without GROUP BY")
           }
 
           val Seq(havingExpr) = h.getChildren.toSeq

From 3bbaf266c87f599c029560ebce24246513c5eec7 Mon Sep 17 00:00:00 2001
From: William Benton <willb@redhat.com>
Date: Fri, 20 Jun 2014 08:13:24 -0500
Subject: [PATCH 6/6] Added casts to HAVING expressions

---
 .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala  | 7 +++++--
 .../apache/spark/sql/hive/execution/HiveQuerySuite.scala   | 7 ++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 6901acff5c644..c69e3dba6b467 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -587,8 +587,11 @@ private[hive] object HiveQl {
             throw new SemanticException("HAVING specified without GROUP BY")
           }
 
-          val Seq(havingExpr) = h.getChildren.toSeq
-          Filter(nodeToExpr(havingExpr), withDistinct)
+          val havingExpr = h.getChildren.toSeq match {
+            case Seq(hexpr) => nodeToExpr(hexpr)
+          }
+          
+          Filter(Cast(havingExpr, BooleanType), withDistinct)
         }.getOrElse(withDistinct)
 
         val withSort =
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 92d7027e89371..80185098bf24f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -230,7 +230,8 @@ class HiveQuerySuite extends HiveComparisonTest {
     
     TestHive.sparkContext.parallelize(fixture).registerAsTable("having_test")
     
-    val results = hql("SELECT value, max(attr) AS attr FROM having_test GROUP BY value HAVING attr > 3")
+    val results = 
+      hql("SELECT value, max(attr) AS attr FROM having_test GROUP BY value HAVING attr > 3")
       .collect()
       .map(x => Pair(x.getString(0), x.getInt(1)))
     
@@ -244,6 +245,10 @@ class HiveQuerySuite extends HiveComparisonTest {
       hql("SELECT value, attr FROM having_test HAVING attr > 3")
     }
   }
+  
+  test("SPARK-2180:  HAVING with non-boolean clause raises no exceptions") {
+    val results = hql("select key, count(*) c from src group by key having c").collect()
+  }
 
   test("Query Hive native command execution result") {
     val tableName = "test_native_commands"