apache · petermaxlee · Aug 3, 2016 · Aug 3, 2016 · Aug 3, 2016 · Aug 5, 2016
diff --git a/dev/.rat-excludes b/dev/.rat-excludes
@@ -99,4 +99,5 @@ spark-deps-.*
 .*tsv
 org.apache.spark.scheduler.ExternalClusterManager
 .*\.sql
+.*\.sql\.xml
 .Rbuildignore
diff --git a/sql/core/src/test/resources/sql-tests/inputs/blacklist.sql b/sql/core/src/test/resources/sql-tests/inputs/blacklist.sql
@@ -0,0 +1,4 @@
+-- This is a query file that has been blacklisted.
+-- It includes a query that should crash Spark.
+-- If the test case is run, the whole suite would fail.
+some random not working query that should crash Spark.
diff --git a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql b/sql/core/src/test/resources/sql-tests/inputs/number-format.sql
@@ -0,0 +1,13 @@
+-- Verifies how we parse numbers
+
+-- parse as ints
+select 1, -1;
+
+-- parse as longs
+select 2147483648, -2147483649;
+
+-- parse as decimals
+select 9223372036854775808, -9223372036854775809;
+
+-- various floating point formats
+select 0.3, -0.8, .5, -.18;
diff --git a/sql/core/src/test/resources/sql-tests/results/number-format.sql.out b/sql/core/src/test/resources/sql-tests/results/number-format.sql.out
@@ -0,0 +1,50 @@
+-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
+-- Number of queries: 4
+
+
+-- !query 0
+select 1, -1
+-- !query 0 schema
+int, int
+-- !query 0 output
++---+----+
+|  1|(-1)|
++---+----+
+|  1|  -1|
++---+----+
+
+
+-- !query 1
+select 2147483648, -2147483649
+-- !query 1 schema
+bigint, bigint
+-- !query 1 output
++----------+-------------+
+|2147483648|(-2147483649)|
++----------+-------------+
+|2147483648|  -2147483649|
++----------+-------------+
+
+
+-- !query 2
+select 9223372036854775808, -9223372036854775809
+-- !query 2 schema
+decimal(19,0), decimal(19,0)
+-- !query 2 output
++-------------------+----------------------+
+|9223372036854775808|(-9223372036854775809)|
++-------------------+----------------------+
+|9223372036854775808|  -9223372036854775809|
++-------------------+----------------------+
+
+
+-- !query 3
+select 0.3, -0.8, .5, -.18
+-- !query 3 schema
+decimal(1,1), decimal(1,1), decimal(1,1), decimal(2,2)
+-- !query 3 output
++---+------+---+-------+
+|0.3|(-0.8)|0.5|(-0.18)|
++---+------+---+-------+
+|0.3|  -0.8|0.5|  -0.18|
++---+------+---+-------+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1358,42 +1358,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     )
   }
 
-  test("Floating point number format") {
-    checkAnswer(
-      sql("SELECT 0.3"), Row(BigDecimal(0.3))
-    )
-
-    checkAnswer(
-      sql("SELECT -0.8"), Row(BigDecimal(-0.8))
-    )
-
-    checkAnswer(
-      sql("SELECT .5"), Row(BigDecimal(0.5))
-    )
-
-    checkAnswer(
-      sql("SELECT -.18"), Row(BigDecimal(-0.18))
-    )
-  }
-
-  test("Auto cast integer type") {
-    checkAnswer(
-      sql(s"SELECT ${Int.MaxValue + 1L}"), Row(Int.MaxValue + 1L)
-    )
-
-    checkAnswer(
-      sql(s"SELECT ${Int.MinValue - 1L}"), Row(Int.MinValue - 1L)
-    )
-
-    checkAnswer(
-      sql("SELECT 9223372036854775808"), Row(new java.math.BigDecimal("9223372036854775808"))
-    )
-
-    checkAnswer(
-      sql("SELECT -9223372036854775809"), Row(new java.math.BigDecimal("-9223372036854775809"))
-    )
-  }
-
   test("Test to check we can apply sign to expression") {
 
     checkAnswer(

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.io.File
+import java.util.{Locale, TimeZone}
+
+import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile}
+import org.apache.spark.sql.test.SharedSQLContext
+
+/**
+ * End-to-end test cases for SQL queries.
+ *
+ * Each case is loaded from a file in "spark/sql/core/src/test/resources/sql-tests/inputs".
+ * Each case has a golden result file in "spark/sql/core/src/test/resources/sql-tests/results".
+ *
+ * To re-generate golden files, run:
+ * {{{
+ *   SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite"
+ * }}}
+ *
+ * The format for input files is simple:
+ *  1. A list of SQL queries separated by semicolon.
+ *  2. Lines starting with -- are treated as comments and ignored.
+ *
+ * For example:
+ * {{{
+ *   -- this is a comment
+ *   select 1, -1;
+ *   select current_date;
+ * }}}
+ *
+ * The format for golden result files looks roughly like:
+ * {{{
+ *   -- some header information
+ *
+ *   -- !query 0
+ *   select 1, -1
+ *   -- !query 0 schema
+ *   int, int
+ *   -- !query 0 output
+ *   +---+----+
+ *   |  1|(-1)|
+ *   +---+----+
+ *   |  1|  -1|
+ *   +---+----+
+ *
+ *   -- !query 1
+ *   ...
+ * }}}
+ */
+class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
+
+  /** When true, golden result files are rewritten from the actual output before comparing. */
+  private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1"
+
+  // Both paths are relative, so they are resolved against the JVM's working directory.
+  private val inputFilePath = "src/test/resources/sql-tests/inputs/"
+  private val goldenFilePath = "src/test/resources/sql-tests/results/"
+
+  /** List of test cases to ignore, in lower case. */
+  private val blackList = Set(
+    "blacklist.sql"  // Do NOT remove this one. It is here to test the blacklist functionality.
+  )
+
+  // Create all the test cases. This runs while the suite is being constructed, i.e. before
+  // beforeAll() has had a chance to pin the default locale and time zone.
+  listTestCases().foreach(createScalaTestCase)
+
+  /**
+   * A test case.
+   *
+   * @param name file name of the input file; used as the ScalaTest test name
+   * @param inputFile absolute path of the file holding the SQL queries
+   * @param resultFile path of the golden file holding the expected output
+   */
+  private case class TestCase(name: String, inputFile: String, resultFile: String)
+
+  /**
+   * A single SQL query's output.
+   *
+   * @param sql the query text
+   * @param schema comma separated simple type names of the result columns
+   * @param output the rendered result table
+   */
+  private case class QueryOutput(sql: String, schema: String, output: String) {
+    /** Renders this output as the three "-- !query N" segments used by golden files. */
+    def toString(queryIndex: Int): String = {
+      // We are explicitly not using multi-line string due to stripMargin removing "|" in output.
+      s"-- !query $queryIndex\n" +
+        sql + "\n" +
+        s"-- !query $queryIndex schema\n" +
+        schema + "\n" +
+        s"-- !query $queryIndex output\n" +
+        output
+    }
+  }
+
+  /** Registers `testCase` with ScalaTest, as an ignored test if it is blacklisted. */
+  private def createScalaTestCase(testCase: TestCase): Unit = {
+    // Locale.ROOT keeps the lookup independent of the JVM default locale, which has not been
+    // fixed yet at registration time (e.g. a Turkish default would lower-case "I" differently).
+    if (blackList.contains(testCase.name.toLowerCase(Locale.ROOT))) {
+      // Create a test case to ignore this case.
+      ignore(testCase.name) { /* Do nothing */ }
+    } else {
+      // Create a test case to run this case.
+      test(testCase.name) { runTest(testCase) }
+    }
+  }
+
+  /**
+   * Run a test case: execute every query of the input file and compare the SQL text, schema and
+   * rendered output against the golden file. When [[regenerateGoldenFiles]] is set, the golden
+   * file is first rewritten from the actual output.
+   */
+  private def runTest(testCase: TestCase): Unit = {
+    val input = fileToString(new File(testCase.inputFile))
+
+    // List of SQL queries to run
+    val queries: Seq[String] = {
+      val cleaned = input.split("\n").filterNot(_.startsWith("--")).mkString("\n")
+      // note: this is not a robust way to split queries using semicolon, but works for now.
+      cleaned.split("(?<=[^\\\\]);").map(_.trim).filterNot(_.isEmpty).toSeq
+    }
+
+    // Run the SQL queries preparing them for comparison.
+    val outputs: Seq[QueryOutput] = queries.map { sql =>
+      val df = spark.sql(sql)
+      // We might need to do some query canonicalization in the future.
+      QueryOutput(
+        sql = sql,
+        schema = df.schema.map(_.dataType.simpleString).mkString(", "),
+        output = df.showString(_numRows = 10000, truncate = 10000).trim)
+    }
+
+    val resultFile = new File(testCase.resultFile)
+
+    if (regenerateGoldenFiles) {
+      // Again, we are explicitly not using multi-line string due to stripMargin removing "|".
+      val goldenOutput = {
+        s"-- Automatically generated by ${getClass.getName}\n" +
+        s"-- Number of queries: ${outputs.size}\n\n\n" +
+        outputs.zipWithIndex.map { case (qr, i) => qr.toString(i) }.mkString("\n\n\n") + "\n"
+      }
+      // The results directory (or a nested sub-directory) may not exist yet for a new input file.
+      Option(resultFile.getParentFile).foreach(_.mkdirs())
+      stringToFile(resultFile, goldenOutput)
+    }
+
+    // Read back the golden file.
+    val expectedOutputs: Seq[QueryOutput] = {
+      val goldenOutput = fileToString(resultFile)
+      val segments = goldenOutput.split("-- !query.+\n")
+      // Each query contributes 3 segments (sql, schema, output) and the header adds one more.
+      assert(segments.length % 3 == 1,
+        s"Expected a header plus 3 blocks per query in ${testCase.resultFile}, but got " +
+          s"${segments.length} blocks. Try regenerating the golden files.")
+      // The count is derived from the golden file itself so that the check below is meaningful.
+      Seq.tabulate((segments.length - 1) / 3) { i =>
+        QueryOutput(
+          sql = segments(i * 3 + 1).trim,
+          schema = segments(i * 3 + 2).trim,
+          output = segments(i * 3 + 3).trim
+        )
+      }
+    }
+
+    // Compare results.
+    assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") {
+      outputs.size
+    }
+
+    outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) =>
+      assertResult(expected.sql, s"SQL query should match for query #$i") { output.sql }
+      assertResult(expected.schema, s"Schema should match for query #$i") { output.schema }
+      assertResult(expected.output, s"Result should match for query #$i") { output.output }
+    }
+  }
+
+  /**
+   * Builds one [[TestCase]] per file found under the input directory. The golden file of an input
+   * lives at the same relative location under the results directory, with ".out" appended.
+   */
+  private def listTestCases(): Seq[TestCase] = {
+    val inputDir = new File(inputFilePath).getAbsoluteFile
+    val goldenDir = new File(goldenFilePath).getAbsoluteFile
+    listFilesRecursively(inputDir).map { file =>
+      // Work on path objects instead of substring replacement so that the mapping does not depend
+      // on the platform's separator or on the directory name occurring elsewhere in the path.
+      val relativePath = inputDir.toPath.relativize(file.toPath).toString
+      val resultFile = new File(goldenDir, relativePath + ".out").getPath
+      TestCase(file.getName, file.getAbsolutePath, resultFile)
+    }
+  }
+
+  /**
+   * Returns all the files (not directories) in a directory, recursively. Entries of each
+   * directory are visited in file name order so that test registration order is stable.
+   *
+   * @throws IllegalStateException if `path` cannot be listed
+   */
+  private def listFilesRecursively(path: File): Seq[File] = {
+    // File.listFiles() returns null instead of throwing when the path is not a directory or an
+    // I/O error occurs, e.g. when the suite is launched from an unexpected working directory.
+    val entries = Option(path.listFiles()).getOrElse {
+      throw new IllegalStateException(
+        s"Cannot list SQL test files: ${path.getAbsolutePath} is not a readable directory")
+    }
+    val (dirs, files) = entries.sortBy(_.getName).partition(_.isDirectory)
+    (files ++ dirs.flatMap(listFilesRecursively)).toSeq
+  }
+
+  // Captured at construction time so that afterAll() can restore the JVM-wide defaults.
+  private val originalTimeZone = TimeZone.getDefault
+  private val originalLocale = Locale.getDefault
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
+    TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
+    // Add Locale setting
+    Locale.setDefault(Locale.US)
+    RuleExecutor.resetTime()
+  }
+
+  override def afterAll(): Unit = {
+    try {
+      TimeZone.setDefault(originalTimeZone)
+      Locale.setDefault(originalLocale)
+
+      // For debugging dump some statistics about how much time was spent in various optimizer rules
+      logWarning(RuleExecutor.dumpTimeSpent())
+    } finally {
+      super.afterAll()
+    }
+  }
+}