Merge pull request #5 from marmbrus/testCaching

Improvements to hive test caching
marmbrus · Jan 6, 2014 · b749b51 · b749b51
2 parents 66adceb + b3bd15f
commit b749b51
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 9 deletions.
diff --git a/build.sbt b/build.sbt
@@ -12,6 +12,8 @@ resolvers += "Local Maven Repository" at "file://"+Path.userHome.absolutePath+"/
 
 libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.0-incubating-SNAPSHOT"
 
+libraryDependencies += "catalyst" % "hive-golden" % "0" from "http://repository-databricks.forge.cloudbees.com/snapshot/catalystGolden.jar"
+
 // Hive 0.10.0 relies on a weird version of jdo that is not published anywhere... Remove when we upgrade to 0.11.0
 libraryDependencies += "javax.jdo" % "jdo2-api" % "2.3-ec" from "http://www.datanucleus.org/downloads/maven2/javax/jdo/jdo2-api/2.3-ec/jdo2-api-2.3-ec.jar"
 

diff --git a/src/main/scala/catalyst/util/package.scala b/src/main/scala/catalyst/util/package.scala
@@ -34,6 +34,28 @@ package object util {
     new String(outStream.toByteArray(), encoding)
   }
 
+  /**
+   * Reads a classpath resource to the end and decodes its bytes as a string.
+   *
+   * @param resource    path of the resource, as understood by `ClassLoader.getResourceAsStream`
+   * @param encoding    charset name used to decode the bytes (defaults to "UTF-8")
+   * @param classLoader loader used to locate the resource (defaults to the loader of the
+   *                    enclosing object's class)
+   * @return the full contents of the resource decoded with `encoding`
+   * @throws IllegalArgumentException if the resource cannot be found by `classLoader`
+   */
+  def resourceToString(
+      resource: String,
+      encoding: String = "UTF-8",
+      classLoader: ClassLoader = this.getClass.getClassLoader): String = {
+    val inStream = classLoader.getResourceAsStream(resource)
+    // getResourceAsStream reports a missing resource by returning null. Fail with a clear
+    // message here instead of a NullPointerException thrown from the read loop and then
+    // again from close() in the finally block.
+    require(inStream != null, s"Resource not found on classpath: $resource")
+    val outStream = new ByteArrayOutputStream
+    try {
+      // Copy in chunks rather than issuing one read() call per byte.
+      val buffer = new Array[Byte](4096)
+      var bytesRead = inStream.read(buffer)
+      while (bytesRead != -1) {
+        outStream.write(buffer, 0, bytesRead)
+        bytesRead = inStream.read(buffer)
+      }
+    } finally {
+      inStream.close()
+    }
+    new String(outStream.toByteArray, encoding)
+  }
+
   def stringToFile(file: File, str: String): File = {
     val out = new PrintWriter(file)
     out.write(str)

diff --git a/src/test/scala/catalyst/execution/HiveComparisionTest.scala b/src/test/scala/catalyst/execution/HiveComparisionTest.scala
@@ -82,18 +82,18 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
         }
 
         val hiveCachedResults = hiveCacheFiles.flatMap { cachedAnswerFile =>
-          if(cachedAnswerFile.exists) {
-            val cachedString = fileToString(cachedAnswerFile)
-            val cachedAnswer =
-              if(cachedString == "")
-                Nil
-              else
-                cachedString.split("\n").toSeq
-            Some(cachedAnswer)
+          logger.debug(s"Looking for cached answer file $cachedAnswerFile.")
+          if (cachedAnswerFile.exists) {
+            Some(fileToString(cachedAnswerFile))
+          } else if (getClass.getClassLoader.getResourceAsStream(cachedAnswerFile.toString) != null) {
+            Some(resourceToString(cachedAnswerFile.toString))
           } else {
             logger.debug(s"File $cachedAnswerFile not found")
             None
           }
+        }.map {
+          case "" => Nil
+          case other => other.split("\n").toSeq
         }
 
         val hiveResults: Seq[Seq[String]] =
@@ -182,7 +182,6 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
                 // The testing setup traps exits so wait here for a long time so the developer can see when things started
                 // to go wrong.
                 Thread.sleep(1000000)
-                System.exit(1)
             }
           }
 

diff --git a/src/test/scala/catalyst/execution/HiveCompatability.scala b/src/test/scala/catalyst/execution/HiveCompatability.scala
@@ -26,6 +26,22 @@ class HiveCompatability extends HiveQueryFileTest {
     "index_compact_binary_search",
     "bucket_num_reducers",
 
+    // User-specific test answers; these break the caching mechanism.
+    "authorization_3",
+    "authorization_5",
+    "keyword_1",
+    "misc_json",
+
+    // Timezone-specific test answers.
+    "udf_unix_timestamp",
+    "udf_to_unix_timestamp",
+
+    // Can't run without local map/reduce.
+    "index_auto_update",
+    "index_auto_self_join",
+    "index_stale",
+    "type_cast_1",
+
     // Hive seems to think 1.0 > NaN = true && 1.0 < NaN = false... which is wrong.
     // http://stackoverflow.com/a/1573715
     "ops_comparison",