Skip to content

Commit

Permalink
Merge pull request #5 from marmbrus/testCaching
Browse files Browse the repository at this point in the history
Improvements to hive test caching
  • Loading branch information
marmbrus committed Jan 6, 2014
2 parents 66adceb + b3bd15f commit b749b51
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 9 deletions.
2 changes: 2 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ resolvers += "Local Maven Repository" at "file://"+Path.userHome.absolutePath+"/

libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.0-incubating-SNAPSHOT"

// Declared with an explicit download URL (`from`) for the jar.
// NOTE(review): presumably bundles pre-computed Hive answer files for the test suite — confirm.
libraryDependencies += "catalyst" % "hive-golden" % "0" from "http://repository-databricks.forge.cloudbees.com/snapshot/catalystGolden.jar"

// Hive 0.10.0 relies on a weird version of jdo that is not published anywhere... Remove when we upgrade to 0.11.0
libraryDependencies += "javax.jdo" % "jdo2-api" % "2.3-ec" from "http://www.datanucleus.org/downloads/maven2/javax/jdo/jdo2-api/2.3-ec/jdo2-api-2.3-ec.jar"

Expand Down
22 changes: 22 additions & 0 deletions src/main/scala/catalyst/util/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,28 @@ package object util {
new String(outStream.toByteArray(), encoding)
}

/**
 * Reads a classpath resource fully into memory and decodes it as a string.
 *
 * @param resource    path of the resource, as understood by `ClassLoader.getResourceAsStream`
 * @param encoding    name of the charset used to decode the bytes (defaults to "UTF-8")
 * @param classLoader loader used to locate the resource (defaults to the loader of this object)
 * @return the complete contents of the resource
 * @throws java.io.FileNotFoundException if `classLoader` cannot find `resource`
 */
def resourceToString(
    resource: String,
    encoding: String = "UTF-8",
    classLoader: ClassLoader = this.getClass.getClassLoader): String = {
  val inStream = classLoader.getResourceAsStream(resource)
  // getResourceAsStream reports a missing resource by returning null; fail with a descriptive
  // error here instead of an opaque NullPointerException from the first read()/close().
  if (inStream == null) {
    throw new java.io.FileNotFoundException(s"Resource not found on classpath: $resource")
  }
  val outStream = new ByteArrayOutputStream
  try {
    // Copy in chunks rather than issuing one read() call per byte.
    val buffer = new Array[Byte](8192)
    var bytesRead = inStream.read(buffer)
    while (bytesRead != -1) {
      outStream.write(buffer, 0, bytesRead)
      bytesRead = inStream.read(buffer)
    }
  } finally {
    inStream.close()
  }
  new String(outStream.toByteArray, encoding)
}

def stringToFile(file: File, str: String): File = {
val out = new PrintWriter(file)
out.write(str)
Expand Down
17 changes: 8 additions & 9 deletions src/test/scala/catalyst/execution/HiveComparisionTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -82,18 +82,18 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
}

val hiveCachedResults = hiveCacheFiles.flatMap { cachedAnswerFile =>
if(cachedAnswerFile.exists) {
val cachedString = fileToString(cachedAnswerFile)
val cachedAnswer =
if(cachedString == "")
Nil
else
cachedString.split("\n").toSeq
Some(cachedAnswer)
logger.debug(s"Looking for cached answer file $cachedAnswerFile.")
if (cachedAnswerFile.exists) {
Some(fileToString(cachedAnswerFile))
} else if (getClass.getClassLoader.getResourceAsStream(cachedAnswerFile.toString) != null) {
Some(resourceToString(cachedAnswerFile.toString))
} else {
logger.debug(s"File $cachedAnswerFile not found")
None
}
}.map {
case "" => Nil
case other => other.split("\n").toSeq
}

val hiveResults: Seq[Seq[String]] =
Expand Down Expand Up @@ -182,7 +182,6 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
// The testing setup traps exits so wait here for a long time so the developer can see when things started
// to go wrong.
Thread.sleep(1000000)
System.exit(1)
}
}

Expand Down
16 changes: 16 additions & 0 deletions src/test/scala/catalyst/execution/HiveCompatability.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@ class HiveCompatability extends HiveQueryFileTest {
"index_compact_binary_search",
"bucket_num_reducers",

// User-specific test answers, which break the caching mechanism.
"authorization_3",
"authorization_5",
"keyword_1",
"misc_json",

// Timezone-specific test answers.
"udf_unix_timestamp",
"udf_to_unix_timestamp",

// Can't run without local map/reduce.
"index_auto_update",
"index_auto_self_join",
"index_stale",
"type_cast_1",

// Hive seems to think 1.0 > NaN = true && 1.0 < NaN = false... which is wrong.
// http://stackoverflow.com/a/1573715
"ops_comparison",
Expand Down

0 comments on commit b749b51

Please sign in to comment.