Skip to content

Commit

Permalink
[SPARK-18989][SQL] DESC TABLE should not fail with format class not found
Browse files Browse the repository at this point in the history

## What changes were proposed in this pull request?

When we describe a table, we only want to see the table's metadata, not read its data, so it is fine even if the format class is not present on the classpath.

## How was this patch tested?

new regression test

Author: Wenchen Fan <[email protected]>

Closes apache#16388 from cloud-fan/hive.
  • Loading branch information
cloud-fan authored and cmonkey committed Dec 29, 2016
1 parent c3b899d commit 9e0f318
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,15 @@ private[hive] class HiveClientImpl(
lastAccessTime = h.getLastAccessTime.toLong * 1000,
storage = CatalogStorageFormat(
locationUri = shim.getDataLocation(h),
inputFormat = Option(h.getInputFormatClass).map(_.getName),
outputFormat = Option(h.getOutputFormatClass).map(_.getName),
// To avoid ClassNotFound exception, we try our best to not get the format class, but get
// the class name directly. However, for non-native tables, there is no interface to get
// the format class name, so we may still throw ClassNotFound in this case.
inputFormat = Option(h.getTTable.getSd.getInputFormat).orElse {
Option(h.getStorageHandler).map(_.getInputFormatClass.getName)
},
outputFormat = Option(h.getTTable.getSd.getOutputFormat).orElse {
Option(h.getStorageHandler).map(_.getOutputFormatClass.getName)
},
serde = Option(h.getSerializationLib),
compressed = h.getTTable.getSd.isCompressed,
properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,29 @@ class HiveSparkSubmitSuite
runSparkSubmit(args)
}

// Regression test for SPARK-18989. Runs two separate spark-submit applications:
// the first creates a table whose input/output format classes live in the
// hive-contrib jar, the second describes that table WITHOUT the jar being passed
// via --jars. The second run must succeed even though the format classes named
// in the table definition are not supplied to it.
test("SPARK-18989: DESC TABLE should not fail with format class not found") {
  // Jar built from an empty class list; passed as the trailing spark-submit argument.
  val unusedJar = TestUtils.createJarWithClasses(Seq.empty)

  // Step 1: create the table with the hive-contrib jar supplied through --jars.
  val argsForCreateTable = Seq(
    "--class", SPARK_18989_CREATE_TABLE.getClass.getName.stripSuffix("$"),
    // Application name matches the JIRA ticket under test (was a stale "SPARK-18947").
    "--name", "SPARK-18989",
    "--master", "local-cluster[2,1,1024]",
    "--conf", "spark.ui.enabled=false",
    "--conf", "spark.master.rest.enabled=false",
    "--jars", TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath,
    unusedJar.toString)
  runSparkSubmit(argsForCreateTable)

  // Step 2: describe the table; note that no --jars option is given here.
  val argsForDescTable = Seq(
    "--class", SPARK_18989_DESC_TABLE.getClass.getName.stripSuffix("$"),
    "--name", "SPARK-18989",
    "--master", "local-cluster[2,1,1024]",
    "--conf", "spark.ui.enabled=false",
    "--conf", "spark.master.rest.enabled=false",
    unusedJar.toString)
  runSparkSubmit(argsForDescTable)
}

// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
// This is copied from org.apache.spark.deploy.SparkSubmitSuite
private def runSparkSubmit(args: Seq[String]): Unit = {
Expand Down Expand Up @@ -853,3 +876,26 @@ object SPARK_18360 {
}
}
}

/**
 * Entry point launched through spark-submit by the SPARK-18989 test.
 *
 * Creates the Hive table `base64_tbl` (if it does not already exist) using the
 * Base64 text input/output format classes from the
 * `org.apache.hadoop.hive.contrib.fileformat.base64` package.
 */
object SPARK_18989_CREATE_TABLE {
  def main(args: Array[String]): Unit = {
    // DDL naming the custom input/output format classes explicitly.
    val createTableDdl =
      """
        |CREATE TABLE IF NOT EXISTS base64_tbl(val string) STORED AS
        |INPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat'
        |OUTPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat'
        |""".stripMargin

    val session = SparkSession.builder().enableHiveSupport().getOrCreate()
    session.sql(createTableDdl)
  }
}

/**
 * Entry point launched through spark-submit by the SPARK-18989 test.
 *
 * Issues `DESC base64_tbl` and then drops the table, whether or not the
 * describe statement succeeded, so the table does not outlive the test.
 */
object SPARK_18989_DESC_TABLE {
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().enableHiveSupport().getOrCreate()

    // Cleanup runs in `finally` so a failing DESC still removes the table.
    try session.sql("DESC base64_tbl")
    finally session.sql("DROP TABLE IF EXISTS base64_tbl")
  }
}

0 comments on commit 9e0f318

Please sign in to comment.