From 1f277f4fcc1b19bb94e2a9debd1fe7f9786e7de4 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 23 Dec 2016 21:51:18 +0800 Subject: [PATCH] DESC TABLE should not fail with format class not found --- .../sql/hive/client/HiveClientImpl.scala | 11 ++++- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 46 +++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index bacae8a9a5397..5c0e2f6ec4941 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -408,8 +408,15 @@ private[hive] class HiveClientImpl( lastAccessTime = h.getLastAccessTime.toLong * 1000, storage = CatalogStorageFormat( locationUri = shim.getDataLocation(h), - inputFormat = Option(h.getInputFormatClass).map(_.getName), - outputFormat = Option(h.getOutputFormatClass).map(_.getName), + // To avoid ClassNotFound exception, we try our best to not get the format class, but get + // the class name directly. However, for non-native tables, there is no interface to get + // the format class name, so we may still throw ClassNotFound in this case. 
+ inputFormat = Option(h.getTTable.getSd.getInputFormat).orElse { + Option(h.getStorageHandler).map(_.getInputFormatClass.getName) + }, + outputFormat = Option(h.getTTable.getSd.getOutputFormat).orElse { + Option(h.getStorageHandler).map(_.getOutputFormatClass.getName) + }, serde = Option(h.getSerializationLib), compressed = h.getTTable.getSd.isCompressed, properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index a670560c5969d..9aa9ebf1aa018 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -311,6 +311,29 @@ class HiveSparkSubmitSuite runSparkSubmit(args) } + test("SPARK-18989: DESC TABLE should not fail with format class not found") { + val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + + val argsForCreateTable = Seq( + "--class", SPARK_18989_CREATE_TABLE.getClass.getName.stripSuffix("$"), + "--name", "SPARK-18989", + "--master", "local-cluster[2,1,1024]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + "--jars", TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath, + unusedJar.toString) + runSparkSubmit(argsForCreateTable) + + val argsForShowTables = Seq( + "--class", SPARK_18989_DESC_TABLE.getClass.getName.stripSuffix("$"), + "--name", "SPARK-18989", + "--master", "local-cluster[2,1,1024]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + unusedJar.toString) + runSparkSubmit(argsForShowTables) + } + // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly. 
// This is copied from org.apache.spark.deploy.SparkSubmitSuite private def runSparkSubmit(args: Seq[String]): Unit = { @@ -853,3 +876,26 @@ object SPARK_18360 { } } } + +object SPARK_18989_CREATE_TABLE { + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder().enableHiveSupport().getOrCreate() + spark.sql( + """ + |CREATE TABLE IF NOT EXISTS base64_tbl(val string) STORED AS + |INPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat' + |OUTPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat' + """.stripMargin) + } +} + +object SPARK_18989_DESC_TABLE { + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder().enableHiveSupport().getOrCreate() + try { + spark.sql("DESC base64_tbl") + } finally { + spark.sql("DROP TABLE IF EXISTS base64_tbl") + } + } +}