Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-18989][SQL] DESC TABLE should not fail with format class not found #16388

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,15 @@ private[hive] class HiveClientImpl(
lastAccessTime = h.getLastAccessTime.toLong * 1000,
storage = CatalogStorageFormat(
locationUri = shim.getDataLocation(h),
inputFormat = Option(h.getInputFormatClass).map(_.getName),
outputFormat = Option(h.getOutputFormatClass).map(_.getName),
// To avoid ClassNotFound exception, we try our best to not get the format class, but get
// the class name directly. However, for non-native tables, there is no interface to get
// the format class name, so we may still throw ClassNotFound in this case.
inputFormat = Option(h.getTTable.getSd.getInputFormat).orElse {
Option(h.getStorageHandler).map(_.getInputFormatClass.getName)
Copy link
Contributor

@yhuai yhuai Dec 27, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did this actually also fix a bug (we did not look for the storage handler before)? Is it possible to add a test for that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's not a bug; getInputFormatClass already looks for the storage handler.

},
outputFormat = Option(h.getTTable.getSd.getOutputFormat).orElse {
Option(h.getStorageHandler).map(_.getOutputFormatClass.getName)
},
serde = Option(h.getSerializationLib),
compressed = h.getTTable.getSd.isCompressed,
properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,29 @@ class HiveSparkSubmitSuite
runSparkSubmit(args)
}

test("SPARK-18989: DESC TABLE should not fail with format class not found") {
  val unusedJar = TestUtils.createJarWithClasses(Seq.empty)

  // Step 1: create a table whose input/output format classes live in
  // hive-contrib-0.13.1.jar, which is only on the classpath of THIS submission.
  val argsForCreateTable = Seq(
    "--class", SPARK_18989_CREATE_TABLE.getClass.getName.stripSuffix("$"),
    "--name", "SPARK-18989",
    "--master", "local-cluster[2,1,1024]",
    "--conf", "spark.ui.enabled=false",
    "--conf", "spark.master.rest.enabled=false",
    "--jars", TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath,
    unusedJar.toString)
  runSparkSubmit(argsForCreateTable)

  // Step 2: describe (and drop) that table WITHOUT the contrib jar on the
  // classpath; before the fix this failed with ClassNotFoundException because
  // the format classes were eagerly loaded.
  val argsForShowTables = Seq(
    "--class", SPARK_18989_DESC_TABLE.getClass.getName.stripSuffix("$"),
    "--name", "SPARK-18989",
    "--master", "local-cluster[2,1,1024]",
    "--conf", "spark.ui.enabled=false",
    "--conf", "spark.master.rest.enabled=false",
    unusedJar.toString)
  runSparkSubmit(argsForShowTables)
}

// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
// This is copied from org.apache.spark.deploy.SparkSubmitSuite
private def runSparkSubmit(args: Seq[String]): Unit = {
Expand Down Expand Up @@ -853,3 +876,26 @@ object SPARK_18360 {
}
}
}

object SPARK_18989_CREATE_TABLE {
  // Creates a table whose input/output formats come from the hive-contrib jar,
  // which is supplied only to this spark-submit invocation via --jars.
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().enableHiveSupport().getOrCreate()
    val createTableSql =
      """
        |CREATE TABLE IF NOT EXISTS base64_tbl(val string) STORED AS
        |INPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat'
        |OUTPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat'
      """.stripMargin
    session.sql(createTableSql)
  }
}

object SPARK_18989_DESC_TABLE {
  // Describes the table created by SPARK_18989_CREATE_TABLE in a separate JVM
  // that does NOT have the contrib format classes on its classpath, then drops
  // the table so the suite leaves no state behind.
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().enableHiveSupport().getOrCreate()
    try {
      session.sql("DESC base64_tbl")
    } finally {
      // Cleanup runs even if DESC throws.
      session.sql("DROP TABLE IF EXISTS base64_tbl")
    }
  }
}