-
Notifications
You must be signed in to change notification settings - Fork 28.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-2177][SQL] describe table result contains only one column #1118
Changes from all commits
bb8bbef
725e88c
342fdf7
74bd1d4
366f891
83adb2f
f1a417e
440c5af
9787fff
8003cf3
6387217
656b068
e7c4e72
b9b9aa5
fd2534c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,3 +60,19 @@ case class ExplainCommand(plan: LogicalPlan) extends Command { | |
* Returned for the "CACHE TABLE tableName" and "UNCACHE TABLE tableName" command. | ||
*/ | ||
case class CacheCommand(tableName: String, doCache: Boolean) extends Command | ||
|
||
/** | ||
* Returned for the "DESCRIBE [EXTENDED] [dbName.]tableName" command. | ||
* @param table The table to be described. | ||
* @param isExtended True if "DESCRIBE EXTENDED" is used. Otherwise, false. | ||
* It is effective only when the table is a Hive table. | ||
*/ | ||
case class DescribeCommand( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: It would be great to explain isFormatted / isExtended in @param. |
||
table: LogicalPlan, | ||
isExtended: Boolean) extends Command { | ||
override def output = Seq( | ||
// Column names are based on Hive. | ||
BoundReference(0, AttributeReference("col_name", StringType, nullable = false)()), | ||
BoundReference(1, AttributeReference("data_type", StringType, nullable = false)()), | ||
BoundReference(2, AttributeReference("comment", StringType, nullable = false)())) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,7 +52,6 @@ private[hive] case class AddFile(filePath: String) extends Command | |
private[hive] object HiveQl { | ||
protected val nativeCommands = Seq( | ||
"TOK_DESCFUNCTION", | ||
"TOK_DESCTABLE", | ||
"TOK_DESCDATABASE", | ||
"TOK_SHOW_TABLESTATUS", | ||
"TOK_SHOWDATABASES", | ||
|
@@ -120,6 +119,12 @@ private[hive] object HiveQl { | |
"TOK_SWITCHDATABASE" | ||
) | ||
|
||
// Commands that we do not need to explain. | ||
protected val noExplainCommands = Seq( | ||
"TOK_CREATETABLE", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: Does CTAS fall in here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: noExplainCommands is for those commands which we do not need to explain. For example, we will basically do nothing for "EXPLAIN CTAS". A regular CTAS query will not fall in here. |
||
"TOK_DESCTABLE" | ||
) ++ nativeCommands | ||
|
||
/** | ||
* A set of implicit transformations that allow Hive ASTNodes to be rewritten by transformations | ||
* similar to [[catalyst.trees.TreeNode]]. | ||
|
@@ -362,13 +367,20 @@ private[hive] object HiveQl { | |
} | ||
} | ||
|
||
protected def extractDbNameTableName(tableNameParts: Node): (Option[String], String) = { | ||
val (db, tableName) = | ||
tableNameParts.getChildren.map { case Token(part, Nil) => cleanIdentifier(part) } match { | ||
case Seq(tableOnly) => (None, tableOnly) | ||
case Seq(databaseName, table) => (Some(databaseName), table) | ||
} | ||
|
||
(db, tableName) | ||
} | ||
|
||
protected def nodeToPlan(node: Node): LogicalPlan = node match { | ||
// Just fake explain for any of the native commands. | ||
case Token("TOK_EXPLAIN", explainArgs) if nativeCommands contains explainArgs.head.getText => | ||
ExplainCommand(NoRelation) | ||
// Create tables aren't native commands due to CTAS queries, but we still don't need to | ||
// explain them. | ||
case Token("TOK_EXPLAIN", explainArgs) if explainArgs.head.getText == "TOK_CREATETABLE" => | ||
case Token("TOK_EXPLAIN", explainArgs) | ||
if noExplainCommands.contains(explainArgs.head.getText) => | ||
ExplainCommand(NoRelation) | ||
case Token("TOK_EXPLAIN", explainArgs) => | ||
// Ignore FORMATTED if present. | ||
|
@@ -377,6 +389,39 @@ private[hive] object HiveQl { | |
// TODO: support EXTENDED? | ||
ExplainCommand(nodeToPlan(query)) | ||
|
||
case Token("TOK_DESCTABLE", describeArgs) => | ||
// Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL | ||
val Some(tableType) :: formatted :: extended :: pretty :: Nil = | ||
getClauses(Seq("TOK_TABTYPE", "FORMATTED", "EXTENDED", "PRETTY"), describeArgs) | ||
if (formatted.isDefined || pretty.isDefined) { | ||
// FORMATTED and PRETTY are not supported and this statement will be treated as | ||
// a Hive native command. | ||
NativePlaceholder | ||
} else { | ||
tableType match { | ||
case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => { | ||
nameParts.head match { | ||
case Token(".", dbName :: tableName :: Nil) => | ||
// It is describing a table with the format like "describe db.table". | ||
// TODO: Actually, a user may mean tableName.columnName. Need to resolve this issue. | ||
val (db, tableName) = extractDbNameTableName(nameParts.head) | ||
DescribeCommand( | ||
UnresolvedRelation(db, tableName, None), extended.isDefined) | ||
case Token(".", dbName :: tableName :: colName :: Nil) => | ||
// It is describing a column with the format like "describe db.table column". | ||
NativePlaceholder | ||
case tableName => | ||
// It is describing a table with the format like "describe table". | ||
DescribeCommand( | ||
UnresolvedRelation(None, tableName.getText, None), | ||
extended.isDefined) | ||
} | ||
} | ||
// All other cases. | ||
case _ => NativePlaceholder | ||
} | ||
} | ||
|
||
case Token("TOK_CREATETABLE", children) | ||
if children.collect { case t@Token("TOK_QUERY", _) => t }.nonEmpty => | ||
// TODO: Parse other clauses. | ||
|
@@ -414,11 +459,8 @@ private[hive] object HiveQl { | |
s"Unhandled clauses: ${notImplemented.flatten.map(dumpTree(_)).mkString("\n")}") | ||
} | ||
|
||
val (db, tableName) = | ||
tableNameParts.getChildren.map{ case Token(part, Nil) => cleanIdentifier(part)} match { | ||
case Seq(tableOnly) => (None, tableOnly) | ||
case Seq(databaseName, table) => (Some(databaseName), table) | ||
} | ||
val (db, tableName) = extractDbNameTableName(tableNameParts) | ||
|
||
InsertIntoCreatedTable(db, tableName, nodeToPlan(query)) | ||
|
||
// If it's not a "CREATE TABLE AS" like above then just pass it back to Hive as a native command. | ||
|
@@ -736,11 +778,7 @@ private[hive] object HiveQl { | |
val Some(tableNameParts) :: partitionClause :: Nil = | ||
getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs) | ||
|
||
val (db, tableName) = | ||
tableNameParts.getChildren.map{ case Token(part, Nil) => cleanIdentifier(part)} match { | ||
case Seq(tableOnly) => (None, tableOnly) | ||
case Seq(databaseName, table) => (Some(databaseName), table) | ||
} | ||
val (db, tableName) = extractDbNameTableName(tableNameParts) | ||
|
||
val partitionKeys = partitionClause.map(_.getChildren.map { | ||
// Parse partitions. We also make keys case insensitive. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -144,6 +144,12 @@ abstract class HiveComparisonTest | |
case _: SetCommand => Seq("0") | ||
case _: LogicalNativeCommand => answer.filterNot(nonDeterministicLine).filterNot(_ == "") | ||
case _: ExplainCommand => answer | ||
case _: DescribeCommand => | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: Add an inline comment explaining what you are filtering. |
||
// Filter out non-deterministic lines and lines which do not have actual results but | ||
// can introduce problems because of the way Hive formats these lines. | ||
// Then, remove empty lines. Do not sort the results. | ||
answer.filterNot( | ||
r => nonDeterministicLine(r) || ignoredLine(r)).map(_.trim).filterNot(_ == "") | ||
case plan => if (isSorted(plan)) answer else answer.sorted | ||
} | ||
orderedAnswer.map(cleanPaths) | ||
|
@@ -169,6 +175,16 @@ abstract class HiveComparisonTest | |
protected def nonDeterministicLine(line: String) = | ||
nonDeterministicLineIndicators.exists(line contains _) | ||
|
||
// This list contains indicators for those lines which do not have actual results and we | ||
// want to ignore. | ||
lazy val ignoredLineIndicators = Seq( | ||
"# Partition Information", | ||
"# col_name" | ||
) | ||
|
||
protected def ignoredLine(line: String) = | ||
ignoredLineIndicators.exists(line contains _) | ||
|
||
/** | ||
* Removes non-deterministic paths from `str` so cached answers will compare correctly. | ||
*/ | ||
|
@@ -329,11 +345,17 @@ abstract class HiveComparisonTest | |
|
||
if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) && preparedHive != catalyst) { | ||
|
||
val hivePrintOut = s"== HIVE - ${hive.size} row(s) ==" +: preparedHive | ||
val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive | ||
val catalystPrintOut = s"== CATALYST - ${catalyst.size} row(s) ==" +: catalyst | ||
|
||
val resultComparison = sideBySide(hivePrintOut, catalystPrintOut).mkString("\n") | ||
|
||
println("hive output") | ||
hive.foreach(println) | ||
|
||
println("catalyst printout") | ||
catalyst.foreach(println) | ||
|
||
if (recomputeCache) { | ||
logger.warn(s"Clearing cache files for failed test $testCaseName") | ||
hiveCacheFiles.foreach(_.delete()) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove this block