Skip to content

Commit

Permalink
[SPARK-22280][SQL][TEST] Improve StatisticsSuite to test `convertMeta…
Browse files Browse the repository at this point in the history
…store` properly
  • Loading branch information
dongjoon-hyun committed Oct 15, 2017
1 parent e8547ff commit 2a0a3f1
Showing 1 changed file with 16 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -937,26 +937,22 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
}

test("test statistics of LogicalRelation converted from Hive serde tables") {
val parquetTable = "parquetTable"
val orcTable = "orcTable"
withTable(parquetTable, orcTable) {
sql(s"CREATE TABLE $parquetTable (key STRING, value STRING) STORED AS PARQUET")
sql(s"CREATE TABLE $orcTable (key STRING, value STRING) STORED AS ORC")
sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src")
sql(s"INSERT INTO TABLE $orcTable SELECT * FROM src")

// the default value for `spark.sql.hive.convertMetastoreParquet` is true, here we just set it
// for robustness
withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "true") {
checkTableStats(parquetTable, hasSizeInBytes = false, expectedRowCounts = None)
sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS")
checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
}
withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") {
// We still can get tableSize from Hive before Analyze
checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = None)
sql(s"ANALYZE TABLE $orcTable COMPUTE STATISTICS")
checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
Seq("orc", "parquet").foreach { format =>
Seq("true", "false").foreach { isConverted =>
withSQLConf(
HiveUtils.CONVERT_METASTORE_ORC.key -> isConverted,
HiveUtils.CONVERT_METASTORE_PARQUET.key -> isConverted) {
withTable(format) {
sql(s"CREATE TABLE $format (key STRING, value STRING) STORED AS $format")
sql(s"INSERT INTO TABLE $format SELECT * FROM src")

val hasHiveStats = !isConverted.toBoolean
checkTableStats(format, hasSizeInBytes = hasHiveStats, expectedRowCounts = None)

sql(s"ANALYZE TABLE $format COMPUTE STATISTICS")
checkTableStats(format, hasSizeInBytes = true, expectedRowCounts = Some(500))
}
}
}
}
}
Expand Down

0 comments on commit 2a0a3f1

Please sign in to comment.