Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-20594][SQL]The staging directory should be a child directory starts with "." to avoid being deleted if we set hive.exec.stagingdir under the table directory. #17858

Closed
wants to merge 9 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.hive.execution

import java.io.IOException
import java.io.{File, IOException}
import java.net.URI
import java.text.SimpleDateFormat
import java.util.{Date, Locale, Random}
Expand Down Expand Up @@ -97,12 +97,24 @@ case class InsertIntoHiveTable(
val inputPathUri: URI = inputPath.toUri
val inputPathName: String = inputPathUri.getPath
val fs: FileSystem = inputPath.getFileSystem(hadoopConf)
val stagingPathName: String =
var stagingPathName: String =
if (inputPathName.indexOf(stagingDir) == -1) {
new Path(inputPathName, stagingDir).toString
} else {
inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length)
}

// SPARK-20594: This is a workaround for a Hive bug. When users set hive.exec.stagingdir
// under the table directory, the staging directory must be a child directory whose name
// starts with "." so that it is not deleted.
if (FileUtils.isSubDir(new Path(stagingPathName), inputPath, fs) &&
!stagingPathName.stripPrefix(inputPathName).stripPrefix(File.separator).startsWith(".")) {
logDebug(s"The staging dir '$stagingPathName' should be a child directory starts " +
"with '.' to avoid being deleted if we set hive.exec.stagingdir under the table " +
"directory.")
stagingPathName = new Path(inputPathName, ".hive-staging").toString
}

val dir: Path =
fs.makeQualified(
new Path(stagingPathName + "_" + executionId + "-" + TaskRunner.getTaskRunnerID))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -494,4 +494,15 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef
spark.table("t").write.insertInto(tableName)
}
}

test("SPARK-20594: hive.exec.stagingdir was deleted by Hive") {
  // Regression scenario: the staging dir is configured relative to the table directory
  // and its name does not begin with ".".
  val stagingDirConf = "hive.exec.stagingdir" -> "./test"

  withSQLConf(stagingDirConf) {
    withTable("test_table") {
      sql("CREATE TABLE test_table (key int)")
      // Overwrite the table, then read it back to confirm the inserted row is present.
      sql("INSERT OVERWRITE TABLE test_table SELECT 1")
      checkAnswer(sql("SELECT * FROM test_table"), Row(1))
    }
  }
}
}