diff --git a/velox/connectors/hive/storage_adapters/hdfs/HdfsWriteFile.cpp b/velox/connectors/hive/storage_adapters/hdfs/HdfsWriteFile.cpp index 883ab1f64904..e37dcac4c05d 100644 --- a/velox/connectors/hive/storage_adapters/hdfs/HdfsWriteFile.cpp +++ b/velox/connectors/hive/storage_adapters/hdfs/HdfsWriteFile.cpp @@ -25,6 +25,13 @@ HdfsWriteFile::HdfsWriteFile( short replication, int blockSize) : hdfsClient_(hdfsClient), filePath_(path) { + auto pos = filePath_.rfind("/"); + auto parentDir = filePath_.substr(0, pos + 1); + // Ensure the parent directory exists: if hdfsExists() reports -1 for it, create it before opening the file. + if (hdfsExists(hdfsClient_, parentDir.c_str()) == -1) { + hdfsCreateDirectory(hdfsClient_, parentDir.c_str()); + } + hdfsFile_ = hdfsOpenFile( hdfsClient_, filePath_.c_str(), diff --git a/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp index 51d933167333..f9d50f5985d6 100644 --- a/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp @@ -429,6 +429,20 @@ TEST_F(HdfsFileSystemTest, writeFlushFailures) { "Cannot flush HDFS file because file handle is null, file path: /a.txt"); } +TEST_F(HdfsFileSystemTest, writeWithParentDirNotExist) { + std::string path = "/parent/directory/that/does/not/exist/a.txt"; + auto writeFile = openFileForWrite(path); + std::string data = "abcdefghijk"; + writeFile->append(data); + writeFile->flush(); + ASSERT_EQ(writeFile->size(), 0); + writeFile->append(data); + writeFile->append(data); + writeFile->flush(); + writeFile->close(); + ASSERT_EQ(writeFile->size(), data.size() * 3); +} + TEST_F(HdfsFileSystemTest, readFailures) { struct hdfsBuilder* builder = hdfsNewBuilder(); hdfsBuilderSetNameNode(builder, localhost.c_str());