diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index 6359183d90bb4..fdf119477ab8b 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -148,6 +148,22 @@ Status ValidateFilePath(const AzurePath& path) {
   return Status::OK();
 }
 
+// Builds an IOError describing `raw_response`. The message has the form
+// "<context>: <url>: <reason phrase> (<status code>): <response body>".
+// `raw_response` must not be null; it is dereferenced unconditionally.
+Status StatusFromErrorResponse(const std::string& url,
+                               Azure::Core::Http::RawResponse* raw_response,
+                               const std::string& context) {
+  const auto& body = raw_response->GetBody();
+  // No Azure specification guarantees that an error response body is
+  // free of binary data, but we assume that it is text. We hope that
+  // the error response body has useful information about the error.
+  std::string_view body_text(reinterpret_cast<const char*>(body.data()), body.size());
+  return Status::IOError(context, ": ", url, ": ", raw_response->GetReasonPhrase(), " (",
+                         static_cast<int>(raw_response->GetStatusCode()),
+                         "): ", body_text);
+}
+
 template
 std::string FormatValue(typename TypeTraits::CType value) {
   struct StringAppender {
@@ -611,6 +627,107 @@ class AzureFileSystem::Impl {
     RETURN_NOT_OK(ptr->Init());
     return ptr;
   }
+
+  // Creates exactly one entity. A path with only a container name creates
+  // that container. Otherwise it creates the directory `path.path_to_file`
+  // when hierarchical namespace is enabled for the container, and is a
+  // no-op returning OK when it is not.
+  Status CreateDir(const AzurePath& path) {
+    if (path.container.empty()) {
+      return Status::Invalid("Cannot create an empty container");
+    }
+
+    if (path.path_to_file.empty()) {
+      auto container_client =
+          blob_service_client_->GetBlobContainerClient(path.container);
+      try {
+        auto response = container_client.Create();
+        if (response.Value.Created) {
+          return Status::OK();
+        } else {
+          return StatusFromErrorResponse(
+              container_client.GetUrl(), response.RawResponse.get(),
+              "Failed to create a container: " + path.container);
+        }
+      } catch (const Azure::Storage::StorageException& exception) {
+        return internal::ExceptionToStatus(
+            "Failed to create a container: " + path.container + ": " +
+                container_client.GetUrl(),
+            exception);
+      }
+    }
+
+    ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
+                          hierarchical_namespace_.Enabled(path.container));
+    if (!hierarchical_namespace_enabled) {
+      // Without hierarchical namespace enabled, Azure Blob Storage has no
+      // directories. Therefore we can't, and don't need to, create one. Simply
+      // creating a blob with `/` in the name implies directories.
+      return Status::OK();
+    }
+
+    auto directory_client = datalake_service_client_->GetFileSystemClient(path.container)
+                                .GetDirectoryClient(path.path_to_file);
+    try {
+      auto response = directory_client.Create();
+      if (response.Value.Created) {
+        return Status::OK();
+      } else {
+        return StatusFromErrorResponse(
+            directory_client.GetUrl(), response.RawResponse.get(),
+            "Failed to create a directory: " + path.path_to_file);
+      }
+    } catch (const Azure::Storage::StorageException& exception) {
+      return internal::ExceptionToStatus(
+          "Failed to create a directory: " + path.path_to_file + ": " +
+              directory_client.GetUrl(),
+          exception);
+    }
+  }
+
+  // Calls CreateIfNotExists() on the container and then, only when
+  // hierarchical namespace is enabled for it, on the directory
+  // `path.path_to_file`. The results of both calls are not inspected;
+  // only a thrown StorageException is reported as an error.
+  Status CreateDirRecursive(const AzurePath& path) {
+    if (path.container.empty()) {
+      return Status::Invalid("Cannot create an empty container");
+    }
+
+    auto container_client = blob_service_client_->GetBlobContainerClient(path.container);
+    try {
+      container_client.CreateIfNotExists();
+    } catch (const Azure::Storage::StorageException& exception) {
+      return internal::ExceptionToStatus(
+          "Failed to create a container: " + path.container + " (" +
+              container_client.GetUrl() + ")",
+          exception);
+    }
+
+    ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
+                          hierarchical_namespace_.Enabled(path.container));
+    if (!hierarchical_namespace_enabled) {
+      // We can't create a directory without hierarchical namespace
+      // support. Without it there are only "virtual directories", and
+      // a "virtual directory" comes into existence automatically when
+      // a blob with a ".../.../blob" blob name is created, so there
+      // is nothing to do here.
+      return Status::OK();
+    }
+
+    auto directory_client = datalake_service_client_->GetFileSystemClient(path.container)
+                                .GetDirectoryClient(path.path_to_file);
+    try {
+      directory_client.CreateIfNotExists();
+    } catch (const Azure::Storage::StorageException& exception) {
+      return internal::ExceptionToStatus(
+          "Failed to create a directory: " + path.path_to_file + " (" +
+              directory_client.GetUrl() + ")",
+          exception);
+    }
+
+    return Status::OK();
+  }
 };
 
 const AzureOptions& AzureFileSystem::options() const { return impl_->options(); }
@@ -636,7 +753,12 @@ Result AzureFileSystem::GetFileInfo(const FileSelector& select)
 }
 
 Status AzureFileSystem::CreateDir(const std::string& path, bool recursive) {
-  return Status::NotImplemented("The Azure FileSystem is not fully implemented");
+  ARROW_ASSIGN_OR_RAISE(auto p, AzurePath::FromString(path));
+  if (recursive) {
+    return impl_->CreateDirRecursive(p);
+  } else {
+    return impl_->CreateDir(p);
+  }
 }
 
 Status AzureFileSystem::DeleteDir(const std::string& path) {
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc
index c08a4b50b77a8..ecf0a19f684eb 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -49,6 +49,7 @@
 #include
 #include
 
+#include "arrow/filesystem/path_util.h"
 #include "arrow/filesystem/test_util.h"
 #include "arrow/result.h"
 #include "arrow/testing/gtest_util.h"
@@ -225,6 +226,10 @@ class AzureFileSystemTest : public ::testing::Test {
     return s;
   }
 
+  std::string RandomContainerName() { return RandomChars(32); }
+
+  std::string RandomDirectoryName() { return RandomChars(32); }
+
   void UploadLines(const std::vector& lines, const char* path_to_file,
                    int total_size) {
     // TODO(GH-38333): Switch to using Azure filesystem to write once its implemented.
@@ -267,6 +272,22 @@ class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest {
   }
 };
 
+// How to enable this test:
+//
+// You need an Azure account. You should be able to create a free
+// account at https://azure.microsoft.com/en-gb/free/ . You should be
+// able to create a storage account through the portal Web UI.
+//
+// See also the official documentation on how to create a storage account:
+// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
+//
+// A few suggestions on configuration:
+//
+// * Use Standard general-purpose v2, not premium.
+// * Use LRS redundancy.
+// * Obviously you need to enable hierarchical namespace.
+// * Set the default access tier to hot.
+// * SFTP, NFS and file shares are not required.
 class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest {
   Result MakeOptions() override {
     AzureOptions options;
@@ -396,6 +417,96 @@ TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObject) {
   RunGetFileInfoObjectTest();
 }
 
+TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) {
+  ASSERT_RAISES(Invalid, fs_->CreateDir("", false));
+}
+
+TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) {
+  auto container_name = RandomContainerName();
+  ASSERT_OK(fs_->CreateDir(container_name, false));
+  arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+}
+
+TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerAndDirectory) {
+  const auto path = PreexistingContainerPath() + RandomDirectoryName();
+  ASSERT_OK(fs_->CreateDir(path, false));
+  // Without hierarchical namespace support there are only virtual
+  // directories, so CreateDir() does nothing.
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirSuccessContainerAndDirectory) {
+  const auto path = PreexistingContainerPath() + RandomDirectoryName();
+  ASSERT_OK(fs_->CreateDir(path, false));
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+}
+
+TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) {
+  const auto path = std::string("not-a-container/new-directory");
+  ASSERT_RAISES(IOError, fs_->CreateDir(path, false));
+}
+
+TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) {
+  ASSERT_RAISES(Invalid, fs_->CreateDir("", true));
+}
+
+TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessContainerOnly) {
+  auto container_name = RandomContainerName();
+  ASSERT_OK(fs_->CreateDir(container_name, true));
+  arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+}
+
+TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerOnly) {
+  auto container_name = RandomContainerName();
+  ASSERT_OK(fs_->CreateDir(container_name, true));
+  arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+}
+
+TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) {
+  const auto parent = PreexistingContainerPath() + RandomDirectoryName();
+  const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+  ASSERT_OK(fs_->CreateDir(path, true));
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+  arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+}
+
+TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) {
+  const auto parent = PreexistingContainerPath() + RandomDirectoryName();
+  const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+  ASSERT_OK(fs_->CreateDir(path, true));
+  // Without hierarchical namespace support there are only virtual
+  // directories, so CreateDir() does nothing.
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+  arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNamespaceFileSystemTest,
+       CreateDirRecursiveSuccessContainerAndDirectory) {
+  auto container_name = RandomContainerName();
+  const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName());
+  const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+  ASSERT_OK(fs_->CreateDir(path, true));
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+  arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+  arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+}
+
+TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerAndDirectory) {
+  auto container_name = RandomContainerName();
+  const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName());
+  const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+  ASSERT_OK(fs_->CreateDir(path, true));
+  // Without hierarchical namespace support there are only virtual
+  // directories, so CreateDir() does nothing.
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+  arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+  arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+}
+
+TEST_F(AzuriteFileSystemTest, CreateDirUri) {
+  ASSERT_RAISES(Invalid, fs_->CreateDir("abfs://" + RandomContainerName(), true));
+}
+
 TEST_F(AzuriteFileSystemTest, OpenInputStreamString) {
   std::shared_ptr stream;
   ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath()));
@@ -455,7 +566,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) {
 }
 
 TEST_F(AzuriteFileSystemTest, OpenInputStreamUri) {
-  ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfss://" + PreexistingObjectPath()));
+  ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + PreexistingObjectPath()));
 }
 
 TEST_F(AzuriteFileSystemTest, OpenInputStreamTrailingSlash) {