From 4e58f7ca0016c2b2d8a859a0c5965df3b15523e0 Mon Sep 17 00:00:00 2001
From: Felipe Oliveira Carvalho
Date: Thu, 14 Dec 2023 15:25:28 -0300
Subject: [PATCH] GH-39119: [C++] Refactor the Azure FS tests and filesystem class instantiation (#39207)

### Rationale for this change

This PR contains some unrelated improvements (like better docs) and some nitpicky fixes. The test refactoring makes it easier to see which environments the tests run on and makes it possible to instantiate them with different options in the future, once we extend `AzureOptions`.

### What changes are included in this PR?

- Random cleanups
- Short namespace aliases
- Refactoring of the tests (multiple environments, TYPED_TEST_SUITE, explicit preexisting data setup)
- Cleanup of the `AzureOptions` class

### Are these changes tested?

Yes. I created Azure Storage accounts to test with and without Hierarchical Namespace support. I also ran the tests in a shell without my environment variables to ensure the test-skipping behavior is correct.

### Are there any user-facing changes?

Changes to `AzureOptions`, but since `AzureFileSystem` is not really used yet, these breaking changes won't be a problem.

* Closes: #39119

Authored-by: Felipe Oliveira Carvalho
Signed-off-by: Felipe Oliveira Carvalho
---
 cpp/src/arrow/filesystem/azurefs.cc | 559 ++++----
 cpp/src/arrow/filesystem/azurefs.h | 150 ++-
 cpp/src/arrow/filesystem/azurefs_internal.cc | 6 +
 cpp/src/arrow/filesystem/azurefs_internal.h | 3 -
 cpp/src/arrow/filesystem/azurefs_test.cc | 1269 ++++++++++--------
 5 files changed, 1094 insertions(+), 893 deletions(-)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index 824a8fb531483..217885364089b 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Http = Azure::Core::Http;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+ const bool equals = backend == other.backend && + default_metadata == other.default_metadata && + account_blob_url_ == other.account_blob_url_ && + account_dfs_url_ == other.account_dfs_url_ && + credential_kind_ == other.credential_kind_; + if (!equals) { + return false; + } + switch (credential_kind_) { + case CredentialKind::kAnonymous: + return true; + case CredentialKind::kStorageSharedKeyCredential: + return storage_shared_key_credential_->AccountName == + other.storage_shared_key_credential_->AccountName; + } + DCHECK(false); + return false; } -Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& account_name, - const std::string& account_key) { - if (this->backend == AzureBackend::Azurite) { - account_blob_url = "http://127.0.0.1:10000/" + account_name + "/"; - account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/"; +Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key) { + if (this->backend == AzureBackend::kAzurite) { + account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/"; + account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/"; } else { - account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/"; - account_blob_url = "https://" + account_name + ".blob.core.windows.net/"; + account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/"; + account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/"; } - storage_credentials_provider = - std::make_shared(account_name, - account_key); - credentials_kind = AzureCredentialsKind::StorageCredentials; + credential_kind_ = CredentialKind::kStorageSharedKeyCredential; + storage_shared_key_credential_ = + std::make_shared(account_name, account_key); return Status::OK(); } +Result> AzureOptions::MakeBlobServiceClient() + const { + switch (credential_kind_) { + case CredentialKind::kAnonymous: + break; + case CredentialKind::kStorageSharedKeyCredential: + return std::make_unique(account_blob_url_, + storage_shared_key_credential_); + } + return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); +} + +Result> +AzureOptions::MakeDataLakeServiceClient() const { + switch (credential_kind_) { + case CredentialKind::kAnonymous: + break; + case CredentialKind::kStorageSharedKeyCredential: + return std::make_unique( + account_dfs_url_, storage_shared_key_credential_); + } + return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); +} + namespace { -// An AzureFileSystem represents a single Azure storage -// account. AzureLocation describes a container and path within -// that storage account. +// An AzureFileSystem represents an Azure storage account. An AzureLocation describes a +// container in that storage account and a path within that container. 
struct AzureLocation { std::string all; std::string container; @@ -82,8 +125,8 @@ struct AzureLocation { // path_parts = [testdir, testfile.txt] if (internal::IsLikelyUri(string)) { return Status::Invalid( - "Expected an Azure object location of the form 'container/path...', got a URI: " - "'", + "Expected an Azure object location of the form 'container/path...'," + " got a URI: '", string, "'"); } auto first_sep = string.find_first_of(internal::kSep); @@ -130,14 +173,15 @@ struct AzureLocation { private: Status Validate() { auto status = internal::ValidateAbstractPathParts(path_parts); - if (!status.ok()) { - return Status::Invalid(status.message(), " in location ", all); - } else { - return status; - } + return status.ok() ? status : Status::Invalid(status.message(), " in location ", all); } }; +Status ExceptionToStatus(const std::string& prefix, + const Azure::Storage::StorageException& exception) { + return Status::IOError(prefix, " Azure Error: ", exception.what()); +} + Status PathNotFound(const AzureLocation& location) { return ::arrow::fs::internal::PathNotFound(location.all); } @@ -153,23 +197,41 @@ Status ValidateFileLocation(const AzureLocation& location) { if (location.path.empty()) { return NotAFile(location); } - ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(location.path)); - return Status::OK(); + return internal::AssertNoTrailingSlash(location.path); +} + +std::string_view BodyTextView(const Http::RawResponse& raw_response) { + const auto& body = raw_response.GetBody(); +#ifndef NDEBUG + auto& headers = raw_response.GetHeaders(); + auto content_type = headers.find("Content-Type"); + if (content_type != headers.end()) { + DCHECK_EQ(std::string_view{content_type->second}.substr(5), "text/"); + } +#endif + return std::string_view{reinterpret_cast(body.data()), body.size()}; } Status StatusFromErrorResponse(const std::string& url, - Azure::Core::Http::RawResponse* raw_response, + const Http::RawResponse& raw_response, const std::string& context) { - const auto& body = raw_response->GetBody(); // There isn't an Azure specification that response body on error // doesn't contain any binary data but we assume it. We hope that // error response body has useful information for the error. 
- std::string_view body_text(reinterpret_cast(body.data()), body.size()); - return Status::IOError(context, ": ", url, ": ", raw_response->GetReasonPhrase(), " (", - static_cast(raw_response->GetStatusCode()), + auto body_text = BodyTextView(raw_response); + return Status::IOError(context, ": ", url, ": ", raw_response.GetReasonPhrase(), " (", + static_cast(raw_response.GetStatusCode()), "): ", body_text); } +bool IsContainerNotFound(const Storage::StorageException& exception) { + if (exception.ErrorCode == "ContainerNotFound") { + DCHECK_EQ(exception.StatusCode, Http::HttpStatusCode::NotFound); + return true; + } + return false; +} + template std::string FormatValue(typename TypeTraits::CType value) { struct StringAppender { @@ -185,7 +247,7 @@ std::string FormatValue(typename TypeTraits::CType value) { } std::shared_ptr PropertiesToMetadata( - const Azure::Storage::Blobs::Models::BlobProperties& properties) { + const Blobs::Models::BlobProperties& properties) { auto metadata = std::make_shared(); // Not supported yet: // * properties.ObjectReplicationSourceProperties @@ -316,7 +378,7 @@ std::shared_ptr PropertiesToMetadata( class ObjectInputFile final : public io::RandomAccessFile { public: - ObjectInputFile(std::shared_ptr blob_client, + ObjectInputFile(std::shared_ptr blob_client, const io::IOContext& io_context, AzureLocation location, int64_t size = kNoSize) : blob_client_(std::move(blob_client)), @@ -334,11 +396,11 @@ class ObjectInputFile final : public io::RandomAccessFile { content_length_ = properties.Value.BlobSize; metadata_ = PropertiesToMetadata(properties.Value); return Status::OK(); - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { return PathNotFound(location_); } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties failed for '" + blob_client_->GetUrl() + "' with an unexpected Azure error. Cannot initialise an ObjectInputFile " "without knowing the file size.", @@ -411,20 +473,20 @@ class ObjectInputFile final : public io::RandomAccessFile { } // Read the desired range of bytes - Azure::Core::Http::HttpRange range{position, nbytes}; - Azure::Storage::Blobs::DownloadBlobToOptions download_options; + Http::HttpRange range{position, nbytes}; + Storage::Blobs::DownloadBlobToOptions download_options; download_options.Range = range; try { return blob_client_ ->DownloadTo(reinterpret_cast(out), nbytes, download_options) .Value.ContentRange.Length.Value(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + - "' at position " + std::to_string(position) + - " for " + std::to_string(nbytes) + - " bytes failed with an Azure error. ReadAt " - "failed to read the required byte range.", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + + "' at position " + std::to_string(position) + " for " + + std::to_string(nbytes) + + " bytes failed with an Azure error. 
ReadAt " + "failed to read the required byte range.", + exception); } } @@ -459,7 +521,7 @@ class ObjectInputFile final : public io::RandomAccessFile { } private: - std::shared_ptr blob_client_; + std::shared_ptr blob_client_; const io::IOContext io_context_; AzureLocation location_; @@ -469,12 +531,11 @@ class ObjectInputFile final : public io::RandomAccessFile { std::shared_ptr metadata_; }; -Status CreateEmptyBlockBlob( - std::shared_ptr block_blob_client) { +Status CreateEmptyBlockBlob(std::shared_ptr block_blob_client) { try { block_blob_client->UploadFrom(nullptr, 0); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "UploadFrom failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. There is no existing blob at this " "location or the existing blob must be replaced so ObjectAppendStream must " @@ -484,12 +545,12 @@ Status CreateEmptyBlockBlob( return Status::OK(); } -Result GetBlockList( - std::shared_ptr block_blob_client) { +Result GetBlockList( + std::shared_ptr block_blob_client) { try { return block_blob_client->GetBlockList().Value; - } catch (Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (Storage::StorageException& exception) { + return ExceptionToStatus( "GetBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. Cannot write to a file without first " "fetching the existing block list.", @@ -497,19 +558,19 @@ Result GetBlockList( } } -Azure::Storage::Metadata ArrowMetadataToAzureMetadata( +Storage::Metadata ArrowMetadataToAzureMetadata( const std::shared_ptr& arrow_metadata) { - Azure::Storage::Metadata azure_metadata; + Storage::Metadata azure_metadata; for (auto key_value : arrow_metadata->sorted_pairs()) { azure_metadata[key_value.first] = key_value.second; } return azure_metadata; } -Status CommitBlockList( - std::shared_ptr block_blob_client, - const std::vector& block_ids, const Azure::Storage::Metadata& metadata) { - Azure::Storage::Blobs::CommitBlockListOptions options; +Status CommitBlockList(std::shared_ptr block_blob_client, + const std::vector& block_ids, + const Storage::Metadata& metadata) { + Blobs::CommitBlockListOptions options; options.Metadata = metadata; try { // CommitBlockList puts all block_ids in the latest element. That means in the case of @@ -517,8 +578,8 @@ Status CommitBlockList( // previously committed blocks. // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list?tabs=microsoft-entra-id#request-body block_blob_client->CommitBlockList(block_ids, options); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "CommitBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. 
Committing is required to flush an " "output/append stream.", @@ -529,11 +590,10 @@ Status CommitBlockList( class ObjectAppendStream final : public io::OutputStream { public: - ObjectAppendStream( - std::shared_ptr block_blob_client, - const io::IOContext& io_context, const AzureLocation& location, - const std::shared_ptr& metadata, - const AzureOptions& options, int64_t size = kNoSize) + ObjectAppendStream(std::shared_ptr block_blob_client, + const io::IOContext& io_context, const AzureLocation& location, + const std::shared_ptr& metadata, + const AzureOptions& options, int64_t size = kNoSize) : block_blob_client_(std::move(block_blob_client)), io_context_(io_context), location_(location), @@ -560,11 +620,11 @@ class ObjectAppendStream final : public io::OutputStream { auto properties = block_blob_client_->GetProperties(); content_length_ = properties.Value.BlobSize; pos_ = content_length_; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_)); } else { - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties failed for '" + block_blob_client_->GetUrl() + "' with an unexpected Azure error. Cannot initialise an " "ObjectAppendStream without knowing whether a file already exists at " @@ -634,7 +694,7 @@ class ObjectAppendStream final : public io::OutputStream { std::shared_ptr owned_buffer = nullptr) { RETURN_NOT_OK(CheckClosed("append")); auto append_data = reinterpret_cast(data); - Azure::Core::IO::MemoryBodyStream block_content(append_data, nbytes); + Core::IO::MemoryBodyStream block_content(append_data, nbytes); if (block_content.Length() == 0) { return Status::OK(); } @@ -657,13 +717,13 @@ class ObjectAppendStream final : public io::OutputStream { // if the blob was previously created with one block, with id `00001-arrow` then the // next block we append will conflict with that, and cause corruption. new_block_id += "-arrow"; - new_block_id = Azure::Core::Convert::Base64Encode( + new_block_id = Core::Convert::Base64Encode( std::vector(new_block_id.begin(), new_block_id.end())); try { block_blob_client_->StageBlock(new_block_id, block_content); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "StageBlock failed for '" + block_blob_client_->GetUrl() + "' new_block_id: '" + new_block_id + "' with an unexpected Azure error. 
Staging new blocks is fundamental to " @@ -676,7 +736,7 @@ class ObjectAppendStream final : public io::OutputStream { return Status::OK(); } - std::shared_ptr block_blob_client_; + std::shared_ptr block_blob_client_; const io::IOContext io_context_; const AzureLocation location_; @@ -684,7 +744,7 @@ class ObjectAppendStream final : public io::OutputStream { int64_t pos_ = 0; int64_t content_length_ = kNoSize; std::vector block_ids_; - Azure::Storage::Metadata metadata_; + Storage::Metadata metadata_; }; } // namespace @@ -693,27 +753,31 @@ class ObjectAppendStream final : public io::OutputStream { // AzureFilesystem Implementation class AzureFileSystem::Impl { - public: + private: io::IOContext io_context_; - std::unique_ptr - datalake_service_client_; - std::unique_ptr blob_service_client_; AzureOptions options_; - internal::HierarchicalNamespaceDetector hierarchical_namespace_; - explicit Impl(AzureOptions options, io::IOContext io_context) - : io_context_(io_context), options_(std::move(options)) {} + std::unique_ptr datalake_service_client_; + std::unique_ptr blob_service_client_; + internal::HierarchicalNamespaceDetector hns_detector_; - Status Init() { - blob_service_client_ = std::make_unique( - options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = - std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); - RETURN_NOT_OK(hierarchical_namespace_.Init(datalake_service_client_.get())); - return Status::OK(); - } + Impl(AzureOptions options, io::IOContext io_context) + : io_context_(std::move(io_context)), options_(std::move(options)) {} + public: + static Result> Make(AzureOptions options, + io::IOContext io_context) { + auto self = std::unique_ptr( + new AzureFileSystem::Impl(std::move(options), std::move(io_context))); + ARROW_ASSIGN_OR_RAISE(self->blob_service_client_, + self->options_.MakeBlobServiceClient()); + ARROW_ASSIGN_OR_RAISE(self->datalake_service_client_, + self->options_.MakeDataLakeServiceClient()); + RETURN_NOT_OK(self->hns_detector_.Init(self->datalake_service_client_.get())); + return self; + } + + io::IOContext& io_context() { return io_context_; } const AzureOptions& options() const { return options_; } public: @@ -722,12 +786,10 @@ class AzureFileSystem::Impl { info.set_path(location.all); if (location.container.empty()) { - // The location is invalid if the container is empty but not - // path. + // The location is invalid if the container is empty but the path is not. DCHECK(location.path.empty()); - // The location must refer to the root of the Azure storage - // account. This is a directory, and there isn't any extra - // metadata to fetch. + // This location must be derived from the root path. FileInfo should describe it + // as a directory and there isn't any extra metadata to fetch. 
info.set_type(FileType::Directory); return info; } @@ -739,20 +801,22 @@ class AzureFileSystem::Impl { auto properties = container_client.GetProperties(); info.set_type(FileType::Directory); info.set_mtime( - std::chrono::system_clock::time_point(properties.Value.LastModified)); + std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { info.set_type(FileType::NotFound); return info; } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties for '" + container_client.GetUrl() + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the container exists.", exception); } } + + // There is a path to search within the container. auto file_client = datalake_service_client_->GetFileSystemClient(location.container) .GetFileClient(location.path); try { @@ -763,6 +827,8 @@ class AzureFileSystem::Impl { // For a path with a trailing slash a hierarchical namespace may return a blob // with that trailing slash removed. For consistency with flat namespace and // other filesystems we chose to return NotFound. + // + // NOTE(felipecrv): could this be an empty directory marker? info.set_type(FileType::NotFound); return info; } else { @@ -770,12 +836,12 @@ class AzureFileSystem::Impl { info.set_size(properties.Value.FileSize); } info.set_mtime( - std::chrono::system_clock::time_point(properties.Value.LastModified)); + std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { // If the hierarchical namespace is enabled, then the storage account will have // explicit directories. Neither a file nor a directory was found. @@ -784,12 +850,10 @@ class AzureFileSystem::Impl { } // On flat namespace accounts there are no real directories. Directories are only // implied by using `/` in the blob name. - Azure::Storage::Blobs::ListBlobsOptions list_blob_options; - + Blobs::ListBlobsOptions list_blob_options; // If listing the prefix `path.path_to_file` with trailing slash returns at least // one result then `path` refers to an implied directory. - auto prefix = internal::EnsureTrailingSlash(location.path); - list_blob_options.Prefix = prefix; + list_blob_options.Prefix = internal::EnsureTrailingSlash(location.path); // We only need to know if there is at least one result, so minimise page size // for efficiency. list_blob_options.PageSizeHint = 1; @@ -798,21 +862,19 @@ class AzureFileSystem::Impl { auto paged_list_result = blob_service_client_->GetBlobContainerClient(location.container) .ListBlobs(list_blob_options); - if (paged_list_result.Blobs.size() > 0) { - info.set_type(FileType::Directory); - } else { - info.set_type(FileType::NotFound); - } + auto file_type = paged_list_result.Blobs.size() > 0 ? 
FileType::Directory + : FileType::NotFound; + info.set_type(file_type); return info; - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "ListBlobs for '" + prefix + + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + "ListBlobs for '" + *list_blob_options.Prefix + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the path should be considered an implied directory.", exception); } } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties for '" + file_client.GetUrl() + "' failed with an unexpected " "Azure error. GetFileInfo is unable to determine whether the path exists.", @@ -822,9 +884,8 @@ class AzureFileSystem::Impl { private: template - Status VisitContainers(const Azure::Core::Context& context, - OnContainer&& on_container) const { - Azure::Storage::Blobs::ListBlobContainersOptions options; + Status VisitContainers(const Core::Context& context, OnContainer&& on_container) const { + Blobs::ListBlobContainersOptions options; try { auto container_list_response = blob_service_client_->ListBlobContainers(options, context); @@ -834,14 +895,14 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(on_container(container)); } } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus("Failed to list account containers.", exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to list account containers.", exception); } return Status::OK(); } - static FileInfo FileInfoFromBlob(const std::string& container, - const Azure::Storage::Blobs::Models::BlobItem& blob) { + static FileInfo FileInfoFromBlob(std::string_view container, + const Blobs::Models::BlobItem& blob) { auto path = internal::ConcatAbstractPath(container, blob.Name); if (internal::HasTrailingSlash(blob.Name)) { return DirectoryFileInfoFromPath(path); @@ -852,7 +913,7 @@ class AzureFileSystem::Impl { return info; } - static FileInfo DirectoryFileInfoFromPath(const std::string& path) { + static FileInfo DirectoryFileInfoFromPath(std::string_view path) { return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory}; } @@ -870,13 +931,13 @@ class AzureFileSystem::Impl { /// \pre container_client is the client for the container named like the first /// segment of select.base_dir. 
Status GetFileInfoWithSelectorFromContainer( - const Azure::Storage::Blobs::BlobContainerClient& container_client, - const Azure::Core::Context& context, Azure::Nullable page_size_hint, - const FileSelector& select, FileInfoVector* acc_results) { + const Blobs::BlobContainerClient& container_client, const Core::Context& context, + Azure::Nullable page_size_hint, const FileSelector& select, + FileInfoVector* acc_results) { ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir)); bool found = false; - Azure::Storage::Blobs::ListBlobsOptions options; + Blobs::ListBlobsOptions options; if (internal::IsEmptyPath(base_location.path)) { // If the base_dir is the root of the container, then we want to list all blobs in // the container and the Prefix should be empty and not even include the trailing @@ -887,7 +948,7 @@ class AzureFileSystem::Impl { options.Prefix = internal::EnsureTrailingSlash(base_location.path); } options.PageSizeHint = page_size_hint; - options.Include = Azure::Storage::Blobs::Models::ListBlobsIncludeFlags::Metadata; + options.Include = Blobs::Models::ListBlobsIncludeFlags::Metadata; auto recurse = [&](const std::string& blob_prefix) noexcept -> Status { if (select.recursive && select.max_recursion > 0) { @@ -903,15 +964,14 @@ class AzureFileSystem::Impl { return Status::OK(); }; - auto process_blob = - [&](const Azure::Storage::Blobs::Models::BlobItem& blob) noexcept { - // blob.Name has trailing slash only when Prefix is an empty - // directory marker blob for the directory we're listing - // from, and we should skip it. - if (!internal::HasTrailingSlash(blob.Name)) { - acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); - } - }; + auto process_blob = [&](const Blobs::Models::BlobItem& blob) noexcept { + // blob.Name has trailing slash only when Prefix is an empty + // directory marker blob for the directory we're listing + // from, and we should skip it. + if (!internal::HasTrailingSlash(blob.Name)) { + acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); + } + }; auto process_prefix = [&](const std::string& prefix) noexcept -> Status { const auto path = internal::ConcatAbstractPath(base_location.container, prefix); acc_results->push_back(DirectoryFileInfoFromPath(path)); @@ -964,14 +1024,13 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(process_prefix(list_response.BlobPrefixes[blob_prefix_index])); } } - } catch (const Azure::Storage::StorageException& exception) { - if (exception.ErrorCode == "ContainerNotFound") { + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { found = false; } else { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + select.base_dir + ": " + - container_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to list blobs in a directory: " + + select.base_dir + ": " + container_client.GetUrl(), + exception); } } @@ -981,7 +1040,7 @@ class AzureFileSystem::Impl { } public: - Status GetFileInfoWithSelector(const Azure::Core::Context& context, + Status GetFileInfoWithSelector(const Core::Context& context, Azure::Nullable page_size_hint, const FileSelector& select, FileInfoVector* acc_results) { @@ -991,29 +1050,28 @@ class AzureFileSystem::Impl { // Without a container, the base_location is equivalent to the filesystem // root -- `/`. FileSelector::allow_not_found doesn't matter in this case // because the root always exists. 
- auto on_container = - [&](const Azure::Storage::Blobs::Models::BlobContainerItem& container) { - // Deleted containers are not listed by ListContainers. - DCHECK(!container.IsDeleted); - - // Every container is considered a directory. - FileInfo info{container.Name, FileType::Directory}; - info.set_mtime( - std::chrono::system_clock::time_point{container.Details.LastModified}); - acc_results->push_back(std::move(info)); - - // Recurse into containers (subdirectories) if requested. - if (select.recursive && select.max_recursion > 0) { - FileSelector sub_select; - sub_select.base_dir = container.Name; - sub_select.allow_not_found = true; - sub_select.recursive = true; - sub_select.max_recursion = select.max_recursion - 1; - ARROW_RETURN_NOT_OK(GetFileInfoWithSelector(context, page_size_hint, - sub_select, acc_results)); - } - return Status::OK(); - }; + auto on_container = [&](const Blobs::Models::BlobContainerItem& container) { + // Deleted containers are not listed by ListContainers. + DCHECK(!container.IsDeleted); + + // Every container is considered a directory. + FileInfo info{container.Name, FileType::Directory}; + info.set_mtime( + std::chrono::system_clock::time_point{container.Details.LastModified}); + acc_results->push_back(std::move(info)); + + // Recurse into containers (subdirectories) if requested. + if (select.recursive && select.max_recursion > 0) { + FileSelector sub_select; + sub_select.base_dir = container.Name; + sub_select.allow_not_found = true; + sub_select.recursive = true; + sub_select.max_recursion = select.max_recursion - 1; + ARROW_RETURN_NOT_OK( + GetFileInfoWithSelector(context, page_size_hint, sub_select, acc_results)); + } + return Status::OK(); + }; return VisitContainers(context, std::move(on_container)); } @@ -1026,7 +1084,7 @@ class AzureFileSystem::Impl { Result> OpenInputFile(const AzureLocation& location, AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); - auto blob_client = std::make_shared( + auto blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlobClient(location.path)); @@ -1046,7 +1104,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(info.path())); RETURN_NOT_OK(ValidateFileLocation(location)); - auto blob_client = std::make_shared( + auto blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlobClient(location.path)); @@ -1070,19 +1128,18 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - container_client.GetUrl(), response.RawResponse.get(), + container_client.GetUrl(), *response.RawResponse, "Failed to create a container: " + location.container); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a container: " + location.container + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a container: " + location.container + + ": " + container_client.GetUrl(), + exception); } } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (!hierarchical_namespace_enabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. 
Simply creating a blob with `/` @@ -1098,15 +1155,13 @@ class AzureFileSystem::Impl { if (response.Value.Created) { return Status::OK(); } else { - return StatusFromErrorResponse(directory_client.GetUrl(), - response.RawResponse.get(), + return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, "Failed to create a directory: " + location.path); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); } } @@ -1119,15 +1174,14 @@ class AzureFileSystem::Impl { blob_service_client_->GetBlobContainerClient(location.container); try { container_client.CreateIfNotExists(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a container: " + location.container + " (" + - container_client.GetUrl() + ")", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a container: " + location.container + + " (" + container_client.GetUrl() + ")", + exception); } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (!hierarchical_namespace_enabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. Simply creating a blob with `/` @@ -1141,11 +1195,10 @@ class AzureFileSystem::Impl { .GetDirectoryClient(location.path); try { directory_client.CreateIfNotExists(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a directory: " + location.path + " (" + - directory_client.GetUrl() + ")", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a directory: " + location.path + " (" + + directory_client.GetUrl() + ")", + exception); } } @@ -1158,7 +1211,7 @@ class AzureFileSystem::Impl { AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); - auto block_blob_client = std::make_shared( + auto block_blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlockBlobClient(location.path)); @@ -1180,7 +1233,7 @@ class AzureFileSystem::Impl { bool missing_dir_ok) { auto container_client = blob_service_client_->GetBlobContainerClient(location.container); - Azure::Storage::Blobs::ListBlobsOptions options; + Blobs::ListBlobsOptions options; if (!location.path.empty()) { options.Prefix = internal::EnsureTrailingSlash(location.path); } @@ -1200,19 +1253,17 @@ class AzureFileSystem::Impl { continue; } auto batch = container_client.CreateBatch(); - std::vector> + std::vector> deferred_responses; for (const auto& blob_item : list_response.Blobs) { deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); } try { container_client.SubmitBatch(batch); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete blobs 
in a directory: " + + location.path + ": " + container_client.GetUrl(), + exception); } std::vector failed_blob_names; for (size_t i = 0; i < deferred_responses.size(); ++i) { @@ -1221,7 +1272,7 @@ class AzureFileSystem::Impl { try { auto delete_result = deferred_response.GetResponse(); success = delete_result.Value.Deleted; - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { success = false; } if (!success) { @@ -1240,11 +1291,10 @@ class AzureFileSystem::Impl { } } } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to list blobs in a directory: " + location.path + + ": " + container_client.GetUrl(), + exception); } return Status::OK(); } @@ -1264,19 +1314,18 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - container_client.GetUrl(), response.RawResponse.get(), + container_client.GetUrl(), *response.RawResponse, "Failed to delete a container: " + location.container); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete a container: " + location.container + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete a container: " + location.container + + ": " + container_client.GetUrl(), + exception); } } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { auto directory_client = datalake_service_client_->GetFileSystemClient(location.container) @@ -1287,14 +1336,13 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - directory_client.GetUrl(), response.RawResponse.get(), + directory_client.GetUrl(), *response.RawResponse, "Failed to delete a directory: " + location.path); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); } } else { return DeleteDirContentsWithoutHierarchicalNamespace(location, @@ -1308,7 +1356,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { auto file_system_client = datalake_service_client_->GetFileSystemClient(location.container); @@ -1322,8 +1370,8 @@ class AzureFileSystem::Impl { file_system_client.GetDirectoryClient(path.Name); try { sub_directory_client.DeleteRecursive(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to delete a sub directory: " + location.container + internal::kSep + path.Name + ": " + sub_directory_client.GetUrl(), exception); @@ -1332,8 +1380,8 @@ class 
AzureFileSystem::Impl { auto sub_file_client = file_system_client.GetFileClient(path.Name); try { sub_file_client.Delete(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to delete a sub file: " + location.container + internal::kSep + path.Name + ": " + sub_file_client.GetUrl(), exception); @@ -1341,15 +1389,13 @@ class AzureFileSystem::Impl { } } } - } catch (const Azure::Storage::StorageException& exception) { - if (missing_dir_ok && - exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { return Status::OK(); } else { - return internal::ExceptionToStatus( - "Failed to delete directory contents: " + location.path + ": " + - directory_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to delete directory contents: " + + location.path + ": " + directory_client.GetUrl(), + exception); } } return Status::OK(); @@ -1371,8 +1417,8 @@ class AzureFileSystem::Impl { .GetUrl(); try { dest_blob_client.CopyFromUri(src_url); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to copy a blob. (" + src_url + " -> " + dest_blob_client.GetUrl() + ")", exception); } @@ -1380,6 +1426,17 @@ class AzureFileSystem::Impl { } }; +AzureFileSystem::AzureFileSystem(std::unique_ptr&& impl) + : FileSystem(impl->io_context()), impl_(std::move(impl)) { + default_async_is_sync_ = false; +} + +Result> AzureFileSystem::Make( + const AzureOptions& options, const io::IOContext& io_context) { + ARROW_ASSIGN_OR_RAISE(auto impl, AzureFileSystem::Impl::Make(options, io_context)); + return std::shared_ptr(new AzureFileSystem(std::move(impl))); +} + const AzureOptions& AzureFileSystem::options() const { return impl_->options(); } bool AzureFileSystem::Equals(const FileSystem& other) const { @@ -1399,7 +1456,7 @@ Result AzureFileSystem::GetFileInfo(const std::string& path) { } Result AzureFileSystem::GetFileInfo(const FileSelector& select) { - Azure::Core::Context context; + Core::Context context; Azure::Nullable page_size_hint; // unspecified FileInfoVector results; RETURN_NOT_OK( @@ -1478,18 +1535,4 @@ Result> AzureFileSystem::OpenAppendStream( return impl_->OpenAppendStream(location, metadata, false, this); } -Result> AzureFileSystem::Make( - const AzureOptions& options, const io::IOContext& io_context) { - std::shared_ptr ptr(new AzureFileSystem(options, io_context)); - RETURN_NOT_OK(ptr->impl_->Init()); - return ptr; -} - -AzureFileSystem::AzureFileSystem(const AzureOptions& options, - const io::IOContext& io_context) - : FileSystem(io_context), impl_(std::make_unique(options, io_context)) { - default_async_is_sync_ = false; -} - -} // namespace fs -} // namespace arrow +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index b2865b059ef6e..1266aa2d02b86 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -25,90 +25,118 @@ #include "arrow/util/macros.h" #include "arrow/util/uri.h" -namespace Azure { -namespace Core { -namespace Credentials { - +namespace Azure::Core::Credentials { class TokenCredential; +} -} // namespace Credentials -} // namespace Core -namespace Storage { 
- +namespace Azure::Storage { class StorageSharedKeyCredential; +} -} // namespace Storage -} // namespace Azure - -namespace arrow { -namespace fs { - -enum class AzureCredentialsKind : int8_t { - /// Anonymous access (no credentials used), public - Anonymous, - /// Use explicitly-provided access key pair - StorageCredentials, - /// Use ServicePrincipleCredentials - ServicePrincipleCredentials, - /// Use Sas Token to authenticate - Sas, - /// Use Connection String - ConnectionString -}; +namespace Azure::Storage::Blobs { +class BlobServiceClient; +} + +namespace Azure::Storage::Files::DataLake { +class DataLakeServiceClient; +} -enum class AzureBackend : bool { - /// Official Azure Remote Backend - Azure, - /// Local Simulated Storage - Azurite +namespace arrow::fs { + +enum class AzureBackend { + /// \brief Official Azure Remote Backend + kAzure, + /// \brief Local Simulated Storage + kAzurite }; /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { - std::string account_dfs_url; - std::string account_blob_url; - AzureBackend backend = AzureBackend::Azure; - AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous; + /// \brief The backend to connect to: Azure or Azurite (for testing). + AzureBackend backend = AzureBackend::kAzure; - std::string sas_token; - std::string connection_string; - std::shared_ptr - storage_credentials_provider; - std::shared_ptr - service_principle_credentials_provider; + // TODO(GH-38598): Add support for more auth methods. + // std::string connection_string; + // std::string sas_token; /// \brief Default metadata for OpenOutputStream. /// /// This will be ignored if non-empty metadata is passed to OpenOutputStream. std::shared_ptr default_metadata; + private: + std::string account_blob_url_; + std::string account_dfs_url_; + + enum class CredentialKind { + kAnonymous, + kStorageSharedKeyCredential, + } credential_kind_ = CredentialKind::kAnonymous; + + std::shared_ptr + storage_shared_key_credential_; + + public: AzureOptions(); + ~AzureOptions(); - Status ConfigureAccountKeyCredentials(const std::string& account_name, - const std::string& account_key); + Status ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key); bool Equals(const AzureOptions& other) const; + + const std::string& AccountBlobUrl() const { return account_blob_url_; } + const std::string& AccountDfsUrl() const { return account_dfs_url_; } + + Result> + MakeBlobServiceClient() const; + + Result> + MakeDataLakeServiceClient() const; }; -/// \brief Azure-backed FileSystem implementation for ABFS and ADLS. +/// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and +/// Azure Data Lake Storage Gen2 (ADLS Gen2) [2]. +/// +/// ADLS Gen2 isn't a dedicated service or account type. It's a set of capabilities that +/// support high throughput analytic workloads, built on Azure Blob Storage. All the data +/// ingested via the ADLS Gen2 APIs is persisted as blobs in the storage account. +/// ADLS Gen2 provides filesystem semantics, file-level security, and Hadoop +/// compatibility. ADLS Gen1 exists as a separate object that will retired on 2024-02-29 +/// and new ADLS accounts use Gen2 instead. /// -/// ABFS (Azure Blob Storage - https://azure.microsoft.com/en-us/products/storage/blobs/) -/// object-based cloud storage system. +/// ADLS Gen2 and Blob APIs can operate on the same data, but there are +/// some limitations [3]. 
The ones that are relevant to this +/// implementation are listed here: /// -/// ADLS (Azure Data Lake Storage - -/// https://azure.microsoft.com/en-us/products/storage/data-lake-storage/) -/// is a scalable data storage system designed for big-data applications. -/// ADLS provides filesystem semantics, file-level security, and Hadoop -/// compatibility. Gen1 exists as a separate object that will retired -/// on Feb 29, 2024. New ADLS accounts will use Gen2 instead, which is -/// implemented on top of ABFS. +/// - You can't use Blob APIs, and ADLS APIs to write to the same instance of a file. If +/// you write to a file by using ADLS APIs then that file's blocks won't be visible +/// to calls to the GetBlockList Blob API. The only exception is when you're +/// overwriting. +/// - When you use the ListBlobs operation without specifying a delimiter, the results +/// include both directories and blobs. If you choose to use a delimiter, use only a +/// forward slash (/) -- the only supported delimiter. +/// - If you use the DeleteBlob API to delete a directory, that directory is deleted only +/// if it's empty. This means that you can't use the Blob API delete directories +/// recursively. /// -/// TODO: GH-18014 Complete the internal implementation -/// and review the documentation +/// [1]: https://azure.microsoft.com/en-us/products/storage/blobs +/// [2]: https://azure.microsoft.com/en-us/products/storage/data-lake-storage +/// [3]: +/// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues class ARROW_EXPORT AzureFileSystem : public FileSystem { + private: + class Impl; + std::unique_ptr impl_; + + explicit AzureFileSystem(std::unique_ptr&& impl); + public: ~AzureFileSystem() override = default; + static Result> Make( + const AzureOptions& options, const io::IOContext& = io::default_io_context()); + std::string type_name() const override { return "abfs"; } /// Return the original Azure options when constructing the filesystem @@ -152,16 +180,6 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { Result> OpenAppendStream( const std::string& path, const std::shared_ptr& metadata = {}) override; - - static Result> Make( - const AzureOptions& options, const io::IOContext& = io::default_io_context()); - - private: - AzureFileSystem(const AzureOptions& options, const io::IOContext& io_context); - - class Impl; - std::unique_ptr impl_; }; -} // namespace fs -} // namespace arrow +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs_internal.cc b/cpp/src/arrow/filesystem/azurefs_internal.cc index 3e545d670cb04..39c3fb23e3cfd 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.cc +++ b/cpp/src/arrow/filesystem/azurefs_internal.cc @@ -23,11 +23,17 @@ namespace arrow::fs::internal { +namespace { + +// TODO(GH-38772): Remove azurefs_internal.h/.cc by moving the detector to +// azurefs.cc (which contains a private copy of this helper function already). 
Status ExceptionToStatus(const std::string& prefix, const Azure::Storage::StorageException& exception) { return Status::IOError(prefix, " Azure Error: ", exception.what()); } +} // namespace + Status HierarchicalNamespaceDetector::Init( Azure::Storage::Files::DataLake::DataLakeServiceClient* datalake_service_client) { datalake_service_client_ = datalake_service_client; diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h index c3da96239a18f..92592cf164f5a 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.h +++ b/cpp/src/arrow/filesystem/azurefs_internal.h @@ -25,9 +25,6 @@ namespace arrow::fs::internal { -Status ExceptionToStatus(const std::string& prefix, - const Azure::Storage::StorageException& exception); - class HierarchicalNamespaceDetector { public: Status Init( diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 792c63b209402..463ff4e8daf3d 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -36,6 +36,7 @@ #include "arrow/filesystem/azurefs.h" #include "arrow/filesystem/azurefs_internal.h" +#include #include #include @@ -63,6 +64,7 @@ namespace arrow { using internal::TemporaryDir; namespace fs { +using internal::ConcatAbstractPath; namespace { namespace bp = boost::process; @@ -71,56 +73,133 @@ using ::testing::Not; using ::testing::NotNull; namespace Blobs = Azure::Storage::Blobs; -namespace Files = Azure::Storage::Files; +namespace Core = Azure::Core; +namespace DataLake = Azure::Storage::Files::DataLake; -auto const* kLoremIpsum = R"""( -Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor -incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis -nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. -Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu -fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in -culpa qui officia deserunt mollit anim id est laborum. -)"""; +class BaseAzureEnv : public ::testing::Environment { + protected: + std::string account_name_; + std::string account_key_; + + BaseAzureEnv(std::string account_name, std::string account_key) + : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {} -class AzuriteEnv : public ::testing::Environment { public: - AzuriteEnv() { - account_name_ = "devstoreaccount1"; - account_key_ = - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/" - "KBHBeksoGMGw=="; - auto exe_path = bp::search_path("azurite"); - if (exe_path.empty()) { - auto error = std::string("Could not find Azurite emulator."); - status_ = Status::Invalid(error); - return; + const std::string& account_name() const { return account_name_; } + const std::string& account_key() const { return account_key_; } + + virtual AzureBackend backend() const = 0; + + virtual bool WithHierarchicalNamespace() const { return false; } + + virtual Result GetDebugLogSize() { return 0; } + virtual Status DumpDebugLog(int64_t position) { + return Status::NotImplemented("BaseAzureEnv::DumpDebugLog"); + } +}; + +template +class AzureEnvImpl : public BaseAzureEnv { + private: + /// \brief Factory function that registers the singleton instance as a global test + /// environment. Must be called only once per implementation (see GetInstance()). 
+ /// + /// Every BaseAzureEnv implementation defines a static and parameter-less member + /// function called Make() that returns a Result>. + /// This templated function performs the following steps: + /// + /// 1) Calls AzureEnvClass::Make() to get an instance of AzureEnvClass. + /// 2) Passes ownership of the AzureEnvClass instance to the testing environment. + /// 3) Returns a Result wrapping the raw heap-allocated pointer. + static Result MakeAndAddToGlobalTestEnvironment() { + ARROW_ASSIGN_OR_RAISE(auto env, AzureEnvClass::Make()); + auto* heap_ptr = env.release(); + ::testing::AddGlobalTestEnvironment(heap_ptr); + return heap_ptr; + } + + protected: + using BaseAzureEnv::BaseAzureEnv; + + /// \brief Create an AzureEnvClass instance from environment variables. + /// + /// Reads the account name and key from the environment variables. This can be + /// used in BaseAzureEnv implementations that don't need to do any additional + /// setup to create the singleton instance (e.g. AzureFlatNSEnv, + /// AzureHierarchicalNSEnv). + static Result> MakeFromEnvVars( + const std::string& account_name_var, const std::string& account_key_var) { + const auto account_name = std::getenv(account_name_var.c_str()); + const auto account_key = std::getenv(account_key_var.c_str()); + if (!account_name && !account_key) { + return Status::Cancelled(account_name_var + " and " + account_key_var + + " are not set. Skipping tests."); } - auto temp_dir_ = *TemporaryDir::Make("azurefs-test-"); - auto debug_log_path_result = temp_dir_->path().Join("debug.log"); - if (!debug_log_path_result.ok()) { - status_ = debug_log_path_result.status(); - return; + // If only one of the variables is set. Don't cancel tests, + // fail with a Status::Invalid. + if (!account_name) { + return Status::Invalid(account_name_var + " not set while " + account_key_var + + " is set."); } - debug_log_path_ = *debug_log_path_result; - server_process_ = - bp::child(boost::this_process::environment(), exe_path, "--silent", "--location", - temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString()); - if (!(server_process_.valid() && server_process_.running())) { - auto error = "Could not start Azurite emulator."; - server_process_.terminate(); - server_process_.wait(); - status_ = Status::Invalid(error); - return; + if (!account_key) { + return Status::Invalid(account_key_var + " not set while " + account_name_var + + " is set."); } - status_ = Status::OK(); + return std::unique_ptr{new AzureEnvClass(account_name, account_key)}; } + public: + static Result GetInstance() { + // Ensure MakeAndAddToGlobalTestEnvironment() is called only once by storing the + // Result in a static variable. 
+ static auto singleton_env = MakeAndAddToGlobalTestEnvironment(); + return singleton_env; + } + + AzureBackend backend() const final { return AzureEnvClass::kBackend; } +}; + +class AzuriteEnv : public AzureEnvImpl { + private: + std::unique_ptr temp_dir_; + arrow::internal::PlatformFilename debug_log_path_; + bp::child server_process_; + + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzurite; + ~AzuriteEnv() override { server_process_.terminate(); server_process_.wait(); } - Result GetDebugLogSize() { + static Result> Make() { + auto self = std::unique_ptr( + new AzuriteEnv("devstoreaccount1", + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" + "K1SZFPTOtr/KBHBeksoGMGw==")); + auto exe_path = bp::search_path("azurite"); + if (exe_path.empty()) { + return Status::Invalid("Could not find Azurite emulator."); + } + ARROW_ASSIGN_OR_RAISE(self->temp_dir_, TemporaryDir::Make("azurefs-test-")); + ARROW_ASSIGN_OR_RAISE(self->debug_log_path_, + self->temp_dir_->path().Join("debug.log")); + auto server_process = bp::child( + boost::this_process::environment(), exe_path, "--silent", "--location", + self->temp_dir_->path().ToString(), "--debug", self->debug_log_path_.ToString()); + if (!server_process.valid() || !server_process.running()) { + server_process.terminate(); + server_process.wait(); + return Status::Invalid("Could not start Azurite emulator."); + } + self->server_process_ = std::move(server_process); + return self; + } + + Result GetDebugLogSize() override { ARROW_ASSIGN_OR_RAISE(auto exists, arrow::internal::FileExists(debug_log_path_)); if (!exists) { return 0; @@ -131,7 +210,7 @@ class AzuriteEnv : public ::testing::Environment { return arrow::internal::FileTell(file_descriptor.fd()); } - Status DumpDebugLog(int64_t position = 0) { + Status DumpDebugLog(int64_t position) override { ARROW_ASSIGN_OR_RAISE(auto exists, arrow::internal::FileExists(debug_log_path_)); if (!exists) { return Status::OK(); @@ -157,25 +236,35 @@ class AzuriteEnv : public ::testing::Environment { std::cerr << std::endl; return Status::OK(); } +}; - const std::string& account_name() const { return account_name_; } - const std::string& account_key() const { return account_key_; } - const Status status() const { return status_; } - +class AzureFlatNSEnv : public AzureEnvImpl { private: - std::string account_name_; - std::string account_key_; - bp::child server_process_; - Status status_; - std::unique_ptr temp_dir_; - arrow::internal::PlatformFilename debug_log_path_; + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzure; + + static Result> Make() { + return MakeFromEnvVars("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME", + "AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); + } }; -auto* azurite_env = ::testing::AddGlobalTestEnvironment(new AzuriteEnv); +class AzureHierarchicalNSEnv : public AzureEnvImpl { + private: + using AzureEnvImpl::AzureEnvImpl; -AzuriteEnv* GetAzuriteEnv() { - return ::arrow::internal::checked_cast(azurite_env); -} + public: + static const AzureBackend kBackend = AzureBackend::kAzure; + + static Result> Make() { + return MakeFromEnvVars("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME", + "AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); + } + + bool WithHierarchicalNamespace() const final { return true; } +}; // Placeholder tests // TODO: GH-18014 Remove once a proper test is added @@ -193,44 +282,110 @@ TEST(AzureFileSystem, OptionsCompare) { EXPECT_TRUE(options.Equals(options)); } -class 
AzureFileSystemTest : public ::testing::Test { +struct PreexistingData { + public: + using RNG = std::mt19937_64; + + public: + const std::string container_name; + static constexpr char const* kObjectName = "test-object-name"; + + static constexpr char const* kLoremIpsum = R"""( +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu +fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in +culpa qui officia deserunt mollit anim id est laborum. +)"""; + public: + explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {} + + // Creates a path by concatenating the container name and the stem. + std::string ContainerPath(std::string_view stem) const { + return ConcatAbstractPath(container_name, stem); + } + + std::string ObjectPath() const { return ContainerPath(kObjectName); } + std::string NotFoundObjectPath() const { return ContainerPath("not-found"); } + + std::string RandomDirectoryPath(RNG& rng) const { + return ContainerPath(RandomChars(32, rng)); + } + + // Utilities + static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); } + + static std::string RandomChars(int count, RNG& rng) { + auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); + std::uniform_int_distribution d(0, static_cast(fillers.size()) - 1); + std::string s; + std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; }); + return s; + } + + static int RandomIndex(int end, RNG& rng) { + return std::uniform_int_distribution(0, end - 1)(rng); + } + + static std::string RandomLine(int lineno, int width, RNG& rng) { + auto line = std::to_string(lineno) + ": "; + line += RandomChars(width - static_cast(line.size()) - 1, rng); + line += '\n'; + return line; + } +}; + +class TestAzureFileSystem : public ::testing::Test { + protected: + // Set in constructor + std::mt19937_64 rng_; + + // Set in SetUp() + int64_t debug_log_start_ = 0; + bool set_up_succeeded_ = false; + AzureOptions options_; + std::shared_ptr fs_; std::unique_ptr blob_service_client_; - std::unique_ptr datalake_service_client_; - AzureOptions options_; - std::mt19937_64 generator_; - std::string container_name_; - bool suite_skipped_ = false; + std::unique_ptr datalake_service_client_; + + public: + TestAzureFileSystem() : rng_(std::random_device()()) {} - AzureFileSystemTest() : generator_(std::random_device()()) {} + virtual Result GetAzureEnv() const = 0; - virtual Result MakeOptions() = 0; + static Result MakeOptions(BaseAzureEnv* env) { + AzureOptions options; + options.backend = env->backend(); + ARROW_EXPECT_OK( + options.ConfigureAccountKeyCredential(env->account_name(), env->account_key())); + return options; + } void SetUp() override { - auto options = MakeOptions(); - if (options.ok()) { - options_ = *options; + auto make_options = [this]() -> Result { + ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv()); + EXPECT_THAT(env, NotNull()); + ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize()); + return MakeOptions(env); + }; + auto options_res = make_options(); + if (options_res.status().IsCancelled()) { + GTEST_SKIP() << options_res.status().message(); } else { - suite_skipped_ = true; - GTEST_SKIP() << options.status().message(); + EXPECT_OK_AND_ASSIGN(options_, options_res); } - // 
Stop-gap solution before GH-39119 is fixed. - container_name_ = "z" + RandomChars(31); - blob_service_client_ = std::make_unique( - options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); - ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); - auto container_client = CreateContainer(container_name_); - auto blob_client = container_client.GetBlockBlobClient(PreexistingObjectName()); - blob_client.UploadFrom(reinterpret_cast(kLoremIpsum), - strlen(kLoremIpsum)); + ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); + EXPECT_OK_AND_ASSIGN(blob_service_client_, options_.MakeBlobServiceClient()); + EXPECT_OK_AND_ASSIGN(datalake_service_client_, options_.MakeDataLakeServiceClient()); + set_up_succeeded_ = true; } void TearDown() override { - if (!suite_skipped_) { + if (set_up_succeeded_) { auto containers = blob_service_client_->ListBlobContainers(); for (auto container : containers.BlobContainers) { auto container_client = @@ -238,6 +393,13 @@ class AzureFileSystemTest : public ::testing::Test { container_client.DeleteIfExists(); } } + if (HasFailure()) { + // XXX: This may not include all logs in the target test because + // Azurite doesn't flush debug logs immediately... You may want + // to check the log manually... + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + ARROW_IGNORE_EXPR(env->DumpDebugLog(debug_log_start_)); + } } Blobs::BlobContainerClient CreateContainer(const std::string& name) { @@ -254,54 +416,20 @@ class AzureFileSystemTest : public ::testing::Test { return blob_client; } - std::string PreexistingContainerName() const { return container_name_; } - - std::string PreexistingContainerPath() const { - return PreexistingContainerName() + '/'; - } - - static std::string PreexistingObjectName() { return "test-object-name"; } - - std::string PreexistingObjectPath() const { - return PreexistingContainerPath() + PreexistingObjectName(); - } - - std::string NotFoundObjectPath() { return PreexistingContainerPath() + "not-found"; } - - std::string RandomLine(int lineno, std::size_t width) { - auto line = std::to_string(lineno) + ": "; - line += RandomChars(width - line.size() - 1); - line += '\n'; - return line; - } - - std::size_t RandomIndex(std::size_t end) { - return std::uniform_int_distribution(0, end - 1)(generator_); - } - - std::string RandomChars(std::size_t count) { - auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); - std::uniform_int_distribution d(0, fillers.size() - 1); - std::string s; - std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(generator_)]; }); - return s; - } - - std::string RandomContainerName() { return RandomChars(32); } - - std::string RandomDirectoryName() { return RandomChars(32); } - - void UploadLines(const std::vector& lines, const char* path_to_file, + void UploadLines(const std::vector& lines, const std::string& path, int total_size) { - const auto path = PreexistingContainerPath() + path_to_file; ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const auto all_lines = std::accumulate(lines.begin(), lines.end(), std::string("")); ASSERT_OK(output->Write(all_lines)); ASSERT_OK(output->Close()); } - void RunGetFileInfoObjectWithNestedStructureTest(); - void RunGetFileInfoObjectTest(); + PreexistingData SetUpPreexistingData() { + PreexistingData data(rng_); + auto container_client = CreateContainer(data.container_name); + CreateBlob(container_client, 
data.kObjectName, PreexistingData::kLoremIpsum); + return data; + } struct HierarchicalPaths { std::string container; @@ -310,15 +438,12 @@ class AzureFileSystemTest : public ::testing::Test { }; // Need to use "void" as the return type to use ASSERT_* in this method. - void CreateHierarchicalData(HierarchicalPaths& paths) { - const auto container_path = RandomContainerName(); - const auto directory_path = - internal::ConcatAbstractPath(container_path, RandomDirectoryName()); - const auto sub_directory_path = - internal::ConcatAbstractPath(directory_path, "new-sub"); - const auto sub_blob_path = - internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); - const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); + void CreateHierarchicalData(HierarchicalPaths* paths) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto sub_directory_path = ConcatAbstractPath(directory_path, "new-sub"); + const auto sub_blob_path = ConcatAbstractPath(sub_directory_path, "sub.txt"); + const auto top_blob_path = ConcatAbstractPath(directory_path, "top.txt"); ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); ASSERT_OK(output->Write(std::string_view("sub"))); @@ -327,15 +452,15 @@ class AzureFileSystemTest : public ::testing::Test { ASSERT_OK(output->Write(std::string_view("top"))); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), container_path, FileType::Directory); + AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); AssertFileInfo(fs_.get(), directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); AssertFileInfo(fs_.get(), top_blob_path, FileType::File); - paths.container = container_path; - paths.directory = directory_path; - paths.sub_paths = { + paths->container = data.container_name; + paths->directory = directory_path; + paths->sub_paths = { sub_directory_path, sub_blob_path, top_blob_path, @@ -362,7 +487,7 @@ class AzureFileSystemTest : public ::testing::Test { } void AssertInfoAllContainersRecursive(const std::vector& infos) { - ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos.size(), 12); AssertFileInfo(infos[0], "container", FileType::Directory); AssertFileInfo(infos[1], "container/emptydir", FileType::Directory); AssertFileInfo(infos[2], "container/otherdir", FileType::Directory); @@ -377,202 +502,336 @@ class AzureFileSystemTest : public ::testing::Test { strlen(kSubData)); AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData)); AssertFileInfo(infos[11], "empty-container", FileType::Directory); - AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory); - AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File); } -}; -class AzuriteFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - EXPECT_THAT(GetAzuriteEnv(), NotNull()); - ARROW_EXPECT_OK(GetAzuriteEnv()->status()); - ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize()); - AzureOptions options; - options.backend = AzureBackend::Azurite; - ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials( - GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key())); - return options; + bool WithHierarchicalNamespace() const { + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + return env->WithHierarchicalNamespace(); } - void TearDown() override { - 
AzureFileSystemTest::TearDown(); - if (HasFailure()) { - // XXX: This may not include all logs in the target test because - // Azurite doesn't flush debug logs immediately... You may want - // to check the log manually... - ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_)); + // Tests that are called from more than one implementation of TestAzureFileSystem + + void TestDetectHierarchicalNamespace(); + void TestGetFileInfoObject(); + void TestGetFileInfoObjectWithNestedStructure(); + + void TestDeleteDirSuccessEmpty() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + + if (WithHierarchicalNamespace()) { + ASSERT_OK(fs_->CreateDir(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() and DeleteDir() do nothing. + ASSERT_OK(fs_->CreateDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } } - int64_t debug_log_start_ = 0; -}; + void TestCreateDirSuccessContainerAndDirectory() { + auto data = SetUpPreexistingData(); + const auto path = data.RandomDirectoryPath(rng_); + ASSERT_OK(fs_->CreateDir(path, false)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + } + } -class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - AzureOptions options; - const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); - const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME"); - if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key)); - return options; + void TestCreateDirRecursiveSuccessContainerOnly() { + auto container_name = PreexistingData::RandomContainerName(rng_); + ASSERT_OK(fs_->CreateDir(container_name, true)); + arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + } + + void TestCreateDirRecursiveSuccessDirectoryOnly() { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); } - return Status::Cancelled( - "Connection details not provided for a real flat namespace " - "account."); } -}; -// How to enable this test: -// -// You need an Azure account. You should be able to create a free -// account at https://azure.microsoft.com/en-gb/free/ . You should be -// able to create a storage account through the portal Web UI. 
-// -// See also the official document how to create a storage account: -// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account -// -// A few suggestions on configuration: -// -// * Use Standard general-purpose v2 not premium -// * Use LRS redundancy -// * Obviously you need to enable hierarchical namespace. -// * Set the default access tier to hot -// * SFTP, NFS and file shares are not required. -class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - AzureOptions options; - const auto account_key = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); - const auto account_name = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME"); - if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key)); - return options; + void TestCreateDirRecursiveSuccessContainerAndDirectory() { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); } - return Status::Cancelled( - "Connection details not provided for a real hierarchical namespace " - "account."); } -}; -TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName())); -} + void TestDeleteDirContentsSuccessNonexistent() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(true, hierarchical_namespace.Enabled(PreexistingContainerName())); -} + void TestDeleteDirContentsFailureNonexistent() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); + } +}; -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName())); -} +void TestAzureFileSystem::TestDetectHierarchicalNamespace() { + // Check the environments are implemented and injected here correctly. 
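+ // `expected` comes from the environment class itself (flat vs. hierarchical), so the
+ // detector's answer for a freshly created container must match the declared support.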
+ auto expected = WithHierarchicalNamespace(); -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { + auto data = SetUpPreexistingData(); auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container")); + ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name)); } -TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { - AssertFileInfo(fs_.get(), "", FileType::Directory); - - // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); -} - -TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { - AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory); +void TestAzureFileSystem::TestGetFileInfoObject() { + auto data = SetUpPreexistingData(); + auto object_properties = + blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlobClient(data.kObjectName) + .GetProperties() + .Value; - AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File, + std::chrono::system_clock::time_point{object_properties.LastModified}, + static_cast(object_properties.BlobSize)); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName})); } -void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() { +void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { + auto data = SetUpPreexistingData(); // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). - constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; - ASSERT_OK_AND_ASSIGN( - auto output, - fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName, /*metadata=*/{})); - const std::string_view data(kLoremIpsum); - ASSERT_OK(output->Write(data)); + const std::string kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(data.ContainerPath(kObjectName), + /*metadata=*/{})); + const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum); + ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); // 0 is immediately after "/" lexicographically, ensure that this doesn't // cause unexpected issues. 
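// ('0' is 0x30 in ASCII, one code point after '/' at 0x2F, so "dir0" sorts right
// after the "dir/" prefix block in blob listings.)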
- ASSERT_OK_AND_ASSIGN(output, - fs_->OpenOutputStream( - PreexistingContainerPath() + "test-object-dir/some_other_dir0", - /*metadata=*/{})); - ASSERT_OK(output->Write(data)); - ASSERT_OK(output->Close()); ASSERT_OK_AND_ASSIGN( - output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName + "0", + output, fs_->OpenOutputStream(data.ContainerPath("test-object-dir/some_other_dir0"), /*metadata=*/{})); - ASSERT_OK(output->Write(data)); + ASSERT_OK(output->Write(lorem_ipsum)); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, + fs_->OpenOutputStream(data.ContainerPath(kObjectName + "0"), + /*metadata=*/{})); + ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName, FileType::File); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/", - FileType::NotFound); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir", - FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/", + AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName), FileType::File); + AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir"), FileType::Directory); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_dir", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir"), FileType::Directory); - AssertFileInfo(fs_.get(), - PreexistingContainerPath() + "test-object-dir/some_other_dir/", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di", - FileType::NotFound); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_di", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-di"), FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_di"), FileType::NotFound); + + if (WithHierarchicalNamespace()) { + datalake_service_client_->GetFileSystemClient(data.container_name) + .GetDirectoryClient("test-empty-object-dir") + .Create(); + + AssertFileInfo(fs_.get(), data.ContainerPath("test-empty-object-dir"), + FileType::Directory); + } } -TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) { - RunGetFileInfoObjectWithNestedStructureTest(); +template +class AzureFileSystemTestImpl : public TestAzureFileSystem { + public: + using TestAzureFileSystem::TestAzureFileSystem; + + Result GetAzureEnv() const final { return AzureEnvClass::GetInstance(); } +}; + +// How to enable the non-Azurite tests: +// +// You need an Azure account. You should be able to create a free account [1]. +// Through the portal Web UI, you should create a storage account [2]. +// +// A few suggestions on configuration: +// +// * Use Standard general-purpose v2 not premium +// * Use LRS redundancy +// * Set the default access tier to hot +// * SFTP, NFS and file shares are not required. +// +// You must not enable Hierarchical Namespace on the storage account used for +// TestAzureFlatNSFileSystem, but you must enable it on the storage account +// used for TestAzureHierarchicalNSFileSystem. 
+// +// The credentials should be placed in the correct environment variables: +// +// * AZURE_FLAT_NAMESPACE_ACCOUNT_NAME +// * AZURE_FLAT_NAMESPACE_ACCOUNT_KEY +// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME +// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY +// +// [1]: https://azure.microsoft.com/en-gb/free/ +// [2]: +// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account +using TestAzureFlatNSFileSystem = AzureFileSystemTestImpl; +using TestAzureHierarchicalNSFileSystem = AzureFileSystemTestImpl; +using TestAzuriteFileSystem = AzureFileSystemTestImpl; + +// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) + +template +using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl; + +using AllEnvironments = + ::testing::Types; + +TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments); + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) { + this->TestDetectHierarchicalNamespace(); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObjectWithNestedStructure) { - RunGetFileInfoObjectWithNestedStructureTest(); - datalake_service_client_->GetFileSystemClient(PreexistingContainerName()) - .GetDirectoryClient("test-empty-object-dir") - .Create(); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) { + this->TestGetFileInfoObject(); +} - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-empty-object-dir", - FileType::Directory); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) { + this->TestDeleteDirSuccessEmpty(); } -void AzureFileSystemTest::RunGetFileInfoObjectTest() { - auto object_properties = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlobClient(PreexistingObjectName()) - .GetProperties() - .Value; +TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) { + this->TestGetFileInfoObjectWithNestedStructure(); +} - AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File, - std::chrono::system_clock::time_point(object_properties.LastModified), - static_cast(object_properties.BlobSize)); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) { + this->TestCreateDirSuccessContainerAndDirectory(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) { + this->TestCreateDirRecursiveSuccessContainerOnly(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) { + this->TestCreateDirRecursiveSuccessDirectoryOnly(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) { + this->TestCreateDirRecursiveSuccessContainerAndDirectory(); +} + +// Tests using a real storage account *with Hierarchical Namespace enabled* + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) { + auto data = SetUpPreexistingData(); + const auto path = data.RandomDirectoryPath(rng_); + ASSERT_RAISES(IOError, fs_->DeleteDir(path)); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); + ASSERT_OK(output->Write(std::string_view("hello"))); + ASSERT_OK(output->Close()); + arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); + ASSERT_OK(fs_->DeleteDir(directory_path)); + 
arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + ASSERT_OK(fs_->DeleteDir(parent)); + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) { + auto preexisting_data = SetUpPreexistingData(); + HierarchicalPaths paths; + CreateHierarchicalData(&paths); + ASSERT_OK(fs_->DeleteDirContents(paths.directory)); + arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); + for (const auto& sub_path : paths.sub_paths) { + arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + } +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessNonexistent) { + this->TestDeleteDirContentsSuccessNonexistent(); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { + this->TestDeleteDirContentsFailureNonexistent(); +} + +// Tests using Azurite (the local Azure emulator) + +TEST_F(TestAzuriteFileSystem, DetectHierarchicalNamespaceFailsWithMissingContainer) { + auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); + ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); + ASSERT_RAISES(IOError, hierarchical_namespace.Enabled("nonexistent-container")); +} + +TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { + AssertFileInfo(fs_.get(), "", FileType::Directory); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingObjectName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); } -TEST_F(AzuriteFileSystemTest, GetFileInfoObject) { RunGetFileInfoObjectTest(); } +TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) { + auto data = SetUpPreexistingData(); + AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); -TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObject) { - RunGetFileInfoObjectTest(); + AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + + // URI + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + data.container_name)); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { SetUpSmallFileSystemTree(); FileSelector select; @@ -581,11 +840,10 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { // Root dir select.base_dir = ""; ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); - ASSERT_EQ(infos.size(), 3); + ASSERT_EQ(infos.size(), 2); ASSERT_EQ(infos, SortedInfos(infos)); AssertFileInfo(infos[0], "container", FileType::Directory); AssertFileInfo(infos[1], "empty-container", FileType::Directory); - AssertFileInfo(infos[2], container_name_, FileType::Directory); // Empty container select.base_dir = "empty-container"; @@ -641,7 +899,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { ASSERT_EQ(infos.size(), 4); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) { SetUpSmallFileSystemTree(); FileSelector select; @@ -651,7 +909,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { // Root 
dir select.base_dir = ""; ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); - ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos.size(), 12); ASSERT_EQ(infos, SortedInfos(infos)); AssertInfoAllContainersRecursive(infos); @@ -699,7 +957,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { AssertFileInfo(infos[3], "container/otherdir/1/2/3/otherfile", FileType::File, 10); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { { auto container = CreateContainer("container"); CreateBlob(container, "mydir/emptydir1/"); @@ -746,137 +1004,60 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); } -TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", false)); } -TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) { - auto container_name = RandomContainerName(); +TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) { + auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name, false)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } -TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerAndDirectory) { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); - ASSERT_OK(fs_->CreateDir(path, false)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirSuccessContainerAndDirectory) { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); - ASSERT_OK(fs_->CreateDir(path, false)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); ASSERT_RAISES(IOError, fs_->CreateDir(path, false)); } -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirRecursiveFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - auto container_name = RandomContainerName(); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - auto container_name = RandomContainerName(); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - const auto parent = PreexistingContainerPath() + RandomDirectoryName(); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - const auto parent = 
PreexistingContainerPath() + RandomDirectoryName(); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, - CreateDirRecursiveSuccessContainerAndDirectory) { - auto container_name = RandomContainerName(); - const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); +TEST_F(TestAzuriteFileSystem, CreateDirUri) { + ASSERT_RAISES( + Invalid, + fs_->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); } -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerAndDirectory) { - auto container_name = RandomContainerName(); - const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirUri) { - ASSERT_RAISES(Invalid, fs_->CreateDir("abfs://" + RandomContainerName(), true)); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { - const auto container_name = RandomContainerName(); +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) { + const auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); ASSERT_OK(fs_->DeleteDir(container_name)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessEmpty) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() and DeleteDir() do nothing. - ASSERT_OK(fs_->CreateDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); // There is only virtual directory without hierarchical namespace // support. So the DeleteDir() for nonexistent directory does nothing. 
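// (Contrast with TestAzureHierarchicalNSFileSystem.DeleteDirFailureNonexistent above,
// where the same call on a directory-aware account raises IOError.)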
ASSERT_OK(fs_->DeleteDir(directory_path)); arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); // We must use 257 or more blobs here to test pagination of ListBlobs(). // Because we can't add 257 or more delete blob requests to one SubmitBatch(). int64_t n_blobs = 257; for (int64_t i = 0; i < n_blobs; ++i) { - const auto blob_path = - internal::ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); + const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view(std::to_string(i)))); ASSERT_OK(output->Close()); @@ -884,62 +1065,24 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { } ASSERT_OK(fs_->DeleteDir(directory_path)); for (int64_t i = 0; i < n_blobs; ++i) { - const auto blob_path = - internal::ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); + const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); } } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessEmpty) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->CreateDir(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirFailureNonexistent) { - const auto path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDir(path)); +TEST_F(TestAzuriteFileSystem, DeleteDirUri) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.container_name + "/")); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveBlob) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); - ASSERT_OK(output->Write(std::string_view("hello"))); - ASSERT_OK(output->Close()); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveDirectory) { - const auto parent = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(parent)); - arrow::fs::AssertFileInfo(fs_.get(), path, 
FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirUri) { - ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + PreexistingContainerPath())); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif + auto data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.container)); arrow::fs::AssertFileInfo(fs_.get(), paths.container, FileType::Directory); arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); @@ -948,13 +1091,14 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif + auto data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.directory)); // GH-38772: We may change this to FileType::Directory. arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); @@ -963,98 +1107,72 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessExist) { - HierarchicalPaths paths; - CreateHierarchicalData(paths); - ASSERT_OK(fs_->DeleteDirContents(paths.directory)); - arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); - for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); - } -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessNonexistent) { + this->TestDeleteDirContentsSuccessNonexistent(); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsFailureNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); +TEST_F(TestAzuriteFileSystem, DeleteDirContentsFailureNonexistent) { + this->TestDeleteDirContentsFailureNonexistent(); } -TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) { - const auto destination_path = - 
internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path)); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(destination_path)); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(kLoremIpsum, buffer->ToString()); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } -TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationSame) { - ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), PreexistingObjectPath())); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationSame) { + auto data = SetUpPreexistingData(); + ASSERT_OK(fs_->CopyFile(data.ObjectPath(), data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(kLoremIpsum, buffer->ToString()); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationTrailingSlash) { - ASSERT_RAISES(IOError, - fs_->CopyFile(PreexistingObjectPath(), - internal::EnsureTrailingSlash(PreexistingObjectPath()))); +TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationTrailingSlash) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), + internal::EnsureTrailingSlash(data.ObjectPath()))); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureSourceNonexistent) { - const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(NotFoundObjectPath(), destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileFailureSourceNonexistent) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(), destination_path)); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationParentNonexistent) { +TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationParentNonexistent) { + auto data = SetUpPreexistingData(); const auto destination_path = - internal::ConcatAbstractPath(RandomContainerName(), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(PreexistingObjectPath(), destination_path)); + ConcatAbstractPath(PreexistingData::RandomContainerName(rng_), "copy-destionation"); + ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), destination_path)); } -TEST_F(AzuriteFileSystemTest, CopyFileUri) { - const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_RAISES(Invalid, - fs_->CopyFile("abfs://" + PreexistingObjectPath(), destination_path)); - ASSERT_RAISES(Invalid, - fs_->CopyFile(PreexistingObjectPath(), "abfs://" + destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileUri) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(), destination_path)); + ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), 
"abfs://" + destination_path)); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamString) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamString) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(buffer->ToString(), kLoremIpsum); + EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); std::string contents; std::shared_ptr buffer; @@ -1063,23 +1181,25 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { contents.append(buffer->ToString()); } while (buffer && buffer->size() != 0); - EXPECT_EQ(contents, kLoremIpsum); + EXPECT_EQ(contents, PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamInfo) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamInfo) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(buffer->ToString(), kLoremIpsum); + EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) { + auto data = SetUpPreexistingData(); const auto path_to_file = "empty-object.txt"; - const auto path = PreexistingContainerPath() + path_to_file; - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) + const auto path = data.ContainerPath(path_to_file); + blob_service_client_->GetBlobContainerClient(data.container_name) .GetBlockBlobClient(path_to_file) .UploadFrom(nullptr, 0); @@ -1090,24 +1210,28 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { EXPECT_EQ(size, 0); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamNotFound) { - ASSERT_RAISES(IOError, fs_->OpenInputStream(NotFoundObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamNotFound) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputStream(data.NotFoundObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingContainerPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamInfoInvalid) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name + "/")); ASSERT_RAISES(IOError, fs_->OpenInputStream(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); ASSERT_RAISES(IOError, fs_->OpenInputStream(info2)); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamUri) { - ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamUri) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + data.ObjectPath())); } -TEST_F(AzuriteFileSystemTest, 
OpenInputStreamTrailingSlash) { - ASSERT_RAISES(IOError, fs_->OpenInputStream(PreexistingObjectPath() + '/')); +TEST_F(TestAzuriteFileSystem, OpenInputStreamTrailingSlash) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputStream(data.ObjectPath() + '/')); } namespace { @@ -1145,9 +1269,10 @@ std::shared_ptr NormalizerKeyValueMetadata( } }; // namespace -TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); std::shared_ptr actual; ASSERT_OK_AND_ASSIGN(actual, stream->ReadMetadata()); @@ -1175,8 +1300,9 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { NormalizerKeyValueMetadata(actual)->ToString()); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamClosed) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Read(buffer.size(), buffer.data())); @@ -1184,44 +1310,45 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { ASSERT_RAISES(Invalid, stream->Tell()); } -TEST_F(AzuriteFileSystemTest, TestWriteMetadata) { +TEST_F(TestAzuriteFileSystem, WriteMetadata) { + auto data = SetUpPreexistingData(); options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}}); ASSERT_OK_AND_ASSIGN(auto fs_with_defaults, AzureFileSystem::Make(options_)); - std::string path = "object_with_defaults"; - auto location = PreexistingContainerPath() + path; + std::string blob_path = "object_with_defaults"; + auto full_path = data.ContainerPath(blob_path); ASSERT_OK_AND_ASSIGN(auto output, - fs_with_defaults->OpenOutputStream(location, /*metadata=*/{})); - const std::string_view expected(kLoremIpsum); + fs_with_defaults->OpenOutputStream(full_path, /*metadata=*/{})); + const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); // Verify the metadata has been set. - auto blob_metadata = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path) - .GetProperties() - .Value.Metadata; - EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); + auto blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(blob_path) + .GetProperties() + .Value.Metadata; + EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); // Check that explicit metadata overrides the defaults. ASSERT_OK_AND_ASSIGN( output, fs_with_defaults->OpenOutputStream( - location, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); + full_path, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); - blob_metadata = blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path) + blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(blob_path) .GetProperties() .Value.Metadata; // Defaults are overwritten and not merged. 
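// (Only the explicitly supplied {"bar", "foo"} pair remains; the default
// {"foo", "bar"} entry is not carried over.)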
-  EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata);
+  EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata);
 }
 
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) {
-  const auto path = PreexistingContainerPath() + "test-write-object";
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) {
+  auto data = SetUpPreexistingData();
+  const auto path = data.ContainerPath("test-write-object");
   ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
-  const std::string_view expected(kLoremIpsum);
+  const std::string_view expected(PreexistingData::kLoremIpsum);
   ASSERT_OK(output->Write(expected));
   ASSERT_OK(output->Close());
 
@@ -1234,8 +1361,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) {
   EXPECT_EQ(expected, std::string_view(inbuf.data(), size));
 }
 
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) {
-  const auto path = PreexistingContainerPath() + "test-write-object";
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) {
+  auto data = SetUpPreexistingData();
+  const auto path = data.ContainerPath("test-write-object");
   ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
   std::array sizes{257 * 1024, 258 * 1024, 259 * 1024};
   std::array buffers{
@@ -1265,8 +1393,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) {
   EXPECT_EQ(contents, buffers[0] + buffers[1] + buffers[2]);
 }
 
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) {
-  const auto path = PreexistingContainerPath() + "test-write-object";
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) {
+  auto data = SetUpPreexistingData();
+  const auto path = data.ContainerPath("test-write-object");
   ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
   const std::string_view expected0("Existing blob content");
   ASSERT_OK(output->Write(expected0));
@@ -1281,7 +1410,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) {
   EXPECT_EQ(expected0, std::string_view(inbuf.data(), size));
 
   ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(path, {}));
-  const std::string_view expected1(kLoremIpsum);
+  const std::string_view expected1(PreexistingData::kLoremIpsum);
   ASSERT_OK(output->Write(expected1));
   ASSERT_OK(output->Close());
 
@@ -1291,8 +1420,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) {
   EXPECT_EQ(expected1, std::string_view(inbuf.data(), size));
 }
 
-TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) {
-  const auto path = PreexistingContainerPath() + "test-write-object";
+TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) {
+  auto data = SetUpPreexistingData();
+  const auto path = data.ContainerPath("test-write-object");
   ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
   const std::string_view expected0("Existing blob content");
   ASSERT_OK(output->Write(expected0));
@@ -1307,7 +1437,7 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) {
   EXPECT_EQ(expected0, std::string_view(inbuf.data()));
 
   ASSERT_OK_AND_ASSIGN(output, fs_->OpenAppendStream(path, {}));
-  const std::string_view expected1(kLoremIpsum);
+  const std::string_view expected1(PreexistingData::kLoremIpsum);
   ASSERT_OK(output->Write(expected1));
   ASSERT_OK(output->Close());
 
@@ -1319,35 +1449,37 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) {
             std::string(expected0) + std::string(expected1));
 }
 
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) {
-  const auto path = internal::ConcatAbstractPath(PreexistingContainerName(),
-                                                 "open-output-stream-closed.txt");
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) {
+  auto data = SetUpPreexistingData();
+  const auto path = data.ContainerPath("open-output-stream-closed.txt");
   ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
   ASSERT_OK(output->Close());
-  ASSERT_RAISES(Invalid, output->Write(kLoremIpsum, std::strlen(kLoremIpsum)));
+  ASSERT_RAISES(Invalid, output->Write(PreexistingData::kLoremIpsum,
+                                       std::strlen(PreexistingData::kLoremIpsum)));
   ASSERT_RAISES(Invalid, output->Flush());
   ASSERT_RAISES(Invalid, output->Tell());
 }
 
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamUri) {
-  const auto path = internal::ConcatAbstractPath(PreexistingContainerName(),
-                                                 "open-output-stream-uri.txt");
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamUri) {
+  auto data = SetUpPreexistingData();
+  const auto path = data.ContainerPath("open-output-stream-uri.txt");
   ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path));
 }
 
-TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) {
+TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) {
+  auto data = SetUpPreexistingData();
   // Create a file large enough to make the random access tests non-trivial.
   auto constexpr kLineWidth = 100;
   auto constexpr kLineCount = 4096;
   std::vector lines(kLineCount);
   int lineno = 0;
-  std::generate_n(lines.begin(), lines.size(),
-                  [&] { return RandomLine(++lineno, kLineWidth); });
+  std::generate_n(lines.begin(), lines.size(), [&] {
+    return PreexistingData::RandomLine(++lineno, kLineWidth, rng_);
+  });
 
-  const auto path_to_file = "OpenInputFileMixedReadVsReadAt/object-name";
-  const auto path = PreexistingContainerPath() + path_to_file;
+  const auto path = data.ContainerPath("OpenInputFileMixedReadVsReadAt/object-name");
 
-  UploadLines(lines, path_to_file, kLineCount * kLineWidth);
+  UploadLines(lines, path, kLineCount * kLineWidth);
 
   std::shared_ptr file;
   ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path));
@@ -1368,7 +1500,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) {
   }
 
   // Verify random reads interleave too.
-  auto const index = RandomIndex(kLineCount);
+  auto const index = PreexistingData::RandomIndex(kLineCount, rng_);
   auto const position = index * kLineWidth;
   ASSERT_OK_AND_ASSIGN(size, file->ReadAt(position, buffer.size(), buffer.data()));
   EXPECT_EQ(size, kLineWidth);
@@ -1381,27 +1513,28 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) {
   }
 }
 
-TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) {
+TEST_F(TestAzuriteFileSystem, OpenInputFileRandomSeek) {
+  auto data = SetUpPreexistingData();
   // Create a file large enough to make the random access tests non-trivial.
   auto constexpr kLineWidth = 100;
   auto constexpr kLineCount = 4096;
   std::vector lines(kLineCount);
   int lineno = 0;
-  std::generate_n(lines.begin(), lines.size(),
-                  [&] { return RandomLine(++lineno, kLineWidth); });
+  std::generate_n(lines.begin(), lines.size(), [&] {
+    return PreexistingData::RandomLine(++lineno, kLineWidth, rng_);
+  });
 
-  const auto path_to_file = "OpenInputFileRandomSeek/object-name";
-  const auto path = PreexistingContainerPath() + path_to_file;
+  const auto path = data.ContainerPath("OpenInputFileRandomSeek/object-name");
   std::shared_ptr output;
 
-  UploadLines(lines, path_to_file, kLineCount * kLineWidth);
+  UploadLines(lines, path, kLineCount * kLineWidth);
 
   std::shared_ptr file;
   ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path));
 
   for (int i = 0; i != 32; ++i) {
     SCOPED_TRACE("Iteration " + std::to_string(i));
     // Verify sequential reads work as expected.
-    auto const index = RandomIndex(kLineCount);
+    auto const index = PreexistingData::RandomIndex(kLineCount, rng_);
     auto const position = index * kLineWidth;
     ASSERT_OK(file->Seek(position));
     ASSERT_OK_AND_ASSIGN(auto actual, file->Read(kLineWidth));
@@ -1409,15 +1542,15 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) {
   }
 }
 
-TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) {
+TEST_F(TestAzuriteFileSystem, OpenInputFileIoContext) {
+  auto data = SetUpPreexistingData();
   // Create a test file.
-  const auto path_to_file = "OpenInputFileIoContext/object-name";
-  const auto path = PreexistingContainerPath() + path_to_file;
+  const auto blob_path = "OpenInputFileIoContext/object-name";
+  const auto path = data.ContainerPath(blob_path);
   const std::string contents = "The quick brown fox jumps over the lazy dog";
 
-  auto blob_client =
-      blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
-          .GetBlockBlobClient(path_to_file);
+  auto blob_client = blob_service_client_->GetBlobContainerClient(data.container_name)
+                         .GetBlockBlobClient(blob_path);
   blob_client.UploadFrom(reinterpret_cast(contents.data()),
                          contents.length());
 
@@ -1426,8 +1559,9 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) {
   EXPECT_EQ(fs_->io_context().external_id(), file->io_context().external_id());
 }
 
-TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) {
-  ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) {
+  auto data = SetUpPreexistingData();
+  ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath()));
 
   std::shared_ptr file;
   ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(info));
@@ -1437,24 +1571,27 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) {
   auto constexpr kStart = 16;
   ASSERT_OK_AND_ASSIGN(size, file->ReadAt(kStart, buffer.size(), buffer.data()));
 
-  auto const expected = std::string(kLoremIpsum).substr(kStart);
+  auto const expected = std::string(PreexistingData::kLoremIpsum).substr(kStart);
   EXPECT_EQ(std::string(buffer.data(), size), expected);
 }
 
-TEST_F(AzuriteFileSystemTest, OpenInputFileNotFound) {
-  ASSERT_RAISES(IOError, fs_->OpenInputFile(NotFoundObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputFileNotFound) {
+  auto data = SetUpPreexistingData();
+  ASSERT_RAISES(IOError, fs_->OpenInputFile(data.NotFoundObjectPath()));
 }
 
-TEST_F(AzuriteFileSystemTest, OpenInputFileInfoInvalid) {
-  ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingContainerPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputFileInfoInvalid) {
+  auto data = SetUpPreexistingData();
+  ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name));
   ASSERT_RAISES(IOError, fs_->OpenInputFile(info));
 
-  ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath()));
+  ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath()));
   ASSERT_RAISES(IOError, fs_->OpenInputFile(info2));
 }
 
-TEST_F(AzuriteFileSystemTest, OpenInputFileClosed) {
-  ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(PreexistingObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) {
+  auto data = SetUpPreexistingData();
+  ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(data.ObjectPath()));
   ASSERT_OK(stream->Close());
 
   std::array buffer{};
   ASSERT_RAISES(Invalid, stream->Tell());