diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 824a8fb531483..217885364089b 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -33,42 +33,85 @@ #include "arrow/util/logging.h" #include "arrow/util/string.h" -namespace arrow { -namespace fs { +namespace arrow::fs { + +namespace Blobs = Azure::Storage::Blobs; +namespace Core = Azure::Core; +namespace DataLake = Azure::Storage::Files::DataLake; +namespace Http = Azure::Core::Http; +namespace Storage = Azure::Storage; // ----------------------------------------------------------------------- // AzureOptions Implementation AzureOptions::AzureOptions() = default; +AzureOptions::~AzureOptions() = default; + bool AzureOptions::Equals(const AzureOptions& other) const { - return (account_dfs_url == other.account_dfs_url && - account_blob_url == other.account_blob_url && - credentials_kind == other.credentials_kind && - default_metadata == other.default_metadata); + // TODO(GH-38598): update here when more auth methods are added. 
+ const bool equals = backend == other.backend && + default_metadata == other.default_metadata && + account_blob_url_ == other.account_blob_url_ && + account_dfs_url_ == other.account_dfs_url_ && + credential_kind_ == other.credential_kind_; + if (!equals) { + return false; + } + switch (credential_kind_) { + case CredentialKind::kAnonymous: + return true; + case CredentialKind::kStorageSharedKeyCredential: + return storage_shared_key_credential_->AccountName == + other.storage_shared_key_credential_->AccountName; + } + DCHECK(false); + return false; } -Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& account_name, - const std::string& account_key) { - if (this->backend == AzureBackend::Azurite) { - account_blob_url = "http://127.0.0.1:10000/" + account_name + "/"; - account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/"; +Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key) { + if (this->backend == AzureBackend::kAzurite) { + account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/"; + account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/"; } else { - account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/"; - account_blob_url = "https://" + account_name + ".blob.core.windows.net/"; + account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/"; + account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/"; } - storage_credentials_provider = - std::make_shared(account_name, - account_key); - credentials_kind = AzureCredentialsKind::StorageCredentials; + credential_kind_ = CredentialKind::kStorageSharedKeyCredential; + storage_shared_key_credential_ = + std::make_shared(account_name, account_key); return Status::OK(); } +Result> AzureOptions::MakeBlobServiceClient() + const { + switch (credential_kind_) { + case CredentialKind::kAnonymous: + break; + case CredentialKind::kStorageSharedKeyCredential: + return 
std::make_unique(account_blob_url_, + storage_shared_key_credential_); + } + return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); +} + +Result> +AzureOptions::MakeDataLakeServiceClient() const { + switch (credential_kind_) { + case CredentialKind::kAnonymous: + break; + case CredentialKind::kStorageSharedKeyCredential: + return std::make_unique( + account_dfs_url_, storage_shared_key_credential_); + } + return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); +} + namespace { -// An AzureFileSystem represents a single Azure storage -// account. AzureLocation describes a container and path within -// that storage account. +// An AzureFileSystem represents an Azure storage account. An AzureLocation describes a +// container in that storage account and a path within that container. struct AzureLocation { std::string all; std::string container; @@ -82,8 +125,8 @@ struct AzureLocation { // path_parts = [testdir, testfile.txt] if (internal::IsLikelyUri(string)) { return Status::Invalid( - "Expected an Azure object location of the form 'container/path...', got a URI: " - "'", + "Expected an Azure object location of the form 'container/path...'," + " got a URI: '", string, "'"); } auto first_sep = string.find_first_of(internal::kSep); @@ -130,14 +173,15 @@ struct AzureLocation { private: Status Validate() { auto status = internal::ValidateAbstractPathParts(path_parts); - if (!status.ok()) { - return Status::Invalid(status.message(), " in location ", all); - } else { - return status; - } + return status.ok() ? 
status : Status::Invalid(status.message(), " in location ", all); } }; +Status ExceptionToStatus(const std::string& prefix, + const Azure::Storage::StorageException& exception) { + return Status::IOError(prefix, " Azure Error: ", exception.what()); +} + Status PathNotFound(const AzureLocation& location) { return ::arrow::fs::internal::PathNotFound(location.all); } @@ -153,23 +197,41 @@ Status ValidateFileLocation(const AzureLocation& location) { if (location.path.empty()) { return NotAFile(location); } - ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(location.path)); - return Status::OK(); + return internal::AssertNoTrailingSlash(location.path); +} + +std::string_view BodyTextView(const Http::RawResponse& raw_response) { + const auto& body = raw_response.GetBody(); +#ifndef NDEBUG + auto& headers = raw_response.GetHeaders(); + auto content_type = headers.find("Content-Type"); + if (content_type != headers.end()) { + DCHECK_EQ(std::string_view{content_type->second}.substr(5), "text/"); + } +#endif + return std::string_view{reinterpret_cast(body.data()), body.size()}; } Status StatusFromErrorResponse(const std::string& url, - Azure::Core::Http::RawResponse* raw_response, + const Http::RawResponse& raw_response, const std::string& context) { - const auto& body = raw_response->GetBody(); // There isn't an Azure specification that response body on error // doesn't contain any binary data but we assume it. We hope that // error response body has useful information for the error. 
- std::string_view body_text(reinterpret_cast(body.data()), body.size()); - return Status::IOError(context, ": ", url, ": ", raw_response->GetReasonPhrase(), " (", - static_cast(raw_response->GetStatusCode()), + auto body_text = BodyTextView(raw_response); + return Status::IOError(context, ": ", url, ": ", raw_response.GetReasonPhrase(), " (", + static_cast(raw_response.GetStatusCode()), "): ", body_text); } +bool IsContainerNotFound(const Storage::StorageException& exception) { + if (exception.ErrorCode == "ContainerNotFound") { + DCHECK_EQ(exception.StatusCode, Http::HttpStatusCode::NotFound); + return true; + } + return false; +} + template std::string FormatValue(typename TypeTraits::CType value) { struct StringAppender { @@ -185,7 +247,7 @@ std::string FormatValue(typename TypeTraits::CType value) { } std::shared_ptr PropertiesToMetadata( - const Azure::Storage::Blobs::Models::BlobProperties& properties) { + const Blobs::Models::BlobProperties& properties) { auto metadata = std::make_shared(); // Not supported yet: // * properties.ObjectReplicationSourceProperties @@ -316,7 +378,7 @@ std::shared_ptr PropertiesToMetadata( class ObjectInputFile final : public io::RandomAccessFile { public: - ObjectInputFile(std::shared_ptr blob_client, + ObjectInputFile(std::shared_ptr blob_client, const io::IOContext& io_context, AzureLocation location, int64_t size = kNoSize) : blob_client_(std::move(blob_client)), @@ -334,11 +396,11 @@ class ObjectInputFile final : public io::RandomAccessFile { content_length_ = properties.Value.BlobSize; metadata_ = PropertiesToMetadata(properties.Value); return Status::OK(); - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { return PathNotFound(location_); } - return internal::ExceptionToStatus( + return ExceptionToStatus( 
"GetProperties failed for '" + blob_client_->GetUrl() + "' with an unexpected Azure error. Cannot initialise an ObjectInputFile " "without knowing the file size.", @@ -411,20 +473,20 @@ class ObjectInputFile final : public io::RandomAccessFile { } // Read the desired range of bytes - Azure::Core::Http::HttpRange range{position, nbytes}; - Azure::Storage::Blobs::DownloadBlobToOptions download_options; + Http::HttpRange range{position, nbytes}; + Storage::Blobs::DownloadBlobToOptions download_options; download_options.Range = range; try { return blob_client_ ->DownloadTo(reinterpret_cast(out), nbytes, download_options) .Value.ContentRange.Length.Value(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + - "' at position " + std::to_string(position) + - " for " + std::to_string(nbytes) + - " bytes failed with an Azure error. ReadAt " - "failed to read the required byte range.", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + + "' at position " + std::to_string(position) + " for " + + std::to_string(nbytes) + + " bytes failed with an Azure error. 
ReadAt " + "failed to read the required byte range.", + exception); } } @@ -459,7 +521,7 @@ class ObjectInputFile final : public io::RandomAccessFile { } private: - std::shared_ptr blob_client_; + std::shared_ptr blob_client_; const io::IOContext io_context_; AzureLocation location_; @@ -469,12 +531,11 @@ class ObjectInputFile final : public io::RandomAccessFile { std::shared_ptr metadata_; }; -Status CreateEmptyBlockBlob( - std::shared_ptr block_blob_client) { +Status CreateEmptyBlockBlob(std::shared_ptr block_blob_client) { try { block_blob_client->UploadFrom(nullptr, 0); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "UploadFrom failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. There is no existing blob at this " "location or the existing blob must be replaced so ObjectAppendStream must " @@ -484,12 +545,12 @@ Status CreateEmptyBlockBlob( return Status::OK(); } -Result GetBlockList( - std::shared_ptr block_blob_client) { +Result GetBlockList( + std::shared_ptr block_blob_client) { try { return block_blob_client->GetBlockList().Value; - } catch (Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (Storage::StorageException& exception) { + return ExceptionToStatus( "GetBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. 
Cannot write to a file without first " "fetching the existing block list.", @@ -497,19 +558,19 @@ Result GetBlockList( } } -Azure::Storage::Metadata ArrowMetadataToAzureMetadata( +Storage::Metadata ArrowMetadataToAzureMetadata( const std::shared_ptr& arrow_metadata) { - Azure::Storage::Metadata azure_metadata; + Storage::Metadata azure_metadata; for (auto key_value : arrow_metadata->sorted_pairs()) { azure_metadata[key_value.first] = key_value.second; } return azure_metadata; } -Status CommitBlockList( - std::shared_ptr block_blob_client, - const std::vector& block_ids, const Azure::Storage::Metadata& metadata) { - Azure::Storage::Blobs::CommitBlockListOptions options; +Status CommitBlockList(std::shared_ptr block_blob_client, + const std::vector& block_ids, + const Storage::Metadata& metadata) { + Blobs::CommitBlockListOptions options; options.Metadata = metadata; try { // CommitBlockList puts all block_ids in the latest element. That means in the case of @@ -517,8 +578,8 @@ Status CommitBlockList( // previously committed blocks. // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list?tabs=microsoft-entra-id#request-body block_blob_client->CommitBlockList(block_ids, options); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "CommitBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. 
Committing is required to flush an " "output/append stream.", @@ -529,11 +590,10 @@ Status CommitBlockList( class ObjectAppendStream final : public io::OutputStream { public: - ObjectAppendStream( - std::shared_ptr block_blob_client, - const io::IOContext& io_context, const AzureLocation& location, - const std::shared_ptr& metadata, - const AzureOptions& options, int64_t size = kNoSize) + ObjectAppendStream(std::shared_ptr block_blob_client, + const io::IOContext& io_context, const AzureLocation& location, + const std::shared_ptr& metadata, + const AzureOptions& options, int64_t size = kNoSize) : block_blob_client_(std::move(block_blob_client)), io_context_(io_context), location_(location), @@ -560,11 +620,11 @@ class ObjectAppendStream final : public io::OutputStream { auto properties = block_blob_client_->GetProperties(); content_length_ = properties.Value.BlobSize; pos_ = content_length_; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_)); } else { - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties failed for '" + block_blob_client_->GetUrl() + "' with an unexpected Azure error. 
Cannot initialise an " "ObjectAppendStream without knowing whether a file already exists at " @@ -634,7 +694,7 @@ class ObjectAppendStream final : public io::OutputStream { std::shared_ptr owned_buffer = nullptr) { RETURN_NOT_OK(CheckClosed("append")); auto append_data = reinterpret_cast(data); - Azure::Core::IO::MemoryBodyStream block_content(append_data, nbytes); + Core::IO::MemoryBodyStream block_content(append_data, nbytes); if (block_content.Length() == 0) { return Status::OK(); } @@ -657,13 +717,13 @@ class ObjectAppendStream final : public io::OutputStream { // if the blob was previously created with one block, with id `00001-arrow` then the // next block we append will conflict with that, and cause corruption. new_block_id += "-arrow"; - new_block_id = Azure::Core::Convert::Base64Encode( + new_block_id = Core::Convert::Base64Encode( std::vector(new_block_id.begin(), new_block_id.end())); try { block_blob_client_->StageBlock(new_block_id, block_content); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "StageBlock failed for '" + block_blob_client_->GetUrl() + "' new_block_id: '" + new_block_id + "' with an unexpected Azure error. 
Staging new blocks is fundamental to " @@ -676,7 +736,7 @@ class ObjectAppendStream final : public io::OutputStream { return Status::OK(); } - std::shared_ptr block_blob_client_; + std::shared_ptr block_blob_client_; const io::IOContext io_context_; const AzureLocation location_; @@ -684,7 +744,7 @@ class ObjectAppendStream final : public io::OutputStream { int64_t pos_ = 0; int64_t content_length_ = kNoSize; std::vector block_ids_; - Azure::Storage::Metadata metadata_; + Storage::Metadata metadata_; }; } // namespace @@ -693,27 +753,31 @@ class ObjectAppendStream final : public io::OutputStream { // AzureFilesystem Implementation class AzureFileSystem::Impl { - public: + private: io::IOContext io_context_; - std::unique_ptr - datalake_service_client_; - std::unique_ptr blob_service_client_; AzureOptions options_; - internal::HierarchicalNamespaceDetector hierarchical_namespace_; - explicit Impl(AzureOptions options, io::IOContext io_context) - : io_context_(io_context), options_(std::move(options)) {} + std::unique_ptr datalake_service_client_; + std::unique_ptr blob_service_client_; + internal::HierarchicalNamespaceDetector hns_detector_; - Status Init() { - blob_service_client_ = std::make_unique( - options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = - std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); - RETURN_NOT_OK(hierarchical_namespace_.Init(datalake_service_client_.get())); - return Status::OK(); - } + Impl(AzureOptions options, io::IOContext io_context) + : io_context_(std::move(io_context)), options_(std::move(options)) {} + public: + static Result> Make(AzureOptions options, + io::IOContext io_context) { + auto self = std::unique_ptr( + new AzureFileSystem::Impl(std::move(options), std::move(io_context))); + ARROW_ASSIGN_OR_RAISE(self->blob_service_client_, + self->options_.MakeBlobServiceClient()); + ARROW_ASSIGN_OR_RAISE(self->datalake_service_client_, + 
self->options_.MakeDataLakeServiceClient()); + RETURN_NOT_OK(self->hns_detector_.Init(self->datalake_service_client_.get())); + return self; + } + + io::IOContext& io_context() { return io_context_; } const AzureOptions& options() const { return options_; } public: @@ -722,12 +786,10 @@ class AzureFileSystem::Impl { info.set_path(location.all); if (location.container.empty()) { - // The location is invalid if the container is empty but not - // path. + // The location is invalid if the container is empty but the path is not. DCHECK(location.path.empty()); - // The location must refer to the root of the Azure storage - // account. This is a directory, and there isn't any extra - // metadata to fetch. + // This location must be derived from the root path. FileInfo should describe it + // as a directory and there isn't any extra metadata to fetch. info.set_type(FileType::Directory); return info; } @@ -739,20 +801,22 @@ class AzureFileSystem::Impl { auto properties = container_client.GetProperties(); info.set_type(FileType::Directory); info.set_mtime( - std::chrono::system_clock::time_point(properties.Value.LastModified)); + std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { info.set_type(FileType::NotFound); return info; } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties for '" + container_client.GetUrl() + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the container exists.", exception); } } + + // There is a path to search within the container. 
auto file_client = datalake_service_client_->GetFileSystemClient(location.container) .GetFileClient(location.path); try { @@ -763,6 +827,8 @@ class AzureFileSystem::Impl { // For a path with a trailing slash a hierarchical namespace may return a blob // with that trailing slash removed. For consistency with flat namespace and // other filesystems we chose to return NotFound. + // + // NOTE(felipecrv): could this be an empty directory marker? info.set_type(FileType::NotFound); return info; } else { @@ -770,12 +836,12 @@ class AzureFileSystem::Impl { info.set_size(properties.Value.FileSize); } info.set_mtime( - std::chrono::system_clock::time_point(properties.Value.LastModified)); + std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { // If the hierarchical namespace is enabled, then the storage account will have // explicit directories. Neither a file nor a directory was found. @@ -784,12 +850,10 @@ class AzureFileSystem::Impl { } // On flat namespace accounts there are no real directories. Directories are only // implied by using `/` in the blob name. - Azure::Storage::Blobs::ListBlobsOptions list_blob_options; - + Blobs::ListBlobsOptions list_blob_options; // If listing the prefix `path.path_to_file` with trailing slash returns at least // one result then `path` refers to an implied directory. 
- auto prefix = internal::EnsureTrailingSlash(location.path); - list_blob_options.Prefix = prefix; + list_blob_options.Prefix = internal::EnsureTrailingSlash(location.path); // We only need to know if there is at least one result, so minimise page size // for efficiency. list_blob_options.PageSizeHint = 1; @@ -798,21 +862,19 @@ class AzureFileSystem::Impl { auto paged_list_result = blob_service_client_->GetBlobContainerClient(location.container) .ListBlobs(list_blob_options); - if (paged_list_result.Blobs.size() > 0) { - info.set_type(FileType::Directory); - } else { - info.set_type(FileType::NotFound); - } + auto file_type = paged_list_result.Blobs.size() > 0 ? FileType::Directory + : FileType::NotFound; + info.set_type(file_type); return info; - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "ListBlobs for '" + prefix + + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + "ListBlobs for '" + *list_blob_options.Prefix + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the path should be considered an implied directory.", exception); } } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties for '" + file_client.GetUrl() + "' failed with an unexpected " "Azure error. 
GetFileInfo is unable to determine whether the path exists.", @@ -822,9 +884,8 @@ class AzureFileSystem::Impl { private: template - Status VisitContainers(const Azure::Core::Context& context, - OnContainer&& on_container) const { - Azure::Storage::Blobs::ListBlobContainersOptions options; + Status VisitContainers(const Core::Context& context, OnContainer&& on_container) const { + Blobs::ListBlobContainersOptions options; try { auto container_list_response = blob_service_client_->ListBlobContainers(options, context); @@ -834,14 +895,14 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(on_container(container)); } } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus("Failed to list account containers.", exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to list account containers.", exception); } return Status::OK(); } - static FileInfo FileInfoFromBlob(const std::string& container, - const Azure::Storage::Blobs::Models::BlobItem& blob) { + static FileInfo FileInfoFromBlob(std::string_view container, + const Blobs::Models::BlobItem& blob) { auto path = internal::ConcatAbstractPath(container, blob.Name); if (internal::HasTrailingSlash(blob.Name)) { return DirectoryFileInfoFromPath(path); @@ -852,7 +913,7 @@ class AzureFileSystem::Impl { return info; } - static FileInfo DirectoryFileInfoFromPath(const std::string& path) { + static FileInfo DirectoryFileInfoFromPath(std::string_view path) { return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory}; } @@ -870,13 +931,13 @@ class AzureFileSystem::Impl { /// \pre container_client is the client for the container named like the first /// segment of select.base_dir. 
Status GetFileInfoWithSelectorFromContainer( - const Azure::Storage::Blobs::BlobContainerClient& container_client, - const Azure::Core::Context& context, Azure::Nullable page_size_hint, - const FileSelector& select, FileInfoVector* acc_results) { + const Blobs::BlobContainerClient& container_client, const Core::Context& context, + Azure::Nullable page_size_hint, const FileSelector& select, + FileInfoVector* acc_results) { ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir)); bool found = false; - Azure::Storage::Blobs::ListBlobsOptions options; + Blobs::ListBlobsOptions options; if (internal::IsEmptyPath(base_location.path)) { // If the base_dir is the root of the container, then we want to list all blobs in // the container and the Prefix should be empty and not even include the trailing @@ -887,7 +948,7 @@ class AzureFileSystem::Impl { options.Prefix = internal::EnsureTrailingSlash(base_location.path); } options.PageSizeHint = page_size_hint; - options.Include = Azure::Storage::Blobs::Models::ListBlobsIncludeFlags::Metadata; + options.Include = Blobs::Models::ListBlobsIncludeFlags::Metadata; auto recurse = [&](const std::string& blob_prefix) noexcept -> Status { if (select.recursive && select.max_recursion > 0) { @@ -903,15 +964,14 @@ class AzureFileSystem::Impl { return Status::OK(); }; - auto process_blob = - [&](const Azure::Storage::Blobs::Models::BlobItem& blob) noexcept { - // blob.Name has trailing slash only when Prefix is an empty - // directory marker blob for the directory we're listing - // from, and we should skip it. - if (!internal::HasTrailingSlash(blob.Name)) { - acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); - } - }; + auto process_blob = [&](const Blobs::Models::BlobItem& blob) noexcept { + // blob.Name has trailing slash only when Prefix is an empty + // directory marker blob for the directory we're listing + // from, and we should skip it. 
+ if (!internal::HasTrailingSlash(blob.Name)) { + acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); + } + }; auto process_prefix = [&](const std::string& prefix) noexcept -> Status { const auto path = internal::ConcatAbstractPath(base_location.container, prefix); acc_results->push_back(DirectoryFileInfoFromPath(path)); @@ -964,14 +1024,13 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(process_prefix(list_response.BlobPrefixes[blob_prefix_index])); } } - } catch (const Azure::Storage::StorageException& exception) { - if (exception.ErrorCode == "ContainerNotFound") { + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { found = false; } else { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + select.base_dir + ": " + - container_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to list blobs in a directory: " + + select.base_dir + ": " + container_client.GetUrl(), + exception); } } @@ -981,7 +1040,7 @@ class AzureFileSystem::Impl { } public: - Status GetFileInfoWithSelector(const Azure::Core::Context& context, + Status GetFileInfoWithSelector(const Core::Context& context, Azure::Nullable page_size_hint, const FileSelector& select, FileInfoVector* acc_results) { @@ -991,29 +1050,28 @@ class AzureFileSystem::Impl { // Without a container, the base_location is equivalent to the filesystem // root -- `/`. FileSelector::allow_not_found doesn't matter in this case // because the root always exists. - auto on_container = - [&](const Azure::Storage::Blobs::Models::BlobContainerItem& container) { - // Deleted containers are not listed by ListContainers. - DCHECK(!container.IsDeleted); - - // Every container is considered a directory. 
- FileInfo info{container.Name, FileType::Directory}; - info.set_mtime( - std::chrono::system_clock::time_point{container.Details.LastModified}); - acc_results->push_back(std::move(info)); - - // Recurse into containers (subdirectories) if requested. - if (select.recursive && select.max_recursion > 0) { - FileSelector sub_select; - sub_select.base_dir = container.Name; - sub_select.allow_not_found = true; - sub_select.recursive = true; - sub_select.max_recursion = select.max_recursion - 1; - ARROW_RETURN_NOT_OK(GetFileInfoWithSelector(context, page_size_hint, - sub_select, acc_results)); - } - return Status::OK(); - }; + auto on_container = [&](const Blobs::Models::BlobContainerItem& container) { + // Deleted containers are not listed by ListContainers. + DCHECK(!container.IsDeleted); + + // Every container is considered a directory. + FileInfo info{container.Name, FileType::Directory}; + info.set_mtime( + std::chrono::system_clock::time_point{container.Details.LastModified}); + acc_results->push_back(std::move(info)); + + // Recurse into containers (subdirectories) if requested. 
+ if (select.recursive && select.max_recursion > 0) { + FileSelector sub_select; + sub_select.base_dir = container.Name; + sub_select.allow_not_found = true; + sub_select.recursive = true; + sub_select.max_recursion = select.max_recursion - 1; + ARROW_RETURN_NOT_OK( + GetFileInfoWithSelector(context, page_size_hint, sub_select, acc_results)); + } + return Status::OK(); + }; return VisitContainers(context, std::move(on_container)); } @@ -1026,7 +1084,7 @@ class AzureFileSystem::Impl { Result> OpenInputFile(const AzureLocation& location, AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); - auto blob_client = std::make_shared( + auto blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlobClient(location.path)); @@ -1046,7 +1104,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(info.path())); RETURN_NOT_OK(ValidateFileLocation(location)); - auto blob_client = std::make_shared( + auto blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlobClient(location.path)); @@ -1070,19 +1128,18 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - container_client.GetUrl(), response.RawResponse.get(), + container_client.GetUrl(), *response.RawResponse, "Failed to create a container: " + location.container); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a container: " + location.container + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a container: " + location.container + + ": " + container_client.GetUrl(), + exception); } } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if 
(!hierarchical_namespace_enabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. Simply creating a blob with `/` @@ -1098,15 +1155,13 @@ class AzureFileSystem::Impl { if (response.Value.Created) { return Status::OK(); } else { - return StatusFromErrorResponse(directory_client.GetUrl(), - response.RawResponse.get(), + return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, "Failed to create a directory: " + location.path); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); } } @@ -1119,15 +1174,14 @@ class AzureFileSystem::Impl { blob_service_client_->GetBlobContainerClient(location.container); try { container_client.CreateIfNotExists(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a container: " + location.container + " (" + - container_client.GetUrl() + ")", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a container: " + location.container + + " (" + container_client.GetUrl() + ")", + exception); } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (!hierarchical_namespace_enabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. 
Simply creating a blob with `/` @@ -1141,11 +1195,10 @@ class AzureFileSystem::Impl { .GetDirectoryClient(location.path); try { directory_client.CreateIfNotExists(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a directory: " + location.path + " (" + - directory_client.GetUrl() + ")", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a directory: " + location.path + " (" + + directory_client.GetUrl() + ")", + exception); } } @@ -1158,7 +1211,7 @@ class AzureFileSystem::Impl { AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); - auto block_blob_client = std::make_shared( + auto block_blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlockBlobClient(location.path)); @@ -1180,7 +1233,7 @@ class AzureFileSystem::Impl { bool missing_dir_ok) { auto container_client = blob_service_client_->GetBlobContainerClient(location.container); - Azure::Storage::Blobs::ListBlobsOptions options; + Blobs::ListBlobsOptions options; if (!location.path.empty()) { options.Prefix = internal::EnsureTrailingSlash(location.path); } @@ -1200,19 +1253,17 @@ class AzureFileSystem::Impl { continue; } auto batch = container_client.CreateBatch(); - std::vector> + std::vector> deferred_responses; for (const auto& blob_item : list_response.Blobs) { deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); } try { container_client.SubmitBatch(batch); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete blobs in a directory: " + + location.path + ": " + container_client.GetUrl(), + exception); } std::vector failed_blob_names; for 
(size_t i = 0; i < deferred_responses.size(); ++i) { @@ -1221,7 +1272,7 @@ class AzureFileSystem::Impl { try { auto delete_result = deferred_response.GetResponse(); success = delete_result.Value.Deleted; - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { success = false; } if (!success) { @@ -1240,11 +1291,10 @@ class AzureFileSystem::Impl { } } } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to list blobs in a directory: " + location.path + + ": " + container_client.GetUrl(), + exception); } return Status::OK(); } @@ -1264,19 +1314,18 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - container_client.GetUrl(), response.RawResponse.get(), + container_client.GetUrl(), *response.RawResponse, "Failed to delete a container: " + location.container); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete a container: " + location.container + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete a container: " + location.container + + ": " + container_client.GetUrl(), + exception); } } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { auto directory_client = datalake_service_client_->GetFileSystemClient(location.container) @@ -1287,14 +1336,13 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - directory_client.GetUrl(), response.RawResponse.get(), + 
directory_client.GetUrl(), *response.RawResponse, "Failed to delete a directory: " + location.path); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); } } else { return DeleteDirContentsWithoutHierarchicalNamespace(location, @@ -1308,7 +1356,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { auto file_system_client = datalake_service_client_->GetFileSystemClient(location.container); @@ -1322,8 +1370,8 @@ class AzureFileSystem::Impl { file_system_client.GetDirectoryClient(path.Name); try { sub_directory_client.DeleteRecursive(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to delete a sub directory: " + location.container + internal::kSep + path.Name + ": " + sub_directory_client.GetUrl(), exception); @@ -1332,8 +1380,8 @@ class AzureFileSystem::Impl { auto sub_file_client = file_system_client.GetFileClient(path.Name); try { sub_file_client.Delete(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to delete a sub file: " + location.container + internal::kSep + path.Name + ": " + sub_file_client.GetUrl(), exception); @@ -1341,15 +1389,13 @@ class AzureFileSystem::Impl { } } } - } catch (const Azure::Storage::StorageException& exception) { - if (missing_dir_ok 
&& - exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { return Status::OK(); } else { - return internal::ExceptionToStatus( - "Failed to delete directory contents: " + location.path + ": " + - directory_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to delete directory contents: " + + location.path + ": " + directory_client.GetUrl(), + exception); } } return Status::OK(); @@ -1371,8 +1417,8 @@ class AzureFileSystem::Impl { .GetUrl(); try { dest_blob_client.CopyFromUri(src_url); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to copy a blob. (" + src_url + " -> " + dest_blob_client.GetUrl() + ")", exception); } @@ -1380,6 +1426,17 @@ class AzureFileSystem::Impl { } }; +AzureFileSystem::AzureFileSystem(std::unique_ptr&& impl) + : FileSystem(impl->io_context()), impl_(std::move(impl)) { + default_async_is_sync_ = false; +} + +Result> AzureFileSystem::Make( + const AzureOptions& options, const io::IOContext& io_context) { + ARROW_ASSIGN_OR_RAISE(auto impl, AzureFileSystem::Impl::Make(options, io_context)); + return std::shared_ptr(new AzureFileSystem(std::move(impl))); +} + const AzureOptions& AzureFileSystem::options() const { return impl_->options(); } bool AzureFileSystem::Equals(const FileSystem& other) const { @@ -1399,7 +1456,7 @@ Result AzureFileSystem::GetFileInfo(const std::string& path) { } Result AzureFileSystem::GetFileInfo(const FileSelector& select) { - Azure::Core::Context context; + Core::Context context; Azure::Nullable page_size_hint; // unspecified FileInfoVector results; RETURN_NOT_OK( @@ -1478,18 +1535,4 @@ Result> AzureFileSystem::OpenAppendStream( return impl_->OpenAppendStream(location, metadata, false, this); } -Result> 
AzureFileSystem::Make( - const AzureOptions& options, const io::IOContext& io_context) { - std::shared_ptr ptr(new AzureFileSystem(options, io_context)); - RETURN_NOT_OK(ptr->impl_->Init()); - return ptr; -} - -AzureFileSystem::AzureFileSystem(const AzureOptions& options, - const io::IOContext& io_context) - : FileSystem(io_context), impl_(std::make_unique(options, io_context)) { - default_async_is_sync_ = false; -} - -} // namespace fs -} // namespace arrow +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index b2865b059ef6e..1266aa2d02b86 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -25,90 +25,118 @@ #include "arrow/util/macros.h" #include "arrow/util/uri.h" -namespace Azure { -namespace Core { -namespace Credentials { - +namespace Azure::Core::Credentials { class TokenCredential; +} -} // namespace Credentials -} // namespace Core -namespace Storage { - +namespace Azure::Storage { class StorageSharedKeyCredential; +} -} // namespace Storage -} // namespace Azure - -namespace arrow { -namespace fs { - -enum class AzureCredentialsKind : int8_t { - /// Anonymous access (no credentials used), public - Anonymous, - /// Use explicitly-provided access key pair - StorageCredentials, - /// Use ServicePrincipleCredentials - ServicePrincipleCredentials, - /// Use Sas Token to authenticate - Sas, - /// Use Connection String - ConnectionString -}; +namespace Azure::Storage::Blobs { +class BlobServiceClient; +} + +namespace Azure::Storage::Files::DataLake { +class DataLakeServiceClient; +} -enum class AzureBackend : bool { - /// Official Azure Remote Backend - Azure, - /// Local Simulated Storage - Azurite +namespace arrow::fs { + +enum class AzureBackend { + /// \brief Official Azure Remote Backend + kAzure, + /// \brief Local Simulated Storage + kAzurite }; /// Options for the AzureFileSystem implementation. 
struct ARROW_EXPORT AzureOptions { - std::string account_dfs_url; - std::string account_blob_url; - AzureBackend backend = AzureBackend::Azure; - AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous; + /// \brief The backend to connect to: Azure or Azurite (for testing). + AzureBackend backend = AzureBackend::kAzure; - std::string sas_token; - std::string connection_string; - std::shared_ptr - storage_credentials_provider; - std::shared_ptr - service_principle_credentials_provider; + // TODO(GH-38598): Add support for more auth methods. + // std::string connection_string; + // std::string sas_token; /// \brief Default metadata for OpenOutputStream. /// /// This will be ignored if non-empty metadata is passed to OpenOutputStream. std::shared_ptr default_metadata; + private: + std::string account_blob_url_; + std::string account_dfs_url_; + + enum class CredentialKind { + kAnonymous, + kStorageSharedKeyCredential, + } credential_kind_ = CredentialKind::kAnonymous; + + std::shared_ptr + storage_shared_key_credential_; + + public: AzureOptions(); + ~AzureOptions(); - Status ConfigureAccountKeyCredentials(const std::string& account_name, - const std::string& account_key); + Status ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key); bool Equals(const AzureOptions& other) const; + + const std::string& AccountBlobUrl() const { return account_blob_url_; } + const std::string& AccountDfsUrl() const { return account_dfs_url_; } + + Result> + MakeBlobServiceClient() const; + + Result> + MakeDataLakeServiceClient() const; }; -/// \brief Azure-backed FileSystem implementation for ABFS and ADLS. +/// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and +/// Azure Data Lake Storage Gen2 (ADLS Gen2) [2]. +/// +/// ADLS Gen2 isn't a dedicated service or account type. It's a set of capabilities that +/// support high throughput analytic workloads, built on Azure Blob Storage. 
/// All the data +/// ingested via the ADLS Gen2 APIs is persisted as blobs in the storage account. +/// ADLS Gen2 provides filesystem semantics, file-level security, and Hadoop +/// compatibility. ADLS Gen1 exists as a separate object that will be retired on 2024-02-29 +/// and new ADLS accounts use Gen2 instead. /// -/// ABFS (Azure Blob Storage - https://azure.microsoft.com/en-us/products/storage/blobs/) -/// object-based cloud storage system. +/// ADLS Gen2 and Blob APIs can operate on the same data, but there are +/// some limitations [3]. The ones that are relevant to this +/// implementation are listed here: /// -/// ADLS (Azure Data Lake Storage - -/// https://azure.microsoft.com/en-us/products/storage/data-lake-storage/) -/// is a scalable data storage system designed for big-data applications. -/// ADLS provides filesystem semantics, file-level security, and Hadoop -/// compatibility. Gen1 exists as a separate object that will retired -/// on Feb 29, 2024. New ADLS accounts will use Gen2 instead, which is -/// implemented on top of ABFS. +/// - You can't use Blob APIs, and ADLS APIs to write to the same instance of a file. If +/// you write to a file by using ADLS APIs then that file's blocks won't be visible +/// to calls to the GetBlockList Blob API. The only exception is when you're +/// overwriting. +/// - When you use the ListBlobs operation without specifying a delimiter, the results +/// include both directories and blobs. If you choose to use a delimiter, use only a +/// forward slash (/) -- the only supported delimiter. +/// - If you use the DeleteBlob API to delete a directory, that directory is deleted only +/// if it's empty. This means that you can't use the Blob API to delete directories +/// recursively.
/// -/// TODO: GH-18014 Complete the internal implementation -/// and review the documentation +/// [1]: https://azure.microsoft.com/en-us/products/storage/blobs +/// [2]: https://azure.microsoft.com/en-us/products/storage/data-lake-storage +/// [3]: +/// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues class ARROW_EXPORT AzureFileSystem : public FileSystem { + private: + class Impl; + std::unique_ptr impl_; + + explicit AzureFileSystem(std::unique_ptr&& impl); + public: ~AzureFileSystem() override = default; + static Result> Make( + const AzureOptions& options, const io::IOContext& = io::default_io_context()); + std::string type_name() const override { return "abfs"; } /// Return the original Azure options when constructing the filesystem @@ -152,16 +180,6 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { Result> OpenAppendStream( const std::string& path, const std::shared_ptr& metadata = {}) override; - - static Result> Make( - const AzureOptions& options, const io::IOContext& = io::default_io_context()); - - private: - AzureFileSystem(const AzureOptions& options, const io::IOContext& io_context); - - class Impl; - std::unique_ptr impl_; }; -} // namespace fs -} // namespace arrow +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs_internal.cc b/cpp/src/arrow/filesystem/azurefs_internal.cc index 3e545d670cb04..39c3fb23e3cfd 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.cc +++ b/cpp/src/arrow/filesystem/azurefs_internal.cc @@ -23,11 +23,17 @@ namespace arrow::fs::internal { +namespace { + +// TODO(GH-38772): Remove azurefs_internal.h/.cc by moving the detector to +// azurefs.cc (which contains a private copy of this helper function already). 
Status ExceptionToStatus(const std::string& prefix, const Azure::Storage::StorageException& exception) { return Status::IOError(prefix, " Azure Error: ", exception.what()); } +} // namespace + Status HierarchicalNamespaceDetector::Init( Azure::Storage::Files::DataLake::DataLakeServiceClient* datalake_service_client) { datalake_service_client_ = datalake_service_client; diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h index c3da96239a18f..92592cf164f5a 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.h +++ b/cpp/src/arrow/filesystem/azurefs_internal.h @@ -25,9 +25,6 @@ namespace arrow::fs::internal { -Status ExceptionToStatus(const std::string& prefix, - const Azure::Storage::StorageException& exception); - class HierarchicalNamespaceDetector { public: Status Init( diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 792c63b209402..463ff4e8daf3d 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -36,6 +36,7 @@ #include "arrow/filesystem/azurefs.h" #include "arrow/filesystem/azurefs_internal.h" +#include #include #include @@ -63,6 +64,7 @@ namespace arrow { using internal::TemporaryDir; namespace fs { +using internal::ConcatAbstractPath; namespace { namespace bp = boost::process; @@ -71,56 +73,133 @@ using ::testing::Not; using ::testing::NotNull; namespace Blobs = Azure::Storage::Blobs; -namespace Files = Azure::Storage::Files; +namespace Core = Azure::Core; +namespace DataLake = Azure::Storage::Files::DataLake; -auto const* kLoremIpsum = R"""( -Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor -incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis -nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. -Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu -fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in -culpa qui officia deserunt mollit anim id est laborum. -)"""; +class BaseAzureEnv : public ::testing::Environment { + protected: + std::string account_name_; + std::string account_key_; + + BaseAzureEnv(std::string account_name, std::string account_key) + : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {} -class AzuriteEnv : public ::testing::Environment { public: - AzuriteEnv() { - account_name_ = "devstoreaccount1"; - account_key_ = - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/" - "KBHBeksoGMGw=="; - auto exe_path = bp::search_path("azurite"); - if (exe_path.empty()) { - auto error = std::string("Could not find Azurite emulator."); - status_ = Status::Invalid(error); - return; + const std::string& account_name() const { return account_name_; } + const std::string& account_key() const { return account_key_; } + + virtual AzureBackend backend() const = 0; + + virtual bool WithHierarchicalNamespace() const { return false; } + + virtual Result GetDebugLogSize() { return 0; } + virtual Status DumpDebugLog(int64_t position) { + return Status::NotImplemented("BaseAzureEnv::DumpDebugLog"); + } +}; + +template +class AzureEnvImpl : public BaseAzureEnv { + private: + /// \brief Factory function that registers the singleton instance as a global test + /// environment. Must be called only once per implementation (see GetInstance()). + /// + /// Every BaseAzureEnv implementation defines a static and parameter-less member + /// function called Make() that returns a Result>. + /// This templated function performs the following steps: + /// + /// 1) Calls AzureEnvClass::Make() to get an instance of AzureEnvClass. + /// 2) Passes ownership of the AzureEnvClass instance to the testing environment. + /// 3) Returns a Result wrapping the raw heap-allocated pointer. 
+ static Result MakeAndAddToGlobalTestEnvironment() { + ARROW_ASSIGN_OR_RAISE(auto env, AzureEnvClass::Make()); + auto* heap_ptr = env.release(); + ::testing::AddGlobalTestEnvironment(heap_ptr); + return heap_ptr; + } + + protected: + using BaseAzureEnv::BaseAzureEnv; + + /// \brief Create an AzureEnvClass instance from environment variables. + /// + /// Reads the account name and key from the environment variables. This can be + /// used in BaseAzureEnv implementations that don't need to do any additional + /// setup to create the singleton instance (e.g. AzureFlatNSEnv, + /// AzureHierarchicalNSEnv). + static Result> MakeFromEnvVars( + const std::string& account_name_var, const std::string& account_key_var) { + const auto account_name = std::getenv(account_name_var.c_str()); + const auto account_key = std::getenv(account_key_var.c_str()); + if (!account_name && !account_key) { + return Status::Cancelled(account_name_var + " and " + account_key_var + + " are not set. Skipping tests."); } - auto temp_dir_ = *TemporaryDir::Make("azurefs-test-"); - auto debug_log_path_result = temp_dir_->path().Join("debug.log"); - if (!debug_log_path_result.ok()) { - status_ = debug_log_path_result.status(); - return; + // If only one of the variables is set. Don't cancel tests, + // fail with a Status::Invalid. 
+ if (!account_name) { + return Status::Invalid(account_name_var + " not set while " + account_key_var + + " is set."); } - debug_log_path_ = *debug_log_path_result; - server_process_ = - bp::child(boost::this_process::environment(), exe_path, "--silent", "--location", - temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString()); - if (!(server_process_.valid() && server_process_.running())) { - auto error = "Could not start Azurite emulator."; - server_process_.terminate(); - server_process_.wait(); - status_ = Status::Invalid(error); - return; + if (!account_key) { + return Status::Invalid(account_key_var + " not set while " + account_name_var + + " is set."); } - status_ = Status::OK(); + return std::unique_ptr{new AzureEnvClass(account_name, account_key)}; } + public: + static Result GetInstance() { + // Ensure MakeAndAddToGlobalTestEnvironment() is called only once by storing the + // Result in a static variable. + static auto singleton_env = MakeAndAddToGlobalTestEnvironment(); + return singleton_env; + } + + AzureBackend backend() const final { return AzureEnvClass::kBackend; } +}; + +class AzuriteEnv : public AzureEnvImpl { + private: + std::unique_ptr temp_dir_; + arrow::internal::PlatformFilename debug_log_path_; + bp::child server_process_; + + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzurite; + ~AzuriteEnv() override { server_process_.terminate(); server_process_.wait(); } - Result GetDebugLogSize() { + static Result> Make() { + auto self = std::unique_ptr( + new AzuriteEnv("devstoreaccount1", + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" + "K1SZFPTOtr/KBHBeksoGMGw==")); + auto exe_path = bp::search_path("azurite"); + if (exe_path.empty()) { + return Status::Invalid("Could not find Azurite emulator."); + } + ARROW_ASSIGN_OR_RAISE(self->temp_dir_, TemporaryDir::Make("azurefs-test-")); + ARROW_ASSIGN_OR_RAISE(self->debug_log_path_, + 
self->temp_dir_->path().Join("debug.log")); + auto server_process = bp::child( + boost::this_process::environment(), exe_path, "--silent", "--location", + self->temp_dir_->path().ToString(), "--debug", self->debug_log_path_.ToString()); + if (!server_process.valid() || !server_process.running()) { + server_process.terminate(); + server_process.wait(); + return Status::Invalid("Could not start Azurite emulator."); + } + self->server_process_ = std::move(server_process); + return self; + } + + Result GetDebugLogSize() override { ARROW_ASSIGN_OR_RAISE(auto exists, arrow::internal::FileExists(debug_log_path_)); if (!exists) { return 0; @@ -131,7 +210,7 @@ class AzuriteEnv : public ::testing::Environment { return arrow::internal::FileTell(file_descriptor.fd()); } - Status DumpDebugLog(int64_t position = 0) { + Status DumpDebugLog(int64_t position) override { ARROW_ASSIGN_OR_RAISE(auto exists, arrow::internal::FileExists(debug_log_path_)); if (!exists) { return Status::OK(); @@ -157,25 +236,35 @@ class AzuriteEnv : public ::testing::Environment { std::cerr << std::endl; return Status::OK(); } +}; - const std::string& account_name() const { return account_name_; } - const std::string& account_key() const { return account_key_; } - const Status status() const { return status_; } - +class AzureFlatNSEnv : public AzureEnvImpl { private: - std::string account_name_; - std::string account_key_; - bp::child server_process_; - Status status_; - std::unique_ptr temp_dir_; - arrow::internal::PlatformFilename debug_log_path_; + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzure; + + static Result> Make() { + return MakeFromEnvVars("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME", + "AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); + } }; -auto* azurite_env = ::testing::AddGlobalTestEnvironment(new AzuriteEnv); +class AzureHierarchicalNSEnv : public AzureEnvImpl { + private: + using AzureEnvImpl::AzureEnvImpl; -AzuriteEnv* GetAzuriteEnv() { - return 
::arrow::internal::checked_cast(azurite_env); -} + public: + static const AzureBackend kBackend = AzureBackend::kAzure; + + static Result> Make() { + return MakeFromEnvVars("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME", + "AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); + } + + bool WithHierarchicalNamespace() const final { return true; } +}; // Placeholder tests // TODO: GH-18014 Remove once a proper test is added @@ -193,44 +282,110 @@ TEST(AzureFileSystem, OptionsCompare) { EXPECT_TRUE(options.Equals(options)); } -class AzureFileSystemTest : public ::testing::Test { +struct PreexistingData { + public: + using RNG = std::mt19937_64; + + public: + const std::string container_name; + static constexpr char const* kObjectName = "test-object-name"; + + static constexpr char const* kLoremIpsum = R"""( +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu +fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in +culpa qui officia deserunt mollit anim id est laborum. +)"""; + public: + explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {} + + // Creates a path by concatenating the container name and the stem. 
+ std::string ContainerPath(std::string_view stem) const { + return ConcatAbstractPath(container_name, stem); + } + + std::string ObjectPath() const { return ContainerPath(kObjectName); } + std::string NotFoundObjectPath() const { return ContainerPath("not-found"); } + + std::string RandomDirectoryPath(RNG& rng) const { + return ContainerPath(RandomChars(32, rng)); + } + + // Utilities + static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); } + + static std::string RandomChars(int count, RNG& rng) { + auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); + std::uniform_int_distribution d(0, static_cast(fillers.size()) - 1); + std::string s; + std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; }); + return s; + } + + static int RandomIndex(int end, RNG& rng) { + return std::uniform_int_distribution(0, end - 1)(rng); + } + + static std::string RandomLine(int lineno, int width, RNG& rng) { + auto line = std::to_string(lineno) + ": "; + line += RandomChars(width - static_cast(line.size()) - 1, rng); + line += '\n'; + return line; + } +}; + +class TestAzureFileSystem : public ::testing::Test { + protected: + // Set in constructor + std::mt19937_64 rng_; + + // Set in SetUp() + int64_t debug_log_start_ = 0; + bool set_up_succeeded_ = false; + AzureOptions options_; + std::shared_ptr fs_; std::unique_ptr blob_service_client_; - std::unique_ptr datalake_service_client_; - AzureOptions options_; - std::mt19937_64 generator_; - std::string container_name_; - bool suite_skipped_ = false; + std::unique_ptr datalake_service_client_; + + public: + TestAzureFileSystem() : rng_(std::random_device()()) {} - AzureFileSystemTest() : generator_(std::random_device()()) {} + virtual Result GetAzureEnv() const = 0; - virtual Result MakeOptions() = 0; + static Result MakeOptions(BaseAzureEnv* env) { + AzureOptions options; + options.backend = env->backend(); + ARROW_EXPECT_OK( + 
options.ConfigureAccountKeyCredential(env->account_name(), env->account_key())); + return options; + } void SetUp() override { - auto options = MakeOptions(); - if (options.ok()) { - options_ = *options; + auto make_options = [this]() -> Result { + ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv()); + EXPECT_THAT(env, NotNull()); + ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize()); + return MakeOptions(env); + }; + auto options_res = make_options(); + if (options_res.status().IsCancelled()) { + GTEST_SKIP() << options_res.status().message(); } else { - suite_skipped_ = true; - GTEST_SKIP() << options.status().message(); + EXPECT_OK_AND_ASSIGN(options_, options_res); } - // Stop-gap solution before GH-39119 is fixed. - container_name_ = "z" + RandomChars(31); - blob_service_client_ = std::make_unique( - options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); - ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); - auto container_client = CreateContainer(container_name_); - auto blob_client = container_client.GetBlockBlobClient(PreexistingObjectName()); - blob_client.UploadFrom(reinterpret_cast(kLoremIpsum), - strlen(kLoremIpsum)); + ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); + EXPECT_OK_AND_ASSIGN(blob_service_client_, options_.MakeBlobServiceClient()); + EXPECT_OK_AND_ASSIGN(datalake_service_client_, options_.MakeDataLakeServiceClient()); + set_up_succeeded_ = true; } void TearDown() override { - if (!suite_skipped_) { + if (set_up_succeeded_) { auto containers = blob_service_client_->ListBlobContainers(); for (auto container : containers.BlobContainers) { auto container_client = @@ -238,6 +393,13 @@ class AzureFileSystemTest : public ::testing::Test { container_client.DeleteIfExists(); } } + if (HasFailure()) { + // XXX: This may not include all logs in the target test because + // Azurite doesn't flush 
debug logs immediately... You may want + // to check the log manually... + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + ARROW_IGNORE_EXPR(env->DumpDebugLog(debug_log_start_)); + } } Blobs::BlobContainerClient CreateContainer(const std::string& name) { @@ -254,54 +416,20 @@ class AzureFileSystemTest : public ::testing::Test { return blob_client; } - std::string PreexistingContainerName() const { return container_name_; } - - std::string PreexistingContainerPath() const { - return PreexistingContainerName() + '/'; - } - - static std::string PreexistingObjectName() { return "test-object-name"; } - - std::string PreexistingObjectPath() const { - return PreexistingContainerPath() + PreexistingObjectName(); - } - - std::string NotFoundObjectPath() { return PreexistingContainerPath() + "not-found"; } - - std::string RandomLine(int lineno, std::size_t width) { - auto line = std::to_string(lineno) + ": "; - line += RandomChars(width - line.size() - 1); - line += '\n'; - return line; - } - - std::size_t RandomIndex(std::size_t end) { - return std::uniform_int_distribution(0, end - 1)(generator_); - } - - std::string RandomChars(std::size_t count) { - auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); - std::uniform_int_distribution d(0, fillers.size() - 1); - std::string s; - std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(generator_)]; }); - return s; - } - - std::string RandomContainerName() { return RandomChars(32); } - - std::string RandomDirectoryName() { return RandomChars(32); } - - void UploadLines(const std::vector& lines, const char* path_to_file, + void UploadLines(const std::vector& lines, const std::string& path, int total_size) { - const auto path = PreexistingContainerPath() + path_to_file; ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const auto all_lines = std::accumulate(lines.begin(), lines.end(), std::string("")); ASSERT_OK(output->Write(all_lines)); ASSERT_OK(output->Close()); } - void 
RunGetFileInfoObjectWithNestedStructureTest(); - void RunGetFileInfoObjectTest(); + PreexistingData SetUpPreexistingData() { + PreexistingData data(rng_); + auto container_client = CreateContainer(data.container_name); + CreateBlob(container_client, data.kObjectName, PreexistingData::kLoremIpsum); + return data; + } struct HierarchicalPaths { std::string container; @@ -310,15 +438,12 @@ class AzureFileSystemTest : public ::testing::Test { }; // Need to use "void" as the return type to use ASSERT_* in this method. - void CreateHierarchicalData(HierarchicalPaths& paths) { - const auto container_path = RandomContainerName(); - const auto directory_path = - internal::ConcatAbstractPath(container_path, RandomDirectoryName()); - const auto sub_directory_path = - internal::ConcatAbstractPath(directory_path, "new-sub"); - const auto sub_blob_path = - internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); - const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); + void CreateHierarchicalData(HierarchicalPaths* paths) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto sub_directory_path = ConcatAbstractPath(directory_path, "new-sub"); + const auto sub_blob_path = ConcatAbstractPath(sub_directory_path, "sub.txt"); + const auto top_blob_path = ConcatAbstractPath(directory_path, "top.txt"); ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); ASSERT_OK(output->Write(std::string_view("sub"))); @@ -327,15 +452,15 @@ class AzureFileSystemTest : public ::testing::Test { ASSERT_OK(output->Write(std::string_view("top"))); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), container_path, FileType::Directory); + AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); AssertFileInfo(fs_.get(), directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); 
AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); AssertFileInfo(fs_.get(), top_blob_path, FileType::File); - paths.container = container_path; - paths.directory = directory_path; - paths.sub_paths = { + paths->container = data.container_name; + paths->directory = directory_path; + paths->sub_paths = { sub_directory_path, sub_blob_path, top_blob_path, @@ -362,7 +487,7 @@ class AzureFileSystemTest : public ::testing::Test { } void AssertInfoAllContainersRecursive(const std::vector& infos) { - ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos.size(), 12); AssertFileInfo(infos[0], "container", FileType::Directory); AssertFileInfo(infos[1], "container/emptydir", FileType::Directory); AssertFileInfo(infos[2], "container/otherdir", FileType::Directory); @@ -377,202 +502,336 @@ class AzureFileSystemTest : public ::testing::Test { strlen(kSubData)); AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData)); AssertFileInfo(infos[11], "empty-container", FileType::Directory); - AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory); - AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File); } -}; -class AzuriteFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - EXPECT_THAT(GetAzuriteEnv(), NotNull()); - ARROW_EXPECT_OK(GetAzuriteEnv()->status()); - ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize()); - AzureOptions options; - options.backend = AzureBackend::Azurite; - ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials( - GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key())); - return options; + bool WithHierarchicalNamespace() const { + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + return env->WithHierarchicalNamespace(); } - void TearDown() override { - AzureFileSystemTest::TearDown(); - if (HasFailure()) { - // XXX: This may not include all logs in the target test because - // Azurite doesn't flush debug logs immediately... 
You may want - // to check the log manually... - ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_)); + // Tests that are called from more than one implementation of TestAzureFileSystem + + void TestDetectHierarchicalNamespace(); + void TestGetFileInfoObject(); + void TestGetFileInfoObjectWithNestedStructure(); + + void TestDeleteDirSuccessEmpty() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + + if (WithHierarchicalNamespace()) { + ASSERT_OK(fs_->CreateDir(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() and DeleteDir() do nothing. + ASSERT_OK(fs_->CreateDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } } - int64_t debug_log_start_ = 0; -}; + void TestCreateDirSuccessContainerAndDirectory() { + auto data = SetUpPreexistingData(); + const auto path = data.RandomDirectoryPath(rng_); + ASSERT_OK(fs_->CreateDir(path, false)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. 
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + } + } -class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - AzureOptions options; - const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); - const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME"); - if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key)); - return options; + void TestCreateDirRecursiveSuccessContainerOnly() { + auto container_name = PreexistingData::RandomContainerName(rng_); + ASSERT_OK(fs_->CreateDir(container_name, true)); + arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + } + + void TestCreateDirRecursiveSuccessDirectoryOnly() { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); } - return Status::Cancelled( - "Connection details not provided for a real flat namespace " - "account."); } -}; -// How to enable this test: -// -// You need an Azure account. You should be able to create a free -// account at https://azure.microsoft.com/en-gb/free/ . You should be -// able to create a storage account through the portal Web UI. 
-// -// See also the official document how to create a storage account: -// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account -// -// A few suggestions on configuration: -// -// * Use Standard general-purpose v2 not premium -// * Use LRS redundancy -// * Obviously you need to enable hierarchical namespace. -// * Set the default access tier to hot -// * SFTP, NFS and file shares are not required. -class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - AzureOptions options; - const auto account_key = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); - const auto account_name = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME"); - if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key)); - return options; + void TestCreateDirRecursiveSuccessContainerAndDirectory() { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. 
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); } - return Status::Cancelled( - "Connection details not provided for a real hierarchical namespace " - "account."); } -}; -TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName())); -} + void TestDeleteDirContentsSuccessNonexistent() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(true, hierarchical_namespace.Enabled(PreexistingContainerName())); -} + void TestDeleteDirContentsFailureNonexistent() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); + } +}; -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName())); -} +void TestAzureFileSystem::TestDetectHierarchicalNamespace() { + // Check the environments are implemented and injected here correctly. 
+ auto expected = WithHierarchicalNamespace(); -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { + auto data = SetUpPreexistingData(); auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container")); + ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name)); } -TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { - AssertFileInfo(fs_.get(), "", FileType::Directory); - - // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); -} - -TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { - AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory); +void TestAzureFileSystem::TestGetFileInfoObject() { + auto data = SetUpPreexistingData(); + auto object_properties = + blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlobClient(data.kObjectName) + .GetProperties() + .Value; - AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File, + std::chrono::system_clock::time_point{object_properties.LastModified}, + static_cast(object_properties.BlobSize)); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName})); } -void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() { +void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { + auto data = SetUpPreexistingData(); // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). 
- constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; - ASSERT_OK_AND_ASSIGN( - auto output, - fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName, /*metadata=*/{})); - const std::string_view data(kLoremIpsum); - ASSERT_OK(output->Write(data)); + const std::string kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(data.ContainerPath(kObjectName), + /*metadata=*/{})); + const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum); + ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); // 0 is immediately after "/" lexicographically, ensure that this doesn't // cause unexpected issues. - ASSERT_OK_AND_ASSIGN(output, - fs_->OpenOutputStream( - PreexistingContainerPath() + "test-object-dir/some_other_dir0", - /*metadata=*/{})); - ASSERT_OK(output->Write(data)); - ASSERT_OK(output->Close()); ASSERT_OK_AND_ASSIGN( - output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName + "0", + output, fs_->OpenOutputStream(data.ContainerPath("test-object-dir/some_other_dir0"), /*metadata=*/{})); - ASSERT_OK(output->Write(data)); + ASSERT_OK(output->Write(lorem_ipsum)); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, + fs_->OpenOutputStream(data.ContainerPath(kObjectName + "0"), + /*metadata=*/{})); + ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName, FileType::File); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/", - FileType::NotFound); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir", - FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/", + AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName), FileType::File); + AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); + AssertFileInfo(fs_.get(), 
data.ContainerPath("test-object-dir"), FileType::Directory); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_dir", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir"), FileType::Directory); - AssertFileInfo(fs_.get(), - PreexistingContainerPath() + "test-object-dir/some_other_dir/", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di", - FileType::NotFound); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_di", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-di"), FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_di"), FileType::NotFound); + + if (WithHierarchicalNamespace()) { + datalake_service_client_->GetFileSystemClient(data.container_name) + .GetDirectoryClient("test-empty-object-dir") + .Create(); + + AssertFileInfo(fs_.get(), data.ContainerPath("test-empty-object-dir"), + FileType::Directory); + } } -TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) { - RunGetFileInfoObjectWithNestedStructureTest(); +template +class AzureFileSystemTestImpl : public TestAzureFileSystem { + public: + using TestAzureFileSystem::TestAzureFileSystem; + + Result GetAzureEnv() const final { return AzureEnvClass::GetInstance(); } +}; + +// How to enable the non-Azurite tests: +// +// You need an Azure account. You should be able to create a free account [1]. +// Through the portal Web UI, you should create a storage account [2]. +// +// A few suggestions on configuration: +// +// * Use Standard general-purpose v2 not premium +// * Use LRS redundancy +// * Set the default access tier to hot +// * SFTP, NFS and file shares are not required. 
+// +// You must not enable Hierarchical Namespace on the storage account used for +// TestAzureFlatNSFileSystem, but you must enable it on the storage account +// used for TestAzureHierarchicalNSFileSystem. +// +// The credentials should be placed in the correct environment variables: +// +// * AZURE_FLAT_NAMESPACE_ACCOUNT_NAME +// * AZURE_FLAT_NAMESPACE_ACCOUNT_KEY +// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME +// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY +// +// [1]: https://azure.microsoft.com/en-gb/free/ +// [2]: +// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account +using TestAzureFlatNSFileSystem = AzureFileSystemTestImpl; +using TestAzureHierarchicalNSFileSystem = AzureFileSystemTestImpl; +using TestAzuriteFileSystem = AzureFileSystemTestImpl; + +// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) + +template +using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl; + +using AllEnvironments = + ::testing::Types; + +TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments); + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) { + this->TestDetectHierarchicalNamespace(); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObjectWithNestedStructure) { - RunGetFileInfoObjectWithNestedStructureTest(); - datalake_service_client_->GetFileSystemClient(PreexistingContainerName()) - .GetDirectoryClient("test-empty-object-dir") - .Create(); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) { + this->TestGetFileInfoObject(); +} - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-empty-object-dir", - FileType::Directory); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) { + this->TestDeleteDirSuccessEmpty(); } -void AzureFileSystemTest::RunGetFileInfoObjectTest() { - auto object_properties = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlobClient(PreexistingObjectName()) - 
.GetProperties() - .Value; +TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) { + this->TestGetFileInfoObjectWithNestedStructure(); +} - AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File, - std::chrono::system_clock::time_point(object_properties.LastModified), - static_cast(object_properties.BlobSize)); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) { + this->TestCreateDirSuccessContainerAndDirectory(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) { + this->TestCreateDirRecursiveSuccessContainerOnly(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) { + this->TestCreateDirRecursiveSuccessDirectoryOnly(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) { + this->TestCreateDirRecursiveSuccessContainerAndDirectory(); +} + +// Tests using a real storage account *with Hierarchical Namespace enabled* + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) { + auto data = SetUpPreexistingData(); + const auto path = data.RandomDirectoryPath(rng_); + ASSERT_RAISES(IOError, fs_->DeleteDir(path)); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); + ASSERT_OK(output->Write(std::string_view("hello"))); + ASSERT_OK(output->Close()); + arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const 
auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + ASSERT_OK(fs_->DeleteDir(parent)); + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) { + auto preexisting_data = SetUpPreexistingData(); + HierarchicalPaths paths; + CreateHierarchicalData(&paths); + ASSERT_OK(fs_->DeleteDirContents(paths.directory)); + arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); + for (const auto& sub_path : paths.sub_paths) { + arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + } +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessNonexistent) { + this->TestDeleteDirContentsSuccessNonexistent(); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { + this->TestDeleteDirContentsFailureNonexistent(); +} + +// Tests using Azurite (the local Azure emulator) + +TEST_F(TestAzuriteFileSystem, DetectHierarchicalNamespaceFailsWithMissingContainer) { + auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); + ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); + ASSERT_RAISES(IOError, hierarchical_namespace.Enabled("nonexistent-container")); +} + +TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { + AssertFileInfo(fs_.get(), "", FileType::Directory); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingObjectName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); } -TEST_F(AzuriteFileSystemTest, GetFileInfoObject) { RunGetFileInfoObjectTest(); } +TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) { + auto data = SetUpPreexistingData(); + AssertFileInfo(fs_.get(), data.container_name, 
FileType::Directory); -TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObject) { - RunGetFileInfoObjectTest(); + AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + + // URI + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + data.container_name)); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { SetUpSmallFileSystemTree(); FileSelector select; @@ -581,11 +840,10 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { // Root dir select.base_dir = ""; ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); - ASSERT_EQ(infos.size(), 3); + ASSERT_EQ(infos.size(), 2); ASSERT_EQ(infos, SortedInfos(infos)); AssertFileInfo(infos[0], "container", FileType::Directory); AssertFileInfo(infos[1], "empty-container", FileType::Directory); - AssertFileInfo(infos[2], container_name_, FileType::Directory); // Empty container select.base_dir = "empty-container"; @@ -641,7 +899,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { ASSERT_EQ(infos.size(), 4); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) { SetUpSmallFileSystemTree(); FileSelector select; @@ -651,7 +909,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { // Root dir select.base_dir = ""; ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); - ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos.size(), 12); ASSERT_EQ(infos, SortedInfos(infos)); AssertInfoAllContainersRecursive(infos); @@ -699,7 +957,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { AssertFileInfo(infos[3], "container/otherdir/1/2/3/otherfile", FileType::File, 10); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { { auto container = CreateContainer("container"); CreateBlob(container, "mydir/emptydir1/"); @@ -746,137 +1004,60 @@ 
TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); } -TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", false)); } -TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) { - auto container_name = RandomContainerName(); +TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) { + auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name, false)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } -TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerAndDirectory) { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); - ASSERT_OK(fs_->CreateDir(path, false)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. 
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirSuccessContainerAndDirectory) { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); - ASSERT_OK(fs_->CreateDir(path, false)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); ASSERT_RAISES(IOError, fs_->CreateDir(path, false)); } -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirRecursiveFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - auto container_name = RandomContainerName(); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - auto container_name = RandomContainerName(); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - const auto parent = PreexistingContainerPath() + RandomDirectoryName(); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - const auto parent = PreexistingContainerPath() + RandomDirectoryName(); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - 
ASSERT_OK(fs_->CreateDir(path, true)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, - CreateDirRecursiveSuccessContainerAndDirectory) { - auto container_name = RandomContainerName(); - const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); +TEST_F(TestAzuriteFileSystem, CreateDirUri) { + ASSERT_RAISES( + Invalid, + fs_->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); } -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerAndDirectory) { - auto container_name = RandomContainerName(); - const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. 
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirUri) { - ASSERT_RAISES(Invalid, fs_->CreateDir("abfs://" + RandomContainerName(), true)); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { - const auto container_name = RandomContainerName(); +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) { + const auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); ASSERT_OK(fs_->DeleteDir(container_name)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessEmpty) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() and DeleteDir() do nothing. - ASSERT_OK(fs_->CreateDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); // There is only virtual directory without hierarchical namespace // support. So the DeleteDir() for nonexistent directory does nothing. 
ASSERT_OK(fs_->DeleteDir(directory_path)); arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); // We must use 257 or more blobs here to test pagination of ListBlobs(). // Because we can't add 257 or more delete blob requests to one SubmitBatch(). int64_t n_blobs = 257; for (int64_t i = 0; i < n_blobs; ++i) { - const auto blob_path = - internal::ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); + const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view(std::to_string(i)))); ASSERT_OK(output->Close()); @@ -884,62 +1065,24 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { } ASSERT_OK(fs_->DeleteDir(directory_path)); for (int64_t i = 0; i < n_blobs; ++i) { - const auto blob_path = - internal::ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); + const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); } } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessEmpty) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->CreateDir(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, 
FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirFailureNonexistent) { - const auto path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDir(path)); +TEST_F(TestAzuriteFileSystem, DeleteDirUri) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.container_name + "/")); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveBlob) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); - ASSERT_OK(output->Write(std::string_view("hello"))); - ASSERT_OK(output->Close()); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveDirectory) { - const auto parent = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(parent)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirUri) { - ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + PreexistingContainerPath())); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " 
"https://github.com/Azure/Azurite/pull/2302"; #endif + auto data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.container)); arrow::fs::AssertFileInfo(fs_.get(), paths.container, FileType::Directory); arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); @@ -948,13 +1091,14 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif + auto data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.directory)); // GH-38772: We may change this to FileType::Directory. arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); @@ -963,98 +1107,72 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessExist) { - HierarchicalPaths paths; - CreateHierarchicalData(paths); - ASSERT_OK(fs_->DeleteDirContents(paths.directory)); - arrow::fs::AssertFileInfo(fs_.get(), paths.directory, 
FileType::Directory); - for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); - } -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessNonexistent) { + this->TestDeleteDirContentsSuccessNonexistent(); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsFailureNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); +TEST_F(TestAzuriteFileSystem, DeleteDirContentsFailureNonexistent) { + this->TestDeleteDirContentsFailureNonexistent(); } -TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) { - const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path)); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(destination_path)); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(kLoremIpsum, buffer->ToString()); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } -TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationSame) { - ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), PreexistingObjectPath())); - ASSERT_OK_AND_ASSIGN(auto info, 
fs_->GetFileInfo(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationSame) { + auto data = SetUpPreexistingData(); + ASSERT_OK(fs_->CopyFile(data.ObjectPath(), data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(kLoremIpsum, buffer->ToString()); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationTrailingSlash) { - ASSERT_RAISES(IOError, - fs_->CopyFile(PreexistingObjectPath(), - internal::EnsureTrailingSlash(PreexistingObjectPath()))); +TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationTrailingSlash) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), + internal::EnsureTrailingSlash(data.ObjectPath()))); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureSourceNonexistent) { - const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(NotFoundObjectPath(), destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileFailureSourceNonexistent) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(), destination_path)); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationParentNonexistent) { +TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationParentNonexistent) { + auto data = SetUpPreexistingData(); const auto destination_path = - internal::ConcatAbstractPath(RandomContainerName(), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(PreexistingObjectPath(), destination_path)); + ConcatAbstractPath(PreexistingData::RandomContainerName(rng_), "copy-destionation"); + ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), 
destination_path)); } -TEST_F(AzuriteFileSystemTest, CopyFileUri) { - const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_RAISES(Invalid, - fs_->CopyFile("abfs://" + PreexistingObjectPath(), destination_path)); - ASSERT_RAISES(Invalid, - fs_->CopyFile(PreexistingObjectPath(), "abfs://" + destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileUri) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(), destination_path)); + ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), "abfs://" + destination_path)); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamString) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamString) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(buffer->ToString(), kLoremIpsum); + EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); std::string contents; std::shared_ptr buffer; @@ -1063,23 +1181,25 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { contents.append(buffer->ToString()); } while (buffer && buffer->size() != 0); - EXPECT_EQ(contents, kLoremIpsum); + EXPECT_EQ(contents, PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamInfo) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); 
+TEST_F(TestAzuriteFileSystem, OpenInputStreamInfo) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(buffer->ToString(), kLoremIpsum); + EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) { + auto data = SetUpPreexistingData(); const auto path_to_file = "empty-object.txt"; - const auto path = PreexistingContainerPath() + path_to_file; - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) + const auto path = data.ContainerPath(path_to_file); + blob_service_client_->GetBlobContainerClient(data.container_name) .GetBlockBlobClient(path_to_file) .UploadFrom(nullptr, 0); @@ -1090,24 +1210,28 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { EXPECT_EQ(size, 0); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamNotFound) { - ASSERT_RAISES(IOError, fs_->OpenInputStream(NotFoundObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamNotFound) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputStream(data.NotFoundObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingContainerPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamInfoInvalid) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name + "/")); ASSERT_RAISES(IOError, fs_->OpenInputStream(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); ASSERT_RAISES(IOError, fs_->OpenInputStream(info2)); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamUri) { - ASSERT_RAISES(Invalid, 
fs_->OpenInputStream("abfs://" + PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamUri) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + data.ObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamTrailingSlash) { - ASSERT_RAISES(IOError, fs_->OpenInputStream(PreexistingObjectPath() + '/')); +TEST_F(TestAzuriteFileSystem, OpenInputStreamTrailingSlash) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputStream(data.ObjectPath() + '/')); } namespace { @@ -1145,9 +1269,10 @@ std::shared_ptr NormalizerKeyValueMetadata( } }; // namespace -TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); std::shared_ptr actual; ASSERT_OK_AND_ASSIGN(actual, stream->ReadMetadata()); @@ -1175,8 +1300,9 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { NormalizerKeyValueMetadata(actual)->ToString()); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamClosed) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Read(buffer.size(), buffer.data())); @@ -1184,44 +1310,45 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { ASSERT_RAISES(Invalid, stream->Tell()); } -TEST_F(AzuriteFileSystemTest, TestWriteMetadata) { +TEST_F(TestAzuriteFileSystem, WriteMetadata) { + auto data = SetUpPreexistingData(); options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}}); ASSERT_OK_AND_ASSIGN(auto 
fs_with_defaults, AzureFileSystem::Make(options_)); - std::string path = "object_with_defaults"; - auto location = PreexistingContainerPath() + path; + std::string blob_path = "object_with_defaults"; + auto full_path = data.ContainerPath(blob_path); ASSERT_OK_AND_ASSIGN(auto output, - fs_with_defaults->OpenOutputStream(location, /*metadata=*/{})); - const std::string_view expected(kLoremIpsum); + fs_with_defaults->OpenOutputStream(full_path, /*metadata=*/{})); + const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); // Verify the metadata has been set. - auto blob_metadata = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path) - .GetProperties() - .Value.Metadata; - EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); + auto blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(blob_path) + .GetProperties() + .Value.Metadata; + EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); // Check that explicit metadata overrides the defaults. ASSERT_OK_AND_ASSIGN( output, fs_with_defaults->OpenOutputStream( - location, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); + full_path, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); - blob_metadata = blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path) + blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(blob_path) .GetProperties() .Value.Metadata; // Defaults are overwritten and not merged. 
- EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata); + EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { - const auto path = PreexistingContainerPath() + "test-write-object"; +TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); - const std::string_view expected(kLoremIpsum); + const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); @@ -1234,8 +1361,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { EXPECT_EQ(expected, std::string_view(inbuf.data(), size)); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { - const auto path = PreexistingContainerPath() + "test-write-object"; +TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); std::array sizes{257 * 1024, 258 * 1024, 259 * 1024}; std::array buffers{ @@ -1265,8 +1393,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { EXPECT_EQ(contents, buffers[0] + buffers[1] + buffers[2]); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { - const auto path = PreexistingContainerPath() + "test-write-object"; +TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); @@ -1281,7 +1410,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { EXPECT_EQ(expected0, 
std::string_view(inbuf.data(), size)); ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(path, {})); - const std::string_view expected1(kLoremIpsum); + const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); @@ -1291,8 +1420,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { EXPECT_EQ(expected1, std::string_view(inbuf.data(), size)); } -TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { - const auto path = PreexistingContainerPath() + "test-write-object"; +TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); @@ -1307,7 +1437,7 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { EXPECT_EQ(expected0, std::string_view(inbuf.data())); ASSERT_OK_AND_ASSIGN(output, fs_->OpenAppendStream(path, {})); - const std::string_view expected1(kLoremIpsum); + const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); @@ -1319,35 +1449,37 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { std::string(expected0) + std::string(expected1)); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) { - const auto path = internal::ConcatAbstractPath(PreexistingContainerName(), - "open-output-stream-closed.txt"); +TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("open-output-stream-closed.txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); ASSERT_OK(output->Close()); - ASSERT_RAISES(Invalid, output->Write(kLoremIpsum, std::strlen(kLoremIpsum))); + 
ASSERT_RAISES(Invalid, output->Write(PreexistingData::kLoremIpsum, + std::strlen(PreexistingData::kLoremIpsum))); ASSERT_RAISES(Invalid, output->Flush()); ASSERT_RAISES(Invalid, output->Tell()); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamUri) { - const auto path = internal::ConcatAbstractPath(PreexistingContainerName(), - "open-output-stream-uri.txt"); +TEST_F(TestAzuriteFileSystem, OpenOutputStreamUri) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("open-output-stream-uri.txt"); ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path)); } -TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { +TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) { + auto data = SetUpPreexistingData(); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; auto constexpr kLineCount = 4096; std::vector lines(kLineCount); int lineno = 0; - std::generate_n(lines.begin(), lines.size(), - [&] { return RandomLine(++lineno, kLineWidth); }); + std::generate_n(lines.begin(), lines.size(), [&] { + return PreexistingData::RandomLine(++lineno, kLineWidth, rng_); + }); - const auto path_to_file = "OpenInputFileMixedReadVsReadAt/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto path = data.ContainerPath("OpenInputFileMixedReadVsReadAt/object-name"); - UploadLines(lines, path_to_file, kLineCount * kLineWidth); + UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); @@ -1368,7 +1500,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { } // Verify random reads interleave too. 
- auto const index = RandomIndex(kLineCount); + auto const index = PreexistingData::RandomIndex(kLineCount, rng_); auto const position = index * kLineWidth; ASSERT_OK_AND_ASSIGN(size, file->ReadAt(position, buffer.size(), buffer.data())); EXPECT_EQ(size, kLineWidth); @@ -1381,27 +1513,28 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { } } -TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { +TEST_F(TestAzuriteFileSystem, OpenInputFileRandomSeek) { + auto data = SetUpPreexistingData(); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; auto constexpr kLineCount = 4096; std::vector lines(kLineCount); int lineno = 0; - std::generate_n(lines.begin(), lines.size(), - [&] { return RandomLine(++lineno, kLineWidth); }); + std::generate_n(lines.begin(), lines.size(), [&] { + return PreexistingData::RandomLine(++lineno, kLineWidth, rng_); + }); - const auto path_to_file = "OpenInputFileRandomSeek/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto path = data.ContainerPath("OpenInputFileRandomSeek/object-name"); std::shared_ptr output; - UploadLines(lines, path_to_file, kLineCount * kLineWidth); + UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); for (int i = 0; i != 32; ++i) { SCOPED_TRACE("Iteration " + std::to_string(i)); // Verify sequential reads work as expected. - auto const index = RandomIndex(kLineCount); + auto const index = PreexistingData::RandomIndex(kLineCount, rng_); auto const position = index * kLineWidth; ASSERT_OK(file->Seek(position)); ASSERT_OK_AND_ASSIGN(auto actual, file->Read(kLineWidth)); @@ -1409,15 +1542,15 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { } } -TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { +TEST_F(TestAzuriteFileSystem, OpenInputFileIoContext) { + auto data = SetUpPreexistingData(); // Create a test file. 
- const auto path_to_file = "OpenInputFileIoContext/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto blob_path = "OpenInputFileIoContext/object-name"; + const auto path = data.ContainerPath(blob_path); const std::string contents = "The quick brown fox jumps over the lazy dog"; - auto blob_client = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path_to_file); + auto blob_client = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(blob_path); blob_client.UploadFrom(reinterpret_cast(contents.data()), contents.length()); @@ -1426,8 +1559,9 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { EXPECT_EQ(fs_->io_context().external_id(), file->io_context().external_id()); } -TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(info)); @@ -1437,24 +1571,27 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) { auto constexpr kStart = 16; ASSERT_OK_AND_ASSIGN(size, file->ReadAt(kStart, buffer.size(), buffer.data())); - auto const expected = std::string(kLoremIpsum).substr(kStart); + auto const expected = std::string(PreexistingData::kLoremIpsum).substr(kStart); EXPECT_EQ(std::string(buffer.data(), size), expected); } -TEST_F(AzuriteFileSystemTest, OpenInputFileNotFound) { - ASSERT_RAISES(IOError, fs_->OpenInputFile(NotFoundObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputFileNotFound) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputFile(data.NotFoundObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputFileInfoInvalid) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingContainerPath())); 
+TEST_F(TestAzuriteFileSystem, OpenInputFileInfoInvalid) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name)); ASSERT_RAISES(IOError, fs_->OpenInputFile(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); ASSERT_RAISES(IOError, fs_->OpenInputFile(info2)); } -TEST_F(AzuriteFileSystemTest, OpenInputFileClosed) { - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Tell());