diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 029e19bc0e32a..9569eff2e47ed 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -828,7 +828,7 @@ bool IsDfsEmulator(const AzureOptions& options) { namespace internal { Result CheckIfHierarchicalNamespaceIsEnabled( - DataLake::DataLakeFileSystemClient& adlfs_client, const AzureOptions& options) { + const DataLake::DataLakeFileSystemClient& adlfs_client, const AzureOptions& options) { try { auto directory_client = adlfs_client.GetDirectoryClient(""); // GetAccessControlList will fail on storage accounts @@ -891,10 +891,12 @@ namespace { const char kDelimiter[] = {internal::kSep, '\0'}; +/// \pre location.container is not empty. template -Result GetContainerPropsAsFileInfo(const std::string& container_name, - ContainerClient& container_client) { - FileInfo info{container_name}; +Result GetContainerPropsAsFileInfo(const AzureLocation& location, + const ContainerClient& container_client) { + DCHECK(!location.container.empty()); + FileInfo info{location.path.empty() ? location.all : location.container}; try { auto properties = container_client.GetProperties(); info.set_type(FileType::Directory); @@ -910,6 +912,18 @@ Result GetContainerPropsAsFileInfo(const std::string& container_name, } } +template +Status CreateContainerIfNotExists(const std::string& container_name, + const ContainerClient& container_client) { + try { + container_client.CreateIfNotExists(); + return Status::OK(); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus(exception, "Failed to create a container: ", container_name, + ": ", container_client.GetUrl()); + } +} + FileInfo DirectoryFileInfoFromPath(std::string_view path) { return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory}; } @@ -955,12 +969,21 @@ class AzureFileSystem::Impl { io::IOContext& io_context() { return io_context_; } const AzureOptions& options() const { return options_; } - private: + Blobs::BlobContainerClient GetBlobContainerClient(const std::string& container_name) { + return blob_service_client_->GetBlobContainerClient(container_name); + } + + /// \param container_name Also known as "filesystem" in the ADLS Gen2 API. + DataLake::DataLakeFileSystemClient GetFileSystemClient( + const std::string& container_name) { + return datalake_service_client_->GetFileSystemClient(container_name); + } + /// \brief Memoized version of CheckIfHierarchicalNamespaceIsEnabled. /// /// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never returned). Result HierarchicalNamespaceSupport( - DataLake::DataLakeFileSystemClient& adlfs_client) { + const DataLake::DataLakeFileSystemClient& adlfs_client) { switch (cached_hns_support_) { case HNSSupport::kEnabled: case HNSSupport::kDisabled: @@ -987,7 +1010,6 @@ class AzureFileSystem::Impl { return hns_support; } - public: /// This is used from unit tests to ensure we perform operations on all the /// possible states of cached_hns_support_. void ForceCachedHierarchicalNamespaceSupport(int support) { @@ -1004,33 +1026,20 @@ class AzureFileSystem::Impl { DCHECK(false) << "Invalid enum HierarchicalNamespaceSupport value."; } - Result GetFileInfo(const AzureLocation& location) { - if (location.container.empty()) { - DCHECK(location.path.empty()); - // Root directory of the storage account. - return FileInfo{"", FileType::Directory}; - } - if (location.path.empty()) { - // We have a container, but no path within the container. - // The container itself represents a directory. - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); - return GetContainerPropsAsFileInfo(location.container, container_client); - } - // There is a path to search within the container. - FileInfo info{location.all}; - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); + /// \pre location.path is not empty. + Result GetFileInfo(const DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureLocation& location) { auto file_client = adlfs_client.GetFileClient(location.path); try { + FileInfo info{location.all}; auto properties = file_client.GetProperties(); if (properties.Value.IsDirectory) { info.set_type(FileType::Directory); } else if (internal::HasTrailingSlash(location.path)) { - // For a path with a trailing slash a hierarchical namespace may return a blob - // with that trailing slash removed. For consistency with flat namespace and - // other filesystems we chose to return NotFound. - // - // NOTE(felipecrv): could this be an empty directory marker? + // For a path with a trailing slash, a Hierarchical Namespace storage account + // may recognize a file (path with trailing slash removed). For consistency + // with other arrow::FileSystem implementations we chose to return NotFound + // because the trailing slash means the user was looking for a directory. info.set_type(FileType::NotFound); return info; } else { @@ -1042,47 +1051,88 @@ class AzureFileSystem::Impl { return info; } catch (const Storage::StorageException& exception) { if (exception.StatusCode == Http::HttpStatusCode::NotFound) { - ARROW_ASSIGN_OR_RAISE(auto hns_support, - HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kContainerNotFound || - hns_support == HNSSupport::kEnabled) { - // If the hierarchical namespace is enabled, then the storage account will - // have explicit directories. Neither a file nor a directory was found. - info.set_type(FileType::NotFound); + return FileInfo{location.all, FileType::NotFound}; + } + return ExceptionToStatus( + exception, "GetProperties for '", file_client.GetUrl(), + "' failed. GetFileInfo is unable to determine whether the path exists."); + } + } + + /// On flat namespace accounts there are no real directories. Directories are + /// implied by empty directory marker blobs with names ending in "/" or there + /// being blobs with names starting with the directory path. + /// + /// \pre location.path is not empty. + Result GetFileInfo(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location) { + DCHECK(!location.path.empty()); + Blobs::ListBlobsOptions options; + options.Prefix = internal::RemoveTrailingSlash(location.path); + options.PageSizeHint = 1; + + try { + FileInfo info{location.all}; + auto list_response = container_client.ListBlobsByHierarchy(kDelimiter, options); + // Since PageSizeHint=1, we expect at most one entry in either Blobs or + // BlobPrefixes. A BlobPrefix always ends with kDelimiter ("/"), so we can + // distinguish between a directory and a file by checking if we received a + // prefix or a blob. + if (!list_response.BlobPrefixes.empty()) { + // Ensure the returned BlobPrefixes[0] string doesn't contain more characters than + // the requested Prefix. For instance, if we request with Prefix="dir/abra" and + // the container contains "dir/abracadabra/" but not "dir/abra/", we will get back + // "dir/abracadabra/" in the BlobPrefixes list. If "dir/abra/" existed, + // it would be returned instead because it comes before "dir/abracadabra/" in the + // lexicographic order guaranteed by ListBlobsByHierarchy. + const auto& blob_prefix = list_response.BlobPrefixes[0]; + if (blob_prefix == internal::EnsureTrailingSlash(location.path)) { + info.set_type(FileType::Directory); return info; } - // On flat namespace accounts there are no real directories. Directories are only - // implied by using `/` in the blob name. - Blobs::ListBlobsOptions list_blob_options; - // If listing the prefix `path.path_to_file` with trailing slash returns at least - // one result then `path` refers to an implied directory. - list_blob_options.Prefix = internal::EnsureTrailingSlash(location.path); - // We only need to know if there is at least one result, so minimise page size - // for efficiency. - list_blob_options.PageSizeHint = 1; - - try { - auto paged_list_result = - blob_service_client_->GetBlobContainerClient(location.container) - .ListBlobs(list_blob_options); - auto file_type = paged_list_result.Blobs.size() > 0 ? FileType::Directory - : FileType::NotFound; - info.set_type(file_type); + } + if (!list_response.Blobs.empty()) { + const auto& blob = list_response.Blobs[0]; + if (blob.Name == location.path) { + info.set_type(FileType::File); + info.set_size(blob.BlobSize); + info.set_mtime( + std::chrono::system_clock::time_point{blob.Details.LastModified}); return info; - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus( - exception, "ListBlobs failed for prefix='", *list_blob_options.Prefix, - "' failed. GetFileInfo is unable to determine whether the path should " - "be considered an implied directory."); } } + info.set_type(FileType::NotFound); + return info; + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { + return FileInfo{location.all, FileType::NotFound}; + } return ExceptionToStatus( - exception, "GetProperties failed for '", file_client.GetUrl(), - "' GetFileInfo is unable to determine whether the path exists."); + exception, "ListBlobsByHierarchy failed for prefix='", *options.Prefix, + "'. GetFileInfo is unable to determine whether the path exists."); } } private: + /// \pref location.container is not empty. + template + Status CheckDirExists(const ContainerClient& container_client, + const AzureLocation& location) { + DCHECK(!location.container.empty()); + FileInfo info; + if (location.path.empty()) { + ARROW_ASSIGN_OR_RAISE(info, + GetContainerPropsAsFileInfo(location, container_client)); + } else { + ARROW_ASSIGN_OR_RAISE(info, GetFileInfo(container_client, location)); + } + if (info.type() == FileType::NotFound) { + return PathNotFound(location); + } + DCHECK_EQ(info.type(), FileType::Directory); + return Status::OK(); + } + template Status VisitContainers(const Core::Context& context, OnContainer&& on_container) const { Blobs::ListBlobContainersOptions options; @@ -1297,97 +1347,79 @@ class AzureFileSystem::Impl { return ptr; } - Status CreateDir(const AzureLocation& location) { - if (location.container.empty()) { - return Status::Invalid("CreateDir requires a non-empty path."); - } - - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); - if (location.path.empty()) { - try { - auto response = container_client.Create(); - return response.Value.Created - ? Status::OK() - : Status::AlreadyExists("Directory already exists: " + location.all); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to create a container: ", location.container, - ": ", container_client.GetUrl()); - } - } - - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); - ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kContainerNotFound) { - return PathNotFound(location); - } - if (hns_support == HNSSupport::kDisabled) { - ARROW_ASSIGN_OR_RAISE( - auto container_info, - GetContainerPropsAsFileInfo(location.container, container_client)); - if (container_info.type() == FileType::NotFound) { - return PathNotFound(location); - } - // Without hierarchical namespace enabled Azure blob storage has no directories. - // Therefore we can't, and don't need to create one. Simply creating a blob with `/` - // in the name implies directories. - return Status::OK(); - } - - auto directory_client = adlfs_client.GetDirectoryClient(location.path); - try { - auto response = directory_client.Create(); - if (response.Value.Created) { - return Status::OK(); - } else { - return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, - "Failed to create a directory: " + location.path); + private: + /// This function cannot assume the filesystem/container already exists. + /// + /// \pre location.container is not empty. + /// \pre location.path is not empty. + template + Status CreateDirTemplate(const ContainerClient& container_client, + CreateDirIfNotExists&& create_if_not_exists, + const AzureLocation& location, bool recursive) { + DCHECK(!location.container.empty()); + DCHECK(!location.path.empty()); + // Non-recursive CreateDir calls require the parent directory to exist. + if (!recursive) { + auto parent = location.parent(); + if (!parent.path.empty()) { + RETURN_NOT_OK(CheckDirExists(container_client, parent)); } - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, "Failed to create a directory: ", location.path, - ": ", directory_client.GetUrl()); + // If the parent location is just the container, we don't need to check if it + // exists because the operation we perform below will fail if the container + // doesn't exist and we can handle that error according to the recursive flag. } - } - - Status CreateDirRecursive(const AzureLocation& location) { - if (location.container.empty()) { - return Status::Invalid("CreateDir requires a non-empty path."); - } - - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); try { - container_client.CreateIfNotExists(); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to create a container: ", location.container, " (", - container_client.GetUrl(), ")"); - } - - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); - ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kDisabled) { - // Without hierarchical namespace enabled Azure blob storage has no directories. - // Therefore we can't, and don't need to create one. Simply creating a blob with `/` - // in the name implies directories. + create_if_not_exists(container_client, location); return Status::OK(); - } - // Don't handle HNSSupport::kContainerNotFound, just assume it still exists (because - // it was created above) and try to create the directory. - - if (!location.path.empty()) { - auto directory_client = adlfs_client.GetDirectoryClient(location.path); - try { - directory_client.CreateIfNotExists(); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to create a directory: ", location.path, " (", - directory_client.GetUrl(), ")"); + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { + try { + if (recursive) { + container_client.CreateIfNotExists(); + create_if_not_exists(container_client, location); + return Status::OK(); + } else { + auto parent = location.parent(); + return PathNotFound(parent); + } + } catch (const Storage::StorageException& second_exception) { + return ExceptionToStatus(second_exception, "Failed to create directory '", + location.all, "': ", container_client.GetUrl()); + } } + return ExceptionToStatus(exception, "Failed to create directory '", location.all, + "': ", container_client.GetUrl()); } + } - return Status::OK(); + public: + /// This function cannot assume the filesystem already exists. + /// + /// \pre location.container is not empty. + /// \pre location.path is not empty. + Status CreateDirOnFileSystem(const DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureLocation& location, bool recursive) { + return CreateDirTemplate( + adlfs_client, + [](const auto& adlfs_client, const auto& location) { + auto directory_client = adlfs_client.GetDirectoryClient(location.path); + directory_client.CreateIfNotExists(); + }, + location, recursive); + } + + /// This function cannot assume the container already exists. + /// + /// \pre location.container is not empty. + /// \pre location.path is not empty. + Status CreateDirOnContainer(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location, bool recursive) { + return CreateDirTemplate( + container_client, + [this](const auto& container_client, const auto& location) { + EnsureEmptyDirExistsImplThatThrows(container_client, location.path); + }, + location, recursive); } Result> OpenAppendStream( @@ -1414,10 +1446,92 @@ class AzureFileSystem::Impl { } private: - Status DeleteDirContentsWithoutHierarchicalNamespace(const AzureLocation& location, - bool missing_dir_ok) { - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); + void EnsureEmptyDirExistsImplThatThrows( + const Blobs::BlobContainerClient& container_client, + const std::string& path_within_container) { + auto dir_marker_blob_path = internal::EnsureTrailingSlash(path_within_container); + auto block_blob_client = + container_client.GetBlobClient(dir_marker_blob_path).AsBlockBlobClient(); + // Attach metadata that other filesystem implementations expect to be present + // on directory marker blobs. + // https://github.com/fsspec/adlfs/blob/32132c4094350fca2680155a5c236f2e9f991ba5/adlfs/spec.py#L855-L870 + Blobs::UploadBlockBlobFromOptions blob_options; + blob_options.Metadata.emplace("is_directory", "true"); + block_blob_client.UploadFrom(nullptr, 0, blob_options); + } + + public: + /// This function assumes the container already exists. So it can only be + /// called after that has been verified. + /// + /// \pre location.container is not empty. + /// \pre The location.container container already exists. + Status EnsureEmptyDirExists(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location, const char* operation_name) { + DCHECK(!location.container.empty()); + if (location.path.empty()) { + // Nothing to do. The container already exists per the preconditions. + return Status::OK(); + } + try { + EnsureEmptyDirExistsImplThatThrows(container_client, location.path); + return Status::OK(); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + exception, operation_name, " failed to ensure empty directory marker '", + location.path, "' exists in container: ", container_client.GetUrl()); + } + } + + /// \pre location.container is not empty. + /// \pre location.path is empty. + Status DeleteContainer(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location) { + DCHECK(!location.container.empty()); + DCHECK(location.path.empty()); + try { + auto response = container_client.Delete(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse( + container_client.GetUrl(), *response.RawResponse, + "Failed to delete a container: " + location.container); + } + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { + return PathNotFound(location); + } + return ExceptionToStatus(exception, + "Failed to delete a container: ", location.container, ": ", + container_client.GetUrl()); + } + } + + /// Deletes contents of a directory and possibly the directory itself + /// depending on the value of preserve_dir_marker_blob. + /// + /// \pre location.container is not empty. + /// \pre preserve_dir_marker_blob=false implies location.path is not empty + /// because we can't *not preserve* the root directory of a container. + /// + /// \param require_dir_to_exist Require the directory to exist *before* this + /// operation, otherwise return PathNotFound. + /// \param preserve_dir_marker_blob Ensure the empty directory marker blob + /// is preserved (not deleted) or created (before the contents are deleted) if it + /// doesn't exist explicitly but is implied by the existence of blobs with names + /// starting with the directory path. + /// \param operation_name Used in error messages to accurately describe the operation + Status DeleteDirContentsOnContainer(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location, + bool require_dir_to_exist, + bool preserve_dir_marker_blob, + const char* operation_name) { + using DeleteBlobResponse = Storage::DeferredResponse; + DCHECK(!location.container.empty()); + DCHECK(preserve_dir_marker_blob || !location.path.empty()) + << "Must pass preserve_dir_marker_blob=true when location.path is empty " + "(i.e. deleting the contents of a container)."; Blobs::ListBlobsOptions options; if (!location.path.empty()) { options.Prefix = internal::EnsureTrailingSlash(location.path); @@ -1428,9 +1542,11 @@ class AzureFileSystem::Impl { // size of the body for a batch request can't exceed 4 MB. const int32_t kNumMaxRequestsInBatch = 256; options.PageSizeHint = kNumMaxRequestsInBatch; + // trusted only if preserve_dir_marker_blob is true. + bool found_dir_marker_blob = false; try { auto list_response = container_client.ListBlobs(options); - if (!missing_dir_ok && list_response.Blobs.empty()) { + if (require_dir_to_exist && list_response.Blobs.empty()) { return PathNotFound(location); } for (; list_response.HasPage(); list_response.MoveToNextPage()) { @@ -1438,20 +1554,44 @@ class AzureFileSystem::Impl { continue; } auto batch = container_client.CreateBatch(); - std::vector> - deferred_responses; + std::vector> deferred_responses; for (const auto& blob_item : list_response.Blobs) { - deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); + if (preserve_dir_marker_blob && !found_dir_marker_blob) { + const bool is_dir_marker_blob = + options.Prefix.HasValue() && blob_item.Name == *options.Prefix; + if (is_dir_marker_blob) { + // Skip deletion of the existing directory marker blob, + // but take note that it exists. + found_dir_marker_blob = true; + continue; + } + } + deferred_responses.emplace_back(blob_item.Name, + batch.DeleteBlob(blob_item.Name)); } try { - container_client.SubmitBatch(batch); + // Before submitting the batch deleting directory contents, ensure + // the empty directory marker blob exists. Doing this first, means that + // directory doesn't "stop existing" during the duration of the batch delete + // operation. + if (preserve_dir_marker_blob && !found_dir_marker_blob) { + // Only create an empty directory marker blob if the directory's + // existence is implied by the existence of blobs with names + // starting with the directory path. + if (!deferred_responses.empty()) { + RETURN_NOT_OK( + EnsureEmptyDirExists(container_client, location, operation_name)); + } + } + if (!deferred_responses.empty()) { + container_client.SubmitBatch(batch); + } } catch (const Storage::StorageException& exception) { return ExceptionToStatus(exception, "Failed to delete blobs in a directory: ", location.path, ": ", container_client.GetUrl()); } std::vector failed_blob_names; - for (size_t i = 0; i < deferred_responses.size(); ++i) { - const auto& deferred_response = deferred_responses[i]; + for (auto& [blob_name_view, deferred_response] : deferred_responses) { bool success = true; try { auto delete_result = deferred_response.GetResponse(); @@ -1460,8 +1600,7 @@ class AzureFileSystem::Impl { success = false; } if (!success) { - const auto& blob_item = list_response.Blobs[i]; - failed_blob_names.push_back(blob_item.Name); + failed_blob_names.emplace_back(blob_name_view); } } if (!failed_blob_names.empty()) { @@ -1475,117 +1614,74 @@ class AzureFileSystem::Impl { } } } + return Status::OK(); } catch (const Storage::StorageException& exception) { return ExceptionToStatus(exception, "Failed to list blobs in a directory: ", location.path, ": ", container_client.GetUrl()); } - return Status::OK(); } - public: - Status DeleteDir(const AzureLocation& location) { - if (location.container.empty()) { - return Status::Invalid("DeleteDir requires a non-empty path."); - } - - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); - ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kContainerNotFound) { - return PathNotFound(location); - } - - if (location.path.empty()) { - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); - try { - auto response = container_client.Delete(); - if (response.Value.Deleted) { - return Status::OK(); - } else { - return StatusFromErrorResponse( - container_client.GetUrl(), *response.RawResponse, - "Failed to delete a container: " + location.container); - } - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to delete a container: ", location.container, - ": ", container_client.GetUrl()); - } - } - - if (hns_support == HNSSupport::kEnabled) { - auto directory_client = adlfs_client.GetDirectoryClient(location.path); - try { - auto response = directory_client.DeleteRecursive(); - if (response.Value.Deleted) { - return Status::OK(); - } else { - return StatusFromErrorResponse( - directory_client.GetUrl(), *response.RawResponse, - "Failed to delete a directory: " + location.path); - } - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to delete a directory: ", location.path, ": ", - directory_client.GetUrl()); + /// \pre location.container is not empty. + /// \pre location.path is not empty. + Status DeleteDirOnFileSystem(const DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureLocation& location) { + DCHECK(!location.container.empty()); + DCHECK(!location.path.empty()); + auto directory_client = adlfs_client.GetDirectoryClient(location.path); + // XXX: should "directory not found" be considered an error? + try { + auto response = directory_client.DeleteRecursive(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, + "Failed to delete a directory: " + location.path); } - } else { - return DeleteDirContentsWithoutHierarchicalNamespace(location, - /*missing_dir_ok=*/true); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus(exception, "Failed to delete a directory: ", location.path, + ": ", directory_client.GetUrl()); } } - Status DeleteDirContents(const AzureLocation& location, bool missing_dir_ok) { - if (location.container.empty()) { - return internal::InvalidDeleteDirContents(location.all); - } - - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); - ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kContainerNotFound) { - return missing_dir_ok ? Status::OK() : PathNotFound(location); - } - - if (hns_support == HNSSupport::kEnabled) { - auto directory_client = adlfs_client.GetDirectoryClient(location.path); - try { - auto list_response = directory_client.ListPaths(false); - for (; list_response.HasPage(); list_response.MoveToNextPage()) { - for (const auto& path : list_response.Paths) { - if (path.IsDirectory) { - auto sub_directory_client = adlfs_client.GetDirectoryClient(path.Name); - try { - sub_directory_client.DeleteRecursive(); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus( - exception, "Failed to delete a sub directory: ", location.container, - kDelimiter, path.Name, ": ", sub_directory_client.GetUrl()); - } - } else { - auto sub_file_client = adlfs_client.GetFileClient(path.Name); - try { - sub_file_client.Delete(); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus( - exception, "Failed to delete a sub file: ", location.container, - kDelimiter, path.Name, ": ", sub_file_client.GetUrl()); - } + /// \pre location.container is not empty. + Status DeleteDirContentsOnFileSystem( + const DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureLocation& location, bool missing_dir_ok) { + auto directory_client = adlfs_client.GetDirectoryClient(location.path); + try { + auto list_response = directory_client.ListPaths(false); + for (; list_response.HasPage(); list_response.MoveToNextPage()) { + for (const auto& path : list_response.Paths) { + if (path.IsDirectory) { + auto sub_directory_client = adlfs_client.GetDirectoryClient(path.Name); + try { + sub_directory_client.DeleteRecursive(); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + exception, "Failed to delete a sub directory: ", location.container, + kDelimiter, path.Name, ": ", sub_directory_client.GetUrl()); + } + } else { + auto sub_file_client = adlfs_client.GetFileClient(path.Name); + try { + sub_file_client.Delete(); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + exception, "Failed to delete a sub file: ", location.container, + kDelimiter, path.Name, ": ", sub_file_client.GetUrl()); } } } - } catch (const Storage::StorageException& exception) { - if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { - return Status::OK(); - } else { - return ExceptionToStatus(exception, - "Failed to delete directory contents: ", location.path, - ": ", directory_client.GetUrl()); - } } return Status::OK(); - } else { - return DeleteDirContentsWithoutHierarchicalNamespace(location, missing_dir_ok); + } catch (const Storage::StorageException& exception) { + if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { + return Status::OK(); + } + return ExceptionToStatus(exception, + "Failed to delete directory contents: ", location.path, + ": ", directory_client.GetUrl()); } } @@ -1640,7 +1736,30 @@ bool AzureFileSystem::Equals(const FileSystem& other) const { Result AzureFileSystem::GetFileInfo(const std::string& path) { ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); - return impl_->GetFileInfo(location); + if (location.container.empty()) { + DCHECK(location.path.empty()); + // Root directory of the storage account. + return FileInfo{"", FileType::Directory}; + } + if (location.path.empty()) { + // We have a container, but no path within the container. + // The container itself represents a directory. + auto container_client = impl_->GetBlobContainerClient(location.container); + return GetContainerPropsAsFileInfo(location, container_client); + } + // There is a path to search within the container. Check HNS support to proceed. + auto adlfs_client = impl_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, + impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return FileInfo{location.all, FileType::NotFound}; + } + if (hns_support == HNSSupport::kEnabled) { + return impl_->GetFileInfo(adlfs_client, location); + } + DCHECK_EQ(hns_support, HNSSupport::kDisabled); + auto container_client = impl_->GetBlobContainerClient(location.container); + return impl_->GetFileInfo(container_client, location); } Result AzureFileSystem::GetFileInfo(const FileSelector& select) { @@ -1654,21 +1773,95 @@ Result AzureFileSystem::GetFileInfo(const FileSelector& select) Status AzureFileSystem::CreateDir(const std::string& path, bool recursive) { ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); - if (recursive) { - return impl_->CreateDirRecursive(location); - } else { - return impl_->CreateDir(location); + if (location.container.empty()) { + return Status::Invalid("CreateDir requires a non-empty path."); } + + auto container_client = impl_->GetBlobContainerClient(location.container); + if (location.path.empty()) { + // If the path is just the container, the parent (root) trivially exists, + // and the CreateDir operation comes down to just creating the container. + return CreateContainerIfNotExists(location.container, container_client); + } + + auto adlfs_client = impl_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, + impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + if (!recursive) { + auto parent = location.parent(); + return PathNotFound(parent); + } + RETURN_NOT_OK(CreateContainerIfNotExists(location.container, container_client)); + // Perform a second check for HNS support after creating the container. + ARROW_ASSIGN_OR_RAISE(hns_support, impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + // We only get kContainerNotFound if we are unable to read the properties of the + // container we just created. This is very unlikely, but theoretically possible in + // a concurrent system, so the error is handled to avoid infinite recursion. + return Status::IOError("Unable to read properties of a newly created container: ", + location.container, ": " + container_client.GetUrl()); + } + } + // CreateDirOnFileSystem and CreateDirOnContainer can handle the container + // not existing which is useful and necessary here since the only reason + // a container was created above was to check for HNS support when it wasn't + // cached yet. + if (hns_support == HNSSupport::kEnabled) { + return impl_->CreateDirOnFileSystem(adlfs_client, location, recursive); + } + DCHECK_EQ(hns_support, HNSSupport::kDisabled); + return impl_->CreateDirOnContainer(container_client, location, recursive); } Status AzureFileSystem::DeleteDir(const std::string& path) { ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); - return impl_->DeleteDir(location); + if (location.container.empty()) { + return Status::Invalid("DeleteDir requires a non-empty path."); + } + if (location.path.empty()) { + auto container_client = impl_->GetBlobContainerClient(location.container); + return impl_->DeleteContainer(container_client, location); + } + + auto adlfs_client = impl_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, + impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return PathNotFound(location); + } + if (hns_support == HNSSupport::kEnabled) { + return impl_->DeleteDirOnFileSystem(adlfs_client, location); + } + DCHECK_EQ(hns_support, HNSSupport::kDisabled); + auto container_client = impl_->GetBlobContainerClient(location.container); + return impl_->DeleteDirContentsOnContainer(container_client, location, + /*require_dir_to_exist=*/true, + /*preserve_dir_marker_blob=*/false, + "DeleteDir"); } Status AzureFileSystem::DeleteDirContents(const std::string& path, bool missing_dir_ok) { ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); - return impl_->DeleteDirContents(location, missing_dir_ok); + if (location.container.empty()) { + return internal::InvalidDeleteDirContents(location.all); + } + + auto adlfs_client = impl_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, + impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return missing_dir_ok ? Status::OK() : PathNotFound(location); + } + + if (hns_support == HNSSupport::kEnabled) { + return impl_->DeleteDirContentsOnFileSystem(adlfs_client, location, missing_dir_ok); + } + auto container_client = impl_->GetBlobContainerClient(location.container); + return impl_->DeleteDirContentsOnContainer(container_client, location, + /*require_dir_to_exist=*/!missing_dir_ok, + /*preserve_dir_marker_blob=*/true, + "DeleteDirContents"); } Status AzureFileSystem::DeleteRootDirContents() { diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h index 13d84c9b542b4..5642e16bcfb05 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.h +++ b/cpp/src/arrow/filesystem/azurefs_internal.h @@ -71,7 +71,7 @@ enum class HierarchicalNamespaceSupport { /// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never /// returned). Result CheckIfHierarchicalNamespaceIsEnabled( - Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client, + const Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client, const arrow::fs::AzureOptions& options); } // namespace internal diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index f6af9f722dbac..ff94578b041dc 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -473,6 +473,14 @@ class TestAzureFileSystem : public ::testing::Test { return blob_client; } + Blobs::Models::BlobProperties GetBlobProperties(const std::string& container_name, + const std::string& blob_name) { + return blob_service_client_->GetBlobContainerClient(container_name) + .GetBlobClient(blob_name) + .GetProperties() + .Value; + } + void UploadLines(const std::vector& lines, const std::string& path, int total_size) { ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); @@ -566,86 +574,259 @@ class TestAzureFileSystem : public ::testing::Test { return env->WithHierarchicalNamespace(); } + constexpr static const char* const kSubmitBatchBugMessage = + "This test is affected by an Azurite issue: " + "https://github.com/Azure/Azurite/pull/2302"; + + /// Azurite has a bug that causes BlobContainerClient::SubmitBatch to fail on macOS. + /// SubmitBatch is used by: + /// - AzureFileSystem::DeleteDir + /// - AzureFileSystem::DeleteDirContents + bool HasSubmitBatchBug() const { +#ifdef __APPLE__ + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + return env->backend() == AzureBackend::kAzurite; +#else + return false; +#endif + } + // Tests that are called from more than one implementation of TestAzureFileSystem void TestDetectHierarchicalNamespace(bool trip_up_azurite); void TestDetectHierarchicalNamespaceOnMissingContainer(); - void TestGetFileInfoObject(); + + void TestGetFileInfoOfRoot() { + AssertFileInfo(fs(), "", FileType::Directory); + + // URI + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://")); + } + + void TestGetFileInfoOnExistingContainer() { + auto data = SetUpPreexistingData(); + AssertFileInfo(fs(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), data.container_name + "/", FileType::Directory); + auto props = GetBlobProperties(data.container_name, data.kObjectName); + AssertFileInfo(fs(), data.ObjectPath(), FileType::File, + std::chrono::system_clock::time_point{props.LastModified}, + static_cast(props.BlobSize)); + AssertFileInfo(fs(), data.NotFoundObjectPath(), FileType::NotFound); + AssertFileInfo(fs(), data.ObjectPath() + "/", FileType::NotFound); + AssertFileInfo(fs(), data.NotFoundObjectPath() + "/", FileType::NotFound); + + // URIs + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + data.container_name)); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + std::string{data.kObjectName})); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + data.ObjectPath())); + } + + void TestGetFileInfoOnMissingContainer() { + auto data = SetUpPreexistingData(); + AssertFileInfo(fs(), "nonexistent", FileType::NotFound); + AssertFileInfo(fs(), "nonexistent/object", FileType::NotFound); + AssertFileInfo(fs(), "nonexistent/object/", FileType::NotFound); + } + void TestGetFileInfoObjectWithNestedStructure(); + void TestCreateDirOnRoot() { + auto dir1 = PreexistingData::RandomContainerName(rng_); + auto dir2 = PreexistingData::RandomContainerName(rng_); + + AssertFileInfo(fs(), dir1, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir1, false)); + AssertFileInfo(fs(), dir1, FileType::Directory); + + AssertFileInfo(fs(), dir2, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir2, true)); + AssertFileInfo(fs(), dir1, FileType::Directory); + + // Should not fail if the directory already exists. + ASSERT_OK(fs()->CreateDir(dir1, false)); + ASSERT_OK(fs()->CreateDir(dir1, true)); + AssertFileInfo(fs(), dir1, FileType::Directory); + } + + void TestCreateDirOnExistingContainer() { + auto data = SetUpPreexistingData(); + auto dir1 = data.RandomDirectoryPath(rng_); + auto dir2 = data.RandomDirectoryPath(rng_); + + AssertFileInfo(fs(), dir1, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir1, /*recursive=*/false)); + AssertFileInfo(fs(), dir1, FileType::Directory); + + AssertFileInfo(fs(), dir2, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir2, /*recursive=*/true)); + AssertFileInfo(fs(), dir2, FileType::Directory); + + auto subdir1 = ConcatAbstractPath(dir1, "subdir"); + auto subdir2 = ConcatAbstractPath(dir2, "subdir"); + AssertFileInfo(fs(), subdir1, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(subdir1, /*recursive=*/false)); + AssertFileInfo(fs(), subdir1, FileType::Directory); + AssertFileInfo(fs(), subdir2, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(subdir2, /*recursive=*/true)); + AssertFileInfo(fs(), subdir2, FileType::Directory); + + auto dir3 = data.RandomDirectoryPath(rng_); + AssertFileInfo(fs(), dir3, FileType::NotFound); + auto subdir3 = ConcatAbstractPath(dir3, "subdir"); + AssertFileInfo(fs(), subdir3, FileType::NotFound); + // Creating subdir3 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + dir3 + "'"), + fs()->CreateDir(subdir3, /*recursive=*/false)); + AssertFileInfo(fs(), dir3, FileType::NotFound); + AssertFileInfo(fs(), subdir3, FileType::NotFound); + // Creating subdir3 with recursive=true should work. + ASSERT_OK(fs()->CreateDir(subdir3, /*recursive=*/true)); + AssertFileInfo(fs(), dir3, FileType::Directory); + AssertFileInfo(fs(), subdir3, FileType::Directory); + + auto dir4 = data.RandomDirectoryPath(rng_); + auto subdir4 = ConcatAbstractPath(dir4, "subdir4"); + auto subdir5 = ConcatAbstractPath(dir4, "subdir4/subdir5"); + // Creating subdir4 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + dir4 + "'"), + fs()->CreateDir(subdir4, /*recursive=*/false)); + AssertFileInfo(fs(), dir4, FileType::NotFound); + AssertFileInfo(fs(), subdir4, FileType::NotFound); + // Creating subdir5 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + subdir4 + "'"), + fs()->CreateDir(subdir5, /*recursive=*/false)); + AssertFileInfo(fs(), dir4, FileType::NotFound); + AssertFileInfo(fs(), subdir4, FileType::NotFound); + AssertFileInfo(fs(), subdir5, FileType::NotFound); + // Creating subdir5 with recursive=true should work. + ASSERT_OK(fs()->CreateDir(subdir5, /*recursive=*/true)); + AssertFileInfo(fs(), dir4, FileType::Directory); + AssertFileInfo(fs(), subdir4, FileType::Directory); + AssertFileInfo(fs(), subdir5, FileType::Directory); + } + + void TestCreateDirOnMissingContainer() { + auto container1 = PreexistingData::RandomContainerName(rng_); + auto container2 = PreexistingData::RandomContainerName(rng_); + AssertFileInfo(fs(), container1, FileType::NotFound); + AssertFileInfo(fs(), container2, FileType::NotFound); + + auto dir1 = ConcatAbstractPath(container1, "dir"); + AssertFileInfo(fs(), dir1, FileType::NotFound); + // Creating dir1 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + container1 + "'"), + fs()->CreateDir(dir1, /*recursive=*/false)); + AssertFileInfo(fs(), container1, FileType::NotFound); + AssertFileInfo(fs(), dir1, FileType::NotFound); + // Creating dir1 with recursive=true should work. + ASSERT_OK(fs()->CreateDir(dir1, /*recursive=*/true)); + AssertFileInfo(fs(), container1, FileType::Directory); + AssertFileInfo(fs(), dir1, FileType::Directory); + + auto dir2 = ConcatAbstractPath(container2, "dir"); + auto subdir2 = ConcatAbstractPath(dir2, "subdir2"); + auto subdir3 = ConcatAbstractPath(dir2, "subdir2/subdir3"); + // Creating dir2 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + container2 + "'"), + fs()->CreateDir(dir2, /*recursive=*/false)); + AssertFileInfo(fs(), container2, FileType::NotFound); + AssertFileInfo(fs(), dir2, FileType::NotFound); + // Creating subdir2 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + dir2 + "'"), + fs()->CreateDir(subdir2, /*recursive=*/false)); + AssertFileInfo(fs(), container2, FileType::NotFound); + AssertFileInfo(fs(), dir2, FileType::NotFound); + AssertFileInfo(fs(), subdir2, FileType::NotFound); + // Creating subdir3 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + subdir2 + "'"), + fs()->CreateDir(subdir3, /*recursive=*/false)); + AssertFileInfo(fs(), container2, FileType::NotFound); + AssertFileInfo(fs(), dir2, FileType::NotFound); + AssertFileInfo(fs(), subdir2, FileType::NotFound); + AssertFileInfo(fs(), subdir3, FileType::NotFound); + // Creating subdir3 with recursive=true should work. + ASSERT_OK(fs()->CreateDir(subdir3, /*recursive=*/true)); + AssertFileInfo(fs(), container2, FileType::Directory); + AssertFileInfo(fs(), dir2, FileType::Directory); + AssertFileInfo(fs(), subdir2, FileType::Directory); + AssertFileInfo(fs(), subdir3, FileType::Directory); + } + void TestDeleteDirSuccessEmpty() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - if (WithHierarchicalNamespace()) { - ASSERT_OK(fs()->CreateDir(directory_path, true)); - AssertFileInfo(fs(), directory_path, FileType::Directory); - ASSERT_OK(fs()->DeleteDir(directory_path)); - AssertFileInfo(fs(), directory_path, FileType::NotFound); - } else { - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() and DeleteDir() do nothing. - ASSERT_OK(fs()->CreateDir(directory_path)); - AssertFileInfo(fs(), directory_path, FileType::NotFound); - ASSERT_OK(fs()->DeleteDir(directory_path)); - AssertFileInfo(fs(), directory_path, FileType::NotFound); - } + AssertFileInfo(fs(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(directory_path, true)); + AssertFileInfo(fs(), directory_path, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } - void TestCreateDirSuccessContainerAndDirectory() { + void TestDeleteDirFailureNonexistent() { auto data = SetUpPreexistingData(); const auto path = data.RandomDirectoryPath(rng_); - ASSERT_OK(fs()->CreateDir(path, false)); - if (WithHierarchicalNamespace()) { - AssertFileInfo(fs(), path, FileType::Directory); - } else { - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - AssertFileInfo(fs(), path, FileType::NotFound); - } + ASSERT_RAISES(IOError, fs()->DeleteDir(path)); } - void TestCreateDirRecursiveSuccessContainerOnly() { - auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs()->CreateDir(container_name, true)); - AssertFileInfo(fs(), container_name, FileType::Directory); + void TestDeleteDirSuccessHaveBlob() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); + ASSERT_OK(output->Write("hello")); + ASSERT_OK(output->Close()); + AssertFileInfo(fs(), blob_path, FileType::File); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), blob_path, FileType::NotFound); } - void TestCreateDirRecursiveSuccessDirectoryOnly() { + void TestDeleteDirSuccessHaveDirectory() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs()->CreateDir(path, true)); - if (WithHierarchicalNamespace()) { - AssertFileInfo(fs(), path, FileType::Directory); - AssertFileInfo(fs(), parent, FileType::Directory); - } else { - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - AssertFileInfo(fs(), path, FileType::NotFound); - AssertFileInfo(fs(), parent, FileType::NotFound); - } + AssertFileInfo(fs(), path, FileType::Directory); + AssertFileInfo(fs(), parent, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(parent)); + AssertFileInfo(fs(), path, FileType::NotFound); + AssertFileInfo(fs(), parent, FileType::NotFound); } - void TestCreateDirRecursiveSuccessContainerAndDirectory() { - auto data = SetUpPreexistingData(); - const auto parent = data.RandomDirectoryPath(rng_); - const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs()->CreateDir(path, true)); - if (WithHierarchicalNamespace()) { - AssertFileInfo(fs(), path, FileType::Directory); - AssertFileInfo(fs(), parent, FileType::Directory); - AssertFileInfo(fs(), data.container_name, FileType::Directory); - } else { - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - AssertFileInfo(fs(), path, FileType::NotFound); - AssertFileInfo(fs(), parent, FileType::NotFound); - AssertFileInfo(fs(), data.container_name, FileType::Directory); + void TestDeleteDirContentsSuccessExist() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } + auto preexisting_data = SetUpPreexistingData(); + HierarchicalPaths paths; + CreateHierarchicalData(&paths); + ASSERT_OK(fs()->DeleteDirContents(paths.directory)); + AssertFileInfo(fs(), paths.directory, FileType::Directory); + for (const auto& sub_path : paths.sub_paths) { + AssertFileInfo(fs(), sub_path, FileType::NotFound); } } void TestDeleteDirContentsSuccessNonexistent() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); ASSERT_OK(fs()->DeleteDirContents(directory_path, true)); @@ -662,7 +843,7 @@ class TestAzureFileSystem : public ::testing::Test { void TestAzureFileSystem::TestDetectHierarchicalNamespace(bool trip_up_azurite) { EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); if (trip_up_azurite && env->backend() != AzureBackend::kAzurite) { - GTEST_SKIP() << "trip_up_azurite=true is only for Azurite."; + return; } auto data = SetUpPreexistingData(); @@ -704,22 +885,6 @@ void TestAzureFileSystem::TestDetectHierarchicalNamespaceOnMissingContainer() { } } -void TestAzureFileSystem::TestGetFileInfoObject() { - auto data = SetUpPreexistingData(); - auto object_properties = - blob_service_client_->GetBlobContainerClient(data.container_name) - .GetBlobClient(data.kObjectName) - .GetProperties() - .Value; - - AssertFileInfo(fs(), data.ObjectPath(), FileType::File, - std::chrono::system_clock::time_point{object_properties.LastModified}, - static_cast(object_properties.BlobSize)); - - // URI - ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + std::string{data.kObjectName})); -} - void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { auto data = SetUpPreexistingData(); // Adds detailed tests to handle cases of different edge cases @@ -855,6 +1020,16 @@ TYPED_TEST(TestAzureFileSystemOnAllEnvs, DetectHierarchicalNamespaceOnMissingCon this->TestDetectHierarchicalNamespaceOnMissingContainer(); } +TYPED_TEST(TestAzureFileSystemOnAllEnvs, GetFileInfoOfRoot) { + this->TestGetFileInfoOfRoot(); +} + +TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirWithEmptyPath) { + ASSERT_RAISES(Invalid, this->fs()->CreateDir("", false)); +} + +TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirOnRoot) { this->TestCreateDirOnRoot(); } + // Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) // combined with the two scenarios for AzureFileSystem::cached_hns_support_ -- unknown and // known according to the environment. @@ -869,105 +1044,56 @@ using AllScenarios = ::testing::Types< TYPED_TEST_SUITE(TestAzureFileSystemOnAllScenarios, AllScenarios); -TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoObject) { - this->TestGetFileInfoObject(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoOnExistingContainer) { + this->TestGetFileInfoOnExistingContainer(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessEmpty) { - this->TestDeleteDirSuccessEmpty(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoOnMissingContainer) { + this->TestGetFileInfoOnMissingContainer(); } TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoObjectWithNestedStructure) { this->TestGetFileInfoObjectWithNestedStructure(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirSuccessContainerAndDirectory) { - this->TestCreateDirSuccessContainerAndDirectory(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnExistingContainer) { + this->TestCreateDirOnExistingContainer(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirRecursiveSuccessContainerOnly) { - this->TestCreateDirRecursiveSuccessContainerOnly(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnMissingContainer) { + this->TestCreateDirOnMissingContainer(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirRecursiveSuccessDirectoryOnly) { - this->TestCreateDirRecursiveSuccessDirectoryOnly(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessEmpty) { + this->TestDeleteDirSuccessEmpty(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, - CreateDirRecursiveSuccessContainerAndDirectory) { - this->TestCreateDirRecursiveSuccessContainerAndDirectory(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirFailureNonexistent) { + this->TestDeleteDirFailureNonexistent(); } -// Tests using a real storage account *with Hierarchical Namespace enabled* - -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) { - auto data = SetUpPreexistingData(); - const auto path = data.RandomDirectoryPath(rng_); - ASSERT_RAISES(IOError, fs()->DeleteDir(path)); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveBlob) { + this->TestDeleteDirSuccessHaveBlob(); } -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) { - auto data = SetUpPreexistingData(); - const auto directory_path = data.RandomDirectoryPath(rng_); - const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); - ASSERT_OK(output->Write(std::string_view("hello"))); - ASSERT_OK(output->Close()); - AssertFileInfo(fs(), blob_path, FileType::File); - ASSERT_OK(fs()->DeleteDir(directory_path)); - AssertFileInfo(fs(), blob_path, FileType::NotFound); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveDirectory) { + this->TestDeleteDirSuccessHaveDirectory(); } -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) { - auto data = SetUpPreexistingData(); - const auto parent = data.RandomDirectoryPath(rng_); - const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs()->CreateDir(path, true)); - AssertFileInfo(fs(), path, FileType::Directory); - AssertFileInfo(fs(), parent, FileType::Directory); - ASSERT_OK(fs()->DeleteDir(parent)); - AssertFileInfo(fs(), path, FileType::NotFound); - AssertFileInfo(fs(), parent, FileType::NotFound); -} - -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) { - auto preexisting_data = SetUpPreexistingData(); - HierarchicalPaths paths; - CreateHierarchicalData(&paths); - ASSERT_OK(fs()->DeleteDirContents(paths.directory)); - AssertFileInfo(fs(), paths.directory, FileType::Directory); - for (const auto& sub_path : paths.sub_paths) { - AssertFileInfo(fs(), sub_path, FileType::NotFound); - } +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsSuccessExist) { + this->TestDeleteDirContentsSuccessExist(); } -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessNonexistent) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsSuccessNonexistent) { this->TestDeleteDirContentsSuccessNonexistent(); } -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsFailureNonexistent) { this->TestDeleteDirContentsFailureNonexistent(); } // Tests using Azurite (the local Azure emulator) -TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { - AssertFileInfo(fs(), "", FileType::Directory); - - // URI - ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://")); -} - -TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) { - auto data = SetUpPreexistingData(); - AssertFileInfo(fs(), data.container_name, FileType::Directory); - - AssertFileInfo(fs(), "nonexistent-container", FileType::NotFound); - - // URI - ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + data.container_name)); -} - TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { SetUpSmallFileSystemTree(); @@ -1141,16 +1267,6 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); } -TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) { - ASSERT_RAISES(Invalid, fs()->CreateDir("", false)); -} - -TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) { - auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs()->CreateDir(container_name, false)); - AssertFileInfo(fs(), container_name, FileType::Directory); -} - TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); ASSERT_RAISES(IOError, fs()->CreateDir(path, false)); @@ -1175,19 +1291,20 @@ TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) { } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - // There is only virtual directory without hierarchical namespace - // support. So the DeleteDir() for nonexistent directory does nothing. - ASSERT_OK(fs()->DeleteDir(directory_path)); + // DeleteDir() fails if the directory doesn't exist. + ASSERT_RAISES(IOError, fs()->DeleteDir(directory_path)); AssertFileInfo(fs(), directory_path, FileType::NotFound); } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { -#ifdef __APPLE__ - GTEST_SKIP() << "This test fails by an Azurite problem: " - "https://github.com/Azure/Azurite/pull/2302"; -#endif + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); // We must use 257 or more blobs here to test pagination of ListBlobs(). @@ -1213,10 +1330,9 @@ TEST_F(TestAzuriteFileSystem, DeleteDirUri) { } TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { -#ifdef __APPLE__ - GTEST_SKIP() << "This test fails by an Azurite problem: " - "https://github.com/Azure/Azurite/pull/2302"; -#endif + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); @@ -1229,16 +1345,14 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { } TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) { -#ifdef __APPLE__ - GTEST_SKIP() << "This test fails by an Azurite problem: " - "https://github.com/Azure/Azurite/pull/2302"; -#endif + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); ASSERT_OK(fs()->DeleteDirContents(paths.directory)); - // GH-38772: We may change this to FileType::Directory. - AssertFileInfo(fs(), paths.directory, FileType::NotFound); + AssertFileInfo(fs(), paths.directory, FileType::Directory); for (const auto& sub_path : paths.sub_paths) { AssertFileInfo(fs(), sub_path, FileType::NotFound); }