From 4565552190b506d648b7a2396d0cf6de7641cf11 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Tue, 13 Feb 2024 18:15:10 +0000 Subject: [PATCH] GH-40052: [C++][FS][Azure] Fix CreateDir and DeleteDir trailing slash issues on hierarchical namespace accounts (#40054) ### Rationale for this change There were the following failure cases ``` fs->CreateDir("directory/") ``` ``` fs->DeleteDir("directory/") ``` They fail with ``` Failed to delete a directory: directory/: https://tomtesthns.blob.core.windows.net/ea119933-c9d3-11ee-989a-71cec6336ac8/directory/ Azure Error: [InvalidUri] 400 The request URI is invalid. The request URI is invalid. RequestId:c9ad826a-101f-0005-5be0-5d0db4000000 Time:2024-02-12T18:24:12.9974541Z Request ID: c9ad826a-101f-0005-5be0-5d0db4000000 ``` ### What changes are included in this PR? Add tests to cover these cases. Remove trailing slashes to avoid these issues. ### Are these changes tested? Yes. I added new test cases to cover these cases. I was rather torn about whether to add precise tests like I've done or to duplicate every test case we had and run it a second time with trailing slashes. ### Are there any user-facing changes? Fixed bug on `CreateDir` and `DeleteDir`. * Closes: #40052 Authored-by: Thomas Newton Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 6 ++- cpp/src/arrow/filesystem/azurefs_test.cc | 64 ++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index d4bb445701444..11750591932e9 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1635,7 +1635,8 @@ class AzureFileSystem::Impl { return CreateDirTemplate( adlfs_client, [](const auto& adlfs_client, const auto& location) { - auto directory_client = adlfs_client.GetDirectoryClient(location.path); + auto directory_client = adlfs_client.GetDirectoryClient( + std::string(internal::RemoveTrailingSlash(location.path))); directory_client.CreateIfNotExists(); }, location, recursive); @@ -1860,7 +1861,8 @@ class AzureFileSystem::Impl { Azure::Nullable lease_id = {}) { DCHECK(!location.container.empty()); DCHECK(!location.path.empty()); - auto directory_client = adlfs_client.GetDirectoryClient(location.path); + auto directory_client = adlfs_client.GetDirectoryClient( + std::string(internal::RemoveTrailingSlash(location.path))); DataLake::DeleteDirectoryOptions options; options.AccessConditions.LeaseId = std::move(lease_id); try { diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index c39a5b7d22bdd..42f38f1ed6ac7 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -698,6 +698,14 @@ class TestAzureFileSystem : public ::testing::Test { AssertFileInfo(fs(), dir1, FileType::Directory); } + void TestCreateDirOnRootWithTrailingSlash() { + auto dir1 = PreexistingData::RandomContainerName(rng_) + "/"; + + AssertFileInfo(fs(), dir1, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir1, false)); + AssertFileInfo(fs(), dir1, FileType::Directory); + } + void TestCreateDirOnExistingContainer() { auto data = SetUpPreexistingData(); auto dir1 = data.RandomDirectoryPath(rng_); @@ -758,6 +766,15 @@ class TestAzureFileSystem : public ::testing::Test { AssertFileInfo(fs(), subdir5, FileType::Directory); } + void TestCreateDirOnExistingContainerWithTrailingSlash() { + auto data = SetUpPreexistingData(); + auto dir1 = data.RandomDirectoryPath(rng_) + "/"; + + AssertFileInfo(fs(), dir1, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir1, /*recursive=*/false)); + AssertFileInfo(fs(), dir1, FileType::Directory); + } + void TestCreateDirOnMissingContainer() { auto container1 = PreexistingData::RandomContainerName(rng_); auto container2 = PreexistingData::RandomContainerName(rng_); @@ -844,6 +861,21 @@ class TestAzureFileSystem : public ::testing::Test { AssertFileInfo(fs(), blob_path, FileType::NotFound); } + void TestNonEmptyDirWithTrailingSlash() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); + ASSERT_OK(output->Write("hello")); + ASSERT_OK(output->Close()); + AssertFileInfo(fs(), blob_path, FileType::File); + ASSERT_OK(fs()->DeleteDir(directory_path + "/")); + AssertFileInfo(fs(), blob_path, FileType::NotFound); + } + void TestDeleteDirSuccessHaveDirectory() { if (HasSubmitBatchBug()) { GTEST_SKIP() << kSubmitBatchBugMessage; @@ -873,6 +905,20 @@ class TestAzureFileSystem : public ::testing::Test { } } + void TestDeleteDirContentsSuccessExistWithTrailingSlash() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } + auto preexisting_data = SetUpPreexistingData(); + HierarchicalPaths paths; + CreateHierarchicalData(&paths); + ASSERT_OK(fs()->DeleteDirContents(paths.directory + "/")); + AssertFileInfo(fs(), paths.directory, FileType::Directory); + for (const auto& sub_path : paths.sub_paths) { + AssertFileInfo(fs(), sub_path, FileType::NotFound); + } + } + void TestDeleteDirContentsSuccessNonexistent() { if (HasSubmitBatchBug()) { GTEST_SKIP() << kSubmitBatchBugMessage; @@ -1466,6 +1512,10 @@ TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirWithEmptyPath) { TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirOnRoot) { this->TestCreateDirOnRoot(); } +TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirOnRootWithTrailingSlash) { + this->TestCreateDirOnRootWithTrailingSlash(); +} + // Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) // combined with the two scenarios for AzureFileSystem::cached_hns_support_ -- unknown and // known according to the environment. @@ -1496,6 +1546,11 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnExistingContainer) { this->TestCreateDirOnExistingContainer(); } +TYPED_TEST(TestAzureFileSystemOnAllScenarios, + CreateDirOnExistingContainerWithTrailingSlash) { + this->TestCreateDirOnExistingContainerWithTrailingSlash(); +} + TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnMissingContainer) { this->TestCreateDirOnMissingContainer(); } @@ -1512,6 +1567,10 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveBlob) { this->TestDeleteDirSuccessHaveBlob(); } +TYPED_TEST(TestAzureFileSystemOnAllScenarios, NonEmptyDirWithTrailingSlash) { + this->TestNonEmptyDirWithTrailingSlash(); +} + TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveDirectory) { this->TestDeleteDirSuccessHaveDirectory(); } @@ -1520,6 +1579,11 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsSuccessExist) { this->TestDeleteDirContentsSuccessExist(); } +TYPED_TEST(TestAzureFileSystemOnAllScenarios, + DeleteDirContentsSuccessExistWithTrailingSlash) { + this->TestDeleteDirContentsSuccessExistWithTrailingSlash(); +} + TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsSuccessNonexistent) { this->TestDeleteDirContentsSuccessNonexistent(); }