Skip to content

Commit

Permalink
apacheGH-40052: [C++][FS][Azure] Fix CreateDir and DeleteDir trailing…
Browse files Browse the repository at this point in the history
… slash issues on hierarchical namespace accounts (apache#40054)

### Rationale for this change
There were the following failure cases
```
fs->CreateDir("directory/")
``` 
```
fs->DeleteDir("directory/")
``` 
They fail with 
```
Failed to delete a directory: directory/: https://tomtesthns.blob.core.windows.net/ea119933-c9d3-11ee-989a-71cec6336ac8/directory/ Azure Error: [InvalidUri] 400 The request URI is invalid.
The request URI is invalid.
RequestId:c9ad826a-101f-0005-5be0-5d0db4000000
Time:2024-02-12T18:24:12.9974541Z
Request ID: c9ad826a-101f-0005-5be0-5d0db4000000
```

### What changes are included in this PR?
Add tests to cover these cases. 
Remove trailing slashes to avoid these issues. 

### Are these changes tested?
Yes. I added new test cases to cover these cases. I was rather torn about whether to add precise tests like I've done or to duplicate every test case we had and run it a second time with trailing slashes.

### Are there any user-facing changes?
Fixed bug on `CreateDir` and `DeleteDir`.

* Closes: apache#40052

Authored-by: Thomas Newton <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
  • Loading branch information
Tom-Newton authored and zanmato1984 committed Feb 28, 2024
1 parent 4872f49 commit 4565552
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 2 deletions.
6 changes: 4 additions & 2 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1635,7 +1635,8 @@ class AzureFileSystem::Impl {
return CreateDirTemplate(
adlfs_client,
[](const auto& adlfs_client, const auto& location) {
auto directory_client = adlfs_client.GetDirectoryClient(location.path);
auto directory_client = adlfs_client.GetDirectoryClient(
std::string(internal::RemoveTrailingSlash(location.path)));
directory_client.CreateIfNotExists();
},
location, recursive);
Expand Down Expand Up @@ -1860,7 +1861,8 @@ class AzureFileSystem::Impl {
Azure::Nullable<std::string> lease_id = {}) {
DCHECK(!location.container.empty());
DCHECK(!location.path.empty());
auto directory_client = adlfs_client.GetDirectoryClient(location.path);
auto directory_client = adlfs_client.GetDirectoryClient(
std::string(internal::RemoveTrailingSlash(location.path)));
DataLake::DeleteDirectoryOptions options;
options.AccessConditions.LeaseId = std::move(lease_id);
try {
Expand Down
64 changes: 64 additions & 0 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,14 @@ class TestAzureFileSystem : public ::testing::Test {
AssertFileInfo(fs(), dir1, FileType::Directory);
}

void TestCreateDirOnRootWithTrailingSlash() {
auto dir1 = PreexistingData::RandomContainerName(rng_) + "/";

AssertFileInfo(fs(), dir1, FileType::NotFound);
ASSERT_OK(fs()->CreateDir(dir1, false));
AssertFileInfo(fs(), dir1, FileType::Directory);
}

void TestCreateDirOnExistingContainer() {
auto data = SetUpPreexistingData();
auto dir1 = data.RandomDirectoryPath(rng_);
Expand Down Expand Up @@ -758,6 +766,15 @@ class TestAzureFileSystem : public ::testing::Test {
AssertFileInfo(fs(), subdir5, FileType::Directory);
}

void TestCreateDirOnExistingContainerWithTrailingSlash() {
auto data = SetUpPreexistingData();
auto dir1 = data.RandomDirectoryPath(rng_) + "/";

AssertFileInfo(fs(), dir1, FileType::NotFound);
ASSERT_OK(fs()->CreateDir(dir1, /*recursive=*/false));
AssertFileInfo(fs(), dir1, FileType::Directory);
}

void TestCreateDirOnMissingContainer() {
auto container1 = PreexistingData::RandomContainerName(rng_);
auto container2 = PreexistingData::RandomContainerName(rng_);
Expand Down Expand Up @@ -844,6 +861,21 @@ class TestAzureFileSystem : public ::testing::Test {
AssertFileInfo(fs(), blob_path, FileType::NotFound);
}

void TestNonEmptyDirWithTrailingSlash() {
if (HasSubmitBatchBug()) {
GTEST_SKIP() << kSubmitBatchBugMessage;
}
auto data = SetUpPreexistingData();
const auto directory_path = data.RandomDirectoryPath(rng_);
const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt");
ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path));
ASSERT_OK(output->Write("hello"));
ASSERT_OK(output->Close());
AssertFileInfo(fs(), blob_path, FileType::File);
ASSERT_OK(fs()->DeleteDir(directory_path + "/"));
AssertFileInfo(fs(), blob_path, FileType::NotFound);
}

void TestDeleteDirSuccessHaveDirectory() {
if (HasSubmitBatchBug()) {
GTEST_SKIP() << kSubmitBatchBugMessage;
Expand Down Expand Up @@ -873,6 +905,20 @@ class TestAzureFileSystem : public ::testing::Test {
}
}

void TestDeleteDirContentsSuccessExistWithTrailingSlash() {
if (HasSubmitBatchBug()) {
GTEST_SKIP() << kSubmitBatchBugMessage;
}
auto preexisting_data = SetUpPreexistingData();
HierarchicalPaths paths;
CreateHierarchicalData(&paths);
ASSERT_OK(fs()->DeleteDirContents(paths.directory + "/"));
AssertFileInfo(fs(), paths.directory, FileType::Directory);
for (const auto& sub_path : paths.sub_paths) {
AssertFileInfo(fs(), sub_path, FileType::NotFound);
}
}

void TestDeleteDirContentsSuccessNonexistent() {
if (HasSubmitBatchBug()) {
GTEST_SKIP() << kSubmitBatchBugMessage;
Expand Down Expand Up @@ -1466,6 +1512,10 @@ TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirWithEmptyPath) {

TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirOnRoot) { this->TestCreateDirOnRoot(); }

TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirOnRootWithTrailingSlash) {
this->TestCreateDirOnRootWithTrailingSlash();
}

// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS)
// combined with the two scenarios for AzureFileSystem::cached_hns_support_ -- unknown and
// known according to the environment.
Expand Down Expand Up @@ -1496,6 +1546,11 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnExistingContainer) {
this->TestCreateDirOnExistingContainer();
}

TYPED_TEST(TestAzureFileSystemOnAllScenarios,
CreateDirOnExistingContainerWithTrailingSlash) {
this->TestCreateDirOnExistingContainerWithTrailingSlash();
}

TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnMissingContainer) {
this->TestCreateDirOnMissingContainer();
}
Expand All @@ -1512,6 +1567,10 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveBlob) {
this->TestDeleteDirSuccessHaveBlob();
}

TYPED_TEST(TestAzureFileSystemOnAllScenarios, NonEmptyDirWithTrailingSlash) {
this->TestNonEmptyDirWithTrailingSlash();
}

TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveDirectory) {
this->TestDeleteDirSuccessHaveDirectory();
}
Expand All @@ -1520,6 +1579,11 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsSuccessExist) {
this->TestDeleteDirContentsSuccessExist();
}

TYPED_TEST(TestAzureFileSystemOnAllScenarios,
DeleteDirContentsSuccessExistWithTrailingSlash) {
this->TestDeleteDirContentsSuccessExistWithTrailingSlash();
}

TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsSuccessNonexistent) {
this->TestDeleteDirContentsSuccessNonexistent();
}
Expand Down

0 comments on commit 4565552

Please sign in to comment.