-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-41034: [C++][FS][Azure] Adjust DeleteDir/DeleteDirContents/GetFileInfoSelector behaviors against Azure for generic filesystem tests #41068
Changes from 2 commits
741397c
a6758b1
707712b
3ac1198
ff8d266
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -1642,11 +1642,27 @@ class AzureFileSystem::Impl { | |||||||
options.Prefix = {}; | ||||||||
found = true; // Unless the container itself is not found later! | ||||||||
} else { | ||||||||
options.Prefix = internal::EnsureTrailingSlash(base_location.path); | ||||||||
ARROW_ASSIGN_OR_RAISE( | ||||||||
auto prefix, AzureLocation::FromString( | ||||||||
std::string(internal::EnsureTrailingSlash(select.base_dir)))); | ||||||||
ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(container_client, prefix)); | ||||||||
if (info.type() == FileType::NotFound) { | ||||||||
if (select.allow_not_found) { | ||||||||
return Status::OK(); | ||||||||
} else { | ||||||||
return PathNotFound(base_location); | ||||||||
} | ||||||||
} else if (info.type() != FileType::Directory) { | ||||||||
return NotADir(base_location); | ||||||||
} | ||||||||
options.Prefix = prefix.path; | ||||||||
} | ||||||||
options.PageSizeHint = page_size_hint; | ||||||||
options.Include = Blobs::Models::ListBlobsIncludeFlags::Metadata; | ||||||||
|
||||||||
auto adlfs_client = GetFileSystemClient(base_location.container); | ||||||||
ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); | ||||||||
|
||||||||
auto recurse = [&](const std::string& blob_prefix) noexcept -> Status { | ||||||||
if (select.recursive && select.max_recursion > 0) { | ||||||||
FileSelector sub_select; | ||||||||
|
@@ -1671,7 +1687,15 @@ class AzureFileSystem::Impl { | |||||||
}; | ||||||||
auto process_prefix = [&](const std::string& prefix) noexcept -> Status { | ||||||||
const auto path = internal::ConcatAbstractPath(base_location.container, prefix); | ||||||||
acc_results->push_back(DirectoryFileInfoFromPath(path)); | ||||||||
if (hns_support == HNSSupport::kEnabled) { | ||||||||
ARROW_ASSIGN_OR_RAISE( | ||||||||
auto location, | ||||||||
AzureLocation::FromString(std::string(internal::RemoveTrailingSlash(path)))); | ||||||||
ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(adlfs_client, location)); | ||||||||
acc_results->push_back(std::move(info)); | ||||||||
} else { | ||||||||
acc_results->push_back(DirectoryFileInfoFromPath(path)); | ||||||||
} | ||||||||
return recurse(prefix); | ||||||||
}; | ||||||||
|
||||||||
|
@@ -2157,6 +2181,17 @@ class AzureFileSystem::Impl { | |||||||
Azure::Nullable<std::string> lease_id = {}) { | ||||||||
DCHECK(!location.container.empty()); | ||||||||
DCHECK(!location.path.empty()); | ||||||||
ARROW_ASSIGN_OR_RAISE(auto file_info, GetFileInfo(adlfs_client, location)); | ||||||||
if (file_info.type() == FileType::NotFound) { | ||||||||
if (require_dir_to_exist) { | ||||||||
return PathNotFound(location); | ||||||||
} else { | ||||||||
return Status::OK(); | ||||||||
} | ||||||||
} | ||||||||
if (file_info.type() != FileType::Directory) { | ||||||||
return NotADir(location); | ||||||||
} | ||||||||
auto directory_client = adlfs_client.GetDirectoryClient( | ||||||||
std::string(internal::RemoveTrailingSlash(location.path))); | ||||||||
DataLake::DeleteDirectoryOptions options; | ||||||||
|
@@ -2168,13 +2203,6 @@ class AzureFileSystem::Impl { | |||||||
// All the others either succeed or throw an exception. | ||||||||
DCHECK(response.Value.Deleted); | ||||||||
} catch (const Storage::StorageException& exception) { | ||||||||
if (exception.ErrorCode == "FilesystemNotFound" || | ||||||||
exception.ErrorCode == "PathNotFound") { | ||||||||
if (require_dir_to_exist) { | ||||||||
return PathNotFound(location); | ||||||||
} | ||||||||
return Status::OK(); | ||||||||
} | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The fact that you check existence above doesn't mean these errors can't happen down here. To use terms that @pitrou once shared with me [1], you're moving from EAFP [2] "easier to ask for forgiveness than permission" to LBYL [3] "look before you leap". From [3]:
[1] https://devblogs.microsoft.com/python/idiomatic-python-eafp-versus-lbyl/ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given the TZ differences, I will give myself a chance of fixing these bugs to see if I can come up with something that doesn't introduce pessimistic round trips to the backend. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. I'll revert this for now. FYI: We want to report a
arrow/cpp/src/arrow/filesystem/test_util.cc Lines 274 to 276 in cd607d0
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we block changing the target location from other connections by using diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index df6db3d959..67232b3156 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -2181,7 +2181,7 @@ class AzureFileSystem::Impl {
Azure::Nullable<std::string> lease_id = {}) {
DCHECK(!location.container.empty());
DCHECK(!location.path.empty());
- ARROW_ASSIGN_OR_RAISE(auto file_info, GetFileInfo(adlfs_client, location));
+ ARROW_ASSIGN_OR_RAISE(auto file_info, GetFileInfo(adlfs_client, location, lease_id));
if (file_info.type() == FileType::NotFound) {
if (require_dir_to_exist) {
return PathNotFound(location); There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This scenario sounds familiar. 🤔 Does that happen even if you append a trailing slash to the path? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we add a trailing slash, the API returns an error...:
|
||||||||
return ExceptionToStatus(exception, "Failed to delete a directory: ", location.path, | ||||||||
": ", directory_client.GetUrl()); | ||||||||
} | ||||||||
|
@@ -2200,6 +2228,9 @@ class AzureFileSystem::Impl { | |||||||
kDelimiter, path.Name, ": ", sub_directory_client.GetUrl()); | ||||||||
} | ||||||||
} else { | ||||||||
if (path.Name == location.path) { | ||||||||
return NotADir(location); | ||||||||
} | ||||||||
auto sub_file_client = adlfs_client.GetFileClient(path.Name); | ||||||||
try { | ||||||||
sub_file_client.Delete(); | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This fix is very confusing to me. This function is `GetFileInfoWithSelectorFromContainer`; it's meant to be called when HNS support has been detected as disabled.
The right fix (IMO) is to create `GetFileInfoWithSelectorFromFileSystem(adlfs_client, ...)` and then, in `GetFileInfoWithSelector`, dispatch to the different implementations according to the HNS support detection.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I understand what you're saying, but the current implementation doesn't do that: `GetFileInfoWithSelector()` isn't dispatched based on HNS support:
arrow/cpp/src/arrow/filesystem/azurefs.cc
Lines 2859 to 2866 in cd607d0
arrow/cpp/src/arrow/filesystem/azurefs.cc
Lines 1740 to 1779 in cd607d0
The `FromContainer` suffix doesn't mean "only for no HNS support". We use the `OnContainer`/`OnFileSystem` suffixes for that. `FromContainer` just means that it's for one container and it doesn't work with multiple containers.
Should we create `GetFileInfoWithSelectorFromContainerOn{Container,FileSystem}()`? But those names are too long...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But what I meant was changing/fixing `GetFileInfoWithSelector` to do that.
ADLFS calls "filesystem" what "blobs" calls "container", so you replace the last word of the name:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wow! I didn't notice it!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implemented.