Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-41034: [C++][FS][Azure] Adjust DeleteDir/DeleteDirContents/GetFileInfoSelector behaviors against Azure for generic filesystem tests #41068

Merged
merged 5 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 82 additions & 1 deletion cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1179,6 +1179,15 @@ Status CreateContainerIfNotExists(const std::string& container_name,
}
}

FileInfo FileInfoFromPath(std::string_view container,
const DataLake::Models::PathItem& path) {
FileInfo info{internal::ConcatAbstractPath(container, path.Name),
path.IsDirectory ? FileType::Directory : FileType::File};
info.set_size(path.FileSize);
info.set_mtime(std::chrono::system_clock::time_point{path.LastModified});
return info;
}

FileInfo DirectoryFileInfoFromPath(std::string_view path) {
return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory};
}
Expand Down Expand Up @@ -1576,7 +1585,7 @@ class AzureFileSystem::Impl {
}

private:
/// \pref location.container is not empty.
/// \pre location.container is not empty.
template <typename ContainerClient>
Status CheckDirExists(const ContainerClient& container_client,
const AzureLocation& location) {
Expand Down Expand Up @@ -1623,6 +1632,50 @@ class AzureFileSystem::Impl {
return result;
}

/// \brief List the paths at the root of a filesystem or some dir in a filesystem.
///
/// \pre adlfs_client is the client for the filesystem named like the first
/// segment of select.base_dir.
Status GetFileInfoWithSelectorFromFileSystem(
const DataLake::DataLakeFileSystemClient& adlfs_client,
const Core::Context& context, Azure::Nullable<int32_t> page_size_hint,
const FileSelector& select, FileInfoVector* acc_results) {
ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir));

auto directory_client = adlfs_client.GetDirectoryClient(base_location.path);
bool found = false;
DataLake::ListPathsOptions options;
options.PageSizeHint = page_size_hint;

try {
auto list_response = directory_client.ListPaths(select.recursive, options, context);
for (; list_response.HasPage(); list_response.MoveToNextPage(context)) {
if (list_response.Paths.empty()) {
continue;
}
found = true;
for (const auto& path : list_response.Paths) {
if (path.Name == base_location.path && !path.IsDirectory) {
return NotADir(base_location);
}
acc_results->push_back(FileInfoFromPath(base_location.container, path));
}
}
} catch (const Storage::StorageException& exception) {
if (IsContainerNotFound(exception) || exception.ErrorCode == "PathNotFound") {
found = false;
} else {
return ExceptionToStatus(exception,
"Failed to list paths in a directory: ", select.base_dir,
": ", directory_client.GetUrl());
}
}

return found || select.allow_not_found
? Status::OK()
: ::arrow::fs::internal::PathNotFound(select.base_dir);
}

/// \brief List the blobs at the root of a container or some dir in a container.
///
/// \pre container_client is the client for the container named like the first
Expand Down Expand Up @@ -1772,6 +1825,20 @@ class AzureFileSystem::Impl {
return VisitContainers(context, std::move(on_container));
}

auto adlfs_client = GetFileSystemClient(base_location.container);
ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client));
if (hns_support == HNSSupport::kContainerNotFound) {
if (select.allow_not_found) {
return Status::OK();
} else {
return ::arrow::fs::internal::PathNotFound(select.base_dir);
}
}
if (hns_support == HNSSupport::kEnabled) {
return GetFileInfoWithSelectorFromFileSystem(adlfs_client, context, page_size_hint,
select, acc_results);
}
DCHECK_EQ(hns_support, HNSSupport::kDisabled);
auto container_client =
blob_service_client_->GetBlobContainerClient(base_location.container);
return GetFileInfoWithSelectorFromContainer(container_client, context, page_size_hint,
Expand Down Expand Up @@ -2157,6 +2224,17 @@ class AzureFileSystem::Impl {
Azure::Nullable<std::string> lease_id = {}) {
DCHECK(!location.container.empty());
DCHECK(!location.path.empty());
ARROW_ASSIGN_OR_RAISE(auto file_info, GetFileInfo(adlfs_client, location, lease_id));
if (file_info.type() == FileType::NotFound) {
if (require_dir_to_exist) {
return PathNotFound(location);
} else {
return Status::OK();
}
}
if (file_info.type() != FileType::Directory) {
return NotADir(location);
}
auto directory_client = adlfs_client.GetDirectoryClient(
std::string(internal::RemoveTrailingSlash(location.path)));
DataLake::DeleteDirectoryOptions options;
Expand Down Expand Up @@ -2200,6 +2278,9 @@ class AzureFileSystem::Impl {
kDelimiter, path.Name, ": ", sub_directory_client.GetUrl());
}
} else {
if (path.Name == location.path) {
return NotADir(location);
}
auto sub_file_client = adlfs_client.GetFileClient(path.Name);
try {
sub_file_client.Delete();
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ class TestGeneric : public ::testing::Test, public GenericFileSystemTest {
bool allow_move_dir() const override { return false; }
bool allow_move_file() const override { return true; }
bool allow_append_to_file() const override { return true; }
bool have_directory_mtimes() const override { return false; }
bool have_directory_mtimes() const override { return true; }
bool have_flaky_directory_tree_deletion() const override { return false; }
bool have_file_metadata() const override { return true; }
// calloc() used in libxml2's xmlNewGlobalState() is detected as a
Expand Down Expand Up @@ -429,6 +429,8 @@ class TestAzuriteGeneric : public TestGeneric {
protected:
// Azurite doesn't support moving files over containers.
bool allow_move_file() const override { return false; }
// Azurite doesn't support directory mtime.
bool have_directory_mtimes() const override { return false; }
// DeleteDir() doesn't work with Azurite on macOS
bool have_flaky_directory_tree_deletion() const override {
return env_->HasSubmitBatchBug();
Expand All @@ -449,6 +451,8 @@ class TestAzureFlatNSGeneric : public TestGeneric {
protected:
// Flat namespace account doesn't support moving files over containers.
bool allow_move_file() const override { return false; }
// Flat namespace account doesn't support directory mtime.
bool have_directory_mtimes() const override { return false; }
};

class TestAzureHierarchicalNSGeneric : public TestGeneric {
Expand Down
Loading