Skip to content

Commit

Permalink
GetFileInfoSelector
Browse files (browse the repository at this point in the history)
* Always support directory mtime with HNS
* Add directory check for selector
  • Loading branch information
kou committed Apr 9, 2024
1 parent 741397c commit a6758b1
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
28 changes: 26 additions & 2 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1642,11 +1642,27 @@ class AzureFileSystem::Impl {
options.Prefix = {};
found = true; // Unless the container itself is not found later!
} else {
options.Prefix = internal::EnsureTrailingSlash(base_location.path);
ARROW_ASSIGN_OR_RAISE(
auto prefix, AzureLocation::FromString(
std::string(internal::EnsureTrailingSlash(select.base_dir))));
ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(container_client, prefix));
if (info.type() == FileType::NotFound) {
if (select.allow_not_found) {
return Status::OK();
} else {
return PathNotFound(base_location);
}
} else if (info.type() != FileType::Directory) {
return NotADir(base_location);
}
options.Prefix = prefix.path;
}
options.PageSizeHint = page_size_hint;
options.Include = Blobs::Models::ListBlobsIncludeFlags::Metadata;

auto adlfs_client = GetFileSystemClient(base_location.container);
ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client));

auto recurse = [&](const std::string& blob_prefix) noexcept -> Status {
if (select.recursive && select.max_recursion > 0) {
FileSelector sub_select;
Expand All @@ -1671,7 +1687,15 @@ class AzureFileSystem::Impl {
};
auto process_prefix = [&](const std::string& prefix) noexcept -> Status {
const auto path = internal::ConcatAbstractPath(base_location.container, prefix);
acc_results->push_back(DirectoryFileInfoFromPath(path));
if (hns_support == HNSSupport::kEnabled) {
ARROW_ASSIGN_OR_RAISE(
auto location,
AzureLocation::FromString(std::string(internal::RemoveTrailingSlash(path))));
ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(adlfs_client, location));
acc_results->push_back(std::move(info));
} else {
acc_results->push_back(DirectoryFileInfoFromPath(path));
}
return recurse(prefix);
};

Expand Down
6 changes: 5 additions & 1 deletion cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ class TestGeneric : public ::testing::Test, public GenericFileSystemTest {
bool allow_move_dir() const override { return false; }
bool allow_move_file() const override { return true; }
bool allow_append_to_file() const override { return true; }
bool have_directory_mtimes() const override { return false; }
bool have_directory_mtimes() const override { return true; }
bool have_flaky_directory_tree_deletion() const override { return false; }
bool have_file_metadata() const override { return true; }
// calloc() used in libxml2's xmlNewGlobalState() is detected as a
Expand Down Expand Up @@ -429,6 +429,8 @@ class TestAzuriteGeneric : public TestGeneric {
protected:
// Azurite doesn't support moving files over containers.
bool allow_move_file() const override { return false; }
// Azurite doesn't support directory mtime.
bool have_directory_mtimes() const override { return false; }
// DeleteDir() doesn't work with Azurite on macOS
bool have_flaky_directory_tree_deletion() const override {
return env_->HasSubmitBatchBug();
Expand All @@ -449,6 +451,8 @@ class TestAzureFlatNSGeneric : public TestGeneric {
protected:
// Flat namespace account doesn't support moving files over containers.
bool allow_move_file() const override { return false; }
// Flat namespace account doesn't support directory mtime.
bool have_directory_mtimes() const override { return false; }
};

class TestAzureHierarchicalNSGeneric : public TestGeneric {
Expand Down

0 comments on commit a6758b1

Please sign in to comment.