Skip to content

Commit

Permalink
Remove statuses from ls_with_sizes and do not mask failures on POSI…
Browse files Browse the repository at this point in the history
…X. (#5043)

[SC-23179](https://app.shortcut.com/tiledb-inc/story/23179)
[SC-48851](https://app.shortcut.com/tiledb-inc/story/48851)
[SC-48854](https://app.shortcut.com/tiledb-inc/story/48854)

This PR updates the signature of `ls_with_sizes` in the `VFS` class and
the specific VFS implementations[^1] to indicate errors by throwing
exceptions instead of returning `Status`.

On top of that, `Posix::ls_with_sizes` was updated to fail if `opendir`
failed with an error code other than `ENOENT` (see also
#5037 (comment)).

Also in `Posix::ls_with_sizes`, the pointer returned by `opendir` was
updated to be placed inside a smart pointer, making sure it gets freed.
This caused errors in the `find_head_api_violations.py` script, which
were fixed.

[^1]: HDFS was not updated to minimize risk.

---
TYPE: BUG
DESC: Do not mask failures when listing a directory fails on POSIX.

---------

Co-authored-by: Luc Rancourt <[email protected]>
  • Loading branch information
teo-tsirpanis and KiterLuc authored Jun 6, 2024
1 parent 804d7fb commit 2dc4fb7
Show file tree
Hide file tree
Showing 19 changed files with 167 additions and 165 deletions.
7 changes: 3 additions & 4 deletions scripts/find_heap_api_violations.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@

# Contains per-file exceptions to violations of "make_shared".
make_shared_exceptions = {
"*": ["tdb::make_shared"],
"*": ["tiledb::common::make_shared"],
"*": ["tdb::make_shared", "tiledb::common::make_shared"],
}

# Match C++ unique_ptr objects.
Expand All @@ -103,7 +102,7 @@
unique_ptr_exceptions = {
"*": ["tdb_unique_ptr", "tiledb_unique_ptr", "tdb::pmr::unique_ptr"],
"zstd_compressor.h": ["std::unique_ptr<ZSTD_DCtx, decltype(&ZSTD_freeDCtx)> ctx_;", "std::unique_ptr<ZSTD_CCtx, decltype(&ZSTD_freeCCtx)> ctx_;"],
"posix.cc": ["static std::unique_ptr<char, decltype(&free)> cwd_(getcwd(nullptr, 0), free);"],
"posix.cc": ["std::unique_ptr<DIR, UniqueDIRDeleter>", "static std::unique_ptr<char, decltype(&free)> cwd_(getcwd(nullptr, 0), free);"],
"curl.h": ["std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>"],
"pmr.h": ["std::unique_ptr", "unique_ptr<Tp> make_unique("],
}
Expand Down Expand Up @@ -148,7 +147,7 @@ class Violation:


def iter_file_violations(file_path: str) -> Iterable[Violation]:
with open(file_path) as f:
with open(file_path, encoding="utf-8") as f:
for line_num, line in enumerate(f):
line = line.strip()
for checker in violation_checkers:
Expand Down
8 changes: 2 additions & 6 deletions test/src/unit-vfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -339,9 +339,7 @@ TEMPLATE_LIST_TEST_CASE(
std::string s = "abcdef";
require_tiledb_ok(vfs.write(ls_file, s.data(), s.size()));
require_tiledb_ok(vfs.close_file(ls_file));
auto&& [status, opt_children] = vfs.ls_with_sizes(ls_dir);
require_tiledb_ok(status);
auto children = opt_children.value();
auto children = vfs.ls_with_sizes(ls_dir);
#ifdef _WIN32
// Normalization only for Windows
ls_file = URI(tiledb::sm::path_win::uri_from_path(ls_file.to_string()));
Expand Down Expand Up @@ -599,9 +597,7 @@ TEST_CASE("VFS: test ls_with_sizes", "[vfs][ls-with-sizes]") {
require_tiledb_ok(vfs_ls.write(URI(subdir_file), s2.data(), s2.size()));

// List
auto&& [status, rv] = vfs_ls.ls_with_sizes(URI(dir));
auto children = *rv;
require_tiledb_ok(status);
auto children = vfs_ls.ls_with_sizes(URI(dir));

#ifdef _WIN32
// Normalization only for Windows
Expand Down
14 changes: 9 additions & 5 deletions test/support/src/vfs_helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -451,12 +451,16 @@ VFSTestBase::VFSTestBase(
}

VFSTestBase::~VFSTestBase() {
if (vfs_.supports_uri_scheme(temp_dir_)) {
bool is_dir = false;
vfs_.is_dir(temp_dir_, &is_dir).ok();
if (is_dir) {
vfs_.remove_dir(temp_dir_).ok();
try {
if (vfs_.supports_uri_scheme(temp_dir_)) {
bool is_dir = false;
vfs_.is_dir(temp_dir_, &is_dir).ok();
if (is_dir) {
vfs_.remove_dir(temp_dir_).ok();
}
}
} catch (const std::exception& e) {
// Suppress exceptions in destructors.
}
}

Expand Down
4 changes: 1 addition & 3 deletions tiledb/sm/array/array_directory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -583,9 +583,7 @@ const std::set<std::string>& ArrayDirectory::dir_names() {
}

std::vector<URI> ArrayDirectory::ls(const URI& uri) const {
auto&& [st, opt_dir_entries] = resources_.get().vfs().ls_with_sizes(uri);
throw_if_not_ok(st);
auto dir_entries = opt_dir_entries.value();
auto dir_entries = resources_.get().vfs().ls_with_sizes(uri);
auto dirs = dir_names();
std::vector<URI> uris;

Expand Down
21 changes: 7 additions & 14 deletions tiledb/sm/filesystem/azure.cc
Original file line number Diff line number Diff line change
Expand Up @@ -585,32 +585,26 @@ Status Azure::ls(
const int max_paths) const {
assert(paths);

auto&& [st, entries] = ls_with_sizes(uri, delimiter, max_paths);
RETURN_NOT_OK(st);

for (auto& fs : *entries) {
for (auto& fs : ls_with_sizes(uri, delimiter, max_paths)) {
paths->emplace_back(fs.path().native());
}

return Status::Ok();
}

tuple<Status, optional<std::vector<directory_entry>>> Azure::ls_with_sizes(
std::vector<directory_entry> Azure::ls_with_sizes(
const URI& uri, const std::string& delimiter, int max_paths) const {
const auto& c = client();

const URI uri_dir = uri.add_trailing_slash();

if (!uri_dir.is_azure()) {
auto st = LOG_STATUS(Status_AzureError(
std::string("URI is not an Azure URI: " + uri_dir.to_string())));
return {st, nullopt};
throw AzureException("URI is not an Azure URI: " + uri_dir.to_string());
}

std::string container_name;
std::string blob_path;
RETURN_NOT_OK_TUPLE(
parse_azure_uri(uri_dir, &container_name, &blob_path), nullopt);
throw_if_not_ok(parse_azure_uri(uri_dir, &container_name, &blob_path));

auto container_client = c.GetBlobContainerClient(container_name);

Expand All @@ -624,9 +618,8 @@ tuple<Status, optional<std::vector<directory_entry>>> Azure::ls_with_sizes(
try {
response = container_client.ListBlobsByHierarchy(delimiter, options);
} catch (const ::Azure::Storage::StorageException& e) {
auto st = LOG_STATUS(Status_AzureError(
"List blobs failed on: " + uri_dir.to_string() + "; " + e.Message));
return {st, nullopt};
throw AzureException(
"List blobs failed on: " + uri_dir.to_string() + "; " + e.Message);
}

for (const auto& blob : response.Blobs) {
Expand All @@ -648,7 +641,7 @@ tuple<Status, optional<std::vector<directory_entry>>> Azure::ls_with_sizes(
options.ContinuationToken = response.NextPageToken;
} while (options.ContinuationToken.HasValue());

return {Status::Ok(), entries};
return entries;
}

Status Azure::move_object(const URI& old_uri, const URI& new_uri) {
Expand Down
3 changes: 1 addition & 2 deletions tiledb/sm/filesystem/azure.h
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,7 @@ class Azure {
* @param max_paths The maximum number of paths to be retrieved
* @return A list of directory_entry objects
*/
tuple<Status, optional<std::vector<filesystem::directory_entry>>>
ls_with_sizes(
std::vector<filesystem::directory_entry> ls_with_sizes(
const URI& uri,
const std::string& delimiter = "/",
int max_paths = -1) const;
Expand Down
4 changes: 2 additions & 2 deletions tiledb/sm/filesystem/filesystem_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ class FilesystemBase {
* @param parent The target directory to list.
* @return All entries that are contained in the parent
*/
virtual tuple<Status, optional<std::vector<filesystem::directory_entry>>>
ls_with_sizes(const URI& parent) const = 0;
virtual std::vector<filesystem::directory_entry> ls_with_sizes(
const URI& parent) const = 0;

/**
* Renames a file.
Expand Down
22 changes: 8 additions & 14 deletions tiledb/sm/filesystem/gcs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -477,32 +477,27 @@ Status GCS::ls(
const std::string& delimiter,
const int max_paths) const {
assert(paths);
auto&& [st, entries] = ls_with_sizes(uri, delimiter, max_paths);
RETURN_NOT_OK(st);

for (auto& fs : *entries) {
for (auto& fs : ls_with_sizes(uri, delimiter, max_paths)) {
paths->emplace_back(fs.path().native());
}

return Status::Ok();
}

tuple<Status, optional<std::vector<directory_entry>>> GCS::ls_with_sizes(
std::vector<directory_entry> GCS::ls_with_sizes(
const URI& uri, const std::string& delimiter, int max_paths) const {
RETURN_NOT_OK_TUPLE(init_client(), nullopt);
throw_if_not_ok(init_client());

const URI uri_dir = uri.add_trailing_slash();

if (!uri_dir.is_gcs()) {
auto st = LOG_STATUS(Status_GCSError(
std::string("URI is not a GCS URI: " + uri_dir.to_string())));
return {st, nullopt};
throw GCSException("URI is not a GCS URI: " + uri_dir.to_string());
}

std::string bucket_name;
std::string object_path;
RETURN_NOT_OK_TUPLE(
parse_gcs_uri(uri_dir, &bucket_name, &object_path), nullopt);
throw_if_not_ok(parse_gcs_uri(uri_dir, &bucket_name, &object_path));

std::vector<directory_entry> entries;
google::cloud::storage::Prefix prefix_option(object_path);
Expand All @@ -515,10 +510,9 @@ tuple<Status, optional<std::vector<directory_entry>>> GCS::ls_with_sizes(
if (!object_metadata.ok()) {
const google::cloud::Status status = object_metadata.status();

auto st = LOG_STATUS(Status_GCSError(std::string(
throw GCSException(
"List objects failed on: " + uri.to_string() + " (" +
status.message() + ")")));
return {st, nullopt};
status.message() + ")");
}

if (entries.size() >= static_cast<size_t>(max_paths)) {
Expand Down Expand Up @@ -549,7 +543,7 @@ tuple<Status, optional<std::vector<directory_entry>>> GCS::ls_with_sizes(
}
}

return {Status::Ok(), entries};
return entries;
}

LsObjects GCS::ls_filtered_impl(
Expand Down
3 changes: 1 addition & 2 deletions tiledb/sm/filesystem/gcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,7 @@ class GCS {
* @param max_paths The maximum number of paths to be retrieved
* @return A list of directory_entry objects
*/
tuple<Status, optional<std::vector<filesystem::directory_entry>>>
ls_with_sizes(
std::vector<filesystem::directory_entry> ls_with_sizes(
const URI& uri,
const std::string& delimiter = "/",
int max_paths = -1) const;
Expand Down
18 changes: 6 additions & 12 deletions tiledb/sm/filesystem/mem_filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -374,18 +374,16 @@ bool MemFilesystem::is_file(const std::string& path) const {
Status MemFilesystem::ls(
const std::string& path, std::vector<std::string>* const paths) const {
assert(paths);
auto&& [st, entries] = ls_with_sizes(URI("mem://" + path));
RETURN_NOT_OK(st);

for (auto& fs : *entries) {
for (auto& fs : ls_with_sizes(URI("mem://" + path))) {
paths->emplace_back(fs.path().native());
}

return Status::Ok();
}

tuple<Status, optional<std::vector<directory_entry>>>
MemFilesystem::ls_with_sizes(const URI& path) const {
std::vector<directory_entry> MemFilesystem::ls_with_sizes(
const URI& path) const {
auto abspath = path.to_path();
std::vector<std::string> tokens = tokenize(abspath);

Expand All @@ -398,9 +396,7 @@ MemFilesystem::ls_with_sizes(const URI& path) const {
dir = dir + token + "/";

if (cur->children_.count(token) != 1) {
auto st = LOG_STATUS(Status_MemFSError(
std::string("Unable to list on non-existent path ") + abspath));
return {st, nullopt};
throw MemFSException("Unable to list on non-existent path " + abspath);
}

cur = cur->children_[token].get();
Expand All @@ -410,11 +406,9 @@ MemFilesystem::ls_with_sizes(const URI& path) const {
assert(cur_lock.owns_lock());
assert(cur_lock.mutex() == &cur->mutex_);
auto&& [st, entries] = cur->ls(dir);
if (!st.ok()) {
return {st, nullopt};
}
throw_if_not_ok(st);

return {Status::Ok(), entries};
return *entries;
}

Status MemFilesystem::move(
Expand Down
14 changes: 11 additions & 3 deletions tiledb/sm/filesystem/mem_filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include <unordered_map>
#include <vector>

#include "tiledb/common/exception/exception.h"
#include "tiledb/common/macros.h"
#include "tiledb/common/status.h"

Expand All @@ -52,6 +53,14 @@ namespace sm {

class URI;

/** Class for MemFS status exceptions. */
class MemFSException : public StatusException {
public:
explicit MemFSException(const std::string& msg)
: StatusException("MemFS", msg) {
}
};

/**
* The in-memory filesystem.
*
Expand Down Expand Up @@ -139,10 +148,9 @@ class MemFilesystem {
* Lists files and files information under path
*
* @param path The parent path to list sub-paths
* @return Status
* @return A list of directory_entry objects
*/
tuple<Status, optional<std::vector<filesystem::directory_entry>>>
ls_with_sizes(const URI& path) const;
std::vector<filesystem::directory_entry> ls_with_sizes(const URI& path) const;

/**
* Move a given filesystem path.
Expand Down
Loading

0 comments on commit 2dc4fb7

Please sign in to comment.