From 97b6fc6bbae41084d0a5616001f17665e801f701 Mon Sep 17 00:00:00 2001 From: Seth Shelnutt Date: Tue, 26 Jan 2021 16:45:10 -0500 Subject: [PATCH] Parallelize closing of files on write This change parallelizes the closing of files on writes. This solves a performance problem for users of S3 or other object stores, where we buffer the multi-part writes. If the user's data is smaller than the buffer size, no I/O occurs until the files are closed, which is when we flush the buffers. This caused a large, unexpected performance penalty because up to three files per field had to be uploaded serially. --- HISTORY.md | 2 ++ tiledb/sm/query/writer.cc | 25 ++++++++++++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 0c26cdf861d..2a256269f2c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,6 +10,8 @@ ## Improvements +* Parallelize across attributes when closing a write [#2048](https://github.com/TileDB-Inc/TileDB/pull/2048) + ## Deprecations ## Bug fixes diff --git a/tiledb/sm/query/writer.cc b/tiledb/sm/query/writer.cc index 86102473b1b..4d1e9af44a5 100644 --- a/tiledb/sm/query/writer.cc +++ b/tiledb/sm/query/writer.cc @@ -1308,15 +1308,30 @@ void Writer::clear_coord_buffers() { Status Writer::close_files(FragmentMetadata* meta) const { // Close attribute and dimension files - for (const auto& it : buffers_) { - const auto& name = it.first; - RETURN_NOT_OK(storage_manager_->close_file(meta->uri(name))); + const auto buffer_name = buffer_names(); + + std::vector file_uris; + file_uris.reserve(buffer_name.size() * 3); + + for (const auto& name : buffer_name) { + file_uris.emplace_back(meta->uri(name)); if (array_schema_->var_size(name)) - RETURN_NOT_OK(storage_manager_->close_file(meta->var_uri(name))); + file_uris.emplace_back(meta->var_uri(name)); if (array_schema_->is_nullable(name)) - RETURN_NOT_OK(storage_manager_->close_file(meta->validity_uri(name))); + file_uris.emplace_back(meta->validity_uri(name)); } + 
auto statuses = parallel_for( + storage_manager_->io_tp(), 0, file_uris.size(), [&](uint64_t i) { + const auto& file_ur = file_uris[i]; + RETURN_NOT_OK(storage_manager_->close_file(file_ur)); + return Status::Ok(); + }); + + // Check all statuses + for (auto& st : statuses) + RETURN_NOT_OK(st); + return Status::Ok(); }