From cb476488a9e80f2377b747da91a1afd6b0c36113 Mon Sep 17 00:00:00 2001 From: Stavros Papadopoulos Date: Tue, 7 Jan 2020 11:26:30 -0500 Subject: [PATCH] Lazy array metadata fetching (#1466) Lazily read the array metadata when requested for the first time, instead of fetching it in advance upon array opening. --- tiledb/sm/array/array.cc | 70 +++++++++++++---- tiledb/sm/array/array.h | 28 +++++-- tiledb/sm/metadata/metadata.cc | 22 ++++-- tiledb/sm/metadata/metadata.h | 8 +- tiledb/sm/rest/rest_client.cc | 5 +- tiledb/sm/rest/rest_client.h | 2 +- tiledb/sm/serialization/array_schema.cc | 18 +++-- tiledb/sm/serialization/array_schema.h | 4 +- tiledb/sm/storage_manager/consolidator.cc | 10 ++- tiledb/sm/storage_manager/storage_manager.cc | 83 ++++++++++---------- tiledb/sm/storage_manager/storage_manager.h | 18 +++-- 11 files changed, 176 insertions(+), 92 deletions(-) diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 98d8f18db7a..e52b90ba2d1 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -63,6 +63,7 @@ Array::Array(const URI& array_uri, StorageManager* storage_manager) timestamp_ = 0; last_max_buffer_sizes_subarray_ = nullptr; remote_ = array_uri.is_tiledb(); + metadata_loaded_ = false; }; Array::~Array() { @@ -150,6 +151,8 @@ Status Array::open( encryption_key_.set_key(encryption_type, encryption_key, key_length)); timestamp_ = timestamp; + metadata_.clear(); + metadata_loaded_ = false; query_type_ = query_type; if (remote_) { @@ -159,8 +162,6 @@ Status Array::open( "Cannot open array; remote array with no REST client.")); RETURN_NOT_OK( rest_client->get_array_schema_from_rest(array_uri_, &array_schema_)); - RETURN_NOT_OK(rest_client->get_array_metadata_from_rest( - array_uri_, timestamp_, this)); } else { // Open the array. RETURN_NOT_OK(storage_manager_->array_open_for_reads( @@ -168,8 +169,7 @@ Status Array::open( timestamp_, encryption_key_, &array_schema_, - &fragment_metadata_, - &metadata_)); + &fragment_metadata_)); } is_open_ = true; @@ -204,6 +204,8 @@ Status Array::open( encryption_key_.set_key(encryption_type, encryption_key, key_length)); timestamp_ = utils::time::timestamp_now_ms(); + metadata_.clear(); + metadata_loaded_ = false; query_type_ = QueryType::READ; if (remote_) { @@ -213,8 +215,6 @@ Status Array::open( "Cannot open array; remote array with no REST client.")); RETURN_NOT_OK( rest_client->get_array_schema_from_rest(array_uri_, &array_schema_)); - RETURN_NOT_OK(rest_client->get_array_metadata_from_rest( - array_uri_, timestamp_, this)); } else { // Open the array. RETURN_NOT_OK(storage_manager_->array_open_for_reads( @@ -251,6 +251,8 @@ Status Array::open( timestamp_ = query_type == QueryType::READ ? utils::time::timestamp_now_ms() : 0; + metadata_.clear(); + metadata_loaded_ = false; if (remote_) { auto rest_client = storage_manager_->rest_client(); @@ -259,16 +261,13 @@ Status Array::open( "Cannot open array; remote array with no REST client.")); RETURN_NOT_OK( rest_client->get_array_schema_from_rest(array_uri_, &array_schema_)); - RETURN_NOT_OK(rest_client->get_array_metadata_from_rest( - array_uri_, timestamp_, this)); } else if (query_type == QueryType::READ) { RETURN_NOT_OK(storage_manager_->array_open_for_reads( array_uri_, timestamp_, encryption_key_, &array_schema_, - &fragment_metadata_, - &metadata_)); + &fragment_metadata_)); } else { RETURN_NOT_OK(storage_manager_->array_open_for_writes( array_uri_, encryption_key_, &array_schema_)); @@ -317,6 +316,7 @@ Status Array::close() { } metadata_.clear(); + metadata_loaded_ = false; return Status::Ok(); } @@ -487,6 +487,8 @@ Status Array::reopen(uint64_t timestamp) { timestamp_ = timestamp; fragment_metadata_.clear(); + metadata_.clear(); + metadata_loaded_ = false; if (remote_) { return open( @@ -500,8 +502,7 @@ Status Array::reopen(uint64_t timestamp) { timestamp_, encryption_key_, &array_schema_, - &fragment_metadata_, - &metadata_); + &fragment_metadata_); } uint64_t Array::timestamp() const { @@ -595,6 +596,10 @@ Status Array::get_metadata( return LOG_STATUS( Status::ArrayError("Cannot get metadata; Key cannot be null")); + // Load array metadata, if not loaded yet + if (!metadata_loaded_) + RETURN_NOT_OK(load_metadata()); + RETURN_NOT_OK(metadata_.get(key, value_type, value_num, value)); return Status::Ok(); @@ -618,13 +623,17 @@ Status Array::get_metadata( Status::ArrayError("Cannot get metadata; Array was " "not opened in read mode")); + // Load array metadata, if not loaded yet + if (!metadata_loaded_) + RETURN_NOT_OK(load_metadata()); + RETURN_NOT_OK( metadata_.get(index, key, key_len, value_type, value_num, value)); return Status::Ok(); } -Status Array::get_metadata_num(uint64_t* num) const { +Status Array::get_metadata_num(uint64_t* num) { // Check if array is open if (!is_open_) return LOG_STATUS( @@ -636,6 +645,10 @@ Status Array::get_metadata_num(uint64_t* num) const { Status::ArrayError("Cannot get number of metadata; Array was " "not opened in read mode")); + // Load array metadata, if not loaded yet + if (!metadata_loaded_) + RETURN_NOT_OK(load_metadata()); + *num = metadata_.num(); return Status::Ok(); @@ -659,6 +672,10 @@ Status Array::has_metadata_key( return LOG_STATUS( Status::ArrayError("Cannot get metadata; Key cannot be null")); + // Load array metadata, if not loaded yet + if (!metadata_loaded_) + RETURN_NOT_OK(load_metadata()); + RETURN_NOT_OK(metadata_.has_key(key, value_type, has_key)); return Status::Ok(); @@ -668,8 +685,14 @@ Metadata* Array::metadata() { return &metadata_; } -const Metadata* Array::metadata() const { - return &metadata_; +Status Array::metadata(Metadata** metadata) { + // Load array metadata, if not loaded yet + if (!metadata_loaded_) + RETURN_NOT_OK(load_metadata()); + + *metadata = &metadata_; + + return Status::Ok(); } /* ********************************* */ @@ -844,5 +867,22 @@ Status Array::compute_max_buffer_sizes( return Status::Ok(); } +Status Array::load_metadata() { + std::lock_guard lock{mtx_}; + if (remote_) { + auto rest_client = storage_manager_->rest_client(); + if (rest_client == nullptr) + return LOG_STATUS(Status::ArrayError( + "Cannot load metadata; remote array with no REST client.")); + RETURN_NOT_OK(rest_client->get_array_metadata_from_rest( + array_uri_, timestamp_, this)); + } else { + RETURN_NOT_OK(storage_manager_->load_array_metadata( + array_uri_, encryption_key_, timestamp_, &metadata_)); + } + metadata_loaded_ = true; + return Status::Ok(); +} + } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index 9b14f2da10d..2bb11393560 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -295,16 +295,25 @@ class Array { const void** value); /** Returns the number of array metadata items. */ - Status get_metadata_num(uint64_t* num) const; + Status get_metadata_num(uint64_t* num); /** Sets has_key == 1 and corresponding value_type if the array has key. */ Status has_metadata_key(const char* key, Datatype* value_type, bool* has_key); - /** Returns the array metadata object. */ - Metadata* metadata(); + /** Retrieves the array metadata object. */ + Status metadata(Metadata** metadata); - /** Returns the array metadata object. */ - const Metadata* metadata() const; + /** + * Retrieves the array metadata object. + * + * @note This is potentially an unsafe operation + * it could have contention with locks from lazy loading of metadata. + * This should only be used by the serialization class + * (tiledb/sm/serialization/array_schema.cc). In that class we need to fetch + * the underlying Metadata object to set the values we are loading from REST. + * A lock should already by taken before load_metadata is called. + */ + Metadata* metadata(); private: /* ********************************* */ @@ -363,6 +372,9 @@ class Array { /** The array metadata. */ Metadata metadata_; + /** True if the array metadata is loaded. */ + bool metadata_loaded_; + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ @@ -414,6 +426,12 @@ class Array { const T* subarray, std::unordered_map>* max_buffer_sizes) const; + + /** + * Load array metadata, handles remote arrays vs non-remote arrays + * @return Status + */ + Status load_metadata(); }; } // namespace sm diff --git a/tiledb/sm/metadata/metadata.cc b/tiledb/sm/metadata/metadata.cc index b30ed5e2abc..815fff2e281 100644 --- a/tiledb/sm/metadata/metadata.cc +++ b/tiledb/sm/metadata/metadata.cc @@ -102,12 +102,7 @@ Status Metadata::deserialize( } } - // Create metadata index for fast lookups from index - metadata_index_.resize(metadata_map_.size()); - size_t i = 0; - for (auto& m : metadata_map_) - metadata_index_[i++] = std::make_pair(&(m.first), &(m.second)); - + RETURN_NOT_OK(build_metadata_index()); // Note: `metadata_map_` and `metadata_index_` are immutable after this point return Status::Ok(); @@ -215,7 +210,10 @@ Status Metadata::get( uint32_t* key_len, Datatype* value_type, uint32_t* value_num, - const void** value) const { + const void** value) { + if (metadata_index_.empty()) + RETURN_NOT_OK(build_metadata_index()); + if (index >= metadata_index_.size()) return LOG_STATUS( Status::MetadataError("Cannot get metadata; index out of bounds")); @@ -300,5 +298,15 @@ Metadata::iterator Metadata::end() const { /* PRIVATE METHODS */ /* ********************************* */ +Status Metadata::build_metadata_index() { + // Create metadata index for fast lookups from index + metadata_index_.resize(metadata_map_.size()); + size_t i = 0; + for (auto& m : metadata_map_) + metadata_index_[i++] = std::make_pair(&(m.first), &(m.second)); + + return Status::Ok(); +} + } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/metadata/metadata.h b/tiledb/sm/metadata/metadata.h index d4a0a8fa762..8b82ce768be 100644 --- a/tiledb/sm/metadata/metadata.h +++ b/tiledb/sm/metadata/metadata.h @@ -169,7 +169,7 @@ class Metadata { uint32_t* key_len, Datatype* value_type, uint32_t* value_num, - const void** value) const; + const void** value); /** Returns the number of metadata items. */ uint64_t num() const; @@ -246,6 +246,12 @@ class Metadata { /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ + + /** + * Build the metadata index vector from the metadata map + * @return Status + */ + Status build_metadata_index(); }; } // namespace sm diff --git a/tiledb/sm/rest/rest_client.cc b/tiledb/sm/rest/rest_client.cc index 7d95b32cc43..d47064bdec2 100644 --- a/tiledb/sm/rest/rest_client.cc +++ b/tiledb/sm/rest/rest_client.cc @@ -248,8 +248,7 @@ Status RestClient::get_array_metadata_from_rest( array, serialization_type_, returned_data); } -Status RestClient::post_array_metadata_to_rest( - const URI& uri, const Array* array) { +Status RestClient::post_array_metadata_to_rest(const URI& uri, Array* array) { if (array == nullptr) return LOG_STATUS(Status::RestError( "Error posting array metadata to REST; array is null.")); @@ -690,7 +689,7 @@ Status RestClient::get_array_metadata_from_rest(const URI&, uint64_t, Array*) { Status::RestError("Cannot use rest client; serialization not enabled.")); } -Status RestClient::post_array_metadata_to_rest(const URI&, const Array*) { +Status RestClient::post_array_metadata_to_rest(const URI&, Array*) { return LOG_STATUS( Status::RestError("Cannot use rest client; serialization not enabled.")); } diff --git a/tiledb/sm/rest/rest_client.h b/tiledb/sm/rest/rest_client.h index bba6684b5c2..880eb1d5f8b 100644 --- a/tiledb/sm/rest/rest_client.h +++ b/tiledb/sm/rest/rest_client.h @@ -126,7 +126,7 @@ class RestClient { * @param array Array to update/post metadata for. * @return Status */ - Status post_array_metadata_to_rest(const URI& uri, const Array* array); + Status post_array_metadata_to_rest(const URI& uri, Array* array); /** * Post a data query to rest server diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index 1cbff103c90..1a17deec1a4 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -888,24 +888,26 @@ Status max_buffer_sizes_deserialize( } Status array_metadata_serialize( - const Array* array, - SerializationType serialize_type, - Buffer* serialized_buffer) { + Array* array, SerializationType serialize_type, Buffer* serialized_buffer) { if (array == nullptr) return LOG_STATUS(Status::SerializationError( "Error serializing array metadata; array instance is null")); - if (array->metadata() == nullptr) + + Metadata* metadata; + + RETURN_NOT_OK(array->metadata(&metadata)); + + if (metadata == nullptr) return LOG_STATUS(Status::SerializationError( "Error serializing array metadata; array metadata instance is null")); - const Metadata& metadata = *(array->metadata()); try { // Serialize ::capnp::MallocMessageBuilder message; auto builder = message.initRoot(); - auto entries_builder = builder.initEntries(metadata.num()); + auto entries_builder = builder.initEntries(metadata->num()); size_t i = 0; - for (auto it = metadata.begin(); it != metadata.end(); ++it) { + for (auto it = metadata->begin(); it != metadata->end(); ++it) { auto entry_builder = entries_builder[i++]; const auto& entry = it->second; auto datatype = static_cast(entry.type_); @@ -1105,7 +1107,7 @@ Status max_buffer_sizes_deserialize( "Cannot serialize; serialization not enabled.")); } -Status array_metadata_serialize(const Array*, SerializationType, Buffer*) { +Status array_metadata_serialize(Array*, SerializationType, Buffer*) { return LOG_STATUS(Status::SerializationError( "Cannot serialize; serialization not enabled.")); } diff --git a/tiledb/sm/serialization/array_schema.h b/tiledb/sm/serialization/array_schema.h index 4875d00a2f2..b7658d3d7fe 100644 --- a/tiledb/sm/serialization/array_schema.h +++ b/tiledb/sm/serialization/array_schema.h @@ -79,9 +79,7 @@ Status max_buffer_sizes_deserialize( buffer_sizes); Status array_metadata_serialize( - const Array* array, - SerializationType serialize_type, - Buffer* serialized_buffer); + Array* array, SerializationType serialize_type, Buffer* serialized_buffer); Status array_metadata_deserialize( Array* array, diff --git a/tiledb/sm/storage_manager/consolidator.cc b/tiledb/sm/storage_manager/consolidator.cc index 4d7210273ba..0b99ca1f24e 100644 --- a/tiledb/sm/storage_manager/consolidator.cc +++ b/tiledb/sm/storage_manager/consolidator.cc @@ -121,12 +121,16 @@ Status Consolidator::consolidate_array_metadata( // Swap the in-memory metadata between the two arrays. // After that, the array for writes will store the (consolidated by // the way metadata loading works) metadata of the array for reads - auto metadata_r = array_for_reads.metadata(); - auto metadata_w = array_for_writes.metadata(); + Metadata* metadata_r; + RETURN_NOT_OK_ELSE( + array_for_reads.metadata(&metadata_r), array_for_reads.close()); + Metadata* metadata_w; + RETURN_NOT_OK_ELSE( + array_for_writes.metadata(&metadata_w), array_for_reads.close()); metadata_r->swap(metadata_w); // Metadata uris to delete - const auto to_delete = array_for_writes.metadata()->loaded_metadata_uris(); + const auto to_delete = metadata_w->loaded_metadata_uris(); // Close arrays RETURN_NOT_OK_ELSE(array_for_reads.close(), array_for_writes.close()); diff --git a/tiledb/sm/storage_manager/storage_manager.cc b/tiledb/sm/storage_manager/storage_manager.cc index ff845067f2d..c899ae8daa1 100644 --- a/tiledb/sm/storage_manager/storage_manager.cc +++ b/tiledb/sm/storage_manager/storage_manager.cc @@ -198,8 +198,7 @@ Status StorageManager::array_open_for_reads( uint64_t timestamp, const EncryptionKey& encryption_key, ArraySchema** array_schema, - std::vector* fragment_metadata, - Metadata* metadata) { + std::vector* fragment_metadata) { STATS_FUNC_IN(sm_array_open_for_reads); // Open array without fragments @@ -227,23 +226,6 @@ Status StorageManager::array_open_for_reads( return st; } - // Determine which array metadata to load - std::vector array_metadata_to_load; - std::vector array_metadata_uris; - RETURN_NOT_OK(get_array_metadata_uris(array_uri, &array_metadata_uris)); - RETURN_NOT_OK( - get_sorted_uris(array_metadata_uris, timestamp, &array_metadata_to_load)); - - // Get the array metadata - st = load_array_metadata( - open_array, encryption_key, array_metadata_to_load, metadata); - if (!st.ok()) { - open_array->mtx_unlock(); - array_close_for_reads(array_uri); - *array_schema = nullptr; - return st; - } - // Unlock the array mutex open_array->mtx_unlock(); @@ -285,8 +267,6 @@ Status StorageManager::array_open_for_reads( return st; } - // Note: This function does not load any array metadata! - // Unlock the array mutex open_array->mtx_unlock(); @@ -379,8 +359,7 @@ Status StorageManager::array_reopen( uint64_t timestamp, const EncryptionKey& encryption_key, ArraySchema** array_schema, - std::vector* fragment_metadata, - Metadata* metadata) { + std::vector* fragment_metadata) { STATS_FUNC_IN(sm_array_reopen); auto open_array = (OpenArray*)nullptr; @@ -422,23 +401,6 @@ Status StorageManager::array_reopen( // Get the array schema *array_schema = open_array->array_schema(); - // Determin which array metadata to load - std::vector array_metadata_to_load; - std::vector array_metadata_uris; - RETURN_NOT_OK(get_array_metadata_uris(array_uri, &array_metadata_uris)); - RETURN_NOT_OK( - get_sorted_uris(array_metadata_uris, timestamp, &array_metadata_to_load)); - - // Get the array metadata - st = load_array_metadata( - open_array, encryption_key, array_metadata_to_load, metadata); - if (!st.ok()) { - open_array->mtx_unlock(); - array_close_for_reads(array_uri); - *array_schema = nullptr; - return st; - } - // Unlock the mutexes open_array->mtx_unlock(); @@ -1109,6 +1071,47 @@ Status StorageManager::load_array_schema( return st; } +Status StorageManager::load_array_metadata( + const URI& array_uri, + const EncryptionKey& encryption_key, + uint64_t timestamp, + Metadata* metadata) { + OpenArray* open_array; + // Lock mutex + { + std::lock_guard lock{open_array_for_reads_mtx_}; + + // Find the open array entry + auto it = open_arrays_for_reads_.find(array_uri.to_string()); + assert(it != open_arrays_for_reads_.end()); + open_array = it->second; + + // Lock the array + open_array->mtx_lock(); + } + + // Determine which array metadata to load + std::vector array_metadata_to_load; + std::vector array_metadata_uris; + RETURN_NOT_OK_ELSE( + get_array_metadata_uris(array_uri, &array_metadata_uris), + open_array->mtx_unlock()); + RETURN_NOT_OK_ELSE( + get_sorted_uris(array_metadata_uris, timestamp, &array_metadata_to_load), + open_array->mtx_unlock()); + + // Get the array metadata + RETURN_NOT_OK_ELSE( + load_array_metadata( + open_array, encryption_key, array_metadata_to_load, metadata), + open_array->mtx_unlock()); + + // Unlock the array + open_array->mtx_unlock(); + + return Status::Ok(); +} + Status StorageManager::object_type(const URI& uri, ObjectType* type) const { URI dir_uri = uri; if (uri.is_s3()) { diff --git a/tiledb/sm/storage_manager/storage_manager.h b/tiledb/sm/storage_manager/storage_manager.h index f5e126d7022..7eb8eafbe23 100644 --- a/tiledb/sm/storage_manager/storage_manager.h +++ b/tiledb/sm/storage_manager/storage_manager.h @@ -142,7 +142,6 @@ class StorageManager { * array is opened. * @param fragment_metadata The fragment metadata to be retrieved * after the array is opened. - * @param metadata The array metadata will be loaded here if not null. * @return Status */ Status array_open_for_reads( @@ -150,8 +149,7 @@ class StorageManager { uint64_t timestamp, const EncryptionKey& encryption_key, ArraySchema** array_schema, - std::vector* fragment_metadata, - Metadata* metadata = nullptr); + std::vector* fragment_metadata); /** * Opens an array for reads, focusing only on a given list of fragments. @@ -200,7 +198,6 @@ class StorageManager { * array is opened. * @param fragment_metadata The fragment metadata to be retrieved * after the array is opened. - * @param metadata The array metadata to be loaded. * @return Status */ Status array_reopen( @@ -208,8 +205,7 @@ class StorageManager { uint64_t timestamp, const EncryptionKey& encryption_key, ArraySchema** array_schema, - std::vector* fragment_metadata, - Metadata* metadata); + std::vector* fragment_metadata); /** * Consolidates the fragments of an array into a single one. @@ -451,6 +447,16 @@ class StorageManager { const EncryptionKey& encryption_key, ArraySchema** array_schema); + /** + * Loads the array metadata from persistent storage that were created + * at or before `timestamp`. + */ + Status load_array_metadata( + const URI& array_uri, + const EncryptionKey& encryption_key, + uint64_t timestamp, + Metadata* metadata); + /** Removes a TileDB object (group, array). */ Status object_remove(const char* path) const;