Skip to content

Commit

Permalink
Lazy array metadata fetching (#1466)
Browse files Browse the repository at this point in the history
Lazily read the array metadata when requested for the first time, instead of fetching it in advance upon array opening.
  • Loading branch information
stavrospapadopoulos authored Jan 7, 2020
1 parent cfe4981 commit cb47648
Show file tree
Hide file tree
Showing 11 changed files with 176 additions and 92 deletions.
70 changes: 55 additions & 15 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ Array::Array(const URI& array_uri, StorageManager* storage_manager)
timestamp_ = 0;
last_max_buffer_sizes_subarray_ = nullptr;
remote_ = array_uri.is_tiledb();
metadata_loaded_ = false;
};

Array::~Array() {
Expand Down Expand Up @@ -150,6 +151,8 @@ Status Array::open(
encryption_key_.set_key(encryption_type, encryption_key, key_length));

timestamp_ = timestamp;
metadata_.clear();
metadata_loaded_ = false;

query_type_ = query_type;
if (remote_) {
Expand All @@ -159,17 +162,14 @@ Status Array::open(
"Cannot open array; remote array with no REST client."));
RETURN_NOT_OK(
rest_client->get_array_schema_from_rest(array_uri_, &array_schema_));
RETURN_NOT_OK(rest_client->get_array_metadata_from_rest(
array_uri_, timestamp_, this));
} else {
// Open the array.
RETURN_NOT_OK(storage_manager_->array_open_for_reads(
array_uri_,
timestamp_,
encryption_key_,
&array_schema_,
&fragment_metadata_,
&metadata_));
&fragment_metadata_));
}

is_open_ = true;
Expand Down Expand Up @@ -204,6 +204,8 @@ Status Array::open(
encryption_key_.set_key(encryption_type, encryption_key, key_length));

timestamp_ = utils::time::timestamp_now_ms();
metadata_.clear();
metadata_loaded_ = false;

query_type_ = QueryType::READ;
if (remote_) {
Expand All @@ -213,8 +215,6 @@ Status Array::open(
"Cannot open array; remote array with no REST client."));
RETURN_NOT_OK(
rest_client->get_array_schema_from_rest(array_uri_, &array_schema_));
RETURN_NOT_OK(rest_client->get_array_metadata_from_rest(
array_uri_, timestamp_, this));
} else {
// Open the array.
RETURN_NOT_OK(storage_manager_->array_open_for_reads(
Expand Down Expand Up @@ -251,6 +251,8 @@ Status Array::open(

timestamp_ =
query_type == QueryType::READ ? utils::time::timestamp_now_ms() : 0;
metadata_.clear();
metadata_loaded_ = false;

if (remote_) {
auto rest_client = storage_manager_->rest_client();
Expand All @@ -259,16 +261,13 @@ Status Array::open(
"Cannot open array; remote array with no REST client."));
RETURN_NOT_OK(
rest_client->get_array_schema_from_rest(array_uri_, &array_schema_));
RETURN_NOT_OK(rest_client->get_array_metadata_from_rest(
array_uri_, timestamp_, this));
} else if (query_type == QueryType::READ) {
RETURN_NOT_OK(storage_manager_->array_open_for_reads(
array_uri_,
timestamp_,
encryption_key_,
&array_schema_,
&fragment_metadata_,
&metadata_));
&fragment_metadata_));
} else {
RETURN_NOT_OK(storage_manager_->array_open_for_writes(
array_uri_, encryption_key_, &array_schema_));
Expand Down Expand Up @@ -317,6 +316,7 @@ Status Array::close() {
}

metadata_.clear();
metadata_loaded_ = false;

return Status::Ok();
}
Expand Down Expand Up @@ -487,6 +487,8 @@ Status Array::reopen(uint64_t timestamp) {

timestamp_ = timestamp;
fragment_metadata_.clear();
metadata_.clear();
metadata_loaded_ = false;

if (remote_) {
return open(
Expand All @@ -500,8 +502,7 @@ Status Array::reopen(uint64_t timestamp) {
timestamp_,
encryption_key_,
&array_schema_,
&fragment_metadata_,
&metadata_);
&fragment_metadata_);
}

uint64_t Array::timestamp() const {
Expand Down Expand Up @@ -595,6 +596,10 @@ Status Array::get_metadata(
return LOG_STATUS(
Status::ArrayError("Cannot get metadata; Key cannot be null"));

// Load array metadata, if not loaded yet
if (!metadata_loaded_)
RETURN_NOT_OK(load_metadata());

RETURN_NOT_OK(metadata_.get(key, value_type, value_num, value));

return Status::Ok();
Expand All @@ -618,13 +623,17 @@ Status Array::get_metadata(
Status::ArrayError("Cannot get metadata; Array was "
"not opened in read mode"));

// Load array metadata, if not loaded yet
if (!metadata_loaded_)
RETURN_NOT_OK(load_metadata());

RETURN_NOT_OK(
metadata_.get(index, key, key_len, value_type, value_num, value));

return Status::Ok();
}

Status Array::get_metadata_num(uint64_t* num) const {
Status Array::get_metadata_num(uint64_t* num) {
// Check if array is open
if (!is_open_)
return LOG_STATUS(
Expand All @@ -636,6 +645,10 @@ Status Array::get_metadata_num(uint64_t* num) const {
Status::ArrayError("Cannot get number of metadata; Array was "
"not opened in read mode"));

// Load array metadata, if not loaded yet
if (!metadata_loaded_)
RETURN_NOT_OK(load_metadata());

*num = metadata_.num();

return Status::Ok();
Expand All @@ -659,6 +672,10 @@ Status Array::has_metadata_key(
return LOG_STATUS(
Status::ArrayError("Cannot get metadata; Key cannot be null"));

// Load array metadata, if not loaded yet
if (!metadata_loaded_)
RETURN_NOT_OK(load_metadata());

RETURN_NOT_OK(metadata_.has_key(key, value_type, has_key));

return Status::Ok();
Expand All @@ -668,8 +685,14 @@ Metadata* Array::metadata() {
return &metadata_;
}

const Metadata* Array::metadata() const {
return &metadata_;
/**
 * Hands the caller a pointer to the array metadata object, loading the
 * metadata first when it has not been loaded yet.
 *
 * @param metadata Set to the address of the internal metadata object.
 * @return Status::Ok() on success, or the error returned by the load.
 */
Status Array::metadata(Metadata** metadata) {
  // Lazy loading: the metadata is only fetched on first request.
  if (!metadata_loaded_) {
    RETURN_NOT_OK(load_metadata());
  }

  Metadata* const loaded = &metadata_;
  *metadata = loaded;

  return Status::Ok();
}

/* ********************************* */
Expand Down Expand Up @@ -844,5 +867,22 @@ Status Array::compute_max_buffer_sizes(
return Status::Ok();
}

/**
 * Loads the array metadata into `metadata_` and marks it as loaded.
 *
 * Remote arrays fetch the metadata through the REST client; all other
 * arrays load it through the storage manager. Both paths use the array's
 * current `timestamp_`.
 *
 * @return Status::Ok() on success (including when the metadata was already
 *     loaded), otherwise the error of the step that failed.
 */
Status Array::load_metadata() {
  std::lock_guard<std::mutex> lock{mtx_};

  // The callers in this file test `metadata_loaded_` before `mtx_` is
  // acquired, so another thread may have completed the load while this one
  // was waiting for the lock. Re-check under the lock so that `metadata_`
  // is not populated a second time.
  if (metadata_loaded_)
    return Status::Ok();

  if (remote_) {
    auto rest_client = storage_manager_->rest_client();
    if (rest_client == nullptr)
      return LOG_STATUS(Status::ArrayError(
          "Cannot load metadata; remote array with no REST client."));
    RETURN_NOT_OK(rest_client->get_array_metadata_from_rest(
        array_uri_, timestamp_, this));
  } else {
    RETURN_NOT_OK(storage_manager_->load_array_metadata(
        array_uri_, encryption_key_, timestamp_, &metadata_));
  }

  // Only flag the metadata as loaded once the fetch has succeeded, so a
  // failed load is retried on the next request.
  metadata_loaded_ = true;
  return Status::Ok();
}

} // namespace sm
} // namespace tiledb
28 changes: 23 additions & 5 deletions tiledb/sm/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,16 +295,25 @@ class Array {
const void** value);

/** Returns the number of array metadata items. */
Status get_metadata_num(uint64_t* num) const;
Status get_metadata_num(uint64_t* num);

/** Sets has_key == 1 and corresponding value_type if the array has key. */
Status has_metadata_key(const char* key, Datatype* value_type, bool* has_key);

/** Returns the array metadata object. */
Metadata* metadata();
/**
 * Retrieves the array metadata object, loading the array metadata first
 * if it has not been loaded yet.
 *
 * @param metadata Set to point to the array metadata object.
 * @return Status
 */
Status metadata(Metadata** metadata);

/** Returns the array metadata object. */
const Metadata* metadata() const;
/**
 * Retrieves the array metadata object.
 *
 * @note This is potentially an unsafe operation: it could have contention
 * with locks from lazy loading of metadata.
 * This should only be used by the serialization class
 * (tiledb/sm/serialization/array_schema.cc). In that class we need to fetch
 * the underlying Metadata object to set the values we are loading from REST.
 * A lock should already be taken before load_metadata is called.
 */
Metadata* metadata();

private:
/* ********************************* */
Expand Down Expand Up @@ -363,6 +372,9 @@ class Array {
/** The array metadata. */
Metadata metadata_;

/** True if the array metadata is loaded. */
bool metadata_loaded_;

/* ********************************* */
/* PRIVATE METHODS */
/* ********************************* */
Expand Down Expand Up @@ -414,6 +426,12 @@ class Array {
const T* subarray,
std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
max_buffer_sizes) const;

/**
 * Loads the array metadata, handling remote arrays (fetched through the
 * REST client) vs non-remote arrays (loaded through the storage manager).
 * On success the metadata is flagged as loaded.
 *
 * @return Status
 */
Status load_metadata();
};

} // namespace sm
Expand Down
22 changes: 15 additions & 7 deletions tiledb/sm/metadata/metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,7 @@ Status Metadata::deserialize(
}
}

// Create metadata index for fast lookups from index
metadata_index_.resize(metadata_map_.size());
size_t i = 0;
for (auto& m : metadata_map_)
metadata_index_[i++] = std::make_pair(&(m.first), &(m.second));

RETURN_NOT_OK(build_metadata_index());
// Note: `metadata_map_` and `metadata_index_` are immutable after this point

return Status::Ok();
Expand Down Expand Up @@ -215,7 +210,10 @@ Status Metadata::get(
uint32_t* key_len,
Datatype* value_type,
uint32_t* value_num,
const void** value) const {
const void** value) {
if (metadata_index_.empty())
RETURN_NOT_OK(build_metadata_index());

if (index >= metadata_index_.size())
return LOG_STATUS(
Status::MetadataError("Cannot get metadata; index out of bounds"));
Expand Down Expand Up @@ -300,5 +298,15 @@ Metadata::iterator Metadata::end() const {
/* PRIVATE METHODS */
/* ********************************* */

/**
 * Rebuilds `metadata_index_` from `metadata_map_`, so that metadata items
 * can be looked up by position. Each index slot holds pointers to the key
 * and the value of one map entry, in map iteration order.
 *
 * @return Status::Ok() always.
 */
Status Metadata::build_metadata_index() {
  // One slot per map entry.
  metadata_index_.resize(metadata_map_.size());

  auto slot = metadata_index_.begin();
  for (auto& entry : metadata_map_) {
    *slot = std::make_pair(&(entry.first), &(entry.second));
    ++slot;
  }

  return Status::Ok();
}

} // namespace sm
} // namespace tiledb
8 changes: 7 additions & 1 deletion tiledb/sm/metadata/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class Metadata {
uint32_t* key_len,
Datatype* value_type,
uint32_t* value_num,
const void** value) const;
const void** value);

/** Returns the number of metadata items. */
uint64_t num() const;
Expand Down Expand Up @@ -246,6 +246,12 @@ class Metadata {
/* ********************************* */
/* PRIVATE METHODS */
/* ********************************* */

/**
* Build the metadata index vector from the metadata map
* @return Status
*/
Status build_metadata_index();
};

} // namespace sm
Expand Down
5 changes: 2 additions & 3 deletions tiledb/sm/rest/rest_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ Status RestClient::get_array_metadata_from_rest(
array, serialization_type_, returned_data);
}

Status RestClient::post_array_metadata_to_rest(
const URI& uri, const Array* array) {
Status RestClient::post_array_metadata_to_rest(const URI& uri, Array* array) {
if (array == nullptr)
return LOG_STATUS(Status::RestError(
"Error posting array metadata to REST; array is null."));
Expand Down Expand Up @@ -690,7 +689,7 @@ Status RestClient::get_array_metadata_from_rest(const URI&, uint64_t, Array*) {
Status::RestError("Cannot use rest client; serialization not enabled."));
}

Status RestClient::post_array_metadata_to_rest(const URI&, const Array*) {
/** Stub variant: always logs and returns a REST error. */
Status RestClient::post_array_metadata_to_rest(const URI&, Array*) {
  const Status error =
      Status::RestError("Cannot use rest client; serialization not enabled.");
  return LOG_STATUS(error);
}
Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/rest/rest_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ class RestClient {
* @param array Array to update/post metadata for.
* @return Status
*/
Status post_array_metadata_to_rest(const URI& uri, const Array* array);
Status post_array_metadata_to_rest(const URI& uri, Array* array);

/**
* Post a data query to rest server
Expand Down
18 changes: 10 additions & 8 deletions tiledb/sm/serialization/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -888,24 +888,26 @@ Status max_buffer_sizes_deserialize(
}

Status array_metadata_serialize(
const Array* array,
SerializationType serialize_type,
Buffer* serialized_buffer) {
Array* array, SerializationType serialize_type, Buffer* serialized_buffer) {
if (array == nullptr)
return LOG_STATUS(Status::SerializationError(
"Error serializing array metadata; array instance is null"));
if (array->metadata() == nullptr)

Metadata* metadata;

RETURN_NOT_OK(array->metadata(&metadata));

if (metadata == nullptr)
return LOG_STATUS(Status::SerializationError(
"Error serializing array metadata; array metadata instance is null"));

const Metadata& metadata = *(array->metadata());
try {
// Serialize
::capnp::MallocMessageBuilder message;
auto builder = message.initRoot<capnp::ArrayMetadata>();
auto entries_builder = builder.initEntries(metadata.num());
auto entries_builder = builder.initEntries(metadata->num());
size_t i = 0;
for (auto it = metadata.begin(); it != metadata.end(); ++it) {
for (auto it = metadata->begin(); it != metadata->end(); ++it) {
auto entry_builder = entries_builder[i++];
const auto& entry = it->second;
auto datatype = static_cast<Datatype>(entry.type_);
Expand Down Expand Up @@ -1105,7 +1107,7 @@ Status max_buffer_sizes_deserialize(
"Cannot serialize; serialization not enabled."));
}

Status array_metadata_serialize(const Array*, SerializationType, Buffer*) {
/** Stub variant: always logs and returns a serialization error. */
Status array_metadata_serialize(Array*, SerializationType, Buffer*) {
  const Status error = Status::SerializationError(
      "Cannot serialize; serialization not enabled.");
  return LOG_STATUS(error);
}
Expand Down
4 changes: 1 addition & 3 deletions tiledb/sm/serialization/array_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ Status max_buffer_sizes_deserialize(
buffer_sizes);

Status array_metadata_serialize(
const Array* array,
SerializationType serialize_type,
Buffer* serialized_buffer);
Array* array, SerializationType serialize_type, Buffer* serialized_buffer);

Status array_metadata_deserialize(
Array* array,
Expand Down
Loading

0 comments on commit cb47648

Please sign in to comment.