Skip to content

Commit

Permalink
Add serialization and API changes for post_array_schema_from_rest. (#…
Browse files Browse the repository at this point in the history
…5237)

This factors out serialization and API changes in #5181 that are
required for the HandleGetArraySchema route. These changes will need to
be available on REST before we can enable the new route for loading the
array schema.

There is a quick summary of the changes required in
[SC-52877](https://app.shortcut.com/tiledb-inc/story/52877/core-serialization-changes-for-loadarrayschema-models).

---
TYPE: IMPROVEMENT
DESC: Add serialization and API changes for post_array_schema_from_rest.
  • Loading branch information
shaunrd0 authored Aug 27, 2024
1 parent 910fffd commit 9b4e5ea
Show file tree
Hide file tree
Showing 26 changed files with 633 additions and 116 deletions.
2 changes: 1 addition & 1 deletion test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ void check_save_to_file() {
ss << "rest.curl.buffer_size 524288\n";
ss << "rest.curl.verbose false\n";
ss << "rest.http_compressor any\n";
ss << "rest.load_enumerations_on_array_open true\n";
ss << "rest.load_enumerations_on_array_open false\n";
ss << "rest.load_metadata_on_array_open true\n";
ss << "rest.load_non_empty_domain_on_array_open true\n";
ss << "rest.retry_count 25\n";
Expand Down
63 changes: 63 additions & 0 deletions test/src/unit-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,69 @@ TEST_CASE_METHOD(
REQUIRE(schema->is_enumeration_loaded("test_enmr") == true);
}

TEST_CASE_METHOD(
    EnumerationFx,
    "Array - Load All Enumerations - All Schemas",
    "[enumeration][array][load-all-enumerations][all-schemas]") {
  // Start from a freshly created array whose enumeration is not yet loaded.
  create_array();
  auto array = get_array(QueryType::READ);
  auto schema = array->array_schema_latest_ptr();
  REQUIRE(schema->is_enumeration_loaded("test_enmr") == false);
  std::string name_v1 = schema->name();

  // Steps shared by both evolution rounds: check the evolution applies to the
  // current latest schema, apply it to the array on disk, reopen the array and
  // load every enumeration. `schema` is captured by reference so each round
  // evolves whatever the latest schema is at the time of the call.
  auto evolve_and_reload = [&](const auto& evolution) {
    CHECK_NOTHROW(evolution->evolve_schema(schema));
    CHECK_NOTHROW(Array::evolve_array_schema(
        ctx_.resources(),
        uri_,
        evolution.get(),
        array->get_encryption_key()));
    CHECK(array->reopen().ok());
    CHECK_NOTHROW(array->load_all_enumerations());
  };

  // Round one: add a new var-sized string enumeration.
  auto evolution = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
  std::vector<std::string> labels{"one", "two", "three"};
  evolution->add_enumeration(create_enumeration(
      labels, false, Datatype::STRING_ASCII, "ase_var_enmr"));
  // NOTE(review): attr4 is configured here but is never handed to the
  // evolution in this test -- confirm whether add_attribute was intended.
  auto attr4 = make_shared<Attribute>(HERE(), "attr4", Datatype::UINT16);
  attr4->set_enumeration_name("ase_var_enmr");
  evolve_and_reload(evolution);

  auto schemas = array->array_schemas_all();
  schema = array->array_schema_latest_ptr();
  std::string name_v2 = schema->name();

  // Both the original and the evolved schema must report loaded enumerations.
  CHECK(schemas[name_v1]->is_enumeration_loaded("test_enmr") == true);
  CHECK(schemas[name_v2]->is_enumeration_loaded("test_enmr") == true);
  CHECK(schemas[name_v2]->is_enumeration_loaded("ase_var_enmr") == true);

  // Round two: drop the original enumeration and the attribute "attr1".
  evolution = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
  evolution->drop_enumeration("test_enmr");
  evolution->drop_attribute("attr1");
  evolve_and_reload(evolution);

  schemas = array->array_schemas_all();
  schema = array->array_schema_latest_ptr();
  std::string name_v3 = schema->name();

  // Older schemas keep their enumerations loaded; the newest schema no longer
  // knows the dropped enumeration, so querying it must throw.
  CHECK(schemas[name_v1]->is_enumeration_loaded("test_enmr") == true);
  CHECK(schemas[name_v2]->is_enumeration_loaded("test_enmr") == true);
  CHECK(schemas[name_v2]->is_enumeration_loaded("ase_var_enmr") == true);
  CHECK_THROWS_WITH(
      schemas[name_v3]->is_enumeration_loaded("test_enmr"),
      Catch::Matchers::ContainsSubstring("No enumeration named"));
  CHECK(schemas[name_v3]->is_enumeration_loaded("ase_var_enmr") == true);
}

TEST_CASE_METHOD(
EnumerationFx,
"Array - Load All Enumerations - Repeated",
Expand Down
114 changes: 100 additions & 14 deletions test/src/unit-request-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#ifdef TILEDB_SERIALIZATION

#include "test/support/src/helpers.h"
#include "test/support/src/mem_helpers.h"
#include "test/support/tdb_catch.h"
#include "tiledb/api/c_api/buffer/buffer_api_internal.h"
Expand All @@ -41,6 +42,7 @@
#include "tiledb/sm/c_api/tiledb_serialization.h"
#include "tiledb/sm/c_api/tiledb_struct_def.h"
#include "tiledb/sm/cpp_api/tiledb"
#include "tiledb/sm/cpp_api/tiledb_experimental"
#include "tiledb/sm/crypto/encryption_key.h"
#include "tiledb/sm/enums/array_type.h"
#include "tiledb/sm/enums/encryption_type.h"
Expand All @@ -67,6 +69,7 @@ struct RequestHandlerFx {
Config cfg_;
Context ctx_;
EncryptionKey enc_key_;
shared_ptr<ArraySchema> schema_;
};

struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
Expand All @@ -75,11 +78,17 @@ struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
}

virtual shared_ptr<ArraySchema> create_schema() override;
shared_ptr<ArraySchema> call_handler(

std::tuple<
shared_ptr<ArraySchema>,
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
call_handler(
serialization::LoadArraySchemaRequest req, SerializationType stype);

shared_ptr<const Enumeration> create_string_enumeration(
std::string name, std::vector<std::string>& values);

shared_ptr<ArraySchema> schema_add_attribute(const std::string& attr_name);
};

struct HandleQueryPlanRequestFx : RequestHandlerFx {
Expand Down Expand Up @@ -116,15 +125,23 @@ struct HandleConsolidationPlanRequestFx : RequestHandlerFx {

TEST_CASE_METHOD(
HandleLoadArraySchemaRequestFx,
"tiledb_handle_load_array_schema_request - default request",
"tiledb_handle_load_array_schema_request - no enumerations",
"[request_handler][load_array_schema][default]") {
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);

create_array();
auto schema =
call_handler(serialization::LoadArraySchemaRequest(false), stype);
auto schema_response =
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
auto schema = std::get<0>(schema_response);
REQUIRE(schema->has_enumeration("enmr"));
REQUIRE(schema->get_loaded_enumeration_names().size() == 0);
tiledb::test::schema_equiv(*schema, *schema_);

// We did not evolve the schema so there should only be one.
auto all_schemas = std::get<1>(schema_response);
REQUIRE(all_schemas.size() == 1);
tiledb::test::schema_equiv(
*all_schemas.find(schema->name())->second, *schema_);
}

TEST_CASE_METHOD(
Expand All @@ -134,12 +151,57 @@ TEST_CASE_METHOD(
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);

create_array();
auto schema =
call_handler(serialization::LoadArraySchemaRequest(true), stype);
REQUIRE(cfg_.set("rest.load_enumerations_on_array_open", "true").ok());
auto schema_response =
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
auto schema = std::get<0>(schema_response);
REQUIRE(schema->has_enumeration("enmr"));
REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
REQUIRE(schema->get_enumeration("enmr") != nullptr);
tiledb::test::schema_equiv(*schema, *schema_);

// We did not evolve the schema so there should only be one.
auto all_schemas = std::get<1>(schema_response);
REQUIRE(all_schemas.size() == 1);
tiledb::test::schema_equiv(
*all_schemas.find(schema->name())->second, *schema_);
}

TEST_CASE_METHOD(
    HandleLoadArraySchemaRequestFx,
    "tiledb_handle_load_array_schema_request - multiple schemas",
    "[request_handler][load_array_schema][schema-evolution]") {
  // Run for every combination of serialization format and enumeration-loading
  // setting.
  auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);
  std::string load_enums = GENERATE("true", "false");

  create_array();

  // Track the schema the array was created with plus one schema per evolution,
  // in creation order.
  std::vector<shared_ptr<ArraySchema>> all_schemas{schema_};
  all_schemas.push_back(schema_add_attribute("b"));
  all_schemas.push_back(schema_add_attribute("c"));
  all_schemas.push_back(schema_add_attribute("d"));

  REQUIRE(cfg_.set("rest.load_enumerations_on_array_open", load_enums).ok());
  auto schema_response =
      call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
  auto schema = std::get<0>(schema_response);
  if (load_enums == "true") {
    REQUIRE(schema->has_enumeration("enmr"));
    REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
    REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
    REQUIRE(schema->get_enumeration("enmr") != nullptr);
  }
  // The latest schema should be equal to the last applied evolution.
  tiledb::test::schema_equiv(*schema, *all_schemas.back());

  // Validate schemas returned from the request in the order they were created.
  // Copying into a std::map iterates the response sorted by schema name.
  auto r_all_schemas = std::get<1>(schema_response);
  std::map<std::string, shared_ptr<ArraySchema>> resp(
      r_all_schemas.begin(), r_all_schemas.end());
  // The loop below indexes all_schemas once per returned schema, so the counts
  // must match; otherwise we would read out of bounds or skip checks.
  REQUIRE(resp.size() == all_schemas.size());
  size_t idx = 0;
  for (const auto& s : resp) {
    tiledb::test::schema_equiv(*s.second, *all_schemas[idx++]);
  }
}

TEST_CASE_METHOD(
Expand Down Expand Up @@ -346,7 +408,9 @@ TEST_CASE_METHOD(
RequestHandlerFx::RequestHandlerFx(const std::string uri)
: memory_tracker_(tiledb::test::create_test_memory_tracker())
, uri_(uri)
, ctx_(cfg_) {
, ctx_(cfg_)
, schema_(make_shared<ArraySchema>(
ArrayType::DENSE, ctx_.resources().ephemeral_memory_tracker())) {
delete_array();
throw_if_not_ok(enc_key_.set_key(EncryptionType::NO_ENCRYPTION, nullptr, 0));
}
Expand Down Expand Up @@ -405,9 +469,28 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration(
tiledb::test::create_test_memory_tracker());
}

/**
 * Evolves the fixture's array by adding one int32 attribute.
 *
 * The evolution is first applied to the in-memory `schema_` member and then
 * to the array at `uri_`.
 *
 * @param attr_name Name of the attribute to add.
 * @return The updated `schema_` member, for use in later validation.
 */
shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::schema_add_attribute(
    const std::string& attr_name) {
  // Build the evolution through the public C++ API with its own context.
  tiledb::Context cpp_ctx;
  tiledb::ArraySchemaEvolution evolution(cpp_ctx);
  evolution.add_attribute(
      tiledb::Attribute::create<int32_t>(cpp_ctx, attr_name));

  // Internal evolution object wrapped by the C++ API handle.
  const auto internal_evolution = evolution.ptr()->array_schema_evolution_;

  // Keep the expected schema in step with the array.
  schema_ = internal_evolution->evolve_schema(schema_);

  // Apply the same evolution to the array itself.
  Array::evolve_array_schema(
      ctx_.resources(), uri_, internal_evolution, enc_key_);

  return schema_;
}

shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {
// Create a schema to serialize
auto schema =
schema_ =
make_shared<ArraySchema>(HERE(), ArrayType::SPARSE, memory_tracker_);
auto dim =
make_shared<Dimension>(HERE(), "dim1", Datatype::INT32, memory_tracker_);
Expand All @@ -416,20 +499,23 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {

auto dom = make_shared<Domain>(HERE(), memory_tracker_);
throw_if_not_ok(dom->add_dimension(dim));
throw_if_not_ok(schema->set_domain(dom));
throw_if_not_ok(schema_->set_domain(dom));

std::vector<std::string> values = {"pig", "cow", "chicken", "dog", "cat"};
auto enmr = create_string_enumeration("enmr", values);
schema->add_enumeration(enmr);
schema_->add_enumeration(enmr);

auto attr = make_shared<Attribute>(HERE(), "attr", Datatype::INT32);
attr->set_enumeration_name("enmr");
throw_if_not_ok(schema->add_attribute(attr));
throw_if_not_ok(schema_->add_attribute(attr));

return schema;
return schema_;
}

shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
std::tuple<
shared_ptr<ArraySchema>,
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
HandleLoadArraySchemaRequestFx::call_handler(
serialization::LoadArraySchemaRequest req, SerializationType stype) {
// If this looks weird, it's because we're using the public C++ API to create
// these objects instead of the internal APIs elsewhere in this test suite.
Expand All @@ -451,7 +537,7 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
REQUIRE(rval == TILEDB_OK);

return serialization::deserialize_load_array_schema_response(
stype, resp_buf->buffer(), memory_tracker_);
uri_, stype, resp_buf->buffer(), memory_tracker_);
}

shared_ptr<ArraySchema> HandleQueryPlanRequestFx::create_schema() {
Expand Down
20 changes: 20 additions & 0 deletions test/support/src/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,26 @@ void read_sparse_v11(
tiledb_query_free(&query);
}

/**
 * Checks that two array schemas are equivalent, reporting every mismatch
 * through a non-fatal CHECK.
 *
 * Compared properties: array type, attribute count, per-attribute cell val
 * num / name / type / nullability / enumeration name, capacity, cell order,
 * tile order, allows-dups and array URI.
 *
 * @param schema1 First array schema.
 * @param schema2 Second array schema.
 */
void schema_equiv(
    const sm::ArraySchema& schema1, const sm::ArraySchema& schema2) {
  CHECK(schema1.array_type() == schema2.array_type());
  CHECK(schema1.attributes().size() == schema2.attributes().size());
  // The size check above is non-fatal, so execution continues on a mismatch.
  // Only walk the attributes present in both schemas to avoid indexing past
  // the end of the shorter one.
  const auto attr_num1 = schema1.attribute_num();
  const auto attr_num2 = schema2.attribute_num();
  const auto common_attr_num = attr_num1 < attr_num2 ? attr_num1 : attr_num2;
  for (unsigned int i = 0; i < common_attr_num; i++) {
    auto a = schema1.attribute(i);
    auto b = schema2.attribute(i);
    CHECK(a->cell_val_num() == b->cell_val_num());
    CHECK(a->name() == b->name());
    CHECK(a->type() == b->type());
    CHECK(a->nullable() == b->nullable());
    CHECK(a->get_enumeration_name() == b->get_enumeration_name());
  }
  CHECK(schema1.capacity() == schema2.capacity());
  CHECK(schema1.cell_order() == schema2.cell_order());
  CHECK(schema1.tile_order() == schema2.tile_order());
  CHECK(schema1.allows_dups() == schema2.allows_dups());
  CHECK(schema1.array_uri().to_string() == schema2.array_uri().to_string());
}

template void check_subarray<int8_t>(
tiledb::sm::Subarray& subarray, const SubarrayRanges<int8_t>& ranges);
template void check_subarray<uint8_t>(
Expand Down
9 changes: 9 additions & 0 deletions test/support/src/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,15 @@ void write_sparse_v11(
*/
void read_sparse_v11(
tiledb_ctx_t* ctx, const std::string& array_name, uint64_t timestamp);

/**
 * Helper function to test two array schemas are equivalent.
 *
 * Compares the array type, the number of attributes, each attribute's cell
 * val num, name, type, nullability and enumeration name, and the schemas'
 * capacity, cell order, tile order, allows-dups flag and array URI. Each
 * comparison is an equality test reported with a Catch2 CHECK, so a mismatch
 * is recorded as a test failure without stopping the remaining comparisons.
 *
 * @param schema1 Expected array schema.
 * @param schema2 Actual array schema.
 */
void schema_equiv(
    const sm::ArraySchema& schema1, const sm::ArraySchema& schema2);
} // namespace tiledb::test

#endif
Loading

0 comments on commit 9b4e5ea

Please sign in to comment.