Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to use post_array_schema_from_rest. #5181

Draft
wants to merge 36 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
34571cb
Implement tiledb_handle_load_array_schema_request
davisp Sep 11, 2023
2fbef25
Switch to `post_array_schema_from_rest`
davisp Sep 11, 2023
38883d2
Add tiledb_array_schema_get_enumeration
davisp Sep 11, 2023
a7c849d
[WIP] Informal feed back on the load schema API
davisp Sep 12, 2023
8eda237
Merge remote-tracking branch 'origin/dev' into pd/sc-32991/add-handle…
shaunrd0 Jul 9, 2024
c96ca4b
Changes after merge.
shaunrd0 Jul 9, 2024
bb69fdc
Regenerate capnp sources.
shaunrd0 Jul 9, 2024
b4cee56
Add test from @jp-dark.
shaunrd0 Jul 10, 2024
63e22d4
Serialize all array schemas.
shaunrd0 Jul 10, 2024
8cccf09
Regenerate capnp.
shaunrd0 Jul 10, 2024
3aa1f8d
Use post_array_schema_from_rest.
shaunrd0 Jul 11, 2024
5b1f8ec
Update to use schema_load_with_options.
shaunrd0 Jul 11, 2024
61a1d39
Update tests to use VFSTestSetup.
shaunrd0 Jul 11, 2024
95abe63
Merge remote-tracking branch 'origin/dev' into pd/sc-32991/use-handle…
shaunrd0 Jul 11, 2024
73eadd4
Port regression test to REST CI.
shaunrd0 Jul 11, 2024
2783cbc
Test with REST CI.
shaunrd0 Jul 11, 2024
57ccb1f
Remove get_array_schema_from_rest, update docs.
shaunrd0 Jul 11, 2024
a3b5c44
Fix redefinition for stub function.
shaunrd0 Jul 11, 2024
9b1d3ae
Remove bool from LoadArraySchemaRequest.
shaunrd0 Jul 12, 2024
10f789b
Regenerate capnp.
shaunrd0 Jul 12, 2024
a0bf2d2
Fix config option name.
shaunrd0 Jul 15, 2024
5072f3c
Merge remote-tracking branch 'origin/dev' into pd/sc-32991/use-handle…
shaunrd0 Jul 18, 2024
e1020fc
Changes after merge.
shaunrd0 Jul 18, 2024
f322b78
Changes from review.
shaunrd0 Jul 18, 2024
afa9175
Move logic out of tiledb.cc.
shaunrd0 Jul 19, 2024
c424e77
Check all array schemas.
shaunrd0 Jul 19, 2024
e9d8191
Add docs and rename function.
shaunrd0 Jul 19, 2024
ddd3ac9
Merge remote-tracking branch 'origin/dev' into pd/sc-32991/use-handle…
shaunrd0 Jul 19, 2024
b00936b
Fix REST CI segfault.
shaunrd0 Jul 19, 2024
aa494ce
Fix CI.
shaunrd0 Jul 22, 2024
e05e701
Merge remote-tracking branch 'origin/dev' into pd/sc-32991/use-handle…
shaunrd0 Jul 23, 2024
2cbf009
Merge branch 'dev' into pd/sc-32991/use-handle-load-array-request
ypatia Jul 24, 2024
4cde5b6
Remove test branch from CI.
shaunrd0 Jul 25, 2024
a5687f0
Merge remote-tracking branch 'origin/dev' into pd/sc-32991/use-handle…
shaunrd0 Aug 7, 2024
54b9305
Load enumerations for all array schemas.
shaunrd0 Aug 7, 2024
6f108ee
Merge remote-tracking branch 'origin/dev' into pd/sc-32991/use-handle…
shaunrd0 Aug 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ trigger_pipeline:

variables:
TILEDB_REF: ${CI_COMMIT_REF_NAME}
TILEDB_CLOUD_REST_REF: pd/sc-32991/wrap-handle-load-array-schema-request
shaunrd0 marked this conversation as resolved.
Show resolved Hide resolved
trigger:
project: tiledb-inc/tiledb-internal
strategy: depend
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ set(TILEDB_UNIT_TEST_SOURCES
src/unit-Reader.cc
src/unit-request-handlers.cc
src/unit-resource-pool.cc
src/unit-rest-array-schema-load.cc
src/unit-rest-enumerations.cc
src/unit-result-coords.cc
src/unit-result-tile.cc
Expand All @@ -206,6 +207,7 @@ set(TILEDB_UNIT_TEST_SOURCES
if (TILEDB_CPP_API)
list(APPEND TILEDB_UNIT_TEST_SOURCES
src/cpp-integration-filter-pipeline.cc
src/cpp-integration-rest-schema-evolution.cc
src/test-cppapi-dense-array-dimension-label.cc
src/test-cppapi-dimension-label.cc
src/test-cppapi-ndrectangle.cc
Expand Down
254 changes: 254 additions & 0 deletions test/src/cpp-integration-rest-schema-evolution.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
#include <test/support/src/vfs_helpers.h>
shaunrd0 marked this conversation as resolved.
Show resolved Hide resolved
#include <test/support/tdb_catch.h>
#include "tiledb/sm/c_api/tiledb_struct_def.h"
#include "tiledb/sm/cpp_api/tiledb"
#include "tiledb/sm/cpp_api/tiledb_experimental"
#include "tiledb/sm/rest/rest_client.h"

#include <fstream>

using namespace tiledb;

// Forward declarations of the file-local helpers used by the time-travel
// regression test below. Each helper builds its own VFSTestSetup/Context.

// Creates the sparse test array (dimension "d", attribute "a") at `array_uri`.
static void create_array(const std::string& array_uri);
// Writes five cells of "d" and "a" to the array.
static void write_first_fragment(const std::string& array_uri);
// Returns a millisecond timestamp taken between two short sleeps.
static uint64_t time_travel_destination();
// Evolves the array schema by adding the int32 attribute "b".
static void add_attr_b(const std::string& array_uri);
// Writes five further cells of "d", "a" and "b" to the array.
static void write_second_fragment(const std::string& array_uri);
// Reads "d", "a" and "b" at the current time and checks all ten cells.
static void read_without_time_travel(const std::string& array_uri);
// Reads the array as of timestamp `when` and checks that "b" is not a field.
static void read_with_time_travel(const std::string& array_uri, uint64_t when);

void create_array(const std::string& array_uri) {
tiledb::test::VFSTestSetup vfs_test_setup;
tiledb::Context ctx{vfs_test_setup.ctx()};

auto obj = tiledb::Object::object(ctx, array_uri);
if (obj.type() != tiledb::Object::Type::Invalid) {
tiledb::Object::remove(ctx, array_uri);
}

auto dim = tiledb::Dimension::create<int32_t>(ctx, "d", {{0, 1024}});

tiledb::Domain dom(ctx);
dom.add_dimension(dim);

auto attr = tiledb::Attribute::create<int32_t>(ctx, "a");

tiledb::ArraySchema schema(ctx, TILEDB_SPARSE);
schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}})
.set_domain(dom)
.add_attribute(attr);

tiledb::Array::create(array_uri, schema);
}

/**
 * Writes the first fragment: coordinates 0..4 on "d" with values 5..9 on "a".
 *
 * The write uses the unordered layout and must complete in a single submit.
 *
 * @param array_uri URI of the array to write to.
 */
void write_first_fragment(const std::string& array_uri) {
  std::vector<int32_t> coords = {0, 1, 2, 3, 4};
  std::vector<int32_t> a_values = {5, 6, 7, 8, 9};

  tiledb::test::VFSTestSetup vfs_test_setup;
  tiledb::Context ctx{vfs_test_setup.ctx()};
  tiledb::Array array(ctx, array_uri, TILEDB_WRITE);

  tiledb::Query query(ctx, array, TILEDB_WRITE);
  query.set_layout(TILEDB_UNORDERED);
  query.set_data_buffer("d", coords);
  query.set_data_buffer("a", a_values);

  REQUIRE(query.submit() == tiledb::Query::Status::COMPLETE);
  array.close();
}

/**
 * Produces a timestamp that is padded by 5ms of sleep on either side.
 *
 * Callers invoke this between two writes so that the returned time falls
 * strictly between the fragments and can be used as a time-travel target.
 *
 * @return The current time in milliseconds, sampled between the two sleeps.
 */
uint64_t time_travel_destination() {
  const auto gap = std::chrono::milliseconds(5);

  // Pad before sampling so earlier writes are strictly older...
  std::this_thread::sleep_for(gap);
  const auto between_fragments = tiledb_timestamp_now_ms();
  // ...and after sampling so later writes are strictly newer.
  std::this_thread::sleep_for(gap);

  return between_fragments;
}

void add_attr_b(const std::string& array_uri) {
tiledb::test::VFSTestSetup vfs_test_setup;
tiledb::Context ctx{vfs_test_setup.ctx()};
auto attr = tiledb::Attribute::create<int32_t>(ctx, "b");

tiledb::ArraySchemaEvolution ase(ctx);
ase.add_attribute(attr);
ase.array_evolve(array_uri);
}

/**
 * Writes the second fragment, which also populates attribute "b":
 * coordinates 5..9 on "d", values 10..14 on "a" and 15..19 on "b".
 *
 * The write uses the unordered layout and must complete in a single submit.
 *
 * @param array_uri URI of the array to write to.
 */
void write_second_fragment(const std::string& array_uri) {
  std::vector<int32_t> coords = {5, 6, 7, 8, 9};
  std::vector<int32_t> a_values = {10, 11, 12, 13, 14};
  std::vector<int32_t> b_values = {15, 16, 17, 18, 19};

  tiledb::test::VFSTestSetup vfs_test_setup;
  tiledb::Context ctx{vfs_test_setup.ctx()};
  tiledb::Array array(ctx, array_uri, TILEDB_WRITE);

  tiledb::Query query(ctx, array, TILEDB_WRITE);
  query.set_layout(TILEDB_UNORDERED);
  query.set_data_buffer("d", coords);
  query.set_data_buffer("a", a_values);
  query.set_data_buffer("b", b_values);

  REQUIRE(query.submit() == tiledb::Query::Status::COMPLETE);
  array.close();
}

/**
 * Reads the array at the current time and validates all ten cells.
 *
 * Expected contents: d == i and a == i + 5 for every cell i. Attribute "b"
 * must equal INT32_MIN for the first five cells (written before "b" existed)
 * and i + 10 for the remaining five.
 *
 * @param array_uri URI of the array to read.
 */
void read_without_time_travel(const std::string& array_uri) {
  std::vector<int32_t> d_data(10);
  std::vector<int32_t> a_data(10);
  std::vector<int32_t> b_data(10);

  tiledb::test::VFSTestSetup vfs_test_setup;
  tiledb::Context ctx{vfs_test_setup.ctx()};
  tiledb::Array array(ctx, array_uri, TILEDB_READ);

  tiledb::Query query(ctx, array, TILEDB_READ);
  query.set_data_buffer("d", d_data);
  query.set_data_buffer("a", a_data);
  query.set_data_buffer("b", b_data);

  REQUIRE(query.submit() == tiledb::Query::Status::COMPLETE);

  for (int32_t i = 0; i < 10; ++i) {
    REQUIRE(d_data[i] == i);
    REQUIRE(a_data[i] == i + 5);

    if (i >= 5) {
      // Cells from the second fragment carry real values for "b".
      REQUIRE(b_data[i] == i + 10);
    } else {
      // Cells from the first fragment predate attribute "b".
      REQUIRE(b_data[i] == INT32_MIN);
    }
  }
}

void read_with_time_travel(const std::string& array_uri, uint64_t when) {
std::vector<int32_t> d_data(10, INT_MAX);
std::vector<int32_t> a_data(10, INT_MAX);
std::vector<int32_t> b_data(10, INT_MAX);

tiledb::test::VFSTestSetup vfs_test_setup;
tiledb::Context ctx{vfs_test_setup.ctx()};
tiledb::Array array(
ctx,
array_uri,
TILEDB_READ,
tiledb::TemporalPolicy(tiledb::TimeTravel, when));
tiledb::Query query(ctx, array, TILEDB_READ);
query.set_data_buffer("d", d_data).set_data_buffer("a", a_data);

auto matcher = Catch::Matchers::ContainsSubstring("There is no field b");
REQUIRE_THROWS_WITH(query.set_data_buffer("b", b_data), matcher);

REQUIRE(query.submit() == tiledb::Query::Status::COMPLETE);

for (int32_t i = 0; i < 10; i++) {
if (i < 5) {
REQUIRE(d_data[i] == i);
REQUIRE(a_data[i] == i + 5);
REQUIRE(b_data[i] == INT_MAX);
} else {
REQUIRE(d_data[i] == INT_MAX);
REQUIRE(a_data[i] == INT_MAX);
REQUIRE(b_data[i] == INT_MAX);
}
}
}

// Regression test: an array opened with a time-travel timestamp that precedes
// a schema evolution must not expose the attribute added by that evolution.
TEST_CASE(
    "Use the correct schema when time traveling",
    "[time-traveling][array-schema][bug][sc35424][rest]") {
  tiledb::test::VFSTestSetup vfs_test_setup;
  const auto uri = vfs_test_setup.array_uri("test_time_traveling_schema");

  // State before the evolution: schema with "a" only, one fragment.
  create_array(uri);
  write_first_fragment(uri);

  // A timestamp strictly between the two fragments.
  const auto before_evolution = time_travel_destination();

  // State after the evolution: attribute "b" added, second fragment written.
  add_attr_b(uri);
  write_second_fragment(uri);

  // Validate the view at "now" and the view at the earlier timestamp.
  read_without_time_travel(uri);
  read_with_time_travel(uri, before_evolution);
}

// Regression test: an array opened with an end timestamp that precedes a
// schema evolution must report a latest schema older than that end timestamp,
// while still loading both schemas (original and evolved).
//
// The test runs once for each value of
// rest.use_refactored_array_open_and_query_submit ("true" and "false").
TEST_CASE(
    "Bug test: Schema evolution open array schema",
    "[array-schema-evolution][rest]") {
  // Create the virtual file system.
  tiledb::test::VFSTestSetup vfs_test_setup{nullptr, true};
  auto array_uri = vfs_test_setup.array_uri("schema_evolution_array");

  // Apply the generated config value before taking the context that the rest
  // of the test uses.
  auto config = vfs_test_setup.ctx().config();
  std::string qv3 = GENERATE("true", "false");
  config.set("rest.use_refactored_array_open_and_query_submit", qv3);
  INFO("Using rest.use_refactored_array_open_and_query_submit: " << qv3);
  vfs_test_setup.update_config(config.ptr().get());
  auto ctx = vfs_test_setup.ctx();

  // Create the array schema.
  tiledb::Domain domain(ctx);
  auto d1 = tiledb::Dimension::create<int64_t>(ctx, "d1", {{0, 100}}, 5);
  domain.add_dimension(d1);
  auto a1 = tiledb::Attribute::create<int64_t>(ctx, "a1");
  auto a2 = tiledb::Attribute::create<int8_t>(ctx, "a2");

  tiledb::ArraySchema schema(ctx, TILEDB_DENSE);
  schema.set_domain(domain);
  schema.add_attribute(a1);
  schema.add_attribute(a2);
  schema.set_cell_order(TILEDB_ROW_MAJOR);
  schema.set_tile_order(TILEDB_COL_MAJOR);

  // Create the array.
  tiledb::Array::create(array_uri, schema);

  // Evolve the array.
  // -- Drop attribute a1.
  // -- Add attribute a3.
  // -- Set timestamp to prevent both schemas from having the same timestamp.
  auto evolution = tiledb::ArraySchemaEvolution(ctx);
  auto a3 = tiledb::Attribute::create<int>(ctx, "a3");
  evolution.add_attribute(a3);
  evolution.drop_attribute("a1");
  const uint64_t now{tiledb_timestamp_now_ms() + 1};
  evolution.set_timestamp_range({now, now});
  evolution.array_evolve(array_uri);

  // Open the array before the schema evolution.
  const uint64_t timestamp{now - 1};
  tiledb::Array array(
      ctx,
      array_uri,
      TILEDB_READ,
      tiledb::TemporalPolicy(tiledb::TimestampStartEnd, 0, timestamp));

  // Get the internal TileDB array object.
  auto c_array = array.ptr();
  auto internal_array = c_array->array_;

  // Record timestamp information (INFO is only reported if a check fails).
  auto timestamp_start = internal_array->timestamp_start();
  auto timestamp_end = internal_array->timestamp_end();
  INFO(
      "Array timestamp range [" << timestamp_start << ", " << timestamp_end
                                << "]");

  // Get the latest schema and record its timestamp range. Bind by const
  // reference: only the timestamps are needed, so copying the whole schema
  // object would be wasted work.
  const auto& latest_schema = internal_array->array_schema_latest();
  const auto schema_timestamps = latest_schema.timestamp_range();
  INFO(
      "Schema timestamp range [" << schema_timestamps.first << ", "
                                 << schema_timestamps.second << "]");

  CHECK(schema_timestamps.first < timestamp_end);

  // Get all schemas and record their keys. Bind by const reference for the
  // same reason as above; the map is only inspected, never modified.
  const auto& all_schema = internal_array->array_schemas_all();
  for (const auto& element : all_schema) {
    UNSCOPED_INFO("-- Schema key: " << element.first);
  }
  CHECK(all_schema.size() == 2);
}
17 changes: 13 additions & 4 deletions test/src/unit-request-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,11 @@ struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
}

virtual shared_ptr<ArraySchema> create_schema() override;
shared_ptr<ArraySchema> call_handler(

std::tuple<
shared_ptr<ArraySchema>,
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
call_handler(
serialization::LoadArraySchemaRequest req, SerializationType stype);

shared_ptr<const Enumeration> create_string_enumeration(
Expand Down Expand Up @@ -121,8 +125,9 @@ TEST_CASE_METHOD(
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);

create_array();
auto schema =
auto schema_response =
call_handler(serialization::LoadArraySchemaRequest(false), stype);
auto schema = std::get<0>(schema_response);
REQUIRE(schema->has_enumeration("enmr"));
REQUIRE(schema->get_loaded_enumeration_names().size() == 0);
}
Expand All @@ -134,8 +139,9 @@ TEST_CASE_METHOD(
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);

create_array();
auto schema =
auto schema_response =
call_handler(serialization::LoadArraySchemaRequest(true), stype);
auto schema = std::get<0>(schema_response);
REQUIRE(schema->has_enumeration("enmr"));
REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
Expand Down Expand Up @@ -429,7 +435,10 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {
return schema;
}

shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
std::tuple<
shared_ptr<ArraySchema>,
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
HandleLoadArraySchemaRequestFx::call_handler(
serialization::LoadArraySchemaRequest req, SerializationType stype) {
// If this looks weird, it's because we're using the public C++ API to create
// these objects instead of the internal APIs elsewhere in this test suite.
Expand Down
Loading
Loading