Skip to content

Commit

Permalink
Add tests for partial tile offsets loading. (#5004)
Browse files Browse the repository at this point in the history
Adding complete tests for the partial tile offset loading feature.
Remodeled one of the existing tests

[sc-46969]

---
TYPE:  NO_HISTORY
DESC: Add tests for partial tile offsets loading.
  • Loading branch information
DimitrisStaratzis authored Jun 6, 2024
1 parent 2dc4fb7 commit bc4ec51
Showing 1 changed file with 156 additions and 52 deletions.
208 changes: 156 additions & 52 deletions test/src/unit-sparse-unordered-with-dups-reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct CSparseUnorderedWithDupsFx {
std::string partial_tile_offsets_loading_;

void create_default_array_1d();
void create_large_domain_array_1d();
void create_default_array_1d_string(int tile_extent = 2, int capacity = 2);
void write_1d_fragment(
int* coords, uint64_t* coords_size, int* data, uint64_t* data_size);
Expand Down Expand Up @@ -236,6 +237,27 @@ void CSparseUnorderedWithDupsFx::create_default_array_1d() {
true); // allows dups.
}

void CSparseUnorderedWithDupsFx::create_large_domain_array_1d() {
int domain[] = {1, 20000};
int tile_extent = 2;
create_array(
ctx_,
array_name_,
TILEDB_SPARSE,
{"d"},
{TILEDB_INT32},
{domain},
{&tile_extent},
{"a"},
{TILEDB_INT32},
{1},
{tiledb::test::Compressor(TILEDB_FILTER_NONE, -1)},
TILEDB_ROW_MAJOR,
TILEDB_ROW_MAJOR,
2,
true); // allows dups.
}

void CSparseUnorderedWithDupsFx::create_default_array_1d_string(
int tile_extent, int capacity) {
int domain[] = {1, 20};
Expand Down Expand Up @@ -914,76 +936,158 @@ TEST_CASE_METHOD(

TEST_CASE_METHOD(
CSparseUnorderedWithDupsFx,
"Sparse unordered with dups reader: tile offsets forcing multiple "
"iterations",
"Sparse unordered with dups reader: tile offsets partial loading",
"[sparse-unordered-with-dups][tile-offsets][multiple-iterations]") {
bool set_subarray = GENERATE(true, false);
bool enable_partial_tile_offsets_loading = GENERATE(true, false);

// Create default array.
reset_config();
create_default_array_1d();

// Write two fragments.
std::vector<int> coords(100);
std::iota(coords.begin(), coords.end(), 1);
uint64_t coords_size = coords.size() * sizeof(int);
create_large_domain_array_1d();
bool one_frag = false;

std::vector<int> data(100);
std::iota(data.begin(), data.end(), 1);
uint64_t data_size = data.size() * sizeof(int);
SECTION("- One fragment") {
one_frag = true;
}
SECTION("- Multiple fragments") {
one_frag = false;
}

write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size);
// Write fragments.
if (one_frag) {
std::vector<int> coords(100);
std::iota(coords.begin(), coords.end(), 1);
uint64_t coords_size = coords.size() * sizeof(int);

std::vector<int> coords2(100);
std::iota(coords2.begin(), coords2.end(), 101);
uint64_t coords2_size = coords.size() * sizeof(int);
std::vector<int> data(100);
std::iota(data.begin(), data.end(), 1);
uint64_t data_size = data.size() * sizeof(int);

std::vector<int> data2(100);
std::iota(data2.begin(), data2.end(), 101);
uint64_t data2_size = data.size() * sizeof(int);
write_1d_fragment(coords2.data(), &coords2_size, data2.data(), &data2_size);
write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size);
} else {
std::vector<int> coords(100);
std::iota(coords.begin(), coords.end(), 1);
uint64_t coords_size = coords.size() * sizeof(int);

std::vector<int> data(100);
std::iota(data.begin(), data.end(), 1);
uint64_t data_size = data.size() * sizeof(int);

write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size);

std::vector<int> coords2(1000);
std::iota(coords2.begin(), coords2.end(), 101);
uint64_t coords2_size = coords2.size() * sizeof(int);

std::vector<int> data2(1000);
std::iota(data2.begin(), data2.end(), 101);
uint64_t data2_size = data2.size() * sizeof(int);
write_1d_fragment(coords2.data(), &coords2_size, data2.data(), &data2_size);

std::vector<int> coords3(5000);
std::iota(coords3.begin(), coords3.end(), 1101);
uint64_t coords3_size = coords3.size() * sizeof(int);

std::vector<int> data3(5000);
std::iota(data3.begin(), data3.end(), 1101);
uint64_t data3_size = data3.size() * sizeof(int);
write_1d_fragment(coords3.data(), &coords3_size, data3.data(), &data3_size);

std::vector<int> coords4(10000);
std::iota(coords4.begin(), coords4.end(), 6101);
uint64_t coords4_size = coords4.size() * sizeof(int);

std::vector<int> data4(10000);
std::iota(data4.begin(), data4.end(), 6101);
uint64_t data4_size = data4.size() * sizeof(int);
write_1d_fragment(coords4.data(), &coords4_size, data4.data(), &data4_size);
}

total_budget_ = "1900000";
tile_upper_memory_limit_ = "100000";
ratio_array_data_ = set_subarray ? "0.003" : "0.002";
partial_tile_offsets_loading_ = "true";
total_budget_ = "3000000";
ratio_array_data_ = "0.002";
partial_tile_offsets_loading_ =
enable_partial_tile_offsets_loading ? "true" : "false";
update_config();

tiledb_array_t* array = nullptr;
tiledb_query_t* query = nullptr;

// Try to read.
int coords_r[200];
int data_r[200];
int coords_r[16100];
int data_r[16100];
uint64_t coords_r_size = sizeof(coords_r);
uint64_t data_r_size = sizeof(data_r);
auto rc = read(
set_subarray,
0,
coords_r,
&coords_r_size,
data_r,
&data_r_size,
&query,
&array);
CHECK(rc == TILEDB_OK);
auto rc = 0;

// Case 1: Read only one frag. Should be ok for both cases of partial tile
// loading Case 2: Read multiple fragments with partial tile offset loading.
// Should be ok
if (enable_partial_tile_offsets_loading || one_frag) {
rc = read(
false,
0,
coords_r,
&coords_r_size,
data_r,
&data_r_size,
&query,
&array);
CHECK(rc == TILEDB_OK);

// Validate the results.
for (int i = 0; i < 200; i++) {
CHECK(coords_r[i] == i + 1);
CHECK(data_r[i] == i + 1);
}
// Validate the results.
int elements_to_check;
if (one_frag) {
elements_to_check = 100;
} else {
elements_to_check = 16100;
}

// Check the internal loop count against expected value.
auto stats =
((SparseUnorderedWithDupsReader<uint8_t>*)query->query_->strategy())
->stats();
REQUIRE(stats != nullptr);
auto counters = stats->counters();
REQUIRE(counters != nullptr);
auto loop_num =
counters->find("Context.StorageManager.Query.Reader.internal_loop_num");
CHECK(2 == loop_num->second);
for (int i = 0; i < elements_to_check; i++) {
CHECK(coords_r[i] == i + 1);
CHECK(data_r[i] == i + 1);
}

// Check the internal loop count against expected value.
auto stats =
((SparseUnorderedWithDupsReader<uint8_t>*)query->query_->strategy())
->stats();
REQUIRE(stats != nullptr);
auto counters = stats->counters();
REQUIRE(counters != nullptr);
auto loop_num =
counters->find("Context.StorageManager.Query.Reader.internal_loop_num");

if (one_frag) {
CHECK(1 == loop_num->second);
} else {
CHECK(9 == loop_num->second);
}

// Try to read multiple frags without partial tile offset reading. Should
// fail
} else {
rc = read(
false,
0,
coords_r,
&coords_r_size,
data_r,
&data_r_size,
&query,
&array);
CHECK(rc == TILEDB_ERR);

tiledb_error_t* error = NULL;
rc = tiledb_ctx_get_last_error(ctx_, &error);
CHECK(rc == TILEDB_OK);

const char* msg;
rc = tiledb_error_message(error, &msg);
CHECK(rc == TILEDB_OK);

std::string error_str(msg);
CHECK(
error_str.find("Cannot load tile offsets, computed size") !=
std::string::npos);
}

// Clean up.
rc = tiledb_array_close(ctx_, array);
Expand Down

0 comments on commit bc4ec51

Please sign in to comment.