Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for partial tile offsets loading. #5004

Merged
merged 3 commits into from
Jun 6, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 156 additions & 52 deletions test/src/unit-sparse-unordered-with-dups-reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct CSparseUnorderedWithDupsFx {
std::string partial_tile_offsets_loading_;

void create_default_array_1d();
void create_large_domain_array_1d();
void create_default_array_1d_string(int tile_extent = 2, int capacity = 2);
void write_1d_fragment(
int* coords, uint64_t* coords_size, int* data, uint64_t* data_size);
Expand Down Expand Up @@ -236,6 +237,27 @@ void CSparseUnorderedWithDupsFx::create_default_array_1d() {
true); // allows dups.
}

/**
 * Creates the test array at `array_name_` as a TILEDB_SPARSE array with a
 * single TILEDB_INT32 dimension "d" spanning [1, 20000] (tile extent 2) and a
 * single TILEDB_INT32 attribute "a" using TILEDB_FILTER_NONE. Duplicates are
 * allowed.
 */
void CSparseUnorderedWithDupsFx::create_large_domain_array_1d() {
  // Kept as mutable locals: their addresses are handed to create_array().
  int extent = 2;
  int dim_bounds[] = {1, 20000};

  create_array(
      ctx_,
      array_name_,
      TILEDB_SPARSE,
      {"d"},
      {TILEDB_INT32},
      {dim_bounds},
      {&extent},
      {"a"},
      {TILEDB_INT32},
      {1},
      {tiledb::test::Compressor(TILEDB_FILTER_NONE, -1)},
      TILEDB_ROW_MAJOR,
      TILEDB_ROW_MAJOR,
      2,
      true);  // allows dups.
}

void CSparseUnorderedWithDupsFx::create_default_array_1d_string(
int tile_extent, int capacity) {
int domain[] = {1, 20};
Expand Down Expand Up @@ -914,76 +936,158 @@ TEST_CASE_METHOD(

TEST_CASE_METHOD(
CSparseUnorderedWithDupsFx,
"Sparse unordered with dups reader: tile offsets forcing multiple "
"iterations",
"Sparse unordered with dups reader: tile offsets partial loading",
"[sparse-unordered-with-dups][tile-offsets][multiple-iterations]") {
bool set_subarray = GENERATE(true, false);
bool enable_partial_tile_offsets_loading = GENERATE(true, false);

// Create default array.
reset_config();
create_default_array_1d();

// Write two fragments.
std::vector<int> coords(100);
std::iota(coords.begin(), coords.end(), 1);
uint64_t coords_size = coords.size() * sizeof(int);
create_large_domain_array_1d();
bool one_frag = false;

std::vector<int> data(100);
std::iota(data.begin(), data.end(), 1);
uint64_t data_size = data.size() * sizeof(int);
SECTION("- One fragment") {
one_frag = true;
}
SECTION("- Multiple fragments") {
one_frag = false;
}

write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size);
// Write fragments.
if (one_frag) {
std::vector<int> coords(100);
std::iota(coords.begin(), coords.end(), 1);
uint64_t coords_size = coords.size() * sizeof(int);

std::vector<int> coords2(100);
std::iota(coords2.begin(), coords2.end(), 101);
uint64_t coords2_size = coords.size() * sizeof(int);
std::vector<int> data(100);
std::iota(data.begin(), data.end(), 1);
uint64_t data_size = data.size() * sizeof(int);

std::vector<int> data2(100);
std::iota(data2.begin(), data2.end(), 101);
uint64_t data2_size = data.size() * sizeof(int);
write_1d_fragment(coords2.data(), &coords2_size, data2.data(), &data2_size);
write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size);
} else {
std::vector<int> coords(100);
std::iota(coords.begin(), coords.end(), 1);
uint64_t coords_size = coords.size() * sizeof(int);

std::vector<int> data(100);
std::iota(data.begin(), data.end(), 1);
uint64_t data_size = data.size() * sizeof(int);

write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size);

std::vector<int> coords2(1000);
std::iota(coords2.begin(), coords2.end(), 101);
uint64_t coords2_size = coords2.size() * sizeof(int);

std::vector<int> data2(1000);
std::iota(data2.begin(), data2.end(), 101);
uint64_t data2_size = data2.size() * sizeof(int);
write_1d_fragment(coords2.data(), &coords2_size, data2.data(), &data2_size);

std::vector<int> coords3(5000);
std::iota(coords3.begin(), coords3.end(), 1101);
uint64_t coords3_size = coords3.size() * sizeof(int);

std::vector<int> data3(5000);
std::iota(data3.begin(), data3.end(), 1101);
uint64_t data3_size = data3.size() * sizeof(int);
write_1d_fragment(coords3.data(), &coords3_size, data3.data(), &data3_size);

std::vector<int> coords4(10000);
std::iota(coords4.begin(), coords4.end(), 6101);
uint64_t coords4_size = coords4.size() * sizeof(int);

std::vector<int> data4(10000);
std::iota(data4.begin(), data4.end(), 6101);
uint64_t data4_size = data4.size() * sizeof(int);
write_1d_fragment(coords4.data(), &coords4_size, data4.data(), &data4_size);
}

total_budget_ = "1900000";
tile_upper_memory_limit_ = "100000";
ratio_array_data_ = set_subarray ? "0.003" : "0.002";
partial_tile_offsets_loading_ = "true";
total_budget_ = "3000000";
ratio_array_data_ = "0.002";
partial_tile_offsets_loading_ =
enable_partial_tile_offsets_loading ? "true" : "false";
update_config();

tiledb_array_t* array = nullptr;
tiledb_query_t* query = nullptr;

// Try to read.
int coords_r[200];
int data_r[200];
int coords_r[16100];
int data_r[16100];
uint64_t coords_r_size = sizeof(coords_r);
uint64_t data_r_size = sizeof(data_r);
auto rc = read(
set_subarray,
0,
coords_r,
&coords_r_size,
data_r,
&data_r_size,
&query,
&array);
CHECK(rc == TILEDB_OK);
auto rc = 0;

// Case 1: Read only one fragment. Should be ok whether or not partial tile
// offsets loading is enabled.
// Case 2: Read multiple fragments with partial tile offsets loading enabled.
// Should be ok.
if (enable_partial_tile_offsets_loading || one_frag) {
rc = read(
false,
0,
coords_r,
&coords_r_size,
data_r,
&data_r_size,
&query,
&array);
CHECK(rc == TILEDB_OK);

// Validate the results.
for (int i = 0; i < 200; i++) {
CHECK(coords_r[i] == i + 1);
CHECK(data_r[i] == i + 1);
}
// Validate the results.
int elements_to_check;
if (one_frag) {
elements_to_check = 100;
} else {
elements_to_check = 16100;
}

// Check the internal loop count against expected value.
auto stats =
((SparseUnorderedWithDupsReader<uint8_t>*)query->query_->strategy())
->stats();
REQUIRE(stats != nullptr);
auto counters = stats->counters();
REQUIRE(counters != nullptr);
auto loop_num =
counters->find("Context.StorageManager.Query.Reader.internal_loop_num");
CHECK(2 == loop_num->second);
for (int i = 0; i < elements_to_check; i++) {
CHECK(coords_r[i] == i + 1);
CHECK(data_r[i] == i + 1);
}

// Check the internal loop count against expected value.
auto stats =
((SparseUnorderedWithDupsReader<uint8_t>*)query->query_->strategy())
->stats();
REQUIRE(stats != nullptr);
auto counters = stats->counters();
REQUIRE(counters != nullptr);
auto loop_num =
counters->find("Context.StorageManager.Query.Reader.internal_loop_num");

if (one_frag) {
CHECK(1 == loop_num->second);
} else {
CHECK(9 == loop_num->second);
}

// Try to read multiple fragments without partial tile offsets loading.
// Should fail.
} else {
rc = read(
false,
0,
coords_r,
&coords_r_size,
data_r,
&data_r_size,
&query,
&array);
CHECK(rc == TILEDB_ERR);

tiledb_error_t* error = NULL;
rc = tiledb_ctx_get_last_error(ctx_, &error);
CHECK(rc == TILEDB_OK);

const char* msg;
rc = tiledb_error_message(error, &msg);
CHECK(rc == TILEDB_OK);

std::string error_str(msg);
CHECK(
error_str.find("Cannot load tile offsets, computed size") !=
std::string::npos);
}

// Clean up.
rc = tiledb_array_close(ctx_, array);
Expand Down
Loading