From bc4ec51a689337a16302d94f6ec40ab8809704dd Mon Sep 17 00:00:00 2001 From: Dimitris Staratzis <33267511+DimitrisStaratzis@users.noreply.github.com> Date: Thu, 6 Jun 2024 11:21:02 +0300 Subject: [PATCH] Add tests for partial tile offsets loading. (#5004) Adding complete tests for the partial tile offset loading feature. Remodeled one of the existing tests [sc-46969] --- TYPE: NO_HISTORY DESC: Add tests for partial tile offsets loading. --- .../unit-sparse-unordered-with-dups-reader.cc | 208 +++++++++++++----- 1 file changed, 156 insertions(+), 52 deletions(-) diff --git a/test/src/unit-sparse-unordered-with-dups-reader.cc b/test/src/unit-sparse-unordered-with-dups-reader.cc index 56f55ffb7d2..470bbfb468a 100644 --- a/test/src/unit-sparse-unordered-with-dups-reader.cc +++ b/test/src/unit-sparse-unordered-with-dups-reader.cc @@ -68,6 +68,7 @@ struct CSparseUnorderedWithDupsFx { std::string partial_tile_offsets_loading_; void create_default_array_1d(); + void create_large_domain_array_1d(); void create_default_array_1d_string(int tile_extent = 2, int capacity = 2); void write_1d_fragment( int* coords, uint64_t* coords_size, int* data, uint64_t* data_size); @@ -236,6 +237,27 @@ void CSparseUnorderedWithDupsFx::create_default_array_1d() { true); // allows dups. } +void CSparseUnorderedWithDupsFx::create_large_domain_array_1d() { + int domain[] = {1, 20000}; + int tile_extent = 2; + create_array( + ctx_, + array_name_, + TILEDB_SPARSE, + {"d"}, + {TILEDB_INT32}, + {domain}, + {&tile_extent}, + {"a"}, + {TILEDB_INT32}, + {1}, + {tiledb::test::Compressor(TILEDB_FILTER_NONE, -1)}, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 2, + true); // allows dups. +} + void CSparseUnorderedWithDupsFx::create_default_array_1d_string( int tile_extent, int capacity) { int domain[] = {1, 20}; @@ -914,76 +936,158 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( CSparseUnorderedWithDupsFx, - "Sparse unordered with dups reader: tile offsets forcing multiple " - "iterations", + "Sparse unordered with dups reader: tile offsets partial loading", "[sparse-unordered-with-dups][tile-offsets][multiple-iterations]") { - bool set_subarray = GENERATE(true, false); + bool enable_partial_tile_offsets_loading = GENERATE(true, false); // Create default array. reset_config(); - create_default_array_1d(); - - // Write two fragments. - std::vector coords(100); - std::iota(coords.begin(), coords.end(), 1); - uint64_t coords_size = coords.size() * sizeof(int); + create_large_domain_array_1d(); + bool one_frag = false; - std::vector data(100); - std::iota(data.begin(), data.end(), 1); - uint64_t data_size = data.size() * sizeof(int); + SECTION("- One fragment") { + one_frag = true; + } + SECTION("- Multiple fragments") { + one_frag = false; + } - write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size); + // Write fragments. + if (one_frag) { + std::vector coords(100); + std::iota(coords.begin(), coords.end(), 1); + uint64_t coords_size = coords.size() * sizeof(int); - std::vector coords2(100); - std::iota(coords2.begin(), coords2.end(), 101); - uint64_t coords2_size = coords.size() * sizeof(int); + std::vector data(100); + std::iota(data.begin(), data.end(), 1); + uint64_t data_size = data.size() * sizeof(int); - std::vector data2(100); - std::iota(data2.begin(), data2.end(), 101); - uint64_t data2_size = data.size() * sizeof(int); - write_1d_fragment(coords2.data(), &coords2_size, data2.data(), &data2_size); + write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size); + } else { + std::vector coords(100); + std::iota(coords.begin(), coords.end(), 1); + uint64_t coords_size = coords.size() * sizeof(int); + + std::vector data(100); + std::iota(data.begin(), data.end(), 1); + uint64_t data_size = data.size() * sizeof(int); + + write_1d_fragment(coords.data(), &coords_size, data.data(), &data_size); + + std::vector coords2(1000); + std::iota(coords2.begin(), coords2.end(), 101); + uint64_t coords2_size = coords2.size() * sizeof(int); + + std::vector data2(1000); + std::iota(data2.begin(), data2.end(), 101); + uint64_t data2_size = data2.size() * sizeof(int); + write_1d_fragment(coords2.data(), &coords2_size, data2.data(), &data2_size); + + std::vector coords3(5000); + std::iota(coords3.begin(), coords3.end(), 1101); + uint64_t coords3_size = coords3.size() * sizeof(int); + + std::vector data3(5000); + std::iota(data3.begin(), data3.end(), 1101); + uint64_t data3_size = data3.size() * sizeof(int); + write_1d_fragment(coords3.data(), &coords3_size, data3.data(), &data3_size); + + std::vector coords4(10000); + std::iota(coords4.begin(), coords4.end(), 6101); + uint64_t coords4_size = coords4.size() * sizeof(int); + + std::vector data4(10000); + std::iota(data4.begin(), data4.end(), 6101); + uint64_t data4_size = data4.size() * sizeof(int); + write_1d_fragment(coords4.data(), &coords4_size, data4.data(), &data4_size); + } - total_budget_ = "1900000"; - tile_upper_memory_limit_ = "100000"; - ratio_array_data_ = set_subarray ? "0.003" : "0.002"; - partial_tile_offsets_loading_ = "true"; + total_budget_ = "3000000"; + ratio_array_data_ = "0.002"; + partial_tile_offsets_loading_ = + enable_partial_tile_offsets_loading ? "true" : "false"; update_config(); tiledb_array_t* array = nullptr; tiledb_query_t* query = nullptr; - // Try to read. - int coords_r[200]; - int data_r[200]; + int coords_r[16100]; + int data_r[16100]; uint64_t coords_r_size = sizeof(coords_r); uint64_t data_r_size = sizeof(data_r); - auto rc = read( - set_subarray, - 0, - coords_r, - &coords_r_size, - data_r, - &data_r_size, - &query, - &array); - CHECK(rc == TILEDB_OK); + auto rc = 0; + + // Case 1: Read only one frag. Should be ok for both cases of partial tile + // loading Case 2: Read multiple fragments with partial tile offset loading. + // Should be ok + if (enable_partial_tile_offsets_loading || one_frag) { + rc = read( + false, + 0, + coords_r, + &coords_r_size, + data_r, + &data_r_size, + &query, + &array); + CHECK(rc == TILEDB_OK); - // Validate the results. - for (int i = 0; i < 200; i++) { - CHECK(coords_r[i] == i + 1); - CHECK(data_r[i] == i + 1); - } + // Validate the results. + int elements_to_check; + if (one_frag) { + elements_to_check = 100; + } else { + elements_to_check = 16100; + } - // Check the internal loop count against expected value. - auto stats = - ((SparseUnorderedWithDupsReader*)query->query_->strategy()) - ->stats(); - REQUIRE(stats != nullptr); - auto counters = stats->counters(); - REQUIRE(counters != nullptr); - auto loop_num = - counters->find("Context.StorageManager.Query.Reader.internal_loop_num"); - CHECK(2 == loop_num->second); + for (int i = 0; i < elements_to_check; i++) { + CHECK(coords_r[i] == i + 1); + CHECK(data_r[i] == i + 1); + } + + // Check the internal loop count against expected value. + auto stats = + ((SparseUnorderedWithDupsReader*)query->query_->strategy()) + ->stats(); + REQUIRE(stats != nullptr); + auto counters = stats->counters(); + REQUIRE(counters != nullptr); + auto loop_num = + counters->find("Context.StorageManager.Query.Reader.internal_loop_num"); + + if (one_frag) { + CHECK(1 == loop_num->second); + } else { + CHECK(9 == loop_num->second); + } + + // Try to read multiple frags without partial tile offset reading. Should + // fail + } else { + rc = read( + false, + 0, + coords_r, + &coords_r_size, + data_r, + &data_r_size, + &query, + &array); + CHECK(rc == TILEDB_ERR); + + tiledb_error_t* error = NULL; + rc = tiledb_ctx_get_last_error(ctx_, &error); + CHECK(rc == TILEDB_OK); + + const char* msg; + rc = tiledb_error_message(error, &msg); + CHECK(rc == TILEDB_OK); + + std::string error_str(msg); + CHECK( + error_str.find("Cannot load tile offsets, computed size") != + std::string::npos); + } // Clean up. rc = tiledb_array_close(ctx_, array);