diff --git a/test/src/unit-ReadCellSlabIter.cc b/test/src/unit-ReadCellSlabIter.cc index 3150f50d114..3689ad43da0 100644 --- a/test/src/unit-ReadCellSlabIter.cc +++ b/test/src/unit-ReadCellSlabIter.cc @@ -128,8 +128,8 @@ void ReadCellSlabIterFx::check_iter( CHECK(result_cell_slab.tile_ == nullptr); } else { CHECK(result_cell_slab.tile_ != nullptr); - CHECK(result_cell_slab.tile_->frag_idx_ == rcs[0]); - CHECK(result_cell_slab.tile_->tile_idx_ == rcs[1]); + CHECK(result_cell_slab.tile_->frag_idx() == rcs[0]); + CHECK(result_cell_slab.tile_->tile_idx() == rcs[1]); } CHECK(result_cell_slab.start_ == rcs[2]); CHECK(result_cell_slab.length_ == rcs[3]); @@ -175,7 +175,7 @@ TEST_CASE_METHOD( "[ReadCellSlabIter][empty]") { Subarray* subarray = nullptr; std::map> result_space_tiles; - std::vector> result_coords; + std::vector result_coords; ReadCellSlabIter iter(subarray, &result_space_tiles, &result_coords); CHECK(iter.end()); CHECK(iter.begin().ok()); @@ -228,7 +228,7 @@ TEST_CASE_METHOD( &result_space_tiles); // Check iterator - std::vector> result_coords; + std::vector result_coords; ReadCellSlabIter iter( &subarray, &result_space_tiles, &result_coords); std::vector> c_result_cell_slabs = { @@ -286,7 +286,7 @@ TEST_CASE_METHOD( &result_space_tiles); // Check iterator - std::vector> result_coords; + std::vector result_coords; ReadCellSlabIter iter( &subarray, &result_space_tiles, &result_coords); std::vector> c_result_cell_slabs = { @@ -344,7 +344,7 @@ TEST_CASE_METHOD( &result_space_tiles); // Check iterator - std::vector> result_coords; + std::vector result_coords; ReadCellSlabIter iter( &subarray, &result_space_tiles, &result_coords); std::vector> c_result_cell_slabs = { @@ -407,24 +407,46 @@ TEST_CASE_METHOD( tile_coords, &result_space_tiles); + auto dim_num = 1; + // Create result coordinates - std::vector> result_coords; - ResultTile result_tile_2_0(2, 0); - ResultTile result_tile_3_0(3, 0); - ResultTile result_tile_3_1(3, 1); - uint64_t coords_2_3 = 3; - uint64_t 
coords_2_5 = 5; - uint64_t coords_3_8 = 8; - uint64_t coords_3_9 = 9; - uint64_t coords_3_12 = 12; - uint64_t coords_3_19 = 19; - result_coords.emplace_back(&result_tile_2_0, &coords_2_3, 1); - result_coords.emplace_back(&result_tile_2_0, &coords_2_5, 3); - result_coords.emplace_back(&result_tile_3_0, &coords_3_8, 2); - result_coords.emplace_back(&result_tile_3_0, &coords_3_9, 3); + std::vector result_coords; + ResultTile result_tile_2_0(2, 0, dim_num); + ResultTile result_tile_3_0(3, 0, dim_num); + ResultTile result_tile_3_1(3, 1, dim_num); + + result_tile_2_0.init_coord_tile("d", 0); + result_tile_3_0.init_coord_tile("d", 0); + result_tile_3_1.init_coord_tile("d", 0); + + std::vector vec_2_0 = {1000, 3, 1000, 5}; + Buffer buff_2_0(&vec_2_0[0], vec_2_0.size() * sizeof(uint64_t)); + Tile tile_2_0(Datatype::UINT64, sizeof(uint64_t), 0, &buff_2_0, false); + auto tile_pair = result_tile_2_0.tile_pair("d"); + REQUIRE(tile_pair != nullptr); + tile_pair->first = tile_2_0; + + std::vector vec_3_0 = {1000, 1000, 8, 9}; + Buffer buff_3_0(&vec_3_0[0], vec_3_0.size() * sizeof(uint64_t)); + Tile tile_3_0(Datatype::UINT64, sizeof(uint64_t), 0, &buff_3_0, false); + tile_pair = result_tile_3_0.tile_pair("d"); + REQUIRE(tile_pair != nullptr); + tile_pair->first = tile_3_0; + + std::vector vec_3_1 = {1000, 12, 19, 1000}; + Buffer buff_3_1(&vec_3_1[0], vec_3_1.size() * sizeof(uint64_t)); + Tile tile_3_1(Datatype::UINT64, sizeof(uint64_t), 0, &buff_3_1, false); + tile_pair = result_tile_3_1.tile_pair("d"); + REQUIRE(tile_pair != nullptr); + tile_pair->first = tile_3_1; + + result_coords.emplace_back(&result_tile_2_0, 1); + result_coords.emplace_back(&result_tile_2_0, 3); + result_coords.emplace_back(&result_tile_3_0, 2); + result_coords.emplace_back(&result_tile_3_0, 3); result_coords.back().invalidate(); - result_coords.emplace_back(&result_tile_3_1, &coords_3_12, 1); - result_coords.emplace_back(&result_tile_3_1, &coords_3_19, 2); + result_coords.emplace_back(&result_tile_3_1, 1); 
+ result_coords.emplace_back(&result_tile_3_1, 2); // Check iterator ReadCellSlabIter iter( @@ -608,7 +630,7 @@ TEST_CASE_METHOD( &result_space_tiles); // Create result coordinates - std::vector> result_coords; + std::vector result_coords; // Check iterator ReadCellSlabIter iter( @@ -777,7 +799,7 @@ TEST_CASE_METHOD( &result_space_tiles); // Create result coordinates - std::vector> result_coords; + std::vector result_coords; // Check iterator ReadCellSlabIter iter( @@ -959,7 +981,7 @@ TEST_CASE_METHOD( &result_space_tiles); // Create result coordinates - std::vector> result_coords; + std::vector result_coords; // Check iterator ReadCellSlabIter iter( @@ -971,9 +993,8 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( ReadCellSlabIterFx, - "ReadCellSlabIter: Test 2D slabs, multiple ranges, 2 denses fragments, 1 " - "sparse " - "overlap", + "ReadCellSlabIter: Test 2D slabs, multiple ranges, 2 dense fragments, " + "1 sparse, overlap", "[ReadCellSlabIter][slabs][2d][mr][2df1sf]") { Layout subarray_layout = Layout::ROW_MAJOR; Layout tile_domain_layout = Layout::ROW_MAJOR; @@ -1186,16 +1207,49 @@ TEST_CASE_METHOD( tile_coords, &result_space_tiles); + unsigned dim_num = 2; + // Create result coordinates - std::vector> result_coords; - ResultTile result_tile_3_0(3, 0); - ResultTile result_tile_3_1(3, 1); - uint64_t coords_3_3_3[] = {3, 3}; - uint64_t coords_3_5_5[] = {5, 5}; - uint64_t coords_3_5_6[] = {5, 6}; - result_coords.emplace_back(&result_tile_3_0, coords_3_3_3, 1); - result_coords.emplace_back(&result_tile_3_1, coords_3_5_5, 0); - result_coords.emplace_back(&result_tile_3_1, coords_3_5_6, 2); + std::vector result_coords; + ResultTile result_tile_3_0(3, 0, dim_num); + ResultTile result_tile_3_1(3, 1, dim_num); + + result_tile_3_0.init_coord_tile("d1", 0); + result_tile_3_0.init_coord_tile("d2", 1); + result_tile_3_1.init_coord_tile("d1", 0); + result_tile_3_1.init_coord_tile("d2", 1); + + std::vector vec_3_0_d1 = {1000, 3, 1000, 1000}; + Buffer buff_3_0_d1(&vec_3_0_d1[0], 
vec_3_0_d1.size() * sizeof(uint64_t)); + Tile tile_3_0_d1(Datatype::UINT64, sizeof(uint64_t), 0, &buff_3_0_d1, false); + auto tile_pair = result_tile_3_0.tile_pair("d1"); + REQUIRE(tile_pair != nullptr); + tile_pair->first = tile_3_0_d1; + + std::vector vec_3_0_d2 = {1000, 3, 1000, 1000}; + Buffer buff_3_0_d2(&vec_3_0_d2[0], vec_3_0_d2.size() * sizeof(uint64_t)); + Tile tile_3_0_d2(Datatype::UINT64, sizeof(uint64_t), 0, &buff_3_0_d2, false); + tile_pair = result_tile_3_0.tile_pair("d2"); + REQUIRE(tile_pair != nullptr); + tile_pair->first = tile_3_0_d2; + + std::vector vec_3_1_d1 = {5, 1000, 5, 1000}; + Buffer buff_3_1_d1(&vec_3_1_d1[0], vec_3_1_d1.size() * sizeof(uint64_t)); + Tile tile_3_1_d1(Datatype::UINT64, sizeof(uint64_t), 0, &buff_3_1_d1, false); + tile_pair = result_tile_3_1.tile_pair("d1"); + REQUIRE(tile_pair != nullptr); + tile_pair->first = tile_3_1_d1; + + std::vector vec_3_1_d2 = {5, 1000, 6, 1000}; + Buffer buff_3_1_d2(&vec_3_1_d2[0], vec_3_1_d2.size() * sizeof(uint64_t)); + Tile tile_3_1_d2(Datatype::UINT64, sizeof(uint64_t), 0, &buff_3_1_d2, false); + tile_pair = result_tile_3_1.tile_pair("d2"); + REQUIRE(tile_pair != nullptr); + tile_pair->first = tile_3_1_d2; + + result_coords.emplace_back(&result_tile_3_0, 1); + result_coords.emplace_back(&result_tile_3_1, 0); + result_coords.emplace_back(&result_tile_3_1, 2); // Check iterator ReadCellSlabIter iter( diff --git a/test/src/unit-Reader.cc b/test/src/unit-Reader.cc index c6b0b63fc69..4f570244c55 100644 --- a/test/src/unit-Reader.cc +++ b/test/src/unit-Reader.cc @@ -161,15 +161,15 @@ TEST_CASE_METHOD( CHECK(result_space_tiles.size() == 6); // Result tiles for fragment #1 - ResultTile result_tile_1_0_1(1, 0); - ResultTile result_tile_1_2_1(1, 2); + ResultTile result_tile_1_0_1(1, 0, dim_num); + ResultTile result_tile_1_2_1(1, 2, dim_num); // Result tiles for fragment #2 - ResultTile result_tile_1_0_2(2, 0); + ResultTile result_tile_1_0_2(2, 0, dim_num); // Result tiles for fragment #3 - ResultTile 
result_tile_2_0_3(3, 0); - ResultTile result_tile_3_0_3(3, 2); + ResultTile result_tile_2_0_3(3, 0, dim_num); + ResultTile result_tile_3_0_3(3, 2, dim_num); // Initialize frag domains typedef std::pair FragDomain; diff --git a/test/src/unit-capi-sparse_array.cc b/test/src/unit-capi-sparse_array.cc index 4fc3614e417..1289c7f6369 100644 --- a/test/src/unit-capi-sparse_array.cc +++ b/test/src/unit-capi-sparse_array.cc @@ -2947,16 +2947,16 @@ TEST_CASE_METHOD( REQUIRE(rc == TILEDB_OK); REQUIRE(status == TILEDB_COMPLETED); + CHECK(a1_size == sizeof(a1)); + CHECK(a2_off_size == sizeof(a2_off)); + CHECK(a2_size == 3 * sizeof(char)); CHECK(a1[0] == 0); CHECK(a1[1] == 1); - CHECK(a1_size == sizeof(a1)); CHECK(a2_off[0] == 0); CHECK(a2_off[1] == 1); - CHECK(a2_off_size == sizeof(a2_off)); CHECK(a2[0] == 'a'); CHECK(a2[1] == 'b'); CHECK(a2[2] == 'b'); - CHECK(a2_size == 3 * sizeof(char)); // Close array CHECK(tiledb_array_close(ctx, array) == TILEDB_OK); diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index 0d37bd99741..73a32c309a5 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -472,53 +472,19 @@ uint64_t Domain::cell_num_per_tile() const { } template -int Domain::cell_order_cmp(const T* coords_a, const T* coords_b) const { - // Check if they are equal - if (std::memcmp(coords_a, coords_b, dim_num_ * datatype_size(type_)) == 0) - return 0; - - // Check for precedence - if (cell_order_ == Layout::COL_MAJOR) { // COLUMN-MAJOR - for (unsigned int i = dim_num_ - 1;; --i) { - if (coords_a[i] < coords_b[i]) - return -1; - if (coords_a[i] > coords_b[i]) - return 1; - if (i == 0) - break; - } - } else if (cell_order_ == Layout::ROW_MAJOR) { // ROW-MAJOR - for (unsigned int i = 0; i < dim_num_; ++i) { - if (coords_a[i] < coords_b[i]) - return -1; - if (coords_a[i] > coords_b[i]) - return 1; - } - } else { // Invalid cell order - assert(0); - } - - // The program should never reach this point - assert(0); +int 
Domain::cell_order_cmp(const void* coord_a, const void* coord_b) { + auto ca = (const T*)coord_a; + auto cb = (const T*)coord_b; + if (*ca < *cb) + return -1; + if (*ca > *cb) + return 1; return 0; } -template int Domain::cell_order_cmp( - const Dimension* dim, - const std::vector& coord_buffs, - uint64_t a, - uint64_t b, - unsigned d) { - auto coord_buff = (unsigned char*)coord_buffs[d]; - auto coord_size = dim->coord_size(); - auto coords_a = (T*)&(coord_buff[a * coord_size]); - auto coords_b = (T*)&(coord_buff[b * coord_size]); - if (*coords_a < *coords_b) - return -1; - if (*coords_a > *coords_b) - return 1; - return 0; + unsigned dim_idx, const void* coord_a, const void* coord_b) const { + return cell_order_cmp_func_[dim_idx](coord_a, coord_b); } int Domain::cell_order_cmp( @@ -526,7 +492,10 @@ int Domain::cell_order_cmp( if (cell_order_ == Layout::ROW_MAJOR) { for (unsigned d = 0; d < dim_num_; ++d) { auto dim = dimension(d); - auto res = cell_order_cmp_func_[d](dim, coord_buffs, a, b, d); + auto coord_size = dim->coord_size(); + auto ca = &(((unsigned char*)coord_buffs[d])[a * coord_size]); + auto cb = &(((unsigned char*)coord_buffs[d])[b * coord_size]); + auto res = cell_order_cmp_func_[d](ca, cb); if (res == 1 || res == -1) return res; @@ -535,7 +504,10 @@ int Domain::cell_order_cmp( } else { // COL_MAJOR for (unsigned d = dim_num_ - 1;; --d) { auto dim = dimension(d); - auto res = cell_order_cmp_func_[d](dim, coord_buffs, a, b, d); + auto coord_size = dim->coord_size(); + auto ca = &(((unsigned char*)coord_buffs[d])[a * coord_size]); + auto cb = &(((unsigned char*)coord_buffs[d])[b * coord_size]); + auto res = cell_order_cmp_func_[d](ca, cb); if (res == 1 || res == -1) return res; @@ -1016,58 +988,18 @@ uint64_t Domain::tile_num(const void* range) const { return 0; } -template -int Domain::tile_order_cmp(const T* coords_a, const T* coords_b) const { - if (tile_extents_ == nullptr) - return 0; - - auto tile_extents = (T*)tile_extents_; - auto domain = 
(T*)domain_; - if (tile_order_ == Layout::ROW_MAJOR) { - for (unsigned i = 0; i < dim_num_; ++i) { - auto ta = (T)((coords_a[i] - domain[2 * i]) / tile_extents[i]); - auto tb = (T)((coords_b[i] - domain[2 * i]) / tile_extents[i]); - - if (ta < tb) - return -1; - if (ta > tb) - return 1; - // else ta == tb --> continue - } - } else { // COL_MAJOR - for (unsigned i = dim_num_ - 1;; --i) { - auto ta = (T)((coords_a[i] - domain[2 * i]) / tile_extents[i]); - auto tb = (T)((coords_b[i] - domain[2 * i]) / tile_extents[i]); - if (ta < tb) - return -1; - if (ta > tb) - return 1; - // else ta == tb --> continue - - if (i == 0) - break; - } - } - - return 0; -} - template int Domain::tile_order_cmp( - const Dimension* dim, - const std::vector& coord_buffs, - uint64_t a, - uint64_t b, - unsigned d) { + const Dimension* dim, const void* coord_a, const void* coord_b) { auto tile_extent = (T*)dim->tile_extent(); - assert(tile_extent != nullptr); + if (tile_extent == nullptr) + return 0; + + auto ca = (T*)coord_a; + auto cb = (T*)coord_b; auto domain = (T*)dim->domain(); - auto coord_buff = (unsigned char*)coord_buffs[d]; - auto coord_size = dim->coord_size(); - auto coords_a = (T*)&(coord_buff[a * coord_size]); - auto coords_b = (T*)&(coord_buff[b * coord_size]); - auto ta = (T)((*coords_a - domain[0]) / *tile_extent); - auto tb = (T)((*coords_b - domain[0]) / *tile_extent); + auto ta = (T)((*ca - domain[0]) / *tile_extent); + auto tb = (T)((*cb - domain[0]) / *tile_extent); if (ta < tb) return -1; if (ta > tb) @@ -1077,13 +1009,13 @@ int Domain::tile_order_cmp( int Domain::tile_order_cmp( const std::vector& coord_buffs, uint64_t a, uint64_t b) const { - if (tile_extents_ == nullptr) - return 0; - if (tile_order_ == Layout::ROW_MAJOR) { for (unsigned d = 0; d < dim_num_; ++d) { auto dim = dimension(d); - auto res = tile_order_cmp_func_[d](dim, coord_buffs, a, b, d); + auto coord_size = dim->coord_size(); + auto ca = &(((unsigned char*)coord_buffs[d])[a * coord_size]); + auto cb = 
&(((unsigned char*)coord_buffs[d])[b * coord_size]); + auto res = tile_order_cmp_func_[d](dim, ca, cb); if (res == 1 || res == -1) return res; @@ -1092,7 +1024,10 @@ int Domain::tile_order_cmp( } else { // COL_MAJOR for (unsigned d = dim_num_ - 1;; --d) { auto dim = dimension(d); - auto res = tile_order_cmp_func_[d](dim, coord_buffs, a, b, d); + auto coord_size = dim->coord_size(); + auto ca = &(((unsigned char*)coord_buffs[d])[a * coord_size]); + auto cb = &(((unsigned char*)coord_buffs[d])[b * coord_size]); + auto res = tile_order_cmp_func_[d](dim, ca, cb); if (res == 1 || res == -1) return res; @@ -1106,39 +1041,10 @@ int Domain::tile_order_cmp( return 0; } -template -int Domain::tile_order_cmp_tile_coords( - const T* tile_coords_a, const T* tile_coords_b) const { - if (tile_coords_a == nullptr || tile_coords_b == nullptr) - return 0; - - if (tile_order_ == Layout::ROW_MAJOR) { - for (unsigned i = 0; i < dim_num_; ++i) { - auto ta = tile_coords_a[i]; - auto tb = tile_coords_b[i]; - - if (ta < tb) - return -1; - if (ta > tb) - return 1; - // else ta == tb --> continue - } - } else { // COL_MAJOR - for (unsigned i = dim_num_ - 1;; --i) { - auto ta = tile_coords_a[i]; - auto tb = tile_coords_b[i]; - if (ta < tb) - return -1; - if (ta > tb) - return 1; - // else ta == tb --> continue - - if (i == 0) - break; - } - } - - return 0; +int Domain::tile_order_cmp( + unsigned dim_idx, const void* coord_a, const void* coord_b) const { + auto dim = dimension(dim_idx); + return tile_order_cmp_func_[dim_idx](dim, coord_a, coord_b); } Datatype Domain::type() const { @@ -1845,27 +1751,6 @@ template uint64_t Domain::cell_num(const uint64_t* domain) const; template uint64_t Domain::cell_num(const float* domain) const; template uint64_t Domain::cell_num(const double* domain) const; -template int Domain::cell_order_cmp( - const int* coords_a, const int* coords_b) const; -template int Domain::cell_order_cmp( - const int64_t* coords_a, const int64_t* coords_b) const; -template int 
Domain::cell_order_cmp( - const float* coords_a, const float* coords_b) const; -template int Domain::cell_order_cmp( - const double* coords_a, const double* coords_b) const; -template int Domain::cell_order_cmp( - const int8_t* coords_a, const int8_t* coords_b) const; -template int Domain::cell_order_cmp( - const uint8_t* coords_a, const uint8_t* coords_b) const; -template int Domain::cell_order_cmp( - const int16_t* coords_a, const int16_t* coords_b) const; -template int Domain::cell_order_cmp( - const uint16_t* coords_a, const uint16_t* coords_b) const; -template int Domain::cell_order_cmp( - const uint32_t* coords_a, const uint32_t* coords_b) const; -template int Domain::cell_order_cmp( - const uint64_t* coords_a, const uint64_t* coords_b) const; - template Status Domain::get_cell_pos( const int* coords, uint64_t* pos) const; template Status Domain::get_cell_pos( @@ -2054,48 +1939,6 @@ template void Domain::get_tile_subarray( const double* tile_coords, double* tile_subarray) const; -template int Domain::tile_order_cmp( - const int8_t* coords_a, const int8_t* coords_b) const; -template int Domain::tile_order_cmp( - const uint8_t* coords_a, const uint8_t* coords_b) const; -template int Domain::tile_order_cmp( - const int16_t* coords_a, const int16_t* coords_b) const; -template int Domain::tile_order_cmp( - const uint16_t* coords_a, const uint16_t* coords_b) const; -template int Domain::tile_order_cmp( - const int* coords_a, const int* coords_b) const; -template int Domain::tile_order_cmp( - const unsigned* coords_a, const unsigned* coords_b) const; -template int Domain::tile_order_cmp( - const int64_t* coords_a, const int64_t* coords_b) const; -template int Domain::tile_order_cmp( - const uint64_t* coords_a, const uint64_t* coords_b) const; -template int Domain::tile_order_cmp( - const float* coords_a, const float* coords_b) const; -template int Domain::tile_order_cmp( - const double* coords_a, const double* coords_b) const; - -template int 
Domain::tile_order_cmp_tile_coords( - const int8_t* coords_a, const int8_t* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const uint8_t* coords_a, const uint8_t* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const int16_t* coords_a, const int16_t* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const uint16_t* coords_a, const uint16_t* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const int* coords_a, const int* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const unsigned* coords_a, const unsigned* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const int64_t* coords_a, const int64_t* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const uint64_t* coords_a, const uint64_t* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const float* coords_a, const float* coords_b) const; -template int Domain::tile_order_cmp_tile_coords( - const double* coords_a, const double* coords_b) const; - template void Domain::get_end_of_cell_slab( int8_t* subarray, int8_t* start, Layout layout, int8_t* end) const; template void Domain::get_end_of_cell_slab( diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index 1e3e648471e..86b97ef4284 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -214,38 +214,31 @@ class Domain { * same regular tile. * * @tparam T The coordinates type. - * @param coords_a The first input coordinates. - * @param coords_b The second input coordinates. + * @param dim_idx The dimension to compare the coordinates on. + * @param coord_a The first input coordinates. + * @param coord_b The second input coordinates. 
* @return One of the following: - * - -1 if the first coordinates precede the second + * - -1 if the first coordinate precedes the second * - 0 if the two coordinates are identical - * - +1 if the first coordinates succeed the second + * - +1 if the first coordinate succeeds the second */ - template - int cell_order_cmp(const T* coords_a, const T* coords_b) const; + int cell_order_cmp( + unsigned dim_idx, const void* coord_a, const void* coord_b) const; /** - * Checks the cell order of the input coordinates on the given dimension. + * Checks the cell order of the input coordinates. Since the coordinates + * are given for a single dimension, this function simply checks which + * coordinate is larger. * - * @param The dimension to compare on. - * @param coord_buffs The input coordinates, given in separate buffers, - * one per dimension. The buffers are sorted in the same order of the - * dimensions as defined in the array schema. - * @param a The position of the first coordinate tuple across all buffers. - * @param b The position of the second coordinate tuple across all buffers. - * @param d The dimension index to compare on. + * @param coord_a The first coordinate. + * @param coord_b The second coordinate. * @return One of the following: - * - -1 if the first coordinates precede the second on the cell order + * - -1 if the first coordinate is smaller than the second * - 0 if the two coordinates have the same cell order - * - +1 if the first coordinates succeed the second on the cell order + * - +1 if the first coordinate is larger than the second */ template - static int cell_order_cmp( - const Dimension* dim, - const std::vector& coord_buffs, - uint64_t a, - uint64_t b, - unsigned d); + static int cell_order_cmp(const void* coord_a, const void* coord_b); /** * Checks the cell order of the input coordinates. @@ -634,42 +627,20 @@ class Domain { return ret; } - /** - * Checks the tile order of the input coordinates. - * - * @tparam T The coordinates type. 
- * @param coords_a The first input coordinates. - * @param coords_b The second input coordinates. - * @return One of the following: - * - -1 if the first coordinates precede the second on the tile order - * - 0 if the two coordinates have the same tile order - * - +1 if the first coordinates succeed the second on the tile order - */ - template - int tile_order_cmp(const T* coords_a, const T* coords_b) const; - /** * Checks the tile order of the input coordinates on the given dimension. * * @param The dimension to compare on. - * @param coord_buffs The input coordinates, given in separate buffers, - * one per dimension. The buffers are sorted in the same order of the - * dimensions as defined in the array schema. - * @param a The position of the first coordinate tuple across all buffers. - * @param b The position of the second coordinate tuple across all buffers. - * @param d The dimension index to compare on. + * @param coord_a The first coordinate. + * @param coord_b The second coordinate. * @return One of the following: - * - -1 if the first coordinates precede the second on the tile order + * - -1 if the first coordinate precedes the second on the tile order * - 0 if the two coordinates have the same tile order - * - +1 if the first coordinates succeed the second on the tile order + * - +1 if the first coordinate succeeds the second on the tile order */ template static int tile_order_cmp( - const Dimension* dim, - const std::vector& coord_buffs, - uint64_t a, - uint64_t b, - unsigned d); + const Dimension* dim, const void* coord_a, const void* coord_b); /** * Checks the tile order of the input coordinates. @@ -690,19 +661,18 @@ class Domain { uint64_t b) const; /** - * Checks the tile order of the input tile coordinates. + * Checks the tile order of the input coordinates for a given dimension. * - * @tparam T The coordinates type. - * @param coords_a The first tile's coordinates. - * @param coords_b The second tile's coordinates. 
+ * @param dim_idx The index of the dimension to focus on. + * @param coord_a The first coordinate on the given dimension. + * @param coord_b The second coordinate on the given dimension. * @return One of the following: - * - -1 if the first coordinates precede the second on the tile order + * - -1 if the first coordinate precedes the second on the tile order * - 0 if the two coordinates have the same tile order - * - +1 if the first coordinates succeed the second on the tile order + * - +1 if the first coordinate succeeds the second on the tile order */ - template - int tile_order_cmp_tile_coords( - const T* tile_coords_a, const T* tile_coords_b) const; + int tile_order_cmp( + unsigned dim_idx, const void* coord_a, const void* coord_b) const; /** Returns the dimensions type. */ Datatype type() const; @@ -819,17 +789,9 @@ class Domain { * Vector of functions, one per dimension, for comparing the cell order of * two coordinates. The inputs to the function are: * - * - dim: The dimension to compare on. - * - coord_buffs: The coordinates, split in one buffer per dimensions. - * - a,b: The two positions of the coordinates to compare. - * - d: The dimension index to compare on. + * - coord_a, coord_b: The two coordinates to compare. */ - std::vector& coord_buffs, - uint64_t a, - uint64_t b, - unsigned d)> + std::vector cell_order_cmp_func_; /** @@ -837,16 +799,10 @@ class Domain { * two coordinates. The inputs to the function are: * * - dim: The dimension to compare on. - * - coord_buffs: The coordinates, split in one buffer per dimensions. - * - a,b: The two positions of the coordinates to compare. - * - d: The dimension index to compare on. + * - coord_a, coord_b: The two coordinates to compare. */ std::vector& coord_buffs, - uint64_t a, - uint64_t b, - unsigned d)> + const Dimension* dim, const void* coord_a, const void* coord_b)> tile_order_cmp_func_; /** The type of dimensions. 
*/ diff --git a/tiledb/sm/misc/comparators.h b/tiledb/sm/misc/comparators.h index 4bcdb804125..26a3f8a6362 100644 --- a/tiledb/sm/misc/comparators.h +++ b/tiledb/sm/misc/comparators.h @@ -46,16 +46,12 @@ namespace tiledb { namespace sm { /** Wrapper of comparison function for sorting coords on row-major order. */ -template class RowCmp { public: - /** - * Constructor. - * - * @param dim_num The number of dimensions of the coords. - */ - RowCmp(unsigned dim_num) - : dim_num_(dim_num) { + /** Constructor. */ + RowCmp(const Domain* domain) + : domain_(domain) + , dim_num_(domain->dim_num()) { } /** @@ -65,34 +61,34 @@ class RowCmp { * @param b The second coordinate. * @return `true` if `a` precedes `b` and `false` otherwise. */ - bool operator()(const ResultCoords& a, const ResultCoords& b) const { - for (unsigned int i = 0; i < dim_num_; ++i) { - if (a.coords_[i] < b.coords_[i]) + bool operator()(const ResultCoords& a, const ResultCoords& b) const { + for (unsigned int d = 0; d < dim_num_; ++d) { + auto res = domain_->cell_order_cmp(d, a.coord(d), b.coord(d)); + + if (res == -1) return true; - if (a.coords_[i] > b.coords_[i]) + if (res == 1) return false; - // else a.coords_[i] == b.coords_[i] --> continue + // else same coordinate on dimension d --> continue } return false; } private: + /** The domain. */ + const Domain* domain_; /** The number of dimensions. */ unsigned dim_num_; }; /** Wrapper of comparison function for sorting coords on col-major order. */ -template class ColCmp { public: - /** - * Constructor. - * - * @param dim_num The number of dimensions of the coords. - */ - ColCmp(unsigned dim_num) - : dim_num_(dim_num) { + /** Constructor. */ + ColCmp(const Domain* domain) + : domain_(domain) + , dim_num_(domain->dim_num()) { } /** @@ -102,15 +98,17 @@ class ColCmp { * @param b The second coordinate. * @return `true` if `a` precedes `b` and `false` otherwise. 
*/ - bool operator()(const ResultCoords& a, const ResultCoords& b) const { - for (unsigned int i = dim_num_ - 1;; --i) { - if (a.coords_[i] < b.coords_[i]) + bool operator()(const ResultCoords& a, const ResultCoords& b) const { + for (unsigned int d = dim_num_ - 1;; --d) { + auto res = domain_->cell_order_cmp(d, a.coord(d), b.coord(d)); + + if (res == -1) return true; - if (a.coords_[i] > b.coords_[i]) + if (res == 1) return false; - // else a.coords_[i] == b.coords_[i] --> continue + // else same coordinate on dimension d --> continue - if (i == 0) + if (d == 0) break; } @@ -118,6 +116,8 @@ class ColCmp { } private: + /** The domain. */ + const Domain* domain_; /** The number of dimensions. */ unsigned dim_num_; }; @@ -126,7 +126,6 @@ class ColCmp { * Wrapper of comparison function for sorting coords on the global order * of some domain. */ -template class GlobalCmp { public: /** @@ -136,10 +135,24 @@ class GlobalCmp { * @param buff The buffer containing the actual values, used * in positional comparisons. */ - GlobalCmp(const Domain* domain, const T* buff = nullptr) - : domain_(domain) - , buff_(buff) { + GlobalCmp(const Domain* domain) + : domain_(domain) { dim_num_ = domain->dim_num(); + tile_order_ = domain->tile_order(); + cell_order_ = domain->cell_order(); + coord_buffs_ = nullptr; + } + + /** + * Constructor. + * + * @param domain The array domain. + * @param coord_buffs The coordinate buffers, one per dimension, containing + * the actual values, used in positional comparisons. + */ + GlobalCmp(const Domain* domain, const std::vector* coord_buffs) + : domain_(domain) + , coord_buffs_(coord_buffs) { } /** @@ -149,75 +162,62 @@ class GlobalCmp { * @param b The second coordinate. * @return `true` if `a` precedes `b` and `false` otherwise. 
*/ - bool operator()(const ResultCoords& a, const ResultCoords& b) const { + bool operator()(const ResultCoords& a, const ResultCoords& b) const { // Compare tile order first - auto tile_cmp = - domain_->tile_order_cmp_tile_coords(a.tile_coords_, b.tile_coords_); + if (tile_order_ == Layout::ROW_MAJOR) { + for (unsigned d = 0; d < dim_num_; ++d) { + auto res = domain_->tile_order_cmp(d, a.coord(d), b.coord(d)); - if (tile_cmp == -1) - return true; - if (tile_cmp == 1) - return false; - // else tile_cmp == 0 --> continue + if (res == -1) + return true; + if (res == 1) + return false; + // else same tile on dimension d --> continue + } + } else { // COL_MAJOR + assert(tile_order_ == Layout::COL_MAJOR); + for (unsigned d = dim_num_ - 1;; --d) { + auto res = domain_->tile_order_cmp(d, a.coord(d), b.coord(d)); - // Compare cell order - auto cell_cmp = domain_->cell_order_cmp(a.coords_, b.coords_); - return cell_cmp == -1; - } + if (res == -1) + return true; + if (res == 1) + return false; + // else same tile on dimension d --> continue - /** - * Comparison operator for a vector of integer positions. - * Here `buff_` is **not** `nullptr` and a position corresponds to - * coordinates in `buff_`. - * - * @param a The first coordinate position. - * @param b The second coordinate position. - * @return `true` if coordinates at `a` precedes coordinates at `b`, - * and `false` otherwise. 
- */ - bool operator()(uint64_t a, uint64_t b) const { - // Get coordinates - const T* coords_a = &buff_[a * dim_num_]; - const T* coords_b = &buff_[b * dim_num_]; - // Compare tile order first - auto tile_cmp = domain_->tile_order_cmp(coords_a, coords_b); - - if (tile_cmp == -1) - return true; - if (tile_cmp == 1) - return false; - // else tile_cmp == 0 --> continue + if (d == 0) + break; + } + } // Compare cell order - auto cell_cmp = domain_->cell_order_cmp(coords_a, coords_b); - return cell_cmp == -1; - } + if (cell_order_ == Layout::ROW_MAJOR) { + for (unsigned d = 0; d < dim_num_; ++d) { + auto res = domain_->cell_order_cmp(d, a.coord(d), b.coord(d)); - private: - /** The domain. */ - const Domain* domain_; - /** A buffer - not applicable to sorting `ResultCoords`. */ - const T* buff_; - /** The number of dimensions. */ - unsigned dim_num_; -}; + if (res == -1) + return true; + if (res == 1) + return false; + // else same tile on dimension d --> continue + } + } else { // COL_MAJOR + assert(cell_order_ == Layout::COL_MAJOR); + for (unsigned d = dim_num_ - 1;; --d) { + auto res = domain_->cell_order_cmp(d, a.coord(d), b.coord(d)); -/** - * Wrapper of comparison function for sorting coords on the global order - * of some domain. - */ -class GlobalCmp2 { - public: - /** - * Constructor. - * - * @param domain The array domain. - * @param coord_buffs The coordinate buffers, one per dimension, containing - * the actual values, used in positional comparisons. - */ - GlobalCmp2(const Domain* domain, const std::vector& coord_buffs) - : domain_(domain) - , coord_buffs_(coord_buffs) { + if (res == -1) + return true; + if (res == 1) + return false; + // else same tile on dimension d --> continue + + if (d == 0) + break; + } + } + + return false; } /** @@ -229,8 +229,8 @@ class GlobalCmp2 { * cell at `b`, and `false` otherwise. 
*/ bool operator()(uint64_t a, uint64_t b) const { - // Compare tile order first - auto tile_cmp = domain_->tile_order_cmp(coord_buffs_, a, b); + assert(coord_buffs_ != nullptr); + auto tile_cmp = domain_->tile_order_cmp(*coord_buffs_, a, b); if (tile_cmp == -1) return true; @@ -239,18 +239,24 @@ class GlobalCmp2 { // else tile_cmp == 0 --> continue // Compare cell order - auto cell_cmp = domain_->cell_order_cmp(coord_buffs_, a, b); + auto cell_cmp = domain_->cell_order_cmp(*coord_buffs_, a, b); return cell_cmp == -1; } private: /** The domain. */ const Domain* domain_; + /** The number of dimensions. */ + unsigned dim_num_; + /** The tile order. */ + Layout tile_order_; + /** The cell order. */ + Layout cell_order_; /** * The coordinate buffers, one per dimension, sorted in the order the * dimensions are defined in the array schema. */ - const std::vector& coord_buffs_; + const std::vector* coord_buffs_; }; } // namespace sm diff --git a/tiledb/sm/query/read_cell_slab_iter.cc b/tiledb/sm/query/read_cell_slab_iter.cc index 098e7cf142a..8f999ceca9f 100644 --- a/tiledb/sm/query/read_cell_slab_iter.cc +++ b/tiledb/sm/query/read_cell_slab_iter.cc @@ -56,7 +56,7 @@ template ReadCellSlabIter::ReadCellSlabIter( const Subarray* subarray, std::map>* result_space_tiles, - std::vector>* result_coords, + std::vector* result_coords, uint64_t result_coords_pos) : result_space_tiles_(result_space_tiles) , result_coords_(result_coords) @@ -237,15 +237,14 @@ void ReadCellSlabIter::compute_result_cell_slabs( // Check overlap for (unsigned d = 0; d < dim_num; ++d) { + auto result_coord = *(const T*)(*result_coords_)[i].coord(d); if (d != slab_dim) { // No overlap - if ((*result_coords_)[i].coords_[d] != cell_slab_copy.coords_[d]) { + if (result_coord != cell_slab_copy.coords_[d]) { must_break = true; break; } - } else if ( - (*result_coords_)[i].coords_[d] < slab_start || - (*result_coords_)[i].coords_[d] > slab_end) { + } else if (result_coord < slab_start || result_coord > 
slab_end) { must_break = true; break; } @@ -255,9 +254,9 @@ void ReadCellSlabIter::compute_result_cell_slabs( break; // Add left slab - if ((*result_coords_)[i].coords_[slab_dim] > slab_start) { - cell_slab_copy.length_ = (*result_coords_)[i].coords_[slab_dim] - - cell_slab_copy.coords_[slab_dim]; + auto result_coord = *(const T*)(*result_coords_)[i].coord(slab_dim); + if (result_coord > slab_start) { + cell_slab_copy.length_ = result_coord - cell_slab_copy.coords_[slab_dim]; compute_result_cell_slabs_dense(cell_slab_copy, &result_space_tile); } @@ -266,8 +265,7 @@ void ReadCellSlabIter::compute_result_cell_slabs( (*result_coords_)[i].tile_, (*result_coords_)[i].pos_, 1); // Update cell slab copy - cell_slab_copy.coords_[slab_dim] = - (*result_coords_)[i].coords_[slab_dim] + 1; + cell_slab_copy.coords_[slab_dim] = result_coord + 1; cell_slab_copy.length_ = slab_end - cell_slab_copy.coords_[slab_dim] + 1; slab_start = cell_slab_copy.coords_[slab_dim]; slab_end = (T)(slab_start + cell_slab_copy.length_ - 1); diff --git a/tiledb/sm/query/read_cell_slab_iter.h b/tiledb/sm/query/read_cell_slab_iter.h index 3d559772515..6c51e90a848 100644 --- a/tiledb/sm/query/read_cell_slab_iter.h +++ b/tiledb/sm/query/read_cell_slab_iter.h @@ -83,7 +83,7 @@ class ReadCellSlabIter { ReadCellSlabIter( const Subarray* subarray, std::map>* result_space_tiles, - std::vector>* result_coords, + std::vector* result_coords, uint64_t result_coords_pos = 0); /** Destructor. */ @@ -162,7 +162,7 @@ class ReadCellSlabIter { std::map>* result_space_tiles_; /** The result sparse fragment coordinates. */ - std::vector>* result_coords_; + std::vector* result_coords_; /** Current position to be explored in `result_coords_`. 
*/ size_t result_coords_pos_; diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 18337a7d406..a64d9d31865 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -579,7 +579,7 @@ void Reader::compute_result_space_tiles( auto frag_idx = frag_tile_domains[f].id(); result_space_tile.frag_domains_.emplace_back(frag_idx, frag_domain); auto tile_idx = frag_tile_domains[f].tile_pos(coords); - ResultTile result_tile(frag_idx, tile_idx); + ResultTile result_tile(frag_idx, tile_idx, dim_num); result_space_tile.result_tiles_[frag_idx] = result_tile; } } @@ -590,15 +590,14 @@ void Reader::compute_result_space_tiles( /* ****************************** */ void Reader::clear_tiles( - const std::string& attr, + const std::string& name, const std::vector& result_tiles) const { for (auto& result_tile : result_tiles) - result_tile->attr_tiles_.erase(attr); + result_tile->erase_tile(name); } -template Status Reader::compute_result_cell_slabs( - const std::vector>& result_coords, + const std::vector& result_coords, std::vector* result_cell_slabs) const { STATS_FUNC_IN(reader_compute_cell_ranges); @@ -646,35 +645,40 @@ Status Reader::compute_range_result_coords( unsigned frag_idx, ResultTile* tile, const std::vector& range, - std::vector>* result_coords) const { + std::vector* result_coords) const { auto dim_num = array_schema_->dim_num(); assert(dim_num == range.size()); - const auto& t = tile->attr_tiles_.find(constants::coords)->second.first; + const auto& t = tile->coords_tile(); + // TODO: find coords_num in a different way auto coords_num = t.cell_num(); auto c = (T*)t.internal_data(); for (uint64_t i = 0, pos = 0; i < coords_num; ++i, pos += dim_num) { + // TODO: must fix - it should be one coord per dimension + // TODO: does not need to be a separate function + // TODO: not templates here if (utils::geometry::coords_in_rect(&c[pos], range, dim_num) && !coords_overwritten(frag_idx, &c[pos])) - result_coords->emplace_back(tile, &c[pos], i); + 
result_coords->emplace_back(tile, i); } return Status::Ok(); } +// TODO: remove template template Status Reader::compute_range_result_coords( const std::vector& single_fragment, const std::map, size_t>& result_tile_map, std::vector* result_tiles, - std::vector>>* range_result_coords) { + std::vector>* range_result_coords) { auto range_num = read_state_.partitioner_.current().range_num(); range_result_coords->resize(range_num); auto cell_order = array_schema_->cell_order(); auto statuses = parallel_for(0, range_num, [&](uint64_t r) { // Compute overlapping coordinates per range - RETURN_NOT_OK(compute_range_result_coords( + RETURN_NOT_OK(compute_range_result_coords( r, result_tile_map, result_tiles, &((*range_result_coords)[r]))); // Potentially sort for deduping purposes (for the case of updates) @@ -683,10 +687,10 @@ Status Reader::compute_range_result_coords( (layout_ == Layout::GLOBAL_ORDER || layout_ == Layout ::UNORDERED) ? cell_order : layout_; + RETURN_CANCEL_OR_ERROR( - sort_result_coords(&((*range_result_coords)[r]), layout)); - RETURN_CANCEL_OR_ERROR( - dedup_result_coords(&((*range_result_coords)[r]))); + sort_result_coords(&((*range_result_coords)[r]), layout)); + RETURN_CANCEL_OR_ERROR(dedup_result_coords(&((*range_result_coords)[r]))); } // Compute tile coordinate @@ -698,12 +702,13 @@ Status Reader::compute_range_result_coords( return Status::Ok(); } +// TODO: remove template template Status Reader::compute_range_result_coords( uint64_t range_idx, const std::map, size_t>& result_tile_map, std::vector* result_tiles, - std::vector>* range_result_coords) { + std::vector* range_result_coords) { const auto& subarray = read_state_.partitioner_.current(); const auto& overlap = subarray.tile_overlap(); auto range = subarray.range(range_idx); @@ -732,7 +737,7 @@ Status Reader::compute_range_result_coords( // Add results only if the sparse tile MBR is not fully // covered by a more recent fragment's non-empty domain if (!sparse_tile_overwritten(f, i)) - 
RETURN_NOT_OK(get_all_result_coords(&tile, range_result_coords)); + RETURN_NOT_OK(get_all_result_coords(&tile, range_result_coords)); } ++tr; } else { @@ -745,8 +750,9 @@ Status Reader::compute_range_result_coords( if (t->second == 1.0) { // Full overlap // Add results only if the sparse tile MBR is not fully // covered by a more recent fragment's non-empty domain + // TODO: remove template if (!sparse_tile_overwritten(f, t->first)) - RETURN_NOT_OK(get_all_result_coords(&tile, range_result_coords)); + RETURN_NOT_OK(get_all_result_coords(&tile, range_result_coords)); } else { // Partial overlap RETURN_NOT_OK(compute_range_result_coords( f, &tile, range, range_result_coords)); @@ -759,16 +765,14 @@ Status Reader::compute_range_result_coords( return Status::Ok(); } -template Status Reader::compute_subarray_coords( - std::vector>>* range_result_coords, - std::vector>* tile_coords, - std::vector>* result_coords) { + std::vector>* range_result_coords, + std::vector* result_coords) { // Add all valid ``range_result_coords`` to ``result_coords`` for (const auto& rv : *range_result_coords) { for (const auto& c : rv) { if (c.valid()) - result_coords->emplace_back(c.tile_, c.coords_, c.pos_); + result_coords->emplace_back(c.tile_, c.pos_); } } @@ -776,43 +780,11 @@ Status Reader::compute_subarray_coords( if (layout_ == Layout::UNORDERED) return Status::Ok(); - // Compute tile coordinates only for the GLOBAL_ORDER layout - if (layout_ == Layout::GLOBAL_ORDER) - compute_sparse_tile_coords(result_coords, tile_coords); - // Sort auto cell_order = array_schema_->cell_order(); Layout layout = (layout_ == Layout ::UNORDERED) ? 
cell_order : layout_; - RETURN_NOT_OK(sort_result_coords(result_coords, layout)); - - return Status::Ok(); -} - -template -Status Reader::compute_sparse_tile_coords( - std::vector>* result_coords, - std::vector>* tile_coords) const { - auto dim_num = array_schema_->dim_num(); - auto domain = (const T*)array_schema_->domain()->domain(); - auto tile_extents = (const T*)array_schema_->domain()->tile_extents(); - std::map, size_t> tile_coords_map; - std::vector tile_coords_tmp; - tile_coords_tmp.resize(dim_num); - for (auto& rc : *result_coords) { - for (unsigned i = 0; i < dim_num; ++i) - tile_coords_tmp[i] = - (tile_extents == nullptr) ? - 0 : - (rc.coords_[i] - domain[2 * i]) / tile_extents[i]; - auto it = tile_coords_map.find(tile_coords_tmp); - if (it == tile_coords_map.end()) { // New tile coordinates - tile_coords->emplace_back(tile_coords_tmp); - rc.tile_coords_ = &(tile_coords->back())[0]; - } else { // Existing tile coordinates - rc.tile_coords_ = &((*tile_coords)[it->second][0]); - } - } + RETURN_NOT_OK(sort_result_coords(result_coords, layout)); return Status::Ok(); } @@ -825,6 +797,7 @@ Status Reader::compute_sparse_result_tiles( STATS_FUNC_IN(reader_compute_overlapping_tiles); // For easy reference + auto dim_num = array_schema_->dim_num(); const auto& subarray = read_state_.partitioner_.current(); const auto& overlap = subarray.tile_overlap(); auto range_num = subarray.range_num(); @@ -852,7 +825,7 @@ Status Reader::compute_sparse_result_tiles( auto pair = std::pair(f, t); // Add tile only if it does not already exist if (result_tile_map->find(pair) == result_tile_map->end()) { - result_tiles->emplace_back(f, t); + result_tiles->emplace_back(f, t, dim_num); (*result_tile_map)[pair] = result_tiles->size() - 1; if (f > first_fragment[r]) (*single_fragment)[r] = false; @@ -869,7 +842,7 @@ Status Reader::compute_sparse_result_tiles( auto pair = std::pair(f, t); // Add tile only if it does not already exist if (result_tile_map->find(pair) == 
result_tile_map->end()) { - result_tiles->emplace_back(f, t); + result_tiles->emplace_back(f, t, dim_num); (*result_tile_map)[pair] = result_tiles->size() - 1; if (f > first_fragment[r]) (*single_fragment)[r] = false; @@ -900,17 +873,17 @@ Status Reader::copy_cells( } Status Reader::copy_fixed_cells( - const std::string& attribute, + const std::string& name, uint64_t stride, const std::vector& result_cell_slabs) { STATS_FUNC_IN(reader_copy_fixed_cells); // For easy reference - auto it = attr_buffers_.find(attribute); + auto it = attr_buffers_.find(name); auto buffer = (unsigned char*)it->second.buffer_; auto buffer_size = it->second.buffer_size_; - auto cell_size = array_schema_->cell_size(attribute); - auto type = array_schema_->type(attribute); + auto cell_size = array_schema_->cell_size(name); + auto type = array_schema_->type(name); auto fill_size = datatype_size(type); auto fill_value = constants::fill_value(type); assert(fill_value != nullptr); @@ -948,7 +921,8 @@ Status Reader::copy_fixed_cells( offset += fill_size; } } else { // Non-empty range - const auto& tile = cs.tile_->attr_tiles_.find(attribute)->second.first; + auto tile_pair = cs.tile_->tile_pair(name); + const auto& tile = tile_pair->first; if (stride == UINT64_MAX) { const uint64_t tile_offset = cs.start_ * cell_size; RETURN_NOT_OK(tile.read(buffer + offset, bytes_to_copy, tile_offset)); @@ -972,7 +946,7 @@ Status Reader::copy_fixed_cells( RETURN_NOT_OK(st); // Update buffer offsets - *(attr_buffers_[attribute].buffer_size_) = buffer_offset; + *(attr_buffers_[name].buffer_size_) = buffer_offset; STATS_COUNTER_ADD(reader_num_fixed_cell_bytes_copied, buffer_offset); return Status::Ok(); @@ -981,19 +955,19 @@ Status Reader::copy_fixed_cells( } Status Reader::copy_var_cells( - const std::string& attribute, + const std::string& name, uint64_t stride, const std::vector& result_cell_slabs) { STATS_FUNC_IN(reader_copy_var_cells); // For easy reference - auto it = attr_buffers_.find(attribute); + auto it 
= attr_buffers_.find(name); auto buffer = (unsigned char*)it->second.buffer_; auto buffer_var = (unsigned char*)it->second.buffer_var_; auto buffer_size = it->second.buffer_size_; auto buffer_var_size = it->second.buffer_var_size_; uint64_t offset_size = constants::cell_var_offset_size; - auto type = array_schema_->type(attribute); + auto type = array_schema_->type(name); auto fill_size = datatype_size(type); auto fill_value = constants::fill_value(type); assert(fill_value != nullptr); @@ -1003,7 +977,7 @@ Status Reader::copy_var_cells( std::vector> var_offsets_per_cs; uint64_t total_offset_size, total_var_size; RETURN_NOT_OK(compute_var_cell_destinations( - attribute, + name, stride, result_cell_slabs, &offset_offsets_per_cs, @@ -1029,8 +1003,7 @@ Status Reader::copy_var_cells( Tile* tile_var = nullptr; uint64_t tile_cell_num = 0; if (cs.tile_ != nullptr) { - std::pair* const tile_pair = - &cs.tile_->attr_tiles_.find(attribute)->second; + const auto tile_pair = cs.tile_->tile_pair(name); Tile* const tile = &tile_pair->first; tile_var = &tile_pair->second; tile_offsets = (uint64_t*)tile->internal_data(); @@ -1071,8 +1044,8 @@ Status Reader::copy_var_cells( RETURN_NOT_OK(st); // Update buffer offsets - *(attr_buffers_[attribute].buffer_size_) = total_offset_size; - *(attr_buffers_[attribute].buffer_var_size_) = total_var_size; + *(attr_buffers_[name].buffer_size_) = total_offset_size; + *(attr_buffers_[name].buffer_var_size_) = total_var_size; STATS_COUNTER_ADD( reader_num_var_cell_bytes_copied, total_offset_size + total_var_size); @@ -1082,7 +1055,7 @@ Status Reader::copy_var_cells( } Status Reader::compute_var_cell_destinations( - const std::string& attribute, + const std::string& name, uint64_t stride, const std::vector& result_cell_slabs, std::vector>* offset_offsets_per_cs, @@ -1092,7 +1065,7 @@ Status Reader::compute_var_cell_destinations( // For easy reference auto num_cs = result_cell_slabs.size(); auto offset_size = constants::cell_var_offset_size; - auto 
type = array_schema_->type(attribute); + auto type = array_schema_->type(name); auto fill_size = datatype_size(type); // Resize the output vectors @@ -1112,9 +1085,9 @@ Status Reader::compute_var_cell_destinations( uint64_t tile_cell_num = 0; uint64_t tile_var_size = 0; if (cs.tile_ != nullptr) { - const auto& tile_pair = cs.tile_->attr_tiles_.find(attribute)->second; - const auto& tile = tile_pair.first; - const auto& tile_var = tile_pair.second; + const auto tile_pair = cs.tile_->tile_pair(name); + const auto& tile = tile_pair->first; + const auto& tile_var = tile_pair->second; tile_offsets = (uint64_t*)tile.internal_data(); tile_cell_num = tile.cell_num(); tile_var_size = tile_var.size(); @@ -1185,14 +1158,14 @@ template void Reader::compute_result_cell_slabs( const Subarray& subarray, std::map>* result_space_tiles, - std::vector>* result_coords, + std::vector* result_coords, std::vector* result_tiles, std::vector* result_cell_slabs) const { auto layout = subarray.layout(); if (layout == Layout::ROW_MAJOR || layout == Layout::COL_MAJOR) { uint64_t result_coords_pos = 0; std::set> frag_tile_set; - compute_result_cell_slabs_row_col( + compute_result_cell_slabs_row_col( subarray, result_space_tiles, result_coords, @@ -1201,7 +1174,7 @@ void Reader::compute_result_cell_slabs( &frag_tile_set, result_cell_slabs); } else if (layout == Layout::GLOBAL_ORDER) { - compute_result_cell_slabs_global( + compute_result_cell_slabs_global( subarray, result_space_tiles, result_coords, @@ -1216,7 +1189,7 @@ template void Reader::compute_result_cell_slabs_row_col( const Subarray& subarray, std::map>* result_space_tiles, - std::vector>* result_coords, + std::vector* result_coords, uint64_t* result_coords_pos, std::vector* result_tiles, std::set>* frag_tile_set, @@ -1237,8 +1210,8 @@ void Reader::compute_result_cell_slabs_row_col( result_cell_slabs->push_back(result_cell_slab); // Add result tile if (result_cell_slab.tile_ != nullptr) { - auto frag_idx = 
result_cell_slab.tile_->frag_idx_; - auto tile_idx = result_cell_slab.tile_->tile_idx_; + auto frag_idx = result_cell_slab.tile_->frag_idx(); + auto tile_idx = result_cell_slab.tile_->tile_idx(); auto frag_tile_pair = std::pair(frag_idx, tile_idx); auto it = frag_tile_set->find(frag_tile_pair); if (it == frag_tile_set->end()) { @@ -1254,7 +1227,7 @@ template void Reader::compute_result_cell_slabs_global( const Subarray& subarray, std::map>* result_space_tiles, - std::vector>* result_coords, + std::vector* result_coords, std::vector* result_tiles, std::vector* result_cell_slabs) const { const auto& tile_coords = subarray.tile_coords(); @@ -1270,7 +1243,7 @@ void Reader::compute_result_cell_slabs_global( auto& tile_subarray = tile_subarrays.back(); tile_subarray.template compute_tile_coords(); - compute_result_cell_slabs_row_col( + compute_result_cell_slabs_row_col( tile_subarray, result_space_tiles, result_coords, @@ -1284,8 +1257,7 @@ void Reader::compute_result_cell_slabs_global( template Status Reader::compute_result_coords( std::vector* result_tiles, - std::vector>* tile_coords, - std::vector>* result_coords) { + std::vector* result_coords) { // Get overlapping tile indexes typedef std::pair FragTilePair; std::map result_tile_map; @@ -1316,35 +1288,35 @@ Status Reader::compute_result_coords( RETURN_CANCEL_OR_ERROR(read_tiles(dim_name, tmp_result_tiles)); RETURN_CANCEL_OR_ERROR(filter_tiles(dim_name, tmp_result_tiles)); } + // TODO: remove RETURN_CANCEL_OR_ERROR(zip_coord_tiles(tmp_result_tiles)); + // TODO: create vector in tmp_result tiles with pointers to the actual + // TODO: separate coordinate tiles. 
Applicable only to format version >=5 // Compute the read coordinates for all fragments for each subarray range - std::vector>> range_result_coords; + std::vector> range_result_coords; RETURN_CANCEL_OR_ERROR(compute_range_result_coords( single_fragment, result_tile_map, result_tiles, &range_result_coords)); result_tile_map.clear(); // Compute final coords (sorted in the result layout) of the whole subarray. - RETURN_CANCEL_OR_ERROR(compute_subarray_coords( - &range_result_coords, tile_coords, result_coords)); + RETURN_CANCEL_OR_ERROR( + compute_subarray_coords(&range_result_coords, result_coords)); range_result_coords.clear(); return Status::Ok(); } -template Status Reader::dedup_result_coords( - std::vector>* result_coords) const { + std::vector* result_coords) const { STATS_FUNC_IN(reader_dedup_coords); - auto coords_size = array_schema_->coords_size(); auto coords_end = result_coords->end(); auto it = skip_invalid_elements(result_coords->begin(), coords_end); while (it != coords_end) { auto next_it = skip_invalid_elements(std::next(it), coords_end); - if (next_it != coords_end && - !std::memcmp(it->coords_, next_it->coords_, coords_size)) { - if (it->tile_->frag_idx_ < next_it->tile_->frag_idx_) { + if (next_it != coords_end && it->same_coords(*next_it)) { + if (it->tile_->frag_idx() < next_it->tile_->frag_idx()) { it->invalidate(); it = skip_invalid_elements(++it, coords_end); } else { @@ -1370,11 +1342,9 @@ Status Reader::dense_read() { // Compute result coordinates from the sparse fragments // `sparse_result_tiles` will hold all the relevant result tiles of // sparse fragments - std::vector> result_coords; + std::vector result_coords; std::vector sparse_result_tiles; - std::vector> tile_coords; - RETURN_NOT_OK(compute_result_coords( - &sparse_result_tiles, &tile_coords, &result_coords)); + RETURN_NOT_OK(compute_result_coords(&sparse_result_tiles, &result_coords)); // Compute result cell slabs. 
// `result_space_tiles` will hold all the relevant result tiles of @@ -1385,7 +1355,7 @@ Status Reader::dense_read() { std::vector result_tiles; auto& subarray = read_state_.partitioner_.current(); subarray.compute_tile_coords(); - compute_result_cell_slabs( + compute_result_cell_slabs( subarray, &result_space_tiles, &result_coords, @@ -1394,8 +1364,7 @@ Status Reader::dense_read() { // Clear sparse coordinate tiles (not needed any more) for (auto& tile : sparse_result_tiles) - tile.attr_tiles_.erase(constants::coords); - tile_coords.clear(); + tile.erase_tile(constants::coords); // Needed when copying the cells auto stride = array_schema_->domain()->stride(subarray.layout()); @@ -1552,57 +1521,56 @@ void Reader::fill_dense_coords_col_slab( } Status Reader::filter_tiles( - const std::string& attribute, + const std::string& name, const std::vector& result_tiles) const { STATS_FUNC_IN(reader_filter_tiles); - auto var_size = array_schema_->var_size(attribute); + auto var_size = array_schema_->var_size(name); auto num_tiles = static_cast(result_tiles.size()); auto encryption_key = array_->encryption_key(); auto statuses = parallel_for(0, num_tiles, [&, this](uint64_t i) { auto& tile = result_tiles[i]; - auto& fragment = fragment_metadata_[tile->frag_idx_]; + auto& fragment = fragment_metadata_[tile->frag_idx()]; auto format_version = fragment->format_version(); // Applicable for zipped coordinates only to versions < 5 // Applicable for separate coordinates only to version >= 5 - if (attribute != constants::coords || - (attribute == constants::coords && format_version < 5) || - (array_schema_->is_dim(attribute) && format_version >= 5)) { - auto it = tile->attr_tiles_.find(attribute); - // Skip non-existent attributes (e.g. coords in the dense case). 
- if (it == tile->attr_tiles_.end()) + if (name != constants::coords || + (name == constants::coords && format_version < 5) || + (array_schema_->is_dim(name) && format_version >= 5)) { + auto tile_pair = tile->tile_pair(name); + + // Skip non-existent attributes/dimensions (e.g. coords in the + // dense case). + if (tile_pair == nullptr || tile_pair->first.empty()) return Status::Ok(); // Get information about the tile in its fragment - auto tile_attr_uri = fragment->uri(attribute); + auto tile_attr_uri = fragment->uri(name); + auto tile_idx = tile->tile_idx(); uint64_t tile_attr_offset; RETURN_NOT_OK(fragment->file_offset( - *encryption_key, attribute, tile->tile_idx_, &tile_attr_offset)); + *encryption_key, name, tile_idx, &tile_attr_offset)); - auto& tile_pair = it->second; - auto& t = tile_pair.first; - auto& t_var = tile_pair.second; + auto& t = tile_pair->first; + auto& t_var = tile_pair->second; if (!t.filtered()) { // Decompress, etc. - RETURN_NOT_OK(filter_tile(attribute, &t, var_size)); + RETURN_NOT_OK(filter_tile(name, &t, var_size)); RETURN_NOT_OK(storage_manager_->write_to_cache( tile_attr_uri, tile_attr_offset, t.buffer())); } if (var_size && !t_var.filtered()) { - auto tile_attr_var_uri = fragment->var_uri(attribute); + auto tile_attr_var_uri = fragment->var_uri(name); uint64_t tile_attr_var_offset; RETURN_NOT_OK(fragment->file_var_offset( - *encryption_key, - attribute, - tile->tile_idx_, - &tile_attr_var_offset)); + *encryption_key, name, tile_idx, &tile_attr_var_offset)); // Decompress, etc. 
- RETURN_NOT_OK(filter_tile(attribute, &t_var, false)); + RETURN_NOT_OK(filter_tile(name, &t_var, false)); RETURN_NOT_OK(storage_manager_->write_to_cache( tile_attr_var_uri, tile_attr_var_offset, t_var.buffer())); } @@ -1642,16 +1610,14 @@ Status Reader::filter_tile( return Status::Ok(); } -template Status Reader::get_all_result_coords( - ResultTile* tile, std::vector>* result_coords) const { - auto dim_num = array_schema_->dim_num(); - const auto& t = tile->attr_tiles_.find(constants::coords)->second.first; + ResultTile* tile, std::vector* result_coords) const { + // TODO: do not use the following line, find cell_num in another way + auto tile_pair = tile->tile_pair(constants::coords); + const auto& t = tile_pair->first; auto coords_num = t.cell_num(); - auto c = (T*)t.internal_data(); - for (uint64_t i = 0; i < coords_num; ++i) - result_coords->emplace_back(tile, &c[i * dim_num], i); + result_coords->emplace_back(tile, i); return Status::Ok(); } @@ -1711,13 +1677,13 @@ Status Reader::init_read_state() { } Status Reader::init_tile( - uint32_t format_version, const std::string& attribute, Tile* tile) const { + uint32_t format_version, const std::string& name, Tile* tile) const { // For easy reference auto domain = array_schema_->domain(); - auto cell_size = array_schema_->cell_size(attribute); + auto cell_size = array_schema_->cell_size(name); auto capacity = array_schema_->capacity(); - auto type = array_schema_->type(attribute); - auto is_coords = (attribute == constants::coords); + auto type = array_schema_->type(name); + auto is_coords = (name == constants::coords); auto dim_num = (is_coords) ? array_schema_->dim_num() : 0; auto cell_num_per_tile = (has_coords()) ? 
capacity : domain->cell_num_per_tile(); @@ -1732,13 +1698,13 @@ Status Reader::init_tile( Status Reader::init_tile( uint32_t format_version, - const std::string& attribute, + const std::string& name, Tile* tile, Tile* tile_var) const { // For easy reference auto domain = array_schema_->domain(); auto capacity = array_schema_->capacity(); - auto type = array_schema_->type(attribute); + auto type = array_schema_->type(name); auto cell_num_per_tile = (has_coords()) ? capacity : domain->cell_num_per_tile(); auto tile_size = cell_num_per_tile * constants::cell_var_offset_size; @@ -1756,7 +1722,7 @@ Status Reader::init_tile( } Status Reader::read_tiles( - const std::string& attr, + const std::string& name, const std::vector& result_tiles) const { // Shortcut for empty tile vec if (result_tiles.empty()) @@ -1764,7 +1730,7 @@ Status Reader::read_tiles( // Read the tiles asynchronously std::vector> tasks; - RETURN_CANCEL_OR_ERROR(read_tiles(attr, result_tiles, &tasks)); + RETURN_CANCEL_OR_ERROR(read_tiles(name, result_tiles, &tasks)); // Wait for the reads to finish and check statuses. auto statuses = @@ -1776,11 +1742,11 @@ Status Reader::read_tiles( } Status Reader::read_tiles( - const std::string& attribute, + const std::string& name, const std::vector& result_tiles, std::vector>* tasks) const { // For each tile, read from its fragment. 
- bool var_size = array_schema_->var_size(attribute); + bool var_size = array_schema_->var_size(name); auto num_tiles = static_cast(result_tiles.size()); auto encryption_key = array_->encryption_key(); @@ -1788,43 +1754,50 @@ Status Reader::read_tiles( std::map>> all_regions; for (uint64_t i = 0; i < num_tiles; i++) { auto& tile = result_tiles[i]; - auto& fragment = fragment_metadata_[tile->frag_idx_]; + auto& fragment = fragment_metadata_[tile->frag_idx()]; auto format_version = fragment->format_version(); // Applicable for zipped coordinates only to versions < 5 - if (attribute == constants::coords && format_version >= 5) + if (name == constants::coords && format_version >= 5) continue; // Applicable to separate coordinates only to versions >= 5 - if (array_schema_->is_dim(attribute) && format_version < 5) + auto is_dim = array_schema_->is_dim(name); + if (is_dim && format_version < 5) continue; - auto it = tile->attr_tiles_.find(attribute); - if (it == tile->attr_tiles_.end()) - it = tile->attr_tiles_ - .insert(std::pair( - attribute, ResultTile::TilePair(Tile(), Tile()))) - .first; - // Initialize the tile(s) - auto& tile_pair = it->second; - auto& t = tile_pair.first; - auto& t_var = tile_pair.second; + if (is_dim) { + auto dim_num = array_schema_->dim_num(); + for (unsigned d = 0; d < dim_num; ++d) { + if (array_schema_->dimension(d)->name() == name) { + tile->init_coord_tile(name, d); + break; + } + } + } else { + tile->init_attr_tile(name); + } + auto tile_pair = tile->tile_pair(name); + assert(tile_pair != nullptr); + auto& t = tile_pair->first; + auto& t_var = tile_pair->second; if (!var_size) { - RETURN_NOT_OK(init_tile(format_version, attribute, &t)); + RETURN_NOT_OK(init_tile(format_version, name, &t)); } else { - RETURN_NOT_OK(init_tile(format_version, attribute, &t, &t_var)); + RETURN_NOT_OK(init_tile(format_version, name, &t, &t_var)); } // Get information about the tile in its fragment - auto tile_attr_uri = fragment->uri(attribute); + auto 
tile_attr_uri = fragment->uri(name); uint64_t tile_attr_offset; + auto tile_idx = tile->tile_idx(); RETURN_NOT_OK(fragment->file_offset( - *encryption_key, attribute, tile->tile_idx_, &tile_attr_offset)); - auto tile_size = fragment->tile_size(attribute, tile->tile_idx_); + *encryption_key, name, tile_idx, &tile_attr_offset)); + auto tile_size = fragment->tile_size(name, tile_idx); uint64_t tile_persisted_size; RETURN_NOT_OK(fragment->persisted_tile_size( - *encryption_key, attribute, tile->tile_idx_, &tile_persisted_size)); + *encryption_key, name, tile_idx, &tile_persisted_size)); // Try the cache first. bool cache_hit; @@ -1845,19 +1818,16 @@ Status Reader::read_tiles( } if (var_size) { - auto tile_attr_var_uri = fragment->var_uri(attribute); + auto tile_attr_var_uri = fragment->var_uri(name); uint64_t tile_attr_var_offset; RETURN_NOT_OK(fragment->file_var_offset( - *encryption_key, attribute, tile->tile_idx_, &tile_attr_var_offset)); + *encryption_key, name, tile_idx, &tile_attr_var_offset)); uint64_t tile_var_size; RETURN_NOT_OK(fragment->tile_var_size( - *encryption_key, attribute, tile->tile_idx_, &tile_var_size)); + *encryption_key, name, tile_idx, &tile_var_size)); uint64_t tile_var_persisted_size; RETURN_NOT_OK(fragment->persisted_tile_var_size( - *encryption_key, - attribute, - tile->tile_idx_, - &tile_var_persisted_size)); + *encryption_key, name, tile_idx, &tile_var_persisted_size)); RETURN_NOT_OK(storage_manager_->read_from_cache( tile_attr_var_uri, @@ -1913,26 +1883,22 @@ void Reader::reset_buffer_sizes() { } } -template Status Reader::sort_result_coords( - std::vector>* result_coords, Layout layout) const { + std::vector* result_coords, Layout layout) const { STATS_FUNC_IN(reader_sort_coords); // TODO: do not sort if it is single fragment and // (i) it is single dimension, or (ii) it is global order - auto dim_num = array_schema_->dim_num(); auto domain = array_schema_->domain(); if (layout == Layout::ROW_MAJOR) { - parallel_sort( - 
result_coords->begin(), result_coords->end(), RowCmp(dim_num)); + parallel_sort(result_coords->begin(), result_coords->end(), RowCmp(domain)); } else if (layout == Layout::COL_MAJOR) { - parallel_sort( - result_coords->begin(), result_coords->end(), ColCmp(dim_num)); + parallel_sort(result_coords->begin(), result_coords->end(), ColCmp(domain)); } else if (layout == Layout::GLOBAL_ORDER) { parallel_sort( - result_coords->begin(), result_coords->end(), GlobalCmp(domain)); + result_coords->begin(), result_coords->end(), GlobalCmp(domain)); } else { assert(false); } @@ -1949,11 +1915,9 @@ Status Reader::sparse_read() { // Compute result coordinates from the sparse fragments // `sparse_result_tiles` will hold all the relevant result tiles of // sparse fragments - std::vector> result_coords; + std::vector result_coords; std::vector sparse_result_tiles; - std::vector> tile_coords; - RETURN_NOT_OK(compute_result_coords( - &sparse_result_tiles, &tile_coords, &result_coords)); + RETURN_NOT_OK(compute_result_coords(&sparse_result_tiles, &result_coords)); std::vector result_tiles; for (auto& srt : sparse_result_tiles) result_tiles.push_back(&srt); @@ -1974,8 +1938,7 @@ Status Reader::sparse_read() { // Clear sparse coordinate tiles (not needed any more) for (auto& tile : sparse_result_tiles) - tile.attr_tiles_.erase(constants::coords); - tile_coords.clear(); + tile.erase_tile(constants::coords); // Copy cells for (const auto& attr : attributes_) { @@ -2041,30 +2004,22 @@ bool Reader::coords_overwritten(unsigned frag_idx, const T* coords) const { Status Reader::zip_coord_tiles( const std::vector& tmp_result_tiles) const { - // Initialize zipped coordinate tiles - for (auto& tile : tmp_result_tiles) { - auto it = tile->attr_tiles_.find(constants::coords); - if (it == tile->attr_tiles_.end()) - tile->attr_tiles_.emplace( - constants::coords, ResultTile::TilePair(Tile(), Tile())); - } - // Zip coordinate tiles auto tile_num = (uint64_t)tmp_result_tiles.size(); auto dim_num = 
array_schema_->dim_num(); auto statuses = parallel_for(0, tile_num, [&](uint64_t t) { - const auto& fragment = fragment_metadata_[tmp_result_tiles[t]->frag_idx_]; + const auto& fragment = fragment_metadata_[tmp_result_tiles[t]->frag_idx()]; auto format_version = fragment->format_version(); if (format_version >= 5) { // Applicable only to version >= 5 - auto& new_tile = - tmp_result_tiles[t]->attr_tiles_[constants::coords].first; + auto coords_tile_pair = tmp_result_tiles[t]->tile_pair(constants::coords); + assert(coords_tile_pair != nullptr); + auto& new_tile = coords_tile_pair->first; RETURN_NOT_OK(init_tile(format_version, constants::coords, &new_tile)); for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); - const auto& coord_tile = - tmp_result_tiles[t]->attr_tiles_[dim_name].first; + auto tile_pair = tmp_result_tiles[t]->tile_pair(dim_name); + const auto& coord_tile = tile_pair->first; new_tile.write(coord_tile); - tmp_result_tiles[t]->attr_tiles_.erase(dim_name); } new_tile.zip_coordinates(); } diff --git a/tiledb/sm/query/reader.h b/tiledb/sm/query/reader.h index fde33aa1da4..58ececf5d2c 100644 --- a/tiledb/sm/query/reader.h +++ b/tiledb/sm/query/reader.h @@ -407,7 +407,7 @@ class Reader { void compute_result_cell_slabs( const Subarray& subarray, std::map>* result_space_tiles, - std::vector>* result_coords, + std::vector* result_coords, std::vector* result_tiles, std::vector* result_cell_slabs) const; @@ -442,7 +442,7 @@ class Reader { void compute_result_cell_slabs_row_col( const Subarray& subarray, std::map>* result_space_tiles, - std::vector>* result_coords, + std::vector* result_coords, uint64_t* result_coords_pos, std::vector* result_tiles, std::set>* frag_tile_set, @@ -471,7 +471,7 @@ class Reader { void compute_result_cell_slabs_global( const Subarray& subarray, std::map>* result_space_tiles, - std::vector>* result_coords, + std::vector* result_coords, std::vector* result_tiles, std::vector* 
result_cell_slabs) const; @@ -528,27 +528,25 @@ class Reader { /* ********************************* */ /** - * Deletes the tiles on the input attribute from the result tiles. + * Deletes the tiles on the input attribute/dimension from the result tiles. * - * @param attr The attribute name. + * @param name The attribute/dimension name. * @param result_tiles The result tiles to delete from. * @return void */ void clear_tiles( - const std::string& attr, + const std::string& name, const std::vector& result_tiles) const; /** * Compute the maximal cell slabs of contiguous sparse coordinates. * - * @tparam T The coords type. * @param coords The coordinates to compute the slabs from. * @param result_cell_slabs The result cell slabs to compute. * @return Status */ - template Status compute_result_cell_slabs( - const std::vector>& result_coords, + const std::vector& result_coords, std::vector* result_cell_slabs) const; /** @@ -568,7 +566,7 @@ class Reader { unsigned frag_idx, ResultTile* tile, const std::vector& range, - std::vector>* result_coords) const; + std::vector* result_coords) const; /** * Computes the result coordinates for each range of the query @@ -589,7 +587,7 @@ class Reader { const std::vector& single_fragment, const std::map, size_t>& result_tile_map, std::vector* result_tiles, - std::vector>>* range_result_coords); + std::vector>* range_result_coords); /** * Computes the result coordinates of a given range of the query @@ -609,18 +607,13 @@ class Reader { uint64_t range_idx, const std::map, size_t>& result_tile_map, std::vector* result_tiles, - std::vector>* range_result_coords); + std::vector* range_result_coords); /** * Computes the final subarray result coordinates, which will be * deduplicated and sorted on the specified subarray layout. * - * @tparam T The domain type. * @param range_result_coords The result coordinates for each subarray range. 
- * @param tile_coords If the subarray layout is global order, this - * function will store the unique tile coordinates of the subarray - * coordinates in `tile_coords`. Then the element of `result_coords` will - * store only pointers to the unique tile coordinates. * @param result_coords The final (subarray) result coordinates to be * retrieved. * @return Status @@ -628,11 +621,9 @@ class Reader { * @note the function will try to gradually clean up ``range_result_coords`` * as it is done processing its elements to quickly reclaim memory. */ - template Status compute_subarray_coords( - std::vector>>* range_result_coords, - std::vector>* tile_coords, - std::vector>* result_coords); + std::vector>* range_result_coords, + std::vector* result_coords); /** * Computes info about the sparse result tiles, such as which fragment they @@ -656,22 +647,6 @@ class Reader { std::map, size_t>* result_tile_map, std::vector* single_fragment); - /** - * Computes the sparse tile coordinates. It stores the unique tile - * coordinates in `tile_coords`, and then it stores pointers to - * those tile coordinates in the elements of`result_coords`. - * - * @tparam T The domain data type. - * @param result_coords The result coordinates. - * @param tile_coords The unique tile coordinates of the result - * coordinates. - * @return - */ - template - Status compute_sparse_tile_coords( - std::vector>* result_coords, - std::vector>* tile_coords) const; - /** * Copies the cells for the input attribute and result cell slabs, into * the corresponding result buffers. @@ -689,10 +664,10 @@ class Reader { const std::vector& result_cell_slabs); /** - * Copies the cells for the input **fixed-sized** attribute and result - * cell slabs, into the corresponding result buffers. + * Copies the cells for the input **fixed-sized** attribute/dimension and + * result cell slabs, into the corresponding result buffers. * - * @param attribute The targeted attribute. + * @param name The targeted attribute/dimension.
* @param stride If it is `UINT64_MAX`, then the cells in the result * cell slabs are all contiguous. Otherwise, each cell in the * result cell slabs are `stride` cells apart from each other. @@ -700,15 +675,15 @@ class Reader { * @return Status */ Status copy_fixed_cells( - const std::string& attribute, + const std::string& name, uint64_t stride, const std::vector& result_cell_slabs); /** - * Copies the cells for the input **var-sized** attribute and result + * Copies the cells for the input **var-sized** attribute/dimension and result * cell slabs, into the corresponding result buffers. * - * @param attribute The targeted attribute. + * @param name The targeted attribute/dimension. * @param stride If it is `UINT64_MAX`, then the cells in the result * cell slabs are all contiguous. Otherwise, each cell in the * result cell slabs are `stride` cells apart from each other. @@ -716,15 +691,16 @@ class Reader { * @return Status */ Status copy_var_cells( - const std::string& attribute, + const std::string& name, uint64_t stride, const std::vector& result_cell_slabs); /** - * Computes offsets into destination buffers for the given attribute's offset - * and variable-length data, for the given list of result cell slabs. + * Computes offsets into destination buffers for the given + * attribute/dimension's offset and variable-length data, for the given list + * of result cell slabs. * - * @param attribute The variable-length attribute + * @param name The variable-length attribute/dimension. * @param stride If it is `UINT64_MAX`, then the cells in the result * cell slabs are all contiguous. Otherwise, each cell in the * result cell slabs are `stride` cells apart from each other.
@@ -742,7 +718,7 @@ class Reader { * @return Status */ Status compute_var_cell_destinations( - const std::string& attribute, + const std::string& name, uint64_t stride, const std::vector& result_cell_slabs, std::vector>* offset_offsets_per_cs, @@ -766,28 +742,21 @@ class Reader { * Computes the result coordinates from the sparse fragments. * * @param result_tiles This will store the unique result tiles. - * @param tile_coords If the subarray layout is global order, this - * function will store the unique tile coordinates of the subarray - * coordinates in `tile_coords`. Then the element of `result_coords` - * will store only pointers to the unique tile coordinates. * @param result_coords This will store the result coordinates. */ template Status compute_result_coords( std::vector* result_tiles, - std::vector>* tile_coords, - std::vector>* result_coords); + std::vector* result_coords); /** * Deduplicates the input result coordinates, breaking ties giving preference * to the largest fragment index (i.e., it prefers more recent fragments). * - * @tparam T The coords type. * @param result_coords The result coordinates to dedup. * @return Status */ - template - Status dedup_result_coords(std::vector>* result_coords) const; + Status dedup_result_coords(std::vector* result_coords) const; /** * Performs a read on a dense array. @@ -885,15 +854,15 @@ class Reader { const T* start, uint64_t num, void* buff, uint64_t* offset) const; /** - * Filters the tiles on a particular attribute from all input fragments - * based on the tile info in `result_tiles`. + * Filters the tiles on a particular attribute/dimension from all input + * fragments based on the tile info in `result_tiles`. 
* - * @param attribute Attribute whose tiles will be filtered + * @param name Attribute/dimension whose tiles will be filtered * @param result_tiles Vector containing the tiles to be filtered * @return Status */ Status filter_tiles( - const std::string& attribute, + const std::string& name, const std::vector& result_tiles) const; /** @@ -912,14 +881,12 @@ class Reader { /** * Gets all the result coordinates of the input tile into `result_coords`. * - * @tparam T The coords type. * @param result_tile The result tile to read the coordinates from. * @param result_coords The result coordinates to copy into. * @return Status */ - template Status get_all_result_coords( - ResultTile* tile, std::vector>* result_coords) const; + ResultTile* tile, std::vector* result_coords) const; /** Returns `true` if the coordinates are included in the attributes. */ bool has_coords() const; @@ -931,56 +898,56 @@ class Reader { * Initializes a fixed-sized tile. * * @param format_version The format version of the tile. - * @param attribute The attribute the tile belongs to. + * @param name The attribute/dimension the tile belongs to. * @param tile The tile to be initialized. * @return Status */ Status init_tile( - uint32_t format_version, const std::string& attribute, Tile* tile) const; + uint32_t format_version, const std::string& name, Tile* tile) const; /** * Initializes a var-sized tile. * * @param format_version The format version of the tile. - * @param attribute The attribute the tile belongs to. + * @param name The attribute/dimension the tile belongs to. * @param tile The offsets tile to be initialized. * @param tile_var The var-sized data tile to be initialized. * @return Status */ Status init_tile( uint32_t format_version, - const std::string& attribute, + const std::string& name, Tile* tile, Tile* tile_var) const; /** - * Retrieves the tiles on a particular attribute and stores it in the - * appropriate result tile. 
+ * Retrieves the tiles on a particular attribute or dimension and stores it + * in the appropriate result tile. * - * @param attr The attribute name. + * @param name The attribute/dimension name. * @param result_tiles The retrieved tiles will be stored inside the * `ResultTile` instances in this vector. * @return Status */ Status read_tiles( - const std::string& attr, + const std::string& name, const std::vector& result_tiles) const; /** - * Retrieves the tiles on a particular attribute and stores it in the - * appropriate result tile. + * Retrieves the tiles on a particular attribute or dimension and stores it + * in the appropriate result tile. * * The reads are done asynchronously, and futures for each read operation are * added to the output parameter. * - * @param attribute The attribute name. + * @param name The attribute/dimension name. * @param result_tiles The retrieved tiles will be stored inside the * `ResultTile` instances in this vector. * @param tasks Vector to hold futures for the read tasks. * @return Status */ Status read_tiles( - const std::string& attribute, + const std::string& name, const std::vector& result_tiles, std::vector>* tasks) const; @@ -994,14 +961,12 @@ class Reader { /** * Sorts the input result coordinates according to the subarray layout. * - * @tparam T The coords type. * @param result_coords The coordinates to sort. * @param layout The layout to sort into. * @return Status */ - template Status sort_result_coords( - std::vector>* result_coords, Layout layout) const; + std::vector* result_coords, Layout layout) const; /** * Performs a read on a sparse array. diff --git a/tiledb/sm/query/result_cell_slab.h b/tiledb/sm/query/result_cell_slab.h index 225da8e893f..9e363584c83 100644 --- a/tiledb/sm/query/result_cell_slab.h +++ b/tiledb/sm/query/result_cell_slab.h @@ -121,18 +121,6 @@ struct ResultCellSlab { std::swap(start_, result_cell_slab.start_); std::swap(length_, result_cell_slab.length_); } - - /** For debugging. 
*/ - void print() const { - if (tile_ == nullptr) { - std::cout << "fragment: " << -1 << "\n"; - } else { - std::cout << "fragment: " << tile_->frag_idx_ << "\n"; - std::cout << "tile: " << tile_->tile_idx_ << "\n"; - } - std::cout << "start: " << start_ << "\n"; - std::cout << "length: " << length_ << "\n"; - } }; } // namespace sm diff --git a/tiledb/sm/query/result_coords.h b/tiledb/sm/query/result_coords.h index 45043027a41..ed54098a15d 100644 --- a/tiledb/sm/query/result_coords.h +++ b/tiledb/sm/query/result_coords.h @@ -43,11 +43,8 @@ namespace sm { /** * Stores information about cell coordinates of a sparse fragment - * that are result of a subarray query. - * - * @tparam T The coords type + * that are in the result of a subarray query. */ -template struct ResultCoords { /** * The result tile the coords belong to. @@ -57,20 +54,14 @@ struct ResultCoords { * the scope of those functions. */ ResultTile* tile_; - /** The coordinates. */ - const T* coords_; - /** The coordinates of the tile in the global logical space. */ - const T* tile_coords_; /** The position of the coordinates in the tile. */ uint64_t pos_; /** Whether this instance is "valid". */ bool valid_; /** Constructor. */ - ResultCoords(ResultTile* tile, const T* coords, uint64_t pos) + ResultCoords(ResultTile* tile, uint64_t pos) : tile_(tile) - , coords_(coords) - , tile_coords_(nullptr) , pos_(pos) , valid_(true) { } @@ -85,20 +76,24 @@ struct ResultCoords { return valid_; } - /** Mainly for debugging. 
*/ - void print() const { - if (tile_ == nullptr) { - std::cout << "null tile\n"; - } else { - std::cout << "frag_idx: " << tile_->frag_idx_ << "\n"; - std::cout << "tile_idx: " << tile_->tile_idx_ << "\n"; - } - std::cout << "pos: " << pos_ << "\n"; - std::cout << "valid: " << valid_ << "\n"; - if (coords_ != nullptr) - std::cout << "first coord: " << coords_[0] << "\n"; - if (tile_coords_ != nullptr) - std::cout << "first tile coord: " << tile_coords_[0] << "\n"; + /** + * Returns the coordinate at the object's position `pos_` from the object's + * tile `tile_` on the given dimension. + * + * @param dim_idx The index of the dimension to retrieve the coordinate for. + * @return A constant pointer to the requested coordinate. + */ + const void* coord(unsigned dim_idx) const { + return tile_->coord(pos_, dim_idx); + } + + /** + * Returns true if the coordinates (at the current position) of the + * calling ResultCoords object and the input are the same across all + * dimensions. + */ + bool same_coords(const ResultCoords& rc) const { + return tile_->same_coords(*(rc.tile_), pos_, rc.pos_); } }; diff --git a/tiledb/sm/query/result_space_tile.h b/tiledb/sm/query/result_space_tile.h index e645350469b..765d2236231 100644 --- a/tiledb/sm/query/result_space_tile.h +++ b/tiledb/sm/query/result_space_tile.h @@ -98,21 +98,6 @@ struct ResultSpaceTile { return start_coords_ == rst.start_coords_ && result_tiles_ == rst.result_tiles_; } - - /** - * Prints information about the result tile (mainly for debugging - * purposes). 
- */ - void print() const { - std::cout << "Start coords:\n"; - for (auto c : start_coords_) - std::cout << c << " "; - std::cout << "\n"; - - std::cout << "Result tiles:\n"; - for (const auto& it : result_tiles_) - it.second.print(); - } }; } // namespace sm diff --git a/tiledb/sm/query/result_tile.h b/tiledb/sm/query/result_tile.h index b00cc433fc5..965159b1a0f 100644 --- a/tiledb/sm/query/result_tile.h +++ b/tiledb/sm/query/result_tile.h @@ -55,22 +55,17 @@ struct ResultTile { */ typedef std::pair TilePair; - /** The id of the fragment this tile belongs to. */ - unsigned frag_idx_ = UINT32_MAX; - /** The id of the tile (which helps locating the physical attribute tiles). */ - uint64_t tile_idx_ = UINT64_MAX; - /** - * Maps attribute names to attribute tiles. Note that the coordinates - * are a special attribute as well. - */ - std::unordered_map attr_tiles_; - /** Default constructor. */ ResultTile() = default; - ResultTile(unsigned frag_idx, uint64_t tile_idx) + /** + * Constructor. The number of dimensions `dim_num` is used to allocate + * the separate coordinate tiles. + */ + ResultTile(unsigned frag_idx, uint64_t tile_idx, unsigned dim_num) : frag_idx_(frag_idx) , tile_idx_(tile_idx) { + coord_tiles_.resize(dim_num); } /** Default destructor. */ @@ -93,11 +88,141 @@ struct ResultTile { return frag_idx_ == rt.frag_idx_ && tile_idx_ == rt.tile_idx_; } - /** Prints some information (mainly for debugging purposes). */ - void print() const { - std::cout << "Fragment id: " << frag_idx_ << "\n"; - std::cout << "Tile id: " << tile_idx_ << "\n"; + /** Erases the tile for the input attribute/dimension. 
*/ + void erase_tile(const std::string& name) { + // Handle zipped coordinates tiles + if (name == constants::coords) { + coords_tile_ = TilePair(Tile(), Tile()); + return; + } + + // Handle dimension tile + for (auto& ct : coord_tiles_) { + if (ct.first == name) { + ct.second = TilePair(Tile(), Tile()); + return; + } + } + + // Handle attribute tile + attr_tiles_.erase(name); } + + /** Initializes the result tile for the given attribute. */ + void init_attr_tile(const std::string& name) { + // Nothing to do for the special zipped coordinates tile + if (name == constants::coords) + return; + + // Handle attributes + if (attr_tiles_.find(name) == attr_tiles_.end()) + attr_tiles_.emplace(name, TilePair(Tile(), Tile())); + } + + /** Initializes the result tile for the given dimension name and index. */ + void init_coord_tile(const std::string& name, unsigned dim_idx) { + coord_tiles_[dim_idx] = + std::pair(name, TilePair(Tile(), Tile())); + } + + /** Returns the tile pair for the input attribute or dimension. */ + TilePair* tile_pair(const std::string& name) { + // Handle zipped coordinates tile + if (name == constants::coords) + return &coords_tile_; + + // Handle attribute tile + auto it = attr_tiles_.find(name); + if (it != attr_tiles_.end()) + return &(it->second); + + // Handle separate coordinates tile + for (auto& ct : coord_tiles_) { + if (ct.first == name) + return &(ct.second); + } + + return nullptr; + } + + /** + * Returns a constant pointer to the coordinate at position `pos` for + * dimension `dim_idx`. 
+ */ + const void* coord(uint64_t pos, unsigned dim_idx) const { + // Handle separate coordinate tiles + const auto& coord_tile = coord_tiles_[dim_idx].second.first; + if (!coord_tile.empty()) { + auto coord_buff = (const unsigned char*)coord_tile.internal_data(); + return &coord_buff[pos * coord_tile.cell_size()]; + } + + // Handle zipped coordinates tile + if (!coords_tile_.first.empty()) { + auto coords_size = coords_tile_.first.cell_size(); + auto coord_size = coords_size / coords_tile_.first.dim_num(); + auto coords_buff = + (const unsigned char*)coords_tile_.first.internal_data(); + return &coords_buff[pos * coords_size + dim_idx * coord_size]; + } + + return nullptr; + } + + /** Returns the coordinate size on the input dimension. */ + uint64_t coord_size(unsigned dim_idx) const { + // Handle zipped coordinate tiles + if (!coords_tile_.first.empty()) + return coords_tile_.first.cell_size() / coords_tile_.first.dim_num(); + + // Handle separate coordinate tiles + return coord_tiles_[dim_idx].second.first.cell_size(); + } + + /** + * Returns true if the coordinates of the calling object at position `pos_a` + * and the coordinates of the input result tile at `pos_b` are the same + * across all dimensions. + */ + bool same_coords(const ResultTile& rt, uint64_t pos_a, uint64_t pos_b) const { + auto dim_num = coord_tiles_.size(); + for (unsigned d = 0; d < dim_num; ++d) { + if (std::memcmp(coord(pos_a, d), rt.coord(pos_b, d), coord_size(d)) != 0) + return false; + } + + return true; + } + + /** Returns the zipped coordinates tile. */ + const Tile& coords_tile() const { + return coords_tile_.first; + } + + /** Returns the fragment index. */ + unsigned frag_idx() const { + return frag_idx_; + } + + /** Returns the tile index. */ + uint64_t tile_idx() const { + return tile_idx_; + } + + private: + /** The id of the fragment this tile belongs to. */ + unsigned frag_idx_ = UINT32_MAX; + /** The id of the tile (which helps locating the physical attribute tiles). 
*/ + uint64_t tile_idx_ = UINT64_MAX; + /** Maps attribute names to tiles. */ + std::unordered_map attr_tiles_; + /** The zipped coordinates tile. */ + TilePair coords_tile_; + /** + * The separate coordinate tiles along with their names, sorted on the + * dimension order. + */ + std::vector> coord_tiles_; }; } // namespace sm diff --git a/tiledb/sm/query/writer.cc b/tiledb/sm/query/writer.cc index a352b2656dd..6dcdafed6a7 100644 --- a/tiledb/sm/query/writer.cc +++ b/tiledb/sm/query/writer.cc @@ -2242,7 +2242,7 @@ Status Writer::sort_coords(std::vector* cell_pos) const { (*cell_pos)[i] = i; // Sort the coordinates in global order - parallel_sort(cell_pos->begin(), cell_pos->end(), GlobalCmp2(domain, buffs)); + parallel_sort(cell_pos->begin(), cell_pos->end(), GlobalCmp(domain, &buffs)); return Status::Ok();