From 0315e98c4edb4e5387fce69c12c841aa0c7ec548 Mon Sep 17 00:00:00 2001 From: Stavros Papadopoulos Date: Tue, 18 Feb 2020 12:24:10 -0500 Subject: [PATCH] Refactored RTree towards addressing #93 --- test/src/unit-rtree.cc | 407 ++++++++++++---------- tiledb/sm/array_schema/dimension.cc | 204 ++++++++++-- tiledb/sm/array_schema/dimension.h | 58 +++- tiledb/sm/array_schema/domain.cc | 46 +++ tiledb/sm/array_schema/domain.h | 9 + tiledb/sm/fragment/fragment_metadata.cc | 171 +++------- tiledb/sm/fragment/fragment_metadata.h | 21 +- tiledb/sm/misc/utils.cc | 92 ----- tiledb/sm/misc/utils.h | 12 - tiledb/sm/query/reader.cc | 24 +- tiledb/sm/query/reader.h | 5 - tiledb/sm/rtree/rtree.cc | 426 +++++++++--------------- tiledb/sm/rtree/rtree.h | 122 ++++--- tiledb/sm/subarray/subarray.cc | 11 +- tools/src/commands/info_command.cc | 125 +++---- tools/src/commands/info_command.h | 8 +- 16 files changed, 865 insertions(+), 876 deletions(-) diff --git a/test/src/unit-rtree.cc b/test/src/unit-rtree.cc index 0b99ca652d3..fb01217def4 100644 --- a/test/src/unit-rtree.cc +++ b/test/src/unit-rtree.cc @@ -30,7 +30,9 @@ * Tests the `RTree` class. */ +#include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/enums/layout.h" #include "tiledb/sm/rtree/rtree.h" #include @@ -38,158 +40,206 @@ using namespace tiledb::sm; +// `mbrs` contains a flattened vector of values (low, high) +// per dimension per MBR +template +std::vector create_mbrs(const std::vector& mbrs) { + assert(mbrs.size() % 2 * D == 0); + + uint64_t mbr_num = (uint64_t)(mbrs.size() / (2 * D)); + std::vector ret(mbr_num); + uint64_t r_size = 2 * sizeof(T); + for (uint64_t m = 0; m < mbr_num; ++m) { + ret[m].resize(D); + for (unsigned d = 0; d < D; ++d) { + ret[m][d].set_range(&mbrs[2 * D * m + 2 * d], r_size); + } + } + + return ret; +} + +Domain create_domain( + const std::vector& dim_names, + const std::vector& dim_types, + const std::vector& dim_domains, + const std::vector& dim_tile_extents) { + assert(!dim_names.empty()); + assert(dim_names.size() == dim_types.size()); + assert(dim_names.size() == dim_domains.size()); + assert(dim_names.size() == dim_tile_extents.size()); + + Domain domain(dim_types[0]); + for (size_t d = 0; d < dim_names.size(); ++d) { + Dimension dim(dim_names[d], dim_types[d]); + dim.set_domain(dim_domains[d]); + dim.set_tile_extent(dim_tile_extents[d]); + domain.add_dimension(&dim); + } + domain.init(Layout::ROW_MAJOR, Layout::ROW_MAJOR); + + return domain; +} + TEST_CASE("RTree: Test R-Tree, basic functions", "[rtree][basic]") { // Empty tree RTree rtree0; CHECK(rtree0.height() == 0); CHECK(rtree0.dim_num() == 0); + CHECK(rtree0.domain() == nullptr); CHECK(rtree0.fanout() == 0); - int r0[] = {1, 10}; - std::vector range0; - range0.push_back(&r0[0]); - auto tile_overlap = rtree0.get_tile_overlap(range0); - CHECK(tile_overlap.tile_ranges_.empty()); - CHECK(tile_overlap.tiles_.empty()); // 1D - std::vector mbrs_1d; - int m1[] = {1, 3, 5, 10, 20, 22}; - mbrs_1d.resize(3); - for (int i = 0; i < 3; ++i) - mbrs_1d[i] = &m1[2 * i]; - RTree rtree1(Datatype::INT32, 1, 3, mbrs_1d); + int32_t dim_dom[] = {1, 1000}; + int32_t dim_extent = 10; + Domain dom1 = + create_domain({"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}); + std::vector mbrs_1d = create_mbrs({1, 3, 5, 10, 20, 22}); + RTree rtree1(&dom1, 3); + CHECK(!rtree1.set_leaf(0, mbrs_1d[0]).ok()); + rtree1.set_leaf_num(mbrs_1d.size()); + for (size_t m = 0; m < mbrs_1d.size(); ++m) + CHECK(rtree1.set_leaf(m, mbrs_1d[m]).ok()); + CHECK(!rtree1.set_leaf_num(1).ok()); + rtree1.build_tree(); + CHECK(!rtree1.set_leaf(0, mbrs_1d[0]).ok()); CHECK(rtree1.height() == 2); + CHECK(rtree1.dim_num() == 1); CHECK(rtree1.subtree_leaf_num(0) == 3); CHECK(rtree1.subtree_leaf_num(1) == 1); CHECK(rtree1.subtree_leaf_num(2) == 0); - CHECK(!std::memcmp(rtree1.leaf(0), &m1[0], 2 * sizeof(int))); - CHECK(!std::memcmp(rtree1.leaf(1), &m1[2], 2 * sizeof(int))); - CHECK(!std::memcmp(rtree1.leaf(2), &m1[4], 2 * sizeof(int))); + CHECK(rtree1.leaf(0) == mbrs_1d[0]); + CHECK(rtree1.leaf(1) == mbrs_1d[1]); + CHECK(rtree1.leaf(2) == mbrs_1d[2]); - std::vector range1; - int mbr1[] = {5, 10}; - int r1_no_left[] = {0, 1}; - int r1_left[] = {4, 7}; - int r1_exact[] = {5, 10}; - int r1_full[] = {4, 11}; - int r1_contained[] = {6, 7}; - int r1_right[] = {7, 11}; - int r1_no_right[] = {11, 15}; - range1.push_back(&r1_no_left[0]); - double ratio1 = rtree1.range_overlap(range1, mbr1); + NDRange range1(1); + NDRange mbr1(1); + int32_t mbr1_r[] = {5, 10}; + mbr1[0].set_range(mbr1_r, 2 * sizeof(int32_t)); + int32_t r1_no_left[] = {0, 1}; + int32_t r1_left[] = {4, 7}; + int32_t r1_exact[] = {5, 10}; + int32_t r1_full[] = {4, 11}; + int32_t r1_contained[] = {6, 7}; + int32_t r1_right[] = {7, 11}; + int32_t r1_no_right[] = {11, 15}; + range1[0].set_range(r1_no_left, 2 * sizeof(int32_t)); + double ratio1 = dom1.overlap_ratio(range1, mbr1); CHECK(ratio1 == 0.0); - range1.clear(); - range1.push_back(&r1_left[0]); - ratio1 = rtree1.range_overlap(range1, mbr1); + range1[0].set_range(r1_left, 2 * sizeof(int32_t)); + ratio1 = dom1.overlap_ratio(range1, mbr1); CHECK(ratio1 == 3.0 / 6); - range1.clear(); - range1.push_back(&r1_exact[0]); - ratio1 = rtree1.range_overlap(range1, mbr1); + range1[0].set_range(r1_exact, 2 * sizeof(int32_t)); + ratio1 = dom1.overlap_ratio(range1, mbr1); CHECK(ratio1 == 1.0); - range1.clear(); - range1.push_back(&r1_full[0]); - ratio1 = rtree1.range_overlap(range1, mbr1); + range1[0].set_range(r1_full, 2 * sizeof(int32_t)); + ratio1 = dom1.overlap_ratio(range1, mbr1); CHECK(ratio1 == 1.0); - range1.clear(); - range1.push_back(&r1_contained[0]); - ratio1 = rtree1.range_overlap(range1, mbr1); + range1[0].set_range(r1_contained, 2 * sizeof(int32_t)); + ratio1 = dom1.overlap_ratio(range1, mbr1); CHECK(ratio1 == 2.0 / 6); - range1.clear(); - range1.push_back(&r1_right[0]); - ratio1 = rtree1.range_overlap(range1, mbr1); + range1[0].set_range(r1_right, 2 * sizeof(int32_t)); + ratio1 = dom1.overlap_ratio(range1, mbr1); CHECK(ratio1 == 4.0 / 6); - range1.clear(); - range1.push_back(&r1_no_right[0]); - ratio1 = rtree1.range_overlap(range1, mbr1); + range1[0].set_range(r1_no_right, 2 * sizeof(int32_t)); + ratio1 = dom1.overlap_ratio(range1, mbr1); CHECK(ratio1 == 0.0); // 2D - std::vector mbrs_2d; - int64_t m2[] = {1, 3, 5, 10, 20, 22, 24, 25, 11, 15, 30, 31}; - mbrs_2d.resize(3); - for (int i = 0; i < 3; ++i) - mbrs_2d[i] = &m2[4 * i]; - RTree rtree2(Datatype::INT64, 2, 5, mbrs_2d); + int64_t dim_dom_2[] = {1, 1000}; + int64_t dim_extent_2 = 10; + Domain dom2 = create_domain( + {"d1", "d2"}, + {Datatype::INT64, Datatype::INT64}, + {dim_dom_2, dim_dom_2}, + {&dim_extent_2, &dim_extent_2}); + std::vector mbrs_2d = + create_mbrs({1, 3, 5, 10, 20, 22, 24, 25, 11, 15, 30, 31}); + RTree rtree2(&dom2, 5); + rtree2.set_leaves(mbrs_2d); + rtree2.build_tree(); CHECK(rtree2.height() == 2); CHECK(rtree2.dim_num() == 2); CHECK(rtree2.fanout() == 5); - CHECK(rtree2.type() == Datatype::INT64); - CHECK(!std::memcmp(rtree2.leaf(0), &m2[0], 4 * sizeof(int64_t))); - CHECK(!std::memcmp(rtree2.leaf(1), &m2[4], 4 * sizeof(int64_t))); - CHECK(!std::memcmp(rtree2.leaf(2), &m2[8], 4 * sizeof(int64_t))); - std::vector range2; - int64_t mbr2[] = {5, 10, 2, 9}; + CHECK(rtree2.leaf(0) == mbrs_2d[0]); + CHECK(rtree2.leaf(1) == mbrs_2d[1]); + CHECK(rtree2.leaf(2) == mbrs_2d[2]); + + NDRange range2(2); + int64_t mbr2_r[] = {5, 10, 2, 9}; + NDRange mbr2(2); + mbr2[0].set_range(&mbr2_r[0], 2 * sizeof(int64_t)); + mbr2[1].set_range(&mbr2_r[2], 2 * sizeof(int64_t)); int64_t r2_no[] = {6, 7, 10, 12}; int64_t r2_full[] = {4, 11, 2, 9}; int64_t r2_partial[] = {7, 11, 4, 5}; - range2.push_back(&r2_no[0]); - range2.push_back(&r2_no[2]); - double ratio2 = rtree2.range_overlap(range2, mbr2); + range2[0].set_range(&r2_no[0], 2 * sizeof(int64_t)); + range2[1].set_range(&r2_no[2], 2 * sizeof(int64_t)); + double ratio2 = dom2.overlap_ratio(range2, mbr2); CHECK(ratio2 == 0.0); - range2.clear(); - range2.push_back(&r2_full[0]); - range2.push_back(&r2_full[2]); - ratio2 = rtree2.range_overlap(range2, mbr2); + range2[0].set_range(&r2_full[0], 2 * sizeof(int64_t)); + range2[1].set_range(&r2_full[2], 2 * sizeof(int64_t)); + ratio2 = dom2.overlap_ratio(range2, mbr2); CHECK(ratio2 == 1.0); - range2.clear(); - range2.push_back(&r2_partial[0]); - range2.push_back(&r2_partial[2]); - ratio2 = rtree2.range_overlap(range2, mbr2); + range2[0].set_range(&r2_partial[0], 2 * sizeof(int64_t)); + range2[1].set_range(&r2_partial[2], 2 * sizeof(int64_t)); + ratio2 = dom2.overlap_ratio(range2, mbr2); CHECK(ratio2 == (4.0 / 6) * (2.0 / 8)); // Float datatype - std::vector mbrs_f; - float mf[] = {1.0f, 3.0f, 5.0f, 10.0f, 20.0f, 22.0f}; - mbrs_f.resize(3); - for (int i = 0; i < 3; ++i) - mbrs_f[i] = &mf[2 * i]; - RTree rtreef(Datatype::FLOAT32, 1, 5, mbrs_f); - std::vector rangef; - float mbrf[] = {5.0f, 10.0f}; + float dim_dom_f[] = {1.0, 1000.0}; + float dim_extent_f = 10.0; + std::vector mbrs_f = + create_mbrs({1.0f, 3.0f, 5.0f, 10.0f, 20.0f, 22.0f}); + Domain dom2f = + create_domain({"d"}, {Datatype::FLOAT32}, {dim_dom_f}, {&dim_extent_f}); + RTree rtreef(&dom2f, 5); + rtreef.set_leaves(mbrs_f); + rtreef.build_tree(); + + NDRange rangef(1); + float mbrf_r[] = {5.0f, 10.0f}; + NDRange mbrf(1); + mbrf[0].set_range(mbrf_r, 2 * sizeof(float)); float rf_no_left[] = {0.0, 1.0}; float rf_left[] = {4.0, 7.0}; float rf_exact[] = {5.0, 10.0}; float rf_full[] = {4.0, 11.0}; float rf_right[] = {7.0, 11.0}; float rf_no_right[] = {11.0, 15.0}; - rangef.push_back(&rf_no_left[0]); - double ratiof = rtree1.range_overlap(rangef, mbrf); + rangef[0].set_range(rf_no_left, 2 * sizeof(float)); + double ratiof = dom2f.overlap_ratio(rangef, mbrf); CHECK(ratiof == 0.0); - rangef.clear(); - rangef.push_back(&rf_left[0]); - ratiof = rtreef.range_overlap(rangef, mbrf); + rangef[0].set_range(rf_left, 2 * sizeof(float)); + ratiof = dom2f.overlap_ratio(rangef, mbrf); CHECK(ratiof == 2.0 / 5); - rangef.clear(); - rangef.push_back(&rf_exact[0]); - ratiof = rtreef.range_overlap(rangef, mbrf); + rangef[0].set_range(rf_exact, 2 * sizeof(float)); + ratiof = dom2f.overlap_ratio(rangef, mbrf); CHECK(ratiof == 1.0); - rangef.clear(); - rangef.push_back(&rf_full[0]); - ratiof = rtreef.range_overlap(rangef, mbrf); + rangef[0].set_range(rf_full, 2 * sizeof(float)); + ratiof = dom2f.overlap_ratio(rangef, mbrf); CHECK(ratiof == 1.0); - rangef.clear(); - rangef.push_back(&rf_right[0]); - ratiof = rtreef.range_overlap(rangef, mbrf); + rangef[0].set_range(rf_right, 2 * sizeof(float)); + ratiof = dom2f.overlap_ratio(rangef, mbrf); CHECK(ratiof == 3.0 / 5); - rangef.clear(); - rangef.push_back(&rf_no_right[0]); - ratiof = rtreef.range_overlap(rangef, mbrf); + rangef[0].set_range(rf_no_right, 2 * sizeof(float)); + ratiof = dom2f.overlap_ratio(rangef, mbrf); CHECK(ratiof == 0.0); } TEST_CASE("RTree: Test 1D R-tree, height 2", "[rtree][1d][2h]") { - int m[] = {1, 3, 5, 10, 20, 22}; - std::vector mbrs; - mbrs.resize(3); - for (int i = 0; i < 3; ++i) - mbrs[i] = &m[2 * i]; - // Build tree - RTree rtree(Datatype::INT32, 1, 3, mbrs); + int32_t dim_dom[] = {1, 1000}; + int32_t dim_extent = 10; + Domain dom1 = + create_domain({"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}); + std::vector mbrs = create_mbrs({1, 3, 5, 10, 20, 22}); + RTree rtree(&dom1, 3); + rtree.set_leaves(mbrs); + rtree.build_tree(); CHECK(rtree.height() == 2); CHECK(rtree.dim_num() == 1); CHECK(rtree.fanout() == 3); - CHECK(rtree.type() == Datatype::INT32); // Subtree leaf num CHECK(rtree.subtree_leaf_num(0) == 3); @@ -197,23 +247,21 @@ TEST_CASE("RTree: Test 1D R-tree, height 2", "[rtree][1d][2h]") { CHECK(rtree.subtree_leaf_num(2) == 0); // Tile overlap - std::vector range; - int r_no[] = {25, 30}; - int r_full[] = {0, 22}; - int r_partial[] = {6, 21}; - range.push_back(&r_no[0]); + NDRange range(1); + int32_t r_no[] = {25, 30}; + int32_t r_full[] = {0, 22}; + int32_t r_partial[] = {6, 21}; + range[0].set_range(r_no, 2 * sizeof(int32_t)); auto overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.empty()); - range.clear(); - range.push_back(&r_full[0]); + range[0].set_range(r_full, 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.size() == 1); CHECK(overlap.tile_ranges_[0].first == 0); CHECK(overlap.tile_ranges_[0].second == 2); - range.clear(); - range.push_back(&r_partial[0]); + range[0].set_range(r_partial, 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tile_ranges_.empty()); CHECK(overlap.tiles_.size() == 2); @@ -224,17 +272,19 @@ TEST_CASE("RTree: Test 1D R-tree, height 2", "[rtree][1d][2h]") { } TEST_CASE("RTree: Test 1D R-tree, height 3", "[rtree][1d][3h]") { - int m[] = {1, 3, 5, 10, 20, 22, 30, 35, 36, 38, 40, 49, 50, 51, 65, 69}; - std::vector mbrs; - for (size_t i = 0; i < sizeof(m) / (2 * sizeof(int)); ++i) - mbrs.push_back(&m[2 * i]); - // Build tree - RTree rtree(Datatype::INT32, 1, 3, mbrs); + int32_t dim_dom[] = {1, 1000}; + int32_t dim_extent = 10; + std::vector mbrs = create_mbrs( + {1, 3, 5, 10, 20, 22, 30, 35, 36, 38, 40, 49, 50, 51, 65, 69}); + Domain dom1 = + create_domain({"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}); + RTree rtree(&dom1, 3); + rtree.set_leaves(mbrs); + rtree.build_tree(); CHECK(rtree.height() == 3); CHECK(rtree.dim_num() == 1); CHECK(rtree.fanout() == 3); - CHECK(rtree.type() == Datatype::INT32); // Subtree leaf num CHECK(rtree.subtree_leaf_num(0) == 9); @@ -243,25 +293,23 @@ TEST_CASE("RTree: Test 1D R-tree, height 3", "[rtree][1d][3h]") { CHECK(rtree.subtree_leaf_num(3) == 0); // Tile overlap - std::vector range; - int r_no[] = {0, 0}; - int r_full[] = {1, 69}; - int r_only_tiles[] = {10, 20}; - int r_only_ranges[] = {30, 69}; - int r_tiles_and_ranges[] = {1, 32}; - range.push_back(&r_no[0]); + NDRange range(1); + int32_t r_no[] = {0, 0}; + int32_t r_full[] = {1, 69}; + int32_t r_only_tiles[] = {10, 20}; + int32_t r_only_ranges[] = {30, 69}; + int32_t r_tiles_and_ranges[] = {1, 32}; + range[0].set_range(r_no, 2 * sizeof(int32_t)); auto overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.empty()); - range.clear(); - range.push_back(&r_full[0]); + range[0].set_range(r_full, 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.size() == 1); CHECK(overlap.tile_ranges_[0].first == 0); CHECK(overlap.tile_ranges_[0].second == 7); - range.clear(); - range.push_back(&r_only_tiles[0]); + range[0].set_range(r_only_tiles, 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tile_ranges_.empty()); CHECK(overlap.tiles_.size() == 2); @@ -269,8 +317,7 @@ TEST_CASE("RTree: Test 1D R-tree, height 3", "[rtree][1d][3h]") { CHECK(overlap.tiles_[0].second == 1.0 / 6); CHECK(overlap.tiles_[1].first == 2); CHECK(overlap.tiles_[1].second == 1.0 / 3); - range.clear(); - range.push_back(&r_only_ranges[0]); + range[0].set_range(r_only_ranges, 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.size() == 2); @@ -278,8 +325,7 @@ TEST_CASE("RTree: Test 1D R-tree, height 3", "[rtree][1d][3h]") { CHECK(overlap.tile_ranges_[0].second == 5); CHECK(overlap.tile_ranges_[1].first == 6); CHECK(overlap.tile_ranges_[1].second == 7); - range.clear(); - range.push_back(&r_tiles_and_ranges[0]); + range[0].set_range(r_tiles_and_ranges, 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tile_ranges_.size() == 1); CHECK(overlap.tile_ranges_[0].first == 0); @@ -290,17 +336,22 @@ TEST_CASE("RTree: Test 1D R-tree, height 3", "[rtree][1d][3h]") { } TEST_CASE("RTree: Test 2D R-tree, height 2", "[rtree][2d][2h]") { - int m[] = {1, 3, 2, 4, 5, 7, 6, 9, 10, 12, 10, 15}; - std::vector mbrs; - for (size_t i = 0; i < sizeof(m) / (4 * sizeof(int)); ++i) - mbrs.push_back(&m[4 * i]); - // Build tree - RTree rtree(Datatype::INT32, 2, 3, mbrs); + int32_t dim_dom[] = {1, 1000}; + int32_t dim_extent = 10; + Domain dom2 = create_domain( + {"d1", "d2"}, + {Datatype::INT32, Datatype::INT32}, + {dim_dom, dim_dom}, + {&dim_extent, &dim_extent}); + std::vector mbrs = + create_mbrs({1, 3, 2, 4, 5, 7, 6, 9, 10, 12, 10, 15}); + RTree rtree(&dom2, 3); + rtree.set_leaves(mbrs); + rtree.build_tree(); CHECK(rtree.height() == 2); CHECK(rtree.dim_num() == 2); CHECK(rtree.fanout() == 3); - CHECK(rtree.type() == Datatype::INT32); // Subtree leaf num CHECK(rtree.subtree_leaf_num(0) == 3); @@ -308,26 +359,24 @@ TEST_CASE("RTree: Test 2D R-tree, height 2", "[rtree][2d][2h]") { CHECK(rtree.subtree_leaf_num(2) == 0); // Tile overlap - std::vector range; - int r_no[] = {25, 30, 1, 10}; - int r_full[] = {1, 20, 1, 20}; - int r_partial[] = {5, 12, 8, 12}; - range.push_back(&r_no[0]); - range.push_back(&r_no[2]); + NDRange range(2); + int32_t r_no[] = {25, 30, 1, 10}; + int32_t r_full[] = {1, 20, 1, 20}; + int32_t r_partial[] = {5, 12, 8, 12}; + range[0].set_range(&r_no[0], 2 * sizeof(int32_t)); + range[1].set_range(&r_no[2], 2 * sizeof(int32_t)); auto overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.empty()); - range.clear(); - range.push_back(&r_full[0]); - range.push_back(&r_full[2]); + range[0].set_range(&r_full[0], 2 * sizeof(int32_t)); + range[1].set_range(&r_full[2], 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.size() == 1); CHECK(overlap.tile_ranges_[0].first == 0); CHECK(overlap.tile_ranges_[0].second == 2); - range.clear(); - range.push_back(&r_partial[0]); - range.push_back(&r_partial[2]); + range[0].set_range(&r_partial[0], 2 * sizeof(int32_t)); + range[1].set_range(&r_partial[2], 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tile_ranges_.empty()); CHECK(overlap.tiles_.size() == 2); @@ -338,19 +387,23 @@ TEST_CASE("RTree: Test 2D R-tree, height 2", "[rtree][2d][2h]") { } TEST_CASE("RTree: Test 2D R-tree, height 3", "[rtree][2d][3h]") { - int m[] = {1, 3, 2, 4, 5, 7, 6, 9, 10, 12, 10, 15, - 11, 15, 20, 22, 16, 16, 23, 23, 19, 20, 24, 26, - 25, 28, 30, 32, 30, 35, 35, 37, 40, 42, 40, 42}; - std::vector mbrs; - for (size_t i = 0; i < sizeof(m) / (4 * sizeof(int)); ++i) - mbrs.push_back(&m[4 * i]); - // Build tree - RTree rtree(Datatype::INT32, 2, 3, mbrs); + int32_t dim_dom[] = {1, 1000}; + int32_t dim_extent = 10; + Domain dom2 = create_domain( + {"d1", "d2"}, + {Datatype::INT32, Datatype::INT32}, + {dim_dom, dim_dom}, + {&dim_extent, &dim_extent}); + std::vector mbrs = create_mbrs( + {1, 3, 2, 4, 5, 7, 6, 9, 10, 12, 10, 15, 11, 15, 20, 22, 16, 16, + 23, 23, 19, 20, 24, 26, 25, 28, 30, 32, 30, 35, 35, 37, 40, 42, 40, 42}); + RTree rtree(&dom2, 3); + rtree.set_leaves(mbrs); + rtree.build_tree(); CHECK(rtree.height() == 3); CHECK(rtree.dim_num() == 2); CHECK(rtree.fanout() == 3); - CHECK(rtree.type() == Datatype::INT32); // Subtree leaf num CHECK(rtree.subtree_leaf_num(0) == 9); @@ -359,28 +412,26 @@ TEST_CASE("RTree: Test 2D R-tree, height 3", "[rtree][2d][3h]") { CHECK(rtree.subtree_leaf_num(3) == 0); // Tile overlap - std::vector range; - int r_no[] = {0, 0, 0, 0}; - int r_full[] = {1, 50, 1, 50}; - int r_only_tiles[] = {10, 14, 12, 21}; - int r_only_ranges[] = {11, 42, 20, 42}; - int r_tiles_and_ranges[] = {19, 50, 25, 50}; - range.push_back(&r_no[0]); - range.push_back(&r_no[2]); + NDRange range(2); + int32_t r_no[] = {0, 0, 0, 0}; + int32_t r_full[] = {1, 50, 1, 50}; + int32_t r_only_tiles[] = {10, 14, 12, 21}; + int32_t r_only_ranges[] = {11, 42, 20, 42}; + int32_t r_tiles_and_ranges[] = {19, 50, 25, 50}; + range[0].set_range(&r_no[0], 2 * sizeof(int32_t)); + range[1].set_range(&r_no[2], 2 * sizeof(int32_t)); auto overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.empty()); - range.clear(); - range.push_back(&r_full[0]); - range.push_back(&r_full[2]); + range[0].set_range(&r_full[0], 2 * sizeof(int32_t)); + range[1].set_range(&r_full[2], 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.size() == 1); CHECK(overlap.tile_ranges_[0].first == 0); CHECK(overlap.tile_ranges_[0].second == 8); - range.clear(); - range.push_back(&r_only_tiles[0]); - range.push_back(&r_only_tiles[2]); + range[0].set_range(&r_only_tiles[0], 2 * sizeof(int32_t)); + range[1].set_range(&r_only_tiles[2], 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tile_ranges_.empty()); CHECK(overlap.tiles_.size() == 2); @@ -388,9 +439,8 @@ TEST_CASE("RTree: Test 2D R-tree, height 3", "[rtree][2d][3h]") { CHECK(overlap.tiles_[0].second == 4.0 / 6); CHECK(overlap.tiles_[1].first == 3); CHECK(overlap.tiles_[1].second == (4.0 / 5) * (2.0 / 3)); - range.clear(); - range.push_back(&r_only_ranges[0]); - range.push_back(&r_only_ranges[2]); + range[0].set_range(&r_only_ranges[0], 2 * sizeof(int32_t)); + range[1].set_range(&r_only_ranges[2], 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tiles_.empty()); CHECK(overlap.tile_ranges_.size() == 2); @@ -398,9 +448,8 @@ TEST_CASE("RTree: Test 2D R-tree, height 3", "[rtree][2d][3h]") { CHECK(overlap.tile_ranges_[0].second == 5); CHECK(overlap.tile_ranges_[1].first == 6); CHECK(overlap.tile_ranges_[1].second == 8); - range.clear(); - range.push_back(&r_tiles_and_ranges[0]); - range.push_back(&r_tiles_and_ranges[2]); + range[0].set_range(&r_tiles_and_ranges[0], 2 * sizeof(int32_t)); + range[1].set_range(&r_tiles_and_ranges[2], 2 * sizeof(int32_t)); overlap = rtree.get_tile_overlap(range); CHECK(overlap.tile_ranges_.size() == 1); CHECK(overlap.tile_ranges_[0].first == 6); diff --git a/tiledb/sm/array_schema/dimension.cc b/tiledb/sm/array_schema/dimension.cc index faf719df515..dbcead3b209 100644 --- a/tiledb/sm/array_schema/dimension.cc +++ b/tiledb/sm/array_schema/dimension.cc @@ -57,7 +57,9 @@ Dimension::Dimension() { set_expand_range_v_func(); set_expand_to_tile_func(); set_oob_func(); + set_covered_func(); set_overlap_func(); + set_overlap_ratio_func(); set_tile_num_func(); set_value_in_range_func(); } @@ -73,7 +75,9 @@ Dimension::Dimension(const std::string& name, Datatype type) set_expand_range_v_func(); set_expand_to_tile_func(); set_oob_func(); + set_covered_func(); set_overlap_func(); + set_overlap_ratio_func(); set_tile_num_func(); set_value_in_range_func(); } @@ -218,7 +222,9 @@ Status Dimension::deserialize(ConstBuffer* buff, Datatype type) { set_expand_range_v_func(); set_expand_to_tile_func(); set_oob_func(); + set_covered_func(); set_overlap_func(); + set_overlap_ratio_func(); set_tile_num_func(); set_value_in_range_func(); @@ -260,9 +266,7 @@ bool Dimension::is_anonymous() const { } template -void Dimension::compute_mbr( - const Dimension* dim, const Tile& tile, Range* mbr) { - assert(dim != nullptr); +void Dimension::compute_mbr(const Tile& tile, Range* mbr) { assert(mbr != nullptr); auto data = (const T*)(tile.internal_data()); assert(data != nullptr); @@ -275,12 +279,12 @@ void Dimension::compute_mbr( // Expand the MBR with the rest tile values for (uint64_t c = 1; c < cell_num; ++c) - dim->expand_range_v(&data[c], mbr); + expand_range_v(&data[c], mbr); } void Dimension::compute_mbr(const Tile& tile, Range* mbr) const { assert(compute_mbr_func_ != nullptr); - compute_mbr_func_(this, tile, mbr); + compute_mbr_func_(tile, mbr); } template @@ -299,11 +303,9 @@ void Dimension::crop_range(Range* range) const { } template -void Dimension::expand_range_v(const Dimension* dim, const void* v, Range* r) { - assert(dim != nullptr); +void Dimension::expand_range_v(const void* v, Range* r) { assert(v != nullptr); assert(!r->empty()); - (void)dim; // Not used here auto rt = (const T*)r->data(); auto vt = (const T*)v; T res[2] = {std::min(rt[0], *vt), std::max(rt[1], *vt)}; @@ -312,15 +314,13 @@ void Dimension::expand_range_v(const Dimension* dim, const void* v, Range* r) { void Dimension::expand_range_v(const void* v, Range* r) const { assert(expand_range_v_func_ != nullptr); - expand_range_v_func_(this, v, r); + expand_range_v_func_(v, r); } template -void Dimension::expand_range(const Dimension* dim, const Range& r1, Range* r2) { - assert(dim != nullptr); +void Dimension::expand_range(const Range& r1, Range* r2) { assert(!r1.empty()); assert(!r2->empty()); - (void)dim; // Not used here auto d1 = (const T*)r1.data(); auto d2 = (const T*)r2->data(); T res[2] = {std::min(d1[0], d2[0]), std::max(d1[1], d2[1])}; @@ -329,7 +329,7 @@ void Dimension::expand_range(const Dimension* dim, const Range& r1, Range* r2) { void Dimension::expand_range(const Range& r1, Range* r2) const { assert(expand_range_func_ != nullptr); - expand_range_func_(this, r1, r2); + expand_range_func_(r1, r2); } template @@ -380,12 +380,27 @@ bool Dimension::oob(const void* coord, std::string* err_msg) const { } template -bool Dimension::overlap( - const Dimension* dim, const Range& r1, const Range& r2) { - assert(dim != nullptr); +bool Dimension::covered(const Range& r1, const Range& r2) { + assert(!r1.empty()); + assert(!r2.empty()); + + auto d1 = (const T*)r1.data(); + auto d2 = (const T*)r2.data(); + assert(d1[0] <= d1[1]); + assert(d2[0] <= d2[1]); + + return d1[0] >= d2[0] && d1[1] <= d2[1]; +} + +bool Dimension::covered(const Range& r1, const Range& r2) const { + assert(covered_func_ != nullptr); + return covered_func_(r1, r2); +} + +template +bool Dimension::overlap(const Range& r1, const Range& r2) { assert(!r1.empty()); assert(!r2.empty()); - (void)dim; // Not used here auto d1 = (const T*)r1.data(); auto d2 = (const T*)r2.data(); @@ -394,7 +409,44 @@ bool Dimension::overlap( bool Dimension::overlap(const Range& r1, const Range& r2) const { assert(overlap_func_ != nullptr); - return overlap_func_(this, r1, r2); + return overlap_func_(r1, r2); +} + +template +double Dimension::overlap_ratio(const Range& r1, const Range& r2) { + assert(!r1.empty()); + assert(!r2.empty()); + + auto d1 = (const T*)r1.data(); + auto d2 = (const T*)r2.data(); + assert(d1[0] <= d1[1]); + assert(d2[0] <= d2[1]); + + // No overlap + if (d1[0] > d2[1] || d1[1] < d2[0]) + return 0.0; + + // Compute ratio + auto overlap_start = std::max(d1[0], d2[0]); + auto overlap_end = std::min(d1[1], d2[1]); + auto overlap_range = overlap_end - overlap_start; + auto mbr_range = d2[1] - d2[0]; + auto max = std::numeric_limits::max(); + if (std::numeric_limits::is_integer) { + overlap_range += 1; + mbr_range += 1; + } else { + if (overlap_range == 0) + overlap_range = std::nextafter(overlap_range, max); + if (mbr_range == 0) + mbr_range = std::nextafter(mbr_range, max); + } + return (double)overlap_range / mbr_range; +} + +double Dimension::overlap_ratio(const Range& r1, const Range& r2) const { + assert(overlap_ratio_func_ != nullptr); + return overlap_ratio_func_(r1, r2); } template @@ -422,9 +474,7 @@ uint64_t Dimension::tile_num(const Range& range) const { } template -bool Dimension::value_in_range( - const Dimension* dim, const void* value, const Range& range) { - (void)*dim; // Not used here +bool Dimension::value_in_range(const void* value, const Range& range) { assert(value != nullptr); assert(!range.empty()); auto v = (const T*)value; @@ -434,7 +484,7 @@ bool Dimension::value_in_range( bool Dimension::value_in_range(const void* value, const Range& range) const { assert(value_in_range_func_ != nullptr); - return value_in_range_func_(this, value, range); + return value_in_range_func_(value, range); } // ===== FORMAT ===== @@ -494,8 +544,8 @@ Status Dimension::set_domain(const void* domain) { Status Dimension::set_tile_extent(const void* tile_extent) { if (domain_ == nullptr) - return Status::DimensionError( - "Cannot set tile extent; Domain must be set first"); + return LOG_STATUS(Status::DimensionError( + "Cannot set tile extent; Domain must be set first")); // Note: this check was added in release 1.6.0. Older arrays may have been // serialized with a null extent, and so it is still supported internally. @@ -1085,6 +1135,59 @@ void Dimension::set_oob_func() { } } +void Dimension::set_covered_func() { + switch (type_) { + case Datatype::INT32: + covered_func_ = covered; + break; + case Datatype::INT64: + covered_func_ = covered; + break; + case Datatype::INT8: + covered_func_ = covered; + break; + case Datatype::UINT8: + covered_func_ = covered; + break; + case Datatype::INT16: + covered_func_ = covered; + break; + case Datatype::UINT16: + covered_func_ = covered; + break; + case Datatype::UINT32: + covered_func_ = covered; + break; + case Datatype::UINT64: + covered_func_ = covered; + break; + case Datatype::FLOAT32: + covered_func_ = covered; + break; + case Datatype::FLOAT64: + covered_func_ = covered; + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + covered_func_ = covered; + break; + default: + covered_func_ = nullptr; + break; + } +} + void Dimension::set_overlap_func() { switch (type_) { case Datatype::INT32: @@ -1138,6 +1241,59 @@ void Dimension::set_overlap_func() { } } +void Dimension::set_overlap_ratio_func() { + switch (type_) { + case Datatype::INT32: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::INT64: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::INT8: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::UINT8: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::INT16: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::UINT16: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::UINT32: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::UINT64: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::FLOAT32: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::FLOAT64: + overlap_ratio_func_ = overlap_ratio; + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + overlap_ratio_func_ = overlap_ratio; + break; + default: + overlap_ratio_func_ = nullptr; + break; + } +} + void Dimension::set_tile_num_func() { switch (type_) { case Datatype::INT32: diff --git a/tiledb/sm/array_schema/dimension.h b/tiledb/sm/array_schema/dimension.h index 9d70616eb37..2f79fc7e96d 100644 --- a/tiledb/sm/array_schema/dimension.h +++ b/tiledb/sm/array_schema/dimension.h @@ -126,7 +126,7 @@ class Dimension { * `tile`. */ template - static void compute_mbr(const Dimension* dim, const Tile& tile, Range* mbr); + static void compute_mbr(const Tile& tile, Range* mbr); /** * Crops the input 1D range such that it does not exceed the @@ -146,14 +146,14 @@ class Dimension { /** Expand 1D range `r` using value `v`. */ template - static void expand_range_v(const Dimension* dim, const void* v, Range* r); + static void expand_range_v(const void* v, Range* r); /** Expand 1D range `r2` using 1D range `r1`. */ void expand_range(const Range& r1, Range* r2) const; /** Expand 1D range `r2` using 1D range `r1`. */ template - static void expand_range(const Dimension* dim, const Range& r1, Range* r2); + static void expand_range(const Range& r1, Range* r2); /** * Expands the input 1D range to coincide with the dimension tiles. @@ -195,12 +195,26 @@ class Dimension { static bool oob( const Dimension* dim, const void* coord, std::string* err_msg); + /** Return true if r1 is fully covered by r2. */ + bool covered(const Range& r1, const Range& r2) const; + + /** Return true if r1 is fully covered by r2. */ + template + static bool covered(const Range& r1, const Range& r2); + /** Return true if the input 1D ranges overlap. */ bool overlap(const Range& r1, const Range& r2) const; /** Return true if the input 1D ranges overlap. */ template - static bool overlap(const Dimension* dim, const Range& r1, const Range& r2); + static bool overlap(const Range& r1, const Range& r2); + + /** Return ratio of the overalp of the two input 1D ranges over `r2`. */ + double overlap_ratio(const Range& r1, const Range& r2) const; + + /** Return ratio of the overalp of the two input 1D ranges over `r2`. */ + template + static double overlap_ratio(const Range& r1, const Range& r2); /** Return the number of tiles the input range intersects. */ uint64_t tile_num(const Range& range) const; @@ -211,8 +225,7 @@ class Dimension { /** Returns `true` if `value` is within the 1D `range`. */ template - static bool value_in_range( - const Dimension* dim, const void* value, const Range& range); + static bool value_in_range(const void* value, const Range& range); /** Returns `true` if `value` is within the 1D `range`. */ bool value_in_range(const void* value, const Range& range) const; @@ -276,8 +289,7 @@ class Dimension { * Stores the appropriate templated compute_mbr() function based on the * dimension datatype. */ - std::function - compute_mbr_func_; + std::function compute_mbr_func_; /** * Stores the appropriate templated crop_range() function based on the @@ -289,15 +301,13 @@ class Dimension { * Stores the appropriate templated expand_range() function based on the * dimension datatype. */ - std::function - expand_range_v_func_; + std::function expand_range_v_func_; /** * Stores the appropriate templated expand_range() function based on the * dimension datatype. */ - std::function - expand_range_func_; + std::function expand_range_func_; /** * Stores the appropriate templated expand_to_tile() function based on the @@ -312,12 +322,23 @@ class Dimension { std::function oob_func_; + /** + * Stores the appropriate templated covered() function based on the + * dimension datatype. + */ + std::function covered_func_; + /** * Stores the appropriate templated overlap() function based on the * dimension datatype. */ - std::function - overlap_func_; + std::function overlap_func_; + + /** + * Stores the appropriate templated overlap_ratio() function based on the + * dimension datatype. + */ + std::function overlap_ratio_func_; /** * Stores the appropriate templated tile_num() function based on the @@ -329,8 +350,7 @@ class Dimension { * Stores the appropriate templated value_in_range() function based on the * dimension datatype. */ - std::function - value_in_range_func_; + std::function value_in_range_func_; /* ********************************* */ /* PRIVATE METHODS */ @@ -431,9 +451,15 @@ class Dimension { /** Sets the templated oob() function. */ void set_oob_func(); + /** Sets the templated covered() function. */ + void set_covered_func(); + /** Sets the templated overlap() function. */ void set_overlap_func(); + /** Sets the templated overlap_ratio() function. */ + void set_overlap_ratio_func(); + /** Sets the templated tile_num() function. */ void set_tile_num_func(); diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index 38454cce58f..a9f716e166e 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -601,6 +601,15 @@ void Domain::dump(FILE* out) const { } void Domain::expand_ndrange(const NDRange& r1, NDRange* r2) const { + assert(r2 != nullptr); + + // Assign r1 to r2 if r2 is empty + if (r2->empty()) { + *r2 = r1; + return; + } + + // Expand r2 along all dimensions for (unsigned d = 0; d < dim_num_; ++d) dimensions_[d]->expand_range(r1[d], &(*r2)[d]); } @@ -922,7 +931,22 @@ uint64_t Domain::tile_num(const NDRange& ndrange) const { return ret; } +bool Domain::covered(const NDRange& r1, const NDRange& r2) const { + assert(r1.size() == dim_num_); + assert(r2.size() == dim_num_); + + for (unsigned d = 0; d < dim_num_; ++d) { + if (!dimensions_[d]->covered(r1[d], r2[d])) + return false; + } + + return true; +} + bool Domain::overlap(const NDRange& r1, const NDRange& r2) const { + assert(r1.size() == dim_num_); + assert(r2.size() == dim_num_); + for (unsigned d = 0; d < dim_num_; ++d) { if (!dimensions_[d]->overlap(r1[d], r2[d])) return false; @@ -931,6 +955,28 @@ bool Domain::overlap(const NDRange& r1, const NDRange& r2) const { return true; } +double Domain::overlap_ratio(const NDRange& r1, const NDRange& r2) const { + double ratio = 1.0; + assert(dim_num_ == r1.size()); + assert(dim_num_ == r2.size()); + + for (unsigned d = 0; d < dim_num_; ++d) { + if (!dimensions_[d]->overlap(r1[d], r2[d])) + return 0.0; + + ratio *= dimensions_[d]->overlap_ratio(r1[d], r2[d]); + + // If ratio goes to 0, then the subarray overlap is much smaller than the + // volume of the MBR. Since we have already guaranteed that there is an + // overlap above, we should set the ratio to epsilon. + auto max = std::numeric_limits::max(); + if (ratio == 0) + ratio = std::nextafter(0, max); + } + + return ratio; +} + template int Domain::tile_order_cmp( const Dimension* dim, const void* coord_a, const void* coord_b) { diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index a2a84930c4e..dbe4bd4a285 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -589,9 +589,18 @@ class Domain { */ uint64_t tile_num(const NDRange& ndrange) const; + /** Returns true if r1 is fully covered by r2. */ + bool covered(const NDRange& r1, const NDRange& r2) const; + /** Returns true if the two ND ranges overlap. */ bool overlap(const NDRange& r1, const NDRange& r2) const; + /** + * Return ratio of the overalp of the two input ND ranges over + * the volume of `r2`. + */ + double overlap_ratio(const NDRange& r1, const NDRange& r2) const; + /** * Checks the tile order of the input coordinates on the given dimension. * diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index ec470a417bf..b4e445b0500 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -67,6 +67,7 @@ FragmentMetadata::FragmentMetadata( , dense_(dense) , fragment_uri_(fragment_uri) , timestamp_range_(timestamp_range) { + rtree_ = RTree(array_schema_->domain(), constants::rtree_fanout); meta_file_size_ = 0; version_ = constants::format_version; tile_index_base_ = 0; @@ -83,11 +84,7 @@ FragmentMetadata::FragmentMetadata( } } -FragmentMetadata::~FragmentMetadata() { - auto mbr_num = (uint64_t)mbrs_.size(); - for (uint64_t i = 0; i < mbr_num; ++i) - std::free(mbrs_[i]); -} +FragmentMetadata::~FragmentMetadata() = default; /* ****************************** */ /* API */ @@ -99,23 +96,8 @@ const URI& FragmentMetadata::array_uri() const { Status FragmentMetadata::set_mbr(uint64_t tile, const NDRange& mbr) { // For easy reference - uint64_t mbr_size = 2 * array_schema_->coords_size(); tile += tile_index_base_; - - // Copy MBR - auto new_mbr = (unsigned char*)std::malloc(mbr_size); - auto dim_num = array_schema_->dim_num(); - uint64_t offset = 0; - for (unsigned d = 0; d < dim_num; ++d) { - std::memcpy(&new_mbr[offset], mbr[d].data(), mbr[d].size()); - offset += mbr[d].size(); - } - - // Set MBR - assert(tile < mbrs_.size()); - mbrs_[tile] = new_mbr; - - // Expand non-empty domain + RETURN_NOT_OK(rtree_.set_leaf(tile, mbr)); return expand_non_empty_domain(mbr); } @@ -213,12 +195,11 @@ Status FragmentMetadata::add_max_buffer_sizes_sparse( RETURN_NOT_OK(load_rtree(encryption_key)); // Get tile overlap - std::vector range; auto dim_num = array_schema_->dim_num(); - range.resize(dim_num); - for (unsigned i = 0; i < dim_num; ++i) - range[i] = &subarray[2 * i]; - auto tile_overlap = rtree_.get_tile_overlap(range); + NDRange range(dim_num); + for (unsigned d = 0; d < dim_num; ++d) + range[d].set_range(&subarray[2 * d], 2 * sizeof(T)); + auto tile_overlap = rtree_.get_tile_overlap(range); uint64_t size = 0; // Handle tile ranges @@ -286,41 +267,17 @@ const URI& FragmentMetadata::fragment_uri() const { return fragment_uri_; } -template Status FragmentMetadata::get_tile_overlap( const EncryptionKey& encryption_key, - const std::vector& range, + const NDRange& range, TileOverlap* tile_overlap) { - auto dim_num = array_schema_->dim_num(); - NDRange ndrange(dim_num); - for (unsigned d = 0; d < dim_num; ++d) { - auto r_size = 2 * array_schema_->dimension(d)->coord_size(); - Range r(range[d], r_size); - ndrange[d] = std::move(r); - } - // Return if the range does not overlap the non-empty domain of the fragment - if (!array_schema_->domain()->overlap(ndrange, non_empty_domain_)) + if (!array_schema_->domain()->overlap(range, non_empty_domain_)) return Status::Ok(); - // Handle version > 2 - if (version_ > 2) { - RETURN_NOT_OK(load_rtree(encryption_key)); - *tile_overlap = rtree_.get_tile_overlap(range); - return Status::Ok(); - } - - // Handle version <= 2 - auto mbr_num = mbrs_.size(); - for (size_t t = 0; t < mbr_num; ++t) { - auto m = (const T*)mbrs_[t]; - auto overlap = RTree::range_overlap(range, m); - if (overlap > 0.0) { - auto to = std::pair(t, overlap); - tile_overlap->tiles_.push_back(to); - } - } - + // Get overlap + RETURN_NOT_OK(load_rtree(encryption_key)); + *tile_overlap = rtree_.get_tile_overlap(range); return Status::Ok(); } @@ -407,10 +364,6 @@ Status FragmentMetadata::load(const EncryptionKey& encryption_key) { return load_v3_or_higher(encryption_key); } -const std::vector FragmentMetadata::mbrs() const { - return mbrs_; -} - Status FragmentMetadata::store(const EncryptionKey& encryption_key) { auto array_uri = this->array_uri(); auto fragment_metadata_uri = @@ -487,7 +440,7 @@ Status FragmentMetadata::set_num_tiles(uint64_t num_tiles) { } if (!dense_) { - mbrs_.resize(num_tiles, nullptr); + rtree_.set_leaf_num(num_tiles); sparse_tile_num_ = num_tiles; } @@ -543,10 +496,14 @@ Status FragmentMetadata::file_var_offset( return Status::Ok(); } -const void* FragmentMetadata::mbr(uint64_t tile_idx) const { +const NDRange& FragmentMetadata::mbr(uint64_t tile_idx) const { return rtree_.leaf(tile_idx); } +const std::vector& FragmentMetadata::mbrs() const { + return rtree_.leaves(); +} + Status FragmentMetadata::persisted_tile_size( const EncryptionKey& encryption_key, const std::string& name, @@ -696,7 +653,7 @@ std::vector FragmentMetadata::compute_overlapping_tile_ids( std::vector tids; auto dim_num = array_schema_->dim_num(); - // TODO: fix + // Temporary domain vector std::vector temp(2 * array_schema_->coords_size()); uint8_t offset = 0; for (unsigned d = 0; d < dim_num; ++d) { @@ -742,7 +699,7 @@ FragmentMetadata::compute_overlapping_tile_ids_cov(const T* subarray) const { std::vector> tids; auto dim_num = array_schema_->dim_num(); - // TODO: fix + // Temporary domain vector std::vector temp(2 * array_schema_->coords_size()); uint8_t offset = 0; for (unsigned d = 0; d < dim_num; ++d) { @@ -841,7 +798,7 @@ Status FragmentMetadata::load_rtree(const EncryptionKey& encryption_key) { read_generic_tile_from_file(encryption_key, gt_offsets_.rtree_, &buff)); ConstBuffer cbuff(&buff); - RETURN_NOT_OK(rtree_.deserialize(&cbuff)); + RETURN_NOT_OK(rtree_.deserialize(&cbuff, array_schema_->domain(), version_)); loaded_metadata_.rtree_ = true; @@ -1036,28 +993,26 @@ Status FragmentMetadata::load_last_tile_cell_num(ConstBuffer* buff) { Status FragmentMetadata::load_mbrs(ConstBuffer* buff) { // Get number of MBRs uint64_t mbr_num = 0; - Status st = buff->read(&mbr_num, sizeof(uint64_t)); - if (!st.ok()) { - return LOG_STATUS(Status::FragmentMetadataError( - "Cannot load fragment metadata; Reading number of MBRs failed")); - } + RETURN_NOT_OK(buff->read(&mbr_num, sizeof(uint64_t))); - // Get MBRs - uint64_t mbr_size = 2 * array_schema_->coords_size(); - void* mbr = nullptr; - mbrs_.resize(mbr_num); - for (uint64_t i = 0; i < mbr_num; ++i) { - mbr = std::malloc(mbr_size); - st = buff->read(mbr, mbr_size); - if (!st.ok()) { - std::free(mbr); - return LOG_STATUS(Status::FragmentMetadataError( - "Cannot load fragment metadata; Reading MBR failed")); + // Set leaf level + rtree_.set_leaf_num(mbr_num); + auto domain = array_schema_->domain(); + auto dim_num = domain->dim_num(); + for (uint64_t m = 0; m < mbr_num; ++m) { + NDRange mbr(dim_num); + for (unsigned d = 0; d < dim_num; ++d) { + auto r_size = 2 * domain->dimension(d)->coord_size(); + mbr[d].set_range(buff->cur_data(), r_size); + buff->advance_offset(r_size); } - mbrs_[i] = mbr; + rtree_.set_leaf(m, mbr); } - sparse_tile_num_ = mbrs_.size(); + // Build R-tree bottom-up + rtree_.build_tree(); + + sparse_tile_num_ = mbr_num; return Status::Ok(); } @@ -1357,14 +1312,6 @@ Status FragmentMetadata::load_sparse_tile_num(ConstBuffer* buff) { return Status::Ok(); } -Status FragmentMetadata::create_rtree() { - auto dim_num = array_schema_->dim_num(); - auto type = array_schema_->domain()->type(); - auto rtree = RTree(type, dim_num, constants::rtree_fanout, mbrs_); - rtree_ = std::move(rtree); - return Status::Ok(); -} - Status FragmentMetadata::load_generic_tile_offsets(ConstBuffer* buff) { if (version_ == 3 || version_ == 4) return load_generic_tile_offsets_v3_v4(buff); @@ -1456,7 +1403,6 @@ Status FragmentMetadata::load_v1_v2(const EncryptionKey& encryption_key) { RETURN_NOT_OK(load_last_tile_cell_num(&cbuff)); RETURN_NOT_OK(load_file_sizes(&cbuff)); RETURN_NOT_OK(load_file_var_sizes(&cbuff)); - RETURN_NOT_OK(create_rtree()); delete buff; @@ -1618,7 +1564,7 @@ Status FragmentMetadata::store_rtree( } Status FragmentMetadata::write_rtree(Buffer* buff) { - RETURN_NOT_OK(create_rtree()); + RETURN_NOT_OK(rtree_.build_tree()); RETURN_NOT_OK(rtree_.serialize(buff)); return Status::Ok(); } @@ -1895,47 +1841,6 @@ template Status FragmentMetadata::add_max_buffer_sizes( std::unordered_map>* buffer_sizes); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); -template Status FragmentMetadata::get_tile_overlap( - const EncryptionKey& encryption_key, - const std::vector& range, - TileOverlap* tile_overlap); - template std::vector> FragmentMetadata::compute_overlapping_tile_ids_cov( const int8_t* subarray) const; diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index fa71f1cd0f1..55d1fa0200e 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -181,14 +181,12 @@ class FragmentMetadata { const URI& fragment_uri() const; /** - * Retrieves the overlap of all MBRs with the input range, which is given - * as a vector of [low, high] intervals per dimension. The encryption + * Retrieves the overlap of all MBRs with the input ND range. The encryption * key is needed because certain metadata may have to be loaded on-the-fly. */ - template Status get_tile_overlap( const EncryptionKey& encryption_key, - const std::vector& range, + const NDRange& range, TileOverlap* tile_overlap); /** @@ -206,9 +204,6 @@ class FragmentMetadata { /** Loads the basic metadata from storage. */ Status load(const EncryptionKey& encryption_key); - /** Returns the MBRs of the fragment. Used in format version <=2. */ - const std::vector mbrs() const; - /** Stores all the metadata to storage. */ Status store(const EncryptionKey& encryption_key); @@ -333,7 +328,10 @@ class FragmentMetadata { uint64_t* offset); /** Returns the MBR of the input tile. */ - const void* mbr(uint64_t tile_idx) const; + const NDRange& mbr(uint64_t tile_idx) const; + + /** Returns all the MBRs of all tiles in the fragment. */ + const std::vector& mbrs() const; /** * Retrieves the size of the tile when it is persisted (e.g. the size of the @@ -484,10 +482,6 @@ class FragmentMetadata { /** Keeps track of which metadata has been loaded. */ LoadedMetadata loaded_metadata_; - // TODO(sp): remove after the new dense algorithm is implemented - /** The MBRs (applicable only to the sparse case with irregular tiles). */ - std::vector mbrs_; - /** The size of the fragment metadata file. */ uint64_t meta_file_size_; @@ -574,9 +568,6 @@ class FragmentMetadata { template std::vector compute_overlapping_tile_ids(const T* subarray) const; - /** Creates an RTree (stored in `rtree_`) on top of `mbrs_`. */ - Status create_rtree(); - /** * Retrieves the tile domain for the input `subarray` based on the expanded * `domain_`. diff --git a/tiledb/sm/misc/utils.cc b/tiledb/sm/misc/utils.cc index 7c79bb46a25..90d0aa674ed 100644 --- a/tiledb/sm/misc/utils.cc +++ b/tiledb/sm/misc/utils.cc @@ -659,35 +659,6 @@ inline bool rect_in_rect( return true; } -template -inline bool rect_in_rect( - const T* rect_a, const NDRange& rect_b, unsigned int dim_num) { - for (unsigned d = 0; d < dim_num; ++d) { - auto rb = (const T*)rect_b[d].data(); - if (rect_a[2 * d] < rb[0] || rect_a[2 * d] > rb[1] || - rect_a[2 * d + 1] < rb[0] || rect_a[2 * d + 1] > rb[1]) - return false; - } - - return true; -} - -template -void compute_mbr_union( - unsigned dim_num, const T* mbrs, uint64_t mbr_num, T* mbr_union) { - // Sanity check - if (dim_num == 0 || mbr_num == 0) - return; - - // Set the first rectangle to the union - auto mbr_size = 2 * dim_num * sizeof(T); - std::memcpy(mbr_union, mbrs, mbr_size); - - // Expand the union with every other MBR - for (uint64_t i = 1; i < mbr_num; ++i) - expand_mbr_with_mbr(mbr_union, &mbrs[i * 2 * dim_num], dim_num); -} - template void expand_mbr_with_mbr(T* mbr_a, const T* mbr_b, unsigned int dim_num) { for (unsigned int i = 0; i < dim_num; ++i) { @@ -921,27 +892,6 @@ template bool rect_in_rect( template bool rect_in_rect( const uint64_t* rect_a, const uint64_t* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const int* rect_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const int64_t* rect_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const float* react_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const double* rect_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const int8_t* rect_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const uint8_t* rect_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const int16_t* rect_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const uint16_t* rect_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const uint32_t* rect_a, const NDRange& rect_b, unsigned int dim_num); -template bool rect_in_rect( - const uint64_t* rect_a, const NDRange& rect_b, unsigned int dim_num); - template void expand_mbr_with_mbr( int* mbr_a, const int* mbr_b, unsigned int dim_num); template void expand_mbr_with_mbr( @@ -1097,48 +1047,6 @@ template double coverage( template double coverage( const double* a, const double* b, unsigned dim_num); -template void compute_mbr_union( - unsigned dim_num, const int8_t* mbrs, uint64_t mbr_num, int8_t* mbr_union); -template void compute_mbr_union( - unsigned dim_num, - const uint8_t* mbrs, - uint64_t mbr_num, - uint8_t* mbr_union); -template void compute_mbr_union( - unsigned dim_num, - const int16_t* mbrs, - uint64_t mbr_num, - int16_t* mbr_union); -template void compute_mbr_union( - unsigned dim_num, - const uint16_t* mbrs, - uint64_t mbr_num, - uint16_t* mbr_union); -template void compute_mbr_union( - unsigned dim_num, - const int32_t* mbrs, - uint64_t mbr_num, - int32_t* mbr_union); -template void compute_mbr_union( - unsigned dim_num, - const uint32_t* mbrs, - uint64_t mbr_num, - uint32_t* mbr_union); -template void compute_mbr_union( - unsigned dim_num, - const int64_t* mbrs, - uint64_t mbr_num, - int64_t* mbr_union); -template void compute_mbr_union( - unsigned dim_num, - const uint64_t* mbrs, - uint64_t mbr_num, - uint64_t* mbr_union); -template void compute_mbr_union( - unsigned dim_num, const float* mbrs, uint64_t mbr_num, float* mbr_union); -template void compute_mbr_union( - unsigned dim_num, const double* mbrs, uint64_t mbr_num, double* mbr_union); - } // namespace geometry namespace math { diff --git a/tiledb/sm/misc/utils.h b/tiledb/sm/misc/utils.h index 3ccd7323f4d..bc2c205043a 100644 --- a/tiledb/sm/misc/utils.h +++ b/tiledb/sm/misc/utils.h @@ -256,18 +256,6 @@ bool coords_in_rect( template bool rect_in_rect(const T* rect_a, const T* rect_b, unsigned int dim_num); -/** - * Checks if `rect_a` is inside `rect_b`. - * - * @tparam T The domain type. - * @param rect_a The first rectangle. - * @param rect_b The second rectangle. - * @param dim_num The number of dimensions. - * @return `true` if `rect_a` is inside `rect_b` and `false` otherwise. - */ -template -bool rect_in_rect(const T* rect_a, const NDRange& rect_b, unsigned int dim_num); - /** * Computes the union of a set of MBRs (rectangles). * diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 56180b377de..372b24c46bb 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -677,8 +677,6 @@ Status Reader::compute_range_result_coords( return Status::Ok(); } -// TODO: remove template 1 -template Status Reader::compute_range_result_coords( const std::vector& single_fragment, const std::map, size_t>& result_tile_map, @@ -690,7 +688,7 @@ Status Reader::compute_range_result_coords( auto statuses = parallel_for(0, range_num, [&](uint64_t r) { // Compute overlapping coordinates per range - RETURN_NOT_OK(compute_range_result_coords( + RETURN_NOT_OK(compute_range_result_coords( r, result_tile_map, result_tiles, &((*range_result_coords)[r]))); // Potentially sort for deduping purposes (for the case of updates) @@ -717,8 +715,6 @@ Status Reader::compute_range_result_coords( return Status::Ok(); } -// TODO: remove template 2 -template Status Reader::compute_range_result_coords( uint64_t range_idx, const std::map, size_t>& result_tile_map, @@ -751,7 +747,7 @@ Status Reader::compute_range_result_coords( // Add results only if the sparse tile MBR is not fully // covered by a more recent fragment's non-empty domain // TODO: remove template - if (!sparse_tile_overwritten(f, i)) + if (!sparse_tile_overwritten(f, i)) RETURN_NOT_OK(get_all_result_coords(&tile, range_result_coords)); } ++tr; @@ -766,7 +762,7 @@ Status Reader::compute_range_result_coords( // Add results only if the sparse tile MBR is not fully // covered by a more recent fragment's non-empty domain // TODO: remove template - if (!sparse_tile_overwritten(f, t->first)) + if (!sparse_tile_overwritten(f, t->first)) RETURN_NOT_OK(get_all_result_coords(&tile, range_result_coords)); } else { // Partial overlap auto ndrange = subarray.ndrange(range_idx); @@ -1313,7 +1309,7 @@ Status Reader::compute_result_coords( // Compute the read coordinates for all fragments for each subarray range std::vector> range_result_coords; - RETURN_CANCEL_OR_ERROR(compute_range_result_coords( + RETURN_CANCEL_OR_ERROR(compute_range_result_coords( single_fragment, result_tile_map, result_tiles, &range_result_coords)); result_tile_map.clear(); @@ -1987,18 +1983,16 @@ void Reader::zero_out_buffer_sizes() { } } -template bool Reader::sparse_tile_overwritten( unsigned frag_idx, uint64_t tile_idx) const { - auto mbr = (const T*)fragment_metadata_[frag_idx]->mbr(tile_idx); - assert(mbr != nullptr); - auto fragment_num = fragment_metadata_.size(); - auto dim_num = array_schema_->dim_num(); + const auto& mbr = fragment_metadata_[frag_idx]->mbr(tile_idx); + assert(!mbr.empty()); + auto fragment_num = (unsigned)fragment_metadata_.size(); + auto domain = array_schema_->domain(); for (unsigned f = frag_idx + 1; f < fragment_num; ++f) { if (fragment_metadata_[f]->dense() && - utils::geometry::rect_in_rect( - mbr, fragment_metadata_[f]->non_empty_domain(), dim_num)) + domain->covered(mbr, fragment_metadata_[f]->non_empty_domain())) return true; } diff --git a/tiledb/sm/query/reader.h b/tiledb/sm/query/reader.h index 7e2c7cda271..6f7ee3a82c6 100644 --- a/tiledb/sm/query/reader.h +++ b/tiledb/sm/query/reader.h @@ -573,7 +573,6 @@ class Reader { * Computes the result coordinates for each range of the query * subarray. * - * @tparam T The domain type. * @param single_fragment For each range, it indicates whether all * result coordinates come from a single fragment. * @param result_tile_map This is an auxialiary map that helps finding the @@ -583,7 +582,6 @@ class Reader { * It contains a vector for each range of the subarray. * @return Status */ - template Status compute_range_result_coords( const std::vector& single_fragment, const std::map, size_t>& result_tile_map, @@ -594,7 +592,6 @@ class Reader { * Computes the result coordinates of a given range of the query * subarray. * - * @tparam T The domain type. * @param range_idx The range to focus on. * @param result_tile_map This is an auxialiary map that helps finding the * result_tiles overlapping with each range. @@ -603,7 +600,6 @@ class Reader { * It contains a vector for each range of the subarray. * @return Status */ - template Status compute_range_result_coords( uint64_t range_idx, const std::map, size_t>& result_tile_map, @@ -985,7 +981,6 @@ class Reader { * Returns true if the input tile's MBR of the input fragment is fully * covered by the non-empty domain of a more recent fragment. */ - template bool sparse_tile_overwritten(unsigned frag_idx, uint64_t tile_idx) const; /** diff --git a/tiledb/sm/rtree/rtree.cc b/tiledb/sm/rtree/rtree.cc index be5f68237d6..8f40e22d30a 100644 --- a/tiledb/sm/rtree/rtree.cc +++ b/tiledb/sm/rtree/rtree.cc @@ -31,6 +31,7 @@ */ #include "tiledb/sm/rtree/rtree.h" +#include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/buffer/const_buffer.h" #include "tiledb/sm/enums/datatype.h" @@ -49,20 +50,13 @@ namespace sm { /* ****************************** */ RTree::RTree() { - dim_num_ = 0; + domain_ = nullptr; fanout_ = 0; - type_ = Datatype::INT32; } -RTree::RTree( - Datatype type, - unsigned dim_num, - unsigned fanout, - const std::vector& mbrs) - : dim_num_(dim_num) - , fanout_(fanout) - , type_(type) { - build_tree(mbrs); +RTree::RTree(const Domain* domain, unsigned fanout) + : domain_(domain) + , fanout_(fanout) { } RTree::~RTree() = default; @@ -95,62 +89,85 @@ RTree& RTree::operator=(RTree&& rtree) noexcept { /* API */ /* ****************************** */ +Status RTree::build_tree() { + if (levels_.empty()) + return Status::Ok(); + + assert(levels_.size() == 1); + + auto leaf_num = levels_[0].size(); + if (leaf_num == 1) + return Status::Ok(); + + // Build the tree bottom up + auto height = (size_t)ceil(utils::math::log(fanout_, leaf_num)) + 1; + for (size_t i = 0; i < height - 1; ++i) { + auto new_level = build_level(levels_.back()); + levels_.emplace_back(new_level); + } + + // Make the root as the first level + std::reverse(std::begin(levels_), std::end(levels_)); + + return Status::Ok(); +} + unsigned RTree::dim_num() const { - return dim_num_; + return (domain_ == nullptr) ? 0 : domain_->dim_num(); +} + +const Domain* RTree::domain() const { + return domain_; } unsigned RTree::fanout() const { return fanout_; } -template -TileOverlap RTree::get_tile_overlap(const std::vector& range) const { +TileOverlap RTree::get_tile_overlap(const NDRange& range) const { TileOverlap overlap; // Empty tree - if (dim_num_ == 0 || levels_.empty()) + if (domain_ == nullptr || levels_.empty()) return overlap; // This will keep track of the traversal std::list traversal; traversal.push_front({0, 0}); - auto leaf_num = levels_.back().mbr_num_; + auto leaf_num = levels_.back().size(); auto height = this->height(); - uint64_t mbr_size = 2 * dim_num_ * datatype_size(type_); while (!traversal.empty()) { // Get next entry auto entry = traversal.front(); traversal.pop_front(); - auto level = entry.level_; - auto mbr_idx = entry.mbr_idx_; - auto offset = entry.mbr_idx_ * mbr_size; - auto mbr = levels_[level].mbrs_.data() + offset; + const auto& mbr = levels_[entry.level_][entry.mbr_idx_]; - // Get - auto ratio = this->range_overlap(range, (T*)mbr); + // Get overlap ratio + auto ratio = domain_->overlap_ratio(range, mbr); // If there is overlap if (ratio != 0.0) { // If there is full overlap if (ratio == 1.0) { - auto subtree_leaf_num = this->subtree_leaf_num(level); + auto subtree_leaf_num = this->subtree_leaf_num(entry.level_); assert(subtree_leaf_num > 0); - uint64_t start = mbr_idx * subtree_leaf_num; + uint64_t start = entry.mbr_idx_ * subtree_leaf_num; uint64_t end = start + std::min(subtree_leaf_num, leaf_num - start) - 1; auto tile_range = std::pair(start, end); overlap.tile_ranges_.emplace_back(tile_range); } else { // Partial overlap // If this is the leaf level, insert into results - if (level == height - 1) { - auto mbr_idx_ratio = std::pair(mbr_idx, ratio); + if (entry.level_ == height - 1) { + auto mbr_idx_ratio = + std::pair(entry.mbr_idx_, ratio); overlap.tiles_.emplace_back(mbr_idx_ratio); } else { // Insert all "children" to traversal - auto next_mbr_num = levels_[level + 1].mbr_num_; - auto start = mbr_idx * fanout_; + auto next_mbr_num = (uint64_t)levels_[entry.level_ + 1].size(); + auto start = entry.mbr_idx_ * fanout_; auto end = std::min(start + fanout_ - 1, next_mbr_num - 1); for (uint64_t i = start; i <= end; ++i) - traversal.push_front({level + 1, end - (i - start)}); + traversal.push_front({entry.level_ + 1, end - (i - start)}); } } } @@ -163,55 +180,14 @@ unsigned RTree::height() const { return (unsigned)levels_.size(); } -const void* RTree::leaf(uint64_t leaf_idx) const { - if (leaf_idx >= levels_.back().mbr_num_) - return nullptr; - assert(leaf_idx < levels_.back().mbr_num_); - - uint64_t mbr_size = 2 * dim_num_ * datatype_size(type_); - return (const void*)&(levels_.back().mbrs_[leaf_idx * mbr_size]); +const NDRange& RTree::leaf(uint64_t leaf_idx) const { + assert(leaf_idx < levels_.back().size()); + return levels_.back()[leaf_idx]; } -template -double RTree::range_overlap(const std::vector& range, const T* mbr) { - double ratio = 1.0; - auto dim_num = (unsigned)range.size(); - - for (unsigned i = 0; i < dim_num; ++i) { - assert(range[i][0] <= range[i][1]); - assert(mbr[2 * i] <= mbr[2 * i + 1]); - - // No overlap - if (range[i][0] > mbr[2 * i + 1] || range[i][1] < mbr[2 * i]) { - ratio = 0.0; - break; - } - - // Update ratio - auto overlap_start = std::max(range[i][0], mbr[2 * i]); - auto overlap_end = std::min(range[i][1], mbr[2 * i + 1]); - auto overlap_range = overlap_end - overlap_start; - auto mbr_range = mbr[2 * i + 1] - mbr[2 * i]; - auto max = std::numeric_limits::max(); - if (std::numeric_limits::is_integer) { - overlap_range += 1; - mbr_range += 1; - } else { - if (overlap_range == 0) - overlap_range = std::nextafter(overlap_range, max); - if (mbr_range == 0) - mbr_range = std::nextafter(mbr_range, max); - } - ratio *= (double)overlap_range / mbr_range; - - // If ratio goes to 0, then the subarray overlap is much smaller than the - // volume of the MBR. Since we have already guaranteed that there is an - // overlap above, we should set the ratio to epsilon. - if (ratio == 0) - ratio = std::nextafter(0, max); - } - - return ratio; +const std::vector& RTree::leaves() const { + assert(!levels_.empty()); + return levels_.back(); } uint64_t RTree::subtree_leaf_num(uint64_t level) const { @@ -227,234 +203,162 @@ uint64_t RTree::subtree_leaf_num(uint64_t level) const { return leaf_num; } -Datatype RTree::type() const { - return type_; -} - Status RTree::serialize(Buffer* buff) const { - RETURN_NOT_OK(buff->write(&dim_num_, sizeof(dim_num_))); RETURN_NOT_OK(buff->write(&fanout_, sizeof(fanout_))); - auto type = (uint8_t)type_; - RETURN_NOT_OK(buff->write(&type, sizeof(type))); auto level_num = (unsigned)levels_.size(); RETURN_NOT_OK(buff->write(&level_num, sizeof(level_num))); - for (unsigned i = 0; i < level_num; ++i) { - auto mbr_num = levels_[i].mbr_num_; - auto mbrs_size = levels_[i].mbrs_.size(); - auto mbrs = levels_[i].mbrs_.data(); + for (unsigned l = 0; l < level_num; ++l) { + auto mbr_num = (uint64_t)levels_[l].size(); RETURN_NOT_OK(buff->write(&mbr_num, sizeof(uint64_t))); - RETURN_NOT_OK(buff->write(mbrs, mbrs_size)); + for (uint64_t m = 0; m < mbr_num; ++m) { + for (const auto& r : levels_[l][m]) { + RETURN_NOT_OK(buff->write(r.data(), r.size())); + } + } } return Status::Ok(); } -Status RTree::deserialize(ConstBuffer* cbuff) { - RETURN_NOT_OK(cbuff->read(&dim_num_, sizeof(dim_num_))); - RETURN_NOT_OK(cbuff->read(&fanout_, sizeof(fanout_))); - uint8_t type; - RETURN_NOT_OK(cbuff->read(&type, sizeof(uint8_t))); - type_ = (Datatype)type; - unsigned level_num; - RETURN_NOT_OK(cbuff->read(&level_num, sizeof(level_num))); +Status RTree::set_leaf(uint64_t leaf_id, const NDRange& mbr) { + if (levels_.size() != 1) + return LOG_STATUS(Status::RTreeError( + "Cannot set leaf; There are more than one levels in the tree")); - levels_.clear(); - levels_.resize(level_num); - uint64_t mbr_size = 2 * dim_num_ * datatype_size(type_); - uint64_t mbr_num; - for (unsigned i = 0; i < level_num; ++i) { - RETURN_NOT_OK(cbuff->read(&mbr_num, sizeof(uint64_t))); - levels_[i].mbr_num_ = mbr_num; - auto mbrs_size = mbr_num * mbr_size; - levels_[i].mbrs_.resize(mbrs_size); - RETURN_NOT_OK(cbuff->read(&levels_[i].mbrs_[0], mbrs_size)); - } + if (leaf_id >= levels_[0].size()) + return LOG_STATUS( + Status::RTreeError("Cannot set leaf; Invalid lead index")); + + levels_[0][leaf_id] = mbr; return Status::Ok(); } -/* ****************************** */ -/* PRIVATE METHODS */ -/* ****************************** */ - -Status RTree::build_tree(const std::vector& mbrs) { - switch (type_) { - case Datatype::INT8: - return build_tree(mbrs); - case Datatype::UINT8: - return build_tree(mbrs); - case Datatype::INT16: - return build_tree(mbrs); - case Datatype::UINT16: - return build_tree(mbrs); - case Datatype::INT32: - return build_tree(mbrs); - case Datatype::UINT32: - return build_tree(mbrs); - case Datatype::INT64: - return build_tree(mbrs); - case Datatype::UINT64: - return build_tree(mbrs); - case Datatype::FLOAT32: - return build_tree(mbrs); - case Datatype::FLOAT64: - return build_tree(mbrs); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return build_tree(mbrs); - default: - assert(false); - return LOG_STATUS( - Status::RTreeError("Cannot build R-Tree; Unsupported type")); - } - +Status RTree::set_leaves(const std::vector& mbrs) { + levels_.clear(); + levels_.resize(1); + levels_[0] = mbrs; return Status::Ok(); } -template -Status RTree::build_tree(const std::vector& mbrs) { - // Handle empty tree - if (mbrs.empty()) - return Status::Ok(); +Status RTree::set_leaf_num(uint64_t num) { + // There should be exactly one level (the leaf level) + if (levels_.size() != 1) + levels_.resize(1); - // Build leaf level - auto leaf_level = build_leaf_level(mbrs); - auto leaf_num = leaf_level.mbr_num_; - levels_.push_back(std::move(leaf_level)); - if (leaf_num == 1) - return Status::Ok(); - - // Build rest of the tree bottom up - auto height = (size_t)ceil(utils::math::log(fanout_, leaf_num)) + 1; - for (size_t i = 0; i < height - 1; ++i) { - auto new_level = build_level(levels_.back()); - levels_.emplace_back(new_level); - } - - // Make the root as the first level - std::reverse(std::begin(levels_), std::end(levels_)); + if (num < levels_[0].size()) + return LOG_STATUS( + Status::RTreeError("Cannot set number of leaves; provided number " + "cannot be smaller than the current leaf number")); + levels_[0].resize(num); return Status::Ok(); } -RTree::Level RTree::build_leaf_level(const std::vector& mbrs) { - assert(!mbrs.empty()); - - Level new_level; - - // Allocate space - uint64_t mbr_size = 2 * dim_num_ * datatype_size(type_); - uint64_t leaf_level_size = mbrs.size() * mbr_size; - new_level.mbr_num_ = mbrs.size(); - new_level.mbrs_.resize(leaf_level_size); - - // Copy MBRs - uint64_t offset = 0; - for (auto mbr : mbrs) { - auto copy_loc = new_level.mbrs_.data() + offset; - std::memcpy(copy_loc, mbr, mbr_size); - offset += mbr_size; - } - - return new_level; +Status RTree::deserialize( + ConstBuffer* cbuff, const Domain* domain, uint32_t version) { + if (version < 5) + return deserialize_v1_v4(cbuff, domain); + return deserialize_v5(cbuff, domain); } -template +/* ****************************** */ +/* PRIVATE METHODS */ +/* ****************************** */ + RTree::Level RTree::build_level(const Level& level) { - Level new_level; - - uint64_t mbr_size = 2 * dim_num_ * datatype_size(type_); - new_level.mbr_num_ = (uint64_t)ceil((double)level.mbr_num_ / fanout_); - uint64_t new_level_size = new_level.mbr_num_ * mbr_size; - new_level.mbrs_.resize(new_level_size); - - uint64_t mbrs_visited = 0, offset_level = 0, offset_new_level = 0; - for (uint64_t i = 0; i < new_level.mbr_num_; ++i) { - auto mbr_num = std::min(fanout_, level.mbr_num_ - mbrs_visited); - auto mbr_at = (T*)(level.mbrs_.data() + offset_level); - auto union_loc = (T*)(new_level.mbrs_.data() + offset_new_level); - utils::geometry::compute_mbr_union(dim_num_, mbr_at, mbr_num, union_loc); - mbrs_visited += mbr_num; - offset_level += mbr_num * mbr_size; - offset_new_level += mbr_size; + auto cur_mbr_num = (uint64_t)level.size(); + Level new_level((uint64_t)ceil((double)cur_mbr_num / fanout_)); + auto new_mbr_num = (uint64_t)new_level.size(); + + uint64_t mbrs_visited = 0; + for (uint64_t i = 0; i < new_mbr_num; ++i) { + auto mbr_num = std::min((uint64_t)fanout_, cur_mbr_num - mbrs_visited); + for (uint64_t j = 0; j < mbr_num; ++j, ++mbrs_visited) + domain_->expand_ndrange(level[mbrs_visited], &new_level[i]); } - assert(mbrs_visited == level.mbr_num_); return new_level; } RTree RTree::clone() const { RTree clone; - clone.dim_num_ = dim_num_; + clone.domain_ = domain_; clone.fanout_ = fanout_; - clone.type_ = type_; clone.levels_ = levels_; return clone; } +Status RTree::deserialize_v1_v4(ConstBuffer* cbuff, const Domain* domain) { + // For backwards compatibility, they will be ignored + unsigned dim_num_i; + uint8_t type_i; + + RETURN_NOT_OK(cbuff->read(&dim_num_i, sizeof(dim_num_i))); + RETURN_NOT_OK(cbuff->read(&fanout_, sizeof(fanout_))); + RETURN_NOT_OK(cbuff->read(&type_i, sizeof(type_i))); + unsigned level_num; + RETURN_NOT_OK(cbuff->read(&level_num, sizeof(level_num))); + + levels_.clear(); + levels_.resize(level_num); + auto dim_num = domain->dim_num(); + uint64_t mbr_num; + for (unsigned l = 0; l < level_num; ++l) { + RETURN_NOT_OK(cbuff->read(&mbr_num, sizeof(uint64_t))); + levels_[l].resize(mbr_num); + for (uint64_t m = 0; m < mbr_num; ++m) { + levels_[l][m].resize(dim_num); + for (unsigned d = 0; d < dim_num; ++d) { + auto r_size = 2 * domain->dimension(d)->coord_size(); + levels_[l][m][d].set_range(cbuff->cur_data(), r_size); + cbuff->advance_offset(r_size); + } + } + } + + domain_ = domain; + + return Status::Ok(); +} + +Status RTree::deserialize_v5(ConstBuffer* cbuff, const Domain* domain) { + RETURN_NOT_OK(cbuff->read(&fanout_, sizeof(fanout_))); + unsigned level_num; + RETURN_NOT_OK(cbuff->read(&level_num, sizeof(level_num))); + + if (level_num == 0) + return Status::Ok(); + + levels_.clear(); + levels_.resize(level_num); + auto dim_num = domain->dim_num(); + uint64_t mbr_num; + for (unsigned l = 0; l < level_num; ++l) { + RETURN_NOT_OK(cbuff->read(&mbr_num, sizeof(uint64_t))); + levels_[l].resize(mbr_num); + for (uint64_t m = 0; m < mbr_num; ++m) { + levels_[l][m].resize(dim_num); + for (unsigned d = 0; d < dim_num; ++d) { + auto r_size = 2 * domain->dimension(d)->coord_size(); + levels_[l][m][d].set_range(cbuff->cur_data(), r_size); + cbuff->advance_offset(r_size); + } + } + } + + domain_ = domain; + + return Status::Ok(); +} + void RTree::swap(RTree& rtree) { - std::swap(dim_num_, rtree.dim_num_); + std::swap(domain_, rtree.domain_); std::swap(fanout_, rtree.fanout_); - std::swap(type_, rtree.type_); std::swap(levels_, rtree.levels_); } -// Explicit template instantiations - -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; -template TileOverlap RTree::get_tile_overlap( - const std::vector& range) const; - -template double RTree::range_overlap( - const std::vector& range, const int8_t* mbr); -template double RTree::range_overlap( - const std::vector& range, const uint8_t* mbr); -template double RTree::range_overlap( - const std::vector& range, const int16_t* mbr); -template double RTree::range_overlap( - const std::vector& range, const uint16_t* mbr); -template double RTree::range_overlap( - const std::vector& range, const int32_t* mbr); -template double RTree::range_overlap( - const std::vector& range, const uint32_t* mbr); -template double RTree::range_overlap( - const std::vector& range, const int64_t* mbr); -template double RTree::range_overlap( - const std::vector& range, const uint64_t* mbr); -template double RTree::range_overlap( - const std::vector& range, const float* mbr); -template double RTree::range_overlap( - const std::vector& range, const double* mbr); - } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/rtree/rtree.h b/tiledb/sm/rtree/rtree.h index 9ff449d4d71..959c822a6fd 100644 --- a/tiledb/sm/rtree/rtree.h +++ b/tiledb/sm/rtree/rtree.h @@ -35,6 +35,7 @@ #include +#include "tiledb/sm/array_schema/domain.h" #include "tiledb/sm/misc/status.h" #include "tiledb/sm/misc/tile_overlap.h" @@ -61,17 +62,8 @@ class RTree { /** Constructor. */ RTree(); - /** - * Constructor. This admits a list of sorted MBRs that will - * constitute the leaf level of the tree. The constructor will - * construct bottom up the tree based on these ``mbrs``. - * The input MBRs will be copied into the leaf level. - */ - RTree( - Datatype type, - unsigned dim_num, - unsigned fanout, - const std::vector& mbrs); + /** Constructor. */ + RTree(const Domain* domain, unsigned fanout); /** Destructor. */ ~RTree(); @@ -92,9 +84,15 @@ class RTree { /* API */ /* ********************************* */ - /** Returns the number of dimensions. */ + /** Builds the RTree bottom-up on the current leaf level. */ + Status build_tree(); + + /** The number of dimensions of the R-tree. */ unsigned dim_num() const; + /** Returns the domain. */ + const Domain* domain() const; + /** Returns the fanout. */ unsigned fanout() const; @@ -102,21 +100,16 @@ class RTree { * Returns the tile overlap of the input range with the MBRs stored * in the RTree. */ - template - TileOverlap get_tile_overlap(const std::vector& range) const; + TileOverlap get_tile_overlap(const NDRange& range) const; /** Returns the tree height. */ unsigned height() const; /** Returns the leaf MBR with the input index. */ - const void* leaf(uint64_t leaf_idx) const; + const NDRange& leaf(uint64_t leaf_idx) const; - /** - * Returns the overlap between a range and an RTree MBR, as the ratio - * of the volume of the overlap over the volume of the MBR. - */ - template - static double range_overlap(const std::vector& range, const T* mbr); + /** Returns the leaves of the tree. */ + const std::vector& leaves() const; /** * Returns the number of leaves that are stored in a (full) subtree @@ -124,14 +117,40 @@ class RTree { */ uint64_t subtree_leaf_num(uint64_t level) const; - /** Returns the datatype of the R-Tree. */ - Datatype type() const; - - /** Serializes the contents of the object to the input buffer. */ + /** + * Serializes the contents of the object to the input buffer. + * Note that `domain_` is not serialized in the buffer. + */ Status serialize(Buffer* buff) const; - /** Deserializes the contents of the object from the input buffer. */ - Status deserialize(ConstBuffer* cbuff); + /** + * Sets an MBR as a leaf in the tree. The function will error out + * if the number of levels in the tree is different from exactly + * 1 (the leaf level), and if `leaf_id` is out of bounds / invalid. + */ + Status set_leaf(uint64_t leaf_id, const NDRange& mbr); + + /** + * Sets the input MBRs as leaves. This will destroy the existing + * RTree. + */ + Status set_leaves(const std::vector& mbrs); + + /** + * Resizes the leaf level. It destroys the upper levels + * of the tree if they exist. + * It errors if `num` is smaller than the current number + * of leaves. + */ + Status set_leaf_num(uint64_t num); + + /** + * Deserializes the contents of the object from the input buffer based + * on the format version. + * It also sets the input domain, as that is not serialized. + */ + Status deserialize( + ConstBuffer* cbuff, const Domain* domain, uint32_t version); private: /* ********************************* */ @@ -160,17 +179,10 @@ class RTree { * `levels_`, where the first level is the root. This is how * we can infer which tree level each `Level` object corresponds to. */ - struct Level { - /** Number of MBRs in the level (across all nodes in the level). */ - uint64_t mbr_num_ = 0; - /** The serialized MBRs of the level, in the form - * ``(low_1, high_1), ..., (low_d, high_d)`` where ``d`` is - * the number of dimensions. - */ - std::vector mbrs_; - }; + typedef std::vector Level; - /** Defines an R-Tree level entry, which corresponds to a node + /** + * Defines an R-Tree level entry, which corresponds to a node * at a particular level. It stores the level the entry belongs * to, as well as the starting index of the first MBR in the * corresponding R-Tree node. @@ -186,15 +198,12 @@ class RTree { /* PRIVATE ATTRIBUTES */ /* ********************************* */ - /** The number of dimensions. */ - unsigned dim_num_; + /** The domain. */ + const Domain* domain_; /** The fanout of the tree. */ unsigned fanout_; - /** The data type. */ - Datatype type_; - /** * The tree levels. The first level is the root. Note that the root * always consists of a single MBR. @@ -205,23 +214,30 @@ class RTree { /* PRIVATE METHODS */ /* ********************************* */ - /** Builds the RTree bottom-up on the input MBRs. */ - Status build_tree(const std::vector& mbrs); - - /** Builds the RTree bottom-up on the input MBRs. */ - template - Status build_tree(const std::vector& mbrs); - - /** Builds the tree leaf level using the input mbrs. */ - Level build_leaf_level(const std::vector& mbrs); - /** Builds a single tree level on top of the input level. */ - template Level build_level(const Level& level); /** Returns a deep copy of this RTree. */ RTree clone() const; + /** + * Deserializes the contents of the object from the input buffer based + * on the format version. + * It also sets the input domain, as that is not serialized. + * + * Applicable to versions 1-4 + */ + Status deserialize_v1_v4(ConstBuffer* cbuff, const Domain* domain); + + /** + * Deserializes the contents of the object from the input buffer based + * on the format version. + * It also sets the input domain, as that is not serialized. + * + * Applicable to versions >= 5 + */ + Status deserialize_v5(ConstBuffer* cbuff, const Domain* domain); + /** * Swaps the contents (all field values) of this RTree with the * given ``rtree``. diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 45a029ee98f..cbc87d7a0e4 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -1233,19 +1233,18 @@ Status Subarray::compute_tile_overlap() { // Compute estimated tile overlap in parallel over fragments and ranges auto statuses = parallel_for_2d( 0, fragment_num, 0, range_num, [&](unsigned i, uint64_t j) { - auto range = this->range(j); if (meta[i]->dense()) { // Dense fragment + auto range = this->range(j); tile_overlap_[i][j] = get_tile_overlap(range, i); } else { // Sparse fragment - RETURN_NOT_OK(meta[i]->get_tile_overlap( + const auto& range = this->ndrange(j); + RETURN_NOT_OK(meta[i]->get_tile_overlap( *encryption_key, range, &(tile_overlap_[i][j]))); } return Status::Ok(); }); - for (const auto& st : statuses) { - if (!st.ok()) - return st; - } + for (const auto& st : statuses) + RETURN_NOT_OK(st); tile_overlap_computed_ = true; diff --git a/tools/src/commands/info_command.cc b/tools/src/commands/info_command.cc index 9f4021c6251..59c8ad27351 100644 --- a/tools/src/commands/info_command.cc +++ b/tools/src/commands/info_command.cc @@ -217,8 +217,8 @@ void InfoCommand::write_svg_mbrs() const { max_y = std::numeric_limits::min(); auto fragment_metadata = array.fragment_metadata(); for (const auto& f : fragment_metadata) { - auto mbrs = f->mbrs(); - for (void* mbr : mbrs) { + const auto& mbrs = f->mbrs(); + for (const auto& mbr : mbrs) { auto tup = get_mbr(mbr, schema->coords_type()); min_x = std::min(min_x, std::get<0>(tup)); min_y = std::min(min_y, std::get<1>(tup)); @@ -283,8 +283,8 @@ void InfoCommand::write_text_mbrs() const { auto coords_type = schema->coords_type(); std::stringstream text; for (const auto& f : fragment_metadata) { - auto mbrs = f->mbrs(); - for (void* mbr : mbrs) { + const auto& mbrs = f->mbrs(); + for (const auto& mbr : mbrs) { auto str_mbr = mbr_to_string(mbr, coords_type, dim_num); for (unsigned i = 0; i < dim_num; i++) { text << str_mbr[2 * i + 0] << "," << str_mbr[2 * i + 1]; @@ -307,7 +307,7 @@ void InfoCommand::write_text_mbrs() const { } std::tuple InfoCommand::get_mbr( - const void* mbr, tiledb::sm::Datatype datatype) const { + const NDRange& mbr, tiledb::sm::Datatype datatype) const { switch (datatype) { case Datatype::INT8: return get_mbr(mbr); @@ -351,79 +351,80 @@ std::tuple InfoCommand::get_mbr( template std::tuple InfoCommand::get_mbr( - const void* mbr) const { + const NDRange& mbr) const { T x, y, width, height; - y = static_cast(mbr)[0]; - height = static_cast(mbr)[1] - y + 1; - x = static_cast(mbr)[2]; - width = static_cast(mbr)[3] - x + 1; + y = static_cast(mbr[0].data())[0]; + height = static_cast(mbr[0].data())[1] - y + 1; + x = static_cast(mbr[1].data())[0]; + width = static_cast(mbr[1].data())[1] - x + 1; return std::make_tuple(x, y, width, height); } +// Works only for fixed-sized coordinates std::vector InfoCommand::mbr_to_string( - const void* mbr, Datatype coords_type, unsigned dim_num) const { + const NDRange& mbr, Datatype coords_type, unsigned dim_num) const { std::vector result; - for (unsigned i = 0; i < dim_num; i++) { + const int8_t* r8; + const uint8_t* ru8; + const int16_t* r16; + const uint16_t* ru16; + const int32_t* r32; + const uint32_t* ru32; + const int64_t* r64; + const uint64_t* ru64; + const float* rf32; + const double* rf64; + for (unsigned d = 0; d < dim_num; d++) { switch (coords_type) { case Datatype::INT8: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + r8 = (const int8_t*)mbr[d].data(); + result.push_back(std::to_string(r8[0])); + result.push_back(std::to_string(r8[1])); break; case Datatype::UINT8: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + ru8 = (const uint8_t*)mbr[d].data(); + result.push_back(std::to_string(ru8[0])); + result.push_back(std::to_string(ru8[1])); break; case Datatype::INT16: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + r16 = (const int16_t*)mbr[d].data(); + result.push_back(std::to_string(r16[0])); + result.push_back(std::to_string(r16[1])); break; case Datatype::UINT16: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + ru16 = (const uint16_t*)mbr[d].data(); + result.push_back(std::to_string(ru16[0])); + result.push_back(std::to_string(ru16[1])); break; case Datatype::INT32: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + r32 = (const int32_t*)mbr[d].data(); + result.push_back(std::to_string(r32[0])); + result.push_back(std::to_string(r32[1])); break; case Datatype::UINT32: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + ru32 = (const uint32_t*)mbr[d].data(); + result.push_back(std::to_string(ru32[0])); + result.push_back(std::to_string(ru32[1])); break; case Datatype::INT64: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + r64 = (const int64_t*)mbr[d].data(); + result.push_back(std::to_string(r64[0])); + result.push_back(std::to_string(r64[1])); break; case Datatype::UINT64: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + ru64 = (const uint64_t*)mbr[d].data(); + result.push_back(std::to_string(ru64[0])); + result.push_back(std::to_string(ru64[1])); break; case Datatype::FLOAT32: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + rf32 = (const float*)mbr[d].data(); + result.push_back(std::to_string(rf32[0])); + result.push_back(std::to_string(rf32[1])); break; case Datatype::FLOAT64: - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 0])); - result.push_back( - std::to_string(static_cast(mbr)[2 * i + 1])); + rf64 = (const double*)mbr[d].data(); + result.push_back(std::to_string(rf64[0])); + result.push_back(std::to_string(rf64[1])); break; default: throw std::invalid_argument( @@ -436,25 +437,25 @@ std::vector InfoCommand::mbr_to_string( // Explicit template instantiations template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; template std::tuple InfoCommand::get_mbr( - const void* mbr) const; + const NDRange& mbr) const; template std::tuple -InfoCommand::get_mbr(const void* mbr) const; +InfoCommand::get_mbr(const NDRange& mbr) const; } // namespace cli } // namespace tiledb diff --git a/tools/src/commands/info_command.h b/tools/src/commands/info_command.h index 940bbaef3a2..c4fc424bc08 100644 --- a/tools/src/commands/info_command.h +++ b/tools/src/commands/info_command.h @@ -36,6 +36,7 @@ #include "commands/command.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/misc/types.h" namespace tiledb { namespace cli { @@ -84,11 +85,12 @@ class InfoCommand : public Command { /** Converts an opaque MBR to a 2D (double) rectangle. */ std::tuple get_mbr( - const void* mbr, tiledb::sm::Datatype datatype) const; + const sm::NDRange& mbr, tiledb::sm::Datatype datatype) const; /** Converts an opaque MBR to a 2D (double) rectangle. */ template - std::tuple get_mbr(const void* mbr) const; + std::tuple get_mbr( + const sm::NDRange& mbr) const; /** * Converts an opaque MBR to a string vector. The vector contents are strings: @@ -100,7 +102,7 @@ class InfoCommand : public Command { * @return String vector of MBR. */ std::vector mbr_to_string( - const void* mbr, + const sm::NDRange& mbr, tiledb::sm::Datatype coords_type, unsigned dim_num) const; };