diff --git a/HISTORY.md b/HISTORY.md index b037439e4524..b089aaf3c2f0 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,11 @@ * Now storing the coordinate tiles on each dimension in separate files * Changed fragment name format from `__t1_t2_uuid` to `__t1_t2_uuid_`. That was necessary for backwards compatibility +## Breaking C API changes + +* Changed `domain` input of `tiledb_dimension_get_domain` to `const void**` (from `void**`). +* Changed `tile_extent` input of `tiledb_dimension_get_tile_extent` to `const void**` (from `void**`). + ## New features ## Improvements diff --git a/test/src/helpers.cc b/test/src/helpers.cc index 2e426c2519f6..0b83fa18bba2 100644 --- a/test/src/helpers.cc +++ b/test/src/helpers.cc @@ -99,14 +99,15 @@ void check_subarray( // Check ranges uint64_t dim_range_num = 0; - const T* range; + const sm::Range* range; for (unsigned i = 0; i < dim_num; ++i) { CHECK(subarray.get_range_num(i, &dim_range_num).ok()); CHECK(dim_range_num == ranges[i].size() / 2); for (uint64_t j = 0; j < dim_range_num; ++j) { - subarray.get_range(i, j, (const void**)&range); - CHECK(range[0] == ranges[i][2 * j]); - CHECK(range[1] == ranges[i][2 * j + 1]); + subarray.get_range(i, j, &range); + auto r = (const T*)range->data(); + CHECK(r[0] == ranges[i][2 * j]); + CHECK(r[1] == ranges[i][2 * j + 1]); } } } @@ -412,10 +413,10 @@ void create_subarray( tiledb::sm::Subarray ret(array, layout); auto dim_num = (unsigned)ranges.size(); - for (unsigned i = 0; i < dim_num; ++i) { - auto dim_range_num = ranges[i].size() / 2; + for (unsigned d = 0; d < dim_num; ++d) { + auto dim_range_num = ranges[d].size() / 2; for (size_t j = 0; j < dim_range_num; ++j) { - ret.add_range(i, &ranges[i][2 * j]); + ret.add_range(d, sm::Range(&ranges[d][2 * j], 2 * sizeof(T))); } } diff --git a/test/src/unit-ReadCellSlabIter.cc b/test/src/unit-ReadCellSlabIter.cc index 9e569fa2d5c5..08bfda79da9d 100644 --- a/test/src/unit-ReadCellSlabIter.cc +++ b/test/src/unit-ReadCellSlabIter.cc @@ -152,21 
+152,19 @@ void ReadCellSlabIterFx::create_result_space_tiles( const std::vector& domain_slices, const std::vector>& tile_coords, std::map>* result_space_tiles) { - auto dim_num = dom->dim_num(); - auto domain = (const T*)dom->domain(); - auto tile_extents = (const T*)dom->tile_extents(); + auto domain = dom->domain(); + const auto& tile_extents = dom->tile_extents(); std::vector> frag_tile_domains; for (size_t i = 0; i < domain_slices.size(); ++i) { frag_tile_domains.emplace_back( (unsigned)(domain_slices.size() - i), - dim_num, domain, domain_slices[i], tile_extents, layout); } TileDomain array_tile_domain( - UINT32_MAX, dim_num, domain, dom_ndrange, tile_extents, layout); + UINT32_MAX, domain, dom_ndrange, tile_extents, layout); Reader::compute_result_space_tiles( dom, tile_coords, @@ -231,10 +229,9 @@ TEST_CASE_METHOD( const auto& tile_coords = subarray.tile_coords(); std::map> result_space_tiles; auto dom = array_->array_->array_schema()->domain(); - auto dom_ndrange = dom->domain_ndrange(); create_result_space_tiles( dom, - dom_ndrange, + dom->domain(), subarray_layout, domain_slices, tile_coords, @@ -292,10 +289,9 @@ TEST_CASE_METHOD( const auto& tile_coords = subarray.tile_coords(); std::map> result_space_tiles; auto dom = array_->array_->array_schema()->domain(); - auto dom_ndrange = dom->domain_ndrange(); create_result_space_tiles( dom, - dom_ndrange, + dom->domain(), subarray_layout, domain_slices, tile_coords, @@ -356,10 +352,9 @@ TEST_CASE_METHOD( const auto& tile_coords = subarray.tile_coords(); std::map> result_space_tiles; auto dom = array_->array_->array_schema()->domain(); - auto dom_ndrange = dom->domain_ndrange(); create_result_space_tiles( dom, - dom_ndrange, + dom->domain(), subarray_layout, domain_slices, tile_coords, @@ -424,10 +419,9 @@ TEST_CASE_METHOD( const auto& tile_coords = subarray.tile_coords(); std::map> result_space_tiles; auto dom = array_->array_->array_schema()->domain(); - auto dom_ndrange = dom->domain_ndrange(); 
create_result_space_tiles( dom, - dom_ndrange, + dom->domain(), subarray_layout, domain_slices, tile_coords, @@ -648,10 +642,9 @@ TEST_CASE_METHOD( const auto& tile_coords = subarray.tile_coords(); std::map> result_space_tiles; auto dom = array_->array_->array_schema()->domain(); - auto dom_ndrange = dom->domain_ndrange(); create_result_space_tiles( dom, - dom_ndrange, + dom->domain(), tile_domain_layout, domain_slices, tile_coords, @@ -821,10 +814,9 @@ TEST_CASE_METHOD( const auto& tile_coords = subarray.tile_coords(); std::map> result_space_tiles; auto dom = array_->array_->array_schema()->domain(); - auto dom_ndrange = dom->domain_ndrange(); create_result_space_tiles( dom, - dom_ndrange, + dom->domain(), tile_domain_layout, domain_slices, tile_coords, @@ -1007,10 +999,9 @@ TEST_CASE_METHOD( const auto& tile_coords = subarray.tile_coords(); std::map> result_space_tiles; auto dom = array_->array_->array_schema()->domain(); - auto dom_ndrange = dom->domain_ndrange(); create_result_space_tiles( dom, - dom_ndrange, + dom->domain(), tile_domain_layout, domain_slices, tile_coords, @@ -1239,10 +1230,9 @@ TEST_CASE_METHOD( const auto& tile_coords = subarray.tile_coords(); std::map> result_space_tiles; auto dom = array_->array_->array_schema()->domain(); - auto dom_ndrange = dom->domain_ndrange(); create_result_space_tiles( dom, - dom_ndrange, + dom->domain(), tile_domain_layout, domain_slices, tile_coords, diff --git a/test/src/unit-Reader.cc b/test/src/unit-Reader.cc index 9e49853e3c84..bd532cc8b97c 100644 --- a/test/src/unit-Reader.cc +++ b/test/src/unit-Reader.cc @@ -116,8 +116,15 @@ TEST_CASE_METHOD( "[Reader][2d][compute_result_space_tiles]") { Reader reader; unsigned dim_num = 2; - std::vector domain = {1, 10, 1, 15}; - std::vector tile_extents = {2, 5}; + auto size = 2 * sizeof(int32_t); + int32_t domain_vec[] = {1, 10, 1, 15}; + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; + std::vector tile_extents_vec = {2, 5}; + std::vector 
tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], &tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::ROW_MAJOR; // Tile coords @@ -151,30 +158,33 @@ TEST_CASE_METHOD( std::vector domain_slice_2 = {4, 5, 2, 4}; std::vector domain_slice_3 = {5, 7, 1, 9}; - auto size = 2 * sizeof(int32_t); NDRange ds1 = {Range(&domain_slice_1[0], size), Range(&domain_slice_1[2], size)}; NDRange ds2 = {Range(&domain_slice_2[0], size), Range(&domain_slice_2[2], size)}; NDRange ds3 = {Range(&domain_slice_3[0], size), Range(&domain_slice_3[2], size)}; - NDRange dsd = {Range(&domain[0], size), Range(&domain[2], size)}; + NDRange dsd = domain; std::vector> frag_tile_domains; - frag_tile_domains.emplace_back(TileDomain( - 3, dim_num, &domain[0], ds3, &tile_extents[0], layout)); - frag_tile_domains.emplace_back(TileDomain( - 2, dim_num, &domain[0], ds2, &tile_extents[0], layout)); - frag_tile_domains.emplace_back(TileDomain( - 1, dim_num, &domain[0], ds1, &tile_extents[0], layout)); + frag_tile_domains.emplace_back( + TileDomain(3, domain, ds3, tile_extents, layout)); + frag_tile_domains.emplace_back( + TileDomain(2, domain, ds2, tile_extents, layout)); + frag_tile_domains.emplace_back( + TileDomain(1, domain, ds1, tile_extents, layout)); TileDomain array_tile_domain( - UINT32_MAX, dim_num, &domain[0], dsd, &tile_extents[0], layout); + UINT32_MAX, domain, dsd, tile_extents, layout); Dimension d1("d1", Datatype::INT32); + d1.set_domain(domain_vec); + d1.set_tile_extent(&tile_extents_vec[0]); Dimension d2("d2", Datatype::INT32); + d2.set_domain(&domain_vec[2]); + d2.set_tile_extent(&tile_extents_vec[1]); Domain dom(Datatype::INT32); - dom.add_dimension(&d1); - dom.add_dimension(&d2); + CHECK(dom.add_dimension(&d1).ok()); + CHECK(dom.add_dimension(&d2).ok()); // Compute result space tiles map std::map> result_space_tiles; 
diff --git a/test/src/unit-Subarray.cc b/test/src/unit-Subarray.cc index ac58b8d5795e..3ae607be05f0 100644 --- a/test/src/unit-Subarray.cc +++ b/test/src/unit-Subarray.cc @@ -321,14 +321,14 @@ TEST_CASE_METHOD( std::vector c_range_1_1 = {5, 5}; auto cropped_subarray = subarray.crop_to_tile(&tile_coords[0], Layout::ROW_MAJOR); - const void* range = nullptr; + const Range* range = nullptr; CHECK(cropped_subarray.range_num() == 2); CHECK(cropped_subarray.get_range(0, 0, &range).ok()); - CHECK(!memcmp(range, &c_range_0_0[0], 2 * sizeof(uint64_t))); + CHECK(!memcmp(range->data(), &c_range_0_0[0], 2 * sizeof(uint64_t))); CHECK(cropped_subarray.get_range(1, 0, &range).ok()); - CHECK(!memcmp(range, &c_range_1_0[0], 2 * sizeof(uint64_t))); + CHECK(!memcmp(range->data(), &c_range_1_0[0], 2 * sizeof(uint64_t))); CHECK(cropped_subarray.get_range(1, 1, &range).ok()); - CHECK(!memcmp(range, &c_range_1_1[0], 2 * sizeof(uint64_t))); + CHECK(!memcmp(range->data(), &c_range_1_1[0], 2 * sizeof(uint64_t))); close_array(ctx_, array_); } diff --git a/test/src/unit-TileDomain.cc b/test/src/unit-TileDomain.cc index 5cc9111021c1..232ef0a4fac4 100644 --- a/test/src/unit-TileDomain.cc +++ b/test/src/unit-TileDomain.cc @@ -37,16 +37,19 @@ using namespace tiledb::sm; TEST_CASE("TileDomain: Test 1D", "[TileDomain][1d]") { - unsigned dim_num = 1; - std::vector domain = {1, 100}; - std::vector domain_slice = {15, 35}; - int32_t tile_extent = 10; + int32_t tile_extent_v = 10; + std::vector tile_extents(1); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extent_v, sizeof(int32_t)); Layout layout = Layout::ROW_MAJOR; - NDRange ds = {Range(&domain_slice[0], 2 * sizeof(int32_t))}; + auto size = 2 * sizeof(int32_t); + int32_t ds_vec[] = {15, 35}; + int32_t domain_vec[] = {1, 100}; + NDRange ds = {Range(ds_vec, size)}; + NDRange domain = {Range(domain_vec, size)}; - TileDomain tile_domain( - 0, dim_num, &domain[0], ds, &tile_extent, layout); + TileDomain 
tile_domain(0, domain, ds, tile_extents, layout); const auto& td = tile_domain.tile_domain(); CHECK(td.size() == 2); CHECK(td[0] == 1); @@ -68,17 +71,21 @@ TEST_CASE("TileDomain: Test 1D", "[TileDomain][1d]") { TEST_CASE( "TileDomain: Test 2D, row-major, complete", "[TileDomain][2d][row][complete]") { - unsigned dim_num = 2; - std::vector domain = {1, 10, 1, 10}; + std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice = {1, 10, 1, 10}; - std::vector tile_extents = {2, 5}; + std::vector tile_extents_vec = {2, 5}; + std::vector tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], &tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::ROW_MAJOR; auto size = 2 * (sizeof(int32_t)); NDRange ds = {Range(&domain_slice[0], size), Range(&domain_slice[2], size)}; + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; - TileDomain tile_domain( - 0, dim_num, &domain[0], ds, &tile_extents[0], layout); + TileDomain tile_domain(0, domain, ds, tile_extents, layout); const auto& td = tile_domain.tile_domain(); CHECK(td.size() == 4); CHECK(td[0] == 0); @@ -109,17 +116,21 @@ TEST_CASE( TEST_CASE( "TileDomain: Test 2D, row-major, partial", "[TileDomain][2d][row][partial]") { - unsigned dim_num = 2; - std::vector domain = {1, 10, 1, 10}; + std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice = {4, 10, 2, 8}; - std::vector tile_extents = {2, 5}; + std::vector tile_extents_vec = {2, 5}; + std::vector tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], &tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::ROW_MAJOR; auto size = 2 * (sizeof(int32_t)); NDRange ds = {Range(&domain_slice[0], size), 
Range(&domain_slice[2], size)}; + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; - TileDomain tile_domain( - 0, dim_num, &domain[0], ds, &tile_extents[0], layout); + TileDomain tile_domain(0, domain, ds, tile_extents, layout); const auto& td = tile_domain.tile_domain(); CHECK(td.size() == 4); CHECK(td[0] == 1); @@ -145,17 +156,21 @@ TEST_CASE( TEST_CASE( "TileDomain: Test 2D, col-major, complete", "[TileDomain][2d][col][complete]") { - unsigned dim_num = 2; - std::vector domain = {1, 10, 1, 10}; + std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice = {1, 10, 1, 10}; - std::vector tile_extents = {2, 5}; + std::vector tile_extents_vec = {2, 5}; + std::vector tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], &tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::COL_MAJOR; auto size = 2 * (sizeof(int32_t)); NDRange ds = {Range(&domain_slice[0], size), Range(&domain_slice[2], size)}; + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; - TileDomain tile_domain( - 0, dim_num, &domain[0], ds, &tile_extents[0], layout); + TileDomain tile_domain(0, domain, ds, tile_extents, layout); const auto& td = tile_domain.tile_domain(); CHECK(td.size() == 4); CHECK(td[0] == 0); @@ -181,17 +196,21 @@ TEST_CASE( TEST_CASE( "TileDomain: Test 2D, col-major, partial", "[TileDomain][2d][col][partial]") { - unsigned dim_num = 2; - std::vector domain = {1, 10, 1, 10}; + std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice = {4, 10, 2, 8}; - std::vector tile_extents = {2, 5}; + std::vector tile_extents_vec = {2, 5}; + std::vector tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], 
&tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::COL_MAJOR; auto size = 2 * (sizeof(int32_t)); NDRange ds = {Range(&domain_slice[0], size), Range(&domain_slice[2], size)}; + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; - TileDomain tile_domain( - 0, dim_num, &domain[0], ds, &tile_extents[0], layout); + TileDomain tile_domain(0, domain, ds, tile_extents, layout); const auto& td = tile_domain.tile_domain(); CHECK(td.size() == 4); CHECK(td[0] == 1); @@ -216,17 +235,21 @@ TEST_CASE( TEST_CASE( "TileDomain: Test 2D, tile subarray", "[TileDomain][2d][tile_subarray]") { - unsigned dim_num = 2; - std::vector domain = {1, 10, 11, 20}; + std::vector domain_vec = {1, 10, 11, 20}; std::vector domain_slice = {4, 10, 12, 18}; - std::vector tile_extents = {2, 5}; + std::vector tile_extents_vec = {2, 5}; + std::vector tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], &tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::COL_MAJOR; auto size = 2 * (sizeof(int32_t)); NDRange ds = {Range(&domain_slice[0], size), Range(&domain_slice[2], size)}; + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; - TileDomain tile_domain( - 0, dim_num, &domain[0], ds, &tile_extents[0], layout); + TileDomain tile_domain(0, domain, ds, tile_extents, layout); int32_t tile_coords[] = {0, 0}; auto tile_subarray = tile_domain.tile_subarray(tile_coords); @@ -246,17 +269,21 @@ TEST_CASE( TEST_CASE( "TileDomain: Test 2D, tile overlap", "[TileDomain][2d][tile_overlap]") { - unsigned dim_num = 2; - std::vector domain = {1, 10, 11, 20}; + std::vector domain_vec = {1, 10, 11, 20}; std::vector domain_slice = {2, 10, 12, 18}; - std::vector tile_extents = {2, 5}; + std::vector tile_extents_vec = {2, 5}; + std::vector tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); 
+ std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], &tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::COL_MAJOR; auto size = 2 * (sizeof(int32_t)); NDRange ds = {Range(&domain_slice[0], size), Range(&domain_slice[2], size)}; + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; - TileDomain tile_domain( - 0, dim_num, &domain[0], ds, &tile_extents[0], layout); + TileDomain tile_domain(0, domain, ds, tile_extents, layout); int32_t tile_coords[] = {0, 0}; auto tile_overlap = tile_domain.tile_overlap(tile_coords); @@ -281,17 +308,21 @@ TEST_CASE( TEST_CASE( "TileDomain: Test 2D, in tile domain", "[TileDomain][2d][in_tile_domain]") { - unsigned dim_num = 2; - std::vector domain = {1, 10, 11, 20}; + std::vector domain_vec = {1, 10, 11, 20}; std::vector domain_slice = {2, 10, 12, 18}; - std::vector tile_extents = {2, 5}; + std::vector tile_extents_vec = {2, 5}; + std::vector tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], &tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::COL_MAJOR; auto size = 2 * (sizeof(int32_t)); NDRange ds = {Range(&domain_slice[0], size), Range(&domain_slice[2], size)}; + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; - TileDomain tile_domain( - 0, dim_num, &domain[0], ds, &tile_extents[0], layout); + TileDomain tile_domain(0, domain, ds, tile_extents, layout); int32_t tile_coords[] = {0, 0}; CHECK(tile_domain.in_tile_domain(tile_coords)); @@ -305,23 +336,27 @@ TEST_CASE( TEST_CASE("TileDomain: Test 2D, covers", "[TileDomain][2d][covers]") { unsigned dim_num = 2; - std::vector domain = {1, 10, 1, 10}; + std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice_1 = {2, 6, 2, 8}; std::vector 
domain_slice_2 = {3, 6, 1, 7}; - std::vector tile_extents = {2, 5}; + std::vector tile_extents_vec = {2, 5}; + std::vector tile_extents(2); + tile_extents[0].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[0][0], &tile_extents_vec[0], sizeof(int32_t)); + tile_extents[1].resize(sizeof(int32_t)); + std::memcpy(&tile_extents[1][0], &tile_extents_vec[1], sizeof(int32_t)); Layout layout = Layout::COL_MAJOR; auto size = 2 * (sizeof(int32_t)); + NDRange domain = {Range(&domain_vec[0], size), Range(&domain_vec[2], size)}; NDRange ds1 = {Range(&domain_slice_1[0], size), Range(&domain_slice_1[2], size)}; NDRange ds2 = {Range(&domain_slice_2[0], size), Range(&domain_slice_2[2], size)}; - TileDomain tile_domain_1( - 1, dim_num, &domain[0], ds1, &tile_extents[0], layout); + TileDomain tile_domain_1(1, domain, ds1, tile_extents, layout); - TileDomain tile_domain_2( - 2, dim_num, &domain[0], ds2, &tile_extents[0], layout); + TileDomain tile_domain_2(2, domain, ds2, tile_extents, layout); int32_t tile_coords[] = {0, 0}; CHECK(!tile_domain_1.covers(tile_coords, tile_domain_2)); diff --git a/test/src/unit-capi-array.cc b/test/src/unit-capi-array.cc index 9eea1390aefe..689381017984 100644 --- a/test/src/unit-capi-array.cc +++ b/test/src/unit-capi-array.cc @@ -1654,3 +1654,227 @@ TEST_CASE_METHOD( remove_temp_dir(temp_dir); } + +TEST_CASE_METHOD( + ArrayFx, + "C API: Test query errors, getting subarray info from write queries in " + "sparse arrays", + "[capi][query][error][sparse]") { + std::string array_name = + FILE_URI_PREFIX + FILE_TEMP_DIR + "query_error_sparse"; + create_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); + + create_sparse_vector(array_name); + + // Open array + tiledb_array_t* array; + int rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_open(ctx_, array, TILEDB_WRITE); + REQUIRE(rc == TILEDB_OK); + + // Prepare query + tiledb_query_t* query; + rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query); + 
CHECK(rc == TILEDB_OK); + uint64_t range_num; + rc = tiledb_query_get_range_num(ctx_, query, 0, &range_num); + CHECK(rc == TILEDB_ERR); + const void *start, *end, *stride; + rc = tiledb_query_get_range(ctx_, query, 0, 0, &start, &end, &stride); + CHECK(rc == TILEDB_ERR); + int64_t s = 10; + int64_t e = 20; + rc = tiledb_query_add_range(ctx_, query, 0, &s, &e, nullptr); + CHECK(rc == TILEDB_ERR); + int64_t subarray[] = {-1, 2}; + rc = tiledb_query_set_subarray(ctx_, query, subarray); + CHECK(rc == TILEDB_ERR); + + // Close array + rc = tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + remove_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); +} + +TEST_CASE_METHOD( + ArrayFx, + "C API: Test query errors, dense writes", + "[capi][query][error][dense]") { + std::string array_name = + FILE_URI_PREFIX + FILE_TEMP_DIR + "query_error_dense"; + create_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); + + create_dense_array(array_name); + + // Open array + tiledb_array_t* array; + int rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_open(ctx_, array, TILEDB_WRITE); + REQUIRE(rc == TILEDB_OK); + + int32_t a[] = {1, 2, 3, 4}; + uint64_t a_size = sizeof(a); + + // Prepare query + tiledb_query_t* query; + rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a", a, &a_size); + CHECK(rc == TILEDB_OK); + uint64_t range_num; + rc = tiledb_query_get_range_num(ctx_, query, 0, &range_num); + CHECK(rc == TILEDB_OK); + CHECK(range_num == 1); // The default + const void *start, *end, *stride; + rc = tiledb_query_get_range(ctx_, query, 0, 0, &start, &end, &stride); + CHECK(rc == TILEDB_OK); + CHECK(*(const uint64_t*)start == 1); + CHECK(*(const uint64_t*)end == 10); + int64_t s = 1; + int64_t e = 2; + rc = tiledb_query_add_range(ctx_, query, 0, &s, &e, nullptr); + CHECK(rc == 
TILEDB_OK); + + int64_t subarray[] = {2, 3, 4, 5}; + rc = tiledb_query_set_subarray(ctx_, query, subarray); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_add_range(ctx_, query, 0, &s, &e, nullptr); + CHECK(rc == TILEDB_ERR); + + rc = tiledb_query_get_range_num(ctx_, query, 0, &range_num); + CHECK(rc == TILEDB_OK); + CHECK(range_num == 1); + rc = tiledb_query_get_range(ctx_, query, 0, 0, &start, &end, &stride); + CHECK(rc == TILEDB_OK); + CHECK(*(const uint64_t*)start == 2); + CHECK(*(const uint64_t*)end == 3); + rc = tiledb_query_get_range(ctx_, query, 1, 0, &start, &end, &stride); + CHECK(rc == TILEDB_OK); + CHECK(*(const uint64_t*)start == 4); + CHECK(*(const uint64_t*)end == 5); + + rc = tiledb_query_set_layout(ctx_, query, TILEDB_GLOBAL_ORDER); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_submit(ctx_, query); + CHECK(rc == TILEDB_ERR); + + // Close array + rc = tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + remove_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); +} + +TEST_CASE_METHOD( + ArrayFx, + "C API: Test query errors, dense unordered writes", + "[capi][query][error][dense]") { + std::string array_name = + FILE_URI_PREFIX + FILE_TEMP_DIR + "query_error_dense"; + create_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); + + create_dense_array(array_name); + + // Open array + tiledb_array_t* array; + int rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_open(ctx_, array, TILEDB_WRITE); + REQUIRE(rc == TILEDB_OK); + + int32_t a[] = {1, 2, 3, 4}; + uint64_t a_size = sizeof(a); + uint64_t coords[] = {1, 1, 1, 2, 1, 3, 1, 4}; + uint64_t coords_size = sizeof(coords); + + // Prepare query + tiledb_query_t* query; + rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a", a, &a_size); + CHECK(rc == TILEDB_OK); + rc = + tiledb_query_set_buffer(ctx_, query, 
TILEDB_COORDS, coords, &coords_size); + CHECK(rc == TILEDB_OK); + + int64_t subarray[] = {2, 3, 4, 5}; + rc = tiledb_query_set_subarray(ctx_, query, subarray); + CHECK(rc == TILEDB_OK); + + rc = tiledb_query_set_layout(ctx_, query, TILEDB_UNORDERED); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_submit(ctx_, query); + CHECK(rc == TILEDB_ERR); + + // Close array + rc = tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + remove_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); +} + +TEST_CASE_METHOD( + ArrayFx, + "C API: Test query errors, dense reads in global order", + "[capi][query][error][dense]") { + std::string array_name = + FILE_URI_PREFIX + FILE_TEMP_DIR + "query_error_dense"; + create_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); + + create_dense_array(array_name); + + // Open array + tiledb_array_t* array; + int rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_open(ctx_, array, TILEDB_READ); + REQUIRE(rc == TILEDB_OK); + + int32_t a[4]; + uint64_t a_size = sizeof(a); + + // Prepare query + tiledb_query_t* query; + rc = tiledb_query_alloc(ctx_, array, TILEDB_READ, &query); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a", a, &a_size); + CHECK(rc == TILEDB_OK); + + int64_t subarray[] = {2, 3, 4, 5}; + rc = tiledb_query_set_subarray(ctx_, query, subarray); + CHECK(rc == TILEDB_OK); + int64_t s = 1; + int64_t e = 2; + rc = tiledb_query_add_range(ctx_, query, 0, &s, &e, nullptr); + CHECK(rc == TILEDB_OK); + + rc = tiledb_query_set_layout(ctx_, query, TILEDB_GLOBAL_ORDER); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_submit(ctx_, query); + CHECK(rc == TILEDB_ERR); + + // Close array + rc = tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + remove_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); +} \ No newline at end of file 
diff --git a/test/src/unit-capi-array_schema.cc b/test/src/unit-capi-array_schema.cc index 886f98da4bae..a0aa796262b8 100644 --- a/test/src/unit-capi-array_schema.cc +++ b/test/src/unit-capi-array_schema.cc @@ -752,12 +752,12 @@ void ArraySchemaFx::load_and_check_array_schema(const std::string& path) { REQUIRE(rc == TILEDB_OK); CHECK_THAT(dim_name, Catch::Equals(DIM1_NAME)); - void* dim_domain; + const void* dim_domain; rc = tiledb_dimension_get_domain(ctx_, dim, &dim_domain); REQUIRE(rc == TILEDB_OK); CHECK(!memcmp(dim_domain, &DIM_DOMAIN[0], DIM_DOMAIN_SIZE)); - void* tile_extent; + const void* tile_extent; rc = tiledb_dimension_get_tile_extent(ctx_, dim, &tile_extent); REQUIRE(rc == TILEDB_OK); CHECK(!memcmp(tile_extent, &TILE_EXTENTS[0], TILE_EXTENT_SIZE)); diff --git a/test/src/unit-capi-dense_array.cc b/test/src/unit-capi-dense_array.cc index f0f05d799c29..46f770078563 100644 --- a/test/src/unit-capi-dense_array.cc +++ b/test/src/unit-capi-dense_array.cc @@ -4174,62 +4174,6 @@ TEST_CASE_METHOD( remove_temp_dir(temp_dir); } -TEST_CASE_METHOD( - DenseArrayFx, - "C API: Test dense array, set subarray in sparse writes should error", - "[capi], [dense], [dense-set-subarray-sparse]") { - SECTION("- No serialization") { - serialize_query_ = false; - } - SECTION("- Serialization") { - serialize_query_ = true; - } - - std::string array_name = - FILE_URI_PREFIX + FILE_TEMP_DIR + "dense_set_subarray_sparse"; - std::string temp_dir = FILE_URI_PREFIX + FILE_TEMP_DIR; - create_temp_dir(temp_dir); - create_dense_array(array_name); - - // Create TileDB context - tiledb_ctx_t* ctx = nullptr; - REQUIRE(tiledb_ctx_alloc(nullptr, &ctx) == TILEDB_OK); - - // Open array - tiledb_array_t* array; - int rc = tiledb_array_alloc(ctx, array_name.c_str(), &array); - CHECK(rc == TILEDB_OK); - rc = tiledb_array_open(ctx, array, TILEDB_WRITE); - CHECK(rc == TILEDB_OK); - - // Create WRITE query - tiledb_query_t* query; - rc = tiledb_query_alloc(ctx, array, TILEDB_WRITE, &query); - CHECK(rc 
== TILEDB_OK); - - uint64_t subarray[] = {1, 1, 1, 1}; - - // Set some subarray BEFORE setting the layout to UNORDERED - rc = tiledb_query_set_subarray(ctx, query, subarray); - CHECK(rc == TILEDB_OK); - - // Set some subarray AFTER setting the layout to UNORDERED - rc = tiledb_query_set_layout(ctx, query, TILEDB_UNORDERED); - CHECK(rc == TILEDB_OK); - rc = tiledb_query_set_subarray(ctx, query, subarray); - CHECK(rc == TILEDB_ERR); - - // Close array - CHECK(tiledb_array_close(ctx, array) == TILEDB_OK); - - // Clean up - tiledb_query_free(&query); - tiledb_array_free(&array); - tiledb_ctx_free(&ctx); - - remove_temp_dir(temp_dir); -} - TEST_CASE_METHOD( DenseArrayFx, "C API: Test dense array, check if coords exist in unordered writes", diff --git a/test/src/unit-cppapi-array.cc b/test/src/unit-cppapi-array.cc index 5be5caa07e82..aa019b9bc95a 100644 --- a/test/src/unit-cppapi-array.cc +++ b/test/src/unit-cppapi-array.cc @@ -564,6 +564,7 @@ TEST_CASE("C++ API: Encrypted array", "[cppapi][encryption]") { Array array(ctx, array_name, TILEDB_WRITE, TILEDB_AES_256_GCM, key, key_len); REQUIRE(Array::encryption_type(ctx, array_name) == TILEDB_AES_256_GCM); array.close(); + REQUIRE_THROWS_AS(array.open(TILEDB_WRITE), tiledb::TileDBError); array.open(TILEDB_WRITE, TILEDB_AES_256_GCM, key, key_len); REQUIRE(Array::encryption_type(ctx, array_name) == TILEDB_AES_256_GCM); diff --git a/tiledb/sm/array_schema/dimension.cc b/tiledb/sm/array_schema/dimension.cc index 3265e39e569d..677b12b617c8 100644 --- a/tiledb/sm/array_schema/dimension.cc +++ b/tiledb/sm/array_schema/dimension.cc @@ -54,9 +54,9 @@ Dimension::Dimension() Dimension::Dimension(const std::string& name, Datatype type) : name_(name) , type_(type) { - domain_ = nullptr; - tile_extent_ = nullptr; set_ceil_to_tile_func(); + set_check_range_func(); + set_coincides_with_tiles_func(); set_compute_mbr_func(); set_crop_range_func(); set_domain_range_func(); @@ -78,24 +78,28 @@ Dimension::Dimension(const Dimension* dim) { name_ 
= dim->name(); type_ = dim->type_; - oob_func_ = dim->oob_func_; - uint64_t type_size = datatype_size(type_); - domain_ = std::malloc(2 * type_size); - std::memcpy(domain_, dim->domain(), 2 * type_size); - const void* tile_extent = dim->tile_extent(); - if (tile_extent == nullptr) { - tile_extent_ = nullptr; - } else { - tile_extent_ = std::malloc(type_size); - std::memcpy(tile_extent_, tile_extent, type_size); - } -} + // Set functions + ceil_to_tile_func_ = dim->ceil_to_tile_func_; + check_range_func_ = dim->check_range_func_; + coincides_with_tiles_func_ = dim->coincides_with_tiles_func_; + compute_mbr_func_ = dim->compute_mbr_func_; + crop_range_func_ = dim->crop_range_func_; + domain_range_func_ = dim->domain_range_func_; + expand_range_v_func_ = dim->expand_range_v_func_; + expand_range_func_ = dim->expand_range_func_; + expand_to_tile_func_ = dim->expand_to_tile_func_; + oob_func_ = dim->oob_func_; + covered_func_ = dim->covered_func_; + overlap_func_ = dim->overlap_func_; + overlap_ratio_func_ = dim->overlap_ratio_func_; + split_range_func_ = dim->split_range_func_; + splitting_value_func_ = dim->splitting_value_func_; + tile_num_func_ = dim->tile_num_func_; + value_in_range_func_ = dim->value_in_range_func_; -Dimension::~Dimension() { - // Clean up - std::free(domain_); - std::free(tile_extent_); + domain_ = dim->domain(); + tile_extent_ = dim->tile_extent(); } /* ********************************* */ @@ -185,29 +189,23 @@ Status Dimension::deserialize(ConstBuffer* buff, Datatype type) { RETURN_NOT_OK(buff->read(&name_[0], dimension_name_size)); // Load domain - uint64_t domain_size = 2 * datatype_size(type_); - std::free(domain_); - domain_ = std::malloc(domain_size); - if (domain_ == nullptr) - return LOG_STATUS( - Status::DimensionError("Cannot deserialize; Memory allocation failed")); - RETURN_NOT_OK(buff->read(domain_, domain_size)); + uint64_t domain_size = 2 * coord_size(); + std::vector tmp(domain_size); + RETURN_NOT_OK(buff->read(&tmp[0],
domain_size)); + domain_ = Range(&tmp[0], domain_size); // Load tile extent - std::free(tile_extent_); - tile_extent_ = nullptr; + tile_extent_.clear(); uint8_t null_tile_extent; RETURN_NOT_OK(buff->read(&null_tile_extent, sizeof(uint8_t))); if (null_tile_extent == 0) { - tile_extent_ = std::malloc(datatype_size(type_)); - if (tile_extent_ == nullptr) { - return LOG_STATUS(Status::DimensionError( - "Cannot deserialize; Memory allocation failed")); - } - RETURN_NOT_OK(buff->read(tile_extent_, datatype_size(type_))); + tile_extent_.resize(coord_size()); + RETURN_NOT_OK(buff->read(&tile_extent_[0], coord_size())); } set_ceil_to_tile_func(); + set_check_range_func(); + set_coincides_with_tiles_func(); set_compute_mbr_func(); set_crop_range_func(); set_domain_range_func(); @@ -226,7 +224,7 @@ Status Dimension::deserialize(ConstBuffer* buff, Datatype type) { return Status::Ok(); } -void* Dimension::domain() const { +const Range& Dimension::domain() const { return domain_; } @@ -234,9 +232,8 @@ void Dimension::dump(FILE* out) const { if (out == nullptr) out = stdout; // Retrieve domain and tile extent strings - std::string domain_s = utils::parse::domain_str(domain_, type_); - std::string tile_extent_s = - utils::parse::tile_extent_str(tile_extent_, type_); + std::string domain_s = domain_str(); + std::string tile_extent_s = tile_extent_str(); // Dump fprintf(out, "### Dimension ###\n"); @@ -266,10 +263,10 @@ void Dimension::ceil_to_tile( assert(dim != nullptr); assert(!r.empty()); assert(v != nullptr); - assert(dim->tile_extent() != nullptr); + assert(!dim->tile_extent().empty()); - auto tile_extent = *(const T*)dim->tile_extent(); - auto dim_dom = (const T*)dim->domain(); + auto tile_extent = *(const T*)dim->tile_extent().data(); + auto dim_dom = (const T*)dim->domain().data(); v->resize(sizeof(T)); auto r_t = (const T*)r.data(); @@ -288,6 +285,68 @@ void Dimension::ceil_to_tile( ceil_to_tile_func_(this, r, tile_num, v); } +template +bool Dimension::check_range( + const 
Dimension* dim, const Range& range, std::string* err_msg) { + auto domain = (const T*)dim->domain().data(); + auto r = (const T*)range.data(); + + // Check for NaN + if (!std::is_integral::value && (std::isnan(r[0]) || std::isnan(r[1]))) { + *err_msg = "Cannot add range to dimension; Range contains NaN"; + return false; + } + + // Check range bounds + if (r[0] > r[1]) { + *err_msg = + "Cannot add range to dimension; Lower range " + "bound cannot be larger than the higher bound"; + return false; + } + + // Check out-of-bounds + if (r[0] < domain[0] || r[1] > domain[1]) { + std::stringstream ss; + ss << "Range [" << r[0] << ", " << r[1] << "] is out of domain bounds [" + << domain[0] << ", " << domain[1] << "] on dimension '" << dim->name() + << "'"; + *err_msg = ss.str(); + return false; + } + + return true; +} + +Status Dimension::check_range(const Range& range) const { + assert(check_range_func_ != nullptr); + std::string err_msg; + auto ret = check_range_func_(this, range, &err_msg); + if (!ret) + return LOG_STATUS(Status::DimensionError(err_msg)); + return Status::Ok(); +} + +template +bool Dimension::coincides_with_tiles(const Dimension* dim, const Range& r) { + assert(dim != nullptr); + assert(!r.empty()); + assert(!dim->tile_extent().empty()); + + auto dim_domain = (const T*)dim->domain().data(); + auto tile_extent = *(const T*)dim->tile_extent().data(); + auto d = (const T*)r.data(); + auto norm_1 = uint64_t(d[0] - dim_domain[0]); + auto norm_2 = (uint64_t(d[1]) - dim_domain[0]) + 1; + return ((norm_1 / tile_extent) * tile_extent == norm_1) && + ((norm_2 / tile_extent) * tile_extent == norm_2); +} + +bool Dimension::coincides_with_tiles(const Range& r) const { + assert(coincides_with_tiles_func_ != nullptr); + return coincides_with_tiles_func_(this, r); +} + template void Dimension::compute_mbr(const Tile& tile, Range* mbr) { assert(mbr != nullptr); @@ -314,7 +373,7 @@ template void Dimension::crop_range(const Dimension* dim, Range* range) { assert(dim != 
nullptr); assert(!range->empty()); - auto dim_dom = (const T*)dim->domain(); + auto dim_dom = (const T*)dim->domain().data(); auto r = (const T*)range->data(); T res[2] = {std::max(r[0], dim_dom[0]), std::min(r[1], dim_dom[1])}; range->set_range(res, sizeof(res)); @@ -384,11 +443,11 @@ void Dimension::expand_to_tile(const Dimension* dim, Range* range) { assert(!range->empty()); // Applicable only to regular tiles and integral domains - if (dim->tile_extent() == nullptr || !std::is_integral::value) + if (dim->tile_extent().empty() || !std::is_integral::value) return; - auto tile_extent = *(const T*)dim->tile_extent(); - auto dim_dom = (const T*)dim->domain(); + auto tile_extent = *(const T*)dim->tile_extent().data(); + auto dim_dom = (const T*)dim->domain().data(); auto r = (const T*)range->data(); T res[2]; @@ -407,7 +466,7 @@ void Dimension::expand_to_tile(Range* range) const { template bool Dimension::oob( const Dimension* dim, const void* coord, std::string* err_msg) { - auto domain = (const T*)dim->domain(); + auto domain = (const T*)dim->domain().data(); auto coord_t = (const T*)coord; if (*coord_t < domain[0] || *coord_t > domain[1]) { std::stringstream ss; @@ -420,9 +479,13 @@ bool Dimension::oob( return false; } -bool Dimension::oob(const void* coord, std::string* err_msg) const { +Status Dimension::oob(const void* coord) const { assert(oob_func_ != nullptr); - return oob_func_(this, coord, err_msg); + std::string err_msg; + auto ret = oob_func_(this, coord, &err_msg); + if (ret) + return LOG_STATUS(Status::DimensionError(err_msg)); + return Status::Ok(); } template @@ -497,15 +560,15 @@ double Dimension::overlap_ratio(const Range& r1, const Range& r2) const { template void Dimension::split_range( - const void* r, const ByteVecValue& v, Range* r1, Range* r2) { - assert(r != nullptr); + const Range& r, const ByteVecValue& v, Range* r1, Range* r2) { + assert(!r.empty()); assert(!v.empty()); assert(r1 != nullptr); assert(r2 != nullptr); auto max = 
std::numeric_limits::max(); bool int_domain = std::numeric_limits::is_integer; - auto r_t = (const T*)r; + auto r_t = (const T*)r.data(); auto v_t = *(const T*)(&v[0]); T ret[2]; @@ -518,7 +581,7 @@ void Dimension::split_range( } void Dimension::split_range( - const void* r, const ByteVecValue& v, Range* r1, Range* r2) const { + const Range& r, const ByteVecValue& v, Range* r1, Range* r2) const { assert(split_range_func_ != nullptr); split_range_func_(r, v, r1, r2); } @@ -549,11 +612,11 @@ uint64_t Dimension::tile_num(const Dimension* dim, const Range& range) { assert(!range.empty()); // Trivial cases - if (dim->tile_extent() == nullptr) + if (dim->tile_extent().empty()) return 1; - auto tile_extent = *(const T*)dim->tile_extent(); - auto dim_dom = (const T*)dim->domain(); + auto tile_extent = *(const T*)dim->tile_extent().data(); + auto dim_dom = (const T*)dim->domain().data(); auto r = (const T*)range.data(); uint64_t start = floor((r[0] - dim_dom[0]) / tile_extent); uint64_t end = floor((r[1] - dim_dom[0]) / tile_extent); @@ -587,7 +650,7 @@ bool Dimension::value_in_range(const void* value, const Range& range) const { // tile_extent (void* - type_size) Status Dimension::serialize(Buffer* buff) { // Sanity check - if (domain_ == nullptr) { + if (domain_.empty()) { return LOG_STATUS( Status::DimensionError("Cannot serialize dimension; Domain not set")); } @@ -598,83 +661,67 @@ Status Dimension::serialize(Buffer* buff) { RETURN_NOT_OK(buff->write(name_.c_str(), dimension_name_size)); // Write domain and tile extent - uint64_t domain_size = 2 * datatype_size(type_); - RETURN_NOT_OK(buff->write(domain_, domain_size)); + uint64_t domain_size = 2 * coord_size(); + RETURN_NOT_OK(buff->write(domain_.data(), domain_size)); - auto null_tile_extent = (uint8_t)((tile_extent_ == nullptr) ? 1 : 0); + auto null_tile_extent = (uint8_t)((tile_extent_.empty()) ? 
1 : 0); RETURN_NOT_OK(buff->write(&null_tile_extent, sizeof(uint8_t))); - if (tile_extent_ != nullptr) - RETURN_NOT_OK(buff->write(tile_extent_, datatype_size(type_))); + if (!tile_extent_.empty()) + RETURN_NOT_OK(buff->write(tile_extent_.data(), tile_extent_.size())); return Status::Ok(); } Status Dimension::set_domain(const void* domain) { - std::free(domain_); + if (domain == nullptr) + return Status::Ok(); + return set_domain(Range(domain, 2 * coord_size())); +} - if (domain == nullptr) { - domain_ = nullptr; +Status Dimension::set_domain(const Range& domain) { + if (domain.empty()) return Status::Ok(); - } - uint64_t domain_size = 2 * datatype_size(type_); - domain_ = std::malloc(domain_size); - if (domain_ == nullptr) { - return LOG_STATUS( - Status::DimensionError("Cannot set domain; Memory allocation error")); - } - std::memcpy(domain_, domain, domain_size); + domain_ = domain; + RETURN_NOT_OK_ELSE(check_domain(), domain_.clear()); - auto st = check_domain(); - if (!st.ok()) { - std::free(domain_); - domain_ = nullptr; + return Status::Ok(); +} + +Status Dimension::set_tile_extent(const void* tile_extent) { + ByteVecValue te; + if (tile_extent != nullptr) { + auto size = coord_size(); + te.resize(size); + std::memcpy(&te[0], tile_extent, size); } - return st; + return set_tile_extent(te); } -Status Dimension::set_tile_extent(const void* tile_extent) { - if (domain_ == nullptr) +Status Dimension::set_tile_extent(const ByteVecValue& tile_extent) { + if (domain_.empty()) return LOG_STATUS(Status::DimensionError( "Cannot set tile extent; Domain must be set first")); // Note: this check was added in release 1.6.0. Older arrays may have been // serialized with a null extent, and so it is still supported internally. // But users can not construct dimension objects with null tile extents. 
- if (tile_extent == nullptr) + if (tile_extent.empty()) return LOG_STATUS(Status::DimensionError( "Cannot set tile extent; tile extent cannot be null")); - std::free(tile_extent_); - if (tile_extent == nullptr) { - tile_extent_ = nullptr; - return Status::Ok(); - } + tile_extent_ = tile_extent; - uint64_t type_size = datatype_size(type_); - tile_extent_ = std::malloc(type_size); - if (tile_extent_ == nullptr) { - return LOG_STATUS(Status::DimensionError( - "Cannot set tile extent; Memory allocation error")); - } - std::memcpy(tile_extent_, tile_extent, type_size); - - auto st = check_tile_extent(); - if (!st.ok()) { - std::free(domain_); - domain_ = nullptr; - } - - return st; + return check_tile_extent(); } Status Dimension::set_null_tile_extent_to_range() { // Applicable only to null extents - if (tile_extent_ != nullptr) + if (!tile_extent_.empty()) return Status::Ok(); - if (domain_ == nullptr) + if (domain_.empty()) return LOG_STATUS(Status::DimensionError( "Cannot set tile extent to domain range; Domain not set")); @@ -727,11 +774,11 @@ Status Dimension::set_null_tile_extent_to_range() { template Status Dimension::set_null_tile_extent_to_range() { // Applicable only to null extents - if (tile_extent_ != nullptr) + if (!tile_extent_.empty()) return Status::Ok(); // Calculate new tile extent equal to domain range - auto domain = (T*)domain_; + auto domain = (const T*)domain_.data(); T tile_extent = domain[1] - domain[0]; // Check overflow before adding 1 @@ -745,18 +792,13 @@ Status Dimension::set_null_tile_extent_to_range() { // Allocate space uint64_t type_size = sizeof(T); - tile_extent_ = std::malloc(type_size); - if (tile_extent_ == nullptr) { - return LOG_STATUS( - Status::DimensionError("Cannot set null tile extent to domain range; " - "Memory allocation error")); - } - std::memcpy(tile_extent_, &tile_extent, type_size); + tile_extent_.resize(type_size); + std::memcpy(&tile_extent_[0], &tile_extent, type_size); return Status::Ok(); } -void* 
Dimension::tile_extent() const { +const ByteVecValue& Dimension::tile_extent() const { return tile_extent_; } @@ -815,6 +857,39 @@ Status Dimension::check_domain() const { } } +template +Status Dimension::check_domain() const { + assert(!domain_.empty()); + auto domain = (const T*)domain_.data(); + + // Check for NAN and INF + if (std::is_integral::value) { + if (std::isinf(domain[0]) || std::isinf(domain[1])) + return LOG_STATUS( + Status::DimensionError("Domain check failed; domain contains NaN")); + if (std::isnan(domain[0]) || std::isnan(domain[1])) + return LOG_STATUS( + Status::DimensionError("Domain check failed; domain contains NaN")); + } + + // Upper bound should not be smaller than lower + if (domain[1] < domain[0]) + return LOG_STATUS( + Status::DimensionError("Domain check failed; Upper domain bound should " + "not be smaller than the lower one")); + + // Domain range must not exceed the maximum uint64_t number + // for integer domains + uint64_t diff = domain[1] - domain[0]; + if (std::is_integral::value && + diff == std::numeric_limits::max()) + return LOG_STATUS(Status::DimensionError( + "Domain check failed; Domain range (upper + lower + 1) is larger " + "than the maximum uint64 number")); + + return Status::Ok(); +} + Status Dimension::check_tile_extent() const { switch (type_) { case Datatype::INT32: @@ -859,12 +934,15 @@ Status Dimension::check_tile_extent() const { template Status Dimension::check_tile_extent() const { - if (domain_ == nullptr) + if (domain_.empty()) return LOG_STATUS( Status::DimensionError("Tile extent check failed; Domain not set")); - auto tile_extent = static_cast(tile_extent_); - auto domain = static_cast(domain_); + if (tile_extent_.empty()) + return Status::Ok(); + + auto tile_extent = (const T*)tile_extent_.data(); + auto domain = (const T*)domain_.data(); bool is_int = std::is_integral::value; // Check if tile extent is negative or 0 @@ -909,6 +987,190 @@ Status Dimension::check_tile_extent() const { return 
Status::Ok(); } +std::string Dimension::domain_str() const { + std::stringstream ss; + + if (domain_.empty()) + return ""; + + const int* domain_int32; + const int64_t* domain_int64; + const float* domain_float32; + const double* domain_float64; + const int8_t* domain_int8; + const uint8_t* domain_uint8; + const int16_t* domain_int16; + const uint16_t* domain_uint16; + const uint32_t* domain_uint32; + const uint64_t* domain_uint64; + + switch (type_) { + case Datatype::INT32: + domain_int32 = (const int32_t*)domain_.data(); + ss << "[" << domain_int32[0] << "," << domain_int32[1] << "]"; + return ss.str(); + case Datatype::INT64: + domain_int64 = (const int64_t*)domain_.data(); + ss << "[" << domain_int64[0] << "," << domain_int64[1] << "]"; + return ss.str(); + case Datatype::FLOAT32: + domain_float32 = (const float*)domain_.data(); + ss << "[" << domain_float32[0] << "," << domain_float32[1] << "]"; + return ss.str(); + case Datatype::FLOAT64: + domain_float64 = (const double*)domain_.data(); + ss << "[" << domain_float64[0] << "," << domain_float64[1] << "]"; + return ss.str(); + case Datatype::INT8: + domain_int8 = (const int8_t*)domain_.data(); + ss << "[" << int(domain_int8[0]) << "," << int(domain_int8[1]) << "]"; + return ss.str(); + case Datatype::UINT8: + domain_uint8 = (const uint8_t*)domain_.data(); + ss << "[" << int(domain_uint8[0]) << "," << int(domain_uint8[1]) << "]"; + return ss.str(); + case Datatype::INT16: + domain_int16 = (const int16_t*)domain_.data(); + ss << "[" << domain_int16[0] << "," << domain_int16[1] << "]"; + return ss.str(); + case Datatype::UINT16: + domain_uint16 = (const uint16_t*)domain_.data(); + ss << "[" << domain_uint16[0] << "," << domain_uint16[1] << "]"; + return ss.str(); + case Datatype::UINT32: + domain_uint32 = (const uint32_t*)domain_.data(); + ss << "[" << domain_uint32[0] << "," << domain_uint32[1] << "]"; + return ss.str(); + case Datatype::UINT64: + domain_uint64 = (const uint64_t*)domain_.data(); + ss << "[" << 
domain_uint64[0] << "," << domain_uint64[1] << "]"; + return ss.str(); + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + domain_int64 = (const int64_t*)domain_.data(); + ss << "[" << domain_int64[0] << "," << domain_int64[1] << "]"; + return ss.str(); + + case Datatype::CHAR: + case Datatype::STRING_ASCII: + case Datatype::STRING_UTF8: + case Datatype::STRING_UTF16: + case Datatype::STRING_UTF32: + case Datatype::STRING_UCS2: + case Datatype::STRING_UCS4: + case Datatype::ANY: + // Not supported domain type + assert(false); + return ""; + } + + assert(false); + return ""; +} + +std::string Dimension::tile_extent_str() const { + std::stringstream ss; + + if (tile_extent_.empty()) + return constants::null_str; + + const int* tile_extent_int32; + const int64_t* tile_extent_int64; + const float* tile_extent_float32; + const double* tile_extent_float64; + const int8_t* tile_extent_int8; + const uint8_t* tile_extent_uint8; + const int16_t* tile_extent_int16; + const uint16_t* tile_extent_uint16; + const uint32_t* tile_extent_uint32; + const uint64_t* tile_extent_uint64; + + switch (type_) { + case Datatype::INT32: + tile_extent_int32 = (const int32_t*)tile_extent_.data(); + ss << *tile_extent_int32; + return ss.str(); + case Datatype::INT64: + tile_extent_int64 = (const int64_t*)tile_extent_.data(); + ss << *tile_extent_int64; + return ss.str(); + case Datatype::FLOAT32: + tile_extent_float32 = (const float*)tile_extent_.data(); + ss << *tile_extent_float32; + return ss.str(); + case Datatype::FLOAT64: + tile_extent_float64 = (const double*)tile_extent_.data(); + ss << *tile_extent_float64; + return ss.str(); + case 
Datatype::INT8: + tile_extent_int8 = (const int8_t*)tile_extent_.data(); + ss << int(*tile_extent_int8); + return ss.str(); + case Datatype::UINT8: + tile_extent_uint8 = (const uint8_t*)tile_extent_.data(); + ss << int(*tile_extent_uint8); + return ss.str(); + case Datatype::INT16: + tile_extent_int16 = (const int16_t*)tile_extent_.data(); + ss << *tile_extent_int16; + return ss.str(); + case Datatype::UINT16: + tile_extent_uint16 = (const uint16_t*)tile_extent_.data(); + ss << *tile_extent_uint16; + return ss.str(); + case Datatype::UINT32: + tile_extent_uint32 = (const uint32_t*)tile_extent_.data(); + ss << *tile_extent_uint32; + return ss.str(); + case Datatype::UINT64: + tile_extent_uint64 = (const uint64_t*)tile_extent_.data(); + ss << *tile_extent_uint64; + return ss.str(); + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + tile_extent_int64 = (const int64_t*)tile_extent_.data(); + ss << *tile_extent_int64; + return ss.str(); + + case Datatype::CHAR: + case Datatype::STRING_ASCII: + case Datatype::STRING_UTF8: + case Datatype::STRING_UTF16: + case Datatype::STRING_UTF32: + case Datatype::STRING_UCS2: + case Datatype::STRING_UCS4: + case Datatype::ANY: + // Not supported domain type + assert(false); + return ""; + } + + assert(false); + return ""; +} + void Dimension::set_crop_range_func() { switch (type_) { case Datatype::INT32: @@ -1068,6 +1330,112 @@ void Dimension::set_ceil_to_tile_func() { } } +void Dimension::set_check_range_func() { + switch (type_) { + case Datatype::INT32: + check_range_func_ = check_range; + break; + case Datatype::INT64: + check_range_func_ = check_range; + break; + case Datatype::INT8: + 
check_range_func_ = check_range; + break; + case Datatype::UINT8: + check_range_func_ = check_range; + break; + case Datatype::INT16: + check_range_func_ = check_range; + break; + case Datatype::UINT16: + check_range_func_ = check_range; + break; + case Datatype::UINT32: + check_range_func_ = check_range; + break; + case Datatype::UINT64: + check_range_func_ = check_range; + break; + case Datatype::FLOAT32: + check_range_func_ = check_range; + break; + case Datatype::FLOAT64: + check_range_func_ = check_range; + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + check_range_func_ = check_range; + break; + default: + check_range_func_ = nullptr; + break; + } +} + +void Dimension::set_coincides_with_tiles_func() { + switch (type_) { + case Datatype::INT32: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::INT64: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::INT8: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::UINT8: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::INT16: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::UINT16: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::UINT32: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::UINT64: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::FLOAT32: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::FLOAT64: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + case Datatype::DATETIME_YEAR: + 
case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + coincides_with_tiles_func_ = coincides_with_tiles; + break; + default: + coincides_with_tiles_func_ = nullptr; + break; + } +} + void Dimension::set_compute_mbr_func() { switch (type_) { case Datatype::INT32: diff --git a/tiledb/sm/array_schema/dimension.h b/tiledb/sm/array_schema/dimension.h index 3c2d93b9391c..d5208234d654 100644 --- a/tiledb/sm/array_schema/dimension.h +++ b/tiledb/sm/array_schema/dimension.h @@ -76,7 +76,7 @@ class Dimension { explicit Dimension(const Dimension* dim); /** Destructor. */ - ~Dimension(); + ~Dimension() = default; /* ********************************* */ /* API */ @@ -101,7 +101,7 @@ class Dimension { Status deserialize(ConstBuffer* buff, Datatype type); /** Returns the domain. */ - void* domain() const; + const Range& domain() const; /** Dumps the dimension contents in ASCII form in the selected output. */ void dump(FILE* out) const; @@ -129,6 +129,31 @@ class Dimension { static void ceil_to_tile( const Dimension* dim, const Range& r, uint64_t tile_num, ByteVecValue* v); + /** + * Performs correctness checks on the input range. + * + * Specifically, it checks + * - if the lower range bound is larger than the upper + * - if the range falls outside the dimension domain + * - for real domains, if any range bound is NaN + */ + Status check_range(const Range& range) const; + + /** + * Performs correctness checks on the input range. Returns `true` + * upon error and stores an error message to `err_msg`. 
+ */ + template + static bool check_range( + const Dimension* dim, const Range& range, std::string* err_msg); + + /** Returns true if the input range coincides with tile boundaries. */ + bool coincides_with_tiles(const Range& r) const; + + /** Returns true if the input range coincides with tile boundaries. */ + template + static bool coincides_with_tiles(const Dimension* dim, const Range& r); + /** * Computed the minimum bounding range of the values stored in * `tile`. @@ -150,7 +175,7 @@ class Dimension { /** * Crops the input 1D range such that it does not exceed the - * input dimension domain. + * dimension domain. */ template static void crop_range(const Dimension* dim, Range* range); @@ -198,16 +223,10 @@ class Dimension { static void expand_to_tile(const Dimension* dim, Range* range); /** - * Returns true if the input coordinate is out-of-bounds with respect + * Returns error if the input coordinate is out-of-bounds with respect * to the dimension domain. - * - * @param coord The coordinate to be checked. It will properly be - * type-cast to the dimension datatype. - * @param err_msg An error message to be retrieved in case the function - * returns true. - * @return True if the input coordinates is out-of-bounds. */ - bool oob(const void* coord, std::string* err_msg) const; + Status oob(const void* coord) const; /** * Returns true if the input coordinate is out-of-bounds with respect @@ -247,12 +266,12 @@ class Dimension { /** Splits `r` at point `v`, producing 1D ranges `r1` and `r2`. */ void split_range( - const void* r, const ByteVecValue& v, Range* r1, Range* r2) const; + const Range& r, const ByteVecValue& v, Range* r1, Range* r2) const; /** Splits `r` at point `v`, producing 1D ranges `r1` and `r2`. 
*/ template static void split_range( - const void* r, const ByteVecValue& v, Range* r1, Range* r2); + const Range& r, const ByteVecValue& v, Range* r1, Range* r2); /** * Computes the splitting point `v` of `r`, and sets `unsplittable` @@ -294,12 +313,20 @@ class Dimension { /** Sets the domain. */ Status set_domain(const void* domain); + /** Sets the domain. */ + Status set_domain(const Range& domain); + /** Sets the tile extent. */ Status set_tile_extent(const void* tile_extent); + /** Sets the tile extent. */ + Status set_tile_extent(const ByteVecValue& tile_extent); + /** * If the tile extent is `null`, this function sets the * the tile extent to the dimension domain range. + * + * @note This is applicable only to dense arrays. */ Status set_null_tile_extent_to_range(); @@ -308,12 +335,14 @@ class Dimension { * the tile extent to the dimension domain range. * * @tparam T The dimension type. + * + * @note This is applicable only to dense arrays. */ template Status set_null_tile_extent_to_range(); /** Returns the tile extent. */ - void* tile_extent() const; + const ByteVecValue& tile_extent() const; /** Returns the dimension type. */ Datatype type() const; @@ -327,13 +356,13 @@ class Dimension { /* ********************************* */ /** The dimension domain. */ - void* domain_; + Range domain_; /** The dimension name. */ std::string name_; /** The tile extent of the dimension. */ - void* tile_extent_; + ByteVecValue tile_extent_; /** The dimension type. */ Datatype type_; @@ -345,6 +374,20 @@ class Dimension { std::function ceil_to_tile_func_; + /** + * Stores the appropriate templated check_range() function based on the + * dimension datatype. + */ + std::function + check_range_func_; + + /** + * Stores the appropriate templated coincides_with_tiles() function based on + * the dimension datatype. + */ + std::function + coincides_with_tiles_func_; + /** * Stores the appropriate templated compute_mbr() function based on the * dimension datatype. 
@@ -410,7 +453,7 @@ class Dimension { * Stores the appropriate templated split_range() function based on the * dimension datatype. */ - std::function + std::function split_range_func_; /** @@ -439,83 +482,32 @@ class Dimension { /** Returns an error if the set domain is invalid. */ Status check_domain() const; - /** - * Returns an error if the set domain is invalid. - * Applicable only to integer domains - * - * @tparam T The type of the dimension domain. - * @return Status - */ - template < - typename T, - typename std::enable_if::value>::type* = nullptr> - Status check_domain() const { - assert(domain_ != nullptr); - auto domain = static_cast(domain_); - - // Upper bound should not be smaller than lower - if (domain[1] < domain[0]) - return LOG_STATUS(Status::DimensionError( - "Domain check failed; Upper domain bound should " - "not be smaller than the lower one")); - - // Domain range must not exceed the maximum uint64_t number - // for integer domains - uint64_t diff = domain[1] - domain[0]; - if (diff == std::numeric_limits::max()) - return LOG_STATUS(Status::DimensionError( - "Domain check failed; Domain range (upper + lower + 1) is larger " - "than the maximum uint64 number")); - - return Status::Ok(); - } - - /** - * Returns an error if the set domain is invalid. - * Applicable only to real domains. - * - * @tparam T The type of the dimension domain. 
- * @return Status - */ - template < - typename T, - typename std::enable_if::value>::type* = nullptr> - Status check_domain() const { - assert(domain_ != nullptr); - auto domain = static_cast(domain_); - - // Check for NAN and INF - if (std::isinf(domain[0]) || std::isinf(domain[1])) - return LOG_STATUS( - Status::DimensionError("Domain check failed; domain contains NaN")); - if (std::isnan(domain[0]) || std::isnan(domain[1])) - return LOG_STATUS( - Status::DimensionError("Domain check failed; domain contains NaN")); - - // Upper bound should not be smaller than lower - if (domain[1] < domain[0]) - return LOG_STATUS(Status::DimensionError( - "Domain check failed; Upper domain bound should " - "not be smaller than the lower one")); - - return Status::Ok(); - } + /** Returns an error if the set domain is invalid. */ + template + Status check_domain() const; /** Returns an error if the set tile extent is invalid. */ Status check_tile_extent() const; - /** - * Returns an error if the set tile extent is invalid. - * - * @tparam T The type of the dimension domain. - * @return Status - */ + /** Returns an error if the set tile extent is invalid. */ template Status check_tile_extent() const; + /** Returns the domain in string format. */ + std::string domain_str() const; + + /** Returns the tile extent in string format. */ + std::string tile_extent_str() const; + /** Sets the templated ceil_to_tile() function. */ void set_ceil_to_tile_func(); + /** Sets the templated check_range() function. */ + void set_check_range_func(); + + /** Sets the templated coincides_with_tiles() function. */ + void set_coincides_with_tiles_func(); + /** Sets the templated compute_mbr() function. 
*/ void set_compute_mbr_func(); diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index 86ce4b75bede..8a3d66dd7664 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -61,9 +61,6 @@ Domain::Domain(Datatype type) tile_order_ = Layout::ROW_MAJOR; dim_num_ = 0; cell_num_per_tile_ = 0; - domain_ = nullptr; - tile_extents_ = nullptr; - tile_domain_ = nullptr; } Domain::Domain(const Domain* domain) { @@ -77,43 +74,14 @@ Domain::Domain(const Domain* domain) { for (auto dim : domain->dimensions_) dimensions_.emplace_back(new Dimension(dim)); - uint64_t coords_size = dim_num_ * datatype_size(type_); tile_order_ = domain->tile_order_; tile_offsets_col_ = domain->tile_offsets_col_; tile_offsets_row_ = domain->tile_offsets_row_; - - if (domain->domain_ == nullptr) { - domain_ = nullptr; - } else { - domain_ = std::malloc(2 * coords_size); - std::memcpy(domain_, domain->domain_, 2 * coords_size); - } - - if (domain->tile_domain_ == nullptr) { - tile_domain_ = nullptr; - } else { - tile_domain_ = std::malloc(2 * coords_size); - std::memcpy(tile_domain_, domain->tile_domain_, 2 * coords_size); - } - - if (domain->tile_extents_ == nullptr) { - tile_extents_ = nullptr; - } else { - tile_extents_ = std::malloc(coords_size); - std::memcpy(tile_extents_, domain->tile_extents_, coords_size); - } } Domain::~Domain() { for (auto dim : dimensions_) delete dim; - - std::free(tile_extents_); - tile_extents_ = nullptr; - std::free(domain_); - domain_ = nullptr; - std::free(tile_domain_); - tile_domain_ = nullptr; } /* ********************************* */ @@ -153,48 +121,6 @@ Status Domain::add_dimension(const Dimension* dim) { return Status::Ok(); } -uint64_t Domain::cell_num(const void* domain) const { - switch (type_) { - case Datatype::INT32: - return cell_num(static_cast(domain)); - case Datatype::INT64: - return cell_num(static_cast(domain)); - case Datatype::INT8: - return cell_num(static_cast(domain)); - case 
Datatype::UINT8: - return cell_num(static_cast(domain)); - case Datatype::INT16: - return cell_num(static_cast(domain)); - case Datatype::UINT16: - return cell_num(static_cast(domain)); - case Datatype::UINT32: - return cell_num(static_cast(domain)); - case Datatype::UINT64: - return cell_num(static_cast(domain)); - case Datatype::FLOAT32: - return cell_num(static_cast(domain)); - case Datatype::FLOAT64: - return cell_num(static_cast(domain)); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return cell_num(static_cast(domain)); - default: - assert(false); - return 0; - } -} - template uint64_t Domain::cell_num(const T* domain) const { if (&typeid(T) == &typeid(float) || &typeid(T) == &typeid(double)) @@ -306,22 +232,15 @@ unsigned int Domain::dim_num() const { return dim_num_; } -const void* Domain::domain() const { - return domain_; -} - -const void* Domain::domain(unsigned int i) const { - if (i > dim_num_) - return nullptr; +const Range& Domain::domain(unsigned i) const { + assert(i < dim_num_); return dimensions_[i]->domain(); } -NDRange Domain::domain_ndrange() const { +NDRange Domain::domain() const { NDRange ret(dim_num_); - for (unsigned d = 0; d < dim_num_; ++d) { - Range r(dimensions_[d]->domain(), 2 * dimensions_[d]->coord_size()); - ret[d] = std::move(r); - } + for (unsigned d = 0; d < dim_num_; ++d) + ret[d] = dimensions_[d]->domain(); return ret; } @@ -375,10 +294,11 @@ void Domain::expand_to_tiles(NDRange* ndrange) const { template void Domain::get_tile_coords(const T* coords, T* tile_coords) const { - auto domain = (T*)domain_; - auto tile_extents = (T*)tile_extents_; - for (unsigned i = 0; i < 
dim_num_; i++) - tile_coords[i] = (coords[i] - domain[2 * i]) / tile_extents[i]; + for (unsigned d = 0; d < dim_num_; d++) { + auto tile_extent = *(const T*)this->tile_extent(d).data(); + auto dim_dom = (const T*)domain(d).data(); + tile_coords[d] = (coords[d] - dim_dom[0]) / tile_extent; + } } template @@ -401,28 +321,27 @@ template void Domain::get_end_of_cell_slab( T* subarray, T* start, Layout layout, T* end) const { if (layout == Layout::GLOBAL_ORDER || layout == cell_order_) { - auto domain = (T*)domain_; - auto tile_extents = (T*)tile_extents_; + auto dim_dom = (const T*)domain(dim_num_ - 1).data(); + auto tile_extent = *(const T*)this->tile_extent(dim_num_ - 1).data(); if (cell_order_ == Layout::ROW_MAJOR) { - for (unsigned i = 0; i < dim_num_; ++i) - end[i] = start[i]; - end[dim_num_ - 1] += tile_extents[dim_num_ - 1] - - ((start[dim_num_ - 1] - domain[2 * (dim_num_ - 1)]) % - tile_extents[dim_num_ - 1]) - - 1; + for (unsigned d = 0; d < dim_num_; ++d) + end[d] = start[d]; + end[dim_num_ - 1] += + tile_extent - ((start[dim_num_ - 1] - dim_dom[0]) % tile_extent) - 1; end[dim_num_ - 1] = std::min(end[dim_num_ - 1], subarray[2 * (dim_num_ - 1) + 1]); } else { - for (unsigned i = 0; i < dim_num_; ++i) - end[i] = start[i]; - end[0] += - tile_extents[0] - ((start[0] - domain[0]) % tile_extents[0]) - 1; + auto dim_dom = (const T*)domain(0).data(); + auto tile_extent = *(const T*)this->tile_extent(0).data(); + for (unsigned d = 0; d < dim_num_; ++d) + end[d] = start[d]; + end[0] += tile_extent - ((start[0] - dim_dom[0]) % tile_extent) - 1; end[0] = std::min(end[0], subarray[1]); } } else { - for (unsigned i = 0; i < dim_num_; ++i) - end[i] = start[i]; + for (unsigned d = 0; d < dim_num_; ++d) + end[d] = start[d]; (void)subarray; } } @@ -452,21 +371,16 @@ void Domain::get_next_tile_coords( template void Domain::get_tile_domain(const T* subarray, T* tile_subarray) const { - auto domain = static_cast(domain_); - auto tile_extents = static_cast(tile_extents_); - - for 
(unsigned int i = 0; i < dim_num_; ++i) { - tile_subarray[2 * i] = (subarray[2 * i] - domain[2 * i]) / tile_extents[i]; - tile_subarray[2 * i + 1] = - (subarray[2 * i + 1] - domain[2 * i]) / tile_extents[i]; + for (unsigned d = 0; d < dim_num_; ++d) { + auto dim_dom = (const T*)domain(d).data(); + auto tile_extent = *(const T*)this->tile_extent(d).data(); + tile_subarray[2 * d] = (subarray[2 * d] - dim_dom[0]) / tile_extent; + tile_subarray[2 * d + 1] = (subarray[2 * d + 1] - dim_dom[0]) / tile_extent; } } template uint64_t Domain::get_tile_pos(const T* tile_coords) const { - // Sanity check - assert(tile_extents_); - // Invoke the proper function based on the tile order if (tile_order_ == Layout::ROW_MAJOR) return get_tile_pos_row(tile_coords); @@ -477,9 +391,6 @@ uint64_t Domain::get_tile_pos(const T* tile_coords) const { template uint64_t Domain::get_tile_pos(const T* domain, const T* tile_coords) const { - // Sanity check - assert(tile_extents_); - // Invoke the proper function based on the tile order if (tile_order_ == Layout::ROW_MAJOR) return get_tile_pos_row(domain, tile_coords); @@ -489,27 +400,23 @@ uint64_t Domain::get_tile_pos(const T* domain, const T* tile_coords) const { template void Domain::get_tile_subarray(const T* tile_coords, T* tile_subarray) const { - // For easy reference - auto domain = static_cast(domain_); - auto tile_extents = static_cast(tile_extents_); - - for (unsigned int i = 0; i < dim_num_; ++i) { - tile_subarray[2 * i] = tile_coords[i] * tile_extents[i] + domain[2 * i]; - tile_subarray[2 * i + 1] = - (tile_coords[i] + 1) * tile_extents[i] - 1 + domain[2 * i]; + for (unsigned d = 0; d < dim_num_; ++d) { + auto dim_dom = (const T*)domain(d).data(); + auto tile_extent = *(const T*)this->tile_extent(d).data(); + tile_subarray[2 * d] = tile_coords[d] * tile_extent + dim_dom[0]; + tile_subarray[2 * d + 1] = + (tile_coords[d] + 1) * tile_extent - 1 + dim_dom[0]; } } template void Domain::get_tile_subarray( const T* domain, const T* 
tile_coords, T* tile_subarray) const { - // For easy reference - auto tile_extents = static_cast(tile_extents_); - - for (unsigned int i = 0; i < dim_num_; ++i) { - tile_subarray[2 * i] = tile_coords[i] * tile_extents[i] + domain[2 * i]; - tile_subarray[2 * i + 1] = - (tile_coords[i] + 1) * tile_extents[i] - 1 + domain[2 * i]; + for (unsigned d = 0; d < dim_num_; ++d) { + auto tile_extent = *(const T*)this->tile_extent(d).data(); + tile_subarray[2 * d] = tile_coords[d] * tile_extent + domain[2 * d]; + tile_subarray[2 * d + 1] = + (tile_coords[d] + 1) * tile_extent - 1 + domain[2 * d]; } } @@ -531,34 +438,9 @@ Status Domain::init(Layout cell_order, Layout tile_order) { cell_order_ = cell_order; tile_order_ = tile_order; - // Set domain - uint64_t coord_size = datatype_size(type_); - uint64_t coords_size = dim_num_ * coord_size; - std::free(domain_); - domain_ = std::malloc(dim_num_ * 2 * coord_size); - auto domain = (char*)domain_; - for (unsigned int i = 0; i < dim_num_; ++i) { - std::memcpy(domain + i * 2 * coord_size, this->domain(i), 2 * coord_size); - } - - // Set tile extents - std::free(tile_extents_); - if (null_tile_extents()) { - tile_extents_ = nullptr; - } else { - tile_extents_ = std::malloc(coords_size); - auto tile_extents = (char*)tile_extents_; - for (unsigned int i = 0; i < dim_num_; ++i) { - std::memcpy(tile_extents + i * coord_size, tile_extent(i), coord_size); - } - } - // Compute number of cells per tile compute_cell_num_per_tile(); - // Compute tile domain - compute_tile_domain(); - // Compute tile offsets compute_tile_offsets(); @@ -569,8 +451,8 @@ Status Domain::init(Layout cell_order, Layout tile_order) { } bool Domain::null_tile_extents() const { - for (unsigned int i = 0; i < dim_num_; ++i) { - if (tile_extent(i) == nullptr) + for (unsigned d = 0; d < dim_num_; ++d) { + if (tile_extent(d).empty()) return true; } @@ -611,24 +493,26 @@ uint64_t Domain::stride(Layout subarray_layout) const { uint64_t ret = 1; if (cell_order_ == 
Layout::ROW_MAJOR) { for (unsigned i = 1; i < dim_num_; ++i) - ret *= *(const T*)tile_extent(i); + ret *= *(const T*)tile_extent(i).data(); } else { // COL_MAJOR for (unsigned i = 0; i < dim_num_ - 1; ++i) - ret *= *(const T*)tile_extent(i); + ret *= *(const T*)tile_extent(i).data(); } return ret; } -const void* Domain::tile_extent(unsigned int i) const { - if (i > dim_num_) - return nullptr; - +const ByteVecValue& Domain::tile_extent(unsigned i) const { + assert(i < dim_num_); return dimensions_[i]->tile_extent(); } -const void* Domain::tile_extents() const { - return tile_extents_; +std::vector Domain::tile_extents() const { + std::vector ret(dim_num_); + for (unsigned d = 0; d < dim_num_; ++d) + ret[d] = tile_extent(d); + + return ret; } uint64_t Domain::tile_num(const NDRange& ndrange) const { @@ -641,7 +525,6 @@ uint64_t Domain::tile_num(const NDRange& ndrange) const { uint64_t Domain::cell_num(const NDRange& ndrange) const { assert(!ndrange.empty()); - uint64_t cell_num = 1, range; for (unsigned d = 0; d < dim_num_; ++d) { range = dimensions_[d]->domain_range(ndrange[d]); @@ -705,15 +588,15 @@ double Domain::overlap_ratio(const NDRange& r1, const NDRange& r2) const { template int Domain::tile_order_cmp( const Dimension* dim, const void* coord_a, const void* coord_b) { - auto tile_extent = (T*)dim->tile_extent(); - if (tile_extent == nullptr) + if (dim->tile_extent().empty()) return 0; + auto tile_extent = *(const T*)dim->tile_extent().data(); auto ca = (T*)coord_a; auto cb = (T*)coord_b; - auto domain = (T*)dim->domain(); - auto ta = (T)((*ca - domain[0]) / *tile_extent); - auto tb = (T)((*cb - domain[0]) / *tile_extent); + auto domain = (const T*)dim->domain().data(); + auto ta = (T)((*ca - domain[0]) / tile_extent); + auto tb = (T)((*cb - domain[0]) / tile_extent); if (ta < tb) return -1; if (ta > tb) @@ -823,65 +706,13 @@ void Domain::compute_cell_num_per_tile() { return; // Applicable only to non-NULL space tiles - if (tile_extents_ == nullptr) + if 
(null_tile_extents()) return; cell_num_per_tile_ = 1; - auto tile_extents = static_cast(tile_extents_); - for (unsigned int i = 0; i < dim_num_; ++i) - cell_num_per_tile_ *= tile_extents[i]; -} - -void Domain::compute_tile_domain() { - // Invoke the proper templated function - switch (type_) { - case Datatype::INT32: - compute_tile_domain(); - break; - case Datatype::INT64: - compute_tile_domain(); - break; - case Datatype::FLOAT32: - compute_tile_domain(); - break; - case Datatype::FLOAT64: - compute_tile_domain(); - break; - case Datatype::INT8: - compute_tile_domain(); - break; - case Datatype::UINT8: - compute_tile_domain(); - break; - case Datatype::INT16: - compute_tile_domain(); - break; - case Datatype::UINT16: - compute_tile_domain(); - break; - case Datatype::UINT32: - compute_tile_domain(); - break; - case Datatype::UINT64: - compute_tile_domain(); - break; - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - compute_tile_domain(); - break; - default: - assert(0); + for (unsigned d = 0; d < dim_num_; ++d) { + auto tile_extent = *(const T*)this->tile_extent(d).data(); + cell_num_per_tile_ *= tile_extent; } } @@ -960,32 +791,6 @@ void Domain::set_tile_cell_order_cmp_funcs() { } } -template -void Domain::compute_tile_domain() { - if (tile_extents_ == nullptr) - return; - - // For easy reference - auto domain = static_cast(domain_); - auto tile_extents = static_cast(tile_extents_); - - // Allocate space for the tile domain - assert(tile_domain_ == nullptr); - tile_domain_ = std::malloc(2 * dim_num_ * sizeof(T)); - - // For easy reference - auto tile_domain = static_cast(tile_domain_); - T tile_num; // Per dimension 
- - // Calculate tile domain - for (unsigned int i = 0; i < dim_num_; ++i) { - tile_num = - ceil(double(domain[2 * i + 1] - domain[2 * i] + 1) / tile_extents[i]); - tile_domain[2 * i] = 0; - tile_domain[2 * i + 1] = tile_num - 1; - } -} - void Domain::compute_tile_offsets() { // Invoke the proper templated function switch (type_) { @@ -1046,21 +851,19 @@ void Domain::compute_tile_offsets() { // return; // Applicable only to non-NULL space tiles - if (tile_extents_ == nullptr) + if (null_tile_extents()) return; // For easy reference - auto domain = static_cast(domain_); - auto tile_extents = static_cast(tile_extents_); uint64_t tile_num; // Per dimension // Calculate tile offsets for column-major tile order tile_offsets_col_.push_back(1); if (dim_num_ > 1) { - for (unsigned int i = 1; i < dim_num_; ++i) { - tile_num = utils::math::ceil( - domain[2 * (i - 1) + 1] - domain[2 * (i - 1)] + 1, - tile_extents[i - 1]); + for (unsigned d = 1; d < dim_num_; ++d) { + auto dim_dom = (const T*)domain(d - 1).data(); + auto tile_extent = *(const T*)this->tile_extent(d - 1).data(); + tile_num = utils::math::ceil(dim_dom[1] - dim_dom[0] + 1, tile_extent); tile_offsets_col_.push_back(tile_offsets_col_.back() * tile_num); } } @@ -1068,12 +871,12 @@ void Domain::compute_tile_offsets() { // Calculate tile offsets for row-major tile order tile_offsets_row_.push_back(1); if (dim_num_ > 1) { - for (unsigned int i = dim_num_ - 2;; --i) { - tile_num = utils::math::ceil( - domain[2 * (i + 1) + 1] - domain[2 * (i + 1)] + 1, - tile_extents[i + 1]); + for (unsigned d = dim_num_ - 2;; --d) { + auto dim_dom = (const T*)domain(d + 1).data(); + auto tile_extent = *(const T*)this->tile_extent(d + 1).data(); + tile_num = utils::math::ceil(dim_dom[1] - dim_dom[0] + 1, tile_extent); tile_offsets_row_.push_back(tile_offsets_row_.back() * tile_num); - if (i == 0) + if (d == 0) break; } } @@ -1089,8 +892,8 @@ std::string Domain::default_dimension_name(unsigned int i) const { template uint64_t 
Domain::get_cell_pos_col(const T* coords) const { // For easy reference - auto domain = static_cast(domain_); - auto tile_extents = static_cast(tile_extents_); + const T *dim_dom_0, *dim_dom_1, *dim_dom_2; + T tile_extent_0, tile_extent_1, tile_extent_2; uint64_t pos = 0; T coords_norm; // Normalized coordinates inside the tile @@ -1099,39 +902,53 @@ uint64_t Domain::get_cell_pos_col(const T* coords) const { // loop. switch (dim_num_) { case 1: - coords_norm = (coords[0] - domain[2 * 0]); - coords_norm -= (coords_norm / tile_extents[0]) * tile_extents[0]; + dim_dom_0 = (const T*)domain(0).data(); + tile_extent_0 = *(const T*)this->tile_extent(0).data(); + coords_norm = (coords[0] - dim_dom_0[0]); + coords_norm -= (coords_norm / tile_extent_0) * tile_extent_0; pos += coords_norm * 1; break; case 2: - coords_norm = (coords[0] - domain[2 * 0]); - coords_norm -= (coords_norm / tile_extents[0]) * tile_extents[0]; + dim_dom_0 = (const T*)domain(0).data(); + tile_extent_0 = *(const T*)this->tile_extent(0).data(); + coords_norm = (coords[0] - dim_dom_0[0]); + coords_norm -= (coords_norm / tile_extent_0) * tile_extent_0; pos += coords_norm * 1; - coords_norm = (coords[1] - domain[2 * 1]); - coords_norm -= (coords_norm / tile_extents[1]) * tile_extents[1]; - pos += coords_norm * 1 * tile_extents[0]; + dim_dom_1 = (const T*)domain(1).data(); + tile_extent_1 = *(const T*)this->tile_extent(1).data(); + coords_norm = (coords[1] - dim_dom_1[0]); + coords_norm -= (coords_norm / tile_extent_1) * tile_extent_1; + pos += coords_norm * 1 * tile_extent_0; break; case 3: - coords_norm = (coords[0] - domain[2 * 0]); - coords_norm -= (coords_norm / tile_extents[0]) * tile_extents[0]; + dim_dom_0 = (const T*)domain(0).data(); + tile_extent_0 = *(const T*)this->tile_extent(0).data(); + coords_norm = (coords[0] - dim_dom_0[0]); + coords_norm -= (coords_norm / tile_extent_0) * tile_extent_0; pos += coords_norm * 1; - coords_norm = (coords[1] - domain[2 * 1]); - coords_norm -= (coords_norm / 
tile_extents[1]) * tile_extents[1]; - pos += coords_norm * 1 * tile_extents[0]; - - coords_norm = (coords[2] - domain[2 * 2]); - coords_norm -= (coords_norm / tile_extents[2]) * tile_extents[2]; - pos += coords_norm * 1 * tile_extents[0] * tile_extents[1]; + dim_dom_1 = (const T*)domain(1).data(); + tile_extent_1 = *(const T*)this->tile_extent(1).data(); + coords_norm = (coords[1] - dim_dom_1[0]); + coords_norm -= (coords_norm / tile_extent_1) * tile_extent_1; + pos += coords_norm * 1 * tile_extent_0; + + dim_dom_2 = (const T*)domain(2).data(); + tile_extent_2 = *(const T*)this->tile_extent(2).data(); + coords_norm = (coords[2] - dim_dom_2[0]); + coords_norm -= (coords_norm / tile_extent_2) * tile_extent_2; + pos += coords_norm * 1 * tile_extent_0 * tile_extent_1; break; default: { uint64_t cell_offset = 1; - for (unsigned int i = 0; i < dim_num_; ++i) { - coords_norm = (coords[i] - domain[2 * i]); - coords_norm -= (coords_norm / tile_extents[i]) * tile_extents[i]; + for (unsigned d = 0; d < dim_num_; ++d) { + auto dim_dom = (const T*)domain(d).data(); + auto tile_extent = *(const T*)this->tile_extent(d).data(); + coords_norm = (coords[d] - dim_dom[0]); + coords_norm -= (coords_norm / tile_extent) * tile_extent; pos += coords_norm * cell_offset; - cell_offset *= tile_extents[i]; + cell_offset *= tile_extent; } break; } @@ -1180,8 +997,8 @@ uint64_t Domain::get_cell_pos_col(const T* subarray, const T* coords) const { template uint64_t Domain::get_cell_pos_row(const T* coords) const { // For easy reference - auto domain = static_cast(domain_); - auto tile_extents = static_cast(tile_extents_); + const T *dim_dom_0, *dim_dom_1, *dim_dom_2; + T tile_extent_0, tile_extent_1, tile_extent_2; uint64_t pos = 0; T coords_norm; // Normalized coordinates inside the tile @@ -1190,45 +1007,63 @@ uint64_t Domain::get_cell_pos_row(const T* coords) const { // loop. 
switch (dim_num_) { case 1: - coords_norm = (coords[0] - domain[2 * 0]); - coords_norm -= (coords_norm / tile_extents[0]) * tile_extents[0]; + dim_dom_0 = (const T*)domain(0).data(); + tile_extent_0 = *(const T*)this->tile_extent(0).data(); + coords_norm = (coords[0] - dim_dom_0[0]); + coords_norm -= (coords_norm / tile_extent_0) * tile_extent_0; pos += coords_norm; break; case 2: - coords_norm = (coords[0] - domain[2 * 0]); - coords_norm -= (coords_norm / tile_extents[0]) * tile_extents[0]; - pos += coords_norm * tile_extents[1]; - - coords_norm = (coords[1] - domain[2 * 1]); - coords_norm -= (coords_norm / tile_extents[1]) * tile_extents[1]; + dim_dom_0 = (const T*)domain(0).data(); + tile_extent_0 = *(const T*)this->tile_extent(0).data(); + tile_extent_1 = *(const T*)this->tile_extent(1).data(); + coords_norm = (coords[0] - dim_dom_0[0]); + coords_norm -= (coords_norm / tile_extent_0) * tile_extent_0; + pos += coords_norm * tile_extent_1; + + dim_dom_1 = (const T*)domain(1).data(); + coords_norm = (coords[1] - dim_dom_1[0]); + coords_norm -= (coords_norm / tile_extent_1) * tile_extent_1; pos += coords_norm * 1; break; case 3: - coords_norm = (coords[0] - domain[2 * 0]); - coords_norm -= (coords_norm / tile_extents[0]) * tile_extents[0]; - pos += coords_norm * tile_extents[1] * tile_extents[2]; - - coords_norm = (coords[1] - domain[2 * 1]); - coords_norm -= (coords_norm / tile_extents[1]) * tile_extents[1]; - pos += coords_norm * tile_extents[2]; - - coords_norm = (coords[2] - domain[2 * 2]); - coords_norm -= (coords_norm / tile_extents[2]) * tile_extents[2]; + dim_dom_0 = (const T*)domain(0).data(); + tile_extent_0 = *(const T*)this->tile_extent(0).data(); + tile_extent_1 = *(const T*)this->tile_extent(1).data(); + tile_extent_2 = *(const T*)this->tile_extent(2).data(); + coords_norm = (coords[0] - dim_dom_0[0]); + coords_norm -= (coords_norm / tile_extent_0) * tile_extent_0; + pos += coords_norm * tile_extent_1 * tile_extent_2; + + dim_dom_1 = (const 
T*)domain(1).data(); + coords_norm = (coords[1] - dim_dom_1[0]); + coords_norm -= (coords_norm / tile_extent_1) * tile_extent_1; + pos += coords_norm * tile_extent_2; + + dim_dom_2 = (const T*)domain(2).data(); + coords_norm = (coords[2] - dim_dom_2[0]); + coords_norm -= (coords_norm / tile_extent_2) * tile_extent_2; pos += coords_norm * 1; break; default: { // Calculate initial cell_offset uint64_t cell_offset = 1; - for (unsigned int i = 1; i < dim_num_; ++i) - cell_offset *= tile_extents[i]; + for (unsigned d = 1; d < dim_num_; ++d) { + auto tile_extent = *(const T*)this->tile_extent(d).data(); + cell_offset *= tile_extent; + } // Calculate position - for (unsigned int i = 0; i < dim_num_; ++i) { - coords_norm = (coords[i] - domain[2 * i]); - coords_norm -= (coords_norm / tile_extents[i]) * tile_extents[i]; + for (unsigned d = 0; d < dim_num_; ++d) { + auto dim_dom = (const T*)domain(d).data(); + auto tile_extent = *(const T*)this->tile_extent(d).data(); + coords_norm = (coords[d] - dim_dom[0]); + coords_norm -= (coords_norm / tile_extent) * tile_extent; pos += coords_norm * cell_offset; - if (i < dim_num_ - 1) - cell_offset /= tile_extents[i + 1]; + if (d < dim_num_ - 1) { + auto tile_extent = *(const T*)this->tile_extent(d + 1).data(); + cell_offset /= tile_extent; + } } break; } @@ -1381,28 +1216,25 @@ uint64_t Domain::get_tile_pos_col(const T* tile_coords) const { template uint64_t Domain::get_tile_pos_col(const T* domain, const T* tile_coords) const { - // For easy reference - auto tile_extents = static_cast(tile_extents_); - // Calculate tile offsets std::vector tile_offsets; tile_offsets.push_back(1); - for (unsigned int i = 1; i < dim_num_; ++i) { + for (unsigned d = 1; d < dim_num_; ++d) { // Per dimension + auto tile_extent = *(const T*)this->tile_extent(d - 1).data(); uint64_t tile_num; if (&typeid(T) != &typeid(float) && &typeid(T) != &typeid(double)) - tile_num = (domain[2 * (i - 1) + 1] - domain[2 * (i - 1)] + 1) / - tile_extents[i - 1]; - else 
tile_num = - (domain[2 * (i - 1) + 1] - domain[2 * (i - 1)]) / tile_extents[i - 1]; + (domain[2 * (d - 1) + 1] - domain[2 * (d - 1)] + 1) / tile_extent; + else + tile_num = (domain[2 * (d - 1) + 1] - domain[2 * (d - 1)]) / tile_extent; tile_offsets.push_back(tile_offsets.back() * tile_num); } // Calculate position uint64_t pos = 0; - for (unsigned int i = 0; i < dim_num_; ++i) - pos += tile_coords[i] * tile_offsets[i]; + for (unsigned d = 0; d < dim_num_; ++d) + pos += tile_coords[d] * tile_offsets[d]; // Return return pos; @@ -1421,24 +1253,22 @@ uint64_t Domain::get_tile_pos_row(const T* tile_coords) const { template uint64_t Domain::get_tile_pos_row(const T* domain, const T* tile_coords) const { - // For easy reference - auto tile_extents = static_cast(tile_extents_); - // Calculate tile offsets std::vector tile_offsets; tile_offsets.push_back(1); if (dim_num_ > 1) { - for (unsigned int i = dim_num_ - 2;; --i) { + for (unsigned d = dim_num_ - 2;; --d) { // Per dimension + auto tile_extent = *(const T*)this->tile_extent(d + 1).data(); uint64_t tile_num; if (&typeid(T) != &typeid(float) && &typeid(T) != &typeid(double)) - tile_num = (domain[2 * (i + 1) + 1] - domain[2 * (i + 1)] + 1) / - tile_extents[i + 1]; + tile_num = + (domain[2 * (d + 1) + 1] - domain[2 * (d + 1)] + 1) / tile_extent; else - tile_num = (domain[2 * (i + 1) + 1] - domain[2 * (i + 1)]) / - tile_extents[i + 1]; + tile_num = + (domain[2 * (d + 1) + 1] - domain[2 * (d + 1)]) / tile_extent; tile_offsets.push_back(tile_offsets.back() * tile_num); - if (i == 0) + if (d == 0) break; } } diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index c346756e6cd6..9907f4925c58 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -87,20 +87,6 @@ class Domain { */ Status add_dimension(const Dimension* dim); - /** - * Returns the number of cells in the input domain. 
Note that this is - * applicable only to integer array domains (otherwise the output is 0). - * Also note that it is assummed that the input domain is expanded - * such that it aligns with the tile extents. - * - * @param domain The domain to be checked. - * @return The number of cells in the domain. - * - * @note The function returns 0 in case `domain` is huge, leading to more - * cells than `uint64_t` can hold. - */ - uint64_t cell_num(const void* domain) const; - /** * Returns the number of cells in the input domain. Note that this is * applicable only to integer array domains (otherwise the output is 0). @@ -189,14 +175,11 @@ class Domain { /** Returns the number of dimensions. */ unsigned int dim_num() const; - /** Returns the domain (serialized dimension domains). */ - const void* domain() const; - - /** returns the domain along the i-th dimension (nullptr upon error). */ - const void* domain(unsigned int i) const; + /** Returns the domain along the i-th dimension. */ + const Range& domain(unsigned i) const; /** Returns the domain as a N-dimensional range object. */ - NDRange domain_ndrange() const; + NDRange domain() const; /** Returns the i-th dimensions (nullptr upon error). */ const Dimension* dimension(unsigned int i) const; @@ -433,11 +416,11 @@ class Domain { template uint64_t stride(Layout subarray_layout) const; - /** Returns the tile extents. */ - const void* tile_extents() const; + /** Returns the tile extent along the i-th dimension. */ + const ByteVecValue& tile_extent(unsigned i) const; - /** returns the tile extent along the i-th dimension (nullptr upon error). */ - const void* tile_extent(unsigned int i) const; + /** Returns the tile extents. */ + std::vector tile_extents() const; /** * Returns the number of tiles contained in the input ND range. @@ -584,29 +567,7 @@ class Domain { std::vector dimensions_; /** The number of dimensions. */ - unsigned int dim_num_; - - /** - * The array domain, represented by serializing the dimension domains. 
- * It should contain one [lower, upper] pair per dimension. - * The type of the values stored in this buffer should match the dimensions - * type. - */ - void* domain_; - - /** - * The array domain. It should contain one [lower, upper] pair per dimension. - * The type of the values stored in this buffer should match the dimensions - * type. - */ - void* tile_domain_; - - /** - * The tile extents. There should be one value for each dimension. The type - * of the values stored in this buffer should match the dimensions type. If - * it is NULL, then it means that the array is sparse. - */ - void* tile_extents_; + unsigned dim_num_; /** * Offsets for calculating tile positions and ids for the column-major @@ -665,18 +626,6 @@ class Domain { /** Prepares the comparator functions for each dimension. */ void set_tile_cell_order_cmp_funcs(); - /** Computes the tile domain. */ - void compute_tile_domain(); - - /** - * Computes the tile domain. - * - * @tparam T The domain type. - * @return void - */ - template - void compute_tile_domain(); - /** * Computes tile offsets neccessary when computing tile positions and ids. * diff --git a/tiledb/sm/array_schema/tile_domain.h b/tiledb/sm/array_schema/tile_domain.h index 5c5862462a7c..6bbe3b34d2af 100644 --- a/tiledb/sm/array_schema/tile_domain.h +++ b/tiledb/sm/array_schema/tile_domain.h @@ -70,7 +70,6 @@ class TileDomain { * Constructor. * * @param id An identifier given to this tile domain. - * @param dim_num The number of dimensions of the tile domain. * @param domain The domain. * @param domain_slice The domain slice (included in `domain`). * @param tile_extents The tile extents of the domains. 
@@ -79,19 +78,18 @@ class TileDomain { */ TileDomain( unsigned id, - unsigned dim_num, - const T* domain, - const std::reference_wrapper& domain_slice, - const T* tile_extents, + const NDRange& domain, + const NDRange& domain_slice, + const std::vector tile_extents, Layout layout) : id_(id) - , dim_num_(dim_num) + , dim_num_(domain.size()) , domain_(domain) , domain_slice_(domain_slice) , tile_extents_(tile_extents) , layout_(layout) { assert(layout == Layout::ROW_MAJOR || layout == Layout::COL_MAJOR); - compute_tile_domain(domain, domain_slice.get(), tile_extents); + compute_tile_domain(domain, domain_slice, tile_extents); if (layout == Layout::ROW_MAJOR) compute_tile_offsets_row(); else @@ -134,8 +132,11 @@ class TileDomain { std::vector start_coords(const T* tile_coords) const { std::vector ret; ret.resize(dim_num_); - for (unsigned i = 0; i < dim_num_; ++i) - ret[i] = domain_[2 * i] + tile_coords[i] * tile_extents_[i]; + for (unsigned d = 0; d < dim_num_; ++d) { + auto dim_dom = (const T*)domain_[d].data(); + auto tile_extent = *(const T*)tile_extents_[d].data(); + ret[d] = dim_dom[0] + tile_coords[d] * tile_extent; + } return ret; } @@ -162,9 +163,11 @@ class TileDomain { std::vector ret; ret.resize(2 * dim_num_); - for (unsigned i = 0; i < dim_num_; ++i) { - ret[2 * i] = tile_coords[i] * tile_extents_[i] + domain_[2 * i]; - ret[2 * i + 1] = ret[2 * i] + tile_extents_[i] - 1; + for (unsigned d = 0; d < dim_num_; ++d) { + auto dim_dom = (const T*)domain_[d].data(); + auto tile_extent = *(const T*)tile_extents_[d].data(); + ret[2 * d] = tile_coords[d] * tile_extent + dim_dom[0]; + ret[2 * d + 1] = ret[2 * d] + tile_extent - 1; } return ret; @@ -187,7 +190,7 @@ class TileDomain { ret.resize(2 * dim_num_); auto tile_subarray = this->tile_subarray(tile_coords); for (unsigned d = 0; d < dim_num_; ++d) { - auto ds = (const T*)domain_slice_.get()[d].data(); + auto ds = (const T*)domain_slice_[d].data(); ret[2 * d] = std::max(tile_subarray[2 * d], ds[0]); ret[2 * d + 
1] = std::min(tile_subarray[2 * d + 1], ds[1]); } @@ -247,7 +250,7 @@ class TileDomain { } /** Returns the domain slice. */ - std::reference_wrapper domain_slice() const { + const NDRange& domain_slice() const { return domain_slice_; } @@ -266,13 +269,13 @@ class TileDomain { unsigned dim_num_ = 0; /** The global domain the tiles are defined over. */ - const T* domain_; + NDRange domain_; /** The domain slice from which the tile domain is constructed. */ - std::reference_wrapper domain_slice_; + NDRange domain_slice_; /** The tile extents. */ - const T* tile_extents_; + std::vector tile_extents_; /** The layout used to compute 1D-mapped tile positions. */ Layout layout_ = Layout::GLOBAL_ORDER; @@ -295,14 +298,18 @@ class TileDomain { * `tile_extents`. */ void compute_tile_domain( - const T* domain, const NDRange& domain_slice, const T* tile_extents) { + const NDRange& domain, + const NDRange& domain_slice, + const std::vector& tile_extents) { tile_domain_.resize(2 * dim_num_); for (unsigned d = 0; d < dim_num_; ++d) { auto ds = (const T*)domain_slice[d].data(); + auto dim_dom = (const T*)domain[d].data(); + auto tile_extent = *(const T*)tile_extents[d].data(); assert(ds[0] <= ds[1]); - assert(ds[0] >= domain[2 * d] && ds[1] <= domain[2 * d + 1]); - tile_domain_[2 * d] = (ds[0] - domain[2 * d]) / tile_extents[d]; - tile_domain_[2 * d + 1] = (ds[1] - domain[2 * d]) / tile_extents[d]; + assert(ds[0] >= dim_dom[0] && ds[1] <= dim_dom[1]); + tile_domain_[2 * d] = (ds[0] - dim_dom[0]) / tile_extent; + tile_domain_[2 * d + 1] = (ds[1] - dim_dom[0]) / tile_extent; } } diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index 201dc8abf553..570bce300194 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -1734,18 +1734,20 @@ int32_t tiledb_dimension_get_type( } int32_t tiledb_dimension_get_domain( - tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, void** domain) { + tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, const void** domain) { 
if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, dim) == TILEDB_ERR) return TILEDB_ERR; - *domain = dim->dim_->domain(); + *domain = dim->dim_->domain().data(); return TILEDB_OK; } int32_t tiledb_dimension_get_tile_extent( - tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, void** tile_extent) { + tiledb_ctx_t* ctx, + const tiledb_dimension_t* dim, + const void** tile_extent) { if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, dim) == TILEDB_ERR) return TILEDB_ERR; - *tile_extent = dim->dim_->tile_extent(); + *tile_extent = dim->dim_->tile_extent().data(); return TILEDB_OK; } diff --git a/tiledb/sm/c_api/tiledb.h b/tiledb/sm/c_api/tiledb.h index 85abc445d78f..46fac280e824 100644 --- a/tiledb/sm/c_api/tiledb.h +++ b/tiledb/sm/c_api/tiledb.h @@ -2291,7 +2291,7 @@ TILEDB_EXPORT int32_t tiledb_dimension_get_type( * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. */ TILEDB_EXPORT int32_t tiledb_dimension_get_domain( - tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, void** domain); + tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, const void** domain); /** * Retrieves the tile extent of the dimension. @@ -2309,7 +2309,7 @@ TILEDB_EXPORT int32_t tiledb_dimension_get_domain( * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. 
*/ TILEDB_EXPORT int32_t tiledb_dimension_get_tile_extent( - tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, void** tile_extent); + tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, const void** tile_extent); /** * Dumps the contents of a dimension in ASCII form to some output (e.g., diff --git a/tiledb/sm/cpp_api/dimension.h b/tiledb/sm/cpp_api/dimension.h index 9cb8481f44a4..b702fcdf85e7 100644 --- a/tiledb/sm/cpp_api/dimension.h +++ b/tiledb/sm/cpp_api/dimension.h @@ -108,7 +108,7 @@ class Dimension { template std::pair domain() const { impl::type_check(type(), 1); - auto d = static_cast(_domain()); + auto d = (const T*)_domain(); return std::pair(d[0], d[1]); }; @@ -211,7 +211,7 @@ class Dimension { template T tile_extent() const { impl::type_check(type(), 1); - return *static_cast(_tile_extent()); + return *(const T*)_tile_extent(); } /** @@ -384,18 +384,18 @@ class Dimension { /* ********************************* */ /** Returns the binary representation of the dimension domain. */ - void* _domain() const { + const void* _domain() const { auto& ctx = ctx_.get(); - void* domain; + const void* domain; ctx.handle_error( tiledb_dimension_get_domain(ctx.ptr().get(), dim_.get(), &domain)); return domain; } /** Returns the binary representation of the dimension extent. 
*/ - void* _tile_extent() const { + const void* _tile_extent() const { auto& ctx = ctx_.get(); - void* tile_extent; + const void* tile_extent; ctx.handle_error(tiledb_dimension_get_tile_extent( ctx.ptr().get(), dim_.get(), &tile_extent)); return tile_extent; diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index b4e445b0500b..d1c8cf470a91 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -281,29 +281,21 @@ Status FragmentMetadata::get_tile_overlap( return Status::Ok(); } -Status FragmentMetadata::init(const void* non_empty_domain) { +Status FragmentMetadata::init(const NDRange& non_empty_domain) { // For easy reference auto dim_num = array_schema_->dim_num(); auto num = array_schema_->attribute_num() + dim_num + 1; auto domain = array_schema_->domain(); // Sanity check - assert(non_empty_domain != nullptr); + assert(!non_empty_domain.empty()); assert(non_empty_domain_.empty()); assert(domain_.empty()); // Set non-empty domain for dense arrays (for sparse it will be calculated // via the MBRs) if (dense_) { - // Set non-empty domain - auto dom_ptr = (const unsigned char*)non_empty_domain; - non_empty_domain_.resize(dim_num); - for (unsigned d = 0; d < dim_num; ++d) { - auto r_size = 2 * array_schema_->dimension(d)->coord_size(); - Range r(dom_ptr, r_size); - non_empty_domain_[d] = std::move(r); - dom_ptr += r_size; - } + non_empty_domain_ = non_empty_domain; // The following is needed in case the fragment is a result of // dense consolidation, as the consolidator may have expanded @@ -755,17 +747,17 @@ void FragmentMetadata::get_subarray_tile_domain( const T* subarray, T* subarray_tile_domain) const { // For easy reference auto dim_num = array_schema_->dim_num(); - auto tile_extents = - static_cast(array_schema_->domain()->tile_extents()); // Calculate subarray in tile domain - for (unsigned int d = 0; d < dim_num; ++d) { + for (unsigned d = 0; d < dim_num; ++d) { auto 
domain = (const T*)domain_[d].data(); + auto tile_extent = + *(const T*)array_schema_->domain()->tile_extent(d).data(); auto overlap = std::max(subarray[2 * d], domain[0]); - subarray_tile_domain[2 * d] = (overlap - domain[0]) / tile_extents[d]; + subarray_tile_domain[2 * d] = (overlap - domain[0]) / tile_extent; overlap = std::min(subarray[2 * d + 1], domain[1]); - subarray_tile_domain[2 * d + 1] = (overlap - domain[0]) / tile_extents[d]; + subarray_tile_domain[2 * d + 1] = (overlap - domain[0]) / tile_extent; } } diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index 55d1fa0200e6..bd3a5a7af96e 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -196,7 +196,7 @@ class FragmentMetadata { * will be constrained. * @return Status */ - Status init(const void* non_empty_domain); + Status init(const NDRange& non_empty_domain); /** Returns the number of cells in the last tile. */ uint64_t last_tile_cell_num() const; diff --git a/tiledb/sm/misc/status.cc b/tiledb/sm/misc/status.cc index fe03a198c33b..be28970ea609 100644 --- a/tiledb/sm/misc/status.cc +++ b/tiledb/sm/misc/status.cc @@ -174,7 +174,7 @@ std::string Status::code_to_string() const { case StatusCode::Attribute: type = "[TileDB::Attribute] Error"; break; - case StatusCode::DenseCellRangeIter: + case StatusCode::WriteCellSlabIter: type = "[TileDB::WriteCellSlabIter] Error"; break; case StatusCode::Reader: diff --git a/tiledb/sm/misc/status.h b/tiledb/sm/misc/status.h index e2c933295cad..2e51bce988c0 100644 --- a/tiledb/sm/misc/status.h +++ b/tiledb/sm/misc/status.h @@ -105,7 +105,7 @@ enum class StatusCode : char { FS_AZURE, FS_HDFS, Attribute, - DenseCellRangeIter, + WriteCellSlabIter, Reader, Writer, PreallocatedBuffer, @@ -300,10 +300,10 @@ class Status { return Status(StatusCode::Attribute, msg, -1); } - /** Return a DenseCellRangeIterError error class Status with a given message + /** Return a WriteCellSlabIterError 
error class Status with a given message * **/ - static Status DenseCellRangeIterError(const std::string& msg) { - return Status(StatusCode::DenseCellRangeIter, msg, -1); + static Status WriteCellSlabIterError(const std::string& msg) { + return Status(StatusCode::WriteCellSlabIter, msg, -1); } /** Return a ReaderError error class Status with a given message **/ diff --git a/tiledb/sm/misc/types.h b/tiledb/sm/misc/types.h index 4c79cd897dee..0107bcfb9325 100644 --- a/tiledb/sm/misc/types.h +++ b/tiledb/sm/misc/types.h @@ -87,11 +87,25 @@ class Range { return &range_[0]; } + /** Returns a pointer to the start of the range. */ + const void* start() const { + return &range_[0]; + } + + const void* end() const { + return &range_[range_.size() / 2]; + } + /** Returns true if the range is empty. */ bool empty() const { return range_.empty(); } + /** Clears the range. */ + void clear() { + range_.clear(); + } + /** Returns the range size in bytes. */ uint64_t size() const { return range_.size(); diff --git a/tiledb/sm/misc/utils.cc b/tiledb/sm/misc/utils.cc index 1ae493e41190..ce8ebb07642f 100644 --- a/tiledb/sm/misc/utils.cc +++ b/tiledb/sm/misc/utils.cc @@ -310,190 +310,6 @@ bool is_uint(const std::string& str) { return true; } -std::string domain_str(const void* domain, Datatype type) { - std::stringstream ss; - - if (domain == nullptr) - return ""; - - const int* domain_int32; - const int64_t* domain_int64; - const float* domain_float32; - const double* domain_float64; - const int8_t* domain_int8; - const uint8_t* domain_uint8; - const int16_t* domain_int16; - const uint16_t* domain_uint16; - const uint32_t* domain_uint32; - const uint64_t* domain_uint64; - - switch (type) { - case Datatype::INT32: - domain_int32 = static_cast(domain); - ss << "[" << domain_int32[0] << "," << domain_int32[1] << "]"; - return ss.str(); - case Datatype::INT64: - domain_int64 = static_cast(domain); - ss << "[" << domain_int64[0] << "," << domain_int64[1] << "]"; - return ss.str(); - case 
Datatype::FLOAT32: - domain_float32 = static_cast(domain); - ss << "[" << domain_float32[0] << "," << domain_float32[1] << "]"; - return ss.str(); - case Datatype::FLOAT64: - domain_float64 = static_cast(domain); - ss << "[" << domain_float64[0] << "," << domain_float64[1] << "]"; - return ss.str(); - case Datatype::INT8: - domain_int8 = static_cast(domain); - ss << "[" << int(domain_int8[0]) << "," << int(domain_int8[1]) << "]"; - return ss.str(); - case Datatype::UINT8: - domain_uint8 = static_cast(domain); - ss << "[" << int(domain_uint8[0]) << "," << int(domain_uint8[1]) << "]"; - return ss.str(); - case Datatype::INT16: - domain_int16 = static_cast(domain); - ss << "[" << domain_int16[0] << "," << domain_int16[1] << "]"; - return ss.str(); - case Datatype::UINT16: - domain_uint16 = static_cast(domain); - ss << "[" << domain_uint16[0] << "," << domain_uint16[1] << "]"; - return ss.str(); - case Datatype::UINT32: - domain_uint32 = static_cast(domain); - ss << "[" << domain_uint32[0] << "," << domain_uint32[1] << "]"; - return ss.str(); - case Datatype::UINT64: - domain_uint64 = static_cast(domain); - ss << "[" << domain_uint64[0] << "," << domain_uint64[1] << "]"; - return ss.str(); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - domain_int64 = static_cast(domain); - ss << "[" << domain_int64[0] << "," << domain_int64[1] << "]"; - return ss.str(); - - case Datatype::CHAR: - case Datatype::STRING_ASCII: - case Datatype::STRING_UTF8: - case Datatype::STRING_UTF16: - case Datatype::STRING_UTF32: - case Datatype::STRING_UCS2: - case Datatype::STRING_UCS4: - case Datatype::ANY: - // Not supported domain type - 
assert(false); - return ""; - } - - assert(false); - return ""; -} - -std::string tile_extent_str(const void* tile_extent, Datatype type) { - std::stringstream ss; - - if (tile_extent == nullptr) - return constants::null_str; - - const int* tile_extent_int32; - const int64_t* tile_extent_int64; - const float* tile_extent_float32; - const double* tile_extent_float64; - const int8_t* tile_extent_int8; - const uint8_t* tile_extent_uint8; - const int16_t* tile_extent_int16; - const uint16_t* tile_extent_uint16; - const uint32_t* tile_extent_uint32; - const uint64_t* tile_extent_uint64; - - switch (type) { - case Datatype::INT32: - tile_extent_int32 = static_cast(tile_extent); - ss << *tile_extent_int32; - return ss.str(); - case Datatype::INT64: - tile_extent_int64 = static_cast(tile_extent); - ss << *tile_extent_int64; - return ss.str(); - case Datatype::FLOAT32: - tile_extent_float32 = static_cast(tile_extent); - ss << *tile_extent_float32; - return ss.str(); - case Datatype::FLOAT64: - tile_extent_float64 = static_cast(tile_extent); - ss << *tile_extent_float64; - return ss.str(); - case Datatype::INT8: - tile_extent_int8 = static_cast(tile_extent); - ss << int(*tile_extent_int8); - return ss.str(); - case Datatype::UINT8: - tile_extent_uint8 = static_cast(tile_extent); - ss << int(*tile_extent_uint8); - return ss.str(); - case Datatype::INT16: - tile_extent_int16 = static_cast(tile_extent); - ss << *tile_extent_int16; - return ss.str(); - case Datatype::UINT16: - tile_extent_uint16 = static_cast(tile_extent); - ss << *tile_extent_uint16; - return ss.str(); - case Datatype::UINT32: - tile_extent_uint32 = static_cast(tile_extent); - ss << *tile_extent_uint32; - return ss.str(); - case Datatype::UINT64: - tile_extent_uint64 = static_cast(tile_extent); - ss << *tile_extent_uint64; - return ss.str(); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case 
Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - tile_extent_int64 = static_cast(tile_extent); - ss << *tile_extent_int64; - return ss.str(); - - case Datatype::CHAR: - case Datatype::STRING_ASCII: - case Datatype::STRING_UTF8: - case Datatype::STRING_UTF16: - case Datatype::STRING_UTF32: - case Datatype::STRING_UCS2: - case Datatype::STRING_UCS4: - case Datatype::ANY: - // Not supported domain type - assert(false); - return ""; - } - - assert(false); - return ""; -} - bool starts_with(const std::string& value, const std::string& prefix) { if (prefix.size() > value.size()) return false; diff --git a/tiledb/sm/misc/utils.h b/tiledb/sm/misc/utils.h index 113e1ab8f177..ced9bc013b0d 100644 --- a/tiledb/sm/misc/utils.h +++ b/tiledb/sm/misc/utils.h @@ -121,24 +121,6 @@ bool is_int(const std::string& str); /** Returns `true` if the input string is an unsigned integer. */ bool is_uint(const std::string& str); -/** - * Returns the input domain as a string of the form "[low, high]". - * - * @param domain A single dimension's domain. - * @param type The type of the dimension. - * @return A string of the form "[low, high]". - */ -std::string domain_str(const void* domain, Datatype type); - -/** - * Returns a dimension's tile extent in string form. - * - * @param tile_extent The tile extent of a single dimension. - * @param type The type of the dimension. - * @return The tile extent in string form. - */ -std::string tile_extent_str(const void* tile_extent, Datatype type); - /** * Checks if a string starts with a certain prefix. 
* diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 50d2c8c4681a..e876c257c1a5 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -88,17 +88,33 @@ Query::~Query() = default; Status Query::add_range( unsigned dim_idx, const void* start, const void* end, const void* stride) { - if (type_ == QueryType::WRITE) + if (dim_idx >= array_->array_schema()->dim_num()) + return LOG_STATUS( + Status::QueryError("Cannot add range; Invalid dimension index")); + + if (start == nullptr || end == nullptr) + return LOG_STATUS(Status::QueryError("Cannot add range; Invalid range")); + + if (stride != nullptr) return LOG_STATUS(Status::QueryError( - "Cannot add range; Operation currently unsupported for write queries")); - return reader_.add_range(dim_idx, start, end, stride); + "Cannot add range; Setting range stride is currently unsupported")); + + // Prepare a temp range + std::vector range; + uint8_t coord_size = array_->array_schema()->dimension(dim_idx)->coord_size(); + range.resize(2 * coord_size); + std::memcpy(&range[0], start, coord_size); + std::memcpy(&range[coord_size], end, coord_size); + + // Add range + if (type_ == QueryType::WRITE) + return writer_.add_range(dim_idx, Range(&range[0], 2 * coord_size)); + return reader_.add_range(dim_idx, Range(&range[0], 2 * coord_size)); } Status Query::get_range_num(unsigned dim_idx, uint64_t* range_num) const { if (type_ == QueryType::WRITE) - return LOG_STATUS( - Status::QueryError("Cannot get number of ranges; Operation currently " - "unsupported for write queries")); + return writer_.get_range_num(dim_idx, range_num); return reader_.get_range_num(dim_idx, range_num); } @@ -109,8 +125,7 @@ Status Query::get_range( const void** end, const void** stride) const { if (type_ == QueryType::WRITE) - return LOG_STATUS(Status::QueryError( - "Cannot get range; Operation currently unsupported for write queries")); + return writer_.get_range(dim_idx, range_idx, start, end, stride); return 
reader_.get_range(dim_idx, range_idx, start, end, stride); } @@ -312,9 +327,9 @@ Status Query::init() { } if (type_ == QueryType::READ) { - RETURN_NOT_OK(reader_.init()); + RETURN_NOT_OK(reader_.init(layout_)); } else { // Write - RETURN_NOT_OK(writer_.init()); + RETURN_NOT_OK(writer_.init(layout_)); } } @@ -460,9 +475,7 @@ Status Query::set_buffer( Status Query::set_layout(Layout layout) { layout_ = layout; - if (type_ == QueryType::WRITE) - return writer_.set_layout(layout); - return reader_.set_layout(layout); + return Status::Ok(); } Status Query::set_sparse_mode(bool sparse_mode) { @@ -477,12 +490,24 @@ void Query::set_status(QueryStatus status) { status_ = status; } -Status Query::set_subarray(const void* subarray, bool check_expanded_domain) { - RETURN_NOT_OK(check_subarray(subarray, check_expanded_domain)); +Status Query::set_subarray(const void* subarray) { + // Prepare a subarray object + Subarray sub(array_, layout_); + if (subarray != nullptr) { + auto dim_num = array_->array_schema()->dim_num(); + auto s_ptr = (const unsigned char*)subarray; + uint64_t offset = 0; + for (unsigned d = 0; d < dim_num; ++d) { + auto r_size = 2 * array_->array_schema()->dimension(d)->coord_size(); + RETURN_NOT_OK(sub.add_range(d, Range(&s_ptr[offset], r_size))); + offset += r_size; + } + } + if (type_ == QueryType::WRITE) { - RETURN_NOT_OK(writer_.set_subarray(subarray)); - } else { // READ - RETURN_NOT_OK(reader_.set_subarray(subarray, check_expanded_domain)); + RETURN_NOT_OK(writer_.set_subarray(sub)); + } else if (type_ == QueryType::READ) { + RETURN_NOT_OK(reader_.set_subarray(sub)); } status_ = QueryStatus::UNINITIALIZED; @@ -490,17 +515,19 @@ Status Query::set_subarray(const void* subarray, bool check_expanded_domain) { return Status::Ok(); } -Status Query::set_subarray(const Subarray& subarray) { - // Check that the subarray is associated with the same array as the query - if (subarray.array() != array_) - return LOG_STATUS( - Status::QueryError("Cannot set 
subarray; The array of subarray is " - "different from that of the query")); +Status Query::set_subarray_unsafe(const NDRange& subarray) { + // Prepare a subarray object + Subarray sub(array_, layout_); + if (!subarray.empty()) { + auto dim_num = array_->array_schema()->dim_num(); + for (unsigned d = 0; d < dim_num; ++d) + RETURN_NOT_OK(sub.add_range_unsafe(d, subarray[d])); + } if (type_ == QueryType::WRITE) { - RETURN_NOT_OK(writer_.set_subarray(subarray)); - } else { // READ - RETURN_NOT_OK(reader_.set_subarray(subarray)); + RETURN_NOT_OK(writer_.set_subarray(sub)); + } else if (type_ == QueryType::READ) { + RETURN_NOT_OK(reader_.set_subarray(sub)); } status_ = QueryStatus::UNINITIALIZED; @@ -557,77 +584,5 @@ QueryType Query::type() const { /* PRIVATE METHODS */ /* ****************************** */ -Status Query::check_subarray( - const void* subarray, bool check_expanded_domain) const { - if (subarray == nullptr) - return Status::Ok(); - - auto array_schema = this->array_schema(); - if (array_schema == nullptr) - return LOG_STATUS( - Status::QueryError("Cannot check subarray; Array schema not set")); - - switch (array_schema->domain()->type()) { - case Datatype::INT8: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::UINT8: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::INT16: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::UINT16: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::INT32: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::UINT32: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::INT64: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::UINT64: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::FLOAT32: - return 
check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::FLOAT64: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return check_subarray( - static_cast(subarray), check_expanded_domain); - case Datatype::CHAR: - case Datatype::STRING_ASCII: - case Datatype::STRING_UTF8: - case Datatype::STRING_UTF16: - case Datatype::STRING_UTF32: - case Datatype::STRING_UCS2: - case Datatype::STRING_UCS4: - case Datatype::ANY: - // Not supported domain type - assert(false); - break; - } - - return Status::Ok(); -} - } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index 1b90bdd22dbf..ea9a78213baf 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -370,21 +370,16 @@ class Query { * the entire domain. * * @param subarray The subarray to be set. - * @param check_expanded_domain If `true`, the subarray bounds will be - * checked against the expanded domain of the array. This is important - * in dense consolidation with space tiles not fully dividing the - * dimension domain. * @return Status - */ - Status set_subarray(const void* subarray, bool check_expanded_domain = false); - - /** - * Sets the query subarray. * - * @param subarray The subarray to be set. - * @return Status + * @note Setting a subarray for sparse arrays, or for dense arrays + * when performing unordered (sparse) writes, has no effect + * (will be ignored). 
*/ - Status set_subarray(const Subarray& subarray); + Status set_subarray(const void* subarray); + + /** Sets the query subarray, without performing any checks. */ + Status set_subarray_unsafe(const NDRange& subarray); /** Submits the query to the storage manager. */ Status submit(); @@ -441,145 +436,6 @@ class Query { /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ - - /** - * Correctness checks on `subarray`. - * - * @param subarray The subarray to be checked - * @param check_expanded_domain If `true`, the subarray bounds will be - * checked against the expanded domain of the array. This is important - * in dense consolidation with space tiles not fully dividing the - * dimension domain. - */ - Status check_subarray( - const void* subarray, bool check_expanded_domain = false) const; - - /** - * Correctness checks on `subarray`. - * - * @param subarray The subarray to be checked - * @param check_expanded_domain If `true`, the subarray bounds will be - * checked against the expanded domain of the array. This is important - * in dense consolidation with space tiles not fully dividing the - * dimension domain. 
- */ - template < - typename T, - typename std::enable_if::value>::type* = nullptr> - Status check_subarray(const T* subarray, bool check_expanded_domain) const { - auto array_schema = this->array_schema(); - auto domain = array_schema->domain(); - auto dim_num = domain->dim_num(); - - // Check subarray bounds - return check_subarray_bounds( - subarray, domain, dim_num, check_expanded_domain); - } - - template < - typename T, - typename std::enable_if::value>::type* = nullptr> - Status check_subarray(const T* subarray, bool check_expanded_domain) const { - (void)check_expanded_domain; // Non-applicable to real domains - auto array_schema = this->array_schema(); - auto domain = array_schema->domain(); - auto dim_num = domain->dim_num(); - - // Check for NaN - for (unsigned int i = 0; i < dim_num; ++i) { - if (std::isnan(subarray[2 * i]) || std::isnan(subarray[2 * i + 1])) - return LOG_STATUS(Status::QueryError("Subarray contains NaN")); - } - - // Check subarray bounds - return check_subarray_bounds(subarray, domain, dim_num); - } - - /** - * Checks that the subarray bounds are contained within the domain dimensions. - * - * @param subarray The subarray to check - * @param domain the domain of the subarray - * @param dim_num the number of dimensions in the subarray and domain - * @param check_expanded_domain If `true`, the subarray bounds will be - * checked against the expanded domain of the array. This is important - * in dense consolidation with space tiles not fully dividing the - * dimension domain. 
- * @return Status - */ - template - Status check_subarray_bounds( - const T* subarray, - const Domain* const domain, - const unsigned int dim_num, - bool check_expanded_domain = false) const { - T low, high; - for (unsigned int i = 0; i < dim_num; ++i) { - auto dim_domain = static_cast(domain->dimension(i)->domain()); - if (array_schema()->dense() && check_expanded_domain) { - auto tile_extent = - *static_cast(domain->dimension(i)->tile_extent()); - low = dim_domain[0]; - high = utils::math::ceil(dim_domain[1], tile_extent) * tile_extent; - } else { - low = dim_domain[0]; - high = dim_domain[1]; - } - - if (subarray[2 * i] < low || subarray[2 * i + 1] > high) - return LOG_STATUS(Status::QueryError( - "Subarray out of bounds. " + - format_subarray_bounds(subarray, domain, dim_num))); - if (subarray[2 * i] > subarray[2 * i + 1]) - return LOG_STATUS(Status::QueryError( - "Subarray lower bound is larger than upper bound. " + - format_subarray_bounds(subarray, domain, dim_num))); - } - - return Status::Ok(); - } - - /** - * Returns a formatted string containing the subarray bounds and domain - * dimension bounds. 
For example: - * "subarray: [1, 2, 1, 2] domain: [1, 4, 1, 4]" - * - * @param subarray The subarray to format from - * @param domain the domain of the subarray - * @param dim_num the number of dimensions in the subarray and domain - * @return string - */ - template - std::string format_subarray_bounds( - const T* subarray, - const Domain* const domain, - const unsigned int dim_num) const { - std::stringstream subarray_ss; - std::stringstream domain_ss; - - subarray_ss << "subarray: ["; - domain_ss << "domain: ["; - - for (unsigned int i = 0; i < dim_num; ++i) { - auto dim_domain = static_cast(domain->dimension(i)->domain()); - - if (i != 0) { - subarray_ss << ", "; - domain_ss << ", "; - } - - subarray_ss << subarray[2 * i] << ", " << subarray[2 * i + 1]; - - domain_ss << dim_domain[0] << ", " << dim_domain[1]; - } - - subarray_ss << "]"; - domain_ss << "]"; - - subarray_ss << " " << domain_ss.str(); - - return subarray_ss.str(); - } }; } // namespace sm diff --git a/tiledb/sm/query/read_cell_slab_iter.cc b/tiledb/sm/query/read_cell_slab_iter.cc index 51a788e7799c..edf685dea8f5 100644 --- a/tiledb/sm/query/read_cell_slab_iter.cc +++ b/tiledb/sm/query/read_cell_slab_iter.cc @@ -120,26 +120,27 @@ template void ReadCellSlabIter::compute_cell_offsets_col() { assert(std::is_integral::value); auto dim_num = domain_->dim_num(); - auto tile_extents = (const T*)domain_->tile_extents(); cell_offsets_.reserve(dim_num); cell_offsets_.push_back(1); - for (unsigned int i = 1; i < dim_num; ++i) - cell_offsets_.push_back(cell_offsets_.back() * tile_extents[i - 1]); + for (unsigned d = 1; d < dim_num; ++d) { + auto tile_extent = *(const T*)domain_->tile_extent(d - 1).data(); + cell_offsets_.push_back(cell_offsets_.back() * tile_extent); + } } template void ReadCellSlabIter::compute_cell_offsets_row() { assert(std::is_integral::value); auto dim_num = domain_->dim_num(); - auto tile_extents = (const T*)domain_->tile_extents(); cell_offsets_.reserve(dim_num); 
cell_offsets_.push_back(1); if (dim_num > 1) { - for (unsigned int i = dim_num - 2;; --i) { - cell_offsets_.push_back(cell_offsets_.back() * tile_extents[i + 1]); - if (i == 0) + for (unsigned d = dim_num - 2;; --d) { + auto tile_extent = *(const T*)domain_->tile_extent(d + 1).data(); + cell_offsets_.push_back(cell_offsets_.back() * tile_extent); + if (d == 0) break; } } @@ -293,7 +294,7 @@ void ReadCellSlabIter::compute_result_cell_slabs_dense( for (const auto& fd : frag_domains) { for (auto pit = to_process.begin(); pit != to_process.end();) { compute_cell_slab_overlap( - *pit, fd.second.get(), &slab_overlap, &overlap_length, &overlap_type); + *pit, fd.second, &slab_overlap, &overlap_length, &overlap_type); // No overlap if (overlap_type == 0) { diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 0fea0d723945..e52c753d61d3 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -109,12 +109,8 @@ const Array* Reader::array() const { return array_; } -Status Reader::add_range( - unsigned dim_idx, const void* start, const void* end, const void* stride) { - if (stride != nullptr) - return LOG_STATUS(Status::ReaderError( - "Cannot add range; Setting range stride is currently unsupported")); - return subarray_.add_range(dim_idx, start, end); +Status Reader::add_range(unsigned dim_idx, const Range& range) { + return subarray_.add_range(dim_idx, range); } Status Reader::get_range_num(unsigned dim_idx, uint64_t* range_num) const { @@ -198,7 +194,7 @@ Status Reader::get_buffer( return Status::Ok(); } -Status Reader::init() { +Status Reader::init(const Layout& layout) { // Sanity checks if (storage_manager_ == nullptr) return LOG_STATUS(Status::ReaderError( @@ -216,6 +212,12 @@ Status Reader::init() { return LOG_STATUS(Status::ReaderError( "Cannot initialize reader; Dense reads must have a subarray set")); + // Set layout + RETURN_NOT_OK(set_layout(layout)); + + // Check subarray + RETURN_NOT_OK(check_subarray()); + // Get 
configuration parameters const char *memory_budget, *memory_budget_var; auto config = storage_manager_->config(); @@ -456,27 +458,7 @@ void Reader::set_storage_manager(StorageManager* storage_manager) { storage_manager_ = storage_manager; } -Status Reader::set_subarray(const void* subarray, bool check_expanded_domain) { - Subarray new_subarray(array_, layout_); - if (subarray != nullptr) { - auto dim_num = array_schema_->dim_num(); - auto coord_size = datatype_size(array_schema_->coords_type()); - auto s = (unsigned char*)subarray; - for (unsigned i = 0; i < dim_num; ++i) - RETURN_NOT_OK(new_subarray.add_range( - i, (void*)(s + 2 * i * coord_size), check_expanded_domain)); - } - - return set_subarray(new_subarray); -} - Status Reader::set_subarray(const Subarray& subarray) { - // Check layout - if (subarray.layout() == Layout::GLOBAL_ORDER && subarray.range_num() != 1) - return LOG_STATUS( - Status::ReaderError("Cannot set subarray; Multi-range subarrays with " - "global order layout are not supported")); - subarray_ = subarray; layout_ = subarray.layout(); @@ -549,6 +531,15 @@ void Reader::compute_result_space_tiles( /* PRIVATE METHODS */ /* ****************************** */ +Status Reader::check_subarray() const { + if (subarray_.layout() == Layout::GLOBAL_ORDER && subarray_.range_num() != 1) + return LOG_STATUS(Status::ReaderError( + "Cannot initialize reader; Multi-range subarrays with " + "global order layout are not supported")); + + return Status::Ok(); +} + void Reader::clear_tiles( const std::string& name, const std::vector& result_tiles) const { @@ -1086,10 +1077,8 @@ void Reader::compute_result_space_tiles( const Subarray& subarray, std::map>* result_space_tiles) const { // For easy reference - auto dim_num = array_schema_->dim_num(); - auto domain = (const T*)array_schema_->domain()->domain(); - auto domain_ndrange = array_schema_->domain()->domain_ndrange(); - auto tile_extents = (const T*)array_schema_->domain()->tile_extents(); + auto domain = 
array_schema_->domain()->domain(); + auto tile_extents = array_schema_->domain()->tile_extents(); auto tile_order = array_schema_->tile_order(); // Compute fragment tile domains @@ -1100,7 +1089,6 @@ void Reader::compute_result_space_tiles( if (fragment_metadata_[i]->dense()) { frag_tile_domains.emplace_back( i, - dim_num, domain, fragment_metadata_[i]->non_empty_domain(), tile_extents, @@ -1112,7 +1100,7 @@ void Reader::compute_result_space_tiles( // Get tile coords and array domain const auto& tile_coords = subarray.tile_coords(); TileDomain array_tile_domain( - UINT32_MAX, dim_num, domain, domain_ndrange, tile_extents, tile_order); + UINT32_MAX, domain, domain, tile_extents, tile_order); // Compute result space tiles compute_result_space_tiles( @@ -1364,6 +1352,7 @@ Status Reader::dense_read() { std::vector result_cell_slabs; std::vector result_tiles; auto& subarray = read_state_.partitioner_.current(); + subarray.compute_tile_coords(); compute_result_cell_slabs( subarray, diff --git a/tiledb/sm/query/reader.h b/tiledb/sm/query/reader.h index c33d9912cfbc..2ba1103b4f24 100644 --- a/tiledb/sm/query/reader.h +++ b/tiledb/sm/query/reader.h @@ -124,14 +124,8 @@ class Reader { /** Returns the array. */ const Array* array() const; - /** - * Adds a range to the (read/write) query on the input dimension, - * in the form of (start, end, stride). - * The range components must be of the same type as the domain type of the - * underlying array. - */ - Status add_range( - unsigned dim_idx, const void* start, const void* end, const void* stride); + /** Adds a range to the subarray on the input dimension. */ + Status add_range(unsigned dim_idx, const Range& range); /** Retrieves the number of ranges of the subarray for the given dimension. */ Status get_range_num(unsigned dim_idx, uint64_t* range_num) const; @@ -222,8 +216,8 @@ class Reader { /** Returns the last fragment uri. */ URI last_fragment_uri() const; - /** Initializes the reader. 
*/ - Status init(); + /** Initializes the reader with the subarray layout. */ + Status init(const Layout& layout); /** Returns the cell layout. */ Layout layout() const; @@ -327,25 +321,7 @@ class Reader { /** Sets the storage manager. */ void set_storage_manager(StorageManager* storage_manager); - /** - * Sets the query subarray. If it is null, then the subarray will be set to - * the entire domain. - * - * @param subarray The subarray to be set. - * @param check_expanded_domain If `true`, the subarray bounds will be - * checked against the expanded domain of the array. This is important - * in dense consolidation with space tiles not fully dividing the - * dimension domain. - * @return Status - */ - Status set_subarray(const void* subarray, bool check_expanded_domain = false); - - /** - * Sets the query subarray. - * - * @param subarray The subarray to be set. - * @return Status - */ + /** Sets the query subarray. */ Status set_subarray(const Subarray& subarray); /** Returns the query subarray. */ @@ -525,6 +501,9 @@ class Reader { /* PRIVATE METHODS */ /* ********************************* */ + /** Correctness checks for `subarray_`. */ + Status check_subarray() const; + /** * Deletes the tiles on the input attribute/dimension from the result tiles. * diff --git a/tiledb/sm/query/result_space_tile.h b/tiledb/sm/query/result_space_tile.h index 3cab949b9efb..6cb08ce678e1 100644 --- a/tiledb/sm/query/result_space_tile.h +++ b/tiledb/sm/query/result_space_tile.h @@ -73,8 +73,7 @@ class ResultSpaceTile { ResultSpaceTile& operator=(ResultSpaceTile&& result_space_tile) = default; /** Returns the fragment domains. 
*/ - const std::vector>>& - frag_domains() const { + const std::vector>& frag_domains() const { return frag_domains_; } @@ -118,7 +117,7 @@ class ResultSpaceTile { return false; for (size_t i = 0; i < frag_domains_.size(); ++i) { if (!(frag_domains_[i].first == rst.frag_domains_[i].first && - frag_domains_[i].second.get() == rst.frag_domains_[i].second.get())) + frag_domains_[i].second == rst.frag_domains_[i].second)) return false; } @@ -136,8 +135,7 @@ class ResultSpaceTile { * with domains that intersect this space tile will be included * in this vector. */ - std::vector>> - frag_domains_; + std::vector> frag_domains_; /** * The (dense) result tiles for this space tile, as a map diff --git a/tiledb/sm/query/write_cell_slab_iter.cc b/tiledb/sm/query/write_cell_slab_iter.cc index 1e74671d44fe..614746524219 100644 --- a/tiledb/sm/query/write_cell_slab_iter.cc +++ b/tiledb/sm/query/write_cell_slab_iter.cc @@ -244,32 +244,31 @@ template Status WriteCellSlabIter::sanity_check() const { // The layout should not be unordered if (layout_ == Layout::UNORDERED) - return LOG_STATUS(Status::DenseCellRangeIterError( + return LOG_STATUS(Status::WriteCellSlabIterError( "Sanity check failed; Unordered layout is invalid")); // For easy reference auto dim_num = domain_->dim_num(); - auto domain = (T*)domain_->domain(); + auto domain = domain_->domain(); // Check subarray length if (subarray_.size() != 2 * dim_num) - return LOG_STATUS(Status::DenseCellRangeIterError( + return LOG_STATUS(Status::WriteCellSlabIterError( "Sanity check failed; Invalid subarray length")); // Check subarray bounds - for (unsigned i = 0; i < dim_num; ++i) { - if (subarray_[2 * i] > subarray_[2 * i + 1]) - return LOG_STATUS(Status::DenseCellRangeIterError( + for (unsigned d = 0; d < dim_num; ++d) { + if (subarray_[2 * d] > subarray_[2 * d + 1]) + return LOG_STATUS(Status::WriteCellSlabIterError( "Sanity check failed; Invalid subarray bounds")); } // Check if subarray is contained in the domain - for 
(unsigned i = 0; i < dim_num; ++i) { - if (subarray_[2 * i] < domain[2 * i] || - subarray_[2 * i] > domain[2 * i + 1] || - subarray_[2 * i + 1] < domain[2 * i] || - subarray_[2 * i + 1] > domain[2 * i + 1]) - return LOG_STATUS(Status::DenseCellRangeIterError( + for (unsigned d = 0; d < dim_num; ++d) { + auto dim_dom = (const T*)domain[d].data(); + if (subarray_[2 * d] < dim_dom[0] || subarray_[2 * d] > dim_dom[1] || + subarray_[2 * d + 1] < dim_dom[0] || subarray_[2 * d + 1] > dim_dom[1]) + return LOG_STATUS(Status::WriteCellSlabIterError( "Sanity check failed; Subarray not contained in domain")); } diff --git a/tiledb/sm/query/writer.cc b/tiledb/sm/query/writer.cc index 63a371d53095..7361b7338bd3 100644 --- a/tiledb/sm/query/writer.cc +++ b/tiledb/sm/query/writer.cc @@ -68,11 +68,9 @@ Writer::Writer() { initialized_ = false; layout_ = Layout::ROW_MAJOR; storage_manager_ = nullptr; - subarray_ = nullptr; } Writer::~Writer() { - std::free(subarray_); clear_coord_buffers(); } @@ -80,6 +78,44 @@ Writer::~Writer() { /* API */ /* ****************************** */ +Status Writer::add_range(unsigned dim_idx, const Range& range) { + if (!array_schema_->dense()) + return LOG_STATUS( + Status::WriterError("Adding a subarray range to a write query is not " + "supported in sparse arrays")); + + if (subarray_.is_set(dim_idx)) + return LOG_STATUS( + Status::WriterError("Cannot add range; Multi-range dense writes " + "are not supported")); + + return subarray_.add_range(dim_idx, range); +} + +Status Writer::get_range_num(unsigned dim_idx, uint64_t* range_num) const { + if (!array_schema_->dense()) + return LOG_STATUS( + Status::WriterError("Getting the number of ranges from a write query " + "is not applicable to sparse arrays")); + + return subarray_.get_range_num(dim_idx, range_num); +} + +Status Writer::get_range( + unsigned dim_idx, + uint64_t range_idx, + const void** start, + const void** end, + const void** stride) const { + if (!array_schema_->dense()) + return 
LOG_STATUS( + Status::WriterError("Getting a range from a write query is not " + "applicable to sparse arrays")); + + *stride = nullptr; + return subarray_.get_range(dim_idx, range_idx, start, end); +} + const ArraySchema* Writer::array_schema() const { return array_schema_; } @@ -194,7 +230,7 @@ void Writer::set_dedup_coords(bool b) { dedup_coords_ = b; } -Status Writer::init() { +Status Writer::init(const Layout& layout) { // Sanity checks if (storage_manager_ == nullptr) return LOG_STATUS(Status::WriterError( @@ -206,11 +242,16 @@ Status Writer::init() { return LOG_STATUS( Status::WriterError("Cannot initialize query; Buffers not set")); - if (subarray_ == nullptr) - RETURN_NOT_OK(set_subarray(nullptr)); + // Set a default subarray + if (!subarray_.is_set()) + subarray_ = Subarray(array_, layout); + + RETURN_NOT_OK(set_layout(layout)); RETURN_NOT_OK(check_subarray()); RETURN_NOT_OK(check_buffer_sizes()); RETURN_NOT_OK(check_buffer_names()); + if (array_schema_->dense()) + RETURN_NOT_OK(subarray_.to_byte_vec(&subarray_flat_)); optimize_layout_for_1D(); @@ -238,6 +279,7 @@ Layout Writer::layout() const { void Writer::set_array(const Array* array) { array_ = array; + subarray_ = Subarray(array); } void Writer::set_array_schema(const ArraySchema* array_schema) { @@ -379,15 +421,8 @@ void Writer::set_fragment_uri(const URI& fragment_uri) { } Status Writer::set_layout(Layout layout) { - // Ordered layout for writes in sparse arrays is meaningless - if (!array_schema_->dense() && - (layout == Layout::COL_MAJOR || layout == Layout::ROW_MAJOR)) - return LOG_STATUS( - Status::WriterError("Cannot set layout; Ordered layouts cannot be used " - "when writing to sparse " - "arrays - use GLOBAL_ORDER or UNORDERED instead")); - layout_ = layout; + subarray_.set_layout(layout); return Status::Ok(); } @@ -396,50 +431,35 @@ void Writer::set_storage_manager(StorageManager* storage_manager) { storage_manager_ = storage_manager; } -Status Writer::set_subarray(const void* subarray) { 
- // Check - if (subarray != nullptr) { - if (!array_schema_->dense()) // Sparse arrays - return LOG_STATUS(Status::WriterError( - "Cannot set subarray when writing to sparse arrays")); - else if (layout_ == Layout::UNORDERED) // Dense arrays - return LOG_STATUS(Status::WriterError( - "Cannot set subarray when performing sparse writes to dense arrays " - "(i.e., when writing in UNORDERED mode)")); - } +Status Writer::set_subarray(const Subarray& subarray) { + // Not applicable to sparse arrays + if (!array_schema_->dense()) + return LOG_STATUS(Status::WriterError( + "Setting a subarray is not supported in sparse writes")); + + // Subarray must be unary for dense writes + if (subarray.range_num() != 1) + return LOG_STATUS( + Status::WriterError("Cannot set subarray; Multi-range dense writes " + "are not supported")); // Reset the writer (this will nuke the global write state) reset(); - uint64_t subarray_size = 2 * array_schema_->coords_size(); - if (subarray_ == nullptr) - subarray_ = std::malloc(subarray_size); - if (subarray_ == nullptr) - return LOG_STATUS( - Status::WriterError("Memory allocation for subarray failed")); + subarray_ = subarray; - if (subarray == nullptr) - std::memcpy(subarray_, array_schema_->domain()->domain(), subarray_size); - else - std::memcpy(subarray_, subarray, subarray_size); + // Set subarray_flat so calls to `subarray()` will reflect newly set value + RETURN_NOT_OK(subarray_.to_byte_vec(&subarray_flat_)); return Status::Ok(); } -Status Writer::set_subarray(const Subarray& subarray) { - if (!array_schema_->dense()) // Sparse arrays - return LOG_STATUS(Status::WriterError( - "Cannot set subarray when writing to sparse arrays")); - - // TODO - // TODO: for the dense case, allow only single-range subarrays - (void)subarray; - - return Status::Ok(); -} +const void* Writer::subarray() const { + // Only access subarray_flat_ if it is not empty + if (!subarray_flat_.empty()) + return &subarray_flat_[0]; -void* Writer::subarray() const { - 
return subarray_; + return nullptr; } Status Writer::write() { @@ -508,7 +528,7 @@ Status Writer::check_buffer_sizes() const { (layout_ != Layout::ROW_MAJOR && layout_ != Layout::COL_MAJOR)) return Status::Ok(); - auto cell_num = array_schema_->domain()->cell_num(subarray_); + auto cell_num = array_schema_->domain()->cell_num(subarray_.ndrange(0)); uint64_t expected_cell_num = 0; for (const auto& it : buffers_) { const auto& attr = it.first; @@ -669,10 +689,7 @@ Status Writer::check_coord_oob() const { auto statuses = parallel_for_2d(0, coords_num_, 0, dim_num, [&](uint64_t c, unsigned d) { auto dim = array_schema_->dimension(d); - std::string err_msg; - if (dim->oob(buffs[d] + c * coord_sizes[d], &err_msg)) - return Status::WriterError(err_msg); - return Status::Ok(); + return dim->oob(buffs[d] + c * coord_sizes[d]); }); // Check all statuses @@ -733,90 +750,16 @@ Status Writer::check_subarray() const { return LOG_STATUS( Status::WriterError("Cannot check subarray; Array schema not set")); - if (subarray_ == nullptr) { - if (array_schema_->dense()) + if (array_schema_->dense()) { + if (layout_ == Layout::GLOBAL_ORDER && !subarray_.coincides_with_tiles()) + return LOG_STATUS( + Status::WriterError("Cannot initialize query; In global writes for " + "dense arrays, the subarray " + "must coincide with the tile bounds")); + if (layout_ == Layout::UNORDERED && subarray_.is_set()) return LOG_STATUS(Status::WriterError( - "Cannot initialize query; Dense writes must specify a subarray")); - else - return Status::Ok(); - } - - switch (array_schema_->domain()->type()) { - case Datatype::INT8: - return check_subarray(); - case Datatype::UINT8: - return check_subarray(); - case Datatype::INT16: - return check_subarray(); - case Datatype::UINT16: - return check_subarray(); - case Datatype::INT32: - return check_subarray(); - case Datatype::UINT32: - return check_subarray(); - case Datatype::INT64: - return check_subarray(); - case Datatype::UINT64: - return check_subarray(); - 
case Datatype::FLOAT32: - return check_subarray(); - case Datatype::FLOAT64: - return check_subarray(); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return check_subarray(); - case Datatype::CHAR: - case Datatype::STRING_ASCII: - case Datatype::STRING_UTF8: - case Datatype::STRING_UTF16: - case Datatype::STRING_UTF32: - case Datatype::STRING_UCS2: - case Datatype::STRING_UCS4: - case Datatype::ANY: - // Not supported domain type - assert(false); - break; - } - - return Status::Ok(); -} - -template -Status Writer::check_subarray() const { - // Check subarray bounds - auto domain = array_schema_->domain(); - auto dim_num = domain->dim_num(); - auto subarray = (T*)subarray_; - - // In global dense writes, the subarray must coincide with tile extents - // Note that in the dense case, the domain type is integer - if (array_schema_->dense() && layout() == Layout::GLOBAL_ORDER) { - for (unsigned int i = 0; i < dim_num; ++i) { - const auto dim = domain->dimension(i); - auto dim_domain = static_cast(dim->domain()); - auto tile_extent = static_cast(dim->tile_extent()); - assert(tile_extent != nullptr); - auto norm_1 = uint64_t(subarray[2 * i] - dim_domain[0]); - auto norm_2 = (uint64_t(subarray[2 * i + 1]) - dim_domain[0]) + 1; - if ((norm_1 / (*tile_extent) * (*tile_extent) != norm_1) || - (norm_2 / (*tile_extent) * (*tile_extent) != norm_2)) { - return LOG_STATUS( - Status::WriterError("Invalid subarray; In global writes for " - "dense arrays, the subarray " - "must coincide with the tile bounds")); - } - } + "Cannot initialize query; Setting a subarray in unordered writes for " + "dense arrays in inapplicable")); } return 
Status::Ok(); @@ -1007,7 +950,8 @@ Status Writer::compute_write_cell_ranges( auto domain = array_schema_->domain(); auto dim_num = array_schema_->dim_num(); - auto subarray = (const T*)subarray_; + assert(!subarray_flat_.empty()); + auto subarray = (const T*)&subarray_flat_[0]; auto cell_order = array_schema_->cell_order(); bool same_layout = (cell_order == layout_); uint64_t start, end, start_in_sub, end_in_sub; @@ -1076,7 +1020,7 @@ Status Writer::create_fragment( *frag_meta = std::make_shared( storage_manager_, array_schema_, uri, timestamp_range, dense); - RETURN_NOT_OK((*frag_meta)->init(subarray_)); + RETURN_NOT_OK((*frag_meta)->init(subarray_.ndrange(0))); return storage_manager_->create_dir(uri); STATS_FUNC_OUT(writer_create_fragment); @@ -1173,8 +1117,9 @@ Status Writer::finalize_global_write_state() { } // Check if the total number of cells written is equal to the subarray size - if (!has_coords_) { - auto expected_cell_num = array_schema_->domain()->cell_num(subarray_); + if (!has_coords_) { // This implies a dense array + auto expected_cell_num = + array_schema_->domain()->cell_num(subarray_.ndrange(0)); if (cell_num != expected_cell_num) { clean_up(uri); std::stringstream ss; @@ -1430,8 +1375,9 @@ Status Writer::init_tile_dense_cell_range_iters( auto dim_num = domain->dim_num(); std::vector subarray; subarray.resize(2 * dim_num); + assert(!subarray_flat_.empty()); for (unsigned i = 0; i < 2 * dim_num; ++i) - subarray[i] = ((T*)subarray_)[i]; + subarray[i] = ((T*)&subarray_flat_[0])[i]; auto cell_order = domain->cell_order(); // Compute tile domain and current tile coords diff --git a/tiledb/sm/query/writer.h b/tiledb/sm/query/writer.h index 7a0683e627b4..0cce5e4deb72 100644 --- a/tiledb/sm/query/writer.h +++ b/tiledb/sm/query/writer.h @@ -41,6 +41,7 @@ #include "tiledb/sm/misc/status.h" #include "tiledb/sm/misc/types.h" #include "tiledb/sm/query/write_cell_slab_iter.h" +#include "tiledb/sm/subarray/subarray.h" #include "tiledb/sm/tile/tile.h" namespace 
tiledb { @@ -50,7 +51,6 @@ class Array; class ArraySchema; class FragmentMetadata; class StorageManager; -class Subarray; /** Processes write queries. */ class Writer { @@ -116,6 +116,29 @@ class Writer { /* API */ /* ********************************* */ + /** Adds a range to the subarray on the input dimension. */ + Status add_range(unsigned dim_idx, const Range& range); + + /** Retrieves the number of ranges of the subarray for the given dimension. */ + Status get_range_num(unsigned dim_idx, uint64_t* range_num) const; + + /** + * Retrieves a range from a dimension in the form (start, end, stride). + * + * @param dim_idx The dimension to retrieve the range from. + * @param range_idx The id of the range to retrieve. + * @param start The range start to retrieve. + * @param end The range end to retrieve. + * @param stride The range stride to retrieve. + * @return Status + */ + Status get_range( + unsigned dim_idx, + uint64_t range_idx, + const void** start, + const void** end, + const void** stride) const; + /** Returns the array schema. */ const ArraySchema* array_schema() const; @@ -170,8 +193,8 @@ class Writer { /** Returns current setting of dedup_coords_ */ bool get_dedup_coords() const; - /** Initializes the writer. */ - Status init(); + /** Initializes the writer with the subarray layout. */ + Status init(const Layout& layout); /** Returns the cell layout. */ Layout layout() const; @@ -238,28 +261,11 @@ class Writer { /** Sets the storage manager. */ void set_storage_manager(StorageManager* storage_manager); - /** - * Sets the query subarray. If it is null, then the subarray will be set to - * the entire domain. - * - * @param subarray The subarray to be set. - * @return Status - */ - Status set_subarray(const void* subarray); - - /** - * Sets the query subarray. - * - * @param subarray The subarray to be set. - * @return Status - */ + /** Sets the query subarray. 
*/ Status set_subarray(const Subarray& subarray); - /* - * Return the subarray - * @return subarray - */ - void* subarray() const; + /* Return the subarray. */ + const void* subarray() const; /** Performs a write query using its set members. */ Status write(); @@ -354,8 +360,19 @@ class Writer { /** The storage manager. */ StorageManager* storage_manager_; - /** The subarray the query is constrained on. */ - void* subarray_; + /** + * The subarray the query is constrained on. It is represented + * as a flat byte vector for the (low, high) pairs of the + * subarray. This is used only in dense writes and, therefore, + * it is assumed that all dimensions have the same datatype. + */ + std::vector subarray_flat_; + + /** + * The subarray object, used in dense writes. It has to be + * comprised of a single multi-dimensional range. + */ + Subarray subarray_; /** Stores information about the written fragments. */ std::vector written_fragment_info_; @@ -413,10 +430,6 @@ class Writer { /** Correctness checks for `subarray_`. */ Status check_subarray() const; - /** Correctness checks for `subarray_`. */ - template - Status check_subarray() const; - /** * Cleans up the coordinate buffers. 
Applicable only if the coordinate * buffers were allocated by TileDB (not the user) diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index b8646ddccb0b..544fe0ebd8e3 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -214,16 +214,18 @@ Status dimension_to_capnp( dimension_builder->setName(dimension->name()); dimension_builder->setType(datatype_str(dimension->type())); - dimension_builder->setNullTileExtent(dimension->tile_extent() == nullptr); + dimension_builder->setNullTileExtent(dimension->tile_extent().empty()); auto domain_builder = dimension_builder->initDomain(); RETURN_NOT_OK(utils::set_capnp_array_ptr( - domain_builder, dimension->type(), dimension->domain(), 2)); + domain_builder, dimension->type(), dimension->domain().data(), 2)); - if (dimension->tile_extent() != nullptr) { + if (!dimension->tile_extent().empty()) { auto tile_extent_builder = dimension_builder->initTileExtent(); RETURN_NOT_OK(utils::set_capnp_scalar( - tile_extent_builder, dimension->type(), dimension->tile_extent())); + tile_extent_builder, + dimension->type(), + dimension->tile_extent().data())); } return Status::Ok(); diff --git a/tiledb/sm/serialization/capnp_utils.h b/tiledb/sm/serialization/capnp_utils.h index a60ef45cc207..30e3dbd37c20 100644 --- a/tiledb/sm/serialization/capnp_utils.h +++ b/tiledb/sm/serialization/capnp_utils.h @@ -320,27 +320,43 @@ tiledb::sm::Status serialize_subarray( const tiledb::sm::ArraySchema* array_schema, const void* subarray) { // Check coords type - const auto coords_type = array_schema->coords_type(); - switch (coords_type) { - case tiledb::sm::Datatype::CHAR: - case tiledb::sm::Datatype::STRING_ASCII: - case tiledb::sm::Datatype::STRING_UTF8: - case tiledb::sm::Datatype::STRING_UTF16: - case tiledb::sm::Datatype::STRING_UTF32: - case tiledb::sm::Datatype::STRING_UCS2: - case tiledb::sm::Datatype::STRING_UCS4: - case tiledb::sm::Datatype::ANY: - // 
String dimensions not yet supported - return LOG_STATUS(tiledb::sm::Status::SerializationError( - "Cannot serialize subarray; unsupported domain type.")); - default: - break; + auto dim_num = array_schema->dim_num(); + uint64_t subarray_size = 0; + Datatype first_dimension_datatype = array_schema->dimension(0)->type(); + // If all the dimensions are the same datatype, then we will store the + // subarray in a type array for <=1.7 compatibility + for (unsigned d = 0; d < dim_num; ++d) { + auto dimension = array_schema->dimension(d); + const auto coords_type = dimension->type(); + + if (coords_type != first_dimension_datatype) { + return Status::SerializationError( + "Subarray dimension datatypes must be homogeneous"); + } + + switch (coords_type) { + case tiledb::sm::Datatype::CHAR: + case tiledb::sm::Datatype::STRING_ASCII: + case tiledb::sm::Datatype::STRING_UTF8: + case tiledb::sm::Datatype::STRING_UTF16: + case tiledb::sm::Datatype::STRING_UTF32: + case tiledb::sm::Datatype::STRING_UCS2: + case tiledb::sm::Datatype::STRING_UCS4: + case tiledb::sm::Datatype::ANY: + // String dimensions not yet supported + return LOG_STATUS(tiledb::sm::Status::SerializationError( + "Cannot serialize subarray; unsupported domain type.")); + default: + break; + } + subarray_size += 2 * dimension->coord_size(); } - const uint64_t subarray_size = 2 * array_schema->coords_size(); - const uint64_t subarray_length = subarray_size / datatype_size(coords_type); - RETURN_NOT_OK( - set_capnp_array_ptr(builder, coords_type, subarray, subarray_length)); + // Store subarray in typed array for backwards compatibility with 1.7/1.6 + const uint64_t subarray_length = + subarray_size / datatype_size(first_dimension_datatype); + RETURN_NOT_OK(set_capnp_array_ptr( + builder, first_dimension_datatype, subarray, subarray_length)); return tiledb::sm::Status::Ok(); } @@ -351,26 +367,41 @@ tiledb::sm::Status deserialize_subarray( const tiledb::sm::ArraySchema* array_schema, void** subarray) { // Check 
coords type - const auto coords_type = array_schema->coords_type(); - switch (coords_type) { - case tiledb::sm::Datatype::CHAR: - case tiledb::sm::Datatype::STRING_ASCII: - case tiledb::sm::Datatype::STRING_UTF8: - case tiledb::sm::Datatype::STRING_UTF16: - case tiledb::sm::Datatype::STRING_UTF32: - case tiledb::sm::Datatype::STRING_UCS2: - case tiledb::sm::Datatype::STRING_UCS4: - case tiledb::sm::Datatype::ANY: - // String dimensions not yet supported - return LOG_STATUS(tiledb::sm::Status::SerializationError( - "Cannot deserialize subarray; unsupported domain type.")); - default: - break; + auto dim_num = array_schema->dim_num(); + uint64_t subarray_size = 0; + Datatype first_dimension_datatype = array_schema->dimension(0)->type(); + for (unsigned d = 0; d < dim_num; ++d) { + auto dimension = array_schema->dimension(d); + const auto coords_type = dimension->type(); + + if (coords_type != first_dimension_datatype) { + return Status::SerializationError( + "Subarray dimension datatypes must be homogeneous"); + } + + switch (coords_type) { + case tiledb::sm::Datatype::CHAR: + case tiledb::sm::Datatype::STRING_ASCII: + case tiledb::sm::Datatype::STRING_UTF8: + case tiledb::sm::Datatype::STRING_UTF16: + case tiledb::sm::Datatype::STRING_UTF32: + case tiledb::sm::Datatype::STRING_UCS2: + case tiledb::sm::Datatype::STRING_UCS4: + case tiledb::sm::Datatype::ANY: + // String dimensions not yet supported + return LOG_STATUS(tiledb::sm::Status::SerializationError( + "Cannot deserialize subarray; unsupported domain type.")); + default: + break; + } + subarray_size += 2 * dimension->coord_size(); } - const uint64_t subarray_size = 2 * array_schema->coords_size(); tiledb::sm::Buffer subarray_buff; - RETURN_NOT_OK(copy_capnp_list(reader, coords_type, &subarray_buff)); + // Subarrays only work on homogeneous dimensions so use first dimension + // datatype to copy from + RETURN_NOT_OK( + copy_capnp_list(reader, first_dimension_datatype, &subarray_buff)); if (subarray_buff.size() 
== 0) { *subarray = nullptr; diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index 086b1fdeb6b7..8bb128cc860c 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -102,38 +102,34 @@ Status writer_from_capnp( writer->set_check_coord_oob(writer_reader.getCheckCoordOOB()); writer->set_dedup_coords(writer_reader.getDedupCoords()); - const auto* schema = writer->array_schema(); - // For sparse writes we want to explicitly set subarray to nullptr. - const bool sparse_write = - !schema->dense() || writer->layout() == Layout::UNORDERED; - if (writer_reader.hasSubarray() && !sparse_write) { - auto subarray_reader = writer_reader.getSubarray(); - void* subarray = nullptr; - RETURN_NOT_OK( - utils::deserialize_subarray(subarray_reader, schema, &subarray)); - RETURN_NOT_OK_ELSE(writer->set_subarray(subarray), std::free(subarray)); - std::free(subarray); - } else { - RETURN_NOT_OK(writer->set_subarray(nullptr)); - } - return Status::Ok(); } Status subarray_to_capnp( - const Subarray* subarray, capnp::Subarray::Builder* builder) { + const ArraySchema* schema, + const Subarray* subarray, + capnp::Subarray::Builder* builder) { builder->setLayout(layout_str(subarray->layout())); const uint32_t dim_num = subarray->dim_num(); auto ranges_builder = builder->initRanges(dim_num); for (uint32_t i = 0; i < dim_num; i++) { + const auto datatype = schema->dimension(i)->type(); auto range_builder = ranges_builder[i]; - const auto* ranges = subarray->ranges_for_dim(i); - range_builder.setType(datatype_str(ranges->type_)); - range_builder.setHasDefaultRange(ranges->has_default_range_); - range_builder.setBuffer(kj::arrayPtr( - static_cast(ranges->buffer_.data()), - ranges->buffer_.size())); + const auto& ranges = subarray->ranges_for_dim(i); + range_builder.setType(datatype_str(datatype)); + + range_builder.setHasDefaultRange(subarray->is_default(i)); + // This will copy all of the ranges into one large byte vector + // Future 
improvement is to do this in a zero copy manner + // (kj::ArrayBuilder?) + auto capnpVector = kj::Vector(); + for (auto& range : ranges) { + capnpVector.addAll(kj::ArrayPtr( + const_cast(reinterpret_cast(range.data())), + range.size())); + } + range_builder.setBuffer(capnpVector.asPtr()); } return Status::Ok(); @@ -148,14 +144,19 @@ Status subarray_from_capnp( Datatype type = Datatype::UINT8; RETURN_NOT_OK(datatype_enum(range_reader.getType(), &type)); - Subarray::Ranges ranges(type); - auto data_ptr = range_reader.getBuffer(); - RETURN_NOT_OK(ranges.buffer_.realloc(data_ptr.size())); - RETURN_NOT_OK( - ranges.buffer_.write((void*)data_ptr.begin(), data_ptr.size())); - ranges.has_default_range_ = range_reader.getHasDefaultRange(); + auto data = range_reader.getBuffer(); + auto data_ptr = data.asBytes(); + uint64_t range_size = datatype_size(type) * 2; + size_t range_count = data_ptr.size() / range_size; + std::vector ranges(range_count); + for (size_t j = 0; j < range_count; j++) { + ranges[j] = Range(data_ptr.begin() + (j * range_size), range_size); + } RETURN_NOT_OK(subarray->set_ranges_for_dim(i, ranges)); + + // Set default indicator + subarray->set_is_default(i, range_reader.getHasDefaultRange()); } return Status::Ok(); @@ -167,7 +168,8 @@ Status subarray_partitioner_to_capnp( capnp::SubarrayPartitioner::Builder* builder) { // Subarray auto subarray_builder = builder->initSubarray(); - RETURN_NOT_OK(subarray_to_capnp(partitioner.subarray(), &subarray_builder)); + RETURN_NOT_OK( + subarray_to_capnp(schema, partitioner.subarray(), &subarray_builder)); // Per-attr mem budgets const auto* attr_budgets = partitioner.get_attr_result_budgets(); @@ -193,8 +195,8 @@ Status subarray_partitioner_to_capnp( const auto* partition_info = partitioner.current_partition_info(); auto info_builder = builder->initCurrent(); auto info_subarray_builder = info_builder.initSubarray(); - RETURN_NOT_OK( - subarray_to_capnp(&partition_info->partition_, &info_subarray_builder)); + 
RETURN_NOT_OK(subarray_to_capnp( + schema, &partition_info->partition_, &info_subarray_builder)); info_builder.setStart(partition_info->start_); info_builder.setEnd(partition_info->end_); info_builder.setSplitMultiRange(partition_info->split_multi_range_); @@ -209,7 +211,7 @@ Status subarray_partitioner_to_capnp( size_t sr_idx = 0; for (const auto& subarray : state->single_range_) { auto b = single_range_builder[sr_idx]; - RETURN_NOT_OK(subarray_to_capnp(&subarray, &b)); + RETURN_NOT_OK(subarray_to_capnp(schema, &subarray, &b)); sr_idx++; } auto multi_range_builder = @@ -217,7 +219,7 @@ Status subarray_partitioner_to_capnp( size_t m_idx = 0; for (const auto& subarray : state->multi_range_) { auto b = multi_range_builder[m_idx]; - RETURN_NOT_OK(subarray_to_capnp(&subarray, &b)); + RETURN_NOT_OK(subarray_to_capnp(schema, &subarray, &b)); m_idx++; } @@ -292,18 +294,18 @@ Status subarray_partitioner_from_capnp( auto sr_reader = state_reader.getSingleRange(); const unsigned num_sr = sr_reader.size(); for (unsigned i = 0; i < num_sr; i++) { - auto subarray_reader = sr_reader[i]; + auto subarray_reader_ = sr_reader[i]; state->single_range_.emplace_back(array, layout); - Subarray& subarray = state->single_range_.back(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray& subarray_ = state->single_range_.back(); + RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_)); } auto m_reader = state_reader.getMultiRange(); const unsigned num_m = m_reader.size(); for (unsigned i = 0; i < num_m; i++) { - auto subarray_reader = m_reader[i]; + auto subarray_reader_ = m_reader[i]; state->multi_range_.emplace_back(array, layout); - Subarray& subarray = state->multi_range_.back(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray& subarray_ = state->multi_range_.back(); + RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_)); } // Overall mem budget @@ -363,7 +365,8 @@ Status reader_to_capnp( // Subarray auto 
subarray_builder = reader_builder->initSubarray(); - RETURN_NOT_OK(subarray_to_capnp(reader.subarray(), &subarray_builder)); + RETURN_NOT_OK( + subarray_to_capnp(array_schema, reader.subarray(), &subarray_builder)); // Read state RETURN_NOT_OK(read_state_to_capnp(array_schema, reader, reader_builder)); @@ -679,12 +682,29 @@ Status query_from_capnp( } // Deserialize reader/writer. + // Also set subarray on query if it exists. Prior to 1.8 the subarray was set + // on the reader or writer directly Now we set it on the query class after the + // heterogeneous coordinate changes if (type == QueryType::READ) { auto reader_reader = query_reader.getReader(); RETURN_NOT_OK(reader_from_capnp(reader_reader, query->reader())); } else { auto writer_reader = query_reader.getWriter(); RETURN_NOT_OK(writer_from_capnp(writer_reader, query->writer())); + + // For sparse writes we want to explicitly set subarray to nullptr. + const bool sparse_write = + !schema->dense() || query->layout() == Layout::UNORDERED; + if (writer_reader.hasSubarray() && !sparse_write) { + auto subarray_reader = writer_reader.getSubarray(); + void* subarray = nullptr; + RETURN_NOT_OK( + utils::deserialize_subarray(subarray_reader, schema, &subarray)); + RETURN_NOT_OK_ELSE(query->set_subarray(subarray), std::free(subarray)); + std::free(subarray); + } else { + RETURN_NOT_OK(query->set_subarray(nullptr)); + } } // Deserialize status. 
This must come last because various setters above diff --git a/tiledb/sm/storage_manager/consolidator.cc b/tiledb/sm/storage_manager/consolidator.cc index 50d3f7de9dfb..81db291de392 100644 --- a/tiledb/sm/storage_manager/consolidator.cc +++ b/tiledb/sm/storage_manager/consolidator.cc @@ -519,25 +519,16 @@ Status Consolidator::create_queries( Query** query_r, Query** query_w, URI* new_fragment_uri) { - // Create subarray to set - std::vector s; - if (!sparse_mode) { - auto domain_size = 2 * array_for_reads->array_schema()->coords_size(); - s.resize(domain_size); - uint64_t offset = 0; - for (const auto& r : subarray) { - std::memcpy(&s[offset], r.data(), r.size()); - offset += r.size(); - } - } - void* to_set = (s.empty()) ? nullptr : &s[0]; + // Note: it is safe to use `set_subarray_safe` for `subarray` below + // because the subarray is calculated by the TileDB algorithm (it + // is not a user input prone to errors). // Create read query *query_r = new Query(storage_manager_, array_for_reads); RETURN_NOT_OK((*query_r)->set_layout(Layout::GLOBAL_ORDER)); RETURN_NOT_OK( set_query_buffers(*query_r, sparse_mode, buffers, buffer_sizes)); - RETURN_NOT_OK((*query_r)->set_subarray(to_set, true)); + RETURN_NOT_OK((*query_r)->set_subarray_unsafe(subarray)); if (array_for_reads->array_schema()->dense() && sparse_mode) RETURN_NOT_OK((*query_r)->set_sparse_mode(true)); @@ -549,7 +540,8 @@ Status Consolidator::create_queries( // Create write query *query_w = new Query(storage_manager_, array_for_writes, *new_fragment_uri); RETURN_NOT_OK((*query_w)->set_layout(Layout::GLOBAL_ORDER)); - RETURN_NOT_OK((*query_w)->set_subarray(to_set, true)); + if (array_for_reads->array_schema()->dense()) + RETURN_NOT_OK((*query_w)->set_subarray_unsafe(subarray)); RETURN_NOT_OK( set_query_buffers(*query_w, sparse_mode, buffers, buffer_sizes)); diff --git a/tiledb/sm/subarray/cell_slab_iter.cc b/tiledb/sm/subarray/cell_slab_iter.cc index 27e403abc605..df79a257ce5e 100644 --- 
a/tiledb/sm/subarray/cell_slab_iter.cc +++ b/tiledb/sm/subarray/cell_slab_iter.cc @@ -36,6 +36,7 @@ #include "tiledb/sm/array_schema/domain.h" #include "tiledb/sm/enums/layout.h" #include "tiledb/sm/misc/logger.h" +#include "tiledb/sm/misc/types.h" #include #include @@ -234,21 +235,22 @@ Status CellSlabIter::init_ranges() { // For easy reference auto dim_num = subarray_->dim_num(); auto array_schema = subarray_->array()->array_schema(); - auto array_domain = (T*)array_schema->domain()->domain(); - auto tile_extents = (T*)array_schema->domain()->tile_extents(); + auto array_domain = array_schema->domain()->domain(); uint64_t range_num; - const T* range; T tile_extent, dim_domain_start; + const tiledb::sm::Range* r; ranges_.resize(dim_num); - for (unsigned i = 0; i < dim_num; ++i) { - RETURN_NOT_OK(subarray_->get_range_num(i, &range_num)); - ranges_[i].reserve(range_num); - tile_extent = tile_extents[i]; - dim_domain_start = array_domain[2 * i]; + for (unsigned d = 0; d < dim_num; ++d) { + auto dim_dom = (const T*)array_domain[d].data(); + RETURN_NOT_OK(subarray_->get_range_num(d, &range_num)); + ranges_[d].reserve(range_num); + tile_extent = *(const T*)array_schema->domain()->tile_extent(d).data(); + dim_domain_start = dim_dom[0]; for (uint64_t j = 0; j < range_num; ++j) { - RETURN_NOT_OK(subarray_->get_range(i, j, (const void**)&range)); - create_ranges(range, tile_extent, dim_domain_start, &ranges_[i]); + RETURN_NOT_OK(subarray_->get_range(d, j, &r)); + create_ranges( + (const T*)(*r).data(), tile_extent, dim_domain_start, &ranges_[d]); } } diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index faf3a5db33bc..136108c0b76d 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -43,6 +43,7 @@ #include "tiledb/sm/rtree/rtree.h" #include +#include namespace tiledb { namespace sm { @@ -58,13 +59,13 @@ Subarray::Subarray() { tile_overlap_computed_ = false; } +Subarray::Subarray(const Array* array) + : 
Subarray(array, Layout::UNORDERED) { +} + Subarray::Subarray(const Array* array, Layout layout) : array_(array) , layout_(layout) { - auto dim_num = array->array_schema()->dim_num(); - auto domain_type = array->array_schema()->domain()->type(); - for (uint32_t i = 0; i < dim_num; ++i) - ranges_.emplace_back(domain_type); est_result_size_computed_ = false; tile_overlap_computed_ = false; add_default_ranges(); @@ -104,134 +105,46 @@ Subarray& Subarray::operator=(Subarray&& subarray) noexcept { /* API */ /* ****************************** */ -Status Subarray::add_range( - uint32_t dim_idx, const void* range, bool check_expanded_domain) { - if (range == nullptr) - return LOG_STATUS(Status::SubarrayError( - "Cannot add range to dimension; Range cannot be null")); - +Status Subarray::add_range(uint32_t dim_idx, const Range& range) { auto dim_num = array_->array_schema()->dim_num(); if (dim_idx >= dim_num) return LOG_STATUS(Status::SubarrayError( "Cannot add range to dimension; Invalid dimension index")); - auto type = array_->array_schema()->domain()->type(); - switch (type) { - case Datatype::INT8: - return add_range( - dim_idx, (const int8_t*)range, check_expanded_domain); - case Datatype::UINT8: - return add_range( - dim_idx, (const uint8_t*)range, check_expanded_domain); - case Datatype::INT16: - return add_range( - dim_idx, (const int16_t*)range, check_expanded_domain); - case Datatype::UINT16: - return add_range( - dim_idx, (const uint16_t*)range, check_expanded_domain); - case Datatype::INT32: - return add_range( - dim_idx, (const int32_t*)range, check_expanded_domain); - case Datatype::UINT32: - return add_range( - dim_idx, (const uint32_t*)range, check_expanded_domain); - case Datatype::INT64: - return add_range( - dim_idx, (const int64_t*)range, check_expanded_domain); - case Datatype::UINT64: - return add_range( - dim_idx, (const uint64_t*)range, check_expanded_domain); - case Datatype::FLOAT32: - return add_range( - dim_idx, (const float*)range, 
check_expanded_domain); - case Datatype::FLOAT64: - return add_range( - dim_idx, (const double*)range, check_expanded_domain); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return add_range( - dim_idx, (const int64_t*)range, check_expanded_domain); - default: - return LOG_STATUS(Status::SubarrayError( - "Cannot add range to dimension; Unsupported subarray domain type")); + // Must reset the result size and tile overlap + est_result_size_computed_ = false; + tile_overlap_computed_ = false; + + // Remove the default range + if (is_default_[dim_idx]) { + ranges_[dim_idx].clear(); + is_default_[dim_idx] = false; } + // Correctness checks + auto dim = array_->array_schema()->dimension(dim_idx); + RETURN_NOT_OK(dim->check_range(range)); + + // Add the range + ranges_[dim_idx].emplace_back(range); + return Status::Ok(); } -Status Subarray::add_range( - uint32_t dim_idx, const void* start, const void* end) { - if (start == nullptr || end == nullptr) - return LOG_STATUS(Status::SubarrayError( - "Cannot add range to dimension; Range start/end cannot be null")); - - auto dim_num = array_->array_schema()->dim_num(); - if (dim_idx >= dim_num) - return LOG_STATUS(Status::SubarrayError( - "Cannot add range to dimension; Invalid dimension index")); +Status Subarray::add_range_unsafe(uint32_t dim_idx, const Range& range) { + // Must reset the result size and tile overlap + est_result_size_computed_ = false; + tile_overlap_computed_ = false; - auto type = array_->array_schema()->domain()->type(); - switch (type) { - case Datatype::INT8: - return add_range( - dim_idx, (const int8_t*)start, (const int8_t*)end); - case Datatype::UINT8: 
- return add_range( - dim_idx, (const uint8_t*)start, (const uint8_t*)end); - case Datatype::INT16: - return add_range( - dim_idx, (const int16_t*)start, (const int16_t*)end); - case Datatype::UINT16: - return add_range( - dim_idx, (const uint16_t*)start, (const uint16_t*)end); - case Datatype::INT32: - return add_range( - dim_idx, (const int32_t*)start, (const int32_t*)end); - case Datatype::UINT32: - return add_range( - dim_idx, (const uint32_t*)start, (const uint32_t*)end); - case Datatype::INT64: - return add_range( - dim_idx, (const int64_t*)start, (const int64_t*)end); - case Datatype::UINT64: - return add_range( - dim_idx, (const uint64_t*)start, (const uint64_t*)end); - case Datatype::FLOAT32: - return add_range(dim_idx, (const float*)start, (const float*)end); - case Datatype::FLOAT64: - return add_range( - dim_idx, (const double*)start, (const double*)end); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return add_range( - dim_idx, (const int64_t*)start, (const int64_t*)end); - default: - return LOG_STATUS(Status::SubarrayError( - "Cannot add range to dimension; Unsupported subarray domain type")); + // Remove the default range + if (is_default_[dim_idx]) { + ranges_[dim_idx].clear(); + is_default_[dim_idx] = false; } + // Add the range + ranges_[dim_idx].emplace_back(range); + return Status::Ok(); } @@ -247,26 +160,45 @@ void Subarray::clear() { tile_overlap_computed_ = false; } +bool Subarray::coincides_with_tiles() const { + if (range_num() != 1) + return false; + + auto dim_num = array_->array_schema()->dim_num(); + for (unsigned d = 0; d < dim_num; ++d) { + auto dim = 
array_->array_schema()->dimension(d); + if (!dim->coincides_with_tiles(ranges_[d][0])) + return false; + } + + return true; +} + template Subarray Subarray::crop_to_tile(const T* tile_coords, Layout layout) const { Subarray ret(array_, layout); - const void* range; T new_range[2]; bool overlaps; // Get tile subarray based on the input coordinates + auto array_schema = array_->array_schema(); std::vector tile_subarray(2 * dim_num()); - array_->array_schema()->domain()->get_tile_subarray( - tile_coords, &tile_subarray[0]); + array_schema->domain()->get_tile_subarray(tile_coords, &tile_subarray[0]); // Compute cropped subarray for (unsigned d = 0; d < dim_num(); ++d) { - for (size_t r = 0; r < ranges_[d].range_num(); ++r) { - get_range(d, r, &range); + auto r_size = 2 * array_schema->dimension(d)->coord_size(); + for (size_t r = 0; r < ranges_[d].size(); ++r) { + const auto& range = ranges_[d][r]; utils::geometry::overlap( - (const T*)range, &tile_subarray[2 * d], 1, new_range, &overlaps); + (const T*)range.data(), + &tile_subarray[2 * d], + 1, + new_range, + &overlaps); + if (overlaps) - ret.add_range(d, new_range, true); + ret.add_range_unsafe(d, Range(new_range, r_size)); } } @@ -277,7 +209,7 @@ uint32_t Subarray::dim_num() const { return array_->array_schema()->dim_num(); } -const void* Subarray::domain() const { +NDRange Subarray::domain() const { return array_->array_schema()->domain()->domain(); } @@ -294,18 +226,18 @@ Status Subarray::get_query_type(QueryType* type) const { } Status Subarray::get_range( - uint32_t dim_idx, uint64_t range_idx, const void** range) const { + uint32_t dim_idx, uint64_t range_idx, const Range** range) const { auto dim_num = array_->array_schema()->dim_num(); if (dim_idx >= dim_num) return LOG_STATUS( Status::SubarrayError("Cannot get range; Invalid dimension index")); - auto range_num = ranges_[dim_idx].range_num(); + auto range_num = ranges_[dim_idx].size(); if (range_idx >= range_num) return LOG_STATUS( 
Status::SubarrayError("Cannot get range; Invalid range index")); - *range = ranges_[dim_idx].get_range(range_idx); + *range = &ranges_[dim_idx][range_idx]; return Status::Ok(); } @@ -320,13 +252,13 @@ Status Subarray::get_range( return LOG_STATUS( Status::SubarrayError("Cannot get range; Invalid dimension index")); - auto range_num = ranges_[dim_idx].range_num(); + auto range_num = ranges_[dim_idx].size(); if (range_idx >= range_num) return LOG_STATUS( Status::SubarrayError("Cannot get range; Invalid range index")); - *start = ranges_[dim_idx].get_range_start(range_idx); - *end = ranges_[dim_idx].get_range_end(range_idx); + *start = ranges_[dim_idx][range_idx].start(); + *end = ranges_[dim_idx][range_idx].end(); return Status::Ok(); } @@ -338,7 +270,7 @@ Status Subarray::get_range_num(uint32_t dim_idx, uint64_t* range_num) const { Status::SubarrayError("Cannot get number of ranges for a dimension; " "Invalid dimension index")); - *range_num = ranges_[dim_idx].range_num(); + *range_num = ranges_[dim_idx].size(); return Status::Ok(); } @@ -350,9 +282,9 @@ Subarray Subarray::get_subarray(uint64_t start, uint64_t end) const { auto end_coords = get_range_coords(end); auto dim_num = this->dim_num(); - for (unsigned i = 0; i < dim_num; ++i) { - for (uint64_t r = start_coords[i]; r <= end_coords[i]; ++r) { - ret.add_range(i, ranges_[i].get_range(r), true); + for (unsigned d = 0; d < dim_num; ++d) { + for (uint64_t r = start_coords[d]; r <= end_coords[d]; ++r) { + ret.add_range_unsafe(d, ranges_[d][r]); } } @@ -371,21 +303,28 @@ Subarray Subarray::get_subarray(uint64_t start, uint64_t end) const { return ret; } +bool Subarray::is_default(uint32_t dim_index) const { + return is_default_[dim_index]; +} + bool Subarray::is_set() const { - for (const auto& r : ranges_) - if (!r.has_default_range_) + for (const auto& d : is_default_) + if (d == false) return true; return false; } +bool Subarray::is_set(unsigned dim_idx) const { + assert(dim_idx < dim_num()); + return 
!is_default_[dim_idx]; +} + bool Subarray::is_unary() const { if (range_num() != 1) return false; for (const auto& range : ranges_) { - auto r = (const uint8_t*)range.get_range(0); - auto range_size = range.range_size_; - if (std::memcmp(r, r + range_size / 2, range_size / 2) != 0) + if (!range[0].unary()) return false; } @@ -396,20 +335,36 @@ bool Subarray::is_unary(uint64_t range_idx) const { auto coords = get_range_coords(range_idx); auto dim_num = this->dim_num(); - for (unsigned i = 0; i < dim_num; ++i) { - auto r = (const uint8_t*)ranges_[i].get_range(coords[i]); - auto range_size = ranges_[i].range_size_; - if (std::memcmp(r, r + range_size / 2, range_size / 2) != 0) + for (unsigned d = 0; d < dim_num; ++d) { + if (!ranges_[d][coords[d]].unary()) return false; } return true; } +void Subarray::set_is_default(uint32_t dim_index, bool is_default) { + is_default_[dim_index] = is_default; +} + void Subarray::set_layout(Layout layout) { layout_ = layout; } +Status Subarray::to_byte_vec(std::vector* byte_vec) const { + if (range_num() != 1) + return LOG_STATUS(Status::SubarrayError( + "Cannot export to byte vector; The subarray must be unary")); + + for (const auto& r : ranges_) { + auto offset = byte_vec->size(); + byte_vec->resize(offset + r[0].size()); + std::memcpy(&(*byte_vec)[offset], r[0].data(), r[0].size()); + } + + return Status::Ok(); +} + Layout Subarray::layout() const { return layout_; } @@ -581,68 +536,7 @@ uint64_t Subarray::range_num() const { uint64_t ret = 1; for (const auto& r : ranges_) - ret *= r.range_num(); - - return ret; -} - -template -std::vector Subarray::range(uint64_t range_idx) const { - std::vector ret; - uint64_t tmp_idx = range_idx; - auto dim_num = this->dim_num(); - auto cell_order = array_->array_schema()->cell_order(); - auto layout = (layout_ == Layout::UNORDERED) ? 
cell_order : layout_; - - if (layout == Layout::ROW_MAJOR) { - for (unsigned i = 0; i < dim_num; ++i) { - ret.push_back((T*)(ranges_[i].get_range(tmp_idx / range_offsets_[i]))); - tmp_idx %= range_offsets_[i]; - } - } else if (layout == Layout::COL_MAJOR) { - for (unsigned i = dim_num - 1;; --i) { - ret.push_back((T*)(ranges_[i].get_range(tmp_idx / range_offsets_[i]))); - tmp_idx %= range_offsets_[i]; - if (i == 0) - break; - } - std::reverse(ret.begin(), ret.end()); - } else { - assert(layout == Layout::GLOBAL_ORDER); - assert(range_num() == 1); - for (unsigned i = 0; i < dim_num; ++i) - ret.push_back((T*)ranges_[i].get_range(0)); - } - - return ret; -} - -std::vector Subarray::range(uint64_t range_idx) const { - std::vector ret; - uint64_t tmp_idx = range_idx; - auto dim_num = this->dim_num(); - auto cell_order = array_->array_schema()->cell_order(); - auto layout = (layout_ == Layout::UNORDERED) ? cell_order : layout_; - - if (layout == Layout::ROW_MAJOR) { - for (unsigned i = 0; i < dim_num; ++i) { - ret.push_back((ranges_[i].get_range(tmp_idx / range_offsets_[i]))); - tmp_idx %= range_offsets_[i]; - } - } else if (layout == Layout::COL_MAJOR) { - for (unsigned i = dim_num - 1;; --i) { - ret.push_back((ranges_[i].get_range(tmp_idx / range_offsets_[i]))); - tmp_idx %= range_offsets_[i]; - if (i == 0) - break; - } - std::reverse(ret.begin(), ret.end()); - } else { - assert(layout == Layout::GLOBAL_ORDER); - assert(range_num() == 1); - for (unsigned i = 0; i < dim_num; ++i) - ret.push_back(ranges_[i].get_range(0)); - } + ret *= r.size(); return ret; } @@ -653,48 +547,49 @@ NDRange Subarray::ndrange(uint64_t range_idx) const { auto dim_num = this->dim_num(); auto cell_order = array_->array_schema()->cell_order(); auto layout = (layout_ == Layout::UNORDERED) ? 
cell_order : layout_; - auto array_schema = array_->array_schema(); + // Unary case or GLOBAL_ORDER + if (range_num() == 1) { + for (unsigned d = 0; d < dim_num; ++d) + ret.emplace_back(ranges_[d][0]); + return ret; + } + + // Non-unary case (range_offsets_ must be computed) if (layout == Layout::ROW_MAJOR) { + assert(!range_offsets_.empty()); for (unsigned d = 0; d < dim_num; ++d) { - auto coord_size = array_schema->dimension(d)->coord_size(); - ret.emplace_back( - ranges_[d].get_range(tmp_idx / range_offsets_[d]), 2 * coord_size); + ret.emplace_back(ranges_[d][tmp_idx / range_offsets_[d]]); tmp_idx %= range_offsets_[d]; } } else if (layout == Layout::COL_MAJOR) { + assert(!range_offsets_.empty()); for (unsigned d = dim_num - 1;; --d) { - auto coord_size = array_schema->dimension(d)->coord_size(); - ret.emplace_back( - ranges_[d].get_range(tmp_idx / range_offsets_[d]), 2 * coord_size); + ret.emplace_back(ranges_[d][tmp_idx / range_offsets_[d]]); tmp_idx %= range_offsets_[d]; if (d == 0) break; } std::reverse(ret.begin(), ret.end()); - } else { - assert(layout == Layout::GLOBAL_ORDER); - assert(range_num() == 1); - for (unsigned d = 0; d < dim_num; ++d) { - auto coord_size = array_schema->dimension(d)->coord_size(); - ret.emplace_back(ranges_[d].get_range(0), 2 * coord_size); - } + } else { // GLOBAL_ORDER handled above + assert(false); } return ret; } -const Subarray::Ranges* Subarray::ranges_for_dim(uint32_t dim_idx) const { - return &ranges_[dim_idx]; +const std::vector& Subarray::ranges_for_dim(uint32_t dim_idx) const { + return ranges_[dim_idx]; } -Status Subarray::set_ranges_for_dim(uint32_t dim_idx, const Ranges& ranges) { - ranges_.resize(dim_idx + 1, Ranges(type())); +Status Subarray::set_ranges_for_dim( + uint32_t dim_idx, const std::vector& ranges) { + ranges_.resize(dim_idx + 1); ranges_[dim_idx] = ranges; return Status::Ok(); } -void Subarray::split( +Status Subarray::split( unsigned splitting_dim, const ByteVecValue& splitting_value, Subarray* r1, @@ 
-705,21 +600,22 @@ void Subarray::split( *r2 = Subarray(array_, layout_); auto dim_num = array_->array_schema()->dim_num(); - const void* range_1d; Range sr1, sr2; for (unsigned d = 0; d < dim_num; ++d) { - this->get_range(d, 0, &range_1d); + const auto& r = ranges_[d][0]; if (d == splitting_dim) { auto dim = array_->array_schema()->dimension(d); - dim->split_range(range_1d, splitting_value, &sr1, &sr2); - r1->add_range(d, sr1.data(), true); - r2->add_range(d, sr2.data(), true); + dim->split_range(r, splitting_value, &sr1, &sr2); + RETURN_NOT_OK(r1->add_range_unsafe(d, sr1)); + RETURN_NOT_OK(r2->add_range_unsafe(d, sr2)); } else { - r1->add_range(d, range_1d, true); - r2->add_range(d, range_1d, true); + RETURN_NOT_OK(r1->add_range_unsafe(d, r)); + RETURN_NOT_OK(r2->add_range_unsafe(d, r)); } } + + return Status::Ok(); } Status Subarray::split( @@ -736,7 +632,6 @@ Status Subarray::split( // For easy reference auto array_schema = array_->array_schema(); auto dim_num = array_schema->dim_num(); - const void* range_1d; uint64_t range_num; Range sr1, sr2; @@ -744,26 +639,26 @@ Status Subarray::split( RETURN_NOT_OK(this->get_range_num(d, &range_num)); if (d != splitting_dim) { for (uint64_t j = 0; j < range_num; ++j) { - this->get_range(d, j, &range_1d); - r1->add_range(d, range_1d); - r2->add_range(d, range_1d); + const auto& r = ranges_[d][j]; + RETURN_NOT_OK(r1->add_range_unsafe(d, r)); + RETURN_NOT_OK(r2->add_range_unsafe(d, r)); } } else { // d == splitting_dim if (splitting_range != UINT64_MAX) { // Need to split multiple ranges for (uint64_t j = 0; j <= splitting_range; ++j) { - this->get_range(d, j, &range_1d); - r1->add_range(d, range_1d); + const auto& r = ranges_[d][j]; + RETURN_NOT_OK(r1->add_range_unsafe(d, r)); } for (uint64_t j = splitting_range + 1; j < range_num; ++j) { - this->get_range(d, j, &range_1d); - r2->add_range(d, range_1d); + const auto& r = ranges_[d][j]; + RETURN_NOT_OK(r2->add_range_unsafe(d, r)); } } else { // Need to split a single range - 
this->get_range(d, 0, &range_1d); + const auto& r = ranges_[d][0]; auto dim = array_schema->dimension(d); - dim->split_range(range_1d, splitting_value, &sr1, &sr2); - r1->add_range(d, sr1.data(), true); - r2->add_range(d, sr2.data(), true); + dim->split_range(r, splitting_value, &sr1, &sr2); + RETURN_NOT_OK(r1->add_range_unsafe(d, sr1)); + RETURN_NOT_OK(r2->add_range_unsafe(d, sr2)); } } } @@ -808,83 +703,14 @@ const T* Subarray::tile_coords_ptr( /* ****************************** */ void Subarray::add_default_ranges() { - auto dim_num = array_->array_schema()->dim_num(); - auto domain = (unsigned char*)array_->array_schema()->domain()->domain(); - for (unsigned i = 0; i < dim_num; ++i) { - auto range_size = ranges_[i].range_size_; - ranges_[i].add_range(&(domain[i * range_size]), true); - } -} - -template -Status Subarray::add_range( - uint32_t dim_idx, const T* range, bool check_expanded_domain) { - assert(dim_idx < array_->array_schema()->dim_num()); - - // Must reset the result size and tile overlap - est_result_size_computed_ = false; - tile_overlap_computed_ = false; - - // Check for NaN - RETURN_NOT_OK(check_nan(range)); - - // Check range bounds - if (range[0] > range[1]) - return LOG_STATUS( - Status::SubarrayError("Cannot add range to dimension; Lower range " - "bound cannot be larger than the higher bound")); - - // Check range against the domain - auto domain = array_->array_schema()->domain(); - auto dim_domain = static_cast(domain->dimension(dim_idx)->domain()); - T low = dim_domain[0]; - T high = dim_domain[1]; - if (array_->array_schema()->dense() && check_expanded_domain) { - auto tile_extent = - *static_cast(domain->dimension(dim_idx)->tile_extent()); - high = utils::math::ceil(dim_domain[1], tile_extent) * tile_extent; - } - - if (range[0] < low || range[1] > high) - return LOG_STATUS( - Status::SubarrayError("Cannot add range to dimension; Range must be in " - "the domain the subarray is constructed from")); - - // Add the range - 
ranges_[dim_idx].add_range(range); - - return Status::Ok(); -} - -template -Status Subarray::add_range(uint32_t dim_idx, const T* start, const T* end) { - assert(dim_idx < array_->array_schema()->dim_num()); - T range[] = {*start, *end}; - - // Must reset the result size and tile overlap - est_result_size_computed_ = false; - tile_overlap_computed_ = false; - - // Check for NaN - RETURN_NOT_OK(check_nan(range)); - - // Check range bounds - if (*start > *end) - return LOG_STATUS( - Status::SubarrayError("Cannot add range to dimension; Range " - "start cannot be larger than the range end")); - - // Check range against the domain - auto domain = (const T*)array_->array_schema()->domain()->domain(); - if (*start < domain[2 * dim_idx] || *end > domain[2 * dim_idx + 1]) - return LOG_STATUS( - Status::SubarrayError("Cannot add range to dimension; Range must be in " - "the domain the subarray is constructed from")); - - // Add the range - ranges_[dim_idx].add_range(range); + auto array_schema = array_->array_schema(); + auto dim_num = array_schema->dim_num(); + auto domain = array_schema->domain()->domain(); - return Status::Ok(); + ranges_.resize(dim_num); + is_default_.resize(dim_num, true); + for (unsigned d = 0; d < dim_num; ++d) + ranges_[d].emplace_back(domain[d]); } void Subarray::compute_range_offsets() { @@ -898,15 +724,13 @@ void Subarray::compute_range_offsets() { range_offsets_.push_back(1); if (dim_num > 1) { for (unsigned int i = 1; i < dim_num; ++i) - range_offsets_.push_back( - range_offsets_.back() * ranges_[i - 1].range_num()); + range_offsets_.push_back(range_offsets_.back() * ranges_[i - 1].size()); } } else if (layout == Layout::ROW_MAJOR) { range_offsets_.push_back(1); if (dim_num > 1) { for (unsigned int i = dim_num - 2;; --i) { - range_offsets_.push_back( - range_offsets_.back() * ranges_[i + 1].range_num()); + range_offsets_.push_back(range_offsets_.back() * ranges_[i + 1].size()); if (i == 0) break; } @@ -1059,29 +883,30 @@ template void 
Subarray::compute_tile_coords_col() { std::vector> coords_set; auto array_schema = array_->array_schema(); - auto domain = (const T*)array_schema->domain()->domain(); - auto dim_num = (int)this->dim_num(); - auto tile_extents = (const T*)array_schema->domain()->tile_extents(); + auto domain = array_schema->domain()->domain(); + auto dim_num = this->dim_num(); uint64_t tile_start, tile_end; // Compute unique tile coords per dimension coords_set.resize(dim_num); - for (int i = 0; i < dim_num; ++i) { - for (uint64_t j = 0; j < ranges_[i].range_num(); ++j) { - auto r = (const T*)ranges_[i].get_range(j); - tile_start = (r[0] - domain[2 * i]) / tile_extents[i]; - tile_end = (r[1] - domain[2 * i]) / tile_extents[i]; + for (unsigned d = 0; d < dim_num; ++d) { + auto tile_extent = *(const T*)array_schema->domain()->tile_extent(d).data(); + for (uint64_t j = 0; j < ranges_[d].size(); ++j) { + auto dim_dom = (const T*)domain[d].data(); + auto r = (const T*)ranges_[d][j].data(); + tile_start = (r[0] - dim_dom[0]) / tile_extent; + tile_end = (r[1] - dim_dom[0]) / tile_extent; for (uint64_t t = tile_start; t <= tile_end; ++t) - coords_set[i].insert(t); + coords_set[d].insert(t); } } // Compute `tile_coords_` std::vector::iterator> iters; size_t tile_coords_num = 1; - for (int i = 0; i < dim_num; ++i) { - iters.push_back(coords_set[i].begin()); - tile_coords_num *= coords_set[i].size(); + for (unsigned d = 0; d < dim_num; ++d) { + iters.push_back(coords_set[d].begin()); + tile_coords_num *= coords_set[d].size(); } tile_coords_.resize(tile_coords_num); @@ -1091,12 +916,12 @@ void Subarray::compute_tile_coords_col() { size_t coord_size = sizeof(T); size_t tile_coords_pos = 0; while (iters[dim_num - 1] != coords_set[dim_num - 1].end()) { - for (int i = 0; i < dim_num; ++i) - std::memcpy(&(coords[i * coord_size]), &(*iters[i]), coord_size); + for (unsigned d = 0; d < dim_num; ++d) + std::memcpy(&(coords[d * coord_size]), &(*iters[d]), coord_size); tile_coords_[tile_coords_pos++] = 
coords; // Advance the iterators - auto d = 0; + unsigned d = 0; while (d < dim_num) { iters[d]++; if (iters[d] != coords_set[d].end()) @@ -1116,29 +941,30 @@ template void Subarray::compute_tile_coords_row() { std::vector> coords_set; auto array_schema = array_->array_schema(); - auto domain = (const T*)array_schema->domain()->domain(); + auto domain = array_schema->domain()->domain(); auto dim_num = this->dim_num(); - auto tile_extents = (const T*)array_schema->domain()->tile_extents(); uint64_t tile_start, tile_end; // Compute unique tile coords per dimension coords_set.resize(dim_num); - for (unsigned i = 0; i < dim_num; ++i) { - for (uint64_t j = 0; j < ranges_[i].range_num(); ++j) { - auto r = (const T*)ranges_[i].get_range(j); - tile_start = (r[0] - domain[2 * i]) / tile_extents[i]; - tile_end = (r[1] - domain[2 * i]) / tile_extents[i]; + for (unsigned d = 0; d < dim_num; ++d) { + auto tile_extent = *(const T*)array_schema->domain()->tile_extent(d).data(); + auto dim_dom = (const T*)domain[d].data(); + for (uint64_t j = 0; j < ranges_[d].size(); ++j) { + auto r = (const T*)ranges_[d][j].data(); + tile_start = (r[0] - dim_dom[0]) / tile_extent; + tile_end = (r[1] - dim_dom[0]) / tile_extent; for (uint64_t t = tile_start; t <= tile_end; ++t) - coords_set[i].insert(t); + coords_set[d].insert(t); } } // Compute `tile_coords_` std::vector::iterator> iters; size_t tile_coords_num = 1; - for (unsigned i = 0; i < dim_num; ++i) { - iters.push_back(coords_set[i].begin()); - tile_coords_num *= coords_set[i].size(); + for (unsigned d = 0; d < dim_num; ++d) { + iters.push_back(coords_set[d].begin()); + tile_coords_num *= coords_set[d].size(); } tile_coords_.resize(tile_coords_num); @@ -1148,8 +974,8 @@ void Subarray::compute_tile_coords_row() { size_t coord_size = sizeof(T); size_t tile_coords_pos = 0; while (iters[0] != coords_set[0].end()) { - for (unsigned i = 0; i < dim_num; ++i) - std::memcpy(&(coords[i * coord_size]), &(*iters[i]), coord_size); + for (unsigned d = 
0; d < dim_num; ++d) + std::memcpy(&(coords[d * coord_size]), &(*iters[d]), coord_size); tile_coords_[tile_coords_pos++] = coords; // Advance the iterators @@ -1209,6 +1035,7 @@ Subarray Subarray::clone() const { clone.array_ = array_; clone.layout_ = layout_; clone.ranges_ = ranges_; + clone.is_default_ = is_default_; clone.range_offsets_ = range_offsets_; clone.tile_overlap_ = tile_overlap_; clone.est_result_size_computed_ = est_result_size_computed_; @@ -1264,15 +1091,16 @@ TileOverlap Subarray::get_tile_overlap(uint64_t range_idx, unsigned fid) const { template TileOverlap Subarray::get_tile_overlap(uint64_t range_idx, unsigned fid) const { TileOverlap ret; - auto range = this->range(range_idx); + auto ndrange = this->ndrange(range_idx); // Prepare a range copy auto dim_num = array_->array_schema()->dim_num(); std::vector range_cpy; range_cpy.resize(2 * dim_num); - for (unsigned i = 0; i < dim_num; ++i) { - range_cpy[2 * i] = range[i][0]; - range_cpy[2 * i + 1] = range[i][1]; + for (unsigned d = 0; d < dim_num; ++d) { + auto r = (const T*)ndrange[d].data(); + range_cpy[2 * d] = r[0]; + range_cpy[2 * d + 1] = r[1]; } // Get tile overlap from fragment @@ -1334,6 +1162,7 @@ void Subarray::swap(Subarray& subarray) { std::swap(array_, subarray.array_); std::swap(layout_, subarray.layout_); std::swap(ranges_, subarray.ranges_); + std::swap(is_default_, subarray.is_default_); std::swap(range_offsets_, subarray.range_offsets_); std::swap(tile_overlap_, subarray.tile_overlap_); std::swap(est_result_size_computed_, subarray.est_result_size_computed_); diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 7bc82e45c2e9..545f46c4c4f3 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -124,70 +124,6 @@ class Subarray { uint64_t mem_size_var_; }; - /** - * Stores a set of 1D ranges. - */ - struct Ranges { - /** A buffer where all the ranges are appended to. 
*/ - Buffer buffer_; - - /** - * ``true`` if it has the default entire-domain range - * that must be replaced the first time a new range - * is added. - */ - bool has_default_range_ = false; - - /** The size in bytes of a range. */ - uint64_t range_size_; - - /** The datatype of the ranges. */ - Datatype type_; - - /** Constructor. */ - explicit Ranges(Datatype type) - : type_(type) { - range_size_ = 2 * datatype_size(type_); - } - - /** Adds a range to the buffer. */ - void add_range(const void* range, bool is_default = false) { - if (is_default) { - buffer_.write(range, range_size_); - has_default_range_ = true; - } else { - if (has_default_range_) { - buffer_.clear(); - has_default_range_ = false; - } - buffer_.write(range, range_size_); - } - } - - /** Gets the range at the given index. */ - const void* get_range(uint64_t idx) const { - assert(idx < range_num()); - return buffer_.data(idx * range_size_); - } - - /** Gets the range start at the given index. */ - const void* get_range_start(uint64_t idx) const { - assert(idx < range_num()); - return buffer_.data(idx * range_size_); - } - - /** Gets the range end at the given index. */ - const void* get_range_end(uint64_t idx) const { - assert(idx < range_num()); - return buffer_.data(idx * range_size_ + range_size_ / 2); - } - - /** Return the number of ranges in this object. */ - uint64_t range_num() const { - return buffer_.size() / range_size_; - } - }; - /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ @@ -195,6 +131,13 @@ class Subarray { /** Constructor. */ Subarray(); + /** + * Constructor. + * + * @param array The array the subarray is associated with. + */ + Subarray(const Array* array); + /** * Constructor. * @@ -230,47 +173,14 @@ class Subarray { /* API */ /* ********************************* */ - /** - * Adds a range along the dimension with the given index. - * - * @param dim_idx The index of the dimension to add the range to. 
- * @param range The range to be added in [low. high] format. - * @param check_expanded_domain If `true`, the subarray bounds will be - * checked against the expanded domain of the array. This is important - * in dense consolidation with space tiles not fully dividing the - * dimension domain. - */ - Status add_range( - uint32_t dim_idx, const void* range, bool check_expanded_domain = false); - - /** - * Adds a range along the dimension with the given index, in the - * form of (start, end). - */ - Status add_range(uint32_t dim_idx, const void* start, const void* end); + /** Adds a range along the dimension with the given index. */ + Status add_range(uint32_t dim_idx, const Range& range); /** - * Adds a range along the dimension with the given index. - * - * @tparam T The subarray domain type. - * @param dim_idx The index of the dimension to add the range to. - * @param range The range to add. - * @param check_expanded_domain If `true`, the subarray bounds will be - * checked against the expanded domain of the array. This is important - * in dense consolidation with space tiles not fully dividing the - * dimension domain. - * @return Status + * Adds a range along the dimension with the given index, without + * performing any error checks. */ - template - Status add_range( - uint32_t dim_idx, const T* range, bool check_expanded_domain); - - /** - * Adds a range along the dimension with the given index, in the - * form of (start, end). - */ - template - Status add_range(uint32_t dim_idx, const T* start, const T* end); + Status add_range_unsafe(uint32_t dim_idx, const Range& range); /** Returns the array the subarray is associated with. */ const Array* array() const; @@ -278,6 +188,12 @@ class Subarray { /** Clears the contents of the subarray. */ void clear(); + /** + * Returns true if the subarray is unary and it coincides with + * tile boundaries. 
+ */ + bool coincides_with_tiles() const; + /** * Computes the range offsets which are important for getting * an ND range index from a flat serialized index. @@ -318,18 +234,28 @@ class Subarray { uint32_t dim_num() const; /** Returns the domain the subarray is constructed from. */ - const void* domain() const; + NDRange domain() const; /** ``True`` if the subarray does not contain any ranges. */ bool empty() const; - /** Retrieves a range of a given dimension at a given range index. */ + /** + * Retrieves a range of a given dimension at a given range index. + * + * @note Note that the retrieved range may be invalid if + * Subarray::set_range() is called after this function. In that case, + * make sure to make a copy in the caller function. + */ Status get_range( - uint32_t dim_idx, uint64_t range_idx, const void** range) const; + uint32_t dim_idx, uint64_t range_idx, const Range** range) const; /** * Retrieves a range of a given dimension at a given range index. * The range is in the form (start, end). + * + * @note Note that the retrieved range may be invalid if + * Subarray::set_range() is called after this function. In that case, + * make sure to make a copy in the caller function. */ Status get_range( uint32_t dim_idx, @@ -340,9 +266,19 @@ class Subarray { /** Retrieves the number of ranges on the given dimension index. */ Status get_range_num(uint32_t dim_idx, uint64_t* range_num) const; + /** + * + * @param dim_index + * @return returns true if the specified dimension is set to default subarray + */ + bool is_default(uint32_t dim_index) const; + /** Returns `true` if at least one dimension has non-default ranges set. */ bool is_set() const; + /** Returns `true` if the input dimension has non-default range set. 
*/ + bool is_set(unsigned dim_idx) const; + /** * Returns ``true`` if the subarray is unary, which happens when it consists * of a single ND range **and** each 1D range is unary (i.e., consisting of @@ -404,9 +340,22 @@ class Subarray { */ Subarray get_subarray(uint64_t start, uint64_t end) const; + /** + * Set default indicator for dimension subarray. Used by serialization only + * @param dim_index + * @param is_default + */ + void set_is_default(uint32_t dim_index, bool is_default); + /** Sets the array layout. */ void set_layout(Layout layout); + /** + * Flattens the subarray ranges in a byte vector. Errors out + * if the subarray is not unary. + */ + Status to_byte_vec(std::vector* byte_vec) const; + /** Returns the subarray layout. */ Layout layout() const; @@ -416,25 +365,6 @@ class Subarray { /** The total number of multi-dimensional ranges in the subarray. */ uint64_t range_num() const; - /** - * Returns the multi-dimensional range with the input id, based on the - * order imposed on the the subarray ranges by the layout. If ``layout_`` - * is UNORDERED, then the range layout will be the same as the array's - * cell order, since this will lead to more beneficial tile access - * patterns upon a read query. - */ - template - std::vector range(uint64_t range_idx) const; - - /** - * Returns the multi-dimensional range with the input id, based on the - * order imposed on the the subarray ranges by the layout. If ``layout_`` - * is UNORDERED, then the range layout will be the same as the array's - * cell order, since this will lead to more beneficial tile access - * patterns upon a read query. - */ - std::vector range(uint64_t range_idx) const; - /** * Returns the multi-dimensional range with the input id, based on the * order imposed on the the subarray ranges by the layout. If ``layout_`` @@ -445,28 +375,28 @@ class Subarray { NDRange ndrange(uint64_t range_idx) const; /** - * Returns the `Ranges` for the given dimension index. 
+ * Returns the `Range` vector for the given dimension index. * @note Intended for serialization only */ - const Ranges* ranges_for_dim(uint32_t dim_idx) const; + const std::vector& ranges_for_dim(uint32_t dim_idx) const; /** - * Directly sets the `Ranges` for the given dimension index, making a deep - * copy of the given `Ranges` instance. + * Directly sets the `Range` vector for the given dimension index, making + * a deep copy. * * @param dim_idx Index of dimension to set - * @param ranges Ranges instance that will be copied and set + * @param ranges `Range` vector that will be copied and set * @return Status * * @note Intended for serialization only */ - Status set_ranges_for_dim(uint32_t dim_idx, const Ranges& ranges); + Status set_ranges_for_dim(uint32_t dim_idx, const std::vector& ranges); /** * Splits the subarray along the splitting dimension and value into * two new subarrays `r1` and `r2`. */ - void split( + Status split( unsigned splitting_dim, const ByteVecValue& splitting_value, Subarray* r1, @@ -538,8 +468,14 @@ class Subarray { */ Layout layout_; - /** Stores a set of ranges per dimension. */ - std::vector ranges_; + /** Stores a vector of 1D ranges per dimension. */ + std::vector> ranges_; + + /** + * One value per dimension indicating whether the (single) range set in + * `ranges_` is the default range. + */ + std::vector is_default_; /** Important for computed an ND range index from a flat serialized index. */ std::vector range_offsets_; @@ -585,27 +521,6 @@ class Subarray { */ void add_default_ranges(); - /** Checks if the input range contains NaN. This is a noop for integers. */ - template < - typename T, - typename std::enable_if::value>::type* = nullptr> - Status check_nan(const T* range) const { - (void)range; - return Status::Ok(); - } - - /** Checks if the input range contains NaN. 
*/ - template < - typename T, - typename std::enable_if::value>::type* = nullptr> - Status check_nan(const T* range) const { - // Check for NaN - if (std::isnan(range[0]) || std::isnan(range[1])) - return LOG_STATUS(Status::SubarrayError( - "Cannot add range to dimension; Range contains NaN")); - return Status::Ok(); - } - /** Computes the estimated result size for all attributes. */ Status compute_est_result_size(); diff --git a/tiledb/sm/subarray/subarray_partitioner.cc b/tiledb/sm/subarray/subarray_partitioner.cc index 059c45e1b6fc..89012c92c27d 100644 --- a/tiledb/sm/subarray/subarray_partitioner.cc +++ b/tiledb/sm/subarray/subarray_partitioner.cc @@ -536,7 +536,6 @@ void SubarrayPartitioner::compute_splitting_value_on_tiles( auto array_schema = subarray_.array()->array_schema(); auto dim_num = subarray_.array()->array_schema()->dim_num(); auto layout = subarray_.array()->array_schema()->tile_order(); - const void* r_v; *splitting_dim = UINT32_MAX; std::vector dims; @@ -549,15 +548,15 @@ void SubarrayPartitioner::compute_splitting_value_on_tiles( } // Compute splitting dimension and value + const Range* r; for (auto d : dims) { auto dim = array_schema->domain()->dimension(d); - auto r_size = 2 * dim->coord_size(); - range.get_range(d, 0, &r_v); - Range r(r_v, r_size); - auto tiles_apart = dim->tile_num(r) - 1; + range.get_range(d, 0, &r); + auto tiles_apart = dim->tile_num(*r) - 1; if (tiles_apart != 0) { *splitting_dim = d; - dim->ceil_to_tile(r, MAX(1, floor(tiles_apart / 2)) - 1, splitting_value); + dim->ceil_to_tile( + *r, MAX(1, floor(tiles_apart / 2)) - 1, splitting_value); *unsplittable = false; break; } @@ -593,7 +592,6 @@ void SubarrayPartitioner::compute_splitting_value_single_range( layout = (layout == Layout::UNORDERED || layout == Layout::GLOBAL_ORDER) ? 
cell_order : layout; - const void* r_v; *splitting_dim = UINT32_MAX; std::vector dims; @@ -606,15 +604,13 @@ void SubarrayPartitioner::compute_splitting_value_single_range( } // Compute splitting dimension and value - Range r; + const Range* r; for (auto d : dims) { auto dim = array_schema->dimension(d); - auto r_size = 2 * dim->coord_size(); - range.get_range(d, 0, &r_v); - r.set_range(r_v, r_size); - if (!r.unary()) { + range.get_range(d, 0, &r); + if (!r->unary()) { *splitting_dim = d; - dim->splitting_value(r, splitting_value, unsplittable); + dim->splitting_value(*r, splitting_value, unsplittable); // Splitting dim/value found if (!*unsplittable) @@ -647,7 +643,6 @@ void SubarrayPartitioner::compute_splitting_value_multi_range( auto dim_num = array_schema->dim_num(); auto cell_order = array_schema->cell_order(); layout = (layout == Layout::UNORDERED) ? cell_order : layout; - const void* r_v; *splitting_dim = UINT32_MAX; uint64_t range_num; @@ -661,7 +656,7 @@ void SubarrayPartitioner::compute_splitting_value_multi_range( } // Compute splitting dimension, range and value - Range r; + const Range* r; for (auto d : dims) { // Check if we need to split the multiple ranges partition.get_range_num(d, &range_num); @@ -674,13 +669,11 @@ void SubarrayPartitioner::compute_splitting_value_multi_range( } // Check if we need to split single range - partition.get_range(d, 0, &r_v); + partition.get_range(d, 0, &r); auto dim = array_schema->dimension(d); - auto r_size = 2 * dim->coord_size(); - r.set_range(r_v, r_size); - if (!r.unary()) { + if (!r->unary()) { *splitting_dim = d; - dim->splitting_value(r, splitting_value, unsplittable); + dim->splitting_value(*r, splitting_value, unsplittable); break; } } @@ -802,7 +795,7 @@ Status SubarrayPartitioner::split_top_single_range(bool* unsplittable) { // Split remaining range into two ranges Subarray r1, r2; - range.split(splitting_dim, splitting_value, &r1, &r2); + RETURN_NOT_OK(range.split(splitting_dim, splitting_value, &r1, 
&r2)); // Update list state_.single_range_.pop_front();