From 1ddc1c13c29429c7ba82603e11c7967c4324cacc Mon Sep 17 00:00:00 2001 From: Stavros Papadopoulos Date: Fri, 21 Feb 2020 13:05:03 -0500 Subject: [PATCH] Refactored Subarray and SubarrayPartitioner towards addressing #93 --- tiledb/sm/array_schema/dimension.cc | 257 ++++++++++++++++- tiledb/sm/array_schema/dimension.h | 70 ++++- tiledb/sm/array_schema/domain.cc | 273 +----------------- tiledb/sm/array_schema/domain.h | 97 +------ tiledb/sm/misc/types.h | 11 + tiledb/sm/query/reader.cc | 95 +++--- tiledb/sm/query/reader.h | 10 +- tiledb/sm/subarray/subarray.cc | 290 ++++++++----------- tiledb/sm/subarray/subarray.h | 59 ++-- tiledb/sm/subarray/subarray_partitioner.cc | 320 +++++++-------------- tiledb/sm/subarray/subarray_partitioner.h | 44 +-- 11 files changed, 657 insertions(+), 869 deletions(-) diff --git a/tiledb/sm/array_schema/dimension.cc b/tiledb/sm/array_schema/dimension.cc index 357176f0a5e..784a8a18cbf 100644 --- a/tiledb/sm/array_schema/dimension.cc +++ b/tiledb/sm/array_schema/dimension.cc @@ -51,6 +51,7 @@ Dimension::Dimension() { domain_ = nullptr; tile_extent_ = nullptr; type_ = Datatype::INT32; + set_ceil_to_tile_func(); set_compute_mbr_func(); set_crop_range_func(); set_domain_range_func(); @@ -61,6 +62,8 @@ Dimension::Dimension() { set_covered_func(); set_overlap_func(); set_overlap_ratio_func(); + set_split_range_func(); + set_splitting_value_func(); set_tile_num_func(); set_value_in_range_func(); } @@ -70,6 +73,7 @@ Dimension::Dimension(const std::string& name, Datatype type) , type_(type) { domain_ = nullptr; tile_extent_ = nullptr; + set_ceil_to_tile_func(); set_compute_mbr_func(); set_crop_range_func(); set_domain_range_func(); @@ -80,6 +84,8 @@ Dimension::Dimension(const std::string& name, Datatype type) set_covered_func(); set_overlap_func(); set_overlap_ratio_func(); + set_split_range_func(); + set_splitting_value_func(); set_tile_num_func(); set_value_in_range_func(); } @@ -218,6 +224,7 @@ Status Dimension::deserialize(ConstBuffer* buff, 
Datatype type) { RETURN_NOT_OK(buff->read(tile_extent_, datatype_size(type_))); } + set_ceil_to_tile_func(); set_compute_mbr_func(); set_crop_range_func(); set_domain_range_func(); @@ -228,6 +235,8 @@ Status Dimension::deserialize(ConstBuffer* buff, Datatype type) { set_covered_func(); set_overlap_func(); set_overlap_ratio_func(); + set_split_range_func(); + set_splitting_value_func(); set_tile_num_func(); set_value_in_range_func(); @@ -268,6 +277,34 @@ bool Dimension::is_anonymous() const { utils::parse::starts_with(name_, constants::default_dim_name); } +template +void Dimension::ceil_to_tile( + const Dimension* dim, const Range& r, uint64_t tile_num, ByteVecValue* v) { + assert(dim != nullptr); + assert(!r.empty()); + assert(v != nullptr); + assert(dim->tile_extent() != nullptr); + + auto tile_extent = *(const T*)dim->tile_extent(); + auto dim_dom = (const T*)dim->domain(); + v->resize(sizeof(T)); + auto r_t = (const T*)r.data(); + + T mid = r_t[0] + (tile_num + 1) * tile_extent; + uint64_t div = (mid - dim_dom[0]) / tile_extent; + auto floored_mid = (T)div * tile_extent + dim_dom[0]; + T sp = (std::numeric_limits::is_integer) ? 
+ floored_mid - 1 : + std::nextafter(floored_mid, std::numeric_limits::lowest()); + std::memcpy(&(*v)[0], &sp, sizeof(T)); +} + +void Dimension::ceil_to_tile( + const Range& r, uint64_t tile_num, ByteVecValue* v) const { + assert(ceil_to_tile_func_ != nullptr); + ceil_to_tile_func_(this, r, tile_num, v); +} + template void Dimension::compute_mbr(const Tile& tile, Range* mbr) { assert(mbr != nullptr); @@ -309,13 +346,14 @@ template uint64_t Dimension::domain_range(const Range& range) { assert(!range.empty()); - if (&typeid(T) == &typeid(float) || &typeid(T) == &typeid(double)) - return 0; + // Inapplicable to real domains + if (!std::is_integral::value) + return std::numeric_limits::max(); auto r = (const T*)range.data(); uint64_t ret = r[1] - r[0]; if (ret == std::numeric_limits::max()) // overflow - return 0; + return ret; ++ret; return ret; @@ -474,6 +512,54 @@ double Dimension::overlap_ratio(const Range& r1, const Range& r2) const { return overlap_ratio_func_(r1, r2); } +template +void Dimension::split_range( + const void* r, const ByteVecValue& v, Range* r1, Range* r2) { + assert(r != nullptr); + assert(!v.empty()); + assert(r1 != nullptr); + assert(r2 != nullptr); + + auto max = std::numeric_limits::max(); + bool int_domain = std::numeric_limits::is_integer; + auto r_t = (const T*)r; + auto v_t = *(const T*)(&v[0]); + + T ret[2]; + ret[0] = r_t[0]; + ret[1] = v_t; + r1->set_range(ret, sizeof(ret)); + ret[0] = (int_domain) ? 
(v_t + 1) : std::nextafter(v_t, max); + ret[1] = r_t[1]; + r2->set_range(ret, sizeof(ret)); +} + +void Dimension::split_range( + const void* r, const ByteVecValue& v, Range* r1, Range* r2) const { + assert(split_range_func_ != nullptr); + split_range_func_(r, v, r1, r2); +} + +template +void Dimension::splitting_value( + const Range& r, ByteVecValue* v, bool* unsplittable) { + assert(!r.empty()); + assert(v != nullptr); + assert(unsplittable != nullptr); + + auto r_t = (const T*)r.data(); + T sp = r_t[0] + (r_t[1] - r_t[0]) / 2; + v->resize(sizeof(T)); + std::memcpy(&(*v)[0], &sp, sizeof(T)); + *unsplittable = !std::memcmp(&sp, &r_t[1], sizeof(T)); +} + +void Dimension::splitting_value( + const Range& r, ByteVecValue* v, bool* unsplittable) const { + assert(splitting_value_func_ != nullptr); + splitting_value_func_(r, v, unsplittable); +} + template uint64_t Dimension::tile_num(const Dimension* dim, const Range& range) { assert(dim != nullptr); @@ -482,14 +568,12 @@ uint64_t Dimension::tile_num(const Dimension* dim, const Range& range) { // Trivial cases if (dim->tile_extent() == nullptr) return 1; - if (!std::is_integral::value) - return 0; auto tile_extent = *(const T*)dim->tile_extent(); auto dim_dom = (const T*)dim->domain(); auto r = (const T*)range.data(); - uint64_t start = (r[0] - dim_dom[0]) / tile_extent; - uint64_t end = (r[1] - dim_dom[0]) / tile_extent; + uint64_t start = floor((r[0] - dim_dom[0]) / tile_extent); + uint64_t end = floor((r[1] - dim_dom[0]) / tile_extent); return end - start + 1; } @@ -948,6 +1032,59 @@ void Dimension::set_domain_range_func() { } } +void Dimension::set_ceil_to_tile_func() { + switch (type_) { + case Datatype::INT32: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::INT64: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::INT8: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::UINT8: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::INT16: + ceil_to_tile_func_ = 
ceil_to_tile; + break; + case Datatype::UINT16: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::UINT32: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::UINT64: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::FLOAT32: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::FLOAT64: + ceil_to_tile_func_ = ceil_to_tile; + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + ceil_to_tile_func_ = ceil_to_tile; + break; + default: + ceil_to_tile_func_ = nullptr; + break; + } +} + void Dimension::set_compute_mbr_func() { switch (type_) { case Datatype::INT32: @@ -1372,6 +1509,112 @@ void Dimension::set_overlap_ratio_func() { } } +void Dimension::set_split_range_func() { + switch (type_) { + case Datatype::INT32: + split_range_func_ = split_range; + break; + case Datatype::INT64: + split_range_func_ = split_range; + break; + case Datatype::INT8: + split_range_func_ = split_range; + break; + case Datatype::UINT8: + split_range_func_ = split_range; + break; + case Datatype::INT16: + split_range_func_ = split_range; + break; + case Datatype::UINT16: + split_range_func_ = split_range; + break; + case Datatype::UINT32: + split_range_func_ = split_range; + break; + case Datatype::UINT64: + split_range_func_ = split_range; + break; + case Datatype::FLOAT32: + split_range_func_ = split_range; + break; + case Datatype::FLOAT64: + split_range_func_ = split_range; + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case 
Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + split_range_func_ = split_range; + break; + default: + split_range_func_ = nullptr; + break; + } +} + +void Dimension::set_splitting_value_func() { + switch (type_) { + case Datatype::INT32: + splitting_value_func_ = splitting_value; + break; + case Datatype::INT64: + splitting_value_func_ = splitting_value; + break; + case Datatype::INT8: + splitting_value_func_ = splitting_value; + break; + case Datatype::UINT8: + splitting_value_func_ = splitting_value; + break; + case Datatype::INT16: + splitting_value_func_ = splitting_value; + break; + case Datatype::UINT16: + splitting_value_func_ = splitting_value; + break; + case Datatype::UINT32: + splitting_value_func_ = splitting_value; + break; + case Datatype::UINT64: + splitting_value_func_ = splitting_value; + break; + case Datatype::FLOAT32: + splitting_value_func_ = splitting_value; + break; + case Datatype::FLOAT64: + splitting_value_func_ = splitting_value; + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + splitting_value_func_ = splitting_value; + break; + default: + splitting_value_func_ = nullptr; + break; + } +} + void Dimension::set_tile_num_func() { switch (type_) { case Datatype::INT32: diff --git a/tiledb/sm/array_schema/dimension.h b/tiledb/sm/array_schema/dimension.h index c5cb20d9568..3c2d93b9391 100644 --- a/tiledb/sm/array_schema/dimension.h +++ b/tiledb/sm/array_schema/dimension.h @@ -115,6 +115,20 @@ class Dimension { /** Returns true if 
this is an anonymous (unlabled) dimension **/ bool is_anonymous() const; + /** + * Retrieves the value `v` that lies at the end (ceil) of the tile + * that is `tile_num` tiles apart from the beginning of `r`. + */ + void ceil_to_tile(const Range& r, uint64_t tile_num, ByteVecValue* v) const; + + /** + * Returns the value that lies at the end (ceil) of the tile + * that is `tile_num` tiles apart from the beginning of `r`. + */ + template + static void ceil_to_tile( + const Dimension* dim, const Range& r, uint64_t tile_num, ByteVecValue* v); + /** * Computed the minimum bounding range of the values stored in * `tile`. @@ -150,7 +164,7 @@ class Dimension { /** * Returns the domain range (high - low + 1) of the input - * 1D range. It returns 0 in case the dimension datatype + * 1D range. It returns MAX_UINT64 in case the dimension datatype * is not integer or if there is an overflow. */ template @@ -231,6 +245,30 @@ class Dimension { template static double overlap_ratio(const Range& r1, const Range& r2); + /** Splits `r` at point `v`, producing 1D ranges `r1` and `r2`. */ + void split_range( + const void* r, const ByteVecValue& v, Range* r1, Range* r2) const; + + /** Splits `r` at point `v`, producing 1D ranges `r1` and `r2`. */ + template + static void split_range( + const void* r, const ByteVecValue& v, Range* r1, Range* r2); + + /** + * Computes the splitting point `v` of `r`, and sets `unsplittable` + * to true if `r` cannot be split. + */ + void splitting_value( + const Range& r, ByteVecValue* v, bool* unsplittable) const; + + /** + * Computes the splitting point `v` of `r`, and sets `unsplittable` + * to true if `r` cannot be split. + */ + template + static void splitting_value( + const Range& r, ByteVecValue* v, bool* unsplittable); + /** Return the number of tiles the input range intersects. */ uint64_t tile_num(const Range& range) const; @@ -300,6 +338,13 @@ class Dimension { /** The dimension type. 
*/ Datatype type_; + /** + * Stores the appropriate templated ceil_to_tile() function based on the + * dimension datatype. + */ + std::function + ceil_to_tile_func_; + /** * Stores the appropriate templated compute_mbr() function based on the * dimension datatype. @@ -361,6 +406,20 @@ class Dimension { */ std::function overlap_ratio_func_; + /** + * Stores the appropriate templated split_range() function based on the + * dimension datatype. + */ + std::function + split_range_func_; + + /** + * Stores the appropriate templated splitting_value() function based on the + * dimension datatype. + */ + std::function + splitting_value_func_; + /** * Stores the appropriate templated tile_num() function based on the * dimension datatype. @@ -454,6 +513,9 @@ class Dimension { template Status check_tile_extent() const; + /** Sets the templated ceil_to_tile() function. */ + void set_ceil_to_tile_func(); + /** Sets the templated compute_mbr() function. */ void set_compute_mbr_func(); @@ -484,6 +546,12 @@ class Dimension { /** Sets the templated overlap_ratio() function. */ void set_overlap_ratio_func(); + /** Sets the templated split_range() function. */ + void set_split_range_func(); + + /** Sets the templated splitting_value() function. */ + void set_splitting_value_func(); + /** Sets the templated tile_num() function. 
*/ void set_tile_num_func(); diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index c30b0aa512a..a3115432bb0 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -131,249 +131,10 @@ Layout Domain::cell_order() const { return cell_order_; } -template -T Domain::floor_to_tile(T value, unsigned dim_idx) const { - auto domain = (T*)domain_; - auto tile_extents = (T*)tile_extents_; - - if (tile_extents_ == nullptr) - return domain[2 * dim_idx]; - - uint64_t div = (value - domain[2 * dim_idx]) / tile_extents[dim_idx]; - return (T)div * tile_extents[dim_idx] + domain[2 * dim_idx]; -} - Layout Domain::tile_order() const { return tile_order_; } -Status Domain::split_subarray( - void* subarray, Layout layout, void** subarray_1, void** subarray_2) const { - switch (type_) { - case Datatype::INT8: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::UINT8: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::INT16: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::UINT16: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::INT32: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::UINT32: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::INT64: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::UINT64: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::FLOAT32: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::FLOAT64: - return split_subarray(subarray, layout, subarray_1, subarray_2); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case 
Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return split_subarray(subarray, layout, subarray_1, subarray_2); - default: - return LOG_STATUS(Status::DomainError( - "Cannot split subarray; Unsupported domain type")); - } - - return Status::Ok(); -} - -template -Status Domain::split_subarray( - void* subarray, Layout layout, void** subarray_1, void** subarray_2) const { - switch (layout) { - case Layout::GLOBAL_ORDER: - return split_subarray_global(subarray, subarray_1, subarray_2); - case Layout::ROW_MAJOR: - case Layout::COL_MAJOR: - return split_subarray_cell(subarray, layout, subarray_1, subarray_2); - default: - return LOG_STATUS( - Status::DomainError("Cannot split subarray; Unsupported layout")); - } - - return Status::Ok(); -} - -template -Status Domain::split_subarray_global( - void* subarray, void** subarray_1, void** subarray_2) const { - // Find dimension to split by tile - auto s = (T*)subarray; - int dim_to_split = -1; - auto tile_extents = (T*)tile_extents_; - auto domain = (T*)domain_; - uint64_t tiles_apart = 0; - - if (tile_extents != nullptr) { - if (tile_order_ == Layout::ROW_MAJOR) { - for (int i = 0; i < (int)dim_num_; ++i) { - tiles_apart = - (T)floor(((s[2 * i + 1] - domain[2 * i]) / tile_extents[i])) - - (T)floor(((s[2 * i] - domain[2 * i]) / tile_extents[i])); - if (tiles_apart != 0) { - // Not in the same tile - can split - dim_to_split = i; - break; - } - } - } else { - for (int i = (int)dim_num_ - 1;; --i) { - tiles_apart = - (T)floor(((s[2 * i + 1] - domain[2 * i]) / tile_extents[i])) - - (T)floor(((s[2 * i] - domain[2 * i]) / tile_extents[i])); - if (tiles_apart != 0) { - // Not in the same tile - can split - dim_to_split = i; - break; - } - if (i == 0) - break; - } - } - } - - // Cannot split by tile, split by cell - if (dim_to_split == -1) - return split_subarray_cell( - subarray, cell_order_, subarray_1, 
subarray_2); - - // Split by tile - *subarray_1 = std::malloc(2 * dim_num_ * sizeof(T)); - if (*subarray_1 == nullptr) - return LOG_STATUS( - Status::DomainError("Cannot split subarray; Memory allocation failed")); - *subarray_2 = std::malloc(2 * dim_num_ * sizeof(T)); - if (*subarray_2 == nullptr) { - std::free(subarray_1); - *subarray_1 = nullptr; - return LOG_STATUS( - Status::DomainError("Cannot split subarray; Memory allocation failed")); - } - auto s1 = (T*)(*subarray_1); - auto s2 = (T*)(*subarray_2); - - for (int i = 0; i < (int)dim_num_; ++i) { - if (i != dim_to_split) { - s1[2 * i] = s[2 * i]; - s1[2 * i + 1] = s[2 * i + 1]; - s2[2 * i] = s[2 * i]; - s2[2 * i + 1] = s[2 * i + 1]; - } else { - s1[2 * i] = s[2 * i]; - s1[2 * i + 1] = - s1[2 * i] + std::max(1, floor(tiles_apart / 2)) * tile_extents[i]; - - if (std::numeric_limits::is_integer) { - s1[2 * i + 1] = floor_to_tile(s1[2 * i + 1], i) - 1; - s2[2 * i] = s1[2 * i + 1] + 1; - } else { - s2[2 * i] = floor_to_tile(s1[2 * i + 1], i); - s1[2 * i + 1] = - std::nextafter(s2[2 * i], std::numeric_limits::lowest()); - } - s2[2 * i + 1] = s[2 * i + 1]; - - assert(s1[2 * i + 1] >= s1[2 * i]); - assert(s2[2 * i + 1] >= s2[2 * i]); - } - } - - return Status::Ok(); -} - -template -Status Domain::split_subarray_cell( - void* subarray, - Layout cell_layout, - void** subarray_1, - void** subarray_2) const { - // Find dimension to split - auto s = (T*)subarray; - int dim_to_split = -1; - - if (cell_layout == Layout::ROW_MAJOR) { - for (int i = 0; i < (int)dim_num_; ++i) { - if (s[2 * i] != s[2 * i + 1]) { - dim_to_split = i; - break; - } - } - } else { - for (int i = (int)dim_num_ - 1;; --i) { - if (s[2 * i] != s[2 * i + 1]) { - dim_to_split = i; - break; - } - if (i == 0) - break; - } - } - - // Cannot split - if (dim_to_split == -1) { - *subarray_1 = nullptr; - *subarray_2 = nullptr; - return Status::Ok(); - } - - // Split - *subarray_1 = std::malloc(2 * dim_num_ * sizeof(T)); - if (*subarray_1 == nullptr) - return 
LOG_STATUS( - Status::DomainError("Cannot split subarray; Memory allocation failed")); - *subarray_2 = std::malloc(2 * dim_num_ * sizeof(T)); - if (*subarray_2 == nullptr) { - std::free(subarray_1); - *subarray_1 = nullptr; - return LOG_STATUS( - Status::DomainError("Cannot split subarray; Memory allocation failed")); - } - auto s1 = (T*)(*subarray_1); - auto s2 = (T*)(*subarray_2); - for (int i = 0; i < (int)dim_num_; ++i) { - if (i != dim_to_split) { - s1[2 * i] = s[2 * i]; - s1[2 * i + 1] = s[2 * i + 1]; - s2[2 * i] = s[2 * i]; - s2[2 * i + 1] = s[2 * i + 1]; - } else { - s1[2 * i] = s[2 * i]; - if (std::numeric_limits::is_integer) { // Integers - s1[2 * i + 1] = s[2 * i] + (s[2 * i + 1] - s[2 * i]) / 2; - s2[2 * i] = s1[2 * i + 1] + 1; - } else { // Reals - if (std::nextafter(s[2 * i], std::numeric_limits::max()) == - s[2 * i + 1]) { - s1[2 * i + 1] = s[2 * i]; - s2[2 * i] = s[2 * i + 1]; - } else { - s1[2 * i + 1] = s[2 * i] + (s[2 * i + 1] - s[2 * i]) / 2; - s2[2 * i] = - std::nextafter(s1[2 * i + 1], std::numeric_limits::max()); - } - } - s2[2 * i + 1] = s[2 * i + 1]; - } - } - - return Status::Ok(); -} - Status Domain::add_dimension(const Dimension* dim) { // Set domain type and do sanity check if (dim_num_ == 0) @@ -888,16 +649,15 @@ uint64_t Domain::tile_num(const NDRange& ndrange) const { uint64_t Domain::cell_num(const NDRange& ndrange) const { assert(!ndrange.empty()); - uint64_t cell_num = 1, range, prod; + uint64_t cell_num = 1, range; for (unsigned d = 0; d < dim_num_; ++d) { range = dimensions_[d]->domain_range(ndrange[d]); - if (range == 0) // Real dimension domain or overflow - return 0; + if (range == std::numeric_limits::max()) // Overflow + return range; - prod = range * cell_num; - if (prod / range != cell_num) // Overflow - return 0; - cell_num = prod; + cell_num = utils::math::safe_mul(range, cell_num); + if (cell_num == std::numeric_limits::max()) // Overflow + return cell_num; } return cell_num; @@ -2000,27 +1760,6 @@ template uint64_t 
Domain::get_cell_pos_row( template uint64_t Domain::get_cell_pos_row( const uint64_t* subarray, const uint64_t* coords) const; -template int8_t Domain::floor_to_tile( - int8_t value, unsigned dim_idx) const; -template uint8_t Domain::floor_to_tile( - uint8_t value, unsigned dim_idx) const; -template int16_t Domain::floor_to_tile( - int16_t value, unsigned dim_idx) const; -template uint16_t Domain::floor_to_tile( - uint16_t value, unsigned dim_idx) const; -template int32_t Domain::floor_to_tile( - int32_t value, unsigned dim_idx) const; -template uint32_t Domain::floor_to_tile( - uint32_t value, unsigned dim_idx) const; -template int64_t Domain::floor_to_tile( - int64_t value, unsigned dim_idx) const; -template uint64_t Domain::floor_to_tile( - uint64_t value, unsigned dim_idx) const; -template float Domain::floor_to_tile( - float value, unsigned dim_idx) const; -template double Domain::floor_to_tile( - double value, unsigned dim_idx) const; - template uint64_t Domain::stride(Layout subarray_layout) const; template uint64_t Domain::stride(Layout subarray_layout) const; template uint64_t Domain::stride(Layout subarray_layout) const; diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index ff60cab625e..c346756e6cd 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -79,99 +79,6 @@ class Domain { /* API */ /* ********************************* */ - /** - * Floors the value such that it coincides with the largest start of a tile - * that is smaller than value, on a given dimension. If there are no tile - * extents, then the returned value is the start of the domain on the input - * dimension. - * - * @tparam T The domain type. - * @param value The value to be floored. - * @param dim_idx The targeted dimension. - * @return The floored value. - */ - template - T floor_to_tile(T value, unsigned dim_idx) const; - - /** - * Splits the input subarray in half, in a way that the input layout is - * respected. 
This means that if the two resulting subarrays were to - * be issued as consecutive queries with the input layout, the retrieved - * result would be correct (i.e., the resulting cells would respect the - * input layout). - * - * @param subarray The input subarray. - * @param layout The query layout. - * @param subarray_1 The first subarray resulting from the split. - * @param subarray_2 The second subarray resulting from the split. - * @return Status - */ - Status split_subarray( - void* subarray, - Layout layout, - void** subarray_1, - void** subarray_2) const; - - /** - * Splits the input subarray in half, in a way that the input layout is - * respected. This means that if the two resulting subarrays were to - * be issued as consecutive queries with the input layout, the retrieved - * result would be correct (i.e., the resulting cells would respect the - * input layout). - * - * @tparam T The domain type. - * @param subarray The input subarray. - * @param layout The query layout. - * @param subarray_1 The first subarray resulting from the split. - * @param subarray_2 The second subarray resulting from the split. - * @return Status - */ - template - Status split_subarray( - void* subarray, - Layout layout, - void** subarray_1, - void** subarray_2) const; - - /** - * Splits the input subarray in half, in a way that the global layout is - * respected. This means that if the two resulting subarrays were to - * be issued as consecutive queries with the input layout, the retrieved - * result would be correct (i.e., the resulting cells would respect the - * global layout). - * - * @tparam T The domain type. - * @param subarray The input subarray. - * @param layout The query layout. - * @param subarray_1 The first subarray resulting from the split. - * @param subarray_2 The second subarray resulting from the split. 
- * @return Status - */ - template - Status split_subarray_global( - void* subarray, void** subarray_1, void** subarray_2) const; - - /** - * Splits the input subarray in half, in a way that the input cell layout is - * respected. This means that if the two resulting subarrays were to - * be issued as consecutive queries with the cell layout, the retrieved - * result would be correct (i.e., the resulting cells would respect the - * input layout). - * - * @tparam T The domain type. - * @param subarray The input subarray. - * @param cell_layout The cell layout. - * @param subarray_1 The first subarray resulting from the split. - * @param subarray_2 The second subarray resulting from the split. - * @return Status - */ - template - Status split_subarray_cell( - void* subarray, - Layout cell_layout, - void** subarray_1, - void** subarray_2) const; - /** * Adds a dimension to the domain. * @@ -540,9 +447,9 @@ class Domain { /** * Returns the number of cells in the input range. - * If there is an overflow, then the function returns 0. + * If there is an overflow, then the function returns MAX_UINT64. * If at least one dimension had a non-integer domain, the - * functuon returns 0. + * functuon returns MAX_UINT64. */ uint64_t cell_num(const NDRange& ndrange) const; diff --git a/tiledb/sm/misc/types.h b/tiledb/sm/misc/types.h index 19ec073330c..4c79cd897de 100644 --- a/tiledb/sm/misc/types.h +++ b/tiledb/sm/misc/types.h @@ -33,6 +33,7 @@ #ifndef TILEDB_TYPES_H #define TILEDB_TYPES_H +#include #include #include @@ -101,6 +102,13 @@ class Range { return range_ == r.range_; } + /** Returns true if the range start is the same as its end. */ + bool unary() const { + assert(!range_.empty()); + return !std::memcmp( + &range_[0], &range_[range_.size() / 2], range_.size() / 2); + } + private: /** The range as a flat byte vector.*/ std::vector range_; @@ -109,6 +117,9 @@ class Range { /** An N-dimensional range, consisting of a vector of 1D ranges. 
*/ typedef std::vector NDRange; +/** A value as a vector of bytes. */ +typedef std::vector ByteVecValue; + /** Contains the buffer(s) and buffer size(s) for some attribute / dimension. */ struct QueryBuffer { /** diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 52305944c26..775981f3eb5 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -260,52 +260,6 @@ Reader::ReadState* Reader::read_state() { return &read_state_; } -Status Reader::read() { - auto coords_type = array_schema_->coords_type(); - switch (coords_type) { - case Datatype::INT8: - return read(); - case Datatype::UINT8: - return read(); - case Datatype::INT16: - return read(); - case Datatype::UINT16: - return read(); - case Datatype::INT32: - return read(); - case Datatype::UINT32: - return read(); - case Datatype::INT64: - return read(); - case Datatype::UINT64: - return read(); - case Datatype::FLOAT32: - return read(); - case Datatype::FLOAT64: - return read(); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return read(); - default: - return LOG_STATUS( - Status::ReaderError("Cannot read; Unsupported domain type")); - } - - return Status::Ok(); -} - -template Status Reader::read() { STATS_FUNC_IN(reader_read); @@ -326,7 +280,7 @@ Status Reader::read() { // Perform read if (array_schema_->dense() && !sparse_mode_) { - RETURN_NOT_OK(dense_read()); + RETURN_NOT_OK(dense_read()); } else { RETURN_NOT_OK(sparse_read()); } @@ -335,7 +289,7 @@ Status Reader::read() { // without advancing to the next partition if (read_state_.overflowed_) { zero_out_buffer_sizes(); - RETURN_NOT_OK(read_state_.split_current()); + 
RETURN_NOT_OK(read_state_.split_current()); if (read_state_.unsplittable_) return Status::Ok(); @@ -1342,6 +1296,51 @@ Status Reader::dedup_result_coords( STATS_FUNC_OUT(reader_dedup_coords); } +Status Reader::dense_read() { + auto coords_type = array_schema_->coords_type(); + switch (coords_type) { + case Datatype::INT8: + return dense_read(); + case Datatype::UINT8: + return dense_read(); + case Datatype::INT16: + return dense_read(); + case Datatype::UINT16: + return dense_read(); + case Datatype::INT32: + return dense_read(); + case Datatype::UINT32: + return dense_read(); + case Datatype::INT64: + return dense_read(); + case Datatype::UINT64: + return dense_read(); + case Datatype::FLOAT32: + return dense_read(); + case Datatype::FLOAT64: + return dense_read(); + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + return dense_read(); + default: + return LOG_STATUS(Status::ReaderError( + "Cannot read dense array; Unsupported domain type")); + } + + return Status::Ok(); +} + template Status Reader::dense_read() { STATS_FUNC_IN(reader_dense_read); diff --git a/tiledb/sm/query/reader.h b/tiledb/sm/query/reader.h index e7f2f0ba723..c33d9912cfb 100644 --- a/tiledb/sm/query/reader.h +++ b/tiledb/sm/query/reader.h @@ -97,9 +97,8 @@ class Reader { * by the reader when the current partition was estimated to fit * the results, but that was not eventually true. */ - template Status split_current() { - return partitioner_.split_current(&unsplittable_); + return partitioner_.split_current(&unsplittable_); } }; @@ -241,10 +240,6 @@ class Reader { /** Performs a read query using its set members. 
*/ Status read(); - /** Performs a read query (applicable when setting a Subarray). */ - template - Status read(); - /** Sets the array. */ void set_array(const Array* array); @@ -752,6 +747,9 @@ class Reader { */ Status dedup_result_coords(std::vector* result_coords) const; + /** Performs a read on a dense array. */ + Status dense_read(); + /** * Performs a read on a dense array. * diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index cbc87d7a0e4..faf3a5db33b 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -239,33 +239,6 @@ const Array* Subarray::array() const { return array_; } -template -uint64_t Subarray::cell_num(uint64_t range_idx) const { - // Special case if it unary - if (is_unary(range_idx)) - return 1; - - // Inapplicable to non-unary real ranges - if (!std::is_integral::value) - return UINT64_MAX; - - uint64_t ret = 1, length; - auto range = this->range(range_idx); - - for (const auto& r : range) { - // The code below essentially computes - // ret *= r[1] - r[0] + 1; - // while performing overflow checks - length = r[1] - r[0]; - if (length == UINT64_MAX) // overflow - return UINT64_MAX; - ++length; - ret = utils::math::safe_mul(length, ret); - } - - return ret; -} - void Subarray::clear() { ranges_.clear(); range_offsets_.clear(); @@ -274,51 +247,6 @@ void Subarray::clear() { tile_overlap_computed_ = false; } -Status Subarray::compute_tile_overlap() { - auto type = array_->array_schema()->domain()->type(); - switch (type) { - case Datatype::INT8: - return compute_tile_overlap(); - case Datatype::UINT8: - return compute_tile_overlap(); - case Datatype::INT16: - return compute_tile_overlap(); - case Datatype::UINT16: - return compute_tile_overlap(); - case Datatype::INT32: - return compute_tile_overlap(); - case Datatype::UINT32: - return compute_tile_overlap(); - case Datatype::INT64: - return compute_tile_overlap(); - case Datatype::UINT64: - return compute_tile_overlap(); - case 
Datatype::FLOAT32: - return compute_tile_overlap(); - case Datatype::FLOAT64: - return compute_tile_overlap(); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return compute_tile_overlap(); - default: - return LOG_STATUS(Status::SubarrayError( - "Failed to compute tile overlap; unsupported domain type")); - } - - return Status::Ok(); -} - template Subarray Subarray::crop_to_tile(const T* tile_coords, Layout layout) const { Subarray ret(array_, layout); @@ -415,7 +343,6 @@ Status Subarray::get_range_num(uint32_t dim_idx, uint64_t* range_num) const { return Status::Ok(); } -template Subarray Subarray::get_subarray(uint64_t start, uint64_t end) const { Subarray ret(array_, layout_); @@ -767,6 +694,83 @@ Status Subarray::set_ranges_for_dim(uint32_t dim_idx, const Ranges& ranges) { return Status::Ok(); } +void Subarray::split( + unsigned splitting_dim, + const ByteVecValue& splitting_value, + Subarray* r1, + Subarray* r2) const { + assert(r1 != nullptr); + assert(r2 != nullptr); + *r1 = Subarray(array_, layout_); + *r2 = Subarray(array_, layout_); + + auto dim_num = array_->array_schema()->dim_num(); + const void* range_1d; + + Range sr1, sr2; + for (unsigned d = 0; d < dim_num; ++d) { + this->get_range(d, 0, &range_1d); + if (d == splitting_dim) { + auto dim = array_->array_schema()->dimension(d); + dim->split_range(range_1d, splitting_value, &sr1, &sr2); + r1->add_range(d, sr1.data(), true); + r2->add_range(d, sr2.data(), true); + } else { + r1->add_range(d, range_1d, true); + r2->add_range(d, range_1d, true); + } + } +} + +Status Subarray::split( + uint64_t splitting_range, + unsigned splitting_dim, + const 
ByteVecValue& splitting_value, + Subarray* r1, + Subarray* r2) const { + assert(r1 != nullptr); + assert(r2 != nullptr); + *r1 = Subarray(array_, layout_); + *r2 = Subarray(array_, layout_); + + // For easy reference + auto array_schema = array_->array_schema(); + auto dim_num = array_schema->dim_num(); + const void* range_1d; + uint64_t range_num; + Range sr1, sr2; + + for (unsigned d = 0; d < dim_num; ++d) { + RETURN_NOT_OK(this->get_range_num(d, &range_num)); + if (d != splitting_dim) { + for (uint64_t j = 0; j < range_num; ++j) { + this->get_range(d, j, &range_1d); + r1->add_range(d, range_1d); + r2->add_range(d, range_1d); + } + } else { // d == splitting_dim + if (splitting_range != UINT64_MAX) { // Need to split multiple ranges + for (uint64_t j = 0; j <= splitting_range; ++j) { + this->get_range(d, j, &range_1d); + r1->add_range(d, range_1d); + } + for (uint64_t j = splitting_range + 1; j < range_num; ++j) { + this->get_range(d, j, &range_1d); + r2->add_range(d, range_1d); + } + } else { // Need to split a single range + this->get_range(d, 0, &range_1d); + auto dim = array_schema->dimension(d); + dim->split_range(range_1d, splitting_value, &sr1, &sr2); + r1->add_range(d, sr1.data(), true); + r2->add_range(d, sr2.data(), true); + } + } + } + + return Status::Ok(); +} + const std::vector>& Subarray::tile_coords() const { return tile_coords_; } @@ -924,56 +928,7 @@ Status Subarray::compute_est_result_size() { if (est_result_size_computed_) return Status::Ok(); - auto type = array_->array_schema()->domain()->type(); - switch (type) { - case Datatype::INT8: - return compute_est_result_size(); - case Datatype::UINT8: - return compute_est_result_size(); - case Datatype::INT16: - return compute_est_result_size(); - case Datatype::UINT16: - return compute_est_result_size(); - case Datatype::INT32: - return compute_est_result_size(); - case Datatype::UINT32: - return compute_est_result_size(); - case Datatype::INT64: - return compute_est_result_size(); - case 
Datatype::UINT64: - return compute_est_result_size(); - case Datatype::FLOAT32: - return compute_est_result_size(); - case Datatype::FLOAT64: - return compute_est_result_size(); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return compute_est_result_size(); - default: - return LOG_STATUS(Status::SubarrayError( - "Cannot compute estimated results size; unsupported domain type")); - } - - return Status::Ok(); -} - -template -Status Subarray::compute_est_result_size() { - if (est_result_size_computed_) - return Status::Ok(); - - RETURN_NOT_OK(compute_tile_overlap()); + RETURN_NOT_OK(compute_tile_overlap()); std::mutex mtx; @@ -995,7 +950,7 @@ Status Subarray::compute_est_result_size() { bool var_size = (a == attribute_num) ? 
false : attributes[a]->var_size(); ResultSize result_size; RETURN_NOT_OK( - compute_est_result_size(attr_name, i, var_size, &result_size)); + compute_est_result_size(attr_name, i, var_size, &result_size)); std::lock_guard block(mtx); est_result_size_vec[a].size_fixed_ += result_size.size_fixed_; est_result_size_vec[a].size_var_ += result_size.size_var_; @@ -1027,7 +982,6 @@ Status Subarray::compute_est_result_size() { return Status::Ok(); } -template Status Subarray::compute_est_result_size( const std::string& attr_name, uint64_t range_idx, @@ -1037,6 +991,7 @@ Status Subarray::compute_est_result_size( auto fragment_num = array_->fragment_metadata().size(); ResultSize ret{0.0, 0.0, 0, 0}; auto array_schema = array_->array_schema(); + auto domain = array_schema->domain(); auto encryption_key = array_->encryption_key(); uint64_t size; @@ -1083,7 +1038,7 @@ Status Subarray::compute_est_result_size( // Calibrate result - applicable only to arrays without coordinate duplicates if (!array_->array_schema()->allows_dups()) { uint64_t max_size_fixed, max_size_var = UINT64_MAX; - auto cell_num = this->cell_num(range_idx); + auto cell_num = domain->cell_num(this->ndrange(range_idx)); if (var_size) { max_size_fixed = utils::math::safe_mul(cell_num, constants::cell_var_offset_size); @@ -1214,7 +1169,6 @@ void Subarray::compute_tile_coords_row() { tile_coords_map_[tile_coords_[i]] = i; } -template Status Subarray::compute_tile_overlap() { if (tile_overlap_computed_) return Status::Ok(); @@ -1234,8 +1188,7 @@ Status Subarray::compute_tile_overlap() { auto statuses = parallel_for_2d( 0, fragment_num, 0, range_num, [&](unsigned i, uint64_t j) { if (meta[i]->dense()) { // Dense fragment - auto range = this->range(j); - tile_overlap_[i][j] = get_tile_overlap(range, i); + tile_overlap_[i][j] = get_tile_overlap(j, i); } else { // Sparse fragment const auto& range = this->ndrange(j); RETURN_NOT_OK(meta[i]->get_tile_overlap( @@ -1265,10 +1218,53 @@ Subarray Subarray::clone() const { 
return clone; } +TileOverlap Subarray::get_tile_overlap(uint64_t range_idx, unsigned fid) const { + auto type = array_->array_schema()->domain()->type(); + switch (type) { + case Datatype::INT8: + return get_tile_overlap(range_idx, fid); + case Datatype::UINT8: + return get_tile_overlap(range_idx, fid); + case Datatype::INT16: + return get_tile_overlap(range_idx, fid); + case Datatype::UINT16: + return get_tile_overlap(range_idx, fid); + case Datatype::INT32: + return get_tile_overlap(range_idx, fid); + case Datatype::UINT32: + return get_tile_overlap(range_idx, fid); + case Datatype::INT64: + return get_tile_overlap(range_idx, fid); + case Datatype::UINT64: + return get_tile_overlap(range_idx, fid); + case Datatype::FLOAT32: + return get_tile_overlap(range_idx, fid); + case Datatype::FLOAT64: + return get_tile_overlap(range_idx, fid); + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + return get_tile_overlap(range_idx, fid); + default: + assert(false); + } + return TileOverlap(); +} + template -TileOverlap Subarray::get_tile_overlap( - const std::vector& range, unsigned fid) const { +TileOverlap Subarray::get_tile_overlap(uint64_t range_idx, unsigned fid) const { TileOverlap ret; + auto range = this->range(range_idx); // Prepare a range copy auto dim_num = array_->array_schema()->dim_num(); @@ -1346,38 +1342,6 @@ void Subarray::swap(Subarray& subarray) { } // Explicit instantiations -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) 
const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; -template Subarray Subarray::get_subarray( - uint64_t start, uint64_t end) const; - -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; -template uint64_t Subarray::cell_num(uint64_t range_idx) const; - template void Subarray::compute_tile_coords(); template void Subarray::compute_tile_coords(); template void Subarray::compute_tile_coords(); diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 18690e1fd0d..7bc82e45c2e 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -275,14 +275,6 @@ class Subarray { /** Returns the array the subarray is associated with. */ const Array* array() const; - /** - * Returns the number of cells in the ND range with the input id. - * If the domain is huge and the number of cells overflows, the - * function returns UINT64_MAX. - */ - template - uint64_t cell_num(uint64_t range_idx) const; - /** Clears the contents of the subarray. 
*/ void clear(); @@ -302,14 +294,12 @@ class Subarray { * Computes the estimated result size (calibrated using the maximum size) * for a given attribute and range id, for all fragments. * - * @tparam T The domain type. * @param attr_name The name of the attribute to focus on. * @param range_idx The id of the subarray range to focus on. * @param var_size Whether the attribute is var-sized or not. * @param result_size The result size to be retrieved. * @return Status */ - template Status compute_est_result_size( const std::string& attr_name, uint64_t range_idx, @@ -407,13 +397,11 @@ class Subarray { * Returns a subarray consisting of the ranges specified by * the input. * - * @tparam T The domain type. * @param start The subarray will be constructed from ranges in * interval ``[start, end]`` in the flattened range order. * @param end The subarray will be constructed from ranges in * interval ``[start, end]`` in the flattened range order. */ - template Subarray get_subarray(uint64_t start, uint64_t end) const; /** Sets the array layout. */ @@ -474,6 +462,27 @@ class Subarray { */ Status set_ranges_for_dim(uint32_t dim_idx, const Ranges& ranges); + /** + * Splits the subarray along the splitting dimension and value into + * two new subarrays `r1` and `r2`. + */ + void split( + unsigned splitting_dim, + const ByteVecValue& splitting_value, + Subarray* r1, + Subarray* r2) const; + + /** + * Splits the subarray along the splitting range, dimension and value + * into two new subarrays `r1` and `r2`. + */ + Status split( + uint64_t splitting_range, + unsigned splitting_dim, + const ByteVecValue& splitting_value, + Subarray* r1, + Subarray* r2) const; + /** * Returns the (unique) coordinates of all the tiles that the subarray * ranges intersect with. @@ -600,10 +609,6 @@ class Subarray { /** Computes the estimated result size for all attributes. */ Status compute_est_result_size(); - /** Computes the estimated result size for all attributes. 
*/ - template - Status compute_est_result_size(); - /** * Compute `tile_coords_` and `tile_coords_map_`. The coordinates will * be sorted on col-major tile order. @@ -622,28 +627,30 @@ class Subarray { template void compute_tile_coords_row(); - /** - * Computes the tile overlap with all subarray ranges for - * all fragments. - */ - template - Status compute_tile_overlap(); - /** Returns a deep copy of this Subarray. */ Subarray clone() const; + /** + * Compute the tile overlap between ``range`` and the non-empty domain + * of the input fragment. Applicable only to dense fragments. + * + * @param range_idx The id of the range to compute the overlap with. + * @param fid The id of the fragment to focus on. + * @return The tile overlap. + */ + TileOverlap get_tile_overlap(uint64_t range_idx, unsigned fid) const; + /** * Compute the tile overlap between ``range`` and the non-empty domain * of the input fragment. Applicable only to dense fragments. * * @tparam T The domain data type. - * @param range The range to compute the overlap with. + * @param range_idx The id of the range to compute the overlap with. * @param fid The id of the fragment to focus on. * @return The tile overlap. 
*/ template - TileOverlap get_tile_overlap( - const std::vector& range, unsigned fid) const; + TileOverlap get_tile_overlap(uint64_t range_idx, unsigned fid) const; /** * Swaps the contents (all field values) of this subarray with the diff --git a/tiledb/sm/subarray/subarray_partitioner.cc b/tiledb/sm/subarray/subarray_partitioner.cc index 5b8ac492317..059c45e1b6f 100644 --- a/tiledb/sm/subarray/subarray_partitioner.cc +++ b/tiledb/sm/subarray/subarray_partitioner.cc @@ -34,6 +34,7 @@ #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" +#include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/domain.h" #include "tiledb/sm/enums/layout.h" @@ -213,52 +214,6 @@ Status SubarrayPartitioner::get_memory_budget( return Status::Ok(); } -Status SubarrayPartitioner::next(bool* unsplittable) { - auto type = subarray_.array()->array_schema()->domain()->type(); - switch (type) { - case Datatype::INT8: - return next(unsplittable); - case Datatype::UINT8: - return next(unsplittable); - case Datatype::INT16: - return next(unsplittable); - case Datatype::UINT16: - return next(unsplittable); - case Datatype::INT32: - return next(unsplittable); - case Datatype::UINT32: - return next(unsplittable); - case Datatype::INT64: - return next(unsplittable); - case Datatype::UINT64: - return next(unsplittable); - case Datatype::FLOAT32: - return next(unsplittable); - case Datatype::FLOAT64: - return next(unsplittable); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return next(unsplittable); - default: - return 
LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get next partition; Unsupported subarray domain type")); - } - - return Status::Ok(); -} - -template Status SubarrayPartitioner::next(bool* unsplittable) { *unsplittable = false; @@ -267,22 +222,22 @@ Status SubarrayPartitioner::next(bool* unsplittable) { // Handle single range partitions, remaining from previous iteration if (!state_.single_range_.empty()) - return next_from_single_range(unsplittable); + return next_from_single_range(unsplittable); // Handle multi-range partitions, remaining from slab splits if (!state_.multi_range_.empty()) - return next_from_multi_range(unsplittable); + return next_from_multi_range(unsplittable); // Find the [start, end] of the subarray ranges that fit in the budget bool interval_found; - RETURN_NOT_OK(compute_current_start_end(&interval_found)); + RETURN_NOT_OK(compute_current_start_end(&interval_found)); // Single-range partition that must be split // Note: this applies only to UNORDERED and GLOBAL_ORDER layouts, // since otherwise we may have to calibrate the range start and end if (!interval_found && (subarray_.layout() == Layout::UNORDERED || subarray_.layout() == Layout::GLOBAL_ORDER)) - return next_from_single_range(unsplittable); + return next_from_single_range(unsplittable); // An interval of whole ranges that may need calibration bool must_split_slab; @@ -291,7 +246,7 @@ Status SubarrayPartitioner::next(bool* unsplittable) { // Handle case the next partition is composed of whole ND ranges if (interval_found && !must_split_slab) { current_.partition_ = - std::move(subarray_.get_subarray(current_.start_, current_.end_)); + subarray_.get_subarray(current_.start_, current_.end_); current_.split_multi_range_ = false; state_.start_ = current_.end_ + 1; @@ -299,7 +254,7 @@ Status SubarrayPartitioner::next(bool* unsplittable) { } // Must split a multi-range subarray slab - return next_from_multi_range(unsplittable); + return next_from_multi_range(unsplittable); } Status 
SubarrayPartitioner::set_result_budget( @@ -363,7 +318,6 @@ Status SubarrayPartitioner::set_memory_budget( return Status::Ok(); } -template Status SubarrayPartitioner::split_current(bool* unsplittable) { *unsplittable = false; @@ -372,8 +326,8 @@ Status SubarrayPartitioner::split_current(bool* unsplittable) { if (state_.multi_range_.empty()) state_.start_ = current_.start_; state_.multi_range_.push_front(current_.partition_); - split_top_multi_range(unsplittable); - return next_from_multi_range(unsplittable); + split_top_multi_range(unsplittable); + return next_from_multi_range(unsplittable); } // Current came from retrieving a multi-range partition from subarray @@ -384,7 +338,7 @@ Status SubarrayPartitioner::split_current(bool* unsplittable) { range_num * (1 - constants::multi_range_reduction_in_split); current_.end_ = current_.start_ + (uint64_t)new_range_num - 1; current_.partition_ = - std::move(subarray_.get_subarray(current_.start_, current_.end_)); + subarray_.get_subarray(current_.start_, current_.end_); state_.start_ = current_.end_ + 1; return Status::Ok(); } @@ -393,8 +347,8 @@ Status SubarrayPartitioner::split_current(bool* unsplittable) { if (state_.single_range_.empty()) state_.start_--; state_.single_range_.push_front(current_.partition_); - split_top_single_range(unsplittable); - return next_from_single_range(unsplittable); + split_top_single_range(unsplittable); + return next_from_single_range(unsplittable); } const SubarrayPartitioner::State* SubarrayPartitioner::state() const { @@ -520,7 +474,6 @@ SubarrayPartitioner SubarrayPartitioner::clone() const { return clone; } -template Status SubarrayPartitioner::compute_current_start_end(bool* found) { // Preparation auto array_schema = subarray_.array()->array_schema(); @@ -538,7 +491,7 @@ Status SubarrayPartitioner::compute_current_start_end(bool* found) { auto attr_name = budget_it.first; auto var_size = array_schema->var_size(attr_name); Subarray::ResultSize est_size; - 
RETURN_NOT_OK(subarray_.compute_est_result_size( + RETURN_NOT_OK(subarray_.compute_est_result_size( attr_name, current_.end_, var_size, &est_size)); auto& cur_size = cur_sizes[attr_name]; auto& mem_size = mem_sizes[attr_name]; @@ -571,28 +524,21 @@ Status SubarrayPartitioner::compute_current_start_end(bool* found) { return Status::Ok(); } -template -void SubarrayPartitioner::compute_splitting_point_on_tiles( +void SubarrayPartitioner::compute_splitting_value_on_tiles( const Subarray& range, unsigned* splitting_dim, - T* splitting_point, + ByteVecValue* splitting_value, bool* unsplittable) { assert(range.layout() == Layout::GLOBAL_ORDER); *unsplittable = true; // For easy reference auto array_schema = subarray_.array()->array_schema(); - auto domain = (const T*)array_schema->domain()->domain(); - auto tile_extents = (const T*)array_schema->domain()->tile_extents(); auto dim_num = subarray_.array()->array_schema()->dim_num(); auto layout = subarray_.array()->array_schema()->tile_order(); const void* r_v; *splitting_dim = UINT32_MAX; - // Trivial case - if (tile_extents == nullptr) - return; - std::vector dims; if (layout == Layout::ROW_MAJOR) { for (unsigned i = 0; i < dim_num; ++i) @@ -602,21 +548,16 @@ void SubarrayPartitioner::compute_splitting_point_on_tiles( dims.push_back(dim_num - i - 1); } - // Compute splitting dimension and point - for (auto i : dims) { - range.get_range(i, 0, &r_v); - auto r = (T*)r_v; - auto tiles_apart = floor(((r[1] - domain[2 * i]) / tile_extents[i])) - - floor(((r[0] - domain[2 * i]) / tile_extents[i])); + // Compute splitting dimension and value + for (auto d : dims) { + auto dim = array_schema->domain()->dimension(d); + auto r_size = 2 * dim->coord_size(); + range.get_range(d, 0, &r_v); + Range r(r_v, r_size); + auto tiles_apart = dim->tile_num(r) - 1; if (tiles_apart != 0) { - *splitting_dim = i; - T mid = r[0] + MAX(1, floor(tiles_apart / 2)) * tile_extents[i]; - T floored_mid = array_schema->domain()->floor_to_tile(mid, i); - if 
(std::numeric_limits::is_integer) - *splitting_point = floored_mid - 1; - else - *splitting_point = - std::nextafter(floored_mid, std::numeric_limits::lowest()); + *splitting_dim = d; + dim->ceil_to_tile(r, MAX(1, floor(tiles_apart / 2)) - 1, splitting_value); *unsplittable = false; break; } @@ -625,26 +566,28 @@ void SubarrayPartitioner::compute_splitting_point_on_tiles( // TODO (sp): in the future this can be more sophisticated, taking into // TODO (sp): account MBRs (i.e., the distirbution of the data) as well -template -void SubarrayPartitioner::compute_splitting_point_single_range( +void SubarrayPartitioner::compute_splitting_value_single_range( const Subarray& range, unsigned* splitting_dim, - T* splitting_point, + ByteVecValue* splitting_value, bool* unsplittable) { // Special case for global order if (subarray_.layout() == Layout::GLOBAL_ORDER) { - compute_splitting_point_on_tiles( - range, splitting_dim, splitting_point, unsplittable); + compute_splitting_value_on_tiles( + range, splitting_dim, splitting_value, unsplittable); + // Splitting dim/value found if (!*unsplittable) - return; // Splitting dim/point found + return; + // Else `range` is contained within a tile. - // The rest of the function will find the splitting dim/point + // The rest of the function will find the splitting dim/value } // For easy reference - auto dim_num = subarray_.array()->array_schema()->dim_num(); - auto cell_order = subarray_.array()->array_schema()->cell_order(); + auto array_schema = subarray_.array()->array_schema(); + auto dim_num = array_schema->dim_num(); + auto cell_order = array_schema->cell_order(); assert(!range.is_unary()); auto layout = subarray_.layout(); layout = (layout == Layout::UNORDERED || layout == Layout::GLOBAL_ORDER) ? 
@@ -655,23 +598,28 @@ void SubarrayPartitioner::compute_splitting_point_single_range( std::vector dims; if (layout == Layout::ROW_MAJOR) { - for (unsigned i = 0; i < dim_num; ++i) - dims.push_back(i); + for (unsigned d = 0; d < dim_num; ++d) + dims.push_back(d); } else { - for (unsigned i = 0; i < dim_num; ++i) - dims.push_back(dim_num - i - 1); + for (unsigned d = 0; d < dim_num; ++d) + dims.push_back(dim_num - d - 1); } - // Compute splitting dimension and point - for (auto i : dims) { - range.get_range(i, 0, &r_v); - auto r = (T*)r_v; - if (std::memcmp(r, &r[1], sizeof(T)) != 0) { - *splitting_dim = i; - *splitting_point = r[0] + (r[1] - r[0]) / 2; - *unsplittable = !std::memcmp(splitting_point, &r[1], sizeof(T)); + // Compute splitting dimension and value + Range r; + for (auto d : dims) { + auto dim = array_schema->dimension(d); + auto r_size = 2 * dim->coord_size(); + range.get_range(d, 0, &r_v); + r.set_range(r_v, r_size); + if (!r.unary()) { + *splitting_dim = d; + dim->splitting_value(r, splitting_value, unsplittable); + + // Splitting dim/value found if (!*unsplittable) - break; // Splitting dim/point found + break; + // Else continue to the next dimension } } @@ -679,25 +627,25 @@ void SubarrayPartitioner::compute_splitting_point_single_range( assert(*splitting_dim != UINT32_MAX); } -template -void SubarrayPartitioner::compute_splitting_point_multi_range( +void SubarrayPartitioner::compute_splitting_value_multi_range( unsigned* splitting_dim, uint64_t* splitting_range, - T* splitting_point, + ByteVecValue* splitting_value, bool* unsplittable) { const auto& partition = state_.multi_range_.front(); // Single-range partittion if (partition.range_num() == 1) { - compute_splitting_point_single_range( - partition, splitting_dim, splitting_point, unsplittable); + compute_splitting_value_single_range( + partition, splitting_dim, splitting_value, unsplittable); return; } // Multi-range partition auto layout = subarray_.layout(); - auto dim_num = 
subarray_.array()->array_schema()->dim_num(); - auto cell_order = subarray_.array()->array_schema()->cell_order(); + auto array_schema = subarray_.array()->array_schema(); + auto dim_num = array_schema->dim_num(); + auto cell_order = array_schema->cell_order(); layout = (layout == Layout::UNORDERED) ? cell_order : layout; const void* r_v; *splitting_dim = UINT32_MAX; @@ -705,32 +653,34 @@ void SubarrayPartitioner::compute_splitting_point_multi_range( std::vector dims; if (layout == Layout::ROW_MAJOR) { - for (unsigned i = 0; i < dim_num; ++i) - dims.push_back(i); + for (unsigned d = 0; d < dim_num; ++d) + dims.push_back(d); } else { - for (unsigned i = 0; i < dim_num; ++i) - dims.push_back(dim_num - i - 1); + for (unsigned d = 0; d < dim_num; ++d) + dims.push_back(dim_num - d - 1); } - // Compute splitting dimension, range and point - for (auto i : dims) { + // Compute splitting dimension, range and value + Range r; + for (auto d : dims) { // Check if we need to split the multiple ranges - partition.get_range_num(i, &range_num); + partition.get_range_num(d, &range_num); if (range_num > 1) { - assert(i == dims.back()); - *splitting_dim = i; + assert(d == dims.back()); + *splitting_dim = d; *splitting_range = (range_num - 1) / 2; *unsplittable = false; break; } // Check if we need to split single range - partition.get_range(i, 0, &r_v); - auto r = (T*)r_v; - if (std::memcmp(r, &r[1], sizeof(T)) != 0) { - *splitting_dim = i; - *splitting_point = r[0] + (r[1] - r[0]) / 2; - *unsplittable = !std::memcmp(splitting_point, &r[1], sizeof(T)); + partition.get_range(d, 0, &r_v); + auto dim = array_schema->dimension(d); + auto r_size = 2 * dim->coord_size(); + r.set_range(r_v, r_size); + if (!r.unary()) { + *splitting_dim = d; + dim->splitting_value(r, splitting_value, unsplittable); break; } } @@ -738,7 +688,6 @@ void SubarrayPartitioner::compute_splitting_point_multi_range( assert(*splitting_dim != UINT32_MAX); } -template bool SubarrayPartitioner::must_split(Subarray* 
partition) { auto array_schema = subarray_.array()->array_schema(); bool must_split = false; @@ -774,13 +723,12 @@ bool SubarrayPartitioner::must_split(Subarray* partition) { return must_split; } -template Status SubarrayPartitioner::next_from_multi_range(bool* unsplittable) { // A new multi-range subarray may need to be put in the list and split if (state_.multi_range_.empty()) { - auto s = subarray_.get_subarray(current_.start_, current_.end_); + auto s = subarray_.get_subarray(current_.start_, current_.end_); state_.multi_range_.push_front(std::move(s)); - split_top_multi_range(unsplittable); + split_top_multi_range(unsplittable); } // Loop until you find a partition that fits or unsplittable @@ -788,9 +736,9 @@ Status SubarrayPartitioner::next_from_multi_range(bool* unsplittable) { bool must_split; do { auto& partition = state_.multi_range_.front(); - must_split = this->must_split(&partition); + must_split = this->must_split(&partition); if (must_split) - RETURN_NOT_OK(split_top_multi_range(unsplittable)); + RETURN_NOT_OK(split_top_multi_range(unsplittable)); } while (must_split && !*unsplittable); } @@ -804,13 +752,12 @@ Status SubarrayPartitioner::next_from_multi_range(bool* unsplittable) { return Status::Ok(); } -template Status SubarrayPartitioner::next_from_single_range(bool* unsplittable) { // Handle case where a new single range must be put in the list and split if (state_.single_range_.empty()) { - auto s = subarray_.get_subarray(current_.start_, current_.end_); + auto s = subarray_.get_subarray(current_.start_, current_.end_); state_.single_range_.push_front(std::move(s)); - split_top_single_range(unsplittable); + split_top_single_range(unsplittable); } // Loop until you find a partition that fits or unsplittable @@ -818,9 +765,9 @@ Status SubarrayPartitioner::next_from_single_range(bool* unsplittable) { bool must_split; do { auto& partition = state_.single_range_.front(); - must_split = this->must_split(&partition); + must_split = 
this->must_split(&partition); if (must_split) - RETURN_NOT_OK(split_top_single_range(unsplittable)); + RETURN_NOT_OK(split_top_single_range(unsplittable)); } while (must_split && !*unsplittable); } @@ -834,14 +781,9 @@ Status SubarrayPartitioner::next_from_single_range(bool* unsplittable) { return Status::Ok(); } -template Status SubarrayPartitioner::split_top_single_range(bool* unsplittable) { // For easy reference const auto& range = state_.single_range_.front(); - auto dim_num = subarray_.dim_num(); - auto max = std::numeric_limits::max(); - bool int_domain = std::numeric_limits::is_integer; - const void* range_1d; // Check if unsplittable if (range.is_unary()) { @@ -849,35 +791,18 @@ Status SubarrayPartitioner::split_top_single_range(bool* unsplittable) { return Status::Ok(); } - // Finding splitting point - T splitting_point; + // Finding splitting value + ByteVecValue splitting_value; unsigned splitting_dim; - compute_splitting_point_single_range( - range, &splitting_dim, &splitting_point, unsplittable); + compute_splitting_value_single_range( + range, &splitting_dim, &splitting_value, unsplittable); if (*unsplittable) return Status::Ok(); // Split remaining range into two ranges - Subarray r1(subarray_.array(), subarray_.layout()); - Subarray r2(subarray_.array(), subarray_.layout()); - - for (unsigned i = 0; i < dim_num; ++i) { - range.get_range(i, 0, &range_1d); - if (i == splitting_dim) { - T r[2]; - r[0] = ((const T*)range_1d)[0]; - r[1] = splitting_point; - r1.add_range(i, r, true); - r[0] = (int_domain) ? 
(splitting_point + 1) : - std::nextafter(splitting_point, max); - r[1] = ((const T*)range_1d)[1]; - r2.add_range(i, r, true); - } else { - r1.add_range(i, range_1d, true); - r2.add_range(i, range_1d, true); - } - } + Subarray r1, r2; + range.split(splitting_dim, splitting_value, &r1, &r2); // Update list state_.single_range_.pop_front(); @@ -887,15 +812,9 @@ Status SubarrayPartitioner::split_top_single_range(bool* unsplittable) { return Status::Ok(); } -template Status SubarrayPartitioner::split_top_multi_range(bool* unsplittable) { // For easy reference const auto& partition = state_.multi_range_.front(); - auto dim_num = subarray_.dim_num(); - auto max = std::numeric_limits::max(); - bool int_domain = std::numeric_limits::is_integer; - const void* range_1d; - uint64_t range_num; // Check if unsplittable if (partition.is_unary()) { @@ -903,12 +822,12 @@ Status SubarrayPartitioner::split_top_multi_range(bool* unsplittable) { return Status::Ok(); } - // Finding splitting point + // Finding splitting value unsigned splitting_dim; uint64_t splitting_range = UINT64_MAX; - T splitting_point; - compute_splitting_point_multi_range( - &splitting_dim, &splitting_range, &splitting_point, unsplittable); + ByteVecValue splitting_value; + compute_splitting_value_multi_range( + &splitting_dim, &splitting_range, &splitting_value, unsplittable); if (*unsplittable) return Status::Ok(); @@ -916,38 +835,8 @@ Status SubarrayPartitioner::split_top_multi_range(bool* unsplittable) { // Split partition into two partitions Subarray p1(subarray_.array(), subarray_.layout()); Subarray p2(subarray_.array(), subarray_.layout()); - - for (unsigned i = 0; i < dim_num; ++i) { - RETURN_NOT_OK(partition.get_range_num(i, &range_num)); - if (i != splitting_dim) { - for (uint64_t j = 0; j < range_num; ++j) { - partition.get_range(i, j, &range_1d); - p1.add_range(i, range_1d); - p2.add_range(i, range_1d); - } - } else { // i == splitting_dim - if (splitting_range != UINT64_MAX) { // Need to split 
multiple ranges - for (uint64_t j = 0; j <= splitting_range; ++j) { - partition.get_range(i, j, &range_1d); - p1.add_range(i, range_1d); - } - for (uint64_t j = splitting_range + 1; j < range_num; ++j) { - partition.get_range(i, j, &range_1d); - p2.add_range(i, range_1d); - } - } else { // Need to split a single range - partition.get_range(i, 0, &range_1d); - T r[2]; - r[0] = ((const T*)range_1d)[0]; - r[1] = splitting_point; - p1.add_range(i, r); - r[0] = (int_domain) ? (splitting_point + 1) : - std::nextafter(splitting_point, max); - r[1] = ((const T*)range_1d)[1]; - p2.add_range(i, r); - } - } - } + RETURN_NOT_OK(partition.split( + splitting_range, splitting_dim, splitting_value, &p1, &p2)); // Update list state_.multi_range_.pop_front(); @@ -966,20 +855,5 @@ void SubarrayPartitioner::swap(SubarrayPartitioner& partitioner) { std::swap(memory_budget_var_, partitioner.memory_budget_var_); } -// Explicit template instantiations -template Status SubarrayPartitioner::split_current(bool* unsplittable); -template Status SubarrayPartitioner::split_current(bool* unsplittable); -template Status SubarrayPartitioner::split_current(bool* unsplittable); -template Status SubarrayPartitioner::split_current( - bool* unsplittable); -template Status SubarrayPartitioner::split_current(bool* unsplittable); -template Status SubarrayPartitioner::split_current( - bool* unsplittable); -template Status SubarrayPartitioner::split_current(bool* unsplittable); -template Status SubarrayPartitioner::split_current( - bool* unsplittable); -template Status SubarrayPartitioner::split_current(bool* unsplittable); -template Status SubarrayPartitioner::split_current(bool* unsplittable); - } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/subarray/subarray_partitioner.h b/tiledb/sm/subarray/subarray_partitioner.h index ced65fa7f0c..b4e160767dd 100644 --- a/tiledb/sm/subarray/subarray_partitioner.h +++ b/tiledb/sm/subarray/subarray_partitioner.h @@ -219,18 +219,6 @@ class 
SubarrayPartitioner { */ Status next(bool* unsplittable); - /** - * The partitioner iterates over the partitions of the subarray it is - * associated with. This function advances to compute the next partition - * based on the specified budget. If this cannot be retrieved because - * the current partition cannot be split further (typically because it - * is a single cell whose estimated result does not fit in the budget), - * then the function does not advance to the next partition and sets - * ``unsplittable`` to ``true``. - */ - template - Status next(bool* unsplittable); - /** * Sets the memory budget (in bytes). * @@ -254,7 +242,6 @@ class SubarrayPartitioner { * by the reader when the current partition was estimated to fit * the results, but that was not eventually true. */ - template Status split_current(bool* unsplittable); /** Returns the state. */ @@ -330,49 +317,44 @@ class SubarrayPartitioner { * If the interval is a single range, which does not fit in the budget, * then the function sets ``found`` to ``false`` and ``true`` otherwise. */ - template Status compute_current_start_end(bool* found); /** * Applicable only when the `range` layout is GLOBAL_ORDER. - * Computes the splitting point and dimension for the input range. + * Computes the splitting value and dimension for the input range. * If `range` is whithin a single space tile, then `unsplittable` * is set to `true`. */ - template - void compute_splitting_point_on_tiles( + void compute_splitting_value_on_tiles( const Subarray& range, unsigned* splitting_dim, - T* splitting_point, + ByteVecValue* splitting_value, bool* unsplittable); /** - * Computes the splitting point and dimension for the input range. + * Computes the splitting value and dimension for the input range. * In case of real domains, if this function may not be able to find a - * splitting point and set ``unsplittable`` to ``true``. + * splitting value and set ``unsplittable`` to ``true``. 
*/ - template - void compute_splitting_point_single_range( + void compute_splitting_value_single_range( const Subarray& range, unsigned* splitting_dim, - T* splitting_point, + ByteVecValue* splitting_value, bool* unsplittable); /** - * Computes the splitting point and dimension for + * Computes the splitting value and dimension for * ``state_.multi_range_.front()``. In case of real domains, if this - * function may not be able to find a splitting point and set + * function cannot find a splitting value, it sets * ``unsplittable`` to ``true``. */ - template - void compute_splitting_point_multi_range( + void compute_splitting_value_multi_range( unsigned* splitting_dim, uint64_t* splitting_range, - T* splitting_point, + ByteVecValue* splitting_value, bool* unsplittable); /** Returns ``true`` if the input partition must be split. */ - template bool must_split(Subarray* partition); /** @@ -381,7 +363,6 @@ class SubarrayPartitioner { * partitions. If the next partition cannot be produced, * ``unsplittable`` is set to ``true``. */ - template Status next_from_multi_range(bool* unsplittable); /** @@ -391,21 +372,18 @@ class SubarrayPartitioner { * If the next partition cannot be produced, ``unsplittable`` is set * to ``true``. */ - template Status next_from_single_range(bool* unsplittable); /** * Splits the top single range, or sets ``unsplittable`` to ``true`` * if that is not possible. */ - template Status split_top_single_range(bool* unsplittable); /** * Splits the top multi-range, or sets ``unsplittable`` to ``true`` * if that is not possible. */ - template Status split_top_multi_range(bool* unsplittable); /**