diff --git a/test/src/unit-capi-consolidation.cc b/test/src/unit-capi-consolidation.cc index 9b1981be3122..b597205bc79d 100644 --- a/test/src/unit-capi-consolidation.cc +++ b/test/src/unit-capi-consolidation.cc @@ -2714,7 +2714,7 @@ bool ConsolidationFx::is_array(const std::string& array_name) { TEST_CASE_METHOD( ConsolidationFx, "C API: Test consolidation, dense", - "[capi], [consolidation], [dense-consolidation]") { + "[capi][consolidation][dense-consolidation]") { remove_dense_array(); create_dense_array(); @@ -2933,8 +2933,7 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( ConsolidationFx, "C API: Test advanced consolidation #1", - "[capi], [consolidation], [consolidation-adv], " - "[consolidation-adv-1]") { + "[capi][consolidation][adv-1]") { remove_dense_vector(); create_dense_vector(); write_dense_vector_4_fragments(); diff --git a/tiledb/sm/array_schema/dimension.cc b/tiledb/sm/array_schema/dimension.cc index dbcead3b209c..357176f0a5ea 100644 --- a/tiledb/sm/array_schema/dimension.cc +++ b/tiledb/sm/array_schema/dimension.cc @@ -53,6 +53,7 @@ Dimension::Dimension() { type_ = Datatype::INT32; set_compute_mbr_func(); set_crop_range_func(); + set_domain_range_func(); set_expand_range_func(); set_expand_range_v_func(); set_expand_to_tile_func(); @@ -71,6 +72,7 @@ Dimension::Dimension(const std::string& name, Datatype type) tile_extent_ = nullptr; set_compute_mbr_func(); set_crop_range_func(); + set_domain_range_func(); set_expand_range_func(); set_expand_range_v_func(); set_expand_to_tile_func(); @@ -218,6 +220,7 @@ Status Dimension::deserialize(ConstBuffer* buff, Datatype type) { set_compute_mbr_func(); set_crop_range_func(); + set_domain_range_func(); set_expand_range_func(); set_expand_range_v_func(); set_expand_to_tile_func(); @@ -302,9 +305,31 @@ void Dimension::crop_range(Range* range) const { crop_range_func_(this, range); } +template +uint64_t Dimension::domain_range(const Range& range) { + assert(!range.empty()); + + if (&typeid(T) == &typeid(float) || 
&typeid(T) == &typeid(double)) + return 0; + + auto r = (const T*)range.data(); + uint64_t ret = r[1] - r[0]; + if (ret == std::numeric_limits::max()) // overflow + return 0; + ++ret; + + return ret; +} + +uint64_t Dimension::domain_range(const Range& range) const { + assert(domain_range_func_ != nullptr); + return domain_range_func_(range); +} + template void Dimension::expand_range_v(const void* v, Range* r) { assert(v != nullptr); + assert(r != nullptr); assert(!r->empty()); auto rt = (const T*)r->data(); auto vt = (const T*)v; @@ -870,6 +895,59 @@ void Dimension::set_crop_range_func() { } } +void Dimension::set_domain_range_func() { + switch (type_) { + case Datatype::INT32: + domain_range_func_ = domain_range; + break; + case Datatype::INT64: + domain_range_func_ = domain_range; + break; + case Datatype::INT8: + domain_range_func_ = domain_range; + break; + case Datatype::UINT8: + domain_range_func_ = domain_range; + break; + case Datatype::INT16: + domain_range_func_ = domain_range; + break; + case Datatype::UINT16: + domain_range_func_ = domain_range; + break; + case Datatype::UINT32: + domain_range_func_ = domain_range; + break; + case Datatype::UINT64: + domain_range_func_ = domain_range; + break; + case Datatype::FLOAT32: + domain_range_func_ = domain_range; + break; + case Datatype::FLOAT64: + domain_range_func_ = domain_range; + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + domain_range_func_ = domain_range; + break; + default: + domain_range_func_ = nullptr; + break; + } +} + void Dimension::set_compute_mbr_func() { switch (type_) { case Datatype::INT32: diff --git 
a/tiledb/sm/array_schema/dimension.h b/tiledb/sm/array_schema/dimension.h index 2f79fc7e96da..c5cb20d95683 100644 --- a/tiledb/sm/array_schema/dimension.h +++ b/tiledb/sm/array_schema/dimension.h @@ -141,6 +141,21 @@ class Dimension { template static void crop_range(const Dimension* dim, Range* range); + /** + * Returns the domain range (high - low + 1) of the input + * 1D range. It returns 0 in case the dimension datatype + * is not integer or if there is an overflow. + */ + uint64_t domain_range(const Range& range) const; + + /** + * Returns the domain range (high - low + 1) of the input + * 1D range. It returns 0 in case the dimension datatype + * is not integer or if there is an overflow. + */ + template + static uint64_t domain_range(const Range& range); + /** Expand 1D range `r` using value `v`. */ void expand_range_v(const void* v, Range* r) const; @@ -297,6 +312,12 @@ class Dimension { */ std::function crop_range_func_; + /** + * Stores the appropriate templated domain_range() function based on the + * dimension datatype. + */ + std::function domain_range_func_; + /** * Stores the appropriate templated expand_range() function based on the * dimension datatype. @@ -439,6 +460,9 @@ class Dimension { /** Sets the templated crop_range() function. */ void set_crop_range_func(); + /** Sets the templated domain_range() function. */ + void set_domain_range_func(); + /** Sets the templated expand_range() function. 
*/ void set_expand_range_func(); diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index a9f716e166e0..c30b0aa512ac 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -619,52 +619,6 @@ void Domain::expand_to_tiles(NDRange* ndrange) const { dimensions_[d]->expand_to_tile(&(*ndrange)[d]); } -void Domain::expand_domain(void* domain) const { - switch (type_) { - case Datatype::INT32: - expand_domain(static_cast(domain)); - break; - case Datatype::INT64: - expand_domain(static_cast(domain)); - break; - case Datatype::INT8: - expand_domain(static_cast(domain)); - break; - case Datatype::UINT8: - expand_domain(static_cast(domain)); - break; - case Datatype::INT16: - expand_domain(static_cast(domain)); - break; - case Datatype::UINT16: - expand_domain(static_cast(domain)); - break; - case Datatype::UINT32: - expand_domain(static_cast(domain)); - break; - case Datatype::UINT64: - expand_domain(static_cast(domain)); - break; - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - expand_domain(static_cast(domain)); - break; - default: // Non-applicable to non-integer domains - break; - } -} - template void Domain::get_tile_coords(const T* coords, T* tile_coords) const { auto domain = (T*)domain_; @@ -931,6 +885,24 @@ uint64_t Domain::tile_num(const NDRange& ndrange) const { return ret; } +uint64_t Domain::cell_num(const NDRange& ndrange) const { + assert(!ndrange.empty()); + + uint64_t cell_num = 1, range, prod; + for (unsigned d = 0; d < dim_num_; ++d) { + range = dimensions_[d]->domain_range(ndrange[d]); + if (range == 0) // Real dimension domain or overflow + return 
0; + + prod = range * cell_num; + if (prod / range != cell_num) // Overflow + return 0; + cell_num = prod; + } + + return cell_num; +} + bool Domain::covered(const NDRange& r1, const NDRange& r2) const { assert(r1.size() == dim_num_); assert(r2.size() == dim_num_); diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index dbe4bd4a2853..ff60cab625ed 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -311,57 +311,6 @@ class Domain { */ void expand_to_tiles(NDRange* ndrange) const; - /** - * Expands the input domain such that it coincides with the boundaries of - * the array's regular tiles (i.e., it maps it on the regular tile grid). - * If the array has no regular tile grid, the function does not do anything. - * - * @param domain The domain to be expanded. - * @return void - */ - void expand_domain(void* domain) const; - - /** - * Expands the input domain such that it coincides with the boundaries of - * the array's regular tiles (i.e., it maps it on the regular tile grid). - * If the array has no regular tile grid, the function does not do anything. - * - * @tparam The domain type. - * @param domain The domain to be expanded. - * @return void - */ - template < - class T, - typename std::enable_if::value, T>::type* = nullptr> - void expand_domain(T* domain) const { - // Applicable only to regular tiles - if (tile_extents_ == nullptr) - return; - - auto tile_extents = static_cast(tile_extents_); - auto array_domain = static_cast(domain_); - - for (unsigned int i = 0; i < dim_num_; ++i) { - // This will always make the first bound coincide with a tile - domain[2 * i] = ((domain[2 * i] - array_domain[2 * i]) / tile_extents[i] * - tile_extents[i]) + - array_domain[2 * i]; - - domain[2 * i + 1] = - ((domain[2 * i + 1] - array_domain[2 * i]) / tile_extents[i] + 1) * - tile_extents[i] - - 1 + array_domain[2 * i]; - } - } - - /** No-op for float/double domains. 
*/ - template < - class T, - typename std::enable_if::value, T>::type* = nullptr> - void expand_domain(T* domain) const { - (void)domain; - } - /** * Returns the position of the input coordinates inside its corresponding * tile, based on the array cell order. Applicable only to **dense** arrays. @@ -589,6 +538,14 @@ class Domain { */ uint64_t tile_num(const NDRange& ndrange) const; + /** + * Returns the number of cells in the input range. + * If there is an overflow, then the function returns 0. + * If at least one dimension had a non-integer domain, the + * function returns 0. + */ + uint64_t cell_num(const NDRange& ndrange) const; + /** Returns true if r1 is fully covered by r2. */ bool covered(const NDRange& r1, const NDRange& r2) const; diff --git a/tiledb/sm/fragment/fragment_info.h b/tiledb/sm/fragment/fragment_info.h index b5dee32c51be..015306c7a05e 100644 --- a/tiledb/sm/fragment/fragment_info.h +++ b/tiledb/sm/fragment/fragment_info.h @@ -34,6 +34,7 @@ #ifndef TILEDB_FRAGMENT_INFO_H #define TILEDB_FRAGMENT_INFO_H +#include "tiledb/sm/misc/types.h" #include "tiledb/sm/misc/uri.h" #include @@ -42,25 +43,8 @@ namespace tiledb { namespace sm { /** Stores basic information about a fragment. */ -struct FragmentInfo { - /** The fragment URI. */ - URI uri_; - /** True if the fragment is sparse, and false if it is dense. */ - bool sparse_; - /** The timestamp range of the fragment. */ - std::pair timestamp_range_; - /** The size of the entire fragment directory. */ - uint64_t fragment_size_; - /** The fragment's non-empty domain. */ - std::vector non_empty_domain_; - /** - * The fragment's expanded non-empty domain (in a way that - * it coincides with tile boundaries. Applicable only to - * dense fragments. For sparse fragments, the expanded - * domain is the same as the non-empty domain. - */ - std::vector expanded_non_empty_domain_; - +class FragmentInfo { + public: /** Constructor. 
*/ FragmentInfo() { uri_ = URI(""); @@ -75,8 +59,8 @@ struct FragmentInfo { bool sparse, const std::pair& timestamp_range, uint64_t fragment_size, - const std::vector& non_empty_domain, - const std::vector& expanded_non_empty_domain) + const NDRange& non_empty_domain, + const NDRange& expanded_non_empty_domain) : uri_(uri) , sparse_(sparse) , timestamp_range_(timestamp_range) @@ -111,6 +95,55 @@ struct FragmentInfo { return *this; } + /** Returns `true` if the fragment is sparse. */ + bool sparse() const { + return sparse_; + } + + /** Returns the fragment URI. */ + const URI& uri() const { + return uri_; + } + + /** Returns the timestamp range. */ + const std::pair& timestamp_range() const { + return timestamp_range_; + } + + /** Returns the fragment size. */ + uint64_t fragment_size() const { + return fragment_size_; + } + + /** Returns the non-empty domain. */ + const NDRange& non_empty_domain() const { + return non_empty_domain_; + } + + /** Returns the expanded non-empty domain. */ + const NDRange& expanded_non_empty_domain() const { + return expanded_non_empty_domain_; + } + + private: + /** The fragment URI. */ + URI uri_; + /** True if the fragment is sparse, and false if it is dense. */ + bool sparse_; + /** The timestamp range of the fragment. */ + std::pair timestamp_range_; + /** The size of the entire fragment directory. */ + uint64_t fragment_size_; + /** The fragment's non-empty domain. */ + NDRange non_empty_domain_; + /** + * The fragment's expanded non-empty domain (in a way that + * it coincides with tile boundaries. Applicable only to + * dense fragments. For sparse fragments, the expanded + * domain is the same as the non-empty domain. + */ + NDRange expanded_non_empty_domain_; + /** * Returns a deep copy of this FragmentInfo. 
* @return New FragmentInfo diff --git a/tiledb/sm/misc/utils.cc b/tiledb/sm/misc/utils.cc index 90d0aa674edf..1ae493e41190 100644 --- a/tiledb/sm/misc/utils.cc +++ b/tiledb/sm/misc/utils.cc @@ -646,32 +646,6 @@ inline bool coords_in_rect( return true; } -template -inline bool rect_in_rect( - const T* rect_a, const T* rect_b, unsigned int dim_num) { - for (unsigned int i = 0; i < dim_num; ++i) { - if (rect_a[2 * i] < rect_b[2 * i] || rect_a[2 * i] > rect_b[2 * i + 1] || - rect_a[2 * i + 1] < rect_b[2 * i] || - rect_a[2 * i + 1] > rect_b[2 * i + 1]) - return false; - } - - return true; -} - -template -void expand_mbr_with_mbr(T* mbr_a, const T* mbr_b, unsigned int dim_num) { - for (unsigned int i = 0; i < dim_num; ++i) { - // Update lower bound on dimension i - if (mbr_a[2 * i] > mbr_b[2 * i]) - mbr_a[2 * i] = mbr_b[2 * i]; - - // Update upper bound on dimension i - if (mbr_a[2 * i + 1] < mbr_b[2 * i + 1]) - mbr_a[2 * i + 1] = mbr_b[2 * i + 1]; - } -} - template bool overlap(const T* a, const T* b, unsigned dim_num) { for (unsigned i = 0; i < dim_num; ++i) { @@ -682,24 +656,6 @@ bool overlap(const T* a, const T* b, unsigned dim_num) { return true; } -template -bool overlap(const T* a, const T* b, unsigned dim_num, bool* a_contains_b) { - for (unsigned i = 0; i < dim_num; ++i) { - if (a[2 * i] > b[2 * i + 1] || a[2 * i + 1] < b[2 * i]) - return false; - } - - *a_contains_b = true; - for (unsigned i = 0; i < dim_num; ++i) { - if (a[2 * i] > b[2 * i] || a[2 * i + 1] < b[2 * i + 1]) { - *a_contains_b = false; - break; - } - } - - return true; -} - template void overlap(const T* a, const T* b, unsigned dim_num, T* o, bool* overlap) { // Get overlap range @@ -714,17 +670,6 @@ void overlap(const T* a, const T* b, unsigned dim_num, T* o, bool* overlap) { } } -template -bool overlap(const std::vector& a, const T* b) { - auto dim_num = (unsigned)a.size(); - for (unsigned i = 0; i < dim_num; ++i) { - if (a[i][0] > b[2 * i + 1] || a[i][1] < b[2 * i]) - return false; - } - - 
return true; -} - template double coverage(const T* a, const T* b, unsigned dim_num) { double c = 1.0; @@ -871,48 +816,6 @@ template bool coords_in_rect( const std::vector& subarray, unsigned int dim_num); -template bool rect_in_rect( - const int* rect_a, const int* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const int64_t* rect_a, const int64_t* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const float* react_a, const float* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const double* rect_a, const double* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const int8_t* rect_a, const int8_t* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const uint8_t* rect_a, const uint8_t* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const int16_t* rect_a, const int16_t* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const uint16_t* rect_a, const uint16_t* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const uint32_t* rect_a, const uint32_t* rect_b, unsigned int dim_num); -template bool rect_in_rect( - const uint64_t* rect_a, const uint64_t* rect_b, unsigned int dim_num); - -template void expand_mbr_with_mbr( - int* mbr_a, const int* mbr_b, unsigned int dim_num); -template void expand_mbr_with_mbr( - int64_t* mbr_a, const int64_t* mbr_b, unsigned int dim_num); -template void expand_mbr_with_mbr( - float* mbr_a, const float* mbr_b, unsigned int dim_num); -template void expand_mbr_with_mbr( - double* mbr_a, const double* mbr_b, unsigned int dim_num); -template void expand_mbr_with_mbr( - int8_t* mbr_a, const int8_t* mbr_b, unsigned int dim_num); -template void expand_mbr_with_mbr( - uint8_t* mbr_a, const uint8_t* mbr_b, unsigned int dim_num); -template void expand_mbr_with_mbr( - int16_t* mbr_a, const int16_t* mbr_b, unsigned int dim_num); -template void expand_mbr_with_mbr( - uint16_t* mbr_a, const uint16_t* mbr_b, unsigned int dim_num); -template void 
expand_mbr_with_mbr( - uint32_t* mbr_a, const uint32_t* mbr_b, unsigned int dim_num); -template void expand_mbr_with_mbr( - uint64_t* mbr_a, const uint64_t* mbr_b, unsigned int dim_num); - template bool overlap( const int8_t* a, const int8_t* b, unsigned dim_num); template bool overlap( @@ -932,27 +835,6 @@ template bool overlap(const float* a, const float* b, unsigned dim_num); template bool overlap( const double* a, const double* b, unsigned dim_num); -template bool overlap( - const int8_t* a, const int8_t* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const uint8_t* a, const uint8_t* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const int16_t* a, const int16_t* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const uint16_t* a, const uint16_t* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const int* a, const int* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const unsigned* a, const unsigned* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const int64_t* a, const int64_t* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const uint64_t* a, const uint64_t* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const float* a, const float* b, unsigned dim_num, bool* a_contains_b); -template bool overlap( - const double* a, const double* b, unsigned dim_num, bool* a_contains_b); - template void overlap( const int* a, const int* b, unsigned dim_num, int* o, bool* overlap); template void overlap( @@ -1006,27 +888,6 @@ template void overlap( uint64_t* o, bool* overlap); -template bool overlap( - const std::vector& a, const int8_t* b); -template bool overlap( - const std::vector& a, const uint8_t* b); -template bool overlap( - const std::vector& a, const int16_t* b); -template bool overlap( - const std::vector& a, const uint16_t* b); -template bool overlap( - const std::vector& a, const int32_t* b); -template bool 
overlap( - const std::vector& a, const uint32_t* b); -template bool overlap( - const std::vector& a, const int64_t* b); -template bool overlap( - const std::vector& a, const uint64_t* b); -template bool overlap( - const std::vector& a, const float* b); -template bool overlap( - const std::vector& a, const double* b); - template double coverage( const int8_t* a, const int8_t* b, unsigned dim_num); template double coverage( diff --git a/tiledb/sm/misc/utils.h b/tiledb/sm/misc/utils.h index bc2c205043a4..113e1ab8f177 100644 --- a/tiledb/sm/misc/utils.h +++ b/tiledb/sm/misc/utils.h @@ -244,63 +244,10 @@ template bool coords_in_rect( const T* coords, const std::vector& rect, unsigned int dim_num); -/** - * Checks if `rect_a` is inside `rect_b`. - * - * @tparam T The domain type. - * @param rect_a The first rectangle. - * @param rect_b The second rectangle. - * @param dim_num The number of dimensions. - * @return `true` if `rect_a` is inside `rect_b` and `false` otherwise. - */ -template -bool rect_in_rect(const T* rect_a, const T* rect_b, unsigned int dim_num); - -/** - * Computes the union of a set of MBRs (rectangles). - * - * @tparam T The domain type. - * @param dim_num The number of dimensions. - * @param mbrs The start of the MBRs whose union to compute. - * @param mbr_num The number of MBRs serialized one after the - * other and starting at ``mbrs``. - * @param mbr_union The MBR union to be computed. - */ -template -void compute_mbr_union( - unsigned dim_num, const T* mbrs, uint64_t mbr_num, T* mbr_union); - -/** - * Expands `mbr_a` so that it encompasses `mbr_b`. - * - * @tparam T The type of the MBR and coordinates. - * @param mbr_a The MBR to be expanded. - * @param mbr_b The MBR used to expnad `mbr_a`. - * @param dim_num The number of dimensions of the MBRs. - * @return void - */ -template -void expand_mbr_with_mbr(T* mbr_a, const T* mbr_b, unsigned int dim_num); - /** Returns *true* if hyper-rectangle `a` overlaps with `b`. 
*/ template bool overlap(const T* a, const T* b, unsigned dim_num); -/** - * Checks whether two hyper-rectangles overlap, and determines whether - * the first rectangle contains the second. - * - * @tparam T The type of the rectangles. - * @param a The first rectangle. - * @param b The second rectangle. - * @param dim_num The number of dimensions. - * @param a_contains_b Determines whether the first rectangle contains the - * second. - * @return `True` if the rectangles overlap, and `false` otherwise. - */ -template -bool overlap(const T* a, const T* b, unsigned dim_num, bool* a_contains_b); - /** * Computes the overlap between two rectangles. * @@ -313,14 +260,6 @@ bool overlap(const T* a, const T* b, unsigned dim_num, bool* a_contains_b); template void overlap(const T* a, const T* b, unsigned dim_num, T* o, bool* overlap); -/** - * Returns *true* if hyper-rectangle `a` overlaps with `b`. - * `a` is vector of [low, high] pairs, one per dimension, whereas `b` - * is a flattened array of [low,high] pairs one per dimension. - */ -template -bool overlap(const std::vector& a, const T* b); - /** * Returns the percentage of coverage of hyper-rectangle `a` in `b`. * Note that the function assumes that `a` is fully contained in `b`. 
diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 372b24c46bba..52305944c26f 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -328,7 +328,7 @@ Status Reader::read() { if (array_schema_->dense() && !sparse_mode_) { RETURN_NOT_OK(dense_read()); } else { - RETURN_NOT_OK(sparse_read()); + RETURN_NOT_OK(sparse_read()); } // In the case of overflow, we need to split the current partition @@ -801,7 +801,6 @@ Status Reader::compute_subarray_coords( return Status::Ok(); } -template Status Reader::compute_sparse_result_tiles( std::vector* result_tiles, std::map, size_t>* result_tile_map, @@ -1270,8 +1269,6 @@ void Reader::compute_result_cell_slabs_global( } } -// TODO: remove template -template Status Reader::compute_result_coords( std::vector* result_tiles, std::vector* result_coords) { @@ -1281,7 +1278,7 @@ Status Reader::compute_result_coords( std::vector single_fragment; // TODO: remove template - RETURN_CANCEL_OR_ERROR(compute_sparse_result_tiles( + RETURN_CANCEL_OR_ERROR(compute_sparse_result_tiles( result_tiles, &result_tile_map, &single_fragment)); if (result_tiles->empty()) @@ -1358,7 +1355,7 @@ Status Reader::dense_read() { // sparse fragments std::vector result_coords; std::vector sparse_result_tiles; - RETURN_NOT_OK(compute_result_coords(&sparse_result_tiles, &result_coords)); + RETURN_NOT_OK(compute_result_coords(&sparse_result_tiles, &result_coords)); // Compute result cell slabs. 
// `result_space_tiles` will hold all the relevant result tiles of @@ -1926,7 +1923,6 @@ Status Reader::sort_result_coords( STATS_FUNC_OUT(reader_sort_coords); } -template Status Reader::sparse_read() { STATS_FUNC_IN(reader_sparse_read); @@ -1935,7 +1931,7 @@ Status Reader::sparse_read() { // sparse fragments std::vector result_coords; std::vector sparse_result_tiles; - RETURN_NOT_OK(compute_result_coords(&sparse_result_tiles, &result_coords)); + RETURN_NOT_OK(compute_result_coords(&sparse_result_tiles, &result_coords)); std::vector result_tiles; for (auto& srt : sparse_result_tiles) result_tiles.push_back(&srt); diff --git a/tiledb/sm/query/reader.h b/tiledb/sm/query/reader.h index 6f7ee3a82c65..e7f2f0ba7235 100644 --- a/tiledb/sm/query/reader.h +++ b/tiledb/sm/query/reader.h @@ -630,7 +630,6 @@ class Reader { * track of the unique result tile info for subarray ranges that overlap * with common tiles. * - * @tparam T The coords type. * @param result_tiles The result tiles to be computed. * @param result_tile_map The result tile map to be computed. * @param single_fragment Each element corresponds to a range of the @@ -638,7 +637,6 @@ class Reader { * tiles come from a single fragment for that range. * @return Status */ - template Status compute_sparse_result_tiles( std::vector* result_tiles, std::map, size_t>* result_tile_map, @@ -741,7 +739,6 @@ class Reader { * @param result_tiles This will store the unique result tiles. * @param result_coords This will store the result coordinates. */ - template Status compute_result_coords( std::vector* result_tiles, std::vector* result_coords); @@ -965,13 +962,7 @@ class Reader { Status sort_result_coords( std::vector* result_coords, Layout layout) const; - /** - * Performs a read on a sparse array. - * - * @tparam The domain type. - * @return Status - */ - template + /** Performs a read on a sparse array. */ Status sparse_read(); /** Zeroes out the user buffer sizes, indicating an empty result. 
*/ diff --git a/tiledb/sm/storage_manager/consolidator.cc b/tiledb/sm/storage_manager/consolidator.cc index 2668f14f62f7..c8e39ee93dbb 100644 --- a/tiledb/sm/storage_manager/consolidator.cc +++ b/tiledb/sm/storage_manager/consolidator.cc @@ -208,106 +208,40 @@ bool Consolidator::all_sparse( size_t start, size_t end) const { for (size_t i = start; i <= end; ++i) { - if (!fragments[i].sparse_) + if (!fragments[i].sparse()) return false; } return true; } -template bool Consolidator::are_consolidatable( + const Domain* domain, const std::vector& fragments, size_t start, size_t end, - const T* union_non_empty_domains, - unsigned dim_num) const { + const NDRange& union_non_empty_domains) const { // True if all fragments in [start, end] are sparse if (all_sparse(fragments, start, end)) return true; // Check overlap of union with earlier fragments for (size_t i = 0; i < start; ++i) { - if (utils::geometry::overlap( - union_non_empty_domains, - (T*)&fragments[i].non_empty_domain_[0], - dim_num)) + if (domain->overlap( + union_non_empty_domains, fragments[i].non_empty_domain())) return false; } // Check consolidation amplification factor - auto union_cell_num = - utils::geometry::cell_num(union_non_empty_domains, dim_num); + auto union_cell_num = domain->cell_num(union_non_empty_domains); uint64_t sum_cell_num = 0; for (size_t i = start; i <= end; ++i) { - // fragments(StorageManager) - sum_cell_num += utils::geometry::cell_num( - (T*)&fragments[i].expanded_non_empty_domain_[0], dim_num); + sum_cell_num += domain->cell_num(fragments[i].expanded_non_empty_domain()); } return (double(union_cell_num) / sum_cell_num) <= config_.amplification_; } -Status Consolidator::consolidate( - const ArraySchema* array_schema, - EncryptionType encryption_type, - const void* encryption_key, - uint32_t key_length) { - switch (array_schema->coords_type()) { - case Datatype::INT32: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::INT64: - 
return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::INT8: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::UINT8: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::INT16: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::UINT16: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::UINT32: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::UINT64: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::FLOAT32: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::FLOAT64: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return consolidate( - array_schema, encryption_type, encryption_key, key_length); - default: - return LOG_STATUS( - Status::ConsolidatorError("Cannot consolidate; Invalid domain type")); - } - - return Status::Ok(); -} - -template Status Consolidator::consolidate( const ArraySchema* array_schema, EncryptionType encryption_type, @@ -318,14 +252,6 @@ Status Consolidator::consolidate( auto array_uri = array_schema->array_uri(); EncryptionKey enc_key; RETURN_NOT_OK(enc_key.set_key(encryption_type, encryption_key, key_length)); - auto domain_size = 2 * array_schema->coords_size(); - - // Allocate memory for union of non-empty domains of 
fragments to consolidate - std::unique_ptr union_non_empty_domains( - new uint8_t[domain_size]()); - if (union_non_empty_domains == nullptr) - return LOG_STATUS(Status::ConsolidatorError( - "Cannot consolidate; memory allocation failed")); // Get fragment info std::vector fragment_info; @@ -333,7 +259,7 @@ Status Consolidator::consolidate( array_schema, timestamp, enc_key, &fragment_info)); // First make a pass and delete any entirely overwritten fragments - RETURN_NOT_OK(delete_overwritten_fragments(array_schema, &fragment_info)); + RETURN_NOT_OK(delete_overwritten_fragments(array_schema, &fragment_info)); uint32_t step = 0; do { @@ -342,11 +268,12 @@ Status Consolidator::consolidate( break; // Find the next fragments to be consolidated - RETURN_NOT_OK(compute_next_to_consolidate( + NDRange union_non_empty_domains; + RETURN_NOT_OK(compute_next_to_consolidate( array_schema, fragment_info, &to_consolidate, - (T*)union_non_empty_domains.get())); + &union_non_empty_domains)); // Check if there is anything to consolidate if (to_consolidate.size() <= 1) @@ -354,10 +281,10 @@ Status Consolidator::consolidate( // Consolidate the selected fragments URI new_fragment_uri; - RETURN_NOT_OK(consolidate( + RETURN_NOT_OK(consolidate( array_uri, to_consolidate, - (T*)union_non_empty_domains.get(), + union_non_empty_domains, encryption_type, encryption_key, key_length, @@ -379,11 +306,10 @@ Status Consolidator::consolidate( return Status::Ok(); } -template Status Consolidator::consolidate( const URI& array_uri, const std::vector& to_consolidate, - T* union_non_empty_domains, + const NDRange& union_non_empty_domains, EncryptionType encryption_type, const void* encryption_key, uint32_t key_length, @@ -417,9 +343,6 @@ Status Consolidator::consolidate( bool all_sparse = this->all_sparse(to_consolidate, 0, to_consolidate.size() - 1); - // Compute layout and subarray - void* subarray = (all_sparse) ? 
nullptr : union_non_empty_domains; - // Prepare buffers void** buffers; uint64_t* buffer_sizes; @@ -439,7 +362,7 @@ Status Consolidator::consolidate( &array_for_reads, &array_for_writes, all_sparse, - subarray, + union_non_empty_domains, buffers, buffer_sizes, &query_r, @@ -497,7 +420,7 @@ Status Consolidator::consolidate( std::vector to_delete; for (const auto& f : to_consolidate) - to_delete.emplace_back(f.uri_); + to_delete.emplace_back(f.uri()); // Delete old fragment metadata. This makes the old fragments invisible st = delete_fragment_metadata(array_uri, to_delete); @@ -590,18 +513,31 @@ Status Consolidator::create_queries( Array* array_for_reads, Array* array_for_writes, bool sparse_mode, - void* subarray, + const NDRange& subarray, void** buffers, uint64_t* buffer_sizes, Query** query_r, Query** query_w, URI* new_fragment_uri) { + // Create subarray to set + std::vector s; + if (!sparse_mode) { + auto domain_size = 2 * array_for_reads->array_schema()->coords_size(); + s.resize(domain_size); + uint64_t offset = 0; + for (const auto& r : subarray) { + std::memcpy(&s[offset], r.data(), r.size()); + offset += r.size(); + } + } + void* to_set = (s.empty()) ? 
nullptr : &s[0]; + // Create read query *query_r = new Query(storage_manager_, array_for_reads); RETURN_NOT_OK((*query_r)->set_layout(Layout::GLOBAL_ORDER)); RETURN_NOT_OK( set_query_buffers(*query_r, sparse_mode, buffers, buffer_sizes)); - RETURN_NOT_OK((*query_r)->set_subarray(subarray, true)); + RETURN_NOT_OK((*query_r)->set_subarray(to_set, true)); if (array_for_reads->array_schema()->dense() && sparse_mode) RETURN_NOT_OK((*query_r)->set_sparse_mode(true)); @@ -613,7 +549,7 @@ Status Consolidator::create_queries( // Create write query *query_w = new Query(storage_manager_, array_for_writes, *new_fragment_uri); RETURN_NOT_OK((*query_w)->set_layout(Layout::GLOBAL_ORDER)); - RETURN_NOT_OK((*query_w)->set_subarray(subarray, true)); + RETURN_NOT_OK((*query_w)->set_subarray(to_set, true)); RETURN_NOT_OK( set_query_buffers(*query_w, sparse_mode, buffers, buffer_sizes)); @@ -641,7 +577,6 @@ Status Consolidator::delete_fragments(const std::vector& fragments) { return Status::Ok(); } -template Status Consolidator::delete_overwritten_fragments( const ArraySchema* array_schema, std::vector* fragments) { // Trivial case @@ -653,22 +588,19 @@ Status Consolidator::delete_overwritten_fragments( return Status::Ok(); // Find which fragments to delete - auto dim_num = array_schema->dim_num(); + auto domain = array_schema->domain(); std::vector to_delete; std::list updated; for (auto f : *fragments) updated.emplace_back(f); for (auto cur = updated.rbegin(); cur != updated.rend(); ++cur) { - if (cur->sparse_) + if (cur->sparse()) continue; for (auto check = updated.begin(); - check->uri_.to_string() != cur->uri_.to_string();) { - if (utils::geometry::rect_in_rect( - (T*)&check->non_empty_domain_[0], - (T*)&cur->non_empty_domain_[0], - dim_num)) { - to_delete.emplace_back(check->uri_); + check->uri().to_string() != cur->uri().to_string();) { + if (domain->covered(check->non_empty_domain(), cur->non_empty_domain())) { + to_delete.emplace_back(check->uri()); check = 
updated.erase(check); } else { ++check; @@ -700,15 +632,12 @@ void Consolidator::free_buffers( delete[] buffer_sizes; } -template Status Consolidator::compute_next_to_consolidate( const ArraySchema* array_schema, const std::vector& fragments, std::vector* to_consolidate, - T* union_non_empty_domains) const { + NDRange* union_non_empty_domains) const { // Preparation - auto domain_size = 2 * array_schema->coords_size(); - auto dim_num = array_schema->dim_num(); auto domain = array_schema->domain(); to_consolidate->clear(); auto min = config_.min_frags_; @@ -726,7 +655,7 @@ Status Consolidator::compute_next_to_consolidate( // stores the sum of fragment sizes, and the other the union of the // corresponding non-empty domains of the fragments. std::vector> m_sizes; - std::vector>> m_union; + std::vector> m_union; auto col_num = fragments.size(); auto row_num = max; m_sizes.resize(row_num); @@ -735,8 +664,6 @@ Status Consolidator::compute_next_to_consolidate( m_union.resize(row_num); for (auto& row : m_union) { row.resize(col_num); - for (auto& col : row) - col.resize(domain_size); } // Entry m[i][j] contains the collective size of fragments @@ -750,27 +677,23 @@ Status Consolidator::compute_next_to_consolidate( for (size_t i = 0; i < row_num; ++i) { for (size_t j = 0; j < col_num; ++j) { if (i == 0) { // In the first row we store the sizes of `fragments` - m_sizes[i][j] = fragments[j].fragment_size_; - std::memcpy( - &m_union[i][j][0], &fragments[j].non_empty_domain_[0], domain_size); + m_sizes[i][j] = fragments[j].fragment_size(); + m_union[i][j] = fragments[j].non_empty_domain(); } else if (i + j >= col_num) { // Non-valid entries m_sizes[i][j] = UINT64_MAX; m_union[i][j].clear(); m_union[i][j].shrink_to_fit(); } else { // Every other row is computed using the previous row - auto ratio = (float)fragments[i + j - 1].fragment_size_ / - fragments[i + j].fragment_size_; + auto ratio = (float)fragments[i + j - 1].fragment_size() / + fragments[i + j].fragment_size(); ratio 
= (ratio <= 1.0f) ? ratio : 1.0f / ratio; if (ratio >= size_ratio && (m_sizes[i - 1][j] != UINT64_MAX)) { - m_sizes[i][j] = m_sizes[i - 1][j] + fragments[i + j].fragment_size_; - std::memcpy(&m_union[i][j][0], &m_union[i - 1][j][0], domain_size); - utils::geometry::expand_mbr_with_mbr( - (T*)&m_union[i][j][0], - (const T*)&fragments[i + j].non_empty_domain_[0], - dim_num); - domain->expand_domain((T*)&m_union[i][j][0]); - if (!are_consolidatable( - fragments, j, j + i, (const T*)&m_union[i][j][0], dim_num)) { + m_sizes[i][j] = m_sizes[i - 1][j] + fragments[i + j].fragment_size(); + m_union[i][j] = m_union[i - 1][j]; + domain->expand_ndrange( + fragments[i + j].non_empty_domain(), &m_union[i][j]); + domain->expand_to_tiles(&m_union[i][j]); + if (!are_consolidatable(domain, fragments, j, j + i, m_union[i][j])) { // Mark this entry as invalid m_sizes[i][j] = UINT64_MAX; m_union[i][j].clear(); @@ -812,7 +735,15 @@ Status Consolidator::compute_next_to_consolidate( // Results found for (size_t f = min_col; f <= min_col + i; ++f) to_consolidate->emplace_back(fragments[f]); - std::memcpy(union_non_empty_domains, &m_union[i][min_col][0], domain_size); + *union_non_empty_domains = m_union[i][min_col]; + + std::cout << "union_non_empty_domains: "; + for (auto r : *union_non_empty_domains) { + auto d = (const uint64_t*)r.data(); + std::cout << d[0] << " " << d[1]; + } + std::cout << "\n"; + break; } @@ -894,7 +825,8 @@ void Consolidator::update_fragment_info( while (fragment_it != fragment_info->end()) { // No match - add the fragment info and advance `fragment_it` if (to_consolidate_it == to_consolidate.end() || - fragment_it->uri_.to_string() != to_consolidate_it->uri_.to_string()) { + fragment_it->uri().to_string() != + to_consolidate_it->uri().to_string()) { updated_fragment_info.emplace_back(*fragment_it); ++fragment_it; } else { // Match - add new fragment only once and advance both iterators diff --git a/tiledb/sm/storage_manager/consolidator.h 
b/tiledb/sm/storage_manager/consolidator.h index 440d1c6650bf..fe80ef3fabda 100644 --- a/tiledb/sm/storage_manager/consolidator.h +++ b/tiledb/sm/storage_manager/consolidator.h @@ -197,7 +197,7 @@ class Consolidator { * fragments is more than an amplification factor larger than the * sum of sizes of the separate fragment non-empty domains. * - * @tparam T The domain type. + * @param domain The array domain. * @param fragments The input fragments. * @param start The function will focus on fragments between * positions `start` and `end`. @@ -205,17 +205,15 @@ class Consolidator { * positions `start` and `end`. * @param union_non_empty_domains The union of the non-empty domains of * the fragments between `start` and `end`. - * @param dim_num The number of domain dimensions. * @return `True` if the fragments between `start` and `end` can be * consolidated based on the above definition. */ - template bool are_consolidatable( + const Domain* domain, const std::vector& fragments, size_t start, size_t end, - const T* union_non_empty_domains, - unsigned dim_num) const; + const NDRange& union_non_empty_domains) const; /** * Consolidates the fragments of the input array. @@ -233,29 +231,10 @@ class Consolidator { const void* encryption_key, uint32_t key_length); - /** - * Consolidates the fragments of the input array. - * - * @param T The domain type. - * @param array_schema The schema of the array to consolidate. - * @param encryption_type The encryption type of the array - * @param encryption_key If the array is encrypted, the private encryption - * key. For unencrypted arrays, pass `nullptr`. - * @param key_length The length in bytes of the encryption key. - * @return Status - */ - template - Status consolidate( - const ArraySchema* array_schema, - EncryptionType encryption_type, - const void* encryption_key, - uint32_t key_length); - /** * Consolidates the input fragments of the input array. This function * implements a single consolidation step. 
* - * @tparam T The domain type. * @param array_uri URI of array to consolidate. * @param to_consolidate The fragments to consolidate in this consolidation * step. @@ -270,11 +249,10 @@ class Consolidator { * consolidating the `to_consolidate` fragments. * @return Status */ - template Status consolidate( const URI& array_uri, const std::vector& to_consolidate, - T* union_non_empty_domains, + const NDRange& union_non_empty_domains, EncryptionType encryption_type, const void* encryption_key, uint32_t key_length, @@ -342,7 +320,7 @@ class Consolidator { Array* array_for_reads, Array* array_for_writes, bool sparse_mode, - void* subarray, + const NDRange& subarray, void** buffers, uint64_t* buffer_sizes, Query** query_r, @@ -375,7 +353,6 @@ class Consolidator { * included in the non-empty domain of a later dense fragment. * This is applicable only to dense arrays. * - * @tparam T The domain type. * @param array_schema The array schema. * @param fragments Fragment information that will help in identifying * which fragments to delete. If a fragment gets deleted by the @@ -383,7 +360,6 @@ class Consolidator { * from this vector. * @return Status */ - template Status delete_overwritten_fragments( const ArraySchema* array_schema, std::vector* fragments); @@ -401,7 +377,6 @@ class Consolidator { * Based on the input fragment info, this algorithm decides the (sorted) list * of fragments to be consolidated in the next consolidation step. * - * @tparam T The domain type. * @param array_schema The array schema. * @param fragments Information about all the fragments. * @param to_consolidate The fragments to consolidate in the next step. @@ -409,12 +384,11 @@ class Consolidator { * union of the non-empty domains of the fragments in `to_consolidate`. 
* @return Status */ - template Status compute_next_to_consolidate( const ArraySchema* array_schema, const std::vector& fragments, std::vector* to_consolidate, - T* union_non_empty_domains) const; + NDRange* union_non_empty_domains) const; /** * The new fragment URI is computed diff --git a/tiledb/sm/storage_manager/storage_manager.cc b/tiledb/sm/storage_manager/storage_manager.cc index 9cfe972776eb..fb97a01f9568 100644 --- a/tiledb/sm/storage_manager/storage_manager.cc +++ b/tiledb/sm/storage_manager/storage_manager.cc @@ -253,7 +253,7 @@ Status StorageManager::array_open_for_reads( // Determine which fragments to load std::vector fragments_to_load; for (const auto& fragment : fragments) - fragments_to_load.emplace_back(fragment.uri_, fragment.timestamp_range_); + fragments_to_load.emplace_back(fragment.uri(), fragment.timestamp_range()); // Get fragment metadata in the case of reads, if not fetched already Status st = load_fragment_metadata( @@ -759,35 +759,21 @@ Status StorageManager::get_fragment_info( if (fragment_metadata.empty()) return array_close_for_reads(array_uri); - uint64_t domain_size = 2 * array_schema->coords_size(); - auto dim_num = array_schema->dim_num(); for (auto meta : fragment_metadata) { const auto& uri = meta->fragment_uri(); bool sparse = !meta->dense(); - std::vector non_empty_domain; - non_empty_domain.resize(domain_size); - - // Get fragment non-empty domain - const auto meta_non_empty_domain = meta->non_empty_domain(); - auto non_empty_domain_ptr = (unsigned char*)&non_empty_domain[0]; - for (unsigned d = 0; d < dim_num; ++d) { - auto range_size = 2 * array_schema->dimension(d)->coord_size(); - std::memcpy( - non_empty_domain_ptr, meta_non_empty_domain[d].data(), range_size); - non_empty_domain_ptr += range_size; - } - // Get fragment size uint64_t size; RETURN_NOT_OK_ELSE( meta->fragment_size(&size), array_close_for_reads(array_uri)); - // Compute expanded non-empty domain only for dense fragments + // Get non-empty domain, and 
compute expanded non-empty domain + // (only for dense fragments) + const auto& non_empty_domain = meta->non_empty_domain(); auto expanded_non_empty_domain = non_empty_domain; if (!sparse) - array_schema->domain()->expand_domain( - (void*)&expanded_non_empty_domain[0]); + array_schema->domain()->expand_to_tiles(&expanded_non_empty_domain); // Push new fragment info fragment_info->push_back(FragmentInfo( @@ -854,23 +840,13 @@ Status StorageManager::get_fragment_info( uint64_t size; RETURN_NOT_OK(meta.fragment_size(&size)); - uint64_t domain_size = 2 * array_schema->coords_size(); - std::vector non_empty_domain; - non_empty_domain.resize(domain_size); - const auto meta_non_empty_domain = meta.non_empty_domain(); - auto non_empty_domain_ptr = (unsigned char*)&non_empty_domain[0]; - auto dim_num = array_schema->dim_num(); - for (unsigned d = 0; d < dim_num; ++d) { - auto range_size = 2 * array_schema->dimension(d)->coord_size(); - std::memcpy( - non_empty_domain_ptr, meta_non_empty_domain[d].data(), range_size); - non_empty_domain_ptr += range_size; - } - // Compute expanded non-empty domain only for dense fragments + // Get non-empty domain, and compute expanded non-empty domain + // (only for dense fragments) + const auto& non_empty_domain = meta.non_empty_domain(); auto expanded_non_empty_domain = non_empty_domain; if (!sparse) - array_schema->domain()->expand_domain((void*)&expanded_non_empty_domain[0]); + array_schema->domain()->expand_to_tiles(&expanded_non_empty_domain); // Set fragment info *fragment_info = FragmentInfo(