From 11f37eb2a3436f863a37b26959f4396fd10808f4 Mon Sep 17 00:00:00 2001 From: Stavros Papadopoulos Date: Tue, 3 Mar 2020 11:27:01 -0500 Subject: [PATCH] More cleanups towards addressing #93 --- tiledb/sm/array/array.cc | 99 ++++---------- tiledb/sm/array/array.h | 20 --- tiledb/sm/array_schema/array_schema.cc | 52 +++----- tiledb/sm/array_schema/array_schema.h | 9 -- tiledb/sm/array_schema/domain.cc | 57 ++++---- tiledb/sm/array_schema/domain.h | 23 ++-- tiledb/sm/c_api/tiledb.cc | 19 ++- tiledb/sm/enums/datatype.h | 5 + tiledb/sm/fragment/fragment_metadata.cc | 170 ++++++++++++++---------- tiledb/sm/fragment/fragment_metadata.h | 28 +++- tiledb/sm/query/reader.cc | 8 +- tiledb/sm/query/writer.cc | 9 +- tiledb/sm/rest/rest_client.cc | 2 +- tiledb/sm/serialization/array_schema.cc | 12 +- tiledb/sm/subarray/cell_slab_iter.cc | 12 +- tiledb/sm/subarray/subarray.cc | 17 ++- tiledb/sm/subarray/subarray.h | 3 - tools/src/commands/info_command.cc | 5 +- 18 files changed, 270 insertions(+), 280 deletions(-) diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index d4517ef6cf0..4603599e2a8 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -33,6 +33,7 @@ #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" +#include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/domain.h" #include "tiledb/sm/crypto/crypto.h" #include "tiledb/sm/enums/datatype.h" @@ -670,8 +671,16 @@ void Array::clear_last_max_buffer_sizes() { } Status Array::compute_max_buffer_sizes(const void* subarray) { + // Applicable only to domains where all dimensions have the same type + if (!array_schema_->domain()->all_dims_same_type()) + return LOG_STATUS( + Status::ArrayError("Cannot compute max buffer sizes; Inapplicable when " + "dimension domains have different types")); + // Allocate space for max buffer sizes subarray - auto subarray_size = 2 * array_schema_->coords_size(); + auto dim_num = array_schema_->dim_num(); + auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); + auto subarray_size = 2 * dim_num * coord_size; if (last_max_buffer_sizes_subarray_ == nullptr) { last_max_buffer_sizes_subarray_ = std::malloc(subarray_size); if (last_max_buffer_sizes_subarray_ == nullptr) @@ -719,83 +728,31 @@ Status Array::compute_max_buffer_sizes( if (fragment_metadata_.empty()) return Status::Ok(); - // Compute buffer sizes - switch (array_schema_->coords_type()) { - case Datatype::INT32: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::INT64: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::FLOAT32: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::FLOAT64: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::INT8: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::UINT8: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::INT16: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::UINT16: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::UINT32: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::UINT64: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return compute_max_buffer_sizes( - static_cast(subarray), buffer_sizes); - default: - return LOG_STATUS(Status::ArrayError( - "Cannot compute max read buffer sizes; Invalid coordinates type")); - } - - return Status::Ok(); -} - -template -Status Array::compute_max_buffer_sizes( - const T* subarray, - std::unordered_map>* - max_buffer_sizes) const { - // Sanity check - assert(!fragment_metadata_.empty()); - // First we calculate a rough upper bound. Especially for dense // arrays, this will not be accurate, as it accounts only for the // non-empty regions of the subarray. for (auto& meta : fragment_metadata_) - RETURN_NOT_OK(meta->add_max_buffer_sizes( - encryption_key_, subarray, max_buffer_sizes)); + RETURN_NOT_OK( + meta->add_max_buffer_sizes(encryption_key_, subarray, buffer_sizes)); + + // Prepare an NDRange for the subarray + auto dim_num = array_schema_->dim_num(); + NDRange sub(dim_num); + auto sub_ptr = (const unsigned char*)subarray; + uint64_t offset = 0; + for (unsigned d = 0; d < dim_num; ++d) { + auto r_size = 2 * array_schema_->dimension(d)->coord_size(); + sub[d] = Range(&sub_ptr[offset], r_size); + offset += r_size; + } // Rectify bound for dense arrays if (array_schema_->dense()) { - auto cell_num = array_schema_->domain()->cell_num(subarray); + auto cell_num = array_schema_->domain()->cell_num(sub); // `cell_num` becomes 0 when `subarray` is huge, leading to a // `uint64_t` overflow. if (cell_num != 0) { - for (auto& it : *max_buffer_sizes) { + for (auto& it : *buffer_sizes) { if (array_schema_->var_size(it.first)) { it.second.first = cell_num * constants::cell_var_offset_size; it.second.second += @@ -809,12 +766,12 @@ Status Array::compute_max_buffer_sizes( // Rectify bound for sparse arrays with integer domain, without duplicates if (!array_schema_->dense() && !array_schema_->allows_dups() && - datatype_is_integer(array_schema_->domain()->type())) { - auto cell_num = array_schema_->domain()->cell_num(subarray); + array_schema_->domain()->all_dims_int()) { + auto cell_num = array_schema_->domain()->cell_num(sub); // `cell_num` becomes 0 when `subarray` is huge, leading to a // `uint64_t` overflow. if (cell_num != 0) { - for (auto& it : *max_buffer_sizes) { + for (auto& it : *buffer_sizes) { if (!array_schema_->var_size(it.first)) { // Check for overflow uint64_t new_size = cell_num * array_schema_->cell_size(it.first); diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index 4cd20c60bff..07ac6feccab 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -394,26 +394,6 @@ class Array { std::unordered_map>* max_buffer_sizes_) const; - /** - * Computes an upper bound on the buffer sizes required for a read - * query, for a given subarray and set of attributes. Note that - * the attributes are already set in `max_buffer_sizes` - * - * @tparam T The domain type - * @param subarray The subarray to focus on. Note that it must have the same - * underlying type as the array domain. - * @param max_buffer_sizes The buffer sizes to be retrieved. This is a map - * from an attribute to a size pair. For fixed-sized attributes, only - * the first size is useful. For var-sized attributes, the first size - * is the offsets size, and the second size is the values size. - * @return Status - */ - template - Status compute_max_buffer_sizes( - const T* subarray, - std::unordered_map>* - max_buffer_sizes) const; - /** * Load array metadata, handles remote arrays vs non-remote arrays * @return Status diff --git a/tiledb/sm/array_schema/array_schema.cc b/tiledb/sm/array_schema/array_schema.cc index 6f9d84841cc..3248b007995 100644 --- a/tiledb/sm/array_schema/array_schema.cc +++ b/tiledb/sm/array_schema/array_schema.cc @@ -87,7 +87,6 @@ ArraySchema::ArraySchema(const ArraySchema* array_schema) { cell_order_ = array_schema->cell_order_; cell_var_offsets_filters_ = array_schema->cell_var_offsets_filters_; coords_filters_ = array_schema->coords_filters_; - coords_size_ = array_schema->coords_size_; tile_order_ = array_schema->tile_order_; version_ = array_schema->version_; @@ -174,9 +173,13 @@ Layout ArraySchema::cell_order() const { } uint64_t ArraySchema::cell_size(const std::string& name) const { - // Special zipped coordinates - if (name == constants::coords) - return domain_->dim_num() * datatype_size(coords_type()); + // Special zipped coordinates attribute + if (name == constants::coords) { + auto dim_num = domain_->dim_num(); + assert(dim_num > 0); + auto coord_size = domain_->dimension(0)->coord_size(); + return dim_num * coord_size; + } // Attribute auto attr_it = attribute_map_.find(name); @@ -228,8 +231,8 @@ Status ArraySchema::check() const { "Array schema check failed; No dimensions provided")); if (array_type_ == ArrayType::DENSE) { - if (domain_->type() == Datatype::FLOAT32 || - domain_->type() == Datatype::FLOAT64) { + auto type = domain_->dimension(0)->type(); + if (datatype_is_real(type)) { return LOG_STATUS( Status::ArraySchemaError("Array schema check failed; Dense arrays " "cannot have floating point domains")); @@ -298,14 +301,6 @@ int ArraySchema::coords_compression_level() const { return (compressor == nullptr) ? -1 : compressor->compression_level(); } -uint64_t ArraySchema::coords_size() const { - return coords_size_; -} - -Datatype ArraySchema::coords_type() const { - return domain_->type(); -} - bool ArraySchema::dense() const { return array_type_ == ArrayType::DENSE; } @@ -429,9 +424,9 @@ Layout ArraySchema::tile_order() const { } Datatype ArraySchema::type(const std::string& name) const { - // Special zipped coordinates + // Special zipped coordinates attribute if (name == constants::coords) - return domain_->type(); + return domain_->dimension(0)->type(); // Attribute auto attr_it = attribute_map_.find(name); @@ -582,10 +577,6 @@ Status ArraySchema::init() { // Initialize domain RETURN_NOT_OK(domain_->init(cell_order_, tile_order_)); - // Set cell sizes - // TODO: set upon setting domain - coords_size_ = domain_->dim_num() * datatype_size(coords_type()); - // Success return Status::Ok(); } @@ -626,11 +617,13 @@ Status ArraySchema::set_domain(Domain* domain) { if (array_type_ == ArrayType::DENSE) { RETURN_NOT_OK(domain->set_null_tile_extents_to_range()); - if (domain->type() == Datatype::FLOAT32 || - domain->type() == Datatype::FLOAT64) { - return LOG_STATUS( - Status::ArraySchemaError("Cannot set domain; Dense arrays " - "cannot have floating point domains")); + if (domain->dim_num() > 0) { + auto type = domain->dimension(0)->type(); + if (type == Datatype::FLOAT32 || type == Datatype::FLOAT64) { + return LOG_STATUS( + Status::ArraySchemaError("Cannot set domain; Dense arrays " + "cannot have floating point domains")); + } } } @@ -639,8 +632,7 @@ Status ArraySchema::set_domain(Domain* domain) { domain_ = new Domain(domain); // Potentially change the default coordinates compressor - if ((domain_->type() == Datatype::FLOAT32 || - domain_->type() == Datatype::FLOAT64) && + if (domain_->all_dims_real() && coords_compression() == Compressor::DOUBLE_DELTA) { auto* filter = coords_filters_.get_filter(); assert(filter != nullptr); @@ -686,12 +678,6 @@ bool ArraySchema::check_attribute_dimension_names() const { } bool ArraySchema::check_double_delta_compressor() const { - // Check coordinates - if ((domain_->type() == Datatype::FLOAT32 || - domain_->type() == Datatype::FLOAT64) && - coords_compression() == Compressor::DOUBLE_DELTA) - return false; - // Check attributes for (auto attr : attributes_) { if ((attr->type() == Datatype::FLOAT32 || diff --git a/tiledb/sm/array_schema/array_schema.h b/tiledb/sm/array_schema/array_schema.h index 7431cfe3a89..4feb4347e6b 100644 --- a/tiledb/sm/array_schema/array_schema.h +++ b/tiledb/sm/array_schema/array_schema.h @@ -191,12 +191,6 @@ class ArraySchema { /** Returns the compression level of the coordinates. */ int coords_compression_level() const; - /** Returns the coordinates size. */ - uint64_t coords_size() const; - - /** Returns the type of the coordinates. */ - Datatype coords_type() const; - /** True if the array is dense. */ bool dense() const; @@ -362,9 +356,6 @@ class ArraySchema { /** The filter pipeline run on coordinate tiles. */ FilterPipeline coords_filters_; - /** The size (in bytes) of the coordinates. */ - uint64_t coords_size_; - /** It maps each dimension name to the corresponding dimension object. */ std::unordered_map dim_map_; diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index 8a3d66dd766..7985b543500 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -121,27 +121,35 @@ Status Domain::add_dimension(const Dimension* dim) { return Status::Ok(); } -template -uint64_t Domain::cell_num(const T* domain) const { - if (&typeid(T) == &typeid(float) || &typeid(T) == &typeid(double)) - return 0; +bool Domain::all_dims_int() const { + for (const auto& dim : dimensions_) { + if (!datatype_is_integer(dim->type())) + return false; + } - uint64_t cell_num = 1, range, prod; - for (unsigned i = 0; i < dim_num_; ++i) { - // The code below essentially computes - // cell_num *= domain[2 * i + 1] - domain[2 * i] + 1; - // while performing overflow checks - range = domain[2 * i + 1] - domain[2 * i]; - if (range == std::numeric_limits::max()) // overflow - return 0; - ++range; - prod = range * cell_num; - if (prod / range != cell_num) // Overflow - return 0; - cell_num = prod; + return true; +} + +bool Domain::all_dims_real() const { + for (const auto& dim : dimensions_) { + if (!datatype_is_real(dim->type())) + return false; } - return cell_num; + return true; +} + +bool Domain::all_dims_same_type() const { + if (dim_num_ == 0) + return true; + + auto type = dimensions_[0]->type(); + for (unsigned d = 1; d < dim_num_; ++d) { + if (dimensions_[d]->type() != type) + return false; + } + + return true; } uint64_t Domain::cell_num_per_tile() const { @@ -644,9 +652,11 @@ int Domain::tile_order_cmp( return tile_order_cmp_func_[dim_idx](dim, coord_a, coord_b); } +/* Datatype Domain::type() const { return type_; } +*/ /* ****************************** */ /* PRIVATE METHODS */ @@ -1284,17 +1294,6 @@ uint64_t Domain::get_tile_pos_row(const T* domain, const T* tile_coords) const { } // Explicit template instantiations -template uint64_t Domain::cell_num(const int8_t* domain) const; -template uint64_t Domain::cell_num(const uint8_t* domain) const; -template uint64_t Domain::cell_num(const int16_t* domain) const; -template uint64_t Domain::cell_num(const uint16_t* domain) const; -template uint64_t Domain::cell_num(const int* domain) const; -template uint64_t Domain::cell_num(const unsigned* domain) const; -template uint64_t Domain::cell_num(const int64_t* domain) const; -template uint64_t Domain::cell_num(const uint64_t* domain) const; -template uint64_t Domain::cell_num(const float* domain) const; -template uint64_t Domain::cell_num(const double* domain) const; - template Status Domain::get_cell_pos( const int* coords, uint64_t* pos) const; template Status Domain::get_cell_pos( diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index 9907f4925c5..2abfc7c9993 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -87,18 +87,14 @@ class Domain { */ Status add_dimension(const Dimension* dim); - /** - * Returns the number of cells in the input domain. Note that this is - * applicable only to integer array domains (otherwise the output is 0). - * Also note that it is assummed that the input domain is expanded - * such that it aligns with the tile extents. - * - * @tparam T The domain type. - * @param domain The domain to be checked. - * @return The number of cells in the domain. - */ - template - uint64_t cell_num(const T* domain) const; + /** Returns true if all dimensions have integer domain types. */ + bool all_dims_int() const; + + /** Returns true if all dimensions have real domain types. */ + bool all_dims_real() const; + + /** Returns true if all dimensions have the same type. */ + bool all_dims_same_type() const; /** Returns the number of cells per tile (only for the dense case). */ uint64_t cell_num_per_tile() const; @@ -495,9 +491,6 @@ class Domain { int tile_order_cmp( unsigned dim_idx, const void* coord_a, const void* coord_b) const; - /** Returns the dimensions type. */ - Datatype type() const; - /** * Returns the position of the input coordinates inside its corresponding * tile, based on the array cell order. Applicable only to **dense** arrays, diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index 570bce30019..6bb4e3c20af 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -1621,7 +1621,24 @@ int32_t tiledb_domain_get_type( if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, domain) == TILEDB_ERR) return TILEDB_ERR; - *type = static_cast(domain->domain_->type()); + + if (domain->domain_->dim_num() == 0) { + auto st = tiledb::sm::Status::Error( + "Cannot get domain type; Domain has no dimensions"); + LOG_STATUS(st); + save_error(ctx, st); + return TILEDB_OOM; + } + + if (!domain->domain_->all_dims_same_type()) { + auto st = tiledb::sm::Status::Error( + "Cannot get domain type; Not applicable to heterogeneous dimensions"); + LOG_STATUS(st); + save_error(ctx, st); + return TILEDB_OOM; + } + + *type = static_cast(domain->domain_->dimension(0)->type()); return TILEDB_OK; } diff --git a/tiledb/sm/enums/datatype.h b/tiledb/sm/enums/datatype.h index 6ccc5d1b17f..ddf738c9891 100644 --- a/tiledb/sm/enums/datatype.h +++ b/tiledb/sm/enums/datatype.h @@ -266,6 +266,11 @@ inline bool datatype_is_integer(Datatype type) { type == Datatype::INT64 || type == Datatype::UINT64); } +/** Returns true if the input datatype is a real type. */ +inline bool datatype_is_real(Datatype type) { + return (type == Datatype::FLOAT32 || type == Datatype::FLOAT64); +} + /** Returns true if the input datatype is a datetime type. */ inline bool datatype_is_datetime(Datatype type) { return ( diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index d1c8cf470a9..eca4ad241d9 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -148,15 +148,91 @@ uint64_t FragmentMetadata::cell_num(uint64_t tile_pos) const { return last_tile_cell_num(); } -template Status FragmentMetadata::add_max_buffer_sizes( const EncryptionKey& encryption_key, - const T* subarray, + const void* subarray, std::unordered_map>* buffer_sizes) { + // Dense case if (dense_) return add_max_buffer_sizes_dense(encryption_key, subarray, buffer_sizes); - return add_max_buffer_sizes_sparse(encryption_key, subarray, buffer_sizes); + + // Convert subarray to NDRange + auto dim_num = array_schema_->dim_num(); + auto sub_ptr = (const unsigned char*)subarray; + NDRange sub_nd(dim_num); + uint64_t offset = 0; + for (unsigned d = 0; d < dim_num; ++d) { + auto r_size = 2 * array_schema_->dimension(d)->coord_size(); + sub_nd[d].set_range(&sub_ptr[offset], r_size); + offset += r_size; + } + + // Sparse case + return add_max_buffer_sizes_sparse(encryption_key, sub_nd, buffer_sizes); +} + +Status FragmentMetadata::add_max_buffer_sizes_dense( + const EncryptionKey& encryption_key, + const void* subarray, + std::unordered_map>* + buffer_sizes) { + // Note: applicable only to the dense case where all dimensions + // have the same type + auto type = array_schema_->dimension(0)->type(); + switch (type) { + case Datatype::INT32: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::INT64: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::FLOAT32: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::FLOAT64: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::INT8: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::UINT8: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::INT16: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::UINT16: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::UINT32: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::UINT64: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + return add_max_buffer_sizes_dense( + encryption_key, static_cast(subarray), buffer_sizes); + default: + return LOG_STATUS(Status::FragmentMetadataError( + "Cannot compute add read buffer sizes for dense array; Unsupported " + "domain type")); + } + + return Status::Ok(); } template @@ -186,20 +262,15 @@ Status FragmentMetadata::add_max_buffer_sizes_dense( return Status::Ok(); } -template Status FragmentMetadata::add_max_buffer_sizes_sparse( const EncryptionKey& encryption_key, - const T* subarray, + const NDRange& subarray, std::unordered_map>* buffer_sizes) { RETURN_NOT_OK(load_rtree(encryption_key)); // Get tile overlap - auto dim_num = array_schema_->dim_num(); - NDRange range(dim_num); - for (unsigned d = 0; d < dim_num; ++d) - range[d].set_range(&subarray[2 * d], 2 * sizeof(T)); - auto tile_overlap = rtree_.get_tile_overlap(range); + auto tile_overlap = rtree_.get_tile_overlap(subarray); uint64_t size = 0; // Handle tile ranges @@ -589,7 +660,9 @@ Status FragmentMetadata::get_footer_offset_and_size( Status FragmentMetadata::get_footer_offset_and_size_v3_v4( uint64_t* offset, uint64_t* size) const { auto attribute_num = array_schema_->attribute_num(); - auto domain_size = 2 * array_schema_->coords_size(); + auto dim_num = array_schema_->dim_num(); + // v3 and v4 support only arrays where all dimensions have the same type + auto domain_size = 2 * dim_num * array_schema_->dimension(0)->coord_size(); // Get footer size *size = 0; @@ -614,8 +687,11 @@ Status FragmentMetadata::get_footer_offset_and_size_v3_v4( Status FragmentMetadata::get_footer_offset_and_size_v5_or_higher( uint64_t* offset, uint64_t* size) const { - auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; - auto domain_size = 2 * array_schema_->coords_size(); + auto dim_num = array_schema_->dim_num(); + auto num = array_schema_->attribute_num() + dim_num + 1; + uint64_t domain_size = 0; + for (unsigned d = 0; d < dim_num; ++d) + domain_size += 2 * array_schema_->domain()->dimension(d)->coord_size(); // Get footer size *size = 0; @@ -646,7 +722,9 @@ std::vector FragmentMetadata::compute_overlapping_tile_ids( auto dim_num = array_schema_->dim_num(); // Temporary domain vector - std::vector temp(2 * array_schema_->coords_size()); + auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); + auto temp_size = 2 * dim_num * coord_size; + std::vector temp(temp_size); uint8_t offset = 0; for (unsigned d = 0; d < dim_num; ++d) { std::memcpy(&temp[offset], domain_[d].data(), domain_[d].size()); @@ -692,7 +770,9 @@ FragmentMetadata::compute_overlapping_tile_ids_cov(const T* subarray) const { auto dim_num = array_schema_->dim_num(); // Temporary domain vector - std::vector temp(2 * array_schema_->coords_size()); + auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); + auto temp_size = 2 * dim_num * coord_size; + std::vector temp(temp_size); uint8_t offset = 0; for (unsigned d = 0; d < dim_num; ++d) { std::memcpy(&temp[offset], domain_[d].data(), domain_[d].size()); @@ -871,7 +951,10 @@ Status FragmentMetadata::load_bounding_coords(ConstBuffer* buff) { RETURN_NOT_OK(buff->read(&bounding_coords_num, sizeof(uint64_t))); // Get bounding coordinates - uint64_t bounding_coords_size = 2 * array_schema_->coords_size(); + // Note: This version supports only dimensions domains with the same type + auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); + auto dim_num = array_schema_->domain()->dim_num(); + uint64_t bounding_coords_size = 2 * dim_num * coord_size; bounding_coords_.resize(bounding_coords_num); for (uint64_t i = 0; i < bounding_coords_num; ++i) { bounding_coords_[i].resize(bounding_coords_size); @@ -1059,7 +1142,9 @@ Status FragmentMetadata::load_non_empty_domain_v3_v4(ConstBuffer* buff) { // Get non-empty domain if (!null_non_empty_domain) { auto dim_num = array_schema_->dim_num(); - auto domain_size = 2 * array_schema_->coords_size(); + // Note: These versions supports only dimensions domains with the same type + auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); + auto domain_size = 2 * dim_num * coord_size; std::vector temp(domain_size); RETURN_NOT_OK(buff->read(&temp[0], domain_size)); non_empty_domain_.resize(dim_num); @@ -1782,57 +1867,6 @@ void FragmentMetadata::clean_up() { } // Explicit template instantiations -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const int8_t* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const uint8_t* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const int16_t* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const uint16_t* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const int* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const unsigned* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const int64_t* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const uint64_t* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const float* subarray, - std::unordered_map>* - buffer_sizes); -template Status FragmentMetadata::add_max_buffer_sizes( - const EncryptionKey& encryption_key, - const double* subarray, - std::unordered_map>* - buffer_sizes); - template std::vector> FragmentMetadata::compute_overlapping_tile_ids_cov( const int8_t* subarray) const; diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index bd3a5a7af96..76a295004b1 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -94,7 +94,6 @@ class FragmentMetadata { * from the fragment, for a given set of attributes. Note that these upper * bounds is added to those in `buffer_sizes`. * - * @tparam T The coordinates type. * @param encryption_key The encryption key the array was opened with. * @param subarray The targeted subarray. * @param buffer_sizes The upper bounds will be added to this map. The latter @@ -103,10 +102,29 @@ class FragmentMetadata { * offsets size, whereas the second is the data size. * @return Status */ - template Status add_max_buffer_sizes( const EncryptionKey& encryption_key, - const T* subarray, + const void* subarray, + std::unordered_map>* + buffer_sizes); + + /** + * Computes an upper bound on the buffer sizes needed when reading a subarray + * from the fragment, for a given set of attributes. Note that these upper + * bounds is added to those in `buffer_sizes`. Applicable only to the dense + * case. + * + * @param encryption_key The encryption key the array was opened with. + * @param subarray The targeted subarray. + * @param buffer_sizes The upper bounds will be added to this map. The latter + * maps an attribute to a buffer size pair. For fix-sized attributes, only + * the first size is useful. For var-sized attributes, the first is the + * offsets size, whereas the second is the data size. + * @return Status + */ + Status add_max_buffer_sizes_dense( + const EncryptionKey& encryption_key, + const void* subarray, std::unordered_map>* buffer_sizes); @@ -138,7 +156,6 @@ class FragmentMetadata { * bounds is added to those in `buffer_sizes`. Applicable only to the sparse * case. * - * @tparam T The coordinates type. * @param encryption_key The encryption key the array was opened with. * @param subarray The targeted subarray. * @param buffer_sizes The upper bounds will be added to this map. The latter @@ -147,10 +164,9 @@ class FragmentMetadata { * offsets size, whereas the second is the data size. * @return Status */ - template Status add_max_buffer_sizes_sparse( const EncryptionKey& encryption_key, - const T* subarray, + const NDRange& subarray, std::unordered_map>* buffer_sizes); diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 8c6cc49b1b3..0ce3ef15a53 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -1285,8 +1285,8 @@ Status Reader::dedup_result_coords( } Status Reader::dense_read() { - auto coords_type = array_schema_->coords_type(); - switch (coords_type) { + auto type = array_schema_->domain()->dimension(0)->type(); + switch (type) { case Datatype::INT8: return dense_read(); case Datatype::UINT8: @@ -1436,7 +1436,9 @@ Status Reader::fill_dense_coords_row_col( STATS_FUNC_IN(reader_fill_coords); auto cell_order = array_schema_->cell_order(); - auto coords_size = array_schema_->coords_size(); + auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); + auto dim_num = array_schema_->dim_num(); + auto coords_size = dim_num * coord_size; // Iterate over all coordinates, retrieved in cell slabs CellSlabIter iter(&subarray); diff --git a/tiledb/sm/query/writer.cc b/tiledb/sm/query/writer.cc index 7361b7338bd..008dd870765 100644 --- a/tiledb/sm/query/writer.cc +++ b/tiledb/sm/query/writer.cc @@ -1482,8 +1482,8 @@ Status Writer::ordered_write() { assert(layout_ == Layout::ROW_MAJOR || layout_ == Layout::COL_MAJOR); assert(array_schema_->dense()); - auto coords_type = array_schema_->coords_type(); - switch (coords_type) { + auto type = array_schema_->domain()->dimension(0)->type(); + switch (type) { case Datatype::INT8: return ordered_write(); case Datatype::UINT8: @@ -2171,9 +2171,10 @@ Status Writer::split_coords_buffer() { return Status::Ok(); // For easy reference - auto coords_size = array_schema_->coords_size(); - coords_num_ = *coords_buffer_size_ / coords_size; auto dim_num = array_schema_->dim_num(); + auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); + auto coords_size = dim_num * coord_size; + coords_num_ = *coords_buffer_size_ / coords_size; clear_coord_buffers(); diff --git a/tiledb/sm/rest/rest_client.cc b/tiledb/sm/rest/rest_client.cc index 6e0311c1800..17fc973072d 100644 --- a/tiledb/sm/rest/rest_client.cc +++ b/tiledb/sm/rest/rest_client.cc @@ -554,7 +554,7 @@ Status RestClient::subarray_to_str( const ArraySchema* schema, const void* subarray, std::string* subarray_str) { - const auto coords_type = schema->coords_type(); + const auto coords_type = schema->dimension(0)->type(); const auto dim_num = schema->dim_num(); const auto subarray_nelts = 2 * dim_num; diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index 544fe0ebd8e..62346c4b0df 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -325,7 +325,7 @@ Status domain_to_capnp( return LOG_STATUS(Status::SerializationError( "Error serializing domain; domain is null.")); - domainBuilder->setType(datatype_str(domain->type())); + domainBuilder->setType(datatype_str(domain->dimension(0)->type())); domainBuilder->setTileOrder(layout_str(domain->tile_order())); domainBuilder->setCellOrder(layout_str(domain->cell_order())); @@ -686,7 +686,10 @@ Status nonempty_domain_deserialize( void* subarray; RETURN_NOT_OK(utils::deserialize_subarray( reader.getNonEmptyDomain(), schema, &subarray)); - std::memcpy(nonempty_domain, subarray, 2 * schema->coords_size()); + std::memcpy( + nonempty_domain, + subarray, + 2 * schema->dimension(0)->coord_size()); std::free(subarray); } @@ -706,7 +709,10 @@ Status nonempty_domain_deserialize( void* subarray; RETURN_NOT_OK(utils::deserialize_subarray( reader.getNonEmptyDomain(), schema, &subarray)); - std::memcpy(nonempty_domain, subarray, 2 * schema->coords_size()); + std::memcpy( + nonempty_domain, + subarray, + 2 * schema->dimension(0)->coord_size()); std::free(subarray); } diff --git a/tiledb/sm/subarray/cell_slab_iter.cc b/tiledb/sm/subarray/cell_slab_iter.cc index df79a257ce5..32ef2efc8a1 100644 --- a/tiledb/sm/subarray/cell_slab_iter.cc +++ b/tiledb/sm/subarray/cell_slab_iter.cc @@ -33,6 +33,7 @@ #include "tiledb/sm/subarray/cell_slab_iter.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/array_schema.h" +#include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/domain.h" #include "tiledb/sm/enums/layout.h" #include "tiledb/sm/misc/logger.h" @@ -59,8 +60,11 @@ CellSlabIter::CellSlabIter(const Subarray* subarray) : subarray_(subarray) { end_ = true; if (subarray != nullptr) { - aux_tile_coords_.resize(subarray->dim_num()); - aux_tile_coords_2_.resize(subarray->array()->array_schema()->coords_size()); + auto array_schema = subarray->array()->array_schema(); + auto dim_num = array_schema->dim_num(); + auto coord_size = array_schema->dimension(0)->coord_size(); + aux_tile_coords_.resize(dim_num); + aux_tile_coords_2_.resize(dim_num * coord_size); } } @@ -270,7 +274,9 @@ Status CellSlabIter::sanity_check() const { // Check type bool error; - switch (subarray_->type()) { + auto array_schema = subarray_->array()->array_schema(); + auto type = array_schema->domain()->dimension(0)->type(); + switch (type) { case Datatype::INT8: error = !std::is_same::value; break; diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 136108c0b76..212a5ac06ff 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -674,10 +674,6 @@ const std::vector>& Subarray::tile_overlap() const { return tile_overlap_; } -Datatype Subarray::type() const { - return array_->array_schema()->domain()->type(); -} - template void Subarray::compute_tile_coords() { if (array_->array_schema()->tile_order() == Layout::ROW_MAJOR) @@ -690,8 +686,9 @@ template const T* Subarray::tile_coords_ptr( const std::vector& tile_coords, std::vector* aux_tile_coords) const { - auto coords_size = array_->array_schema()->coords_size(); - std::memcpy(&((*aux_tile_coords)[0]), &tile_coords[0], coords_size); + auto dim_num = array_->array_schema()->dim_num(); + auto coord_size = array_->array_schema()->dimension(0)->coord_size(); + std::memcpy(&((*aux_tile_coords)[0]), &tile_coords[0], dim_num * coord_size); auto it = tile_coords_map_.find(*aux_tile_coords); if (it == tile_coords_map_.end()) return nullptr; @@ -911,7 +908,7 @@ void Subarray::compute_tile_coords_col() { tile_coords_.resize(tile_coords_num); std::vector coords; - auto coords_size = array_schema->coords_size(); + auto coords_size = dim_num * array_schema->dimension(0)->coord_size(); coords.resize(coords_size); size_t coord_size = sizeof(T); size_t tile_coords_pos = 0; @@ -969,7 +966,7 @@ void Subarray::compute_tile_coords_row() { tile_coords_.resize(tile_coords_num); std::vector coords; - auto coords_size = array_schema->coords_size(); + auto coords_size = dim_num * array_schema->dimension(0)->coord_size(); coords.resize(coords_size); size_t coord_size = sizeof(T); size_t tile_coords_pos = 0; @@ -1046,7 +1043,8 @@ Subarray Subarray::clone() const { } TileOverlap Subarray::get_tile_overlap(uint64_t range_idx, unsigned fid) const { - auto type = array_->array_schema()->domain()->type(); + assert(array_->array_schema()->dense()); + auto type = array_->array_schema()->dimension(0)->type(); switch (type) { case Datatype::INT8: return get_tile_overlap(range_idx, fid); @@ -1090,6 +1088,7 @@ TileOverlap Subarray::get_tile_overlap(uint64_t range_idx, unsigned fid) const { template TileOverlap Subarray::get_tile_overlap(uint64_t range_idx, unsigned fid) const { + assert(array_->array_schema()->dense()); TileOverlap ret; auto ndrange = this->ndrange(range_idx); diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 545f46c4c4f..e7ee86e4a75 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -438,9 +438,6 @@ class Subarray { /** Returns the tile overlap of the subarray. */ const std::vector>& tile_overlap() const; - /** Returns the subarray domain type. */ - Datatype type() const; - /** * Compute `tile_coords_` and `tile_coords_map_`. The coordinates will * be sorted on the array tile order. diff --git a/tools/src/commands/info_command.cc b/tools/src/commands/info_command.cc index 56b6bf30e51..a680dfa1fa7 100644 --- a/tools/src/commands/info_command.cc +++ b/tools/src/commands/info_command.cc @@ -36,6 +36,7 @@ #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" +#include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/crypto/encryption_key.h" #include "tiledb/sm/enums/encryption_type.h" #include "tiledb/sm/enums/query_type.h" @@ -219,7 +220,7 @@ void InfoCommand::write_svg_mbrs() const { for (const auto& f : fragment_metadata) { const auto& mbrs = f->mbrs(); for (const auto& mbr : mbrs) { - auto tup = get_mbr(mbr, schema->coords_type()); + auto tup = get_mbr(mbr, schema->dimension(0)->type()); min_x = std::min(min_x, std::get<0>(tup)); min_y = std::min(min_y, std::get<1>(tup)); max_x = std::max(max_x, std::get<0>(tup) + std::get<2>(tup)); @@ -280,7 +281,7 @@ void InfoCommand::write_text_mbrs() const { const auto* schema = array.array_schema(); auto dim_num = schema->dim_num(); auto fragment_metadata = array.fragment_metadata(); - auto coords_type = schema->coords_type(); + auto coords_type = schema->dimension(0)->type(); std::stringstream text; for (const auto& f : fragment_metadata) { const auto& mbrs = f->mbrs();