Skip to content

Commit

Permalink
More cleanups towards addressing #93
Browse files Browse the repository at this point in the history
  • Loading branch information
stavrospapadopoulos committed Mar 6, 2020
1 parent bc1d34f commit 959d6de
Show file tree
Hide file tree
Showing 18 changed files with 270 additions and 280 deletions.
99 changes: 28 additions & 71 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "tiledb/sm/array/array.h"
#include "tiledb/sm/array_schema/array_schema.h"
#include "tiledb/sm/array_schema/attribute.h"
#include "tiledb/sm/array_schema/dimension.h"
#include "tiledb/sm/array_schema/domain.h"
#include "tiledb/sm/crypto/crypto.h"
#include "tiledb/sm/enums/datatype.h"
Expand Down Expand Up @@ -670,8 +671,16 @@ void Array::clear_last_max_buffer_sizes() {
}

Status Array::compute_max_buffer_sizes(const void* subarray) {
// Applicable only to domains where all dimensions have the same type
if (!array_schema_->domain()->all_dims_same_type())
return LOG_STATUS(
Status::ArrayError("Cannot compute max buffer sizes; Inapplicable when "
"dimension domains have different types"));

// Allocate space for max buffer sizes subarray
auto subarray_size = 2 * array_schema_->coords_size();
auto dim_num = array_schema_->dim_num();
auto coord_size = array_schema_->domain()->dimension(0)->coord_size();
auto subarray_size = 2 * dim_num * coord_size;
if (last_max_buffer_sizes_subarray_ == nullptr) {
last_max_buffer_sizes_subarray_ = std::malloc(subarray_size);
if (last_max_buffer_sizes_subarray_ == nullptr)
Expand Down Expand Up @@ -719,83 +728,31 @@ Status Array::compute_max_buffer_sizes(
if (fragment_metadata_.empty())
return Status::Ok();

// Compute buffer sizes
switch (array_schema_->coords_type()) {
case Datatype::INT32:
return compute_max_buffer_sizes<int>(
static_cast<const int*>(subarray), buffer_sizes);
case Datatype::INT64:
return compute_max_buffer_sizes<int64_t>(
static_cast<const int64_t*>(subarray), buffer_sizes);
case Datatype::FLOAT32:
return compute_max_buffer_sizes<float>(
static_cast<const float*>(subarray), buffer_sizes);
case Datatype::FLOAT64:
return compute_max_buffer_sizes<double>(
static_cast<const double*>(subarray), buffer_sizes);
case Datatype::INT8:
return compute_max_buffer_sizes<int8_t>(
static_cast<const int8_t*>(subarray), buffer_sizes);
case Datatype::UINT8:
return compute_max_buffer_sizes<uint8_t>(
static_cast<const uint8_t*>(subarray), buffer_sizes);
case Datatype::INT16:
return compute_max_buffer_sizes<int16_t>(
static_cast<const int16_t*>(subarray), buffer_sizes);
case Datatype::UINT16:
return compute_max_buffer_sizes<uint16_t>(
static_cast<const uint16_t*>(subarray), buffer_sizes);
case Datatype::UINT32:
return compute_max_buffer_sizes<uint32_t>(
static_cast<const uint32_t*>(subarray), buffer_sizes);
case Datatype::UINT64:
return compute_max_buffer_sizes<uint64_t>(
static_cast<const uint64_t*>(subarray), buffer_sizes);
case Datatype::DATETIME_YEAR:
case Datatype::DATETIME_MONTH:
case Datatype::DATETIME_WEEK:
case Datatype::DATETIME_DAY:
case Datatype::DATETIME_HR:
case Datatype::DATETIME_MIN:
case Datatype::DATETIME_SEC:
case Datatype::DATETIME_MS:
case Datatype::DATETIME_US:
case Datatype::DATETIME_NS:
case Datatype::DATETIME_PS:
case Datatype::DATETIME_FS:
case Datatype::DATETIME_AS:
return compute_max_buffer_sizes<int64_t>(
static_cast<const int64_t*>(subarray), buffer_sizes);
default:
return LOG_STATUS(Status::ArrayError(
"Cannot compute max read buffer sizes; Invalid coordinates type"));
}

return Status::Ok();
}

template <class T>
Status Array::compute_max_buffer_sizes(
const T* subarray,
std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
max_buffer_sizes) const {
// Sanity check
assert(!fragment_metadata_.empty());

// First we calculate a rough upper bound. Especially for dense
// arrays, this will not be accurate, as it accounts only for the
// non-empty regions of the subarray.
for (auto& meta : fragment_metadata_)
RETURN_NOT_OK(meta->add_max_buffer_sizes(
encryption_key_, subarray, max_buffer_sizes));
RETURN_NOT_OK(
meta->add_max_buffer_sizes(encryption_key_, subarray, buffer_sizes));

// Prepare an NDRange for the subarray
auto dim_num = array_schema_->dim_num();
NDRange sub(dim_num);
auto sub_ptr = (const unsigned char*)subarray;
uint64_t offset = 0;
for (unsigned d = 0; d < dim_num; ++d) {
auto r_size = 2 * array_schema_->dimension(d)->coord_size();
sub[d] = Range(&sub_ptr[offset], r_size);
offset += r_size;
}

// Rectify bound for dense arrays
if (array_schema_->dense()) {
auto cell_num = array_schema_->domain()->cell_num(subarray);
auto cell_num = array_schema_->domain()->cell_num(sub);
// `cell_num` becomes 0 when `subarray` is huge, leading to a
// `uint64_t` overflow.
if (cell_num != 0) {
for (auto& it : *max_buffer_sizes) {
for (auto& it : *buffer_sizes) {
if (array_schema_->var_size(it.first)) {
it.second.first = cell_num * constants::cell_var_offset_size;
it.second.second +=
Expand All @@ -809,12 +766,12 @@ Status Array::compute_max_buffer_sizes(

// Rectify bound for sparse arrays with integer domain, without duplicates
if (!array_schema_->dense() && !array_schema_->allows_dups() &&
datatype_is_integer(array_schema_->domain()->type())) {
auto cell_num = array_schema_->domain()->cell_num(subarray);
array_schema_->domain()->all_dims_int()) {
auto cell_num = array_schema_->domain()->cell_num(sub);
// `cell_num` becomes 0 when `subarray` is huge, leading to a
// `uint64_t` overflow.
if (cell_num != 0) {
for (auto& it : *max_buffer_sizes) {
for (auto& it : *buffer_sizes) {
if (!array_schema_->var_size(it.first)) {
// Check for overflow
uint64_t new_size = cell_num * array_schema_->cell_size(it.first);
Expand Down
20 changes: 0 additions & 20 deletions tiledb/sm/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -394,26 +394,6 @@ class Array {
std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
max_buffer_sizes_) const;

/**
* Computes an upper bound on the buffer sizes required for a read
* query, for a given subarray and set of attributes. Note that
* the attributes are already set in `max_buffer_sizes`
*
* @tparam T The domain type
* @param subarray The subarray to focus on. Note that it must have the same
* underlying type as the array domain.
* @param max_buffer_sizes The buffer sizes to be retrieved. This is a map
* from an attribute to a size pair. For fixed-sized attributes, only
* the first size is useful. For var-sized attributes, the first size
* is the offsets size, and the second size is the values size.
* @return Status
*/
template <class T>
Status compute_max_buffer_sizes(
const T* subarray,
std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
max_buffer_sizes) const;

/**
* Load array metadata, handles remote arrays vs non-remote arrays
* @return Status
Expand Down
52 changes: 19 additions & 33 deletions tiledb/sm/array_schema/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ ArraySchema::ArraySchema(const ArraySchema* array_schema) {
cell_order_ = array_schema->cell_order_;
cell_var_offsets_filters_ = array_schema->cell_var_offsets_filters_;
coords_filters_ = array_schema->coords_filters_;
coords_size_ = array_schema->coords_size_;
tile_order_ = array_schema->tile_order_;
version_ = array_schema->version_;

Expand Down Expand Up @@ -174,9 +173,13 @@ Layout ArraySchema::cell_order() const {
}

uint64_t ArraySchema::cell_size(const std::string& name) const {
// Special zipped coordinates
if (name == constants::coords)
return domain_->dim_num() * datatype_size(coords_type());
// Special zipped coordinates attribute
if (name == constants::coords) {
auto dim_num = domain_->dim_num();
assert(dim_num > 0);
auto coord_size = domain_->dimension(0)->coord_size();
return dim_num * coord_size;
}

// Attribute
auto attr_it = attribute_map_.find(name);
Expand Down Expand Up @@ -228,8 +231,8 @@ Status ArraySchema::check() const {
"Array schema check failed; No dimensions provided"));

if (array_type_ == ArrayType::DENSE) {
if (domain_->type() == Datatype::FLOAT32 ||
domain_->type() == Datatype::FLOAT64) {
auto type = domain_->dimension(0)->type();
if (datatype_is_real(type)) {
return LOG_STATUS(
Status::ArraySchemaError("Array schema check failed; Dense arrays "
"cannot have floating point domains"));
Expand Down Expand Up @@ -298,14 +301,6 @@ int ArraySchema::coords_compression_level() const {
return (compressor == nullptr) ? -1 : compressor->compression_level();
}

uint64_t ArraySchema::coords_size() const {
return coords_size_;
}

Datatype ArraySchema::coords_type() const {
return domain_->type();
}

bool ArraySchema::dense() const {
return array_type_ == ArrayType::DENSE;
}
Expand Down Expand Up @@ -429,9 +424,9 @@ Layout ArraySchema::tile_order() const {
}

Datatype ArraySchema::type(const std::string& name) const {
// Special zipped coordinates
// Special zipped coordinates attribute
if (name == constants::coords)
return domain_->type();
return domain_->dimension(0)->type();

// Attribute
auto attr_it = attribute_map_.find(name);
Expand Down Expand Up @@ -582,10 +577,6 @@ Status ArraySchema::init() {
// Initialize domain
RETURN_NOT_OK(domain_->init(cell_order_, tile_order_));

// Set cell sizes
// TODO: set upon setting domain
coords_size_ = domain_->dim_num() * datatype_size(coords_type());

// Success
return Status::Ok();
}
Expand Down Expand Up @@ -626,11 +617,13 @@ Status ArraySchema::set_domain(Domain* domain) {
if (array_type_ == ArrayType::DENSE) {
RETURN_NOT_OK(domain->set_null_tile_extents_to_range());

if (domain->type() == Datatype::FLOAT32 ||
domain->type() == Datatype::FLOAT64) {
return LOG_STATUS(
Status::ArraySchemaError("Cannot set domain; Dense arrays "
"cannot have floating point domains"));
if (domain->dim_num() > 0) {
auto type = domain->dimension(0)->type();
if (type == Datatype::FLOAT32 || type == Datatype::FLOAT64) {
return LOG_STATUS(
Status::ArraySchemaError("Cannot set domain; Dense arrays "
"cannot have floating point domains"));
}
}
}

Expand All @@ -639,8 +632,7 @@ Status ArraySchema::set_domain(Domain* domain) {
domain_ = new Domain(domain);

// Potentially change the default coordinates compressor
if ((domain_->type() == Datatype::FLOAT32 ||
domain_->type() == Datatype::FLOAT64) &&
if (domain_->all_dims_real() &&
coords_compression() == Compressor::DOUBLE_DELTA) {
auto* filter = coords_filters_.get_filter<CompressionFilter>();
assert(filter != nullptr);
Expand Down Expand Up @@ -686,12 +678,6 @@ bool ArraySchema::check_attribute_dimension_names() const {
}

bool ArraySchema::check_double_delta_compressor() const {
// Check coordinates
if ((domain_->type() == Datatype::FLOAT32 ||
domain_->type() == Datatype::FLOAT64) &&
coords_compression() == Compressor::DOUBLE_DELTA)
return false;

// Check attributes
for (auto attr : attributes_) {
if ((attr->type() == Datatype::FLOAT32 ||
Expand Down
9 changes: 0 additions & 9 deletions tiledb/sm/array_schema/array_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,6 @@ class ArraySchema {
/** Returns the compression level of the coordinates. */
int coords_compression_level() const;

/** Returns the coordinates size. */
uint64_t coords_size() const;

/** Returns the type of the coordinates. */
Datatype coords_type() const;

/** True if the array is dense. */
bool dense() const;

Expand Down Expand Up @@ -362,9 +356,6 @@ class ArraySchema {
/** The filter pipeline run on coordinate tiles. */
FilterPipeline coords_filters_;

/** The size (in bytes) of the coordinates. */
uint64_t coords_size_;

/** It maps each dimension name to the corresponding dimension object. */
std::unordered_map<std::string, const Dimension*> dim_map_;

Expand Down
57 changes: 28 additions & 29 deletions tiledb/sm/array_schema/domain.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,27 +121,35 @@ Status Domain::add_dimension(const Dimension* dim) {
return Status::Ok();
}

template <class T>
uint64_t Domain::cell_num(const T* domain) const {
if (&typeid(T) == &typeid(float) || &typeid(T) == &typeid(double))
return 0;
bool Domain::all_dims_int() const {
for (const auto& dim : dimensions_) {
if (!datatype_is_integer(dim->type()))
return false;
}

uint64_t cell_num = 1, range, prod;
for (unsigned i = 0; i < dim_num_; ++i) {
// The code below essentially computes
// cell_num *= domain[2 * i + 1] - domain[2 * i] + 1;
// while performing overflow checks
range = domain[2 * i + 1] - domain[2 * i];
if (range == std::numeric_limits<uint64_t>::max()) // overflow
return 0;
++range;
prod = range * cell_num;
if (prod / range != cell_num) // Overflow
return 0;
cell_num = prod;
return true;
}

bool Domain::all_dims_real() const {
for (const auto& dim : dimensions_) {
if (!datatype_is_real(dim->type()))
return false;
}

return cell_num;
return true;
}

bool Domain::all_dims_same_type() const {
if (dim_num_ == 0)
return true;

auto type = dimensions_[0]->type();
for (unsigned d = 1; d < dim_num_; ++d) {
if (dimensions_[d]->type() != type)
return false;
}

return true;
}

uint64_t Domain::cell_num_per_tile() const {
Expand Down Expand Up @@ -644,9 +652,11 @@ int Domain::tile_order_cmp(
return tile_order_cmp_func_[dim_idx](dim, coord_a, coord_b);
}

/*
Datatype Domain::type() const {
return type_;
}
*/

/* ****************************** */
/* PRIVATE METHODS */
Expand Down Expand Up @@ -1284,17 +1294,6 @@ uint64_t Domain::get_tile_pos_row(const T* domain, const T* tile_coords) const {
}

// Explicit template instantiations
template uint64_t Domain::cell_num<int8_t>(const int8_t* domain) const;
template uint64_t Domain::cell_num<uint8_t>(const uint8_t* domain) const;
template uint64_t Domain::cell_num<int16_t>(const int16_t* domain) const;
template uint64_t Domain::cell_num<uint16_t>(const uint16_t* domain) const;
template uint64_t Domain::cell_num<int>(const int* domain) const;
template uint64_t Domain::cell_num<unsigned>(const unsigned* domain) const;
template uint64_t Domain::cell_num<int64_t>(const int64_t* domain) const;
template uint64_t Domain::cell_num<uint64_t>(const uint64_t* domain) const;
template uint64_t Domain::cell_num<float>(const float* domain) const;
template uint64_t Domain::cell_num<double>(const double* domain) const;

template Status Domain::get_cell_pos<int>(
const int* coords, uint64_t* pos) const;
template Status Domain::get_cell_pos<int64_t>(
Expand Down
Loading

0 comments on commit 959d6de

Please sign in to comment.