From 48b6c5cf2d616b9419bd011d8e4a7473be3602e6 Mon Sep 17 00:00:00 2001 From: Stavros Papadopoulos Date: Tue, 17 Dec 2019 12:15:30 -0500 Subject: [PATCH] Split coordinates file into multiple files, one per dimension. Make appropriate changes to Reader. Ensure backwards compatibility. --- HISTORY.md | 6 + test/src/unit-backwards_compat.cc | 2 +- test/src/unit-capi-consolidation.cc | 5 +- test/src/unit-cppapi-array.cc | 7 +- test/src/unit-cppapi-schema.cc | 4 +- tiledb/sm/array_schema/array_schema.cc | 198 ++- tiledb/sm/array_schema/array_schema.h | 39 +- tiledb/sm/array_schema/dimension.cc | 11 + tiledb/sm/array_schema/dimension.h | 7 + tiledb/sm/filter/filter_pipeline.cc | 4 +- tiledb/sm/fragment/fragment_metadata.cc | 631 ++++----- tiledb/sm/fragment/fragment_metadata.h | 307 +++-- tiledb/sm/misc/constants.cc | 2 +- tiledb/sm/misc/utils.cc | 20 +- tiledb/sm/misc/utils.h | 12 +- tiledb/sm/query/reader.cc | 155 ++- tiledb/sm/query/reader.h | 19 +- tiledb/sm/query/writer.cc | 1269 ++++++------------ tiledb/sm/query/writer.h | 315 ++--- tiledb/sm/storage_manager/consolidator.cc | 7 +- tiledb/sm/storage_manager/storage_manager.cc | 8 +- 21 files changed, 1301 insertions(+), 1727 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index d316bcec3421..2c8bf6a7ec05 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,11 @@ # In Progress +## Disk Format + +* Removed file __coords.tdb that stored the zipped coordinates in sparse fragments +* Now storing the coordinate tiles on each dimension in separate files +* Changed fragment name format from `__t1_t2_uuid` to `__t1_t2_uuid_<version>`. 
That was necessary for backwards compatibility + ## New features ## Improvements diff --git a/test/src/unit-backwards_compat.cc b/test/src/unit-backwards_compat.cc index 0a8b96759d7c..c5a3d6b0b255 100644 --- a/test/src/unit-backwards_compat.cc +++ b/test/src/unit-backwards_compat.cc @@ -383,7 +383,7 @@ TEST_CASE( break; } case TILEDB_UINT8: { - REQUIRE(static_cast(buffer.second)[0] == 1); + REQUIRE(static_cast(buffer.second)[0] == 1); break; } case TILEDB_INT16: { diff --git a/test/src/unit-capi-consolidation.cc b/test/src/unit-capi-consolidation.cc index d522dfed8714..1549bebfb212 100644 --- a/test/src/unit-capi-consolidation.cc +++ b/test/src/unit-capi-consolidation.cc @@ -3796,8 +3796,7 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( ConsolidationFx, "C API: Test advanced consolidation, consolidatable #1", - "[capi], [consolidation], [consolidation-adv], " - "[consolidation-adv-consolidatable-1]") { + "[capi][consolidation][adv][consolidatable-1]") { remove_dense_vector(); create_dense_vector(); write_dense_vector_consolidatable_1(); @@ -3821,7 +3820,7 @@ TEST_CASE_METHOD( REQUIRE(rc == TILEDB_OK); REQUIRE(error == nullptr); rc = tiledb_config_set( - config, "sm.consolidation.step_size_ratio", "0.6", &error); + config, "sm.consolidation.step_size_ratio", "0.7", &error); REQUIRE(rc == TILEDB_OK); REQUIRE(error == nullptr); diff --git a/test/src/unit-cppapi-array.cc b/test/src/unit-cppapi-array.cc index 47645edaaef7..6e96fedd4795 100644 --- a/test/src/unit-cppapi-array.cc +++ b/test/src/unit-cppapi-array.cc @@ -85,7 +85,7 @@ struct CPPArrayFx { VFS vfs; }; -TEST_CASE("Config", "[cppapi]") { +TEST_CASE("Config", "[cppapi][config]") { // Primarily to instansiate operator[]/= template tiledb::Config cfg; cfg["vfs.s3.region"] = "us-east-1a"; @@ -94,7 +94,7 @@ TEST_CASE("Config", "[cppapi]") { CHECK((std::string)cfg["vfs.s3.use_virtual_addressing"] == "true"); } -TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi]") { +TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", 
"[cppapi][basic]") { SECTION("Dimensions") { ArraySchema schema(ctx, "cpp_unit_array"); CHECK(schema.domain().ndim() == 2); @@ -337,8 +337,7 @@ TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi]") { } TEST_CASE( - "C++ API: Incorrect buffer size and offsets", - "[cppapi], [invalid-offsets]") { + "C++ API: Incorrect buffer size and offsets", "[cppapi][invalid-offsets]") { const std::string array_name_1d = "cpp_unit_array_1d"; Context ctx; VFS vfs(ctx); diff --git a/test/src/unit-cppapi-schema.cc b/test/src/unit-cppapi-schema.cc index 44c8c46d6dd9..10b2135f7426 100644 --- a/test/src/unit-cppapi-schema.cc +++ b/test/src/unit-cppapi-schema.cc @@ -33,7 +33,7 @@ #include "catch.hpp" #include "tiledb/sm/cpp_api/tiledb" -TEST_CASE("C++ API: Schema", "[cppapi]") { +TEST_CASE("C++ API: Schema", "[cppapi][schema]") { using namespace tiledb; Context ctx; @@ -161,7 +161,7 @@ TEST_CASE("C++ API: Schema", "[cppapi]") { } } -TEST_CASE("C++ API: Test schema virtual destructors", "[cppapi]") { +TEST_CASE("C++ API: Test schema virtual destructors", "[cppapi][schema]") { tiledb::Context ctx; // Test that this generates no compiler warnings. 
std::unique_ptr schema; diff --git a/tiledb/sm/array_schema/array_schema.cc b/tiledb/sm/array_schema/array_schema.cc index f7158a3f41f5..070ac09f059b 100644 --- a/tiledb/sm/array_schema/array_schema.cc +++ b/tiledb/sm/array_schema/array_schema.cc @@ -89,7 +89,6 @@ ArraySchema::ArraySchema(const ArraySchema* array_schema) { capacity_ = array_schema->capacity_; cell_order_ = array_schema->cell_order_; - cell_sizes_ = array_schema->cell_sizes_; cell_var_offsets_filters_ = array_schema->cell_var_offsets_filters_; coords_filters_ = array_schema->coords_filters_; coords_size_ = array_schema->coords_size_; @@ -98,17 +97,11 @@ ArraySchema::ArraySchema(const ArraySchema* array_schema) { set_domain(array_schema->domain_); + attribute_map_.clear(); for (auto attr : array_schema->attributes_) { if (attr->name() != constants::key_attr_name) add_attribute(attr, false); } - for (const auto& attr : attributes_) - attribute_map_[attr->name()] = attr; - auto dim_num = array_schema->dim_num(); - for (unsigned d = 0; d < dim_num; ++d) { - auto dim = dimension(d); - dim_map_[dim->name()] = dim; - } } ArraySchema::~ArraySchema() { @@ -133,16 +126,10 @@ const Attribute* ArraySchema::attribute(unsigned int id) const { return nullptr; } -const Attribute* ArraySchema::attribute(std::string name) const { - bool anonymous = name.empty(); - unsigned int nattr = attribute_num(); - for (unsigned int i = 0; i < nattr; i++) { - auto attr = attribute(i); - if ((attr->name() == name) || (anonymous && attr->is_anonymous())) { - return attr; - } - } - return nullptr; +const Attribute* ArraySchema::attribute(const std::string& name) const { + auto it = + attribute_map_.find(name.empty() ? constants::default_attr_name : name); + return it == attribute_map_.end() ? 
nullptr : it->second; } Status ArraySchema::attribute_name_normalized( @@ -188,16 +175,45 @@ Layout ArraySchema::cell_order() const { return cell_order_; } -uint64_t ArraySchema::cell_size(const std::string& attribute) const { - auto cell_size_it = cell_sizes_.find(attribute); - assert(cell_size_it != cell_sizes_.end()); - return cell_size_it->second; +uint64_t ArraySchema::cell_size(const std::string& name) const { + // Special zipped coordinates + if (name == constants::coords) + return domain_->dim_num() * datatype_size(coords_type()); + + // Attribute + auto attr_it = attribute_map_.find(name); + if (attr_it != attribute_map_.end()) { + auto attr = attr_it->second; + auto cell_val_num = attr->cell_val_num(); + return (cell_val_num == constants::var_num) ? + constants::var_size : + cell_val_num * datatype_size(attr->type()); + } + + // Dimension + auto dim_it = dim_map_.find(name); + assert(dim_it != dim_map_.end()); + auto dim = dim_it->second; + auto cell_val_num = dim->cell_val_num(); + return (cell_val_num == constants::var_num) ? 
+ constants::var_size : + cell_val_num * datatype_size(dim->type()); } -unsigned int ArraySchema::cell_val_num(const std::string& attribute) const { - auto it = attribute_map_.find(attribute); - assert(it != attribute_map_.end()); - return it->second->cell_val_num(); +unsigned int ArraySchema::cell_val_num(const std::string& name) const { + // Special zipped coordinates + if (name == constants::coords) + return 1; + + // Attribute + auto attr_it = attribute_map_.find(name); + if (attr_it != attribute_map_.end()) + return attr_it->second->cell_val_num(); + + // Dimension + auto dim_it = dim_map_.find(name); + assert(dim_it != dim_map_.end()); + return dim_it->second->cell_val_num(); } const FilterPipeline* ArraySchema::cell_var_offsets_filters() const { @@ -253,16 +269,20 @@ Status ArraySchema::check_attributes( return Status::Ok(); } -const FilterPipeline* ArraySchema::filters(const std::string& attribute) const { - auto it = attribute_map_.find(attribute); - if (it == attribute_map_.end()) { - if (attribute == constants::coords) - return coords_filters(); - assert(false); // This should never happen - return nullptr; // Return something ad hoc - } +const FilterPipeline* ArraySchema::filters(const std::string& name) const { + if (name == constants::coords) + return coords_filters(); + + // Attribute + auto attr_it = attribute_map_.find(name); + if (attr_it != attribute_map_.end()) + return attr_it->second->filters(); - return it->second->filters(); + // Dimension (if filters not set, return default coordinate filters) + auto dim_it = dim_map_.find(name); + assert(dim_it != dim_map_.end()); + auto ret = dim_it->second->filters(); + return (ret != nullptr) ? 
ret : coords_filters(); } const FilterPipeline* ArraySchema::coords_filters() const { @@ -297,15 +317,8 @@ const Dimension* ArraySchema::dimension(unsigned int i) const { } const Dimension* ArraySchema::dimension(const std::string& name) const { - bool anonymous = name.empty(); - auto dim_num = this->dim_num(); - for (unsigned d = 0; d < dim_num; ++d) { - auto dim = this->dimension(d); - if ((dim->name() == name) || (anonymous && dim->is_anonymous())) { - return dim; - } - } - return nullptr; + auto it = dim_map_.find(name.empty() ? constants::default_dim_name : name); + return it == dim_map_.end() ? nullptr : it->second; } unsigned int ArraySchema::dim_num() const { @@ -352,6 +365,14 @@ Status ArraySchema::has_attribute( return Status::Ok(); } +bool ArraySchema::is_attr(const std::string& name) const { + return this->attribute(name) != nullptr; +} + +bool ArraySchema::is_dim(const std::string& name) const { + return this->dimension(name) != nullptr; +} + // ===== FORMAT ===== // version (uint32_t) // array_type (uint8_t) @@ -404,26 +425,20 @@ Layout ArraySchema::tile_order() const { return tile_order_; } -Datatype ArraySchema::type(unsigned int i) const { - auto attribute_num = attributes_.size(); - if (i > attribute_num) { - LOG_ERROR("Cannot retrieve type; Invalid attribute id"); - assert(false); - } - if (i < attribute_num) - return attributes_[i]->type(); - return domain_->type(); -} +Datatype ArraySchema::type(const std::string& name) const { + // Special zipped coordinates + if (name == constants::coords) + return domain_->type(); -Datatype ArraySchema::type(const std::string& attribute) const { - auto it = attribute_map_.find(attribute); - if (it == attribute_map_.end()) { - if (attribute == constants::coords) - return domain_->type(); - assert(false); // This should never happen - return Datatype::INT8; // Return something ad hoc - } - return it->second->type(); + // Attribute + auto attr_it = attribute_map_.find(name); + if (attr_it != 
attribute_map_.end()) + return attr_it->second->type(); + + // Dimension + auto dim_it = dim_map_.find(name); + assert(dim_it != dim_map_.end()); + return dim_it->second->type(); } bool ArraySchema::var_size(const std::string& name) const { @@ -474,7 +489,10 @@ Status ArraySchema::add_attribute(const Attribute* attr, bool check_special) { } else { new_attr = new Attribute(attr); } + attributes_.emplace_back(new_attr); + attribute_map_[new_attr->name()] = new_attr; + return Status::Ok(); } @@ -530,6 +548,12 @@ Status ArraySchema::deserialize(ConstBuffer* buff) { auto attr = new Attribute(); RETURN_NOT_OK_ELSE(attr->deserialize(buff), delete attr); attributes_.emplace_back(attr); + attribute_map_[attr->name()] = attr; + } + auto dim_num = domain()->dim_num(); + for (unsigned d = 0; d < dim_num; ++d) { + auto dim = dimension(d); + dim_map_[dim->name()] = dim; } // Initialize the rest of the object members @@ -550,21 +574,9 @@ Status ArraySchema::init() { // Initialize domain RETURN_NOT_OK(domain_->init(cell_order_, tile_order_)); - attribute_map_.clear(); - for (const auto& attr : attributes_) - attribute_map_[attr->name()] = attr; - dim_map_.clear(); - auto dim_num = domain_->dim_num(); - for (unsigned d = 0; d < dim_num; ++d) { - auto dim = dimension(d); - dim_map_[dim->name()] = dim; - } - // Set cell sizes - for (auto& attr : attributes_) - cell_sizes_[attr->name()] = compute_cell_size(attr->name()); - cell_sizes_[constants::coords] = compute_cell_size(constants::coords); - coords_size_ = dim_num * datatype_size(coords_type()); + // TODO: set upon setting domain + coords_size_ = domain_->dim_num() * datatype_size(coords_type()); // Success return Status::Ok(); @@ -618,6 +630,15 @@ Status ArraySchema::set_domain(Domain* domain) { filter->set_compressor(constants::real_coords_compression); filter->set_compression_level(-1); } + + // Create dimension map + dim_map_.clear(); + auto dim_num = domain_->dim_num(); + for (unsigned d = 0; d < dim_num; ++d) { + auto dim = 
dimension(d); + dim_map_[dim->name()] = dim; + } + return Status::Ok(); } @@ -676,28 +697,5 @@ void ArraySchema::clear() { domain_ = nullptr; } -uint64_t ArraySchema::compute_cell_size(const std::string& attribute) const { - // Handle coordinates first - if (attribute == constants::coords) { - auto dim_num = domain_->dim_num(); - auto type = coords_type(); - return dim_num * datatype_size(type); - } - - // Handle attributes - auto attr_it = attribute_map_.find(attribute); - assert(attr_it != attribute_map_.end()); - auto attr = attr_it->second; - - // For easy reference - auto cell_val_num = attr->cell_val_num(); - auto type = attr->type(); - - // Variable-sized cell - return (cell_val_num == constants::var_num) ? - constants::var_size : - cell_val_num * datatype_size(type); -} - } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/array_schema/array_schema.h b/tiledb/sm/array_schema/array_schema.h index 89c10daf5390..24dee2b49352 100644 --- a/tiledb/sm/array_schema/array_schema.h +++ b/tiledb/sm/array_schema/array_schema.h @@ -100,7 +100,7 @@ class ArraySchema { * Returns a constant pointer to the selected attribute (nullptr if it * does not exist). */ - const Attribute* attribute(std::string name) const; + const Attribute* attribute(const std::string& name) const; /** * Returns the given attribute name as it would be stored in the schema. E.g. @@ -143,11 +143,11 @@ class ArraySchema { /** Returns the cell order. */ Layout cell_order() const; - /** Returns the size of cell on the input attribute. */ - uint64_t cell_size(const std::string& attribute) const; + /** Returns the size of cell on the input attribute/dimension. */ + uint64_t cell_size(const std::string& name) const; - /** Returns the number of values per cell of the input attribute. */ - unsigned int cell_val_num(const std::string& attribute) const; + /** Returns the number of values per cell of the input attribute/dimension. 
*/ + unsigned int cell_val_num(const std::string& name) const; /** * Return a pointer to the pipeline used for offsets of variable-sized cells. @@ -170,8 +170,11 @@ class ArraySchema { */ Status check_attributes(const std::vector& attributes) const; - /** Return the filter pipeline for the given attribute. */ - const FilterPipeline* filters(const std::string& attribute) const; + /** + * Return the filter pipeline for the given attribute/dimension (can be + * TILEDB_COORDS). + */ + const FilterPipeline* filters(const std::string& name) const; /** Return a pointer to the pipeline used for coordinates. */ const FilterPipeline* coords_filters() const; @@ -216,6 +219,12 @@ class ArraySchema { */ Status has_attribute(const std::string& name, bool* has_attr) const; + // Returns true if the input name is an attribute + bool is_attr(const std::string& name) const; + + // Returns true if the input name is a dimension + bool is_dim(const std::string& name) const; + /** * Serializes the array schema object into a buffer. * @@ -227,11 +236,11 @@ class ArraySchema { /** Returns the tile order. */ Layout tile_order() const; - /** Returns the type of the i-th attribute. */ - Datatype type(unsigned int i) const; - - /** Returns the type of the input attribute (could be coordinates). */ - Datatype type(const std::string& attribute) const; + /** + * Returns the type of the input attribute/dimension (could also be + * TILEDB_COORDS). + */ + Datatype type(const std::string& name) const; /** * Returns *true* if the input attribute/dimension has variable-sized @@ -326,9 +335,6 @@ class ArraySchema { */ Layout cell_order_; - /** Stores the size of every attribute (plus coordinates). */ - std::unordered_map cell_sizes_; - /** The filter pipeline run on offset tiles for var-length attributes. */ FilterPipeline cell_var_offsets_filters_; @@ -372,9 +378,6 @@ class ArraySchema { /** Clears all members. Use with caution! 
*/ void clear(); - - /** Computes and returns the size of an attribute (or coordinates). */ - uint64_t compute_cell_size(const std::string& attribute) const; }; } // namespace sm diff --git a/tiledb/sm/array_schema/dimension.cc b/tiledb/sm/array_schema/dimension.cc index d6ac2cabcf83..70299e3fa5df 100644 --- a/tiledb/sm/array_schema/dimension.cc +++ b/tiledb/sm/array_schema/dimension.cc @@ -89,6 +89,11 @@ Dimension::~Dimension() { /* API */ /* ********************************* */ +unsigned int Dimension::cell_val_num() const { + // TODO: in a future PR the user will be able to set this value + return 1; +} + uint64_t Dimension::coord_size() const { return datatype_size(type_); } @@ -211,6 +216,12 @@ void Dimension::dump(FILE* out) const { fprintf(out, "- Tile extent: %s\n", tile_extent_s.c_str()); } +const FilterPipeline* Dimension::filters() const { + // TODO: in a future PR, the user will be able to set separate + // TODO: filters for each dimension + return nullptr; +} + const std::string& Dimension::name() const { return name_; } diff --git a/tiledb/sm/array_schema/dimension.h b/tiledb/sm/array_schema/dimension.h index 03a2a3d7b1f5..250e32fca7a9 100644 --- a/tiledb/sm/array_schema/dimension.h +++ b/tiledb/sm/array_schema/dimension.h @@ -38,6 +38,7 @@ #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/enums/compressor.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/filter/filter_pipeline.h" #include "tiledb/sm/misc/logger.h" #include "tiledb/sm/misc/status.h" @@ -76,6 +77,9 @@ class Dimension { /* API */ /* ********************************* */ + /** Returns the number of values per cell. */ + unsigned int cell_val_num() const; + /** Returns the size (in bytes) of a coordinate in this dimension. */ uint64_t coord_size() const; @@ -97,6 +101,9 @@ class Dimension { /** Dumps the dimension contents in ASCII form in the selected output. */ void dump(FILE* out) const; + /** Returns the filter pipeline of this dimension. 
*/ + const FilterPipeline* filters() const; + /** Returns the dimension name. */ const std::string& name() const; diff --git a/tiledb/sm/filter/filter_pipeline.cc b/tiledb/sm/filter/filter_pipeline.cc index 681ccde3bd52..ce0995e3cb29 100644 --- a/tiledb/sm/filter/filter_pipeline.cc +++ b/tiledb/sm/filter/filter_pipeline.cc @@ -428,8 +428,10 @@ Status FilterPipeline::run_reverse(Tile* tile) const { if (tile->stores_coords()) { // Note that format version < 2 only split the coordinates when compression // was used. See https://github.com/TileDB-Inc/TileDB/issues/1053 + // For format version > 4, a tile never stores coordinates bool using_compression = get_filter() != nullptr; - if (tile->format_version() > 1 || using_compression) { + auto version = tile->format_version(); + if (version > 1 || using_compression) { tile->zip_coordinates(); } } diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index 70b29adee2da..59651df5d204 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -76,16 +76,13 @@ FragmentMetadata::FragmentMetadata( auto attributes = array_schema_->attributes(); for (unsigned i = 0; i < attributes.size(); ++i) { auto attr_name = attributes[i]->name(); - attribute_idx_map_[attr_name] = i; - attribute_uri_map_[attr_name] = - fragment_uri_.join_path(attr_name + constants::file_suffix); - if (attributes[i]->var_size()) - attribute_var_uri_map_[attr_name] = - fragment_uri_.join_path(attr_name + "_var" + constants::file_suffix); + idx_map_[attr_name] = i; + } + idx_map_[constants::coords] = array_schema_->attribute_num(); + for (unsigned i = 0; i < array_schema_->dim_num(); ++i) { + auto dim_name = array_schema_->dimension(i)->name(); + idx_map_[dim_name] = array_schema_->attribute_num() + 1 + i; } - attribute_idx_map_[constants::coords] = array_schema_->attribute_num(); - attribute_uri_map_[constants::coords] = - fragment_uri_.join_path(constants::coords + 
constants::file_suffix); } FragmentMetadata::~FragmentMetadata() { @@ -171,29 +168,35 @@ void FragmentMetadata::set_tile_index_base(uint64_t tile_base) { } void FragmentMetadata::set_tile_offset( - const std::string& attribute, uint64_t tile, uint64_t tile_size) { - auto attribute_id = attribute_idx_map_[attribute]; - tile += tile_index_base_; - assert(tile < tile_offsets_[attribute_id].size()); - tile_offsets_[attribute_id][tile] = next_tile_offsets_[attribute_id]; - next_tile_offsets_[attribute_id] += tile_size; + const std::string& name, uint64_t tid, uint64_t step) { + auto it = idx_map_.find(name); + assert(it != idx_map_.end()); + auto idx = it->second; + tid += tile_index_base_; + assert(tid < tile_offsets_[idx].size()); + tile_offsets_[idx][tid] = next_tile_offsets_[idx]; + next_tile_offsets_[idx] += step; } void FragmentMetadata::set_tile_var_offset( - const std::string& attribute, uint64_t tile, uint64_t step) { - auto attribute_id = attribute_idx_map_[attribute]; - tile += tile_index_base_; - assert(tile < tile_var_offsets_[attribute_id].size()); - tile_var_offsets_[attribute_id][tile] = next_tile_var_offsets_[attribute_id]; - next_tile_var_offsets_[attribute_id] += step; + const std::string& name, uint64_t tid, uint64_t step) { + auto it = idx_map_.find(name); + assert(it != idx_map_.end()); + auto idx = it->second; + tid += tile_index_base_; + assert(tid < tile_var_offsets_[idx].size()); + tile_var_offsets_[idx][tid] = next_tile_var_offsets_[idx]; + next_tile_var_offsets_[idx] += step; } void FragmentMetadata::set_tile_var_size( - const std::string& attribute, uint64_t tile, uint64_t size) { - auto attribute_id = attribute_idx_map_[attribute]; - tile += tile_index_base_; - assert(tile < tile_var_sizes_[attribute_id].size()); - tile_var_sizes_[attribute_id][tile] = size; + const std::string& name, uint64_t tid, uint64_t size) { + auto it = idx_map_.find(name); + assert(it != idx_map_.end()); + auto idx = it->second; + tid += tile_index_base_; + 
assert(tid < tile_var_sizes_[idx].size()); + tile_var_sizes_[idx][tid] = size; } uint64_t FragmentMetadata::cell_num(uint64_t tile_pos) const { @@ -245,7 +248,6 @@ Status FragmentMetadata::add_max_buffer_sizes_dense( return Status::Ok(); } -// TODO (sp): remove in 1.7 template Status FragmentMetadata::add_max_buffer_sizes_sparse( const EncryptionKey& encryption_key, @@ -358,30 +360,9 @@ Status FragmentMetadata::get_tile_overlap( return Status::Ok(); } -template -uint64_t FragmentMetadata::get_tile_pos(const T* tile_coords) const { - // For easy reference - auto dim_num = array_schema_->dim_num(); - - // Get tile subarray of the expanded non-empty domain - std::vector tile_subarray; - tile_subarray.resize(2 * dim_num); - array_schema_->domain()->get_tile_domain((T*)domain_, &tile_subarray[0]); - - // Normalize tile coords such in tile subarray - std::vector norm_tile_coords; - norm_tile_coords.resize(dim_num); - for (unsigned i = 0; i < dim_num; ++i) - norm_tile_coords[i] = tile_coords[i] - tile_subarray[2 * i]; - - // Return tile pos in tile subarray - return array_schema_->domain()->get_tile_pos( - (T*)domain_, &norm_tile_coords[0]); -} - Status FragmentMetadata::init(const void* non_empty_domain) { // For easy reference - unsigned int attribute_num = array_schema_->attribute_num(); + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; auto domain = array_schema_->domain(); // Sanity check @@ -412,19 +393,19 @@ Status FragmentMetadata::init(const void* non_empty_domain) { last_tile_cell_num_ = 0; // Initialize tile offsets - tile_offsets_.resize(attribute_num + 1); - next_tile_offsets_.resize(attribute_num + 1); - for (unsigned int i = 0; i < attribute_num + 1; ++i) + tile_offsets_.resize(num); + next_tile_offsets_.resize(num); + for (unsigned int i = 0; i < num; ++i) next_tile_offsets_[i] = 0; // Initialize variable tile offsets - tile_var_offsets_.resize(attribute_num); - next_tile_var_offsets_.resize(attribute_num); - for (unsigned int i = 
0; i < attribute_num; ++i) + tile_var_offsets_.resize(num); + next_tile_var_offsets_.resize(num); + for (unsigned int i = 0; i < num; ++i) next_tile_var_offsets_[i] = 0; // Initialize variable tile sizes - tile_var_sizes_.resize(attribute_num); + tile_var_sizes_.resize(num); return Status::Ok(); } @@ -439,23 +420,32 @@ Status FragmentMetadata::load(const EncryptionKey& encryption_key) { RETURN_NOT_OK(storage_manager_->vfs()->file_size(meta_uri, &meta_file_size_)); // Get fragment name version - std::string uri_str = fragment_uri_.c_str(); - if (uri_str.back() == '/') - uri_str.pop_back(); - std::string name = URI(uri_str).last_path_part(); uint32_t f_version; - RETURN_NOT_OK(utils::parse::get_fragment_name_version(name, &f_version)); - + RETURN_NOT_OK( + utils::parse::get_fragment_name_version(fragment_uri_, &f_version)); + + // Note: The fragment name version is different from the fragment format + // version. + // - Version 1 corresponds to format versions 1 and 2 + // * __uuid_{_t2} + // - Version 2 corresponds to version 3 and 4 + // * __t1_t2_uuid + // - Version 3 corresponds to version 5 or higher + // * __t1_t2_uuid_version if (f_version == 1) - return load_v2(encryption_key); - return load_v3(encryption_key); + return load_v1_v2(encryption_key); + return load_v3_or_higher(encryption_key); +} + +const std::vector FragmentMetadata::mbrs() const { + return mbrs_; } Status FragmentMetadata::store(const EncryptionKey& encryption_key) { auto array_uri = this->array_uri(); auto fragment_metadata_uri = fragment_uri_.join_path(constants::fragment_metadata_filename); - unsigned int attribute_num = array_schema_->attribute_num(); + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; uint64_t offset = 0, nbytes; // Do nothing if fragment directory does not exist. 
The fragment directory @@ -470,68 +460,41 @@ Status FragmentMetadata::store(const EncryptionKey& encryption_key) { // Store R-Tree gt_offsets_.rtree_ = offset; - auto st = store_rtree(encryption_key, &nbytes); + RETURN_NOT_OK_ELSE(store_rtree(encryption_key, &nbytes), clean_up()); offset += nbytes; - if (!st.ok()) { - storage_manager_->close_file(fragment_metadata_uri); - storage_manager_->vfs()->remove_file(fragment_metadata_uri); - storage_manager_->array_xunlock(array_uri); - return st; - } // Store tile offsets - gt_offsets_.tile_offsets_.resize(attribute_num + 1); - for (unsigned int i = 0; i < attribute_num + 1; ++i) { + gt_offsets_.tile_offsets_.resize(num); + for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_offsets_[i] = offset; - st = store_tile_offsets(i, encryption_key, &nbytes); + RETURN_NOT_OK_ELSE( + store_tile_offsets(i, encryption_key, &nbytes), clean_up()); offset += nbytes; - if (!st.ok()) { - storage_manager_->close_file(fragment_metadata_uri); - storage_manager_->vfs()->remove_file(fragment_metadata_uri); - storage_manager_->array_xunlock(array_uri); - return st; - } } // Store tile var offsets - gt_offsets_.tile_var_offsets_.resize(attribute_num); - for (unsigned int i = 0; i < attribute_num; ++i) { + gt_offsets_.tile_var_offsets_.resize(num); + for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_offsets_[i] = offset; - st = store_tile_var_offsets(i, encryption_key, &nbytes); + RETURN_NOT_OK_ELSE( + store_tile_var_offsets(i, encryption_key, &nbytes), clean_up()); offset += nbytes; - if (!st.ok()) { - storage_manager_->close_file(fragment_metadata_uri); - storage_manager_->vfs()->remove_file(fragment_metadata_uri); - storage_manager_->array_xunlock(array_uri); - return st; - } } // Store tile var sizes - gt_offsets_.tile_var_sizes_.resize(attribute_num); - for (unsigned int i = 0; i < attribute_num; ++i) { + gt_offsets_.tile_var_sizes_.resize(num); + for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_sizes_[i] = 
offset; - st = store_tile_var_sizes(i, encryption_key, &nbytes); + RETURN_NOT_OK_ELSE( + store_tile_var_sizes(i, encryption_key, &nbytes), clean_up()); offset += nbytes; - if (!st.ok()) { - storage_manager_->close_file(fragment_metadata_uri); - storage_manager_->vfs()->remove_file(fragment_metadata_uri); - storage_manager_->array_xunlock(array_uri); - return st; - } } // Store footer - st = store_footer(encryption_key); - if (!st.ok()) { - storage_manager_->close_file(fragment_metadata_uri); - storage_manager_->vfs()->remove_file(fragment_metadata_uri); - storage_manager_->array_xunlock(array_uri); - return st; - } + RETURN_NOT_OK_ELSE(store_footer(encryption_key), clean_up()); // Close file - st = storage_manager_->close_file(fragment_metadata_uri); + auto st = storage_manager_->close_file(fragment_metadata_uri); // Unlock array auto st2 = storage_manager_->array_xunlock(array_uri); @@ -539,24 +502,18 @@ Status FragmentMetadata::store(const EncryptionKey& encryption_key) { return !st.ok() ? 
st : st2; } -const std::vector FragmentMetadata::mbrs() const { - return mbrs_; -} - const void* FragmentMetadata::non_empty_domain() const { return non_empty_domain_; } Status FragmentMetadata::set_num_tiles(uint64_t num_tiles) { - auto num_attributes = array_schema_->attribute_num(); + auto num = array_schema_->attribute_num() + 1 + array_schema_->dim_num(); - for (unsigned i = 0; i < num_attributes + 1; i++) { + for (unsigned i = 0; i < num; i++) { assert(num_tiles >= tile_offsets_[i].size()); tile_offsets_[i].resize(num_tiles, 0); - if (i < num_attributes) { - tile_var_offsets_[i].resize(num_tiles, 0); - tile_var_sizes_[i].resize(num_tiles, 0); - } + tile_var_offsets_[i].resize(num_tiles, 0); + tile_var_sizes_[i].resize(num_tiles, 0); } if (!dense_) { @@ -583,35 +540,37 @@ uint64_t FragmentMetadata::tile_num() const { return sparse_tile_num_; } -URI FragmentMetadata::attr_uri(const std::string& attribute) const { - return attribute_uri_map_.at(attribute); +URI FragmentMetadata::uri(const std::string& name) const { + return fragment_uri_.join_path(name + constants::file_suffix); } -URI FragmentMetadata::attr_var_uri(const std::string& attribute) const { - return attribute_var_uri_map_.at(attribute); +URI FragmentMetadata::var_uri(const std::string& name) const { + return fragment_uri_.join_path(name + "_var" + constants::file_suffix); } Status FragmentMetadata::file_offset( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* offset) { - auto it = attribute_idx_map_.find(attribute); - auto attribute_id = it->second; - RETURN_NOT_OK(load_tile_offsets(encryption_key, attribute_id)); - *offset = tile_offsets_[attribute_id][tile_idx]; + auto it = idx_map_.find(name); + assert(it != idx_map_.end()); + auto idx = it->second; + RETURN_NOT_OK(load_tile_offsets(encryption_key, idx)); + *offset = tile_offsets_[idx][tile_idx]; return Status::Ok(); } Status FragmentMetadata::file_var_offset( const 
EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* offset) { - auto it = attribute_idx_map_.find(attribute); - auto attribute_id = it->second; - RETURN_NOT_OK(load_tile_var_offsets(encryption_key, attribute_id)); - *offset = tile_var_offsets_[attribute_id][tile_idx]; + auto it = idx_map_.find(name); + assert(it != idx_map_.end()); + auto idx = it->second; + RETURN_NOT_OK(load_tile_var_offsets(encryption_key, idx)); + *offset = tile_var_offsets_[idx][tile_idx]; return Status::Ok(); } @@ -621,67 +580,62 @@ const void* FragmentMetadata::mbr(uint64_t tile_idx) const { Status FragmentMetadata::persisted_tile_size( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* tile_size) { - auto it = attribute_idx_map_.find(attribute); - auto attribute_id = it->second; - RETURN_NOT_OK(load_tile_offsets(encryption_key, attribute_id)); + auto it = idx_map_.find(name); + assert(it != idx_map_.end()); + auto idx = it->second; + RETURN_NOT_OK(load_tile_offsets(encryption_key, idx)); auto tile_num = this->tile_num(); *tile_size = (tile_idx != tile_num - 1) ? 
- tile_offsets_[attribute_id][tile_idx + 1] - - tile_offsets_[attribute_id][tile_idx] : - file_sizes_[attribute_id] - tile_offsets_[attribute_id][tile_idx]; + tile_offsets_[idx][tile_idx + 1] - tile_offsets_[idx][tile_idx] : + file_sizes_[idx] - tile_offsets_[idx][tile_idx]; return Status::Ok(); } Status FragmentMetadata::persisted_tile_var_size( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* tile_size) { - auto it = attribute_idx_map_.find(attribute); - auto attribute_id = it->second; - RETURN_NOT_OK(load_tile_var_offsets(encryption_key, attribute_id)); + auto it = idx_map_.find(name); + assert(it != idx_map_.end()); + auto idx = it->second; + RETURN_NOT_OK(load_tile_var_offsets(encryption_key, idx)); auto tile_num = this->tile_num(); *tile_size = (tile_idx != tile_num - 1) ? - tile_var_offsets_[attribute_id][tile_idx + 1] - - tile_var_offsets_[attribute_id][tile_idx] : - file_var_sizes_[attribute_id] - - tile_var_offsets_[attribute_id][tile_idx]; + tile_var_offsets_[idx][tile_idx + 1] - + tile_var_offsets_[idx][tile_idx] : + file_var_sizes_[idx] - tile_var_offsets_[idx][tile_idx]; return Status::Ok(); } -Status FragmentMetadata::rtree( - const EncryptionKey& encryption_key, const RTree** rtree) { - RETURN_NOT_OK(load_rtree(encryption_key)); - *rtree = (version_ <= 2) ? nullptr : &rtree_; - return Status::Ok(); -} - uint64_t FragmentMetadata::tile_size( - const std::string& attribute, uint64_t tile_idx) const { - auto var_size = array_schema_->var_size(attribute); + const std::string& name, uint64_t tile_idx) const { + auto var_size = array_schema_->var_size(name); auto cell_num = this->cell_num(tile_idx); return (var_size) ? 
cell_num * constants::cell_var_offset_size : - cell_num * array_schema_->cell_size(attribute); + cell_num * array_schema_->cell_size(name); } Status FragmentMetadata::tile_var_size( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* tile_size) { - auto it = attribute_idx_map_.find(attribute); - auto attribute_id = it->second; - RETURN_NOT_OK(load_tile_var_sizes(encryption_key, attribute_id)); - *tile_size = tile_var_sizes_[attribute_id][tile_idx]; + auto it = idx_map_.find(name); + assert(it != idx_map_.end()); + auto idx = it->second; + RETURN_NOT_OK(load_tile_var_sizes(encryption_key, idx)); + *tile_size = tile_var_sizes_[idx][tile_idx]; + return Status::Ok(); } @@ -705,6 +659,17 @@ bool FragmentMetadata::operator<(const FragmentMetadata& metadata) const { Status FragmentMetadata::get_footer_offset_and_size( uint64_t* offset, uint64_t* size) const { + uint32_t f_version; + RETURN_NOT_OK( + utils::parse::get_fragment_name_version(fragment_uri_, &f_version)); + + if (f_version < 3) + return get_footer_offset_and_size_v3_v4(offset, size); + return get_footer_offset_and_size_v5_or_higher(offset, size); +} + +Status FragmentMetadata::get_footer_offset_and_size_v3_v4( + uint64_t* offset, uint64_t* size) const { auto attribute_num = array_schema_->attribute_num(); auto domain_size = 2 * array_schema_->coords_size(); @@ -729,6 +694,32 @@ Status FragmentMetadata::get_footer_offset_and_size( return Status::Ok(); } +Status FragmentMetadata::get_footer_offset_and_size_v5_or_higher( + uint64_t* offset, uint64_t* size) const { + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; + auto domain_size = 2 * array_schema_->coords_size(); + + // Get footer size + *size = 0; + *size += sizeof(uint32_t); // version + *size += sizeof(char); // dense + *size += sizeof(char); // null non-empty domain + *size += domain_size; // non-empty domain + *size += sizeof(uint64_t); // sparse tile num + 
*size += sizeof(uint64_t); // last tile cell num + *size += num * sizeof(uint64_t); // file sizes + *size += num * sizeof(uint64_t); // file var sizes + *size += sizeof(uint64_t); // R-Tree offset + *size += num * sizeof(uint64_t); // tile offsets + *size += num * sizeof(uint64_t); // tile var offsets + *size += num * sizeof(uint64_t); // tile var sizes + + // Get footer offset + *offset = meta_file_size_ - *size; + + return Status::Ok(); +} + template std::vector FragmentMetadata::compute_overlapping_tile_ids( const T* subarray) const { @@ -888,66 +879,66 @@ Status FragmentMetadata::load_rtree(const EncryptionKey& encryption_key) { } Status FragmentMetadata::load_tile_offsets( - const EncryptionKey& encryption_key, unsigned attr_id) { + const EncryptionKey& encryption_key, unsigned idx) { if (version_ <= 2) return Status::Ok(); std::lock_guard lock(mtx_); - if (loaded_metadata_.tile_offsets_[attr_id]) + if (loaded_metadata_.tile_offsets_[idx]) return Status::Ok(); Buffer buff; RETURN_NOT_OK(read_generic_tile_from_file( - encryption_key, gt_offsets_.tile_offsets_[attr_id], &buff)); + encryption_key, gt_offsets_.tile_offsets_[idx], &buff)); ConstBuffer cbuff(&buff); - RETURN_NOT_OK(load_tile_offsets(attr_id, &cbuff)); + RETURN_NOT_OK(load_tile_offsets(idx, &cbuff)); - loaded_metadata_.tile_offsets_[attr_id] = true; + loaded_metadata_.tile_offsets_[idx] = true; return Status::Ok(); } Status FragmentMetadata::load_tile_var_offsets( - const EncryptionKey& encryption_key, unsigned attr_id) { + const EncryptionKey& encryption_key, unsigned idx) { if (version_ <= 2) return Status::Ok(); std::lock_guard lock(mtx_); - if (loaded_metadata_.tile_var_offsets_[attr_id]) + if (loaded_metadata_.tile_var_offsets_[idx]) return Status::Ok(); Buffer buff; RETURN_NOT_OK(read_generic_tile_from_file( - encryption_key, gt_offsets_.tile_var_offsets_[attr_id], &buff)); + encryption_key, gt_offsets_.tile_var_offsets_[idx], &buff)); ConstBuffer cbuff(&buff); - 
RETURN_NOT_OK(load_tile_var_offsets(attr_id, &cbuff)); + RETURN_NOT_OK(load_tile_var_offsets(idx, &cbuff)); - loaded_metadata_.tile_var_offsets_[attr_id] = true; + loaded_metadata_.tile_var_offsets_[idx] = true; return Status::Ok(); } Status FragmentMetadata::load_tile_var_sizes( - const EncryptionKey& encryption_key, unsigned attr_id) { + const EncryptionKey& encryption_key, unsigned idx) { if (version_ <= 2) return Status::Ok(); std::lock_guard lock(mtx_); - if (loaded_metadata_.tile_var_sizes_[attr_id]) + if (loaded_metadata_.tile_var_sizes_[idx]) return Status::Ok(); Buffer buff; RETURN_NOT_OK(read_generic_tile_from_file( - encryption_key, gt_offsets_.tile_var_sizes_[attr_id], &buff)); + encryption_key, gt_offsets_.tile_var_sizes_[idx], &buff)); ConstBuffer cbuff(&buff); - RETURN_NOT_OK(load_tile_var_sizes(attr_id, &cbuff)); + RETURN_NOT_OK(load_tile_var_sizes(idx, &cbuff)); - loaded_metadata_.tile_var_sizes_[attr_id] = true; + loaded_metadata_.tile_var_sizes_[idx] = true; return Status::Ok(); } @@ -968,7 +959,7 @@ Status FragmentMetadata::load_bounding_coords(ConstBuffer* buff) { } // Get bounding coordinates void* bounding_coords; - bounding_coords_.resize(bounding_coords_num); + bounding_coords_.resize(bounding_coords_num, nullptr); for (uint64_t i = 0; i < bounding_coords_num; ++i) { bounding_coords = std::malloc(bounding_coords_size); st = buff->read(bounding_coords, bounding_coords_size); @@ -983,12 +974,19 @@ Status FragmentMetadata::load_bounding_coords(ConstBuffer* buff) { return Status::Ok(); } +Status FragmentMetadata::load_file_sizes(ConstBuffer* buff) { + if (version_ < 5) + return load_file_sizes_v1_v4(buff); + else + return load_file_sizes_v5_or_higher(buff); +} + // ===== FORMAT ===== -// file_sizes_attr#0 (uint64_t) +// file_sizes#0 (uint64_t) // ... 
-// file_sizes_attr#attribute_num (uint64_t) -Status FragmentMetadata::load_file_sizes(ConstBuffer* buff) { - unsigned int attribute_num = array_schema_->attribute_num(); +// file_sizes#attribute_num (uint64_t) +Status FragmentMetadata::load_file_sizes_v1_v4(ConstBuffer* buff) { + auto attribute_num = array_schema_->attribute_num(); file_sizes_.resize(attribute_num + 1); Status st = buff->read(&file_sizes_[0], (attribute_num + 1) * sizeof(uint64_t)); @@ -1002,11 +1000,35 @@ Status FragmentMetadata::load_file_sizes(ConstBuffer* buff) { } // ===== FORMAT ===== -// file_sizes_attr#0 (uint64_t) +// file_sizes#0 (uint64_t) // ... -// file_sizes_attr#attribute_num (uint64_t) +// file_sizes#{attribute_num+dim_num} (uint64_t) +Status FragmentMetadata::load_file_sizes_v5_or_higher(ConstBuffer* buff) { + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; + file_sizes_.resize(num); + Status st = buff->read(&file_sizes_[0], num * sizeof(uint64_t)); + + if (!st.ok()) { + return LOG_STATUS(Status::FragmentMetadataError( + "Cannot load fragment metadata; Reading tile offsets failed")); + } + + return Status::Ok(); +} + Status FragmentMetadata::load_file_var_sizes(ConstBuffer* buff) { - unsigned int attribute_num = array_schema_->attribute_num(); + if (version_ < 5) + return load_file_var_sizes_v1_v4(buff); + else + return load_file_var_sizes_v5_or_higher(buff); +} + +// ===== FORMAT ===== +// file_var_sizes#0 (uint64_t) +// ... +// file_var_sizes#attribute_num (uint64_t) +Status FragmentMetadata::load_file_var_sizes_v1_v4(ConstBuffer* buff) { + auto attribute_num = array_schema_->attribute_num(); file_var_sizes_.resize(attribute_num); Status st = buff->read(&file_var_sizes_[0], attribute_num * sizeof(uint64_t)); @@ -1018,6 +1040,23 @@ Status FragmentMetadata::load_file_var_sizes(ConstBuffer* buff) { return Status::Ok(); } +// ===== FORMAT ===== +// file_var_sizes#0 (uint64_t) +// ... 
+// file_var_sizes#{attribute_num+dim_num} (uint64_t) +Status FragmentMetadata::load_file_var_sizes_v5_or_higher(ConstBuffer* buff) { + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; + file_var_sizes_.resize(num); + Status st = buff->read(&file_var_sizes_[0], num * sizeof(uint64_t)); + + if (!st.ok()) { + return LOG_STATUS(Status::FragmentMetadataError( + "Cannot load fragment metadata; Reading tile offsets failed")); + } + + return Status::Ok(); +} + // ===== FORMAT ===== // last_tile_cell_num (uint64_t) Status FragmentMetadata::load_last_tile_cell_num(ConstBuffer* buff) { @@ -1091,6 +1130,7 @@ Status FragmentMetadata::load_non_empty_domain_v2(ConstBuffer* buff) { st = buff->read(non_empty_domain_, domain_size); if (!st.ok()) { std::free(non_empty_domain_); + // non_empty_domain_ = nullptr; return LOG_STATUS(Status::FragmentMetadataError( "Cannot load fragment metadata; Reading domain failed")); } @@ -1126,6 +1166,7 @@ Status FragmentMetadata::load_non_empty_domain_v3(ConstBuffer* buff) { st = buff->read(non_empty_domain_, domain_size); if (!st.ok()) { std::free(non_empty_domain_); + non_empty_domain_ = nullptr; return LOG_STATUS(Status::FragmentMetadataError( "Cannot load fragment metadata; Reading domain failed")); } @@ -1146,6 +1187,7 @@ Status FragmentMetadata::load_non_empty_domain_v3(ConstBuffer* buff) { return Status::Ok(); } +// Applicable only to versions 1 and 2 Status FragmentMetadata::load_tile_offsets(ConstBuffer* buff) { Status st; uint64_t tile_offsets_num = 0; @@ -1182,8 +1224,7 @@ Status FragmentMetadata::load_tile_offsets(ConstBuffer* buff) { return Status::Ok(); } -Status FragmentMetadata::load_tile_offsets( - unsigned attr_id, ConstBuffer* buff) { +Status FragmentMetadata::load_tile_offsets(unsigned idx, ConstBuffer* buff) { Status st; uint64_t tile_offsets_num = 0; @@ -1197,9 +1238,9 @@ Status FragmentMetadata::load_tile_offsets( // Get tile offsets if (tile_offsets_num != 0) { - 
tile_offsets_[attr_id].resize(tile_offsets_num); - st = buff->read( - &tile_offsets_[attr_id][0], tile_offsets_num * sizeof(uint64_t)); + tile_offsets_[idx].resize(tile_offsets_num); + st = + buff->read(&tile_offsets_[idx][0], tile_offsets_num * sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( "Cannot load fragment metadata; Reading tile offsets failed")); @@ -1256,7 +1297,7 @@ Status FragmentMetadata::load_tile_var_offsets(ConstBuffer* buff) { } Status FragmentMetadata::load_tile_var_offsets( - unsigned attr_id, ConstBuffer* buff) { + unsigned idx, ConstBuffer* buff) { Status st; uint64_t tile_var_offsets_num = 0; @@ -1270,10 +1311,9 @@ Status FragmentMetadata::load_tile_var_offsets( // Get variable tile offsets if (tile_var_offsets_num != 0) { - tile_var_offsets_[attr_id].resize(tile_var_offsets_num); + tile_var_offsets_[idx].resize(tile_var_offsets_num); st = buff->read( - &tile_var_offsets_[attr_id][0], - tile_var_offsets_num * sizeof(uint64_t)); + &tile_var_offsets_[idx][0], tile_var_offsets_num * sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( "Cannot load fragment metadata; Reading variable tile offsets " @@ -1328,8 +1368,7 @@ Status FragmentMetadata::load_tile_var_sizes(ConstBuffer* buff) { return Status::Ok(); } -Status FragmentMetadata::load_tile_var_sizes( - unsigned attr_id, ConstBuffer* buff) { +Status FragmentMetadata::load_tile_var_sizes(unsigned idx, ConstBuffer* buff) { Status st; uint64_t tile_var_sizes_num = 0; @@ -1343,9 +1382,9 @@ Status FragmentMetadata::load_tile_var_sizes( // Get variable tile sizes if (tile_var_sizes_num != 0) { - tile_var_sizes_[attr_id].resize(tile_var_sizes_num); + tile_var_sizes_[idx].resize(tile_var_sizes_num); st = buff->read( - &tile_var_sizes_[attr_id][0], tile_var_sizes_num * sizeof(uint64_t)); + &tile_var_sizes_[idx][0], tile_var_sizes_num * sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( "Cannot load 
fragment metadata; Reading variable tile sizes " @@ -1379,22 +1418,17 @@ Status FragmentMetadata::create_rtree() { return Status::Ok(); } -Status FragmentMetadata::get_generic_tile_size( - uint64_t offset, uint64_t* size) { - URI fragment_metadata_uri = fragment_uri_.join_path( - std::string(constants::fragment_metadata_filename)); - TileIO tile_io(storage_manager_, fragment_metadata_uri); - TileIO::GenericTileHeader header; - RETURN_NOT_OK(tile_io.read_generic_tile_header( - storage_manager_, fragment_metadata_uri, offset, &header)); - - *size = TileIO::GenericTileHeader::BASE_SIZE + header.filter_pipeline_size + - header.persisted_size; +Status FragmentMetadata::load_generic_tile_offsets(ConstBuffer* buff) { + if (version_ == 3 || version_ == 4) + return load_generic_tile_offsets_v3_v4(buff); + else if (version_ > 4) + return load_generic_tile_offsets_v5_or_higher(buff); + assert(false); return Status::Ok(); } -Status FragmentMetadata::load_generic_tile_offsets(ConstBuffer* buff) { +Status FragmentMetadata::load_generic_tile_offsets_v3_v4(ConstBuffer* buff) { // Load R-Tree offset RETURN_NOT_OK(buff->read(>_offsets_.rtree_, sizeof(uint64_t))); @@ -1422,10 +1456,38 @@ Status FragmentMetadata::load_generic_tile_offsets(ConstBuffer* buff) { return Status::Ok(); } -Status FragmentMetadata::load_v2(const EncryptionKey& encryption_key) { +Status FragmentMetadata::load_generic_tile_offsets_v5_or_higher( + ConstBuffer* buff) { + // Load R-Tree offset + RETURN_NOT_OK(buff->read(>_offsets_.rtree_, sizeof(uint64_t))); + + // Load offsets for tile offsets + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; + gt_offsets_.tile_offsets_.resize(num); + for (unsigned i = 0; i < num; ++i) { + RETURN_NOT_OK(buff->read(>_offsets_.tile_offsets_[i], sizeof(uint64_t))); + } + + // Load offsets for tile var offsets + gt_offsets_.tile_var_offsets_.resize(num); + for (unsigned i = 0; i < num; ++i) { + RETURN_NOT_OK( + buff->read(>_offsets_.tile_var_offsets_[i], 
sizeof(uint64_t))); + } + + // Load offsets for tile var sizes + gt_offsets_.tile_var_sizes_.resize(num); + for (unsigned i = 0; i < num; ++i) { + RETURN_NOT_OK( + buff->read(>_offsets_.tile_var_sizes_[i], sizeof(uint64_t))); + } + + return Status::Ok(); +} + +Status FragmentMetadata::load_v1_v2(const EncryptionKey& encryption_key) { URI fragment_metadata_uri = fragment_uri_.join_path( std::string(constants::fragment_metadata_filename)); - // Read metadata TileIO tile_io(storage_manager_, fragment_metadata_uri); auto tile = (Tile*)nullptr; @@ -1454,7 +1516,8 @@ Status FragmentMetadata::load_v2(const EncryptionKey& encryption_key) { return Status::Ok(); } -Status FragmentMetadata::load_v3(const EncryptionKey& encryption_key) { +Status FragmentMetadata::load_v3_or_higher( + const EncryptionKey& encryption_key) { RETURN_NOT_OK(load_footer(encryption_key)); return Status::Ok(); } @@ -1479,16 +1542,16 @@ Status FragmentMetadata::load_footer(const EncryptionKey& encryption_key) { RETURN_NOT_OK(load_file_sizes(&cbuff)); RETURN_NOT_OK(load_file_var_sizes(&cbuff)); - tile_offsets_.resize(array_schema_->attribute_num() + 1); - tile_var_offsets_.resize(array_schema_->attribute_num()); - tile_var_sizes_.resize(array_schema_->attribute_num()); + unsigned num = array_schema_->attribute_num() + 1; + num += (version_ >= 5) ? 
array_schema_->dim_num() : 0; - loaded_metadata_.tile_offsets_.resize( - array_schema_->attribute_num() + 1, false); - loaded_metadata_.tile_var_offsets_.resize( - array_schema_->attribute_num(), false); - loaded_metadata_.tile_var_sizes_.resize( - array_schema_->attribute_num(), false); + tile_offsets_.resize(num); + tile_var_offsets_.resize(num); + tile_var_sizes_.resize(num); + + loaded_metadata_.tile_offsets_.resize(num, false); + loaded_metadata_.tile_var_offsets_.resize(num, false); + loaded_metadata_.tile_var_sizes_.resize(num, false); RETURN_NOT_OK(load_generic_tile_offsets(&cbuff)); @@ -1498,13 +1561,12 @@ Status FragmentMetadata::load_footer(const EncryptionKey& encryption_key) { } // ===== FORMAT ===== -// file_sizes_attr#0 (uint64_t) +// file_sizes#0 (uint64_t) // ... -// file_sizes_attr#attribute_num (uint64_t) +// file_sizes#{attribute_num+dim_num} (uint64_t) Status FragmentMetadata::write_file_sizes(Buffer* buff) { - unsigned int attribute_num = array_schema_->attribute_num(); - Status st = buff->write( - &next_tile_offsets_[0], (attribute_num + 1) * sizeof(uint64_t)); + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; + Status st = buff->write(&next_tile_offsets_[0], num * sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( "Cannot serialize fragment metadata; Writing file sizes failed")); @@ -1514,13 +1576,12 @@ Status FragmentMetadata::write_file_sizes(Buffer* buff) { } // ===== FORMAT ===== -// file_var_sizes_attr#0 (uint64_t) +// file_var_sizes#0 (uint64_t) // ... 
-// file_var_sizes_attr#attribute_num (uint64_t) +// file_var_sizes#{attribute_num+dim_num} (uint64_t) Status FragmentMetadata::write_file_var_sizes(Buffer* buff) { - unsigned int attribute_num = array_schema_->attribute_num(); - Status st = - buff->write(&next_tile_var_offsets_[0], attribute_num * sizeof(uint64_t)); + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; + Status st = buff->write(&next_tile_var_offsets_[0], num * sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( "Cannot serialize fragment metadata; Writing file sizes failed")); @@ -1531,17 +1592,17 @@ Status FragmentMetadata::write_file_var_sizes(Buffer* buff) { // ===== FORMAT ===== // rtree_offset(uint64_t) -// tile_offsets_offset_attr1(uint64_t) +// tile_offsets_offset_0(uint64_t) // ... -// tile_offsets_offset_attr_{attr_num+1}(uint64_t) -// tile_var_offsets_offset_attr1(uint64_t) +// tile_offsets_offset_{attr_num+dim_num}(uint64_t) +// tile_var_offsets_0(uint64_t) // ... -// tile_var_offsets_offset_attr_{attr_num}(uint64_t) -// tile_var_sizes_offset_attr1(uint64_t) +// tile_var_offsets_{attr_num+dim_num}(uint64_t) +// tile_var_sizes_0(uint64_t) // ... 
-// tile_var_sizes_offset_attr_{attr_num}(uint64_t) +// tile_var_sizes_{attr_num+dim_num}(uint64_t) Status FragmentMetadata::write_generic_tile_offsets(Buffer* buff) { - unsigned int attribute_num = array_schema_->attribute_num(); + auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1; // Write R-Tree offset auto st = buff->write(>_offsets_.rtree_, sizeof(uint64_t)); @@ -1551,7 +1612,7 @@ Status FragmentMetadata::write_generic_tile_offsets(Buffer* buff) { } // Write tile offsets - for (unsigned i = 0; i < attribute_num + 1; ++i) { + for (unsigned i = 0; i < num; ++i) { st = buff->write(>_offsets_.tile_offsets_[i], sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( @@ -1560,7 +1621,7 @@ Status FragmentMetadata::write_generic_tile_offsets(Buffer* buff) { } // Write tile var offsets - for (unsigned i = 0; i < attribute_num; ++i) { + for (unsigned i = 0; i < num; ++i) { st = buff->write(>_offsets_.tile_var_offsets_[i], sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS( @@ -1570,7 +1631,7 @@ Status FragmentMetadata::write_generic_tile_offsets(Buffer* buff) { } // Write tile var sizes - for (unsigned i = 0; i < attribute_num; ++i) { + for (unsigned i = 0; i < num; ++i) { st = buff->write(>_offsets_.tile_var_sizes_[i], sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( @@ -1606,41 +1667,12 @@ Status FragmentMetadata::store_rtree( Buffer buff; RETURN_NOT_OK(write_rtree(&buff)); RETURN_NOT_OK(write_generic_tile_to_file(encryption_key, &buff, nbytes)); - return Status::Ok(); } Status FragmentMetadata::write_rtree(Buffer* buff) { RETURN_NOT_OK(create_rtree()); RETURN_NOT_OK(rtree_.serialize(buff)); - - return Status::Ok(); -} - -// ===== FORMAT ===== -// mbr_num(uint64_t) -// mbr_#1(void*) mbr_#2(void*) ... 
-Status FragmentMetadata::write_mbrs(Buffer* buff) { - Status st; - uint64_t mbr_size = 2 * array_schema_->coords_size(); - uint64_t mbr_num = mbrs_.size(); - - // Write number of MBRs - st = buff->write(&mbr_num, sizeof(uint64_t)); - if (!st.ok()) { - return LOG_STATUS(Status::FragmentMetadataError( - "Cannot serialize fragment metadata; Writing number of MBRs failed")); - } - - // Write MBRs - for (uint64_t i = 0; i < mbr_num; ++i) { - st = buff->write(mbrs_[i], mbr_size); - if (!st.ok()) { - return LOG_STATUS(Status::FragmentMetadataError( - "Cannot serialize fragment metadata; Writing MBR failed")); - } - } - return Status::Ok(); } @@ -1732,19 +1764,19 @@ Status FragmentMetadata::write_file_footer(Buffer* buff) const { } Status FragmentMetadata::store_tile_offsets( - unsigned attr_id, const EncryptionKey& encryption_key, uint64_t* nbytes) { + unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { Buffer buff; - RETURN_NOT_OK(write_tile_offsets(attr_id, &buff)); + RETURN_NOT_OK(write_tile_offsets(idx, &buff)); RETURN_NOT_OK(write_generic_tile_to_file(encryption_key, &buff, nbytes)); return Status::Ok(); } -Status FragmentMetadata::write_tile_offsets(unsigned attr_id, Buffer* buff) { +Status FragmentMetadata::write_tile_offsets(unsigned idx, Buffer* buff) { Status st; // Write number of tile offsets - uint64_t tile_offsets_num = tile_offsets_[attr_id].size(); + uint64_t tile_offsets_num = tile_offsets_[idx].size(); st = buff->write(&tile_offsets_num, sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( @@ -1755,7 +1787,7 @@ Status FragmentMetadata::write_tile_offsets(unsigned attr_id, Buffer* buff) { // Write tile offsets if (tile_offsets_num != 0) { st = buff->write( - &tile_offsets_[attr_id][0], tile_offsets_num * sizeof(uint64_t)); + &tile_offsets_[idx][0], tile_offsets_num * sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( "Cannot serialize fragment metadata; Writing tile 
offsets failed")); @@ -1766,21 +1798,20 @@ Status FragmentMetadata::write_tile_offsets(unsigned attr_id, Buffer* buff) { } Status FragmentMetadata::store_tile_var_offsets( - unsigned attr_id, const EncryptionKey& encryption_key, uint64_t* nbytes) { + unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { Buffer buff; - RETURN_NOT_OK(write_tile_var_offsets(attr_id, &buff)); + RETURN_NOT_OK(write_tile_var_offsets(idx, &buff)); RETURN_NOT_OK(write_generic_tile_to_file(encryption_key, &buff, nbytes)); return Status::Ok(); } -Status FragmentMetadata::write_tile_var_offsets( - unsigned attr_id, Buffer* buff) { +Status FragmentMetadata::write_tile_var_offsets(unsigned idx, Buffer* buff) { Status st; // Write tile offsets for each attribute // Write number of offsets - uint64_t tile_var_offsets_num = tile_var_offsets_[attr_id].size(); + uint64_t tile_var_offsets_num = tile_var_offsets_[idx].size(); st = buff->write(&tile_var_offsets_num, sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( @@ -1791,8 +1822,7 @@ Status FragmentMetadata::write_tile_var_offsets( // Write tile offsets if (tile_var_offsets_num != 0) { st = buff->write( - &tile_var_offsets_[attr_id][0], - tile_var_offsets_num * sizeof(uint64_t)); + &tile_var_offsets_[idx][0], tile_var_offsets_num * sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( "Cannot serialize fragment metadata; Writing " @@ -1804,19 +1834,19 @@ Status FragmentMetadata::write_tile_var_offsets( } Status FragmentMetadata::store_tile_var_sizes( - unsigned attr_id, const EncryptionKey& encryption_key, uint64_t* nbytes) { + unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { Buffer buff; - RETURN_NOT_OK(write_tile_var_sizes(attr_id, &buff)); + RETURN_NOT_OK(write_tile_var_sizes(idx, &buff)); RETURN_NOT_OK(write_generic_tile_to_file(encryption_key, &buff, nbytes)); return Status::Ok(); } -Status FragmentMetadata::write_tile_var_sizes(unsigned 
attr_id, Buffer* buff) { +Status FragmentMetadata::write_tile_var_sizes(unsigned idx, Buffer* buff) { Status st; // Write number of sizes - uint64_t tile_var_sizes_num = tile_var_sizes_[attr_id].size(); + uint64_t tile_var_sizes_num = tile_var_sizes_[idx].size(); st = buff->write(&tile_var_sizes_num, sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS(Status::FragmentMetadataError( @@ -1827,7 +1857,7 @@ Status FragmentMetadata::write_tile_var_sizes(unsigned attr_id, Buffer* buff) { // Write tile sizes if (tile_var_sizes_num != 0) { st = buff->write( - &tile_var_sizes_[attr_id][0], tile_var_sizes_num * sizeof(uint64_t)); + &tile_var_sizes_[idx][0], tile_var_sizes_num * sizeof(uint64_t)); if (!st.ok()) { return LOG_STATUS( Status::FragmentMetadataError("Cannot serialize fragment metadata; " @@ -1869,6 +1899,16 @@ Status FragmentMetadata::store_footer(const EncryptionKey& encryption_key) { return Status::Ok(); } +void FragmentMetadata::clean_up() { + auto array_uri = this->array_uri(); + auto fragment_metadata_uri = + fragment_uri_.join_path(constants::fragment_metadata_filename); + + storage_manager_->close_file(fragment_metadata_uri); + storage_manager_->vfs()->remove_file(fragment_metadata_uri); + storage_manager_->array_xunlock(array_uri); +} + // Explicit template instantiations template Status FragmentMetadata::set_mbr( uint64_t tile, const void* mbr); @@ -1942,23 +1982,6 @@ template Status FragmentMetadata::add_max_buffer_sizes( std::unordered_map>* buffer_sizes); -template uint64_t FragmentMetadata::get_tile_pos( - const int8_t* tile_coords) const; -template uint64_t FragmentMetadata::get_tile_pos( - const uint8_t* tile_coords) const; -template uint64_t FragmentMetadata::get_tile_pos( - const int16_t* tile_coords) const; -template uint64_t FragmentMetadata::get_tile_pos( - const uint16_t* tile_coords) const; -template uint64_t FragmentMetadata::get_tile_pos( - const int* tile_coords) const; -template uint64_t FragmentMetadata::get_tile_pos( - const unsigned* 
tile_coords) const; -template uint64_t FragmentMetadata::get_tile_pos( - const int64_t* tile_coords) const; -template uint64_t FragmentMetadata::get_tile_pos( - const uint64_t* tile_coords) const; - template Status FragmentMetadata::get_tile_overlap( const EncryptionKey& encryption_key, const std::vector& range, diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index b0c20a5dfeb1..d795b479d9d5 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -188,17 +188,6 @@ class FragmentMetadata { const std::vector& range, TileOverlap* tile_overlap); - /** - * Given as input global tile coordinates, it retrieves the tile position - * within the fragment. - * - * @tparam T The domain type. - * @param tile_coords The global tile coordinates. - * @return The tile position in the fragment. - */ - template - uint64_t get_tile_pos(const T* tile_coords) const; - /** * Initializes the fragment metadata structures. * @@ -214,12 +203,12 @@ class FragmentMetadata { /** Loads the basic metadata from storage. */ Status load(const EncryptionKey& encryption_key); - /** Stores all the metadata to storage. */ - Status store(const EncryptionKey& encryption_key); - /** Returns the MBRs of the fragment. Used in format version <=2. */ const std::vector mbrs() const; + /** Stores all the metadata to storage. */ + Status store(const EncryptionKey& encryption_key); + /** Returns the non-empty domain in which the fragment is constrained. */ const void* non_empty_domain() const; @@ -275,39 +264,37 @@ class FragmentMetadata { void set_tile_index_base(uint64_t tile_base); /** - * Sets a tile offset for the input attribute. + * Sets a tile offset for the input attribute or dimension. * - * @param attribute The attribute for which the offset is set. - * @param tile The index of the tile for which the offset is set. + * @param name The attribute/dimension for which the offset is set. 
+ * @param tid The index of the tile for which the offset is set. * @param step This is essentially the step by which the previous * offset will be expanded. It is practically the last tile size. * @return void */ - void set_tile_offset( - const std::string& attribute, uint64_t tile, uint64_t step); + void set_tile_offset(const std::string& name, uint64_t tid, uint64_t step); /** - * Sets a variable tile offset for the input attribute. + * Sets a variable tile offset for the input attribute or dimension. * - * @param attribute The attribute for which the offset is set. - * @param tile The index of the tile for which the offset is set. + * @param name The attribute/dimension for which the offset is set. + * @param tid The index of the tile for which the offset is set. * @param step This is essentially the step by which the previous * offset will be expanded. It is practically the last variable tile size. * @return void */ void set_tile_var_offset( - const std::string& attribute, uint64_t tile, uint64_t step); + const std::string& name, uint64_t tid, uint64_t step); /** - * Sets a variable tile size for the input attribute. + * Sets a variable tile size for the input attribute or dimension. * - * @param attribute The attribute for which the size is set. - * @param tile The index of the tile for which the offset is set. + * @param name The attribute/dimension for which the size is set. + * @param tid The index of the tile for which the offset is set. * @param size The size to be appended. * @return void */ - void set_tile_var_size( - const std::string& attribute, uint64_t tile, uint64_t size); + void set_tile_var_size(const std::string& name, uint64_t tid, uint64_t size); /** Returns the tile index base value. */ uint64_t tile_index_base() const; @@ -315,42 +302,42 @@ class FragmentMetadata { /** Returns the number of tiles in the fragment. */ uint64_t tile_num() const; - /** Returns the URI of the input attribute. 
*/ - URI attr_uri(const std::string& attribute) const; + /** Returns the URI of the input attribute/dimension. */ + URI uri(const std::string& name) const; - /** Returns the URI of the input variable-sized attribute. */ - URI attr_var_uri(const std::string& attribute) const; + /** Returns the URI of the input variable-sized attribute/dimension. */ + URI var_uri(const std::string& name) const; /** - * Retrieves the starting offset of the input tile of input attribute - * in the file. If the attribute is var-sized, it returns the starting - * offset of the offsets tile. + * Retrieves the starting offset of the input tile of the input attribute + * or dimension in the file. If the attribute/dimension is var-sized, it + * returns the starting offset of the offsets tile. * * @param encryption_key The key the array got opened with. - * @param attribute The input attribute. + * @param name The input attribute/dimension. * @param tile_idx The index of the tile in the metadata. * @param offset The file offset to be retrieved. * @return Status */ Status file_offset( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* offset); /** - * Retrieves the starting offset of the input tile of input attribute - * in the file. The attribute must be var-sized. + * Retrieves the starting offset of the input tile of input attribute or + * dimension in the file. The attribute/dimension must be var-sized. * * @param encryption_key The key the array got opened with. - * @param attribute_id The input attribute. + * @param name The input attribute/dimension. * @param tile_idx The index of the tile in the metadata. * @param offset The file offset to be retrieved. 
* @return Status */ Status file_var_offset( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* offset); @@ -359,65 +346,63 @@ class FragmentMetadata { /** * Retrieves the size of the tile when it is persisted (e.g. the size of the - * compressed tile on disk) for a given attribute and tile index. If the - * attribute is var-sized, this will return the persisted size of the offsets - * tile. + * compressed tile on disk) for a given attribute or dimension and tile index. + * If the attribute/dimension is var-sized, this will return the persisted + * size of the offsets tile. * * @param encryption_key The key the array got opened with. - * @param attribute The input attribute. + * @param name The input attribute/dimension. * @param tile_idx The index of the tile in the metadata. * @param tile_size The tile size to be retrieved. * @return Status */ Status persisted_tile_size( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* tile_size); /** * Retrieves the size of the tile when it is persisted (e.g. the size of the - * compressed tile on disk) for a given var-sized attribute and tile index. + * compressed tile on disk) for a given var-sized attribute or dimension + * and tile index. * * @param encryption_key The key the array got opened with. - * @param attribute The inout attribute. + * @param name The input attribute/dimension. * @param tile_idx The index of the tile in the metadata. * @param tile_size The tile size to be retrieved. * @return Status */ Status persisted_tile_var_size( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* tile_size); - /** Retrieves the RTree. */ - Status rtree(const EncryptionKey& encryption_key, const RTree** rtree); - /** - * Returns the (uncompressed) tile size for a given attribute - * and tile index. 
If the attribute is var-sized, this will return + * Returns the (uncompressed) tile size for a given attribute or dimension + * and tile index. If the attribute/dimension is var-sized, this will return * the size of the offsets tile. * - * @param attribute The input attribute. + * @param name The input attribute/dimension. * @param tile_idx The index of the tile in the metadata. * @return The tile size. */ - uint64_t tile_size(const std::string& attribute, uint64_t tile_idx) const; + uint64_t tile_size(const std::string& name, uint64_t tile_idx) const; /** - * Retrieves the (uncompressed) tile size for a given var-sized attribute - * and tile index. + * Retrieves the (uncompressed) tile size for a given var-sized attribute or + * dimension and tile index. * * @param encryption_key The key the array got opened with. - * @param attribute The input attribute. + * @param name The input attribute/dimension. * @param tile_idx The index of the tile in the metadata. * @param tile_size The tile size to be retrieved. * @return Status */ Status tile_var_size( const EncryptionKey& encryption_key, - const std::string& attribute, + const std::string& name, uint64_t tile_idx, uint64_t* tile_size); @@ -469,14 +454,12 @@ class FragmentMetadata { /** The array schema */ const ArraySchema* array_schema_; - /** Maps an attribute to an index used in the various vector class members. */ - std::unordered_map attribute_idx_map_; - - /** Maps an attribute to its absolute URI within this fragment. */ - std::unordered_map attribute_uri_map_; - - /** Maps an attribute to its absolute '_var' URI within this fragment. */ - std::unordered_map attribute_var_uri_map_; + /** + * Maps an attribute or dimension to an index used in the various vector + * class members. Attributes are first, then TILEDB_COORDS, then the + * dimensions. + */ + std::unordered_map idx_map_; /** A vector storing the first and last coordinates of each tile. 
*/ std::vector bounding_coords_; @@ -578,6 +561,24 @@ class FragmentMetadata { */ Status get_footer_offset_and_size(uint64_t* offset, uint64_t* size) const; + /** + * Retrieves the offset in the fragment metadata file of the footer + * (which contains the generic tile offsets) along with its size. + * + * Applicable to format versions 3 and 4. + */ + Status get_footer_offset_and_size_v3_v4( + uint64_t* offset, uint64_t* size) const; + + /** + * Retrieves the offset in the fragment metadata file of the footer + * (which contains the generic tile offsets) along with its size. + * + * Applicable to format version 5 or higher. + */ + Status get_footer_offset_and_size_v5_or_higher( + uint64_t* offset, uint64_t* size) const; + /** * Returns the ids (positions) of the tiles overlapping `subarray`. * Applicable only to dense arrays. @@ -613,21 +614,40 @@ class FragmentMetadata { /** Loads the R-tree from storage. */ Status load_rtree(const EncryptionKey& encryption_key); - /** Loads the tile offsets for the input attribute from storage. */ - Status load_tile_offsets( - const EncryptionKey& encryption_key, unsigned attr_id); + /** + * Loads the tile offsets for the input attribute or dimension idx + * from storage. + */ + Status load_tile_offsets(const EncryptionKey& encryption_key, unsigned idx); - /** Loads the variable tile offsets for the input attribute from storage. */ + /** + * Loads the variable tile offsets for the input attribute or dimension idx + * from storage. + */ Status load_tile_var_offsets( - const EncryptionKey& encryption_key, unsigned attr_id); + const EncryptionKey& encryption_key, unsigned idx); - /** Loads the variable tile sizes for the input attribute from storage. */ - Status load_tile_var_sizes( - const EncryptionKey& encryption_key, unsigned attr_id); + /** + * Loads the variable tile sizes for the input attribute or dimension idx + * from storage. 
+ * */ Status load_tile_var_sizes(const EncryptionKey& encryption_key, unsigned idx); /** Loads the generic tile offsets from the buffer. */ Status load_generic_tile_offsets(ConstBuffer* buff); + /** + * Loads the generic tile offsets from the buffer. Applicable to + * format versions 3 and 4. + */ + Status load_generic_tile_offsets_v3_v4(ConstBuffer* buff); + + /** + * Loads the generic tile offsets from the buffer. Applicable to + * format version 5 or higher. + */ + Status load_generic_tile_offsets_v5_or_higher(ConstBuffer* buff); + /** * Loads the bounding coordinates from the fragment metadata buffer. * @@ -636,12 +656,39 @@ class FragmentMetadata { */ Status load_bounding_coords(ConstBuffer* buff); - /** Loads the sizes of each attribute file from the buffer. */ + /** Loads the sizes of each attribute or dimension file from the buffer. */ Status load_file_sizes(ConstBuffer* buff); - /** Loads the sizes of each variable attribute file from the buffer. */ + /** + * Loads the sizes of each attribute or dimension file from the buffer. + * Applicable to format versions 1 to 4. + */ + Status load_file_sizes_v1_v4(ConstBuffer* buff); + + /** + * Loads the sizes of each attribute or dimension file from the buffer. + * Applicable to format version 5 or higher. + */ + Status load_file_sizes_v5_or_higher(ConstBuffer* buff); + + /** + * Loads the sizes of each variable attribute or dimension file from the + * buffer. + */ Status load_file_var_sizes(ConstBuffer* buff); + /** + * Loads the sizes of each variable attribute or dimension file from the + * buffer. Applicable to format versions 1 to 4. + */ + Status load_file_var_sizes_v1_v4(ConstBuffer* buff); + + /** + * Loads the sizes of each variable attribute or dimension file from the + * buffer. Applicable to format version 5 or higher. + */ + Status load_file_var_sizes_v5_or_higher(ConstBuffer* buff); + /** * Loads the cell number of the last tile from the fragment metadata buffer.
* @@ -658,67 +705,53 @@ class FragmentMetadata { */ Status load_mbrs(ConstBuffer* buff); - /** - * Loads the non-empty domain from the fragment metadata buffer. - * - * @param buff Metadata buffer. - * @return Status - */ + /** Loads the non-empty domain from the input buffer. */ Status load_non_empty_domain(ConstBuffer* buff); /** - * Loads the non-empty domain from the fragment metadata buffer, + * Loads the non-empty domain from the input buffer, * for format versions <= 2. - * - * @param buff Metadata buffer. - * @return Status */ Status load_non_empty_domain_v2(ConstBuffer* buff); /** - * Loads the non-empty domain from the fragment metadata buffer, + * Loads the non-empty domain from the input buffer, * for format versions >= 3. - * - * @param buff Metadata buffer. - * @return Status */ Status load_non_empty_domain_v3(ConstBuffer* buff); /** * Loads the tile offsets for the input attribute from the input buffer. + * Applicable to versions 1 and 2 */ Status load_tile_offsets(ConstBuffer* buff); /** - * Loads the tile offsets for the input attribute from the input buffer. + * Loads the tile offsets for the input attribute or dimension from the + * input buffer. */ - Status load_tile_offsets(unsigned attr_id, ConstBuffer* buff); + Status load_tile_offsets(unsigned idx, ConstBuffer* buff); /** - * Loads the variable tile offsets from the fragment metadata buffer. - * - * @param buff Metadata buffer. - * @return Status + * Loads the variable tile offsets from the input buffer. + * Applicable to versions 1 and 2 */ Status load_tile_var_offsets(ConstBuffer* buff); /** - * Loads the variable tile offsets for the input attribute from the buffer. + * Loads the variable tile offsets for the input attribute or dimension from + * the input buffer. */ - Status load_tile_var_offsets(unsigned attr_id, ConstBuffer* buff); + Status load_tile_var_offsets(unsigned idx, ConstBuffer* buff); - /** - * Loads the variable tile sizes from the fragment metadata. 
- * - * @param buff Metadata buffer. - * @return Status - */ + /** Loads the variable tile sizes from the input buffer. */ Status load_tile_var_sizes(ConstBuffer* buff); /** - * Loads the variable tile sizes for the input attribute from the buffer. + * Loads the variable tile sizes for the input attribute or dimension + * from the input buffer. */ - Status load_tile_var_sizes(unsigned attr_id, ConstBuffer* buff); + Status load_tile_var_sizes(unsigned idx, ConstBuffer* buff); /** Loads the format version from the buffer. */ Status load_version(ConstBuffer* buff); @@ -729,16 +762,11 @@ class FragmentMetadata { /** Loads the number of sparse tiles from the buffer. */ Status load_sparse_tile_num(ConstBuffer* buff); - /** - * Retrieves the size of the generic tile starting at the input offset. - */ - Status get_generic_tile_size(uint64_t offset, uint64_t* size); - /** Loads the basic metadata from storage (version 2 or before). */ - Status load_v2(const EncryptionKey& encryption_key); + Status load_v1_v2(const EncryptionKey& encryption_key); - /** Loads the basic metadata from storage (version 3). */ - Status load_v3(const EncryptionKey& encryption_key); + /** Loads the basic metadata from storage (version 3 or after). */ + Status load_v3_or_higher(const EncryptionKey& encryption_key); /** * Loads the footer of the metadata file, which contains @@ -775,50 +803,61 @@ class FragmentMetadata { /** Writes the R-tree to the input buffer. */ Status write_rtree(Buffer* buff); - /** Writes the MBRs to the input buffer. */ - Status write_mbrs(Buffer* buff); - /** Writes the non-empty domain to the input buffer. */ Status write_non_empty_domain(Buffer* buff); /** - * Writes the tile offsets of the input attribute to storage. + * Writes the tile offsets of the input attribute or dimension to storage. * + * @param idx The index of the attribute or dimension. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the tile offsets. 
* @return Status */ Status store_tile_offsets( - unsigned attr_id, const EncryptionKey& encryption_key, uint64_t* nbytes); + unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); - /** Writes the tile offsets of the input attribut$ to the input buffer. */ - Status write_tile_offsets(unsigned attr_id, Buffer* buff); + /** + * Writes the tile offsets of the input attribute or dimension idx to the + * input buffer. + */ + Status write_tile_offsets(unsigned idx, Buffer* buff); /** - * Writes the variable tile offsets of the input attribute to storage. + * Writes the variable tile offsets of the input attribute or dimension + * to storage. * + * @param idx The index of the attribute or dimension. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the tile var offsets. * @return Status */ Status store_tile_var_offsets( - unsigned attr_id, const EncryptionKey& encryption_key, uint64_t* nbytes); + unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); - /** Writes the variable tile offsets of the input attribute to the buffer. */ - Status write_tile_var_offsets(unsigned attr_id, Buffer* buff); + /** + * Writes the variable tile offsets of the input attribute or dimension idx + * to the buffer. + */ + Status write_tile_var_offsets(unsigned idx, Buffer* buff); /** - * Writes the variable tile sizes for the input attribute to the buffer. + * Writes the variable tile sizes for the input attribute or dimension to + * storage. * + * @param idx The index of the attribute or dimension. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the tile var sizes. * @return Status */ Status store_tile_var_sizes( - unsigned attr_id, const EncryptionKey& encryption_key, uint64_t* nbytes); + unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); - /** Writes the variable tile sizes to storage.
*/ - Status write_tile_var_sizes(unsigned attr_id, Buffer* buff); + /** + * Writes the variable tile sizes for the input attribute or dimension + * to the input buffer. + */ + Status write_tile_var_sizes(unsigned idx, Buffer* buff); /** Writes the format version to the buffer. */ Status write_version(Buffer* buff); @@ -863,6 +902,12 @@ class FragmentMetadata { * number of attributes). */ Status write_file_footer(Buffer* buff) const; + + /** + * Simple clean up function called in the case of error. It removes the + * fragment metadata file and unlocks the array. + */ + void clean_up(); }; } // namespace sm diff --git a/tiledb/sm/misc/constants.cc b/tiledb/sm/misc/constants.cc index 3131e94c97ea..cf7b57e16ec5 100644 --- a/tiledb/sm/misc/constants.cc +++ b/tiledb/sm/misc/constants.cc @@ -428,7 +428,7 @@ const int32_t library_version[3] = { TILEDB_VERSION_MAJOR, TILEDB_VERSION_MINOR, TILEDB_VERSION_PATCH}; /** The TileDB serialization format version number. */ -const uint32_t format_version = 4; +const uint32_t format_version = 5; /** The maximum size of a tile chunk (unit of compression) in bytes.
*/ const uint64_t max_tile_chunk_size = 64 * 1024; diff --git a/tiledb/sm/misc/utils.cc b/tiledb/sm/misc/utils.cc index 34aa80449886..275f0bbfd079 100644 --- a/tiledb/sm/misc/utils.cc +++ b/tiledb/sm/misc/utils.cc @@ -253,11 +253,23 @@ std::pair get_timestamp_range( return ret; } -Status get_fragment_name_version( - const std::string& fragment_name, uint32_t* version) { - auto t_str = fragment_name.substr(fragment_name.find_last_of('_') + 1); +Status get_fragment_name_version(const URI& uri, uint32_t* version) { + // Prepare fragment name string + std::string uri_str = uri.c_str(); + if (uri_str.back() == '/') + uri_str.pop_back(); + std::string name = URI(uri_str).last_path_part(); + + // First check if it is in version 3, which has 5 '_' in the name + size_t n = std::count(name.begin(), name.end(), '_'); + if (n == 5) { + *version = 3; + return Status::Ok(); + } - // The newest version has the 32-byte long UUID at the end + // Check if it is in version 1 or 2 + // Version 2 has the 32-byte long UUID at the end + auto t_str = name.substr(name.find_last_of('_') + 1); *version = (t_str.size() == 32) ? 2 : 1; return Status::Ok(); diff --git a/tiledb/sm/misc/utils.h b/tiledb/sm/misc/utils.h index 5e16ef8b9c88..47088a4df01c 100644 --- a/tiledb/sm/misc/utils.h +++ b/tiledb/sm/misc/utils.h @@ -105,11 +105,15 @@ std::pair get_timestamp_range( uint32_t version, const std::string& fragment_name); /** - * Retrieves the fragment name version. Version 1 corresponds to format - * version <=2, and 2 to format version > 2. + * Retrieves the fragment name version. 
+ * - Version 1 corresponds to format versions 1 and 2 + * * __uuid_{_t2} + * - Version 2 corresponds to version 3 and 4 + * * __t1_t2_uuid + * - Version 3 corresponds to version 5 or higher + * * __t1_t2_uuid_version */ -Status get_fragment_name_version( - const std::string& fragment_name, uint32_t* version); +Status get_fragment_name_version(const URI& uri, uint32_t* version); /** Returns `true` if the input string is a (potentially signed) integer. */ bool is_int(const std::string& str); diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 40fffabcb9f1..1978f459a933 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -1294,17 +1294,30 @@ Status Reader::compute_result_coords( RETURN_CANCEL_OR_ERROR(compute_sparse_result_tiles( result_tiles, &result_tile_map, &single_fragment)); + if (result_tiles->empty()) + return Status::Ok(); + // Create temporary vector with pointers to result tiles, so that - // `read_tiles`, `filter_tiles` and `clear_tiles` below can work without - // changes + // `read_tiles`, `filter_tiles` below can work without changes std::vector tmp_result_tiles; for (auto& result_tile : *result_tiles) tmp_result_tiles.push_back(&result_tile); // Read and filter coordinate tiles + // NOTE: these will ignore tiles of fragments with format version >=5 RETURN_CANCEL_OR_ERROR(read_tiles(constants::coords, tmp_result_tiles)); RETURN_CANCEL_OR_ERROR(filter_tiles(constants::coords, tmp_result_tiles)); + // Read and filter coordinate tiles + // NOTE: these will ignore tiles of fragments with format version <5 + auto dim_num = array_schema_->dim_num(); + for (unsigned d = 0; d < dim_num; ++d) { + const auto& dim_name = array_schema_->dimension(d)->name(); + RETURN_CANCEL_OR_ERROR(read_tiles(dim_name, tmp_result_tiles)); + RETURN_CANCEL_OR_ERROR(filter_tiles(dim_name, tmp_result_tiles)); + } + RETURN_CANCEL_OR_ERROR(zip_coord_tiles(tmp_result_tiles)); + // Compute the read coordinates for all fragments for each subarray 
range std::vector>> range_result_coords; RETURN_CANCEL_OR_ERROR(compute_range_result_coords( @@ -1549,39 +1562,50 @@ Status Reader::filter_tiles( auto statuses = parallel_for(0, num_tiles, [&, this](uint64_t i) { auto& tile = result_tiles[i]; - auto it = tile->attr_tiles_.find(attribute); - // Skip non-existent attributes (e.g. coords in the dense case). - if (it == tile->attr_tiles_.end()) - return Status::Ok(); - - // Get information about the tile in its fragment auto& fragment = fragment_metadata_[tile->frag_idx_]; - auto tile_attr_uri = fragment->attr_uri(attribute); - uint64_t tile_attr_offset; - RETURN_NOT_OK(fragment->file_offset( - *encryption_key, attribute, tile->tile_idx_, &tile_attr_offset)); - - auto& tile_pair = it->second; - auto& t = tile_pair.first; - auto& t_var = tile_pair.second; + auto format_version = fragment->format_version(); - if (!t.filtered()) { - // Decompress, etc. - RETURN_NOT_OK(filter_tile(attribute, &t, var_size)); - RETURN_NOT_OK(storage_manager_->write_to_cache( - tile_attr_uri, tile_attr_offset, t.buffer())); - } + // Applicable for zipped coordinates only to versions < 5 + // Applicable for separate coordinates only to version >= 5 + if (attribute != constants::coords || + (attribute == constants::coords && format_version < 5) || + (array_schema_->is_dim(attribute) && format_version >= 5)) { + auto it = tile->attr_tiles_.find(attribute); + // Skip non-existent attributes (e.g. coords in the dense case). 
+ if (it == tile->attr_tiles_.end()) + return Status::Ok(); - if (var_size && !t_var.filtered()) { - auto tile_attr_var_uri = fragment->attr_var_uri(attribute); - uint64_t tile_attr_var_offset; - RETURN_NOT_OK(fragment->file_var_offset( - *encryption_key, attribute, tile->tile_idx_, &tile_attr_var_offset)); + // Get information about the tile in its fragment + auto tile_attr_uri = fragment->uri(attribute); + uint64_t tile_attr_offset; + RETURN_NOT_OK(fragment->file_offset( + *encryption_key, attribute, tile->tile_idx_, &tile_attr_offset)); + + auto& tile_pair = it->second; + auto& t = tile_pair.first; + auto& t_var = tile_pair.second; + + if (!t.filtered()) { + // Decompress, etc. + RETURN_NOT_OK(filter_tile(attribute, &t, var_size)); + RETURN_NOT_OK(storage_manager_->write_to_cache( + tile_attr_uri, tile_attr_offset, t.buffer())); + } - // Decompress, etc. - RETURN_NOT_OK(filter_tile(attribute, &t_var, false)); - RETURN_NOT_OK(storage_manager_->write_to_cache( - tile_attr_var_uri, tile_attr_var_offset, t_var.buffer())); + if (var_size && !t_var.filtered()) { + auto tile_attr_var_uri = fragment->var_uri(attribute); + uint64_t tile_attr_var_offset; + RETURN_NOT_OK(fragment->file_var_offset( + *encryption_key, + attribute, + tile->tile_idx_, + &tile_attr_var_offset)); + + // Decompress, etc. + RETURN_NOT_OK(filter_tile(attribute, &t_var, false)); + RETURN_NOT_OK(storage_manager_->write_to_cache( + tile_attr_var_uri, tile_attr_var_offset, t_var.buffer())); + } } return Status::Ok(); @@ -1596,18 +1620,13 @@ Status Reader::filter_tiles( } Status Reader::filter_tile( - const std::string& attribute, Tile* tile, bool offsets) const { + const std::string& name, Tile* tile, bool offsets) const { uint64_t orig_size = tile->buffer()->size(); // Get a copy of the appropriate filter pipeline. 
- FilterPipeline filters; - if (tile->stores_coords()) { - filters = *array_schema_->coords_filters(); - } else if (offsets) { - filters = *array_schema_->cell_var_offsets_filters(); - } else { - filters = *array_schema_->filters(attribute); - } + FilterPipeline filters = + (offsets ? *array_schema_->cell_var_offsets_filters() : + *array_schema_->filters(name)); // Append an encryption filter when necessary. RETURN_NOT_OK(FilterPipeline::append_encryption_filter( @@ -1769,6 +1788,17 @@ Status Reader::read_tiles( std::map>> all_regions; for (uint64_t i = 0; i < num_tiles; i++) { auto& tile = result_tiles[i]; + auto& fragment = fragment_metadata_[tile->frag_idx_]; + auto format_version = fragment->format_version(); + + // Applicable for zipped coordinates only to versions < 5 + if (attribute == constants::coords && format_version >= 5) + continue; + + // Applicable to separate coordinates only to versions >= 5 + if (array_schema_->is_dim(attribute) && format_version < 5) + continue; + auto it = tile->attr_tiles_.find(attribute); if (it == tile->attr_tiles_.end()) it = tile->attr_tiles_ @@ -1780,8 +1810,6 @@ Status Reader::read_tiles( auto& tile_pair = it->second; auto& t = tile_pair.first; auto& t_var = tile_pair.second; - auto& fragment = fragment_metadata_[tile->frag_idx_]; - auto format_version = fragment->format_version(); if (!var_size) { RETURN_NOT_OK(init_tile(format_version, attribute, &t)); } else { @@ -1789,7 +1817,7 @@ Status Reader::read_tiles( } // Get information about the tile in its fragment - auto tile_attr_uri = fragment->attr_uri(attribute); + auto tile_attr_uri = fragment->uri(attribute); uint64_t tile_attr_offset; RETURN_NOT_OK(fragment->file_offset( *encryption_key, attribute, tile->tile_idx_, &tile_attr_offset)); @@ -1817,7 +1845,7 @@ Status Reader::read_tiles( } if (var_size) { - auto tile_attr_var_uri = fragment->attr_var_uri(attribute); + auto tile_attr_var_uri = fragment->var_uri(attribute); uint64_t tile_attr_var_offset; 
RETURN_NOT_OK(fragment->file_var_offset( *encryption_key, attribute, tile->tile_idx_, &tile_attr_var_offset)); @@ -2011,6 +2039,45 @@ bool Reader::coords_overwritten(unsigned frag_idx, const T* coords) const { return false; } +Status Reader::zip_coord_tiles( + const std::vector& tmp_result_tiles) const { + // Initialize zipped coordinate tiles + std::map>> all_regions; + for (auto& tile : tmp_result_tiles) { + auto it = tile->attr_tiles_.find(constants::coords); + if (it == tile->attr_tiles_.end()) + tile->attr_tiles_.insert(std::pair( + constants::coords, ResultTile::TilePair(Tile(), Tile()))); + } + + // Zip coordinate tiles + auto tile_num = (uint64_t)tmp_result_tiles.size(); + auto dim_num = array_schema_->dim_num(); + auto statuses = parallel_for(0, tile_num, [&](uint64_t t) { + const auto& fragment = fragment_metadata_[tmp_result_tiles[t]->frag_idx_]; + auto format_version = fragment->format_version(); + if (format_version >= 5) { // Applicable only to version >= 5 + auto& new_tile = + tmp_result_tiles[t]->attr_tiles_[constants::coords].first; + RETURN_NOT_OK(init_tile(format_version, constants::coords, &new_tile)); + for (unsigned d = 0; d < dim_num; ++d) { + const auto& dim_name = array_schema_->dimension(d)->name(); + const auto& coord_tile = + tmp_result_tiles[t]->attr_tiles_[dim_name].first; + new_tile.write(coord_tile); + tmp_result_tiles[t]->attr_tiles_.erase(dim_name); + } + new_tile.zip_coordinates(); + } + return Status::Ok(); + }); + + for (const auto& st : statuses) + RETURN_CANCEL_OR_ERROR(st); + + return Status::Ok(); +} + // Explicit template instantiations template void Reader::compute_result_space_tiles( const std::vector>& tile_coords, diff --git a/tiledb/sm/query/reader.h b/tiledb/sm/query/reader.h index 8ca8c1bf6087..fde33aa1da49 100644 --- a/tiledb/sm/query/reader.h +++ b/tiledb/sm/query/reader.h @@ -897,17 +897,17 @@ class Reader { const std::vector& result_tiles) const; /** - * Runs the input tile for the input attribute through the 
filter pipeline. - * The tile buffer is modified to contain the output of the pipeline. + * Runs the input tile for the input attribute or dimension through the + * filter pipeline. The tile buffer is modified to contain the output of the + * pipeline. * - * @param attribute The attribute the tile belong to. + * @param name The attribute/dimension the tile belongs to. * @param tile The tile to be filtered. * @param offsets True if the tile to be filtered contains offsets for a - * var-sized attribute. + * var-sized attribute/dimension. * @return Status */ - Status filter_tile( - const std::string& attribute, Tile* tile, bool offsets) const; + Status filter_tile(const std::string& name, Tile* tile, bool offsets) const; /** * Gets all the result coordinates of the input tile into `result_coords`. @@ -1028,6 +1028,13 @@ class Reader { */ template bool coords_overwritten(unsigned frag_idx, const T* coords) const; + + /** + * Creates zipped coordinate tiles for TILEDB_COORDS. This is for backwards + * compatibility; it will be removed in a subsequent PR. + */ + Status zip_coord_tiles( + const std::vector& tmp_result_tiles) const; }; } // namespace sm diff --git a/tiledb/sm/query/writer.cc b/tiledb/sm/query/writer.cc index 27ad246a1640..28dd52740969 100644 --- a/tiledb/sm/query/writer.cc +++ b/tiledb/sm/query/writer.cc @@ -56,8 +56,9 @@ Writer::Writer() { array_schema_ = nullptr; coords_buffer_ = nullptr; coords_buffer_size_ = nullptr; - coord_buffers_alloced_ = false; coords_num_ = 0; + has_coords_ = false; + coord_buffer_is_set_ = false; global_write_state_.reset(nullptr); initialized_ = false; layout_ = Layout::ROW_MAJOR; @@ -80,22 +81,18 @@ const ArraySchema* Writer::array_schema() const { std::vector Writer::buffer_names() const { std::vector ret; - const size_t ret_size = - attr_buffers_.size() + (coords_buffer_ ?
1 : coord_buffers_.size()); - ret.reserve(ret_size); - // Attributes - for (const auto& it : attr_buffers_) - ret.push_back(it.first); - - // Coordinates - if (coords_buffer_ != nullptr) { - ret.push_back(constants::coords); - } else { - for (const auto& it : coord_buffers_) + // Add to the buffers names the attributes, as well as the dimensions only if + // coords_buffer_ has not been set + for (const auto& it : buffers_) { + if (!array_schema_->is_dim(it.first) || (!coords_buffer_)) ret.push_back(it.first); } + // Special zipped coordinates name + if (coords_buffer_) + ret.push_back(constants::coords); + return ret; } @@ -104,15 +101,10 @@ QueryBuffer Writer::buffer(const std::string& name) const { if (name == constants::coords) return QueryBuffer(coords_buffer_, nullptr, coords_buffer_size_, nullptr); - // Attribute - auto attr_buf = attr_buffers_.find(name); - if (attr_buf != attr_buffers_.end()) - return attr_buf->second; - - // Dimension - auto coord_buf = coord_buffers_.find(name); - if (coord_buf != coord_buffers_.end()) - return coord_buf->second; + // Attribute or dimension + auto buf = buffers_.find(name); + if (buf != buffers_.end()) + return buf->second; // Named buffer does not exist return QueryBuffer{}; @@ -133,19 +125,11 @@ Status Writer::get_buffer( return Status::Ok(); } - // Attribute - auto attr_it = attr_buffers_.find(name); - if (attr_it != attr_buffers_.end()) { - *buffer = attr_it->second.buffer_; - *buffer_size = attr_it->second.buffer_size_; - return Status::Ok(); - } - - // Dimension - auto coord_it = coord_buffers_.find(name); - if (coord_it != coord_buffers_.end()) { - *buffer = coord_it->second.buffer_; - *buffer_size = coord_it->second.buffer_size_; + // Attribute or dimension + auto it = buffers_.find(name); + if (it != buffers_.end()) { + *buffer = it->second.buffer_; + *buffer_size = it->second.buffer_size_; return Status::Ok(); } @@ -162,23 +146,13 @@ Status Writer::get_buffer( uint64_t** buffer_off_size, void** buffer_val, 
uint64_t** buffer_val_size) const { - // Attribute - auto attr_it = attr_buffers_.find(name); - if (attr_it != attr_buffers_.end()) { - *buffer_off = (uint64_t*)attr_it->second.buffer_; - *buffer_off_size = attr_it->second.buffer_size_; - *buffer_val = attr_it->second.buffer_var_; - *buffer_val_size = attr_it->second.buffer_var_size_; - return Status::Ok(); - } - - // Dimension - auto coord_it = coord_buffers_.find(name); - if (coord_it != coord_buffers_.end()) { - *buffer_off = (uint64_t*)coord_it->second.buffer_; - *buffer_off_size = coord_it->second.buffer_size_; - *buffer_val = coord_it->second.buffer_var_; - *buffer_val_size = coord_it->second.buffer_var_size_; + // Attribute or dimension + auto it = buffers_.find(name); + if (it != buffers_.end()) { + *buffer_off = (uint64_t*)it->second.buffer_; + *buffer_off_size = it->second.buffer_size_; + *buffer_val = it->second.buffer_var_; + *buffer_val_size = it->second.buffer_var_size_; return Status::Ok(); } @@ -223,7 +197,7 @@ Status Writer::init() { if (array_schema_ == nullptr) return LOG_STATUS( Status::WriterError("Cannot initialize query; Array schema not set")); - if (attr_buffers_.empty()) + if (buffers_.empty()) return LOG_STATUS( Status::WriterError("Cannot initialize query; Buffers not set")); @@ -250,11 +224,6 @@ Status Writer::init() { dedup_coords_ = !strcmp(dedup_coords, "true"); initialized_ = true; - auto dim_num = array_schema_->dim_num(); - coord_sizes_.resize(dim_num); - for (unsigned d = 0; d < dim_num; ++d) - coord_sizes_[d] = array_schema_->dimension(d)->coord_size(); - return Status::Ok(); } @@ -282,20 +251,58 @@ Status Writer::set_buffer( return LOG_STATUS( Status::WriterError("Cannot set buffer; Array schema not set")); - // Invoke the appropriate fuinction based on the buffer name - if (name == constants::coords) { + // Set special function for zipped coordinates buffer + if (name == constants::coords) return set_coords_buffer(buffer, buffer_size); - } else if 
(array_schema_->attribute(name) != nullptr) { - return set_attr_buffer(name, buffer, buffer_size); - } else if (array_schema_->dimension(name) != nullptr) { - return set_coord_buffer(name, buffer, buffer_size); - } else { + + // For easy reference + bool is_dim = array_schema_->is_dim(name); + bool is_attr = array_schema_->is_attr(name); + + // Neither a dimension nor an attribute + if (!is_dim && !is_attr) return LOG_STATUS(Status::WriterError( std::string("Cannot set buffer; Invalid buffer name '") + name + - "' (it should " - "be an attribute or dimension)")); + "' (it should be an attribute or dimension)")); + + // Error if it is var-sized + bool var_size = (array_schema_->var_size(name)); + if (var_size) + return LOG_STATUS(Status::WriterError( + std::string("Cannot set buffer; Input attribute/dimension '") + name + + "' is var-sized")); + + // Error if setting a new attribute/dimension after initialization + bool exists = buffers_.find(name) != buffers_.end(); + if (initialized_ && !exists) + return LOG_STATUS(Status::WriterError( + std::string("Cannot set buffer for new attribute/dimension '") + name + + "' after initialization")); + + // Check if zipped coordinates buffer is set + if (is_dim && coords_buffer_ != nullptr) + return LOG_STATUS(Status::WriterError( + std::string("Cannot set separate coordinates buffer after having " + "set the zipped coordinates buffer"))); + + if (is_dim) { + // Check number of coordinates + uint64_t coords_num = *buffer_size / array_schema_->cell_size(name); + if (coord_buffer_is_set_ && coords_num != coords_num_) + return LOG_STATUS(Status::WriterError( + std::string("Cannot set buffer; Input buffer for dimension '") + + name + + "' has a different number of coordinates than previously " + "set coordinate buffers")); + + coords_num_ = coords_num; + coord_buffer_is_set_ = true; + has_coords_ = true; } + // Set attribute/dimension buffer + buffers_[name] = QueryBuffer(buffer, nullptr, buffer_size, nullptr); + return 
Status::Ok(); } @@ -316,27 +323,47 @@ Status Writer::set_buffer( return LOG_STATUS( Status::WriterError("Cannot set buffer; Array schema not set")); - // Check that attribute exists - if (name != constants::coords && array_schema_->attribute(name) == nullptr) - return LOG_STATUS( - Status::WriterError("Cannot set buffer; Invalid attribute")); + // For easy reference + bool is_dim = array_schema_->is_dim(name); + bool is_attr = array_schema_->is_attr(name); + + // Neither a dimension nor an attribute + if (!is_dim && !is_attr) + return LOG_STATUS(Status::WriterError( + std::string("Cannot set buffer; Invalid buffer name '") + name + + "' (it should be an attribute or dimension)")); - // Check that attribute is var-sized - bool var_size = (name != constants::coords && array_schema_->var_size(name)); + // Error if it is fixed-sized + bool var_size = (array_schema_->var_size(name)); if (!var_size) return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer; Input attribute '") + name + + std::string("Cannot set buffer; Input attribute/dimension '") + name + "' is fixed-sized")); - // Error if setting a new attribute after initialization - bool attr_exists = attr_buffers_.find(name) != attr_buffers_.end(); - if (initialized_ && !attr_exists) + // Error if setting a new attribute/dimension after initialization + bool exists = buffers_.find(name) != buffers_.end(); + if (initialized_ && !exists) return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer for new attribute '") + name + + std::string("Cannot set buffer for new attribute/dimension '") + name + "' after initialization")); - // Set attribute buffer - attr_buffers_[name] = + if (is_dim) { + // Check number of coordinates + uint64_t coords_num = *buffer_off_size / constants::cell_var_offset_size; + if (coord_buffer_is_set_ && coords_num != coords_num_) + return LOG_STATUS(Status::WriterError( + std::string("Cannot set buffer; Input buffer for dimension '") + + name + + "' has a different 
number of coordinates than previously " + "set coordinate buffers")); + + coords_num_ = coords_num; + coord_buffer_is_set_ = true; + has_coords_ = true; + } + + // Set attribute/dimension buffer + buffers_[name] = QueryBuffer(buffer_off, buffer_val, buffer_off_size, buffer_val_size); return Status::Ok(); @@ -448,29 +475,24 @@ void Writer::add_written_fragment_info(const URI& uri) { } Status Writer::check_buffer_names() { - bool has_coords = !coord_buffers_.empty() || coords_buffer_ != nullptr; - // If the array is sparse, the coordinates must be provided - if (!array_schema_->dense() && !has_coords) + if (!array_schema_->dense() && !has_coords_) return LOG_STATUS( Status::WriterError("Sparse array writes expect the coordinates of the " "cells to be written")); // If the layout is unordered, the coordinates must be provided - if (layout_ == Layout::UNORDERED && !has_coords) + if (layout_ == Layout::UNORDERED && !has_coords_) return LOG_STATUS(Status::WriterError( "Unordered writes expect the coordinates of the cells to be written")); - // All attributes must be provided - if (attr_buffers_.size() != array_schema_->attribute_num()) + // All attributes/dimensions must be provided + auto expected_num = array_schema_->attribute_num(); + expected_num += (coord_buffer_is_set_) ? 
array_schema_->dim_num() : 0; + if (buffers_.size() != expected_num) return LOG_STATUS( - Status::WriterError("Writes expect all attributes to be set")); - - // If coordinates were given, they must be given for all dimensions - if (!coord_buffers_.empty() && - coord_buffers_.size() != array_schema_->dim_num()) - return LOG_STATUS(Status::WriterError( - "Writes expect coordinate buffers to be set for all dimensions")); + Status::WriterError("Writes expect all attributes (and coordinates in " + "the sparse/unordered case) to be set")); return Status::Ok(); } @@ -483,7 +505,7 @@ Status Writer::check_buffer_sizes() const { auto cell_num = array_schema_->domain()->cell_num(subarray_); uint64_t expected_cell_num = 0; - for (const auto& it : attr_buffers_) { + for (const auto& it : buffers_) { const auto& attr = it.first; bool is_var = array_schema_->var_size(attr); auto buffer_size = *it.second.buffer_size_; @@ -507,7 +529,7 @@ Status Writer::check_buffer_sizes() const { Status Writer::check_coord_dups(const std::vector& cell_pos) const { STATS_FUNC_IN(writer_check_coord_dups); - if (coord_buffers_.empty()) { + if (!has_coords_) { return LOG_STATUS( Status::WriterError("Cannot check for coordinate duplicates; " "Coordinates buffer not found")); @@ -516,12 +538,14 @@ Status Writer::check_coord_dups(const std::vector& cell_pos) const { if (coords_num_ < 2) return Status::Ok(); - // Prepare auxiliary vector for better performance + // Prepare auxiliary vectors for better performance auto dim_num = array_schema_->dim_num(); std::vector buffs(dim_num); + std::vector coord_sizes(dim_num); for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); - buffs[d] = (unsigned char*)coord_buffers_.find(dim_name)->second.buffer_; + buffs[d] = (unsigned char*)buffers_.find(dim_name)->second.buffer_; + coord_sizes[d] = array_schema_->cell_size(dim_name); } auto statuses = parallel_for(1, coords_num_, [&](uint64_t i) { @@ -529,9 +553,9 @@ Status 
Writer::check_coord_dups(const std::vector& cell_pos) const { bool found_dup = true; for (unsigned d = 0; d < dim_num; ++d) { if (memcmp( - buffs[d] + cell_pos[i] * coord_sizes_[d], - buffs[d] + cell_pos[i - 1] * coord_sizes_[d], - coord_sizes_[d]) != 0) { // Not the same + buffs[d] + cell_pos[i] * coord_sizes[d], + buffs[d] + cell_pos[i - 1] * coord_sizes[d], + coord_sizes[d]) != 0) { // Not the same found_dup = false; break; } @@ -557,7 +581,7 @@ Status Writer::check_coord_dups(const std::vector& cell_pos) const { Status Writer::check_coord_dups() const { STATS_FUNC_IN(writer_check_coord_dups_global); - if (coord_buffers_.empty()) { + if (!has_coords_) { return LOG_STATUS( Status::WriterError("Cannot check for coordinate duplicates; " "Coordinates buffer not found")); @@ -566,12 +590,14 @@ Status Writer::check_coord_dups() const { if (coords_num_ < 2) return Status::Ok(); - // Prepare auxiliary vector for better performance + // Prepare auxiliary vectors for better performance auto dim_num = array_schema_->dim_num(); std::vector buffs(dim_num); + std::vector coord_sizes(dim_num); for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); - buffs[d] = (unsigned char*)coord_buffers_.find(dim_name)->second.buffer_; + buffs[d] = (unsigned char*)buffers_.find(dim_name)->second.buffer_; + coord_sizes[d] = array_schema_->cell_size(dim_name); } auto statuses = parallel_for(1, coords_num_, [&](uint64_t i) { @@ -579,9 +605,9 @@ Status Writer::check_coord_dups() const { bool found_dup = true; for (unsigned d = 0; d < dim_num; ++d) { if (memcmp( - buffs[d] + i * coord_sizes_[d], - buffs[d] + (i - 1) * coord_sizes_[d], - coord_sizes_[d]) != 0) { // Not the same + buffs[d] + i * coord_sizes[d], + buffs[d] + (i - 1) * coord_sizes[d], + coord_sizes[d]) != 0) { // Not the same found_dup = false; break; } @@ -609,19 +635,21 @@ Status Writer::check_coord_dups() const { Status Writer::check_coord_oob() const { // Applicable only to sparse 
writes - exit if coordinates do not exist - if (coord_buffers_.empty()) + if (!has_coords_) return Status::Ok(); // Exit if there are no coordinates to write if (coords_num_ == 0) return Status::Ok(); - // Prepare auxiliary vector for better performance + // Prepare auxiliary vectors for better performance auto dim_num = array_schema_->dim_num(); std::vector buffs(dim_num); + std::vector coord_sizes(dim_num); for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); - buffs[d] = (unsigned char*)coord_buffers_.find(dim_name)->second.buffer_; + buffs[d] = (unsigned char*)buffers_.find(dim_name)->second.buffer_; + coord_sizes[d] = array_schema_->cell_size(dim_name); } // Check if all coordinates fall in the domain in parallel @@ -629,7 +657,7 @@ Status Writer::check_coord_oob() const { parallel_for_2d(0, coords_num_, 0, dim_num, [&](uint64_t c, unsigned d) { auto dim = array_schema_->dimension(d); std::string err_msg; - if (dim->oob(buffs[d] + c * coord_sizes_[d], &err_msg)) + if (dim->oob(buffs[d] + c * coord_sizes[d], &err_msg)) return Status::WriterError(err_msg); return Status::Ok(); }); @@ -644,7 +672,7 @@ Status Writer::check_coord_oob() const { Status Writer::check_global_order() const { // Applicable only to sparse writes - exit if coordinates do not exist - if (coord_buffers_.empty() || coords_num_ < 2) + if (!has_coords_ || coords_num_ < 2) return Status::Ok(); // Prepare auxiliary vector for better performance @@ -652,7 +680,7 @@ Status Writer::check_global_order() const { std::vector buffs(dim_num); for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); - buffs[d] = coord_buffers_.find(dim_name)->second.buffer_; + buffs[d] = buffers_.find(dim_name)->second.buffer_; } // Check if all coordinates fall in the domain in parallel @@ -756,17 +784,18 @@ Status Writer::check_subarray() const { // Note that in the dense case, the domain type is integer if 
(array_schema_->dense() && layout() == Layout::GLOBAL_ORDER) { for (unsigned int i = 0; i < dim_num; ++i) { - auto dim_domain = static_cast(domain->dimension(i)->domain()); - auto tile_extent = - static_cast(domain->dimension(i)->tile_extent()); + const auto dim = domain->dimension(i); + auto dim_domain = static_cast(dim->domain()); + auto tile_extent = static_cast(dim->tile_extent()); assert(tile_extent != nullptr); auto norm_1 = uint64_t(subarray[2 * i] - dim_domain[0]); auto norm_2 = (uint64_t(subarray[2 * i + 1]) - dim_domain[0]) + 1; if ((norm_1 / (*tile_extent) * (*tile_extent) != norm_1) || (norm_2 / (*tile_extent) * (*tile_extent) != norm_2)) { - return LOG_STATUS(Status::WriterError( - "Invalid subarray; In global writes for dense arrays, the subarray " - "must coincide with the tile bounds")); + return LOG_STATUS( + Status::WriterError("Invalid subarray; In global writes for " + "dense arrays, the subarray " + "must coincide with the tile bounds")); } } } @@ -776,33 +805,22 @@ Status Writer::check_subarray() const { void Writer::clear_coord_buffers() { // Applicable only if the coordinate buffers have been allocated by - // TileDB - if (coord_buffers_alloced_) { - for (auto& buff : coord_buffers_) { - std::free(buff.second.buffer_); - std::free(buff.second.buffer_var_); - } - coord_buffer_sizes_.clear(); - coord_buffers_.clear(); - coord_buffers_alloced_ = false; - } + // TileDB, which happens only when the zipped coordinates buffer is set + for (auto b : to_clean_) + std::free(b); + to_clean_.clear(); + coord_buffer_sizes_.clear(); } Status Writer::close_files(FragmentMetadata* meta) const { - // Close attribute files - for (const auto& it : attr_buffers_) { - const auto& attr = it.first; - RETURN_NOT_OK(storage_manager_->close_file(meta->attr_uri(attr))); - if (array_schema_->var_size(attr)) - RETURN_NOT_OK(storage_manager_->close_file(meta->attr_var_uri(attr))); + // Close attribute and dimension files + for (const auto& it : buffers_) { + const auto& 
name = it.first; + RETURN_NOT_OK(storage_manager_->close_file(meta->uri(name))); + if (array_schema_->var_size(name)) + RETURN_NOT_OK(storage_manager_->close_file(meta->var_uri(name))); } - // Close coordinate files - // TODO: close separate coordinate files - if (!coord_buffers_.empty()) - RETURN_NOT_OK( - storage_manager_->close_file(meta->attr_uri(constants::coords))); - return Status::Ok(); } @@ -810,8 +828,7 @@ Status Writer::compute_coord_dups( const std::vector& cell_pos, std::set* coord_dups) const { STATS_FUNC_IN(writer_compute_coord_dups); - - if (coord_buffers_.empty()) { + if (!has_coords_) { return LOG_STATUS( Status::WriterError("Cannot check for coordinate duplicates; " "Coordinates buffer not found")); @@ -820,12 +837,14 @@ Status Writer::compute_coord_dups( if (coords_num_ < 2) return Status::Ok(); - // Prepare auxiliary vector for better performance + // Prepare auxiliary vectors for better performance auto dim_num = array_schema_->dim_num(); std::vector buffs(dim_num); + std::vector coord_sizes(dim_num); for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); - buffs[d] = (unsigned char*)coord_buffers_.find(dim_name)->second.buffer_; + buffs[d] = (unsigned char*)buffers_.find(dim_name)->second.buffer_; + coord_sizes[d] = array_schema_->cell_size(dim_name); } std::mutex mtx; @@ -834,9 +853,9 @@ Status Writer::compute_coord_dups( bool found_dup = true; for (unsigned d = 0; d < dim_num; ++d) { if (memcmp( - buffs[d] + cell_pos[i] * coord_sizes_[d], - buffs[d] + cell_pos[i - 1] * coord_sizes_[d], - coord_sizes_[d]) != 0) { // Not the same + buffs[d] + cell_pos[i] * coord_sizes[d], + buffs[d] + cell_pos[i - 1] * coord_sizes[d], + coord_sizes[d]) != 0) { // Not the same found_dup = false; break; } @@ -852,10 +871,8 @@ Status Writer::compute_coord_dups( }); // Check all statuses - for (auto& st : statuses) { - if (!st.ok()) - return st; - } + for (auto& st : statuses) + RETURN_NOT_OK(st); return Status::Ok(); 
@@ -865,7 +882,7 @@ Status Writer::compute_coord_dups( Status Writer::compute_coord_dups(std::set* coord_dups) const { STATS_FUNC_IN(writer_compute_coord_dups_global); - if (coord_buffers_.empty()) { + if (!has_coords_) { return LOG_STATUS( Status::WriterError("Cannot check for coordinate duplicates; " "Coordinates buffer not found")); @@ -874,12 +891,14 @@ Status Writer::compute_coord_dups(std::set* coord_dups) const { if (coords_num_ < 2) return Status::Ok(); - // Prepare auxiliary vector for better performance + // Prepare auxiliary vectors for better performance auto dim_num = array_schema_->dim_num(); std::vector buffs(dim_num); + std::vector coord_sizes(dim_num); for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); - buffs[d] = (unsigned char*)coord_buffers_.find(dim_name)->second.buffer_; + buffs[d] = (unsigned char*)buffers_.find(dim_name)->second.buffer_; + coord_sizes[d] = array_schema_->cell_size(dim_name); } std::mutex mtx; @@ -888,9 +907,9 @@ Status Writer::compute_coord_dups(std::set* coord_dups) const { bool found_dup = true; for (unsigned d = 0; d < dim_num; ++d) { if (memcmp( - buffs[d] + i * coord_sizes_[d], - buffs[d] + (i - 1) * coord_sizes_[d], - coord_sizes_[d]) != 0) { // Not the same + buffs[d] + i * coord_sizes[d], + buffs[d] + (i - 1) * coord_sizes[d], + coord_sizes[d]) != 0) { // Not the same found_dup = false; break; } @@ -919,6 +938,8 @@ Status Writer::compute_coord_dups(std::set* coord_dups) const { Status Writer::compute_coords_metadata( const std::unordered_map>& tiles, FragmentMetadata* meta) const { + STATS_FUNC_IN(writer_compute_coords_metadata); + auto coords_type = array_schema_->coords_type(); switch (coords_type) { case Datatype::INT8: @@ -963,54 +984,67 @@ Status Writer::compute_coords_metadata( } return Status::Ok(); + + STATS_FUNC_OUT(writer_compute_coords_metadata); } template Status Writer::compute_coords_metadata( const std::unordered_map>& tiles, FragmentMetadata* meta) 
const { - STATS_FUNC_IN(writer_compute_coords_metadata); + // Applicable only if there are coordinates + if (!has_coords_) + return Status::Ok(); // Check if tiles are empty if (tiles.empty() || tiles.begin()->second.empty()) return Status::Ok(); - // For easy reference - auto tile_num = tiles.begin()->second.size(); + // Compute number of tiles. Assumes all attributes and + // and dimensions have the same number of tiles + auto it = tiles.begin(); + auto tile_num = array_schema_->var_size(it->first) ? it->second.size() / 2 : + it->second.size(); auto dim_num = array_schema_->dim_num(); // Compute MBRs auto statuses = parallel_for(0, tile_num, [&](uint64_t t) { std::vector mbr(2 * dim_num); std::vector data(dim_num); + uint64_t cell_num = UINT64_MAX; for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); auto tiles_it = tiles.find(dim_name); + assert(tiles_it != tiles.end()); data[d] = (T*)(tiles_it->second[t].internal_data()); - } + assert( + cell_num == UINT64_MAX || cell_num == tiles_it->second[t].cell_num()); + cell_num = tiles_it->second[t].cell_num(); - // Initialize MBR with the first coords - auto cell_num = tiles.begin()->second[t].cell_num(); - assert(cell_num > 0); - for (unsigned d = 0; d < dim_num; ++d) { + // Initialize MBR with the first coords mbr[2 * d] = data[d][0]; mbr[2 * d + 1] = data[d][0]; } // Expand the MBR with the rest coords + assert(cell_num > 0); for (uint64_t c = 1; c < cell_num; ++c) - utils::geometry::expand_mbr(data, c, &mbr[0]); + utils::geometry::expand_mbr(data, c, &mbr[0]); meta->set_mbr(t, &mbr[0]); return Status::Ok(); }); + // Check all statuses + for (auto& st : statuses) + RETURN_NOT_OK(st); + // Set last tile cell number - meta->set_last_tile_cell_num(tiles.begin()->second.back().cell_num()); + const auto& dim_name = array_schema_->dimension(0)->name(); + uint64_t last_tile_cell_num = tiles.find(dim_name)->second.back().cell_num(); + 
meta->set_last_tile_cell_num(last_tile_cell_num); return Status::Ok(); - - STATS_FUNC_OUT(writer_compute_coords_metadata); } template @@ -1095,39 +1129,37 @@ Status Writer::create_fragment( STATS_FUNC_OUT(writer_create_fragment); } -Status Writer::filter_attr_tiles( - std::unordered_map>* attr_tiles) const { - auto attr_num = attr_buffers_.size(); - auto statuses = parallel_for(0, attr_num, [&](uint64_t i) { - auto buff_it = attr_buffers_.begin(); +Status Writer::filter_tiles( + std::unordered_map>* tiles) const { + // Coordinates + auto num = buffers_.size(); + auto statuses = parallel_for(0, num, [&](uint64_t i) { + auto buff_it = buffers_.begin(); std::advance(buff_it, i); - const auto& attr = buff_it->first; - auto& tiles = (*attr_tiles)[attr]; - RETURN_CANCEL_OR_ERROR(filter_tiles(attr, &tiles)); + const auto& name = buff_it->first; + RETURN_CANCEL_OR_ERROR(filter_tiles(name, &((*tiles)[name]))); return Status::Ok(); }); // Check all statuses - for (auto& st : statuses) { - if (!st.ok()) - return st; - } + for (auto& st : statuses) + RETURN_NOT_OK(st); return Status::Ok(); } Status Writer::filter_tiles( - const std::string& attribute, std::vector* tiles) const { + const std::string& name, std::vector* tiles) const { STATS_FUNC_IN(writer_filter_tiles); - bool var_size = array_schema_->var_size(attribute); + bool var_size = array_schema_->var_size(name); // Filter all tiles auto tile_num = tiles->size(); for (size_t i = 0; i < tile_num; ++i) { - RETURN_NOT_OK(filter_tile(attribute, &(*tiles)[i], var_size)); + RETURN_NOT_OK(filter_tile(name, &(*tiles)[i], var_size)); if (var_size) { ++i; - RETURN_NOT_OK(filter_tile(attribute, &(*tiles)[i], false)); + RETURN_NOT_OK(filter_tile(name, &(*tiles)[i], false)); } } @@ -1137,18 +1169,13 @@ Status Writer::filter_tiles( } Status Writer::filter_tile( - const std::string& attribute, Tile* tile, bool offsets) const { + const std::string& name, Tile* tile, bool offsets) const { auto orig_size = tile->buffer()->size(); // Get a 
copy of the appropriate filter pipeline. - FilterPipeline filters; - if (tile->stores_coords()) { - filters = *array_schema_->coords_filters(); - } else if (offsets) { - filters = *array_schema_->cell_var_offsets_filters(); - } else { - filters = *array_schema_->filters(attribute); - } + FilterPipeline filters = + (offsets ? *array_schema_->cell_var_offsets_filters() : + *array_schema_->filters(name)); // Append an encryption filter when necessary. RETURN_NOT_OK(FilterPipeline::append_encryption_filter( @@ -1181,27 +1208,10 @@ Status Writer::finalize_global_write_state() { // Check that the same number of cells was written across attributes // and dimensions - uint64_t cell_num = 0; - if (!coord_buffers_.empty()) { - const auto& dim_name = coord_buffers_.begin()->first; - cell_num = global_write_state_->coord_cells_written_[dim_name]; - } else if (!attr_buffers_.empty()) { - const auto& attr = attr_buffers_.begin()->first; - cell_num = global_write_state_->attr_cells_written_[attr]; - } - - for (const auto& it : attr_buffers_) { - const auto& attr = it.first; - if (global_write_state_->attr_cells_written_[attr] != cell_num) { - clean_up(uri); - return LOG_STATUS(Status::WriterError( - "Failed to finalize global write state; Different " - "number of cells written across attributes and coordinates")); - } - } - for (const auto& it : coord_buffers_) { - const auto& dim_name = it.first; - if (global_write_state_->coord_cells_written_[dim_name] != cell_num) { + auto cell_num = global_write_state_->cells_written_[buffers_.begin()->first]; + for (const auto& it : buffers_) { + const auto& name = it.first; + if (global_write_state_->cells_written_[name] != cell_num) { clean_up(uri); return LOG_STATUS(Status::WriterError( "Failed to finalize global write state; Different " @@ -1210,7 +1220,7 @@ Status Writer::finalize_global_write_state() { } // Check if the total number of cells written is equal to the subarray size - if (coord_buffers_.empty()) { + if (!has_coords_) { 
auto expected_cell_num = array_schema_->domain()->cell_num(subarray_); if (cell_num != expected_cell_num) { clean_up(uri); @@ -1246,7 +1256,7 @@ Status Writer::global_write() { auto uri = frag_meta->fragment_uri(); // Check for coordinate duplicates - if (!coord_buffers_.empty()) { + if (has_coords_) { if (check_coord_dups_ && !dedup_coords_) RETURN_CANCEL_OR_ERROR(check_coord_dups()); if (check_global_order_) @@ -1258,35 +1268,16 @@ Status Writer::global_write() { if (dedup_coords_) RETURN_CANCEL_OR_ERROR(compute_coord_dups(&coord_dups)); - std::unordered_map> coord_tiles; - std::unordered_map> attr_tiles; - auto statuses = parallel_for(0, 2, [&](uint64_t i) { - if (i == 0) { - // Prepare coordinate tiles - RETURN_CANCEL_OR_ERROR_ELSE( - prepare_full_coord_tiles(coord_dups, &coord_tiles), clean_up(uri)); - } else { - // Prepare attribute tiles - RETURN_CANCEL_OR_ERROR_ELSE( - prepare_full_attr_tiles(coord_dups, &attr_tiles), clean_up(uri)); - } - - return Status::Ok(); - }); - - // Check statuses - for (auto& st : statuses) - RETURN_NOT_OK(st); + std::unordered_map> tiles; + RETURN_CANCEL_OR_ERROR_ELSE( + prepare_full_tiles(coord_dups, &tiles), clean_up(uri)); // Find number of tiles uint64_t tile_num = 0; - if (!attr_tiles.empty()) { - auto it = attr_tiles.begin(); + if (!tiles.empty()) { + auto it = tiles.begin(); tile_num = array_schema_->var_size(it->first) ? 
it->second.size() / 2 : it->second.size(); - } else { - assert(!coord_tiles.empty()); - tile_num = coord_tiles[0].size(); } // No cells to be written @@ -1297,33 +1288,15 @@ Status Writer::global_write() { auto new_num_tiles = frag_meta->tile_index_base() + tile_num; frag_meta->set_num_tiles(new_num_tiles); - std::vector coords_tiles; - statuses = parallel_for(0, 2, [&](uint64_t i) { - if (i == 0) { - // Filter coordinate tiles - RETURN_CANCEL_OR_ERROR_ELSE( - compute_coords_metadata(coord_tiles, frag_meta), clean_up(uri)); - // TODO: remove and filter coordinate tiles separately - RETURN_CANCEL_OR_ERROR_ELSE( - zip_coord_tiles(coord_tiles, &coords_tiles), clean_up(uri)); - RETURN_CANCEL_OR_ERROR_ELSE( - filter_tiles(constants::coords, &coords_tiles), clean_up(uri)); - } else { - // Filter attribute tiles - RETURN_CANCEL_OR_ERROR_ELSE( - filter_attr_tiles(&attr_tiles), clean_up(uri)); - } - - return Status::Ok(); - }); + // Compute coordinate metadata (if coordinates are present) + RETURN_CANCEL_OR_ERROR_ELSE( + compute_coords_metadata(tiles, frag_meta), clean_up(uri)); - // Check statuses - for (auto& st : statuses) - RETURN_NOT_OK(st); + // Filter all tiles + RETURN_CANCEL_OR_ERROR_ELSE(filter_tiles(&tiles), clean_up(uri)); // Write tiles for all attributes - RETURN_NOT_OK_ELSE( - write_all_tiles(frag_meta, attr_tiles, coords_tiles), clean_up(uri)); + RETURN_NOT_OK_ELSE(write_all_tiles(frag_meta, tiles), clean_up(uri)); // Increment the tile index base for the next global order write. 
frag_meta->set_tile_index_base(new_num_tiles); @@ -1341,26 +1314,11 @@ Status Writer::global_write_handle_last_tile() { const auto& uri = global_write_state_->frag_meta_->fragment_uri(); // Filter last tiles - std::vector coords_tiles; - std::unordered_map> attr_tiles; - auto statuses = parallel_for(0, 2, [&](uint64_t i) { - if (i == 0) { - // Filter last coordinate tiles - RETURN_NOT_OK_ELSE(filter_last_coord_tiles(&coords_tiles), clean_up(uri)); - } else { - // Filter last attribute tiles - RETURN_NOT_OK_ELSE(filter_last_attr_tiles(&attr_tiles), clean_up(uri)); - } - - return Status::Ok(); - }); - - // Check statuses - for (auto& st : statuses) - RETURN_NOT_OK(st); + std::unordered_map> tiles; + RETURN_NOT_OK_ELSE(filter_last_tiles(&tiles), clean_up(uri)); // Write the last tiles - RETURN_NOT_OK(write_all_tiles(meta, attr_tiles, coords_tiles)); + RETURN_NOT_OK(write_all_tiles(meta, tiles)); // Increment the tile index base. meta->set_tile_index_base(meta->tile_index_base() + 1); @@ -1368,28 +1326,29 @@ Status Writer::global_write_handle_last_tile() { return Status::Ok(); } -Status Writer::filter_last_attr_tiles( - std::unordered_map>* attr_tiles) const { - // Initialize attribute tiles - for (auto it : attr_buffers_) - (*attr_tiles)[it.first] = std::vector(); - - uint64_t attr_num = attr_buffers_.size(); - auto statuses = parallel_for(0, attr_num, [&](uint64_t i) { - auto buff_it = attr_buffers_.begin(); +Status Writer::filter_last_tiles( + std::unordered_map>* tiles) const { + // Initialize attribute and coordinate tiles + for (auto it : buffers_) + (*tiles)[it.first] = std::vector(); + + // Prepare the tiles first + uint64_t num = buffers_.size(); + auto statuses = parallel_for(0, num, [&](uint64_t i) { + auto buff_it = buffers_.begin(); std::advance(buff_it, i); - const auto& attr = buff_it->first; - auto& last_tile = global_write_state_->last_attr_tiles_[attr].first; - auto& last_tile_var = global_write_state_->last_attr_tiles_[attr].second; + const auto& 
name = &(buff_it->first); + + auto& last_tile = global_write_state_->last_tiles_[*name].first; + auto& last_tile_var = global_write_state_->last_tiles_[*name].second; if (!last_tile.empty()) { - std::vector& tiles = (*attr_tiles)[attr]; + std::vector& tiles_ref = (*tiles)[*name]; // Note making shallow clones here, as it's not necessary to copy the // underlying tile Buffers. - tiles.push_back(last_tile.clone(false)); + tiles_ref.push_back(last_tile.clone(false)); if (!last_tile_var.empty()) - tiles.push_back(last_tile_var.clone(false)); - RETURN_NOT_OK(filter_tiles(attr, &tiles)); + tiles_ref.push_back(last_tile_var.clone(false)); } return Status::Ok(); }); @@ -1398,57 +1357,21 @@ Status Writer::filter_last_attr_tiles( for (auto& st : statuses) RETURN_NOT_OK(st); - return Status::Ok(); -} - -Status Writer::filter_last_coord_tiles(std::vector* coords_tiles) const { - // Prepare coord tiles map - std::unordered_map> coord_tiles; - auto dim_num = array_schema_->dim_num(); + // Compute coordinates metadata auto meta = global_write_state_->frag_meta_.get(); - for (unsigned d = 0; d < dim_num; ++d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - coord_tiles[dim_name] = std::vector(); - } + RETURN_NOT_OK(compute_coords_metadata(*tiles, meta)); - for (unsigned d = 0; d < dim_num; ++d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - auto& last_tile = global_write_state_->last_coord_tiles_[dim_name].first; - auto& last_tile_var = - global_write_state_->last_coord_tiles_[dim_name].second; - - if (!last_tile.empty()) { - auto& tiles = coord_tiles[dim_name]; - // Note making shallow clones here, as it's not necessary to copy the - // underlying tile Buffers. 
- tiles.push_back(last_tile.clone(false)); - if (!last_tile_var.empty()) - tiles.push_back(last_tile_var.clone(false)); - } - } - - RETURN_NOT_OK(compute_coords_metadata(coord_tiles, meta)); - // TODO: remove - RETURN_NOT_OK(zip_coord_tiles(coord_tiles, coords_tiles)); - RETURN_NOT_OK(filter_tiles(constants::coords, coords_tiles)); + // Filter tiles + RETURN_NOT_OK(filter_tiles(tiles)); return Status::Ok(); } bool Writer::all_last_tiles_empty() const { - // See if any last coordinate tiles are nonempty - auto dim_num = array_schema_->dim_num(); - for (unsigned d = 0; d < dim_num; ++d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - auto& last_tile = global_write_state_->last_coord_tiles_[dim_name].first; - if (!last_tile.empty()) - return false; - } - - // See if any last coordinate tiles are nonempty - for (const auto& it : attr_buffers_) { - const auto& attr = it.first; - auto& last_tile = global_write_state_->last_attr_tiles_[attr].first; + // See if any last attribute/coordinate tiles are nonempty + for (const auto& it : buffers_) { + const auto& name = it.first; + auto& last_tile = global_write_state_->last_tiles_[name].first; if (!last_tile.empty()) return false; } @@ -1461,57 +1384,36 @@ Status Writer::init_global_write_state() { // Create global array state object if (global_write_state_ != nullptr) - return LOG_STATUS(Status::WriterError( - "Cannot initialize global write state; State not properly finalized")); + return LOG_STATUS( + Status::WriterError("Cannot initialize global write state; State not " + "properly finalized")); global_write_state_.reset(new GlobalWriteState); - bool has_coords = !coord_buffers_.empty(); - // Create fragment RETURN_NOT_OK( - create_fragment(!has_coords, &(global_write_state_->frag_meta_))); + create_fragment(!has_coords_, &(global_write_state_->frag_meta_))); auto uri = global_write_state_->frag_meta_->fragment_uri(); - // Initialize global write state for coordinates - if (has_coords) { - auto dim_num = 
array_schema_->dim_num(); - for (unsigned d = 0; d < dim_num; ++d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - - // Initialize last tiles - auto last_tile_pair = std::pair>( - dim_name, std::pair(Tile(), Tile())); - auto it_ret = - global_write_state_->last_coord_tiles_.emplace(last_tile_pair); - - auto& last_tile = it_ret.first->second.first; - RETURN_NOT_OK_ELSE(init_coord_tile(d, &last_tile), clean_up(uri)); - - // Initialize cells written - global_write_state_->coord_cells_written_[dim_name] = 0; - } - } - - // Initialize global write state for attributes - for (const auto& it : attr_buffers_) { + // Initialize global write state for attribute and coordinates + for (const auto& it : buffers_) { // Initialize last tiles - const auto& attr = it.first; + const auto& name = it.first; auto last_tile_pair = std::pair>( - attr, std::pair(Tile(), Tile())); - auto it_ret = global_write_state_->last_attr_tiles_.emplace(last_tile_pair); + name, std::pair(Tile(), Tile())); + auto it_ret = global_write_state_->last_tiles_.emplace(last_tile_pair); - if (!array_schema_->var_size(attr)) { + if (!array_schema_->var_size(name)) { auto& last_tile = it_ret.first->second.first; - RETURN_NOT_OK_ELSE(init_tile(attr, &last_tile), clean_up(uri)); + RETURN_NOT_OK_ELSE(init_tile(name, &last_tile), clean_up(uri)); } else { auto& last_tile = it_ret.first->second.first; auto& last_tile_var = it_ret.first->second.second; RETURN_NOT_OK_ELSE( - init_tile(attr, &last_tile, &last_tile_var), clean_up(uri)); + init_tile(name, &last_tile, &last_tile_var), clean_up(uri)); } // Initialize cells written - global_write_state_->attr_cells_written_[attr] = 0; + global_write_state_->cells_written_[name] = 0; } return Status::Ok(); @@ -1519,14 +1421,13 @@ Status Writer::init_global_write_state() { STATS_FUNC_OUT(writer_init_global_write_state); } -Status Writer::init_tile(const std::string& attribute, Tile* tile) const { +Status Writer::init_tile(const std::string& name, Tile* tile) 
const { // For easy reference - auto has_coords = !coord_buffers_.empty(); + auto cell_size = array_schema_->cell_size(name); + auto type = array_schema_->type(name); auto domain = array_schema_->domain(); - auto cell_size = array_schema_->cell_size(attribute); auto capacity = array_schema_->capacity(); - auto type = array_schema_->type(attribute); - auto cell_num_per_tile = has_coords ? capacity : domain->cell_num_per_tile(); + auto cell_num_per_tile = has_coords_ ? capacity : domain->cell_num_per_tile(); auto tile_size = cell_num_per_tile * cell_size; // Initialize @@ -1537,13 +1438,12 @@ Status Writer::init_tile(const std::string& attribute, Tile* tile) const { } Status Writer::init_tile( - const std::string& attribute, Tile* tile, Tile* tile_var) const { + const std::string& name, Tile* tile, Tile* tile_var) const { // For easy reference - auto has_coords = !coord_buffers_.empty(); + auto type = array_schema_->type(name); auto domain = array_schema_->domain(); auto capacity = array_schema_->capacity(); - auto type = array_schema_->type(attribute); - auto cell_num_per_tile = has_coords ? capacity : domain->cell_num_per_tile(); + auto cell_num_per_tile = has_coords_ ? 
capacity : domain->cell_num_per_tile(); auto tile_size = cell_num_per_tile * constants::cell_var_offset_size; // Initialize @@ -1558,21 +1458,6 @@ Status Writer::init_tile( return Status::Ok(); } -Status Writer::init_coord_tile(unsigned dim_idx, Tile* tile) const { - // For easy reference - auto dim = array_schema_->dimension(dim_idx); - auto coord_size = dim->coord_size(); - auto capacity = array_schema_->capacity(); - auto type = dim->type(); - auto tile_size = capacity * coord_size; - - // Initialize - RETURN_NOT_OK( - tile->init(constants::format_version, type, tile_size, coord_size, 0)); - - return Status::Ok(); -} - template Status Writer::init_tile_dense_cell_range_iters( std::vector>* iters) const { @@ -1627,18 +1512,18 @@ Status Writer::init_tile_dense_cell_range_iters( } Status Writer::init_tiles( - const std::string& attribute, + const std::string& name, uint64_t tile_num, std::vector* tiles) const { // Initialize tiles - bool var_size = array_schema_->var_size(attribute); + bool var_size = array_schema_->var_size(name); auto tiles_len = (var_size) ? 
2 * tile_num : tile_num; tiles->resize(tiles_len); for (size_t i = 0; i < tiles_len; i += (1 + var_size)) { if (!var_size) { - RETURN_NOT_OK(init_tile(attribute, &((*tiles)[i]))); + RETURN_NOT_OK(init_tile(name, &((*tiles)[i]))); } else { - RETURN_NOT_OK(init_tile(attribute, &((*tiles)[i]), &((*tiles)[i + 1]))); + RETURN_NOT_OK(init_tile(name, &((*tiles)[i]), &((*tiles)[i + 1]))); } } @@ -1654,7 +1539,8 @@ Status Writer::new_fragment_name( frag_uri->clear(); RETURN_NOT_OK(uuid::generate_uuid(&uuid, false)); std::stringstream ss; - ss << "/__" << *timestamp << "_" << *timestamp << "_" << uuid; + ss << "/__" << *timestamp << "_" << *timestamp << "_" << uuid << "_" + << constants::format_version; *frag_uri = ss.str(); return Status::Ok(); @@ -1758,9 +1644,8 @@ Status Writer::ordered_write() { clean_up(uri)); // Write tiles for all attributes - std::vector coords_tiles; // Will be ignored RETURN_NOT_OK_ELSE( - write_all_tiles(frag_meta.get(), attr_tiles, coords_tiles), + write_all_tiles(frag_meta.get(), attr_tiles), storage_manager_->vfs()->remove_dir(uri)); // Write the fragment metadata @@ -1777,12 +1662,12 @@ Status Writer::prepare_and_filter_attr_tiles( const std::vector& write_cell_ranges, std::unordered_map>* attr_tiles) const { // Initialize attribute tiles - for (const auto& it : attr_buffers_) + for (const auto& it : buffers_) (*attr_tiles)[it.first] = std::vector(); - uint64_t attr_num = attr_buffers_.size(); + uint64_t attr_num = buffers_.size(); auto statuses = parallel_for(0, attr_num, [&](uint64_t i) { - auto buff_it = attr_buffers_.begin(); + auto buff_it = buffers_.begin(); std::advance(buff_it, i); const auto& attr = buff_it->first; auto& tiles = (*attr_tiles)[attr]; @@ -1798,20 +1683,21 @@ Status Writer::prepare_and_filter_attr_tiles( return Status::Ok(); } -Status Writer::prepare_full_attr_tiles( +Status Writer::prepare_full_tiles( const std::set& coord_dups, - std::unordered_map>* attr_tiles) const { - // Initialize attribute tiles - for (const 
auto& it : attr_buffers_) - (*attr_tiles)[it.first] = std::vector(); + std::unordered_map>* tiles) const { + // Initialize attribute and coordinate tiles + for (const auto& it : buffers_) + (*tiles)[it.first] = std::vector(); - auto attr_num = attr_buffers_.size(); - auto statuses = parallel_for(0, attr_num, [&](uint64_t i) { - auto buff_it = attr_buffers_.begin(); + auto num = buffers_.size(); + auto statuses = parallel_for(0, num, [&](uint64_t i) { + auto buff_it = buffers_.begin(); std::advance(buff_it, i); - const auto& attr = buff_it->first; - auto& full_tiles = (*attr_tiles)[attr]; - RETURN_CANCEL_OR_ERROR(prepare_full_tiles(attr, coord_dups, &full_tiles)); + const auto& name = buff_it->first; + + RETURN_CANCEL_OR_ERROR( + prepare_full_tiles(name, coord_dups, &(*tiles)[name])); return Status::Ok(); }); @@ -1823,37 +1709,35 @@ Status Writer::prepare_full_attr_tiles( } Status Writer::prepare_full_tiles( - const std::string& attribute, + const std::string& name, const std::set& coord_dups, std::vector* tiles) const { - return array_schema_->var_size(attribute) ? - prepare_full_tiles_var(attribute, coord_dups, tiles) : - prepare_full_tiles_fixed(attribute, coord_dups, tiles); + return array_schema_->var_size(name) ? 
+ prepare_full_tiles_var(name, coord_dups, tiles) : + prepare_full_tiles_fixed(name, coord_dups, tiles); } Status Writer::prepare_full_tiles_fixed( - const std::string& attribute, + const std::string& name, const std::set& coord_dups, std::vector* tiles) const { STATS_FUNC_IN(writer_prepare_full_tiles_fixed); - // For easy reference - auto has_coords = !coord_buffers_.empty(); - auto it = attr_buffers_.find(attribute); + auto it = buffers_.find(name); auto buffer = (unsigned char*)it->second.buffer_; auto buffer_size = it->second.buffer_size_; + auto cell_size = array_schema_->cell_size(name); auto capacity = array_schema_->capacity(); - auto cell_size = array_schema_->cell_size(attribute); auto cell_num = *buffer_size / cell_size; auto domain = array_schema_->domain(); - auto cell_num_per_tile = has_coords ? capacity : domain->cell_num_per_tile(); + auto cell_num_per_tile = has_coords_ ? capacity : domain->cell_num_per_tile(); // Do nothing if there are no cells to write if (cell_num == 0) return Status::Ok(); // First fill the last tile - auto& last_tile = global_write_state_->last_attr_tiles_[attribute].first; + auto& last_tile = global_write_state_->last_tiles_[name].first; uint64_t cell_idx = 0; if (!last_tile.empty()) { if (coord_dups.empty()) { @@ -1881,7 +1765,7 @@ Status Writer::prepare_full_tiles_fixed( if (full_tile_num > 0) { tiles->resize(full_tile_num); for (auto& tile : (*tiles)) - RETURN_NOT_OK(init_tile(attribute, &tile)); + RETURN_NOT_OK(init_tile(name, &tile)); // Handle last tile (it must be either full or empty) if (last_tile.full()) { @@ -1930,7 +1814,7 @@ Status Writer::prepare_full_tiles_fixed( } } - global_write_state_->attr_cells_written_[attribute] += cell_num; + global_write_state_->cells_written_[name] += cell_num; return Status::Ok(); @@ -1938,14 +1822,13 @@ Status Writer::prepare_full_tiles_fixed( } Status Writer::prepare_full_tiles_var( - const std::string& attribute, + const std::string& name, const std::set& coord_dups, 
std::vector* tiles) const { STATS_FUNC_IN(writer_prepare_full_tiles_var); // For easy reference - auto has_coords = !coord_buffers_.empty(); - auto it = attr_buffers_.find(attribute); + auto it = buffers_.find(name); auto buffer = (uint64_t*)it->second.buffer_; auto buffer_var = (unsigned char*)it->second.buffer_var_; auto buffer_size = it->second.buffer_size_; @@ -1953,7 +1836,7 @@ Status Writer::prepare_full_tiles_var( auto capacity = array_schema_->capacity(); auto cell_num = *buffer_size / constants::cell_var_offset_size; auto domain = array_schema_->domain(); - auto cell_num_per_tile = has_coords ? capacity : domain->cell_num_per_tile(); + auto cell_num_per_tile = has_coords_ ? capacity : domain->cell_num_per_tile(); uint64_t offset, var_size; // Do nothing if there are no cells to write @@ -1961,7 +1844,7 @@ Status Writer::prepare_full_tiles_var( return Status::Ok(); // First fill the last tile - auto& last_tile_pair = global_write_state_->last_attr_tiles_[attribute]; + auto& last_tile_pair = global_write_state_->last_tiles_[name]; auto& last_tile = last_tile_pair.first; auto& last_tile_var = last_tile_pair.second; uint64_t cell_idx = 0; @@ -2011,7 +1894,7 @@ Status Writer::prepare_full_tiles_var( tiles->resize(2 * full_tile_num); auto tiles_len = tiles->size(); for (uint64_t i = 0; i < tiles_len; i += 2) - RETURN_NOT_OK(init_tile(attribute, &((*tiles)[i]), &((*tiles)[i + 1]))); + RETURN_NOT_OK(init_tile(name, &((*tiles)[i]), &((*tiles)[i + 1]))); // Handle last tile (it must be either full or empty) if (last_tile.full()) { @@ -2096,143 +1979,13 @@ Status Writer::prepare_full_tiles_var( } } - global_write_state_->attr_cells_written_[attribute] += cell_num; + global_write_state_->cells_written_[name] += cell_num; return Status::Ok(); STATS_FUNC_OUT(writer_prepare_full_tiles_var); } -Status Writer::prepare_full_coord_tiles( - const std::set& coord_dups, - std::unordered_map>* tiles) const { - // If there are no coordinates, exit - if (coord_buffers_.empty()) - 
return Status::Ok(); - - // Prepare tiles map - auto dim_num = array_schema_->dim_num(); - for (unsigned d = 0; d < dim_num; ++d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - (*tiles)[dim_name] = std::vector(); - } - - // Prepare full coordinate tiles - auto statuses = parallel_for(0, dim_num, [&](uint64_t d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - auto& coord_tiles = (*tiles)[dim_name]; - return prepare_full_coord_tiles_fixed(d, coord_dups, &coord_tiles); - }); - - // Check all statuses - for (auto& st : statuses) { - if (!st.ok()) - return st; - } - - return Status::Ok(); -} - -Status Writer::prepare_full_coord_tiles_fixed( - unsigned dim_idx, - const std::set& coord_dups, - std::vector* tiles) const { - // For easy reference - auto dim = array_schema_->dimension(dim_idx); - const auto& dim_name = dim->name(); - auto it = coord_buffers_.find(dim_name); - assert(it != coord_buffers_.end()); - auto buffer = (unsigned char*)(it->second.buffer_); - auto capacity = array_schema_->capacity(); - auto coord_size = dim->coord_size(); - - // Do nothing if there are no cells to write - if (coords_num_ == 0) - return Status::Ok(); - - // First fill the last tile - auto& last_tile = global_write_state_->last_coord_tiles_[dim_name].first; - uint64_t cell_idx = 0; - if (!last_tile.empty()) { - if (coord_dups.empty()) { - do { - RETURN_NOT_OK( - last_tile.write(buffer + cell_idx * coord_size, coord_size)); - ++cell_idx; - } while (!last_tile.full() && cell_idx != coords_num_); - } else { - do { - if (coord_dups.find(cell_idx) == coord_dups.end()) - RETURN_NOT_OK( - last_tile.write(buffer + cell_idx * coord_size, coord_size)); - ++cell_idx; - } while (!last_tile.full() && cell_idx != coords_num_); - } - } - - // Initialize full tiles and set previous last tile as first tile - auto full_tile_num = - (coords_num_ - cell_idx) / capacity + (int)last_tile.full(); - auto cell_num_to_write = (full_tile_num - last_tile.full()) * capacity; - 
- if (full_tile_num > 0) { - tiles->resize(full_tile_num); - for (auto& tile : (*tiles)) - RETURN_NOT_OK(init_coord_tile(dim_idx, &tile)); - - // Handle last tile (it must be either full or empty) - if (last_tile.full()) { - (*tiles)[0] = last_tile; - last_tile.reset(); - } else { - assert(last_tile.empty()); - } - - // Write all remaining cells one by one - if (coord_dups.empty()) { - for (uint64_t tile_idx = 0, i = 0; i < cell_num_to_write;) { - if ((*tiles)[tile_idx].full()) - ++tile_idx; - - RETURN_NOT_OK((*tiles)[tile_idx].write( - buffer + cell_idx * coord_size, coord_size * capacity)); - cell_idx += capacity; - i += capacity; - } - } else { - for (uint64_t tile_idx = 0, i = 0; i < cell_num_to_write; - ++cell_idx, ++i) { - if (coord_dups.find(cell_idx) == coord_dups.end()) { - if ((*tiles)[tile_idx].full()) - ++tile_idx; - - RETURN_NOT_OK((*tiles)[tile_idx].write( - buffer + cell_idx * coord_size, coord_size)); - } - } - } - } - - // Potentially fill the last tile - assert(coords_num_ - cell_idx < capacity - last_tile.cell_num()); - if (coord_dups.empty()) { - for (; cell_idx < coords_num_; ++cell_idx) { - RETURN_NOT_OK( - last_tile.write(buffer + cell_idx * coord_size, coord_size)); - } - } else { - for (; cell_idx < coords_num_; ++cell_idx) { - if (coord_dups.find(cell_idx) == coord_dups.end()) - RETURN_NOT_OK( - last_tile.write(buffer + cell_idx * coord_size, coord_size)); - } - } - - global_write_state_->coord_cells_written_[dim_name] += coords_num_; - - return Status::Ok(); -} - Status Writer::prepare_tiles( const std::string& attribute, const std::vector& write_cell_ranges, @@ -2246,7 +1999,7 @@ Status Writer::prepare_tiles( // For easy reference auto var_size = array_schema_->var_size(attribute); - auto it = attr_buffers_.find(attribute); + auto it = buffers_.find(attribute); auto buffer = (uint64_t*)it->second.buffer_; auto buffer_var = (uint64_t*)it->second.buffer_var_; auto buffer_size = it->second.buffer_size_; @@ -2308,84 +2061,22 @@ Status 
Writer::prepare_tiles( } Status Writer::prepare_tiles( - const std::string& attribute, - const std::vector& cell_pos, - const std::set& coord_dups, - std::vector* tiles) const { - return array_schema_->var_size(attribute) ? - prepare_tiles_var(attribute, cell_pos, coord_dups, tiles) : - prepare_tiles_fixed(attribute, cell_pos, coord_dups, tiles); -} - -Status Writer::prepare_tiles_fixed( - const std::string& attribute, - const std::vector& cell_pos, - const std::set& coord_dups, - std::vector* tiles) const { - STATS_FUNC_IN(writer_prepare_tiles_fixed); - - // Trivial case - if (cell_pos.empty()) - return Status::Ok(); - - // For easy reference - auto it = attr_buffers_.find(attribute); - auto buffer = (unsigned char*)it->second.buffer_; - auto cell_num = (uint64_t)cell_pos.size(); - auto capacity = array_schema_->capacity(); - auto dups_num = coord_dups.size(); - auto tile_num = utils::math::ceil(cell_num - dups_num, capacity); - auto cell_size = array_schema_->cell_size(attribute); - - // Initialize tiles - tiles->resize(tile_num); - for (auto& tile : (*tiles)) - RETURN_NOT_OK(init_tile(attribute, &tile)); - - // Write all cells one by one - if (dups_num == 0) { - for (uint64_t i = 0, tile_idx = 0; i < cell_num; ++i) { - if ((*tiles)[tile_idx].full()) - ++tile_idx; - - RETURN_NOT_OK((*tiles)[tile_idx].write( - buffer + cell_pos[i] * cell_size, cell_size)); - } - } else { - for (uint64_t i = 0, tile_idx = 0; i < cell_num; ++i) { - if (coord_dups.find(cell_pos[i]) != coord_dups.end()) - continue; - - if ((*tiles)[tile_idx].full()) - ++tile_idx; - - RETURN_NOT_OK((*tiles)[tile_idx].write( - buffer + cell_pos[i] * cell_size, cell_size)); - } - } - - return Status::Ok(); - - STATS_FUNC_OUT(writer_prepare_tiles_fixed); -} - -Status Writer::prepare_attr_tiles( const std::vector& cell_pos, const std::set& coord_dups, - std::unordered_map>* attr_tiles) const { + std::unordered_map>* tiles) const { // Initialize attribute tiles - attr_tiles->clear(); - for (const auto& it : 
attr_buffers_) - (*attr_tiles)[it.first] = std::vector(); - - // Prepare tiles for all attributes - auto attr_num = attr_buffers_.size(); - auto statuses = parallel_for(0, attr_num, [&](uint64_t i) { - auto buff_it = attr_buffers_.begin(); + tiles->clear(); + for (const auto& it : buffers_) + (*tiles)[it.first] = std::vector(); + + // Prepare tiles for all attributes and coordinates + auto buffer_num = buffers_.size(); + auto statuses = parallel_for(0, buffer_num, [&](uint64_t i) { + auto buff_it = buffers_.begin(); std::advance(buff_it, i); - const auto& attr = buff_it->first; - auto& tiles = (*attr_tiles)[attr]; - RETURN_CANCEL_OR_ERROR(prepare_tiles(attr, cell_pos, coord_dups, &tiles)); + const auto& name = buff_it->first; + RETURN_CANCEL_OR_ERROR( + prepare_tiles(name, cell_pos, coord_dups, &((*tiles)[name]))); return Status::Ok(); }); @@ -2396,39 +2087,18 @@ Status Writer::prepare_attr_tiles( return Status::Ok(); } -Status Writer::prepare_coord_tiles( +Status Writer::prepare_tiles( + const std::string& name, const std::vector& cell_pos, const std::set& coord_dups, - std::unordered_map>* tiles) const { - // If coord buffers are empty, there is nothing to do - if (coord_buffers_.empty()) - return Status::Ok(); - - // Prepare coordinate tiles map - auto dim_num = array_schema_->dim_num(); - for (unsigned d = 0; d < dim_num; ++d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - (*tiles)[dim_name] = std::vector(); - } - - // Prepare coordinate tiles in parallel - auto statuses = parallel_for(0, dim_num, [&](uint64_t d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - auto& coord_tiles = (*tiles)[dim_name]; - return prepare_coord_tiles_fixed(d, cell_pos, coord_dups, &coord_tiles); - }); - - // Check all statuses - for (auto& st : statuses) { - if (!st.ok()) - return st; - } - - return Status::Ok(); + std::vector* tiles) const { + return array_schema_->var_size(name) ? 
+ prepare_tiles_var(name, cell_pos, coord_dups, tiles) : + prepare_tiles_fixed(name, cell_pos, coord_dups, tiles); } -Status Writer::prepare_coord_tiles_fixed( - unsigned dim_idx, +Status Writer::prepare_tiles_fixed( + const std::string& name, const std::vector& cell_pos, const std::set& coord_dups, std::vector* tiles) const { @@ -2439,29 +2109,29 @@ Status Writer::prepare_coord_tiles_fixed( return Status::Ok(); // For easy reference - const auto& dim_name = array_schema_->dimension(dim_idx)->name(); - auto buffer = (unsigned char*)coord_buffers_.find(dim_name)->second.buffer_; + auto buffer = (unsigned char*)buffers_.find(name)->second.buffer_; + auto cell_size = array_schema_->cell_size(name); + auto cell_num = (uint64_t)cell_pos.size(); auto capacity = array_schema_->capacity(); auto dups_num = coord_dups.size(); - auto tile_num = utils::math::ceil(coords_num_ - dups_num, capacity); - auto coord_size = array_schema_->dimension(dim_idx)->coord_size(); + auto tile_num = utils::math::ceil(cell_num - dups_num, capacity); // Initialize tiles tiles->resize(tile_num); for (auto& tile : (*tiles)) - RETURN_NOT_OK(init_coord_tile(dim_idx, &tile)); + RETURN_NOT_OK(init_tile(name, &tile)); // Write all cells one by one if (dups_num == 0) { - for (uint64_t i = 0, tile_idx = 0; i < coords_num_; ++i) { + for (uint64_t i = 0, tile_idx = 0; i < cell_num; ++i) { if ((*tiles)[tile_idx].full()) ++tile_idx; RETURN_NOT_OK((*tiles)[tile_idx].write( - buffer + cell_pos[i] * coord_size, coord_size)); + buffer + cell_pos[i] * cell_size, cell_size)); } } else { - for (uint64_t i = 0, tile_idx = 0; i < coords_num_; ++i) { + for (uint64_t i = 0, tile_idx = 0; i < cell_num; ++i) { if (coord_dups.find(cell_pos[i]) != coord_dups.end()) continue; @@ -2469,7 +2139,7 @@ Status Writer::prepare_coord_tiles_fixed( ++tile_idx; RETURN_NOT_OK((*tiles)[tile_idx].write( - buffer + cell_pos[i] * coord_size, coord_size)); + buffer + cell_pos[i] * cell_size, cell_size)); } } @@ -2479,14 +2149,14 @@ Status 
Writer::prepare_coord_tiles_fixed( } Status Writer::prepare_tiles_var( - const std::string& attribute, + const std::string& name, const std::vector& cell_pos, const std::set& coord_dups, std::vector* tiles) const { STATS_FUNC_IN(writer_prepare_tiles_var); // For easy reference - auto it = attr_buffers_.find(attribute); + auto it = buffers_.find(name); auto buffer = (uint64_t*)it->second.buffer_; auto buffer_var = (unsigned char*)it->second.buffer_var_; auto buffer_var_size = it->second.buffer_var_size_; @@ -2501,7 +2171,7 @@ Status Writer::prepare_tiles_var( tiles->resize(2 * tile_num); auto tiles_len = tiles->size(); for (uint64_t i = 0; i < tiles_len; i += 2) - RETURN_NOT_OK(init_tile(attribute, &((*tiles)[i]), &((*tiles)[i + 1]))); + RETURN_NOT_OK(init_tile(name, &((*tiles)[i]), &((*tiles)[i + 1]))); // Write all cells one by one if (dups_num == 0) { @@ -2563,7 +2233,7 @@ Status Writer::sort_coords(std::vector* cell_pos) const { std::vector buffs(dim_num); for (unsigned d = 0; d < dim_num; ++d) { const auto& dim_name = array_schema_->dimension(d)->name(); - buffs[d] = (const void*)coord_buffers_.find(dim_name)->second.buffer_; + buffs[d] = (const void*)buffers_.find(dim_name)->second.buffer_; } // Populate cell_pos @@ -2591,8 +2261,6 @@ Status Writer::split_coords_buffer() { clear_coord_buffers(); - coord_buffers_alloced_ = true; - // New coord buffer allocations for (unsigned d = 0; d < dim_num; ++d) { auto dim = array_schema_->dimension(d); @@ -2602,10 +2270,11 @@ Status Writer::split_coords_buffer() { QueryBuffer buff; buff.buffer_size_ = &(it.first->second); buff.buffer_ = std::malloc(coord_buffer_size); + to_clean_.push_back(buff.buffer_); if (buff.buffer_ == nullptr) RETURN_NOT_OK(Status::WriterError( "Cannot split coordinate buffers; memory allocation failed")); - coord_buffers_.emplace(dim_name, buff); + buffers_.emplace(dim_name, buff); } // Split coordinates @@ -2613,7 +2282,7 @@ Status Writer::split_coords_buffer() { for (unsigned d = 0; d < dim_num; 
++d) { auto coord_size = array_schema_->dimension(d)->coord_size(); const auto& dim_name = array_schema_->dimension(d)->name(); - auto buff = (unsigned char*)(coord_buffers_[dim_name].buffer_); + auto buff = (unsigned char*)(buffers_[dim_name].buffer_); for (uint64_t c = 0; c < coords_num_; ++c) { coord = &(((unsigned char*)coords_buffer_)[c * coords_size + d * coord_size]); @@ -2647,69 +2316,35 @@ Status Writer::unordered_write() { const auto& uri = frag_meta->fragment_uri(); // Prepare tiles - std::unordered_map> coord_tiles; - std::unordered_map> attr_tiles; - auto statuses = parallel_for(0, 2, [&](uint64_t i) { - if (i == 0) { - // Prepare coordinate tiles - RETURN_CANCEL_OR_ERROR_ELSE( - prepare_coord_tiles(cell_pos, coord_dups, &coord_tiles), - clean_up(uri)); - } else { - // Prepare attribute tiles - RETURN_CANCEL_OR_ERROR_ELSE( - prepare_attr_tiles(cell_pos, coord_dups, &attr_tiles), clean_up(uri)); - } - - return Status::Ok(); - }); - - // Check all statuses - for (auto& st : statuses) - RETURN_NOT_OK(st); + std::unordered_map> tiles; + RETURN_NOT_OK_ELSE( + prepare_tiles(cell_pos, coord_dups, &tiles), clean_up(uri)); // Clear the boolean vector for coordinate duplicates coord_dups.clear(); // No tiles - if (coord_tiles.empty() || coord_tiles.begin()->second.empty()) + if (tiles.empty() || tiles.begin()->second.empty()) return Status::Ok(); // Set the number of tiles in the metadata - frag_meta->set_num_tiles(coord_tiles.begin()->second.size()); - - // Filter tiles - std::vector coords_tiles; - statuses = parallel_for(0, 2, [&](uint64_t i) { - if (i == 0) { - // Filter coordinate tiles - RETURN_CANCEL_OR_ERROR_ELSE( - compute_coords_metadata(coord_tiles, frag_meta.get()), clean_up(uri)); - // TODO: remove zipping and filter coordinate tiles separately - RETURN_NOT_OK_ELSE( - zip_coord_tiles(coord_tiles, &coords_tiles), clean_up(uri)); - RETURN_CANCEL_OR_ERROR_ELSE( - filter_tiles(constants::coords, &coords_tiles), clean_up(uri)); - } else { - // Filter 
attribute tiles - RETURN_CANCEL_OR_ERROR_ELSE( - filter_attr_tiles(&attr_tiles), clean_up(uri)); - } + auto it = tiles.begin(); + auto tile_num = array_schema_->var_size(it->first) ? it->second.size() / 2 : + it->second.size(); + frag_meta->set_num_tiles(tile_num); - return Status::Ok(); - }); + // Compute coordinates metadata + RETURN_NOT_OK_ELSE( + compute_coords_metadata(tiles, frag_meta.get()), clean_up(uri)); - // Check all statuses - for (auto& st : statuses) - RETURN_NOT_OK(st); + // Filter all tiles + RETURN_NOT_OK_ELSE(filter_tiles(&tiles), clean_up(uri)); // Write tiles for all attributes and coordinates - RETURN_NOT_OK_ELSE( - write_all_tiles(frag_meta.get(), attr_tiles, coords_tiles), - clean_up(uri)); + RETURN_NOT_OK_ELSE(write_all_tiles(frag_meta.get(), tiles), clean_up(uri)); // Write the fragment metadata - RETURN_CANCEL_OR_ERROR_ELSE( + RETURN_NOT_OK_ELSE( frag_meta->store(array_->get_encryption_key()), clean_up(uri)); // Add written fragment info @@ -2785,31 +2420,16 @@ Status Writer::write_cell_range_to_tile_var( Status Writer::write_all_tiles( FragmentMetadata* frag_meta, - const std::unordered_map>& attr_tiles, - const std::vector& coords_tiles) const { + const std::unordered_map>& tiles) const { STATS_FUNC_IN(writer_write_all_tiles); - assert(!attr_tiles.empty() || !coords_tiles.empty()); + assert(!tiles.empty()); std::vector> tasks; - - // Attribute tiles - for (const auto& it : attr_buffers_) { - const auto& attr = it.first; - auto& tiles = attr_tiles.find(attr)->second; - tasks.push_back( - storage_manager_->writer_thread_pool()->enqueue([&, this]() { - RETURN_CANCEL_OR_ERROR(write_tiles(attr, frag_meta, tiles)); - return Status::Ok(); - })); - } - - // Coordinate tiles - if (!coord_buffers_.empty()) { + for (const auto& it : tiles) { tasks.push_back( storage_manager_->writer_thread_pool()->enqueue([&, this]() { - RETURN_CANCEL_OR_ERROR( - write_tiles(constants::coords, frag_meta, coords_tiles)); + 
RETURN_CANCEL_OR_ERROR(write_tiles(it.first, frag_meta, it.second)); return Status::Ok(); })); } @@ -2826,7 +2446,7 @@ Status Writer::write_all_tiles( } Status Writer::write_tiles( - const std::string& attribute, + const std::string& name, FragmentMetadata* frag_meta, const std::vector& tiles) const { // Handle zero tiles @@ -2834,27 +2454,24 @@ Status Writer::write_tiles( return Status::Ok(); // For easy reference - bool var_size = array_schema_->var_size(attribute); - const auto& attr_uri = frag_meta->attr_uri(attribute); - const auto& attr_var_uri = - var_size ? frag_meta->attr_var_uri(attribute) : URI(""); + bool var_size = array_schema_->var_size(name); + const auto& uri = frag_meta->uri(name); + const auto& var_uri = var_size ? frag_meta->var_uri(name) : URI(""); // Write tiles auto tile_num = tiles.size(); for (size_t i = 0, tile_id = 0; i < tile_num; ++i, ++tile_id) { - RETURN_NOT_OK(storage_manager_->write(attr_uri, tiles[i].buffer())); - frag_meta->set_tile_offset(attribute, tile_id, tiles[i].buffer()->size()); + RETURN_NOT_OK(storage_manager_->write(uri, tiles[i].buffer())); + frag_meta->set_tile_offset(name, tile_id, tiles[i].buffer()->size()); STATS_COUNTER_ADD(writer_num_bytes_written, tiles[i].buffer()->size()); if (var_size) { ++i; - RETURN_NOT_OK(storage_manager_->write(attr_var_uri, tiles[i].buffer())); - frag_meta->set_tile_var_offset( - attribute, tile_id, tiles[i].buffer()->size()); - frag_meta->set_tile_var_size( - attribute, tile_id, tiles[i].pre_filtered_size()); + RETURN_NOT_OK(storage_manager_->write(var_uri, tiles[i].buffer())); + frag_meta->set_tile_var_offset(name, tile_id, tiles[i].buffer()->size()); + frag_meta->set_tile_var_size(name, tile_id, tiles[i].pre_filtered_size()); STATS_COUNTER_ADD(writer_num_bytes_written, tiles[i].buffer()->size()); } @@ -2862,10 +2479,9 @@ Status Writer::write_tiles( // Close files, except in the case of global order if (layout_ != Layout::GLOBAL_ORDER) { - 
RETURN_NOT_OK(storage_manager_->close_file(frag_meta->attr_uri(attribute))); + RETURN_NOT_OK(storage_manager_->close_file(frag_meta->uri(name))); if (var_size) - RETURN_NOT_OK( - storage_manager_->close_file(frag_meta->attr_var_uri(attribute))); + RETURN_NOT_OK(storage_manager_->close_file(frag_meta->var_uri(name))); } STATS_COUNTER_ADD(writer_num_attr_tiles_written, tile_num); @@ -2873,38 +2489,6 @@ Status Writer::write_tiles( return Status::Ok(); } -// TODO: remove -Status Writer::zip_coord_tiles( - const std::unordered_map>& coord_tiles, - std::vector* coords_tiles) const { - if (coord_tiles.empty() || coord_tiles.begin()->second.empty()) - return Status::Ok(); - - auto tile_num = coord_tiles.begin()->second.size(); - coords_tiles->clear(); - coords_tiles->resize(tile_num); - auto type = array_schema()->domain()->type(); - unsigned dim_num = array_schema()->dim_num(); - uint64_t coords_size = array_schema()->coords_size(); - for (size_t t = 0; t < tile_num; ++t) { - auto& new_tile = (*coords_tiles)[t]; - auto cell_num = coord_tiles.begin()->second[t].cell_num(); - RETURN_NOT_OK(new_tile.init( - constants::format_version, - type, - cell_num * coords_size, - coords_size, - dim_num)); - for (unsigned d = 0; d < dim_num; ++d) { - const auto& dim_name = array_schema_->dimension(d)->name(); - const auto& coord_tile = coord_tiles.find(dim_name)->second[t]; - new_tile.write(coord_tile); - } - } - - return Status::Ok(); -} - std::string Writer::coords_to_str(uint64_t i) const { std::stringstream ss; auto dim_num = array_schema_->dim_num(); @@ -2913,8 +2497,8 @@ std::string Writer::coords_to_str(uint64_t i) const { for (unsigned d = 0; d < dim_num; ++d) { auto dim = array_schema_->dimension(d); const auto& dim_name = dim->name(); - auto buff = (unsigned char*)coord_buffers_.find(dim_name)->second.buffer_; - auto coord = buff + i * coord_sizes_[d]; + auto buff = (unsigned char*)buffers_.find(dim_name)->second.buffer_; + auto coord = buff + i * dim->coord_size(); ss << 
dim->coord_to_str(coord); if (d < dim_num - 1) ss << ", "; @@ -2931,12 +2515,11 @@ void Writer::clean_up(const URI& uri) { Status Writer::set_coords_buffer(void* buffer, uint64_t* buffer_size) { // Error if setting non-existing coordinates after initialization - bool has_coords = coords_buffer_ != nullptr || !coord_buffers_.empty(); - if (initialized_ && has_coords) + if (initialized_ && has_coords_) return LOG_STATUS(Status::WriterError( std::string("Cannot set coordinates after initialization"))); - if (!coord_buffers_.empty()) + if (coord_buffer_is_set_) return LOG_STATUS(Status::WriterError( std::string("Cannot set zipped coordinates buffer after having set " "separate coordinate buffers"))); @@ -2944,65 +2527,7 @@ Status Writer::set_coords_buffer(void* buffer, uint64_t* buffer_size) { // Set zipped coordinates buffer coords_buffer_ = buffer; coords_buffer_size_ = buffer_size; - - return Status::Ok(); -} - -Status Writer::set_coord_buffer( - const std::string& name, void* buffer, uint64_t* buffer_size) { - // Check that dimension is fixed-sized - auto dim = array_schema_->dimension(name); - if (dim->var_size()) - return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer; Input dimension '") + name + - "' is var-sized")); - - // Check if zipped coordinates buffer is set - if (coords_buffer_ != nullptr) - return LOG_STATUS(Status::WriterError( - std::string("Cannot set separate coordinates buffer after having " - "set the zipped coordinates buffer"))); - - // Check number of coordinates - uint64_t coords_num = *buffer_size / dim->coord_size(); - if (!coord_buffers_.empty() && coords_num != coords_num_) - return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer; Input buffer for dimension '") + name + - "' has a different number of coordinates than previously " - "set coordinate buffers")); - - // Error if setting a new dimension after initialization - bool dim_exists = coord_buffers_.find(name) != coord_buffers_.end(); - if 
(initialized_ && !dim_exists) - return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer for new dimension '") + name + - "' after initialization")); - - // Set coordinate buffer - coord_buffers_[name] = QueryBuffer(buffer, nullptr, buffer_size, nullptr); - coords_num_ = coords_num; - - return Status::Ok(); -} - -Status Writer::set_attr_buffer( - const std::string& name, void* buffer, uint64_t* buffer_size) { - // Check that attribute is fixed-sized - bool var_size = (array_schema_->var_size(name)); - if (var_size) - return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer; Input attribute '") + name + - "' is var-sized")); - - // Error if setting a new attribute after initialization - bool attr_exists = attr_buffers_.find(name) != attr_buffers_.end(); - if (initialized_ && !attr_exists) - return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer for new attribute '") + name + - "' after initialization")); - - // Set attribute buffer - attr_buffers_[name] = QueryBuffer(buffer, nullptr, buffer_size, nullptr); + has_coords_ = true; return Status::Ok(); } diff --git a/tiledb/sm/query/writer.h b/tiledb/sm/query/writer.h index df6ff3d639bf..c458582f513f 100644 --- a/tiledb/sm/query/writer.h +++ b/tiledb/sm/query/writer.h @@ -64,32 +64,18 @@ class Writer { */ struct GlobalWriteState { /** - * Stores the last tile of each attribute for each write operation. - * For fixed-sized attributes, the second tile is ignored. For - * var-sized attributes, the first tile is the offsets tile, whereas - * the second tile is the values tile. + * Stores the last tile of each attribute/dimension for each write + * operation. For fixed-sized attributes/dimensions, the second tile is + * ignored. For var-sized attributes/dimensions, the first tile is the + * offsets tile, whereas the second tile is the values tile. 
*/ - std::unordered_map> last_attr_tiles_; + std::unordered_map> last_tiles_; /** - * Stores the number of cells written for each attribute across the - * write operations. + * Stores the number of cells written for each attribute/dimension across + * the write operations. */ - std::unordered_map attr_cells_written_; - - /** - * Stores the last coordinate tile of each dimension for each write - * operation. For fixed-sized dimensions, the second tile is ignored. For - * var-sized dimensions, the first tile is the offsets tile, whereas - * the second tile is the values tile. - */ - std::unordered_map> last_coord_tiles_; - - /** - * Stores the number of cells written for each dimension across the - * write operations. - */ - std::unordered_map coord_cells_written_; + std::unordered_map cells_written_; /** The fragment metadata. */ std::shared_ptr frag_meta_; @@ -291,11 +277,8 @@ class Writer { /** The array schema. */ const ArraySchema* array_schema_; - /** Maps attribute names to their buffers. */ - std::unordered_map attr_buffers_; - - /** Maps dimension names to their coordinate buffers. */ - std::unordered_map coord_buffers_; + /** Maps attribute/dimensions names to their buffers. */ + std::unordered_map buffers_; /** The coordinates buffer potentially set by the user. */ void* coords_buffer_; @@ -304,26 +287,23 @@ class Writer { uint64_t* coords_buffer_size_; /** - * The coordinate sizes, one per dimension, in the order the dimensions - * are defined in the array schema. + * The sizes of the coordinate buffers in a map (dimension -> size). + * Needed separate storage since QueryBuffer stores a pointer to the buffer + * sizes. */ - std::vector coord_sizes_; + std::unordered_map coord_buffer_sizes_; - /** Number of coordinates provided by the user. */ - uint64_t coords_num_; + /** True if at least one separate coordinate buffer is set. 
*/ + bool coord_buffer_is_set_; - /** - * If `true` it means that TileDB alloc'ed these buffers, not the user, and - * TileDB will free this buffers in the writer destructor. - */ - bool coord_buffers_alloced_; + /** Keeps track of the number of coordinates across coordinate buffers. */ + uint64_t coords_num_; /** - * The sizes of the coordinate buffers in a map (dimension -> size). - * Needed separate storage since CoordBuffer stores a pointer to the buffer - * sizes. + * True if either zipped coordinates buffer or separate coordinate + * buffers are set. */ - std::unordered_map coord_buffer_sizes_; + bool has_coords_; /** * Meaningful only when `dedup_coords_` is `false`. @@ -379,6 +359,9 @@ class Writer { /** Stores information about the written fragments. */ std::vector written_fragment_info_; + /** Allocated buffers that neeed to be cleaned upon destruction. */ + std::vector to_clean_; + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ @@ -526,47 +509,42 @@ class Writer { bool dense, std::shared_ptr* frag_meta) const; /** - * Runs the input tiles for all attributes through the filter pipeline. - * The tile buffers are modified to contain the output of the pipeline. - * - * @return Status + * Runs the input coordinate and attribute tiles through their + * filter pipelines. The tile buffers are modified to contain the output + * of the pipeline. */ - Status filter_attr_tiles( - std::unordered_map>* attr_tiles) const; + Status filter_tiles( + std::unordered_map>* tiles) const; /** - * Applicable only to global writes. Filters the last coordinate tiles - * and zips them into the input vector. + * Applicable only to global writes. Filters the last attribute and + * coordinate tiles. */ - Status filter_last_coord_tiles(std::vector* coords_tiles) const; - - /** Applicable only to global writes. Filters the last attribute tiles. 
*/ - Status filter_last_attr_tiles( - std::unordered_map>* attr_tiles) const; + Status filter_last_tiles( + std::unordered_map>* tiles) const; /** * Runs the input tiles for the input attribute through the filter pipeline. * The tile buffers are modified to contain the output of the pipeline. * - * @param attribute The attribute the tiles belong to. + * @param name The attribute/dimension the tiles belong to. * @param tile The tiles to be filtered. * @return Status */ - Status filter_tiles( - const std::string& attribute, std::vector* tiles) const; + Status filter_tiles(const std::string& name, std::vector* tiles) const; /** - * Runs the input tile for the input attribute through the filter pipeline. - * The tile buffer is modified to contain the output of the pipeline. + * Runs the input tile for the input attribute/dimension through the filter + * pipeline. The tile buffer is modified to contain the output of the + * pipeline. * - * @param attribute The attribute the tile belong to. + * @param name The attribute/dimension the tile belong to. * @param tile The tile to be filtered. * @param offsets True if the tile to be filtered contains offsets for a - * var-sized attribute. + * var-sized attribute/dimension. * @return Status */ - Status filter_tile( - const std::string& attribute, Tile* tile, bool offsets) const; + Status filter_tile(const std::string& name, Tile* tile, bool offsets) const; /** Finalizes the global write state. */ Status finalize_global_write_state(); @@ -592,31 +570,21 @@ class Writer { /** * Initializes a fixed-sized tile. * - * @param attribute The attribute the tile belongs to. + * @param name The attribute/dimension the tile belongs to. * @param tile The tile to be initialized. * @return Status */ - Status init_tile(const std::string& attribute, Tile* tile) const; + Status init_tile(const std::string& name, Tile* tile) const; /** * Initializes a var-sized tile. * - * @param attribute The attribute the tile belongs to. 
+ * @param name The attribute/dimension the tile belongs to. * @param tile The offsets tile to be initialized. * @param tile_var The var-sized data tile to be initialized. * @return Status */ - Status init_tile( - const std::string& attribute, Tile* tile, Tile* tile_var) const; - - /** - * Initializes a fixed-sized coordinate tile. - * - * @param dim_idx The index of the dimension the tile belongs to. - * @param tile The tile to be initialized. - * @return Status - */ - Status init_coord_tile(unsigned dim_idx, Tile* tile) const; + Status init_tile(const std::string& name, Tile* tile, Tile* tile_var) const; /** * Initializes dense cell range iterators for the subarray to be writte, @@ -631,16 +599,16 @@ class Writer { std::vector>* iters) const; /** - * Initializes the tiles for writing for the input attribute. + * Initializes the tiles for writing for the input attribute/dimension. * - * @param attribute The attribute the tiles belong to. + * @param name The attribute/dimension the tiles belong to. * @param tile_num The number of tiles. * @param tiles The tiles to be initialized. Note that the vector * has been already preallocated. * @return Status */ Status init_tiles( - const std::string& attribute, + const std::string& name, uint64_t tile_num, std::vector* tiles) const; @@ -688,111 +656,74 @@ class Writer { /** * Applicable only to write in global order. It prepares only full * tiles, storing the last potentially non-full tile in - * `global_write_state->last_attr_tiles_` as part of the state to be used in + * `global_write_state->last_tiles_` as part of the state to be used in * the next write invocation. The last tiles are written to storage * upon `finalize`. Upon each invocation, the function first * populates the partially full last tile from the previous * invocation. * - * @param attribute The attribute to prepare the tiles for. * @param coord_dups The positions of the duplicate coordinates. - * @param attr_tiles The **full** tiles to be created. 
+ * @param tiles The **full** tiles to be created. * @return Status */ - Status prepare_full_attr_tiles( + Status prepare_full_tiles( const std::set& coord_dups, - std::unordered_map>* attr_tiles) const; + std::unordered_map>* tiles) const; /** * Applicable only to write in global order. It prepares only full * tiles, storing the last potentially non-full tile in - * `global_write_state->last_attr_tiles_` as part of the state to be used in + * `global_write_state->last_tiles_` as part of the state to be used in * the next write invocation. The last tiles are written to storage * upon `finalize`. Upon each invocation, the function first * populates the partially full last tile from the previous * invocation. * - * @param attribute The attribute to prepare the tiles for. + * @param name The attribute/dimension to prepare the tiles for. * @param coord_dups The positions of the duplicate coordinates. * @param tiles The **full** tiles to be created. * @return Status */ Status prepare_full_tiles( - const std::string& attribute, + const std::string& name, const std::set& coord_dups, std::vector* tiles) const; /** * Applicable only to write in global order. It prepares only full * tiles, storing the last potentially non-full tile in - * `global_write_state_->last_attr_tiles_` as part of the state to be used in + * `global_write_state_->last_tiles_` as part of the state to be used in * the next write invocation. The last tiles are written to storage * upon `finalize`. Upon each invocation, the function first * populates the partially full last tile from the previous * invocation. Applicable only to fixed-sized attributes. * - * @param attribute The attribute to prepare the tiles for. + * @param name The attribute/dimension to prepare the tiles for. * @param coord_dups The positions of the duplicate coordinates. * @param tiles The **full** tiles to be created. 
* @return Status */ Status prepare_full_tiles_fixed( - const std::string& attribute, + const std::string& name, const std::set& coord_dups, std::vector* tiles) const; /** * Applicable only to write in global order. It prepares only full * tiles, storing the last potentially non-full tile in - * `global_write_state_->last_attr_tiles_` as part of the state to be used in + * `global_write_state_->last_tiles_` as part of the state to be used in * the next write invocation. The last tiles are written to storage * upon `finalize`. Upon each invocation, the function first * populates the partially full last tile from the previous * invocation. Applicable only to var-sized attributes. * - * @param attribute The attribute to prepare the tiles for. + * @param name The attribute/dimension to prepare the tiles for. * @param coord_dups The positions of the duplicate coordinates. * @param tiles The **full** tiles to be created. * @return Status */ Status prepare_full_tiles_var( - const std::string& attribute, - const std::set& coord_dups, - std::vector* tiles) const; - - /** - * Applicable only to write in global order. It prepares only full - * coordinate tiles for each dimension, storing the last potentially - * non-full tiles in `global_write_state->last_coord_tiles_` as part of the - * state to be used in the next write invocation. The last tiles are written - * to storage upon `finalize`. Upon each invocation, the function first - * populates the partially full last tile from the previous - * invocation. - * - * @param coord_dups The positions of the duplicate coordinates. - * @param tiles The **full** tiles to be created, one vector per dimension. - * @return Status - */ - Status prepare_full_coord_tiles( - const std::set& coord_dups, - std::unordered_map>* tiles) const; - - /** - * Applicable only to write in global order. 
It prepares only full - * coordinate tiles for each dimension, storing the last potentially - * non-full tiles in `global_write_state->last_coord_tiles_` as part of the - * state to be used in the next write invocation. The last tiles are written - * to storage upon `finalize`. Upon each invocation, the function first - * populates the partially full last tile from the previous - * invocation. - * - * @param dim_idx The index of the dimension to prepare the full tiles for. - * @param coord_dups The positions of the duplicate coordinates. - * @param tiles The **full** tiles to be created. - * @return Status - */ - Status prepare_full_coord_tiles_fixed( - unsigned dim_idx, + const std::string& name, const std::set& coord_dups, std::vector* tiles) const; @@ -824,29 +755,28 @@ class Writer { std::vector* tiles) const; /** - * It prepares the tiles, re-organizing the cells from the user - * buffers based on the input sorted positions. + * It prepares the attribute and coordinate tiles, re-organizing the cells + * from the user buffers based on the input sorted positions and coordinate + * duplicates. * - * @param attribute The attribute to prepare the tiles for. * @param cell_pos The positions that resulted from sorting and * according to which the cells must be re-arranged. * @param coord_dups The set with the positions * of duplicate coordinates/cells. - * @param tiles The tiles to be created. + * @param tiles The tiles to be created, one vector per attribute or + * coordinate. * @return Status */ Status prepare_tiles( - const std::string& attribute, const std::vector& cell_pos, const std::set& coord_dups, - std::vector* tiles) const; + std::unordered_map>* tiles) const; /** - * It prepares the tiles, re-organizing the cells from the user - * buffers based on the input sorted positions. Applicable only - * to fixed-sized attributes. 
+ * It prepares the tiles for the input attribute or dimension, re-organizing + * the cells from the user buffers based on the input sorted positions. * - * @param attribute The attribute to prepare the tiles for. + * @param name The attribute or dimension to prepare the tiles for. * @param cell_pos The positions that resulted from sorting and * according to which the cells must be re-arranged. * @param coord_dups The set with the positions @@ -854,18 +784,18 @@ class Writer { * @param tiles The tiles to be created. * @return Status */ - Status prepare_tiles_fixed( - const std::string& attribute, + Status prepare_tiles( + const std::string& name, const std::vector& cell_pos, const std::set& coord_dups, std::vector* tiles) const; /** - * It prepares the tiles, re-organizing the cells from the user - * buffers based on the input sorted positions. Applicable only - * to var-sized attributes. + * It prepares the tiles for the input attribute or dimension, re-organizing + * the cells from the user buffers based on the input sorted positions. + * Applicable only to fixed-sized attributes or dimensions. * - * @param attribute The attribute to prepare the tiles for. + * @param name The attribute or dimension to prepare the tiles for. * @param cell_pos The positions that resulted from sorting and * according to which the cells must be re-arranged. * @param coord_dups The set with the positions @@ -873,51 +803,18 @@ class Writer { * @param tiles The tiles to be created. * @return Status */ - Status prepare_tiles_var( - const std::string& attribute, + Status prepare_tiles_fixed( + const std::string& name, const std::vector& cell_pos, const std::set& coord_dups, std::vector* tiles) const; /** - * It prepares the attribute tiles, re-organizing the cells from the user - * buffers based on the input sorted positions. - * - * @param cell_pos The positions that resulted from sorting and - * according to which the cells must be re-arranged. 
- * @param coord_dups The set with the positions - * of duplicate coordinates/cells. - * @param attr_tiles The tiles to be created, one vector per attribute - * @return Status - */ - Status prepare_attr_tiles( - const std::vector& cell_pos, - const std::set& coord_dups, - std::unordered_map>* attr_tiles) const; - - /** - * It prepares the coordinate tiles, re-organizing the cells from the user - * buffers based on the input sorted positions. + * It prepares the tiles for the input attribute or dimension, re-organizing + * the cells from the user buffers based on the input sorted positions. + * Applicable only to var-sized attributes or dimensions. * - * @param cell_pos The positions that resulted from sorting and - * according to which the cells must be re-arranged. - * @param coord_dups The set with the positions - * of duplicate coordinates/cells. - * @param tiles The tiles to be created, one vector per dimension - * @return Status - */ - Status prepare_coord_tiles( - const std::vector& cell_pos, - const std::set& coord_dups, - std::unordered_map>* tiles) const; - - /** - * It prepares the coordinate tiles, re-organizing the cells from the user - * buffers based on the input sorted positions. Applicable to fixed-sized - * coordinates - * - * @param dim_idx The index of the dimension to prepare the coordinate - * tiles for. + * @param name The attribute to prepare the tiles for. * @param cell_pos The positions that resulted from sorting and * according to which the cells must be re-arranged. * @param coord_dups The set with the positions @@ -925,8 +822,8 @@ class Writer { * @param tiles The tiles to be created. * @return Status */ - Status prepare_coord_tiles_fixed( - unsigned dim_idx, + Status prepare_tiles_var( + const std::string& name, const std::vector& cell_pos, const std::set& coord_dups, std::vector* tiles) const; @@ -1016,36 +913,28 @@ class Writer { /** * Writes all the input tiles to storage. 
* - * @param attr_tiles Attribute tiles to be written, one element per attribute. - * @param coords_tiles Coordinate tiles to be written. + * @param tiles Attribute/Coordinate tiles to be written, one element per + * attribute or dimension. + * @param tiles Attribute/Coordinate tiles to be written. * @return Status */ Status write_all_tiles( FragmentMetadata* frag_meta, - const std::unordered_map>& attr_tiles, - const std::vector& coords_tiles) const; + const std::unordered_map>& tiles) const; /** - * Writes the input tiles for the input attribute to storage. + * Writes the input tiles for the input attribute/dimension to storage. * - * @param attribute The attribute the tiles belong to. + * @param name The attribute/dimension the tiles belong to. * @param frag_meta The fragment metadata. * @param tiles The tiles to be written. * @return Status */ Status write_tiles( - const std::string& attribute, + const std::string& name, FragmentMetadata* frag_meta, const std::vector& tiles) const; - // TODO: remove - // This will be removed in a subsequent PR very soon, when we will write - // the coordinate tiles in separate files and, therefore, there will be - // no need zipping the coordinates from separate buffers into a single one. - Status zip_coord_tiles( - const std::unordered_map>& coord_tiles, - std::vector* coords_tiles) const; - /** * Returns the i-th coordinates in the coordinate buffers in string * format. @@ -1073,28 +962,6 @@ class Writer { * @return Status */ Status set_coords_buffer(void* buffer, uint64_t* buffer_size); - - /** - * Sets the coordinate buffer on a single fixed-sized dimension. - * - * @param name The name of the dimension the buffer corresponds to. - * @param buffer The buffer that has the input data to be written. - * @param buffer_size The size of `buffer` in bytes. 
- * @return Status - */ - Status set_coord_buffer( - const std::string& name, void* buffer, uint64_t* buffer_size); - - /** - * Sets the attribute buffer on a single fixed-sized attribute. - * - * @param name The name of the dimension the buffer corresponds to. - * @param buffer The buffer that has the input data to be written. - * @param buffer_size The size of `buffer` in bytes. - * @return Status - */ - Status set_attr_buffer( - const std::string& name, void* buffer, uint64_t* buffer_size); }; } // namespace sm diff --git a/tiledb/sm/storage_manager/consolidator.cc b/tiledb/sm/storage_manager/consolidator.cc index 7085c252cab1..4d7210273ba5 100644 --- a/tiledb/sm/storage_manager/consolidator.cc +++ b/tiledb/sm/storage_manager/consolidator.cc @@ -839,16 +839,15 @@ Status Consolidator::compute_new_fragment_uri( // Get timestamp ranges uint32_t f_version; - RETURN_NOT_OK( - utils::parse::get_fragment_name_version(first_name, &f_version)); + RETURN_NOT_OK(utils::parse::get_fragment_name_version(first, &f_version)); auto t_first = utils::parse::get_timestamp_range(f_version, first_name); - RETURN_NOT_OK(utils::parse::get_fragment_name_version(last_name, &f_version)); + RETURN_NOT_OK(utils::parse::get_fragment_name_version(last, &f_version)); auto t_last = utils::parse::get_timestamp_range(f_version, last_name); // Create new URI std::stringstream ss; ss << first.parent().to_string() << "/__" << t_first.first << "_" - << t_last.second << "_" << uuid; + << t_last.second << "_" << uuid << "_" << constants::format_version; *new_uri = URI(ss.str()); diff --git a/tiledb/sm/storage_manager/storage_manager.cc b/tiledb/sm/storage_manager/storage_manager.cc index f8cdcfc4f6b5..ff845067f2dd 100644 --- a/tiledb/sm/storage_manager/storage_manager.cc +++ b/tiledb/sm/storage_manager/storage_manager.cc @@ -906,7 +906,7 @@ Status StorageManager::get_fragment_info( uint32_t fragment_name_version; RETURN_NOT_OK(utils::parse::get_fragment_name_version( - fragment_name, 
&fragment_name_version)); + fragment_uri, &fragment_name_version)); // Get timestamp range auto timestamp_range = @@ -1744,8 +1744,8 @@ Status StorageManager::load_fragment_metadata( URI coords_uri = sf.uri_.join_path(constants::coords + constants::file_suffix); - RETURN_NOT_OK(utils::parse::get_fragment_name_version( - sf.uri_.to_string(), &f_version)); + RETURN_NOT_OK( + utils::parse::get_fragment_name_version(sf.uri_, &f_version)); // Note that the fragment metadata version is >= the array schema // version. Therefore, the check below is defensive and will always @@ -1793,7 +1793,7 @@ Status StorageManager::get_sorted_uris( assert(utils::parse::starts_with(name, "__")); // Get timestamp range - RETURN_NOT_OK(utils::parse::get_fragment_name_version(name, &f_version)); + RETURN_NOT_OK(utils::parse::get_fragment_name_version(uri, &f_version)); auto timestamp_range = utils::parse::get_timestamp_range(f_version, name); auto t = timestamp_range.first;