Skip to content

Commit

Permalink
Split coordinates file into multiple files, one per dimension. Make a…
Browse files Browse the repository at this point in the history
…ppropriate changes to Reader. Ensure backwards compatibility.
  • Loading branch information
stavrospapadopoulos committed Dec 30, 2019
1 parent 8b185a0 commit 1065ac8
Show file tree
Hide file tree
Showing 21 changed files with 1,300 additions and 1,726 deletions.
6 changes: 6 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# In Progress

## Disk Format

* Removed the file `__coords.tdb`, which stored the zipped coordinates in sparse fragments
* The coordinate tiles of each dimension are now stored in separate files, one per dimension
* Changed the fragment name format from `__t1_t2_uuid` to `__t1_t2_uuid_<format_version>`. This change was necessary for backwards compatibility.

## New features

## Improvements
Expand Down
2 changes: 1 addition & 1 deletion test/src/unit-backwards_compat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ TEST_CASE(
break;
}
case TILEDB_UINT8: {
REQUIRE(static_cast<int8_t*>(buffer.second)[0] == 1);
REQUIRE(static_cast<uint8_t*>(buffer.second)[0] == 1);
break;
}
case TILEDB_INT16: {
Expand Down
5 changes: 2 additions & 3 deletions test/src/unit-capi-consolidation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3796,8 +3796,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
ConsolidationFx,
"C API: Test advanced consolidation, consolidatable #1",
"[capi], [consolidation], [consolidation-adv], "
"[consolidation-adv-consolidatable-1]") {
"[capi][consolidation][adv][consolidatable-1]") {
remove_dense_vector();
create_dense_vector();
write_dense_vector_consolidatable_1();
Expand All @@ -3821,7 +3820,7 @@ TEST_CASE_METHOD(
REQUIRE(rc == TILEDB_OK);
REQUIRE(error == nullptr);
rc = tiledb_config_set(
config, "sm.consolidation.step_size_ratio", "0.6", &error);
config, "sm.consolidation.step_size_ratio", "0.7", &error);
REQUIRE(rc == TILEDB_OK);
REQUIRE(error == nullptr);

Expand Down
7 changes: 3 additions & 4 deletions test/src/unit-cppapi-array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct CPPArrayFx {
VFS vfs;
};

TEST_CASE("Config", "[cppapi]") {
TEST_CASE("Config", "[cppapi][config]") {
// Primarily to instantiate operator[]/= template
tiledb::Config cfg;
cfg["vfs.s3.region"] = "us-east-1a";
Expand All @@ -94,7 +94,7 @@ TEST_CASE("Config", "[cppapi]") {
CHECK((std::string)cfg["vfs.s3.use_virtual_addressing"] == "true");
}

TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi]") {
TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi][basic]") {
SECTION("Dimensions") {
ArraySchema schema(ctx, "cpp_unit_array");
CHECK(schema.domain().ndim() == 2);
Expand Down Expand Up @@ -337,8 +337,7 @@ TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi]") {
}

TEST_CASE(
"C++ API: Incorrect buffer size and offsets",
"[cppapi], [invalid-offsets]") {
"C++ API: Incorrect buffer size and offsets", "[cppapi][invalid-offsets]") {
const std::string array_name_1d = "cpp_unit_array_1d";
Context ctx;
VFS vfs(ctx);
Expand Down
4 changes: 2 additions & 2 deletions test/src/unit-cppapi-schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "catch.hpp"
#include "tiledb/sm/cpp_api/tiledb"

TEST_CASE("C++ API: Schema", "[cppapi]") {
TEST_CASE("C++ API: Schema", "[cppapi][schema]") {
using namespace tiledb;
Context ctx;

Expand Down Expand Up @@ -161,7 +161,7 @@ TEST_CASE("C++ API: Schema", "[cppapi]") {
}
}

TEST_CASE("C++ API: Test schema virtual destructors", "[cppapi]") {
TEST_CASE("C++ API: Test schema virtual destructors", "[cppapi][schema]") {
tiledb::Context ctx;
// Test that this generates no compiler warnings.
std::unique_ptr<tiledb::ArraySchema> schema;
Expand Down
198 changes: 98 additions & 100 deletions tiledb/sm/array_schema/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ ArraySchema::ArraySchema(const ArraySchema* array_schema) {

capacity_ = array_schema->capacity_;
cell_order_ = array_schema->cell_order_;
cell_sizes_ = array_schema->cell_sizes_;
cell_var_offsets_filters_ = array_schema->cell_var_offsets_filters_;
coords_filters_ = array_schema->coords_filters_;
coords_size_ = array_schema->coords_size_;
Expand All @@ -98,17 +97,11 @@ ArraySchema::ArraySchema(const ArraySchema* array_schema) {

set_domain(array_schema->domain_);

attribute_map_.clear();
for (auto attr : array_schema->attributes_) {
if (attr->name() != constants::key_attr_name)
add_attribute(attr, false);
}
for (const auto& attr : attributes_)
attribute_map_[attr->name()] = attr;
auto dim_num = array_schema->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = dimension(d);
dim_map_[dim->name()] = dim;
}
}

ArraySchema::~ArraySchema() {
Expand All @@ -133,16 +126,10 @@ const Attribute* ArraySchema::attribute(unsigned int id) const {
return nullptr;
}

const Attribute* ArraySchema::attribute(std::string name) const {
bool anonymous = name.empty();
unsigned int nattr = attribute_num();
for (unsigned int i = 0; i < nattr; i++) {
auto attr = attribute(i);
if ((attr->name() == name) || (anonymous && attr->is_anonymous())) {
return attr;
}
}
return nullptr;
// Returns the attribute stored under `name` in the attribute map, or nullptr
// when the map holds no such entry. An empty name is looked up under
// `constants::default_attr_name`.
const Attribute* ArraySchema::attribute(const std::string& name) const {
  const auto found = name.empty() ?
                         attribute_map_.find(constants::default_attr_name) :
                         attribute_map_.find(name);
  if (found == attribute_map_.end())
    return nullptr;
  return found->second;
}

Status ArraySchema::attribute_name_normalized(
Expand Down Expand Up @@ -188,16 +175,45 @@ Layout ArraySchema::cell_order() const {
return cell_order_;
}

uint64_t ArraySchema::cell_size(const std::string& attribute) const {
auto cell_size_it = cell_sizes_.find(attribute);
assert(cell_size_it != cell_sizes_.end());
return cell_size_it->second;
// Returns the cell size of the field called `name`. The name is resolved, in
// order, as the special zipped-coordinates name, an attribute, or a
// dimension. A field whose cell_val_num equals `constants::var_num` reports
// `constants::var_size`.
uint64_t ArraySchema::cell_size(const std::string& name) const {
  // Zipped coordinates: one coordinate value per dimension
  if (name == constants::coords)
    return domain_->dim_num() * datatype_size(coords_type());

  // Attributes are consulted before dimensions
  const auto attr_entry = attribute_map_.find(name);
  if (attr_entry != attribute_map_.end()) {
    const auto attr = attr_entry->second;
    const auto val_num = attr->cell_val_num();
    if (val_num == constants::var_num)
      return constants::var_size;
    return val_num * datatype_size(attr->type());
  }

  // Anything else is expected to be a dimension name (checked only by the
  // assertion below)
  const auto dim_entry = dim_map_.find(name);
  assert(dim_entry != dim_map_.end());
  const auto dim = dim_entry->second;
  const auto val_num = dim->cell_val_num();
  if (val_num == constants::var_num)
    return constants::var_size;
  return val_num * datatype_size(dim->type());
}

unsigned int ArraySchema::cell_val_num(const std::string& attribute) const {
auto it = attribute_map_.find(attribute);
assert(it != attribute_map_.end());
return it->second->cell_val_num();
// Returns the number of values per cell for the field called `name`, which
// may be the special zipped-coordinates name, an attribute, or a dimension.
unsigned int ArraySchema::cell_val_num(const std::string& name) const {
  // The zipped coordinates always report a single value per cell
  if (name == constants::coords)
    return 1;

  // Attributes take precedence; fall back to the dimension map otherwise
  const auto attr_entry = attribute_map_.find(name);
  const bool is_attribute = attr_entry != attribute_map_.end();
  if (!is_attribute) {
    // The name is expected to be a dimension (checked only by the assertion)
    const auto dim_entry = dim_map_.find(name);
    assert(dim_entry != dim_map_.end());
    return dim_entry->second->cell_val_num();
  }

  return attr_entry->second->cell_val_num();
}

const FilterPipeline* ArraySchema::cell_var_offsets_filters() const {
Expand Down Expand Up @@ -253,16 +269,20 @@ Status ArraySchema::check_attributes(
return Status::Ok();
}

const FilterPipeline* ArraySchema::filters(const std::string& attribute) const {
auto it = attribute_map_.find(attribute);
if (it == attribute_map_.end()) {
if (attribute == constants::coords)
return coords_filters();
assert(false); // This should never happen
return nullptr; // Return something ad hoc
}
const FilterPipeline* ArraySchema::filters(const std::string& name) const {
if (name == constants::coords)
return coords_filters();

// Attribute
auto attr_it = attribute_map_.find(name);
if (attr_it != attribute_map_.end())
return attr_it->second->filters();

return it->second->filters();
// Dimension (if filters not set, return default coordinate filters)
auto dim_it = dim_map_.find(name);
assert(dim_it != dim_map_.end());
auto ret = dim_it->second->filters();
return (ret != nullptr) ? ret : coords_filters();
}

const FilterPipeline* ArraySchema::coords_filters() const {
Expand Down Expand Up @@ -297,15 +317,8 @@ const Dimension* ArraySchema::dimension(unsigned int i) const {
}

const Dimension* ArraySchema::dimension(const std::string& name) const {
bool anonymous = name.empty();
auto dim_num = this->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = this->dimension(d);
if ((dim->name() == name) || (anonymous && dim->is_anonymous())) {
return dim;
}
}
return nullptr;
auto it = dim_map_.find(name.empty() ? constants::default_dim_name : name);
return it == dim_map_.end() ? nullptr : it->second;
}

unsigned int ArraySchema::dim_num() const {
Expand Down Expand Up @@ -352,6 +365,14 @@ Status ArraySchema::has_attribute(
return Status::Ok();
}

bool ArraySchema::is_attr(const std::string& name) const {
return this->attribute(name) != nullptr;
}

bool ArraySchema::is_dim(const std::string& name) const {
return this->dimension(name) != nullptr;
}

// ===== FORMAT =====
// version (uint32_t)
// array_type (uint8_t)
Expand Down Expand Up @@ -404,26 +425,20 @@ Layout ArraySchema::tile_order() const {
return tile_order_;
}

Datatype ArraySchema::type(unsigned int i) const {
auto attribute_num = attributes_.size();
if (i > attribute_num) {
LOG_ERROR("Cannot retrieve type; Invalid attribute id");
assert(false);
}
if (i < attribute_num)
return attributes_[i]->type();
return domain_->type();
}
Datatype ArraySchema::type(const std::string& name) const {
// Special zipped coordinates
if (name == constants::coords)
return domain_->type();

Datatype ArraySchema::type(const std::string& attribute) const {
auto it = attribute_map_.find(attribute);
if (it == attribute_map_.end()) {
if (attribute == constants::coords)
return domain_->type();
assert(false); // This should never happen
return Datatype::INT8; // Return something ad hoc
}
return it->second->type();
// Attribute
auto attr_it = attribute_map_.find(name);
if (attr_it != attribute_map_.end())
return attr_it->second->type();

// Dimension
auto dim_it = dim_map_.find(name);
assert(dim_it != dim_map_.end());
return dim_it->second->type();
}

bool ArraySchema::var_size(const std::string& name) const {
Expand Down Expand Up @@ -474,7 +489,10 @@ Status ArraySchema::add_attribute(const Attribute* attr, bool check_special) {
} else {
new_attr = new Attribute(attr);
}

attributes_.emplace_back(new_attr);
attribute_map_[new_attr->name()] = new_attr;

return Status::Ok();
}

Expand Down Expand Up @@ -530,6 +548,12 @@ Status ArraySchema::deserialize(ConstBuffer* buff) {
auto attr = new Attribute();
RETURN_NOT_OK_ELSE(attr->deserialize(buff), delete attr);
attributes_.emplace_back(attr);
attribute_map_[attr->name()] = attr;
}
auto dim_num = domain()->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = dimension(d);
dim_map_[dim->name()] = dim;
}

// Initialize the rest of the object members
Expand All @@ -550,21 +574,9 @@ Status ArraySchema::init() {
// Initialize domain
RETURN_NOT_OK(domain_->init(cell_order_, tile_order_));

attribute_map_.clear();
for (const auto& attr : attributes_)
attribute_map_[attr->name()] = attr;
dim_map_.clear();
auto dim_num = domain_->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = dimension(d);
dim_map_[dim->name()] = dim;
}

// Set cell sizes
for (auto& attr : attributes_)
cell_sizes_[attr->name()] = compute_cell_size(attr->name());
cell_sizes_[constants::coords] = compute_cell_size(constants::coords);
coords_size_ = dim_num * datatype_size(coords_type());
// TODO: set upon setting domain
coords_size_ = domain_->dim_num() * datatype_size(coords_type());

// Success
return Status::Ok();
Expand Down Expand Up @@ -618,6 +630,15 @@ Status ArraySchema::set_domain(Domain* domain) {
filter->set_compressor(constants::real_coords_compression);
filter->set_compression_level(-1);
}

// Create dimension map
dim_map_.clear();
auto dim_num = domain_->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = dimension(d);
dim_map_[dim->name()] = dim;
}

return Status::Ok();
}

Expand Down Expand Up @@ -676,28 +697,5 @@ void ArraySchema::clear() {
domain_ = nullptr;
}

uint64_t ArraySchema::compute_cell_size(const std::string& attribute) const {
// Handle coordinates first
if (attribute == constants::coords) {
auto dim_num = domain_->dim_num();
auto type = coords_type();
return dim_num * datatype_size(type);
}

// Handle attributes
auto attr_it = attribute_map_.find(attribute);
assert(attr_it != attribute_map_.end());
auto attr = attr_it->second;

// For easy reference
auto cell_val_num = attr->cell_val_num();
auto type = attr->type();

// Variable-sized cell
return (cell_val_num == constants::var_num) ?
constants::var_size :
cell_val_num * datatype_size(type);
}

} // namespace sm
} // namespace tiledb
Loading

0 comments on commit 1065ac8

Please sign in to comment.