Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split coordinates file into multiple files, one per dimension #1462

Merged
merged 1 commit into from
Dec 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# In Progress

## Disk Format

* Removed the file `__coords.tdb`, which stored the zipped coordinates in sparse fragments
* Now storing the coordinate tiles on each dimension in separate files
* Changed fragment name format from `__t1_t2_uuid` to `__t1_t2_uuid_<format_version>`. That was necessary for backwards compatibility

## New features

## Improvements
Expand Down
2 changes: 1 addition & 1 deletion test/src/unit-backwards_compat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ TEST_CASE(
break;
}
case TILEDB_UINT8: {
REQUIRE(static_cast<int8_t*>(buffer.second)[0] == 1);
REQUIRE(static_cast<uint8_t*>(buffer.second)[0] == 1);
break;
}
case TILEDB_INT16: {
Expand Down
5 changes: 2 additions & 3 deletions test/src/unit-capi-consolidation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3796,8 +3796,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
ConsolidationFx,
"C API: Test advanced consolidation, consolidatable #1",
"[capi], [consolidation], [consolidation-adv], "
"[consolidation-adv-consolidatable-1]") {
"[capi][consolidation][adv][consolidatable-1]") {
remove_dense_vector();
create_dense_vector();
write_dense_vector_consolidatable_1();
Expand All @@ -3821,7 +3820,7 @@ TEST_CASE_METHOD(
REQUIRE(rc == TILEDB_OK);
REQUIRE(error == nullptr);
rc = tiledb_config_set(
config, "sm.consolidation.step_size_ratio", "0.6", &error);
config, "sm.consolidation.step_size_ratio", "0.7", &error);
REQUIRE(rc == TILEDB_OK);
REQUIRE(error == nullptr);

Expand Down
7 changes: 3 additions & 4 deletions test/src/unit-cppapi-array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct CPPArrayFx {
VFS vfs;
};

TEST_CASE("Config", "[cppapi]") {
TEST_CASE("Config", "[cppapi][config]") {
// Primarily to instantiate operator[]/= template
tiledb::Config cfg;
cfg["vfs.s3.region"] = "us-east-1a";
Expand All @@ -94,7 +94,7 @@ TEST_CASE("Config", "[cppapi]") {
CHECK((std::string)cfg["vfs.s3.use_virtual_addressing"] == "true");
}

TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi]") {
TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi][basic]") {
SECTION("Dimensions") {
ArraySchema schema(ctx, "cpp_unit_array");
CHECK(schema.domain().ndim() == 2);
Expand Down Expand Up @@ -337,8 +337,7 @@ TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi]") {
}

TEST_CASE(
"C++ API: Incorrect buffer size and offsets",
"[cppapi], [invalid-offsets]") {
"C++ API: Incorrect buffer size and offsets", "[cppapi][invalid-offsets]") {
const std::string array_name_1d = "cpp_unit_array_1d";
Context ctx;
VFS vfs(ctx);
Expand Down
4 changes: 2 additions & 2 deletions test/src/unit-cppapi-schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "catch.hpp"
#include "tiledb/sm/cpp_api/tiledb"

TEST_CASE("C++ API: Schema", "[cppapi]") {
TEST_CASE("C++ API: Schema", "[cppapi][schema]") {
using namespace tiledb;
Context ctx;

Expand Down Expand Up @@ -161,7 +161,7 @@ TEST_CASE("C++ API: Schema", "[cppapi]") {
}
}

TEST_CASE("C++ API: Test schema virtual destructors", "[cppapi]") {
TEST_CASE("C++ API: Test schema virtual destructors", "[cppapi][schema]") {
tiledb::Context ctx;
// Test that this generates no compiler warnings.
std::unique_ptr<tiledb::ArraySchema> schema;
Expand Down
198 changes: 98 additions & 100 deletions tiledb/sm/array_schema/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ ArraySchema::ArraySchema(const ArraySchema* array_schema) {

capacity_ = array_schema->capacity_;
cell_order_ = array_schema->cell_order_;
cell_sizes_ = array_schema->cell_sizes_;
cell_var_offsets_filters_ = array_schema->cell_var_offsets_filters_;
coords_filters_ = array_schema->coords_filters_;
coords_size_ = array_schema->coords_size_;
Expand All @@ -98,17 +97,11 @@ ArraySchema::ArraySchema(const ArraySchema* array_schema) {

set_domain(array_schema->domain_);

attribute_map_.clear();
for (auto attr : array_schema->attributes_) {
if (attr->name() != constants::key_attr_name)
add_attribute(attr, false);
}
for (const auto& attr : attributes_)
attribute_map_[attr->name()] = attr;
auto dim_num = array_schema->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = dimension(d);
dim_map_[dim->name()] = dim;
}
}

ArraySchema::~ArraySchema() {
Expand All @@ -133,16 +126,10 @@ const Attribute* ArraySchema::attribute(unsigned int id) const {
return nullptr;
}

const Attribute* ArraySchema::attribute(std::string name) const {
bool anonymous = name.empty();
unsigned int nattr = attribute_num();
for (unsigned int i = 0; i < nattr; i++) {
auto attr = attribute(i);
if ((attr->name() == name) || (anonymous && attr->is_anonymous())) {
return attr;
}
}
return nullptr;
const Attribute* ArraySchema::attribute(const std::string& name) const {
auto it =
attribute_map_.find(name.empty() ? constants::default_attr_name : name);
return it == attribute_map_.end() ? nullptr : it->second;
}

Status ArraySchema::attribute_name_normalized(
Expand Down Expand Up @@ -188,16 +175,45 @@ Layout ArraySchema::cell_order() const {
return cell_order_;
}

uint64_t ArraySchema::cell_size(const std::string& attribute) const {
auto cell_size_it = cell_sizes_.find(attribute);
assert(cell_size_it != cell_sizes_.end());
return cell_size_it->second;
uint64_t ArraySchema::cell_size(const std::string& name) const {
// Special zipped coordinates
if (name == constants::coords)
return domain_->dim_num() * datatype_size(coords_type());

// Attribute
auto attr_it = attribute_map_.find(name);
if (attr_it != attribute_map_.end()) {
auto attr = attr_it->second;
auto cell_val_num = attr->cell_val_num();
return (cell_val_num == constants::var_num) ?
constants::var_size :
cell_val_num * datatype_size(attr->type());
}

// Dimension
auto dim_it = dim_map_.find(name);
assert(dim_it != dim_map_.end());
auto dim = dim_it->second;
auto cell_val_num = dim->cell_val_num();
return (cell_val_num == constants::var_num) ?
constants::var_size :
cell_val_num * datatype_size(dim->type());
}

unsigned int ArraySchema::cell_val_num(const std::string& attribute) const {
auto it = attribute_map_.find(attribute);
assert(it != attribute_map_.end());
return it->second->cell_val_num();
unsigned int ArraySchema::cell_val_num(const std::string& name) const {
// Special zipped coordinates
if (name == constants::coords)
return 1;

// Attribute
auto attr_it = attribute_map_.find(name);
if (attr_it != attribute_map_.end())
return attr_it->second->cell_val_num();

// Dimension
auto dim_it = dim_map_.find(name);
assert(dim_it != dim_map_.end());
return dim_it->second->cell_val_num();
}

const FilterPipeline* ArraySchema::cell_var_offsets_filters() const {
Expand Down Expand Up @@ -253,16 +269,20 @@ Status ArraySchema::check_attributes(
return Status::Ok();
}

const FilterPipeline* ArraySchema::filters(const std::string& attribute) const {
auto it = attribute_map_.find(attribute);
if (it == attribute_map_.end()) {
if (attribute == constants::coords)
return coords_filters();
assert(false); // This should never happen
return nullptr; // Return something ad hoc
}
const FilterPipeline* ArraySchema::filters(const std::string& name) const {
if (name == constants::coords)
return coords_filters();

// Attribute
auto attr_it = attribute_map_.find(name);
if (attr_it != attribute_map_.end())
return attr_it->second->filters();

return it->second->filters();
// Dimension (if filters not set, return default coordinate filters)
auto dim_it = dim_map_.find(name);
assert(dim_it != dim_map_.end());
auto ret = dim_it->second->filters();
return (ret != nullptr) ? ret : coords_filters();
}

const FilterPipeline* ArraySchema::coords_filters() const {
Expand Down Expand Up @@ -297,15 +317,8 @@ const Dimension* ArraySchema::dimension(unsigned int i) const {
}

const Dimension* ArraySchema::dimension(const std::string& name) const {
bool anonymous = name.empty();
auto dim_num = this->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = this->dimension(d);
if ((dim->name() == name) || (anonymous && dim->is_anonymous())) {
return dim;
}
}
return nullptr;
auto it = dim_map_.find(name.empty() ? constants::default_dim_name : name);
return it == dim_map_.end() ? nullptr : it->second;
}

unsigned int ArraySchema::dim_num() const {
Expand Down Expand Up @@ -352,6 +365,14 @@ Status ArraySchema::has_attribute(
return Status::Ok();
}

bool ArraySchema::is_attr(const std::string& name) const {
return this->attribute(name) != nullptr;
}

bool ArraySchema::is_dim(const std::string& name) const {
return this->dimension(name) != nullptr;
}

// ===== FORMAT =====
// version (uint32_t)
// array_type (uint8_t)
Expand Down Expand Up @@ -404,26 +425,20 @@ Layout ArraySchema::tile_order() const {
return tile_order_;
}

Datatype ArraySchema::type(unsigned int i) const {
auto attribute_num = attributes_.size();
if (i > attribute_num) {
LOG_ERROR("Cannot retrieve type; Invalid attribute id");
assert(false);
}
if (i < attribute_num)
return attributes_[i]->type();
return domain_->type();
}
Datatype ArraySchema::type(const std::string& name) const {
// Special zipped coordinates
if (name == constants::coords)
return domain_->type();

Datatype ArraySchema::type(const std::string& attribute) const {
auto it = attribute_map_.find(attribute);
if (it == attribute_map_.end()) {
if (attribute == constants::coords)
return domain_->type();
assert(false); // This should never happen
return Datatype::INT8; // Return something ad hoc
}
return it->second->type();
// Attribute
auto attr_it = attribute_map_.find(name);
if (attr_it != attribute_map_.end())
return attr_it->second->type();

// Dimension
auto dim_it = dim_map_.find(name);
assert(dim_it != dim_map_.end());
return dim_it->second->type();
}

bool ArraySchema::var_size(const std::string& name) const {
Expand Down Expand Up @@ -474,7 +489,10 @@ Status ArraySchema::add_attribute(const Attribute* attr, bool check_special) {
} else {
new_attr = new Attribute(attr);
}

attributes_.emplace_back(new_attr);
attribute_map_[new_attr->name()] = new_attr;

return Status::Ok();
}

Expand Down Expand Up @@ -530,6 +548,12 @@ Status ArraySchema::deserialize(ConstBuffer* buff) {
auto attr = new Attribute();
RETURN_NOT_OK_ELSE(attr->deserialize(buff), delete attr);
attributes_.emplace_back(attr);
attribute_map_[attr->name()] = attr;
}
auto dim_num = domain()->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = dimension(d);
dim_map_[dim->name()] = dim;
}

// Initialize the rest of the object members
Expand All @@ -550,21 +574,9 @@ Status ArraySchema::init() {
// Initialize domain
RETURN_NOT_OK(domain_->init(cell_order_, tile_order_));

attribute_map_.clear();
for (const auto& attr : attributes_)
attribute_map_[attr->name()] = attr;
dim_map_.clear();
auto dim_num = domain_->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = dimension(d);
dim_map_[dim->name()] = dim;
}

// Set cell sizes
for (auto& attr : attributes_)
cell_sizes_[attr->name()] = compute_cell_size(attr->name());
cell_sizes_[constants::coords] = compute_cell_size(constants::coords);
coords_size_ = dim_num * datatype_size(coords_type());
// TODO: set upon setting domain
coords_size_ = domain_->dim_num() * datatype_size(coords_type());

// Success
return Status::Ok();
Expand Down Expand Up @@ -618,6 +630,15 @@ Status ArraySchema::set_domain(Domain* domain) {
filter->set_compressor(constants::real_coords_compression);
filter->set_compression_level(-1);
}

// Create dimension map
dim_map_.clear();
auto dim_num = domain_->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
auto dim = dimension(d);
dim_map_[dim->name()] = dim;
}

return Status::Ok();
}

Expand Down Expand Up @@ -676,28 +697,5 @@ void ArraySchema::clear() {
domain_ = nullptr;
}

uint64_t ArraySchema::compute_cell_size(const std::string& attribute) const {
// Handle coordinates first
if (attribute == constants::coords) {
auto dim_num = domain_->dim_num();
auto type = coords_type();
return dim_num * datatype_size(type);
}

// Handle attributes
auto attr_it = attribute_map_.find(attribute);
assert(attr_it != attribute_map_.end());
auto attr = attr_it->second;

// For easy reference
auto cell_val_num = attr->cell_val_num();
auto type = attr->type();

// Variable-sized cell
return (cell_val_num == constants::var_num) ?
constants::var_size :
cell_val_num * datatype_size(type);
}

} // namespace sm
} // namespace tiledb
Loading