From c15874e7515e231d8b25ddca80b49f68dc87547a Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 6 Apr 2023 11:17:53 -0500 Subject: [PATCH] Add Enumerated Data Types TODO: Write a better commit message --- CMakeLists.txt | 3 +- PJD_TODO.md | 27 ++ test/CMakeLists.txt | 1 + test/src/unit-cppapi-deletes.cc | 36 +- test/src/unit-cppapi-enumerations.cc | 423 +++++++++++++++++ .../src/array_schema_params_generator.h | 176 +++++++ tiledb/CMakeLists.txt | 4 + tiledb/api/c_api/CMakeLists.txt | 3 + .../api/c_api/dimension_label/CMakeLists.txt | 1 + .../c_api/dimension_label/test/CMakeLists.txt | 1 + tiledb/api/c_api/enumeration/CMakeLists.txt | 40 ++ .../api/c_api/enumeration/enumeration_api.cc | 204 +++++++++ .../enumeration_api_experimental.h | 230 ++++++++++ .../enumeration/enumeration_api_internal.h | 124 +++++ .../api/c_api/enumeration/test/CMakeLists.txt | 32 ++ .../compile_capi_enumeration_stub_main.cc | 36 ++ .../enumeration/test/unit_capi_enumeration.cc | 430 ++++++++++++++++++ tiledb/sm/array/array.cc | 26 ++ tiledb/sm/array/array.h | 5 + tiledb/sm/array/array_directory.cc | 41 +- tiledb/sm/array/array_directory.h | 12 + tiledb/sm/array/test/CMakeLists.txt | 2 +- tiledb/sm/array_schema/CMakeLists.txt | 25 +- tiledb/sm/array_schema/array_schema.cc | 95 +++- tiledb/sm/array_schema/array_schema.h | 52 +++ .../sm/array_schema/array_schema_evolution.cc | 98 ++-- .../sm/array_schema/array_schema_evolution.h | 36 +- tiledb/sm/array_schema/attribute.cc | 36 +- tiledb/sm/array_schema/attribute.h | 18 +- tiledb/sm/array_schema/enumeration.cc | 222 +++++++++ tiledb/sm/array_schema/enumeration.h | 229 ++++++++++ .../test/compile_enumeration_main.cc | 36 ++ tiledb/sm/c_api/tiledb.cc | 97 +++- tiledb/sm/c_api/tiledb_experimental.h | 86 ++++ tiledb/sm/cpp_api/array_experimental.h | 56 +++ tiledb/sm/cpp_api/array_schema_experimental.h | 21 + tiledb/sm/cpp_api/attribute_experimental.h | 67 +++ tiledb/sm/cpp_api/deleter.h | 4 + tiledb/sm/cpp_api/enumeration_experimental.h | 257 +++++++++++ tiledb/sm/cpp_api/tiledb_experimental | 3 + tiledb/sm/enums/layout.h | 4 +- tiledb/sm/misc/constants.cc | 8 + tiledb/sm/misc/constants.h | 9 + tiledb/sm/query/ast/CMakeLists.txt | 2 +- tiledb/sm/query/ast/query_ast.cc | 104 +++++ tiledb/sm/query/ast/query_ast.h | 138 +++++- tiledb/sm/query/query.cc | 26 ++ tiledb/sm/query/query_condition.cc | 24 + tiledb/sm/query/query_condition.h | 29 ++ tiledb/sm/storage_manager/storage_manager.cc | 39 +- tiledb/sm/tile/CMakeLists.txt | 7 +- tiledb/storage_format/uri/CMakeLists.txt | 2 +- 52 files changed, 3596 insertions(+), 91 deletions(-) create mode 100644 PJD_TODO.md create mode 100644 test/src/unit-cppapi-enumerations.cc create mode 100644 test/support/src/array_schema_params_generator.h create mode 100644 tiledb/api/c_api/enumeration/CMakeLists.txt create mode 100644 tiledb/api/c_api/enumeration/enumeration_api.cc create mode 100644 tiledb/api/c_api/enumeration/enumeration_api_experimental.h create mode 100644 tiledb/api/c_api/enumeration/enumeration_api_internal.h create mode 100644 tiledb/api/c_api/enumeration/test/CMakeLists.txt create mode 100644 tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc create mode 100644 tiledb/api/c_api/enumeration/test/unit_capi_enumeration.cc create mode 100644 tiledb/sm/array_schema/enumeration.cc create mode 100644 tiledb/sm/array_schema/enumeration.h create mode 100644 tiledb/sm/array_schema/test/compile_enumeration_main.cc create mode 100644 tiledb/sm/cpp_api/array_experimental.h create mode 100644 tiledb/sm/cpp_api/attribute_experimental.h create mode 100644 tiledb/sm/cpp_api/enumeration_experimental.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 3910e00ef685..b3db432f501f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -219,7 +219,8 @@ else() elseif (CMAKE_BUILD_TYPE MATCHES "RelWithDebInfo") add_compile_options(-DNDEBUG -O3 -g3 -ggdb3 -gdwarf-3) elseif (CMAKE_BUILD_TYPE MATCHES "Coverage") - add_compile_options(-DDEBUG -g3 -gdwarf-3 --coverage) + add_compile_options(-DDEBUG --coverage -fprofile-instr-generate -fcoverage-mapping) + add_link_options(--coverage -fprofile-instr-generate -fcoverage-mapping) endif() # Use -Wno-literal-suffix on Linux with C++ sources. diff --git a/PJD_TODO.md b/PJD_TODO.md new file mode 100644 index 000000000000..3fa1412290d8 --- /dev/null +++ b/PJD_TODO.md @@ -0,0 +1,27 @@ +Chores Left +=== + +* Testing + * Update C API unit tests to cover 100% of the Enumeration constructor + * Throw error if enumeration has longer length than the integer width of the attribute + * Require the attribute type to be integral when setting an enumeration + * Don't allow signed integer values for attributes with enumeratiojns? Does R need this? + * Require cell_val_num == 1 for attributes + + * Errors (de?)serializing to disk + +* Miscellany + * Add Enumeration::dump(FILE* out) + * Update schema consolidation and vaccuming for enumerations + * When serializing array schema, check that any enumerations set on attributes actually exist + * Array::get_enumeration - might have to check all load schema versions? + * Do loaded enumerations need serialized? + * QueryAstNode rewrite_enumeration_condition - Need to adjust bit widths + * Document enumeration format changes in the storage format docs + +* REST Serialization ToDOs + * Enumerations initial implementation + * Add ASTNode::use_enumeration to serialization code - If not, all remote queries are against enumeration values + * Add attribute enumeration name + + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6930e294ac4c..f8580591595c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -216,6 +216,7 @@ if (TILEDB_CPP_API) src/unit-cppapi-datetimes.cc src/unit-cppapi-deletes.cc src/unit-cppapi-dense-qc-coords-mode.cc + src/unit-cppapi-enumerations.cc src/unit-cppapi-time.cc src/unit-cppapi-fill_values.cc src/unit-cppapi-filter.cc diff --git a/test/src/unit-cppapi-deletes.cc b/test/src/unit-cppapi-deletes.cc index f7a941d771ac..0a5f61d0df23 100644 --- a/test/src/unit-cppapi-deletes.cc +++ b/test/src/unit-cppapi-deletes.cc @@ -109,6 +109,7 @@ struct DeletesFx { void remove_sparse_array(); void remove_array(const std::string& array_name); bool is_array(const std::string& array_name); + std::vector list_schemas(const std::string& array_name); }; DeletesFx::DeletesFx() @@ -488,6 +489,29 @@ bool DeletesFx::is_array(const std::string& array_name) { return vfs_.is_dir(array_name); } +std::vector DeletesFx::list_schemas( + const std::string& array_name) { + auto& enum_dir = tiledb::sm::constants::array_enumerations_dir_name; + auto schemas = + vfs_.ls(array_name + "/" + tiledb::sm::constants::array_schema_dir_name); + + auto it = schemas.begin(); + while (it != schemas.end()) { + if ((*it).size() < enum_dir.size()) { + continue; + } + if ((*it).substr((*it).size() - enum_dir.size()) == enum_dir) { + break; + } + ++it; + } + if (it != schemas.end()) { + schemas.erase(it); + } + + return schemas; +} + TEST_CASE_METHOD( DeletesFx, "CPP API: Test writing delete condition", @@ -1949,8 +1973,7 @@ TEST_CASE_METHOD( // Check write CHECK(tiledb::test::num_commits(SPARSE_ARRAY_NAME) == 4); CHECK(tiledb::test::num_fragments(SPARSE_ARRAY_NAME) == 4); - auto schemas = - vfs_.ls(array_name + "/" + tiledb::sm::constants::array_schema_dir_name); + auto schemas = list_schemas(array_name); CHECK(schemas.size() == 1); auto meta = vfs_.ls( array_name + "/" + tiledb::sm::constants::array_metadata_dir_name); @@ -1984,8 +2007,7 @@ TEST_CASE_METHOD( // Check working directory after delete REQUIRE(vfs_.is_file(extraneous_file_path)); CHECK(tiledb::test::num_fragments(SPARSE_ARRAY_NAME) == 0); - schemas = - vfs_.ls(array_name + "/" + tiledb::sm::constants::array_schema_dir_name); + schemas = list_schemas(array_name); CHECK(schemas.size() == 0); meta = vfs_.ls( array_name + "/" + tiledb::sm::constants::array_metadata_dir_name); @@ -2042,8 +2064,7 @@ TEST_CASE_METHOD( vfs_.touch(extraneous_file_path); // Check write - auto schemas = - vfs_.ls(array_name + "/" + tiledb::sm::constants::array_schema_dir_name); + auto schemas = list_schemas(array_name); CHECK(schemas.size() == 1); auto uris = vfs_.ls(array_name); bool ok_exists = false; @@ -2072,8 +2093,7 @@ TEST_CASE_METHOD( CHECK(!tiledb::sm::utils::parse::starts_with(uri, ok_prefix)); } REQUIRE(vfs_.is_file(extraneous_file_path)); - schemas = - vfs_.ls(array_name + "/" + tiledb::sm::constants::array_schema_dir_name); + schemas = list_schemas(array_name); CHECK(schemas.size() == 0); remove_sparse_array(); diff --git a/test/src/unit-cppapi-enumerations.cc b/test/src/unit-cppapi-enumerations.cc new file mode 100644 index 000000000000..759c608812d5 --- /dev/null +++ b/test/src/unit-cppapi-enumerations.cc @@ -0,0 +1,423 @@ +/** + * @file unit-cppapi-enumerations.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests the C++ API for enumeration related functions. + */ + +#include + +#include +//#include "test/support/src/array_schema_params_generator.h" +#include "test/support/src/helpers.h" +#include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/cpp_api/tiledb_experimental" + +using namespace tiledb; + +struct EnumerationFx { + EnumerationFx(); + ~EnumerationFx(); + + template + void check_enumeration( + Enumeration& enmr, + std::vector values, + tiledb_datatype_t data_type, + uint32_t cell_val_num, + bool ordered); + + void rm_array(); + + std::string uri_ = "enumeration_test_array"; + Context ctx_; + VFS vfs_; +}; + +TEST_CASE_METHOD( + EnumerationFx, + "Basic Fixed Size Enumeration Creation", + "[enumeration][basic][fixed]") { + std::vector values = {1, 2, 3, 4, 5}; + auto e = Enumeration::create(ctx_, values); + check_enumeration(e, values, TILEDB_UINT32, 1, false); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Basic Variable Size Enumeration Creation", + "[enumeration][basic][fixed]") { + std::vector values = {"foo", "bar", "baz", "bingo", "bango"}; + auto e = Enumeration::create(ctx_, values); + check_enumeration(e, values, TILEDB_STRING_ASCII, TILEDB_VAR_NUM, false); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation with Ordered", + "[enumeration][basic][fixed]") { + std::vector values = {"foo", "bar", "baz", "bingo", "bango"}; + auto e = Enumeration::create(ctx_, values, true); + check_enumeration(e, values, TILEDB_STRING_ASCII, TILEDB_VAR_NUM, true); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation with Datatype", + "[enumeration][basic][fixed]") { + std::vector values = {"foo", "bar", "baz", "bingo", "bango"}; + auto e = Enumeration::create(ctx_, values, false, TILEDB_STRING_UTF8); + check_enumeration(e, values, TILEDB_STRING_UTF8, TILEDB_VAR_NUM, false); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation with Multi-Cell Val Num", + "[enumeration][basic][fixed]") { + std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + auto e = Enumeration::create( + ctx_, + TILEDB_INT32, + 2, + false, + values.data(), + values.size() * sizeof(int), + nullptr, + 0); + check_enumeration(e, values, TILEDB_INT32, 2, false); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Invalid cell val num", + "[enumeration][error][invalid-cell-val-num]") { + std::vector values = {1, 2, 3}; + REQUIRE_THROWS(Enumeration::create( + ctx_, + TILEDB_INT32, + 0, + false, + values.data(), + values.size() * sizeof(int), + nullptr, + 0)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - No data pointer", + "[enumeration][error][data-nullptr]") { + std::vector values = {1, 2, 3}; + REQUIRE_THROWS(Enumeration::create( + ctx_, + TILEDB_INT32, + 1, + false, + nullptr, + values.size() * sizeof(int), + nullptr, + 0)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Zero data size", + "[enumeration][error][data-zero-size]") { + std::vector values = {1, 2, 3}; + REQUIRE_THROWS(Enumeration::create( + ctx_, TILEDB_INT32, 1, false, values.data(), 0, nullptr, 0)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - No offsets pointer", + "[enumeration][error][offsets-nullptr]") { + auto data = "foobarbazbam"; + std::vector offsets = {0, 3, 6, 9}; + REQUIRE_THROWS(Enumeration::create( + ctx_, + TILEDB_STRING_ASCII, + TILEDB_VAR_NUM, + false, + data, + strlen(data), + nullptr, + offsets.size() * sizeof(uint64_t))); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - No offsets size", + "[enumeration][error][offsets-zero-size]") { + auto data = "foobarbazbam"; + std::vector offsets = {0, 3, 6, 9}; + REQUIRE_THROWS(Enumeration::create( + ctx_, + TILEDB_STRING_ASCII, + TILEDB_VAR_NUM, + false, + data, + strlen(data), + offsets.data(), + 0)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Offsets not required, pointer provided", + "[enumeration][error][offsets-not-required]") { + std::vector values = {0, 1, 2, 3, 4}; + std::vector offsets = {0, 3, 6, 9}; + REQUIRE_THROWS(Enumeration::create( + ctx_, + TILEDB_INT32, + 1, + false, + values.data(), + values.size() * sizeof(int), + offsets.data(), + 0)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Offsets not required, size provided", + "[enumeration][error][offsets-not-required]") { + std::vector values = {0, 1, 2, 3, 4}; + REQUIRE_THROWS(Enumeration::create( + ctx_, + TILEDB_INT32, + 1, + false, + values.data(), + values.size() * sizeof(int), + nullptr, + 100)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Invalid offsests size provided", + "[enumeration][error][offsets-invalid-size]") { + auto data = "foobarbazbam"; + std::vector offsets = {0, 3, 6, 9}; + // Passing 3 for the offsets size is incorrect because the offsets size has + // to be a multiple of `sizeof(uint64_t)` + REQUIRE_THROWS(Enumeration::create( + ctx_, + TILEDB_STRING_ASCII, + TILEDB_VAR_NUM, + false, + data, + strlen(data), + offsets.data(), + 3)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Offsets to data beyond provided data size", + "[enumeration][error][invalid-offset-data]") { + auto data = "foobarbazbam"; + std::vector offsets = {0, 3, 6, 100}; + // Passing 3 for the offsets size is incorrect because the offsets size has + // to be a multiple of `sizeof(uint64_t)` + REQUIRE_THROWS(Enumeration::create( + ctx_, + TILEDB_STRING_ASCII, + TILEDB_VAR_NUM, + false, + data, + strlen(data), + offsets.data(), + offsets.size() * sizeof(uint64_t))); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Invalid data size", + "[enumeration][error][invalid-data-size]") { + std::vector values = {1, 2, 3, 4, 5}; + // Passing 3 for the data size is invalid as its not a multiple of + // sizeof(int) + REQUIRE_THROWS(Enumeration::create( + ctx_, TILEDB_INT32, 1, false, values.data(), 3, nullptr, 0)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Repeated fixed sized values", + "[enumeration][error][repeated-values]") { + std::vector values = {1, 2, 3, 3, 4, 5}; + REQUIRE_THROWS(Enumeration::create(ctx_, values)); +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Repeated variable sized values", + "[enumeration][error][repeated-values]") { + std::vector values = {"foo", "bar", "bang", "bar"}; + REQUIRE_THROWS(Enumeration::create(ctx_, values)); +} + +/* ********************************* */ +/* TEST SUPPORT CODE */ +/* ********************************* */ + +EnumerationFx::EnumerationFx() + : vfs_(ctx_) { + rm_array(); +} + +EnumerationFx::~EnumerationFx() { + rm_array(); +} + +template +void EnumerationFx::check_enumeration( + Enumeration& enmr, + std::vector values, + tiledb_datatype_t data_type, + uint32_t cell_val_num, + bool ordered) { + REQUIRE(enmr.type() == data_type); + REQUIRE(enmr.cell_val_num() == cell_val_num); + REQUIRE(enmr.ordered() == ordered); + + std::vector data; + enmr.into_vector(data); + REQUIRE(data == values); +} + +void EnumerationFx::rm_array() { + if (vfs_.is_dir(uri_)) { + vfs_.remove_dir(uri_); + } +} + +/* + + +void create_array(TestData& td); +void write_array( + TestData& td, std::vector& dim_vals, std::vector& attr_vals); +void read_array( + TestData& td, + QueryCondition* qc, + std::vector& dim_vals, + std::vector& attr_vals); + +TEST_CASE( + "C++ API: Create Array with an Enumeration", "[cppapi][enumeration]") { + TestData td; + + std::vector dim_vals = {0, 1, 2, 3, 4}; + std::vector attr_vals = {3, 2, 4, 0, 1}; + REQUIRE_NOTHROW(create_array(td)); + REQUIRE_NOTHROW(write_array(td, dim_vals, attr_vals)); + + std::vector dim_read; + std::vector attr_read; + REQUIRE_NOTHROW(read_array(td, nullptr, dim_read, attr_read)); + REQUIRE(dim_read.size() == 5); + + std::vector expect = {"foo", "bar", "baz", "bingo", "bango"}; + + Array array(td.ctx_, td.uri_, TILEDB_READ); + Enumeration enmr = ArrayExperimental::get_enumeration(td.ctx_, array, "a"); + std::vector enmr_values; + enmr.into_vector(enmr_values); + REQUIRE(enmr_values == expect); + + QueryCondition qc(td.ctx_); + qc.init("a", "baz", TILEDB_EQ); + REQUIRE_NOTHROW(read_array(td, &qc, dim_read, attr_read)); + REQUIRE(dim_read.size() == 1); + REQUIRE(dim_read[0] == 1); + REQUIRE(attr_read.size() == 1); + REQUIRE(attr_read[0] == 2); +} + +void create_array(TestData& td) { + std::vector values = {"foo", "bar", "baz", "bingo", "bango"}; + auto enmr = Enumeration::create(td.ctx_, values); + auto attr = Attribute::create(td.ctx_, "a"); + auto dim = Dimension::create(td.ctx_, "d", {{0, 100}}, 10); + Domain dom(td.ctx_); + dom.add_dimension(dim); + + ArraySchema schema(td.ctx_, TILEDB_SPARSE); + schema.set_domain(dom); + + ArraySchemaExperimental::add_attribute(td.ctx_, schema, attr, enmr); + + Array::create(td.uri_, schema); +} + +void write_array( + TestData& td, std::vector& dim_vals, std::vector& attr_vals) { + Array array(td.ctx_, td.uri_, TILEDB_WRITE); + Query query(td.ctx_, array); + query.set_data_buffer("d", dim_vals) + .set_data_buffer("a", attr_vals) + .set_layout(TILEDB_UNORDERED); + REQUIRE(query.submit() == Query::Status::COMPLETE); + array.close(); +} + +void read_array( + TestData& td, + QueryCondition* qc, + std::vector& dim_vals, + std::vector& attr_vals) { + dim_vals.clear(); + dim_vals.resize(100); + + attr_vals.clear(); + attr_vals.resize(100); + + Array array(td.ctx_, td.uri_, TILEDB_READ); + Subarray subarray(td.ctx_, array); + subarray.add_range("d", 0, 100); + Query query(td.ctx_, array); + query.set_subarray(subarray) + .set_layout(TILEDB_ROW_MAJOR) + .set_data_buffer("d", dim_vals) + .set_data_buffer("a", attr_vals); + if (qc != nullptr) { + query.set_condition(*qc); + } + REQUIRE(query.submit() == Query::Status::COMPLETE); + + auto results = query.result_buffer_elements(); + dim_vals.resize(results["d"].second); + attr_vals.resize(results["a"].second); + + array.close(); +} +*/ diff --git a/test/support/src/array_schema_params_generator.h b/test/support/src/array_schema_params_generator.h new file mode 100644 index 000000000000..689a76d1eb9e --- /dev/null +++ b/test/support/src/array_schema_params_generator.h @@ -0,0 +1,176 @@ +/** + * @file array_schema_params_generator.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file declares some test suite helper functions specific to generating + * array schema params structures. + */ + +#ifndef TILEDB_ARRAY_SCHEMA_PARAMS_GENERATOR_H +#define TILEDB_ARRAY_SCHEMA_PARAMS_GENERATOR_H + +#include + +#include "test/support/tdb_catch.h" +#include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/cpp_api/tiledb_experimental" + +struct ArraySchemaParams { + ArraySchemaParams( + tiledb_array_type_t array_type, + tiledb_encryption_type_t enc_type, + tiledb_layout_t tile_order, + tiledb_layout_t cell_order, + uint64_t capacity, + bool allow_dups) + : array_type_(array_type) + , enc_type_(enc_type) + , tile_order_(tile_order) + , cell_order_(cell_order) + , capacity_(capacity) + , allow_dups_(allow_dups) { + } + + bool is_valid() { + // Allow dups is invalid for dense arrays + if (array_type_ == TILEDB_DENSE && allow_dups_) { + return false; + } + + return true; + } + + std::string stringify() const { + std::stringstream ss; + + ss << "ArraySchemaParams(" << array_type_ << ", " << enc_type_ << ", " + << tile_order_ << ", " << cell_order_ << ", " << capacity_ << ", " + << allow_dups_ << ")"; + + return ss.str(); + } + + tiledb_array_type_t array_type_; + tiledb_encryption_type_t enc_type_; + tiledb_layout_t tile_order_; + tiledb_layout_t cell_order_; + uint64_t capacity_; + bool allow_dups_; +}; + +inline std::ostream& operator<<( + std::ostream& os, const ArraySchemaParams& params) { + os << params.stringify(); + return os; +} + +namespace { + +class ArraySchemaParamsGenerator + : public Catch::Generators::IGenerator { + public: + ArraySchemaParamsGenerator( + std::vector array_types = + {TILEDB_DENSE, TILEDB_SPARSE}, + std::vector tile_orders = + {TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR, TILEDB_HILBERT}, + std::vector cell_orders = + {TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR}, + std::vector capacities = {10, 100, 10000}, + std::vector allow_dups_vals = {true, false}, + std::vector enc_types = { + TILEDB_NO_ENCRYPTION, TILEDB_AES_256_GCM}); + + bool next() override; + ArraySchemaParams const& get() const override; + std::string stringifyImpl() const override; + + private: + std::vector params_; + std::vector::iterator params_iter_; +}; + +ArraySchemaParamsGenerator::ArraySchemaParamsGenerator( + std::vector array_types, + std::vector tile_orders, + std::vector cell_orders, + std::vector capacities, + std::vector allow_dups_vals, + std::vector enc_types) { + for (auto atype : array_types) { + for (auto etype : enc_types) { + for (auto t_order : tile_orders) { + for (auto c_order : cell_orders) { + for (auto cap : capacities) { + for (auto dups : allow_dups_vals) { + params_.emplace_back(atype, etype, t_order, c_order, cap, dups); + if (!params_.back().is_valid()) { + params_.pop_back(); + } + } + } + } + } + } + } + + params_iter_ = params_.begin(); +} + +bool ArraySchemaParamsGenerator::next() { + // Check if we're already at the end + if (params_iter_ == params_.end()) { + return false; + } + + ++params_iter_; + + // Check if we just arrived at the end. + if (params_iter_ == params_.end()) { + return false; + } + + return true; +} + +ArraySchemaParams const& ArraySchemaParamsGenerator::get() const { + return *params_iter_; +} + +std::string ArraySchemaParamsGenerator::stringifyImpl() const { + return "ArraySchemaParamsGenerator<" + params_iter_->stringify() + ">"; +} + +Catch::Generators::GeneratorWrapper array_schema_params() { + return Catch::Generators::GeneratorWrapper( + new ArraySchemaParamsGenerator()); +} + +} // namespace + +#endif // Included array_schema_params_generator.h diff --git a/tiledb/CMakeLists.txt b/tiledb/CMakeLists.txt index 37a9fdaab9f5..5226f5881b69 100644 --- a/tiledb/CMakeLists.txt +++ b/tiledb/CMakeLists.txt @@ -84,10 +84,12 @@ if (TILEDB_CPP_API) ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/tiledb_experimental ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/array_deprecated.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/array.h + ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/array_experimental.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/array_schema.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/array_schema_evolution.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/array_schema_experimental.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/attribute.h + ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/attribute_experimental.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/config.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/consolidation_plan_experimental.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/context.h @@ -96,6 +98,7 @@ if (TILEDB_CPP_API) ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/dimension.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/dimension_label_experimental.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/domain.h + ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/enumeration_experimental.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/error.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/exception.h ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/cpp_api/filter.h @@ -147,6 +150,7 @@ set(TILEDB_CORE_SOURCES ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/array_schema/dimension.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/array_schema/dimension_label.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/array_schema/domain.cc + ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/array_schema/enumeration.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/buffer/buffer.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/buffer/buffer_list.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/c_api/api_argument_validator.cc diff --git a/tiledb/api/c_api/CMakeLists.txt b/tiledb/api/c_api/CMakeLists.txt index 655bc3402249..a1d2cdf7e30d 100644 --- a/tiledb/api/c_api/CMakeLists.txt +++ b/tiledb/api/c_api/CMakeLists.txt @@ -76,6 +76,9 @@ add_subdirectory(data_order) # `dimension_label`: depends on `context`, `datatype`, `data_order` add_subdirectory(dimension_label) +# `enumeration`: depends on `buffer`, `constants`, and `context` +add_subdirectory(enumeration) + # `filesystem`: no dependencies add_subdirectory(filesystem) diff --git a/tiledb/api/c_api/dimension_label/CMakeLists.txt b/tiledb/api/c_api/dimension_label/CMakeLists.txt index 9cfe7b01e707..e8f9d0d214ba 100644 --- a/tiledb/api/c_api/dimension_label/CMakeLists.txt +++ b/tiledb/api/c_api/dimension_label/CMakeLists.txt @@ -46,6 +46,7 @@ target_link_libraries(capi_dimension_label_stub PUBLIC array_schema $(type); + + bool is_ordered = false; + if (ordered != 0) { + is_ordered = true; + } + + try { + *enumeration = tiledb_enumeration_handle_t::make_handle( + datatype, + cell_val_num, + is_ordered, + data, + data_size, + offsets, + offsets_size); + } catch (...) { + *enumeration = nullptr; + throw; + } + + // Success + return TILEDB_OK; +} + +void tiledb_enumeration_free(tiledb_enumeration_t** enumeration) { + ensure_output_pointer_is_valid(enumeration); + ensure_enumeration_is_valid(*enumeration); + tiledb_enumeration_handle_t::break_handle(*enumeration); +} + +capi_return_t tiledb_enumeration_get_type( + tiledb_enumeration_t* enumeration, tiledb_datatype_t* type) { + ensure_enumeration_is_valid(enumeration); + ensure_output_pointer_is_valid(type); + *type = static_cast(enumeration->type()); + return TILEDB_OK; +} + +capi_return_t tiledb_enumeration_get_cell_val_num( + tiledb_enumeration_t* enumeration, uint32_t* cell_val_num) { + ensure_enumeration_is_valid(enumeration); + ensure_output_pointer_is_valid(cell_val_num); + *cell_val_num = enumeration->cell_val_num(); + return TILEDB_OK; +} + +capi_return_t tiledb_enumeration_get_ordered( + tiledb_enumeration_t* enumeration, int* ordered) { + ensure_enumeration_is_valid(enumeration); + ensure_output_pointer_is_valid(ordered); + *ordered = static_cast(enumeration->ordered()); + return TILEDB_OK; +} + +capi_return_t tiledb_enumeration_get_data( + tiledb_enumeration_t* enumeration, const void** data, uint64_t* data_size) { + ensure_enumeration_is_valid(enumeration); + ensure_output_pointer_is_valid(data); + ensure_output_pointer_is_valid(data_size); + auto [d, ds] = enumeration->data(); + *data = d; + *data_size = ds; + return TILEDB_OK; +} + +capi_return_t tiledb_enumeration_get_offsets( + tiledb_enumeration_t* enumeration, + const void** offsets, + uint64_t* offsets_size) { + ensure_enumeration_is_valid(enumeration); + ensure_output_pointer_is_valid(offsets); + ensure_output_pointer_is_valid(offsets_size); + auto [o, os] = enumeration->offsets(); + *offsets = o; + *offsets_size = os; + return TILEDB_OK; +} + +} // namespace tiledb::api + +using tiledb::api::api_entry_context; +using tiledb::api::api_entry_void; + +capi_return_t tiledb_enumeration_alloc( + tiledb_ctx_t* ctx, + tiledb_datatype_t type, + uint32_t cell_val_num, + int ordered, + const void* data, + uint64_t data_size, + const void* offsets, + uint64_t offsets_size, + tiledb_enumeration_t** enumeration) noexcept { + return api_entry_context( + ctx, + type, + cell_val_num, + ordered, + data, + data_size, + offsets, + offsets_size, + enumeration); +} + +void tiledb_enumeration_free(tiledb_enumeration_t** enumeration) noexcept { + return api_entry_void(enumeration); +} + +capi_return_t tiledb_enumeration_get_type( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + tiledb_datatype_t* type) noexcept { + return api_entry_context( + ctx, enumeration, type); +} + +capi_return_t tiledb_enumeration_get_cell_val_num( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + uint32_t* cell_val_num) noexcept { + return api_entry_context( + ctx, enumeration, cell_val_num); +} + +capi_return_t tiledb_enumeration_get_ordered( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + int* ordered) noexcept { + return api_entry_context( + ctx, enumeration, ordered); +} + +capi_return_t tiledb_enumeration_get_data( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + const void** data, + uint64_t* data_size) noexcept { + return api_entry_context( + ctx, enumeration, data, data_size); +} + +capi_return_t tiledb_enumeration_get_offsets( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + const void** offsets, + uint64_t* offsets_size) noexcept { + return api_entry_context( + ctx, enumeration, offsets, offsets_size); +} diff --git a/tiledb/api/c_api/enumeration/enumeration_api_experimental.h b/tiledb/api/c_api/enumeration/enumeration_api_experimental.h new file mode 100644 index 000000000000..642cd4cf2489 --- /dev/null +++ b/tiledb/api/c_api/enumeration/enumeration_api_experimental.h @@ -0,0 +1,230 @@ +/** + * @file tiledb/api/c_api/enumeration/enumeration_api_experimental.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file declares the enumeration section of the C API for TileDB. + */ + +#ifndef TILEDB_CAPI_ENUMERATION_EXPERIMENTAL_H +#define TILEDB_CAPI_ENUMERATION_EXPERIMENTAL_H + +#include "../api_external_common.h" +#include "../context/context_api_external.h" +#include "../datatype/datatype_api_external.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** A TileDB dimension. */ +typedef struct tiledb_enumeration_handle_t tiledb_enumeration_t; + +/** + * Creates an Enumeration. + * + * **Example:** + * + * @code{.c} + * tiledb_enumeration_t* enumeration; + * void* data = get_data(); + * uint64_t data_size = get_data_size(); + * tiledb_enumeration_alloc( + * ctx, + * TILEDB_INT64, + * cell_val_num, + * FALSE, + * data, + * data_size, + * nullptr, + * 0, + * &enumeration); + * @endcode + * + * @param ctx The TileDB context. + * @param type The enumeration type. + * @param cell_val_num The number of values per enumeration value + * @param ordered Whether this enumeration should be considered as ordered + * @param data A pointer to the enumeration value data + * @param data_size The length of the data buffer provided + * @param offsets A pointer to the offsets buffer if cell_vall_num = UINT32_MAX + * @param offsets_size The length of the offsets buffer, zero if no offsets + * @param enumeration The newly allocated enumeration + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT capi_return_t tiledb_enumeration_alloc( + tiledb_ctx_t* ctx, + tiledb_datatype_t type, + uint32_t cell_val_num, + int ordered, + const void* data, + uint64_t data_size, + const void* offsets, + uint64_t offsets_size, + tiledb_enumeration_t** enumeration) TILEDB_NOEXCEPT; + +/** + * Destroys a TileDB enumeration, freeing associated memory. + * + * **Example:** + * + * @code{.c} + * tiledb_enumeration_free(&enumeration); + * @endcode + * + * @param enumeration The enumeration to be destroyed. + */ +TILEDB_EXPORT void tiledb_enumeration_free(tiledb_enumeration_t** enumeration) + TILEDB_NOEXCEPT; + +/** + * Return the datatype of the enumeration values + * + * **Example:** + * + * @code{.c} + * tiledb_datatype_t type; + * tiledb_enumeration_get_type(ctx, enumeration, &type); + * @endcode + * + * @param ctx The TileDB context. + * @param enumeration The enumeration + * @param type The data type of the enumeration + * @return `TILEDB_OK` or `TILEDB_ERR` + */ +TILEDB_EXPORT capi_return_t tiledb_enumeration_get_type( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + tiledb_datatype_t* type) TILEDB_NOEXCEPT; + +/** + * Return the cell value number of the enumeration values + * + * **Example:** + * + * @code{.c} + * uint32_t cell_val_num; + * tiledb_enumeration_get_cell_val_num(ctx, enumeration, &cell_val_num); + * @endcode + * + * @param ctx The TileDB context. + * @param enumeration The enumeration + * @param type The cell value number of the enumeration + * @return `TILEDB_OK` or `TILEDB_ERR` + */ +TILEDB_EXPORT capi_return_t tiledb_enumeration_get_cell_val_num( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + uint32_t* cell_val_num) TILEDB_NOEXCEPT; + +/** + * Return whether the enumeration values should be considered ordered. + * + * **Example:** + * + * @code{.c} + * int ordered; + * tiledb_enumeration_get_ordered(ctx, enumeration, &ordered); + * @endcode + * + * The cell values are considered if the value in ordered after `TILEDB_OK` + * is returned is non-zero. I.e., this is standard `int` as `bool` behavior. + * + * @param ctx The TileDB context. + * @param enumeration The enumeration + * @param ordered A boolean value indicating whether the values are ordered + * @return `TILEDB_OK` or `TILEDB_ERR` + */ +TILEDB_EXPORT capi_return_t tiledb_enumeration_get_ordered( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + int* ordered) TILEDB_NOEXCEPT; + +/** + * Return a pointer to and size of the enumerations underlying value data + * + * **Example:** + * + * @code{.c} + * void* data = NULL; + * uint64_t data_size = 0; + * tiledb_enumeration_get_data(ctx, enumeration, &data, &data_size); + * @endcode + * + * The pointer returned from this function references internal data managed + * by the enumeration. As such, clients should not attempt to free it, or + * access it beyond the lifetime of the enumeration instance. + * + * @param ctx The TileDB context. + * @param enumeration The enumeration + * @param data The returned pointer to this enumeration value buffer + * @param data_size The length of the buffer pointed to by data + * @return `TILEDB_OK` or `TILEDB_ERR` + */ +TILEDB_EXPORT capi_return_t tiledb_enumeration_get_data( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + const void** data, + uint64_t* data_size) TILEDB_NOEXCEPT; + +/** + * Return a pointer to and size of the enumerations underlying value offsets + * + * **Example:** + * + * @code{.c} + * void* offsets = NULL; + * uint64_t offsets_size = 0; + * tiledb_enumeration_get_offsets(ctx, enumeration, &offsets, &offsets_size); + * @endcode + * + * The pointer returned from this function references internal data managed + * by the enumeration. As such, clients should not attempt to free it, or + * access it beyond the lifetime of the enumeration instance. + * + * If the enumeration values are var sized (i.e., cell_var_num is UINT32_MAX) + * the offsets buffer will contain a `uint64_t` value for the starting offset + * of each underlying enumeration value. Note that the number of offsets is + * calculated as `offsets_size / sizeof(uint64_t)`. + * + * @param ctx The TileDB context. + * @param enumeration The enumeration + * @param data The returned pointer to this enumeration offsets buffer + * @param data_size The length of the buffer pointed to by offsets + * @return `TILEDB_OK` or `TILEDB_ERR` + */ +TILEDB_EXPORT capi_return_t tiledb_enumeration_get_offsets( + tiledb_ctx_t* ctx, + tiledb_enumeration_t* enumeration, + const void** offsets, + uint64_t* offsets_size) TILEDB_NOEXCEPT; + +#ifdef __cplusplus +} +#endif + +#endif // TILEDB_CAPI_ENUMERATION_EXPERIMENTAL_H diff --git a/tiledb/api/c_api/enumeration/enumeration_api_internal.h b/tiledb/api/c_api/enumeration/enumeration_api_internal.h new file mode 100644 index 000000000000..c484d13ce5c2 --- /dev/null +++ b/tiledb/api/c_api/enumeration/enumeration_api_internal.h @@ -0,0 +1,124 @@ +/** + * @file tiledb/api/c_api/enumeration/enumeration_api_internal.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file declares the enumeration section of the C API for TileDB. + */ + +#ifndef TILEDB_CAPI_ENUMERATION_INTERNAL_H +#define TILEDB_CAPI_ENUMERATION_INTERNAL_H + +#include "enumeration_api_experimental.h" +#include "tiledb/api/c_api_support/handle/handle.h" +#include "tiledb/common/common.h" +#include "tiledb/sm/array_schema/enumeration.h" + +/** + * Handle `struct` for API enumeration objects. + */ +struct tiledb_enumeration_handle_t + : public tiledb::api::CAPIHandle { + private: + shared_ptr enumeration_; + + public: + /** + * Type name + */ + static constexpr std::string_view object_type_name{"enumeration"}; + + template + tiledb_enumeration_handle_t(Arg&&... arg) + : enumeration_( + tiledb::sm::Enumeration::create(std::forward(arg)...)) { + } + + /** + * Constructor from `shared_ptr` copies the shared pointer. + */ + explicit tiledb_enumeration_handle_t(shared_ptr& e) + : enumeration_(e) { + } + + /** + * Copy the underlying enumeration object. + */ + [[nodiscard]] shared_ptr copy() { + return enumeration_; + } + + /** + * Return the data type of the enumeration values. + */ + [[nodiscard]] inline tiledb_datatype_t type() const { + return static_cast(enumeration_->type()); + } + + /** + * Return the cell_val_num of the enumeration values + */ + [[nodiscard]] inline uint32_t cell_val_num() const { + return enumeration_->cell_val_num(); + } + + /** + * Return a bool indicating whethe the enumeration values are ordered. + */ + [[nodiscard]] inline bool ordered() const { + return enumeration_->ordered(); + } + + /** + * Return a pointer and size pair for the underlying data type. + */ + [[nodiscard]] inline const tuple data() const { + return enumeration_->data(); + } + + /** + * Return a pointer and size tuple for the underlying offsets buffer. + */ + [[nodiscard]] inline const tuple offsets() const { + return enumeration_->offsets(); + } +}; + +namespace tiledb::api { + +/** + * Returns after successfully validating an error. Throws otherwise. + * + * @param dim A possibly-valid enumeration handle + */ +inline void ensure_enumeration_is_valid(const tiledb_enumeration_handle_t* e) { + ensure_handle_is_valid(e); +} + +} // namespace tiledb::api + +#endif // TILEDB_CAPI_ENUMERATION_INTERNAL_H diff --git a/tiledb/api/c_api/enumeration/test/CMakeLists.txt b/tiledb/api/c_api/enumeration/test/CMakeLists.txt new file mode 100644 index 000000000000..7ace61d484d8 --- /dev/null +++ b/tiledb/api/c_api/enumeration/test/CMakeLists.txt @@ -0,0 +1,32 @@ +# +# tiledb/api/c_api/enumeration/test/CMakeLists.txt +# +# The MIT License +# +# Copyright (c) 2023 TileDB, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +include(unit_test) + +commence(unit_test capi_enumeration) + this_target_sources(unit_capi_enumeration.cc) + this_target_object_libraries(capi_enumeration_stub) +conclude(unit_test) diff --git a/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc b/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc new file mode 100644 index 000000000000..6ffbd2a494ee --- /dev/null +++ b/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc @@ -0,0 +1,36 @@ +/** + * @file tiledb/api/c_api/enumeration/test/compile_capi_enumeration_main.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "../enumeration_api_internal.h" +#include "tiledb/sm/enums/datatype.h" + +int main() { + tiledb_enumeration_handle_t e{ + tiledb::sm::Datatype::INT32, 1, 0, nullptr, 0, nullptr, 0}; + return 0; +} diff --git a/tiledb/api/c_api/enumeration/test/unit_capi_enumeration.cc b/tiledb/api/c_api/enumeration/test/unit_capi_enumeration.cc new file mode 100644 index 000000000000..65578aa71650 --- /dev/null +++ b/tiledb/api/c_api/enumeration/test/unit_capi_enumeration.cc @@ -0,0 +1,430 @@ +/** + * @file tiledb/api/c_api/enumeration/test/unit_capi_enumeration.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + */ + +#define CATCH_CONFIG_MAIN +#include +#include "../../../c_api_test_support/testsupport_capi_context.h" +#include "../enumeration_api_experimental.h" +using namespace tiledb::api::test_support; + +struct FixedSizeEnumeration { + FixedSizeEnumeration() { + uint32_t values[5] = {1, 2, 3, 4, 5}; + auto rc = tiledb_enumeration_alloc( + ctx_.context, + TILEDB_UINT32, + 1, + 0, + values, + sizeof(uint32_t) * 5, + nullptr, + 0, + &enumeration_); + REQUIRE(rc == TILEDB_OK); + } + + ~FixedSizeEnumeration() { + tiledb_enumeration_free(&enumeration_); + } + + ordinary_context ctx_; + tiledb_enumeration_t* enumeration_; +}; + +struct VarSizeEnumeration { + VarSizeEnumeration() { + const char* values = "foobarbazbingobango"; + uint64_t offsets[5] = {0, 3, 6, 9, 14}; + auto rc = tiledb_enumeration_alloc( + ctx_.context, + TILEDB_STRING_UTF8, + UINT32_MAX, + 0, + values, + strlen(values), + offsets, + 5 * sizeof(uint64_t), + &enumeration_); + REQUIRE(rc == TILEDB_OK); + } + + ~VarSizeEnumeration() { + tiledb_enumeration_free(&enumeration_); + } + + ordinary_context ctx_; + tiledb_enumeration_t* enumeration_; +}; + +TEST_CASE( + "C API: tiledb_enumeration_alloc argument validation", + "[capi][enumeration]") { + ordinary_context ctx{}; + tiledb_enumeration_t* enumeration; + + int32_t values[5] = {1, 2, 3, 4, 5}; + const char* data = "foobarbazbingobango"; + uint64_t offsets[5] = {0, 3, 6, 9, 14}; + + SECTION("success - fixed size") { + auto rc = tiledb_enumeration_alloc( + ctx.context, + TILEDB_UINT32, + 1, + 0, + values, + sizeof(uint32_t) * 5, + nullptr, + 0, + &enumeration); + REQUIRE(rc == TILEDB_OK); + tiledb_enumeration_free(&enumeration); + } + + SECTION("success - var size") { + auto rc = tiledb_enumeration_alloc( + ctx.context, + TILEDB_STRING_ASCII, + UINT32_MAX, + 0, + (void*)data, + strlen(data), + offsets, + sizeof(uint64_t) * 5, + &enumeration); + REQUIRE(rc == TILEDB_OK); + tiledb_enumeration_free(&enumeration); + } + + SECTION("failure - null context") { + auto rc = tiledb_enumeration_alloc( + nullptr, + TILEDB_UINT32, + 1, + 0, + values, + sizeof(uint32_t) * 5, + nullptr, + 0, + &enumeration); + REQUIRE(rc == TILEDB_INVALID_CONTEXT); + tiledb_enumeration_free(&enumeration); + } + + SECTION("failure - invalid datatype") { + auto rc = tiledb_enumeration_alloc( + ctx.context, + (tiledb_datatype_t)255, + 1, + 0, + values, + sizeof(uint32_t) * 5, + nullptr, + 0, + &enumeration); + REQUIRE(rc == TILEDB_ERR); + tiledb_enumeration_free(&enumeration); + } + + SECTION("failure - data nullptr") { + auto rc = tiledb_enumeration_alloc( + ctx.context, + TILEDB_INT32, + 1, + 0, + nullptr, + sizeof(uint32_t) * 5, + nullptr, + 0, + &enumeration); + REQUIRE(rc == TILEDB_ERR); + tiledb_enumeration_free(&enumeration); + } + + SECTION("failure - data_size == 0") { + auto rc = tiledb_enumeration_alloc( + ctx.context, TILEDB_INT32, 1, 0, values, 0, nullptr, 0, &enumeration); + REQUIRE(rc == TILEDB_ERR); + tiledb_enumeration_free(&enumeration); + } + + SECTION("failure - enumeration nullptr") { + auto rc = tiledb_enumeration_alloc( + ctx.context, + TILEDB_INT32, + 1, + 0, + values, + sizeof(uint32_t) * 5, + nullptr, + 0, + nullptr); + REQUIRE(rc == TILEDB_ERR); + tiledb_enumeration_free(&enumeration); + } + + SECTION("failure - offsets nullptr") { + auto rc = tiledb_enumeration_alloc( + ctx.context, + TILEDB_STRING_ASCII, + UINT32_MAX, + 0, + (void*)data, + strlen(data), + nullptr, + sizeof(uint64_t) * 5, + &enumeration); + REQUIRE(rc == TILEDB_ERR); + tiledb_enumeration_free(&enumeration); + } + + SECTION("failure - offsets_size == 0") { + auto rc = tiledb_enumeration_alloc( + ctx.context, + TILEDB_STRING_ASCII, + UINT32_MAX, + 0, + (void*)data, + strlen(data), + offsets, + 0, + &enumeration); + REQUIRE(rc == TILEDB_ERR); + tiledb_enumeration_free(&enumeration); + } +} + +TEST_CASE( + "C API: tiledb_enumeration_free argument validation", + "[capi][enumeration]") { + REQUIRE_NOTHROW(tiledb_enumeration_free(nullptr)); +} + +TEST_CASE( + "C API: tiledb_enumeration_get_type argument validation", + "[capi][enumeration]") { + FixedSizeEnumeration fe; + VarSizeEnumeration ve; + tiledb_datatype_t dt; + + SECTION("success") { + auto rc = + tiledb_enumeration_get_type(fe.ctx_.context, fe.enumeration_, &dt); + REQUIRE(rc == TILEDB_OK); + REQUIRE(dt == TILEDB_UINT32); + + rc = tiledb_enumeration_get_type(ve.ctx_.context, ve.enumeration_, &dt); + REQUIRE(rc == TILEDB_OK); + REQUIRE(dt == TILEDB_STRING_UTF8); + } + + SECTION("failure - invalid context") { + auto rc = tiledb_enumeration_get_type(nullptr, fe.enumeration_, &dt); + REQUIRE(rc == TILEDB_INVALID_CONTEXT); + } + + SECTION("failure - invalid enumeration") { + auto rc = tiledb_enumeration_get_type(fe.ctx_.context, nullptr, &dt); + REQUIRE(rc == TILEDB_ERR); + } + + SECTION("failure - invalid type pointer") { + auto rc = + tiledb_enumeration_get_type(fe.ctx_.context, fe.enumeration_, nullptr); + REQUIRE(rc == TILEDB_ERR); + } +} + +TEST_CASE( + "C API: tiledb_enumeration_get_cell_val_num argument validation", + "[capi][enumeration]") { + FixedSizeEnumeration fe; + VarSizeEnumeration ve; + uint32_t cvn; + + SECTION("success") { + auto rc = tiledb_enumeration_get_cell_val_num( + fe.ctx_.context, fe.enumeration_, &cvn); + REQUIRE(rc == TILEDB_OK); + REQUIRE(cvn == 1); + + rc = tiledb_enumeration_get_cell_val_num( + ve.ctx_.context, ve.enumeration_, &cvn); + REQUIRE(rc == TILEDB_OK); + REQUIRE(cvn == UINT32_MAX); + } + + SECTION("failure - invalid context") { + auto rc = + tiledb_enumeration_get_cell_val_num(nullptr, fe.enumeration_, &cvn); + REQUIRE(rc == TILEDB_INVALID_CONTEXT); + } + + SECTION("failure - invalid enumeration") { + auto rc = + tiledb_enumeration_get_cell_val_num(fe.ctx_.context, nullptr, &cvn); + REQUIRE(rc == TILEDB_ERR); + } + + SECTION("failure - invalid cell_val_num pointer") { + auto rc = tiledb_enumeration_get_cell_val_num( + fe.ctx_.context, fe.enumeration_, nullptr); + REQUIRE(rc == TILEDB_ERR); + } +} + +TEST_CASE( + "C API: tiledb_enumeration_get_ordered argument validation", + "[capi][enumeration]") { + FixedSizeEnumeration fe; + VarSizeEnumeration ve; + int o; + + SECTION("success") { + auto rc = + tiledb_enumeration_get_ordered(fe.ctx_.context, fe.enumeration_, &o); + REQUIRE(rc == TILEDB_OK); + REQUIRE(!o); + + rc = tiledb_enumeration_get_ordered(ve.ctx_.context, ve.enumeration_, &o); + REQUIRE(rc == TILEDB_OK); + REQUIRE(!o); + } + + SECTION("failure - invalid context") { + auto rc = tiledb_enumeration_get_ordered(nullptr, fe.enumeration_, &o); + REQUIRE(rc == TILEDB_INVALID_CONTEXT); + } + + SECTION("failure - invalid enumeration") { + auto rc = tiledb_enumeration_get_ordered(fe.ctx_.context, nullptr, &o); + REQUIRE(rc == TILEDB_ERR); + } + + SECTION("failure - invalid ordered pointer") { + auto rc = tiledb_enumeration_get_ordered( + fe.ctx_.context, fe.enumeration_, nullptr); + REQUIRE(rc == TILEDB_ERR); + } +} + +TEST_CASE( + "C API: tiledb_enumeration_get_data argument validation", + "[capi][enumeration]") { + FixedSizeEnumeration fe; + VarSizeEnumeration ve; + const void* d; + uint64_t ds; + + uint32_t fixed_expect[5] = {1, 2, 3, 4, 5}; + const char* var_expect = "foobarbazbingobango"; + + SECTION("success") { + auto rc = + tiledb_enumeration_get_data(fe.ctx_.context, fe.enumeration_, &d, &ds); + REQUIRE(rc == TILEDB_OK); + REQUIRE(std::memcmp(fixed_expect, d, sizeof(uint32_t) * 5) == 0); + REQUIRE(ds == sizeof(uint32_t) * 5); + + rc = tiledb_enumeration_get_data(ve.ctx_.context, ve.enumeration_, &d, &ds); + REQUIRE(rc == TILEDB_OK); + REQUIRE(std::memcmp(var_expect, d, strlen(var_expect)) == 0); + REQUIRE(ds == strlen(var_expect)); + } + + SECTION("failure - invalid context") { + auto rc = tiledb_enumeration_get_data(nullptr, fe.enumeration_, &d, &ds); + REQUIRE(rc == TILEDB_INVALID_CONTEXT); + } + + SECTION("failure - invalid enumeration") { + auto rc = tiledb_enumeration_get_data(fe.ctx_.context, nullptr, &d, &ds); + REQUIRE(rc == TILEDB_ERR); + } + + SECTION("failure - invalid data pointer") { + auto rc = tiledb_enumeration_get_data( + fe.ctx_.context, fe.enumeration_, nullptr, &ds); + REQUIRE(rc == TILEDB_ERR); + } + + SECTION("failure - invalid data size pointer") { + auto rc = tiledb_enumeration_get_data( + fe.ctx_.context, fe.enumeration_, &d, nullptr); + REQUIRE(rc == TILEDB_ERR); + } +} + +TEST_CASE( + "C API: tiledb_enumeration_get_offsets argument validation", + "[capi][enumeration]") { + FixedSizeEnumeration fe; + VarSizeEnumeration ve; + const void* o; + uint64_t os; + + uint64_t var_expect[5] = {0, 3, 6, 9, 14}; + + SECTION("success") { + auto rc = tiledb_enumeration_get_offsets( + fe.ctx_.context, fe.enumeration_, &o, &os); + REQUIRE(rc == TILEDB_OK); + REQUIRE(o == nullptr); + REQUIRE(os == 0); + + rc = tiledb_enumeration_get_offsets( + ve.ctx_.context, ve.enumeration_, &o, &os); + REQUIRE(rc == TILEDB_OK); + REQUIRE(std::memcmp(var_expect, o, sizeof(uint64_t) * 5) == 0); + REQUIRE(os == sizeof(uint64_t) * 5); + } + + SECTION("failure - invalid context") { + auto rc = tiledb_enumeration_get_offsets(nullptr, fe.enumeration_, &o, &os); + REQUIRE(rc == TILEDB_INVALID_CONTEXT); + } + + SECTION("failure - invalid enumeration") { + auto rc = tiledb_enumeration_get_offsets(fe.ctx_.context, nullptr, &o, &os); + REQUIRE(rc == TILEDB_ERR); + } + + SECTION("failure - invalid offsets pointer") { + auto rc = tiledb_enumeration_get_offsets( + fe.ctx_.context, fe.enumeration_, nullptr, &os); + REQUIRE(rc == TILEDB_ERR); + } + + SECTION("failure - invalid offsets size pointer") { + auto rc = tiledb_enumeration_get_offsets( + fe.ctx_.context, fe.enumeration_, &o, nullptr); + REQUIRE(rc == TILEDB_ERR); + } +} diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 43ee7ef43279..882cfe29d4c8 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -582,6 +582,32 @@ void Array::delete_fragments_list( } } +shared_ptr Array::get_enumeration(const std::string& attr_name) { + if (!is_open_) { + throw ArrayStatusException("Cannot get enumeration; Array is not open"); + } + + // Do we need to check all previous schema versions if the attribute has + // been dropped? + auto attr = array_schema_latest_->attribute(attr_name); + + if (attr == nullptr) { + return nullptr; + } + + if (!attr->has_enumeration()) { + return nullptr; + } + + auto ptr = array_schema_latest_->enumeration(attr->get_enumeration_name()); + if (ptr != nullptr) { + return ptr; + } + + return array_dir_.load_enumeration( + array_schema_latest_, attr->get_enumeration_name(), get_encryption_key()); +} + bool Array::is_empty() const { return fragment_metadata_.empty(); } diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index 92f2fccf14f4..3951aa88cd45 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -257,6 +257,11 @@ class Array { return fragment_metadata_; } + /** + * Get the enumeration for an attribute + */ + shared_ptr get_enumeration(const std::string& attr_name); + /** * Returns `true` if the array is empty at the time it is opened. * The funciton returns `false` if the array is not open. diff --git a/tiledb/sm/array/array_directory.cc b/tiledb/sm/array/array_directory.cc index 35984fffb0ca..a290752ffd4b 100644 --- a/tiledb/sm/array/array_directory.cc +++ b/tiledb/sm/array/array_directory.cc @@ -33,6 +33,7 @@ #include "tiledb/sm/array/array_directory.h" #include "tiledb/common/logger.h" #include "tiledb/common/stdx_string.h" +#include "tiledb/sm/array_schema/enumeration.h" #include "tiledb/sm/filesystem/vfs.h" #include "tiledb/sm/misc/constants.h" #include "tiledb/sm/misc/parallel_functions.h" @@ -185,6 +186,36 @@ ArrayDirectory::load_all_array_schemas( return array_schemas; } +shared_ptr ArrayDirectory::load_enumeration( + shared_ptr schema, + const std::string& enumeration_name, + const EncryptionKey& encryption_key) const { + auto timer_se = resources_.get().stats().start_timer("sm_load_enumeration"); + + // A subtle assertion that this schema knows about this enumeration + // name. If enumeration_name is unknown it will throw an exception. + schema->enumeration(enumeration_name); + + if (uri_.is_invalid()) { + throw ArrayDirectoryException("Cannot load enumeration; Invalid array URI"); + } + + auto enum_uri = uri_.join_path(constants::array_schema_dir_name) + .join_path(constants::array_enumerations_dir_name) + .join_path(enumeration_name); + + auto&& tile = GenericTileIO::load(resources_, enum_uri, 0, encryption_key); + resources_.get().stats().add_counter("read_enumeration_size", tile.size()); + + Deserializer deserializer(tile.data(), tile.size()); + auto enum_ptr = Enumeration::deserialize(deserializer); + enum_ptr->set_name(enumeration_name); + + schema->set_enumeration(enumeration_name, enum_ptr); + + return enum_ptr; +} + const URI& ArrayDirectory::uri() const { return uri_; } @@ -1140,10 +1171,12 @@ Status ArrayDirectory::compute_array_schema_uris( if (!array_schema_dir_uris.empty()) { array_schema_uris_.reserve( array_schema_uris_.size() + array_schema_dir_uris.size()); - std::copy( - array_schema_dir_uris.begin(), - array_schema_dir_uris.end(), - std::back_inserter(array_schema_uris_)); + for (auto& uri : array_schema_dir_uris) { + if (uri.last_path_part() == constants::array_enumerations_dir_name) { + continue; + } + array_schema_uris_.push_back(uri); + } } return Status::Ok(); diff --git a/tiledb/sm/array/array_directory.h b/tiledb/sm/array/array_directory.h index c4e245314f46..f53f040f4588 100644 --- a/tiledb/sm/array/array_directory.h +++ b/tiledb/sm/array/array_directory.h @@ -385,6 +385,18 @@ class ArrayDirectory { std::unordered_map> load_all_array_schemas(const EncryptionKey& encryption_key) const; + /** + * Load an enumeration from schema with the given name. + * + * @param schema The ArraySchema that references the enumeration name. + * @param enumeration_name The name of the enumeration to load + * @return The loaded enumeration + */ + shared_ptr load_enumeration( + shared_ptr schema, + const std::string& enumeration_name, + const EncryptionKey& encryption_ley) const; + /** Returns the array URI. */ const URI& uri() const; diff --git a/tiledb/sm/array/test/CMakeLists.txt b/tiledb/sm/array/test/CMakeLists.txt index 3f06e3922eeb..62b222de11ea 100644 --- a/tiledb/sm/array/test/CMakeLists.txt +++ b/tiledb/sm/array/test/CMakeLists.txt @@ -28,7 +28,7 @@ include(unit_test) commence(unit_test array) this_target_sources(main.cc unit_array_directory.cc) - this_target_link_libraries(array) + this_target_link_libraries(array context_resources) conclude(unit_test) commence(unit_test consistency) diff --git a/tiledb/sm/array_schema/CMakeLists.txt b/tiledb/sm/array_schema/CMakeLists.txt index f9616f3b7092..df6ec2a47a47 100644 --- a/tiledb/sm/array_schema/CMakeLists.txt +++ b/tiledb/sm/array_schema/CMakeLists.txt @@ -32,7 +32,14 @@ include(object_library) # commence(object_library attribute) this_target_sources(attribute.cc) - this_target_object_libraries(baseline buffer constants filter_pipeline range stringx) + this_target_object_libraries( + baseline + buffer + constants + filter_pipeline + range + stringx + uuid) conclude(object_library) # @@ -51,13 +58,27 @@ commence(object_library domain) this_target_object_libraries(datum dimension math) conclude(object_library) +# +# `enumeration` object library +# +commence(object_library enumeration) + this_target_sources(enumeration.cc) + this_target_object_libraries(buffer constants) +conclude(object_library) + # # `array_schema` object library # commence(object_library array_schema) this_target_sources(array_schema.cc dimension_label.cc) this_target_object_libraries( - attribute domain time uri_format uuid vfs) + attribute domain enumeration time uri_format uuid vfs) conclude(object_library) +# This is linked outside the object_library scope because ContextResources +# is recompiled as part of the capi_context_stub. Including context_resources +# here like this prevents many headaches revolving around duplicate symbols +# when linking executables. +target_link_libraries(compile_array_schema PRIVATE context_resources generic_tile_io) + add_test_subdirectory() diff --git a/tiledb/sm/array_schema/array_schema.cc b/tiledb/sm/array_schema/array_schema.cc index 656f034f9466..8ec7df8ae0d3 100644 --- a/tiledb/sm/array_schema/array_schema.cc +++ b/tiledb/sm/array_schema/array_schema.cc @@ -39,6 +39,7 @@ #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/dimension_label.h" #include "tiledb/sm/array_schema/domain.h" +#include "tiledb/sm/array_schema/enumeration.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/enums/array_type.h" #include "tiledb/sm/enums/compressor.h" @@ -50,6 +51,8 @@ #include "tiledb/sm/filter/webp_filter.h" #include "tiledb/sm/misc/hilbert.h" #include "tiledb/sm/misc/tdb_time.h" +#include "tiledb/sm/tile/generic_tile_io.h" +#include "tiledb/storage_format/uri/generate_uri.h" #include "tiledb/storage_format/uri/parse_uri.h" #include @@ -147,11 +150,15 @@ ArraySchema::ArraySchema( dim_map_[dim->name()] = dim; } - // Create attribute map + // Create attribute and enumeration map if (!attributes_.empty()) { for (auto attr_iter : attributes_) { auto attr = attr_iter.get(); attribute_map_[attr->name()] = attr; + if (attr->has_enumeration()) { + enumeration_map_[attr->get_enumeration_name()] = + shared_ptr(); + } } } @@ -196,8 +203,14 @@ ArraySchema::ArraySchema(const ArraySchema& array_schema) { throw_if_not_ok(set_domain(array_schema.domain_)); attribute_map_.clear(); - for (auto attr : array_schema.attributes_) + enumeration_map_.clear(); + for (auto attr : array_schema.attributes_) { throw_if_not_ok(add_attribute(attr, false)); + if (attr->has_enumeration()) { + enumeration_map_[attr->get_enumeration_name()] = + shared_ptr(); + } + } // Create dimension label map for (const auto& label : array_schema.dimension_labels_) { @@ -248,6 +261,40 @@ const std::vector>& ArraySchema::attributes() return attributes_; } +shared_ptr ArraySchema::enumeration( + const std::string& enumeration_name) const { + auto found = enumeration_map_.find(enumeration_name); + if (found == enumeration_map_.end()) { + throw ArraySchemaStatusException( + "Unknown enumeration '" + enumeration_name + "'"); + } + + return found->second; +} + +std::vector> ArraySchema::enumerations() const { + std::vector> ret; + ret.resize(enumeration_map_.size()); + + int idx = 0; + for (const auto& pair : enumeration_map_) { + ret[idx++] = pair.second; + } + + return ret; +} + +void ArraySchema::set_enumeration( + const std::string& enumeration_name, shared_ptr enumeration) { + auto found = enumeration_map_.find(enumeration_name); + if (found == enumeration_map_.end()) { + throw ArraySchemaStatusException( + "Unknown enumeration '" + enumeration_name + "'"); + } + + found->second = enumeration; +} + uint64_t ArraySchema::capacity() const { return capacity_; } @@ -697,6 +744,14 @@ bool ArraySchema::is_nullable(const std::string& name) const { return attr->nullable(); } +bool ArraySchema::has_enumeration(const std::string& name) const { + auto attr = this->attribute(name); + if (attr == nullptr) { + return false; + } + return attr->has_enumeration(); +} + // ===== FORMAT ===== // version (uint32_t) // allow_dups (bool) @@ -851,6 +906,41 @@ Status ArraySchema::add_attribute( return Status::Ok(); } +void ArraySchema::add_attribute( + shared_ptr attr, + shared_ptr enumeration, + bool check_special) { + if (attr == nullptr) { + throw ArraySchemaStatusException( + "Cannot add attribute; Input attribute is null"); + } + + if (enumeration == nullptr) { + throw ArraySchemaStatusException( + "Cannot add attribute; Input enumeration is null"); + } + + if (enumeration->name().empty()) { + auto name = storage_format::generate_uri( + timestamp_range_.first, timestamp_range_.second, write_version()); + if (name[0] == '/') { + name = name.substr(1); + } + + enumeration->set_name(name); + } + + auto found = enumeration_map_.find(enumeration->name()); + if (found == enumeration_map_.end()) { + enumerations_.push_back(enumeration); + enumeration_map_[enumeration->name()] = enumeration; + } + + attr->set_enumeration_name(enumeration->name()); + + throw_if_not_ok(add_attribute(attr, check_special)); +} + void ArraySchema::add_dimension_label( dimension_size_type dim_id, const std::string& name, @@ -1280,6 +1370,7 @@ const std::string& ArraySchema::name() const { /* ****************************** */ /* PRIVATE METHODS */ /* ****************************** */ + void ArraySchema::check_attribute_dimension_label_names() const { std::set names; // Check attribute and dimension names are unique. diff --git a/tiledb/sm/array_schema/array_schema.h b/tiledb/sm/array_schema/array_schema.h index d4a8bc64e15a..627c0e4e2c1e 100644 --- a/tiledb/sm/array_schema/array_schema.h +++ b/tiledb/sm/array_schema/array_schema.h @@ -43,6 +43,7 @@ #include "tiledb/sm/misc/constants.h" #include "tiledb/sm/misc/hilbert.h" #include "tiledb/sm/misc/uuid.h" +#include "tiledb/sm/storage_manager/context_resources.h" using namespace tiledb::common; @@ -55,6 +56,7 @@ class ConstBuffer; class Dimension; class DimensionLabel; class Domain; +class Enumeration; enum class ArrayType : uint8_t; enum class Compressor : uint8_t; @@ -184,6 +186,25 @@ class ArraySchema { /** Returns the attributes. */ const std::vector>& attributes() const; + /** + * Returns a shared pointer to the selected enumeration (nullptr if it + * does not exist). + */ + shared_ptr enumeration(const std::string& name) const; + + /** Returns the enumerations. */ + std::vector> enumerations() const; + + /** + * Store a loaded enumeration + * + * @param enumeration_name The name of the enumeration to store. This must + * already be known to the array schema. + * @param enumeration The loaded enumeration to store + */ + void set_enumeration( + const std::string& enumeration_name, shared_ptr enumeration); + /** Returns the capacity. */ uint64_t capacity() const; @@ -301,6 +322,9 @@ class ArraySchema { /** Returns true if the input name is nullable. */ bool is_nullable(const std::string& name) const; + /** Returns true if the input name has an enumeration. */ + bool has_enumeration(const std::string& name) const; + /** * Serializes the array schema object into a buffer. * @@ -338,6 +362,22 @@ class ArraySchema { Status add_attribute( shared_ptr attr, bool check_special = true); + /** + * Adds an attribute with an enumeration + * + * @param attr The attribute to be added + * @param enumeration The enumeration to set on the attribute + * @param check_special If `true` this function will check if the attribute + * is special (starting with `__`) and error if that's the case. Setting + * to `false` will allow adding attributes starting with `__`, noting + * that particular care must be taken (i.e., the user must know what + * they are doing in this case). + */ + void add_attribute( + shared_ptr attr, + shared_ptr enumeration, + bool check_special = true); + /** * Adds a dimension label to the array schema. * @@ -355,6 +395,12 @@ class ArraySchema { Datatype label_type, bool check_name = true); + /** + * Add an enumeration to an attribute on the array schema. + */ + void add_enumeration( + const std::string& attr_name, shared_ptr enumeration); + /** * Drops an attribute. * @@ -546,6 +592,12 @@ class ArraySchema { /** A map from the dimension label names to the label schemas. */ std::unordered_map dimension_label_map_; + /** The array enumerations */ + std::vector> enumerations_; + + /** A map of URIs to Enumerations */ + std::unordered_map> enumeration_map_; + /** The filter pipeline run on offset tiles for var-length attributes. */ FilterPipeline cell_var_offsets_filters_; diff --git a/tiledb/sm/array_schema/array_schema_evolution.cc b/tiledb/sm/array_schema/array_schema_evolution.cc index bf285733c71e..c57f1094576e 100644 --- a/tiledb/sm/array_schema/array_schema_evolution.cc +++ b/tiledb/sm/array_schema/array_schema_evolution.cc @@ -35,10 +35,12 @@ #include "tiledb/common/common.h" #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" +#include "tiledb/common/status.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/domain.h" +#include "tiledb/sm/array_schema/enumeration.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/enums/array_type.h" #include "tiledb/sm/enums/compressor.h" @@ -58,6 +60,14 @@ using namespace tiledb::common; namespace tiledb { namespace sm { +/** Class for locally generated exceptions. */ +class ArraySchemaEvolutionException : public StatusException { + public: + explicit ArraySchemaEvolutionException(const std::string& msg) + : StatusException("ArraySchemaEvolution", msg) { + } +}; + /* ****************************** */ /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ @@ -73,30 +83,32 @@ ArraySchemaEvolution::~ArraySchemaEvolution() { /* API */ /* ****************************** */ -tuple>> -ArraySchemaEvolution::evolve_schema( +shared_ptr ArraySchemaEvolution::evolve_schema( const shared_ptr& orig_schema) { std::lock_guard lock(mtx_); if (orig_schema == nullptr) { - return { - LOG_STATUS(Status_ArraySchemaEvolutionError( - "Cannot evolve schema; Input array schema is null")), - nullopt}; + throw ArraySchemaEvolutionException( + "Cannot evolve schema; Input array schema is null"); } auto schema = make_shared(HERE(), *(orig_schema.get())); // Add attributes. for (auto& attr : attributes_to_add_map_) { - RETURN_NOT_OK_TUPLE(schema->add_attribute(attr.second, false), nullopt); + auto eit = enumerations_to_add_map_.find(attr.first); + if (eit == enumerations_to_add_map_.end()) { + throw_if_not_ok(schema->add_attribute(attr.second, false)); + } else { + schema->add_attribute(attr.second, eit->second, false); + } } // Drop attributes. for (auto& attr_name : attributes_to_drop_) { bool has_attr = false; - RETURN_NOT_OK_TUPLE(schema->has_attribute(attr_name, &has_attr), nullopt); + throw_if_not_ok(schema->has_attribute(attr_name, &has_attr)); if (has_attr) { - RETURN_NOT_OK_TUPLE(schema->drop_attribute(attr_name), nullopt); + throw_if_not_ok(schema->drop_attribute(attr_name)); } } @@ -104,28 +116,27 @@ ArraySchemaEvolution::evolve_schema( // Set timestamp, if specified if (std::get<0>(timestamp_range_) != 0) { - RETURN_NOT_OK_TUPLE( - schema.get()->set_timestamp_range(timestamp_range_), nullopt); - RETURN_NOT_OK_TUPLE(schema->generate_uri(timestamp_range_), nullopt); + throw_if_not_ok(schema.get()->set_timestamp_range(timestamp_range_)); + throw_if_not_ok(schema->generate_uri(timestamp_range_)); } else { // Generate new schema URI - RETURN_NOT_OK_TUPLE(schema->generate_uri(), nullopt); + throw_if_not_ok(schema->generate_uri()); } - return {Status::Ok(), schema}; + return schema; } -Status ArraySchemaEvolution::add_attribute(const Attribute* attr) { +void ArraySchemaEvolution::add_attribute(const Attribute* attr) { std::lock_guard lock(mtx_); // Sanity check if (attr == nullptr) - return LOG_STATUS(Status_ArraySchemaEvolutionError( - "Cannot add attribute; Input attribute is null")); + throw ArraySchemaEvolutionException( + "Cannot add attribute; Input attribute is null"); if (attributes_to_add_map_.find(attr->name()) != attributes_to_add_map_.end()) { - return LOG_STATUS(Status_ArraySchemaEvolutionError( - "Cannot add attribute; Input attribute name is already there")); + throw ArraySchemaEvolutionException( + "Cannot add attribute; Input attribute name is already there"); } // Create new attribute and potentially set a default name @@ -134,8 +145,15 @@ Status ArraySchemaEvolution::add_attribute(const Attribute* attr) { if (attributes_to_drop_.find(attr->name()) != attributes_to_drop_.end()) { attributes_to_drop_.erase(attr->name()); } +} + +void ArraySchemaEvolution::add_attribute( + const Attribute* attr, shared_ptr enumeration) { + // Wait to acquire the lock because add_attribute takes the lock as well + add_attribute(attr); - return Status::Ok(); + std::lock_guard lock(mtx_); + enumerations_to_add_map_[attr->name()] = enumeration; } std::vector ArraySchemaEvolution::attribute_names_to_add() const { @@ -157,15 +175,41 @@ const Attribute* ArraySchemaEvolution::attribute_to_add( return (it == attributes_to_add_map_.end()) ? nullptr : it->second.get(); } -Status ArraySchemaEvolution::drop_attribute(const std::string& attribute_name) { +std::vector ArraySchemaEvolution::enumeration_names_to_add() + const { + std::lock_guard lock(mtx_); + std::vector names; + names.reserve(enumerations_to_add_map_.size()); + for (auto elem : enumerations_to_add_map_) { + names.push_back(elem.first); + } + return names; +} + +const Enumeration* ArraySchemaEvolution::enumeration_to_add( + const std::string& name) const { + std::lock_guard lock(mtx_); + auto it = enumerations_to_add_map_.find(name); + + if (it == enumerations_to_add_map_.end()) { + return nullptr; + } + + return it->second.get(); +} + +void ArraySchemaEvolution::drop_attribute(const std::string& attribute_name) { std::lock_guard lock(mtx_); attributes_to_drop_.insert(attribute_name); - if (attributes_to_add_map_.find(attribute_name) != - attributes_to_add_map_.end()) { + auto ait = attributes_to_add_map_.find(attribute_name); + if (ait != attributes_to_add_map_.end()) { // Reset the pointer and erase it - attributes_to_add_map_.erase(attribute_name); + attributes_to_add_map_.erase(ait); + } + auto eit = enumerations_to_add_map_.find(attribute_name); + if (eit != enumerations_to_add_map_.end()) { + enumerations_to_add_map_.erase(eit); } - return Status::Ok(); } std::vector ArraySchemaEvolution::attribute_names_to_drop() const { @@ -179,7 +223,7 @@ std::vector ArraySchemaEvolution::attribute_names_to_drop() const { return names; } -Status ArraySchemaEvolution::set_timestamp_range( +void ArraySchemaEvolution::set_timestamp_range( const std::pair& timestamp_range) { if (timestamp_range.first != timestamp_range.second) { throw std::runtime_error(std::string( @@ -188,7 +232,6 @@ Status ArraySchemaEvolution::set_timestamp_range( std::to_string(timestamp_range.second) + " are not equal!")); } timestamp_range_ = timestamp_range; - return Status::Ok(); } std::pair ArraySchemaEvolution::timestamp_range() const { @@ -202,6 +245,7 @@ std::pair ArraySchemaEvolution::timestamp_range() const { void ArraySchemaEvolution::clear() { attributes_to_add_map_.clear(); + enumerations_to_add_map_.clear(); attributes_to_drop_.clear(); timestamp_range_ = {0, 0}; } diff --git a/tiledb/sm/array_schema/array_schema_evolution.h b/tiledb/sm/array_schema/array_schema_evolution.h index 6ba64b754519..846dec6e5e7f 100644 --- a/tiledb/sm/array_schema/array_schema_evolution.h +++ b/tiledb/sm/array_schema/array_schema_evolution.h @@ -38,7 +38,6 @@ #include #include "tiledb/common/common.h" -#include "tiledb/common/status.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/filter/filter_pipeline.h" #include "tiledb/sm/misc/constants.h" @@ -55,6 +54,7 @@ class Buffer; class ConstBuffer; class Dimension; class Domain; +class Enumeration; class ArraySchema; enum class ArrayType : uint8_t; @@ -79,16 +79,24 @@ class ArraySchemaEvolution { /* API */ /* ********************************* */ - tuple>> evolve_schema( + shared_ptr evolve_schema( const shared_ptr& orig_schema); /** * Adds an attribute, copying the input. * * @param attr The attribute to be added - * @return Status */ - Status add_attribute(const Attribute* attr); + void add_attribute(const Attribute* attr); + + /** + * Adds an attribute with an enumeration, copying the attribute. + * + * @param attr The attribute to be added + * @param enumeration The enumeration to set on the attribute. + */ + void add_attribute( + const Attribute* attr, shared_ptr enumeration); /** Returns the names of attributes to add. */ std::vector attribute_names_to_add() const; @@ -99,19 +107,27 @@ class ArraySchemaEvolution { */ const Attribute* attribute_to_add(const std::string& name) const; + /** Returns the names of the attributes to add. */ + std::vector enumeration_names_to_add() const; + + /** + * Returns a constant pointer to the selected enumeration or nullptr if + * it does not exist. + */ + const Enumeration* enumeration_to_add(const std::string& name) const; + /** * Drops an attribute. * * @param attr The attribute to be dropped - * @return Status */ - Status drop_attribute(const std::string& attribute_name); + void drop_attribute(const std::string& attribute_name); /** Returns the names of attributes to drop. */ std::vector attribute_names_to_drop() const; /** Set a timestamp range for the array schema evolution */ - Status set_timestamp_range( + void set_timestamp_range( const std::pair& timestamp_range); /** Returns the timestamp range. */ @@ -126,6 +142,10 @@ class ArraySchemaEvolution { /** It maps each attribute name to the corresponding attribute object. */ std::unordered_map> attributes_to_add_map_; + /** Enumerations to add with any attribute. */ + std::unordered_map> + enumerations_to_add_map_; + /** The names of array attributes to be dropped. */ std::unordered_set attributes_to_drop_; @@ -151,4 +171,4 @@ class ArraySchemaEvolution { } // namespace sm } // namespace tiledb -#endif // TILEDB_SCHEMA_EVOLUTION_H \ No newline at end of file +#endif // TILEDB_SCHEMA_EVOLUTION_H diff --git a/tiledb/sm/array_schema/attribute.cc b/tiledb/sm/array_schema/attribute.cc index 8c6132a5c925..055241584a56 100644 --- a/tiledb/sm/array_schema/attribute.cc +++ b/tiledb/sm/array_schema/attribute.cc @@ -38,6 +38,7 @@ #include "tiledb/sm/enums/filter_type.h" #include "tiledb/sm/filter/compression_filter.h" #include "tiledb/sm/misc/parse_argument.h" +#include "tiledb/sm/misc/uuid.h" #include "tiledb/type/range/range.h" #include @@ -115,7 +116,8 @@ Attribute::Attribute( const FilterPipeline& filter_pipeline, const ByteVecValue& fill_value, uint8_t fill_value_validity, - DataOrder order) + DataOrder order, + const std::string& enumeration_name) : cell_val_num_(cell_val_num) , nullable_(nullable) , filters_(filter_pipeline) @@ -123,7 +125,8 @@ Attribute::Attribute( , type_(type) , fill_value_(fill_value) , fill_value_validity_(fill_value_validity) - , order_(order) { + , order_(order) + , enumeration_name_(enumeration_name) { } Attribute::Attribute(const Attribute* attr) { @@ -136,6 +139,7 @@ Attribute::Attribute(const Attribute* attr) { fill_value_ = attr->fill_value_; fill_value_validity_ = attr->fill_value_validity_; order_ = attr->order_; + enumeration_name_ = attr->enumeration_name_; } Attribute::~Attribute() = default; @@ -204,6 +208,13 @@ Attribute Attribute::deserialize( order = data_order_from_int(deserializer.read()); } + std::string enumeration_name; + if (version >= 19) { + auto enum_name_length = deserializer.read(); + enumeration_name.resize(enum_name_length); + deserializer.read(enumeration_name.data(), enum_name_length); + } + return Attribute( name, datatype, @@ -212,7 +223,8 @@ Attribute Attribute::deserialize( filterpipeline, fill_value, fill_value_validity, - order); + order, + enumeration_name); } void Attribute::dump(FILE* out) const { @@ -299,6 +311,12 @@ void Attribute::serialize( if (version >= 17) { serializer.write(static_cast(order_)); } + + // Write enumeration URI + if (version >= 19) { + serializer.write(enumeration_name_.size()); + serializer.write(enumeration_name_.data(), enumeration_name_.size()); + } } void Attribute::set_cell_val_num(unsigned int cell_val_num) { @@ -480,6 +498,18 @@ DataOrder Attribute::order() const { return order_; } +void Attribute::set_enumeration_name(const std::string& name) { + enumeration_name_ = name; +} + +const std::string& Attribute::get_enumeration_name() const { + return enumeration_name_; +} + +bool Attribute::has_enumeration() const { + return !enumeration_name_.empty(); +} + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ diff --git a/tiledb/sm/array_schema/attribute.h b/tiledb/sm/array_schema/attribute.h index 7e3e7a4dd302..1c5b463d14e1 100644 --- a/tiledb/sm/array_schema/attribute.h +++ b/tiledb/sm/array_schema/attribute.h @@ -51,6 +51,7 @@ namespace sm { class Buffer; class ConstBuffer; +class Enumeration; enum class Compressor : uint8_t; enum class Datatype : uint8_t; @@ -110,7 +111,8 @@ class Attribute { const FilterPipeline& filter_pipeline, const ByteVecValue& fill_value, uint8_t fill_value_validity, - DataOrder order = DataOrder::UNORDERED_DATA); + DataOrder order = DataOrder::UNORDERED_DATA, + const std::string& enumeration_name = {}); /** * Constructor. It clones the input attribute. @@ -242,6 +244,15 @@ class Attribute { static ByteVecValue default_fill_value( Datatype datatype, uint32_t cell_val_num); + /** Set an enumeration for this attribute. */ + void set_enumeration_name(const std::string& name); + + /** Get the enumeration for this attribute. */ + const std::string& get_enumeration_name() const; + + /** Check if this attribute has an enumeration. */ + bool has_enumeration() const; + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ @@ -271,8 +282,11 @@ class Attribute { /** The required order of the data stored in the attribute. */ DataOrder order_; + /** The enumeration UUID if one exists. */ + std::string enumeration_name_; + /* ********************************* */ - /* PRIVATE ATTRIBUTES */ + /* PRIVATE METHODS */ /* ********************************* */ /** Sets the default fill value. */ diff --git a/tiledb/sm/array_schema/enumeration.cc b/tiledb/sm/array_schema/enumeration.cc new file mode 100644 index 000000000000..72a61db734d2 --- /dev/null +++ b/tiledb/sm/array_schema/enumeration.cc @@ -0,0 +1,222 @@ +/** + * @file enumeration.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file implements class Enumeration. + */ + +#include + +#include "enumeration.h" + +namespace tiledb::sm { + +/** Class for locally generated status exceptions. */ +class EnumerationException : public StatusException { + public: + explicit EnumerationException(const std::string& msg) + : StatusException("Enumeration", msg) { + } +}; + +Enumeration::Enumeration( + Datatype type, + uint32_t cell_val_num, + bool ordered, + const void* data, + uint64_t data_size, + const void* offsets, + uint64_t offsets_size) + : type_(type) + , cell_val_num_(cell_val_num) + , ordered_(ordered) + , data_(data_size) + , offsets_(offsets_size) { + if (cell_val_num == 0) { + throw EnumerationException("Invalid cell_val_num in Enumeration"); + } + + if (data == nullptr || data_size == 0) { + throw EnumerationException("No attribute value data supplied."); + } + + if (var_size() && (offsets == nullptr || offsets_size == 0)) { + throw EnumerationException( + "Variable length datatype defined but offsets are not present"); + } else if (!var_size() && (offsets != nullptr || offsets_size > 0)) { + throw EnumerationException( + "Fixed length datatype defined but offsets are present"); + } + + if (var_size()) { + if (offsets_size % sizeof(uint64_t) != 0) { + throw EnumerationException( + "Invalid offsets size is not a multiple of sizeof(uint64_t)"); + } + auto offset_values = static_cast(offsets); + uint64_t last_offset = (offsets_size / sizeof(uint64_t)) - 1; + if (offset_values[last_offset] > data_size) { + throw EnumerationException( + "Provided data buffer size is too small for the provided offsets."); + } + } else { + if (data_size % cell_size() != 0) { + throw EnumerationException( + "Invalid data size is not a multiple of the cell size."); + } + } + + throw_if_not_ok(data_.write(data, 0, data_size)); + + if (offsets_size > 0) { + throw_if_not_ok(offsets_.write(offsets, 0, offsets_size)); + } + + generate_value_map(); +} + +shared_ptr Enumeration::deserialize(Deserializer& deserializer) { + auto disk_version = deserializer.read(); + if (disk_version < constants::enumerations_min_version) { + throw EnumerationException( + "Invalid Enumeration version '" + std::to_string(disk_version) + + "' is older than minimum version for enumerations '" + + std::to_string(constants::enumerations_min_version) + "'"); + } + + auto type = deserializer.read(); + auto cell_val_num = deserializer.read(); + auto ordered = deserializer.read(); + auto data_size = deserializer.read(); + + uint8_t data[data_size]; + deserializer.read(data, data_size); + + uint64_t offsets_size = 0; + + if (cell_val_num == constants::var_num) { + offsets_size = deserializer.read(); + } + + uint8_t offsets[offsets_size]; + deserializer.read(offsets, offsets_size); + + return Enumeration::create( + static_cast(type), + cell_val_num, + ordered, + data, + data_size, + offsets, + offsets_size); +} + +void Enumeration::serialize(Serializer& serializer, uint32_t version) const { + if (version < constants::enumerations_min_version) { + throw EnumerationException("Invalid version when serializing enumeration."); + } + + serializer.write(version); + + serializer.write(static_cast(type_)); + serializer.write(cell_val_num_); + serializer.write(ordered_); + serializer.write(data_.size()); + serializer.write(data_.data(), data_.size()); + + if (var_size()) { + serializer.write(offsets_.size()); + serializer.write(offsets_.data(), offsets_.size()); + } else { + assert(cell_val_num_ < constants::var_num); + assert(offsets_.size() == 0); + } +} + +void Enumeration::set_name(const std::string& name) { + if (name.empty()) { + throw EnumerationException("Enumeration name must not be empty"); + } + + if (name.find("/") != std::string::npos) { + throw EnumerationException( + "Enumeration name must not contain path separators"); + } + + name_ = name; +} + +uint64_t Enumeration::index_of(UntypedDatumView value) const { + std::string_view value_view( + static_cast(value.content()), value.size()); + + auto iter = value_map_.find(value_view); + if (iter == value_map_.end()) { + return std::numeric_limits::max(); + } + + return iter->second; +} + +void Enumeration::generate_value_map() { + auto char_data = data_.data_as(); + if (var_size()) { + auto offsets = offsets_.data_as(); + uint64_t num_offsets = offsets_.size() / sizeof(uint64_t); + + for (uint64_t i = 0; i < num_offsets; i++) { + uint64_t length = 0; + if (i < num_offsets - 1) { + length = offsets[i + 1] - offsets[i]; + } else { + length = data_.size() - offsets[i]; + } + + auto sv = std::string_view(char_data + offsets[i], length); + add_value_to_map(sv, i); + } + } else { + uint64_t i = 0; + auto stride = cell_size(); + while (i * stride < data_.size()) { + auto sv = std::string_view(char_data + i * stride, stride); + add_value_to_map(sv, i); + i += 1; + } + } +} + +void Enumeration::add_value_to_map(std::string_view& sv, uint64_t index) { + if (value_map_.find(sv) != value_map_.end()) { + throw EnumerationException( + "Invalid duplicated value in enumeration '" + std::string(sv) + "'"); + } + value_map_[sv] = index; +} + +} // namespace tiledb::sm diff --git a/tiledb/sm/array_schema/enumeration.h b/tiledb/sm/array_schema/enumeration.h new file mode 100644 index 000000000000..b158c06b1e24 --- /dev/null +++ b/tiledb/sm/array_schema/enumeration.h @@ -0,0 +1,229 @@ +/** + * @file enumeration.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines class Enumeration. + */ + +#ifndef TILEDB_ENUMERATION_H +#define TILEDB_ENUMERATION_H + +#include + +#include "tiledb/common/common.h" +#include "tiledb/common/types/untyped_datum.h" +#include "tiledb/sm/buffer/buffer.h" +#include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/filesystem/uri.h" +#include "tiledb/storage_format/serialization/serializers.h" + +namespace tiledb::sm { + +/** Defines an array enumeration */ +class Enumeration { + public: + /* ********************************* */ + /* CONSTRUCTORS & DESTRUCTORS */ + /* ********************************* */ + + /** No default constructor. Use the create factory method. */ + Enumeration() = delete; + + DISABLE_COPY(Enumeration); + DISABLE_MOVE(Enumeration); + + /** Destructor. */ + ~Enumeration() = default; + + /* ********************************* */ + /* OPERATORS */ + /* ********************************* */ + + DISABLE_COPY_ASSIGN(Enumeration); + DISABLE_MOVE_ASSIGN(Enumeration); + + /* ********************************* */ + /* API */ + /* ********************************* */ + + /** Create a new Enumeration */ + shared_ptr static create( + Datatype type, + uint32_t cell_val_num, + bool ordered, + const void* data, + uint64_t data_size, + const void* offsets, + uint64_t offsets_size) { + struct EnableMakeShared : public Enumeration { + EnableMakeShared( + Datatype type, + uint32_t cell_val_num, + bool ordered, + const void* data, + uint64_t data_size, + const void* offsets, + uint64_t offsets_size) + : Enumeration( + type, + cell_val_num, + ordered, + data, + data_size, + offsets, + offsets_size) { + } + }; + return make_shared( + HERE(), + type, + cell_val_num, + ordered, + data, + data_size, + offsets, + offsets_size); + } + + /** + * Deserialize an enumeration + * + * @param deserializer The deserializer to deserialize from. + * @return A new Enumeration + */ + static shared_ptr deserialize(Deserializer& deserializer); + + /** + * Serializes the enumeration into a buffer. + * + * @param serializer The object the array schema is serialized into. + */ + void serialize(Serializer& serializer, uint32_t version) const; + + // TODO: Add proxy calls to various Buffer::as_value/cur_data_as functions + // or just return buffers instead? + + /** + * Name stores the `__t1_t2_uuid_version` formatted string that is used + * when constructing URIs for Enumeration storage. + * + * @return The name of this Enumeration + */ + const std::string& name() const { + return name_; + } + + void set_name(const std::string& name); + + Datatype type() const { + return type_; + } + + uint32_t cell_val_num() const { + return cell_val_num_; + } + + uint64_t cell_size() const { + if (var_size()) { + return constants::var_size; + } + + return cell_val_num_ * datatype_size(type_); + } + + bool var_size() const { + return cell_val_num_ == constants::var_num; + } + + bool ordered() const { + return ordered_; + } + + tuple data() const { + return {data_.data(), data_.size()}; + } + + tuple offsets() const { + return {offsets_.data(), offsets_.size()}; + } + + uint64_t index_of(UntypedDatumView data) const; + + private: + /* ********************************* */ + /* PRIVATE CONSTRUCTORS */ + /* ********************************* */ + + Enumeration( + Datatype type, + uint32_t cell_val_num, + bool ordered, + const void* data, + uint64_t data_size, + const void* offsets, + uint64_t offsets_size); + + /* ********************************* */ + /* PRIVATE ATTRIBUTES */ + /* ********************************* */ + + /** The name of this Enumeration stored in the enumerations directory. */ + std::string name_; + + /** The type of enumerated values */ + Datatype type_; + + /** Number of values per enumeration value */ + uint32_t cell_val_num_; + + /** A flag which enables or disables inequality comparisons */ + bool ordered_; + + /** The list of enumeration values */ + Buffer data_; + + /** The offsets of each enumeration value if var sized. */ + Buffer offsets_; + + /** Map of values to indices */ + std::unordered_map value_map_; + + /* ********************************* */ + /* PRIVATE METHODS */ + /* ********************************* */ + + /** Populate the value_map_ */ + void generate_value_map(); + + /** Add a value to value_map_ */ + void add_value_to_map(std::string_view& sv, uint64_t index); +}; + +} // namespace tiledb::sm + +#endif // TILEDB_DOMAIN_H diff --git a/tiledb/sm/array_schema/test/compile_enumeration_main.cc b/tiledb/sm/array_schema/test/compile_enumeration_main.cc new file mode 100644 index 000000000000..dc407325d5b5 --- /dev/null +++ b/tiledb/sm/array_schema/test/compile_enumeration_main.cc @@ -0,0 +1,36 @@ +/** + * @file compile_enumeration_main.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tiledb/sm/array_schema/enumeration.h" +#include "tiledb/sm/enums/datatype.h" + +int main(int, char*[]) { + tiledb::sm::Enumeration::create( + tiledb::sm::Datatype::INT32, 1, false, nullptr, 0, nullptr, 0); + return 0; +} diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index ed9eaf036925..140f1f17026c 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -40,6 +40,7 @@ #include "tiledb/api/c_api/buffer_list/buffer_list_api_internal.h" #include "tiledb/api/c_api/config/config_api_internal.h" #include "tiledb/api/c_api/dimension/dimension_api_internal.h" +#include "tiledb/api/c_api/enumeration/enumeration_api_internal.h" #include "tiledb/api/c_api/error/error_api_internal.h" #include "tiledb/api/c_api/filter_list/filter_list_api_internal.h" #include "tiledb/api/c_api/string/string_api_internal.h" @@ -710,6 +711,25 @@ int32_t tiledb_array_schema_timestamp_range( return TILEDB_OK; } +TILEDB_EXPORT int32_t tiledb_array_schema_add_attribute_with_enumeration( + tiledb_ctx_t* ctx, + tiledb_array_schema_t* array_schema, + tiledb_attribute_t* attr, + tiledb_enumeration_t* enumeration) { + if (sanity_check(ctx, array_schema) == TILEDB_ERR || + sanity_check(ctx, attr) == TILEDB_ERR) { + return TILEDB_ERR; + } + + api::ensure_enumeration_is_valid(enumeration); + + array_schema->array_schema_->add_attribute( + make_shared(HERE(), attr->attr_), + enumeration->copy()); + + return TILEDB_OK; +} + int32_t tiledb_array_schema_set_coords_filter_list( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, @@ -1255,12 +1275,8 @@ int32_t tiledb_array_schema_evolution_add_attribute( sanity_check(ctx, attr) == TILEDB_ERR) return TILEDB_ERR; - throw_if_not_ok( - array_schema_evolution->array_schema_evolution_->add_attribute( - attr->attr_)); - return TILEDB_OK; + array_schema_evolution->array_schema_evolution_->add_attribute(attr->attr_); - // Success return TILEDB_OK; } @@ -1272,9 +1288,8 @@ int32_t tiledb_array_schema_evolution_drop_attribute( sanity_check(ctx, array_schema_evolution) == TILEDB_ERR) return TILEDB_ERR; - throw_if_not_ok( - array_schema_evolution->array_schema_evolution_->drop_attribute( - attribute_name)); + array_schema_evolution->array_schema_evolution_->drop_attribute( + attribute_name); return TILEDB_OK; } @@ -1287,12 +1302,8 @@ TILEDB_EXPORT int32_t tiledb_array_schema_evolution_set_timestamp_range( sanity_check(ctx, array_schema_evolution) == TILEDB_ERR) return TILEDB_ERR; - throw_if_not_ok( - array_schema_evolution->array_schema_evolution_->set_timestamp_range( - {lo, hi})); - return TILEDB_OK; - - // Success + array_schema_evolution->array_schema_evolution_->set_timestamp_range( + {lo, hi}); return TILEDB_OK; } @@ -3396,6 +3407,26 @@ int32_t tiledb_array_evolve( return TILEDB_OK; } +int32_t tiledb_array_get_enumeration( + tiledb_ctx_t* ctx, + const tiledb_array_t* array, + const char* attr_name, + tiledb_enumeration_t** enumeration) { + if (sanity_check(ctx, array) == TILEDB_ERR) { + return TILEDB_ERR; + } + + if (attr_name == nullptr) { + throw api::CAPIStatusException("'attr_name' must not be null"); + } + + api::ensure_output_pointer_is_valid(enumeration); + auto ptr = array->array_->get_enumeration(attr_name); + *enumeration = tiledb_enumeration_handle_t::make_handle(ptr); + + return TILEDB_OK; +} + int32_t tiledb_array_upgrade_version( tiledb_ctx_t* ctx, const char* array_uri, tiledb_config_t* config) { // Sanity Checks @@ -5353,6 +5384,17 @@ int32_t tiledb_consolidation_plan_free_json_str(char** out) { return TILEDB_OK; } +int32_t tiledb_attribute_has_enumeration( + tiledb_attribute_t* attr, int* has_enumeration) { + if (attr->attr_->has_enumeration()) { + *has_enumeration = 1; + } else { + *has_enumeration = 0; + } + + return TILEDB_OK; +} + } // namespace tiledb::api /* ****************************** */ @@ -5604,6 +5646,14 @@ int32_t tiledb_attribute_get_fill_value_nullable( ctx, attr, value, size, valid); } +int32_t tiledb_attribute_has_enumeration( + tiledb_ctx_t* ctx, + tiledb_attribute_t* attr, + int* has_enumeration) noexcept { + return api_entry_context( + ctx, attr, has_enumeration); +} + /* ********************************* */ /* DOMAIN */ /* ********************************* */ @@ -5757,6 +5807,16 @@ int32_t tiledb_array_schema_timestamp_range( ctx, array_schema, lo, hi); } +int32_t tiledb_array_schema_add_attribute_with_enumeration( + tiledb_ctx_t* ctx, + tiledb_array_schema_t* array_schema, + tiledb_attribute_t* attr, + tiledb_enumeration_t* enumeration) noexcept { + return api_entry< + tiledb::api::tiledb_array_schema_add_attribute_with_enumeration>( + ctx, array_schema, attr, enumeration); +} + int32_t tiledb_array_schema_set_coords_filter_list( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, @@ -6933,6 +6993,15 @@ int32_t tiledb_array_evolve( ctx, array_uri, array_schema_evolution); } +int32_t tiledb_array_get_enumeration( + tiledb_ctx_t* ctx, + const tiledb_array_t* array, + const char* attr_name, + tiledb_enumeration_t** enumeration) noexcept { + return api_entry( + ctx, array, attr_name, enumeration); +} + int32_t tiledb_array_upgrade_version( tiledb_ctx_t* ctx, const char* array_uri, diff --git a/tiledb/sm/c_api/tiledb_experimental.h b/tiledb/sm/c_api/tiledb_experimental.h index f17d5eac0474..809c3468eb2f 100644 --- a/tiledb/sm/c_api/tiledb_experimental.h +++ b/tiledb/sm/c_api/tiledb_experimental.h @@ -41,6 +41,7 @@ /* * API sections */ +#include "tiledb/api/c_api/enumeration/enumeration_api_experimental.h" #include "tiledb/api/c_api/group/group_api_external_experimental.h" #include "tiledb/api/c_api/query_plan/query_plan_api_external_experimental.h" #include "tiledb_dimension_label_experimental.h" @@ -190,6 +191,64 @@ TILEDB_EXPORT int32_t tiledb_array_schema_timestamp_range( uint64_t* lo, uint64_t* hi) TILEDB_NOEXCEPT; +/** + * Adds an attribute with enumeration to an array schema. + * + * **Example:** + * + * @code{.c} + * tiledb_attribute_t* attr; + * tiledb_attribute_alloc(ctx, "my_attr", TILEDB_INT32, &attr); + * tiledb_enumeration_t* enumeration; + * tiledb_dimension_alloc( + * ctx, + * TILEDB_INT64, + * 1, + * FALSE, + * data, + * data_size, + * nullptr, + * 0, + * &enumeration); + * tiledb_array_schema_add_attribute_with_enumeration( + * ctx, array_schema, attr, enumeration); + * @endcode + * + * @param ctx The TileDB context. + * @param array_schema The array schema. + * @param attr The attribute to be added. + * @param enumeration The enumeration to add with the attribute + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT int32_t tiledb_array_schema_add_attribute_with_enumeration( + tiledb_ctx_t* ctx, + tiledb_array_schema_t* array_schema, + tiledb_attribute_t* attr, + tiledb_enumeration_t* enumeration) TILEDB_NOEXCEPT; + +/* ********************************* */ +/* ATTRIBUTE ENUMERATIONS */ +/* ********************************* */ + +/** + * Check if the given attribute has an enumeration + * + * **Example:** + * + * @code{.c} + * tiledb_attribute_get_enumeration(ctx, attr, &enumeration); + * @endcode + * + * @param ctx The TileDB context. + * @param attr The target attribute. + * @param enumeration A bool indicating if the attribute has an enumeration + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT int32_t tiledb_attribute_has_enumeration( + tiledb_ctx_t* ctx, + tiledb_attribute_t* attr, + int* has_enumeration) TILEDB_NOEXCEPT; + /* ********************************* */ /* ARRAY */ /* ********************************* */ @@ -249,6 +308,33 @@ TILEDB_EXPORT int32_t tiledb_array_evolve( const char* array_uri, tiledb_array_schema_evolution_t* array_schema_evolution) TILEDB_NOEXCEPT; +/** + * Retrieves an attribute's enumeration given the attribute name (key). + * + * **Example:** + * + * The following retrieves the first attribute in the schema. + * + * @code{.c} + * tiledb_attribute_t* attr; + * tiledb_array_schema_get_enumeration( + * ctx, array_schema, "attr_0", &enumeration); + * // Make sure to delete the retrieved attribute in the end. + * @endcode + * + * @param ctx The TileDB context. + * @param array_schema The array schema. + * @param name The name (key) of the attribute from which to + * retrieve the enumeration. + * @param enumeration The enumeration object to retrieve. + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT int32_t tiledb_array_get_enumeration( + tiledb_ctx_t* ctx, + const tiledb_array_t* array, + const char* name, + tiledb_enumeration_t** enumeration) TILEDB_NOEXCEPT; + /** * Upgrades an array to the latest format version. * diff --git a/tiledb/sm/cpp_api/array_experimental.h b/tiledb/sm/cpp_api/array_experimental.h new file mode 100644 index 000000000000..9a7edb9cc645 --- /dev/null +++ b/tiledb/sm/cpp_api/array_experimental.h @@ -0,0 +1,56 @@ +/** + * @file array_experimental.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file declares the experimental C++ API for arrays. + */ + +#ifndef TILEDB_CPP_API_ARRAY_EXPERIMENTAL_H +#define TILEDB_CPP_API_ARRAY_EXPERIMENTAL_H + +#include "array.h" +#include "enumeration_experimental.h" +#include "tiledb_experimental.h" + +#include + +namespace tiledb { +class ArrayExperimental { + public: + static Enumeration get_enumeration( + const Context& ctx, const Array& array, const std::string& attr_name) { + tiledb_enumeration_t* enmr; + ctx.handle_error(tiledb_array_get_enumeration( + ctx.ptr().get(), array.ptr().get(), attr_name.c_str(), &enmr)); + return Enumeration(ctx, enmr); + } +}; + +} // namespace tiledb + +#endif diff --git a/tiledb/sm/cpp_api/array_schema_experimental.h b/tiledb/sm/cpp_api/array_schema_experimental.h index e712860126a6..5150ec83b74a 100644 --- a/tiledb/sm/cpp_api/array_schema_experimental.h +++ b/tiledb/sm/cpp_api/array_schema_experimental.h @@ -35,6 +35,7 @@ #include "array_schema.h" #include "dimension_label_experimental.h" +#include "enumeration_experimental.h" #include "filter_list.h" #include "tiledb_experimental.h" @@ -154,6 +155,26 @@ class ArraySchemaExperimental { ctx.ptr().get(), array_schema.ptr().get(), name.c_str(), &dim_label)); return DimensionLabel(ctx, dim_label); } + + /** + * Add an attribute with an enumeration to the array schema. + * + * @param ctx TileDB context + * @param array_schema Target array schema + * @param attr The attribute to add + * @param enumeration The enumeration to add + */ + static void add_attribute( + const Context& ctx, + const ArraySchema& array_schema, + const Attribute& attr, + const Enumeration& enmr) { + ctx.handle_error(tiledb_array_schema_add_attribute_with_enumeration( + ctx.ptr().get(), + array_schema.ptr().get(), + attr.ptr().get(), + enmr.ptr().get())); + } }; } // namespace tiledb diff --git a/tiledb/sm/cpp_api/attribute_experimental.h b/tiledb/sm/cpp_api/attribute_experimental.h new file mode 100644 index 000000000000..896cb5055ba7 --- /dev/null +++ b/tiledb/sm/cpp_api/attribute_experimental.h @@ -0,0 +1,67 @@ +/** + * @file attribute_experimental.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file declares the experimental C++ API for attributes. + */ + +#ifndef TILEDB_CPP_API_ATTRIBUTE_EXPERIMENTAL_H +#define TILEDB_CPP_API_ATTRIBUTE_EXPERIMENTAL_H + +#include "attribute.h" +#include "context.h" +#include "enumeration_experimental.h" + +#include + +namespace tiledb { +class AttributeExperimental { + public: + /** + * Return whether an attribute has an enumeration + * + * @param ctx TileDB context. + * @param attribute Target attribute. + * @return A bool indicating whether the attribute has an enumeration + */ + static bool has_enumeration(const Context& ctx, Attribute& attribute) { + int has_enum; + ctx.handle_error(tiledb_attribute_has_enumeration( + ctx.ptr().get(), attribute.ptr().get(), &has_enum)); + + if (has_enum) { + return true; + } else { + return false; + } + } +}; + +} // namespace tiledb + +#endif diff --git a/tiledb/sm/cpp_api/deleter.h b/tiledb/sm/cpp_api/deleter.h index c72d8ef71a1f..61f9a5d2c92f 100644 --- a/tiledb/sm/cpp_api/deleter.h +++ b/tiledb/sm/cpp_api/deleter.h @@ -111,6 +111,10 @@ class Deleter { tiledb_domain_free(&p); } + void operator()(tiledb_enumeration_t* p) const { + tiledb_enumeration_free(&p); + } + void operator()(tiledb_vfs_t* p) const { tiledb_vfs_free(&p); } diff --git a/tiledb/sm/cpp_api/enumeration_experimental.h b/tiledb/sm/cpp_api/enumeration_experimental.h new file mode 100644 index 000000000000..daee25339d42 --- /dev/null +++ b/tiledb/sm/cpp_api/enumeration_experimental.h @@ -0,0 +1,257 @@ +/** + * @file enumeration_experimental.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file declares the C++ API for the TileDB Enumeration class. + */ + +#ifndef TILEDB_CPP_API_ENUMERATION_EXPERIMENTAL_H +#define TILEDB_CPP_API_ENUMERATION_EXPERIMENTAL_H + +#include "context.h" +#include "tiledb.h" +#include "tiledb_experimental.h" + +namespace tiledb { + +class Enumeration { + public: + Enumeration(const Context& ctx, tiledb_enumeration_t* enumeration) + : ctx_(ctx) { + enumeration_ = std::shared_ptr(enumeration, deleter_); + } + + Enumeration(const Enumeration&) = default; + Enumeration(Enumeration&&) = default; + Enumeration& operator=(const Enumeration&) = default; + Enumeration& operator=(Enumeration&&) = default; + + /** Destructor. */ + ~Enumeration() = default; + + /* ********************************* */ + /* API */ + /* ********************************* */ + + /** Returns the C TileDB context object. */ + std::shared_ptr ptr() const { + return enumeration_; + } + + /** + * Returns the type of the enumeration values + */ + tiledb_datatype_t type() const { + tiledb_datatype_t ret; + tiledb_ctx_t* c_ctx = ctx_.get().ptr().get(); + ctx_.get().handle_error( + tiledb_enumeration_get_type(c_ctx, enumeration_.get(), &ret)); + return ret; + } + + /** + * Return the cell_val_num of the enumeration values + */ + uint32_t cell_val_num() const { + uint32_t ret; + tiledb_ctx_t* c_ctx = ctx_.get().ptr().get(); + ctx_.get().handle_error( + tiledb_enumeration_get_cell_val_num(c_ctx, enumeration_.get(), &ret)); + return ret; + } + + /** + * Return whether this enumeration is considered ordered. + */ + bool ordered() const { + int is_ordered; + tiledb_ctx_t* c_ctx = ctx_.get().ptr().get(); + ctx_.get().handle_error( + tiledb_enumeration_get_ordered(c_ctx, enumeration_.get(), &is_ordered)); + return is_ordered ? true : false; + } + + template * = nullptr> + void into_vector(std::vector& vec) { + tiledb_ctx_t* c_ctx = ctx_.get().ptr().get(); + + const void* data; + uint64_t data_size; + ctx_.get().handle_error(tiledb_enumeration_get_data( + c_ctx, enumeration_.get(), &data, &data_size)); + + const T* elems = static_cast(data); + size_t count = data_size / sizeof(T); + + vec.clear(); + vec.reserve(count); + for (size_t i = 0; i < count; i++) { + vec.push_back(elems[i]); + } + } + + template * = nullptr> + void into_vector(std::vector>& vec) { + tiledb_ctx_t* c_ctx = ctx_.get().ptr().get(); + + const void* data; + uint64_t data_size; + ctx_.get().handle_error(tiledb_enumeration_get_data( + c_ctx, enumeration_.get(), &data, &data_size)); + + const void* offsets; + uint64_t offsets_size; + ctx_.get().handle_error(tiledb_enumeration_get_offsets( + c_ctx, enumeration_.get(), &offsets, &offsets_size)); + + const T* str_data = static_cast(data); + const uint64_t* elems = static_cast(offsets); + size_t count = offsets_size / sizeof(uint64_t); + + vec.clear(); + vec.reserve(count); + for (size_t i = 0; i < count; i++) { + uint64_t len; + if (i + 1 < count) { + len = elems[i + 1] - elems[i]; + } else { + len = data_size - elems[i]; + } + vec.emplace_back(str_data + elems[i], len); + } + } + + /* ********************************* */ + /* STATIC FUNCTIONS */ + /* ********************************* */ + + template * = nullptr> + static Enumeration create( + const Context& ctx, + std::vector& values, + bool ordered = false, + tiledb_datatype_t type = static_cast(255)) { + using DataT = impl::TypeHandler; + + if (type == static_cast(255)) { + type = DataT::tiledb_type; + } + + return create( + ctx, + type, + DataT::tiledb_num, + ordered, + values.data(), + values.size() * sizeof(T), + nullptr, + 0); + } + + template * = nullptr> + static Enumeration create( + const Context& ctx, + std::vector>& values, + bool ordered = false, + tiledb_datatype_t type = static_cast(255)) { + using DataT = impl::TypeHandler; + static_assert( + DataT::tiledb_num == 1, "Enumeration string values cannot be compound"); + + if (type == static_cast(255)) { + type = DataT::tiledb_type; + } + + uint64_t total_size = 0; + for (auto v : values) { + total_size += v.size() * sizeof(T); + } + + uint8_t data[total_size]; + std::vector offsets; + offsets.reserve(values.size()); + uint64_t curr_offset = 0; + + for (auto v : values) { + std::memcpy(data + curr_offset, v.data(), v.size() * sizeof(T)); + offsets.push_back(curr_offset); + curr_offset += v.size() * sizeof(T); + } + + return create( + ctx, + type, + std::numeric_limits::max(), + ordered, + data, + total_size, + offsets.data(), + offsets.size() * sizeof(uint64_t)); + } + + static Enumeration create( + const Context& ctx, + tiledb_datatype_t type, + uint32_t cell_val_num, + bool ordered, + const void* data, + uint64_t data_size, + const void* offsets, + uint64_t offsets_size) { + tiledb_enumeration_t* enumeration; + ctx.handle_error(tiledb_enumeration_alloc( + ctx.ptr().get(), + type, + cell_val_num, + ordered, + data, + data_size, + offsets, + offsets_size, + &enumeration)); + return Enumeration(ctx, enumeration); + } + + private: + /* ********************************* */ + /* PRIVATE ATTRIBUTES */ + /* ********************************* */ + + /** The TileDB context. */ + std::reference_wrapper ctx_; + + /** Deleter wrapper. */ + impl::Deleter deleter_; + + /** Pointer to the TileDB C Enumeration object. */ + std::shared_ptr enumeration_; +}; + +} // namespace tiledb + +#endif // TILEDB_CPP_API_ENUMERATION_EXPERIMENTAL_H diff --git a/tiledb/sm/cpp_api/tiledb_experimental b/tiledb/sm/cpp_api/tiledb_experimental index df23d84ae518..0e213c4f0ec1 100644 --- a/tiledb/sm/cpp_api/tiledb_experimental +++ b/tiledb/sm/cpp_api/tiledb_experimental @@ -33,9 +33,12 @@ #ifndef TILEDB_EXPERIMENTAL_CPP_H #define TILEDB_EXPERIMENTAL_CPP_H +#include "array_experimental.h" #include "array_schema_evolution.h" #include "array_schema_experimental.h" +#include "attribute_experimental.h" #include "dimension_label_experimental.h" +#include "enumeration_experimental.h" #include "consolidation_plan_experimental.h" #include "group_experimental.h" #include "query_experimental.h" diff --git a/tiledb/sm/enums/layout.h b/tiledb/sm/enums/layout.h index 796519184a22..076b6625fde2 100644 --- a/tiledb/sm/enums/layout.h +++ b/tiledb/sm/enums/layout.h @@ -91,14 +91,14 @@ inline Status layout_enum(const std::string& layout_str, Layout* layout) { inline void ensure_tile_order_is_valid(uint8_t layout_enum) { if (layout_enum != 0 && layout_enum != 1) throw std::runtime_error( - "[Tile order] Invalid Layout enum " + std::to_string(layout_enum)); + "[Tile order] Invalid Tile Layout enum " + std::to_string(layout_enum)); } /* Throws error if cell order's enumeration is greater than 4. */ inline void ensure_cell_order_is_valid(uint8_t layout_enum) { if (layout_enum > 4) throw std::runtime_error( - "[Cell order] Invalid Layout enum " + std::to_string(layout_enum)); + "[Cell order] Invalid Cell Layout enum " + std::to_string(layout_enum)); } } // namespace sm diff --git a/tiledb/sm/misc/constants.cc b/tiledb/sm/misc/constants.cc index 17e95b511ee9..37f90fa0c904 100644 --- a/tiledb/sm/misc/constants.cc +++ b/tiledb/sm/misc/constants.cc @@ -93,6 +93,9 @@ const std::string fragment_metadata_filename = "__fragment_metadata.tdb"; /** The array dimension labels directory name. */ const std::string array_dimension_labels_dir_name = "__labels"; +/** The array enumerations directory name. */ +const std::string array_enumerations_dir_name = "__enumerations"; + /** The default tile capacity. */ const uint64_t capacity = 10000; @@ -662,6 +665,9 @@ const format_version_t deletes_min_version = 16; /** The lowest version supported for updates. */ const format_version_t updates_min_version = 16; +/** THe lowest version supported for enumerations. */ +const format_version_t enumerations_min_version = 19; + /** The maximum size of a tile chunk (unit of compression) in bytes. */ const uint64_t max_tile_chunk_size = 64 * 1024; @@ -813,6 +819,8 @@ const void* fill_value(Datatype type) { } const std::string config_delimiter = ","; + +const storage_size_t unhyphenated_uuid_size = 32; } // namespace constants } // namespace sm diff --git a/tiledb/sm/misc/constants.h b/tiledb/sm/misc/constants.h index d2796f86fe58..92ef73646faa 100644 --- a/tiledb/sm/misc/constants.h +++ b/tiledb/sm/misc/constants.h @@ -82,6 +82,9 @@ extern const std::string array_commits_dir_name; /** The array dimension labels directory name. */ extern const std::string array_dimension_labels_dir_name; +/** The array enumerations directory name. */ +extern const std::string array_enumerations_dir_name; + /** The default tile capacity. */ extern const uint64_t capacity; @@ -638,6 +641,9 @@ extern const format_version_t deletes_min_version; /** The lowest version supported for updates. */ extern const format_version_t updates_min_version; +/** The lowest version supported for enumerations. */ +extern const format_version_t enumerations_min_version; + /** The maximum size of a tile chunk (unit of compression) in bytes. */ extern const uint64_t max_tile_chunk_size; @@ -722,6 +728,9 @@ extern const uint64_t s3_min_multipart_part_size; */ extern const std::string s3_multipart_buffering_dirname; +/** The length in bytes of an unhyphenated UUID. */ +extern const storage_size_t unhyphenated_uuid_size; + } // namespace constants } // namespace sm diff --git a/tiledb/sm/query/ast/CMakeLists.txt b/tiledb/sm/query/ast/CMakeLists.txt index 4568075fbeb7..c46c57fd7244 100644 --- a/tiledb/sm/query/ast/CMakeLists.txt +++ b/tiledb/sm/query/ast/CMakeLists.txt @@ -39,7 +39,7 @@ list(APPEND SOURCES # commence(object_library query_ast) this_target_sources(${SOURCES}) - this_target_object_libraries(array_schema baseline) + this_target_object_libraries(array_schema generic_tile_io baseline) conclude(object_library) add_test_subdirectory() diff --git a/tiledb/sm/query/ast/query_ast.cc b/tiledb/sm/query/ast/query_ast.cc index ddec36e555ea..454e68cc4efc 100644 --- a/tiledb/sm/query/ast/query_ast.cc +++ b/tiledb/sm/query/ast/query_ast.cc @@ -31,6 +31,8 @@ */ #include "query_ast.h" +#include "tiledb/common/unreachable.h" +#include "tiledb/sm/array_schema/enumeration.h" using namespace tiledb::common; @@ -60,10 +62,77 @@ void ASTNodeVal::get_field_names( field_name_set.insert(field_name_); } +void ASTNodeVal::get_enumeration_field_names( + std::unordered_set& field_name_set) const { + if (use_enumeration_) { + field_name_set.insert(field_name_); + } +} + bool ASTNodeVal::is_backwards_compatible() const { return true; } +void ASTNodeVal::rewrite_enumeration_conditions( + const ArraySchema& array_schema) { + if (!use_enumeration_) { + return; + } + + if (!array_schema.is_attr(field_name_)) { + return; + } + + auto attr = array_schema.attribute(field_name_); + if (!attr->has_enumeration()) { + return; + } + + auto enumeration = array_schema.enumeration(attr->get_enumeration_name()); + if (!enumeration) { + throw std::logic_error( + "Missing requried enumeration for field '" + field_name_ + "'"); + } + + if (!enumeration->ordered() && + (op_ != QueryConditionOp::EQ && op_ != QueryConditionOp::NE)) { + throw std::logic_error( + "Cannot apply an inequality operator against an unordered Enumeration"); + } + + auto idx = enumeration->index_of(condition_value_view_); + auto dt_size = datatype_size(attr->type()); + + if (dt_size != 1 && dt_size != 2 && dt_size != 4 && dt_size != 8) { + throw std::runtime_error("Invalid data type size for enumeration index."); + } + + // At this point we're mostly guaranteed to succeed other than maybe an + // error allocating a very small buffer in which case we're up a creek + // regardless. However, we don't want to re-apply this enumeration + // translation so we mark it as done. + use_enumeration_ = false; + + condition_value_data_ = ByteVecValue(dt_size); + condition_value_view_ = + UntypedDatumView(condition_value_data_.data(), dt_size); + + if (dt_size == 1) { + uint8_t idx_val = static_cast(idx); + memcpy(condition_value_data_.data(), &idx_val, dt_size); + } else if (dt_size == 2) { + uint16_t idx_val = static_cast(idx); + memcpy(condition_value_data_.data(), &idx_val, dt_size); + } else if (dt_size == 4) { + uint32_t idx_val = static_cast(idx); + memcpy(condition_value_data_.data(), &idx_val, dt_size); + } else if (dt_size == 8) { + memcpy(condition_value_data_.data(), &idx, dt_size); + } else { + stdx::unreachable(); + } +} + Status ASTNodeVal::check_node_validity(const ArraySchema& array_schema) const { const uint64_t condition_value_size = condition_value_data_.size(); @@ -186,6 +255,14 @@ const QueryConditionCombinationOp& ASTNodeVal::get_combination_op() const { "value node."); } +bool ASTNodeVal::use_enumeration() const { + return use_enumeration_; +} + +void ASTNodeVal::set_use_enumeration(bool use_enumeration) { + use_enumeration_ = use_enumeration; +} + bool ASTNodeExpr::is_expr() const { return true; } @@ -205,6 +282,13 @@ void ASTNodeExpr::get_field_names( } } +void ASTNodeExpr::get_enumeration_field_names( + std::unordered_set& field_name_set) const { + for (const auto& child : nodes_) { + child->get_enumeration_field_names(field_name_set); + } +} + bool ASTNodeExpr::is_backwards_compatible() const { if (combination_op_ != QueryConditionCombinationOp::AND) { return false; @@ -217,6 +301,13 @@ bool ASTNodeExpr::is_backwards_compatible() const { return true; } +void ASTNodeExpr::rewrite_enumeration_conditions( + const ArraySchema& array_schema) { + for (auto& child : nodes_) { + child->rewrite_enumeration_conditions(array_schema); + } +} + Status ASTNodeExpr::check_node_validity(const ArraySchema& array_schema) const { // If the node is a compound expression node, ensure there are at least // two children in the node and then run a check on each child node. @@ -275,9 +366,22 @@ const QueryConditionOp& ASTNodeExpr::get_op() const { const std::vector>& ASTNodeExpr::get_children() const { return nodes_; } + const QueryConditionCombinationOp& ASTNodeExpr::get_combination_op() const { return combination_op_; } +bool ASTNodeExpr::use_enumeration() const { + throw std::runtime_error( + "ASTNodeExpr::use_enumeration: Cannot get enumeration status from " + "an AST expression node."); +} + +void ASTNodeExpr::set_use_enumeration(bool use_enumeration) { + for (auto& child : nodes_) { + child->set_use_enumeration(use_enumeration); + } +} + } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/query/ast/query_ast.h b/tiledb/sm/query/ast/query_ast.h index 30a08884d905..a1901207379e 100644 --- a/tiledb/sm/query/ast/query_ast.h +++ b/tiledb/sm/query/ast/query_ast.h @@ -91,11 +91,19 @@ class ASTNode { * @brief Gets the set of field names from all the value nodes in the ASTNode. * * @param field_name_set The set variable the function populates. - * @return std::unordered_set& Set of the field names in the - * node. */ virtual void get_field_names( std::unordered_set& field_name_set) const = 0; + + /** + * @brief Gets the set of field names from all the value nodes that reference + * an enumerated field. + * + * @param field_name_set The set variable the function populates. + */ + virtual void get_enumeration_field_names( + std::unordered_set& field_name_set) const = 0; + /** * @brief Returns true if the AST is previously supported by previous versions * of TileDB. This means that the AST should only have AND combination ops, @@ -106,6 +114,15 @@ class ASTNode { */ virtual bool is_backwards_compatible() const = 0; + /** + * @brief Update an node value condition values that refer to enumerated + * attributes. + * + * @param array_schema The array schema with all relevant enumerations loaded. + */ + virtual void rewrite_enumeration_conditions( + const ArraySchema& array_schema) = 0; + /** * @brief Checks whether the node is valid based on the array schema of the * array that the query condition that contains this AST node will execute a @@ -176,6 +193,26 @@ class ASTNode { */ virtual const QueryConditionCombinationOp& get_combination_op() const = 0; + /** + * @brief Return whether this node's condition should be applied against + * the attributes enumerated values or the underlying index data if + * applicable for a given attribute. + * + * @return bool If true, apply this condition against the enumerated values + */ + virtual bool use_enumeration() const = 0; + + /** + * @brief By default, a query condition is applied against an enumeration's + * values. This can be disabled to apply a given condition against the + * underlying integer data stored for the attribute by passing `false` to + * this method. + * + * @param use_enumeration A bool indicating whether this condition should be + * applied against the enumerations values or not. + */ + virtual void set_use_enumeration(bool use_enumeration) = 0; + /** * @brief Default virtual destructor. */ @@ -209,7 +246,8 @@ class ASTNodeVal : public ASTNode { (void*)"" : condition_value_data_.data()), condition_value_data_.size()) - , op_(op) { + , op_(op) + , use_enumeration_(true) { if (condition_value_view_.size() != 0) { memcpy( condition_value_data_.data(), condition_value, condition_value_size); @@ -228,7 +266,8 @@ class ASTNodeVal : public ASTNode { (void*)"" : condition_value_data_.data()), condition_value_data_.size()) - , op_(rhs.op_) { + , op_(rhs.op_) + , use_enumeration_(rhs.use_enumeration_) { } /** @@ -243,7 +282,8 @@ class ASTNodeVal : public ASTNode { (void*)"" : condition_value_data_.data()), condition_value_data_.size()) - , op_(negate_query_condition_op(rhs.op_)) { + , op_(negate_query_condition_op(rhs.op_)) + , use_enumeration_(rhs.use_enumeration_) { } /** @@ -284,12 +324,19 @@ class ASTNodeVal : public ASTNode { * @brief Gets the set of field names from all the value nodes in the ASTNode. * * @param field_name_set The set variable the function populates. - * @return std::unordered_set& Set of the field names in the - * node. */ void get_field_names( std::unordered_set& field_name_set) const override; + /** + * @brief Gets the set of field names from all the value nodes that reference + * an enumerated field. + * + * @param field_name_set The set variable the function populates. + */ + void get_enumeration_field_names( + std::unordered_set& field_name_set) const override; + /** * @brief Returns true if the AST is previously supported by previous versions * of TileDB. This means that the AST should only have AND combination ops, @@ -300,6 +347,14 @@ class ASTNodeVal : public ASTNode { */ bool is_backwards_compatible() const override; + /** + * @brief Update an node value condition values that refer to enumerated + * attributes. + * + * @param array_schema The array schema with all relevant enumerations loaded. + */ + void rewrite_enumeration_conditions(const ArraySchema& array_schema) override; + /** * @brief Checks whether the node is valid based on the array schema of the * array that the query condition that contains this AST node will execute a @@ -370,6 +425,26 @@ class ASTNodeVal : public ASTNode { */ const QueryConditionCombinationOp& get_combination_op() const override; + /** + * @brief Return whether this node's condition should be applied against + * the attributes enumerated values or the underlying index data if + * applicable for a given attribute. + * + * @return bool If true, apply this condition against the enumerated values + */ + bool use_enumeration() const override; + + /** + * @brief By default, a query condition is applied against an enumeration's + * values. This can be disabled to apply a given condition against the + * underlying integer data stored for the attribute by passing `false` to + * this method. + * + * @param use_enumeration A bool indicating whether this condition should be + * applied against the enumerations values or not. + */ + void set_use_enumeration(bool use_enumeration) override; + private: /** The attribute name. */ std::string field_name_; @@ -382,6 +457,9 @@ class ASTNodeVal : public ASTNode { /** The comparison operator. */ QueryConditionOp op_; + + /** Whether this condiiton applies to enumerated values if applicable */ + bool use_enumeration_; }; /** @@ -461,12 +539,19 @@ class ASTNodeExpr : public ASTNode { * @brief Gets the set of field names from all the value nodes in the ASTNode. * * @param field_name_set The set variable the function populates. - * @return std::unordered_set& Set of the field names in the - * node. */ void get_field_names( std::unordered_set& field_name_set) const override; + /** + * @brief Gets the set of field names from all the value nodes that reference + * an enumerated field. + * + * @param field_name_set The set variable the function populates. + */ + void get_enumeration_field_names( + std::unordered_set& field_name_set) const override; + /** * @brief Returns true if the AST is previously supported by previous versions * of TileDB. This means that the AST should only have AND combination ops, @@ -477,6 +562,14 @@ class ASTNodeExpr : public ASTNode { */ bool is_backwards_compatible() const override; + /** + * @brief Update an node value condition values that refer to enumerated + * attributes. + * + * @param array_schema The array schema with all relevant enumerations loaded. + */ + void rewrite_enumeration_conditions(const ArraySchema& array_schema) override; + /** * @brief Checks whether the node is valid based on the array schema of the * array that the query condition that contains this AST node will execute a @@ -547,6 +640,31 @@ class ASTNodeExpr : public ASTNode { */ const QueryConditionCombinationOp& get_combination_op() const override; + /** + * @brief Return whether this node's condition should be applied against + * the attributes enumerated values or the underlying index data if + * applicable for a given attribute. + * + * This method always throws when called on an expression node. + * + * @return bool If true, apply this condition against the enumerated values + */ + bool use_enumeration() const override; + + /** + * @brief By default, a query condition is applied against an enumeration's + * values. This can be disabled to apply a given condition against the + * underlying integer data stored for the attribute by passing `false` to + * this method. + * + * When called on an expression node this value is recursively applied + * against all value nodes in the AST. + * + * @param use_enumeration A bool indicating whether this condition should be + * applied against the enumerations values or not. + */ + void set_use_enumeration(bool use_enumeration) override; + private: /** The node list **/ std::vector> nodes_; @@ -558,4 +676,4 @@ class ASTNodeExpr : public ASTNode { } // namespace sm } // namespace tiledb -#endif // TILEDB_QUERY_AST_H \ No newline at end of file +#endif // TILEDB_QUERY_AST_H diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 97b5edefb552..dd65a29f230d 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -846,6 +846,32 @@ Status Query::process() { } } + // PJD: This is the least worst place I can find to load the attribute + // enumeration values data. AFAICT this should run server side in a REST + // query as well as run before the query condition is checked for correctness + // or any other use of enumeration values is required. + if (condition_.has_value()) { + auto& names = condition_->enumeration_field_names(); + std::vector to_load; + to_load.reserve(names.size()); + for (auto name : names) { + if (array_schema_->has_enumeration(name)) { + to_load.push_back(name); + } + } + + throw_if_not_ok(parallel_for( + storage_manager_->compute_tp(), + 0, + to_load.size(), + [&](const uint64_t i) { + array_->get_enumeration(to_load[i]); + return Status::Ok(); + })); + + condition_->rewrite_enumeration_conditions(array_schema()); + } + // Update query status. status_ = QueryStatus::INPROGRESS; diff --git a/tiledb/sm/query/query_condition.cc b/tiledb/sm/query/query_condition.cc index a8c78c8ff409..b93f78e21af3 100644 --- a/tiledb/sm/query/query_condition.cc +++ b/tiledb/sm/query/query_condition.cc @@ -123,6 +123,15 @@ Status QueryCondition::init( return Status::Ok(); } +void QueryCondition::rewrite_enumeration_conditions( + const ArraySchema& array_schema) { + if (!tree_) { + return; + } + + tree_->rewrite_enumeration_conditions(array_schema); +} + Status QueryCondition::check(const ArraySchema& array_schema) const { if (!tree_) { return Status::Ok(); @@ -144,6 +153,7 @@ Status QueryCondition::combine( } combined_cond->field_names_.clear(); + combined_cond->enumeration_field_names_.clear(); combined_cond->tree_ = this->tree_->combine(rhs.tree_, combination_op); return Status::Ok(); } @@ -158,6 +168,7 @@ Status QueryCondition::negate( } combined_cond->field_names_.clear(); + combined_cond->enumeration_field_names_.clear(); combined_cond->tree_ = this->tree_->get_negated_tree(); return Status::Ok(); } @@ -174,6 +185,15 @@ std::unordered_set& QueryCondition::field_names() const { return field_names_; } +std::unordered_set& QueryCondition::enumeration_field_names() + const { + if (enumeration_field_names_.empty() && tree_ != nullptr) { + tree_->get_enumeration_field_names(enumeration_field_names_); + } + + return enumeration_field_names_; +} + uint64_t QueryCondition::condition_timestamp() const { if (condition_marker_.empty()) { return 0; @@ -188,6 +208,10 @@ uint64_t QueryCondition::condition_timestamp() const { return timestamps.first; } +void QueryCondition::set_use_enumeration(bool use_enumeration) { + tree_->set_use_enumeration(use_enumeration); +} + /** Full template specialization for `char*` and `QueryConditionOp::LT`. */ template <> struct QueryCondition::BinaryCmpNullChecks { diff --git a/tiledb/sm/query/query_condition.h b/tiledb/sm/query/query_condition.h index 717f047ceab0..4349c19c765c 100644 --- a/tiledb/sm/query/query_condition.h +++ b/tiledb/sm/query/query_condition.h @@ -112,6 +112,15 @@ class QueryCondition { uint64_t condition_value_size, const QueryConditionOp& op); + /** + * Translate any query conditions against enumerated attributes to the + * underlying attribute type. + * + * @param array_schema The current array schema with all required enumerations + * loaded. + */ + void rewrite_enumeration_conditions(const ArraySchema& array_schema); + /** * Verifies that the current state contains supported comparison * operations. Currently, we support the following: @@ -160,6 +169,12 @@ class QueryCondition { */ std::unordered_set& field_names() const; + /** + * Returns a set of all unique filed names that reference an enumerated + * attribute condition in the AST representing the query condition. + */ + std::unordered_set& enumeration_field_names() const; + /** * Returns the timestamp for this condition. */ @@ -245,6 +260,17 @@ class QueryCondition { */ uint64_t condition_index() const; + /** + * By default, a query condition is applied against the enumerated values + * of an attribute. Setting use_enumeration to false prevents the translation + * and applies this query condition directly against the underlying integral + * attribute data. + * + * @param use_enumeration A bool indicating whether to use the enumeration + * values + */ + void set_use_enumeration(bool use_enumeration); + private: /* ********************************* */ /* PRIVATE DATATYPES */ @@ -294,6 +320,9 @@ class QueryCondition { /** Caches all field names in the value nodes of the AST. */ mutable std::unordered_set field_names_; + /** Caches all filed names that references enumerations in the AST */ + mutable std::unordered_set enumeration_field_names_; + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ diff --git a/tiledb/sm/storage_manager/storage_manager.cc b/tiledb/sm/storage_manager/storage_manager.cc index 8759e89d67ef..6469f6448bc7 100644 --- a/tiledb/sm/storage_manager/storage_manager.cc +++ b/tiledb/sm/storage_manager/storage_manager.cc @@ -46,6 +46,7 @@ #include "tiledb/sm/array/array_directory.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/array_schema_evolution.h" +#include "tiledb/sm/array_schema/enumeration.h" #include "tiledb/sm/consolidator/consolidator.h" #include "tiledb/sm/consolidator/fragment_consolidator.h" #include "tiledb/sm/enums/array_type.h" @@ -612,6 +613,11 @@ Status StorageManagerCanonical::array_create( array_uri.join_path(constants::array_schema_dir_name); RETURN_NOT_OK(vfs()->create_dir(array_schema_dir_uri)); + // Create the enumerations directory inside the array schema directory + URI array_enumerations_uri = + array_schema_dir_uri.join_path(constants::array_enumerations_dir_name); + RETURN_NOT_OK(vfs()->create_dir(array_enumerations_uri)); + // Create commit directory URI array_commit_uri = array_uri.join_path(constants::array_commits_dir_name); RETURN_NOT_OK(vfs()->create_dir(array_commit_uri)); @@ -718,11 +724,9 @@ Status StorageManager::array_evolve_schema( auto&& array_schema = array_dir.load_array_schema_latest(encryption_key); // Evolve schema - auto&& [st1, array_schema_evolved] = - schema_evolution->evolve_schema(array_schema); - RETURN_NOT_OK(st1); + auto array_schema_evolved = schema_evolution->evolve_schema(array_schema); - Status st = store_array_schema(array_schema_evolved.value(), encryption_key); + Status st = store_array_schema(array_schema_evolved, encryption_key); if (!st.ok()) { logger_->status_no_return_value(st); return logger_->status(Status_StorageManagerError( @@ -1674,11 +1678,38 @@ Status StorageManagerCanonical::store_array_schema( URI array_schema_dir_uri = array_schema->array_uri().join_path(constants::array_schema_dir_name); RETURN_NOT_OK(vfs()->is_dir(array_schema_dir_uri, &schema_dir_exists)); + if (!schema_dir_exists) RETURN_NOT_OK(vfs()->create_dir(array_schema_dir_uri)); RETURN_NOT_OK(store_data_to_generic_tile(tile, schema_uri, encryption_key)); + bool enumerations_dir_exists = false; + URI array_enumerations_dir_uri = + array_schema_dir_uri.join_path(constants::array_enumerations_dir_name); + RETURN_NOT_OK( + vfs()->is_dir(array_enumerations_dir_uri, &enumerations_dir_exists)); + + if (!enumerations_dir_exists) { + RETURN_NOT_OK(vfs()->create_dir(array_enumerations_dir_uri)); + } + + for (auto enumeration : array_schema->enumerations()) { + SizeComputationSerializer enumeration_size_serializer; + enumeration->serialize( + enumeration_size_serializer, array_schema->write_version()); + + WriterTile tile{ + WriterTile::from_generic(enumeration_size_serializer.size())}; + Serializer serializer(tile.data(), tile.size()); + enumeration->serialize(serializer, array_schema->write_version()); + + auto abs_enumeration_uri = + array_enumerations_dir_uri.join_path(enumeration->name()); + RETURN_NOT_OK( + store_data_to_generic_tile(tile, abs_enumeration_uri, encryption_key)); + } + return Status::Ok(); } diff --git a/tiledb/sm/tile/CMakeLists.txt b/tiledb/sm/tile/CMakeLists.txt index 2a7180dd2557..280e545abe40 100644 --- a/tiledb/sm/tile/CMakeLists.txt +++ b/tiledb/sm/tile/CMakeLists.txt @@ -44,7 +44,6 @@ commence(object_library generic_tile_io) baseline buffer constants - context_resources crypto filter_pipeline tile @@ -52,4 +51,10 @@ commence(object_library generic_tile_io) ) conclude(object_library) +# This is linked outside the object_library scope because ContextResources +# is recompiled as part of the capi_context_stub. Including context_resources +# here like this prevents many headaches revolving around duplicate symbols +# when linking executables. +target_link_libraries(compile_generic_tile_io PRIVATE context_resources) + add_test_subdirectory() diff --git a/tiledb/storage_format/uri/CMakeLists.txt b/tiledb/storage_format/uri/CMakeLists.txt index 6923c26548d7..4dbd4dd3f319 100644 --- a/tiledb/storage_format/uri/CMakeLists.txt +++ b/tiledb/storage_format/uri/CMakeLists.txt @@ -30,6 +30,6 @@ include(object_library) # `uri_format` object library # commence(object_library uri_format) - this_target_sources(parse_uri.cc) + this_target_sources(parse_uri.cc generate_uri.cc) this_target_object_libraries(baseline time uuid vfs) conclude(object_library)