From 2a0c51489808c6313277dcf1b544eb74e4e2951b Mon Sep 17 00:00:00 2001 From: Stavros Papadopoulos Date: Sun, 15 Mar 2020 16:10:19 -0400 Subject: [PATCH] The user can now add separate filter lists per dimension. Towards addressing #93. --- HISTORY.md | 4 + test/src/unit-capi-array_schema.cc | 241 ++++++++++++++++-- test/src/unit-capi-dense_array.cc | 2 +- test/src/unit-capi-sparse_array.cc | 17 ++ test/src/unit-cppapi-schema.cc | 10 +- test/src/unit-filter-pipeline.cc | 21 ++ tiledb/sm/array_schema/array_schema.cc | 85 +++--- tiledb/sm/array_schema/array_schema.h | 24 +- tiledb/sm/array_schema/attribute.cc | 50 ++-- tiledb/sm/array_schema/attribute.h | 16 +- tiledb/sm/array_schema/dimension.cc | 91 ++++++- tiledb/sm/array_schema/dimension.h | 24 +- tiledb/sm/array_schema/domain.cc | 17 +- tiledb/sm/array_schema/domain.h | 6 +- tiledb/sm/c_api/tiledb.cc | 72 +++++- tiledb/sm/c_api/tiledb.h | 88 +++++++ tiledb/sm/cpp_api/dimension.h | 44 ++++ .../sm/filter/bit_width_reduction_filter.cc | 6 + tiledb/sm/filter/bit_width_reduction_filter.h | 3 + tiledb/sm/filter/bitshuffle_filter.cc | 7 + tiledb/sm/filter/bitshuffle_filter.h | 3 + tiledb/sm/filter/byteshuffle_filter.cc | 7 + tiledb/sm/filter/byteshuffle_filter.h | 3 + tiledb/sm/filter/checksum_md5_filter.cc | 7 + tiledb/sm/filter/checksum_md5_filter.h | 3 + tiledb/sm/filter/checksum_sha256_filter.cc | 7 + tiledb/sm/filter/checksum_sha256_filter.h | 3 + tiledb/sm/filter/compression_filter.cc | 34 +++ tiledb/sm/filter/compression_filter.h | 3 + .../sm/filter/encryption_aes256gcm_filter.cc | 7 + .../sm/filter/encryption_aes256gcm_filter.h | 3 + tiledb/sm/filter/filter.h | 3 + tiledb/sm/filter/filter_pipeline.cc | 14 + tiledb/sm/filter/filter_pipeline.h | 9 + tiledb/sm/filter/noop_filter.cc | 7 + tiledb/sm/filter/noop_filter.h | 3 + tiledb/sm/filter/positive_delta_filter.cc | 6 + tiledb/sm/filter/positive_delta_filter.h | 3 + tiledb/sm/query/reader.cc | 4 +- tiledb/sm/query/writer.cc | 4 +- 
tiledb/sm/serialization/array_schema.cc | 24 +- tiledb/sm/serialization/tiledb-rest.capnp.c++ | 61 +++-- tiledb/sm/serialization/tiledb-rest.capnp.h | 79 +++++- tiledb/sm/storage_manager/storage_manager.cc | 1 - 44 files changed, 926 insertions(+), 200 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 81879df75515..7b4f834cdd55 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -15,6 +15,7 @@ ## New features * The user can now set separate coordinate buffers to the query. Also any subset of the dimensions is supported. +* The user can set separate filter lists per dimension, as well as the number of values per coordinate. ## Improvements @@ -34,6 +35,9 @@ * Added C API function `tiledb_array_has_metadata_key` and C++ API function `Array::has_metadata_key` [#1439](https://github.com/TileDB-Inc/TileDB/pull/1439) * Added C API functions `tiledb_array_schema_{set,get}_allows_dups` and C++ API functions `Array::set_allows_dups` and `Array::allows_dups` +* Added C API functions `tiledb_dimension_{set,get}_filter_list` and `tiledb_dimension_{set,get}_cell_val_num` +* Added C++ API functions `Dimension::set_cell_val_num` and `Dimension::cell_val_num`. +* Added C++ API functions `Dimension::set_filter_list` and `Dimension::filter_list`. ## API removals diff --git a/test/src/unit-capi-array_schema.cc b/test/src/unit-capi-array_schema.cc index 2c9874db6300..958c25c10bb0 100644 --- a/test/src/unit-capi-array_schema.cc +++ b/test/src/unit-capi-array_schema.cc @@ -106,7 +106,7 @@ struct ArraySchemaFx { * If true, array schema is serialized before submission, to test the * serialization paths. 
*/ - bool serialize_array_schema = false; + bool serialize_array_schema_ = false; // TileDB context and vfs tiledb_ctx_t* ctx_; @@ -289,7 +289,7 @@ int ArraySchemaFx::array_create_wrapper( return tiledb_array_create(ctx_, path.c_str(), array_schema); #endif - if (!serialize_array_schema) { + if (!serialize_array_schema_) { return tiledb_array_create(ctx_, path.c_str(), array_schema); } @@ -348,7 +348,7 @@ int ArraySchemaFx::array_schema_load_wrapper( return tiledb_array_schema_load(ctx_, path.c_str(), array_schema); #endif - if (!serialize_array_schema) { + if (!serialize_array_schema_) { return tiledb_array_schema_load(ctx_, path.c_str(), array_schema); } @@ -408,7 +408,7 @@ int ArraySchemaFx::array_get_schema_wrapper( return tiledb_array_get_schema(ctx_, array, array_schema); #endif - if (!serialize_array_schema) { + if (!serialize_array_schema_) { return tiledb_array_get_schema(ctx_, array, array_schema); } @@ -467,7 +467,7 @@ int ArraySchemaFx::array_schema_get_domain_wrapper( return tiledb_array_schema_get_domain(ctx_, array_schema, domain); #endif - if (!serialize_array_schema) { + if (!serialize_array_schema_) { return tiledb_array_schema_get_domain(ctx_, array_schema, domain); } @@ -565,6 +565,32 @@ void ArraySchemaFx::create_array(const std::string& path) { &d4); REQUIRE(rc == TILEDB_ERR); + // Set up filters + tiledb_filter_t* filter; + rc = tiledb_filter_alloc(ctx_, TILEDB_FILTER_BZIP2, &filter); + REQUIRE(rc == TILEDB_OK); + int level = 5; + rc = tiledb_filter_set_option(ctx_, filter, TILEDB_COMPRESSION_LEVEL, &level); + REQUIRE(rc == TILEDB_OK); + tiledb_filter_list_t* filter_list; + rc = tiledb_filter_list_alloc(ctx_, &filter_list); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_filter_list_add_filter(ctx_, filter_list, filter); + REQUIRE(rc == TILEDB_OK); + tiledb_filter_free(&filter); + rc = tiledb_filter_alloc(ctx_, TILEDB_FILTER_BIT_WIDTH_REDUCTION, &filter); + REQUIRE(rc == TILEDB_OK); + int window = 1000; + rc = tiledb_filter_set_option( + ctx_, 
filter, TILEDB_BIT_WIDTH_MAX_WINDOW, &window); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_filter_list_add_filter(ctx_, filter_list, filter); + REQUIRE(rc == TILEDB_OK); + + // Add filters to dimension + rc = tiledb_dimension_set_filter_list(ctx_, d2, filter_list); + REQUIRE(rc == TILEDB_OK); + // Set domain tiledb_domain_t* domain; rc = tiledb_domain_alloc(ctx_, &domain); @@ -600,6 +626,8 @@ void ArraySchemaFx::create_array(const std::string& path) { tiledb_attribute_t* attr; rc = tiledb_attribute_alloc(ctx_, ATTR_NAME, ATTR_TYPE, &attr); REQUIRE(rc == TILEDB_OK); + rc = tiledb_attribute_set_filter_list(ctx_, attr, filter_list); + REQUIRE(rc == TILEDB_OK); rc = tiledb_array_schema_add_attribute(ctx_, array_schema, attr); REQUIRE(rc == TILEDB_OK); @@ -616,6 +644,8 @@ void ArraySchemaFx::create_array(const std::string& path) { REQUIRE(rc == TILEDB_ERR); // Clean up + tiledb_filter_free(&filter); + tiledb_filter_list_free(&filter_list); tiledb_array_schema_free(&array_schema); tiledb_attribute_free(&attr); tiledb_dimension_free(&d1); @@ -713,7 +743,7 @@ void ArraySchemaFx::load_and_check_array_schema(const std::string& path) { rc = tiledb_attribute_get_filter_list(ctx_, attr, &attr_filters); REQUIRE(rc == TILEDB_OK); rc = tiledb_filter_list_get_nfilters(ctx_, attr_filters, &nfilters); - CHECK(nfilters == 0); + CHECK(nfilters == 2); tiledb_filter_list_free(&attr_filters); unsigned int cell_val_num; @@ -805,21 +835,23 @@ void ArraySchemaFx::load_and_check_array_schema(const std::string& path) { "- Cell order: " + CELL_ORDER_STR + "\n" + "- Tile order: " + TILE_ORDER_STR + "\n" + "- Capacity: " + CAPACITY_STR + "\n" - "- Coordinates compressor: ZSTD\n" + - "- Coordinates compression level: -1\n\n" + - "=== Domain ===\n" - "- Dimensions type: " + - DIM_TYPE_STR + "\n\n" + "### Dimension ###\n" + "- Name: " + DIM1_NAME + - "\n" + "- Domain: " + DIM1_DOMAIN_STR + "\n" + - "- Tile extent: " + DIM1_TILE_EXTENT_STR + "\n" + "\n" + + "- Allows duplicates: " + + "false\n" + "- 
Coordinates filters: 1\n" + + " > ZSTD: COMPRESSION_LEVEL=-1\n" + "- Offsets filters: 1\n" + + " > ZSTD: COMPRESSION_LEVEL=-1\n\n" + "### Dimension ###\n" + + "- Name: " + DIM1_NAME + "\n" + "- Domain: " + DIM1_DOMAIN_STR + "\n" + + "- Tile extent: " + DIM1_TILE_EXTENT_STR + "\n" + "- Filters: 0\n\n" + "### Dimension ###\n" + "- Name: " + DIM2_NAME + "\n" + "- Domain: " + DIM2_DOMAIN_STR + "\n" + - "- Tile extent: " + DIM2_TILE_EXTENT_STR + "\n" + "\n" + + "- Tile extent: " + DIM2_TILE_EXTENT_STR + "\n" + "- Filters: 2\n" + + " > BZIP2: COMPRESSION_LEVEL=5\n" + + " > BitWidthReduction: BIT_WIDTH_MAX_WINDOW=1000\n\n" + "### Attribute ###\n" + "- Name: " + ATTR_NAME + "\n" + "- Type: " + ATTR_TYPE_STR + "\n" + - "- Compressor: " + ATTR_COMPRESSOR_STR + "\n" + - "- Compression level: " + ATTR_COMPRESSION_LEVEL_STR + "\n" + - "- Cell val num: " + CELL_VAL_NUM_STR + "\n"; + "- Cell val num: " + CELL_VAL_NUM_STR + "\n" + "- Filters: 2\n" + + " > BZIP2: COMPRESSION_LEVEL=5\n" + + " > BitWidthReduction: BIT_WIDTH_MAX_WINDOW=1000\n"; FILE* gold_fout = fopen("gold_fout.txt", "w"); const char* dump = dump_str.c_str(); fwrite(dump, sizeof(char), strlen(dump), gold_fout); @@ -852,12 +884,12 @@ std::string ArraySchemaFx::random_name(const std::string& prefix) { TEST_CASE_METHOD( ArraySchemaFx, "C API: Test array schema creation and retrieval", - "[capi], [array-schema]") { + "[capi][array-schema]") { SECTION("- No serialization") { - serialize_array_schema = false; + serialize_array_schema_ = false; } SECTION("- Serialization") { - serialize_array_schema = true; + serialize_array_schema_ = true; } std::string array_name; @@ -1069,10 +1101,10 @@ TEST_CASE_METHOD( "C API: Test array schema offsets/coords filter lists", "[capi], [array-schema], [filter]") { SECTION("- No serialization") { - serialize_array_schema = false; + serialize_array_schema_ = false; } SECTION("- Serialization") { - serialize_array_schema = true; + serialize_array_schema_ = true; } // Create array schema @@ 
-1210,10 +1242,10 @@ TEST_CASE_METHOD( "C API: Test array schema load error condition", "[capi], [array-schema]") { SECTION("- No serialization") { - serialize_array_schema = false; + serialize_array_schema_ = false; } SECTION("- Serialization") { - serialize_array_schema = true; + serialize_array_schema_ = true; } // Create array schema @@ -1300,10 +1332,10 @@ TEST_CASE_METHOD( "C API: Test array schema datetimes", "[capi][array-schema][datetime]") { SECTION("- No serialization") { - serialize_array_schema = false; + serialize_array_schema_ = false; } SECTION("- Serialization") { - serialize_array_schema = true; + serialize_array_schema_ = true; } // Create array schema @@ -1359,10 +1391,10 @@ TEST_CASE_METHOD( "C API: Test array schema setter/getter for allows_dups", "[capi][array-schema][allows-dups]") { SECTION("- No serialization") { - serialize_array_schema = false; + serialize_array_schema_ = false; } SECTION("- Serialization") { - serialize_array_schema = true; + serialize_array_schema_ = true; } // --- Test dense (should error out on allowing duplicates) --- @@ -1435,3 +1467,156 @@ TEST_CASE_METHOD( delete_array(array_name); remove_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); } + +TEST_CASE_METHOD( + ArraySchemaFx, + "C API: Test array schema setter/getter for dimension filters and cell val " + "num", + "[capi][array-schema][dimension]") { + SECTION("- No serialization") { + serialize_array_schema_ = false; + } + SECTION("- Serialization") { + serialize_array_schema_ = true; + } + + // Allocate array schema + tiledb_array_schema_t* array_schema; + int rc = tiledb_array_schema_alloc(ctx_, TILEDB_SPARSE, &array_schema); + REQUIRE(rc == TILEDB_OK); + + // Set up filter list + tiledb_filter_t* filter; + rc = tiledb_filter_alloc(ctx_, TILEDB_FILTER_BZIP2, &filter); + REQUIRE(rc == TILEDB_OK); + int level = 5; + rc = tiledb_filter_set_option(ctx_, filter, TILEDB_COMPRESSION_LEVEL, &level); + REQUIRE(rc == TILEDB_OK); + tiledb_filter_list_t* filter_list; + rc = 
tiledb_filter_list_alloc(ctx_, &filter_list); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_filter_list_add_filter(ctx_, filter_list, filter); + REQUIRE(rc == TILEDB_OK); + + // Create dimension + tiledb_dimension_t* d; + rc = tiledb_dimension_alloc( + ctx_, "d", TILEDB_INT64, &DIM_DOMAIN[0], &TILE_EXTENTS[0], &d); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_dimension_set_cell_val_num(ctx_, d, 3); + CHECK(rc == TILEDB_ERR); + rc = tiledb_dimension_set_cell_val_num(ctx_, d, 1); + CHECK(rc == TILEDB_OK); + rc = tiledb_dimension_set_filter_list(ctx_, d, filter_list); + CHECK(rc == TILEDB_OK); + + // Set domain + tiledb_domain_t* domain; + rc = tiledb_domain_alloc(ctx_, &domain); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_domain_add_dimension(ctx_, domain, d); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_schema_set_domain(ctx_, array_schema, domain); + REQUIRE(rc == TILEDB_OK); + + // Set attribute + tiledb_attribute_t* a; + rc = tiledb_attribute_alloc(ctx_, "a", ATTR_TYPE, &a); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_schema_add_attribute(ctx_, array_schema, a); + REQUIRE(rc == TILEDB_OK); + + // Create array + std::string array_name = FILE_URI_PREFIX + FILE_TEMP_DIR + "dimension"; + create_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); + rc = array_create_wrapper(array_name, array_schema); + REQUIRE(rc == TILEDB_OK); + + // Clean up + tiledb_filter_free(&filter); + tiledb_filter_list_free(&filter_list); + tiledb_attribute_free(&a); + tiledb_dimension_free(&d); + tiledb_domain_free(&domain); + tiledb_array_schema_free(&array_schema); + + // Load array schema + rc = array_schema_load_wrapper(array_name.c_str(), &array_schema); + REQUIRE(rc == TILEDB_OK); + + // Get dimension + rc = tiledb_array_schema_get_domain(ctx_, array_schema, &domain); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_domain_get_dimension_from_index(ctx_, domain, 0, &d); + REQUIRE(rc == TILEDB_OK); + + // Check cell val num + unsigned num; + rc = tiledb_dimension_get_cell_val_num(ctx_, d, &num); + 
REQUIRE(rc == TILEDB_OK); + CHECK(num == 1); + + // Check filter list + rc = tiledb_dimension_get_filter_list(ctx_, d, &filter_list); + REQUIRE(rc == TILEDB_OK); + uint32_t nfilters; + rc = tiledb_filter_list_get_nfilters(ctx_, filter_list, &nfilters); + CHECK(nfilters == 1); + rc = tiledb_filter_list_get_filter_from_index(ctx_, filter_list, 0, &filter); + REQUIRE(rc == TILEDB_OK); + tiledb_filter_type_t type; + rc = tiledb_filter_get_type(ctx_, filter, &type); + REQUIRE(rc == TILEDB_OK); + REQUIRE(type == TILEDB_FILTER_BZIP2); + int read_level; + rc = tiledb_filter_get_option( + ctx_, filter, TILEDB_COMPRESSION_LEVEL, &read_level); + REQUIRE(rc == TILEDB_OK); + REQUIRE(read_level == level); + + // Clean up + tiledb_filter_free(&filter); + tiledb_filter_list_free(&filter_list); + tiledb_dimension_free(&d); + tiledb_domain_free(&domain); + tiledb_array_schema_free(&array_schema); + delete_array(array_name); + remove_temp_dir(FILE_URI_PREFIX + FILE_TEMP_DIR); +} + +TEST_CASE_METHOD( + ArraySchemaFx, + "C API: Test array schema, set filter errors", + "[capi][array-schema][filter-error]") { + // Set up filter list + tiledb_filter_t* filter; + int rc = tiledb_filter_alloc(ctx_, TILEDB_FILTER_DOUBLE_DELTA, &filter); + REQUIRE(rc == TILEDB_OK); + tiledb_filter_list_t* filter_list; + rc = tiledb_filter_list_alloc(ctx_, &filter_list); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_filter_list_add_filter(ctx_, filter_list, filter); + REQUIRE(rc == TILEDB_OK); + + // Create real dimension and test double delta + tiledb_dimension_t* d; + float domain[] = {1.0f, 2.0f}; + float extent = .5f; + rc = tiledb_dimension_alloc(ctx_, "d", TILEDB_FLOAT32, domain, &extent, &d); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_dimension_set_filter_list(ctx_, d, filter_list); + CHECK(rc == TILEDB_ERR); + + // Create real attribute and test double delta + tiledb_attribute_t* a; + rc = tiledb_attribute_alloc(ctx_, "a", TILEDB_FLOAT64, &a); + REQUIRE(rc == TILEDB_OK); + rc = 
tiledb_attribute_set_filter_list(ctx_, a, filter_list); + CHECK(rc == TILEDB_ERR); + + // Clean up + tiledb_filter_free(&filter); + tiledb_filter_list_free(&filter_list); + tiledb_attribute_free(&a); + tiledb_dimension_free(&d); +} \ No newline at end of file diff --git a/test/src/unit-capi-dense_array.cc b/test/src/unit-capi-dense_array.cc index 3495921c95f4..64788818a0ac 100644 --- a/test/src/unit-capi-dense_array.cc +++ b/test/src/unit-capi-dense_array.cc @@ -3355,7 +3355,7 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( DenseArrayFx, "C API: Test dense array, simultaneous writes", - "[capi], [dense], [dense-simultaneous-writes]") { + "[capi][dense][dense-simultaneous-writes]") { SECTION("- No serialization") { serialize_query_ = false; } diff --git a/test/src/unit-capi-sparse_array.cc b/test/src/unit-capi-sparse_array.cc index 8bc8f2ed312a..b45cea158a61 100644 --- a/test/src/unit-capi-sparse_array.cc +++ b/test/src/unit-capi-sparse_array.cc @@ -875,6 +875,21 @@ void SparseArrayFx::create_sparse_array( ctx_, "d2", TILEDB_UINT64, &dim_domain[2], &tile_extents[1], &d2); CHECK(rc == TILEDB_OK); + // Set up filter list + tiledb_filter_t* filter; + rc = tiledb_filter_alloc(ctx_, TILEDB_FILTER_BZIP2, &filter); + REQUIRE(rc == TILEDB_OK); + int level = 5; + rc = tiledb_filter_set_option(ctx_, filter, TILEDB_COMPRESSION_LEVEL, &level); + REQUIRE(rc == TILEDB_OK); + tiledb_filter_list_t* filter_list; + rc = tiledb_filter_list_alloc(ctx_, &filter_list); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_filter_list_add_filter(ctx_, filter_list, filter); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_dimension_set_filter_list(ctx_, d1, filter_list); + REQUIRE(rc == TILEDB_OK); + // Create domain tiledb_domain_t* domain; rc = tiledb_domain_alloc(ctx_, &domain); @@ -939,6 +954,8 @@ void SparseArrayFx::create_sparse_array( tiledb_attribute_free(&a3); tiledb_dimension_free(&d1); tiledb_dimension_free(&d2); + tiledb_filter_free(&filter); + tiledb_filter_list_free(&filter_list); 
tiledb_domain_free(&domain); tiledb_array_schema_free(&array_schema); } diff --git a/test/src/unit-cppapi-schema.cc b/test/src/unit-cppapi-schema.cc index d2ec83bb14c1..095743e8030f 100644 --- a/test/src/unit-cppapi-schema.cc +++ b/test/src/unit-cppapi-schema.cc @@ -37,9 +37,16 @@ TEST_CASE("C++ API: Schema", "[cppapi][schema]") { using namespace tiledb; Context ctx; + FilterList filters(ctx); + filters.add_filter({ctx, TILEDB_FILTER_LZ4}); + Domain dense_domain(ctx); auto id1 = Dimension::create(ctx, "d1", {{-100, 100}}, 10); auto id2 = Dimension::create(ctx, "d2", {{0, 100}}, 5); + CHECK_THROWS(id1.set_cell_val_num(4)); + CHECK_NOTHROW(id1.set_cell_val_num(1)); + CHECK_NOTHROW(id1.set_filter_list(filters)); + CHECK_NOTHROW(id1.filter_list()); dense_domain.add_dimension(id1).add_dimension(id2); Domain sparse_domain(ctx); @@ -51,8 +58,6 @@ TEST_CASE("C++ API: Schema", "[cppapi][schema]") { auto a2 = Attribute::create(ctx, "a2"); auto a3 = Attribute::create>(ctx, "a3"); auto a4 = Attribute::create>(ctx, "a4"); - FilterList filters(ctx); - filters.add_filter({ctx, TILEDB_FILTER_LZ4}); a1.set_filter_list(filters); SECTION("Dense Array Schema") { @@ -150,6 +155,7 @@ TEST_CASE("C++ API: Schema", "[cppapi][schema]") { CHECK(dims[0].domain().second == 100.0); CHECK_THROWS(dims[0].tile_extent()); CHECK(dims[0].tile_extent() == 10.0); + CHECK(dims[0].cell_val_num() == 1); CHECK(sparse_domain.type() == TILEDB_FLOAT64); } diff --git a/test/src/unit-filter-pipeline.cc b/test/src/unit-filter-pipeline.cc index 68eb0b143777..1c7fc5816dcf 100644 --- a/test/src/unit-filter-pipeline.cc +++ b/test/src/unit-filter-pipeline.cc @@ -69,6 +69,10 @@ class Add1InPlace : public Filter { : Filter(FilterType::FILTER_NONE) { } + void dump(FILE* out) const override { + (void)out; + } + Status run_forward( FilterBuffer* input_metadata, FilterBuffer* input, @@ -129,6 +133,10 @@ class Add1OutOfPlace : public Filter { : Filter(FilterType::FILTER_NONE) { } + void dump(FILE* out) const override { + 
(void)out; + } + Status run_forward( FilterBuffer* input_metadata, FilterBuffer* input, @@ -211,6 +219,10 @@ class AddNInPlace : public Filter { increment_ = 1; } + void dump(FILE* out) const override { + (void)out; + } + Status run_forward( FilterBuffer* input_metadata, FilterBuffer* input, @@ -282,6 +294,11 @@ class PseudoChecksumFilter : public Filter { PseudoChecksumFilter() : Filter(FilterType::FILTER_NONE) { } + + void dump(FILE* out) const override { + (void)out; + } + Status run_forward( FilterBuffer* input_metadata, FilterBuffer* input, @@ -362,6 +379,10 @@ class Add1IncludingMetadataFilter : public Filter { : Filter(FilterType::FILTER_NONE) { } + void dump(FILE* out) const override { + (void)out; + } + Status run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/array_schema/array_schema.cc b/tiledb/sm/array_schema/array_schema.cc index 72eda7e1d9c7..f20ce07580c1 100644 --- a/tiledb/sm/array_schema/array_schema.cc +++ b/tiledb/sm/array_schema/array_schema.cc @@ -39,6 +39,7 @@ #include "tiledb/sm/enums/array_type.h" #include "tiledb/sm/enums/compressor.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/enums/filter_type.h" #include "tiledb/sm/enums/layout.h" #include "tiledb/sm/filter/compression_filter.h" #include "tiledb/sm/misc/logger.h" @@ -190,8 +191,8 @@ unsigned int ArraySchema::cell_val_num(const std::string& name) const { return dim_it->second->cell_val_num(); } -const FilterPipeline* ArraySchema::cell_var_offsets_filters() const { - return &cell_var_offsets_filters_; +const FilterPipeline& ArraySchema::cell_var_offsets_filters() const { + return cell_var_offsets_filters_; } Status ArraySchema::check() const { @@ -216,10 +217,7 @@ Status ArraySchema::check() const { } } - if (!check_double_delta_compressor()) - return LOG_STATUS(Status::ArraySchemaError( - "Array schema check failed; Double delta compression can be used " - "only with integer values")); + RETURN_NOT_OK(check_double_delta_compressor()); 
if (!check_attribute_dimension_names()) return LOG_STATUS( @@ -243,7 +241,7 @@ Status ArraySchema::check_attributes( return Status::Ok(); } -const FilterPipeline* ArraySchema::filters(const std::string& name) const { +const FilterPipeline& ArraySchema::filters(const std::string& name) const { if (name == constants::coords) return coords_filters(); @@ -255,14 +253,15 @@ const FilterPipeline* ArraySchema::filters(const std::string& name) const { // Dimension (if filters not set, return default coordinate filters) auto dim_it = dim_map_.find(name); assert(dim_it != dim_map_.end()); - auto ret = dim_it->second->filters(); - return (ret != nullptr) ? ret : coords_filters(); + const auto& ret = dim_it->second->filters(); + return !ret.empty() ? ret : coords_filters(); } -const FilterPipeline* ArraySchema::coords_filters() const { - return &coords_filters_; +const FilterPipeline& ArraySchema::coords_filters() const { + return coords_filters_; } +/* Compressor ArraySchema::coords_compression() const { auto compressor = coords_filters_.get_filter(); return (compressor == nullptr) ? Compressor::NO_COMPRESSION : @@ -273,6 +272,7 @@ int ArraySchema::coords_compression_level() const { auto compressor = coords_filters_.get_filter(); return (compressor == nullptr) ? -1 : compressor->compression_level(); } +*/ bool ArraySchema::dense() const { return array_type_ == ArrayType::DENSE; @@ -298,14 +298,15 @@ void ArraySchema::dump(FILE* out) const { fprintf(out, "- Cell order: %s\n", layout_str(cell_order_).c_str()); fprintf(out, "- Tile order: %s\n", layout_str(tile_order_).c_str()); fprintf(out, "- Capacity: %" PRIu64 "\n", capacity_); + fprintf(out, "- Allows duplicates: %s\n", (allows_dups_ ? 
"true" : "false")); + fprintf(out, "- Coordinates filters: %u", (unsigned)coords_filters_.size()); + coords_filters_.dump(out); fprintf( out, - "- Coordinates compressor: %s\n", - compressor_str(coords_compression()).c_str()); - fprintf( - out, - "- Coordinates compression level: %d\n\n", - coords_compression_level()); + "\n- Offsets filters: %u", + (unsigned)cell_var_offsets_filters_.size()); + cell_var_offsets_filters_.dump(out); + fprintf(out, "\n"); if (domain_ != nullptr) domain_->dump(out); @@ -378,7 +379,7 @@ Status ArraySchema::serialize(Buffer* buff) const { RETURN_NOT_OK(cell_var_offsets_filters_.serialize(buff)); // Write domain - domain_->serialize(buff); + RETURN_NOT_OK(domain_->serialize(buff, version_)); // Write attributes auto attribute_num = (uint32_t)attributes_.size(); @@ -498,7 +499,7 @@ Status ArraySchema::deserialize(ConstBuffer* buff) { // Load domain domain_ = new Domain(); - RETURN_NOT_OK(domain_->deserialize(buff)); + RETURN_NOT_OK(domain_->deserialize(buff, version_)); // Load attributes uint32_t attribute_num; @@ -509,6 +510,8 @@ Status ArraySchema::deserialize(ConstBuffer* buff) { attributes_.emplace_back(attr); attribute_map_[attr->name()] = attr; } + + // Create dimension map auto dim_num = domain()->dim_num(); for (unsigned d = 0; d < dim_num; ++d) { auto dim = dimension(d); @@ -587,15 +590,6 @@ Status ArraySchema::set_domain(Domain* domain) { delete domain_; domain_ = new Domain(domain); - // Potentially change the default coordinates compressor - if (domain_->all_dims_real() && - coords_compression() == Compressor::DOUBLE_DELTA) { - auto* filter = coords_filters_.get_filter(); - assert(filter != nullptr); - filter->set_compressor(constants::real_coords_compression); - filter->set_compression_level(-1); - } - // Create dimension map dim_map_.clear(); auto dim_num = domain_->dim_num(); @@ -633,16 +627,35 @@ bool ArraySchema::check_attribute_dimension_names() const { return (names.size() == attributes_.size() + dim_num); } -bool 
ArraySchema::check_double_delta_compressor() const { - // Check attributes - for (auto attr : attributes_) { - if ((attr->type() == Datatype::FLOAT32 || - attr->type() == Datatype::FLOAT64) && - attr->compressor() == Compressor::DOUBLE_DELTA) - return false; +Status ArraySchema::check_double_delta_compressor() const { + // Check if coordinate filters have DOUBLE DELTA as a compressor + bool has_double_delta = false; + for (size_t i = 0; i < coords_filters_.size(); ++i) { + if (coords_filters_.get_filter(i)->type() == + FilterType::FILTER_DOUBLE_DELTA) { + has_double_delta = true; + break; + } + } + + // Not applicable when DOUBLE DELTA not present in coord filters + if (!has_double_delta) + return Status::Ok(); + + // Error if any real dimension inherits the coord filters with DOUBLE DELTA. + // A dimension inherits the filters when it has no filters. + auto dim_num = domain_->dim_num(); + for (unsigned d = 0; d < dim_num; ++d) { + auto dim = domain_->dimension(d); + const auto& dim_filters = dim->filters(); + auto dim_type = dim->type(); + if (datatype_is_real(dim_type) && dim_filters.empty()) + return LOG_STATUS( + Status::ArraySchemaError("Real dimension cannot inherit coordinate " + "filters with DOUBLE DELTA compression")); } - return true; + return Status::Ok(); } void ArraySchema::clear() { diff --git a/tiledb/sm/array_schema/array_schema.h b/tiledb/sm/array_schema/array_schema.h index 72fb4bf39583..db02b31817ac 100644 --- a/tiledb/sm/array_schema/array_schema.h +++ b/tiledb/sm/array_schema/array_schema.h @@ -126,10 +126,8 @@ class ArraySchema { /** Returns the number of values per cell of the input attribute/dimension. */ unsigned int cell_val_num(const std::string& name) const; - /** - * Return a pointer to the pipeline used for offsets of variable-sized cells. - */ - const FilterPipeline* cell_var_offsets_filters() const; + /** Return the filter pipeline used for offsets of variable-sized cells.
*/ + const FilterPipeline& cell_var_offsets_filters() const; /** * Checks the correctness of the array schema. @@ -151,16 +149,10 @@ class ArraySchema { * Return the filter pipeline for the given attribute/dimension (can be * TILEDB_COORDS). */ - const FilterPipeline* filters(const std::string& name) const; - - /** Return a pointer to the pipeline used for coordinates. */ - const FilterPipeline* coords_filters() const; - - /** Returns the compressor of the coordinates. */ - Compressor coords_compression() const; + const FilterPipeline& filters(const std::string& name) const; - /** Returns the compression level of the coordinates. */ - int coords_compression_level() const; + /** Return the pipeline used for coordinates. */ + const FilterPipeline& coords_filters() const; /** True if the array is dense. */ bool dense() const; @@ -354,10 +346,10 @@ class ArraySchema { bool check_attribute_dimension_names() const; /** - * Returns false if double delta compression is used with real attributes - * or coordinates and true otherwise. + * Returns error if double delta compression is used in the zipped + * coordinate filters and is inherited by a dimension. */ - bool check_double_delta_compressor() const; + Status check_double_delta_compressor() const; /** Clears all members. Use with caution! 
*/ void clear(); diff --git a/tiledb/sm/array_schema/attribute.cc b/tiledb/sm/array_schema/attribute.cc index 93f365e57f56..4d95d50c2f39 100644 --- a/tiledb/sm/array_schema/attribute.cc +++ b/tiledb/sm/array_schema/attribute.cc @@ -34,6 +34,7 @@ #include "tiledb/sm/buffer/const_buffer.h" #include "tiledb/sm/enums/compressor.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/enums/filter_type.h" #include "tiledb/sm/filter/compression_filter.h" #include "tiledb/sm/misc/logger.h" #include "tiledb/sm/misc/utils.h" @@ -84,17 +85,6 @@ unsigned int Attribute::cell_val_num() const { return cell_val_num_; } -Compressor Attribute::compressor() const { - auto compressor = filters_.get_filter(); - return compressor == nullptr ? Compressor::NO_COMPRESSION : - compressor->compressor(); -} - -int Attribute::compression_level() const { - auto compressor = filters_.get_filter(); - return compressor == nullptr ? -1 : compressor->compression_level(); -} - // ===== FORMAT ===== // attribute_name_size (uint32_t) // attribute_name (string) @@ -129,17 +119,17 @@ void Attribute::dump(FILE* out) const { fprintf(out, "### Attribute ###\n"); fprintf(out, "- Name: %s\n", name_.c_str()); fprintf(out, "- Type: %s\n", datatype_str(type_).c_str()); - fprintf(out, "- Compressor: %s\n", compressor_str(compressor()).c_str()); - fprintf(out, "- Compression level: %d\n", compression_level()); - if (!var_size()) fprintf(out, "- Cell val num: %u\n", cell_val_num_); else fprintf(out, "- Cell val num: var\n"); + fprintf(out, "- Filters: %u", (unsigned)filters_.size()); + filters_.dump(out); + fprintf(out, "\n"); } -const FilterPipeline* Attribute::filters() const { - return &filters_; +const FilterPipeline& Attribute::filters() const { + return filters_; } const std::string& Attribute::name() const { @@ -182,25 +172,21 @@ Status Attribute::set_cell_val_num(unsigned int cell_val_num) { return Status::Ok(); } -void Attribute::set_compressor(Compressor compressor) { - auto filter = 
filters_.get_filter(); - if (filter == nullptr) - filters_.add_filter(CompressionFilter(compressor, -1)); - else - filter->set_compressor(compressor); -} +Status Attribute::set_filter_pipeline(const FilterPipeline* pipeline) { + if (pipeline == nullptr) + return LOG_STATUS(Status::AttributeError( + "Cannot set filter pipeline to attribute; Pipeline cannot be null")); -void Attribute::set_compression_level(int compression_level) { - auto filter = filters_.get_filter(); - if (filter == nullptr) - filters_.add_filter( - CompressionFilter(Compressor::NO_COMPRESSION, compression_level)); - else - filter->set_compression_level(compression_level); -} + for (unsigned i = 0; i < pipeline->size(); ++i) { + if (datatype_is_real(type_) && + pipeline->get_filter(i)->type() == FilterType::FILTER_DOUBLE_DELTA) + return LOG_STATUS( + Status::AttributeError("Cannot set DOUBLE DELTA filter to a " + "dimension with a real datatype")); + } -Status Attribute::set_filter_pipeline(const FilterPipeline* pipeline) { filters_ = *pipeline; + return Status::Ok(); } diff --git a/tiledb/sm/array_schema/attribute.h b/tiledb/sm/array_schema/attribute.h index e9665b2da8a7..bb45552c3c64 100644 --- a/tiledb/sm/array_schema/attribute.h +++ b/tiledb/sm/array_schema/attribute.h @@ -89,12 +89,6 @@ class Attribute { /** Returns the number of values per cell. */ unsigned int cell_val_num() const; - /** Returns the compressor. */ - Compressor compressor() const; - - /** Returns the compression level. */ - int compression_level() const; - /** * Populates the object members from the data in the input binary buffer. * @@ -107,7 +101,7 @@ class Attribute { void dump(FILE* out) const; /** Returns the filter pipeline of this attribute. */ - const FilterPipeline* filters() const; + const FilterPipeline& filters() const; /** Returns the attribute name. 
*/ const std::string& name() const; @@ -129,12 +123,6 @@ class Attribute { */ Status set_cell_val_num(unsigned int cell_val_num); - /** Sets the attribute compressor. */ - void set_compressor(Compressor compressor); - - /** Sets the attribute compression level. */ - void set_compression_level(int compression_level); - /** Sets the filter pipeline for this attribute. */ Status set_filter_pipeline(const FilterPipeline* pipeline); @@ -156,7 +144,7 @@ class Attribute { /* ********************************* */ /** The attribute number of values per cell. */ - unsigned int cell_val_num_; + unsigned cell_val_num_; /** The attribute filter pipeline. */ FilterPipeline filters_; diff --git a/tiledb/sm/array_schema/dimension.cc b/tiledb/sm/array_schema/dimension.cc index 9562c95fb6c8..5a55ce1788e6 100644 --- a/tiledb/sm/array_schema/dimension.cc +++ b/tiledb/sm/array_schema/dimension.cc @@ -34,6 +34,7 @@ #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/buffer/const_buffer.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/enums/filter_type.h" #include "tiledb/sm/misc/utils.h" #include @@ -53,6 +54,7 @@ Dimension::Dimension() Dimension::Dimension(const std::string& name, Datatype type) : name_(name) , type_(type) { + cell_val_num_ = 1; set_ceil_to_tile_func(); set_check_range_func(); set_coincides_with_tiles_func(); @@ -75,6 +77,8 @@ Dimension::Dimension(const std::string& name, Datatype type) Dimension::Dimension(const Dimension* dim) { assert(dim != nullptr); + cell_val_num_ = dim->cell_val_num_; + filters_ = dim->filters_; name_ = dim->name(); type_ = dim->type_; @@ -106,8 +110,18 @@ Dimension::Dimension(const Dimension* dim) { /* ********************************* */ unsigned int Dimension::cell_val_num() const { - // TODO: in a future PR the user will be able to set this value - return 1; + return cell_val_num_; +} + +Status Dimension::set_cell_val_num(unsigned int cell_val_num) { + if (cell_val_num != 1) + return LOG_STATUS(Status::DimensionError( + 
"Cannot set number of values per coordinate; Currently only one value " + "per coordinate is supported")); + + cell_val_num_ = cell_val_num; + + return Status::Ok(); } uint64_t Dimension::coord_size() const { @@ -174,19 +188,36 @@ std::string Dimension::coord_to_str(const void* coord) const { // ===== FORMAT ===== // dimension_name_size (uint32_t) // dimension_name (string) +// type (uint8_t) +// cell_val_num (uint32_t) +// filter_pipeline (see FilterPipeline::serialize) // domain (void* - 2*type_size) // null_tile_extent (uint8_t) // tile_extent (void* - type_size) -Status Dimension::deserialize(ConstBuffer* buff, Datatype type) { - // Set type - type_ = type; - +Status Dimension::deserialize( + ConstBuffer* buff, uint32_t version, Datatype type) { // Load dimension name uint32_t dimension_name_size; RETURN_NOT_OK(buff->read(&dimension_name_size, sizeof(uint32_t))); name_.resize(dimension_name_size); RETURN_NOT_OK(buff->read(&name_[0], dimension_name_size)); + // Applicable only to version >= 5 + if (version >= 5) { + // Load type + uint8_t type; + RETURN_NOT_OK(buff->read(&type, sizeof(uint8_t))); + type_ = (Datatype)type; + + // Load cell_val_num_ + RETURN_NOT_OK(buff->read(&cell_val_num_, sizeof(uint32_t))); + + // Load filter pipeline + RETURN_NOT_OK(filters_.deserialize(buff)); + } else { + type_ = type; + } + // Load domain uint64_t domain_size = 2 * coord_size(); std::vector tmp(domain_size); @@ -239,12 +270,13 @@ void Dimension::dump(FILE* out) const { fprintf(out, "- Name: %s\n", name_.c_str()); fprintf(out, "- Domain: %s\n", domain_s.c_str()); fprintf(out, "- Tile extent: %s\n", tile_extent_s.c_str()); + fprintf(out, "- Filters: %u", (unsigned)filters_.size()); + filters_.dump(out); + fprintf(out, "\n"); } -const FilterPipeline* Dimension::filters() const { - // TODO: in a future PR, the user will be able to set separate - // TODO: filters for each dimension - return nullptr; +const FilterPipeline& Dimension::filters() const { + return filters_; } const 
std::string& Dimension::name() const { @@ -606,10 +638,13 @@ bool Dimension::value_in_range(const void* value, const Range& range) const { // ===== FORMAT ===== // dimension_name_size (uint32_t) // dimension_name (string) +// type (uint8_t) +// cell_val_num (uint32_t) +// filter_pipeline (see FilterPipeline::serialize) // domain (void* - 2*type_size) // null_tile_extent (uint8_t) // tile_extent (void* - type_size) -Status Dimension::serialize(Buffer* buff) { +Status Dimension::serialize(Buffer* buff, uint32_t version) { // Sanity check if (domain_.empty()) { return LOG_STATUS( @@ -621,6 +656,19 @@ Status Dimension::serialize(Buffer* buff) { RETURN_NOT_OK(buff->write(&dimension_name_size, sizeof(uint32_t))); RETURN_NOT_OK(buff->write(name_.c_str(), dimension_name_size)); + // Applicable only to version >= 5 + if (version >= 5) { + // Write type + auto type = (uint8_t)type_; + RETURN_NOT_OK(buff->write(&type, sizeof(uint8_t))); + + // Write cell_val_num_ + RETURN_NOT_OK(buff->write(&cell_val_num_, sizeof(uint32_t))); + + // Write filter pipeline + RETURN_NOT_OK(filters_.serialize(buff)); + } + // Write domain and tile extent uint64_t domain_size = 2 * coord_size(); RETURN_NOT_OK(buff->write(domain_.data(), domain_size)); @@ -649,6 +697,24 @@ Status Dimension::set_domain(const Range& domain) { return Status::Ok(); } +Status Dimension::set_filter_pipeline(const FilterPipeline* pipeline) { + if (pipeline == nullptr) + return LOG_STATUS(Status::DimensionError( + "Cannot set filter pipeline to dimension; Pipeline cannot be null")); + + for (unsigned i = 0; i < pipeline->size(); ++i) { + if (datatype_is_real(type_) && + pipeline->get_filter(i)->type() == FilterType::FILTER_DOUBLE_DELTA) + return LOG_STATUS( + Status::DimensionError("Cannot set DOUBLE DELTA filter to a " + "dimension with a real datatype")); + } + + filters_ = *pipeline; + + return Status::Ok(); +} + Status Dimension::set_tile_extent(const void* tile_extent) { ByteVecValue te; if (tile_extent != nullptr) { 
@@ -768,8 +834,7 @@ Datatype Dimension::type() const { } bool Dimension::var_size() const { - // TODO: to fix when adding var-sized support to dimensions - return false; + return cell_val_num_ == constants::var_num; } /* ********************************* */ diff --git a/tiledb/sm/array_schema/dimension.h b/tiledb/sm/array_schema/dimension.h index 12afcf21fe68..cdd0517af3db 100644 --- a/tiledb/sm/array_schema/dimension.h +++ b/tiledb/sm/array_schema/dimension.h @@ -83,8 +83,11 @@ class Dimension { /* API */ /* ********************************* */ - /** Returns the number of values per cell. */ - unsigned int cell_val_num() const; + /** Returns the number of values per coordinate. */ + unsigned cell_val_num() const; + + /** Sets the number of values per coordinate. */ + Status set_cell_val_num(unsigned int cell_val_num); /** Returns the size (in bytes) of a coordinate in this dimension. */ uint64_t coord_size() const; @@ -97,9 +100,10 @@ class Dimension { * * @param buff The buffer to deserialize from. * @param type The type of the dimension. + * @param version The array schema version. * @return Status */ - Status deserialize(ConstBuffer* buff, Datatype type); + Status deserialize(ConstBuffer* buff, uint32_t version, Datatype type); /** Returns the domain. */ const Range& domain() const; @@ -108,7 +112,7 @@ class Dimension { void dump(FILE* out) const; /** Returns the filter pipeline of this dimension. */ - const FilterPipeline* filters() const; + const FilterPipeline& filters() const; /** Returns the dimension name. */ const std::string& name() const; @@ -372,9 +376,10 @@ class Dimension { * Serializes the object members into a binary buffer. * * @param buff The buffer to serialize the data into. + * @param version The array schema version * @return Status */ - Status serialize(Buffer* buff); + Status serialize(Buffer* buff, uint32_t version); /** Sets the domain. */ Status set_domain(const void* domain); @@ -382,6 +387,9 @@ class Dimension { /** Sets the domain. 
*/ Status set_domain(const Range& domain); + /** Sets the filter pipeline for this dimension. */ + Status set_filter_pipeline(const FilterPipeline* pipeline); + /** Sets the tile extent. */ Status set_tile_extent(const void* tile_extent); @@ -421,9 +429,15 @@ class Dimension { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The number of values per coordinate. */ + unsigned cell_val_num_; + /** The dimension domain. */ Range domain_; + /** The dimension filter pipeline. */ + FilterPipeline filters_; + /** The dimension name. */ std::string name_; diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index 66fc78c390a0..50b1710fa1dd 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -105,12 +105,7 @@ Status Domain::add_dimension(const Dimension* dim) { Status::DomainError("Cannot add dimension to domain; All added " "dimensions must have the same type")); - auto new_dim = new Dimension(dim->name(), type_); - RETURN_NOT_OK_ELSE(new_dim->set_domain(dim->domain()), delete new_dim); - RETURN_NOT_OK_ELSE( - new_dim->set_tile_extent(dim->tile_extent()), delete new_dim); - - dimensions_.emplace_back(new_dim); + dimensions_.emplace_back(new Dimension(dim)); ++dim_num_; return Status::Ok(); @@ -212,7 +207,7 @@ void Domain::crop_ndrange(NDRange* ndrange) const { // dimension #1 // dimension #2 // ... 
-Status Domain::deserialize(ConstBuffer* buff) { +Status Domain::deserialize(ConstBuffer* buff, uint32_t version) { // Load type uint8_t type; RETURN_NOT_OK(buff->read(&type, sizeof(uint8_t))); @@ -222,7 +217,7 @@ Status Domain::deserialize(ConstBuffer* buff) { RETURN_NOT_OK(buff->read(&dim_num_, sizeof(uint32_t))); for (uint32_t i = 0; i < dim_num_; ++i) { auto dim = new Dimension(); - dim->deserialize(buff, type_); + RETURN_NOT_OK_ELSE(dim->deserialize(buff, version, type_), delete dim); dimensions_.emplace_back(dim); } @@ -267,8 +262,6 @@ const Dimension* Domain::dimension(std::string name) const { void Domain::dump(FILE* out) const { if (out == nullptr) out = stdout; - fprintf(out, "=== Domain ===\n"); - fprintf(out, "- Dimensions type: %s\n", datatype_str(type_).c_str()); for (auto& dim : dimensions_) { fprintf(out, "\n"); @@ -468,7 +461,7 @@ bool Domain::null_tile_extents() const { // dimension #1 // dimension #2 // ... -Status Domain::serialize(Buffer* buff) { +Status Domain::serialize(Buffer* buff, uint32_t version) { // Write type auto type = static_cast<uint8_t>(type_); RETURN_NOT_OK(buff->write(&type, sizeof(uint8_t))); @@ -476,7 +469,7 @@ Status Domain::serialize(Buffer* buff) { // Write dimensions RETURN_NOT_OK(buff->write(&dim_num_, sizeof(uint32_t))); for (auto dim : dimensions_) - dim->serialize(buff); + RETURN_NOT_OK(dim->serialize(buff, version)); return Status::Ok(); } diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index 3befdc6ba7f6..fa6c5add9ce1 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -153,9 +153,10 @@ class Domain { * Populates the object members from the data in the input binary buffer. * * @param buff The buffer to deserialize from. + * @param version The array schema version. * @return Status */ - Status deserialize(ConstBuffer* buff); + Status deserialize(ConstBuffer* buff, uint32_t version); /** Returns the cell order. 
*/ Layout cell_order() const; @@ -392,9 +393,10 @@ class Domain { * Serializes the object members into a binary buffer. * * @param buff The buffer to serialize the data into. + * @param version The array schema version. * @return Status */ - Status serialize(Buffer* buff); + Status serialize(Buffer* buff, uint32_t version); /** * For every dimension that has a null tile extent, it sets diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index ac7f50ec8f35..dc11dcf24104 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -1536,7 +1536,7 @@ int32_t tiledb_attribute_get_filter_list( // Create a new FilterPipeline object (*filter_list)->pipeline_ = - new (std::nothrow) tiledb::sm::FilterPipeline(*attr->attr_->filters()); + new (std::nothrow) tiledb::sm::FilterPipeline(attr->attr_->filters()); if ((*filter_list)->pipeline_ == nullptr) { delete *filter_list; auto st = tiledb::sm::Status::Error( @@ -1730,6 +1730,70 @@ void tiledb_dimension_free(tiledb_dimension_t** dim) { } } +int32_t tiledb_dimension_set_filter_list( + tiledb_ctx_t* ctx, + tiledb_dimension_t* dim, + tiledb_filter_list_t* filter_list) { + if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, dim) == TILEDB_ERR || + sanity_check(ctx, filter_list) == TILEDB_ERR) + return TILEDB_ERR; + + if (SAVE_ERROR_CATCH( + ctx, dim->dim_->set_filter_pipeline(filter_list->pipeline_))) + return TILEDB_ERR; + + return TILEDB_OK; +} + +int32_t tiledb_dimension_set_cell_val_num( + tiledb_ctx_t* ctx, tiledb_dimension_t* dim, uint32_t cell_val_num) { + if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, dim) == TILEDB_ERR) + return TILEDB_ERR; + if (SAVE_ERROR_CATCH(ctx, dim->dim_->set_cell_val_num(cell_val_num))) + return TILEDB_ERR; + return TILEDB_OK; +} + +int32_t tiledb_dimension_get_filter_list( + tiledb_ctx_t* ctx, + tiledb_dimension_t* dim, + tiledb_filter_list_t** filter_list) { + if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, dim) == TILEDB_ERR) + return 
TILEDB_ERR; + + // Create a filter list struct + *filter_list = new (std::nothrow) tiledb_filter_list_t; + if (*filter_list == nullptr) { + auto st = tiledb::sm::Status::Error( + "Failed to allocate TileDB filter list object"); + LOG_STATUS(st); + save_error(ctx, st); + return TILEDB_OOM; + } + + // Create a new FilterPipeline object + (*filter_list)->pipeline_ = + new (std::nothrow) tiledb::sm::FilterPipeline(dim->dim_->filters()); + if ((*filter_list)->pipeline_ == nullptr) { + delete *filter_list; + auto st = tiledb::sm::Status::Error( + "Failed to allocate TileDB filter list object"); + LOG_STATUS(st); + save_error(ctx, st); + return TILEDB_OOM; + } + + return TILEDB_OK; +} + +int32_t tiledb_dimension_get_cell_val_num( + tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, uint32_t* cell_val_num) { + if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, dim) == TILEDB_ERR) + return TILEDB_ERR; + *cell_val_num = dim->dim_->cell_val_num(); + return TILEDB_OK; +} + int32_t tiledb_dimension_get_name( tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, const char** name) { if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, dim) == TILEDB_ERR) @@ -2186,8 +2250,8 @@ int32_t tiledb_array_schema_get_coords_filter_list( } // Create a new FilterPipeline object - (*filter_list)->pipeline_ = new (std::nothrow) tiledb::sm::FilterPipeline( - *array_schema->array_schema_->coords_filters()); + (*filter_list)->pipeline_ = new (std::nothrow) + tiledb::sm::FilterPipeline(array_schema->array_schema_->coords_filters()); if ((*filter_list)->pipeline_ == nullptr) { delete *filter_list; auto st = tiledb::sm::Status::Error( @@ -2220,7 +2284,7 @@ int32_t tiledb_array_schema_get_offsets_filter_list( // Create a new FilterPipeline object (*filter_list)->pipeline_ = new (std::nothrow) tiledb::sm::FilterPipeline( - *array_schema->array_schema_->cell_var_offsets_filters()); + array_schema->array_schema_->cell_var_offsets_filters()); if ((*filter_list)->pipeline_ == nullptr) { delete 
*filter_list; auto st = tiledb::sm::Status::Error( diff --git a/tiledb/sm/c_api/tiledb.h b/tiledb/sm/c_api/tiledb.h index 0939d624db9e..3a5af37079e9 100644 --- a/tiledb/sm/c_api/tiledb.h +++ b/tiledb/sm/c_api/tiledb.h @@ -2237,6 +2237,94 @@ TILEDB_EXPORT int32_t tiledb_dimension_alloc( */ TILEDB_EXPORT void tiledb_dimension_free(tiledb_dimension_t** dim); +/** + * Sets the filter list for a dimension. + * + * **Example:** + * + * @code{.c} + * tiledb_filter_list_t* filter_list; + * tiledb_filter_list_alloc(ctx, &filter_list); + * tiledb_filter_list_add_filter(ctx, filter_list, filter); + * tiledb_dimension_set_filter_list(ctx, dim, filter_list); + * @endcode + * + * @param ctx The TileDB context. + * @param dim The target dimension. + * @param filter_list The filter_list to be set. + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT int32_t tiledb_dimension_set_filter_list( + tiledb_ctx_t* ctx, + tiledb_dimension_t* dim, + tiledb_filter_list_t* filter_list); + +/** + * Sets the number of values per cell for a dimension. If this is not + * used, the default is `1`. + * + * **Examples:** + * + * For a fixed-sized dimension: + * + * @code{.c} + * tiledb_dimension_set_cell_val_num(ctx, dim, 3); + * @endcode + * + * For a variable-sized dimension: + * + * @code{.c} + * tiledb_dimension_set_cell_val_num(ctx, dim, TILEDB_VAR_NUM); + * @endcode + * + * @param ctx The TileDB context. + * @param dim The target dimension. + * @param cell_val_num The number of values per cell. + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT int32_t tiledb_dimension_set_cell_val_num( + tiledb_ctx_t* ctx, tiledb_dimension_t* dim, uint32_t cell_val_num); + +/** + * Retrieves the filter list for a dimension. 
+ * + * **Example:** + * + * @code{.c} + * tiledb_filter_list_t* filter_list; + * tiledb_dimension_get_filter_list(ctx, dim, &filter_list); + * tiledb_filter_list_free(&filter_list); + * @endcode + * + * @param ctx The TileDB context. + * @param dim The target dimension. + * @param filter_list The filter list to be retrieved. + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT int32_t tiledb_dimension_get_filter_list( + tiledb_ctx_t* ctx, + tiledb_dimension_t* dim, + tiledb_filter_list_t** filter_list); + +/** + * Retrieves the number of values per cell for a dimension. For variable-sized + * dimensions the result is TILEDB_VAR_NUM. + * + * **Example:** + * + * @code{.c} + * uint32_t num; + * tiledb_dimension_get_cell_val_num(ctx, dim, &num); + * @endcode + * + * @param ctx The TileDB context. + * @param dim The dimension. + * @param cell_val_num The number of values per cell to be retrieved. + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT int32_t tiledb_dimension_get_cell_val_num( + tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, uint32_t* cell_val_num); + /** * Retrieves the dimension name. * diff --git a/tiledb/sm/cpp_api/dimension.h b/tiledb/sm/cpp_api/dimension.h index b702fcdf85e7..f32216eed1d4 100644 --- a/tiledb/sm/cpp_api/dimension.h +++ b/tiledb/sm/cpp_api/dimension.h @@ -81,6 +81,50 @@ class Dimension { /* API */ /* ********************************* */ + /** + * Returns number of values of one cell on this dimension. For variable-sized + * dimensions returns TILEDB_VAR_NUM. + */ + unsigned cell_val_num() const { + auto& ctx = ctx_.get(); + unsigned num; + ctx.handle_error( + tiledb_dimension_get_cell_val_num(ctx.ptr().get(), dim_.get(), &num)); + return num; + } + + /** Sets the number of values per coordinate. 
*/ + Dimension& set_cell_val_num(unsigned num) { + auto& ctx = ctx_.get(); + ctx.handle_error( + tiledb_dimension_set_cell_val_num(ctx.ptr().get(), dim_.get(), num)); + return *this; + } + + /** + * Returns a copy of the FilterList of the dimension. + * To change the filter list, use `set_filter_list()`. + */ + FilterList filter_list() const { + auto& ctx = ctx_.get(); + tiledb_filter_list_t* filter_list; + ctx.handle_error(tiledb_dimension_get_filter_list( + ctx.ptr().get(), dim_.get(), &filter_list)); + return FilterList(ctx, filter_list); + } + + /** + * Sets the dimension filter list, which is an ordered list of filters that + * will be used to process and/or transform the coordinate data (such as + * compression). + */ + Dimension& set_filter_list(const FilterList& filter_list) { + auto& ctx = ctx_.get(); + ctx.handle_error(tiledb_dimension_set_filter_list( + ctx.ptr().get(), dim_.get(), filter_list.ptr().get())); + return *this; + } + /** Returns the name of the dimension. */ const std::string name() const { const char* name; diff --git a/tiledb/sm/filter/bit_width_reduction_filter.cc b/tiledb/sm/filter/bit_width_reduction_filter.cc index a636d6d20289..086a443d0b03 100644 --- a/tiledb/sm/filter/bit_width_reduction_filter.cc +++ b/tiledb/sm/filter/bit_width_reduction_filter.cc @@ -84,6 +84,12 @@ BitWidthReductionFilter::BitWidthReductionFilter() max_window_size_ = 256; } +void BitWidthReductionFilter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + fprintf(out, "BitWidthReduction: BIT_WIDTH_MAX_WINDOW=%u", max_window_size_); +} + Status BitWidthReductionFilter::run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/filter/bit_width_reduction_filter.h b/tiledb/sm/filter/bit_width_reduction_filter.h index ab6016a6c251..229ae1c698e7 100644 --- a/tiledb/sm/filter/bit_width_reduction_filter.h +++ b/tiledb/sm/filter/bit_width_reduction_filter.h @@ -83,6 +83,9 @@ class BitWidthReductionFilter : public Filter { /** 
Constructor. */ BitWidthReductionFilter(); + /** Dumps the filter details in ASCII format in the selected output. */ + void dump(FILE* out) const override; + /** Return the max window size used by the filter. */ uint32_t max_window_size() const; diff --git a/tiledb/sm/filter/bitshuffle_filter.cc b/tiledb/sm/filter/bitshuffle_filter.cc index 975f483763a6..5c0b5522217d 100644 --- a/tiledb/sm/filter/bitshuffle_filter.cc +++ b/tiledb/sm/filter/bitshuffle_filter.cc @@ -49,6 +49,13 @@ BitshuffleFilter* BitshuffleFilter::clone_impl() const { return new BitshuffleFilter; } +void BitshuffleFilter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + + fprintf(out, "BitShuffle"); +} + Status BitshuffleFilter::run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/filter/bitshuffle_filter.h b/tiledb/sm/filter/bitshuffle_filter.h index 89b12daedc3f..5e98d9ff7990 100644 --- a/tiledb/sm/filter/bitshuffle_filter.h +++ b/tiledb/sm/filter/bitshuffle_filter.h @@ -77,6 +77,9 @@ class BitshuffleFilter : public Filter { */ BitshuffleFilter(); + /** Dumps the filter details in ASCII format in the selected output. */ + void dump(FILE* out) const override; + /** * Shuffle the bits of the input data into the output data buffer. 
*/ diff --git a/tiledb/sm/filter/byteshuffle_filter.cc b/tiledb/sm/filter/byteshuffle_filter.cc index 25b4ba4e2971..cbc58d62f8ba 100644 --- a/tiledb/sm/filter/byteshuffle_filter.cc +++ b/tiledb/sm/filter/byteshuffle_filter.cc @@ -49,6 +49,13 @@ ByteshuffleFilter* ByteshuffleFilter::clone_impl() const { return new ByteshuffleFilter; } +void ByteshuffleFilter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + + fprintf(out, "ByteShuffle"); +} + Status ByteshuffleFilter::run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/filter/byteshuffle_filter.h b/tiledb/sm/filter/byteshuffle_filter.h index 778dee5e0f84..3ef35fbe7cab 100644 --- a/tiledb/sm/filter/byteshuffle_filter.h +++ b/tiledb/sm/filter/byteshuffle_filter.h @@ -69,6 +69,9 @@ class ByteshuffleFilter : public Filter { */ ByteshuffleFilter(); + /** Dumps the filter details in ASCII format in the selected output. */ + void dump(FILE* out) const override; + /** * Shuffle the bytes of the input data into the output data buffer. */ diff --git a/tiledb/sm/filter/checksum_md5_filter.cc b/tiledb/sm/filter/checksum_md5_filter.cc index 13eeb4308b26..2ceb06f3cc13 100644 --- a/tiledb/sm/filter/checksum_md5_filter.cc +++ b/tiledb/sm/filter/checksum_md5_filter.cc @@ -50,6 +50,13 @@ ChecksumMD5Filter* ChecksumMD5Filter::clone_impl() const { return new ChecksumMD5Filter; } +void ChecksumMD5Filter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + + fprintf(out, "ChecksumMD5"); +} + Status ChecksumMD5Filter::run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/filter/checksum_md5_filter.h b/tiledb/sm/filter/checksum_md5_filter.h index b0149552ce50..dd293a51d077 100644 --- a/tiledb/sm/filter/checksum_md5_filter.h +++ b/tiledb/sm/filter/checksum_md5_filter.h @@ -75,6 +75,9 @@ class ChecksumMD5Filter : public Filter { */ ChecksumMD5Filter(); + /** Dumps the filter details in ASCII format in the selected output. 
*/ + void dump(FILE* out) const override; + /** * Encrypt the bytes of the input data into the output data buffer. */ diff --git a/tiledb/sm/filter/checksum_sha256_filter.cc b/tiledb/sm/filter/checksum_sha256_filter.cc index 5e34a50e8191..dc38a8ebc3b8 100644 --- a/tiledb/sm/filter/checksum_sha256_filter.cc +++ b/tiledb/sm/filter/checksum_sha256_filter.cc @@ -50,6 +50,13 @@ ChecksumSHA256Filter* ChecksumSHA256Filter::clone_impl() const { return new ChecksumSHA256Filter; } +void ChecksumSHA256Filter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + + fprintf(out, "ChecksumSHA256"); +} + Status ChecksumSHA256Filter::run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/filter/checksum_sha256_filter.h b/tiledb/sm/filter/checksum_sha256_filter.h index 57b88274579a..d351292b7777 100644 --- a/tiledb/sm/filter/checksum_sha256_filter.h +++ b/tiledb/sm/filter/checksum_sha256_filter.h @@ -75,6 +75,9 @@ class ChecksumSHA256Filter : public Filter { */ ChecksumSHA256Filter(); + /** Dumps the filter details in ASCII format in the selected output. */ + void dump(FILE* out) const override; + /** * Encrypt the bytes of the input data into the output data buffer. 
*/ diff --git a/tiledb/sm/filter/compression_filter.cc b/tiledb/sm/filter/compression_filter.cc index 2f40679c077d..4d40c3cfc4f7 100644 --- a/tiledb/sm/filter/compression_filter.cc +++ b/tiledb/sm/filter/compression_filter.cc @@ -70,6 +70,40 @@ int CompressionFilter::compression_level() const { return level_; } +void CompressionFilter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + + std::string compressor_str; + switch (compressor_) { + case Compressor::NO_COMPRESSION: + compressor_str = "NO_COMPRESSION"; + break; + case Compressor::GZIP: + compressor_str = "GZIP"; + break; + case Compressor::ZSTD: + compressor_str = "ZSTD"; + break; + case Compressor::LZ4: + compressor_str = "LZ4"; + break; + case Compressor::RLE: + compressor_str = "RLE"; + break; + case Compressor::BZIP2: + compressor_str = "BZIP2"; + break; + case Compressor::DOUBLE_DELTA: + compressor_str = "DOUBLE_DELTA"; + break; + default: + compressor_str = "NO_COMPRESSION"; + } + + fprintf(out, "%s: COMPRESSION_LEVEL=%i", compressor_str.c_str(), level_); +} + CompressionFilter* CompressionFilter::clone_impl() const { return new CompressionFilter(compressor_, level_); } diff --git a/tiledb/sm/filter/compression_filter.h b/tiledb/sm/filter/compression_filter.h index 16678e3b5e86..bf1e5862be4b 100644 --- a/tiledb/sm/filter/compression_filter.h +++ b/tiledb/sm/filter/compression_filter.h @@ -95,6 +95,9 @@ class CompressionFilter : public Filter { /** Return the compression level used by this filter instance. */ int compression_level() const; + /** Dumps the filter details in ASCII format in the selected output. */ + void dump(FILE* out) const override; + /** * Compress the given input into the given output. 
*/ diff --git a/tiledb/sm/filter/encryption_aes256gcm_filter.cc b/tiledb/sm/filter/encryption_aes256gcm_filter.cc index fb1997110b16..085b515f7365 100644 --- a/tiledb/sm/filter/encryption_aes256gcm_filter.cc +++ b/tiledb/sm/filter/encryption_aes256gcm_filter.cc @@ -61,6 +61,13 @@ EncryptionAES256GCMFilter* EncryptionAES256GCMFilter::clone_impl() const { return clone; } +void EncryptionAES256GCMFilter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + + fprintf(out, "EncryptionAES256GCM"); +} + Status EncryptionAES256GCMFilter::run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/filter/encryption_aes256gcm_filter.h b/tiledb/sm/filter/encryption_aes256gcm_filter.h index ba12caf9c398..3eb0206191cb 100644 --- a/tiledb/sm/filter/encryption_aes256gcm_filter.h +++ b/tiledb/sm/filter/encryption_aes256gcm_filter.h @@ -88,6 +88,9 @@ class EncryptionAES256GCMFilter : public Filter { */ explicit EncryptionAES256GCMFilter(const EncryptionKey& key); + /** Dumps the filter details in ASCII format in the selected output. */ + void dump(FILE* out) const override; + /** * Encrypt the bytes of the input data into the output data buffer. */ diff --git a/tiledb/sm/filter/filter.h b/tiledb/sm/filter/filter.h index b30227e42aee..8f26414eec8b 100644 --- a/tiledb/sm/filter/filter.h +++ b/tiledb/sm/filter/filter.h @@ -67,6 +67,9 @@ class Filter { */ Filter* clone() const; + /** Dumps the filter details in ASCII format in the selected output. */ + virtual void dump(FILE* out) const = 0; + /** * Factory method to create a new Filter instance of the given type. 
* diff --git a/tiledb/sm/filter/filter_pipeline.cc b/tiledb/sm/filter/filter_pipeline.cc index a226bed721c5..5a89468fda25 100644 --- a/tiledb/sm/filter/filter_pipeline.cc +++ b/tiledb/sm/filter/filter_pipeline.cc @@ -488,6 +488,16 @@ Status FilterPipeline::deserialize(ConstBuffer* buff) { return Status::Ok(); } +void FilterPipeline::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + + for (const auto& filter : filters_) { + fprintf(out, "\n > "); + filter->dump(out); + } +} + void FilterPipeline::set_max_chunk_size(uint32_t max_chunk_size) { max_chunk_size_ = max_chunk_size; } @@ -496,6 +506,10 @@ unsigned FilterPipeline::size() const { return static_cast(filters_.size()); } +bool FilterPipeline::empty() const { + return filters_.empty(); +} + void FilterPipeline::swap(FilterPipeline& other) { filters_.swap(other.filters_); diff --git a/tiledb/sm/filter/filter_pipeline.h b/tiledb/sm/filter/filter_pipeline.h index f695839c85e1..3f5a67cdaa18 100644 --- a/tiledb/sm/filter/filter_pipeline.h +++ b/tiledb/sm/filter/filter_pipeline.h @@ -95,6 +95,12 @@ class FilterPipeline { */ Status deserialize(ConstBuffer* buff); + /** + * Dumps the filter pipeline details in ASCII format in the selected + * output. + */ + void dump(FILE* out) const; + /** * Returns pointer to the first instance of a filter in the pipeline with the * given filter subclass type. @@ -219,6 +225,9 @@ class FilterPipeline { /** Returns the number of filters in the pipeline. */ unsigned size() const; + /** Returns true if the pipeline is empty. */ + bool empty() const; + /** Swaps the contents of this pipeline with the given pipeline. 
*/ void swap(FilterPipeline& other); diff --git a/tiledb/sm/filter/noop_filter.cc b/tiledb/sm/filter/noop_filter.cc index 556135f26fb0..8afd361b88af 100644 --- a/tiledb/sm/filter/noop_filter.cc +++ b/tiledb/sm/filter/noop_filter.cc @@ -46,6 +46,13 @@ NoopFilter* NoopFilter::clone_impl() const { return new NoopFilter; } +void NoopFilter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + + fprintf(out, "NoOp"); +} + Status NoopFilter::run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/filter/noop_filter.h b/tiledb/sm/filter/noop_filter.h index 9cb96fc336ad..896a4e8d2864 100644 --- a/tiledb/sm/filter/noop_filter.h +++ b/tiledb/sm/filter/noop_filter.h @@ -49,6 +49,9 @@ class NoopFilter : public Filter { */ NoopFilter(); + /** Dumps the filter details in ASCII format in the selected output. */ + void dump(FILE* out) const override; + /** * Run forward. */ diff --git a/tiledb/sm/filter/positive_delta_filter.cc b/tiledb/sm/filter/positive_delta_filter.cc index c86cd56c80f1..923145a03256 100644 --- a/tiledb/sm/filter/positive_delta_filter.cc +++ b/tiledb/sm/filter/positive_delta_filter.cc @@ -47,6 +47,12 @@ PositiveDeltaFilter::PositiveDeltaFilter() max_window_size_ = 1024; } +void PositiveDeltaFilter::dump(FILE* out) const { + if (out == nullptr) + out = stdout; + fprintf(out, "PositiveDelta: POSITIVE_DELTA_MAX_WINDOW=%u", max_window_size_); +} + Status PositiveDeltaFilter::run_forward( FilterBuffer* input_metadata, FilterBuffer* input, diff --git a/tiledb/sm/filter/positive_delta_filter.h b/tiledb/sm/filter/positive_delta_filter.h index 36c5fcaac7eb..6bc24020a441 100644 --- a/tiledb/sm/filter/positive_delta_filter.h +++ b/tiledb/sm/filter/positive_delta_filter.h @@ -77,6 +77,9 @@ class PositiveDeltaFilter : public Filter { /** Return the max window size used by the filter. */ uint32_t max_window_size() const; + /** Dumps the filter details in ASCII format in the selected output. 
*/ + void dump(FILE* out) const override; + /** * Perform positive-delta encoding of the given input into the given output. */ diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 938bcad7eed5..af48f2f2e7f8 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -1661,8 +1661,8 @@ Status Reader::unfilter_tile( const std::string& name, Tile* tile, bool offsets) const { // Get a copy of the appropriate unfilter pipeline. FilterPipeline filters = - (offsets ? *array_schema_->cell_var_offsets_filters() : - *array_schema_->filters(name)); + (offsets ? array_schema_->cell_var_offsets_filters() : + array_schema_->filters(name)); // Append an encryption unfilter when necessary. RETURN_NOT_OK(FilterPipeline::append_encryption_filter( diff --git a/tiledb/sm/query/writer.cc b/tiledb/sm/query/writer.cc index 8ac6eb67e8d4..ee8ba2d994dd 100644 --- a/tiledb/sm/query/writer.cc +++ b/tiledb/sm/query/writer.cc @@ -1065,8 +1065,8 @@ Status Writer::filter_tile( // Get a copy of the appropriate filter pipeline. FilterPipeline filters = - (offsets ? *array_schema_->cell_var_offsets_filters() : - *array_schema_->filters(name)); + (offsets ? array_schema_->cell_var_offsets_filters() : + array_schema_->filters(name)); // Append an encryption filter when necessary. 
RETURN_NOT_OK(FilterPipeline::append_encryption_filter( diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index 62346c4b0dfd..a82f149e590e 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -178,9 +178,9 @@ Status attribute_to_capnp( attribute_builder->setType(datatype_str(attribute->type())); attribute_builder->setCellValNum(attribute->cell_val_num()); - const auto* filters = attribute->filters(); + const auto& filters = attribute->filters(); auto filter_pipeline_builder = attribute_builder->initFilterPipeline(); - RETURN_NOT_OK(filter_pipeline_to_capnp(filters, &filter_pipeline_builder)); + RETURN_NOT_OK(filter_pipeline_to_capnp(&filters, &filter_pipeline_builder)); return Status::Ok(); } @@ -228,6 +228,11 @@ Status dimension_to_capnp( dimension->tile_extent().data())); } + // Set filters + const FilterPipeline& coords_filters = dimension->filters(); + capnp::FilterPipeline::Builder filters_builder = + dimension_builder->initFilterPipeline(); + RETURN_NOT_OK(filter_pipeline_to_capnp(&coords_filters, &filters_builder)); return Status::Ok(); } @@ -244,6 +249,13 @@ Status dimension_from_capnp( utils::copy_capnp_list(domain_reader, dim_type, &domain_buffer)); RETURN_NOT_OK((*dimension)->set_domain(domain_buffer.data())); + if (dimension_reader.hasFilterPipeline()) { + auto reader = dimension_reader.getFilterPipeline(); + std::unique_ptr<FilterPipeline> filters; + RETURN_NOT_OK(filter_pipeline_from_capnp(reader, &filters)); + RETURN_NOT_OK((*dimension)->set_filter_pipeline(filters.get())); + } + if (!dimension_reader.getNullTileExtent()) { auto tile_extent_reader = dimension_reader.getTileExtent(); switch (dim_type) { @@ -375,19 +387,19 @@ Status array_schema_to_capnp( array_schema_builder->setAllowsDuplicates(array_schema->allows_dups()); // Set coordinate filters - const FilterPipeline* coords_filters = array_schema->coords_filters(); + const FilterPipeline& coords_filters = 
array_schema->coords_filters(); capnp::FilterPipeline::Builder coords_filters_builder = array_schema_builder->initCoordsFilterPipeline(); RETURN_NOT_OK( - filter_pipeline_to_capnp(coords_filters, &coords_filters_builder)); + filter_pipeline_to_capnp(&coords_filters, &coords_filters_builder)); // Set offset filters - const FilterPipeline* offsets_filters = + const FilterPipeline& offsets_filters = array_schema->cell_var_offsets_filters(); capnp::FilterPipeline::Builder offsets_filters_builder = array_schema_builder->initOffsetFilterPipeline(); RETURN_NOT_OK( - filter_pipeline_to_capnp(offsets_filters, &offsets_filters_builder)); + filter_pipeline_to_capnp(&offsets_filters, &offsets_filters_builder)); // Domain auto domain_builder = array_schema_builder->initDomain(); diff --git a/tiledb/sm/serialization/tiledb-rest.capnp.c++ b/tiledb/sm/serialization/tiledb-rest.capnp.c++ index 0a2262ae056e..9dbee2ad5a0e 100644 --- a/tiledb/sm/serialization/tiledb-rest.capnp.c++ +++ b/tiledb/sm/serialization/tiledb-rest.capnp.c++ @@ -691,17 +691,17 @@ const ::capnp::_::RawSchema s_d20a578112fa92a2 = { 0, 3, i_d20a578112fa92a2, nullptr, nullptr, { &s_d20a578112fa92a2, nullptr, nullptr, 0, 0, nullptr } }; #endif // !CAPNP_LITE -static const ::capnp::_::AlignedData<88> b_95e26a84d32d8223 = { +static const ::capnp::_::AlignedData<104> b_95e26a84d32d8223 = { { 0, 0, 0, 0, 5, 0, 6, 0, 35, 130, 45, 211, 132, 106, 226, 149, 18, 0, 0, 0, 1, 0, 2, 0, 127, 216, 135, 181, 36, 146, 125, 181, - 3, 0, 7, 0, 0, 0, 0, 0, + 4, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 0, 226, 0, 0, 0, 33, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 29, 0, 0, 0, 31, 1, 0, 0, + 29, 0, 0, 0, 87, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 105, 108, 101, 100, 98, 45, 114, @@ -709,42 +709,49 @@ static const ::capnp::_::AlignedData<88> b_95e26a84d32d8223 = { 112, 58, 68, 105, 109, 101, 110, 115, 105, 111, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, - 20, 0, 0, 0, 3, 0, 4, 0, + 24, 0, 0, 0, 
3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 125, 0, 0, 0, 42, 0, 0, 0, + 153, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 120, 0, 0, 0, 3, 0, 1, 0, - 132, 0, 0, 0, 2, 0, 1, 0, + 148, 0, 0, 0, 3, 0, 1, 0, + 160, 0, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 129, 0, 0, 0, 122, 0, 0, 0, + 157, 0, 0, 0, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 128, 0, 0, 0, 3, 0, 1, 0, - 140, 0, 0, 0, 2, 0, 1, 0, + 156, 0, 0, 0, 3, 0, 1, 0, + 168, 0, 0, 0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 137, 0, 0, 0, 42, 0, 0, 0, + 165, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 132, 0, 0, 0, 3, 0, 1, 0, - 144, 0, 0, 0, 2, 0, 1, 0, + 160, 0, 0, 0, 3, 0, 1, 0, + 172, 0, 0, 0, 2, 0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 160, 159, 176, 109, 83, 82, 166, 162, - 141, 0, 0, 0, 90, 0, 0, 0, + 169, 0, 0, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 121, 0, 0, 0, 58, 0, 0, 0, + 149, 0, 0, 0, 58, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 144, 0, 0, 0, 3, 0, 1, 0, + 156, 0, 0, 0, 2, 0, 1, 0, + 5, 0, 0, 0, 3, 0, 0, 0, + 0, 0, 1, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 116, 0, 0, 0, 3, 0, 1, 0, - 128, 0, 0, 0, 2, 0, 1, 0, + 153, 0, 0, 0, 122, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 152, 0, 0, 0, 3, 0, 1, 0, + 164, 0, 0, 0, 2, 0, 1, 0, 110, 97, 109, 101, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -777,6 +784,15 @@ static const ::capnp::_::AlignedData<88> b_95e26a84d32d8223 = { 236, 12, 65, 249, 230, 4, 89, 206, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 102, 105, 108, 116, 101, 114, 80, 105, + 112, 101, 108, 105, 110, 101, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 0, + 245, 196, 234, 51, 247, 131, 69, 188, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } @@ -785,13 +801,14 @@ static const ::capnp::_::AlignedData<88> b_95e26a84d32d8223 = { #if !CAPNP_LITE static const ::capnp::_::RawSchema* const d_95e26a84d32d8223[] = { &s_a2a652536db09fa0, + &s_bc4583f733eac4f5, &s_ce5904e6f9410cec, }; -static const uint16_t m_95e26a84d32d8223[] = {4, 0, 1, 3, 2}; -static const uint16_t i_95e26a84d32d8223[] = {0, 1, 2, 3, 4}; +static const uint16_t m_95e26a84d32d8223[] = {4, 5, 0, 1, 3, 2}; +static const uint16_t i_95e26a84d32d8223[] = {0, 1, 2, 3, 4, 5}; const ::capnp::_::RawSchema s_95e26a84d32d8223 = { - 0x95e26a84d32d8223, b_95e26a84d32d8223.words, 88, d_95e26a84d32d8223, m_95e26a84d32d8223, - 2, 5, i_95e26a84d32d8223, nullptr, nullptr, { &s_95e26a84d32d8223, nullptr, nullptr, 0, 0, nullptr } + 0x95e26a84d32d8223, b_95e26a84d32d8223.words, 104, d_95e26a84d32d8223, m_95e26a84d32d8223, + 3, 6, i_95e26a84d32d8223, nullptr, nullptr, { &s_95e26a84d32d8223, nullptr, nullptr, 0, 0, nullptr } }; #endif // !CAPNP_LITE static const ::capnp::_::AlignedData<168> b_a2a652536db09fa0 = { @@ -799,7 +816,7 @@ static const ::capnp::_::AlignedData<168> b_a2a652536db09fa0 = { 160, 159, 176, 109, 83, 82, 166, 162, 28, 0, 0, 0, 1, 0, 2, 0, 35, 130, 45, 211, 132, 106, 226, 149, - 3, 0, 7, 0, 1, 0, 10, 0, + 4, 0, 7, 0, 1, 0, 10, 0, 1, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 0, 58, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/tiledb/sm/serialization/tiledb-rest.capnp.h b/tiledb/sm/serialization/tiledb-rest.capnp.h index ae35f9954729..daf47fe17663 100644 --- a/tiledb/sm/serialization/tiledb-rest.capnp.h +++ b/tiledb/sm/serialization/tiledb-rest.capnp.h @@ -149,7 +149,7 @@ struct Dimension { struct TileExtent; struct _capnpPrivate { - CAPNP_DECLARE_STRUCT_HEADER(95e26a84d32d8223, 2, 3) + CAPNP_DECLARE_STRUCT_HEADER(95e26a84d32d8223, 2, 4) #if !CAPNP_LITE static constexpr ::capnp::_::RawBrandedSchema const* brand() { return &schema->defaultBrand; @@ -178,7 +178,7 @@ struct 
Dimension::TileExtent { }; struct _capnpPrivate { - CAPNP_DECLARE_STRUCT_HEADER(a2a652536db09fa0, 2, 3) + CAPNP_DECLARE_STRUCT_HEADER(a2a652536db09fa0, 2, 4) #if !CAPNP_LITE static constexpr ::capnp::_::RawBrandedSchema const* brand() { return &schema->defaultBrand; @@ -1448,6 +1448,10 @@ class Dimension::Reader { inline ::tiledb::sm::serialization::capnp::DomainArray::Reader getDomain() const; + inline bool hasFilterPipeline() const; + inline ::tiledb::sm::serialization::capnp::FilterPipeline::Reader + getFilterPipeline() const; + private: ::capnp::_::StructReader _reader; template <typename, ::capnp::Kind> @@ -1517,6 +1521,19 @@ class Dimension::Builder { inline ::capnp::Orphan<::tiledb::sm::serialization::capnp::DomainArray> disownDomain(); + inline bool hasFilterPipeline(); + inline ::tiledb::sm::serialization::capnp::FilterPipeline::Builder + getFilterPipeline(); + inline void setFilterPipeline( + ::tiledb::sm::serialization::capnp::FilterPipeline::Reader value); + inline ::tiledb::sm::serialization::capnp::FilterPipeline::Builder + initFilterPipeline(); + inline void adoptFilterPipeline( + ::capnp::Orphan<::tiledb::sm::serialization::capnp::FilterPipeline>&& + value); + inline ::capnp::Orphan<::tiledb::sm::serialization::capnp::FilterPipeline> + disownFilterPipeline(); + private: ::capnp::_::StructBuilder _builder; template <typename, ::capnp::Kind> @@ -1540,6 +1557,8 @@ class Dimension::Pipeline { inline typename TileExtent::Pipeline getTileExtent(); inline ::tiledb::sm::serialization::capnp::DomainArray::Pipeline getDomain(); + inline ::tiledb::sm::serialization::capnp::FilterPipeline::Pipeline + getFilterPipeline(); private: ::capnp::AnyPointer::Pipeline _typeless; @@ -6189,6 +6208,62 @@ Dimension::Builder::disownDomain() { _builder.getPointerField(::capnp::bounded<2>() * ::capnp::POINTERS)); } +inline bool Dimension::Reader::hasFilterPipeline() const { + return !_reader.getPointerField(::capnp::bounded<3>() * ::capnp::POINTERS) + .isNull(); +} +inline bool Dimension::Builder::hasFilterPipeline() { + return 
!_builder.getPointerField(::capnp::bounded<3>() * ::capnp::POINTERS) + .isNull(); +} +inline ::tiledb::sm::serialization::capnp::FilterPipeline::Reader +Dimension::Reader::getFilterPipeline() const { + return ::capnp::_:: + PointerHelpers<::tiledb::sm::serialization::capnp::FilterPipeline>::get( + _reader.getPointerField(::capnp::bounded<3>() * ::capnp::POINTERS)); +} +inline ::tiledb::sm::serialization::capnp::FilterPipeline::Builder +Dimension::Builder::getFilterPipeline() { + return ::capnp::_:: + PointerHelpers<::tiledb::sm::serialization::capnp::FilterPipeline>::get( + _builder.getPointerField(::capnp::bounded<3>() * ::capnp::POINTERS)); +} +#if !CAPNP_LITE +inline ::tiledb::sm::serialization::capnp::FilterPipeline::Pipeline +Dimension::Pipeline::getFilterPipeline() { + return ::tiledb::sm::serialization::capnp::FilterPipeline::Pipeline( + _typeless.getPointerField(3)); +} +#endif // !CAPNP_LITE +inline void Dimension::Builder::setFilterPipeline( + ::tiledb::sm::serialization::capnp::FilterPipeline::Reader value) { + ::capnp::_:: + PointerHelpers<::tiledb::sm::serialization::capnp::FilterPipeline>::set( + _builder.getPointerField(::capnp::bounded<3>() * ::capnp::POINTERS), + value); +} +inline ::tiledb::sm::serialization::capnp::FilterPipeline::Builder +Dimension::Builder::initFilterPipeline() { + return ::capnp::_:: + PointerHelpers<::tiledb::sm::serialization::capnp::FilterPipeline>::init( + _builder.getPointerField(::capnp::bounded<3>() * ::capnp::POINTERS)); +} +inline void Dimension::Builder::adoptFilterPipeline( + ::capnp::Orphan<::tiledb::sm::serialization::capnp::FilterPipeline>&& + value) { + ::capnp::_:: + PointerHelpers<::tiledb::sm::serialization::capnp::FilterPipeline>::adopt( + _builder.getPointerField(::capnp::bounded<3>() * ::capnp::POINTERS), + kj::mv(value)); +} +inline ::capnp::Orphan<::tiledb::sm::serialization::capnp::FilterPipeline> +Dimension::Builder::disownFilterPipeline() { + return ::capnp::_::PointerHelpers< + 
::tiledb::sm::serialization::capnp::FilterPipeline>:: + disown( + _builder.getPointerField(::capnp::bounded<3>() * ::capnp::POINTERS)); +} + inline ::tiledb::sm::serialization::capnp::Dimension::TileExtent::Which Dimension::TileExtent::Reader::which() const { return _reader.getDataField<Which>(::capnp::bounded<1>() * ::capnp::ELEMENTS); diff --git a/tiledb/sm/storage_manager/storage_manager.cc b/tiledb/sm/storage_manager/storage_manager.cc index 3df13af2db11..83254bd70933 100644 --- a/tiledb/sm/storage_manager/storage_manager.cc +++ b/tiledb/sm/storage_manager/storage_manager.cc @@ -1325,7 +1325,6 @@ Status StorageManager::store_array_schema( ArraySchema* array_schema, const EncryptionKey& encryption_key) { auto& array_uri = array_schema->array_uri(); URI schema_uri = array_uri.join_path(constants::array_schema_filename); - ; // Serialize auto buff = new Buffer();