From 9ab9f5f1b171df48a5179c4645e5ff9592d3b958 Mon Sep 17 00:00:00 2001 From: Stavros Papadopoulos Date: Sat, 6 Jun 2020 16:17:09 -0400 Subject: [PATCH] Enable the user to set arbitrary fill values to attributes. The fill values are used when empty attribute values are retrieved during a read query. This can happen either (1) in the case of a dense array with "empty" regions, or (2) any array with missing attributes. Note that the latter (i.e., enabling writes with a subset of attributes) will be supported in a subsequent PR. --- HISTORY.md | 12 + doc/source/c-api.rst | 4 + format_spec/array_schema.md | 2 + test/CMakeLists.txt | 2 + test/src/unit-capi-array.cc | 8 +- test/src/unit-capi-array_schema.cc | 3 +- test/src/unit-capi-consolidation.cc | 2 +- test/src/unit-capi-error.cc | 2 +- test/src/unit-capi-fill_values.cc | 187 +++++++++++++ test/src/unit-cppapi-array.cc | 4 +- test/src/unit-cppapi-fill_values.cc | 374 +++++++++++++++++++++++++ tiledb/sm/array_schema/array_schema.cc | 2 +- tiledb/sm/array_schema/attribute.cc | 103 ++++++- tiledb/sm/array_schema/attribute.h | 32 ++- tiledb/sm/c_api/tiledb.cc | 28 ++ tiledb/sm/c_api/tiledb.h | 78 ++++++ tiledb/sm/cpp_api/attribute.h | 78 ++++++ tiledb/sm/misc/constants.cc | 2 +- tiledb/sm/misc/utils.cc | 79 ++++++ tiledb/sm/misc/utils.h | 3 + tiledb/sm/query/reader.cc | 39 +-- 21 files changed, 1014 insertions(+), 30 deletions(-) create mode 100644 test/src/unit-capi-fill_values.cc create mode 100644 test/src/unit-cppapi-fill_values.cc diff --git a/HISTORY.md b/HISTORY.md index ab2854356f7d..37b8f6312122 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,8 @@ ## Breaking behavior +* Empty dense arrays now return cells with fill values. + ## New features ## Improvements @@ -14,8 +16,18 @@ ## Bug fixes +* Fixed bug in setting a fill value for var-sized attributes. 
+ ## API additions +### C API + +* Added functions `tiledb_attribute_{set,get}_fill_value` to get/set default fill values + +### C++ API + +* Added functions `Attribute::{set,get}_fill_value` to get/set default fill values + # TileDB v2.0.2 Release Notes ## Bug fixes diff --git a/doc/source/c-api.rst b/doc/source/c-api.rst index ee373deae824..b67ad1075207 100644 --- a/doc/source/c-api.rst +++ b/doc/source/c-api.rst @@ -335,6 +335,10 @@ Attribute :project: TileDB-C .. doxygenfunction:: tiledb_attribute_dump :project: TileDB-C +.. doxygenfunction:: tiledb_attribute_set_fill_value + :project: TileDB-C +.. doxygenfunction:: tiledb_attribute_get_fill_value + :project: TileDB-C Domain ------ diff --git a/format_spec/array_schema.md b/format_spec/array_schema.md index 4449ba718087..0ff5d65782aa 100644 --- a/format_spec/array_schema.md +++ b/format_spec/array_schema.md @@ -67,4 +67,6 @@ The attribute has internal format: | Attribute datatype | `uint8_t` | Datatype of the attribute values | | Cell val num | `uint32_t` | Number of attribute values per cell. 
For variable-length attributes, this is `std::numeric_limits<uint32_t>::max()` | | Filters | [Filter Pipeline](./filter_pipeline.md) | The filter pipeline used on attribute value tiles | +| Fill value size | `uint64_t` | The size in bytes of the fill value | +| Fill value | `uint8_t[]` | The fill value | diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b80323a85124..ddb1f81a3631 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -61,6 +61,7 @@ set(TILEDB_TEST_SOURCES src/unit-capi-empty-var-length.cc src/unit-capi-enum_values.cc src/unit-capi-error.cc + src/unit-capi-fill_values.cc src/unit-capi-filter.cc src/unit-capi-incomplete.cc src/unit-capi-incomplete-2.cc @@ -119,6 +120,7 @@ if (TILEDB_CPP_API) src/unit-cppapi-consolidation.cc src/unit-cppapi-consolidation-sparse.cc src/unit-cppapi-datetimes.cc + src/unit-cppapi-fill_values.cc src/unit-cppapi-filter.cc src/unit-cppapi-metadata.cc src/unit-cppapi-query.cc diff --git a/test/src/unit-capi-array.cc b/test/src/unit-capi-array.cc index 689381017984..28998bd700a1 100644 --- a/test/src/unit-capi-array.cc +++ b/test/src/unit-capi-array.cc @@ -939,7 +939,7 @@ TEST_CASE_METHOD( tiledb_query_free(&query); // Check correctness - int buffer_read_c[] = {1, 2, 3, 4, 50, 60, 70, 8, 9, 10}; + int32_t buffer_read_c[] = {1, 2, 3, 4, 50, 60, 70, 8, 9, 10}; CHECK(!std::memcmp(buffer_read, buffer_read_c, sizeof(buffer_read_c))); CHECK(buffer_read_size == sizeof(buffer_read_c)); @@ -987,7 +987,8 @@ TEST_CASE_METHOD( tiledb_query_free(&query); // Check correctness - CHECK(buffer_read_size == 0); // Empty array + // Empty array still returns fill values + CHECK(buffer_read_size == 10 * sizeof(int32_t)); // ---- READ AT TIMESTAMP BEFORE UPDATE ---- buffer_read_size = sizeof(buffer_read); @@ -1241,7 +1242,8 @@ TEST_CASE_METHOD( tiledb_query_free(&query); // Check correctness - CHECK(buffer_read_size == 0); // Empty array + // Empty array still returns fill values + CHECK(buffer_read_size == 10 * sizeof(int32_t)); // ---- 
READ AT THE WRITTEN TIMESTAMP ---- buffer_read_size = sizeof(buffer_read); diff --git a/test/src/unit-capi-array_schema.cc b/test/src/unit-capi-array_schema.cc index dc8002eda401..3f1df1a86ffa 100644 --- a/test/src/unit-capi-array_schema.cc +++ b/test/src/unit-capi-array_schema.cc @@ -979,7 +979,8 @@ void ArraySchemaFx::load_and_check_array_schema(const std::string& path) { "- Type: " + ATTR_TYPE_STR + "\n" + "- Cell val num: " + CELL_VAL_NUM_STR + "\n" + "- Filters: 2\n" + " > BZIP2: COMPRESSION_LEVEL=5\n" + - " > BitWidthReduction: BIT_WIDTH_MAX_WINDOW=1000\n"; + " > BitWidthReduction: BIT_WIDTH_MAX_WINDOW=1000\n" + + "- Fill value: -2147483648\n"; FILE* gold_fout = fopen("gold_fout.txt", "w"); const char* dump = dump_str.c_str(); fwrite(dump, sizeof(char), strlen(dump), gold_fout); diff --git a/test/src/unit-capi-consolidation.cc b/test/src/unit-capi-consolidation.cc index f431914d9536..976e22bff50d 100644 --- a/test/src/unit-capi-consolidation.cc +++ b/test/src/unit-capi-consolidation.cc @@ -4103,7 +4103,7 @@ TEST_CASE_METHOD( CHECK(status == TILEDB_COMPLETED); // Check buffers - CHECK(a_size == 0); + CHECK(a_size == 410 * sizeof(int32_t)); // Close array rc = tiledb_array_close(ctx_, array); diff --git a/test/src/unit-capi-error.cc b/test/src/unit-capi-error.cc index 35fbb3eb6e77..4376bc8c32be 100644 --- a/test/src/unit-capi-error.cc +++ b/test/src/unit-capi-error.cc @@ -36,7 +36,7 @@ #include -TEST_CASE("C API: Test error and error message", "[capi], [error]") { +TEST_CASE("C API: Test error and error message", "[capi][error]") { tiledb_ctx_t* ctx; int rc = tiledb_ctx_alloc(nullptr, &ctx); CHECK(rc == TILEDB_OK); diff --git a/test/src/unit-capi-fill_values.cc b/test/src/unit-capi-fill_values.cc new file mode 100644 index 000000000000..137e0380ebf5 --- /dev/null +++ b/test/src/unit-capi-fill_values.cc @@ -0,0 +1,187 @@ +/** + * @file unit-capi-fill_values.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2017-2020 TileDB Inc. 
+ * @copyright Copyright (c) 2016 MIT and Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests the attribute fill values with the C API. 
+ */ + +#include "catch.hpp" +#include "tiledb/sm/c_api/tiledb.h" + +#include + +void check_dump( + tiledb_ctx_t* ctx, tiledb_attribute_t* a, const std::string& gold_out) { + FILE* gold_fout = fopen("gold_fout.txt", "w"); + fwrite(gold_out.c_str(), sizeof(char), gold_out.size(), gold_fout); + fclose(gold_fout); + FILE* fout = fopen("fout.txt", "w"); + tiledb_attribute_dump(ctx, a, fout); + fclose(fout); +#ifdef _WIN32 + CHECK(!system("FC gold_fout.txt fout.txt > nul")); +#else + CHECK(!system("diff gold_fout.txt fout.txt")); +#endif + + // Clean up + tiledb_vfs_t* vfs; + tiledb_vfs_alloc(ctx, nullptr, &vfs); + CHECK(tiledb_vfs_remove_file(ctx, vfs, "gold_fout.txt") == TILEDB_OK); + CHECK(tiledb_vfs_remove_file(ctx, vfs, "fout.txt") == TILEDB_OK); + tiledb_vfs_free(&vfs); +} + +TEST_CASE( + "C API: Test fill values, basic errors", "[capi][fill-values][basic]") { + int32_t value = 5; + uint64_t value_size = sizeof(int32_t); + + tiledb_ctx_t* ctx; + int32_t rc = tiledb_ctx_alloc(nullptr, &ctx); + CHECK(rc == TILEDB_OK); + + // Fixed-sized + tiledb_attribute_t* a; + rc = tiledb_attribute_alloc(ctx, "a", TILEDB_INT32, &a); + CHECK(rc == TILEDB_OK); + + // Null value + rc = tiledb_attribute_set_fill_value(ctx, a, NULL, value_size); + CHECK(rc == TILEDB_ERR); + + // Zero size + rc = tiledb_attribute_set_fill_value(ctx, a, &value, 0); + CHECK(rc == TILEDB_ERR); + + // Wrong size + rc = tiledb_attribute_set_fill_value(ctx, a, &value, 100); + CHECK(rc == TILEDB_ERR); + + // Get default + const void* value_ptr; + rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size); + CHECK(rc == TILEDB_OK); + CHECK(*(const int32_t*)value_ptr == -2147483648); + CHECK(value_size == sizeof(int32_t)); + + // Check dump + std::string dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: 1\n" + + "- Filters: 0\n" + "- Fill value: -2147483648\n"; + check_dump(ctx, a, dump); + + // Correct setter + rc = 
tiledb_attribute_set_fill_value(ctx, a, &value, value_size); + CHECK(rc == TILEDB_OK); + + // Get the set value + rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size); + CHECK(rc == TILEDB_OK); + CHECK(*(const int32_t*)value_ptr == 5); + CHECK(value_size == sizeof(int32_t)); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: 1\n" + "- Filters: 0\n" + + "- Fill value: 5\n"; + check_dump(ctx, a, dump); + + // Setting the cell val num, also sets the fill value to a new default + rc = tiledb_attribute_set_cell_val_num(ctx, a, 2); + CHECK(rc == TILEDB_OK); + rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size); + CHECK(rc == TILEDB_OK); + CHECK(((const int32_t*)value_ptr)[0] == -2147483648); + CHECK(((const int32_t*)value_ptr)[1] == -2147483648); + CHECK(value_size == 2 * sizeof(int32_t)); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: 2\n" + "- Filters: 0\n" + + "- Fill value: -2147483648, -2147483648\n"; + check_dump(ctx, a, dump); + + // Set a fill value that is comprised of two integers + int32_t value_2[2] = {1, 2}; + rc = tiledb_attribute_set_fill_value(ctx, a, value_2, sizeof(value_2)); + CHECK(rc == TILEDB_OK); + + // Get the new value back + rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size); + CHECK(rc == TILEDB_OK); + CHECK(((const int32_t*)value_ptr)[0] == 1); + CHECK(((const int32_t*)value_ptr)[1] == 2); + CHECK(value_size == 2 * sizeof(int32_t)); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: 2\n" + "- Filters: 0\n" + + "- Fill value: 1, 2\n"; + check_dump(ctx, a, dump); + + // Make the attribute var-sized + rc = tiledb_attribute_set_cell_val_num(ctx, a, TILEDB_VAR_NUM); + CHECK(rc == TILEDB_OK); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + 
"- Cell val num: var\n" + "- Filters: 0\n" + + "- Fill value: -2147483648\n"; + check_dump(ctx, a, dump); + + // Get the default var-sized fill value + rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size); + CHECK(rc == TILEDB_OK); + CHECK(*(const int32_t*)value_ptr == -2147483648); + CHECK(value_size == sizeof(int32_t)); + + // Set a new fill value for the var-sized attribute + int32_t value_3[3] = {1, 2, 3}; + rc = tiledb_attribute_set_fill_value(ctx, a, value_3, sizeof(value_3)); + CHECK(rc == TILEDB_OK); + + // Get the new fill value + rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size); + CHECK(rc == TILEDB_OK); + CHECK(((const int32_t*)value_ptr)[0] == 1); + CHECK(((const int32_t*)value_ptr)[1] == 2); + CHECK(((const int32_t*)value_ptr)[2] == 3); + CHECK(value_size == 3 * sizeof(int32_t)); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: var\n" + "- Filters: 0\n" + + "- Fill value: 1, 2, 3\n"; + check_dump(ctx, a, dump); + + // Clean up + tiledb_ctx_free(&ctx); + tiledb_attribute_free(&a); +} diff --git a/test/src/unit-cppapi-array.cc b/test/src/unit-cppapi-array.cc index d33b770d3c5e..f53cd45e7b37 100644 --- a/test/src/unit-cppapi-array.cc +++ b/test/src/unit-cppapi-array.cc @@ -783,7 +783,7 @@ TEST_CASE("C++ API: Open array at", "[cppapi][open-array-at]") { query_r_at_0.submit(); array_r_at_0.close(); auto result = query_r_at_0.result_buffer_elements(); - CHECK(result["a"].second == 0); + CHECK(result["a"].second == 4); // Empty arrays return fill values CHECK(!std::equal(a_r_at_0.begin(), a_r_at_0.end(), a_w.begin())); // Read from later timestamp @@ -885,7 +885,7 @@ TEST_CASE( query_r_at_0.submit(); array_r_at_0.close(); auto result = query_r_at_0.result_buffer_elements(); - CHECK(result["a"].second == 0); + CHECK(result["a"].second == 4); // Empty arrays return fill values CHECK(!std::equal(a_r_at_0.begin(), a_r_at_0.end(), a_w.begin())); // Read from 
later timestamp diff --git a/test/src/unit-cppapi-fill_values.cc b/test/src/unit-cppapi-fill_values.cc new file mode 100644 index 000000000000..530be2d4acfd --- /dev/null +++ b/test/src/unit-cppapi-fill_values.cc @@ -0,0 +1,374 @@ +/** + * @file unit-cppapi-fill_values.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2017-2020 TileDB Inc. + * @copyright Copyright (c) 2016 MIT and Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests the attribute fill values with the C++ API. 
+ */ + +#include "catch.hpp" +#include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/misc/constants.h" + +#include + +using namespace tiledb; + +void check_dump(const Attribute& attr, const std::string& gold_out) { + FILE* gold_fout = fopen("gold_fout.txt", "w"); + fwrite(gold_out.c_str(), sizeof(char), gold_out.size(), gold_fout); + fclose(gold_fout); + FILE* fout = fopen("fout.txt", "w"); + attr.dump(fout); + fclose(fout); +#ifdef _WIN32 + CHECK(!system("FC gold_fout.txt fout.txt > nul")); +#else + CHECK(!system("diff gold_fout.txt fout.txt")); +#endif + + // Clean up + Context ctx; + VFS vfs(ctx); + CHECK_NOTHROW(vfs.remove_file("gold_fout.txt")); + CHECK_NOTHROW(vfs.remove_file("fout.txt")); +} + +void create_array_1d( + const std::string& array_name, + int32_t fill_int32 = tiledb::sm::constants::empty_int32, + std::string fill_char = std::string() + tiledb::sm::constants::empty_char, + std::array fill_double = { + tiledb::sm::constants::empty_float64, + tiledb::sm::constants::empty_float64}) { + Context ctx; + VFS vfs(ctx); + + Domain domain(ctx); + auto d = Dimension::create(ctx, "d", {{1, 10}}, 10); + domain.add_dimension(d); + + auto a1 = Attribute::create(ctx, "a1"); + a1.set_fill_value(&fill_int32, sizeof(fill_int32)); + auto a2 = Attribute::create(ctx, "a2"); + a2.set_fill_value(fill_char.c_str(), fill_char.size()); + auto a3 = Attribute::create(ctx, "a3"); + a3.set_cell_val_num(2); + a3.set_fill_value(fill_double.data(), 2 * sizeof(double)); + + ArraySchema schema(ctx, TILEDB_DENSE); + schema.set_domain(domain); + schema.add_attributes(a1, a2, a3); + + CHECK_NOTHROW(Array::create(array_name, schema)); +} + +void write_array_1d_partial(const std::string& array_name) { + Context ctx; + + std::vector a1 = {3, 4}; + std::vector a2_val = {'3', '3', '4', '4', '4'}; + std::vector a2_off = {0, 2}; + std::vector a3 = {3.1, 3.2, 4.1, 4.2}; + + Array array(ctx, array_name, TILEDB_WRITE); + Query query(ctx, array, TILEDB_WRITE); + 
CHECK_NOTHROW(query.set_buffer("a1", a1)); + CHECK_NOTHROW(query.set_buffer("a2", a2_off, a2_val)); + CHECK_NOTHROW(query.set_buffer("a3", a3)); + CHECK_NOTHROW(query.set_subarray({3, 4})); + CHECK_NOTHROW(query.set_layout(TILEDB_ROW_MAJOR)); + REQUIRE(query.submit() == Query::Status::COMPLETE); + array.close(); +} + +void read_array_1d_partial( + const std::string& array_name, + int32_t fill_int32 = tiledb::sm::constants::empty_int32, + std::string fill_char = std::string() + tiledb::sm::constants::empty_char, + std::array fill_double = { + tiledb::sm::constants::empty_float64, + tiledb::sm::constants::empty_float64}) { + Context ctx; + + std::vector a1(10); + std::vector a2_val(100); + std::vector a2_off(20); + std::vector a3(20); + + Array array(ctx, array_name, TILEDB_READ); + Query query(ctx, array, TILEDB_READ); + CHECK_NOTHROW(query.set_buffer("a1", a1)); + CHECK_NOTHROW(query.set_buffer("a2", a2_off, a2_val)); + CHECK_NOTHROW(query.set_buffer("a3", a3)); + CHECK_NOTHROW(query.set_subarray({1, 10})); + + REQUIRE(query.submit() == Query::Status::COMPLETE); + + auto res = query.result_buffer_elements(); + REQUIRE(res["a1"].second == 10); + REQUIRE(res["a2"].first == 10); + REQUIRE(res["a2"].second == 5 + 8 * fill_char.size()); + REQUIRE(res["a3"].second == 20); + + uint64_t off = 0; + for (size_t i = 0; i < 2; ++i) { + CHECK(a1[i] == fill_int32); + CHECK(a2_off[i] == off); + for (size_t c = 0; c < fill_char.size(); ++c) { + CHECK(a2_val[off] == fill_char[c]); + ++off; + } + CHECK(!std::memcmp(&a3[2 * i], &fill_double[0], sizeof(double))); + CHECK(!std::memcmp(&a3[2 * i + 1], &fill_double[1], sizeof(double))); + } + CHECK(a1[2] == 3); + CHECK(a1[3] == 4); + CHECK(a2_off[2] == off); + CHECK(a2_val[off] == '3'); + CHECK(a2_val[off + 1] == '3'); + off += 2; + CHECK(a2_off[3] == off); + CHECK(a2_val[off] == '4'); + CHECK(a2_val[off + 1] == '4'); + CHECK(a2_val[off + 2] == '4'); + off += 3; + CHECK(a3[4] == 3.1); + CHECK(a3[5] == 3.2); + CHECK(a3[6] == 4.1); + 
CHECK(a3[7] == 4.2); + for (size_t i = 4; i < 10; ++i) { + CHECK(a1[i] == fill_int32); + CHECK(a2_off[i] == off); + for (size_t c = 0; c < fill_char.size(); ++c) { + CHECK(a2_val[off] == fill_char[c]); + ++off; + } + CHECK(!std::memcmp(&a3[2 * i], &fill_double[0], sizeof(double))); + CHECK(!std::memcmp(&a3[2 * i + 1], &fill_double[1], sizeof(double))); + } + + array.close(); +} + +void read_array_1d_empty( + const std::string& array_name, + int32_t fill_int32 = tiledb::sm::constants::empty_int32, + std::string fill_char = std::string() + tiledb::sm::constants::empty_char, + std::array fill_double = { + tiledb::sm::constants::empty_float64, + tiledb::sm::constants::empty_float64}) { + Context ctx; + + std::vector a1(10); + std::vector a2_val(100); + std::vector a2_off(20); + std::vector a3(20); + + Array array(ctx, array_name, TILEDB_READ); + Query query(ctx, array, TILEDB_READ); + CHECK_NOTHROW(query.set_buffer("a1", a1)); + CHECK_NOTHROW(query.set_buffer("a2", a2_off, a2_val)); + CHECK_NOTHROW(query.set_buffer("a3", a3)); + CHECK_NOTHROW(query.set_subarray({1, 10})); + + REQUIRE(query.submit() == Query::Status::COMPLETE); + + auto res = query.result_buffer_elements(); + REQUIRE(res["a1"].second == 10); + REQUIRE(res["a2"].first == 10); + REQUIRE(res["a2"].second == 10 * fill_char.size()); + REQUIRE(res["a3"].second == 20); + + uint64_t off = 0; + for (size_t i = 0; i < 10; ++i) { + CHECK(a1[i] == fill_int32); + CHECK(a2_off[i] == off); + for (size_t c = 0; c < fill_char.size(); ++c) { + CHECK(a2_val[off] == fill_char[c]); + ++off; + } + CHECK(!std::memcmp(&a3[2 * i], &fill_double[0], sizeof(double))); + CHECK(!std::memcmp(&a3[2 * i + 1], &fill_double[1], sizeof(double))); + } + + array.close(); +} + +TEST_CASE( + "C++ API: Test fill values, basic errors", "[cppapi][fill-values][basic]") { + int32_t value = 5; + uint64_t value_size = sizeof(int32_t); + + Context ctx; + + // Fixed-sized + auto a = tiledb::Attribute::create(ctx, "a"); + + // Null value + 
CHECK_THROWS(a.set_fill_value(nullptr, value_size)); + + // Zero size + CHECK_THROWS(a.set_fill_value(&value, 0)); + + // Wrong size + CHECK_THROWS(a.set_fill_value(&value, 100)); + + // Get default + const void* value_ptr; + CHECK_NOTHROW(a.get_fill_value(&value_ptr, &value_size)); + CHECK(*(const int32_t*)value_ptr == -2147483648); + CHECK(value_size == sizeof(int32_t)); + + // Check dump + std::string dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: 1\n" + + "- Filters: 0\n" + "- Fill value: -2147483648\n"; + check_dump(a, dump); + + // Correct setter + CHECK_NOTHROW(a.set_fill_value(&value, value_size)); + + // Get the set value + CHECK_NOTHROW(a.get_fill_value(&value_ptr, &value_size)); + CHECK(*(const int32_t*)value_ptr == 5); + CHECK(value_size == sizeof(int32_t)); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: 1\n" + "- Filters: 0\n" + + "- Fill value: 5\n"; + check_dump(a, dump); + + // Setting the cell val num, also sets the fill value to a new default + CHECK_NOTHROW(a.set_cell_val_num(2)); + CHECK_NOTHROW(a.get_fill_value(&value_ptr, &value_size)); + CHECK(((const int32_t*)value_ptr)[0] == -2147483648); + CHECK(((const int32_t*)value_ptr)[1] == -2147483648); + CHECK(value_size == 2 * sizeof(int32_t)); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: 2\n" + "- Filters: 0\n" + + "- Fill value: -2147483648, -2147483648\n"; + check_dump(a, dump); + + // Set a fill value that is comprised of two integers + int32_t value_2[2] = {1, 2}; + CHECK_NOTHROW(a.set_fill_value(value_2, sizeof(value_2))); + + // Get the new value back + CHECK_NOTHROW(a.get_fill_value(&value_ptr, &value_size)); + CHECK(((const int32_t*)value_ptr)[0] == 1); + CHECK(((const int32_t*)value_ptr)[1] == 2); + CHECK(value_size == 2 * sizeof(int32_t)); + + // Check dump + dump = std::string("### Attribute 
###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: 2\n" + "- Filters: 0\n" + + "- Fill value: 1, 2\n"; + check_dump(a, dump); + + // Make the attribute var-sized + CHECK_NOTHROW(a.set_cell_val_num(TILEDB_VAR_NUM)); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: var\n" + "- Filters: 0\n" + + "- Fill value: -2147483648\n"; + check_dump(a, dump); + + // Get the default var-sized fill value + CHECK_NOTHROW(a.get_fill_value(&value_ptr, &value_size)); + CHECK(*(const int32_t*)value_ptr == -2147483648); + CHECK(value_size == sizeof(int32_t)); + + // Set a new fill value for the var-sized attribute + int32_t value_3[3] = {1, 2, 3}; + CHECK_NOTHROW(a.set_fill_value(value_3, sizeof(value_3))); + + // Get the new fill value + CHECK_NOTHROW(a.get_fill_value(&value_ptr, &value_size)); + CHECK(((const int32_t*)value_ptr)[0] == 1); + CHECK(((const int32_t*)value_ptr)[1] == 2); + CHECK(((const int32_t*)value_ptr)[2] == 3); + CHECK(value_size == 3 * sizeof(int32_t)); + + // Check dump + dump = std::string("### Attribute ###\n") + "- Name: a\n" + + "- Type: INT32\n" + "- Cell val num: var\n" + "- Filters: 0\n" + + "- Fill value: 1, 2, 3\n"; + check_dump(a, dump); +} + +TEST_CASE( + "C++ API: Test fill values, partial array", + "[cppapi][fill-values][partial]") { + Context ctx; + VFS vfs(ctx); + std::string array_name = "fill_values_partial"; + + // First test with default fill values + if (vfs.is_dir(array_name)) + CHECK_NOTHROW(vfs.remove_dir(array_name)); + + create_array_1d(array_name); + write_array_1d_partial(array_name); + read_array_1d_partial(array_name); + + CHECK_NOTHROW(vfs.remove_dir(array_name)); + + std::string s("abc"); + create_array_1d(array_name, 0, s, {1.0, 2.0}); + write_array_1d_partial(array_name); + read_array_1d_partial(array_name, 0, s, {1.0, 2.0}); + + CHECK_NOTHROW(vfs.remove_dir(array_name)); +} + +TEST_CASE( + "C++ API: Test fill values, empty array", 
"[cppapi][fill-values][empty]") { + Context ctx; + VFS vfs(ctx); + std::string array_name = "fill_values_empty"; + + // First test with default fill values + if (vfs.is_dir(array_name)) + CHECK_NOTHROW(vfs.remove_dir(array_name)); + + create_array_1d(array_name); + read_array_1d_empty(array_name); + + CHECK_NOTHROW(vfs.remove_dir(array_name)); + + std::string s("abc"); + create_array_1d(array_name, 0, s, {1.0, 2.0}); + read_array_1d_empty(array_name, 0, s, {1.0, 2.0}); + + CHECK_NOTHROW(vfs.remove_dir(array_name)); +} \ No newline at end of file diff --git a/tiledb/sm/array_schema/array_schema.cc b/tiledb/sm/array_schema/array_schema.cc index 6c63de9750fb..be0a05478331 100644 --- a/tiledb/sm/array_schema/array_schema.cc +++ b/tiledb/sm/array_schema/array_schema.cc @@ -479,7 +479,7 @@ Status ArraySchema::deserialize(ConstBuffer* buff) { RETURN_NOT_OK(buff->read(&attribute_num, sizeof(uint32_t))); for (uint32_t i = 0; i < attribute_num; ++i) { auto attr = new Attribute(); - RETURN_NOT_OK_ELSE(attr->deserialize(buff), delete attr); + RETURN_NOT_OK_ELSE(attr->deserialize(buff, version_), delete attr); attributes_.emplace_back(attr); attribute_map_[attr->name()] = attr; } diff --git a/tiledb/sm/array_schema/attribute.cc b/tiledb/sm/array_schema/attribute.cc index da44376fde2d..2890da23e0f7 100644 --- a/tiledb/sm/array_schema/attribute.cc +++ b/tiledb/sm/array_schema/attribute.cc @@ -40,6 +40,8 @@ #include "tiledb/sm/misc/utils.h" #include +#include +#include namespace tiledb { namespace sm { @@ -56,6 +58,7 @@ Attribute::Attribute(const std::string& name, Datatype type) { name_ = name; type_ = type; cell_val_num_ = (type == Datatype::ANY) ? 
constants::var_num : 1; + set_default_fill_value(); } Attribute::Attribute(const Attribute* attr) { @@ -64,6 +67,7 @@ Attribute::Attribute(const Attribute* attr) { type_ = attr->type(); cell_val_num_ = attr->cell_val_num(); filters_ = attr->filters_; + fill_value_ = attr->fill_value_; } Attribute::~Attribute() = default; @@ -83,7 +87,7 @@ unsigned int Attribute::cell_val_num() const { return cell_val_num_; } -Status Attribute::deserialize(ConstBuffer* buff) { +Status Attribute::deserialize(ConstBuffer* buff, uint32_t version) { // Load attribute name uint32_t attribute_name_size; RETURN_NOT_OK(buff->read(&attribute_name_size, sizeof(uint32_t))); @@ -101,6 +105,18 @@ Status Attribute::deserialize(ConstBuffer* buff) { // Load filter pipeline RETURN_NOT_OK(filters_.deserialize(buff)); + // Load fill value + if (version >= 6) { + uint64_t fill_value_size = 0; + RETURN_NOT_OK(buff->read(&fill_value_size, sizeof(uint64_t))); + assert(fill_value_size > 0); + fill_value_.resize(fill_value_size); + fill_value_.shrink_to_fit(); + RETURN_NOT_OK(buff->read(&fill_value_[0], fill_value_size)); + } else { + set_default_fill_value(); + } + return Status::Ok(); } @@ -118,6 +134,8 @@ void Attribute::dump(FILE* out) const { fprintf(out, "- Filters: %u", (unsigned)filters_.size()); filters_.dump(out); fprintf(out, "\n"); + fprintf(out, "- Fill value: %s", fill_value_str().c_str()); + fprintf(out, "\n"); } const FilterPipeline& Attribute::filters() const { @@ -150,6 +168,12 @@ Status Attribute::serialize(Buffer* buff) { // Write filter pipeline RETURN_NOT_OK(filters_.serialize(buff)); + // Write fill value + auto fill_value_size = (uint64_t)fill_value_.size(); + assert(fill_value_size != 0); + RETURN_NOT_OK(buff->write(&fill_value_size, sizeof(uint64_t))); + RETURN_NOT_OK(buff->write(&fill_value_[0], fill_value_.size())); + return Status::Ok(); } @@ -160,6 +184,7 @@ Status Attribute::set_cell_val_num(unsigned int cell_val_num) { "always variable-sized")); cell_val_num_ = cell_val_num; 
+ set_default_fill_value(); return Status::Ok(); } @@ -186,6 +211,47 @@ void Attribute::set_name(const std::string& name) { name_ = name; } +Status Attribute::set_fill_value(const void* value, uint64_t size) { + if (value == nullptr) { + return LOG_STATUS(Status::AttributeError( + "Cannot set fill value; Input value cannot be null")); + } + if (size == 0) { + return LOG_STATUS(Status::AttributeError( + "Cannot set fill value; Input size cannot be 0")); + } + if (!var_size() && size != cell_size()) { + return LOG_STATUS(Status::AttributeError( + "Cannot set fill value; Input size is not the same as cell size")); + } + + fill_value_.resize(size); + fill_value_.shrink_to_fit(); + std::memcpy(&fill_value_[0], value, size); + + return Status::Ok(); +} + +Status Attribute::get_fill_value(const void** value, uint64_t* size) const { + if (value == nullptr) { + return LOG_STATUS(Status::AttributeError( + "Cannot get fill value; Input value cannot be null")); + } + if (size == nullptr) { + return LOG_STATUS(Status::AttributeError( + "Cannot get fill value; Input size cannot be null")); + } + + *value = fill_value_.data(); + *size = (uint64_t)fill_value_.size(); + + return Status::Ok(); +} + +const ByteVecValue& Attribute::fill_value() const { + return fill_value_; +} + Datatype Attribute::type() const { return type_; } @@ -194,5 +260,40 @@ bool Attribute::var_size() const { return cell_val_num_ == constants::var_num; } +/* ********************************* */ +/* PRIVATE METHODS */ +/* ********************************* */ + +void Attribute::set_default_fill_value() { + auto fill_value = constants::fill_value(type_); + auto fill_size = datatype_size(type_); + auto cell_num = (var_size()) ? 
1 : cell_val_num_; + + fill_value_.resize(cell_num * fill_size); + fill_value_.shrink_to_fit(); + uint64_t offset = 0; + auto buff = (unsigned char*)&fill_value_[0]; + for (uint64_t i = 0; i < cell_num; ++i) { + std::memcpy(buff + offset, fill_value, fill_size); + offset += fill_size; + } +} + +std::string Attribute::fill_value_str() const { + std::string ret; + + auto v_size = datatype_size(type_); + uint64_t num = fill_value_.size() / v_size; + auto v = fill_value_.data(); + for (uint64_t i = 0; i < num; ++i) { + ret += utils::parse::to_str(v, type_); + v += v_size; + if (i != num - 1) + ret += ", "; + } + + return ret; +} + } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/array_schema/attribute.h b/tiledb/sm/array_schema/attribute.h index bb45552c3c64..9dc9a03d8584 100644 --- a/tiledb/sm/array_schema/attribute.h +++ b/tiledb/sm/array_schema/attribute.h @@ -35,6 +35,7 @@ #include "tiledb/sm/filter/filter_pipeline.h" #include "tiledb/sm/misc/status.h" +#include "tiledb/sm/misc/types.h" namespace tiledb { namespace sm { @@ -93,9 +94,10 @@ class Attribute { * Populates the object members from the data in the input binary buffer. * * @param buff The buffer to deserialize from. + * @param version The format spec version. * @return Status */ - Status deserialize(ConstBuffer* buff); + Status deserialize(ConstBuffer* buff, uint32_t version); /** Dumps the attribute contents in ASCII form in the selected output. */ void dump(FILE* out) const; @@ -129,6 +131,21 @@ class Attribute { /** Sets the attribute name. */ void set_name(const std::string& name); + /** + * Sets the fill value for the attribute. Applicable to + * both fixed-sized and var-sized attributes. + */ + Status set_fill_value(const void* value, uint64_t size); + + /** + * Gets the fill value for the attribute. Applicable to + * fixed-sized and var-sized attributes. + */ + Status get_fill_value(const void** value, uint64_t* size) const; + + /** Returns the fill value. 
*/ + const ByteVecValue& fill_value() const; + /** Returns the attribute type. */ Datatype type() const; @@ -154,6 +171,19 @@ class Attribute { /** The attribute type. */ Datatype type_; + + /** The fill value. */ + ByteVecValue fill_value_; + + /* ********************************* */ + /* PRIVATE METHODS */ + /* ********************************* */ + + /** Sets the default fill value. */ + void set_default_fill_value(); + + /** Returns the fill value in string form. */ + std::string fill_value_str() const; }; } // namespace sm diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index 6f22d6b140f2..16fe29b056bd 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -1573,6 +1573,34 @@ int32_t tiledb_attribute_dump( return TILEDB_OK; } +int32_t tiledb_attribute_set_fill_value( + tiledb_ctx_t* ctx, + tiledb_attribute_t* attr, + const void* value, + uint64_t size) { + if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, attr) == TILEDB_ERR) + return TILEDB_ERR; + + if (SAVE_ERROR_CATCH(ctx, attr->attr_->set_fill_value(value, size))) + return TILEDB_ERR; + + return TILEDB_OK; +} + +int32_t tiledb_attribute_get_fill_value( + tiledb_ctx_t* ctx, + tiledb_attribute_t* attr, + const void** value, + uint64_t* size) { + if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, attr) == TILEDB_ERR) + return TILEDB_ERR; + + if (SAVE_ERROR_CATCH(ctx, attr->attr_->get_fill_value(value, size))) + return TILEDB_ERR; + + return TILEDB_OK; +} + /* ********************************* */ /* DOMAIN */ /* ********************************* */ diff --git a/tiledb/sm/c_api/tiledb.h b/tiledb/sm/c_api/tiledb.h index 0a91f42faafe..77072ae1e60d 100644 --- a/tiledb/sm/c_api/tiledb.h +++ b/tiledb/sm/c_api/tiledb.h @@ -2028,6 +2028,84 @@ TILEDB_EXPORT int32_t tiledb_attribute_get_cell_size( TILEDB_EXPORT int32_t tiledb_attribute_dump( tiledb_ctx_t* ctx, const tiledb_attribute_t* attr, FILE* out); +/** + * Sets the default fill value for the input attribute. 
This value will + * be used for the input attribute whenever querying (1) an empty cell in + * a dense array, or (2) a non-empty cell (in either dense or sparse array) + * when values on the input attribute are missing (e.g., if the user writes + * a subset of the attributes in a write operation). + * + * Applicable to both fixed-sized and var-sized attributes. + * + * **Example:** + * + * @code{.c} + * // Assuming an int32 attribute + * int32_t value = 0; + * uint64_t size = sizeof(value); + * tiledb_attribute_set_fill_value(ctx, attr, &value, size); + * + * // Assuming a var char attribute + * const char* value = "null"; + * uint64_t size = strlen(value); + * tiledb_attribute_set_fill_value(ctx, attr, value, size); + * @endcode + * + * @param ctx The TileDB context. + * @param attr The target attribute. + * @param value The fill value to set. + * @param size The fill value size in bytes. + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + * + * @note A call to `tiledb_attribute_cell_val_num` sets the fill value + * of the attribute to its default. Therefore, make sure you invoke + * `tiledb_attribute_set_fill_value` after deciding on the number + * of values this attribute will hold in each cell. + * + * @note For fixed-sized attributes, the input `size` should be equal + * to the cell size. + */ +TILEDB_EXPORT int32_t tiledb_attribute_set_fill_value( + tiledb_ctx_t* ctx, + tiledb_attribute_t* attr, + const void* value, + uint64_t size); + +/** + * Gets the default fill value for the input attribute. This value will + * be used for the input attribute whenever querying (1) an empty cell in + * a dense array, or (2) a non-empty cell (in either dense or sparse array) + * when values on the input attribute are missing (e.g., if the user writes + * a subset of the attributes in a write operation). + * + * Applicable to both fixed-sized and var-sized attributes. 
+ * + * **Example:** + * + * @code{.c} + * // Assuming an int32 attribute + * const int32_t* value; + * uint64_t size; + * tiledb_attribute_get_fill_value(ctx, attr, &value, &size); + * + * // Assuming a var char attribute + * const char* value; + * uint64_t size; + * tiledb_attribute_get_fill_value(ctx, attr, &value, &size); + * @endcode + * + * @param ctx The TileDB context. + * @param attr The target attribute. + * @param value A pointer to the fill value to get. + * @param size The size of the fill value to get. + * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. + */ +TILEDB_EXPORT int32_t tiledb_attribute_get_fill_value( + tiledb_ctx_t* ctx, + tiledb_attribute_t* attr, + const void** value, + uint64_t* size); + /* ********************************* */ /* DOMAIN */ /* ********************************* */ diff --git a/tiledb/sm/cpp_api/attribute.h b/tiledb/sm/cpp_api/attribute.h index 4f5b1dcbc17d..c075a53bb411 100644 --- a/tiledb/sm/cpp_api/attribute.h +++ b/tiledb/sm/cpp_api/attribute.h @@ -225,6 +225,84 @@ class Attribute { return *this; } + /** + * Sets the default fill value for the input attribute. This value will + * be used for the input attribute whenever querying (1) an empty cell in + * a dense array, or (2) a non-empty cell (in either dense or sparse array) + * when values on the input attribute are missing (e.g., if the user writes + * a subset of the attributes in a write operation). + * + * Applicable to both fixed-sized and var-sized attributes. + * + * **Example:** + * + * @code{.c} + * tiledb::Context ctx; + * + * // Fixed-sized attribute + * auto a1 = tiledb::Attribute::create(ctx, "a1"); + * int32_t value = 0; + * uint64_t size = sizeof(value); + * a1.set_fill_value(&value, size); + * + * // Var-sized attribute + * auto a2 = tiledb::Attribute::create(ctx, "a2"); + * std::string value("null"); + * a2.set_fill_value(value.c_str(), value.size()); + * @endcode + * + * @param value The fill value to set. + * @param size The fill value size in bytes. 
+ * + * @note A call to `cell_val_num` sets the fill value + * of the attribute to its default. Therefore, make sure you invoke + * `set_fill_value` after deciding on the number + * of values this attribute will hold in each cell. + * + * @note For fixed-sized attributes, the input `size` should be equal + * to the cell size. + */ + Attribute& set_fill_value(const void* value, uint64_t size) { + auto& ctx = ctx_.get(); + ctx.handle_error(tiledb_attribute_set_fill_value( + ctx.ptr().get(), attr_.get(), value, size)); + return *this; + } + + /** + * Gets the default fill value for the input attribute. This value will + * be used for the input attribute whenever querying (1) an empty cell in + * a dense array, or (2) a non-empty cell (in either dense or sparse array) + * when values on the input attribute are missing (e.g., if the user writes + * a subset of the attributes in a write operation). + * + * Applicable to both fixed-sized and var-sized attributes. + * + * **Example:** + * + * @code{.c} + * // Fixed-sized attribute + * auto a1 = tiledb::Attribute::create(ctx, "a1"); + * const int32_t* value; + * uint64_t size; + * a1.get_fill_value(&value, &size); + * + * // Var-sized attribute + * auto a2 = tiledb::Attribute::create(ctx, "a2"); + * const char* value; + * uint64_t size; + * a2.get_fill_value(&value, &size); + * @endcode + * + * @param value A pointer to the fill value to get. + * @param size The size of the fill value to get. + */ + void get_fill_value(const void** value, uint64_t* size) { + auto& ctx = ctx_.get(); + ctx.handle_error(tiledb_attribute_get_fill_value( + ctx.ptr().get(), attr_.get(), value, size)); + } + /** Check if attribute is variable sized. 
**/ bool variable_sized() const { return cell_val_num() == TILEDB_VAR_NUM; diff --git a/tiledb/sm/misc/constants.cc b/tiledb/sm/misc/constants.cc index 12ff85c39500..16d64538d9a2 100644 --- a/tiledb/sm/misc/constants.cc +++ b/tiledb/sm/misc/constants.cc @@ -449,7 +449,7 @@ const int32_t library_version[3] = { TILEDB_VERSION_MAJOR, TILEDB_VERSION_MINOR, TILEDB_VERSION_PATCH}; /** The TileDB serialization format version number. */ -const uint32_t format_version = 5; +const uint32_t format_version = 6; /** The maximum size of a tile chunk (unit of compression) in bytes. */ const uint64_t max_tile_chunk_size = 64 * 1024; diff --git a/tiledb/sm/misc/utils.cc b/tiledb/sm/misc/utils.cc index 14c2adf288cc..0ac69c701715 100644 --- a/tiledb/sm/misc/utils.cc +++ b/tiledb/sm/misc/utils.cc @@ -350,6 +350,85 @@ std::string to_str(const T& value) { return ss.str(); } +std::string to_str(const void* value, Datatype type) { + std::stringstream ss; + switch (type) { + case Datatype::INT8: + ss << *(const int8_t*)value; + break; + case Datatype::UINT8: + ss << *(const uint8_t*)value; + break; + case Datatype::INT16: + ss << *(const int16_t*)value; + break; + case Datatype::UINT16: + ss << *(const uint16_t*)value; + break; + case Datatype::INT32: + ss << *(const int32_t*)value; + break; + case Datatype::UINT32: + ss << *(const uint32_t*)value; + break; + case Datatype::INT64: + ss << *(const int64_t*)value; + break; + case Datatype::UINT64: + ss << *(const uint64_t*)value; + break; + case Datatype::FLOAT32: + ss << *(const float*)value; + break; + case Datatype::FLOAT64: + ss << *(const double*)value; + break; + case Datatype::CHAR: + ss << *(const char*)value; + break; + case Datatype::ANY: + ss << *(const uint8_t*)value; + break; + case Datatype::STRING_ASCII: + ss << *(const uint8_t*)value; + break; + case Datatype::STRING_UTF8: + ss << *(const uint8_t*)value; + break; + case Datatype::STRING_UTF16: + ss << *(const uint16_t*)value; + break; + case Datatype::STRING_UTF32: + ss << 
*(const uint32_t*)value; + break; + case Datatype::STRING_UCS2: + ss << *(const uint16_t*)value; + break; + case Datatype::STRING_UCS4: + ss << *(const uint32_t*)value; + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + ss << *(const int64_t*)value; + break; + default: + assert(false); + } + + return ss.str(); +} + uint64_t common_prefix_size(const std::string& a, const std::string& b) { auto size = std::min(a.size(), b.size()); for (size_t i = 0; i < size; ++i) { diff --git a/tiledb/sm/misc/utils.h b/tiledb/sm/misc/utils.h index aa49f708f358..02b77a0f5d42 100644 --- a/tiledb/sm/misc/utils.h +++ b/tiledb/sm/misc/utils.h @@ -148,6 +148,9 @@ bool ends_with(const std::string& value, const std::string& suffix); template std::string to_str(const T& value); +/** Converts the input value of input type to string. */ +std::string to_str(const void* value, Datatype type); + /** Returns the size of the common prefix between `a` and `b`. 
*/ uint64_t common_prefix_size(const std::string& a, const std::string& b); diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 6f5789352c42..d3488aa5eaf2 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -275,12 +275,14 @@ Status Reader::read() { STATS_START_TIMER(stats::Stats::TimerType::READ) STATS_ADD_COUNTER(stats::Stats::CounterType::READ_NUM, 1) + auto dense_mode = array_schema_->dense() && !sparse_mode_; + // Get next partition if (!read_state_.unsplittable_) RETURN_NOT_OK(read_state_.next()); // Handle empty array or empty/finished subarray - if (fragment_metadata_.empty()) { + if (!dense_mode && fragment_metadata_.empty()) { zero_out_buffer_sizes(); return Status::Ok(); } @@ -293,7 +295,7 @@ Status Reader::read() { reset_buffer_sizes(); // Perform read - if (array_schema_->dense() && !sparse_mode_) { + if (dense_mode) { RETURN_NOT_OK(dense_read()); } else { RETURN_NOT_OK(sparse_read()); @@ -906,6 +908,10 @@ Status Reader::copy_fixed_cells( auto buffer = (unsigned char*)it->second.buffer_; auto buffer_size = it->second.buffer_size_; auto cell_size = array_schema_->cell_size(name); + ByteVecValue fill_value; + if (array_schema_->is_attr(name)) + fill_value = array_schema_->attribute(name)->fill_value(); + uint64_t fill_value_size = (uint64_t)fill_value.size(); // Precompute the cell range destination offsets in the buffer auto num_cs = result_cell_slabs.size(); @@ -933,15 +939,11 @@ Status Reader::copy_fixed_cells( // Copy if (cs.tile_ == nullptr) { // Empty range - auto type = array_schema_->type(name); - auto fill_size = datatype_size(type); - auto fill_value = constants::fill_value(type); - assert(fill_value != nullptr); auto bytes_to_copy = cs.length_ * cell_size; - auto fill_num = bytes_to_copy / fill_size; + auto fill_num = bytes_to_copy / fill_value_size; for (uint64_t j = 0; j < fill_num; ++j) { - std::memcpy(buffer + offset, fill_value, fill_size); - offset += fill_size; + std::memcpy(buffer + offset, 
fill_value.data(), fill_value_size); + offset += fill_value_size; } } else { // Non-empty range if (stride == UINT64_MAX) { @@ -987,10 +989,10 @@ Status Reader::copy_var_cells( auto buffer_size = it->second.buffer_size_; auto buffer_var_size = it->second.buffer_var_size_; uint64_t offset_size = constants::cell_var_offset_size; - auto type = array_schema_->type(name); - auto fill_size = datatype_size(type); - auto fill_value = constants::fill_value(type); - assert(fill_value != nullptr); + ByteVecValue fill_value; + if (array_schema_->is_attr(name)) + fill_value = array_schema_->attribute(name)->fill_value(); + auto fill_value_size = (uint64_t)fill_value.size(); // Compute the destinations of offsets and var-len data in the buffers. std::vector> offset_offsets_per_cs; @@ -1052,7 +1054,7 @@ Status Reader::copy_var_cells( // Copy variable-sized value if (cs.tile_ == nullptr) { - std::memcpy(var_dest, &fill_value, fill_size); + std::memcpy(var_dest, fill_value.data(), fill_value_size); } else { const uint64_t cell_var_size = (cell_idx != tile_cell_num - 1) ? @@ -1091,8 +1093,10 @@ Status Reader::compute_var_cell_destinations( // For easy reference auto num_cs = result_cell_slabs.size(); auto offset_size = constants::cell_var_offset_size; - auto type = array_schema_->type(name); - auto fill_size = datatype_size(type); + ByteVecValue fill_value; + if (array_schema_->is_attr(name)) + fill_value = array_schema_->attribute(name)->fill_value(); + auto fill_value_size = (uint64_t)fill_value.size(); // Resize the output vectors offset_offsets_per_cs->resize(num_cs); @@ -1136,7 +1140,7 @@ Status Reader::compute_var_cell_destinations( // Get size of variable-sized cell uint64_t cell_var_size = 0; if (cs.tile_ == nullptr) { - cell_var_size = fill_size; + cell_var_size = fill_value_size; } else { cell_var_size = (cell_idx != tile_cell_num - 1) ? 
@@ -1423,7 +1427,6 @@ template Status Reader::dense_read() { // Sanity checks assert(std::is_integral::value); - assert(!fragment_metadata_.empty()); // Compute result coordinates from the sparse fragments // `sparse_result_tiles` will hold all the relevant result tiles of