From 9e31d29c8a05bd66b548a329a75e41a32f5b2890 Mon Sep 17 00:00:00 2001 From: Stavros Papadopoulos Date: Thu, 5 Mar 2020 11:56:29 -0500 Subject: [PATCH] User can set separate coordinate buffers upon reads. Towards addressing #93 --- HISTORY.md | 3 + test/src/unit-SubarrayPartitioner-error.cc | 10 + test/src/unit-capi-array_schema.cc | 187 --------- test/src/unit-capi-consolidation.cc | 2 +- test/src/unit-capi-dense_array.cc | 419 +++++++++++++++----- test/src/unit-capi-query_2.cc | 88 +++++ test/src/unit-capi-sparse_array.cc | 435 ++++++++++++++++++++- test/src/unit-cppapi-array.cc | 10 +- tiledb/sm/array/array.cc | 98 ++--- tiledb/sm/array/array.h | 12 +- tiledb/sm/array_schema/array_schema.cc | 54 +-- tiledb/sm/array_schema/array_schema.h | 29 -- tiledb/sm/array_schema/attribute.cc | 7 +- tiledb/sm/array_schema/attribute.h | 3 - tiledb/sm/array_schema/dimension.cc | 9 +- tiledb/sm/array_schema/dimension.h | 3 - tiledb/sm/array_schema/domain.cc | 19 +- tiledb/sm/array_schema/domain.h | 3 - tiledb/sm/c_api/tiledb.cc | 99 +---- tiledb/sm/c_api/tiledb.h | 44 +-- tiledb/sm/misc/constants.cc | 6 - tiledb/sm/misc/constants.h | 6 - tiledb/sm/misc/types.h | 7 +- tiledb/sm/query/query.cc | 44 +-- tiledb/sm/query/reader.cc | 377 +++++++++++------- tiledb/sm/query/reader.h | 137 ++++--- tiledb/sm/query/writer.cc | 28 +- tiledb/sm/serialization/query.cc | 22 +- tiledb/sm/storage_manager/consolidator.cc | 152 ++++--- tiledb/sm/storage_manager/consolidator.h | 39 +- tiledb/sm/subarray/subarray.cc | 230 ++++++----- tiledb/sm/subarray/subarray.h | 48 ++- tiledb/sm/subarray/subarray_partitioner.cc | 166 ++++---- tiledb/sm/subarray/subarray_partitioner.h | 48 ++- 34 files changed, 1676 insertions(+), 1168 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index b089aaf3c2f..81879df7551 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,9 +10,12 @@ * Changed `domain` input of `tiledb_dimension_get_domain` to `const void**` (from `void**`). 
* Changed `tile_extent` input of `tiledb_dimension_get_tile_extent` to `const void**` (from `void**`). +* Anonymous attributes and dimensions (i.e., empty strings for attribute/dimension names) are no longer supported. This is because the user can now set separate dimension buffers on the query and, therefore, supporting anonymous attributes and dimensions creates ambiguity in the current API. ## New features +* The user can now set separate coordinate buffers on the query. Also, any subset of the dimensions is supported. + ## Improvements * Added support for AWS Security Token Service session tokens via configuration option `vfs.s3.session_token`. [#1472](https://github.com/TileDB-Inc/TileDB/pull/1472) diff --git a/test/src/unit-SubarrayPartitioner-error.cc b/test/src/unit-SubarrayPartitioner-error.cc index bb509764f83..ced346224fe 100644 --- a/test/src/unit-SubarrayPartitioner-error.cc +++ b/test/src/unit-SubarrayPartitioner-error.cc @@ -180,6 +180,10 @@ TEST_CASE_METHOD( CHECK(!st.ok()); st = subarray_partitioner.set_result_budget("b", 100, 101); CHECK(st.ok()); + st = subarray_partitioner.set_result_budget("d", 1000, 1010); + CHECK(!st.ok()); + st = subarray_partitioner.set_result_budget("d", 1000); + CHECK(st.ok()); st = subarray_partitioner.get_result_budget(nullptr, &budget_off, &budget_val); CHECK(!st.ok()); @@ -196,15 +200,21 @@ TEST_CASE_METHOD( st = subarray_partitioner.get_result_budget( TILEDB_COORDS, &budget_off, &budget_val); CHECK(!st.ok()); + st = subarray_partitioner.get_result_budget("d", &budget); + CHECK(st.ok()); + CHECK(budget == 1000); uint64_t memory_budget, memory_budget_var; st = subarray_partitioner.get_memory_budget( &memory_budget, &memory_budget_var); + CHECK(st.ok()); CHECK(memory_budget == memory_budget_); CHECK(memory_budget_var == memory_budget_var_); st = subarray_partitioner.set_memory_budget(16, 16); + CHECK(st.ok()); st = subarray_partitioner.get_memory_budget( &memory_budget, &memory_budget_var); + CHECK(st.ok()); 
CHECK(memory_budget == 16); CHECK(memory_budget_var == 16); diff --git a/test/src/unit-capi-array_schema.cc b/test/src/unit-capi-array_schema.cc index a0aa796262b..2c9874db630 100644 --- a/test/src/unit-capi-array_schema.cc +++ b/test/src/unit-capi-array_schema.cc @@ -957,193 +957,6 @@ TEST_CASE_METHOD( tiledb_domain_free(&domain); } -TEST_CASE_METHOD( - ArraySchemaFx, - "C API: Test array schema multiple anonymous dimensions", - "[capi], [array-schema]") { - // Create dimensions - tiledb_dimension_t* d1; - int rc = tiledb_dimension_alloc( - ctx_, "", TILEDB_INT64, &DIM_DOMAIN[0], &TILE_EXTENTS[0], &d1); - REQUIRE(rc == TILEDB_OK); - - tiledb_dimension_t* d2; - rc = tiledb_dimension_alloc( - ctx_, "", TILEDB_INT64, &DIM_DOMAIN[2], &TILE_EXTENTS[1], &d2); - REQUIRE(rc == TILEDB_OK); - - // Set domain - tiledb_domain_t* domain; - rc = tiledb_domain_alloc(ctx_, &domain); - REQUIRE(rc == TILEDB_OK); - rc = tiledb_domain_add_dimension(ctx_, domain, d1); - REQUIRE(rc == TILEDB_OK); - rc = tiledb_domain_add_dimension(ctx_, domain, d2); - REQUIRE(rc == TILEDB_OK); - - tiledb_dimension_t* get_dim = nullptr; - rc = tiledb_domain_get_dimension_from_name(ctx_, domain, "", &get_dim); - // getting multiple anonymous dimension by name is an error - CHECK(rc == TILEDB_ERR); - - rc = tiledb_domain_get_dimension_from_index(ctx_, domain, 0, &get_dim); - CHECK(rc == TILEDB_OK); - CHECK(get_dim != nullptr); - tiledb_dimension_free(&get_dim); - - // Clean up - tiledb_dimension_free(&d1); - tiledb_dimension_free(&d2); - tiledb_domain_free(&domain); -} - -TEST_CASE_METHOD( - ArraySchemaFx, - "C API: Test array schema one anonymous attribute", - "[capi], [array-schema], [anon-attr]") { - // Create array schema - tiledb_array_schema_t* array_schema; - int rc = tiledb_array_schema_alloc(ctx_, TILEDB_DENSE, &array_schema); - REQUIRE(rc == TILEDB_OK); - - // Create dimensions - tiledb_dimension_t* d1; - rc = tiledb_dimension_alloc( - ctx_, "", TILEDB_INT64, &DIM_DOMAIN[0], &TILE_EXTENTS[0], 
&d1); - REQUIRE(rc == TILEDB_OK); - - // Set domain - tiledb_domain_t* domain; - rc = tiledb_domain_alloc(ctx_, &domain); - REQUIRE(rc == TILEDB_OK); - rc = tiledb_domain_add_dimension(ctx_, domain, d1); - REQUIRE(rc == TILEDB_OK); - rc = tiledb_array_schema_set_domain(ctx_, array_schema, domain); - REQUIRE(rc == TILEDB_OK); - - // Set attribute - tiledb_attribute_t* attr1; - rc = tiledb_attribute_alloc(ctx_, "", ATTR_TYPE, &attr1); - REQUIRE(rc == TILEDB_OK); - tiledb_attribute_t* attr2; - rc = tiledb_attribute_alloc(ctx_, "foo", ATTR_TYPE, &attr2); - REQUIRE(rc == TILEDB_OK); - - rc = tiledb_array_schema_add_attribute(ctx_, array_schema, attr1); - REQUIRE(rc == TILEDB_OK); - rc = tiledb_array_schema_add_attribute(ctx_, array_schema, attr2); - REQUIRE(rc == TILEDB_OK); - - tiledb_attribute_t* get_attr = nullptr; - rc = tiledb_array_schema_get_attribute_from_name( - ctx_, array_schema, "", &get_attr); - // from name when there are multiple anon attributes is an error - CHECK(rc == TILEDB_OK); - CHECK(get_attr != nullptr); - tiledb_attribute_free(&get_attr); - - rc = tiledb_array_schema_get_attribute_from_index( - ctx_, array_schema, 0, &get_attr); - CHECK(rc == TILEDB_OK); - CHECK(get_attr != nullptr); - const char* get_name = nullptr; - rc = tiledb_attribute_get_name(ctx_, get_attr, &get_name); - CHECK(rc == TILEDB_OK); - CHECK(get_name != nullptr); - CHECK_THAT(get_name, Catch::Equals("")); - tiledb_attribute_free(&get_attr); - - rc = tiledb_array_schema_get_attribute_from_name( - ctx_, array_schema, "foo", &get_attr); - CHECK(rc == TILEDB_OK); - CHECK(get_attr != nullptr); - rc = tiledb_attribute_get_name(ctx_, get_attr, &get_name); - CHECK(rc == TILEDB_OK); - CHECK_THAT(get_name, Catch::Equals("foo")); - tiledb_attribute_free(&get_attr); - - int32_t has_attr = 0; - rc = tiledb_array_schema_has_attribute(ctx_, array_schema, "", &has_attr); - REQUIRE(rc == TILEDB_OK); - REQUIRE(has_attr == 1); - has_attr = 0; - rc = tiledb_array_schema_has_attribute(ctx_, 
array_schema, "foo", &has_attr); - REQUIRE(rc == TILEDB_OK); - REQUIRE(has_attr == 1); - has_attr = 0; - rc = tiledb_array_schema_has_attribute(ctx_, array_schema, "bar", &has_attr); - REQUIRE(rc == TILEDB_OK); - REQUIRE(has_attr == 0); - - // Clean up - tiledb_attribute_free(&attr1); - tiledb_attribute_free(&attr2); - tiledb_dimension_free(&d1); - tiledb_domain_free(&domain); - tiledb_array_schema_free(&array_schema); -} - -TEST_CASE_METHOD( - ArraySchemaFx, - "C API: Test array schema multiple anonymous attributes", - "[capi], [array-schema], [anon-attr]") { - // Create array schema - tiledb_array_schema_t* array_schema; - int rc = tiledb_array_schema_alloc(ctx_, TILEDB_DENSE, &array_schema); - REQUIRE(rc == TILEDB_OK); - - // Create dimensions - tiledb_dimension_t* d1; - rc = tiledb_dimension_alloc( - ctx_, "", TILEDB_INT64, &DIM_DOMAIN[0], &TILE_EXTENTS[0], &d1); - REQUIRE(rc == TILEDB_OK); - - // Set domain - tiledb_domain_t* domain; - rc = tiledb_domain_alloc(ctx_, &domain); - REQUIRE(rc == TILEDB_OK); - rc = tiledb_domain_add_dimension(ctx_, domain, d1); - REQUIRE(rc == TILEDB_OK); - rc = tiledb_array_schema_set_domain(ctx_, array_schema, domain); - REQUIRE(rc == TILEDB_OK); - - // Set attribute - tiledb_attribute_t* attr1; - rc = tiledb_attribute_alloc(ctx_, "", ATTR_TYPE, &attr1); - REQUIRE(rc == TILEDB_OK); - tiledb_attribute_t* attr2; - rc = tiledb_attribute_alloc(ctx_, "", ATTR_TYPE, &attr2); - REQUIRE(rc == TILEDB_OK); - - rc = tiledb_array_schema_add_attribute(ctx_, array_schema, attr1); - REQUIRE(rc == TILEDB_OK); - rc = tiledb_array_schema_add_attribute(ctx_, array_schema, attr2); - CHECK(rc != TILEDB_OK); - - tiledb_attribute_t* get_attr = nullptr; - rc = tiledb_array_schema_get_attribute_from_name( - ctx_, array_schema, "", &get_attr); - CHECK(rc == TILEDB_OK); - - rc = tiledb_array_schema_get_attribute_from_index( - ctx_, array_schema, 0, &get_attr); - CHECK(rc == TILEDB_OK); - CHECK(get_attr != nullptr); - tiledb_attribute_free(&get_attr); - - 
int32_t has_attr = false; - rc = tiledb_array_schema_has_attribute(ctx_, array_schema, "", &has_attr); - REQUIRE(rc == TILEDB_OK); - REQUIRE(has_attr == 1); - - // Clean up - tiledb_attribute_free(&attr1); - tiledb_attribute_free(&attr2); - tiledb_dimension_free(&d1); - tiledb_domain_free(&domain); - tiledb_array_schema_free(&array_schema); -} - TEST_CASE_METHOD( ArraySchemaFx, "C API: Test array schema with invalid float dense domain", diff --git a/test/src/unit-capi-consolidation.cc b/test/src/unit-capi-consolidation.cc index b597205bc79..a517fc9a40c 100644 --- a/test/src/unit-capi-consolidation.cc +++ b/test/src/unit-capi-consolidation.cc @@ -2714,7 +2714,7 @@ bool ConsolidationFx::is_array(const std::string& array_name) { TEST_CASE_METHOD( ConsolidationFx, "C API: Test consolidation, dense", - "[capi][consolidation][dense-consolidation]") { + "[capi][consolidation][dense]") { remove_dense_array(); create_dense_array(); diff --git a/test/src/unit-capi-dense_array.cc b/test/src/unit-capi-dense_array.cc index 46f77007856..3495921c95f 100644 --- a/test/src/unit-capi-dense_array.cc +++ b/test/src/unit-capi-dense_array.cc @@ -111,7 +111,7 @@ struct DenseArrayFx { void check_sparse_writes(const std::string& path); void check_simultaneous_writes(const std::string& path); void check_cancel_and_retry_writes(const std::string& path); - void check_return_coords(const std::string& path); + void check_return_coords(const std::string& path, bool split_coords); void check_non_empty_domain(const std::string& path); void create_dense_vector(const std::string& path); void create_dense_array(const std::string& array_name); @@ -121,13 +121,18 @@ struct DenseArrayFx { void write_dense_array_missing_attributes(const std::string& array_name); void write_partial_dense_array(const std::string& array_name); void read_dense_vector_mixed(const std::string& array_name); - void read_dense_array_with_coords_full_global(const std::string& array_name); - void 
read_dense_array_with_coords_full_row(const std::string& array_name); - void read_dense_array_with_coords_full_col(const std::string& array_name); + void read_dense_array_with_coords_full_global( + const std::string& array_name, bool split_coords); + void read_dense_array_with_coords_full_row( + const std::string& array_name, bool split_coords); + void read_dense_array_with_coords_full_col( + const std::string& array_name, bool split_coords); void read_dense_array_with_coords_subarray_global( - const std::string& array_name); - void read_dense_array_with_coords_subarray_row(const std::string& array_name); - void read_dense_array_with_coords_subarray_col(const std::string& array_name); + const std::string& array_name, bool split_coords); + void read_dense_array_with_coords_subarray_row( + const std::string& array_name, bool split_coords); + void read_dense_array_with_coords_subarray_col( + const std::string& array_name, bool split_coords); static std::string random_name(const std::string& prefix); /** @@ -1757,16 +1762,17 @@ void DenseArrayFx::create_dense_array_1_attribute( tiledb_array_schema_free(&array_schema); } -void DenseArrayFx::check_return_coords(const std::string& path) { +void DenseArrayFx::check_return_coords( + const std::string& path, bool split_coords) { std::string array_name = path + "return_coords"; create_dense_array(array_name); write_dense_array(array_name); - read_dense_array_with_coords_full_global(array_name); - read_dense_array_with_coords_full_row(array_name); - read_dense_array_with_coords_full_col(array_name); - read_dense_array_with_coords_subarray_global(array_name); - read_dense_array_with_coords_subarray_row(array_name); - read_dense_array_with_coords_subarray_col(array_name); + read_dense_array_with_coords_full_global(array_name, split_coords); + read_dense_array_with_coords_full_row(array_name, split_coords); + read_dense_array_with_coords_full_col(array_name, split_coords); + 
read_dense_array_with_coords_subarray_global(array_name, split_coords); + read_dense_array_with_coords_subarray_row(array_name, split_coords); + read_dense_array_with_coords_subarray_col(array_name, split_coords); } void DenseArrayFx::write_dense_array(const std::string& array_name) { @@ -2062,7 +2068,7 @@ void DenseArrayFx::read_dense_vector_mixed(const std::string& array_name) { } void DenseArrayFx::read_dense_array_with_coords_full_global( - const std::string& array_name) { + const std::string& array_name, bool split_coords) { // Correct buffers int c_buffer_a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; uint64_t c_buffer_a2_off[] = { @@ -2080,6 +2086,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_global( }; uint64_t c_buffer_coords[] = {1, 1, 1, 2, 2, 1, 2, 2, 1, 3, 1, 4, 2, 3, 2, 4, 3, 1, 3, 2, 4, 1, 4, 2, 3, 3, 3, 4, 4, 3, 4, 4}; + uint64_t c_buffer_d1[] = {1, 1, 2, 2, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 4, 4}; + uint64_t c_buffer_d2[] = {1, 2, 1, 2, 3, 4, 3, 4, 1, 2, 1, 2, 3, 4, 3, 4}; // Open array tiledb_array_t* array; @@ -2091,7 +2099,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_global( // Compute max buffer sizes uint64_t subarray[] = {1, 4, 1, 4}; uint64_t buffer_a1_size, buffer_a2_off_size, buffer_a2_val_size, - buffer_a3_size, buffer_coords_size; + buffer_a3_size, buffer_coords_size = 0, buffer_d1_size = 0, + buffer_d2_size = 0; rc = tiledb_array_max_buffer_size( ctx_, array, "a1", subarray, &buffer_a1_size); CHECK(rc == TILEDB_OK); @@ -2101,9 +2110,18 @@ void DenseArrayFx::read_dense_array_with_coords_full_global( rc = tiledb_array_max_buffer_size( ctx_, array, "a3", subarray, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_array_max_buffer_size( - ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_array_max_buffer_size( + ctx_, array, "d1", subarray, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_max_buffer_size( + ctx_, 
array, "d2", subarray, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_array_max_buffer_size( + ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Prepare cell buffers auto buffer_a1 = (int*)malloc(buffer_a1_size); @@ -2111,6 +2129,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_global( auto buffer_a2_val = (char*)malloc(buffer_a2_val_size); auto buffer_a3 = (float*)malloc(buffer_a3_size); auto buffer_coords = (uint64_t*)malloc(buffer_coords_size); + auto buffer_d1 = (uint64_t*)malloc(buffer_d1_size); + auto buffer_d2 = (uint64_t*)malloc(buffer_d2_size); // Create query tiledb_query_t* query; @@ -2133,9 +2153,16 @@ void DenseArrayFx::read_dense_array_with_coords_full_global( CHECK(rc == TILEDB_OK); rc = tiledb_query_set_buffer(ctx_, query, "a3", buffer_a3, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_query_set_buffer( - ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", buffer_d2, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Submit query rc = submit_query_wrapper(array_name, query); @@ -2154,12 +2181,20 @@ void DenseArrayFx::read_dense_array_with_coords_full_global( CHECK(sizeof(c_buffer_a2_off) == buffer_a2_off_size); CHECK(sizeof(c_buffer_a2_val) - 1 == buffer_a2_val_size); CHECK(sizeof(c_buffer_a3) == buffer_a3_size); - CHECK(sizeof(c_buffer_coords) == buffer_coords_size); CHECK(!memcmp(buffer_a1, c_buffer_a1, sizeof(c_buffer_a1))); CHECK(!memcmp(buffer_a2_off, c_buffer_a2_off, sizeof(c_buffer_a2_off))); CHECK(!memcmp(buffer_a2_val, c_buffer_a2_val, sizeof(c_buffer_a2_val) - 1)); CHECK(!memcmp(buffer_a3, c_buffer_a3, 
sizeof(c_buffer_a3))); - CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + + if (split_coords) { + CHECK(sizeof(c_buffer_d1) == buffer_d1_size); + CHECK(!memcmp(buffer_d1, c_buffer_d1, sizeof(c_buffer_d1))); + CHECK(sizeof(c_buffer_d2) == buffer_d2_size); + CHECK(!memcmp(buffer_d2, c_buffer_d2, sizeof(c_buffer_d2))); + } else { + CHECK(sizeof(c_buffer_coords) == buffer_coords_size); + CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + } // Close array rc = tiledb_array_close(ctx_, array); @@ -2173,10 +2208,12 @@ void DenseArrayFx::read_dense_array_with_coords_full_global( free(buffer_a2_val); free(buffer_a3); free(buffer_coords); + free(buffer_d1); + free(buffer_d2); } void DenseArrayFx::read_dense_array_with_coords_full_row( - const std::string& array_name) { + const std::string& array_name, bool split_coords) { // Correct buffers int c_buffer_a1[] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; uint64_t c_buffer_a2_off[] = { @@ -2194,6 +2231,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_row( }; uint64_t c_buffer_coords[] = {1, 1, 1, 2, 1, 3, 1, 4, 2, 1, 2, 2, 2, 3, 2, 4, 3, 1, 3, 2, 3, 3, 3, 4, 4, 1, 4, 2, 4, 3, 4, 4}; + uint64_t c_buffer_d1[] = {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4}; + uint64_t c_buffer_d2[] = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}; // Open array tiledb_array_t* array; @@ -2205,7 +2244,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_row( // Compute max buffer sizes uint64_t subarray[] = {1, 4, 1, 4}; uint64_t buffer_a1_size, buffer_a2_off_size, buffer_a2_val_size, - buffer_a3_size, buffer_coords_size; + buffer_a3_size, buffer_coords_size = 0, buffer_d1_size = 0, + buffer_d2_size = 0; rc = tiledb_array_max_buffer_size( ctx_, array, "a1", subarray, &buffer_a1_size); CHECK(rc == TILEDB_OK); @@ -2215,9 +2255,18 @@ void DenseArrayFx::read_dense_array_with_coords_full_row( rc = tiledb_array_max_buffer_size( ctx_, array, "a3", subarray, 
&buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_array_max_buffer_size( - ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_array_max_buffer_size( + ctx_, array, "d1", subarray, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_max_buffer_size( + ctx_, array, "d2", subarray, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_array_max_buffer_size( + ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Prepare cell buffers auto buffer_a1 = (int*)malloc(buffer_a1_size); @@ -2225,6 +2274,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_row( auto buffer_a2_val = (char*)malloc(buffer_a2_val_size); auto buffer_a3 = (float*)malloc(buffer_a3_size); auto buffer_coords = (uint64_t*)malloc(buffer_coords_size); + auto buffer_d1 = (uint64_t*)malloc(buffer_d1_size); + auto buffer_d2 = (uint64_t*)malloc(buffer_d2_size); // Create query tiledb_query_t* query; @@ -2247,9 +2298,16 @@ void DenseArrayFx::read_dense_array_with_coords_full_row( CHECK(rc == TILEDB_OK); rc = tiledb_query_set_buffer(ctx_, query, "a3", buffer_a3, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_query_set_buffer( - ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", buffer_d2, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Submit query rc = submit_query_wrapper(array_name, query); @@ -2268,12 +2326,20 @@ void DenseArrayFx::read_dense_array_with_coords_full_row( CHECK(sizeof(c_buffer_a2_off) == buffer_a2_off_size); CHECK(sizeof(c_buffer_a2_val) - 1 == buffer_a2_val_size); 
CHECK(sizeof(c_buffer_a3) == buffer_a3_size); - CHECK(sizeof(c_buffer_coords) == buffer_coords_size); CHECK(!memcmp(buffer_a1, c_buffer_a1, sizeof(c_buffer_a1))); CHECK(!memcmp(buffer_a2_off, c_buffer_a2_off, sizeof(c_buffer_a2_off))); CHECK(!memcmp(buffer_a2_val, c_buffer_a2_val, sizeof(c_buffer_a2_val) - 1)); CHECK(!memcmp(buffer_a3, c_buffer_a3, sizeof(c_buffer_a3))); - CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + + if (split_coords) { + CHECK(sizeof(c_buffer_d1) == buffer_d1_size); + CHECK(!memcmp(buffer_d1, c_buffer_d1, sizeof(c_buffer_d1))); + CHECK(sizeof(c_buffer_d2) == buffer_d2_size); + CHECK(!memcmp(buffer_d2, c_buffer_d2, sizeof(c_buffer_d2))); + } else { + CHECK(sizeof(c_buffer_coords) == buffer_coords_size); + CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + } // Close array rc = tiledb_array_close(ctx_, array); @@ -2287,10 +2353,12 @@ void DenseArrayFx::read_dense_array_with_coords_full_row( free(buffer_a2_val); free(buffer_a3); free(buffer_coords); + free(buffer_d1); + free(buffer_d2); } void DenseArrayFx::read_dense_array_with_coords_full_col( - const std::string& array_name) { + const std::string& array_name, bool split_coords) { // Correct buffers int c_buffer_a1[] = {0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15}; uint64_t c_buffer_a2_off[] = { @@ -2307,6 +2375,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_col( }; uint64_t c_buffer_coords[] = {1, 1, 2, 1, 3, 1, 4, 1, 1, 2, 2, 2, 3, 2, 4, 2, 1, 3, 2, 3, 3, 3, 4, 3, 1, 4, 2, 4, 3, 4, 4, 4}; + uint64_t c_buffer_d1[] = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}; + uint64_t c_buffer_d2[] = {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4}; // Open array tiledb_array_t* array; @@ -2318,7 +2388,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_col( // Compute max buffer sizes uint64_t subarray[] = {1, 4, 1, 4}; uint64_t buffer_a1_size, buffer_a2_off_size, buffer_a2_val_size, - buffer_a3_size, buffer_coords_size; + 
buffer_a3_size, buffer_coords_size = 0, buffer_d1_size = 0, + buffer_d2_size = 0; rc = tiledb_array_max_buffer_size( ctx_, array, "a1", subarray, &buffer_a1_size); CHECK(rc == TILEDB_OK); @@ -2328,9 +2399,18 @@ void DenseArrayFx::read_dense_array_with_coords_full_col( rc = tiledb_array_max_buffer_size( ctx_, array, "a3", subarray, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_array_max_buffer_size( - ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_array_max_buffer_size( + ctx_, array, "d1", subarray, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_max_buffer_size( + ctx_, array, "d2", subarray, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_array_max_buffer_size( + ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Prepare cell buffers auto buffer_a1 = (int*)malloc(buffer_a1_size); @@ -2338,6 +2418,8 @@ void DenseArrayFx::read_dense_array_with_coords_full_col( auto buffer_a2_val = (char*)malloc(buffer_a2_val_size); auto buffer_a3 = (float*)malloc(buffer_a3_size); auto buffer_coords = (uint64_t*)malloc(buffer_coords_size); + auto buffer_d1 = (uint64_t*)malloc(buffer_d1_size); + auto buffer_d2 = (uint64_t*)malloc(buffer_d2_size); // Create query tiledb_query_t* query; @@ -2360,9 +2442,16 @@ void DenseArrayFx::read_dense_array_with_coords_full_col( CHECK(rc == TILEDB_OK); rc = tiledb_query_set_buffer(ctx_, query, "a3", buffer_a3, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_query_set_buffer( - ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", buffer_d2, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, 
&buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Submit query rc = submit_query_wrapper(array_name, query); @@ -2381,12 +2470,20 @@ void DenseArrayFx::read_dense_array_with_coords_full_col( CHECK(sizeof(c_buffer_a2_off) == buffer_a2_off_size); CHECK(sizeof(c_buffer_a2_val) - 1 == buffer_a2_val_size); CHECK(sizeof(c_buffer_a3) == buffer_a3_size); - CHECK(sizeof(c_buffer_coords) == buffer_coords_size); CHECK(!memcmp(buffer_a1, c_buffer_a1, sizeof(c_buffer_a1))); CHECK(!memcmp(buffer_a2_off, c_buffer_a2_off, sizeof(c_buffer_a2_off))); CHECK(!memcmp(buffer_a2_val, c_buffer_a2_val, sizeof(c_buffer_a2_val) - 1)); CHECK(!memcmp(buffer_a3, c_buffer_a3, sizeof(c_buffer_a3))); - CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + + if (split_coords) { + CHECK(sizeof(c_buffer_d1) == buffer_d1_size); + CHECK(!memcmp(buffer_d1, c_buffer_d1, sizeof(c_buffer_d1))); + CHECK(sizeof(c_buffer_d2) == buffer_d2_size); + CHECK(!memcmp(buffer_d2, c_buffer_d2, sizeof(c_buffer_d2))); + } else { + CHECK(sizeof(c_buffer_coords) == buffer_coords_size); + CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + } // Close array rc = tiledb_array_close(ctx_, array); @@ -2400,10 +2497,12 @@ void DenseArrayFx::read_dense_array_with_coords_full_col( free(buffer_a2_val); free(buffer_a3); free(buffer_coords); + free(buffer_d1); + free(buffer_d2); } void DenseArrayFx::read_dense_array_with_coords_subarray_global( - const std::string& array_name) { + const std::string& array_name, bool split_coords) { // Correct buffers int c_buffer_a1[] = {9, 11, 12, 13, 14, 15}; uint64_t c_buffer_a2_off[] = {0, 2, 6, 7, 9, 12}; @@ -2423,6 +2522,8 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_global( 15.2f, }; uint64_t c_buffer_coords[] = {3, 2, 4, 2, 3, 3, 3, 4, 4, 3, 4, 4}; + uint64_t c_buffer_d1[] = {3, 4, 3, 3, 4, 4}; + uint64_t c_buffer_d2[] = {2, 2, 3, 4, 3, 4}; // Open array tiledb_array_t* array; @@ -2434,7 +2535,8 @@ void 
DenseArrayFx::read_dense_array_with_coords_subarray_global( // Compute max buffer sizes uint64_t subarray[] = {3, 4, 2, 4}; uint64_t buffer_a1_size, buffer_a2_off_size, buffer_a2_val_size, - buffer_a3_size, buffer_coords_size; + buffer_a3_size, buffer_coords_size = 0, buffer_d1_size = 0, + buffer_d2_size = 0; rc = tiledb_array_max_buffer_size( ctx_, array, "a1", subarray, &buffer_a1_size); CHECK(rc == TILEDB_OK); @@ -2444,9 +2546,18 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_global( rc = tiledb_array_max_buffer_size( ctx_, array, "a3", subarray, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_array_max_buffer_size( - ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_array_max_buffer_size( + ctx_, array, "d1", subarray, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_max_buffer_size( + ctx_, array, "d2", subarray, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_array_max_buffer_size( + ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Prepare cell buffers auto buffer_a1 = (int*)malloc(buffer_a1_size); @@ -2454,6 +2565,8 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_global( auto buffer_a2_val = (char*)malloc(buffer_a2_val_size); auto buffer_a3 = (float*)malloc(buffer_a3_size); auto buffer_coords = (uint64_t*)malloc(buffer_coords_size); + auto buffer_d1 = (uint64_t*)malloc(buffer_d1_size); + auto buffer_d2 = (uint64_t*)malloc(buffer_d2_size); // Create query tiledb_query_t* query; @@ -2476,9 +2589,16 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_global( CHECK(rc == TILEDB_OK); rc = tiledb_query_set_buffer(ctx_, query, "a3", buffer_a3, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_query_set_buffer( - ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_query_set_buffer(ctx_, query, 
"d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", buffer_d2, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Submit query rc = submit_query_wrapper(array_name, query); @@ -2497,12 +2617,20 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_global( CHECK(sizeof(c_buffer_a2_off) <= buffer_a2_off_size); CHECK(sizeof(c_buffer_a2_val) - 1 <= buffer_a2_val_size); CHECK(sizeof(c_buffer_a3) <= buffer_a3_size); - CHECK(sizeof(c_buffer_coords) <= buffer_coords_size); CHECK(!memcmp(buffer_a1, c_buffer_a1, sizeof(c_buffer_a1))); CHECK(!memcmp(buffer_a2_off, c_buffer_a2_off, sizeof(c_buffer_a2_off))); CHECK(!memcmp(buffer_a2_val, c_buffer_a2_val, sizeof(c_buffer_a2_val) - 1)); CHECK(!memcmp(buffer_a3, c_buffer_a3, sizeof(c_buffer_a3))); - CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + + if (split_coords) { + CHECK(sizeof(c_buffer_d1) == buffer_d1_size); + CHECK(!memcmp(buffer_d1, c_buffer_d1, sizeof(c_buffer_d1))); + CHECK(sizeof(c_buffer_d2) == buffer_d2_size); + CHECK(!memcmp(buffer_d2, c_buffer_d2, sizeof(c_buffer_d2))); + } else { + CHECK(sizeof(c_buffer_coords) == buffer_coords_size); + CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + } // Close array rc = tiledb_array_close(ctx_, array); @@ -2516,10 +2644,12 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_global( free(buffer_a2_val); free(buffer_a3); free(buffer_coords); + free(buffer_d1); + free(buffer_d2); } void DenseArrayFx::read_dense_array_with_coords_subarray_row( - const std::string& array_name) { + const std::string& array_name, bool split_coords) { // Correct buffers int c_buffer_a1[] = {9, 12, 13, 11, 14, 15}; uint64_t c_buffer_a2_off[] = {0, 2, 3, 5, 9, 12}; @@ -2537,6 +2667,8 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_row( 
15.1f, 15.2f}; uint64_t c_buffer_coords[] = {3, 2, 3, 3, 3, 4, 4, 2, 4, 3, 4, 4}; + uint64_t c_buffer_d1[] = {3, 3, 3, 4, 4, 4}; + uint64_t c_buffer_d2[] = {2, 3, 4, 2, 3, 4}; // Open array tiledb_array_t* array; @@ -2548,7 +2680,8 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_row( // Compute max buffer sizes uint64_t subarray[] = {3, 4, 2, 4}; uint64_t buffer_a1_size, buffer_a2_off_size, buffer_a2_val_size, - buffer_a3_size, buffer_coords_size; + buffer_a3_size, buffer_coords_size = 0, buffer_d1_size = 0, + buffer_d2_size = 0; rc = tiledb_array_max_buffer_size( ctx_, array, "a1", subarray, &buffer_a1_size); CHECK(rc == TILEDB_OK); @@ -2558,9 +2691,18 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_row( rc = tiledb_array_max_buffer_size( ctx_, array, "a3", subarray, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_array_max_buffer_size( - ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_array_max_buffer_size( + ctx_, array, "d1", subarray, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_max_buffer_size( + ctx_, array, "d2", subarray, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_array_max_buffer_size( + ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Prepare cell buffers auto buffer_a1 = (int*)malloc(buffer_a1_size); @@ -2568,6 +2710,8 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_row( auto buffer_a2_val = (char*)malloc(buffer_a2_val_size); auto buffer_a3 = (float*)malloc(buffer_a3_size); auto buffer_coords = (uint64_t*)malloc(buffer_coords_size); + auto buffer_d1 = (uint64_t*)malloc(buffer_d1_size); + auto buffer_d2 = (uint64_t*)malloc(buffer_d2_size); // Create query tiledb_query_t* query; @@ -2590,9 +2734,16 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_row( CHECK(rc == TILEDB_OK); rc = tiledb_query_set_buffer(ctx_, query, "a3", buffer_a3, 
&buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_query_set_buffer( - ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", buffer_d2, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Submit query rc = submit_query_wrapper(array_name, query); @@ -2611,12 +2762,20 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_row( CHECK(sizeof(c_buffer_a2_off) == buffer_a2_off_size); CHECK(sizeof(c_buffer_a2_val) - 1 == buffer_a2_val_size); CHECK(sizeof(c_buffer_a3) == buffer_a3_size); - CHECK(sizeof(c_buffer_coords) == buffer_coords_size); CHECK(!memcmp(buffer_a1, c_buffer_a1, sizeof(c_buffer_a1))); CHECK(!memcmp(buffer_a2_off, c_buffer_a2_off, sizeof(c_buffer_a2_off))); CHECK(!memcmp(buffer_a2_val, c_buffer_a2_val, sizeof(c_buffer_a2_val) - 1)); CHECK(!memcmp(buffer_a3, c_buffer_a3, sizeof(c_buffer_a3))); - CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + + if (split_coords) { + CHECK(sizeof(c_buffer_d1) == buffer_d1_size); + CHECK(!memcmp(buffer_d1, c_buffer_d1, sizeof(c_buffer_d1))); + CHECK(sizeof(c_buffer_d2) == buffer_d2_size); + CHECK(!memcmp(buffer_d2, c_buffer_d2, sizeof(c_buffer_d2))); + } else { + CHECK(sizeof(c_buffer_coords) == buffer_coords_size); + CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + } // Close array rc = tiledb_array_close(ctx_, array); @@ -2630,10 +2789,12 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_row( free(buffer_a2_val); free(buffer_a3); free(buffer_coords); + free(buffer_d1); + free(buffer_d2); } void DenseArrayFx::read_dense_array_with_coords_subarray_col( - const std::string& array_name) { + const std::string& 
array_name, bool split_coords) { // Correct buffers int c_buffer_a1[] = {9, 11, 12, 14, 13, 15}; uint64_t c_buffer_a2_off[] = {0, 2, 6, 7, 10, 12}; @@ -2651,6 +2812,8 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_col( 15.1f, 15.2f}; uint64_t c_buffer_coords[] = {3, 2, 4, 2, 3, 3, 4, 3, 3, 4, 4, 4}; + uint64_t c_buffer_d1[] = {3, 4, 3, 4, 3, 4}; + uint64_t c_buffer_d2[] = {2, 2, 3, 3, 4, 4}; // Open array tiledb_array_t* array; @@ -2662,7 +2825,8 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_col( // Compute max buffer sizes uint64_t subarray[] = {3, 4, 2, 4}; uint64_t buffer_a1_size, buffer_a2_off_size, buffer_a2_val_size, - buffer_a3_size, buffer_coords_size; + buffer_a3_size, buffer_coords_size = 0, buffer_d1_size = 0, + buffer_d2_size = 0; rc = tiledb_array_max_buffer_size( ctx_, array, "a1", subarray, &buffer_a1_size); CHECK(rc == TILEDB_OK); @@ -2672,9 +2836,18 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_col( rc = tiledb_array_max_buffer_size( ctx_, array, "a3", subarray, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_array_max_buffer_size( - ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_array_max_buffer_size( + ctx_, array, "d1", subarray, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_max_buffer_size( + ctx_, array, "d2", subarray, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_array_max_buffer_size( + ctx_, array, TILEDB_COORDS, subarray, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Prepare cell buffers auto buffer_a1 = (int*)malloc(buffer_a1_size); @@ -2682,6 +2855,8 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_col( auto buffer_a2_val = (char*)malloc(buffer_a2_val_size); auto buffer_a3 = (float*)malloc(buffer_a3_size); auto buffer_coords = (uint64_t*)malloc(buffer_coords_size); + auto buffer_d1 = (uint64_t*)malloc(buffer_d1_size); + auto buffer_d2 = 
(uint64_t*)malloc(buffer_d2_size); // Create query tiledb_query_t* query; @@ -2704,9 +2879,16 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_col( CHECK(rc == TILEDB_OK); rc = tiledb_query_set_buffer(ctx_, query, "a3", buffer_a3, &buffer_a3_size); CHECK(rc == TILEDB_OK); - rc = tiledb_query_set_buffer( - ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); - CHECK(rc == TILEDB_OK); + if (split_coords) { + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", buffer_d2, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + } else { + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + } // Submit query rc = submit_query_wrapper(array_name, query); @@ -2725,12 +2907,20 @@ void DenseArrayFx::read_dense_array_with_coords_subarray_col( CHECK(sizeof(c_buffer_a2_off) == buffer_a2_off_size); CHECK(sizeof(c_buffer_a2_val) - 1 == buffer_a2_val_size); CHECK(sizeof(c_buffer_a3) == buffer_a3_size); - CHECK(sizeof(c_buffer_coords) == buffer_coords_size); CHECK(!memcmp(buffer_a1, c_buffer_a1, sizeof(c_buffer_a1))); CHECK(!memcmp(buffer_a2_off, c_buffer_a2_off, sizeof(c_buffer_a2_off))); CHECK(!memcmp(buffer_a2_val, c_buffer_a2_val, sizeof(c_buffer_a2_val) - 1)); CHECK(!memcmp(buffer_a3, c_buffer_a3, sizeof(c_buffer_a3))); - CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + + if (split_coords) { + CHECK(sizeof(c_buffer_d1) == buffer_d1_size); + CHECK(!memcmp(buffer_d1, c_buffer_d1, sizeof(c_buffer_d1))); + CHECK(sizeof(c_buffer_d2) == buffer_d2_size); + CHECK(!memcmp(buffer_d2, c_buffer_d2, sizeof(c_buffer_d2))); + } else { + CHECK(sizeof(c_buffer_coords) == buffer_coords_size); + CHECK(!memcmp(buffer_coords, c_buffer_coords, sizeof(c_buffer_coords))); + } // Close array rc = tiledb_array_close(ctx_, array); @@ -2744,6 +2934,8 @@ void 
DenseArrayFx::read_dense_array_with_coords_subarray_col( free(buffer_a2_val); free(buffer_a3); free(buffer_coords); + free(buffer_d1); + free(buffer_d2); } void DenseArrayFx::check_non_empty_domain(const std::string& path) { @@ -2918,23 +3110,57 @@ int DenseArrayFx::submit_query_wrapper( } } - // Repeat for coords + tiledb_attribute_free(&attr); + } + + // Repeat for coords + void* buff; + uint64_t* buff_size; + REQUIRE( + tiledb_query_get_buffer( + ctx_, new_query, TILEDB_COORDS, &buff, &buff_size) == TILEDB_OK); + if (buff_size != nullptr) { + buff = std::malloc(*buff_size); + to_free.push_back(buff); + REQUIRE( + tiledb_query_set_buffer( + ctx_, new_query, TILEDB_COORDS, buff, buff_size) == TILEDB_OK); + } + + // Repeat for split dimensions, if they are set we will set the buffer + uint32_t num_dimension; + tiledb_domain_t* domain; + REQUIRE(tiledb_array_schema_get_domain(ctx_, schema, &domain) == TILEDB_OK); + REQUIRE(tiledb_domain_get_ndim(ctx_, domain, &num_dimension) == TILEDB_OK); + + for (uint32_t i = 0; i < num_dimension; i++) { + tiledb_dimension_t* dim; + REQUIRE( + tiledb_domain_get_dimension_from_index(ctx_, domain, i, &dim) == + TILEDB_OK); + const char* name; + REQUIRE(tiledb_dimension_get_name(ctx_, dim, &name) == TILEDB_OK); + void* buff; uint64_t* buff_size; REQUIRE( - tiledb_query_get_buffer( - ctx_, new_query, TILEDB_COORDS, &buff, &buff_size) == TILEDB_OK); + tiledb_query_get_buffer(ctx_, new_query, name, &buff, &buff_size) == + TILEDB_OK); + // Buffers will always be null after deserialization on server side + REQUIRE(buff == nullptr); if (buff_size != nullptr) { + // Buffer size was set for the attribute; allocate one of the + // appropriate size. 
buff = std::malloc(*buff_size); to_free.push_back(buff); REQUIRE( - tiledb_query_set_buffer( - ctx_, new_query, TILEDB_COORDS, buff, buff_size) == TILEDB_OK); + tiledb_query_set_buffer(ctx_, new_query, name, buff, buff_size) == + TILEDB_OK); } - - tiledb_attribute_free(&attr); + tiledb_dimension_free(&dim); } + tiledb_domain_free(&domain); tiledb_array_schema_free(&schema); } @@ -3181,12 +3407,30 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( DenseArrayFx, "C API: Test dense array, return coordinates", - "[capi], [dense], [return-coords]") { + "[capi][dense][return-coords]") { + bool split_coords = false; + SECTION("- No serialization") { serialize_query_ = false; + + SECTION("-- zipped coordinates") { + split_coords = false; + } + + SECTION("-- split coordinates") { + split_coords = true; + } } SECTION("- Serialization") { serialize_query_ = true; + + SECTION("-- zipped coordinates") { + split_coords = false; + } + + SECTION("-- split coordinates") { + split_coords = true; + } } std::string temp_dir; @@ -3199,15 +3443,16 @@ TEST_CASE_METHOD( } else { temp_dir = FILE_URI_PREFIX + FILE_TEMP_DIR; } + create_temp_dir(temp_dir); - check_return_coords(temp_dir); + check_return_coords(temp_dir, split_coords); remove_temp_dir(temp_dir); } TEST_CASE_METHOD( DenseArrayFx, "C API: Test dense array, non-empty domain", - "[capi], [dense], [dense-non-empty]") { + "[capi][dense][dense-non-empty]") { SECTION("- No serialization") { serialize_query_ = false; } @@ -3670,7 +3915,7 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( DenseArrayFx, "C API: Test dense array, URI ending in a slash", - "[capi], [dense], [uri-ending-slash]") { + "[capi][dense][uri-ending-slash]") { SECTION("- No serialization") { serialize_query_ = false; } @@ -3683,14 +3928,14 @@ TEST_CASE_METHOD( create_temp_dir(temp_dir); create_dense_array(array_name); write_dense_array(array_name); - read_dense_array_with_coords_full_global(array_name); + read_dense_array_with_coords_full_global(array_name, true); 
remove_temp_dir(temp_dir); } TEST_CASE_METHOD( DenseArrayFx, "C API: Test dense array, missing attributes in writes", - "[capi], [dense], [dense-write-missing-attributes]") { + "[capi][dense][write-missing-attributes]") { SECTION("- No serialization") { serialize_query_ = false; } diff --git a/test/src/unit-capi-query_2.cc b/test/src/unit-capi-query_2.cc index 1a17fc43d76..dbe0d6497a7 100644 --- a/test/src/unit-capi-query_2.cc +++ b/test/src/unit-capi-query_2.cc @@ -1555,6 +1555,18 @@ TEST_CASE_METHOD( rc = tiledb_query_get_est_result_size(ctx_, query, TILEDB_COORDS, &size); CHECK(rc == TILEDB_OK); CHECK(size == 6 * 2 * sizeof(uint64_t)); + rc = + tiledb_query_get_est_result_size_var(ctx_, query, "d1", &size, &size); + CHECK(rc == TILEDB_ERR); + rc = + tiledb_query_get_est_result_size_var(ctx_, query, "d2", &size, &size); + CHECK(rc == TILEDB_ERR); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 6 * sizeof(uint64_t)); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 6 * sizeof(uint64_t)); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); CHECK(size == 6 * sizeof(int)); @@ -1574,6 +1586,12 @@ TEST_CASE_METHOD( rc = tiledb_query_get_est_result_size(ctx_, query, TILEDB_COORDS, &size); CHECK(rc == TILEDB_OK); CHECK(size == 0); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 0); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 0); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); CHECK(size == 0); @@ -1600,6 +1618,12 @@ TEST_CASE_METHOD( rc = tiledb_query_get_est_result_size(ctx_, query, TILEDB_COORDS, &size); CHECK(rc == TILEDB_OK); CHECK(size == 0); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 
0); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 0); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); CHECK(size == 0); @@ -1622,6 +1646,18 @@ TEST_CASE_METHOD( (1.0 / 2) * (1.0 / 4) * 4 * sizeof(uint64_t) + 1.0 * (2.0 / 7) * 4 * sizeof(uint64_t)); CHECK(size == coords_size); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + CHECK(rc == TILEDB_OK); + auto d1_size = (uint64_t)ceil( + (1.0 / 2) * (1.0 / 4) * 2 * sizeof(uint64_t) + + 1.0 * (2.0 / 7) * 2 * sizeof(uint64_t)); + CHECK(size == d1_size); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + auto d2_size = (uint64_t)ceil( + (1.0 / 2) * (1.0 / 4) * 2 * sizeof(uint64_t) + + 1.0 * (2.0 / 7) * 2 * sizeof(uint64_t)); + CHECK(size == d2_size); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); auto a_size = (uint64_t)ceil( @@ -1659,6 +1695,16 @@ TEST_CASE_METHOD( (1.0 / 4) * 4 * sizeof(uint64_t) + (3.0 / 7) * 4 * sizeof(uint64_t)); CHECK(rc == TILEDB_OK); CHECK(size == coords_size); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + auto d1_size = (uint64_t)ceil( + (1.0 / 4) * 2 * sizeof(uint64_t) + (3.0 / 7) * 2 * sizeof(uint64_t)); + CHECK(rc == TILEDB_OK); + CHECK(size == d1_size); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + auto d2_size = (uint64_t)ceil( + (1.0 / 4) * 2 * sizeof(uint64_t) + (3.0 / 7) * 2 * sizeof(uint64_t)); + CHECK(rc == TILEDB_OK); + CHECK(size == d2_size); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); auto a_size = (uint64_t)ceil( @@ -1700,6 +1746,12 @@ TEST_CASE_METHOD( rc = tiledb_query_get_est_result_size(ctx_, query, TILEDB_COORDS, &size); CHECK(rc == TILEDB_OK); CHECK(size == 6 * 2 * sizeof(uint64_t)); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + CHECK(rc == TILEDB_OK); + 
CHECK(size == 6 * sizeof(uint64_t)); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 6 * sizeof(uint64_t)); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); CHECK(size == 6 * sizeof(int)); @@ -1719,6 +1771,12 @@ TEST_CASE_METHOD( rc = tiledb_query_get_est_result_size(ctx_, query, TILEDB_COORDS, &size); CHECK(rc == TILEDB_OK); CHECK(size == 0); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 0); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 0); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); CHECK(size == 0); @@ -1745,6 +1803,12 @@ TEST_CASE_METHOD( rc = tiledb_query_get_est_result_size(ctx_, query, TILEDB_COORDS, &size); CHECK(rc == TILEDB_OK); CHECK(size == 0); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 0); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 0); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); CHECK(size == 0); @@ -1765,6 +1829,14 @@ TEST_CASE_METHOD( CHECK(rc == TILEDB_OK); auto coords_size = (uint64_t)ceil(1.0 * (1.0 / 3) * 4 * sizeof(uint64_t)); CHECK(size == coords_size); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + CHECK(rc == TILEDB_OK); + auto d1_size = (uint64_t)ceil(1.0 * (1.0 / 3) * 2 * sizeof(uint64_t)); + CHECK(size == d1_size); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + auto d2_size = (uint64_t)ceil(1.0 * (1.0 / 3) * 2 * sizeof(uint64_t)); + CHECK(size == d2_size); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); auto a_size = (uint64_t)ceil(1.0 * (1.0 / 3) * 2 * sizeof(int)); @@ -1796,6 +1868,16 @@ 
TEST_CASE_METHOD( (6.0 / 8) * 4 * sizeof(uint64_t) + (2.0 / 6) * 4 * sizeof(uint64_t)); CHECK(rc == TILEDB_OK); CHECK(size == coords_size); + rc = tiledb_query_get_est_result_size(ctx_, query, "d1", &size); + auto d1_size = (uint64_t)ceil( + (6.0 / 8) * 2 * sizeof(uint64_t) + (2.0 / 6) * 2 * sizeof(uint64_t)); + CHECK(rc == TILEDB_OK); + CHECK(size == d1_size); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + auto d2_size = (uint64_t)ceil( + (6.0 / 8) * 2 * sizeof(uint64_t) + (2.0 / 6) * 2 * sizeof(uint64_t)); + CHECK(rc == TILEDB_OK); + CHECK(size == d2_size); rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); auto a_size = (uint64_t)ceil( @@ -1921,6 +2003,12 @@ TEST_CASE_METHOD( rc = tiledb_query_get_est_result_size(ctx_, query, "a", &size); CHECK(rc == TILEDB_OK); CHECK(size == 4 * sizeof(int)); + rc = tiledb_query_get_est_result_size(ctx_, query, "d2", &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 4 * sizeof(uint64_t)); + rc = tiledb_query_get_est_result_size(ctx_, query, TILEDB_COORDS, &size); + CHECK(rc == TILEDB_OK); + CHECK(size == 4 * 2 * sizeof(uint64_t)); rc = tiledb_query_get_est_result_size_var( ctx_, query, "b", &size_off, &size_val); CHECK(rc == TILEDB_OK); diff --git a/test/src/unit-capi-sparse_array.cc b/test/src/unit-capi-sparse_array.cc index 43593ff2c68..8bc8f2ed312 100644 --- a/test/src/unit-capi-sparse_array.cc +++ b/test/src/unit-capi-sparse_array.cc @@ -2612,16 +2612,6 @@ TEST_CASE_METHOD( check_invalid_offsets(array_name); } -TEST_CASE_METHOD( - SparseArrayFx, - "C API: Test sparse array, anonymous attribute", - "[capi][sparse][anon-attr]") { - ATTR_NAME = ""; - std::string array_name = FILE_URI_PREFIX + FILE_TEMP_DIR + "anon_attr"; - check_sorted_reads( - array_name, TILEDB_FILTER_NONE, TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR); -} - TEST_CASE_METHOD( SparseArrayFx, "C API: Test sparse array, no results", @@ -6289,7 +6279,7 @@ TEST_CASE_METHOD( uint64_t buffer_d2[] = {1, 2, 4, 3, 1, 
3}; uint64_t buffer_d2_size = sizeof(buffer_d2); - // Open array + // Open array for writing tiledb_array_t* array; int rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); CHECK(rc == TILEDB_OK); @@ -6376,6 +6366,55 @@ TEST_CASE_METHOD( CHECK(rc == TILEDB_OK); rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); CHECK(rc == TILEDB_ERR); + tiledb_query_free(&query); + + // Set separate coordinate buffers and then zipped coordinates + rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_layout(ctx_, query, TILEDB_UNORDERED); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); + CHECK(rc == TILEDB_ERR); + + // Close array + rc = tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + // Open array for reading + rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_open(ctx_, array, TILEDB_READ); + CHECK(rc == TILEDB_OK); + + // Set zipped coordinates first and the separate coordinate buffers + rc = tiledb_query_alloc(ctx_, array, TILEDB_READ, &query); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_layout(ctx_, query, TILEDB_UNORDERED); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_ERR); + tiledb_query_free(&query); + + // Set separate coordinate buffers and then zipped coordinates + rc = tiledb_query_alloc(ctx_, array, TILEDB_READ, &query); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_layout(ctx_, query, TILEDB_UNORDERED); + CHECK(rc == TILEDB_OK); + rc = 
tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer( + ctx_, query, TILEDB_COORDS, buffer_coords, &buffer_coords_size); + CHECK(rc == TILEDB_ERR); // Close array rc = tiledb_array_close(ctx_, array); @@ -6387,3 +6426,377 @@ TEST_CASE_METHOD( remove_array(array_name); } + +TEST_CASE_METHOD( + SparseArrayFx, + "C API: Test sparse array, split coordinate buffers for reads", + "[capi][sparse][split-coords][read]") { + std::string array_name = + FILE_URI_PREFIX + FILE_TEMP_DIR + "sparse_split_coords_read"; + create_sparse_array(array_name); + + // ---- WRITE ---- + + // Prepare cell buffers + int buffer_a1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint64_t buffer_a2[] = {0, 1, 3, 6, 10, 11, 13, 16}; + char buffer_var_a2[] = "abbcccddddeffggghhhh"; + float buffer_a3[] = {0.1f, + 0.2f, + 1.1f, + 1.2f, + 2.1f, + 2.2f, + 3.1f, + 3.2f, + 4.1f, + 4.2f, + 5.1f, + 5.2f, + 6.1f, + 6.2f, + 7.1f, + 7.2f}; + uint64_t buffer_a1_size = sizeof(buffer_a1); + uint64_t buffer_a2_size = sizeof(buffer_a2); + // No need to store the last '\0' character + uint64_t buffer_var_a2_size = sizeof(buffer_var_a2) - 1; + uint64_t buffer_a3_size = sizeof(buffer_a3); + uint64_t buffer_d1[] = {1, 1, 1, 2, 3, 3, 3, 4}; + uint64_t buffer_d1_size = sizeof(buffer_d1); + uint64_t buffer_d2[] = {1, 2, 4, 3, 1, 3, 4, 2}; + uint64_t buffer_d2_size = sizeof(buffer_d2); + + // Open array + tiledb_array_t* array; + int rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_open(ctx_, array, TILEDB_WRITE); + CHECK(rc == TILEDB_OK); + + // Create query + tiledb_query_t* query; + rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_layout(ctx_, query, TILEDB_UNORDERED); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a1", buffer_a1, &buffer_a1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer_var( + ctx_, + 
query, + "a2", + (uint64_t*)buffer_a2, + &buffer_a2_size, + buffer_var_a2, + &buffer_var_a2_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a3", buffer_a3, &buffer_a3_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", buffer_d2, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + + // Submit query + rc = tiledb_query_submit(ctx_, query); + CHECK(rc == TILEDB_OK); + + // Finalize query + rc = tiledb_query_finalize(ctx_, query); + CHECK(rc == TILEDB_OK); + + // Close array + rc = tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + // ---- READ ---- + + // Create array + rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + CHECK(rc == TILEDB_OK); + + // Open the array + rc = tiledb_array_open(ctx_, array, TILEDB_READ); + CHECK(rc == TILEDB_OK); + + // Create buffers + int b_a1[30]; + uint64_t b_a1_size = sizeof(b_a1); + uint64_t b_a2_off[30]; + uint64_t b_a2_off_size = sizeof(b_a2_off); + char b_a2_val[30]; + uint64_t b_a2_val_size = sizeof(b_a2_val); + float b_a3[30]; + uint64_t b_a3_size = sizeof(b_a3); + uint64_t b_d1[30]; + uint64_t b_d1_size = sizeof(b_d1); + uint64_t b_d2[30]; + uint64_t b_d2_size = sizeof(b_d2); + + // Create query + rc = tiledb_query_alloc(ctx_, array, TILEDB_READ, &query); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a1", b_a1, &b_a1_size); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_buffer_var( + ctx_, query, "a2", b_a2_off, &b_a2_off_size, b_a2_val, &b_a2_val_size); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a3", b_a3, &b_a3_size); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d1", b_d1, &b_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", b_d2, &b_d2_size); + 
CHECK(rc == TILEDB_OK); + + // Set a subarray + uint64_t subarray[] = {1, 4, 1, 4}; + rc = tiledb_query_set_layout(ctx_, query, TILEDB_ROW_MAJOR); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_subarray(ctx_, query, subarray); + REQUIRE(rc == TILEDB_OK); + + // Submit query + rc = tiledb_query_submit(ctx_, query); + REQUIRE(rc == TILEDB_OK); + + tiledb_query_status_t status; + rc = tiledb_query_get_status(ctx_, query, &status); + REQUIRE(rc == TILEDB_OK); + REQUIRE(status == TILEDB_COMPLETED); + + // Check buffer sizes + CHECK(b_a1_size == buffer_a1_size); + CHECK(b_a2_off_size == buffer_a2_size); + CHECK(b_a2_val_size == buffer_var_a2_size); + CHECK(b_a3_size == buffer_a3_size); + CHECK(b_d1_size == buffer_d1_size); + CHECK(b_d2_size == buffer_d2_size); + + // Check buffer data + int c_b_a1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint64_t c_b_a2_off[] = {0, 1, 3, 6, 10, 11, 13, 16}; + char c_b_a2_val[] = "abbcccddddeffggghhhh"; + float c_b_a3[] = {0.1f, + 0.2f, + 1.1f, + 1.2f, + 2.1f, + 2.2f, + 3.1f, + 3.2f, + 4.1f, + 4.2f, + 5.1f, + 5.2f, + 6.1f, + 6.2f, + 7.1f, + 7.2f}; + uint64_t c_b_d1[] = {1, 1, 1, 2, 3, 3, 3, 4}; + uint64_t c_b_d2[] = {1, 2, 4, 3, 1, 3, 4, 2}; + CHECK(!memcmp(c_b_a1, b_a1, b_a1_size)); + CHECK(!memcmp(c_b_a2_off, b_a2_off, b_a2_off_size)); + CHECK(!memcmp(c_b_a2_val, b_a2_val, b_a2_val_size)); + CHECK(!memcmp(c_b_a3, b_a3, b_a3_size)); + CHECK(!memcmp(c_b_d1, b_d1, b_d1_size)); + CHECK(!memcmp(c_b_d2, b_d2, b_d2_size)); + + // Close array + rc = tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + remove_array(array_name); +} + +TEST_CASE_METHOD( + SparseArrayFx, + "C API: Test sparse array, split coordinate buffers for reads, subset of " + "dimensions", + "[capi][sparse][split-coords][read][subset]") { + std::string array_name = + FILE_URI_PREFIX + FILE_TEMP_DIR + "sparse_split_coords_read_subset"; + create_sparse_array(array_name); + + // ---- WRITE ---- + + 
// Prepare cell buffers + int buffer_a1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint64_t buffer_a2[] = {0, 1, 3, 6, 10, 11, 13, 16}; + char buffer_var_a2[] = "abbcccddddeffggghhhh"; + float buffer_a3[] = {0.1f, + 0.2f, + 1.1f, + 1.2f, + 2.1f, + 2.2f, + 3.1f, + 3.2f, + 4.1f, + 4.2f, + 5.1f, + 5.2f, + 6.1f, + 6.2f, + 7.1f, + 7.2f}; + uint64_t buffer_a1_size = sizeof(buffer_a1); + uint64_t buffer_a2_size = sizeof(buffer_a2); + // No need to store the last '\0' character + uint64_t buffer_var_a2_size = sizeof(buffer_var_a2) - 1; + uint64_t buffer_a3_size = sizeof(buffer_a3); + uint64_t buffer_d1[] = {1, 1, 1, 2, 3, 3, 3, 4}; + uint64_t buffer_d1_size = sizeof(buffer_d1); + uint64_t buffer_d2[] = {1, 2, 4, 3, 1, 3, 4, 2}; + uint64_t buffer_d2_size = sizeof(buffer_d2); + + // Open array + tiledb_array_t* array; + int rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_open(ctx_, array, TILEDB_WRITE); + CHECK(rc == TILEDB_OK); + + // Create query + tiledb_query_t* query; + rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_layout(ctx_, query, TILEDB_UNORDERED); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a1", buffer_a1, &buffer_a1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer_var( + ctx_, + query, + "a2", + (uint64_t*)buffer_a2, + &buffer_a2_size, + buffer_var_a2, + &buffer_var_a2_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a3", buffer_a3, &buffer_a3_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d1", buffer_d1, &buffer_d1_size); + CHECK(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d2", buffer_d2, &buffer_d2_size); + CHECK(rc == TILEDB_OK); + + // Submit query + rc = tiledb_query_submit(ctx_, query); + CHECK(rc == TILEDB_OK); + + // Finalize query + rc = tiledb_query_finalize(ctx_, query); + CHECK(rc == TILEDB_OK); + + // Close array + rc = 
tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + // ---- READ ---- + + // Create array + rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array); + CHECK(rc == TILEDB_OK); + + // Open the array + rc = tiledb_array_open(ctx_, array, TILEDB_READ); + CHECK(rc == TILEDB_OK); + + // Create buffers + int b_a1[30]; + uint64_t b_a1_size = sizeof(b_a1); + uint64_t b_a2_off[30]; + uint64_t b_a2_off_size = sizeof(b_a2_off); + char b_a2_val[30]; + uint64_t b_a2_val_size = sizeof(b_a2_val); + float b_a3[30]; + uint64_t b_a3_size = sizeof(b_a3); + uint64_t b_d1[30]; + uint64_t b_d1_size = sizeof(b_d1); + + // Create query + rc = tiledb_query_alloc(ctx_, array, TILEDB_READ, &query); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a1", b_a1, &b_a1_size); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_buffer_var( + ctx_, query, "a2", b_a2_off, &b_a2_off_size, b_a2_val, &b_a2_val_size); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "a3", b_a3, &b_a3_size); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_buffer(ctx_, query, "d1", b_d1, &b_d1_size); + CHECK(rc == TILEDB_OK); + + // Set a subarray + uint64_t subarray[] = {1, 4, 1, 4}; + rc = tiledb_query_set_layout(ctx_, query, TILEDB_ROW_MAJOR); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_query_set_subarray(ctx_, query, subarray); + REQUIRE(rc == TILEDB_OK); + + // Submit query + rc = tiledb_query_submit(ctx_, query); + REQUIRE(rc == TILEDB_OK); + + tiledb_query_status_t status; + rc = tiledb_query_get_status(ctx_, query, &status); + REQUIRE(rc == TILEDB_OK); + REQUIRE(status == TILEDB_COMPLETED); + + // Check buffer sizes + CHECK(b_a1_size == buffer_a1_size); + CHECK(b_a2_off_size == buffer_a2_size); + CHECK(b_a2_val_size == buffer_var_a2_size); + CHECK(b_a3_size == buffer_a3_size); + CHECK(b_d1_size == buffer_d1_size); + + // Check buffer data + int c_b_a1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + 
uint64_t c_b_a2_off[] = {0, 1, 3, 6, 10, 11, 13, 16}; + char c_b_a2_val[] = "abbcccddddeffggghhhh"; + float c_b_a3[] = {0.1f, + 0.2f, + 1.1f, + 1.2f, + 2.1f, + 2.2f, + 3.1f, + 3.2f, + 4.1f, + 4.2f, + 5.1f, + 5.2f, + 6.1f, + 6.2f, + 7.1f, + 7.2f}; + uint64_t c_b_d1[] = {1, 1, 1, 2, 3, 3, 3, 4}; + CHECK(!memcmp(c_b_a1, b_a1, b_a1_size)); + CHECK(!memcmp(c_b_a2_off, b_a2_off, b_a2_off_size)); + CHECK(!memcmp(c_b_a2_val, b_a2_val, b_a2_val_size)); + CHECK(!memcmp(c_b_a3, b_a3, b_a3_size)); + CHECK(!memcmp(c_b_d1, b_d1, b_d1_size)); + + // Close array + rc = tiledb_array_close(ctx_, array); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_array_free(&array); + tiledb_query_free(&query); + + remove_array(array_name); +} \ No newline at end of file diff --git a/test/src/unit-cppapi-array.cc b/test/src/unit-cppapi-array.cc index aa019b9bc95..f3d4ff476d0 100644 --- a/test/src/unit-cppapi-array.cc +++ b/test/src/unit-cppapi-array.cc @@ -488,23 +488,23 @@ TEST_CASE( vfs.remove_dir(array_name); Domain domain(ctx); - domain.add_dimension(Dimension::create(ctx, "", {{0, 11}}, 12)); + domain.add_dimension(Dimension::create(ctx, "d", {{0, 11}}, 12)); ArraySchema schema(ctx, TILEDB_DENSE); schema.set_domain(domain).set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}}); - schema.add_attribute(Attribute::create(ctx, "")); + schema.add_attribute(Attribute::create(ctx, "a")); tiledb::Array::create(array_name, schema); auto array_w = tiledb::Array(ctx, array_name, TILEDB_WRITE); auto query_w = tiledb::Query(ctx, array_w, TILEDB_WRITE); std::vector data = {0, 1}; - query_w.set_buffer("", data).set_subarray({0, 1}).submit(); - query_w.set_buffer("", data).set_subarray({2, 3}).submit(); + query_w.set_buffer("a", data).set_subarray({0, 1}).submit(); + query_w.set_buffer("a", data).set_subarray({2, 3}).submit(); // this fragment write caused crash during consolidation // https://github.com/TileDB-Inc/TileDB/issues/1205 // https://github.com/TileDB-Inc/TileDB/issues/1212 - 
query_w.set_buffer("", data).set_subarray({4, 5}).submit(); + query_w.set_buffer("a", data).set_subarray({4, 5}).submit(); query_w.finalize(); array_w.close(); Array::consolidate(ctx, array_name); diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 4603599e2a8..5375f734bc4 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -63,15 +63,10 @@ Array::Array(const URI& array_uri, StorageManager* storage_manager) is_open_ = false; array_schema_ = nullptr; timestamp_ = 0; - last_max_buffer_sizes_subarray_ = nullptr; remote_ = array_uri.is_tiledb(); metadata_loaded_ = false; }; -Array::~Array() { - std::free(last_max_buffer_sizes_subarray_); -} - /* ********************************* */ /* API */ /* ********************************* */ @@ -332,9 +327,8 @@ Status Array::get_query_type(QueryType* query_type) const { } Status Array::get_max_buffer_size( - const char* attribute, const void* subarray, uint64_t* buffer_size) { + const char* name, const void* subarray, uint64_t* buffer_size) { std::unique_lock lck(mtx_); - // Check if array is open if (!is_open_) return LOG_STATUS( @@ -346,39 +340,39 @@ Status Array::get_max_buffer_size( Status::ArrayError("Cannot get max buffer size; " "Array was not opened in read mode")); - // Check if attribute is null - if (attribute == nullptr) - return LOG_STATUS( - Status::ArrayError("Cannot get max buffer size; Attribute is null")); - - RETURN_NOT_OK(compute_max_buffer_sizes(subarray)); + // Check if name is null + if (name == nullptr) + return LOG_STATUS(Status::ArrayError( + "Cannot get max buffer size; Attribute/Dimension name is null")); - // Normalize attribute name - std::string norm_attribute; - RETURN_NOT_OK( - ArraySchema::attribute_name_normalized(attribute, &norm_attribute)); + // Check if name is attribute or dimension + bool is_dim = array_schema_->is_dim(name); + bool is_attr = array_schema_->is_attr(name); - // Check if attribute exists - auto it = 
last_max_buffer_sizes_.find(norm_attribute); - if (it == last_max_buffer_sizes_.end()) + // Check if attribute/dimension exists + if (name != constants::coords && !is_dim && !is_attr) return LOG_STATUS(Status::ArrayError( - std::string("Cannot get max buffer size; Attribute '") + - norm_attribute + "' does not exist")); + std::string("Cannot get max buffer size; Attribute/Dimension '") + + name + "' does not exist")); - // Check if attribute is fixed sized - if (array_schema_->var_size(norm_attribute)) + // Check if attribute/dimension is fixed sized + if (array_schema_->var_size(name)) return LOG_STATUS(Status::ArrayError( - std::string("Cannot get max buffer size; Attribute '") + - norm_attribute + "' is var-sized")); + std::string("Cannot get max buffer size; Attribute/Dimension '") + + name + "' is var-sized")); + + RETURN_NOT_OK(compute_max_buffer_sizes(subarray)); // Retrieve buffer size + auto it = last_max_buffer_sizes_.find(name); + assert(it != last_max_buffer_sizes_.end()); *buffer_size = it->second.first; return Status::Ok(); } Status Array::get_max_buffer_size( - const char* attribute, + const char* name, const void* subarray, uint64_t* buffer_off_size, uint64_t* buffer_val_size) { @@ -395,30 +389,25 @@ Status Array::get_max_buffer_size( Status::ArrayError("Cannot get max buffer size; " "Array was not opened in read mode")); - // Check if attribute is null - if (attribute == nullptr) - return LOG_STATUS( - Status::ArrayError("Cannot get max buffer size; Attribute is null")); + // Check if name is null + if (name == nullptr) + return LOG_STATUS(Status::ArrayError( + "Cannot get max buffer size; Attribute/Dimension name is null")); RETURN_NOT_OK(compute_max_buffer_sizes(subarray)); - // Normalize attribute name - std::string norm_attribute; - RETURN_NOT_OK( - ArraySchema::attribute_name_normalized(attribute, &norm_attribute)); - - // Check if attribute exists - auto it = last_max_buffer_sizes_.find(norm_attribute); + // Check if attribute/dimension 
exists + auto it = last_max_buffer_sizes_.find(name); if (it == last_max_buffer_sizes_.end()) return LOG_STATUS(Status::ArrayError( - std::string("Cannot get max buffer size; Attribute '") + - norm_attribute + "' does not exist")); + std::string("Cannot get max buffer size; Attribute/Dimension '") + + name + "' does not exist")); - // Check if attribute is var-sized - if (!array_schema_->var_size(norm_attribute)) + // Check if attribute/dimension is var-sized + if (!array_schema_->var_size(name)) return LOG_STATUS(Status::ArrayError( - std::string("Cannot get max buffer size; Attribute '") + - norm_attribute + "' is fixed-sized")); + std::string("Cannot get max buffer size; Attribute/Dimension '") + + name + "' is fixed-sized")); // Retrieve buffer sizes *buffer_off_size = it->second.first; @@ -666,8 +655,8 @@ Status Array::metadata(Metadata** metadata) { void Array::clear_last_max_buffer_sizes() { last_max_buffer_sizes_.clear(); - std::free(last_max_buffer_sizes_subarray_); - last_max_buffer_sizes_subarray_ = nullptr; + last_max_buffer_sizes_subarray_.clear(); + last_max_buffer_sizes_subarray_.shrink_to_fit(); } Status Array::compute_max_buffer_sizes(const void* subarray) { @@ -681,17 +670,12 @@ Status Array::compute_max_buffer_sizes(const void* subarray) { auto dim_num = array_schema_->dim_num(); auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); auto subarray_size = 2 * dim_num * coord_size; - if (last_max_buffer_sizes_subarray_ == nullptr) { - last_max_buffer_sizes_subarray_ = std::malloc(subarray_size); - if (last_max_buffer_sizes_subarray_ == nullptr) - return LOG_STATUS(Status::ArrayError( - "Cannot compute max buffer sizes; Subarray allocation failed")); - } + last_max_buffer_sizes_subarray_.resize(subarray_size); // Compute max buffer sizes if (last_max_buffer_sizes_.empty() || - std::memcmp(last_max_buffer_sizes_subarray_, subarray, subarray_size) != - 0) { + std::memcmp( + &last_max_buffer_sizes_subarray_[0], subarray, subarray_size) 
!= 0) { last_max_buffer_sizes_.clear(); // Get all attributes and coordinates @@ -702,11 +686,15 @@ Status Array::compute_max_buffer_sizes(const void* subarray) { std::pair(0, 0); last_max_buffer_sizes_[constants::coords] = std::pair(0, 0); + for (unsigned d = 0; d < dim_num; ++d) + last_max_buffer_sizes_[array_schema_->domain()->dimension(d)->name()] = + std::pair(0, 0); + RETURN_NOT_OK(compute_max_buffer_sizes(subarray, &last_max_buffer_sizes_)); } // Update subarray - std::memcpy(last_max_buffer_sizes_subarray_, subarray, subarray_size); + std::memcpy(&last_max_buffer_sizes_subarray_[0], subarray, subarray_size); return Status::Ok(); } diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index 07ac6feccab..4c56106febf 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -64,7 +64,7 @@ class Array { Array(const URI& array_uri, StorageManager* storage_manager); /** Destructor. */ - ~Array(); + ~Array() = default; /* ********************************* */ /* API */ @@ -167,18 +167,18 @@ class Array { Status get_query_type(QueryType* qyery_type) const; /** - * Returns the max buffer size given a fixed-sized attribute and + * Returns the max buffer size given a fixed-sized attribute/dimension and * a subarray. Errors if the array is not open. */ Status get_max_buffer_size( - const char* attribute, const void* subarray, uint64_t* buffer_size); + const char* name, const void* subarray, uint64_t* buffer_size); /** - * Returns the max buffer size given a var-sized attribute and + * Returns the max buffer size given a var-sized attribute/dimension and * a subarray. Errors if the array is not open. */ Status get_max_buffer_size( - const char* attribute, + const char* name, const void* subarray, uint64_t* buffer_off_size, uint64_t* buffer_val_size); @@ -348,7 +348,7 @@ class Array { * This is the last subarray used by the user to retrieve the * max buffer sizes. 
*/ - void* last_max_buffer_sizes_subarray_; + std::vector last_max_buffer_sizes_subarray_; /** Mutex for thread-safety. */ mutable std::mutex mtx_; diff --git a/tiledb/sm/array_schema/array_schema.cc b/tiledb/sm/array_schema/array_schema.cc index 3248b007995..72eda7e1d9c 100644 --- a/tiledb/sm/array_schema/array_schema.cc +++ b/tiledb/sm/array_schema/array_schema.cc @@ -124,38 +124,10 @@ const Attribute* ArraySchema::attribute(unsigned int id) const { } const Attribute* ArraySchema::attribute(const std::string& name) const { - auto it = - attribute_map_.find(name.empty() ? constants::default_attr_name : name); + auto it = attribute_map_.find(name); return it == attribute_map_.end() ? nullptr : it->second; } -Status ArraySchema::attribute_name_normalized( - const char* attribute, std::string* normalized_name) { - if (attribute == nullptr) - return Status::AttributeError("Null attribute name"); - *normalized_name = - attribute[0] == '\0' ? constants::default_attr_name : attribute; - return Status::Ok(); -} - -Status ArraySchema::attribute_names_normalized( - const char** attributes, - unsigned num_attributes, - std::vector* normalized_names) { - normalized_names->clear(); - - if (attributes == nullptr || num_attributes == 0) - return Status::Ok(); - - for (unsigned i = 0; i < num_attributes; i++) { - std::string normalized; - RETURN_NOT_OK(attribute_name_normalized(attributes[i], &normalized)); - normalized_names->push_back(normalized); - } - - return Status::Ok(); -} - unsigned int ArraySchema::attribute_num() const { return (unsigned)attributes_.size(); } @@ -175,6 +147,7 @@ Layout ArraySchema::cell_order() const { uint64_t ArraySchema::cell_size(const std::string& name) const { // Special zipped coordinates attribute if (name == constants::coords) { + assert(domain_->all_dims_same_type()); auto dim_num = domain_->dim_num(); assert(dim_num > 0); auto coord_size = domain_->dimension(0)->coord_size(); @@ -310,7 +283,7 @@ const Dimension* 
ArraySchema::dimension(unsigned int i) const { } const Dimension* ArraySchema::dimension(const std::string& name) const { - auto it = dim_map_.find(name.empty() ? constants::default_dim_name : name); + auto it = dim_map_.find(name); return it == dim_map_.end() ? nullptr : it->second; } @@ -347,11 +320,8 @@ Status ArraySchema::has_attribute( const std::string& name, bool* has_attr) const { *has_attr = false; - std::string normalized; - RETURN_NOT_OK(attribute_name_normalized(name.c_str(), &normalized)); - for (auto& attr : attributes_) { - if (normalized == attr->name()) { + if (name == attr->name()) { *has_attr = true; break; } @@ -473,21 +443,7 @@ Status ArraySchema::add_attribute(const Attribute* attr, bool check_special) { } // Create new attribute and potentially set a default name - auto new_attr = (Attribute*)nullptr; - if (attr->is_anonymous()) { - // Check if any other attributes are anonymous - for (auto& a : attributes_) { - if (a->is_anonymous()) { - return LOG_STATUS(Status::ArraySchemaError( - "Only one anonymous attribute is allowed per array")); - } - } - new_attr = new Attribute(attr); - new_attr->set_name(constants::default_attr_name); - } else { - new_attr = new Attribute(attr); - } - + auto new_attr = new Attribute(attr); attributes_.emplace_back(new_attr); attribute_map_[new_attr->name()] = new_attr; diff --git a/tiledb/sm/array_schema/array_schema.h b/tiledb/sm/array_schema/array_schema.h index 4feb4347e6b..72fb4bf3958 100644 --- a/tiledb/sm/array_schema/array_schema.h +++ b/tiledb/sm/array_schema/array_schema.h @@ -108,35 +108,6 @@ class ArraySchema { */ const Attribute* attribute(const std::string& name) const; - /** - * Returns the given attribute name as it would be stored in the schema. E.g. - * if the argument is "" (empty string), this returns the default anonymous - * attribute name, which is what is stored in the schema for anonymous - * attributes. 
- * - * @param attribute Attribute name - * @param normalized_name Will hold the normalized name - * @return Status - */ - static Status attribute_name_normalized( - const char* attribute, std::string* normalized_name); - - /** - * Returns the given attribute names as they would be stored in the schema. - * E.g. if an input name is "" (empty string), this returns it as the default - * anonymous attribute name, which is what is stored in the schema for - * anonymous attributes. - * - * @param attributes Attribute names to normalize - * @param num_attributes Number of attribute names - * @param normalized_names Will hold the normalized names - * @return Status - */ - static Status attribute_names_normalized( - const char** attributes, - unsigned num_attributes, - std::vector* normalized_names); - /** Returns the number of attributes. */ unsigned int attribute_num() const; diff --git a/tiledb/sm/array_schema/attribute.cc b/tiledb/sm/array_schema/attribute.cc index 46fb2007d6c..93f365e57f5 100644 --- a/tiledb/sm/array_schema/attribute.cc +++ b/tiledb/sm/array_schema/attribute.cc @@ -127,7 +127,7 @@ void Attribute::dump(FILE* out) const { out = stdout; // Dump fprintf(out, "### Attribute ###\n"); - fprintf(out, "- Name: %s\n", is_anonymous() ? 
"" : name_.c_str()); + fprintf(out, "- Name: %s\n", name_.c_str()); fprintf(out, "- Type: %s\n", datatype_str(type_).c_str()); fprintf(out, "- Compressor: %s\n", compressor_str(compressor()).c_str()); fprintf(out, "- Compression level: %d\n", compression_level()); @@ -146,11 +146,6 @@ const std::string& Attribute::name() const { return name_; } -bool Attribute::is_anonymous() const { - return name_.empty() || - utils::parse::starts_with(name_, constants::default_attr_name); -} - // ===== FORMAT ===== // attribute_name_size (uint32_t) // attribute_name (string) diff --git a/tiledb/sm/array_schema/attribute.h b/tiledb/sm/array_schema/attribute.h index 118599bb148..e9665b2da8a 100644 --- a/tiledb/sm/array_schema/attribute.h +++ b/tiledb/sm/array_schema/attribute.h @@ -112,9 +112,6 @@ class Attribute { /** Returns the attribute name. */ const std::string& name() const; - /** Returns true if this is an anonymous (unlabeled) attribute **/ - bool is_anonymous() const; - /** * Serializes the object members into a binary buffer. * diff --git a/tiledb/sm/array_schema/dimension.cc b/tiledb/sm/array_schema/dimension.cc index 97992caff7b..9562c95fb6c 100644 --- a/tiledb/sm/array_schema/dimension.cc +++ b/tiledb/sm/array_schema/dimension.cc @@ -236,7 +236,7 @@ void Dimension::dump(FILE* out) const { // Dump fprintf(out, "### Dimension ###\n"); - fprintf(out, "- Name: %s\n", is_anonymous() ? 
"" : name_.c_str()); + fprintf(out, "- Name: %s\n", name_.c_str()); fprintf(out, "- Domain: %s\n", domain_s.c_str()); fprintf(out, "- Tile extent: %s\n", tile_extent_s.c_str()); } @@ -251,11 +251,6 @@ const std::string& Dimension::name() const { return name_; } -bool Dimension::is_anonymous() const { - return name_.empty() || - utils::parse::starts_with(name_, constants::default_dim_name); -} - template void Dimension::ceil_to_tile( const Dimension* dim, const Range& r, uint64_t tile_num, ByteVecValue* v) { @@ -450,7 +445,7 @@ Status Dimension::oob(const void* coord) const { std::string err_msg; auto ret = oob_func_(this, coord, &err_msg); if (ret) - return LOG_STATUS(Status::DimensionError(err_msg)); + return Status::DimensionError(err_msg); return Status::Ok(); } diff --git a/tiledb/sm/array_schema/dimension.h b/tiledb/sm/array_schema/dimension.h index 7875db19bfd..12afcf21fe6 100644 --- a/tiledb/sm/array_schema/dimension.h +++ b/tiledb/sm/array_schema/dimension.h @@ -113,9 +113,6 @@ class Dimension { /** Returns the dimension name. */ const std::string& name() const; - /** Returns true if this is an anonymous (unlabled) dimension **/ - bool is_anonymous() const; - /** * Retrieves the value `v` that lies at the end (ceil) of the tile * that is `tile_num` tiles apart from the beginning of `r`. 
diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index 7985b543500..66fc78c390a 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -105,12 +105,7 @@ Status Domain::add_dimension(const Dimension* dim) { Status::DomainError("Cannot add dimension to domain; All added " "dimensions must have the same type")); - // Compute new dimension name - std::string new_dim_name = dim->name(); - if (new_dim_name.empty()) - new_dim_name = default_dimension_name(dim_num_); - - auto new_dim = new Dimension(new_dim_name, type_); + auto new_dim = new Dimension(dim->name(), type_); RETURN_NOT_OK_ELSE(new_dim->set_domain(dim->domain()), delete new_dim); RETURN_NOT_OK_ELSE( new_dim->set_tile_extent(dim->tile_extent()), delete new_dim); @@ -652,12 +647,6 @@ int Domain::tile_order_cmp( return tile_order_cmp_func_[dim_idx](dim, coord_a, coord_b); } -/* -Datatype Domain::type() const { - return type_; -} -*/ - /* ****************************** */ /* PRIVATE METHODS */ /* ****************************** */ @@ -893,12 +882,6 @@ void Domain::compute_tile_offsets() { std::reverse(tile_offsets_row_.begin(), tile_offsets_row_.end()); } -std::string Domain::default_dimension_name(unsigned int i) const { - std::stringstream ss; - ss << constants::default_dim_name << "_" << i; - return ss.str(); -} - template uint64_t Domain::get_cell_pos_col(const T* coords) const { // For easy reference diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index 2abfc7c9993..3befdc6ba7f 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -635,9 +635,6 @@ class Domain { template void compute_tile_offsets(); - /** Returns the default name constructed for the i-th dimension. */ - std::string default_dimension_name(unsigned int i) const; - /** * Retrieves the next tile coordinates along the array tile order within a * given tile domain. 
Applicable only to **dense** arrays, and focusing on diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index e2351a2db92..ac7f50ec8f3 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -1502,11 +1502,7 @@ int32_t tiledb_attribute_get_name( if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, attr) == TILEDB_ERR) return TILEDB_ERR; - // Hide anonymous attribute name from user - if (attr->attr_->is_anonymous()) - *name = ""; - else - *name = attr->attr_->name().c_str(); + *name = attr->attr_->name().c_str(); return TILEDB_OK; } @@ -1835,28 +1831,8 @@ int32_t tiledb_domain_get_dimension_from_name( return TILEDB_OK; } std::string name_string(name); - const tiledb::sm::Dimension* found_dim = nullptr; - if (name_string.empty()) { // anonymous dimension - bool found_anonymous = false; - for (uint32_t i = 0; i < ndim; i++) { - auto dim = domain->domain_->dimension(i); - if (dim->is_anonymous()) { - if (found_anonymous) { - tiledb::sm::Status st = tiledb::sm::Status::Error( - "Dimension from name is ambiguous when " - "there are multiple anonymous " - "dimensions; Use index instead"); - LOG_STATUS(st); - save_error(ctx, st); - return TILEDB_ERR; - } - found_anonymous = true; - found_dim = dim; - } - } - } else { - found_dim = domain->domain_->dimension(name_string); - } + auto found_dim = domain->domain_->dimension(name_string); + if (found_dim == nullptr) { tiledb::sm::Status st = tiledb::sm::Status::DomainError( std::string("Dimension \"") + name + "\" does not exist"); @@ -1864,6 +1840,7 @@ int32_t tiledb_domain_get_dimension_from_name( save_error(ctx, st); return TILEDB_ERR; } + *dim = new (std::nothrow) tiledb_dimension_t; if (*dim == nullptr) { auto st = @@ -2521,24 +2498,16 @@ int32_t tiledb_query_set_subarray( int32_t tiledb_query_set_buffer( tiledb_ctx_t* ctx, tiledb_query_t* query, - const char* attribute, + const char* name, void* buffer, uint64_t* buffer_size) { // Sanity check if (sanity_check(ctx) == TILEDB_ERR || 
sanity_check(ctx, query) == TILEDB_ERR) return TILEDB_ERR; - // Normalize name - std::string normalized_name; - if (SAVE_ERROR_CATCH( - ctx, - tiledb::sm::ArraySchema::attribute_name_normalized( - attribute, &normalized_name))) - return TILEDB_ERR; - // Set attribute buffer if (SAVE_ERROR_CATCH( - ctx, query->query_->set_buffer(normalized_name, buffer, buffer_size))) + ctx, query->query_->set_buffer(name, buffer, buffer_size))) return TILEDB_ERR; return TILEDB_OK; @@ -2547,7 +2516,7 @@ int32_t tiledb_query_set_buffer( int32_t tiledb_query_set_buffer_var( tiledb_ctx_t* ctx, tiledb_query_t* query, - const char* attribute, + const char* name, uint64_t* buffer_off, uint64_t* buffer_off_size, void* buffer_val, @@ -2564,23 +2533,11 @@ int32_t tiledb_query_set_buffer_var( buffer_off, buffer_off_size, buffer_val_size))) return TILEDB_ERR; - // Normalize name - std::string normalized_name; - if (SAVE_ERROR_CATCH( - ctx, - tiledb::sm::ArraySchema::attribute_name_normalized( - attribute, &normalized_name))) - return TILEDB_ERR; - // Set attribute buffers if (SAVE_ERROR_CATCH( ctx, query->query_->set_buffer( - normalized_name, - buffer_off, - buffer_off_size, - buffer_val, - buffer_val_size))) + name, buffer_off, buffer_off_size, buffer_val, buffer_val_size))) return TILEDB_ERR; return TILEDB_OK; @@ -2589,7 +2546,7 @@ int32_t tiledb_query_set_buffer_var( int32_t tiledb_query_get_buffer( tiledb_ctx_t* ctx, tiledb_query_t* query, - const char* attribute, + const char* name, void** buffer, uint64_t** buffer_size) { // Sanity check @@ -2598,7 +2555,7 @@ int32_t tiledb_query_get_buffer( // Set attribute buffer if (SAVE_ERROR_CATCH( - ctx, query->query_->get_buffer(attribute, buffer, buffer_size))) + ctx, query->query_->get_buffer(name, buffer, buffer_size))) return TILEDB_ERR; return TILEDB_OK; @@ -2607,7 +2564,7 @@ int32_t tiledb_query_get_buffer( int32_t tiledb_query_get_buffer_var( tiledb_ctx_t* ctx, tiledb_query_t* query, - const char* attribute, + const char* name, uint64_t** 
buffer_off, uint64_t** buffer_off_size, void** buffer_val, @@ -2620,11 +2577,7 @@ int32_t tiledb_query_get_buffer_var( if (SAVE_ERROR_CATCH( ctx, query->query_->get_buffer( - attribute, - buffer_off, - buffer_off_size, - buffer_val, - buffer_val_size))) + name, buffer_off, buffer_off_size, buffer_val, buffer_val_size))) return TILEDB_ERR; return TILEDB_OK; @@ -2793,22 +2746,12 @@ int32_t tiledb_query_get_range( int32_t tiledb_query_get_est_result_size( tiledb_ctx_t* ctx, const tiledb_query_t* query, - const char* attr_name, + const char* name, uint64_t* size) { if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, query) == TILEDB_ERR) return TILEDB_ERR; - // Normalize name - std::string normalized_name; - if (SAVE_ERROR_CATCH( - ctx, - tiledb::sm::ArraySchema::attribute_name_normalized( - attr_name, &normalized_name))) - return TILEDB_ERR; - - if (SAVE_ERROR_CATCH( - ctx, - query->query_->get_est_result_size(normalized_name.c_str(), size))) + if (SAVE_ERROR_CATCH(ctx, query->query_->get_est_result_size(name, size))) return TILEDB_ERR; return TILEDB_OK; @@ -2817,24 +2760,14 @@ int32_t tiledb_query_get_est_result_size( int32_t tiledb_query_get_est_result_size_var( tiledb_ctx_t* ctx, const tiledb_query_t* query, - const char* attr_name, + const char* name, uint64_t* size_off, uint64_t* size_val) { if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, query) == TILEDB_ERR) return TILEDB_ERR; - // Normalize name - std::string normalized_name; - if (SAVE_ERROR_CATCH( - ctx, - tiledb::sm::ArraySchema::attribute_name_normalized( - attr_name, &normalized_name))) - return TILEDB_ERR; - if (SAVE_ERROR_CATCH( - ctx, - query->query_->get_est_result_size( - normalized_name.c_str(), size_off, size_val))) + ctx, query->query_->get_est_result_size(name, size_off, size_val))) return TILEDB_ERR; return TILEDB_OK; diff --git a/tiledb/sm/c_api/tiledb.h b/tiledb/sm/c_api/tiledb.h index 46fac280e82..0939d624db9 100644 --- a/tiledb/sm/c_api/tiledb.h +++ b/tiledb/sm/c_api/tiledb.h 
@@ -2961,7 +2961,7 @@ TILEDB_EXPORT int32_t tiledb_query_set_subarray( tiledb_ctx_t* ctx, tiledb_query_t* query, const void* subarray); /** - * Sets the buffer for a fixed-sized attribute to a query, which will + * Sets the buffer for a fixed-sized attribute/dimension to a query, which will * either hold the values to be written (if it is a write query), or will hold * the results from a read query. * @@ -2975,10 +2975,8 @@ TILEDB_EXPORT int32_t tiledb_query_set_subarray( * * @param ctx The TileDB context. * @param query The TileDB query. - * @param attribute The attribute to set the buffer for. Note that the - * coordinates have special attribute name `TILEDB_COORDS`. Also, - * if `attribute` is equal to the empty string, then a special default - * attribute name is set. + * @param name The attribute/dimension to set the buffer for. Note that + * zipped coordinates have special name `TILEDB_COORDS`. * @param buffer The buffer that either have the input data to be written, * or will hold the data to be read. * @param buffer_size In the case of writes, this is the size of `buffer` @@ -2990,7 +2988,7 @@ TILEDB_EXPORT int32_t tiledb_query_set_subarray( TILEDB_EXPORT int32_t tiledb_query_set_buffer( tiledb_ctx_t* ctx, tiledb_query_t* query, - const char* attribute, + const char* name, void* buffer, uint64_t* buffer_size); @@ -3012,7 +3010,7 @@ TILEDB_EXPORT int32_t tiledb_query_set_buffer( * * @param ctx The TileDB context. * @param query The TileDB query. - * @param attribute The attribute to set the buffer for. + * @param name The attribute/dimension to set the buffer for. * @param buffer_off The buffer that either have the input data to be written, * or will hold the data to be read. This buffer holds the starting offsets * of each cell value in `buffer_val`. 
@@ -3033,7 +3031,7 @@ TILEDB_EXPORT int32_t tiledb_query_set_buffer( TILEDB_EXPORT int32_t tiledb_query_set_buffer_var( tiledb_ctx_t* ctx, tiledb_query_t* query, - const char* attribute, + const char* name, uint64_t* buffer_off, uint64_t* buffer_off_size, void* buffer_val, @@ -3053,8 +3051,8 @@ TILEDB_EXPORT int32_t tiledb_query_set_buffer_var( * * @param ctx The TileDB context. * @param query The TileDB query. - * @param attribute The attribute to get the buffer for. Note that the - * coordinates have special attribute name `TILEDB_COORDS`. + * @param name The attribute/dimension to get the buffer for. Note that the + * zipped coordinates have special name `TILEDB_COORDS`. * @param buffer The buffer to retrieve. * @param buffer_size A pointer to the size of the buffer. * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. @@ -3062,14 +3060,14 @@ TILEDB_EXPORT int32_t tiledb_query_set_buffer_var( TILEDB_EXPORT int32_t tiledb_query_get_buffer( tiledb_ctx_t* ctx, tiledb_query_t* query, - const char* attribute, + const char* name, void** buffer, uint64_t** buffer_size); /** - * Gets the values and offsets buffers for a var-sized attribute to a query. - * If the buffers have not been set, then `buffer_off` and `buffer_val` are - * set to `nullptr`. + * Gets the values and offsets buffers for a var-sized attribute/dimension + * to a query. If the buffers have not been set, then `buffer_off` and + * `buffer_val` are set to `nullptr`. * * **Example:** * @@ -3084,7 +3082,7 @@ TILEDB_EXPORT int32_t tiledb_query_get_buffer( * * @param ctx The TileDB context. * @param query The TileDB query. - * @param attribute The attribute to set the buffer for. + * @param name The attribute/dimension to get the buffers for. * @param buffer_off The offsets buffer to be retrieved. * @param buffer_off_size A pointer to the size of the offsets buffer. * @param buffer_val The values buffer to be retrieved.
@@ -3094,7 +3092,7 @@ TILEDB_EXPORT int32_t tiledb_query_get_buffer( TILEDB_EXPORT int32_t tiledb_query_get_buffer_var( tiledb_ctx_t* ctx, tiledb_query_t* query, - const char* attribute, + const char* name, uint64_t** buffer_off, uint64_t** buffer_off_size, void** buffer_val, @@ -3403,7 +3401,7 @@ TILEDB_EXPORT int32_t tiledb_query_get_range( const void** stride); /** - * Retrieves the estimated result size for a fixed-sized attribute. + * Retrieves the estimated result size for a fixed-sized attribute/dimension. * * **Example:** * @@ -3414,30 +3412,30 @@ TILEDB_EXPORT int32_t tiledb_query_get_range( * * @param ctx The TileDB context * @param query The query. - * @param attr_name The attribute name. + * @param name The attribute/dimension name. * @param size The size (in bytes) to be retrieved. * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. */ TILEDB_EXPORT int32_t tiledb_query_get_est_result_size( tiledb_ctx_t* ctx, const tiledb_query_t* query, - const char* attr_name, + const char* name, uint64_t* size); /** - * Retrieves the estimated result size for a var-sized attribute. + * Retrieves the estimated result size for a var-sized attribute/dimension. * * **Example:** * * @code{.c} * uint64_t size_off, size_val; - * tiledb_query_get_est_query_size_var( + * tiledb_query_get_est_result_size_var( * ctx, query, "a", &size_off, &size_val); * @endcode * * @param ctx The TileDB context * @param query The query. - * @param attr_name The attribute name. + * @param name The attribute/dimension name. * @param size_off The size of the offsets (in bytes) to be retrieved. * @param size_val The size of the values (in bytes) to be retrieved. * @return `TILEDB_OK` for success and `TILEDB_ERR` for error. 
@@ -3445,7 +3443,7 @@ TILEDB_EXPORT int32_t tiledb_query_get_est_result_size( TILEDB_EXPORT int32_t tiledb_query_get_est_result_size_var( tiledb_ctx_t* ctx, const tiledb_query_t* query, - const char* attr_name, + const char* name, uint64_t* size_off, uint64_t* size_val); diff --git a/tiledb/sm/misc/constants.cc b/tiledb/sm/misc/constants.cc index adfd5db9beb..f519551fc1e 100644 --- a/tiledb/sm/misc/constants.cc +++ b/tiledb/sm/misc/constants.cc @@ -442,12 +442,6 @@ const uint32_t format_version = 5; /** The maximum size of a tile chunk (unit of compression) in bytes. */ const uint64_t max_tile_chunk_size = 64 * 1024; -/** The default attribute name prefix. */ -const std::string default_attr_name = "__attr"; - -/** The default dimension name prefix. */ -const std::string default_dim_name = "__dim"; - /** Maximum number of attempts to wait for an S3 response. */ const unsigned int s3_max_attempts = 100; diff --git a/tiledb/sm/misc/constants.h b/tiledb/sm/misc/constants.h index bf9f11f92d9..3f6108e1537 100644 --- a/tiledb/sm/misc/constants.h +++ b/tiledb/sm/misc/constants.h @@ -429,12 +429,6 @@ extern const uint32_t format_version; /** The maximum size of a tile chunk (unit of compression) in bytes. */ extern const uint64_t max_tile_chunk_size; -/** The default attribute name prefix. */ -extern const std::string default_attr_name; - -/** The default dimension name prefix. */ -extern const std::string default_dim_name; - /** Maximum number of attempts to wait for an S3 response. */ extern const unsigned int s3_max_attempts; diff --git a/tiledb/sm/misc/types.h b/tiledb/sm/misc/types.h index 0107bcfb932..23edb71e54d 100644 --- a/tiledb/sm/misc/types.h +++ b/tiledb/sm/misc/types.h @@ -134,10 +134,13 @@ typedef std::vector NDRange; /** A value as a vector of bytes. */ typedef std::vector ByteVecValue; -/** Contains the buffer(s) and buffer size(s) for some attribute / dimension. */ +/** A byte vector. 
*/ +typedef std::vector ByteVec; + +/** Contains the buffer(s) and buffer size(s) for some attribute/dimension. */ struct QueryBuffer { /** - * The attribute/coordinate buffer. In case the attribute/dimension is + * The attribute/dimension buffer. In case the attribute/dimension is * var-sized, this is the offsets buffer. */ void* buffer_; diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index e876c257c1a..a1044772f54 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -207,7 +207,7 @@ const ArraySchema* Query::array_schema() const { std::vector Query::buffer_names() const { if (type_ == QueryType::WRITE) return writer_.buffer_names(); - return reader_.attributes(); // TODO: this will change in a subsequent PR + return reader_.buffer_names(); } QueryBuffer Query::buffer(const std::string& name) const { @@ -236,62 +236,52 @@ Status Query::finalize() { return Status::Ok(); } -// TODO: fix normalized for coords Status Query::get_buffer( const char* name, void** buffer, uint64_t** buffer_size) const { - // Normalize attribute - std::string normalized; - RETURN_NOT_OK(ArraySchema::attribute_name_normalized(name, &normalized)); - // Check attribute auto array_schema = this->array_schema(); - if (normalized != constants::coords) { - if (array_schema->attribute(normalized) == nullptr && - array_schema->dimension(normalized) == nullptr) + if (name != constants::coords) { + if (array_schema->attribute(name) == nullptr && + array_schema->dimension(name) == nullptr) return LOG_STATUS(Status::QueryError( std::string("Cannot get buffer; Invalid attribute/dimension name '") + - normalized + "'")); + name + "'")); } - if (array_schema->var_size(normalized)) + if (array_schema->var_size(name)) return LOG_STATUS(Status::QueryError( - std::string("Cannot get buffer; '") + normalized + "' is var-sized")); + std::string("Cannot get buffer; '") + name + "' is var-sized")); if (type_ == QueryType::WRITE) - return writer_.get_buffer(normalized, buffer, 
buffer_size); - return reader_.get_buffer(normalized, buffer, buffer_size); + return writer_.get_buffer(name, buffer, buffer_size); + return reader_.get_buffer(name, buffer, buffer_size); } -// TODO: fix normalized for coords Status Query::get_buffer( const char* name, uint64_t** buffer_off, uint64_t** buffer_off_size, void** buffer_val, uint64_t** buffer_val_size) const { - // Normalize attribute - std::string normalized; - RETURN_NOT_OK(ArraySchema::attribute_name_normalized(name, &normalized)); - // Check attribute auto array_schema = this->array_schema(); - if (normalized == constants::coords) { + if (name == constants::coords) { return LOG_STATUS( Status::QueryError("Cannot get buffer; Coordinates are not var-sized")); } - if (array_schema->attribute(normalized) == nullptr && - array_schema->dimension(normalized) == nullptr) + if (array_schema->attribute(name) == nullptr && + array_schema->dimension(name) == nullptr) return LOG_STATUS(Status::QueryError( std::string("Cannot get buffer; Invalid attribute/dimension name '") + - normalized + "'")); - if (!array_schema->var_size(normalized)) + name + "'")); + if (!array_schema->var_size(name)) return LOG_STATUS(Status::QueryError( - std::string("Cannot get buffer; '") + normalized + "' is fixed-sized")); + std::string("Cannot get buffer; '") + name + "' is fixed-sized")); if (type_ == QueryType::WRITE) return writer_.get_buffer( - normalized, buffer_off, buffer_off_size, buffer_val, buffer_val_size); + name, buffer_off, buffer_off_size, buffer_val, buffer_val_size); return reader_.get_buffer( - normalized, buffer_off, buffer_off_size, buffer_val, buffer_val_size); + name, buffer_off, buffer_off_size, buffer_val, buffer_val_size); } Status Query::get_attr_serialization_state( diff --git a/tiledb/sm/query/reader.cc b/tiledb/sm/query/reader.cc index 0ce3ef15a53..938bcad7eed 100644 --- a/tiledb/sm/query/reader.cc +++ b/tiledb/sm/query/reader.cc @@ -140,15 +140,18 @@ const ArraySchema* Reader::array_schema() const { 
return array_schema_; } -std::vector Reader::attributes() const { - return attributes_; +std::vector Reader::buffer_names() const { + std::vector ret; + ret.reserve(buffers_.size()); + for (const auto& it : buffers_) + ret.push_back(it.first); + + return ret; } QueryBuffer Reader::buffer(const std::string& name) const { - // TODO: fetch separate coordinate buffers as well. To be addressed in - // TODO: subsequent PR - auto buf = attr_buffers_.find(name); - if (buf == attr_buffers_.end()) + auto buf = buffers_.find(name); + if (buf == buffers_.end()) return QueryBuffer{}; return buf->second; } @@ -157,11 +160,10 @@ bool Reader::incomplete() const { return read_state_.overflowed_ || !read_state_.done(); } -// TODO: handle both attributes and dimensions Status Reader::get_buffer( - const std::string& attribute, void** buffer, uint64_t** buffer_size) const { - auto it = attr_buffers_.find(attribute); - if (it == attr_buffers_.end()) { + const std::string& name, void** buffer, uint64_t** buffer_size) const { + auto it = buffers_.find(name); + if (it == buffers_.end()) { *buffer = nullptr; *buffer_size = nullptr; } else { @@ -172,15 +174,14 @@ Status Reader::get_buffer( return Status::Ok(); } -// TODO: handle both attributes and dimensions Status Reader::get_buffer( - const std::string& attribute, + const std::string& name, uint64_t** buffer_off, uint64_t** buffer_off_size, void** buffer_val, uint64_t** buffer_val_size) const { - auto it = attr_buffers_.find(attribute); - if (it == attr_buffers_.end()) { + auto it = buffers_.find(name); + if (it == buffers_.end()) { *buffer_off = nullptr; *buffer_off_size = nullptr; *buffer_val = nullptr; @@ -202,12 +203,9 @@ Status Reader::init(const Layout& layout) { if (array_schema_ == nullptr) return LOG_STATUS(Status::ReaderError( "Cannot initialize reader; Array metadata not set")); - if (attr_buffers_.empty()) + if (buffers_.empty()) return LOG_STATUS( Status::ReaderError("Cannot initialize reader; Buffers not set")); - if 
(attributes_.empty()) - return LOG_STATUS( - Status::ReaderError("Cannot initialize reader; Attributes not set")); if (array_schema_->dense() && !sparse_mode_ && !subarray_.is_set()) return LOG_STATUS(Status::ReaderError( "Cannot initialize reader; Dense reads must have a subarray set")); @@ -247,7 +245,7 @@ Layout Reader::layout() const { } bool Reader::no_results() const { - for (const auto& it : attr_buffers_) { + for (const auto& it : buffers_) { if (*(it.second.buffer_size_) != 0) return false; } @@ -324,7 +322,7 @@ void Reader::set_array_schema(const ArraySchema* array_schema) { } Status Reader::set_buffer( - const std::string& attribute, + const std::string& name, void* buffer, uint64_t* buffer_size, bool check_null_buffers) { @@ -338,39 +336,45 @@ Status Reader::set_buffer( return LOG_STATUS( Status::ReaderError("Cannot set buffer; Array schema not set")); - // Check that attribute exists - if (attribute != constants::coords && - array_schema_->attribute(attribute) == nullptr) - return LOG_STATUS( - Status::ReaderError("Cannot set buffer; Invalid attribute")); + // For easy reference + bool is_dim = array_schema_->is_dim(name); + bool is_attr = array_schema_->is_attr(name); + + // Check that attribute/dimension exists + if (name != constants::coords && !is_dim && !is_attr) + return LOG_STATUS(Status::ReaderError( + std::string("Cannot set buffer; Invalid attribute/dimension '") + name + + "'")); - // Check that attribute is fixed-sized - bool var_size = - (attribute != constants::coords && array_schema_->var_size(attribute)); + // Check that attribute/dimension is fixed-sized + bool var_size = (name != constants::coords && array_schema_->var_size(name)); if (var_size) - return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer; Input attribute '") + attribute + + return LOG_STATUS(Status::ReaderError( + std::string("Cannot set buffer; Input attribute/dimension '") + name + "' is var-sized")); - // Error if setting a new attribute after 
initialization - bool attr_exists = attr_buffers_.find(attribute) != attr_buffers_.end(); - if (read_state_.initialized_ && !attr_exists) + // Check if zipped coordinates coexist with separate coordinate buffers + if ((is_dim && buffers_.find(constants::coords) != buffers_.end()) || + (name == constants::coords && has_separate_coords())) return LOG_STATUS(Status::ReaderError( - std::string("Cannot set buffer for new attribute '") + attribute + - "' after initialization")); + std::string("Cannot set separate coordinate buffers and " + "a zipped coordinate buffer in the same query"))); - // Append to attributes only if buffer not set before - if (!attr_exists) - attributes_.emplace_back(attribute); + // Error if setting a new attribute/dimension after initialization + bool exists = buffers_.find(name) != buffers_.end(); + if (read_state_.initialized_ && !exists) + return LOG_STATUS(Status::ReaderError( + std::string("Cannot set buffer for new attribute/dimension '") + name + + "' after initialization")); // Set attribute buffer - attr_buffers_[attribute] = QueryBuffer(buffer, nullptr, buffer_size, nullptr); + buffers_[name] = QueryBuffer(buffer, nullptr, buffer_size, nullptr); return Status::Ok(); } Status Reader::set_buffer( - const std::string& attribute, + const std::string& name, uint64_t* buffer_off, uint64_t* buffer_off_size, void* buffer_val, @@ -388,33 +392,27 @@ Status Reader::set_buffer( return LOG_STATUS( Status::ReaderError("Cannot set buffer; Array schema not set")); - // Check that attribute exists - if (attribute != constants::coords && - array_schema_->attribute(attribute) == nullptr) + // Check that attribute/dimension exists + if (name != constants::coords && array_schema_->attribute(name) == nullptr) return LOG_STATUS( - Status::WriterError("Cannot set buffer; Invalid attribute")); + Status::ReaderError("Cannot set buffer; Invalid attribute/dimension")); - // Check that attribute is var-sized - bool var_size = - (attribute != constants::coords && 
array_schema_->var_size(attribute)); + // Check that attribute/dimension is var-sized + bool var_size = (name != constants::coords && array_schema_->var_size(name)); if (!var_size) - return LOG_STATUS(Status::WriterError( - std::string("Cannot set buffer; Input attribute '") + attribute + + return LOG_STATUS(Status::ReaderError( + std::string("Cannot set buffer; Input attribute/dimension '") + name + "' is fixed-sized")); - // Error if setting a new attribute after initialization - bool attr_exists = attr_buffers_.find(attribute) != attr_buffers_.end(); - if (read_state_.initialized_ && !attr_exists) + // Error if setting a new attribute/dimension after initialization + bool exists = buffers_.find(name) != buffers_.end(); + if (read_state_.initialized_ && !exists) return LOG_STATUS(Status::ReaderError( - std::string("Cannot set buffer for new attribute '") + attribute + + std::string("Cannot set buffer for new attribute/dimension '") + name + "' after initialization")); - // Append to attributes only if buffer not set before - if (!attr_exists) - attributes_.emplace_back(attribute); - - // Set attribute buffer - attr_buffers_[attribute] = + // Set attribute/dimension buffer + buffers_[name] = QueryBuffer(buffer_off, buffer_val, buffer_off_size, buffer_val_size); return Status::Ok(); @@ -835,7 +833,7 @@ Status Reader::copy_fixed_cells( STATS_FUNC_IN(reader_copy_fixed_cells); // For easy reference - auto it = attr_buffers_.find(name); + auto it = buffers_.find(name); auto buffer = (unsigned char*)it->second.buffer_; auto buffer_size = it->second.buffer_size_; auto cell_size = array_schema_->cell_size(name); @@ -898,7 +896,7 @@ Status Reader::copy_fixed_cells( RETURN_NOT_OK(st); // Update buffer offsets - *(attr_buffers_[name].buffer_size_) = buffer_offset; + *(buffers_[name].buffer_size_) = buffer_offset; STATS_COUNTER_ADD(reader_num_fixed_cell_bytes_copied, buffer_offset); return Status::Ok(); @@ -913,7 +911,7 @@ Status Reader::copy_var_cells( 
STATS_FUNC_IN(reader_copy_var_cells); // For easy reference - auto it = attr_buffers_.find(name); + auto it = buffers_.find(name); auto buffer = (unsigned char*)it->second.buffer_; auto buffer_var = (unsigned char*)it->second.buffer_var_; auto buffer_size = it->second.buffer_size_; @@ -996,8 +994,8 @@ Status Reader::copy_var_cells( RETURN_NOT_OK(st); // Update buffer offsets - *(attr_buffers_[name].buffer_size_) = total_offset_size; - *(attr_buffers_[name].buffer_var_size_) = total_var_size; + *(buffers_[name].buffer_size_) = total_offset_size; + *(buffers_[name].buffer_var_size_) = total_var_size; STATS_COUNTER_ADD( reader_num_var_cell_bytes_copied, total_offset_size + total_var_size); @@ -1364,16 +1362,17 @@ Status Reader::dense_read() { auto stride = array_schema_->domain()->stride(subarray.layout()); // Copy cells - for (const auto& attr : attributes_) { + for (const auto& it : buffers_) { + const auto& name = it.first; if (read_state_.overflowed_) break; - if (attr == constants::coords) + if (name == constants::coords || array_schema_->is_dim(name)) continue; - RETURN_CANCEL_OR_ERROR(read_tiles(attr, result_tiles)); - RETURN_CANCEL_OR_ERROR(unfilter_tiles(attr, result_tiles)); - RETURN_CANCEL_OR_ERROR(copy_cells(attr, stride, result_cell_slabs)); - clear_tiles(attr, result_tiles); + RETURN_CANCEL_OR_ERROR(read_tiles(name, result_tiles)); + RETURN_CANCEL_OR_ERROR(unfilter_tiles(name, result_tiles)); + RETURN_CANCEL_OR_ERROR(copy_cells(name, stride, result_cell_slabs)); + clear_tiles(name, result_tiles); } // Fill coordinates if the user requested them @@ -1387,24 +1386,38 @@ Status Reader::dense_read() { template Status Reader::fill_dense_coords(const Subarray& subarray) { - // For easy reference - uint64_t coords_buff_offset = 0; - auto it = attr_buffers_.find(constants::coords); - assert(it != attr_buffers_.end()); - auto coords_buff = it->second.buffer_; - auto coords_buff_size = *(it->second.buffer_size_); + // Prepare buffers + std::vector dim_idx; + 
std::vector buffers; + auto coords_it = buffers_.find(constants::coords); + auto dim_num = array_schema_->dim_num(); + if (coords_it != buffers_.end()) { + buffers.emplace_back(&(coords_it->second)); + dim_idx.emplace_back(dim_num); + } else { + for (unsigned d = 0; d < dim_num; ++d) { + const auto& dim = array_schema_->dimension(d); + auto it = buffers_.find(dim->name()); + if (it != buffers_.end()) { + buffers.emplace_back(&(it->second)); + dim_idx.emplace_back(d); + } + } + } + std::vector offsets(buffers.size(), 0); if (layout_ == Layout::GLOBAL_ORDER) { - RETURN_NOT_OK(fill_dense_coords_global( - subarray, coords_buff, coords_buff_size, &coords_buff_offset)); + RETURN_NOT_OK( + fill_dense_coords_global(subarray, dim_idx, buffers, &offsets)); } else { assert(layout_ == Layout::ROW_MAJOR || layout_ == Layout::COL_MAJOR); - RETURN_NOT_OK(fill_dense_coords_row_col( - subarray, coords_buff, coords_buff_size, &coords_buff_offset)); + RETURN_NOT_OK( + fill_dense_coords_row_col(subarray, dim_idx, buffers, &offsets)); } - // Update buffer size - *(it->second.buffer_size_) = coords_buff_offset; + // Update buffer sizes + for (size_t i = 0; i < buffers.size(); ++i) + *(buffers[i]->buffer_size_) = offsets[i]; return Status::Ok(); } @@ -1412,16 +1425,16 @@ Status Reader::fill_dense_coords(const Subarray& subarray) { template Status Reader::fill_dense_coords_global( const Subarray& subarray, - void* coords_buff, - uint64_t coords_buff_size, - uint64_t* coords_buff_offset) { + const std::vector& dim_idx, + const std::vector& buffers, + std::vector* offsets) { auto tile_coords = subarray.tile_coords(); auto cell_order = array_schema_->cell_order(); for (const auto& tc : tile_coords) { auto tile_subarray = subarray.crop_to_tile((const T*)&tc[0], cell_order); - RETURN_NOT_OK(fill_dense_coords_row_col( - tile_subarray, coords_buff, coords_buff_size, coords_buff_offset)); + RETURN_NOT_OK( + fill_dense_coords_row_col(tile_subarray, dim_idx, buffers, offsets)); } return 
Status::Ok(); @@ -1430,15 +1443,13 @@ Status Reader::fill_dense_coords_global( template Status Reader::fill_dense_coords_row_col( const Subarray& subarray, - void* coords_buff, - uint64_t coords_buff_size, - uint64_t* coords_buff_offset) { + const std::vector& dim_idx, + const std::vector& buffers, + std::vector* offsets) { STATS_FUNC_IN(reader_fill_coords); auto cell_order = array_schema_->cell_order(); - auto coord_size = array_schema_->domain()->dimension(0)->coord_size(); auto dim_num = array_schema_->dim_num(); - auto coords_size = dim_num * coord_size; // Iterate over all coordinates, retrieved in cell slabs CellSlabIter iter(&subarray); @@ -1448,18 +1459,28 @@ Status Reader::fill_dense_coords_row_col( auto coords_num = cell_slab.length_; // Check for overflow - if (coords_num * coords_size + (*coords_buff_offset) > coords_buff_size) { - read_state_.overflowed_ = true; - return Status::Ok(); + for (size_t i = 0; i < buffers.size(); ++i) { + auto idx = (dim_idx[i] == dim_num) ? 0 : dim_idx[i]; + auto dim = array_schema_->domain()->dimension(idx); + auto coord_size = dim->coord_size(); + coord_size = (dim_idx[i] == dim_num) ? 
coord_size * dim_num : coord_size; + auto buff_size = *(buffers[i]->buffer_size_); + auto offset = (*offsets)[i]; + if (coords_num * coord_size + offset > buff_size) { + read_state_.overflowed_ = true; + return Status::Ok(); + } } + // Copy slab if (layout_ == Layout::ROW_MAJOR || (layout_ == Layout::GLOBAL_ORDER && cell_order == Layout::ROW_MAJOR)) fill_dense_coords_row_slab( - &cell_slab.coords_[0], coords_num, coords_buff, coords_buff_offset); + &cell_slab.coords_[0], coords_num, dim_idx, buffers, offsets); else fill_dense_coords_col_slab( - &cell_slab.coords_[0], coords_num, coords_buff, coords_buff_offset); + &cell_slab.coords_[0], coords_num, dim_idx, buffers, offsets); + ++iter; } @@ -1470,48 +1491,99 @@ Status Reader::fill_dense_coords_row_col( template void Reader::fill_dense_coords_row_slab( - const T* start, uint64_t num, void* buff, uint64_t* offset) const { + const T* start, + uint64_t num, + const std::vector& dim_idx, + const std::vector& buffers, + std::vector* offsets) const { // For easy reference auto dim_num = array_schema_->dim_num(); - assert(dim_num > 0); - auto c_buff = (char*)buff; - - // Fill coordinates - for (uint64_t i = 0; i < num; ++i) { - // First dim-1 dimensions are copied as they are - if (dim_num > 1) { - auto bytes_to_copy = (dim_num - 1) * sizeof(T); - std::memcpy(c_buff + *offset, start, bytes_to_copy); - *offset += bytes_to_copy; - } - // Last dimension is incremented by `i` - auto new_coord = start[dim_num - 1] + i; - std::memcpy(c_buff + *offset, &new_coord, sizeof(T)); - *offset += sizeof(T); + // Special zipped coordinates + if (dim_idx.size() == 1 && dim_idx[0] == dim_num) { + auto c_buff = (char*)buffers[0]->buffer_; + auto offset = &(*offsets)[0]; + + // Fill coordinates + for (uint64_t i = 0; i < num; ++i) { + // First dim-1 dimensions are copied as they are + if (dim_num > 1) { + auto bytes_to_copy = (dim_num - 1) * sizeof(T); + std::memcpy(c_buff + *offset, start, bytes_to_copy); + *offset += bytes_to_copy; + } + + 
// Last dimension is incremented by `i` + auto new_coord = start[dim_num - 1] + i; + std::memcpy(c_buff + *offset, &new_coord, sizeof(T)); + *offset += sizeof(T); + } + } else { // Set of separate coordinate buffers + for (uint64_t i = 0; i < num; ++i) { + for (size_t b = 0; b < buffers.size(); ++b) { + auto c_buff = (char*)buffers[b]->buffer_; + auto offset = &(*offsets)[b]; + + // First dim-1 dimensions are copied as they are + if (dim_num > 1 && dim_idx[b] < dim_num - 1) { + std::memcpy(c_buff + *offset, &start[dim_idx[b]], sizeof(T)); + *offset += sizeof(T); + } else { + // Last dimension is incremented by `i` + auto new_coord = start[dim_num - 1] + i; + std::memcpy(c_buff + *offset, &new_coord, sizeof(T)); + *offset += sizeof(T); + } + } + } } } template void Reader::fill_dense_coords_col_slab( - const T* start, uint64_t num, void* buff, uint64_t* offset) const { + const T* start, + uint64_t num, + const std::vector& dim_idx, + const std::vector& buffers, + std::vector* offsets) const { // For easy reference auto dim_num = array_schema_->dim_num(); - assert(dim_num > 0); - auto c_buff = (char*)buff; - - // Fill coordinates - for (uint64_t i = 0; i < num; ++i) { - // First dimension is incremented by `i` - auto new_coord = start[0] + i; - std::memcpy(c_buff + *offset, &new_coord, sizeof(T)); - *offset += sizeof(T); - - // Last dim-1 dimensions are copied as they are - if (dim_num > 1) { - auto bytes_to_copy = (dim_num - 1) * sizeof(T); - std::memcpy(c_buff + *offset, &start[1], bytes_to_copy); - *offset += bytes_to_copy; + + // Special zipped coordinates + if (dim_idx.size() == 1 && dim_idx[0] == dim_num) { + auto c_buff = (char*)buffers[0]->buffer_; + auto offset = &(*offsets)[0]; + + // Fill coordinates + for (uint64_t i = 0; i < num; ++i) { + // First dimension is incremented by `i` + auto new_coord = start[0] + i; + std::memcpy(c_buff + *offset, &new_coord, sizeof(T)); + *offset += sizeof(T); + + // Last dim-1 dimensions are copied as they are + if (dim_num 
> 1) { + auto bytes_to_copy = (dim_num - 1) * sizeof(T); + std::memcpy(c_buff + *offset, &start[1], bytes_to_copy); + *offset += bytes_to_copy; + } + } + } else { // Separate coordinate buffers + for (uint64_t i = 0; i < num; ++i) { + for (size_t b = 0; b < buffers.size(); ++b) { + auto c_buff = (char*)buffers[b]->buffer_; + auto offset = &(*offsets)[b]; + + // First dimension is incremented by `i` + if (dim_idx[b] == 0) { + auto new_coord = start[0] + i; + std::memcpy(c_buff + *offset, &new_coord, sizeof(T)); + *offset += sizeof(T); + } else { // Last dim-1 dimensions are copied as they are + std::memcpy(c_buff + *offset, &start[dim_idx[b]], sizeof(T)); + *offset += sizeof(T); + } + } } } } @@ -1615,7 +1687,21 @@ Status Reader::get_all_result_coords( } bool Reader::has_coords() const { - return attr_buffers_.find(constants::coords) != attr_buffers_.end(); + for (const auto& it : buffers_) { + if (it.first == constants::coords || array_schema_->is_dim(it.first)) + return true; + } + + return false; +} + +bool Reader::has_separate_coords() const { + for (const auto& it : buffers_) { + if (array_schema_->is_dim(it.first)) + return true; + } + + return false; } Status Reader::init_read_state() { @@ -1644,7 +1730,7 @@ Status Reader::init_read_state() { read_state_.unsplittable_ = false; // Set result size budget - for (const auto& a : attr_buffers_) { + for (const auto& a : buffers_) { auto attr_name = a.first; auto buffer_size = a.second.buffer_size_; auto buffer_var_size = a.second.buffer_var_size_; @@ -1879,7 +1965,7 @@ Status Reader::read_tiles( } void Reader::reset_buffer_sizes() { - for (auto& it : attr_buffers_) { + for (auto& it : buffers_) { *(it.second.buffer_size_) = it.second.original_buffer_size_; if (it.second.buffer_var_size_ != nullptr) *(it.second.buffer_var_size_) = it.second.original_buffer_var_size_; @@ -1932,8 +2018,8 @@ Status Reader::sparse_read() { uint64_t stride = UINT64_MAX; - // Copy coordinates - if (has_coords()) + // Copy zipped 
coordinates + if (buffers_.find(constants::coords) != buffers_.end()) RETURN_CANCEL_OR_ERROR( copy_cells(constants::coords, stride, result_cell_slabs)); @@ -1941,16 +2027,17 @@ Status Reader::sparse_read() { erase_coord_tiles(&sparse_result_tiles); // Copy cells - for (const auto& attr : attributes_) { + for (const auto& it : buffers_) { + const auto& name = it.first; if (read_state_.overflowed_) break; - if (attr == constants::coords) + if (name == constants::coords) continue; - RETURN_CANCEL_OR_ERROR(read_tiles(attr, result_tiles)); - RETURN_CANCEL_OR_ERROR(unfilter_tiles(attr, result_tiles)); - RETURN_CANCEL_OR_ERROR(copy_cells(attr, stride, result_cell_slabs)); - clear_tiles(attr, result_tiles); + RETURN_CANCEL_OR_ERROR(read_tiles(name, result_tiles)); + RETURN_CANCEL_OR_ERROR(unfilter_tiles(name, result_tiles)); + RETURN_CANCEL_OR_ERROR(copy_cells(name, stride, result_cell_slabs)); + clear_tiles(name, result_tiles); } return Status::Ok(); @@ -1959,11 +2046,11 @@ Status Reader::sparse_read() { } void Reader::zero_out_buffer_sizes() { - for (auto& attr_buffer : attr_buffers_) { - if (attr_buffer.second.buffer_size_ != nullptr) - *(attr_buffer.second.buffer_size_) = 0; - if (attr_buffer.second.buffer_var_size_ != nullptr) - *(attr_buffer.second.buffer_var_size_) = 0; + for (auto& buffer : buffers_) { + if (buffer.second.buffer_size_ != nullptr) + *(buffer.second.buffer_size_) = 0; + if (buffer.second.buffer_var_size_ != nullptr) + *(buffer.second.buffer_var_size_) = 0; } } diff --git a/tiledb/sm/query/reader.h b/tiledb/sm/query/reader.h index 2ba1103b4f2..2fca4ddd813 100644 --- a/tiledb/sm/query/reader.h +++ b/tiledb/sm/query/reader.h @@ -163,11 +163,8 @@ class Reader { /** Returns the array schema. */ const ArraySchema* array_schema() const; - /** - * Return list of attribtues for query - * @return vector of attributes for query - */ - std::vector attributes() const; + /** Returns the names of the buffers set by the user for the read query. 
*/ + std::vector buffer_names() const; /** Fetch QueryBuffer for the input attribute/dimension. */ QueryBuffer buffer(const std::string& name) const; @@ -180,31 +177,30 @@ class Reader { bool incomplete() const; /** - * Retrieves the buffer of a fixed-sized attribute. + * Retrieves the buffer of a fixed-sized attribute/dimension. * - * @param attribute The buffer attribute. + * @param name The attribute/dimension name. * @param buffer The buffer to be retrieved. * @param buffer_size A pointer to the buffer size to be retrieved. * @return Status */ Status get_buffer( - const std::string& attribute, - void** buffer, - uint64_t** buffer_size) const; + const std::string& name, void** buffer, uint64_t** buffer_size) const; /** - * Retrieves the offsets and values buffers of a var-sized attribute. + * Retrieves the offsets and values buffers of a var-sized + * attribute/dimension. * - * @param attribute The buffer attribute. + * @param name The attribute/dimension name. * @param buffer_off The offsets buffer to be retrieved. * @param buffer_off_size A pointer to the offsets buffer size to be - * retrieved. + * retrieved. * @param buffer_val The values buffer to be retrieved. * @param buffer_val_size A pointer to the values buffer size to be retrieved. * @return Status */ Status get_buffer( - const std::string& attribute, + const std::string& name, uint64_t** buffer_off, uint64_t** buffer_off_size, void** buffer_val, @@ -244,9 +240,9 @@ class Reader { void set_array_schema(const ArraySchema* array_schema); /** - * Sets the buffer for a fixed-sized attribute. + * Sets the buffer for a fixed-sized attribute/dimension. * - * @param attribute The attribute to set the buffer for. + * @param name The attribute/dimension to set the buffer for. * @param buffer The buffer that will hold the data to be read. 
* @param buffer_size This initially contains the allocated * size of `buffer`, but after the termination of the function @@ -255,15 +251,15 @@ class Reader { * @return Status */ Status set_buffer( - const std::string& attribute, + const std::string& name, void* buffer, uint64_t* buffer_size, bool check_null_buffers = true); /** - * Sets the buffer for a var-sized attribute. + * Sets the buffer for a var-sized attribute/dimension. * - * @param attribute The attribute to set the buffer for. + * @param name The name to set the buffer for. * @param buffer_off The buffer that will hold the data to be read. * This buffer holds the starting offsets of each cell value in * `buffer_val`. @@ -281,7 +277,7 @@ class Reader { * @return Status */ Status set_buffer( - const std::string& attribute, + const std::string& name, uint64_t* buffer_off, uint64_t* buffer_off_size, void* buffer_val, @@ -460,11 +456,12 @@ class Reader { /** The array schema. */ const ArraySchema* array_schema_; - /** The names of the attributes involved in the query. */ - std::vector attributes_; - - /** Maps attribute names to their buffers. */ - std::unordered_map attr_buffers_; + /** + * Maps attribute/dimension names to their buffers. + * `TILEDB_COORDS` may be used for the special zipped coordinates + * buffer. + * */ + std::unordered_map buffers_; /** The fragment metadata. */ std::vector fragment_metadata_; @@ -751,48 +748,58 @@ class Reader { Status fill_dense_coords(const Subarray& subarray); /** - * Fills the coordinate buffer with coordinates. Applicable only to dense + * Fills the coordinate buffers with coordinates. Applicable only to dense * arrays when the user explicitly requests the coordinates to be * materialized. Also applicable only to global order. * * @tparam T The domain type. * @param subarray The input subarray. - * @param coords_buff The coordinates buffer to be filled. - * @param coords_buff_size The size of the coordinates buffer. 
- * @param coords_buff_offset The offset in the coordinates buffer the filling - * will start from. + * @param dim_idx The dimension indices of the corresponding `buffers`. + * For the special zipped coordinates, `dim_idx`, `buffers` and `offsets` + * contain a single element and `dim_idx` contains `dim_num` as + * the dimension index. + * @param buffers The buffers to copy from. It could be the special + * zipped coordinates or separate coordinate buffers. + * @param offsets The offsets that will be used eventually to update + * the buffer sizes, determining the useful results written in + * the buffers. * @return Status */ template Status fill_dense_coords_global( const Subarray& subarray, - void* coords_buff, - uint64_t coords_buff_size, - uint64_t* coords_buff_offset); + const std::vector& dim_idx, + const std::vector& buffers, + std::vector* offsets); /** - * Fills the coordinate buffer with coordinates. Applicable only to dense + * Fills the coordinate buffers with coordinates. Applicable only to dense * arrays when the user explicitly requests the coordinates to be * materialized. Also applicable only to row-/col-major order. * * @tparam T The domain type. * @param subarray The input subarray. - * @param coords_buff The coordinates buffer to be filled. - * @param coords_buff_size The size of the coordinates buffer. - * @param coords_buff_offset The offset in the coordinates buffer the filling - * will start from. + * @param dim_idx The dimension indices of the corresponding `buffers`. + * For the special zipped coordinates, `dim_idx`, `buffers` and `offsets` + * contain a single element and `dim_idx` contains `dim_num` as + * the dimension index. + * @param buffers The buffers to copy from. It could be the special + * zipped coordinates or separate coordinate buffers. + * @param offsets The offsets that will be used eventually to update + * the buffer sizes, determining the useful results written in + * the buffers. 
* @return Status */ template Status fill_dense_coords_row_col( const Subarray& subarray, - void* coords_buff, - uint64_t coords_buff_size, - uint64_t* coords_buff_offset); + const std::vector& dim_idx, + const std::vector& buffers, + std::vector* offsets); /** - * Fills coordinates in the input buffer for a particular cell slab, following - * a row-major layout. For instance, if the starting coordinate are + * Fills coordinates in the input buffers for a particular cell slab, + * following a row-major layout. For instance, if the starting coordinate are * [3, 1] and the number of coords to be written is 3, this function will * write to the input buffer (starting at the input offset) coordinates * [3, 1], [3, 2], and [3, 3]. @@ -800,16 +807,27 @@ class Reader { * @tparam T The domain type. * @param start The starting coordinates in the slab. * @param num The number of coords to be written. - * @param buff The buffer to write the coordinates into. - * @param offset The offset in `buff` where the write will begin. + * @param dim_idx The dimension indices of the corresponding `buffers`. + * For the special zipped coordinates, `dim_idx`, `buffers` and `offsets` + * contain a single element and `dim_idx` contains `dim_num` as + * the dimension index. + * @param buffers The buffers to copy from. It could be the special + * zipped coordinates or separate coordinate buffers. + * @param offsets The offsets that will be used eventually to update + * the buffer sizes, determining the useful results written in + * the buffers. */ template void fill_dense_coords_row_slab( - const T* start, uint64_t num, void* buff, uint64_t* offset) const; + const T* start, + uint64_t num, + const std::vector& dim_idx, + const std::vector& buffers, + std::vector* offsets) const; /** - * Fills coordinates in the input buffer for a particular cell slab, following - * a col-major layout. 
For instance, if the starting coordinate are + * Fills coordinates in the input buffers for a particular cell slab, + * following a col-major layout. For instance, if the starting coordinate are * [3, 1] and the number of coords to be written is 3, this function will * write to the input buffer (starting at the input offset) coordinates * [4, 1], [5, 1], and [6, 1]. @@ -817,12 +835,23 @@ class Reader { * @tparam T The domain type. * @param start The starting coordinates in the slab. * @param num The number of coords to be written. - * @param buff The buffer to write the coordinates into. - * @param offset The offset in `buff` where the write will begin. + * @param dim_idx The dimension indices of the corresponding `buffers`. + * For the special zipped coordinates, `dim_idx`, `buffers` and `offsets` + * contain a single element and `dim_idx` contains `dim_num` as + * the dimension index. + * @param buffers The buffers to copy from. It could be the special + * zipped coordinates or separate coordinate buffers. + * @param offsets The offsets that will be used eventually to update + * the buffer sizes, determining the useful results written in + * the buffers. */ template void fill_dense_coords_col_slab( - const T* start, uint64_t num, void* buff, uint64_t* offset) const; + const T* start, + uint64_t num, + const std::vector& dim_idx, + const std::vector& buffers, + std::vector* offsets) const; /** * Filters the tiles on a particular attribute/dimension from all input @@ -862,6 +891,12 @@ class Reader { /** Returns `true` if the coordinates are included in the attributes. */ bool has_coords() const; + /** + * Returns `true` if a coordinate buffer for a separate dimension + * has been set. + */ + bool has_separate_coords() const; + /** Initializes the read state. 
*/ Status init_read_state(); diff --git a/tiledb/sm/query/writer.cc b/tiledb/sm/query/writer.cc index 008dd870765..8ac6eb67e8d 100644 --- a/tiledb/sm/query/writer.cc +++ b/tiledb/sm/query/writer.cc @@ -123,7 +123,7 @@ const ArraySchema* Writer::array_schema() const { std::vector Writer::buffer_names() const { std::vector ret; - // Add to the buffers names the attributes, as well as the dimensions only if + // Add to the buffer names the attributes, as well as the dimensions only if // coords_buffer_ has not been set for (const auto& it : buffers_) { if (!array_schema_->is_dim(it.first) || (!coords_buffer_)) @@ -326,11 +326,11 @@ Status Writer::set_buffer( std::string("Cannot set buffer for new attribute/dimension '") + name + "' after initialization")); - // Check if zipped coordinates buffer is set + // Check if zipped coordinates coexist with separate coordinate buffers if (is_dim && coords_buffer_ != nullptr) return LOG_STATUS(Status::WriterError( - std::string("Cannot set separate coordinates buffer after having " - "set the zipped coordinates buffer"))); + std::string("Cannot set separate coordinate buffers after " + "having set the zipped coordinates buffer"))); if (is_dim) { // Check number of coordinates @@ -591,17 +591,15 @@ Status Writer::check_coord_dups(const std::vector& cell_pos) const { } // Found duplicate - if (found_dup) { - return LOG_STATUS( - Status::WriterError("Duplicate coordinates are not allowed")); - } + if (found_dup) + return Status::WriterError("Duplicate coordinates are not allowed"); return Status::Ok(); }); // Check all statuses for (auto& st : statuses) - RETURN_NOT_OK(st); + RETURN_NOT_OK_ELSE(st, LOG_STATUS(st)); return Status::Ok(); STATS_FUNC_OUT(writer_check_coord_dups); @@ -647,19 +645,15 @@ Status Writer::check_coord_dups() const { } // Found duplicate - if (found_dup) { - return LOG_STATUS( - Status::WriterError("Duplicate coordinates are not allowed")); - } + if (found_dup) + return Status::WriterError("Duplicate 
coordinates are not allowed"); return Status::Ok(); }); // Check all statuses - for (auto& st : statuses) { - if (!st.ok()) - return st; - } + for (auto& st : statuses) + RETURN_NOT_OK_ELSE(st, LOG_STATUS(st)); return Status::Ok(); diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index 8e2c8331346..b8f14ad8fae 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -171,23 +171,23 @@ Status subarray_partitioner_to_capnp( RETURN_NOT_OK( subarray_to_capnp(schema, partitioner.subarray(), &subarray_builder)); - // Per-attr mem budgets - const auto* attr_budgets = partitioner.get_attr_result_budgets(); - if (!attr_budgets->empty()) { - auto mem_budgets_builder = builder->initBudget(attr_budgets->size()); - size_t attr_idx = 0; - for (const auto& pair : (*attr_budgets)) { - const std::string& attr_name = pair.first; - auto budget_builder = mem_budgets_builder[attr_idx]; - budget_builder.setAttribute(attr_name); - if (attr_name == constants::coords || !schema->var_size(attr_name)) { + // Per-attr/dim mem budgets + const auto* budgets = partitioner.get_result_budgets(); + if (!budgets->empty()) { + auto mem_budgets_builder = builder->initBudget(budgets->size()); + size_t idx = 0; + for (const auto& pair : (*budgets)) { + const std::string& name = pair.first; + auto budget_builder = mem_budgets_builder[idx]; + budget_builder.setAttribute(name); + if (name == constants::coords || !schema->var_size(name)) { budget_builder.setOffsetBytes(0); budget_builder.setDataBytes(pair.second.size_fixed_); } else { budget_builder.setOffsetBytes(pair.second.size_fixed_); budget_builder.setDataBytes(pair.second.size_var_); } - attr_idx++; + idx++; } } diff --git a/tiledb/sm/storage_manager/consolidator.cc b/tiledb/sm/storage_manager/consolidator.cc index 81db291de39..b580dba989f 100644 --- a/tiledb/sm/storage_manager/consolidator.cc +++ b/tiledb/sm/storage_manager/consolidator.cc @@ -344,11 +344,9 @@ Status 
Consolidator::consolidate( this->all_sparse(to_consolidate, 0, to_consolidate.size() - 1); // Prepare buffers - void** buffers; - uint64_t* buffer_sizes; - unsigned int buffer_num; - Status st = create_buffers( - array_schema, all_sparse, &buffers, &buffer_sizes, &buffer_num); + std::vector buffers; + std::vector buffer_sizes; + Status st = create_buffers(array_schema, all_sparse, &buffers, &buffer_sizes); if (!st.ok()) { array_for_reads.close(); array_for_writes.close(); @@ -363,24 +361,24 @@ Status Consolidator::consolidate( &array_for_writes, all_sparse, union_non_empty_domains, - buffers, - buffer_sizes, &query_r, &query_w, new_fragment_uri); if (!st.ok()) { array_for_reads.close(); array_for_writes.close(); - clean_up(buffer_num, buffers, buffer_sizes, query_r, query_w); + delete query_r; + delete query_w; return st; } // Read from one array and write to the other - st = copy_array(query_r, query_w); + st = copy_array(query_r, query_w, &buffers, &buffer_sizes, all_sparse); if (!st.ok()) { array_for_reads.close(); array_for_writes.close(); - clean_up(buffer_num, buffers, buffer_sizes, query_r, query_w); + delete query_r; + delete query_w; return st; } @@ -389,7 +387,8 @@ Status Consolidator::consolidate( if (!st.ok()) { array_for_writes.close(); storage_manager_->vfs()->remove_dir(*new_fragment_uri); - clean_up(buffer_num, buffers, buffer_sizes, query_r, query_w); + delete query_r; + delete query_w; return st; } @@ -397,7 +396,8 @@ Status Consolidator::consolidate( st = query_w->finalize(); if (!st.ok()) { array_for_writes.close(); - clean_up(buffer_num, buffers, buffer_sizes, query_r, query_w); + delete query_r; + delete query_w; bool is_dir = false; auto st2 = storage_manager_->vfs()->is_dir(*new_fragment_uri, &is_dir); (void)st2; // Perhaps report this once we support an error stack @@ -409,7 +409,8 @@ Status Consolidator::consolidate( // Close array st = array_for_writes.close(); if (!st.ok()) { - clean_up(buffer_num, buffers, buffer_sizes, query_r, 
query_w); + delete query_r; + delete query_w; bool is_dir = false; auto st2 = storage_manager_->vfs()->is_dir(*new_fragment_uri, &is_dir); (void)st2; // Perhaps report this once we support an error stack @@ -426,7 +427,8 @@ Status Consolidator::consolidate( st = delete_fragment_metadata(array_uri, to_delete); if (!st.ok()) { delete_fragments(to_delete); - clean_up(buffer_num, buffers, buffer_sizes, query_r, query_w); + delete query_r; + delete query_w; return st; } @@ -434,77 +436,65 @@ Status Consolidator::consolidate( st = delete_fragments(to_delete); // Clean up - clean_up(buffer_num, buffers, buffer_sizes, query_r, query_w); + delete query_r; + delete query_w; return st; } -Status Consolidator::copy_array(Query* query_r, Query* query_w) { +Status Consolidator::copy_array( + Query* query_r, + Query* query_w, + std::vector* buffers, + std::vector* buffer_sizes, + bool sparse_mode) { + // Set the read query buffers outside the repeated submissions. + // The Reader will reset the query buffer sizes to the original + // sizes, not the potentially smaller sizes of the results after + // the query submission. + RETURN_NOT_OK(set_query_buffers(query_r, sparse_mode, buffers, buffer_sizes)); + do { + // READ RETURN_NOT_OK(query_r->submit()); + + // Set explicitly the write query buffers, as the sizes may have + // been altered by the read query. 
+ RETURN_NOT_OK( + set_query_buffers(query_w, sparse_mode, buffers, buffer_sizes)); + + // WRITE RETURN_NOT_OK(query_w->submit()); } while (query_r->status() == QueryStatus::INCOMPLETE); return Status::Ok(); } -void Consolidator::clean_up( - unsigned buffer_num, - void** buffers, - uint64_t* buffer_sizes, - Query* query_r, - Query* query_w) const { - free_buffers(buffer_num, buffers, buffer_sizes); - delete query_r; - delete query_w; -} - Status Consolidator::create_buffers( const ArraySchema* array_schema, bool sparse_mode, - void*** buffers, - uint64_t** buffer_sizes, - unsigned int* buffer_num) { + std::vector* buffers, + std::vector* buffer_sizes) { // For easy reference auto attribute_num = array_schema->attribute_num(); auto sparse = !array_schema->dense() || sparse_mode; // Calculate number of buffers - *buffer_num = 0; + size_t buffer_num = 0; for (unsigned int i = 0; i < attribute_num; ++i) - *buffer_num += (array_schema->attributes()[i]->var_size()) ? 2 : 1; - *buffer_num += (sparse) ? 1 : 0; + buffer_num += (array_schema->attributes()[i]->var_size()) ? 2 : 1; + buffer_num += (sparse) ? 
array_schema->dim_num() : 0; // Create buffers - *buffers = (void**)std::malloc(*buffer_num * sizeof(void*)); - if (*buffers == nullptr) { - return LOG_STATUS(Status::ConsolidatorError( - "Cannot create consolidation buffers; Memory allocation failed")); - } - *buffer_sizes = new uint64_t[*buffer_num]; - if (*buffer_sizes == nullptr) { - return LOG_STATUS(Status::ConsolidatorError( - "Cannot create consolidation buffer sizes; Memory allocation failed")); - } + buffers->resize(buffer_num); + buffer_sizes->resize(buffer_num); // Allocate space for each buffer - bool error = false; - for (unsigned int i = 0; i < *buffer_num; ++i) { - (*buffers)[i] = std::malloc(config_.buffer_size_); - if ((*buffers)[i] == nullptr) // The loop should continue to - error = true; // allocate nullptr to each buffer + for (unsigned int i = 0; i < buffer_num; ++i) { + (*buffers)[i].resize(config_.buffer_size_); (*buffer_sizes)[i] = config_.buffer_size_; } - // Clean up upon error - if (error) { - free_buffers(*buffer_num, *buffers, *buffer_sizes); - *buffers = nullptr; - *buffer_sizes = nullptr; - return LOG_STATUS(Status::ConsolidatorError( - "Cannot create consolidation buffers; Memory allocation failed")); - } - // Success return Status::Ok(); } @@ -514,8 +504,6 @@ Status Consolidator::create_queries( Array* array_for_writes, bool sparse_mode, const NDRange& subarray, - void** buffers, - uint64_t* buffer_sizes, Query** query_r, Query** query_w, URI* new_fragment_uri) { @@ -526,8 +514,6 @@ Status Consolidator::create_queries( // Create read query *query_r = new Query(storage_manager_, array_for_reads); RETURN_NOT_OK((*query_r)->set_layout(Layout::GLOBAL_ORDER)); - RETURN_NOT_OK( - set_query_buffers(*query_r, sparse_mode, buffers, buffer_sizes)); RETURN_NOT_OK((*query_r)->set_subarray_unsafe(subarray)); if (array_for_reads->array_schema()->dense() && sparse_mode) RETURN_NOT_OK((*query_r)->set_sparse_mode(true)); @@ -542,8 +528,6 @@ Status Consolidator::create_queries( 
RETURN_NOT_OK((*query_w)->set_layout(Layout::GLOBAL_ORDER)); if (array_for_reads->array_schema()->dense()) RETURN_NOT_OK((*query_w)->set_subarray_unsafe(subarray)); - RETURN_NOT_OK( - set_query_buffers(*query_w, sparse_mode, buffers, buffer_sizes)); return Status::Ok(); } @@ -615,15 +599,6 @@ Status Consolidator::delete_overwritten_fragments( return Status::Ok(); } -void Consolidator::free_buffers( - unsigned int buffer_num, void** buffers, uint64_t* buffer_sizes) const { - for (unsigned int i = 0; i < buffer_num; ++i) { - std::free(buffers[i]); - } - std::free(buffers); - delete[] buffer_sizes; -} - Status Consolidator::compute_next_to_consolidate( const ArraySchema* array_schema, const std::vector& fragments, @@ -770,29 +745,36 @@ Status Consolidator::compute_new_fragment_uri( Status Consolidator::set_query_buffers( Query* query, bool sparse_mode, - void** buffers, - uint64_t* buffer_sizes) const { - auto dense = query->array_schema()->dense(); - auto attributes = query->array_schema()->attributes(); + std::vector* buffers, + std::vector* buffer_sizes) const { + auto array_schema = query->array_schema(); + auto dim_num = array_schema->dim_num(); + auto dense = array_schema->dense(); + auto attributes = array_schema->attributes(); unsigned bid = 0; for (const auto& attr : attributes) { if (!attr->var_size()) { - RETURN_NOT_OK( - query->set_buffer(attr->name(), buffers[bid], &buffer_sizes[bid])); + RETURN_NOT_OK(query->set_buffer( + attr->name(), (void*)&(*buffers)[bid][0], &(*buffer_sizes)[bid])); ++bid; } else { RETURN_NOT_OK(query->set_buffer( attr->name(), - (uint64_t*)buffers[bid], - &buffer_sizes[bid], - buffers[bid + 1], - &buffer_sizes[bid + 1])); + (uint64_t*)&(*buffers)[bid][0], + &(*buffer_sizes)[bid], + (void*)&(*buffers)[bid + 1][0], + &(*buffer_sizes)[bid + 1])); bid += 2; } } - if (!dense || sparse_mode) - RETURN_NOT_OK( - query->set_buffer(constants::coords, buffers[bid], &buffer_sizes[bid])); + if (!dense || sparse_mode) { + for (unsigned d = 0; d 
< dim_num; ++d) { + auto dim_name = array_schema->dimension(d)->name(); + RETURN_NOT_OK(query->set_buffer( + dim_name, (void*)&(*buffers)[bid][0], &(*buffer_sizes)[bid])); + ++bid; + } + } return Status::Ok(); } diff --git a/tiledb/sm/storage_manager/consolidator.h b/tiledb/sm/storage_manager/consolidator.h index fe80ef3fabd..57ae8fe5aa2 100644 --- a/tiledb/sm/storage_manager/consolidator.h +++ b/tiledb/sm/storage_manager/consolidator.h @@ -36,6 +36,7 @@ #include "tiledb/sm/array/array.h" #include "tiledb/sm/filesystem/filelock.h" #include "tiledb/sm/misc/status.h" +#include "tiledb/sm/misc/types.h" #include "tiledb/sm/storage_manager/open_array.h" #include @@ -261,20 +262,18 @@ class Consolidator { /** * Copies the array by reading from the fragments to be consolidated * (with `query_r`) and writing to the new fragment (with `query_w`). + * It also appropriately sets the query buffers. * * @param query_r The read query. * @param query_w The write query. * @return Status */ - Status copy_array(Query* query_r, Query* query_w); - - /** Cleans up the inputs. */ - void clean_up( - unsigned buffer_num, - void** buffers, - uint64_t* buffer_sizes, + Status copy_array( Query* query_r, - Query* query_w) const; + Query* query_w, + std::vector* buffers, + std::vector* buffer_sizes, + bool sparse_mode); /** * Creates the buffers that will be used upon reading the input fragments and @@ -286,15 +285,13 @@ class Consolidator { * in special sparse mode. This is ignored for sparse arrays. * @param buffers The buffers to be created. * @param buffer_sizes The corresponding buffer sizes. - * @param buffer_num The number of buffers to be retrieved. * @return Status */ Status create_buffers( const ArraySchema* array_schema, bool sparse_mode, - void*** buffers, - uint64_t** buffer_sizes, - unsigned int* buffer_num); + std::vector* buffers, + std::vector* buffer_sizes); /** * Creates the queries needed for consolidation. 
It also retrieves @@ -309,8 +306,6 @@ class Consolidator { * in special sparse mode. This is ignored for sparse arrays. * @param subarray The subarray to read from (the fragments to consolidate) * and write to (the new fragment). - * @param buffers The buffers to be passed in the queries. - * @param buffer_sizes The corresponding buffer sizes. * @param query_r This query reads from the fragments to be consolidated. * @param query_w This query writes to the new consolidated fragment. * @param new_fragment_uri The URI of the new fragment to be created. @@ -321,8 +316,6 @@ class Consolidator { Array* array_for_writes, bool sparse_mode, const NDRange& subarray, - void** buffers, - uint64_t* buffer_sizes, Query** query_r, Query** query_w, URI* new_fragment_uri); @@ -363,16 +356,6 @@ class Consolidator { Status delete_overwritten_fragments( const ArraySchema* array_schema, std::vector* fragments); - /** - * Frees the input buffers. - * - * @param buffer_num The number of buffers. - * @param buffers The buffers to be freed. - * @param buffer_sizes The corresponding buffer sizes. - */ - void free_buffers( - unsigned int buffer_num, void** buffers, uint64_t* buffer_sizes) const; - /** * Based on the input fragment info, this algorithm decides the (sorted) list * of fragments to be consolidated in the next consolidation step. 
@@ -415,8 +398,8 @@ class Consolidator { Status set_query_buffers( Query* query, bool sparse_mode, - void** buffers, - uint64_t* buffer_sizes) const; + std::vector* buffers, + std::vector* buffer_sizes) const; /** * Updates the `fragment_info` by removing `to_consolidate` and diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 212a5ac06ff..2813f3867aa 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -369,122 +369,146 @@ Layout Subarray::layout() const { return layout_; } -Status Subarray::get_est_result_size(const char* attr_name, uint64_t* size) { - // Check attribute name - if (attr_name == nullptr) - return LOG_STATUS(Status::SubarrayError( - "Cannot get estimated result size; Invalid attribute")); - - // Check attribute - auto attr = array_->array_schema()->attribute(attr_name); - if (attr_name != constants::coords && attr == nullptr) - return LOG_STATUS(Status::SubarrayError( - "Cannot get estimated result size; Invalid attribute")); +Status Subarray::get_est_result_size(const char* name, uint64_t* size) { + // Check attribute/dimension name + if (name == nullptr) + return LOG_STATUS( + Status::SubarrayError("Cannot get estimated result size; " + "Attribute/Dimension name cannot be null")); // Check size pointer if (size == nullptr) return LOG_STATUS(Status::SubarrayError( - "Cannot get estimated result size; Invalid size input")); + "Cannot get estimated result size; Input size cannot be null")); + + // Check if name is attribute or dimension + auto array_schema = array_->array_schema(); + bool is_dim = array_schema->is_dim(name); + bool is_attr = array_schema->is_attr(name); - // Check if the attribute is fixed-sized - if (attr_name != constants::coords && attr->var_size()) + // Check if attribute/dimension exists + if (name != constants::coords && !is_dim && !is_attr) return LOG_STATUS(Status::SubarrayError( - "Cannot get estimated result size; Attribute must be fixed-sized")); + 
std::string("Cannot get estimated result size; Attribute/Dimension '") + + name + "' does not exist")); + + // Check if the attribute/dimension is fixed-sized + if (array_schema->var_size(name)) + return LOG_STATUS( + Status::SubarrayError("Cannot get estimated result size; " + "Attribute/Dimension must be fixed-sized")); // Compute tile overlap for each fragment RETURN_NOT_OK(compute_est_result_size()); - *size = (uint64_t)ceil(est_result_size_[attr_name].size_fixed_); + *size = (uint64_t)ceil(est_result_size_[name].size_fixed_); return Status::Ok(); } Status Subarray::get_est_result_size( - const char* attr_name, uint64_t* size_off, uint64_t* size_val) { - // Check attribute name - if (attr_name == nullptr) - return LOG_STATUS(Status::SubarrayError( - "Cannot get estimated result size; Invalid attribute")); - - // Check attribute - auto attr = array_->array_schema()->attribute(attr_name); - if (attr == nullptr) - return LOG_STATUS(Status::SubarrayError( - "Cannot get estimated result size; Invalid attribute")); + const char* name, uint64_t* size_off, uint64_t* size_val) { + // Check attribute/dimension name + if (name == nullptr) + return LOG_STATUS( + Status::SubarrayError("Cannot get estimated result size; " + "Attribute/Dimension name cannot be null")); // Check size pointer if (size_off == nullptr || size_val == nullptr) return LOG_STATUS(Status::SubarrayError( - "Cannot get estimated result size; Invalid size input")); + "Cannot get estimated result size; Input sizes cannot be null")); - // Check if the attribute is var-sized - if (!attr->var_size()) + // Check if name is attribute or dimension + auto array_schema = array_->array_schema(); + bool is_dim = array_schema->is_dim(name); + bool is_attr = array_schema->is_attr(name); + + // Check if attribute/dimension exists + if (name != constants::coords && !is_dim && !is_attr) return LOG_STATUS(Status::SubarrayError( - "Cannot get estimated result size; Attribute must be var-sized")); + std::string("Cannot get 
estimated result size; Attribute/Dimension '") + + name + "' does not exist")); + + // Check if the attribute/dimension is var-sized + if (!array_schema->var_size(name)) + return LOG_STATUS( + Status::SubarrayError("Cannot get estimated result size; " + "Attribute/Dimension must be var-sized")); // Compute tile overlap for each fragment RETURN_NOT_OK(compute_est_result_size()); - *size_off = (uint64_t)ceil(est_result_size_[attr_name].size_fixed_); - *size_val = (uint64_t)ceil(est_result_size_[attr_name].size_var_); + *size_off = (uint64_t)ceil(est_result_size_[name].size_fixed_); + *size_val = (uint64_t)ceil(est_result_size_[name].size_var_); return Status::Ok(); } -Status Subarray::get_max_memory_size(const char* attr_name, uint64_t* size) { - // Check attribute name - if (attr_name == nullptr) - return LOG_STATUS( - Status::SubarrayError("Cannot get max memory size; Invalid attribute")); - - // Check attribute - auto attr = array_->array_schema()->attribute(attr_name); - if (attr_name != constants::coords && attr == nullptr) - return LOG_STATUS( - Status::SubarrayError("Cannot get max memory size; Invalid attribute")); +Status Subarray::get_max_memory_size(const char* name, uint64_t* size) { + // Check attribute/dimension name + if (name == nullptr) + return LOG_STATUS(Status::SubarrayError( + "Cannot get max memory size; Attribute/Dimension cannot be null")); // Check size pointer if (size == nullptr) return LOG_STATUS(Status::SubarrayError( - "Cannot get max memory size; Invalid size input")); + "Cannot get max memory size; Inpute size cannot be null")); + + // Check if name is attribute or dimension + auto array_schema = array_->array_schema(); + bool is_dim = array_schema->is_dim(name); + bool is_attr = array_schema->is_attr(name); + + // Check if attribute/dimension exists + if (name != constants::coords && !is_dim && !is_attr) + return LOG_STATUS(Status::SubarrayError( + std::string("Cannot get max memory size; Attribute/Dimension '") + + name + "' does not 
exist")); - // Check if the attribute is fixed-sized - if (attr_name != constants::coords && attr->var_size()) + // Check if the attribute/dimension is fixed-sized + if (name != constants::coords && array_schema->var_size(name)) return LOG_STATUS(Status::SubarrayError( - "Cannot get max memory size; Attribute must be fixed-sized")); + "Cannot get max memory size; Attribute/Dimension must be fixed-sized")); // Compute tile overlap for each fragment compute_est_result_size(); - *size = (uint64_t)ceil(est_result_size_[attr_name].mem_size_fixed_); + *size = (uint64_t)ceil(est_result_size_[name].mem_size_fixed_); return Status::Ok(); } Status Subarray::get_max_memory_size( - const char* attr_name, uint64_t* size_off, uint64_t* size_val) { - // Check attribute name - if (attr_name == nullptr) - return LOG_STATUS( - Status::SubarrayError("Cannot get max memory size; Invalid attribute")); - - // Check attribute - auto attr = array_->array_schema()->attribute(attr_name); - if (attr == nullptr) - return LOG_STATUS( - Status::SubarrayError("Cannot get max memory size; Invalid attribute")); + const char* name, uint64_t* size_off, uint64_t* size_val) { + // Check attribute/dimension name + if (name == nullptr) + return LOG_STATUS(Status::SubarrayError( + "Cannot get max memory size; Attribute/Dimension cannot be null")); // Check size pointer if (size_off == nullptr || size_val == nullptr) return LOG_STATUS(Status::SubarrayError( - "Cannot get max memory size; Invalid size input")); + "Cannot get max memory size; Input sizes cannot be null")); - // Check if the attribute is var-sized - if (!attr->var_size()) + // Check if name is attribute or dimension + auto array_schema = array_->array_schema(); + bool is_dim = array_schema->is_dim(name); + bool is_attr = array_schema->is_attr(name); + + // Check if attribute/dimension exists + if (name != constants::coords && !is_dim && !is_attr) + return LOG_STATUS(Status::SubarrayError( + std::string("Cannot get max memory size; 
Attribute/Dimension '") + + name + "' does not exist")); + + // Check if the attribute/dimension is var-sized + if (!array_schema->var_size(name)) return LOG_STATUS(Status::SubarrayError( - "Cannot get max memory size; Attribute must be var-sized")); + "Cannot get max memory size; Attribute/Dimension must be var-sized")); // Compute tile overlap for each fragment compute_est_result_size(); - *size_off = (uint64_t)ceil(est_result_size_[attr_name].mem_size_fixed_); - *size_val = (uint64_t)ceil(est_result_size_[attr_name].mem_size_var_); + *size_off = (uint64_t)ceil(est_result_size_[name].mem_size_fixed_); + *size_val = (uint64_t)ceil(est_result_size_[name].mem_size_var_); return Status::Ok(); } @@ -753,30 +777,43 @@ Status Subarray::compute_est_result_size() { std::mutex mtx; - // Prepare estimated result size vector for all attributes and coords - auto attributes = array_->array_schema()->attributes(); - auto attribute_num = attributes.size(); + // Prepare estimated result size vector for all + // attributes/dimension and zipped coords + auto array_schema = array_->array_schema(); + auto attribute_num = array_schema->attribute_num(); + auto dim_num = array_schema->dim_num(); + auto attributes = array_schema->attributes(); + auto num = attribute_num + dim_num + 1; std::vector est_result_size_vec; - for (unsigned i = 0; i < attribute_num + 1; ++i) + for (unsigned i = 0; i < num; ++i) est_result_size_vec.emplace_back(ResultSize{0.0, 0.0, 0, 0}); // Compute estimated result in parallel over fragments and ranges auto meta = array_->fragment_metadata(); auto range_num = this->range_num(); - auto statuses = parallel_for(0, range_num, [&](uint64_t i) { - for (unsigned a = 0; a < attribute_num + 1; ++a) { - auto attr_name = - (a == attribute_num) ? constants::coords : attributes[a]->name(); - bool var_size = (a == attribute_num) ? 
false : attributes[a]->var_size(); + // Get attribute and dimension names + std::vector names(num); + for (unsigned i = 0; i < num; ++i) { + if (i < attribute_num) + names[i] = attributes[i]->name(); + else if (i < attribute_num + dim_num) + names[i] = array_schema->domain()->dimension(i - attribute_num)->name(); + else + names[i] = constants::coords; + } + + auto statuses = parallel_for(0, range_num, [&](uint64_t r) { + for (unsigned i = 0; i < num; ++i) { + bool var_size = array_schema->var_size(names[i]); ResultSize result_size; RETURN_NOT_OK( - compute_est_result_size(attr_name, i, var_size, &result_size)); + compute_est_result_size(names[i], r, var_size, &result_size)); std::lock_guard block(mtx); - est_result_size_vec[a].size_fixed_ += result_size.size_fixed_; - est_result_size_vec[a].size_var_ += result_size.size_var_; - est_result_size_vec[a].mem_size_fixed_ += result_size.mem_size_fixed_; - est_result_size_vec[a].mem_size_var_ += result_size.mem_size_var_; + est_result_size_vec[i].size_fixed_ += result_size.size_fixed_; + est_result_size_vec[i].size_var_ += result_size.size_var_; + est_result_size_vec[i].mem_size_fixed_ += result_size.mem_size_fixed_; + est_result_size_vec[i].mem_size_var_ += result_size.mem_size_var_; } return Status::Ok(); }); @@ -793,18 +830,15 @@ Status Subarray::compute_est_result_size() { // Set the estimated result size map est_result_size_.clear(); - for (unsigned a = 0; a < attribute_num + 1; ++a) { - auto attr_name = - (a == attribute_num) ? 
constants::coords : attributes[a]->name(); - est_result_size_[attr_name] = est_result_size_vec[a]; - } + for (unsigned i = 0; i < num; ++i) + est_result_size_[names[i]] = est_result_size_vec[i]; est_result_size_computed_ = true; return Status::Ok(); } Status Subarray::compute_est_result_size( - const std::string& attr_name, + const std::string& name, uint64_t range_idx, bool var_size, ResultSize* result_size) const { @@ -825,14 +859,13 @@ Status Subarray::compute_est_result_size( for (const auto& tr : overlap.tile_ranges_) { for (uint64_t tid = tr.first; tid <= tr.second; ++tid) { if (!var_size) { - ret.size_fixed_ += meta->tile_size(attr_name, tid); - ret.mem_size_fixed_ += meta->tile_size(attr_name, tid); + ret.size_fixed_ += meta->tile_size(name, tid); + ret.mem_size_fixed_ += meta->tile_size(name, tid); } else { - ret.size_fixed_ += meta->tile_size(attr_name, tid); - RETURN_NOT_OK( - meta->tile_var_size(*encryption_key, attr_name, tid, &size)); + ret.size_fixed_ += meta->tile_size(name, tid); + RETURN_NOT_OK(meta->tile_var_size(*encryption_key, name, tid, &size)); ret.size_var_ += size; - ret.mem_size_fixed_ += meta->tile_size(attr_name, tid); + ret.mem_size_fixed_ += meta->tile_size(name, tid); ret.mem_size_var_ += size; } } @@ -843,14 +876,13 @@ Status Subarray::compute_est_result_size( auto tid = t.first; auto ratio = t.second; if (!var_size) { - ret.size_fixed_ += meta->tile_size(attr_name, tid) * ratio; - ret.mem_size_fixed_ += meta->tile_size(attr_name, tid); + ret.size_fixed_ += meta->tile_size(name, tid) * ratio; + ret.mem_size_fixed_ += meta->tile_size(name, tid); } else { - ret.size_fixed_ += meta->tile_size(attr_name, tid) * ratio; - RETURN_NOT_OK( - meta->tile_var_size(*encryption_key, attr_name, tid, &size)); + ret.size_fixed_ += meta->tile_size(name, tid) * ratio; + RETURN_NOT_OK(meta->tile_var_size(*encryption_key, name, tid, &size)); ret.size_var_ += size * ratio; - ret.mem_size_fixed_ += meta->tile_size(attr_name, tid); + ret.mem_size_fixed_ += 
meta->tile_size(name, tid); ret.mem_size_var_ += size; } } @@ -865,7 +897,7 @@ Status Subarray::compute_est_result_size( utils::math::safe_mul(cell_num, constants::cell_var_offset_size); } else { max_size_fixed = - utils::math::safe_mul(cell_num, array_schema->cell_size(attr_name)); + utils::math::safe_mul(cell_num, array_schema->cell_size(name)); } ret.size_fixed_ = std::min(ret.size_fixed_, max_size_fixed); ret.size_var_ = std::min(ret.size_var_, max_size_var); diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index e7ee86e4a75..134a0eeb2ed 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -106,20 +106,26 @@ class Subarray { /* TYPE DEFINITIONS */ /* ********************************* */ - /** Result size (in bytes) for an attribute used for partitioning. */ + /** + * Result size (in bytes) for an attribute/dimension used for + * partitioning. + */ struct ResultSize { - /** Size for fixed-sized attributes or offsets of var-sized attributes. */ + /** Size for fixed-sized attributes/dimensions or offsets of var-sized + * attributes/dimensions. + */ double size_fixed_; - /** Size of values for var-sized attributes. */ + /** Size of values for var-sized attributes/dimensions. */ double size_var_; /** * Maximum size of overlapping tiles fetched into memory for - * fixed-sized attributes or offsets of var-sized attributes. + * fixed-sized attributes/dimensions or offsets of var-sized + * attributes/dimensions. */ uint64_t mem_size_fixed_; /** * Maximum size of overlapping tiles fetched into memory for - * var-sized attributes. + * var-sized attributes/dimensions. */ uint64_t mem_size_var_; }; @@ -208,16 +214,16 @@ class Subarray { /** * Computes the estimated result size (calibrated using the maximum size) - * for a given attribute and range id, for all fragments. + * for a given attribute/dimension and range id, for all fragments. * - * @param attr_name The name of the attribute to focus on. 
+ * @param name The name of the attribute/dimension to focus on. * @param range_idx The id of the subarray range to focus on. - * @param var_size Whether the attribute is var-sized or not. + * @param var_size Whether the attribute/dimension is var-sized or not. * @param result_size The result size to be retrieved. * @return Status */ Status compute_est_result_size( - const std::string& attr_name, + const std::string& name, uint64_t range_idx, bool var_size, ResultSize* result_size) const; @@ -294,29 +300,29 @@ class Subarray { /** * Gets the estimated result size (in bytes) for the input fixed-sized - * attribute. + * attribute/dimension. */ - Status get_est_result_size(const char* attr_name, uint64_t* size); + Status get_est_result_size(const char* name, uint64_t* size); /** * Gets the estimated result size (in bytes) for the input var-sized - * attribute. + * attribute/dimension. */ Status get_est_result_size( - const char* attr_name, uint64_t* size_off, uint64_t* size_val); + const char* name, uint64_t* size_off, uint64_t* size_val); /* * Gets the maximum memory required to produce the result (in bytes) - * for the input fixed-sized attribute. + * for the input fixed-sized attribute/dimension. */ - Status get_max_memory_size(const char* attr_name, uint64_t* size); + Status get_max_memory_size(const char* name, uint64_t* size); /** * Gets the maximum memory required to produce the result (in bytes) - * for the input fixed-sized attribute. + * for the input var-sized attribute/dimension. */ Status get_max_memory_size( - const char* attr_name, uint64_t* size_off, uint64_t* size_val); + const char* name, uint64_t* size_off, uint64_t* size_val); /** Retrieves the query type of the subarray's array. */ Status get_query_type(QueryType* type) const; @@ -455,7 +461,7 @@ class Subarray { /** The array the subarray object is associated with. */ const Array* array_; - /** Stores the estimated result size for each array attribute.
*/ + /** Stores the estimated result size for each array attribute/dimension. */ std::unordered_map<std::string, ResultSize> est_result_size_; /** @@ -478,8 +484,8 @@ class Subarray { std::vector range_offsets_; /** - * ``True`` if the estimated result size for all attributes has been - * computed. + * ``True`` if the estimated result size for all attributes/dimensions has + * been computed. */ bool est_result_size_computed_; @@ -518,7 +524,7 @@ class Subarray { */ void add_default_ranges(); - /** Computes the estimated result size for all attributes. */ + /** Computes the estimated result size for all attributes/dimensions. */ Status compute_est_result_size(); /** diff --git a/tiledb/sm/subarray/subarray_partitioner.cc b/tiledb/sm/subarray/subarray_partitioner.cc index 89012c92c27..0aa64117f2a 100644 --- a/tiledb/sm/subarray/subarray_partitioner.cc +++ b/tiledb/sm/subarray/subarray_partitioner.cc @@ -122,37 +122,41 @@ bool SubarrayPartitioner::done() const { } Status SubarrayPartitioner::get_result_budget( - const char* attr_name, uint64_t* budget) const { - // Check attribute name - if (attr_name == nullptr) + const char* name, uint64_t* budget) const { + // Check attribute/dimension name + if (name == nullptr) return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get result budget; Attribute name cannot be null")); - - if (attr_name != constants::coords) { - // Check attribute name - auto attr = subarray_.array()->array_schema()->attribute(attr_name); - if (attr == nullptr) - return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get result budget; Invalid attribute")); - - // Check budget pointer - if (budget == nullptr) - return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get result budget; Invalid budget input")); - - // Check if the attribute is fixed-sized - if (attr->var_size()) - return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get result budget; Attribute must be fixed-sized")); - } + "Cannot get result budget; Attribute/Dimension name
cannot be null")); + + // Check budget pointer + if (budget == nullptr) + return LOG_STATUS(Status::SubarrayPartitionerError( + "Cannot get result budget; Invalid budget input")); + + // For easy reference + auto array_schema = subarray_.array()->array_schema(); + bool is_dim = array_schema->is_dim(name); + bool is_attr = array_schema->is_attr(name); + + // Check if attribute/dimension exists + if (name != constants::coords && !is_dim && !is_attr) + return LOG_STATUS(Status::SubarrayPartitionerError( + std::string("Cannot get result budget; Invalid attribute/dimension '") + + name + "'")); + + // Check if the attribute/dimension is fixed-sized + if (array_schema->var_size(name)) + return LOG_STATUS(Status::SubarrayPartitionerError( + std::string("Cannot get result budget; Input attribute/dimension '") + + name + "' is var-sized")); // Check if budget has been set - auto b_it = budget_.find(attr_name); + auto b_it = budget_.find(name); if (b_it == budget_.end()) return LOG_STATUS(Status::SubarrayPartitionerError( - std::string( - "Cannot get result budget; Budget not set for attribute '") + - attr_name + "'")); + std::string("Cannot get result budget; Budget not set for " + "attribute/dimension '") + + name + "'")); // Get budget *budget = b_it->second.size_fixed_; @@ -161,21 +165,21 @@ Status SubarrayPartitioner::get_result_budget( } Status SubarrayPartitioner::get_result_budget( - const char* attr_name, uint64_t* budget_off, uint64_t* budget_val) const { - // Check attribute name - if (attr_name == nullptr) + const char* name, uint64_t* budget_off, uint64_t* budget_val) const { + // Check attribute/dimension name + if (name == nullptr) return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get result budget; Attribute name cannot be null")); + "Cannot get result budget; Attribute/Dimension name cannot be null")); - if (attr_name == constants::coords) + if (name == constants::coords) return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get result 
budget; Attribute must be var-sized")); + "Cannot get result budget; Attribute/Dimension must be var-sized")); - // Check attribute - auto attr = subarray_.array()->array_schema()->attribute(attr_name); + // Check attribute/dimension + auto attr = subarray_.array()->array_schema()->attribute(name); if (attr == nullptr) return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get result budget; Invalid attribute")); + "Cannot get result budget; Invalid attribute/dimension")); // Check budget pointer if (budget_off == nullptr || budget_val == nullptr) @@ -185,15 +189,15 @@ Status SubarrayPartitioner::get_result_budget( // Check if the attribute is var-sized if (!attr->var_size()) return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot get result budget; Attribute must be var-sized")); + "Cannot get result budget; Attribute/Dimension must be var-sized")); // Check if budget has been set - auto b_it = budget_.find(attr_name); + auto b_it = budget_.find(name); if (b_it == budget_.end()) return LOG_STATUS(Status::SubarrayPartitionerError( - std::string( - "Cannot get result budget; Budget not set for attribute '") + - attr_name + "'")); + std::string("Cannot get result budget; Budget not set for " + "attribute/dimension '") + + name + "'")); // Get budget *budget_off = b_it->second.size_fixed_; @@ -203,7 +207,7 @@ Status SubarrayPartitioner::get_result_budget( } const std::unordered_map<std::string, SubarrayPartitioner::ResultBudget>* -SubarrayPartitioner::get_attr_result_budgets() const { +SubarrayPartitioner::get_result_budgets() const { return &budget_; } @@ -258,55 +262,59 @@ Status SubarrayPartitioner::next(bool* unsplittable) { } Status SubarrayPartitioner::set_result_budget( - const char* attr_name, uint64_t budget) { - // Check attribute name - if (attr_name == nullptr) + const char* name, uint64_t budget) { + // Check attribute/dimension name + if (name == nullptr) return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot set result budget; Attribute name cannot be null")); - - if (attr_name !=
constants::coords) { - // Check attribute - auto attr = subarray_.array()->array_schema()->attribute(attr_name); - if (attr == nullptr) - return LOG_STATUS(Status::SubarrayPartitionerError( - std::string("Cannot set result budget; Invalid attribute '") + - attr_name + "'")); - - // Check if the attribute is fixed-sized - if (attr->var_size()) - return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot set result budget; Attribute must be fixed-sized")); - } + "Cannot set result budget; Attribute/Dimension name cannot be null")); + + // For easy reference + auto array_schema = subarray_.array()->array_schema(); + bool is_dim = array_schema->is_dim(name); + bool is_attr = array_schema->is_attr(name); - budget_[attr_name] = ResultBudget{budget, 0}; + // Check if attribute/dimension exists + if (name != constants::coords && !is_dim && !is_attr) + return LOG_STATUS(Status::SubarrayPartitionerError( + std::string("Cannot set result budget; Invalid attribute/dimension '") + + name + "'")); + + // Check if the attribute/dimension is fixed-sized + bool var_size = (name != constants::coords && array_schema->var_size(name)); + if (var_size) + return LOG_STATUS(Status::SubarrayPartitionerError( + std::string("Cannot set result budget; Input attribute/dimension '") + + name + "' is var-sized")); + + budget_[name] = ResultBudget{budget, 0}; return Status::Ok(); } Status SubarrayPartitioner::set_result_budget( - const char* attr_name, uint64_t budget_off, uint64_t budget_val) { - // Check attribute name - if (attr_name == nullptr) + const char* name, uint64_t budget_off, uint64_t budget_val) { + // Check attribute/dimension name + if (name == nullptr) return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot set result budget; Attribute name cannot be null")); + "Cannot set result budget; Attribute/Dimension name cannot be null")); - if (attr_name == constants::coords) + if (name == constants::coords) return LOG_STATUS(Status::SubarrayPartitionerError( - "Cannot set 
result budget; Attribute must be var-sized")); + "Cannot set result budget; Attribute/Dimension must be var-sized")); - // Check attribute - auto attr = subarray_.array()->array_schema()->attribute(attr_name); + // Check attribute/dimension + auto attr = subarray_.array()->array_schema()->attribute(name); if (attr == nullptr) return LOG_STATUS(Status::SubarrayPartitionerError( - std::string("Cannot set result budget; Invalid attribute '") + - attr_name + "'")); + std::string("Cannot set result budget; Invalid attribute '") + name + + "'")); - // Check if the attribute is var-sized + // Check if the attribute/dimension is var-sized if (!attr->var_size()) return LOG_STATUS(Status::SubarrayPartitionerError( "Cannot set result budget; Attribute must be var-sized")); - budget_[attr_name] = ResultBudget{budget_off, budget_val}; + budget_[name] = ResultBudget{budget_off, budget_val}; return Status::Ok(); } @@ -488,13 +496,13 @@ Status SubarrayPartitioner::compute_current_start_end(bool* found) { ++current_.end_) { // Update current sizes for (const auto& budget_it : budget_) { - auto attr_name = budget_it.first; - auto var_size = array_schema->var_size(attr_name); + auto name = budget_it.first; + auto var_size = array_schema->var_size(name); Subarray::ResultSize est_size; RETURN_NOT_OK(subarray_.compute_est_result_size( - attr_name, current_.end_, var_size, &est_size)); - auto& cur_size = cur_sizes[attr_name]; - auto& mem_size = mem_sizes[attr_name]; + name, current_.end_, var_size, &est_size)); + auto& cur_size = cur_sizes[name]; + auto& mem_size = mem_sizes[name]; cur_size.size_fixed_ += (uint64_t)ceil(est_size.size_fixed_); cur_size.size_var_ += (uint64_t)ceil(est_size.size_var_); mem_size.size_fixed_ += (uint64_t)ceil(est_size.mem_size_fixed_); @@ -688,8 +696,8 @@ bool SubarrayPartitioner::must_split(Subarray* partition) { uint64_t size_fixed, size_var, mem_size_fixed, mem_size_var; for (const auto& b : budget_) { // Compute max sizes - auto attr_name = b.first; - 
auto var_size = array_schema->var_size(attr_name); + auto name = b.first; + auto var_size = array_schema->var_size(name); // Compute est sizes size_fixed = 0; diff --git a/tiledb/sm/subarray/subarray_partitioner.h b/tiledb/sm/subarray/subarray_partitioner.h index b4e160767dd..feb4231f180 100644 --- a/tiledb/sm/subarray/subarray_partitioner.h +++ b/tiledb/sm/subarray/subarray_partitioner.h @@ -57,11 +57,17 @@ class SubarrayPartitioner { /* TYPE DEFINITIONS */ /* ********************************* */ - /** Result budget (in bytes) for an attribute used for partitioning. */ + /** + * Result budget (in bytes) for an attribute/dimension used for + * partitioning. + */ struct ResultBudget { - /** Size for fixed-sized attributes or offsets of var-sized attributes. */ + /** + * Size for fixed-sized attributes/dimensions or offsets of var-sized + * attributes/dimensions. + */ uint64_t size_fixed_; - /** Size of values for var-sized attributes. */ + /** Size of values for var-sized attributes/dimensions. */ uint64_t size_var_; }; @@ -184,18 +190,24 @@ class SubarrayPartitioner { */ bool done() const; - /** Gets result size budget (in bytes) for the input fixed-sized attribute. */ - Status get_result_budget(const char* attr_name, uint64_t* budget) const; + /** + * Gets result size budget (in bytes) for the input fixed-sized + * attribute/dimension. + */ + Status get_result_budget(const char* name, uint64_t* budget) const; - /** Gets result size budget (in bytes) for the input var-sized attribute. */ + /** + * Gets result size budget (in bytes) for the input var-sized + * attribute/dimension. + */ Status get_result_budget( - const char* attr_name, uint64_t* budget_off, uint64_t* budget_val) const; + const char* name, uint64_t* budget_off, uint64_t* budget_val) const; /** - * Returns a pointer to mapping containing all attribute result budgets that - * have been set. + * Returns a pointer to mapping containing all attribute/dimension result + * budgets that have been set. 
*/ - const std::unordered_map<std::string, ResultBudget>* get_attr_result_budgets() + const std::unordered_map<std::string, ResultBudget>* get_result_budgets() const; /** @@ -229,12 +241,18 @@ class SubarrayPartitioner { */ Status set_memory_budget(uint64_t budget, uint64_t budget_var); - /** Sets result size budget (in bytes) for the input fixed-sized attribute. */ - Status set_result_budget(const char* attr_name, uint64_t budget); + /** + * Sets result size budget (in bytes) for the input fixed-sized + * attribute/dimension. + */ + Status set_result_budget(const char* name, uint64_t budget); - /** Sets result size budget (in bytes) for the input var-sized attribute. */ + /** + * Sets result size budget (in bytes) for the input var-sized + * attribute/dimension. + */ Status set_result_budget( - const char* attr_name, uint64_t budget_off, uint64_t budget_val); + const char* name, uint64_t budget_off, uint64_t budget_val); /** * Splits the current partition and updates the state, retrieving @@ -264,7 +282,7 @@ class SubarrayPartitioner { /** The subarray the partitioner will iterate on to produce partitions. */ Subarray subarray_; - /** Result size budget (in bytes) for all attributes. */ + /** Result size budget (in bytes) for all attributes/dimensions. */ std::unordered_map<std::string, ResultBudget> budget_; /** The current partition info. */