diff --git a/apis/r/tests/testthat/helper-test-data.R b/apis/r/tests/testthat/helper-test-data.R index f0596fc08f..6a2324acdc 100644 --- a/apis/r/tests/testthat/helper-test-data.R +++ b/apis/r/tests/testthat/helper-test-data.R @@ -75,6 +75,6 @@ create_arrow_table <- function(nrows = 10L, factors = FALSE) { soma_joinid = bit64::seq.integer64(from = 0L, to = nrows - 1L), bar = seq(nrows) + 0.1, baz = as.character(seq.int(nrows) + 1000L) - # schema = create_arrow_schema() + # schema = create_arrow_schema(false) ) } diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index ccda688494..a9a100b572 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -1178,6 +1178,26 @@ uint64_t SOMAArray::nnz_slow() { } std::vector SOMAArray::shape() { + // Two reasons for this: + // * Transitional, non-monolithic, phased, careful development for the + // new-shape feature + // * Even after the new-shape feature is fully released, there will be old + // arrays on disk that were created before this feature existed. + // So this is long-term code. + auto current_domain = tiledb::ArraySchemaExperimental::current_domain( + *ctx_->tiledb_ctx(), arr_->schema()); + if (current_domain.is_empty()) { + return _tiledb_domain(); + } else { + return _tiledb_current_domain(); + } +} + +std::vector SOMAArray::maxshape() { + return _tiledb_domain(); +} + +std::vector SOMAArray::_tiledb_domain() { std::vector result; auto dimensions = mq_->schema()->domain().dimensions(); @@ -1253,6 +1273,71 @@ std::vector SOMAArray::shape() { return result; } +std::vector SOMAArray::_tiledb_current_domain() { + std::vector result; + + auto current_domain = tiledb::ArraySchemaExperimental::current_domain( + *ctx_->tiledb_ctx(), arr_->schema()); + + if (current_domain.is_empty()) { + throw TileDBSOMAError( + "Internal error: current domain requested for an array which does " + "not support it"); + } + + auto t = current_domain.type(); + if (t != TILEDB_NDRECTANGLE) { + throw TileDBSOMAError("current_domain type is not NDRECTANGLE"); + } + + NDRectangle ndrect = current_domain.ndrectangle(); + + for (auto dimension_name : dimension_names()) { + // TODO: non-int64 types for SOMADataFrame extra dims. + // This simply needs to be integrated with switch statements as in the + // legacy code below. + auto range = ndrect.range(dimension_name); + result.push_back(range[1] + 1); + } + return result; +} + +void SOMAArray::resize(const std::vector& newshape) { + if (mq_->query_type() != TILEDB_WRITE) { + throw TileDBSOMAError( + "[SOMAArray::resize] array must be opened in write mode"); + } + + auto tctx = ctx_->tiledb_ctx(); + ArraySchema schema = arr_->schema(); + Domain domain = schema.domain(); + ArraySchemaEvolution schema_evolution(*tctx); + CurrentDomain new_current_domain(*tctx); + + NDRectangle ndrect(*tctx, domain); + + // TODO: non-int64 for DataFrame when it has extra index dims. + // This will be via a resize-helper. + + unsigned n = domain.ndim(); + if ((unsigned)newshape.size() != n) { + throw TileDBSOMAError(fmt::format( + "[SOMAArray::resize]: newshape has dimension count {}; array has " + "{} ", + newshape.size(), + n)); + } + + for (unsigned i = 0; i < n; i++) { + ndrect.set_range( + domain.dimension(i).name(), 0, newshape[i] - 1); + } + + new_current_domain.set_ndrectangle(ndrect); + schema_evolution.expand_current_domain(new_current_domain); + schema_evolution.array_evolve(uri_); +} + uint64_t SOMAArray::ndim() const { return tiledb_schema()->domain().ndim(); } diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index d3a57987f2..5d8036fad5 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -571,13 +571,54 @@ class SOMAArray : public SOMAObject { } /** - * @brief Get the capacity of each dimension. + * @brief Get the current capacity of each dimension. + * + * This applies to arrays all of whose dims are of type int64_t: this + * includes SOMASparseNDArray and SOMADenseNDArray, and default-indexed + * SOMADataFrame. + * + * At the TileDB-SOMA level we call this "shape". At the TileDB Core + * storage level this maps to "current domain". + * + * Further, we map this single n to the pair (0, n-1) since core permits a + * doubly inclusive pair (lo, hi) on each dimension slot. * * @return A vector with length equal to the number of dimensions; each - * value in the vector is the capcity of each dimension. + * value in the vector is the capacity of each dimension. */ std::vector shape(); + /** + * @brief Get the maximum resizable capacity of each dimension. + * + * This applies to arrays all of whose dims are of type int64_t: this + * includes SOMASparseNDArray and SOMADenseNDArray, and default-indexed + * SOMADataFrame. + * + * At the TileDB-SOMA level we call this "maxshape". At the TileDB Core + * storage level this maps to "domain". + * + * Further, we map this single n to the pair (0, n-1) since core permits a + * doubly inclusive pair (lo, hi) on each dimension slot. + * + * @return A vector with length equal to the number of dimensions; each + * value in the vector is the maximum capacity of each dimension. + */ + std::vector maxshape(); + + /** + * @brief Resize the shape (what core calls "current domain") up to the + * maxshape (what core calls "domain"). + * + * This applies to arrays all of whose dims are of type int64_t: this + * includes SOMASparseNDArray and SOMADenseNDArray, and default-indexed + * SOMADataFrame. + * + * @return Nothing. Raises an exception if the resize would be a downsize, + * which is not supported. + */ + void resize(const std::vector& newshape); + /** * @brief Get the number of dimensions. * @@ -762,6 +803,18 @@ class SOMAArray : public SOMAObject { uint64_t _get_max_capacity(tiledb_datatype_t index_type); + /** + * With old shape: core domain mapped to tiledbsoma shape; core current + * domain did not exist. + * + * With new shape: core domain maps to tiledbsoma maxshape; + * core current_domain maps to tiledbsoma shape. + * + * Here we distinguish between user-side API, and core-side implementation. + */ + std::vector _tiledb_domain(); + std::vector _tiledb_current_domain(); + bool _extend_enumeration( ArrowSchema* value_schema, ArrowArray* value_array, diff --git a/libtiledbsoma/src/soma/soma_collection.h b/libtiledbsoma/src/soma/soma_collection.h index 1c4274556b..60dc170fd2 100644 --- a/libtiledbsoma/src/soma/soma_collection.h +++ b/libtiledbsoma/src/soma/soma_collection.h @@ -281,4 +281,4 @@ class SOMACollection : public SOMAGroup { }; } // namespace tiledbsoma -#endif // SOMA_COLLECTION \ No newline at end of file +#endif // SOMA_COLLECTION diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc index 3e94cf6c4e..feba256db9 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.cc +++ b/libtiledbsoma/src/utils/arrow_adapter.cc @@ -447,6 +447,148 @@ Dimension ArrowAdapter::_create_dim( } } +void ArrowAdapter::_set_current_domain_slot( + tiledb_datatype_t type, + const void* buff, + NDRectangle& ndrect, + std::string name) { + switch (type) { + case TILEDB_STRING_ASCII: + // Core domain must not be set for string dims. + // Core current_domain can't _not_ be set for string dims. + // For TileDB-SOMA, we set a broad initial range for string + // dims (because we have to) but there has never been support + // for user specification of domain for string dims, and we do not + // introduce support for user specification of current domain for + // string dims. + throw TileDBSOMAError( + "Internal error: _set_current_domain_slot must not be called " + "for string dimensions."); + break; + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: { + uint64_t lo = ((uint64_t*)buff)[3]; + uint64_t hi = ((uint64_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain uint64_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_INT8: { + int8_t lo = ((int8_t*)buff)[3]; + int8_t hi = ((int8_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain int8_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_UINT8: { + uint8_t lo = ((uint8_t*)buff)[3]; + uint8_t hi = ((uint8_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain uint8_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_INT16: { + int16_t lo = ((int16_t*)buff)[3]; + int16_t hi = ((int16_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain int16_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_UINT16: { + uint16_t lo = ((uint16_t*)buff)[3]; + uint16_t hi = ((uint16_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain uint16_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_INT32: { + int32_t lo = ((int32_t*)buff)[3]; + int32_t hi = ((int32_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain int32_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_UINT32: { + uint32_t lo = ((uint32_t*)buff)[3]; + uint32_t hi = ((uint32_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain uint32_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_INT64: { + int64_t lo = ((int64_t*)buff)[3]; + int64_t hi = ((int64_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain int64_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_UINT64: { + uint64_t lo = ((uint64_t*)buff)[3]; + uint64_t hi = ((uint64_t*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain uint64_t {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_FLOAT32: { + float lo = ((float*)buff)[3]; + float hi = ((float*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain float {} to {}", + name, + lo, + hi)); + } break; + case TILEDB_FLOAT64: { + double lo = ((double*)buff)[3]; + double hi = ((double*)buff)[4]; + ndrect.set_range(name, lo, hi); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] {} current_domain double {} to {}", + name, + lo, + hi)); + } break; + default: + throw TileDBSOMAError(fmt::format( + "ArrowAdapter: Unsupported TileDB dimension: {} ", + tiledb::impl::type_to_str(type))); + } +} + tiledb_layout_t ArrowAdapter::_get_order(std::string order) { std::transform( order.begin(), order.end(), order.begin(), [](unsigned char c) { @@ -516,6 +658,9 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema( auto child = arrow_schema->children[sch_idx]; auto type = ArrowAdapter::to_tiledb_format(child->format); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] schema pass for {}", std::string(child->name))); + bool isattr = true; for (int64_t i = 0; i < index_column_schema->n_children; ++i) { @@ -586,6 +731,57 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema( LOG_DEBUG(fmt::format("[ArrowAdapter] set_domain")); schema.set_domain(domain); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] index_column_info length {}", + index_column_array->length)); + + // Note: this must be done after we've got the core domain, since the + // NDRectangle constructor requires access to the core domain. + bool have_current_domain_info = index_column_array->length == 5; + if (have_current_domain_info) { + CurrentDomain current_domain(*ctx); + NDRectangle ndrect(*ctx, domain); + + for (int64_t sch_idx = 0; sch_idx < arrow_schema->n_children; + ++sch_idx) { + auto child = arrow_schema->children[sch_idx]; + auto type = ArrowAdapter::to_tiledb_format(child->format); + + for (int64_t i = 0; i < index_column_schema->n_children; ++i) { + auto col_name = index_column_schema->children[i]->name; + if (strcmp(child->name, col_name) != 0) { + continue; + } + + if (ArrowAdapter::_isvar(child->format)) { + // In the core API: + // + // * domain for strings must be set as (nullptr, nullptr) + // * current_domain for strings cannot be set as (nullptr, + // nullptr) + // + // Fortunately, these are ASCII dims and we can range + // these accordingly. + ndrect.set_range(col_name, "\x01", "\x7f"); + } else { + const void* buff = index_column_array->children[i] + ->buffers[1]; + _set_current_domain_slot(type, buff, ndrect, col_name); + } + break; + } + } + + current_domain.set_ndrectangle(ndrect); + + LOG_DEBUG(fmt::format( + "[ArrowAdapter] before setting current_domain from ndrect")); + ArraySchemaExperimental::set_current_domain( + *ctx, schema, current_domain); + LOG_DEBUG(fmt::format( + "[ArrowAdapter] after setting current_domain from ndrect")); + } + LOG_DEBUG(fmt::format("[ArrowAdapter] check")); schema.check(); diff --git a/libtiledbsoma/src/utils/arrow_adapter.h b/libtiledbsoma/src/utils/arrow_adapter.h index 34c130439e..20e09afbc1 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.h +++ b/libtiledbsoma/src/utils/arrow_adapter.h @@ -238,6 +238,12 @@ class ArrowAdapter { const void* buff, std::shared_ptr ctx); + static void _set_current_domain_slot( + tiledb_datatype_t type, + const void* buff, + NDRectangle& ndrect, + std::string name); + template static Dimension _create_dim_aux( std::shared_ptr ctx, std::string name, T* b) { diff --git a/libtiledbsoma/test/common.cc b/libtiledbsoma/test/common.cc index af949c439a..5ba2859a92 100644 --- a/libtiledbsoma/test/common.cc +++ b/libtiledbsoma/test/common.cc @@ -33,6 +33,14 @@ #include "common.h" namespace helper { + +// This non-obvious number is: +// * Something that fits into signed 32-bit integer for R-friendliness; +// * Is a comfortable tile-extent distance away from 2^31-1 for default +// core tile extent. (Using 2^31-1 exactly would result in a core +// array-creation error.) +const int CORE_DOMAIN_MAX = 2147483646; + ArraySchema create_schema( Context& ctx, int64_t dim_max, bool allow_duplicates) { // Create schema @@ -53,7 +61,7 @@ ArraySchema create_schema( } std::pair, ArrowTable> create_arrow_schema( - int64_t dim_max) { + int64_t dim_max, bool use_current_domain) { // Create ArrowSchema for SOMAArray auto arrow_schema = std::make_unique(); arrow_schema->format = "+s"; @@ -99,7 +107,7 @@ std::pair, ArrowTable> create_arrow_schema( col_info_array->n_children = 2; col_info_array->release = &ArrowAdapter::release_array; col_info_array->children = new ArrowArray*[1]; - int n = 3; + int n = use_current_domain ? 5 : 3; auto d0_info = col_info_array->children[0] = new ArrowArray; d0_info->length = n; d0_info->null_count = 0; @@ -110,7 +118,7 @@ std::pair, ArrowTable> create_arrow_schema( d0_info->buffers[0] = nullptr; d0_info->buffers[1] = malloc(sizeof(int64_t) * n); d0_info->n_children = 0; - int64_t dom[] = {0, dim_max, 1}; + int64_t dom[] = {0, dim_max, 1, 0, CORE_DOMAIN_MAX}; std::memcpy((void*)d0_info->buffers[1], &dom, sizeof(int64_t) * n); return std::pair( @@ -118,7 +126,7 @@ std::pair, ArrowTable> create_arrow_schema( ArrowTable(std::move(col_info_array), std::move(col_info_schema))); } -ArrowTable create_column_index_info(int64_t dim_max) { +ArrowTable create_column_index_info(int64_t dim_max, bool use_current_domain) { // Create ArrowSchema for IndexColumnInfo auto col_info_schema = std::make_unique(); col_info_schema->format = "+s"; @@ -143,7 +151,7 @@ ArrowTable create_column_index_info(int64_t dim_max) { col_info_array->n_children = 2; col_info_array->release = &ArrowAdapter::release_array; col_info_array->children = new ArrowArray*[1]; - int n = 3; + int n = use_current_domain ? 5 : 3; auto d0_info = col_info_array->children[0] = new ArrowArray; d0_info->length = n; d0_info->null_count = 0; @@ -154,7 +162,7 @@ ArrowTable create_column_index_info(int64_t dim_max) { d0_info->buffers[0] = nullptr; d0_info->buffers[1] = malloc(sizeof(int64_t) * n); d0_info->n_children = 0; - int64_t dom[] = {0, dim_max, 1, 0, 2147483646}; + int64_t dom[] = {0, dim_max, 1, 0, CORE_DOMAIN_MAX}; std::memcpy((void*)d0_info->buffers[1], &dom, sizeof(int64_t) * n); return ArrowTable(std::move(col_info_array), std::move(col_info_schema)); diff --git a/libtiledbsoma/test/common.h b/libtiledbsoma/test/common.h index 6ccdf7c0f4..36eba01440 100644 --- a/libtiledbsoma/test/common.h +++ b/libtiledbsoma/test/common.h @@ -63,7 +63,7 @@ namespace helper { ArraySchema create_schema( Context& ctx, int64_t dim_max, bool allow_duplicates = false); std::pair, ArrowTable> create_arrow_schema( - int64_t dim_max); -ArrowTable create_column_index_info(int64_t dim_max); + int64_t dim_max, bool use_current_domain); +ArrowTable create_column_index_info(int64_t dim_max, bool use_current_domain); } // namespace helper #endif diff --git a/libtiledbsoma/test/unit_soma_array.cc b/libtiledbsoma/test/unit_soma_array.cc index 8984c42f1c..a1c000c415 100644 --- a/libtiledbsoma/test/unit_soma_array.cc +++ b/libtiledbsoma/test/unit_soma_array.cc @@ -580,4 +580,4 @@ TEST_CASE("SOMAArray: Write and read back Boolean") { std::vector( {false, true, false, true, false, true, false, true})); soma_array->close(); -} \ No newline at end of file +} diff --git a/libtiledbsoma/test/unit_soma_collection.cc b/libtiledbsoma/test/unit_soma_collection.cc index a97cf9be89..8bbcd9c9cd 100644 --- a/libtiledbsoma/test/unit_soma_collection.cc +++ b/libtiledbsoma/test/unit_soma_collection.cc @@ -32,7 +32,7 @@ #include "common.h" -#define DIM_MAX 1000 +static const int64_t DIM_MAX = 1000; TEST_CASE("SOMACollection: basic") { TimestampRange ts(0, 2); @@ -49,44 +49,50 @@ TEST_CASE("SOMACollection: basic") { } TEST_CASE("SOMACollection: add SOMASparseNDArray") { - TimestampRange ts(0, 2); - auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-add-sparse-ndarray"; - std::string sub_uri = "mem://unit-test-add-sparse-ndarray/sub"; - - SOMACollection::create(base_uri, ctx, ts); - auto index_columns = helper::create_column_index_info(DIM_MAX); - auto schema = helper::create_schema(*ctx->tiledb_ctx(), DIM_MAX, true); - - std::map expected_map{ - {"sparse_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}}; - - auto soma_collection = SOMACollection::open( - base_uri, OpenMode::write, ctx, ts); - REQUIRE(soma_collection->timestamp() == ts); - - auto soma_sparse = soma_collection->add_new_sparse_ndarray( - "sparse_ndarray", - sub_uri, - URIType::absolute, - ctx, - "l", - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second))); - REQUIRE(soma_collection->members_map() == expected_map); - REQUIRE(soma_sparse->uri() == sub_uri); - REQUIRE(soma_sparse->ctx() == ctx); - REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); - REQUIRE(soma_sparse->is_sparse() == true); - REQUIRE(soma_sparse->ndim() == 1); - REQUIRE(soma_sparse->nnz() == 0); - REQUIRE(soma_sparse->timestamp() == ts); - soma_sparse->close(); - soma_collection->close(); - - soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); - REQUIRE(soma_collection->members_map() == expected_map); - soma_collection->close(); + bool use_current_domains[] = {false, true}; + for (bool use_current_domain : use_current_domains) { + TimestampRange ts(0, 2); + auto ctx = std::make_shared(); + std::string base_uri = "mem://unit-test-add-sparse-ndarray"; + std::string sub_uri = "mem://unit-test-add-sparse-ndarray/sub"; + + SOMACollection::create(base_uri, ctx, ts); + + auto index_columns = helper::create_column_index_info( + DIM_MAX, use_current_domain); + auto schema = helper::create_schema(*ctx->tiledb_ctx(), DIM_MAX, true); + + std::map expected_map{ + {"sparse_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}}; + + auto soma_collection = SOMACollection::open( + base_uri, OpenMode::write, ctx, ts); + REQUIRE(soma_collection->timestamp() == ts); + + auto soma_sparse = soma_collection->add_new_sparse_ndarray( + "sparse_ndarray", + sub_uri, + URIType::absolute, + ctx, + "l", + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second))); + REQUIRE(soma_collection->members_map() == expected_map); + REQUIRE(soma_sparse->uri() == sub_uri); + REQUIRE(soma_sparse->ctx() == ctx); + REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); + REQUIRE(soma_sparse->is_sparse() == true); + REQUIRE(soma_sparse->ndim() == 1); + REQUIRE(soma_sparse->nnz() == 0); + REQUIRE(soma_sparse->timestamp() == ts); + soma_sparse->close(); + soma_collection->close(); + + soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + soma_collection->close(); + } } TEST_CASE("SOMACollection: add SOMADenseNDArray") { @@ -96,7 +102,7 @@ TEST_CASE("SOMACollection: add SOMADenseNDArray") { std::string sub_uri = "mem://unit-test-add-dense-ndarray/sub"; SOMACollection::create(base_uri, ctx, ts); - auto index_columns = helper::create_column_index_info(DIM_MAX); + auto index_columns = helper::create_column_index_info(DIM_MAX, false); auto schema = helper::create_schema(*ctx->tiledb_ctx(), DIM_MAX, true); std::map expected_map{ @@ -120,7 +126,7 @@ TEST_CASE("SOMACollection: add SOMADenseNDArray") { REQUIRE(soma_dense->type() == "SOMADenseNDArray"); REQUIRE(soma_dense->is_sparse() == false); REQUIRE(soma_dense->ndim() == 1); - REQUIRE(soma_dense->shape() == std::vector{1001}); + REQUIRE(soma_dense->shape() == std::vector{DIM_MAX + 1}); REQUIRE(soma_dense->timestamp() == ts); soma_collection->close(); @@ -136,7 +142,7 @@ TEST_CASE("SOMACollection: add SOMADataFrame") { std::string sub_uri = "mem://unit-test-add-dataframe/sub"; SOMACollection::create(base_uri, ctx, ts); - auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX); + auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX, false); std::map expected_map{ {"dataframe", SOMAGroupEntry(sub_uri, "SOMAArray")}}; @@ -200,7 +206,7 @@ TEST_CASE("SOMACollection: add SOMAExperiment") { std::string sub_uri = "mem://unit-test-add-experiment/sub"; SOMACollection::create(base_uri, ctx); - auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX); + auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX, false); std::map expected_map{ {"experiment", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; @@ -232,7 +238,7 @@ TEST_CASE("SOMACollection: add SOMAMeasurement") { std::string sub_uri = "mem://unit-test-add-measurement/sub"; SOMACollection::create(base_uri, ctx); - auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX); + auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX, false); std::map expected_map{ {"measurement", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; @@ -315,7 +321,7 @@ TEST_CASE("SOMAExperiment: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-experiment"; - auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX); + auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX, false); SOMAExperiment::create( uri, std::move(schema), @@ -382,7 +388,7 @@ TEST_CASE("SOMAExperiment: metadata") { TEST_CASE("SOMAMeasurement: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-measurement"; - auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX); + auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX, false); SOMAMeasurement::create( uri, std::move(schema), diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index 2b1b312cf8..77f36fae92 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -35,58 +35,65 @@ #define DIM_MAX 1000 TEST_CASE("SOMADataFrame: basic") { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dataframe-basic"; - - REQUIRE(!SOMADataFrame::exists(uri, ctx)); - - auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX); - SOMADataFrame::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx); - - REQUIRE(SOMADataFrame::exists(uri, ctx)); - REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); - - auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); - REQUIRE(soma_dataframe->uri() == uri); - REQUIRE(soma_dataframe->ctx() == ctx); - REQUIRE(soma_dataframe->type() == "SOMADataFrame"); - std::vector expected_index_column_names = {"d0"}; - REQUIRE( - soma_dataframe->index_column_names() == expected_index_column_names); - REQUIRE(soma_dataframe->count() == 0); - soma_dataframe->close(); - - std::vector d0(10); - for (int j = 0; j < 10; j++) - d0[j] = j; - std::vector a0(10, 1); - - soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); - soma_dataframe->set_column_data("a0", a0.size(), a0.data()); - soma_dataframe->set_column_data("d0", d0.size(), d0.data()); - soma_dataframe->write(); - soma_dataframe->close(); - - soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); - while (auto batch = soma_dataframe->read_next()) { - auto arrbuf = batch.value(); - auto d0span = arrbuf->at("d0")->data(); - auto a0span = arrbuf->at("a0")->data(); - REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); - REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); - } - soma_dataframe->close(); + bool use_current_domains[] = {false, true}; + int64_t dim_max = 1000; + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dataframe-basic"; + + REQUIRE(!SOMADataFrame::exists(uri, ctx)); + + auto [schema, index_columns] = helper::create_arrow_schema( + dim_max, use_current_domain); + SOMADataFrame::create( + uri, + std::move(schema), + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx); + + REQUIRE(SOMADataFrame::exists(uri, ctx)); + REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + + auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); + REQUIRE(soma_dataframe->uri() == uri); + REQUIRE(soma_dataframe->ctx() == ctx); + REQUIRE(soma_dataframe->type() == "SOMADataFrame"); + std::vector expected_index_column_names = {"d0"}; + REQUIRE( + soma_dataframe->index_column_names() == + expected_index_column_names); + REQUIRE(soma_dataframe->count() == 0); + soma_dataframe->close(); - auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); - REQUIRE(soma_object->uri() == uri); - REQUIRE(soma_object->type() == "SOMADataFrame"); - soma_object->close(); + std::vector d0(10); + for (int j = 0; j < 10; j++) + d0[j] = j; + std::vector a0(10, 1); + + soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); + soma_dataframe->set_column_data("a0", a0.size(), a0.data()); + soma_dataframe->set_column_data("d0", d0.size(), d0.data()); + soma_dataframe->write(); + soma_dataframe->close(); + + soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); + while (auto batch = soma_dataframe->read_next()) { + auto arrbuf = batch.value(); + auto d0span = arrbuf->at("d0")->data(); + auto a0span = arrbuf->at("a0")->data(); + REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); + REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); + } + soma_dataframe->close(); + + auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); + REQUIRE(soma_object->uri() == uri); + REQUIRE(soma_object->type() == "SOMADataFrame"); + soma_object->close(); + } } TEST_CASE("SOMADataFrame: platform_config") { @@ -138,19 +145,73 @@ TEST_CASE("SOMADataFrame: platform_config") { section << "- filter=" << filter.first; SECTION(section.str()) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dataframe-platform-config"; - - PlatformConfig platform_config; - platform_config.dataframe_dim_zstd_level = 6; - platform_config.offsets_filters = R"([)" + filter.first + R"(])"; - platform_config.validity_filters = R"([)" + filter.first + R"(])"; - if (filter.second != TILEDB_FILTER_WEBP) { - platform_config.attrs = R"({"a0": {"filters":[)" + filter.first + - R"(]}})"; + bool use_current_domains[] = {false, true}; + int64_t dim_max = 1000; + + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dataframe-platform-config"; + + PlatformConfig platform_config; + platform_config.dataframe_dim_zstd_level = 6; + platform_config.offsets_filters = R"([)" + filter.first + R"(])"; + platform_config.validity_filters = R"([)" + filter.first + R"(])"; + if (filter.second != TILEDB_FILTER_WEBP) { + platform_config.attrs = R"({"a0": {"filters":[)" + + filter.first + R"(]}})"; + } + + auto [schema, index_columns] = helper::create_arrow_schema( + dim_max, use_current_domain); + SOMADataFrame::create( + uri, + std::move(schema), + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + platform_config); + + auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); + auto sch = soma_dataframe->tiledb_schema(); + REQUIRE( + sch->offsets_filter_list().filter(0).filter_type() == + filter.second); + + REQUIRE( + sch->validity_filter_list().filter(0).filter_type() == + filter.second); + + auto dim_filter = sch->domain() + .dimension("d0") + .filter_list() + .filter(0); + REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); + REQUIRE( + dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); + + if (filter.second != TILEDB_FILTER_WEBP) { + REQUIRE( + sch->attribute("a0") + .filter_list() + .filter(0) + .filter_type() == filter.second); + } + soma_dataframe->close(); } + } +} - auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX); +TEST_CASE("SOMADataFrame: metadata") { + bool use_current_domains[] = {false, true}; + int64_t dim_max = 1000; + + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-collection"; + + auto [schema, index_columns] = helper::create_arrow_schema( + dim_max, use_current_domain); SOMADataFrame::create( uri, std::move(schema), @@ -158,93 +219,62 @@ TEST_CASE("SOMADataFrame: platform_config") { std::move(index_columns.first), std::move(index_columns.second)), ctx, - platform_config); + PlatformConfig(), + TimestampRange(0, 2)); - auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); - auto sch = soma_dataframe->tiledb_schema(); - REQUIRE( - sch->offsets_filter_list().filter(0).filter_type() == - filter.second); + auto soma_dataframe = SOMADataFrame::open( + uri, + OpenMode::write, + ctx, + {}, + ResultOrder::automatic, + TimestampRange(1, 1)); + + int32_t val = 100; + soma_dataframe->set_metadata("md", TILEDB_INT32, 1, &val); + soma_dataframe->close(); + // Read metadata + soma_dataframe->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_dataframe->metadata_num() == 3); + REQUIRE(soma_dataframe->has_metadata("soma_object_type")); + REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); + REQUIRE(soma_dataframe->has_metadata("md")); + auto mdval = soma_dataframe->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); REQUIRE( - sch->validity_filter_list().filter(0).filter_type() == - filter.second); + *((const int32_t*)std::get(*mdval)) == 100); + soma_dataframe->close(); - auto dim_filter = sch->domain().dimension("d0").filter_list().filter(0); - REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); - REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); + // md should not be available at (2, 2) + soma_dataframe->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_dataframe->metadata_num() == 2); + REQUIRE(soma_dataframe->has_metadata("soma_object_type")); + REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); + REQUIRE(!soma_dataframe->has_metadata("md")); + soma_dataframe->close(); - if (filter.second != TILEDB_FILTER_WEBP) { - REQUIRE( - sch->attribute("a0").filter_list().filter(0).filter_type() == - filter.second); - } + // Metadata should also be retrievable in write mode + soma_dataframe->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_dataframe->metadata_num() == 3); + REQUIRE(soma_dataframe->has_metadata("soma_object_type")); + REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); + REQUIRE(soma_dataframe->has_metadata("md")); + mdval = soma_dataframe->get_metadata("md"); + REQUIRE( + *((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write + // mode + soma_dataframe->delete_metadata("md"); + mdval = soma_dataframe->get_metadata("md"); + REQUIRE(!mdval.has_value()); soma_dataframe->close(); - } -} -TEST_CASE("SOMADataFrame: metadata") { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-collection"; - auto [schema, index_columns] = helper::create_arrow_schema(DIM_MAX); - SOMADataFrame::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - auto soma_dataframe = SOMADataFrame::open( - uri, - OpenMode::write, - ctx, - {}, - ResultOrder::automatic, - TimestampRange(1, 1)); - - int32_t val = 100; - soma_dataframe->set_metadata("md", TILEDB_INT32, 1, &val); - soma_dataframe->close(); - - // Read metadata - soma_dataframe->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(soma_dataframe->metadata_num() == 3); - REQUIRE(soma_dataframe->has_metadata("soma_object_type")); - REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); - REQUIRE(soma_dataframe->has_metadata("md")); - auto mdval = soma_dataframe->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); - soma_dataframe->close(); - - // md should not be available at (2, 2) - soma_dataframe->open(OpenMode::read, TimestampRange(2, 2)); - REQUIRE(soma_dataframe->metadata_num() == 2); - REQUIRE(soma_dataframe->has_metadata("soma_object_type")); - REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); - REQUIRE(!soma_dataframe->has_metadata("md")); - soma_dataframe->close(); - - // Metadata should also be retrievable in write mode - soma_dataframe->open(OpenMode::write, TimestampRange(0, 2)); - REQUIRE(soma_dataframe->metadata_num() == 3); - REQUIRE(soma_dataframe->has_metadata("soma_object_type")); - REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); - REQUIRE(soma_dataframe->has_metadata("md")); - mdval = soma_dataframe->get_metadata("md"); - REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write mode - soma_dataframe->delete_metadata("md"); - mdval = soma_dataframe->get_metadata("md"); - REQUIRE(!mdval.has_value()); - soma_dataframe->close(); - - // Confirm delete in read mode - soma_dataframe->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(!soma_dataframe->has_metadata("md")); - REQUIRE(soma_dataframe->metadata_num() == 2); + // Confirm delete in read mode + soma_dataframe->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_dataframe->has_metadata("md")); + REQUIRE(soma_dataframe->metadata_num() == 2); + } } diff --git a/libtiledbsoma/test/unit_soma_dense_ndarray.cc b/libtiledbsoma/test/unit_soma_dense_ndarray.cc index 2105f380cd..c52d1fe235 100644 --- a/libtiledbsoma/test/unit_soma_dense_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_dense_ndarray.cc @@ -35,149 +35,170 @@ #define DIM_MAX 1000 TEST_CASE("SOMADenseNDArray: basic") { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dense-ndarray-basic"; - - REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); - - auto index_columns = helper::create_column_index_info(DIM_MAX); - SOMADenseNDArray::create( - uri, - "l", - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - REQUIRE(SOMADenseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADataFrame::exists(uri, ctx)); - REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); - - auto soma_dense = SOMADenseNDArray::open(uri, OpenMode::read, ctx); - REQUIRE(soma_dense->uri() == uri); - REQUIRE(soma_dense->ctx() == ctx); - REQUIRE(soma_dense->type() == "SOMADenseNDArray"); - REQUIRE(soma_dense->is_sparse() == false); - REQUIRE(soma_dense->soma_data_type() == "l"); - auto schema = soma_dense->tiledb_schema(); - REQUIRE(schema->has_attribute("soma_data")); - REQUIRE(schema->array_type() == TILEDB_DENSE); - REQUIRE(schema->domain().has_dimension("soma_dim_0")); - REQUIRE(soma_dense->ndim() == 1); - REQUIRE(soma_dense->shape() == std::vector{1001}); - soma_dense->close(); - - std::vector d0{1, 10}; - std::vector a0(10, 1); - - soma_dense->open(OpenMode::write); - soma_dense->set_column_data("soma_data", a0.size(), a0.data()); - soma_dense->set_column_data("soma_dim_0", d0.size(), d0.data()); - soma_dense->write(); - soma_dense->close(); - - soma_dense->open(OpenMode::read); - while (auto batch = soma_dense->read_next()) { - auto arrbuf = batch.value(); - auto a0span = arrbuf->at("soma_data")->data(); - REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); + int64_t dim_max = 1000; + bool use_current_domains[] = {false, true}; + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dense-ndarray-basic"; + + REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + + auto index_columns = helper::create_column_index_info( + dim_max, use_current_domain); + SOMADenseNDArray::create( + uri, + "l", + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + + REQUIRE(SOMADenseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADataFrame::exists(uri, ctx)); + REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); + + auto soma_dense = SOMADenseNDArray::open(uri, OpenMode::read, ctx); + REQUIRE(soma_dense->uri() == uri); + REQUIRE(soma_dense->ctx() == ctx); + REQUIRE(soma_dense->type() == "SOMADenseNDArray"); + REQUIRE(soma_dense->is_sparse() == false); + REQUIRE(soma_dense->soma_data_type() == "l"); + auto schema = soma_dense->tiledb_schema(); + REQUIRE(schema->has_attribute("soma_data")); + REQUIRE(schema->array_type() == TILEDB_DENSE); + REQUIRE(schema->domain().has_dimension("soma_dim_0")); + REQUIRE(soma_dense->ndim() == 1); + REQUIRE(soma_dense->shape() == std::vector{dim_max + 1}); + soma_dense->close(); + + std::vector d0{1, 10}; + std::vector a0(10, 1); + + soma_dense->open(OpenMode::write); + soma_dense->set_column_data("soma_data", a0.size(), a0.data()); + soma_dense->set_column_data("soma_dim_0", d0.size(), d0.data()); + soma_dense->write(); + soma_dense->close(); + + soma_dense->open(OpenMode::read); + while (auto batch = soma_dense->read_next()) { + auto arrbuf = batch.value(); + auto a0span = arrbuf->at("soma_data")->data(); + REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); + } + soma_dense->close(); } - soma_dense->close(); } TEST_CASE("SOMADenseNDArray: platform_config") { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dataframe-platform-config"; - - PlatformConfig platform_config; - platform_config.dense_nd_array_dim_zstd_level = 6; - - auto index_columns = helper::create_column_index_info(DIM_MAX); - SOMADenseNDArray::create( - uri, - "l", - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx, - platform_config); - - auto soma_dataframe = SOMADenseNDArray::open(uri, OpenMode::read, ctx); - auto dim_filter = soma_dataframe->tiledb_schema() - ->domain() - .dimension("soma_dim_0") - .filter_list() - .filter(0); - REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); - REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); - - soma_dataframe->close(); + int64_t dim_max = 1000; + bool use_current_domains[] = {false, true}; + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dataframe-platform-config"; + + PlatformConfig platform_config; + platform_config.dense_nd_array_dim_zstd_level = 6; + + auto index_columns = helper::create_column_index_info( + dim_max, use_current_domain); + SOMADenseNDArray::create( + uri, + "l", + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + platform_config); + + auto soma_dataframe = SOMADenseNDArray::open(uri, OpenMode::read, ctx); + auto dim_filter = soma_dataframe->tiledb_schema() + ->domain() + .dimension("soma_dim_0") + .filter_list() + .filter(0); + REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); + REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); + + soma_dataframe->close(); + } } TEST_CASE("SOMADenseNDArray: metadata") { - auto ctx = std::make_shared(); - - std::string uri = "mem://unit-test-dense-ndarray"; - - auto index_columns = helper::create_column_index_info(DIM_MAX); - SOMASparseNDArray::create( - uri, - "l", - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - auto soma_dense = SOMADenseNDArray::open( - uri, - OpenMode::write, - ctx, - {}, - ResultOrder::automatic, - std::pair(1, 1)); - - int32_t val = 100; - soma_dense->set_metadata("md", TILEDB_INT32, 1, &val); - soma_dense->close(); - - // Read metadata - soma_dense->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(soma_dense->metadata_num() == 3); - REQUIRE(soma_dense->has_metadata("soma_object_type")); - REQUIRE(soma_dense->has_metadata("soma_encoding_version")); - REQUIRE(soma_dense->has_metadata("md")); - auto mdval = soma_dense->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); - soma_dense->close(); - - // md should not be available at (2, 2) - soma_dense->open(OpenMode::read, TimestampRange(2, 2)); - REQUIRE(soma_dense->metadata_num() == 2); - REQUIRE(soma_dense->has_metadata("soma_object_type")); - REQUIRE(soma_dense->has_metadata("soma_encoding_version")); - REQUIRE(!soma_dense->has_metadata("md")); - soma_dense->close(); - - // Metadata should also be retrievable in write mode - soma_dense->open(OpenMode::write, TimestampRange(0, 2)); - REQUIRE(soma_dense->metadata_num() == 3); - REQUIRE(soma_dense->has_metadata("soma_object_type")); - REQUIRE(soma_dense->has_metadata("soma_encoding_version")); - REQUIRE(soma_dense->has_metadata("md")); - mdval = soma_dense->get_metadata("md"); - REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write mode - soma_dense->delete_metadata("md"); - mdval = soma_dense->get_metadata("md"); - REQUIRE(!mdval.has_value()); - soma_dense->close(); - - // Confirm delete in read mode - soma_dense->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(!soma_dense->has_metadata("md")); - REQUIRE(soma_dense->metadata_num() == 2); + int64_t dim_max = 1000; + bool use_current_domains[] = {false, true}; + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + + std::string uri = "mem://unit-test-dense-ndarray"; + + auto index_columns = helper::create_column_index_info( + dim_max, use_current_domain); + SOMASparseNDArray::create( + uri, + "l", + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + + auto soma_dense = SOMADenseNDArray::open( + uri, + OpenMode::write, + ctx, + {}, + ResultOrder::automatic, + std::pair(1, 1)); + + int32_t val = 100; + soma_dense->set_metadata("md", TILEDB_INT32, 1, &val); + soma_dense->close(); + + // Read metadata + soma_dense->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_dense->metadata_num() == 3); + REQUIRE(soma_dense->has_metadata("soma_object_type")); + REQUIRE(soma_dense->has_metadata("soma_encoding_version")); + REQUIRE(soma_dense->has_metadata("md")); + auto mdval = soma_dense->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); + REQUIRE( + *((const int32_t*)std::get(*mdval)) == 100); + soma_dense->close(); + + // md should not be available at (2, 2) + soma_dense->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_dense->metadata_num() == 2); + REQUIRE(soma_dense->has_metadata("soma_object_type")); + REQUIRE(soma_dense->has_metadata("soma_encoding_version")); + REQUIRE(!soma_dense->has_metadata("md")); + soma_dense->close(); + + // Metadata should also be retrievable in write mode + soma_dense->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_dense->metadata_num() == 3); + REQUIRE(soma_dense->has_metadata("soma_object_type")); + REQUIRE(soma_dense->has_metadata("soma_encoding_version")); + REQUIRE(soma_dense->has_metadata("md")); + mdval = soma_dense->get_metadata("md"); + REQUIRE( + *((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write + // mode + soma_dense->delete_metadata("md"); + mdval = soma_dense->get_metadata("md"); + REQUIRE(!mdval.has_value()); + soma_dense->close(); + + // Confirm delete in read mode + soma_dense->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_dense->has_metadata("md")); + REQUIRE(soma_dense->metadata_num() == 2); + } } diff --git a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc index 2b100ad9d5..7b933e5e44 100644 --- a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc @@ -34,153 +34,185 @@ #define DIM_MAX 1000 TEST_CASE("SOMASparseNDArray: basic") { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-sparse-ndarray-basic"; - - REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); - - auto index_columns = helper::create_column_index_info(DIM_MAX); - SOMASparseNDArray::create( - uri, - "l", - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - REQUIRE(SOMASparseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADataFrame::exists(uri, ctx)); - REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); - - auto soma_sparse = SOMASparseNDArray::open(uri, OpenMode::read, ctx); - REQUIRE(soma_sparse->uri() == uri); - REQUIRE(soma_sparse->ctx() == ctx); - REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); - REQUIRE(soma_sparse->is_sparse() == true); - REQUIRE(soma_sparse->soma_data_type() == "l"); - auto schema = soma_sparse->tiledb_schema(); - REQUIRE(schema->has_attribute("soma_data")); - REQUIRE(schema->array_type() == TILEDB_SPARSE); - REQUIRE(schema->domain().has_dimension("soma_dim_0")); - REQUIRE(soma_sparse->ndim() == 1); - REQUIRE(soma_sparse->nnz() == 0); - soma_sparse->close(); - - std::vector d0(10); - for (int j = 0; j < 10; j++) - d0[j] = j; - std::vector a0(10, 1); - - soma_sparse->open(OpenMode::write); - soma_sparse->set_column_data("soma_data", a0.size(), a0.data()); - soma_sparse->set_column_data("soma_dim_0", d0.size(), d0.data()); - soma_sparse->write(); - soma_sparse->close(); - - soma_sparse->open(OpenMode::read); - while (auto batch = soma_sparse->read_next()) { - auto arrbuf = batch.value(); - auto d0span = arrbuf->at("soma_dim_0")->data(); - auto a0span = arrbuf->at("soma_data")->data(); - REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); - REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); + int64_t dim_max = 1000; + bool use_current_domains[] = {false, true}; + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-sparse-ndarray-basic"; + + REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); + + auto index_columns = helper::create_column_index_info( + dim_max, use_current_domain); + SOMASparseNDArray::create( + uri, + "l", + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + + REQUIRE(SOMASparseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADataFrame::exists(uri, ctx)); + REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + + auto soma_sparse = SOMASparseNDArray::open(uri, OpenMode::read, ctx); + REQUIRE(soma_sparse->uri() == uri); + REQUIRE(soma_sparse->ctx() == ctx); + REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); + REQUIRE(soma_sparse->is_sparse() == true); + REQUIRE(soma_sparse->soma_data_type() == "l"); + auto schema = soma_sparse->tiledb_schema(); + REQUIRE(schema->has_attribute("soma_data")); + REQUIRE(schema->array_type() == TILEDB_SPARSE); + REQUIRE(schema->domain().has_dimension("soma_dim_0")); + REQUIRE(soma_sparse->ndim() == 1); + REQUIRE(soma_sparse->nnz() == 0); + + if (use_current_domain) { + REQUIRE(soma_sparse->shape() == std::vector{dim_max + 1}); + } + + soma_sparse->close(); + + std::vector d0(10); + for (int j = 0; j < 10; j++) + d0[j] = j; + std::vector a0(10, 1); + + soma_sparse->open(OpenMode::write); + soma_sparse->set_column_data("soma_data", a0.size(), a0.data()); + soma_sparse->set_column_data("soma_dim_0", d0.size(), d0.data()); + soma_sparse->write(); + soma_sparse->close(); + + soma_sparse->open(OpenMode::read); + while (auto batch = soma_sparse->read_next()) { + auto arrbuf = batch.value(); + auto d0span = arrbuf->at("soma_dim_0")->data(); + auto a0span = arrbuf->at("soma_data")->data(); + REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); + REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); + } + + // TODO on a subsequent PR if use_current_domain: + // * test write out of bounds, including assertion of exception type + // * test resize + // * test write within new bounds + + soma_sparse->close(); } - soma_sparse->close(); } TEST_CASE("SOMASparseNDArray: platform_config") { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dataframe-platform-config"; - - PlatformConfig platform_config; - platform_config.sparse_nd_array_dim_zstd_level = 6; - - auto index_columns = helper::create_column_index_info(DIM_MAX); - SOMASparseNDArray::create( - uri, - "l", - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx, - platform_config); - - auto soma_dataframe = SOMASparseNDArray::open(uri, OpenMode::read, ctx); - auto dim_filter = soma_dataframe->tiledb_schema() - ->domain() - .dimension("soma_dim_0") - .filter_list() - .filter(0); - REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); - REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); - - soma_dataframe->close(); + int64_t dim_max = 1000; + bool use_current_domains[] = {false, true}; + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dataframe-platform-config"; + + PlatformConfig platform_config; + platform_config.sparse_nd_array_dim_zstd_level = 6; + + auto index_columns = helper::create_column_index_info( + dim_max, use_current_domain); + SOMASparseNDArray::create( + uri, + "l", + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + platform_config); + + auto soma_dataframe = SOMASparseNDArray::open(uri, OpenMode::read, ctx); + auto dim_filter = soma_dataframe->tiledb_schema() + ->domain() + .dimension("soma_dim_0") + .filter_list() + .filter(0); + REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); + REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); + + soma_dataframe->close(); + } } TEST_CASE("SOMASparseNDArray: metadata") { - auto ctx = std::make_shared(); - - std::string uri = "mem://unit-test-sparse-ndarray"; - - auto index_columns = helper::create_column_index_info(DIM_MAX); - SOMASparseNDArray::create( - uri, - "l", - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - auto soma_sparse = SOMASparseNDArray::open( - uri, - OpenMode::write, - ctx, - {}, - ResultOrder::automatic, - std::pair(1, 1)); - - int32_t val = 100; - soma_sparse->set_metadata("md", TILEDB_INT32, 1, &val); - soma_sparse->close(); - - // Read metadata - soma_sparse->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(soma_sparse->metadata_num() == 3); - REQUIRE(soma_sparse->has_metadata("soma_object_type")); - REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); - REQUIRE(soma_sparse->has_metadata("md")); - auto mdval = soma_sparse->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); - soma_sparse->close(); - - // md should not be available at (2, 2) - soma_sparse->open(OpenMode::read, TimestampRange(2, 2)); - REQUIRE(soma_sparse->metadata_num() == 2); - REQUIRE(soma_sparse->has_metadata("soma_object_type")); - REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); - REQUIRE(!soma_sparse->has_metadata("md")); - soma_sparse->close(); - - // Metadata should also be retrievable in write mode - soma_sparse->open(OpenMode::write, TimestampRange(0, 2)); - REQUIRE(soma_sparse->metadata_num() == 3); - REQUIRE(soma_sparse->has_metadata("soma_object_type")); - REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); - REQUIRE(soma_sparse->has_metadata("md")); - mdval = soma_sparse->get_metadata("md"); - REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write mode - soma_sparse->delete_metadata("md"); - mdval = soma_sparse->get_metadata("md"); - REQUIRE(!mdval.has_value()); - soma_sparse->close(); - - // Confirm delete in read mode - soma_sparse->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(!soma_sparse->has_metadata("md")); - REQUIRE(soma_sparse->metadata_num() == 2); + int64_t dim_max = 1000; + bool use_current_domains[] = {false, true}; + for (bool use_current_domain : use_current_domains) { + auto ctx = std::make_shared(); + + std::string uri = "mem://unit-test-sparse-ndarray"; + + auto index_columns = helper::create_column_index_info( + dim_max, use_current_domain); + SOMASparseNDArray::create( + uri, + "l", + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + + auto soma_sparse = SOMASparseNDArray::open( + uri, + OpenMode::write, + ctx, + {}, + ResultOrder::automatic, + std::pair(1, 1)); + + int32_t val = 100; + soma_sparse->set_metadata("md", TILEDB_INT32, 1, &val); + soma_sparse->close(); + + // Read metadata + soma_sparse->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_sparse->metadata_num() == 3); + REQUIRE(soma_sparse->has_metadata("soma_object_type")); + REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); + REQUIRE(soma_sparse->has_metadata("md")); + auto mdval = soma_sparse->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); + REQUIRE( + *((const int32_t*)std::get(*mdval)) == 100); + soma_sparse->close(); + + // md should not be available at (2, 2) + soma_sparse->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_sparse->metadata_num() == 2); + REQUIRE(soma_sparse->has_metadata("soma_object_type")); + REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); + REQUIRE(!soma_sparse->has_metadata("md")); + soma_sparse->close(); + + // Metadata should also be retrievable in write mode + soma_sparse->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_sparse->metadata_num() == 3); + REQUIRE(soma_sparse->has_metadata("soma_object_type")); + REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); + REQUIRE(soma_sparse->has_metadata("md")); + mdval = soma_sparse->get_metadata("md"); + REQUIRE( + *((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write + // mode + soma_sparse->delete_metadata("md"); + mdval = soma_sparse->get_metadata("md"); + REQUIRE(!mdval.has_value()); + soma_sparse->close(); + + // Confirm delete in read mode + soma_sparse->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_sparse->has_metadata("md")); + REQUIRE(soma_sparse->metadata_num() == 2); + } }