diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 8e87f3b04a..790c8594bf 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -796,6 +796,13 @@ class SOMAArray : public SOMAObject { */ std::optional timestamp(); + /** + * Exposed for testing purposes. + */ + CurrentDomain get_current_domain() { + return _get_current_domain(); + } + private: //=================================================================== //= private non-static diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc index 5a450e3e26..f456b94647 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.cc +++ b/libtiledbsoma/src/utils/arrow_adapter.cc @@ -868,9 +868,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema( // nullptr) // // Fortunately, these are ASCII dims and we can range - // these accordingly. These are minimum and maximum - // values, avoiding the extremes 0x00 and 0xff. - ndrect.set_range(col_name, "\x01", "\xfe"); + // these accordingly. + ndrect.set_range(col_name, "", "\xff"); } else { const void* buff = index_column_array->children[i] ->buffers[1]; diff --git a/libtiledbsoma/test/common.cc b/libtiledbsoma/test/common.cc index f8a3b5cdaa..3fdecfc76f 100644 --- a/libtiledbsoma/test/common.cc +++ b/libtiledbsoma/test/common.cc @@ -237,7 +237,7 @@ static std::unique_ptr _create_index_cols_info_array( std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes); } else { // domain small; current_domain feature not being used - int64_t dom[] = {0, info.dim_max, 1}; + uint32_t dom[] = {0, (uint32_t)info.dim_max, 1}; void* vsrc = (void*)&dom[0]; std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes); } diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index bd75350caa..540020cb91 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -32,56 +32,187 @@ #include "common.h" -#define DIM_MAX 1000 +// This is a keystroke-reduction fixture for some similar unit-test cases For +// convenience there are dims/attrs of type int64, uint32, and string. (Feel +// free to add more types.) The main value-adds of this fixture are (a) simple +// keystroke-reduction; (b) you get to pick which ones are the dim(s) and which +// are the attr(s). +struct VariouslyIndexedDataFrameFixture { + std::shared_ptr ctx_; + std::string uri_; + bool use_current_domain_; + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Using Catch2's TEST_CASE_METHOD we can't pass constructor args. + // This is a call-after-construction method. + void set_up(std::shared_ptr ctx, std::string uri) { + ctx_ = ctx; + uri_ = uri; + } -TEST_CASE("SOMADataFrame: basic") { - int64_t dim_max = 1000; - auto use_current_domain = GENERATE(false, true); - // TODO this could be formatted with fmt::format which is part of internal - // header spd/log/fmt/fmt.h and should not be used. In C++20, this can be - // replaced with std::format. - std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dataframe-basic"; - std::string dim_name = "d0"; - std::string attr_name = "a0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = helper::to_arrow_format(tiledb_datatype); + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Helpers for setting up dim/attr configs and data + static const inline int64_t i64_dim_max = 100; + static const inline int64_t u32_dim_max = 10000; + static const inline int64_t str_dim_max = 0; // not used for string dims + + static const inline std::string i64_name = "soma_joinid"; + static const inline std::string u32_name = "myuint32"; + static const inline std::string str_name = "mystring"; + + tiledb_datatype_t i64_datatype = TILEDB_INT64; + tiledb_datatype_t u32_datatype = TILEDB_UINT32; + tiledb_datatype_t str_datatype = TILEDB_STRING_ASCII; + + std::string i64_arrow_format = helper::to_arrow_format(i64_datatype); + std::string u32_arrow_format = helper::to_arrow_format(u32_datatype); + std::string attr_1_arrow_format = helper::to_arrow_format(str_datatype); + + helper::DimInfo i64_dim_info(bool use_current_domain) { + return helper::DimInfo( + {.name = i64_name, + .tiledb_datatype = i64_datatype, + .dim_max = i64_dim_max, + .use_current_domain = use_current_domain}); + } + helper::DimInfo u32_dim_info(bool use_current_domain) { + return helper::DimInfo( + {.name = u32_name, + .tiledb_datatype = u32_datatype, + .dim_max = u32_dim_max, + .use_current_domain = use_current_domain}); + } + helper::DimInfo str_dim_info(bool use_current_domain) { + return helper::DimInfo( + {.name = str_name, + .tiledb_datatype = str_datatype, + .dim_max = str_dim_max, + .use_current_domain = use_current_domain}); + } - REQUIRE(!SOMADataFrame::exists(uri, ctx)); + helper::AttrInfo i64_attr_info(std::string name = i64_name) { + return helper::AttrInfo( + {.name = name, .tiledb_datatype = i64_datatype}); + } + helper::AttrInfo u32_attr_info() { + return helper::AttrInfo( + {.name = u32_name, .tiledb_datatype = u32_datatype}); + } + helper::AttrInfo str_attr_info() { + return helper::AttrInfo( + {.name = str_name, .tiledb_datatype = str_datatype}); + } - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = dim_max, - .use_current_domain = use_current_domain}}); + std::vector make_i64_data() { + return std::vector({1, 2}); + } + std::vector make_u32_data() { + return std::vector({1234, 5678}); + } + std::vector make_str_data() { + return std::vector({"apple", "bat"}); + } - std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Helper methods for create/open/write/etc. + void create( + const std::vector& dim_infos, + const std::vector& attr_infos) { auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( dim_infos, attr_infos); + SOMADataFrame::create( + uri_, + std::move(schema), + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx_); + } + void create( + const std::vector& dim_infos, + const std::vector& attr_infos, + const PlatformConfig& platform_config, + std::optional timestamp_range = std::nullopt) { + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns( + dim_infos, attr_infos); SOMADataFrame::create( - uri, + uri_, std::move(schema), ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), - ctx); + ctx_, + platform_config, + timestamp_range); + } - REQUIRE(SOMADataFrame::exists(uri, ctx)); - REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + std::unique_ptr open( + OpenMode mode, + ResultOrder result_order = ResultOrder::automatic, + std::optional timestamp_range = std::nullopt) { + return SOMADataFrame::open( + uri_, + mode, + ctx_, + {}, // column_names + result_order, + timestamp_range); + } + + void write_generic_data() { + // No arguments -- for now. + // In a subsequent PR we'll vary writing in-bounds vs out of bounds. + auto soma_dataframe = SOMADataFrame::open(uri_, OpenMode::write, ctx_); + + auto i64_data = make_i64_data(); + auto u32_data = make_u32_data(); + auto str_data = make_str_data(); + + soma_dataframe->set_column_data( + i64_name, i64_data.size(), i64_data.data()); + soma_dataframe->set_column_data( + str_name, str_data.size(), str_data.data()); + soma_dataframe->set_column_data( + u32_name, u32_data.size(), u32_data.data()); + soma_dataframe->write(); + + soma_dataframe->close(); + } +}; + +TEST_CASE_METHOD(VariouslyIndexedDataFrameFixture, "SOMADataFrame: basic") { + auto use_current_domain = GENERATE(false, true); + // TODO this could be formatted with fmt::format which is part of internal + // header spd/log/fmt/fmt.h and should not be used. In C++20, this can be + // replaced with std::format. + std::ostringstream section; + section << "- use_current_domain=" << use_current_domain; + SECTION(section.str()) { + set_up( + std::make_shared(), "mem://unit-test-dataframe-basic"); - auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); - REQUIRE(soma_dataframe->uri() == uri); - REQUIRE(soma_dataframe->ctx() == ctx); + std::vector dim_infos( + {i64_dim_info(use_current_domain)}); + std::vector attr_infos({u32_attr_info()}); + + REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); + + create(dim_infos, attr_infos); + + REQUIRE(SOMADataFrame::exists(uri_, ctx_)); + REQUIRE(!SOMASparseNDArray::exists(uri_, ctx_)); + REQUIRE(!SOMADenseNDArray::exists(uri_, ctx_)); + + auto soma_dataframe = open(OpenMode::read); + REQUIRE(soma_dataframe->uri() == uri_); + REQUIRE(soma_dataframe->ctx() == ctx_); REQUIRE(soma_dataframe->type() == "SOMADataFrame"); - std::vector expected_index_column_names = {dim_name}; + std::vector expected_index_column_names = { + dim_infos[0].name}; REQUIRE( soma_dataframe->index_column_names() == expected_index_column_names); @@ -93,30 +224,33 @@ TEST_CASE("SOMADataFrame: basic") { d0[j] = j; std::vector a0(10, 1); - soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); - soma_dataframe->set_column_data(attr_name, a0.size(), a0.data()); - soma_dataframe->set_column_data(dim_name, d0.size(), d0.data()); + soma_dataframe = open(OpenMode::write); + soma_dataframe->set_column_data( + dim_infos[0].name, d0.size(), d0.data()); + soma_dataframe->set_column_data( + attr_infos[0].name, a0.size(), a0.data()); soma_dataframe->write(); soma_dataframe->close(); - soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); + soma_dataframe = open(OpenMode::read); while (auto batch = soma_dataframe->read_next()) { auto arrbuf = batch.value(); - auto d0span = arrbuf->at(dim_name)->data(); - auto a0span = arrbuf->at(attr_name)->data(); + auto d0span = arrbuf->at(dim_infos[0].name)->data(); + auto a0span = arrbuf->at(attr_infos[0].name)->data(); REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); } soma_dataframe->close(); - auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); - REQUIRE(soma_object->uri() == uri); + auto soma_object = SOMAObject::open(uri_, OpenMode::read, ctx_); + REQUIRE(soma_object->uri() == uri_); REQUIRE(soma_object->type() == "SOMADataFrame"); soma_object->close(); } } -TEST_CASE("SOMADataFrame: platform_config") { +TEST_CASE_METHOD( + VariouslyIndexedDataFrameFixture, "SOMADataFrame: platform_config") { std::pair filter = GENERATE( std::make_pair( R"({"name": "GZIP", "COMPRESSION_LEVEL": 3})", TILEDB_FILTER_GZIP), @@ -158,26 +292,20 @@ TEST_CASE("SOMADataFrame: platform_config") { std::make_pair(R"("BYTESHUFFLE")", TILEDB_FILTER_BYTESHUFFLE), std::make_pair(R"("NOOP")", TILEDB_FILTER_NONE)); - // TODO this use to be formatted with fmt::format which is part of internal + // TODO this used to be formatted with fmt::format which is part of internal // header spd/log/fmt/fmt.h and should not be used. In C++20, this can be // replaced with std::format. std::ostringstream section; section << "- filter=" << filter.first; SECTION(section.str()) { - int64_t dim_max = 1000; auto use_current_domain = GENERATE(false, true); - // TODO this could be formatted with fmt::format which is part of - // internal header spd/log/fmt/fmt.h and should not be used. In C++20, - // this can be replaced with std::format. std::ostringstream section2; section2 << "- use_current_domain=" << use_current_domain; SECTION(section2.str()) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dataframe-platform-config"; - std::string dim_name = "d0"; - std::string attr_name = "a0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + set_up( + std::make_shared(), + "mem://unit-test-dataframe-platform-config"); PlatformConfig platform_config; platform_config.dataframe_dim_zstd_level = 6; @@ -189,28 +317,14 @@ TEST_CASE("SOMADataFrame: platform_config") { } std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = dim_max, - .use_current_domain = use_current_domain}}); - - std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); - - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - - SOMADataFrame::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - platform_config); - - auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); + {i64_dim_info(use_current_domain)}); + std::vector attr_infos({i64_attr_info("a0")}); + + REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); + + create(dim_infos, attr_infos, platform_config); + + auto soma_dataframe = open(OpenMode::read); auto sch = soma_dataframe->tiledb_schema(); REQUIRE( sch->offsets_filter_list().filter(0).filter_type() == @@ -221,7 +335,7 @@ TEST_CASE("SOMADataFrame: platform_config") { filter.second); auto dim_filter = sch->domain() - .dimension(dim_name) + .dimension(dim_infos[0].name) .filter_list() .filter(0); REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); @@ -230,7 +344,7 @@ TEST_CASE("SOMADataFrame: platform_config") { if (filter.second != TILEDB_FILTER_WEBP) { REQUIRE( - sch->attribute(attr_name) + sch->attribute(attr_infos[0].name) .filter_list() .filter(0) .filter_type() == filter.second); @@ -240,8 +354,7 @@ TEST_CASE("SOMADataFrame: platform_config") { } } -TEST_CASE("SOMADataFrame: metadata") { - int64_t dim_max = 1000; +TEST_CASE_METHOD(VariouslyIndexedDataFrameFixture, "SOMADataFrame: metadata") { auto use_current_domain = GENERATE(false, true); // TODO this could be formatted with fmt::format which is part of internal // header spd/log/fmt/fmt.h and should not be used. In C++20, this can be @@ -249,43 +362,18 @@ TEST_CASE("SOMADataFrame: metadata") { std::ostringstream section; section << "- use_current_domain=" << use_current_domain; SECTION(section.str()) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-collection"; - std::string dim_name = "d0"; - std::string attr_name = "a0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = helper::to_arrow_format(tiledb_datatype); + set_up(std::make_shared(), "mem://unit-test-collection"); std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = dim_max, - .use_current_domain = use_current_domain}}); + {i64_dim_info(use_current_domain)}); + std::vector attr_infos({u32_attr_info()}); - std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); + create(dim_infos, attr_infos, PlatformConfig(), TimestampRange(0, 2)); - SOMADataFrame::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - auto soma_dataframe = SOMADataFrame::open( - uri, - OpenMode::write, - ctx, - {}, - ResultOrder::automatic, - TimestampRange(1, 1)); + auto soma_dataframe = open( + OpenMode::write, ResultOrder::automatic, TimestampRange(1, 1)); int32_t val = 100; soma_dataframe->set_metadata("md", TILEDB_INT32, 1, &val); @@ -336,56 +424,234 @@ TEST_CASE("SOMADataFrame: metadata") { } } -TEST_CASE("SOMADataFrame: bounds-checking") { +TEST_CASE_METHOD( + VariouslyIndexedDataFrameFixture, "SOMADataFrame: bounds-checking") { bool use_current_domain = true; int old_max = 100; int new_max = 200; - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-bounds-checking"; - std::string dim_name = "d0"; - std::string attr_name = "a0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = helper::to_arrow_format(tiledb_datatype); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = old_max, - .use_current_domain = use_current_domain}}); + set_up(std::make_shared(), "mem://unit-test-bounds-checking"); - std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + std::vector dim_infos({i64_dim_info(use_current_domain)}); + std::vector attr_infos({u32_attr_info()}); - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); - SOMADataFrame::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), std::move(index_columns.second)), - ctx); + create(dim_infos, attr_infos); - auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); + auto soma_dataframe = open(OpenMode::write); std::vector d0({old_max + 1, old_max + 2}); std::vector a0({1.5, 2.5}); - soma_dataframe->set_column_data(dim_name, d0.size(), d0.data()); - soma_dataframe->set_column_data(attr_name, a0.size(), a0.data()); + soma_dataframe->set_column_data(dim_infos[0].name, d0.size(), d0.data()); + soma_dataframe->set_column_data(attr_infos[0].name, a0.size(), a0.data()); // Writing outside the current domain should fail REQUIRE_THROWS(soma_dataframe->write()); soma_dataframe->close(); - soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); + soma_dataframe = open(OpenMode::write); soma_dataframe->resize(std::vector({new_max})); soma_dataframe->close(); - soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); - soma_dataframe->set_column_data(dim_name, d0.size(), d0.data()); - soma_dataframe->set_column_data(attr_name, a0.size(), a0.data()); + soma_dataframe = open(OpenMode::write); + soma_dataframe->set_column_data(dim_infos[0].name, d0.size(), d0.data()); + soma_dataframe->set_column_data(attr_infos[0].name, a0.size(), a0.data()); // Writing after resize should succeed soma_dataframe->write(); soma_dataframe->close(); } + +TEST_CASE_METHOD( + VariouslyIndexedDataFrameFixture, + "SOMADataFrame: variant-indexed dataframe dim:sjid attr:str,u32") { + auto use_current_domain = GENERATE(false, true); + std::ostringstream section; + section << "- use_current_domain=" << use_current_domain; + SECTION(section.str()) { + set_up( + std::make_shared(), + "mem://unit-test-variant-indexed-dataframe-1"); + + std::vector dim_infos( + {i64_dim_info(use_current_domain)}); + std::vector attr_infos( + {str_attr_info(), u32_attr_info()}); + + // Create + create(dim_infos, attr_infos); + + // Check current domain + auto soma_dataframe = open(OpenMode::read); + + CurrentDomain current_domain = soma_dataframe->get_current_domain(); + if (!use_current_domain) { + REQUIRE(current_domain.is_empty()); + } else { + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); + + std::array i64_range = ndrect.range( + dim_infos[0].name); + REQUIRE(i64_range[0] == (int64_t)0); + REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max); + } + + soma_dataframe->close(); + + write_generic_data(); + } +} + +TEST_CASE_METHOD( + VariouslyIndexedDataFrameFixture, + "SOMADataFrame: variant-indexed dataframe dim:sjid,u32 attr:str") { + auto use_current_domain = GENERATE(false, true); + std::ostringstream section; + section << "- use_current_domain=" << use_current_domain; + SECTION(section.str()) { + set_up( + std::make_shared(), + "mem://unit-test-variant-indexed-dataframe-2"); + + std::vector dim_infos( + {i64_dim_info(use_current_domain), + u32_dim_info(use_current_domain)}); + std::vector attr_infos({str_attr_info()}); + + // Create + create(dim_infos, attr_infos); + + // Check current domain + auto soma_dataframe = open(OpenMode::read); + + CurrentDomain current_domain = soma_dataframe->get_current_domain(); + if (!use_current_domain) { + REQUIRE(current_domain.is_empty()); + } else { + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); + + std::array i64_range = ndrect.range( + dim_infos[0].name); + REQUIRE(i64_range[0] == (int64_t)0); + REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max); + + std::array u32_range = ndrect.range( + dim_infos[0].name); + REQUIRE(u32_range[0] == (uint32_t)0); + REQUIRE(u32_range[1] == (uint32_t)dim_infos[0].dim_max); + } + + soma_dataframe->close(); + + // Write + write_generic_data(); + } +} + +TEST_CASE_METHOD( + VariouslyIndexedDataFrameFixture, + "SOMADataFrame: variant-indexed dataframe dim:sjid,str attr:u32") { + auto use_current_domain = GENERATE(false, true); + std::ostringstream section; + section << "- use_current_domain=" << use_current_domain; + SECTION(section.str()) { + set_up( + std::make_shared(), + "mem://unit-test-variant-indexed-dataframe-3"); + + std::vector dim_infos( + {i64_dim_info(use_current_domain), + str_dim_info(use_current_domain)}); + std::vector attr_infos({u32_attr_info()}); + + // Create + create(dim_infos, attr_infos); + + // Check current domain + auto soma_dataframe = open(OpenMode::read); + + CurrentDomain current_domain = soma_dataframe->get_current_domain(); + if (!use_current_domain) { + REQUIRE(current_domain.is_empty()); + } else { + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); + + std::array i64_range = ndrect.range( + dim_infos[0].name); + REQUIRE(i64_range[0] == (int64_t)0); + REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max); + + std::array str_range = ndrect.range( + dim_infos[1].name); + // Can we write empty strings in this range? + REQUIRE(str_range[0] <= ""); + REQUIRE(str_range[1] >= ""); + // Can we write ASCII values in this range? + REQUIRE(str_range[0] < " "); + REQUIRE(str_range[1] > "~"); + } + + soma_dataframe->close(); + + // Write + write_generic_data(); + } +} + +TEST_CASE_METHOD( + VariouslyIndexedDataFrameFixture, + "SOMADataFrame: variant-indexed dataframe dim:str,u32 attr:sjid") { + auto use_current_domain = GENERATE(false, true); + std::ostringstream section; + section << "- use_current_domain=" << use_current_domain; + SECTION(section.str()) { + set_up( + std::make_shared(), + "mem://unit-test-variant-indexed-dataframe-4"); + + std::vector dim_infos( + {str_dim_info(use_current_domain), + u32_dim_info(use_current_domain)}); + std::vector attr_infos({i64_attr_info()}); + + // Create + create(dim_infos, attr_infos); + + // Check current domain + auto soma_dataframe = open(OpenMode::read); + + CurrentDomain current_domain = soma_dataframe->get_current_domain(); + if (!use_current_domain) { + REQUIRE(current_domain.is_empty()); + } else { + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); + + std::array str_range = ndrect.range( + dim_infos[0].name); + // Can we write empty strings in this range? + REQUIRE(str_range[0] <= ""); + REQUIRE(str_range[1] >= ""); + // Can we write ASCII values in this range? + REQUIRE(str_range[0] < " "); + REQUIRE(str_range[1] > "~"); + + std::array u32_range = ndrect.range( + dim_infos[1].name); + REQUIRE(u32_range[0] == (uint32_t)0); + REQUIRE(u32_range[1] == (uint32_t)dim_infos[1].dim_max); + } + + soma_dataframe->close(); + + // Write + write_generic_data(); + } +}