diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h
index 8e87f3b04a..790c8594bf 100644
--- a/libtiledbsoma/src/soma/soma_array.h
+++ b/libtiledbsoma/src/soma/soma_array.h
@@ -796,6 +796,13 @@ class SOMAArray : public SOMAObject {
      */
     std::optional timestamp();
 
+    /**
+     * Public wrapper around _get_current_domain(), exposed for testing.
+     */
+    CurrentDomain get_current_domain() {
+        return _get_current_domain();
+    }
+
    private:
     //===================================================================
     //= private non-static
diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc
index 5a450e3e26..f456b94647 100644
--- a/libtiledbsoma/src/utils/arrow_adapter.cc
+++ b/libtiledbsoma/src/utils/arrow_adapter.cc
@@ -868,9 +868,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(
                     // nullptr)
                     //
                     // Fortunately, these are ASCII dims and we can range
-                    // these accordingly. These are minimum and maximum
-                    // values, avoiding the extremes 0x00 and 0xff.
-                    ndrect.set_range(col_name, "\x01", "\xfe");
+                    // these accordingly, from the empty string up to "\xff".
+                    ndrect.set_range(col_name, "", "\xff");
                 } else {
                     const void* buff = index_column_array->children[i]
                                            ->buffers[1];
diff --git a/libtiledbsoma/test/common.cc b/libtiledbsoma/test/common.cc
index f8a3b5cdaa..3fdecfc76f 100644
--- a/libtiledbsoma/test/common.cc
+++ b/libtiledbsoma/test/common.cc
@@ -237,7 +237,7 @@ static std::unique_ptr _create_index_cols_info_array(
                 std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes);
             } else {
                 // domain small; current_domain feature not being used
-                int64_t dom[] = {0, info.dim_max, 1};
+                uint32_t dom[] = {0, (uint32_t)info.dim_max, 1};
                 void* vsrc = (void*)&dom[0];
                 std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes);
             }
diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc
index f56a68cb94..540020cb91 100644
--- a/libtiledbsoma/test/unit_soma_dataframe.cc
+++ b/libtiledbsoma/test/unit_soma_dataframe.cc
@@ -461,3 +461,197 @@ TEST_CASE_METHOD(
 
     soma_dataframe->close();
 }
+
+TEST_CASE_METHOD(
+    VariouslyIndexedDataFrameFixture,
+    "SOMADataFrame: variant-indexed dataframe dim:sjid attr:str,u32") {
+    auto use_current_domain = GENERATE(false, true);
+    std::ostringstream section;
+    section << "- use_current_domain=" << use_current_domain;
+    SECTION(section.str()) {
+        set_up(
+            std::make_shared(),
+            "mem://unit-test-variant-indexed-dataframe-1");
+
+        std::vector dim_infos(
+            {i64_dim_info(use_current_domain)});
+        std::vector attr_infos(
+            {str_attr_info(), u32_attr_info()});
+
+        // Create
+        create(dim_infos, attr_infos);
+
+        // Check current domain
+        auto soma_dataframe = open(OpenMode::read);
+
+        CurrentDomain current_domain = soma_dataframe->get_current_domain();
+        if (!use_current_domain) {
+            REQUIRE(current_domain.is_empty());
+        } else {
+            REQUIRE(!current_domain.is_empty());
+            REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE);
+            NDRectangle ndrect = current_domain.ndrectangle();
+
+            std::array i64_range = ndrect.range(
+                dim_infos[0].name);
+            REQUIRE(i64_range[0] == (int64_t)0);
+            REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max);
+        }
+
+        soma_dataframe->close();
+
+        write_generic_data();
+    }
+}
+
+TEST_CASE_METHOD(
+    VariouslyIndexedDataFrameFixture,
+    "SOMADataFrame: variant-indexed dataframe dim:sjid,u32 attr:str") {
+    auto use_current_domain = GENERATE(false, true);
+    std::ostringstream section;
+    section << "- use_current_domain=" << use_current_domain;
+    SECTION(section.str()) {
+        set_up(
+            std::make_shared(),
+            "mem://unit-test-variant-indexed-dataframe-2");
+
+        std::vector dim_infos(
+            {i64_dim_info(use_current_domain),
+             u32_dim_info(use_current_domain)});
+        std::vector attr_infos({str_attr_info()});
+
+        // Create
+        create(dim_infos, attr_infos);
+
+        // Check current domain
+        auto soma_dataframe = open(OpenMode::read);
+
+        CurrentDomain current_domain = soma_dataframe->get_current_domain();
+        if (!use_current_domain) {
+            REQUIRE(current_domain.is_empty());
+        } else {
+            REQUIRE(!current_domain.is_empty());
+            REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE);
+            NDRectangle ndrect = current_domain.ndrectangle();
+
+            std::array i64_range = ndrect.range(
+                dim_infos[0].name);
+            REQUIRE(i64_range[0] == (int64_t)0);
+            REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max);
+
+            std::array u32_range = ndrect.range(
+                dim_infos[1].name);
+            REQUIRE(u32_range[0] == (uint32_t)0);
+            REQUIRE(u32_range[1] == (uint32_t)dim_infos[1].dim_max);
+        }
+
+        soma_dataframe->close();
+
+        // Write
+        write_generic_data();
+    }
+}
+
+TEST_CASE_METHOD(
+    VariouslyIndexedDataFrameFixture,
+    "SOMADataFrame: variant-indexed dataframe dim:sjid,str attr:u32") {
+    auto use_current_domain = GENERATE(false, true);
+    std::ostringstream section;
+    section << "- use_current_domain=" << use_current_domain;
+    SECTION(section.str()) {
+        set_up(
+            std::make_shared(),
+            "mem://unit-test-variant-indexed-dataframe-3");
+
+        std::vector dim_infos(
+            {i64_dim_info(use_current_domain),
+             str_dim_info(use_current_domain)});
+        std::vector attr_infos({u32_attr_info()});
+
+        // Create
+        create(dim_infos, attr_infos);
+
+        // Check current domain
+        auto soma_dataframe = open(OpenMode::read);
+
+        CurrentDomain current_domain = soma_dataframe->get_current_domain();
+        if (!use_current_domain) {
+            REQUIRE(current_domain.is_empty());
+        } else {
+            REQUIRE(!current_domain.is_empty());
+            REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE);
+            NDRectangle ndrect = current_domain.ndrectangle();
+
+            std::array i64_range = ndrect.range(
+                dim_infos[0].name);
+            REQUIRE(i64_range[0] == (int64_t)0);
+            REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max);
+
+            std::array str_range = ndrect.range(
+                dim_infos[1].name);
+            // Can we write empty strings in this range?
+            REQUIRE(str_range[0] <= "");
+            REQUIRE(str_range[1] >= "");
+            // Can we write ASCII values in this range?
+            REQUIRE(str_range[0] < " ");
+            REQUIRE(str_range[1] > "~");
+        }
+
+        soma_dataframe->close();
+
+        // Write
+        write_generic_data();
+    }
+}
+
+TEST_CASE_METHOD(
+    VariouslyIndexedDataFrameFixture,
+    "SOMADataFrame: variant-indexed dataframe dim:str,u32 attr:sjid") {
+    auto use_current_domain = GENERATE(false, true);
+    std::ostringstream section;
+    section << "- use_current_domain=" << use_current_domain;
+    SECTION(section.str()) {
+        set_up(
+            std::make_shared(),
+            "mem://unit-test-variant-indexed-dataframe-4");
+
+        std::vector dim_infos(
+            {str_dim_info(use_current_domain),
+             u32_dim_info(use_current_domain)});
+        std::vector attr_infos({i64_attr_info()});
+
+        // Create
+        create(dim_infos, attr_infos);
+
+        // Check current domain
+        auto soma_dataframe = open(OpenMode::read);
+
+        CurrentDomain current_domain = soma_dataframe->get_current_domain();
+        if (!use_current_domain) {
+            REQUIRE(current_domain.is_empty());
+        } else {
+            REQUIRE(!current_domain.is_empty());
+            REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE);
+            NDRectangle ndrect = current_domain.ndrectangle();
+
+            std::array str_range = ndrect.range(
+                dim_infos[0].name);
+            // Can we write empty strings in this range?
+            REQUIRE(str_range[0] <= "");
+            REQUIRE(str_range[1] >= "");
+            // Can we write ASCII values in this range?
+            REQUIRE(str_range[0] < " ");
+            REQUIRE(str_range[1] > "~");
+
+            std::array u32_range = ndrect.range(
+                dim_infos[1].name);
+            REQUIRE(u32_range[0] == (uint32_t)0);
+            REQUIRE(u32_range[1] == (uint32_t)dim_infos[1].dim_max);
+        }
+
+        soma_dataframe->close();
+
+        // Write
+        write_generic_data();
+    }
+}