From d48808779464734f1d15afbc1ba66dc603381cd5 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 19 Sep 2024 16:36:32 -0400 Subject: [PATCH 1/7] [c++] implementation and unit testing for domainish accessors --- libtiledbsoma/src/soma/soma_array.cc | 102 +++++++- libtiledbsoma/src/soma/soma_array.h | 229 +++++++++++++---- libtiledbsoma/test/unit_soma_dataframe.cc | 295 +++++++++++++++++++++- 3 files changed, 576 insertions(+), 50 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 00ba2224e6..464b7ee956 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -1,5 +1,4 @@ -/** - * @file soma_array.cc +/** @file soma_array.cc * * @section LICENSE * @@ -1141,6 +1140,105 @@ std::optional SOMAArray::timestamp() { return timestamp_; } +// Note that ArrowTable is simply our libtiledbsoma pairing of ArrowArray and +// ArrowSchema from nanoarrow. +// +// The domainish enum simply lets us re-use code which is common across +// core domain, core current domain, and core non-empty domain. +ArrowTable SOMAArray::_get_core_domainish(enum Domainish which_kind) { + int array_ndim = this->ndim(); + auto dimensions = tiledb_schema()->domain().dimensions(); + + // Create the schema for the info we return + std::vector names(array_ndim); + std::vector tiledb_datatypes(array_ndim); + + for (int i = 0; i < (int)array_ndim; i++) { + const Dimension& core_dim = dimensions[i]; + names[i] = core_dim.name(); + tiledb_datatypes[i] = core_dim.type(); + } + + auto arrow_schema = ArrowAdapter::make_arrow_schema( + names, tiledb_datatypes); + + // Create the data for the info we return + auto arrow_array = ArrowAdapter::make_arrow_array_parent(array_ndim); + + for (int i = 0; i < array_ndim; i++) { + auto core_dim = dimensions[i]; + auto core_type_code = core_dim.type(); + + ArrowArray* child = nullptr; + + switch (core_type_code) { + case TILEDB_INT64: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot(core_dim.name(), which_kind)); + break; + case TILEDB_UINT64: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot( + core_dim.name(), which_kind)); + break; + case TILEDB_INT32: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot(core_dim.name(), which_kind)); + break; + case TILEDB_UINT32: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot( + core_dim.name(), which_kind)); + break; + case TILEDB_INT16: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot(core_dim.name(), which_kind)); + break; + case TILEDB_UINT16: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot( + core_dim.name(), which_kind)); + break; + case TILEDB_INT8: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot(core_dim.name(), which_kind)); + break; + case TILEDB_UINT8: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot(core_dim.name(), which_kind)); + break; + + case TILEDB_FLOAT64: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot(core_dim.name(), which_kind)); + break; + case TILEDB_FLOAT32: + child = ArrowAdapter::make_arrow_array_child( + _core_domainish_slot(core_dim.name(), which_kind)); + break; + + case TILEDB_STRING_ASCII: + case TILEDB_CHAR: + child = ArrowAdapter::make_arrow_array_child_string( + _core_domainish_slot_string(core_dim.name(), which_kind)); + break; + + default: + break; + } + + if (child == nullptr) { + // throw TileDBSOMAError(fmt::format( + // "WIP {} {}", + // core_dim.name(), + // tiledb::impl::type_to_str(core_type_code))); + } + arrow_array->children[i] = child; + } + + return ArrowTable(std::move(arrow_array), std::move(arrow_schema)); +} + uint64_t SOMAArray::nnz() { // Verify array is sparse if (mq_->schema()->array_type() != TILEDB_SPARSE) { diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 8d5038aae2..b841d63ec6 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -48,6 +48,14 @@ namespace tiledbsoma { using namespace tiledb; +// This enables some code deduplication between core domain, core current +// domain, and core non-empty domain. +enum class Domainish { + kind_core_domain = 0, + kind_core_current_domain = 1, + kind_non_empty_domain = 2 +}; + class SOMAArray : public SOMAObject { public: //=================================================================== @@ -703,7 +711,7 @@ class SOMAArray : public SOMAObject { * non-empty domains of the array fragments. */ template - std::pair non_empty_domain_slot(const std::string& name) { + std::pair non_empty_domain_slot(const std::string& name) const { try { return arr_->non_empty_domain(name); } catch (const std::exception& e) { @@ -717,7 +725,7 @@ class SOMAArray : public SOMAObject { * Applicable only to var-sized dimensions. */ std::pair non_empty_domain_slot_var( - const std::string& name) { + const std::string& name) const { try { return arr_->non_empty_domain_var(name); } catch (const std::exception& e) { @@ -745,6 +753,70 @@ class SOMAArray : public SOMAObject { return !_get_current_domain().is_empty(); } + /** + * Returns the core current domain at the given dimension. + * + * o For arrays with core current-domain support: + * - soma domain is core current domain + * - soma maxdomain is core domain + * o For arrays without core current-domain support: + * - soma domain is core domain + * - soma maxdomain is core domain + * - core current domain is not accessed at the soma level + * + * @tparam T Domain datatype + * @return Pair of [lower, upper] inclusive bounds. + */ + template + std::pair _core_current_domain_slot(const std::string& name) const { + CurrentDomain current_domain = _get_current_domain(); + if (current_domain.is_empty()) { + throw TileDBSOMAError( + "_core_current_domain_slot: internal coding error"); + } + if (current_domain.type() != TILEDB_NDRECTANGLE) { + throw TileDBSOMAError( + "_core_current_domain_slot: found non-rectangle type"); + } + NDRectangle ndrect = current_domain.ndrectangle(); + + // Convert from two-element array (core API) to pair (tiledbsoma API) + std::array arr = ndrect.range(name); + return std::pair(arr[0], arr[1]); + } + + /** + * Returns the core domain at the given dimension. + * + * o For arrays with core current-domain support: + * - soma domain is core current domain + * - soma maxdomain is core domain + * o For arrays without core current-domain support: + * - soma domain is core domain + * - soma maxdomain is core domain + * - core current domain is not accessed at the soma level + * + * @tparam T Domain datatype + * @return Pair of [lower, upper] inclusive bounds. + */ + template + std::pair _core_domain_slot(const std::string& name) const { + if (std::is_same_v) { + throw std::runtime_error( + "SOMAArray::_core_domain_slot: template-specialization " + "failure."); + } + return arr_->schema().domain().dimension(name).domain(); + } + + std::pair _core_domain_slot_string( + const std::string&) const { + // Core domain for string dims is always a nullptr pair at the C++ + // level. We follow the convention started by TileDB-Py which is to + // report these as an empty-string pair. + return std::pair("", ""); + } + /** * Returns the SOMA domain at the given dimension. * @@ -775,6 +847,108 @@ class SOMAArray : public SOMAObject { return _core_domain_slot(name); } + /** + * Returns the SOMA domain in its entirety, as an Arrow table for return to + * Python/R. + * + * o For arrays with core current-domain support: + * - soma domain is core current domain + * - soma maxdomain is core domain + * o For arrays without core current-domain support: + * - soma domain is core domain + * - soma maxdomain is core domain + * - core current domain is not accessed at the soma level + * + * @tparam T Domain datatype + * @return Pair of [lower, upper] inclusive bounds. + */ + ArrowTable get_soma_domain() { + if (has_current_domain()) { + return _get_core_current_domain(); + } else { + return _get_core_domain(); + } + } + + /** + * Returns the SOMA maxdomain in its entirety, as an Arrow table for return + * to Python/R. + * + * o For arrays with core current-domain support: + * - soma domain is core current domain + * - soma maxdomain is core domain + * o For arrays without core current-domain support: + * - soma domain is core domain + * - soma maxdomain is core domain + * - core current domain is not accessed at the soma level + * + * @tparam T Domain datatype + * @return Pair of [lower, upper] inclusive bounds. + */ + ArrowTable get_soma_maxdomain() { + return _get_core_domain(); + } + + /** + * Returns the core non-empty domain in its entirety, as an Arrow + * table for return to Python/R. + */ + ArrowTable get_non_empty_domain() { + return _get_core_domainish(Domainish::kind_non_empty_domain); + } + + /** + * Code-dedupe helper for core domain, core current domain, and core + * non-empty domain. + */ + ArrowTable _get_core_domainish(enum Domainish which_kind); + + /** + * This enables some code deduplication between core domain, core current + * domain, and core non-empty domain. + */ + template + std::pair _core_domainish_slot( + const std::string& name, enum Domainish which_kind) const { + if (std::is_same_v) { + throw std::runtime_error( + "SOMAArray::_core_domainish_slot: template-specialization " + "failure."); + } + switch (which_kind) { + case Domainish::kind_core_domain: + return _core_domain_slot(name); + break; + case Domainish::kind_core_current_domain: + return _core_current_domain_slot(name); + break; + case Domainish::kind_non_empty_domain: + return non_empty_domain_slot(name); + break; + default: + throw std::runtime_error( + "internal coding error in SOMAArray::_core_domainish_slot"); + } + } + + std::pair _core_domainish_slot_string( + const std::string& name, enum Domainish which_kind) const { + switch (which_kind) { + case Domainish::kind_core_domain: + return _core_domain_slot_string(name); + break; + case Domainish::kind_core_current_domain: + return _core_current_domain_slot(name); + break; + case Domainish::kind_non_empty_domain: + return non_empty_domain_slot_var(name); + break; + default: + throw std::runtime_error( + "internal coding error in SOMAArray::_core_domainish_slot"); + } + } + /** * @brief Get the total number of unique cells in the array. * @@ -896,54 +1070,19 @@ class SOMAArray : public SOMAObject { } /** - * Returns the core current domain at the given dimension. - * - * o For arrays with core current-domain support: - * - soma domain is core current domain - * - soma maxdomain is core domain - * o For arrays without core current-domain support: - * - soma domain is core domain - * - soma maxdomain is core domain - * - core current domain is not accessed at the soma level - * - * @tparam T Domain datatype - * @return Pair of [lower, upper] inclusive bounds. + * Returns the core current domain in its entirety, as an Arrow + * table for return to Python/R. */ - template - std::pair _core_current_domain_slot(const std::string& name) const { - CurrentDomain current_domain = _get_current_domain(); - if (current_domain.is_empty()) { - throw TileDBSOMAError( - "_core_current_domain_slot: internal coding error"); - } - if (current_domain.type() != TILEDB_NDRECTANGLE) { - throw TileDBSOMAError( - "_core_current_domain_slot: found non-rectangle type"); - } - NDRectangle ndrect = current_domain.ndrectangle(); - - // Convert from two-element array (core API) to pair (tiledbsoma API) - std::array arr = ndrect.range(name); - return std::pair(arr[0], arr[1]); + ArrowTable _get_core_current_domain() { + return _get_core_domainish(Domainish::kind_core_current_domain); } /** - * Returns the core current domain at the given dimension. - * - * o For arrays with core current-domain support: - * - soma domain is core current domain - * - soma maxdomain is core domain - * o For arrays without core current-domain support: - * - soma domain is core domain - * - soma maxdomain is core domain - * - core current domain is not accessed at the soma level - * - * @tparam T Domain datatype - * @return Pair of [lower, upper] inclusive bounds. + * Returns the core domain in its entirety, as an Arrow + * table for return to Python/R. */ - template - std::pair _core_domain_slot(const std::string& name) const { - return arr_->schema().domain().dimension(name).domain(); + ArrowTable _get_core_domain() { + return _get_core_domainish(Domainish::kind_core_domain); } /** diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index 774a1287ef..a1926390ab 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -522,19 +522,60 @@ TEST_CASE_METHOD( REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max); } - // Check shape before write + // Check shape before resize int64_t expect = dim_infos[0].dim_max + 1; std::optional actual = soma_dataframe ->maybe_soma_joinid_shape(); REQUIRE(actual.has_value()); REQUIRE(actual.value() == expect); - soma_dataframe->close(); - REQUIRE(soma_dataframe->nnz() == 0); + soma_dataframe->close(); + + // Write data write_sjid_u32_str_data_from(0); + // Check shape after write + soma_dataframe->open(OpenMode::read); + + REQUIRE(soma_dataframe->nnz() == 2); + + expect = dim_infos[0].dim_max + 1; + actual = soma_dataframe->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); + + // Check domainish accessors before resize + ArrowTable non_empty_domain = soma_dataframe->get_non_empty_domain(); + std::vector + ned_sjid = ArrowAdapter::get_table_column_by_name( + non_empty_domain, "soma_joinid"); + + ArrowTable soma_domain = soma_dataframe->get_soma_domain(); + std::vector + dom_sjid = ArrowAdapter::get_table_column_by_name( + soma_domain, "soma_joinid"); + + ArrowTable soma_maxdomain = soma_dataframe->get_soma_maxdomain(); + std::vector + maxdom_sjid = ArrowAdapter::get_table_column_by_name( + soma_maxdomain, "soma_joinid"); + + REQUIRE(ned_sjid == std::vector({1, 2})); + + REQUIRE(dom_sjid == std::vector({0, 100})); + + REQUIRE(maxdom_sjid.size() == 2); + REQUIRE(maxdom_sjid[0] == 0); + if (!use_current_domain) { + REQUIRE(maxdom_sjid[1] == 100); + } else { + REQUIRE(maxdom_sjid[1] > 2000000000); + } + + soma_dataframe->close(); + REQUIRE(soma_dataframe->nnz() == 2); write_sjid_u32_str_data_from(8); REQUIRE(soma_dataframe->nnz() == 4); @@ -588,6 +629,35 @@ TEST_CASE_METHOD( // Implicitly we expect no throw write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); } + + // Check domainish accessors after resize + soma_dataframe->open(OpenMode::read); + + non_empty_domain = soma_dataframe->get_non_empty_domain(); + ned_sjid = ArrowAdapter::get_table_column_by_name( + non_empty_domain, "soma_joinid"); + + soma_domain = soma_dataframe->get_soma_domain(); + dom_sjid = ArrowAdapter::get_table_column_by_name( + soma_domain, "soma_joinid"); + + soma_maxdomain = soma_dataframe->get_soma_maxdomain(); + maxdom_sjid = ArrowAdapter::get_table_column_by_name( + soma_maxdomain, "soma_joinid"); + + if (!use_current_domain) { + REQUIRE(ned_sjid == std::vector({1, 10})); + REQUIRE(dom_sjid == std::vector({0, 100})); + REQUIRE(maxdom_sjid == std::vector({0, 100})); + } else { + REQUIRE(ned_sjid == std::vector({1, 102})); + REQUIRE(dom_sjid == std::vector({0, 199})); + REQUIRE(maxdom_sjid.size() == 2); + REQUIRE(maxdom_sjid[0] == 0); + REQUIRE(maxdom_sjid[1] > 2000000000); + } + + soma_dataframe->close(); } } @@ -641,6 +711,7 @@ TEST_CASE_METHOD( ->maybe_soma_joinid_shape(); REQUIRE(actual.has_value()); REQUIRE(actual.value() == expect); + soma_dataframe->close(); REQUIRE(soma_dataframe->nnz() == 0); @@ -652,6 +723,51 @@ TEST_CASE_METHOD( write_sjid_u32_str_data_from(8); REQUIRE(soma_dataframe->nnz() == 4); + // Check domainish accessors before resize + soma_dataframe->open(OpenMode::read); + + ArrowTable non_empty_domain = soma_dataframe->get_non_empty_domain(); + std::vector + ned_sjid = ArrowAdapter::get_table_column_by_name( + non_empty_domain, "soma_joinid"); + std::vector + ned_u32 = ArrowAdapter::get_table_column_by_name( + non_empty_domain, "myuint32"); + + ArrowTable soma_domain = soma_dataframe->get_soma_domain(); + std::vector + dom_sjid = ArrowAdapter::get_table_column_by_name( + soma_domain, "soma_joinid"); + std::vector + dom_u32 = ArrowAdapter::get_table_column_by_name( + soma_domain, "myuint32"); + + ArrowTable soma_maxdomain = soma_dataframe->get_soma_maxdomain(); + std::vector + maxdom_sjid = ArrowAdapter::get_table_column_by_name( + soma_maxdomain, "soma_joinid"); + std::vector + maxdom_u32 = ArrowAdapter::get_table_column_by_name( + soma_maxdomain, "myuint32"); + + REQUIRE(ned_sjid == std::vector({1, 10})); + REQUIRE(ned_u32 == std::vector({1234, 5678})); + + REQUIRE(dom_sjid == std::vector({0, 100})); + REQUIRE(dom_u32 == std::vector({0, 10000})); + + REQUIRE(maxdom_sjid.size() == 2); + REQUIRE(maxdom_u32.size() == 2); + + REQUIRE(maxdom_u32[0] == 0); + if (!use_current_domain) { + REQUIRE(maxdom_u32[1] == 10000); + } else { + REQUIRE(maxdom_u32[1] > 2000000000); + } + + soma_dataframe->close(); + // Check shape after write soma_dataframe = open(OpenMode::read); expect = dim_infos[0].dim_max + 1; @@ -707,6 +823,55 @@ TEST_CASE_METHOD( // Implicitly we expect no throw write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); } + + // Check domainish accessors after resize + soma_dataframe->open(OpenMode::read); + + non_empty_domain = soma_dataframe->get_non_empty_domain(); + ned_sjid = ArrowAdapter::get_table_column_by_name( + non_empty_domain, "soma_joinid"); + ned_u32 = ArrowAdapter::get_table_column_by_name( + non_empty_domain, "myuint32"); + + soma_domain = soma_dataframe->get_soma_domain(); + dom_sjid = ArrowAdapter::get_table_column_by_name( + soma_domain, "soma_joinid"); + dom_u32 = ArrowAdapter::get_table_column_by_name( + soma_domain, "myuint32"); + + soma_maxdomain = soma_dataframe->get_soma_maxdomain(); + maxdom_sjid = ArrowAdapter::get_table_column_by_name( + soma_maxdomain, "soma_joinid"); + maxdom_u32 = ArrowAdapter::get_table_column_by_name( + soma_maxdomain, "myuint32"); + + if (!use_current_domain) { + REQUIRE(ned_sjid == std::vector({1, 10})); + REQUIRE(ned_u32 == std::vector({1234, 5678})); + + REQUIRE(dom_sjid == std::vector({0, 100})); + REQUIRE(dom_u32 == std::vector({0, 10000})); + + REQUIRE(maxdom_sjid == std::vector({0, 100})); + REQUIRE(maxdom_u32 == std::vector({0, 10000})); + + } else { + REQUIRE(ned_sjid == std::vector({1, 102})); + REQUIRE(ned_u32 == std::vector({1234, 5678})); + + REQUIRE(dom_sjid == std::vector({0, 199})); + REQUIRE(dom_u32 == std::vector({0, 10000})); + + REQUIRE(maxdom_sjid.size() == 2); + REQUIRE(maxdom_sjid[0] == 0); + REQUIRE(maxdom_sjid[1] > 2000000000); + + REQUIRE(maxdom_u32.size() == 2); + REQUIRE(maxdom_u32[0] == 0); + REQUIRE(maxdom_u32[1] > 2000000000); + } + + soma_dataframe->close(); } } @@ -782,6 +947,41 @@ TEST_CASE_METHOD( actual = soma_dataframe->maybe_soma_joinid_shape(); REQUIRE(actual.has_value()); REQUIRE(actual.value() == expect); + + // Check domainish accessors before resize + ArrowTable non_empty_domain = soma_dataframe->get_non_empty_domain(); + std::vector + ned_sjid = ArrowAdapter::get_table_column_by_name( + non_empty_domain, "soma_joinid"); + std::vector + ned_str = ArrowAdapter::get_table_string_column_by_name( + non_empty_domain, "mystring"); + + ArrowTable soma_domain = soma_dataframe->get_soma_domain(); + std::vector + dom_sjid = ArrowAdapter::get_table_column_by_name( + soma_domain, "soma_joinid"); + std::vector + dom_str = ArrowAdapter::get_table_string_column_by_name( + soma_domain, "mystring"); + + ArrowTable soma_maxdomain = soma_dataframe->get_soma_maxdomain(); + std::vector + maxdom_sjid = ArrowAdapter::get_table_column_by_name( + soma_maxdomain, "soma_joinid"); + std::vector + maxdom_str = ArrowAdapter::get_table_string_column_by_name( + soma_maxdomain, "mystring"); + + REQUIRE(ned_sjid == std::vector({1, 10})); + REQUIRE(ned_str == std::vector({"apple", "bat"})); + + REQUIRE(dom_sjid == std::vector({0, 100})); + REQUIRE(dom_str == std::vector({"", ""})); + + REQUIRE(maxdom_sjid == std::vector({0, 100})); + REQUIRE(maxdom_str == std::vector({"", ""})); + soma_dataframe->close(); // Resize @@ -831,6 +1031,41 @@ TEST_CASE_METHOD( // Implicitly we expect no throw write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); } + + // Check domainish accessors after resize + soma_dataframe->open(OpenMode::read, TimestampRange(0, 2)); + + non_empty_domain = soma_dataframe->get_non_empty_domain(); + ned_sjid = ArrowAdapter::get_table_column_by_name( + non_empty_domain, "soma_joinid"); + ned_str = ArrowAdapter::get_table_string_column_by_name( + non_empty_domain, "mystring"); + + soma_domain = soma_dataframe->get_soma_domain(); + dom_sjid = ArrowAdapter::get_table_column_by_name( + soma_domain, "soma_joinid"); + dom_str = ArrowAdapter::get_table_string_column_by_name( + soma_domain, "mystring"); + + soma_maxdomain = soma_dataframe->get_soma_maxdomain(); + maxdom_sjid = ArrowAdapter::get_table_column_by_name( + soma_maxdomain, "soma_joinid"); + maxdom_str = ArrowAdapter::get_table_string_column_by_name( + soma_maxdomain, "mystring"); + + REQUIRE(ned_sjid == std::vector({0, 0})); + REQUIRE(ned_str == std::vector({"", ""})); + + REQUIRE(dom_sjid == std::vector({0, 100})); + REQUIRE(dom_str == std::vector({"", ""})); + + REQUIRE(maxdom_sjid == std::vector({0, 100})); + REQUIRE(maxdom_str == std::vector({"", ""})); + + REQUIRE(ned_str == std::vector({"", ""})); + REQUIRE(dom_str == std::vector({"", ""})); + + soma_dataframe->close(); } } @@ -886,6 +1121,33 @@ TEST_CASE_METHOD( std::optional actual = soma_dataframe ->maybe_soma_joinid_shape(); REQUIRE(!actual.has_value()); + + // Check domainish accessors before resize + ArrowTable non_empty_domain = soma_dataframe->get_non_empty_domain(); + std::vector + ned_str = ArrowAdapter::get_table_string_column_by_name( + non_empty_domain, "mystring"); + + ArrowTable soma_domain = soma_dataframe->get_soma_domain(); + std::vector + dom_str = ArrowAdapter::get_table_string_column_by_name( + soma_domain, "mystring"); + + ArrowTable soma_maxdomain = soma_dataframe->get_soma_maxdomain(); + std::vector + maxdom_str = ArrowAdapter::get_table_string_column_by_name( + soma_maxdomain, "mystring"); + + REQUIRE(ned_str == std::vector({"", ""})); + + if (!use_current_domain) { + REQUIRE(dom_str == std::vector({"", ""})); + REQUIRE(maxdom_str == std::vector({"", ""})); + } else { + REQUIRE(dom_str == std::vector({"", "\xff"})); + REQUIRE(maxdom_str == std::vector({"", ""})); + } + soma_dataframe->close(); REQUIRE(soma_dataframe->nnz() == 0); @@ -945,5 +1207,32 @@ TEST_CASE_METHOD( // Implicitly we expect no throw write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); } + + // Check domainish accessors after resize + soma_dataframe->open(OpenMode::read, TimestampRange(0, 2)); + + non_empty_domain = soma_dataframe->get_non_empty_domain(); + ned_str = ArrowAdapter::get_table_string_column_by_name( + non_empty_domain, "mystring"); + + soma_domain = soma_dataframe->get_soma_domain(); + dom_str = ArrowAdapter::get_table_string_column_by_name( + soma_domain, "mystring"); + + soma_maxdomain = soma_dataframe->get_soma_maxdomain(); + maxdom_str = ArrowAdapter::get_table_string_column_by_name( + soma_maxdomain, "mystring"); + + REQUIRE(ned_str == std::vector({"", ""})); + + if (!use_current_domain) { + REQUIRE(dom_str == std::vector({"", ""})); + REQUIRE(maxdom_str == std::vector({"", ""})); + } else { + REQUIRE(dom_str == std::vector({"", "\xff"})); + REQUIRE(maxdom_str == std::vector({"", ""})); + } + + soma_dataframe->close(); } } From 638c9f7a1f744bbdf5657b358bb6fce1931028a9 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 20 Sep 2024 13:31:27 -0400 Subject: [PATCH 2/7] Update libtiledbsoma/src/soma/soma_array.h [skip ci] Co-authored-by: nguyenv --- libtiledbsoma/src/soma/soma_array.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index b841d63ec6..bde1d698be 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -936,13 +936,10 @@ class SOMAArray : public SOMAObject { switch (which_kind) { case Domainish::kind_core_domain: return _core_domain_slot_string(name); - break; case Domainish::kind_core_current_domain: return _core_current_domain_slot(name); - break; case Domainish::kind_non_empty_domain: return non_empty_domain_slot_var(name); - break; default: throw std::runtime_error( "internal coding error in SOMAArray::_core_domainish_slot"); From bafd673278ed1d9879e203db342221dce13bdf0c Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 20 Sep 2024 13:32:15 -0400 Subject: [PATCH 3/7] code-review feedback [skip ci] Co-authored-by: nguyenv --- libtiledbsoma/src/soma/soma_array.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index bde1d698be..c4c956959e 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -918,13 +918,10 @@ class SOMAArray : public SOMAObject { switch (which_kind) { case Domainish::kind_core_domain: return _core_domain_slot(name); - break; case Domainish::kind_core_current_domain: return _core_current_domain_slot(name); - break; case Domainish::kind_non_empty_domain: return non_empty_domain_slot(name); - break; default: throw std::runtime_error( "internal coding error in SOMAArray::_core_domainish_slot"); From 99b0079377b1331571cee46862d5ef222bd81525 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 20 Sep 2024 13:32:24 -0400 Subject: [PATCH 4/7] code-review feedback [skip ci] Co-authored-by: nguyenv --- libtiledbsoma/src/soma/soma_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index c4c956959e..b664f26bfd 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -939,7 +939,7 @@ class SOMAArray : public SOMAObject { return non_empty_domain_slot_var(name); default: throw std::runtime_error( - "internal coding error in SOMAArray::_core_domainish_slot"); + "internal coding error in SOMAArray::_core_domainish_slot_string"); } } From 69a3904b7a32620c952d2bc9e25185c3dc7afda0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 20 Sep 2024 13:33:50 -0400 Subject: [PATCH 5/7] code-review feedback [skip ci] --- libtiledbsoma/src/soma/soma_array.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index b841d63ec6..ae0a3f4965 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -945,7 +945,8 @@ class SOMAArray : public SOMAObject { break; default: throw std::runtime_error( - "internal coding error in SOMAArray::_core_domainish_slot"); + "internal coding error in SOMAArray::_core_domainish_slot: " + "unknown kind"); } } From 804db2b41d3c4f31e41fd08bbc55b5730c79a794 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 20 Sep 2024 13:38:29 -0400 Subject: [PATCH 6/7] code-review feedback] --- libtiledbsoma/src/soma/soma_array.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 464b7ee956..19ab7eb7d0 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -1228,10 +1228,10 @@ ArrowTable SOMAArray::_get_core_domainish(enum Domainish which_kind) { } if (child == nullptr) { - // throw TileDBSOMAError(fmt::format( - // "WIP {} {}", - // core_dim.name(), - // tiledb::impl::type_to_str(core_type_code))); + throw TileDBSOMAError(fmt::format( + "SOMAArray::_get_core_domainish:dim {} has unhandled type {}", + core_dim.name(), + tiledb::impl::type_to_str(core_type_code))); } arrow_array->children[i] = child; } From cc0b03f33cd4ba6c1bb3a37d485d88b55499b4b3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 20 Sep 2024 13:54:37 -0400 Subject: [PATCH 7/7] code-review feedback Co-authored-by: nguyenv --- libtiledbsoma/src/soma/soma_array.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 19ab7eb7d0..fdfdfa9308 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -1224,14 +1224,11 @@ ArrowTable SOMAArray::_get_core_domainish(enum Domainish which_kind) { break; default: - break; - } - - if (child == nullptr) { - throw TileDBSOMAError(fmt::format( - "SOMAArray::_get_core_domainish:dim {} has unhandled type {}", - core_dim.name(), - tiledb::impl::type_to_str(core_type_code))); + throw TileDBSOMAError(fmt::format( + "SOMAArray::_get_core_domainish:dim {} has unhandled type " + "{}", + core_dim.name(), + tiledb::impl::type_to_str(core_type_code))); } arrow_array->children[i] = child; }