From 804c87e66ab56e12429886754310d327f23924b3 Mon Sep 17 00:00:00 2001 From: XanthosXanthopoulos Date: Fri, 6 Dec 2024 14:27:20 +0200 Subject: [PATCH] Replace string_view with string when returning column name, add current domain checks, replace vector with span when selecting points --- libtiledbsoma/src/soma/soma_attribute.cc | 47 ++- libtiledbsoma/src/soma/soma_attribute.h | 11 +- libtiledbsoma/src/soma/soma_column.cc | 2 +- libtiledbsoma/src/soma/soma_column.h | 206 +++++++++++-- libtiledbsoma/src/soma/soma_dimension.cc | 274 +++++++++++++++++- libtiledbsoma/src/soma/soma_dimension.h | 11 +- .../src/soma/soma_geometry_column.cc | 113 +++++++- libtiledbsoma/src/soma/soma_geometry_column.h | 14 +- libtiledbsoma/test/unit_soma_column.cc | 39 +-- 9 files changed, 631 insertions(+), 86 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_attribute.cc b/libtiledbsoma/src/soma/soma_attribute.cc index 6ca08e3798..b9d91985de 100644 --- a/libtiledbsoma/src/soma/soma_attribute.cc +++ b/libtiledbsoma/src/soma/soma_attribute.cc @@ -18,7 +18,9 @@ void SOMAAttribute::_set_dim_points( const SOMAContext&, const std::any&) const { throw TileDBSOMAError(std::format( - "[SOMAAttribute] Column with name {} is not an index column", name())); + "[SOMAAttribute][_set_dim_points] Column with name {} is not an index " + "column", + name())); } void SOMAAttribute::_set_dim_ranges( @@ -26,35 +28,62 @@ void SOMAAttribute::_set_dim_ranges( const SOMAContext&, const std::any&) const { throw TileDBSOMAError(std::format( - "[SOMAAttribute] Column with name {} is not an index column", name())); + "[SOMAAttribute][_set_dim_ranges] Column with name {} is not an index " + "column", + name())); } void SOMAAttribute::_set_current_domain_slot( - NDRectangle&, const std::vector&) const { + NDRectangle&, std::span) const { throw TileDBSOMAError(std::format( - "[SOMAAttribute] Column with name {} is not an index column", name())); + "[SOMAAttribute][_set_current_domain_slot] Column with name {} is not " + "an index column", + name())); } +std::pair SOMAAttribute::_can_set_current_domain_slot( + std::optional&, std::span) const { + throw TileDBSOMAError(std::format( + "[SOMAAttribute][_set_current_domain_slot] Column with name {} is not " + "an index column", + name())); +}; + std::any SOMAAttribute::_core_domain_slot() const { throw TileDBSOMAError(std::format( - "[SOMAAttribute] Column with name {} is not an index column", name())); + "[SOMAAttribute][_core_domain_slot] Column with name {} is not an " + "index column", + name())); } std::any SOMAAttribute::_non_empty_domain_slot(Array&) const { throw TileDBSOMAError(std::format( - "[SOMAAttribute] Column with name {} is not an index column", name())); + "[SOMAAttribute][_non_empty_domain_slot] Column with name {} is not an " + "index column", + name())); } std::any SOMAAttribute::_core_current_domain_slot( const SOMAContext&, Array&) const { throw TileDBSOMAError(std::format( - "[SOMAAttribute] Column with name {} is not an index column", name())); + "[SOMAAttribute][_core_current_domain_slot] Column with name {} is not " + "an index column", + name())); +} + +std::any SOMAAttribute::_core_current_domain_slot(NDRectangle&) const { + throw TileDBSOMAError(std::format( + "[SOMAAttribute][_core_current_domain_slot] Column with name {} is not " + "an index column", + name())); } ArrowArray* SOMAAttribute::arrow_domain_slot( const SOMAContext&, Array&, enum Domainish) const { throw TileDBSOMAError(std::format( - "[SOMAAttribute] Column with name {} is not an index column", name())); + "[SOMAAttribute][arrow_domain_slot] Column with name {} is not an " + "index column", + name())); } ArrowSchema* SOMAAttribute::arrow_schema_slot( @@ -63,4 +92,4 @@ ArrowSchema* SOMAAttribute::arrow_schema_slot( attribute, *ctx.tiledb_ctx(), array) .release(); } -} // namespace tiledbsoma +} // namespace tiledbsoma \ No newline at end of file diff --git a/libtiledbsoma/src/soma/soma_attribute.h b/libtiledbsoma/src/soma/soma_attribute.h index e11ac7b435..01dcc83256 100644 --- a/libtiledbsoma/src/soma/soma_attribute.h +++ b/libtiledbsoma/src/soma/soma_attribute.h @@ -28,7 +28,7 @@ class SOMAAttribute : public virtual SOMAColumn { , enumeration(enumeration) { } - virtual inline std::string_view name() const override { + virtual inline std::string name() const override { return attribute.name(); } @@ -95,7 +95,11 @@ class SOMAAttribute : public virtual SOMAColumn { virtual void _set_current_domain_slot( NDRectangle& rectangle, - const std::vector& domain) const override; + std::span domain) const override; + + virtual std::pair _can_set_current_domain_slot( + std::optional& rectangle, + std::span new_domain) const override; virtual std::any _core_domain_slot() const override; @@ -104,6 +108,9 @@ class SOMAAttribute : public virtual SOMAColumn { virtual std::any _core_current_domain_slot( const SOMAContext& ctx, Array& array) const override; + virtual std::any _core_current_domain_slot( + NDRectangle& ndrect) const override; + Attribute attribute; std::optional enumeration; }; diff --git a/libtiledbsoma/src/soma/soma_column.cc b/libtiledbsoma/src/soma/soma_column.cc index d4896c9589..95a1cfecc1 100644 --- a/libtiledbsoma/src/soma/soma_column.cc +++ b/libtiledbsoma/src/soma/soma_column.cc @@ -59,4 +59,4 @@ SOMAColumn::core_current_domain_slot( throw TileDBSOMAError(e.what()); } } -} // namespace tiledbsoma +} // namespace tiledbsoma \ No newline at end of file diff --git a/libtiledbsoma/src/soma/soma_column.h b/libtiledbsoma/src/soma/soma_column.h index 19a591c08f..ec51bf92ca 100644 --- a/libtiledbsoma/src/soma/soma_column.h +++ b/libtiledbsoma/src/soma/soma_column.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -67,7 +68,7 @@ class SOMAColumn { /** * Get the SOMAColumn name as defined in schema. */ - virtual std::string_view name() const = 0; + virtual std::string name() const = 0; /** * If true, this column is used as index. @@ -125,6 +126,10 @@ class SOMAColumn { /** * Get the domain kind of the SOMAColumn as an ArrowArray for use with * R/Python API. + * + * @param ctx + * @param array + * @param which_kind */ virtual ArrowArray* arrow_domain_slot( const SOMAContext& ctx, @@ -133,12 +138,20 @@ class SOMAColumn { /** * Get the SOMAColumn encoded as an ArrowSchema for use with R/Python API. + * + * @param ctx + * @param array */ virtual ArrowSchema* arrow_schema_slot( const SOMAContext& ctx, Array& array) = 0; /** * Get the domain kind of the SOMAColumn. + * + * @tparam T + * @param ctx + * @param array + * @param which_kind */ template std::pair domain_slot( @@ -160,19 +173,102 @@ class SOMAColumn { /** * Set the current domain of this SOMAColumn. * + * @tparam T * @param rectangle The current domain rectangle to modify. - * @param domain A vector of void pointers to the the current domain data - * buffers. + * @param domain A vector of the n-dimensional domain in the form + * [dim_0_min, dim_1_min, ..., dim_n_max] */ + template void set_current_domain_slot( - NDRectangle& rectangle, const std::vector& domain) const { + NDRectangle& rectangle, const std::vector& domain) const { if (!isIndexColumn()) { throw TileDBSOMAError(std::format( - "[SOMAColumn] Column with name {} is not an index column", + "[SOMAColumn][set_current_domain_slot] Column with name {} is " + "not an index column", + name())); + } + + if (domain.size() % 2 != 0) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][set_current_domain_slot] Provided domain for " + "column {} has missing values", + name())); + } + + std::vector transformed_domain; + size_t dim_count = domain.size() / 2; + for (size_t i = 0; i < dim_count; ++i) { + transformed_domain.push_back(std::make_any>( + std::array({domain[i], domain[i + dim_count]}))); + } + + try { + _set_current_domain_slot(rectangle, transformed_domain); + } catch (const std::exception& e) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][set_current_domain_slot] Failed on \"{}\" with " + "error \"{}\"", + name(), + e.what())); + } + } + + /** + * Set the multi-type current domain of this SOMAColumn. + * + * @tparam T + * @param rectangle The current domain rectangle to modify. + * @param domain A vector holding std::arrays with 2 elements each [min, + * max], casted as std::any + */ + void set_current_domain_slot( + NDRectangle& rectangle, const std::vector& domain) const { + if (!isIndexColumn()) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][set_current_domain_slot] Column with name {} is " + "not an index column", name())); } - _set_current_domain_slot(rectangle, domain); + try { + _set_current_domain_slot(rectangle, domain); + } catch (const std::exception& e) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][set_current_domain_slot] Failed on \"{}\" with " + "error \"{}\"", + name(), + e.what())); + } + } + + /** + * Test if the multi-type current domain of this SOMAColumn can be set with + * the supplied new current domain. + * + * @tparam T + * @param rectangle The current domain rectangle to modify. + * @param domain A vector holding std::arrays with 2 elements each [min, + * max], casted as std::any + */ + std::pair can_set_current_domain_slot( + std::optional& rectangle, + const std::vector& domain) const { + if (!isIndexColumn()) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][set_current_domain_slot] Column with name {} is " + "not an index column", + name())); + } + + try { + return _can_set_current_domain_slot(rectangle, domain); + } catch (const std::exception& e) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][can_set_current_domain_slot] Failed on \"{}\" " + "with error \"{}\"", + name(), + e.what())); + } } /** @@ -182,6 +278,7 @@ class SOMAColumn { * * @tparam T * @param query + * @param ctx * @param point */ template @@ -195,8 +292,20 @@ class SOMAColumn { name())); } - this->_set_dim_points( - query, ctx, std::make_any>(std::vector({point}))); + T points[] = {point}; + + try { + this->_set_dim_points( + query, + ctx, + std::make_any>(std::span(points))); + } catch (const std::exception& e) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][set_dim_point] Failed on \"{}\" with error " + "\"{}\"", + name(), + e.what())); + } } /** @@ -206,21 +315,30 @@ class SOMAColumn { * * @tparam T * @param query + * @param ctx * @param points */ template void set_dim_points( const std::unique_ptr& query, const SOMAContext& ctx, - const std::vector& points) const { + std::span points) const { if (!isIndexColumn()) { throw TileDBSOMAError(std::format( "[SOMAColumn] Column with name {} is not an index column", name())); } - this->_set_dim_points( - query, ctx, std::make_any>(points)); + try { + this->_set_dim_points( + query, ctx, std::make_any>(points)); + } catch (const std::exception& e) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][set_dim_points] Failed on \"{}\" with error " + "\"{}\"", + name(), + e.what())); + } } /** @@ -243,8 +361,18 @@ class SOMAColumn { name())); } - this->_set_dim_ranges( - query, ctx, std::make_any>>(ranges)); + try { + this->_set_dim_ranges( + query, + ctx, + std::make_any>>(ranges)); + } catch (const std::exception& e) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][set_dim_ranges] Failed on \"{}\" with error " + "\"{}\"", + name(), + e.what())); + } } /** @@ -266,7 +394,11 @@ class SOMAColumn { try { return std::any_cast>(_core_domain_slot()); } catch (const std::exception& e) { - throw TileDBSOMAError(e.what()); + throw TileDBSOMAError(std::format( + "[SOMAColumn][core_domain_slot] Failed on \"{}\" with error " + "\"{}\"", + name(), + e.what())); } } @@ -281,8 +413,12 @@ class SOMAColumn { return std::any_cast>( _non_empty_domain_slot(array)); } catch (const std::exception& e) { - throw TileDBSOMAError(e.what()); - }; + throw TileDBSOMAError(std::format( + "[SOMAColumn][non_empty_domain_slot] Failed on \"{}\" with " + "error \"{}\"", + name(), + e.what())); + } } /** @@ -305,6 +441,35 @@ class SOMAColumn { try { return std::any_cast>( _core_current_domain_slot(ctx, array)); + } catch (const std::exception& e) { + throw TileDBSOMAError(std::format( + "[SOMAColumn][core_current_domain_slot] Failed on \"{}\" with " + "error \"{}\"", + name(), + e.what())); + } + } + + /** + * Returns the core current domain of this column from the supplied + * NDRectangle. + * + * o For arrays with core current-domain support: + * - soma domain is core current domain + * - soma maxdomain is core domain + * o For arrays without core current-domain support: + * - soma domain is core domain + * - soma maxdomain is core domain + * - core current domain is not accessed at the soma level + * + * @tparam T Domain datatype + * @return Pair of [lower, upper] inclusive bounds. + */ + template + std::pair core_current_domain_slot(NDRectangle& ndrect) const { + try { + return std::any_cast>( + _core_current_domain_slot(ndrect)); } catch (const std::exception& e) { throw TileDBSOMAError(e.what()); } @@ -322,8 +487,11 @@ class SOMAColumn { const std::any& ranges) const = 0; virtual void _set_current_domain_slot( - NDRectangle& rectangle, - const std::vector& domain) const = 0; + NDRectangle& rectangle, std::span domain) const = 0; + + virtual std::pair _can_set_current_domain_slot( + std::optional& rectangle, + std::span new_domain) const = 0; virtual std::any _core_domain_slot() const = 0; @@ -331,6 +499,8 @@ class SOMAColumn { virtual std::any _core_current_domain_slot( const SOMAContext& ctx, Array& array) const = 0; + + virtual std::any _core_current_domain_slot(NDRectangle& ndrect) const = 0; }; template <> diff --git a/libtiledbsoma/src/soma/soma_dimension.cc b/libtiledbsoma/src/soma/soma_dimension.cc index 91a0729832..1996811f02 100644 --- a/libtiledbsoma/src/soma/soma_dimension.cc +++ b/libtiledbsoma/src/soma/soma_dimension.cc @@ -23,31 +23,38 @@ void SOMADimension::_set_dim_points( switch (dimension.type()) { case TILEDB_UINT8: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_UINT16: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_UINT32: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_UINT64: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_INT8: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_INT16: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_INT32: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_DATETIME_YEAR: case TILEDB_DATETIME_MONTH: @@ -64,22 +71,26 @@ void SOMADimension::_set_dim_points( case TILEDB_DATETIME_AS: case TILEDB_INT64: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_FLOAT32: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; case TILEDB_FLOAT64: query->select_points( - dimension.name(), std::any_cast>(points)); + dimension.name(), + std::any_cast>(points)); break; + case TILEDB_STRING_UTF8: case TILEDB_STRING_ASCII: case TILEDB_CHAR: case TILEDB_BLOB: query->select_points( dimension.name(), - std::any_cast>(points)); + std::any_cast>(points)); break; default: throw TileDBSOMAError(std::format( @@ -169,6 +180,8 @@ void SOMADimension::_set_dim_ranges( case TILEDB_STRING_ASCII: case TILEDB_CHAR: case TILEDB_BLOB: + case TILEDB_GEOM_WKT: + case TILEDB_GEOM_WKB: query->select_ranges( dimension.name(), std::any_cast>>( @@ -182,9 +195,229 @@ void SOMADimension::_set_dim_ranges( } void SOMADimension::_set_current_domain_slot( - NDRectangle& rectangle, const std::vector& domain) const { - ArrowAdapter::set_current_domain_slot( - dimension.type(), domain.front(), rectangle, dimension.name()); + NDRectangle& rectangle, std::span domain) const { + if (domain.size() != 1) { + throw TileDBSOMAError(std::format( + "[SOMADimension][_set_current_domain_slot] Invalid domain size. " + "Expected 1, got {}", + domain.size())); + } + + switch (dimension.type()) { + case TILEDB_UINT8: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_UINT16: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_UINT32: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_UINT64: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_INT8: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_INT16: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_INT32: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_DATETIME_YEAR: + case TILEDB_DATETIME_MONTH: + case TILEDB_DATETIME_WEEK: + case TILEDB_DATETIME_DAY: + case TILEDB_DATETIME_HR: + case TILEDB_DATETIME_MIN: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: + case TILEDB_DATETIME_PS: + case TILEDB_DATETIME_FS: + case TILEDB_DATETIME_AS: + case TILEDB_INT64: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_FLOAT32: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_FLOAT64: { + auto dom = std::any_cast>(domain[0]); + rectangle.set_range(dimension.name(), dom[0], dom[1]); + } break; + case TILEDB_STRING_ASCII: + case TILEDB_STRING_UTF8: + case TILEDB_CHAR: + case TILEDB_BLOB: + case TILEDB_GEOM_WKT: + case TILEDB_GEOM_WKB: { + auto dom = std::any_cast>(domain[0]); + if (dom[0] == "" && dom[1] == "") { + rectangle.set_range(dimension.name(), "", "\x7f"); + } else { + throw TileDBSOMAError(std::format( + "[SOMADimension][_set_current_domain_slot] domain (\"{}\", " + "\"{}\") cannot be set for " + "string index columns: please use " + "(\"\", \"\")", + dom[0], + dom[1])); + } + + } break; + default: + throw TileDBSOMAError(std::format( + "[SOMADimension][_set_current_domain_slot] Unknown datatype {}", + tiledb::impl::type_to_str(dimension.type()))); + } +} + +std::pair SOMADimension::_can_set_current_domain_slot( + std::optional& rectangle, + std::span new_domain) const { + if (new_domain.size() != 1) { + throw TileDBSOMAError(std::format( + "[SOMADimension][_can_set_current_domain_slot] Expected domain " + "size is 1, found {}", + new_domain.size())); + } + + auto comparator = + [&]( + const std::array& new_dom) -> std::pair { + if (new_dom[0] > new_dom[1]) { + return std::pair( + false, + std::format( + "index-column name {}: new lower > new upper", + dimension.name())); + } + + // If we're checking against the core current domain: the user-provided + // domain must contain the core current domain. + // + // If we're checking against the core (max) domain: the user-provided + // domain must be contained within the core (max) domain. + + if (rectangle.has_value()) { + auto dom = rectangle.value().range(dimension.name()); + + if (new_dom[0] > dom[0]) { + return std::pair( + false, + std::format( + "index-column name {}: new lower > old lower (downsize " + "is unsupported)", + dimension.name())); + } + if (new_dom[1] < dom[1]) { + return std::pair( + false, + std::format( + "index-column name {}: new upper < old upper (downsize " + "is unsupported)", + dimension.name())); + } + } else { + auto dom = std::any_cast>(_core_domain_slot()); + + if (new_dom[0] < dom.first) { + return std::pair( + false, + std::format( + "index-column name {}: new lower < limit lower", + dimension.name())); + } + if (new_dom[1] > dom.second) { + return std::pair( + false, + std::format( + "index-column name {}: new upper > limit upper", + dimension.name())); + } + } + + return std::pair(true, ""); + }; + + switch (dimension.type()) { + case TILEDB_UINT8: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_UINT16: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_UINT32: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_UINT64: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_INT8: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_INT16: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_INT32: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_DATETIME_YEAR: + case TILEDB_DATETIME_MONTH: + case TILEDB_DATETIME_WEEK: + case TILEDB_DATETIME_DAY: + case TILEDB_DATETIME_HR: + case TILEDB_DATETIME_MIN: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: + case TILEDB_DATETIME_PS: + case TILEDB_DATETIME_FS: + case TILEDB_DATETIME_AS: + case TILEDB_INT64: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_FLOAT32: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_FLOAT64: + return comparator( + std::any_cast>(new_domain[0])); + case TILEDB_STRING_ASCII: + case TILEDB_STRING_UTF8: + case TILEDB_CHAR: + case TILEDB_BLOB: + case TILEDB_GEOM_WKT: + case TILEDB_GEOM_WKB: { + auto dom = std::any_cast>(new_domain[0]); + if (dom[0] != "" || dom[1] != "") { + return std::pair( + false, + "domain cannot be set for string index columns: please use " + "(\"\", \"\")"); + } + + return std::pair(true, ""); + } + default: + throw TileDBSOMAError(std::format( + "[SOMADimension][_can_set_current_domain_slot] Unknown dataype " + "{}", + tiledb::impl::type_to_str(dimension.type()))); + } } std::any SOMADimension::_core_domain_slot() const { @@ -288,6 +521,8 @@ std::any SOMADimension::_non_empty_domain_slot(Array& array) const { case TILEDB_STRING_UTF8: case TILEDB_BLOB: case TILEDB_CHAR: + case TILEDB_GEOM_WKB: + case TILEDB_GEOM_WKT: return std::make_any>( array.non_empty_domain_var(dimension.name())); default: @@ -305,6 +540,10 @@ std::any SOMADimension::_core_current_domain_slot( *ctx.tiledb_ctx(), array.schema()); NDRectangle ndrect = current_domain.ndrectangle(); + return _core_current_domain_slot(ndrect); +} + +std::any SOMADimension::_core_current_domain_slot(NDRectangle& ndrect) const { switch (dimension.type()) { case TILEDB_UINT8: { std::array domain = ndrect.range( @@ -382,7 +621,9 @@ std::any SOMADimension::_core_current_domain_slot( case TILEDB_STRING_UTF8: case TILEDB_STRING_ASCII: case TILEDB_CHAR: - case TILEDB_BLOB: { + case TILEDB_BLOB: + case TILEDB_GEOM_WKT: + case TILEDB_GEOM_WKB: { std::array domain = ndrect.range( dimension.name()); return std::make_any>( @@ -451,6 +692,7 @@ ArrowArray* SOMADimension::arrow_domain_slot( return ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); case TILEDB_STRING_ASCII: + case TILEDB_STRING_UTF8: case TILEDB_CHAR: case TILEDB_GEOM_WKB: case TILEDB_GEOM_WKT: @@ -471,4 +713,4 @@ ArrowSchema* SOMADimension::arrow_schema_slot(const SOMAContext&, Array&) { .release(); } -} // namespace tiledbsoma +} // namespace tiledbsoma \ No newline at end of file diff --git a/libtiledbsoma/src/soma/soma_dimension.h b/libtiledbsoma/src/soma/soma_dimension.h index 14c5539a37..4c14e85d59 100644 --- a/libtiledbsoma/src/soma/soma_dimension.h +++ b/libtiledbsoma/src/soma/soma_dimension.h @@ -25,7 +25,7 @@ class SOMADimension : public virtual SOMAColumn { : dimension(dimension) { } - virtual inline std::string_view name() const override { + virtual inline std::string name() const override { return dimension.name(); } @@ -88,7 +88,11 @@ class SOMADimension : public virtual SOMAColumn { virtual void _set_current_domain_slot( NDRectangle& rectangle, - const std::vector& domain) const override; + std::span domain) const override; + + virtual std::pair _can_set_current_domain_slot( + std::optional& rectangle, + std::span new_domain) const override; virtual std::any _core_domain_slot() const override; @@ -97,6 +101,9 @@ class SOMADimension : public virtual SOMAColumn { virtual std::any _core_current_domain_slot( const SOMAContext& ctx, Array& array) const override; + virtual std::any _core_current_domain_slot( + NDRectangle& ndrect) const override; + private: Dimension dimension; }; diff --git a/libtiledbsoma/src/soma/soma_geometry_column.cc b/libtiledbsoma/src/soma/soma_geometry_column.cc index 87958add95..a4d836ce31 100644 --- a/libtiledbsoma/src/soma/soma_geometry_column.cc +++ b/libtiledbsoma/src/soma/soma_geometry_column.cc @@ -5,7 +5,6 @@ namespace tiledbsoma { std::shared_ptr SOMAGeometryColumn::create( std::shared_ptr ctx, ArrowSchema* schema, - ArrowArray* array, ArrowSchema* spatial_schema, ArrowArray* spatial_array, const std::string& soma_type, @@ -58,7 +57,7 @@ void SOMAGeometryColumn::_set_dim_points( const std::any& points) const { std::vector> transformed_points = _transform_points( - std::any_cast>>(points)); + std::any_cast>>(points)); auto domain_limits = _limits(ctx, *query->schema()); @@ -109,7 +108,7 @@ void SOMAGeometryColumn::_set_dim_ranges( } void SOMAGeometryColumn::_set_current_domain_slot( - NDRectangle& rectangle, const std::vector& domain) const { + NDRectangle& rectangle, std::span domain) const { if (2 * domain.size() != dimensions.size()) { throw TileDBSOMAError(std::format( "[SOMAGeometryColumn] Dimension - Current Domain mismatch. " @@ -119,18 +118,104 @@ void SOMAGeometryColumn::_set_current_domain_slot( } for (size_t i = 0; i < domain.size(); ++i) { - const auto& dimension = dimensions[i]; - ArrowAdapter::set_current_domain_slot( - dimension.type(), domain[i], rectangle, dimension.name()); + auto dom = std::any_cast>(domain[i]); + rectangle.set_range(dimensions[i].name(), dom[0], dom[1]); } for (size_t i = 0; i < domain.size(); ++i) { - const auto& dimension = dimensions[i + domain.size()]; - ArrowAdapter::set_current_domain_slot( - dimension.type(), domain[i], rectangle, dimension.name()); + auto dom = std::any_cast>(domain[i]); + rectangle.set_range( + dimensions[i + domain.size()].name(), dom[0], dom[1]); } } +std::pair SOMAGeometryColumn::_can_set_current_domain_slot( + std::optional& rectangle, + std::span new_domain) const { + if (new_domain.size() != dimensions.size() / 2) { + throw TileDBSOMAError(std::format( + "[SOMADimension][_can_set_current_domain_slot] Expected domain " + "size is 2, found {}", + new_domain.size())); + } + + for (size_t i = 0; i < new_domain.size(); ++i) { + auto new_dom = std::any_cast>(new_domain[i]); + + if (new_dom[0] > new_dom[1]) { + return std::pair( + false, + std::format( + "index-column name {}: new lower > new upper", + dimensions[i].name())); + } + + auto dimension_min = dimensions[i]; + auto dimension_max = dimensions[i + dimensions.size() / 2]; + + if (rectangle.has_value()) { + auto dom_min = rectangle.value().range( + dimension_min.name()); + auto dom_max = rectangle.value().range( + dimension_max.name()); + + if (new_dom[0] > dom_min[0]) { + return std::pair( + false, + std::format( + "index-column name {}: new lower > old lower (downsize " + "is unsupported)", + dimension_min.name())); + } + if (new_dom[0] > dom_max[0]) { + return std::pair( + false, + std::format( + "index-column name {}: new lower > old lower (downsize " + "is unsupported)", + dimension_max.name())); + } + if (new_dom[1] < dom_min[1]) { + return std::pair( + false, + std::format( + "index-column name {}: new upper < old upper (downsize " + "is unsupported)", + dimension_min.name())); + } + if (new_dom[1] < dom_max[1]) { + return std::pair( + false, + std::format( + "index-column name {}: new upper < old upper (downsize " + "is unsupported)", + dimension_max.name())); + } + } else { + auto dom = std::any_cast< + std::pair, std::vector>>( + _core_domain_slot()); + + if (new_dom[0] > dom.first[i]) { + return std::pair( + false, + std::format( + "index-column name {}: new lower < limit lower", + dimension_min.name())); + } + if (new_dom[1] < dom.second[i]) { + return std::pair( + false, + std::format( + "index-column name {}: new upper > limit upper", + dimension_min.name())); + } + } + } + + return std::pair(true, ""); +} + std::vector> SOMAGeometryColumn::_limits( const SOMAContext& ctx, const ArraySchema& schema) const { std::vector> limits; @@ -178,7 +263,7 @@ SOMAGeometryColumn::_transform_ranges( std::vector> SOMAGeometryColumn::_transform_points( - const std::vector>& points) const { + const std::span>& points) const { if (points.size() != 1) { throw TileDBSOMAError( "Multi points are not supported for geometry dimension"); @@ -228,12 +313,18 @@ std::any SOMAGeometryColumn::_non_empty_domain_slot(Array& array) const { std::any SOMAGeometryColumn::_core_current_domain_slot( const SOMAContext& ctx, Array& array) const { - std::vector min, max; CurrentDomain current_domain = tiledb::ArraySchemaExperimental::current_domain( *ctx.tiledb_ctx(), array.schema()); NDRectangle ndrect = current_domain.ndrectangle(); + return _core_current_domain_slot(ndrect); +} + +std::any SOMAGeometryColumn::_core_current_domain_slot( + NDRectangle& ndrect) const { + std::vector min, max; + for (size_t i = 0; i < dimensions.size() / 2; ++i) { std::array domain = ndrect.range( dimensions[i].name()); diff --git a/libtiledbsoma/src/soma/soma_geometry_column.h b/libtiledbsoma/src/soma/soma_geometry_column.h index e4909323df..d8a702fb46 100644 --- a/libtiledbsoma/src/soma/soma_geometry_column.h +++ b/libtiledbsoma/src/soma/soma_geometry_column.h @@ -18,7 +18,6 @@ class SOMAGeometryColumn : public virtual SOMAColumn { static std::shared_ptr create( std::shared_ptr ctx, ArrowSchema* schema, - ArrowArray* array, ArrowSchema* spatial_schema, ArrowArray* spatial_array, const std::string& soma_type, @@ -29,7 +28,7 @@ class SOMAGeometryColumn : public virtual SOMAColumn { : dimensions(dimensions) , attribute(attribute){}; - virtual inline std::string_view name() const override { + virtual inline std::string name() const override { return SOMA_GEOMETRY_COLUMN_NAME; } @@ -92,7 +91,11 @@ class SOMAGeometryColumn : public virtual SOMAColumn { virtual void _set_current_domain_slot( NDRectangle& rectangle, - const std::vector& domain) const override; + std::span domain) const override; + + virtual std::pair _can_set_current_domain_slot( + std::optional& rectangle, + std::span new_domain) const override; virtual std::any _core_domain_slot() const override; @@ -101,6 +104,9 @@ class SOMAGeometryColumn : public virtual SOMAColumn { virtual std::any _core_current_domain_slot( const SOMAContext& ctx, Array& array) const override; + virtual std::any _core_current_domain_slot( + NDRectangle& ndrect) const override; + private: std::vector dimensions; Attribute attribute; @@ -114,7 +120,7 @@ class SOMAGeometryColumn : public virtual SOMAColumn { const; std::vector> _transform_points( - const std::vector>& points) const; + const std::span>& points) const; }; } // namespace tiledbsoma diff --git a/libtiledbsoma/test/unit_soma_column.cc b/libtiledbsoma/test/unit_soma_column.cc index 6d09d056d5..b6b79e044b 100644 --- a/libtiledbsoma/test/unit_soma_column.cc +++ b/libtiledbsoma/test/unit_soma_column.cc @@ -80,7 +80,7 @@ struct VariouslyIndexedDataFrameFixture { std::string attr_1_arrow_format = ArrowAdapter::tdb_to_arrow_type( str_datatype); - helper::DimInfo i64_dim_info(bool use_current_domain) { + helper::DimInfo i64_dim_info() { return helper::DimInfo( {.name = i64_name, .tiledb_datatype = i64_datatype, @@ -88,7 +88,7 @@ struct VariouslyIndexedDataFrameFixture { .string_lo = "N/A", .string_hi = "N/A"}); } - helper::DimInfo u32_dim_info(bool use_current_domain) { + helper::DimInfo u32_dim_info() { return helper::DimInfo( {.name = u32_name, .tiledb_datatype = u32_datatype, @@ -96,8 +96,7 @@ struct VariouslyIndexedDataFrameFixture { .string_lo = "N/A", .string_hi = "N/A"}); } - helper::DimInfo str_dim_info( - bool use_current_domain, std::string string_lo, std::string string_hi) { + helper::DimInfo str_dim_info(std::string string_lo, std::string string_hi) { return helper::DimInfo( {.name = str_name, .tiledb_datatype = str_datatype, @@ -317,7 +316,6 @@ TEST_CASE("SOMAColumn: SOMAGeometryDimension") { auto geometry_column = SOMAGeometryColumn::create( ctx->tiledb_ctx(), geom_columns.second->children[0], - geom_columns.first->children[0], spatial_columns.second.get(), spatial_columns.first.get(), "SOMAGeometryDataFrame", @@ -350,10 +348,10 @@ TEST_CASE_METHOD( std::make_shared(), "mem://unit-test-column-variant-indexed-dataframe-4-" + suffix1); - std::string string_lo = specify_domain ? "apple" : ""; - std::string string_hi = specify_domain ? "zebra" : ""; + std::string string_lo = ""; + std::string string_hi = ""; std::vector dim_infos( - {str_dim_info(true, string_lo, string_hi), u32_dim_info(true)}); + {str_dim_info(string_lo, string_hi), u32_dim_info()}); std::vector attr_infos({i64_attr_info()}); // Create @@ -388,21 +386,16 @@ TEST_CASE_METHOD( str_external = columns[0]->core_current_domain_slot( *ctx_, raw_array); - if (specify_domain) { - REQUIRE(str_range[0] == str_external.first); - REQUIRE(str_range[1] == str_external.second); - } else { - // Can we write empty strings in this range? - REQUIRE(str_range[0] <= ""); - REQUIRE(str_external.first <= ""); - REQUIRE(str_range[1] >= ""); - REQUIRE(str_external.second >= ""); - // Can we write ASCII values in this range? - REQUIRE(str_range[0] < " "); - REQUIRE(str_external.first <= " "); - REQUIRE(str_range[1] > "~"); - // REQUIRE(str_external.second >= "~"); - } + // Can we write empty strings in this range? + REQUIRE(str_range[0] <= ""); + REQUIRE(str_external.first <= ""); + REQUIRE(str_range[1] >= ""); + REQUIRE(str_external.second >= ""); + // Can we write ASCII values in this range? + REQUIRE(str_range[0] < " "); + REQUIRE(str_external.first <= " "); + REQUIRE(str_range[1] > "~"); + // REQUIRE(str_external.second >= "~"); std::array u32_range = ndrect.range( dim_infos[1].name);