diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 81738a727b..a873faa7f2 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -1196,6 +1196,30 @@ std::vector SOMAArray::maxshape() { return _tiledb_domain(); } +std::optional SOMAArray::_shape_slot_if_soma_joinid_dim() { + const std::string dim_name = "soma_joinid"; + + if (!arr_->schema().domain().has_dimension(dim_name)) { + return std::nullopt; + } + + auto current_domain = _get_current_domain(); + if (current_domain.is_empty()) { + return std::nullopt; + } + + auto t = current_domain.type(); + if (t != TILEDB_NDRECTANGLE) { + throw TileDBSOMAError("current_domain type is not NDRECTANGLE"); + } + + NDRectangle ndrect = current_domain.ndrectangle(); + + auto range = ndrect.range(dim_name); + auto max = range[1] + 1; + return std::optional(max); +} + std::vector SOMAArray::_tiledb_domain() { std::vector result; auto dimensions = mq_->schema()->domain().dimensions(); diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index fb2256ba21..f74b356af1 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -796,6 +796,14 @@ class SOMAArray : public SOMAObject { */ std::optional timestamp(); + protected: + // For use nominally by SOMADataFrame. This could be moved in its entirety to + // SOMADataFrame, but it would entail moving several SOMAArray attributes + // from private to protected, which has knock-on effects on the order of + // constructor initializers, etc.: in total it's simplest to place this + // here, and have SOMADataFrame invoke it. + // + // Returns std::nullopt when the schema has no "soma_joinid" dimension, or + // when the current domain is empty. Otherwise returns one more than + // range[1] of the current domain's NDRectangle range for "soma_joinid". + // Throws TileDBSOMAError when the current domain's type is not + // TILEDB_NDRECTANGLE.
+ std::optional _shape_slot_if_soma_joinid_dim(); + private: //=================================================================== //= private non-static diff --git a/libtiledbsoma/src/soma/soma_dataframe.cc b/libtiledbsoma/src/soma/soma_dataframe.cc index b25d4aab21..431b843c08 100644 --- a/libtiledbsoma/src/soma/soma_dataframe.cc +++ b/libtiledbsoma/src/soma/soma_dataframe.cc @@ -94,4 +94,10 @@ uint64_t SOMADataFrame::count() { return this->nnz(); } +std::vector SOMADataFrame::shape() { + std::optional attempt = _shape_slot_if_soma_joinid_dim(); + int64_t max = attempt.has_value() ? attempt.value() : this->nnz(); + return std::vector({max}); +} + } // namespace tiledbsoma diff --git a/libtiledbsoma/src/soma/soma_dataframe.h b/libtiledbsoma/src/soma/soma_dataframe.h index bc353649d0..55840001cf 100644 --- a/libtiledbsoma/src/soma/soma_dataframe.h +++ b/libtiledbsoma/src/soma/soma_dataframe.h @@ -163,7 +163,19 @@ class SOMADataFrame : public SOMAArray { * @return int64_t */ uint64_t count(); + + /** + * For DataFrame with default indexing, namely, a single int64_t + * soma_joinid, return the same as SOMAArray. For DataFrame with soma_joinid + * being a dim along with other dims (optional behavior), return the slot + * along that dim. For DataFrame with soma_joinid being an attr, not a dim + * at all, returns nnz(). When soma_joinid is a dim but + * _shape_slot_if_soma_joinid_dim() yields no value, nnz() is used as well. + * + * @return a one-element vector holding the value described above + */ + std::vector shape(); }; + } // namespace tiledbsoma #endif // SOMA_DATAFRAME