Skip to content

Commit

Permalink
[c++] Performant DataFrame.shape
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Aug 29, 2024
1 parent c8d6ca2 commit c875bff
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 5 deletions.
24 changes: 24 additions & 0 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1191,6 +1191,30 @@ std::vector<int64_t> SOMAArray::maxshape() {
return _tiledb_domain();
}

std::optional<int64_t> SOMAArray::_shape_slot_if_soma_joinid_dim() {
const std::string dim_name = "soma_joinid";

if (!arr_->schema().domain().has_dimension(dim_name)) {
return std::nullopt;
}

auto current_domain = _get_current_domain();
if (current_domain.is_empty()) {
return std::nullopt;
}

auto t = current_domain.type();
if (t != TILEDB_NDRECTANGLE) {
throw TileDBSOMAError("current_domain type is not NDRECTANGLE");
}

NDRectangle ndrect = current_domain.ndrectangle();

auto range = ndrect.range<int64_t>(dim_name);
auto max = range[1] + 1;
return std::optional<int64_t>(max);
}

std::vector<int64_t> SOMAArray::_tiledb_domain() {
std::vector<int64_t> result;
auto dimensions = mq_->schema()->domain().dimensions();
Expand Down
8 changes: 8 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,14 @@ class SOMAArray : public SOMAObject {
*/
std::optional<TimestampRange> timestamp();

protected:
// For use nominally by SOMADataFrame. This could be moved in its entirety to
// SOMADataFrame, but it would entail moving several SOMAArray attributes
// from private to protected, which has knock-on effects on the order of
// constructor initializers, etc.: in total it's simplest to place this
// here, and have SOMADataFrame invoke it.
//
// Returns std::nullopt when the array has no "soma_joinid" dimension, or
// when its current domain is empty. Otherwise returns the upper end of the
// current-domain range along "soma_joinid", plus one. Throws
// TileDBSOMAError if a non-empty current domain is not an NDRECTANGLE.
std::optional<int64_t> _shape_slot_if_soma_joinid_dim();

private:
//===================================================================
//= private non-static
Expand Down
6 changes: 6 additions & 0 deletions libtiledbsoma/src/soma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,10 @@ uint64_t SOMADataFrame::count() {
return this->nnz();
}

// Reports the dataframe's shape as a one-element vector.
//
// The element is the soma_joinid shape slot when
// _shape_slot_if_soma_joinid_dim() yields a value; otherwise it is nnz().
std::vector<int64_t> SOMADataFrame::shape() {
    const std::optional<int64_t> joinid_slot = _shape_slot_if_soma_joinid_dim();
    if (joinid_slot.has_value()) {
        return std::vector<int64_t>({joinid_slot.value()});
    }

    // nnz() is consulted only on this fallback path, never when the slot is
    // already known.
    const int64_t row_count = this->nnz();
    return std::vector<int64_t>({row_count});
}

} // namespace tiledbsoma
12 changes: 12 additions & 0 deletions libtiledbsoma/src/soma/soma_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,19 @@ class SOMADataFrame : public SOMAArray {
* @return int64_t
*/
uint64_t count();

/**
 * Return the shape of this dataframe as a one-element vector.
 *
 * For DataFrame with default indexing, namely, a single int64_t
 * soma_joinid, return the same as SOMAArray. For DataFrame with soma_joinid
 * being a dim along with other dims (optional behavior), return the slot
 * along that dim. For DataFrame with soma_joinid being an attr, not a dim
 * at all, returns nnz(). nnz() is also used when soma_joinid is a dim but
 * the array's current domain is empty.
 *
 * @return std::vector<int64_t> holding the single shape value
 */
std::vector<int64_t> shape();
};

} // namespace tiledbsoma

#endif // SOMA_DATAFRAME
9 changes: 4 additions & 5 deletions libtiledbsoma/test/unit_soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,10 @@ TEST_CASE("SOMASparseNDArray: basic") {
REQUIRE(soma_sparse->ndim() == 1);
REQUIRE(soma_sparse->nnz() == 0);

if (use_current_domain) {
REQUIRE(soma_sparse->shape() == std::vector<int64_t>{dim_max + 1});
} else {
REQUIRE(
soma_sparse->maxshape() == std::vector<int64_t>{dim_max + 1});
auto expect = std::vector<int64_t>({dim_max + 1});
REQUIRE(soma_sparse->shape() == expect);
if (!use_current_domain) {
REQUIRE(soma_sparse->maxshape() == expect);
}

soma_sparse->close();
Expand Down

0 comments on commit c875bff

Please sign in to comment.