Skip to content

Commit

Permalink
[c++] Performant DataFrame.shape
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Aug 19, 2024
1 parent daecd1b commit 21155f6
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 0 deletions.
24 changes: 24 additions & 0 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,30 @@ std::vector<int64_t> SOMAArray::maxshape() {
return _tiledb_domain();
}

std::optional<int64_t> SOMAArray::_shape_slot_if_soma_joinid_dim() {
const std::string dim_name = "soma_joinid";

if (!arr_->schema().domain().has_dimension(dim_name)) {
return std::nullopt;
}

auto current_domain = _get_current_domain();
if (current_domain.is_empty()) {
return std::nullopt;
}

auto t = current_domain.type();
if (t != TILEDB_NDRECTANGLE) {
throw TileDBSOMAError("current_domain type is not NDRECTANGLE");
}

NDRectangle ndrect = current_domain.ndrectangle();

auto range = ndrect.range<int64_t>(dim_name);
auto max = range[1] + 1;
return std::optional<int64_t>(max);
}

std::vector<int64_t> SOMAArray::_tiledb_domain() {
std::vector<int64_t> result;
auto dimensions = mq_->schema()->domain().dimensions();
Expand Down
8 changes: 8 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,14 @@ class SOMAArray : public SOMAObject {
*/
std::optional<TimestampRange> timestamp();

protected:
// For use nominally by SOMADataFrame. This could be moved in its entirety to
// SOMADataFrame, but it would entail moving several SOMAArray attributes
// from private to protected, which has knock-on effects on the order of
// constructor initializers, etc.: in total it's simplest to place this
// here, and have SOMADataFrame invoke it.
//
// Returns std::nullopt when the schema has no "soma_joinid" dimension, or
// when the current domain is empty. Otherwise returns one more than the
// second (upper) element of the current-domain range along "soma_joinid".
// Throws TileDBSOMAError if the current domain is not an NDRECTANGLE.
std::optional<int64_t> _shape_slot_if_soma_joinid_dim();

private:
//===================================================================
//= private non-static
Expand Down
6 changes: 6 additions & 0 deletions libtiledbsoma/src/soma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,10 @@ uint64_t SOMADataFrame::count() {
return this->nnz();
}

/**
 * Returns a one-element shape vector for this dataframe.
 *
 * The element is the soma_joinid shape slot when
 * _shape_slot_if_soma_joinid_dim() yields a value; otherwise it is nnz().
 * nnz() is only evaluated on the fallback path.
 */
std::vector<int64_t> SOMADataFrame::shape() {
    const std::optional<int64_t> joinid_slot = _shape_slot_if_soma_joinid_dim();

    if (joinid_slot.has_value()) {
        return std::vector<int64_t>{*joinid_slot};
    }

    // No usable soma_joinid slot: fall back to the cell count.
    const int64_t cell_count = static_cast<int64_t>(this->nnz());
    return std::vector<int64_t>{cell_count};
}

} // namespace tiledbsoma
12 changes: 12 additions & 0 deletions libtiledbsoma/src/soma/soma_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,19 @@ class SOMADataFrame : public SOMAArray {
* @return uint64_t
*/
uint64_t count();

/**
* Returns a one-element shape for the DataFrame.
*
* For DataFrame with default indexing, namely, a single int64_t
* soma_joinid, this is intended to match what SOMAArray reports. For
* DataFrame with soma_joinid being a dim along with other dims (optional
* behavior), returns the slot along that dim. In both cases the slot is one
* more than the upper end of the current-domain range for soma_joinid.
*
* For DataFrame with soma_joinid being an attr, not a dim at all, or when
* the array's current domain is empty, returns nnz() instead.
*
* @return std::vector<int64_t> containing exactly one element
* @throws TileDBSOMAError if soma_joinid is a dim and the current domain is
* non-empty but not an NDRECTANGLE
*/
std::vector<int64_t> shape();
};

} // namespace tiledbsoma

#endif // SOMA_DATAFRAME

0 comments on commit 21155f6

Please sign in to comment.