Skip to content

Commit

Permalink
[c++] Performant DataFrame.shape
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Aug 19, 2024
1 parent daecd1b commit 696ab50
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 3 deletions.
24 changes: 24 additions & 0 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,30 @@ std::vector<int64_t> SOMAArray::maxshape() {
return _tiledb_domain();
}

// Looks up the shape slot along the "soma_joinid" dimension using the
// array's current domain.
//
// Returns std::nullopt when the schema's domain has no dimension named
// "soma_joinid", or when the current domain is empty. Otherwise returns the
// second element of the soma_joinid range, plus one.
//
// Throws TileDBSOMAError when the current domain is non-empty but its type
// is not TILEDB_NDRECTANGLE.
std::optional<int64_t> SOMAArray::_shape_slot_if_soma_joinid_dim() {
const std::string dim_name = "soma_joinid";

Check warning on line 1200 in libtiledbsoma/src/soma/soma_array.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.cc#L1199-L1200

Added lines #L1199 - L1200 were not covered by tests

// No soma_joinid dimension in the schema: there is no slot to report.
if (!arr_->schema().domain().has_dimension(dim_name)) {
return std::nullopt;

Check warning on line 1203 in libtiledbsoma/src/soma/soma_array.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.cc#L1203

Added line #L1203 was not covered by tests
}

auto current_domain = _get_current_domain();

Check warning on line 1206 in libtiledbsoma/src/soma/soma_array.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.cc#L1206

Added line #L1206 was not covered by tests
// An empty current domain offers no range to derive the slot from.
if (current_domain.is_empty()) {
return std::nullopt;

Check warning on line 1208 in libtiledbsoma/src/soma/soma_array.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.cc#L1208

Added line #L1208 was not covered by tests
}

auto t = current_domain.type();

Check warning on line 1211 in libtiledbsoma/src/soma/soma_array.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.cc#L1211

Added line #L1211 was not covered by tests
// The ndrectangle() accessor used below is only called for
// NDRECTANGLE-typed current domains; anything else is reported as an error.
if (t != TILEDB_NDRECTANGLE) {
throw TileDBSOMAError("current_domain type is not NDRECTANGLE");

Check warning on line 1213 in libtiledbsoma/src/soma/soma_array.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.cc#L1213

Added line #L1213 was not covered by tests
}

NDRectangle ndrect = current_domain.ndrectangle();

Check warning on line 1216 in libtiledbsoma/src/soma/soma_array.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.cc#L1216

Added line #L1216 was not covered by tests

// The range is read as int64_t; range[1] is treated as the upper bound and
// the +1 turns it into a slot count. Presumably the bound is inclusive --
// TODO confirm against the TileDB NDRectangle documentation.
// NOTE(review): range[1] + 1 would overflow if range[1] were INT64_MAX.
auto range = ndrect.range<int64_t>(dim_name);
auto max = range[1] + 1;
return std::optional<int64_t>(max);

Check warning on line 1220 in libtiledbsoma/src/soma/soma_array.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.cc#L1218-L1220

Added lines #L1218 - L1220 were not covered by tests
}

std::vector<int64_t> SOMAArray::_tiledb_domain() {
std::vector<int64_t> result;
auto dimensions = mq_->schema()->domain().dimensions();
Expand Down
8 changes: 8 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,14 @@ class SOMAArray : public SOMAObject {
*/
std::optional<TimestampRange> timestamp();

protected:
// For use nominally by SOMADataFrame. This could be moved in its entirety to
// SOMADataFrame, but it would entail moving several SOMAArray attributes
// from private to protected, which has knock-on effects on the order of
// constructor initializers, etc.: in total it's simplest to place this
// here, and have SOMADataFrame invoke it.
std::optional<int64_t> _shape_slot_if_soma_joinid_dim();

private:
//===================================================================
//= private non-static
Expand Down
6 changes: 6 additions & 0 deletions libtiledbsoma/src/soma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,10 @@ uint64_t SOMADataFrame::count() {
return this->nnz();
}

// Returns the DataFrame shape as a one-element vector.
//
// The element is the value produced by _shape_slot_if_soma_joinid_dim() when
// that call yields one; otherwise it falls back to nnz().
std::vector<int64_t> SOMADataFrame::shape() {
std::optional<int64_t> attempt = _shape_slot_if_soma_joinid_dim();
// nnz() is only evaluated when no slot value is available.
// NOTE(review): nnz() appears to be unsigned (count() returns it as
// uint64_t), so this conditional mixes signed and unsigned operands before
// the result is stored as int64_t -- confirm this is intended.
int64_t max = attempt.has_value() ? attempt.value() : this->nnz();
return std::vector<int64_t>({max});

Check warning on line 100 in libtiledbsoma/src/soma/soma_dataframe.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_dataframe.cc#L97-L100

Added lines #L97 - L100 were not covered by tests
}

} // namespace tiledbsoma
12 changes: 12 additions & 0 deletions libtiledbsoma/src/soma/soma_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,19 @@ class SOMADataFrame : public SOMAArray {
* @return uint64_t
*/
uint64_t count();

/**
* For DataFrame with default indexing, namely, a single int64_t
* soma_joinid, return the same as SOMAArray. For DataFrame with soma_joinid
* being a dim along with other dims (optional behavior), return the slot
* along that dim. For DataFrame with soma_joinid being an attr, not a dim
* at all, returns nnz().
*
* @return std::vector<int64_t>
*/
std::vector<int64_t> shape();
};

} // namespace tiledbsoma

#endif // SOMA_DATAFRAME
6 changes: 3 additions & 3 deletions libtiledbsoma/test/unit_soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ TEST_CASE("SOMASparseNDArray: basic") {
REQUIRE(soma_sparse->ndim() == 1);
REQUIRE(soma_sparse->nnz() == 0);

if (use_current_domain) {
REQUIRE(soma_sparse->shape() == std::vector<int64_t>{dim_max + 1});
} else {
auto expect = std::vector<int64_t>{dim_max + 1};
REQUIRE(soma_sparse->shape() == std::vector<int64_t>{dim_max + 1});
if (!use_current_domain) {
REQUIRE(
soma_sparse->maxshape() == std::vector<int64_t>{dim_max + 1});
}
Expand Down

0 comments on commit 696ab50

Please sign in to comment.