Skip to content

Commit

Permalink
[c++] Performant DataFrame.shape
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Aug 20, 2024
1 parent 41c6d96 commit a6cb606
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 10 deletions.
24 changes: 24 additions & 0 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,30 @@ std::vector<int64_t> SOMAArray::maxshape() {
return _tiledb_domain();
}

std::optional<int64_t> SOMAArray::_shape_slot_if_soma_joinid_dim() {
const std::string dim_name = "soma_joinid";

if (!arr_->schema().domain().has_dimension(dim_name)) {
return std::nullopt;
}

auto current_domain = _get_current_domain();
if (current_domain.is_empty()) {
return std::nullopt;
}

auto t = current_domain.type();
if (t != TILEDB_NDRECTANGLE) {
throw TileDBSOMAError("current_domain type is not NDRECTANGLE");
}

NDRectangle ndrect = current_domain.ndrectangle();

auto range = ndrect.range<int64_t>(dim_name);
auto max = range[1] + 1;
return std::optional<int64_t>(max);
}

std::vector<int64_t> SOMAArray::_tiledb_domain() {
std::vector<int64_t> result;
auto dimensions = mq_->schema()->domain().dimensions();
Expand Down
8 changes: 8 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,14 @@ class SOMAArray : public SOMAObject {
*/
std::optional<TimestampRange> timestamp();

protected:
// For use nominally by SOMADataFrame. This could be moved in its entirety to
// SOMADataFrame, but it would entail moving several SOMAArray attributes
// from private to protected, which has knock-on effects on the order of
// constructor initializers, etc.: in total it's simplest to place this
// here, and have SOMADataFrame invoke it.
std::optional<int64_t> _shape_slot_if_soma_joinid_dim();

private:
//===================================================================
//= private non-static
Expand Down
6 changes: 6 additions & 0 deletions libtiledbsoma/src/soma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,10 @@ uint64_t SOMADataFrame::count() {
return this->nnz();
}

// Returns a one-element vector describing the dataframe's shape.
//
// The element is the soma_joinid shape slot when
// _shape_slot_if_soma_joinid_dim() yields a value; otherwise it falls back
// to nnz(). nnz() is only invoked when the slot is unavailable.
std::vector<int64_t> SOMADataFrame::shape() {
    const std::optional<int64_t> slot = _shape_slot_if_soma_joinid_dim();
    if (slot.has_value()) {
        return std::vector<int64_t>({*slot});
    }

    // Fallback path: no usable soma_joinid slot, so report the nnz() count.
    const int64_t fallback = this->nnz();
    return std::vector<int64_t>({fallback});
}

} // namespace tiledbsoma
12 changes: 12 additions & 0 deletions libtiledbsoma/src/soma/soma_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,19 @@ class SOMADataFrame : public SOMAArray {
* @return uint64_t
*/
uint64_t count();

/**
* For DataFrame with default indexing, namely, a single int64_t
* soma_joinid, return the same as SOMAArray. For DataFrame with soma_joinid
* being a dim along with other dims (optional behavior), return the slot
* along that dim. For DataFrame with soma_joinid being an attr, not a dim
* at all, returns nnz().
*
* @return std::vector<int64_t>
*/
std::vector<int64_t> shape();
};

} // namespace tiledbsoma

#endif // SOMA_DATAFRAME
9 changes: 4 additions & 5 deletions libtiledbsoma/test/unit_soma_collection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,7 @@ TEST_CASE("SOMACollection: add SOMADenseNDArray") {
std::string sub_uri = "mem://unit-test-add-dense-ndarray/sub";

SOMACollection::create(base_uri, ctx, ts);
<<<<<<< HEAD
auto index_columns = helper::create_column_index_info(DIM_MAX);
=======
auto index_columns = helper::create_column_index_info(DIM_MAX, false);
auto schema = helper::create_schema(*ctx->tiledb_ctx(), DIM_MAX, true);
>>>>>>> a9030352 ([c++] Arrow utils with current-domain option [WIP])

std::map<std::string, SOMAGroupEntry> expected_map{
{"dense_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}};
Expand Down Expand Up @@ -185,6 +180,10 @@ TEST_CASE("SOMACollection: add SOMACollection") {
std::string sub_uri = "mem://unit-test-add-collection/sub";

SOMACollection::create(base_uri, ctx);
<<<<<<< HEAD
=======
auto schema = helper::create_schema(*ctx->tiledb_ctx(), DIM_MAX, false);
>>>>>>> 0f53723b ([c++] Performant DataFrame.shape)

std::map<std::string, SOMAGroupEntry> expected_map{
{"subcollection", SOMAGroupEntry(sub_uri, "SOMAGroup")}};
Expand Down
9 changes: 4 additions & 5 deletions libtiledbsoma/test/unit_soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,10 @@ TEST_CASE("SOMASparseNDArray: basic") {
REQUIRE(soma_sparse->ndim() == 1);
REQUIRE(soma_sparse->nnz() == 0);

if (use_current_domain) {
REQUIRE(soma_sparse->shape() == std::vector<int64_t>{dim_max + 1});
} else {
REQUIRE(
soma_sparse->maxshape() == std::vector<int64_t>{dim_max + 1});
auto expect = std::vector<int64_t>{dim_max + 1};
REQUIRE(soma_sparse->shape() == expect;
if (!use_current_domain) {
REQUIRE(soma_sparse->maxshape() == expect;
}

soma_sparse->close();
Expand Down

0 comments on commit a6cb606

Please sign in to comment.