Skip to content

Commit

Permalink
[r] Add check_only support for domain/shape updates (#3400)
Browse files Browse the repository at this point in the history
* Add `check_only` flag to shape/domain upgraders/resizers, for parity with Python

* unit-test coverage

* Apply suggestions from code review

* more code-review feedback

* more code-review feedback

* code-review feedback [skip ci]

Co-authored-by: Paul Hoffman <[email protected]>

* apis/r/DESCRIPTION apis/r/NEWS.md

---------

Co-authored-by: Paul Hoffman <[email protected]>
  • Loading branch information
johnkerl and mojaveazure authored Dec 5, 2024
1 parent ff8c19d commit 8af4046
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 56 deletions.
2 changes: 1 addition & 1 deletion apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices,
like those commonly used for single cell data analysis. It is documented at
<https://github.com/single-cell-data>; a formal specification available is at
<https://github.com/single-cell-data/SOMA/blob/main/abstract_specification.md>.
Version: 1.15.99.19
Version: 1.15.99.20
Authors@R: c(
person(given = "Aaron", family = "Wolen",
role = c("cre", "aut"), email = "[email protected]",
Expand Down
1 change: 1 addition & 0 deletions apis/r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
* Update docstrings for `domain` argument to `create` [#3396](https://github.com/single-cell-data/TileDB-SOMA/pull/3396)
* Vignette for new-shape feature [#3302](https://github.com/single-cell-data/TileDB-SOMA/pull/3302)
* Fix blockwise iterator + re-indexer to return re-indexed shape instead of full domain
* Add `check_only` support for domain/shape updates [#3400](https://github.com/single-cell-data/TileDB-SOMA/pull/3400)

# tiledbsoma 1.14.5

Expand Down
12 changes: 6 additions & 6 deletions apis/r/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -238,20 +238,20 @@ c_schema <- function(uri, ctxxp) {
.Call(`_tiledbsoma_c_schema`, uri, ctxxp)
}

resize <- function(uri, new_shape, function_name_for_messages, ctxxp) {
invisible(.Call(`_tiledbsoma_resize`, uri, new_shape, function_name_for_messages, ctxxp))
resize <- function(uri, new_shape, function_name_for_messages, check_only, ctxxp) {
.Call(`_tiledbsoma_resize`, uri, new_shape, function_name_for_messages, check_only, ctxxp)
}

resize_soma_joinid_shape <- function(uri, new_shape, function_name_for_messages, ctxxp) {
invisible(.Call(`_tiledbsoma_resize_soma_joinid_shape`, uri, new_shape, function_name_for_messages, ctxxp))
}

tiledbsoma_upgrade_shape <- function(uri, new_shape, function_name_for_messages, ctxxp) {
invisible(.Call(`_tiledbsoma_tiledbsoma_upgrade_shape`, uri, new_shape, function_name_for_messages, ctxxp))
tiledbsoma_upgrade_shape <- function(uri, new_shape, function_name_for_messages, check_only, ctxxp) {
.Call(`_tiledbsoma_tiledbsoma_upgrade_shape`, uri, new_shape, function_name_for_messages, check_only, ctxxp)
}

upgrade_or_change_domain <- function(uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, ctxxp) {
invisible(.Call(`_tiledbsoma_upgrade_or_change_domain`, uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, ctxxp))
upgrade_or_change_domain <- function(uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, check_only, ctxxp) {
.Call(`_tiledbsoma_upgrade_or_change_domain`, uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, check_only, ctxxp)
}

c_update_dataframe_schema <- function(uri, ctxxp, column_names_to_drop, add_cols_types, add_cols_enum_value_types, add_cols_enum_ordered) {
Expand Down
54 changes: 36 additions & 18 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -472,26 +472,35 @@ SOMADataFrame <- R6::R6Class(
#' @param new_domain A named list, keyed by index-column name, with values
#' being two-element vectors containing the desired lower and upper bounds
#' for the domain.
#' @return No return value
tiledbsoma_upgrade_domain = function(new_domain) {
# stopifnot("'new_domain' must be CODE ME UP PLZ" = ...
#' @param check_only If true, does not apply the operation, but only reports
#' whether it would have succeeded.
#' @return No return value if `check_only` is `FALSE`. If `check_only` is `TRUE`,
#' returns the empty string if no error is detected, else a description of the error.
tiledbsoma_upgrade_domain = function(new_domain, check_only = FALSE) {
# Checking slotwise new shape >= old shape, and <= max_shape, is already
# done in libtiledbsoma

pyarrow_domain_table <- private$upgrade_or_change_domain_helper(
new_domain, "tiledbsoma_upgrade_domain"
)

invisible(
upgrade_or_change_domain(
self$uri,
FALSE,
pyarrow_domain_table$array,
pyarrow_domain_table$schema,
.name_of_function(),
private$.soma_context
)
reason_string <- upgrade_or_change_domain(
self$uri,
FALSE,
pyarrow_domain_table$array,
pyarrow_domain_table$schema,
.name_of_function(),
check_only,
private$.soma_context
)

if (isTRUE(check_only)) {
return(reason_string)
}

# The return value from upgrade_or_change_domain without check_only is
# always "", or it raises an error trying.
return(invisible(NULL))
},

#' @description Allows you to set the domain of a `SOMADataFrame`, when the
Expand All @@ -506,26 +515,35 @@ SOMADataFrame <- R6::R6Class(
#' @param new_domain A named list, keyed by index-column name, with values
#' being two-element vectors containing the desired lower and upper bounds
#' for the domain.
#' @return No return value
change_domain = function(new_domain) {
# stopifnot("'new_domain' must be CODE ME UP PLZ" = ...
#' @param check_only If true, does not apply the operation, but only reports
#' whether it would have succeeded.
#' @return No return value if `check_only` is `FALSE`. If `check_only` is `TRUE`,
#' returns the empty string if no error is detected, else a description of the error.
change_domain = function(new_domain, check_only = FALSE) {
# Checking slotwise new shape >= old shape, and <= max_shape, is already
# done in libtiledbsoma

pyarrow_domain_table <- private$upgrade_or_change_domain_helper(
new_domain, tiledbsoma_upgrade_domain
)

invisible(
upgrade_or_change_domain(
reason_string <- upgrade_or_change_domain(
self$uri,
TRUE,
pyarrow_domain_table$array,
pyarrow_domain_table$schema,
.name_of_function(),
check_only,
private$.soma_context
)
)

if (isTRUE(check_only)) {
return(reason_string)
}

# The return value from upgrade_or_change_domain without check_only is
# always "", or it raises an error trying.
return(invisible(NULL))
}
),
private = list(
Expand Down
35 changes: 29 additions & 6 deletions apis/r/R/SOMANDArrayBase.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,27 @@ SOMANDArrayBase <- R6::R6Class(
#' doesn't already have a shape: in that case please call
#' `tiledbsoma_upgrade_shape`. (lifecycle: maturing)
#' @param new_shape A vector of integerish, of the same length as the array's `ndim`.
#' @return No return value
resize = function(new_shape) {
#' @param check_only If true, does not apply the operation, but only reports
#' whether it would have succeeded.
#' @return No return value if `check_only` is `FALSE`. If `check_only` is `TRUE`,
#' returns the empty string if no error is detected, else a description of the error.
resize = function(new_shape, check_only = FALSE) {
stopifnot(
"'new_shape' must be a vector of integerish values, of the same length as maxshape" =
rlang::is_integerish(new_shape, n = self$ndim()) ||
(bit64::is.integer64(new_shape) && length(new_shape) == self$ndim())
)
# Checking slotwise new shape >= old shape, and <= max_shape, is already done in libtiledbsoma
resize(self$uri, new_shape, .name_of_function(), private$.soma_context)

reason_string <- resize(self$uri, new_shape, .name_of_function(), check_only, private$.soma_context)

if (isTRUE(check_only)) {
return(reason_string)
}

# The return value from resize without check_only is always "", or it
# raises an error trying.
return(invisible(NULL))
},

#' @description Allows the array to have a resizeable shape as described in the
Expand All @@ -121,15 +133,26 @@ SOMANDArrayBase <- R6::R6Class(
#' be used for subsequent resizes on any array which already has upgraded shape.
#' (lifecycle: maturing)
#' @param shape A vector of integerish, of the same length as the array's `ndim`.
#' @return No return value
tiledbsoma_upgrade_shape = function(shape) {
#' @param check_only If true, does not apply the operation, but only reports
#' whether it would have succeeded.
#' @return No return value if `check_only` is `FALSE`. If `check_only` is `TRUE`,
#' returns the empty string if no error is detected, else a description of the error.
tiledbsoma_upgrade_shape = function(shape, check_only = FALSE) {
stopifnot(
"'shape' must be a vector of integerish values, of the same length as maxshape" =
rlang::is_integerish(shape, n = self$ndim()) ||
(bit64::is.integer64(shape) && length(shape) == self$ndim())
)
# Checking slotwise new shape >= old shape, and <= max_shape, is already done in libtiledbsoma
tiledbsoma_upgrade_shape(self$uri, shape, .name_of_function(), private$.soma_context)

reason_string <- tiledbsoma_upgrade_shape(self$uri, shape, .name_of_function(), check_only, private$.soma_context)
if (isTRUE(check_only)) {
return(reason_string)
}

# The return value from tiledbsoma_upgrade_shape without check_only is
# always "", or it raises an error trying.
return(invisible(NULL))
}
),
private = list(
Expand Down
36 changes: 21 additions & 15 deletions apis/r/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,16 +553,18 @@ BEGIN_RCPP
END_RCPP
}
// resize
void resize(const std::string& uri, Rcpp::NumericVector new_shape, std::string function_name_for_messages, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_resize(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP function_name_for_messagesSEXP, SEXP ctxxpSEXP) {
std::string resize(const std::string& uri, Rcpp::NumericVector new_shape, std::string function_name_for_messages, bool check_only, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_resize(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP function_name_for_messagesSEXP, SEXP check_onlySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericVector >::type new_shape(new_shapeSEXP);
Rcpp::traits::input_parameter< std::string >::type function_name_for_messages(function_name_for_messagesSEXP);
Rcpp::traits::input_parameter< bool >::type check_only(check_onlySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
resize(uri, new_shape, function_name_for_messages, ctxxp);
return R_NilValue;
rcpp_result_gen = Rcpp::wrap(resize(uri, new_shape, function_name_for_messages, check_only, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// resize_soma_joinid_shape
Expand All @@ -579,31 +581,35 @@ BEGIN_RCPP
END_RCPP
}
// tiledbsoma_upgrade_shape
void tiledbsoma_upgrade_shape(const std::string& uri, Rcpp::NumericVector new_shape, std::string function_name_for_messages, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_tiledbsoma_upgrade_shape(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP function_name_for_messagesSEXP, SEXP ctxxpSEXP) {
std::string tiledbsoma_upgrade_shape(const std::string& uri, Rcpp::NumericVector new_shape, std::string function_name_for_messages, bool check_only, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_tiledbsoma_upgrade_shape(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP function_name_for_messagesSEXP, SEXP check_onlySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericVector >::type new_shape(new_shapeSEXP);
Rcpp::traits::input_parameter< std::string >::type function_name_for_messages(function_name_for_messagesSEXP);
Rcpp::traits::input_parameter< bool >::type check_only(check_onlySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
tiledbsoma_upgrade_shape(uri, new_shape, function_name_for_messages, ctxxp);
return R_NilValue;
rcpp_result_gen = Rcpp::wrap(tiledbsoma_upgrade_shape(uri, new_shape, function_name_for_messages, check_only, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// upgrade_or_change_domain
void upgrade_or_change_domain(const std::string& uri, bool is_change_domain, naxpArray nadimap, naxpSchema nadimsp, std::string function_name_for_messages, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_upgrade_or_change_domain(SEXP uriSEXP, SEXP is_change_domainSEXP, SEXP nadimapSEXP, SEXP nadimspSEXP, SEXP function_name_for_messagesSEXP, SEXP ctxxpSEXP) {
std::string upgrade_or_change_domain(const std::string& uri, bool is_change_domain, naxpArray nadimap, naxpSchema nadimsp, std::string function_name_for_messages, bool check_only, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_upgrade_or_change_domain(SEXP uriSEXP, SEXP is_change_domainSEXP, SEXP nadimapSEXP, SEXP nadimspSEXP, SEXP function_name_for_messagesSEXP, SEXP check_onlySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< bool >::type is_change_domain(is_change_domainSEXP);
Rcpp::traits::input_parameter< naxpArray >::type nadimap(nadimapSEXP);
Rcpp::traits::input_parameter< naxpSchema >::type nadimsp(nadimspSEXP);
Rcpp::traits::input_parameter< std::string >::type function_name_for_messages(function_name_for_messagesSEXP);
Rcpp::traits::input_parameter< bool >::type check_only(check_onlySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
upgrade_or_change_domain(uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, ctxxp);
return R_NilValue;
rcpp_result_gen = Rcpp::wrap(upgrade_or_change_domain(uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, check_only, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// c_update_dataframe_schema
Expand Down Expand Up @@ -834,10 +840,10 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledbsoma_c_dimnames", (DL_FUNC) &_tiledbsoma_c_dimnames, 2},
{"_tiledbsoma_c_attrnames", (DL_FUNC) &_tiledbsoma_c_attrnames, 2},
{"_tiledbsoma_c_schema", (DL_FUNC) &_tiledbsoma_c_schema, 2},
{"_tiledbsoma_resize", (DL_FUNC) &_tiledbsoma_resize, 4},
{"_tiledbsoma_resize", (DL_FUNC) &_tiledbsoma_resize, 5},
{"_tiledbsoma_resize_soma_joinid_shape", (DL_FUNC) &_tiledbsoma_resize_soma_joinid_shape, 4},
{"_tiledbsoma_tiledbsoma_upgrade_shape", (DL_FUNC) &_tiledbsoma_tiledbsoma_upgrade_shape, 4},
{"_tiledbsoma_upgrade_or_change_domain", (DL_FUNC) &_tiledbsoma_upgrade_or_change_domain, 6},
{"_tiledbsoma_tiledbsoma_upgrade_shape", (DL_FUNC) &_tiledbsoma_tiledbsoma_upgrade_shape, 5},
{"_tiledbsoma_upgrade_or_change_domain", (DL_FUNC) &_tiledbsoma_upgrade_or_change_domain, 7},
{"_tiledbsoma_c_update_dataframe_schema", (DL_FUNC) &_tiledbsoma_c_update_dataframe_schema, 6},
{"_tiledbsoma_sr_setup", (DL_FUNC) &_tiledbsoma_sr_setup, 10},
{"_tiledbsoma_sr_complete", (DL_FUNC) &_tiledbsoma_sr_complete, 1},
Expand Down
52 changes: 44 additions & 8 deletions apis/r/src/rinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,18 +418,29 @@ SEXP c_schema(const std::string& uri, Rcpp::XPtr<somactx_wrap_t> ctxxp) {
}

// [[Rcpp::export]]
void resize(
std::string resize(
const std::string& uri,
Rcpp::NumericVector new_shape,
std::string function_name_for_messages,
bool check_only,
Rcpp::XPtr<somactx_wrap_t> ctxxp) {
// This function is solely for SparseNDArray and DenseNDArray for which the
// dims are required by the SOMA spec to be of type int64. Domain-resize for
// variant-indexed dataframes is via upgrade_domain and change_domain.
auto sr = tdbs::SOMAArray::open(OpenMode::write, uri, ctxxp->ctxptr);
std::vector<int64_t> new_shape_i64 = i64_from_rcpp_numeric(new_shape);
sr->resize(new_shape_i64, function_name_for_messages);

std::string retval = "";
if (check_only) {
auto status_and_reason = sr->can_resize(
new_shape_i64, function_name_for_messages);
retval = status_and_reason.second;
} else {
sr->resize(new_shape_i64, function_name_for_messages);
}

sr->close();
return retval;
}

// [[Rcpp::export]]
Expand All @@ -446,29 +457,40 @@ void resize_soma_joinid_shape(
}

// [[Rcpp::export]]
void tiledbsoma_upgrade_shape(
std::string tiledbsoma_upgrade_shape(
const std::string& uri,
Rcpp::NumericVector new_shape,
std::string function_name_for_messages,
bool check_only,
Rcpp::XPtr<somactx_wrap_t> ctxxp) {
// This function is solely for SparseNDArray and DenseNDArray for which the
// dims are required by the SOMA spec to be of type int64. Domain-resize for
// variant-indexed dataframes is via upgrade_domain and change_domain.
auto sr = tdbs::SOMAArray::open(OpenMode::write, uri, ctxxp->ctxptr);
std::vector<int64_t> new_shape_i64 = i64_from_rcpp_numeric(new_shape);
sr->upgrade_shape(new_shape_i64, function_name_for_messages);

std::string retval = "";
if (check_only) {
auto status_and_reason = sr->can_upgrade_shape(
new_shape_i64, function_name_for_messages);
retval = status_and_reason.second;
} else {
sr->upgrade_shape(new_shape_i64, function_name_for_messages);
}

sr->close();
return retval;
}

// [[Rcpp::export]]
void upgrade_or_change_domain(
std::string upgrade_or_change_domain(
const std::string& uri,
bool is_change_domain,
naxpArray nadimap,
naxpSchema nadimsp,
std::string function_name_for_messages,
bool check_only,
Rcpp::XPtr<somactx_wrap_t> ctxxp) {

// This is pointer manipulation from R -> Rcpp SEXP -> libtiledbsoma:
nanoarrow::UniqueArray apdim{nanoarrow_array_from_xptr(nadimap)};
nanoarrow::UniqueSchema spdim{nanoarrow_schema_from_xptr(nadimsp)};
Expand All @@ -482,13 +504,27 @@ void upgrade_or_change_domain(
tdbs::ArrowTable arrow_table(std::move(dimarr), std::move(dimsch));

// Now call libtiledbsoma
std::string reason_string = "";
auto sr = tdbs::SOMADataFrame::open(uri, OpenMode::write, ctxxp->ctxptr);
if (is_change_domain) {
sr->change_domain(arrow_table, function_name_for_messages);
if (check_only) {
auto status_and_reason = sr->can_change_domain(
arrow_table, function_name_for_messages);
reason_string = status_and_reason.second;
} else {
sr->change_domain(arrow_table, function_name_for_messages);
}
} else {
sr->upgrade_domain(arrow_table, function_name_for_messages);
if (check_only) {
auto status_and_reason = sr->can_upgrade_domain(
arrow_table, function_name_for_messages);
reason_string = status_and_reason.second;
} else {
sr->upgrade_domain(arrow_table, function_name_for_messages);
}
}
sr->close();
return reason_string;
}

// [[Rcpp::export]]
Expand Down
Loading

0 comments on commit 8af4046

Please sign in to comment.