Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[r] Add check_only support for domain/shape updates #3400

Merged
merged 8 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices,
like those commonly used for single cell data analysis. It is documented at
<https://github.com/single-cell-data>; a formal specification available is at
<https://github.com/single-cell-data/SOMA/blob/main/abstract_specification.md>.
Version: 1.15.99.19
Version: 1.15.99.20
Authors@R: c(
person(given = "Aaron", family = "Wolen",
role = c("cre", "aut"), email = "[email protected]",
Expand Down
1 change: 1 addition & 0 deletions apis/r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
* Update docstrings for `domain` argument to `create` [#3396](https://github.com/single-cell-data/TileDB-SOMA/pull/3396)
* Vignette for new-shape feature [#3302](https://github.com/single-cell-data/TileDB-SOMA/pull/3302)
* Fix blockwise iterator + re-indexer to return re-indexed shape instead of full domain
* Add `check_only` support for domain/shape updates [#3400](https://github.com/single-cell-data/TileDB-SOMA/pull/3400)

# tiledbsoma 1.14.5

Expand Down
12 changes: 6 additions & 6 deletions apis/r/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -238,20 +238,20 @@ c_schema <- function(uri, ctxxp) {
.Call(`_tiledbsoma_c_schema`, uri, ctxxp)
}

resize <- function(uri, new_shape, function_name_for_messages, ctxxp) {
invisible(.Call(`_tiledbsoma_resize`, uri, new_shape, function_name_for_messages, ctxxp))
resize <- function(uri, new_shape, function_name_for_messages, check_only, ctxxp) {
.Call(`_tiledbsoma_resize`, uri, new_shape, function_name_for_messages, check_only, ctxxp)
}

resize_soma_joinid_shape <- function(uri, new_shape, function_name_for_messages, ctxxp) {
invisible(.Call(`_tiledbsoma_resize_soma_joinid_shape`, uri, new_shape, function_name_for_messages, ctxxp))
}

tiledbsoma_upgrade_shape <- function(uri, new_shape, function_name_for_messages, ctxxp) {
invisible(.Call(`_tiledbsoma_tiledbsoma_upgrade_shape`, uri, new_shape, function_name_for_messages, ctxxp))
tiledbsoma_upgrade_shape <- function(uri, new_shape, function_name_for_messages, check_only, ctxxp) {
.Call(`_tiledbsoma_tiledbsoma_upgrade_shape`, uri, new_shape, function_name_for_messages, check_only, ctxxp)
}

upgrade_or_change_domain <- function(uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, ctxxp) {
invisible(.Call(`_tiledbsoma_upgrade_or_change_domain`, uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, ctxxp))
upgrade_or_change_domain <- function(uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, check_only, ctxxp) {
.Call(`_tiledbsoma_upgrade_or_change_domain`, uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, check_only, ctxxp)
}

c_update_dataframe_schema <- function(uri, ctxxp, column_names_to_drop, add_cols_types, add_cols_enum_value_types, add_cols_enum_ordered) {
Expand Down
54 changes: 36 additions & 18 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -472,26 +472,35 @@ SOMADataFrame <- R6::R6Class(
#' @param new_domain A named list, keyed by index-column name, with values
#' being two-element vectors containing the desired lower and upper bounds
#' for the domain.
#' @return No return value
tiledbsoma_upgrade_domain = function(new_domain) {
# stopifnot("'new_domain' must be CODE ME UP PLZ" = ...
#' @param check_only If true, does not apply the operation, but only reports
#' whether it would have succeeded.
#' @return No return value if `check_only` is `FALSE`. If `check_only` is `TRUE`,
#' returns the empty string if no error is detected, else a description of the error.
johnkerl marked this conversation as resolved.
Show resolved Hide resolved
tiledbsoma_upgrade_domain = function(new_domain, check_only = FALSE) {
# Checking slotwise new shape >= old shape, and <= max_shape, is already
# done in libtiledbsoma

pyarrow_domain_table <- private$upgrade_or_change_domain_helper(
new_domain, "tiledbsoma_upgrade_domain"
)

invisible(
upgrade_or_change_domain(
self$uri,
FALSE,
pyarrow_domain_table$array,
pyarrow_domain_table$schema,
.name_of_function(),
private$.soma_context
)
reason_string <- upgrade_or_change_domain(
self$uri,
FALSE,
pyarrow_domain_table$array,
pyarrow_domain_table$schema,
.name_of_function(),
check_only,
private$.soma_context
)

if (isTRUE(check_only)) {
return(reason_string)
}

# The return value from upgrade_or_change_domain without check_only is
# always "", or it raises an error trying.
return(invisible(NULL))
},

#' @description Allows you to set the domain of a `SOMADataFrame`, when the
Expand All @@ -506,26 +515,35 @@ SOMADataFrame <- R6::R6Class(
#' @param new_domain A named list, keyed by index-column name, with values
#' being two-element vectors containing the desired lower and upper bounds
#' for the domain.
#' @return No return value
change_domain = function(new_domain) {
# stopifnot("'new_domain' must be CODE ME UP PLZ" = ...
#' @param check_only If true, does not apply the operation, but only reports
#' whether it would have succeeded.
#' @return No return value if `check_only` is `FALSE`. If `check_only` is `TRUE`,
#' returns the empty string if no error is detected, else a description of the error.
johnkerl marked this conversation as resolved.
Show resolved Hide resolved
change_domain = function(new_domain, check_only = FALSE) {
# Checking slotwise new shape >= old shape, and <= max_shape, is already
# done in libtiledbsoma

pyarrow_domain_table <- private$upgrade_or_change_domain_helper(
new_domain, tiledbsoma_upgrade_domain
)

invisible(
upgrade_or_change_domain(
reason_string <- upgrade_or_change_domain(
self$uri,
TRUE,
pyarrow_domain_table$array,
pyarrow_domain_table$schema,
.name_of_function(),
check_only,
private$.soma_context
)
)

if (isTRUE(check_only)) {
return(reason_string)
}

# The return value from upgrade_or_change_domain without check_only is
# always "", or it raises an error trying.
return(invisible(NULL))
}
),
private = list(
Expand Down
35 changes: 29 additions & 6 deletions apis/r/R/SOMANDArrayBase.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,27 @@ SOMANDArrayBase <- R6::R6Class(
#' doesn't already have a shape: in that case please call
#' `tiledbsoma_upgrade_shape`.
#' @param new_shape A vector of integerish, of the same length as the array's `ndim`.
#' @return No return value
resize = function(new_shape) {
#' @param check_only If true, does not apply the operation, but only reports
#' whether it would have succeeded.
#' @return No return value if `check_only` is `FALSE`. If `check_only` is `TRUE`,
#' returns the empty string if no error is detected, else a description of the error.
johnkerl marked this conversation as resolved.
Show resolved Hide resolved
resize = function(new_shape, check_only = FALSE) {
stopifnot(
"'new_shape' must be a vector of integerish values, of the same length as maxshape" =
rlang::is_integerish(new_shape, n = self$ndim()) ||
(bit64::is.integer64(new_shape) && length(new_shape) == self$ndim())
)
# Checking slotwise new shape >= old shape, and <= max_shape, is already done in libtiledbsoma
resize(self$uri, new_shape, .name_of_function(), private$.soma_context)

reason_string <- resize(self$uri, new_shape, .name_of_function(), check_only, private$.soma_context)

if (isTRUE(check_only)) {
return(reason_string)
}

# The return value from resize without check_only is always "", or it
# raises an error trying.
return(invisible(NULL))
},

#' @description Allows the array to have a resizeable shape as described in the
Expand All @@ -120,15 +132,26 @@ SOMANDArrayBase <- R6::R6Class(
#' call on a pre-1.15 array the first time a shape is set on it; the latter must
#' be used for subsequent resizes on any array which already has upgraded shape.
#' @param shape A vector of integerish, of the same length as the array's `ndim`.
#' @return No return value
tiledbsoma_upgrade_shape = function(shape) {
#' @param check_only If true, does not apply the operation, but only reports
#' whether it would have succeeded.
#' @return No return value if `check_only` is `FALSE`. If `check_only` is `TRUE`,
#' returns the empty string if no error is detected, else a description of the error.
johnkerl marked this conversation as resolved.
Show resolved Hide resolved
tiledbsoma_upgrade_shape = function(shape, check_only = FALSE) {
stopifnot(
"'shape' must be a vector of integerish values, of the same length as maxshape" =
rlang::is_integerish(shape, n = self$ndim()) ||
(bit64::is.integer64(shape) && length(shape) == self$ndim())
)
# Checking slotwise new shape >= old shape, and <= max_shape, is already done in libtiledbsoma
tiledbsoma_upgrade_shape(self$uri, shape, .name_of_function(), private$.soma_context)

reason_string <- tiledbsoma_upgrade_shape(self$uri, shape, .name_of_function(), check_only, private$.soma_context)
if (isTRUE(check_only)) {
return(reason_string)
}

# The return value from tiledbsoma_upgrade_shape without check_only is
# always "", or it raises an error trying.
return(invisible(NULL))
}
),
private = list(
Expand Down
36 changes: 21 additions & 15 deletions apis/r/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,16 +553,18 @@ BEGIN_RCPP
END_RCPP
}
// resize
void resize(const std::string& uri, Rcpp::NumericVector new_shape, std::string function_name_for_messages, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_resize(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP function_name_for_messagesSEXP, SEXP ctxxpSEXP) {
std::string resize(const std::string& uri, Rcpp::NumericVector new_shape, std::string function_name_for_messages, bool check_only, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_resize(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP function_name_for_messagesSEXP, SEXP check_onlySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericVector >::type new_shape(new_shapeSEXP);
Rcpp::traits::input_parameter< std::string >::type function_name_for_messages(function_name_for_messagesSEXP);
Rcpp::traits::input_parameter< bool >::type check_only(check_onlySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
resize(uri, new_shape, function_name_for_messages, ctxxp);
return R_NilValue;
rcpp_result_gen = Rcpp::wrap(resize(uri, new_shape, function_name_for_messages, check_only, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// resize_soma_joinid_shape
Expand All @@ -579,31 +581,35 @@ BEGIN_RCPP
END_RCPP
}
// tiledbsoma_upgrade_shape
void tiledbsoma_upgrade_shape(const std::string& uri, Rcpp::NumericVector new_shape, std::string function_name_for_messages, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_tiledbsoma_upgrade_shape(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP function_name_for_messagesSEXP, SEXP ctxxpSEXP) {
std::string tiledbsoma_upgrade_shape(const std::string& uri, Rcpp::NumericVector new_shape, std::string function_name_for_messages, bool check_only, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_tiledbsoma_upgrade_shape(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP function_name_for_messagesSEXP, SEXP check_onlySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericVector >::type new_shape(new_shapeSEXP);
Rcpp::traits::input_parameter< std::string >::type function_name_for_messages(function_name_for_messagesSEXP);
Rcpp::traits::input_parameter< bool >::type check_only(check_onlySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
tiledbsoma_upgrade_shape(uri, new_shape, function_name_for_messages, ctxxp);
return R_NilValue;
rcpp_result_gen = Rcpp::wrap(tiledbsoma_upgrade_shape(uri, new_shape, function_name_for_messages, check_only, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// upgrade_or_change_domain
void upgrade_or_change_domain(const std::string& uri, bool is_change_domain, naxpArray nadimap, naxpSchema nadimsp, std::string function_name_for_messages, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_upgrade_or_change_domain(SEXP uriSEXP, SEXP is_change_domainSEXP, SEXP nadimapSEXP, SEXP nadimspSEXP, SEXP function_name_for_messagesSEXP, SEXP ctxxpSEXP) {
std::string upgrade_or_change_domain(const std::string& uri, bool is_change_domain, naxpArray nadimap, naxpSchema nadimsp, std::string function_name_for_messages, bool check_only, Rcpp::XPtr<somactx_wrap_t> ctxxp);
RcppExport SEXP _tiledbsoma_upgrade_or_change_domain(SEXP uriSEXP, SEXP is_change_domainSEXP, SEXP nadimapSEXP, SEXP nadimspSEXP, SEXP function_name_for_messagesSEXP, SEXP check_onlySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< bool >::type is_change_domain(is_change_domainSEXP);
Rcpp::traits::input_parameter< naxpArray >::type nadimap(nadimapSEXP);
Rcpp::traits::input_parameter< naxpSchema >::type nadimsp(nadimspSEXP);
Rcpp::traits::input_parameter< std::string >::type function_name_for_messages(function_name_for_messagesSEXP);
Rcpp::traits::input_parameter< bool >::type check_only(check_onlySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
upgrade_or_change_domain(uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, ctxxp);
return R_NilValue;
rcpp_result_gen = Rcpp::wrap(upgrade_or_change_domain(uri, is_change_domain, nadimap, nadimsp, function_name_for_messages, check_only, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// c_update_dataframe_schema
Expand Down Expand Up @@ -834,10 +840,10 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledbsoma_c_dimnames", (DL_FUNC) &_tiledbsoma_c_dimnames, 2},
{"_tiledbsoma_c_attrnames", (DL_FUNC) &_tiledbsoma_c_attrnames, 2},
{"_tiledbsoma_c_schema", (DL_FUNC) &_tiledbsoma_c_schema, 2},
{"_tiledbsoma_resize", (DL_FUNC) &_tiledbsoma_resize, 4},
{"_tiledbsoma_resize", (DL_FUNC) &_tiledbsoma_resize, 5},
{"_tiledbsoma_resize_soma_joinid_shape", (DL_FUNC) &_tiledbsoma_resize_soma_joinid_shape, 4},
{"_tiledbsoma_tiledbsoma_upgrade_shape", (DL_FUNC) &_tiledbsoma_tiledbsoma_upgrade_shape, 4},
{"_tiledbsoma_upgrade_or_change_domain", (DL_FUNC) &_tiledbsoma_upgrade_or_change_domain, 6},
{"_tiledbsoma_tiledbsoma_upgrade_shape", (DL_FUNC) &_tiledbsoma_tiledbsoma_upgrade_shape, 5},
{"_tiledbsoma_upgrade_or_change_domain", (DL_FUNC) &_tiledbsoma_upgrade_or_change_domain, 7},
{"_tiledbsoma_c_update_dataframe_schema", (DL_FUNC) &_tiledbsoma_c_update_dataframe_schema, 6},
{"_tiledbsoma_sr_setup", (DL_FUNC) &_tiledbsoma_sr_setup, 10},
{"_tiledbsoma_sr_complete", (DL_FUNC) &_tiledbsoma_sr_complete, 1},
Expand Down
52 changes: 44 additions & 8 deletions apis/r/src/rinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,18 +418,29 @@ SEXP c_schema(const std::string& uri, Rcpp::XPtr<somactx_wrap_t> ctxxp) {
}

// [[Rcpp::export]]
void resize(
std::string resize(
const std::string& uri,
Rcpp::NumericVector new_shape,
std::string function_name_for_messages,
bool check_only,
Rcpp::XPtr<somactx_wrap_t> ctxxp) {
// This function is solely for SparseNDArray and DenseNDArray for which the
// dims are required by the SOMA spec to be of type int64. Domain-resize for
// variant-indexed dataframes is via upgrade_domain and change_domain.
auto sr = tdbs::SOMAArray::open(OpenMode::write, uri, ctxxp->ctxptr);
std::vector<int64_t> new_shape_i64 = i64_from_rcpp_numeric(new_shape);
sr->resize(new_shape_i64, function_name_for_messages);

std::string retval = "";
if (check_only) {
auto status_and_reason = sr->can_resize(
new_shape_i64, function_name_for_messages);
retval = status_and_reason.second;
} else {
sr->resize(new_shape_i64, function_name_for_messages);
}

sr->close();
return retval;
}

// [[Rcpp::export]]
Expand All @@ -446,29 +457,40 @@ void resize_soma_joinid_shape(
}

// [[Rcpp::export]]
void tiledbsoma_upgrade_shape(
std::string tiledbsoma_upgrade_shape(
const std::string& uri,
Rcpp::NumericVector new_shape,
std::string function_name_for_messages,
bool check_only,
Rcpp::XPtr<somactx_wrap_t> ctxxp) {
// This function is solely for SparseNDArray and DenseNDArray for which the
// dims are required by the SOMA spec to be of type int64. Domain-resize for
// variant-indexed dataframes is via upgrade_domain and change_domain.
auto sr = tdbs::SOMAArray::open(OpenMode::write, uri, ctxxp->ctxptr);
std::vector<int64_t> new_shape_i64 = i64_from_rcpp_numeric(new_shape);
sr->upgrade_shape(new_shape_i64, function_name_for_messages);

std::string retval = "";
if (check_only) {
auto status_and_reason = sr->can_upgrade_shape(
new_shape_i64, function_name_for_messages);
retval = status_and_reason.second;
} else {
sr->upgrade_shape(new_shape_i64, function_name_for_messages);
}

sr->close();
return retval;
}

// [[Rcpp::export]]
void upgrade_or_change_domain(
std::string upgrade_or_change_domain(
const std::string& uri,
bool is_change_domain,
naxpArray nadimap,
naxpSchema nadimsp,
std::string function_name_for_messages,
bool check_only,
Rcpp::XPtr<somactx_wrap_t> ctxxp) {

// This is pointer manipulation from R -> Rcpp SEXP -> libtiledbsoma:
nanoarrow::UniqueArray apdim{nanoarrow_array_from_xptr(nadimap)};
nanoarrow::UniqueSchema spdim{nanoarrow_schema_from_xptr(nadimsp)};
Expand All @@ -482,13 +504,27 @@ void upgrade_or_change_domain(
tdbs::ArrowTable arrow_table(std::move(dimarr), std::move(dimsch));

// Now call libtiledbsoma
std::string reason_string = "";
auto sr = tdbs::SOMADataFrame::open(uri, OpenMode::write, ctxxp->ctxptr);
if (is_change_domain) {
sr->change_domain(arrow_table, function_name_for_messages);
if (check_only) {
auto status_and_reason = sr->can_change_domain(
arrow_table, function_name_for_messages);
reason_string = status_and_reason.second;
} else {
sr->change_domain(arrow_table, function_name_for_messages);
}
} else {
sr->upgrade_domain(arrow_table, function_name_for_messages);
if (check_only) {
auto status_and_reason = sr->can_upgrade_domain(
arrow_table, function_name_for_messages);
reason_string = status_and_reason.second;
} else {
sr->upgrade_domain(arrow_table, function_name_for_messages);
}
}
sr->close();
return reason_string;
}

// [[Rcpp::export]]
Expand Down
Loading