Skip to content

Commit

Permalink
[r] Add drop_levels to SOMAExperimentAxisQuery -> ecosystem outge…
Browse files Browse the repository at this point in the history
…stors

R analog of #2811 and single-cell-data/SOMA#204; add a `drop_levels`
paramter to the ecosystem outgestors to drop unused factor levels from
resulting data frames

Modified SOMA methods:
 - `SOMAExperimentAxisQuery$to_seurat()`: add `drop_levels` to drop
   drop unused levels from `obs` and `var` data frames
 - `SOMAExperimentAxisQuery$to_seurat_assay()`: add `drop_levels` to
   drop unused levels from `var` data frame
 - `SOMAExperimentAxisQuery$to_single_cell_experiment()`: add
   `drop_levels` to drop unused levels from `obs` and `var` data frames

Also shifts `SOMAExperimentAxisQuery$to_seurat()` and
`SOMAExperimentAxisQuery$to_seurat_assay()` to use
`SOMAExperimentAxisQuery$private$.load_df()` for loading `obs` and
`var`; removing standalone code and increase sharing with the SCE
outgestor

resolves #2765

[SC-51945](https://app.shortcut.com/tiledb-inc/story/51945)
  • Loading branch information
mojaveazure committed Aug 2, 2024
1 parent 805e34a commit f374cfb
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 27 deletions.
63 changes: 50 additions & 13 deletions apis/r/R/SOMAExperimentAxisQuery.R
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ SOMAExperimentAxisQuery <- R6::R6Class(
#' @param obsm_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_mlayers()}
#' @param varm_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_mlayers(axis = 'varm')}
#' @param obsp_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_players()}
#' @param drop_levels Drop unused levels from \code{obs} and \code{var} factor columns
#'
#' @return A \code{\link[SeuratObject]{Seurat}} object
#'
Expand All @@ -526,7 +527,8 @@ SOMAExperimentAxisQuery <- R6::R6Class(
var_column_names = NULL,
obsm_layers = NULL,
varm_layers = NULL,
obsp_layers = NULL
obsp_layers = NULL,
drop_levels = FALSE
) {
check_package('SeuratObject', version = .MINIMUM_SEURAT_VERSION())
op <- options(Seurat.object.assay.version = 'v3')
Expand All @@ -545,7 +547,9 @@ SOMAExperimentAxisQuery <- R6::R6Class(
is_scalar_logical(varm_layers),
"'obsp_layers' must be a character vector" = is.null(obsp_layers) ||
is.character(obsp_layers) ||
is_scalar_logical(obsp_layers)
is_scalar_logical(obsp_layers),
"'drop_levels' must be TRUE or FALSE" = isTRUE(drop_levels) ||
isFALSE(drop_levels)
)
tryCatch(
expr = self$obs_df,
Expand Down Expand Up @@ -585,8 +589,10 @@ SOMAExperimentAxisQuery <- R6::R6Class(
y = obs_index
)
if (!(isFALSE(obs_column_names) || rlang::is_na(obs_column_names))) {
obs <- as.data.frame(
x = self$obs(obs_column_names)$concat()$to_data_frame()
obs <- private$.load_df(
'obs',
column_names = obs_column_names,
drop_levels = drop_levels
)
row.names(obs) <- cells
object[[names(obs)]] <- obs
Expand Down Expand Up @@ -732,14 +738,16 @@ SOMAExperimentAxisQuery <- R6::R6Class(
#' @param obs_index \Sexpr[results=rd]{tiledbsoma:::rd_outgest_index()}
#' @param var_index \Sexpr[results=rd]{tiledbsoma:::rd_outgest_index(axis = 'var')}
#' @param var_column_names \Sexpr[results=rd]{tiledbsoma:::rd_outgest_metadata_names(axis = 'var')}
#' @param drop_levels Drop unused levels from \code{var} factor columns
#'
#' @return An \code{\link[SeuratObject]{Assay}} object
#'
to_seurat_assay = function(
X_layers = c(counts = 'counts', data = 'logcounts'),
obs_index = NULL,
var_index = NULL,
var_column_names = NULL
var_column_names = NULL,
drop_levels = FALSE
) {
version <- 'v3'
check_package('SeuratObject', version = .MINIMUM_SEURAT_VERSION())
Expand All @@ -755,7 +763,9 @@ SOMAExperimentAxisQuery <- R6::R6Class(
(is_scalar_character(var_index) && !is.na(var_index)),
"'var_column_names' must be a character vector" = is.null(var_column_names) ||
is.character(var_column_names) ||
is_scalar_logical(var_column_names)
is_scalar_logical(var_column_names),
"'drop_levels' must be TRUE or FALSE" = isTRUE(drop_levels) ||
isFALSE(drop_levels)
)
match.arg(version, choices = 'v3')
features <- if (is.null(var_index)) {
Expand Down Expand Up @@ -810,7 +820,11 @@ SOMAExperimentAxisQuery <- R6::R6Class(
y = var_index
)
if (!(isFALSE(var_column_names) || rlang::is_na(var_column_names))) {
var <- as.data.frame(self$var(var_column_names)$concat()$to_data_frame())
var <- private$.load_df(
'var',
column_names = var_column_names,
drop_levels = drop_levels
)
row.names(var) <- features
obj[[names(var)]] <- var
}
Expand Down Expand Up @@ -1055,6 +1069,7 @@ SOMAExperimentAxisQuery <- R6::R6Class(
#' @param var_column_names \Sexpr[results=rd]{tiledbsoma:::rd_outgest_metadata_names('sce', 'var')}
#' @param obsp_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_players('sce')}
#' @param varp_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_players('sce', 'varp')}
#' @param drop_levels Drop unused levels from \code{obs} and \code{var} factor columns
#'
#' @return A \code{\link[SingleCellExperiment]{SingleCellExperiment}} object
#'
Expand All @@ -1068,7 +1083,8 @@ SOMAExperimentAxisQuery <- R6::R6Class(
# Omission of `varm_layers` parameter is purposeful as
# SCE objects do not support `varm_layers`
obsp_layers = NULL,
varp_layers = NULL
varp_layers = NULL,
drop_levels = FALSE
) {
check_package('SingleCellExperiment', version = .MINIMUM_SCE_VERSION())
stopifnot(
Expand All @@ -1091,12 +1107,24 @@ SOMAExperimentAxisQuery <- R6::R6Class(
is_scalar_logical(obsp_layers),
"'varp_layers' must be a character vector" = is.null(varp_layers) ||
is.character(varp_layers) ||
is_scalar_logical(varp_layers)
is_scalar_logical(varp_layers),
"'drop_levels' must be TRUE or FALSE" = isTRUE(drop_levels) ||
isFALSE(drop_levels)
)
# Load in colData
obs <- private$.load_df('obs', index = obs_index, column_names = obs_column_names)
obs <- private$.load_df(
'obs',
index = obs_index,
column_names = obs_column_names,
drop_levels = drop_levels
)
# Load in rowData
var <- private$.load_df('var', index = var_index, column_names = var_column_names)
var <- private$.load_df(
'var',
index = var_index,
column_names = var_column_names,
drop_levels = drop_levels
)
# Check the layers
X_layers <- pad_names(X_layers %||% self$ms$X$names())
assert_subset(x = X_layers, y = self$ms$X$names(), type = 'X_layer')
Expand Down Expand Up @@ -1254,11 +1282,17 @@ SOMAExperimentAxisQuery <- R6::R6Class(
# - `FALSE` or `NA`: return a data frame the number of rows as present
# in `df_name` and zero columns
# - a character vector of names of attributes to load in
.load_df = function(df_name = c('obs', 'var'), index = NULL, column_names = NULL) {
.load_df = function(
df_name = c('obs', 'var'),
index = NULL,
column_names = NULL,
drop_levels = FALSE
) {
stopifnot(
is.character(df_name),
is.null(index) || is_scalar_character(index),
is.null(column_names) || is.character(column_names) || is_scalar_logical(column_names)
is.null(column_names) || is.character(column_names) || is_scalar_logical(column_names),
isTRUE(drop_levels) || isFALSE(drop_levels)
)
df_name <- match.arg(arg = df_name)
switch(
Expand Down Expand Up @@ -1306,6 +1340,9 @@ SOMAExperimentAxisQuery <- R6::R6Class(
} else {
df[, column_names, drop = FALSE]
}
if (isTRUE(drop_levels)) {
df <- droplevels(df)
}
return(df)
},
.load_m_axis = function(layer, m_axis = c('obsm', 'varm'), type = "Embeddings") {
Expand Down
15 changes: 12 additions & 3 deletions apis/r/man/SOMAExperimentAxisQuery.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 15 additions & 3 deletions apis/r/tests/testthat/helper-test-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,24 @@ create_arrow_schema <- function(foo_first = TRUE) {
}
}

create_arrow_table <- function(nrows = 10L) {
arrow::arrow_table(
create_arrow_table <- function(nrows = 10L, factors = FALSE) {
if (isTRUE(factors)) {
return(arrow::arrow_table(
foo = seq.int(nrows) + 1000L,
soma_joinid = bit64::seq.integer64(from = 0L, to = nrows - 1L),
bar = seq(nrows) + 0.1,
baz = as.character(seq.int(nrows) + 1000L),
schema = create_arrow_schema()
grp = factor(c(
rep_len("lvl1", length.out = floor(nrows / 2)),
rep_len("lvl2", length.out = ceiling(nrows / 2))
))
))
}
arrow::arrow_table(
foo = seq.int(nrows) + 1000L,
soma_joinid = bit64::seq.integer64(from = 0L, to = nrows - 1L),
bar = seq(nrows) + 0.1,
baz = as.character(seq.int(nrows) + 1000L)
# schema = create_arrow_schema()
)
}
40 changes: 32 additions & 8 deletions apis/r/tests/testthat/helper-test-soma-objects.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ create_and_populate_soma_dataframe <- function(
nrows = 10L,
seed = 1,
index_column_names = "foo",
factors = FALSE,
mode = NULL
) {
set.seed(seed)

arrow_schema <- create_arrow_schema()
tbl <- create_arrow_table(nrows = nrows)
# arrow_schema <- create_arrow_schema()
tbl <- create_arrow_table(nrows = nrows, factors = factors)

sdf <- SOMADataFrameCreate(uri, arrow_schema, index_column_names = index_column_names)
sdf <- SOMADataFrameCreate(uri, tbl$schema, index_column_names = index_column_names)
sdf$write(tbl)

if (is.null(mode)) {
Expand All @@ -24,17 +25,31 @@ create_and_populate_soma_dataframe <- function(
}

# Returns the object created, populated, and closed (unless otherwise requested)
create_and_populate_obs <- function(uri, nrows = 10L, seed = 1, mode = NULL) {
create_and_populate_obs <- function(
uri,
nrows = 10L,
seed = 1,
factors = FALSE,
mode = NULL
) {
create_and_populate_soma_dataframe(
uri = uri,
nrows = nrows,
seed = seed,
index_column_names = "soma_joinid"
index_column_names = "soma_joinid",
factors = factors,
mode = mode
)
}

# Returns the object created, populated, and closed (unless otherwise requested)
create_and_populate_var <- function(uri, nrows = 10L, seed = 1, mode = NULL) {
create_and_populate_var <- function(
uri,
nrows = 10L,
seed = 1,
factors = FALSE,
mode = NULL
) {

tbl <- arrow::arrow_table(
soma_joinid = bit64::seq.integer64(from = 0L, to = nrows - 1L),
Expand All @@ -46,6 +61,12 @@ create_and_populate_var <- function(uri, nrows = 10L, seed = 1, mode = NULL) {
arrow::field("xyzzy", arrow::float64(), nullable = FALSE)
)
)
if (isTRUE(factors)) {
tbl$grp <- factor(c(
rep_len("lvl1", length.out = floor(nrows / 2)),
rep_len("lvl2", length.out = floor(nrows / 2))
))
}

dname <- dirname(uri)
if (!dir.exists(dname)) dir.create(dname)
Expand Down Expand Up @@ -117,6 +138,7 @@ create_and_populate_experiment <- function(
obsp_layer_names = NULL,
varp_layer_names = NULL,
config = NULL,
factors = FALSE,
mode = NULL
) {

Expand All @@ -135,15 +157,17 @@ create_and_populate_experiment <- function(

experiment$obs <- create_and_populate_obs(
uri = file.path(uri, "obs"),
nrows = n_obs
nrows = n_obs,
factors = factors
)

experiment$ms <- SOMACollectionCreate(file.path(uri, "ms"))

ms_rna <- SOMAMeasurementCreate(file.path(uri, "ms", "RNA"))
ms_rna$var <- create_and_populate_var(
uri = file.path(ms_rna$uri, "var"),
nrows = n_var
nrows = n_var,
factors = factors
)
ms_rna$X <- SOMACollectionCreate(file.path(ms_rna$uri, "X"))

Expand Down
Loading

0 comments on commit f374cfb

Please sign in to comment.