diff --git a/apis/r/DESCRIPTION b/apis/r/DESCRIPTION index 158a2dbe88..c49b1f1e82 100644 --- a/apis/r/DESCRIPTION +++ b/apis/r/DESCRIPTION @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices, like those commonly used for single cell data analysis. It is documented at ; a formal specification available is at . -Version: 1.12.99.1 +Version: 1.12.99.2 Authors@R: c( person(given = "Aaron", family = "Wolen", role = c("cre", "aut"), email = "aaron@tiledb.com", diff --git a/apis/r/NEWS.md b/apis/r/NEWS.md index f785641f3e..27b3146a4a 100644 --- a/apis/r/NEWS.md +++ b/apis/r/NEWS.md @@ -3,6 +3,7 @@ ## Changes * Change `$reopen(mode = )` default to not flip modes; require explicit `mode` parameter to be passed +* Add `drop_levels` to `SOMAExperimentAxisQuery` -> ecosystem outgestors to drop unused factor levels # tiledbsoma 1.11.4 diff --git a/apis/r/R/SOMAExperimentAxisQuery.R b/apis/r/R/SOMAExperimentAxisQuery.R index 2fbf168d23..d97fa88c3e 100644 --- a/apis/r/R/SOMAExperimentAxisQuery.R +++ b/apis/r/R/SOMAExperimentAxisQuery.R @@ -515,6 +515,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( #' @param obsm_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_mlayers()} #' @param varm_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_mlayers(axis = 'varm')} #' @param obsp_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_players()} + #' @param drop_levels Drop unused levels from \code{obs} and \code{var} factor columns #' #' @return A \code{\link[SeuratObject]{Seurat}} object #' @@ -526,7 +527,8 @@ SOMAExperimentAxisQuery <- R6::R6Class( var_column_names = NULL, obsm_layers = NULL, varm_layers = NULL, - obsp_layers = NULL + obsp_layers = NULL, + drop_levels = FALSE ) { check_package('SeuratObject', version = .MINIMUM_SEURAT_VERSION()) op <- options(Seurat.object.assay.version = 'v3') @@ -545,7 +547,9 @@ SOMAExperimentAxisQuery <- R6::R6Class( is_scalar_logical(varm_layers), "'obsp_layers' must be a character vector" = is.null(obsp_layers) || is.character(obsp_layers) || - is_scalar_logical(obsp_layers) + is_scalar_logical(obsp_layers), + "'drop_levels' must be TRUE or FALSE" = isTRUE(drop_levels) || + isFALSE(drop_levels) ) tryCatch( expr = self$obs_df, @@ -585,8 +589,10 @@ SOMAExperimentAxisQuery <- R6::R6Class( y = obs_index ) if (!(isFALSE(obs_column_names) || rlang::is_na(obs_column_names))) { - obs <- as.data.frame( - x = self$obs(obs_column_names)$concat()$to_data_frame() + obs <- private$.load_df( + 'obs', + column_names = obs_column_names, + drop_levels = drop_levels ) row.names(obs) <- cells object[[names(obs)]] <- obs @@ -732,6 +738,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( #' @param obs_index \Sexpr[results=rd]{tiledbsoma:::rd_outgest_index()} #' @param var_index \Sexpr[results=rd]{tiledbsoma:::rd_outgest_index(axis = 'var')} #' @param var_column_names \Sexpr[results=rd]{tiledbsoma:::rd_outgest_metadata_names(axis = 'var')} + #' @param drop_levels Drop unused levels from \code{var} factor columns #' #' @return An \code{\link[SeuratObject]{Assay}} object #' @@ -739,7 +746,8 @@ SOMAExperimentAxisQuery <- R6::R6Class( X_layers = c(counts = 'counts', data = 'logcounts'), obs_index = NULL, var_index = NULL, - var_column_names = NULL + var_column_names = NULL, + drop_levels = FALSE ) { version <- 'v3' check_package('SeuratObject', version = .MINIMUM_SEURAT_VERSION()) @@ -755,7 +763,9 @@ SOMAExperimentAxisQuery <- R6::R6Class( (is_scalar_character(var_index) && !is.na(var_index)), "'var_column_names' must be a character vector" = is.null(var_column_names) || is.character(var_column_names) || - is_scalar_logical(var_column_names) + is_scalar_logical(var_column_names), + "'drop_levels' must be TRUE or FALSE" = isTRUE(drop_levels) || + isFALSE(drop_levels) ) match.arg(version, choices = 'v3') features <- if (is.null(var_index)) { @@ -810,7 +820,11 @@ SOMAExperimentAxisQuery <- R6::R6Class( y = var_index ) if (!(isFALSE(var_column_names) || rlang::is_na(var_column_names))) { - var <- as.data.frame(self$var(var_column_names)$concat()$to_data_frame()) + var <- private$.load_df( + 'var', + column_names = var_column_names, + drop_levels = drop_levels + ) row.names(var) <- features obj[[names(var)]] <- var } @@ -1055,6 +1069,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( #' @param var_column_names \Sexpr[results=rd]{tiledbsoma:::rd_outgest_metadata_names('sce', 'var')} #' @param obsp_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_players('sce')} #' @param varp_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_players('sce', 'varp')} + #' @param drop_levels Drop unused levels from \code{obs} and \code{var} factor columns #' #' @return A \code{\link[SingleCellExperiment]{SingleCellExperiment}} object #' @@ -1068,7 +1083,8 @@ SOMAExperimentAxisQuery <- R6::R6Class( # Omission of `varm_layers` parameter is purposeful as # SCE objects do not support `varm_layers` obsp_layers = NULL, - varp_layers = NULL + varp_layers = NULL, + drop_levels = FALSE ) { check_package('SingleCellExperiment', version = .MINIMUM_SCE_VERSION()) stopifnot( @@ -1091,12 +1107,24 @@ SOMAExperimentAxisQuery <- R6::R6Class( is_scalar_logical(obsp_layers), "'varp_layers' must be a character vector" = is.null(varp_layers) || is.character(varp_layers) || - is_scalar_logical(varp_layers) + is_scalar_logical(varp_layers), + "'drop_levels' must be TRUE or FALSE" = isTRUE(drop_levels) || + isFALSE(drop_levels) ) # Load in colData - obs <- private$.load_df('obs', index = obs_index, column_names = obs_column_names) + obs <- private$.load_df( + 'obs', + index = obs_index, + column_names = obs_column_names, + drop_levels = drop_levels + ) # Load in rowData - var <- private$.load_df('var', index = var_index, column_names = var_column_names) + var <- private$.load_df( + 'var', + index = var_index, + column_names = var_column_names, + drop_levels = drop_levels + ) # Check the layers X_layers <- pad_names(X_layers %||% self$ms$X$names()) assert_subset(x = X_layers, y = self$ms$X$names(), type = 'X_layer') @@ -1254,11 +1282,17 @@ SOMAExperimentAxisQuery <- R6::R6Class( # - `FALSE` or `NA`: return a data frame the number of rows as present # in `df_name` and zero columns # - a character vector of names of attributes to load in - .load_df = function(df_name = c('obs', 'var'), index = NULL, column_names = NULL) { + .load_df = function( + df_name = c('obs', 'var'), + index = NULL, + column_names = NULL, + drop_levels = FALSE + ) { stopifnot( is.character(df_name), is.null(index) || is_scalar_character(index), - is.null(column_names) || is.character(column_names) || is_scalar_logical(column_names) + is.null(column_names) || is.character(column_names) || is_scalar_logical(column_names), + isTRUE(drop_levels) || isFALSE(drop_levels) ) df_name <- match.arg(arg = df_name) switch( @@ -1306,6 +1340,9 @@ SOMAExperimentAxisQuery <- R6::R6Class( } else { df[, column_names, drop = FALSE] } + if (isTRUE(drop_levels)) { + df <- droplevels(df) + } return(df) }, .load_m_axis = function(layer, m_axis = c('obsm', 'varm'), type = "Embeddings") { diff --git a/apis/r/man/SOMAExperimentAxisQuery.Rd b/apis/r/man/SOMAExperimentAxisQuery.Rd index 1b071ebb0e..6180e43b99 100644 --- a/apis/r/man/SOMAExperimentAxisQuery.Rd +++ b/apis/r/man/SOMAExperimentAxisQuery.Rd @@ -337,7 +337,8 @@ Loads the query as a \code{\link[SeuratObject]{Seurat}} object var_column_names = NULL, obsm_layers = NULL, varm_layers = NULL, - obsp_layers = NULL + obsp_layers = NULL, + drop_levels = FALSE )}\if{html}{\out{}} } @@ -359,6 +360,8 @@ Loads the query as a \code{\link[SeuratObject]{Seurat}} object \item{\code{varm_layers}}{\Sexpr[results=rd]{tiledbsoma:::rd_outgest_mlayers(axis = 'varm')}} \item{\code{obsp_layers}}{\Sexpr[results=rd]{tiledbsoma:::rd_outgest_players()}} + +\item{\code{drop_levels}}{Drop unused levels from \code{obs} and \code{var} factor columns} } \if{html}{\out{}} } @@ -376,7 +379,8 @@ Loads the query as a Seurat \code{\link[SeuratObject]{Assay}} X_layers = c(counts = "counts", data = "logcounts"), obs_index = NULL, var_index = NULL, - var_column_names = NULL + var_column_names = NULL, + drop_levels = FALSE )}\if{html}{\out{}} } @@ -390,6 +394,8 @@ Loads the query as a Seurat \code{\link[SeuratObject]{Assay}} \item{\code{var_index}}{\Sexpr[results=rd]{tiledbsoma:::rd_outgest_index(axis = 'var')}} \item{\code{var_column_names}}{\Sexpr[results=rd]{tiledbsoma:::rd_outgest_metadata_names(axis = 'var')}} + +\item{\code{drop_levels}}{Drop unused levels from \code{var} factor columns} } \if{html}{\out{}} } @@ -469,7 +475,8 @@ Loads the query as a var_column_names = NULL, obsm_layers = NULL, obsp_layers = NULL, - varp_layers = NULL + varp_layers = NULL, + drop_levels = FALSE )}\if{html}{\out{}} } @@ -491,6 +498,8 @@ Loads the query as a \item{\code{obsp_layers}}{\Sexpr[results=rd]{tiledbsoma:::rd_outgest_players('sce')}} \item{\code{varp_layers}}{\Sexpr[results=rd]{tiledbsoma:::rd_outgest_players('sce', 'varp')}} + +\item{\code{drop_levels}}{Drop unused levels from \code{obs} and \code{var} factor columns} } \if{html}{\out{}} } diff --git a/apis/r/tests/testthat/helper-test-data.R b/apis/r/tests/testthat/helper-test-data.R index c690bd0e48..f0596fc08f 100644 --- a/apis/r/tests/testthat/helper-test-data.R +++ b/apis/r/tests/testthat/helper-test-data.R @@ -57,12 +57,24 @@ create_arrow_schema <- function(foo_first = TRUE) { } } -create_arrow_table <- function(nrows = 10L) { - arrow::arrow_table( +create_arrow_table <- function(nrows = 10L, factors = FALSE) { + if (isTRUE(factors)) { + return(arrow::arrow_table( foo = seq.int(nrows) + 1000L, soma_joinid = bit64::seq.integer64(from = 0L, to = nrows - 1L), bar = seq(nrows) + 0.1, baz = as.character(seq.int(nrows) + 1000L), - schema = create_arrow_schema() + grp = factor(c( + rep_len("lvl1", length.out = floor(nrows / 2)), + rep_len("lvl2", length.out = ceiling(nrows / 2)) + )) + )) + } + arrow::arrow_table( + foo = seq.int(nrows) + 1000L, + soma_joinid = bit64::seq.integer64(from = 0L, to = nrows - 1L), + bar = seq(nrows) + 0.1, + baz = as.character(seq.int(nrows) + 1000L) + # schema = create_arrow_schema() ) } diff --git a/apis/r/tests/testthat/helper-test-soma-objects.R b/apis/r/tests/testthat/helper-test-soma-objects.R index a3e6e86350..7bc919c548 100644 --- a/apis/r/tests/testthat/helper-test-soma-objects.R +++ b/apis/r/tests/testthat/helper-test-soma-objects.R @@ -4,14 +4,15 @@ create_and_populate_soma_dataframe <- function( nrows = 10L, seed = 1, index_column_names = "foo", + factors = FALSE, mode = NULL ) { set.seed(seed) - arrow_schema <- create_arrow_schema() - tbl <- create_arrow_table(nrows = nrows) + # arrow_schema <- create_arrow_schema() + tbl <- create_arrow_table(nrows = nrows, factors = factors) - sdf <- SOMADataFrameCreate(uri, arrow_schema, index_column_names = index_column_names) + sdf <- SOMADataFrameCreate(uri, tbl$schema, index_column_names = index_column_names) sdf$write(tbl) if (is.null(mode)) { @@ -24,17 +25,31 @@ create_and_populate_soma_dataframe <- function( } # Returns the object created, populated, and closed (unless otherwise requested) -create_and_populate_obs <- function(uri, nrows = 10L, seed = 1, mode = NULL) { +create_and_populate_obs <- function( + uri, + nrows = 10L, + seed = 1, + factors = FALSE, + mode = NULL +) { create_and_populate_soma_dataframe( uri = uri, nrows = nrows, seed = seed, - index_column_names = "soma_joinid" + index_column_names = "soma_joinid", + factors = factors, + mode = mode ) } # Returns the object created, populated, and closed (unless otherwise requested) -create_and_populate_var <- function(uri, nrows = 10L, seed = 1, mode = NULL) { +create_and_populate_var <- function( + uri, + nrows = 10L, + seed = 1, + factors = FALSE, + mode = NULL +) { tbl <- arrow::arrow_table( soma_joinid = bit64::seq.integer64(from = 0L, to = nrows - 1L), @@ -46,6 +61,12 @@ create_and_populate_var <- function(uri, nrows = 10L, seed = 1, mode = NULL) { arrow::field("xyzzy", arrow::float64(), nullable = FALSE) ) ) + if (isTRUE(factors)) { + tbl$grp <- factor(c( + rep_len("lvl1", length.out = floor(nrows / 2)), + rep_len("lvl2", length.out = floor(nrows / 2)) + )) + } dname <- dirname(uri) if (!dir.exists(dname)) dir.create(dname) @@ -117,6 +138,7 @@ create_and_populate_experiment <- function( obsp_layer_names = NULL, varp_layer_names = NULL, config = NULL, + factors = FALSE, mode = NULL ) { @@ -135,7 +157,8 @@ create_and_populate_experiment <- function( experiment$obs <- create_and_populate_obs( uri = file.path(uri, "obs"), - nrows = n_obs + nrows = n_obs, + factors = factors ) experiment$ms <- SOMACollectionCreate(file.path(uri, "ms")) @@ -143,7 +166,8 @@ create_and_populate_experiment <- function( ms_rna <- SOMAMeasurementCreate(file.path(uri, "ms", "RNA")) ms_rna$var <- create_and_populate_var( uri = file.path(ms_rna$uri, "var"), - nrows = n_var + nrows = n_var, + factors = factors ) ms_rna$X <- SOMACollectionCreate(file.path(ms_rna$uri, "X")) diff --git a/apis/r/tests/testthat/test-SCEOutgest.R b/apis/r/tests/testthat/test-SCEOutgest.R index 5476272b4f..06053ac28a 100644 --- a/apis/r/tests/testthat/test-SCEOutgest.R +++ b/apis/r/tests/testthat/test-SCEOutgest.R @@ -196,6 +196,63 @@ test_that("Load SCE object from ExperimentQuery mechanics", { expect_error(query$to_single_cell_experiment(obsm_layers = FALSE, obsp_layers = 'tomato')) }) +test_that("Load SCE object with dropped levels", { + skip_if(!extended_tests() || covr_tests()) + skip_if_not_installed('SingleCellExperiment', .MINIMUM_SCE_VERSION('c')) + uri <- tempfile(pattern="sce-experiment-query-drop") + + n_obs <- 20L + n_var <- 10L + experiment <- create_and_populate_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = c('counts', 'logcounts'), + factors = TRUE, + mode = 'READ' + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + + # Create the query + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA", + obs_query = SOMAAxisQuery$new(coords = seq.int(to = floor(n_obs / 3))), + var_query = SOMAAxisQuery$new(coords = seq.int(to = floor(n_var / 3))) + ) + + # Expect both levels to be present in `grp`, even though only one value is + expect_s4_class(sce <- query$to_single_cell_experiment(), "SingleCellExperiment") + expect_in("grp", names(SingleCellExperiment::colData(sce))) + expect_s3_class(obs <- SingleCellExperiment::colData(sce)$grp, "factor") + expect_identical(levels(obs), c("lvl1", "lvl2")) + expect_identical(unique(as.vector(obs)), "lvl1") + expect_in("grp", names(SingleCellExperiment::rowData(sce))) + expect_s3_class(var <- SingleCellExperiment::rowData(sce)$grp, "factor") + expect_identical(levels(var), c("lvl1", "lvl2")) + expect_identical(unique(as.vector(var)), "lvl1") + + # Do the same, but drop levels + expect_s4_class( + dropped <- query$to_single_cell_experiment(drop_levels = TRUE), + "SingleCellExperiment" + ) + expect_in("grp", names(SingleCellExperiment::colData(dropped))) + expect_s3_class(obsd <- SingleCellExperiment::colData(dropped)$grp, "factor") + expect_identical(levels(obsd), "lvl1") + expect_identical(unique(as.vector(obsd)), "lvl1") + expect_in("grp", names(SingleCellExperiment::rowData(dropped))) + expect_s3_class(vard <- SingleCellExperiment::rowData(dropped)$grp, "factor") + expect_identical(levels(vard), "lvl1") + expect_identical(unique(as.vector(vard)), "lvl1") + + # Test assertions + expect_error(query$to_single_cell_experiment(drop_levels = NA)) + expect_error(query$to_single_cell_experiment(drop_levels = 1L)) + expect_error(query$to_single_cell_experiment(drop_levels = 'drop')) + expect_error(query$to_single_cell_experiment(drop_levels = c(TRUE, TRUE))) +}) + test_that("Load SCE object from sliced ExperimentQuery", { skip_if(!extended_tests() || covr_tests()) skip_if_not_installed('SingleCellExperiment', .MINIMUM_SCE_VERSION('c')) diff --git a/apis/r/tests/testthat/test-SeuratOutgest-assay.R b/apis/r/tests/testthat/test-SeuratOutgest-assay.R index 136949ed29..9259028bc4 100644 --- a/apis/r/tests/testthat/test-SeuratOutgest-assay.R +++ b/apis/r/tests/testthat/test-SeuratOutgest-assay.R @@ -117,6 +117,51 @@ test_that("Load assay from ExperimentQuery mechanics", { expect_error(query$to_seurat_assay(var_column_names = 'tomato')) }) +test_that("Load assay with dropped levels", { + skip_if(!extended_tests()) + skip_if_not_installed('SeuratObject', .MINIMUM_SEURAT_VERSION('c')) + + uri <- tempfile(pattern="assay-experiment-drop") + n_obs <- 20L + n_var <- 10L + experiment <- create_and_populate_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = c("counts", "logcounts"), + factors = TRUE, + mode = "READ" + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + + # Create the query + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA", + var_query = SOMAAxisQuery$new(coords = seq.int(to = floor(n_var / 3))) + ) + + # Expect both levels to be present in `grp`, even though only one value is + expect_s4_class(assay <- query$to_seurat_assay(), "Assay") + expect_in("grp", names(assay[[]])) + expect_s3_class(grp <- assay[["grp", drop = TRUE]], "factor") + expect_identical(levels(grp), c("lvl1", "lvl2")) + expect_identical(unique(as.vector(grp)), "lvl1") + + # Do the same, but drop levels + expect_s4_class(dropped <- query$to_seurat_assay(drop_levels = TRUE), "Assay") + expect_in("grp", names(dropped[[]])) + expect_s3_class(drp <- dropped[["grp", drop = TRUE]], "factor") + expect_identical(levels(drp), "lvl1") + expect_identical(unique(as.vector(drp)), "lvl1") + + # Test assertions + expect_error(query$to_seurat_assay(drop_levels = NA)) + expect_error(query$to_seurat_assay(drop_levels = 1L)) + expect_error(query$to_seurat_assay(drop_levels = 'drop')) + expect_error(query$to_seurat_assay(drop_levels = c(TRUE, TRUE))) +}) + test_that("Load assay with SeuratObject v5 returns v3 assays", { skip_if(!extended_tests()) skip_if_not_installed('SeuratObject', '4.9.9.9094') diff --git a/apis/r/tests/testthat/test-SeuratOutgest-object.R b/apis/r/tests/testthat/test-SeuratOutgest-object.R index 6800423a20..0c8846445e 100644 --- a/apis/r/tests/testthat/test-SeuratOutgest-object.R +++ b/apis/r/tests/testthat/test-SeuratOutgest-object.R @@ -194,6 +194,57 @@ test_that("Load Seurat object from ExperimentQuery mechanics", { expect_warning(query$to_seurat(obsp_layers = 'tomato')) }) +test_that("Load Seurat object with dropped levels", { + skip_if(!extended_tests() || covr_tests()) + skip_if_not_installed('SeuratObject', .MINIMUM_SEURAT_VERSION('c')) + so_version <- utils::packageVersion('SeuratObject') + skip_if_not( + (so_version >= .MINIMUM_SEURAT_VERSION() && so_version < '5.0.0') || + so_version >= '5.0.0.9003', + message = so_msg(so_version) + ) + + uri <- tempfile(pattern="seurat-experiment-drop") + n_obs <- 20L + n_var <- 10L + experiment <- create_and_populate_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = c("counts", "logcounts"), + factors = TRUE, + mode = "READ" + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + + # Create the query + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA", + obs_query = SOMAAxisQuery$new(coords = seq.int(to = floor(n_obs / 3))) + ) + + # Expect both levels to be present in `grp`, even though only one value is + expect_s4_class(obj <- query$to_seurat(), "Seurat") + expect_in("grp", names(obj[[]])) + expect_s3_class(obj$grp, "factor") + expect_identical(unique(as.vector(obj$grp)), "lvl1") + expect_identical(levels(obj$grp), c("lvl1", "lvl2")) + + # Do the same, but drop levels + expect_s4_class(dropped <- query$to_seurat(drop_levels = TRUE), "Seurat") + expect_in("grp", names(dropped[[]])) + expect_s3_class(dropped$grp, "factor") + expect_identical(unique(as.vector(dropped$grp)), "lvl1") + expect_identical(levels(dropped$grp), "lvl1") + + # Test assertions + expect_error(query$to_seurat(drop_levels = NA)) + expect_error(query$to_seurat(drop_levels = 1L)) + expect_error(query$to_seurat(drop_levels = 'drop')) + expect_error(query$to_seurat(drop_levels = c(TRUE, TRUE))) +}) + test_that("Load Seurat object from sliced ExperimentQuery", { skip_if(!extended_tests() || covr_tests()) skip_if_not_installed('SeuratObject', .MINIMUM_SEURAT_VERSION('c'))