diff --git a/pipeline-runner/R/obj2s-3-upload_obj2s_to_aws.R b/pipeline-runner/R/obj2s-3-upload_obj2s_to_aws.R index 90ce4959..4c2aca55 100644 --- a/pipeline-runner/R/obj2s-3-upload_obj2s_to_aws.R +++ b/pipeline-runner/R/obj2s-3-upload_obj2s_to_aws.R @@ -7,7 +7,7 @@ upload_obj2s_to_aws <- function(input, pipeline_config, prev_out) { scdata <- prev_out$scdata config <- prev_out$config - scdata <- format_seurat(scdata, experiment_id) + scdata <- format_obj2s(scdata, experiment_id) # change sample ids/names so that get sample cell sets input <- add_samples_to_input(scdata, input) @@ -45,8 +45,8 @@ upload_obj2s_to_aws <- function(input, pipeline_config, prev_out) { key = experiment_id) # replicate qc config for simplicity - # could also create a 'seurat_config' column in experiment table and change the ui/api around more - qc_config <- construct_qc_config(list(one = scdata), unfiltered_samples = 'one', technology="seurat") + # could also create a 'obj2s_config' column in experiment table and change the ui/api around more + qc_config <- construct_qc_config(list(one = scdata), unfiltered_samples = 'one', technology = config$input$type) qc_config$configureEmbedding$embeddingSettings$useSaved <- TRUE qc_config$configureEmbedding$embeddingSettings$method <- SeuratObject::DefaultDimReduc(scdata) @@ -247,7 +247,7 @@ find_group_columns <- function(metadata, remove.dups = TRUE) { # add 'cells_id' # 'samples' must be already added # current input$metadata not yet implemented -format_seurat <- function(scdata, experiment_id) { +format_obj2s <- function(scdata, experiment_id) { scdata <- add_samples_col(scdata) scdata$cells_id <- seq_len(ncol(scdata))-1 @@ -261,6 +261,13 @@ format_seurat <- function(scdata, experiment_id) { metadata_cols <- list('percent.mt' = 0, 'doublet_scores' = 0, 'doublet_class' = 'singlet') scdata <- mock_metadata(scdata, metadata_cols) + # need that logcounts and counts have same nrow + common.genes <- intersect(row.names(scdata[['RNA']]$counts), + row.names(scdata[['RNA']]$data)) + + scdata <- scdata[common.genes, ] + scdata@misc$gene_annotations <- scdata@misc$gene_annotations[common.genes, ] + return(scdata) } diff --git a/pipeline-runner/tests/testthat/test-obj2s-3-upload_obj2s_to_aws.R b/pipeline-runner/tests/testthat/test-obj2s-3-upload_obj2s_to_aws.R index c57471a6..69871492 100644 --- a/pipeline-runner/tests/testthat/test-obj2s-3-upload_obj2s_to_aws.R +++ b/pipeline-runner/tests/testthat/test-obj2s-3-upload_obj2s_to_aws.R @@ -1,5 +1,13 @@ mock_scdata <- function() { data("pbmc_small", package = 'SeuratObject') + rns <- row.names(pbmc_small) + pbmc_small@misc$gene_annotations <- data.frame( + input = rns, + name = rns, + original_name = rns, + row.names = rns + ) + return(pbmc_small) } @@ -22,7 +30,7 @@ test_that("upload_obj2s_to_aws completes successfully", { scdata$seurat_clusters <- rep(letters[1:8], length.out = ncol(scdata)) input <- list(experimentId = '1234') - prev_out <- list(scdata = scdata, config = list()) + prev_out <- list(scdata = scdata, config = list(input = list(type = 'seurat_object'))) expect_error(upload_obj2s_to_aws(input, NULL, prev_out), NA) }) @@ -95,10 +103,41 @@ test_that("add_samples_col uses existing 'samples' or 'sample' metadata column", }) -test_that("format_seurat adds requires metadata to a SeuratObject", { +test_that("format_obj2s ensures logcounts and counts have same nrow", { + + + # filter out genes in logcounts + set.seed(0) + scdata_orig <- mock_scdata() + logcount.genes <- sample(row.names(scdata_orig), nrow(scdata_orig)/2) + + scdata_filtered <- Seurat::CreateSeuratObject( + counts = scdata_orig[['RNA']]@counts, + data = scdata_orig[['RNA']]@data[logcount.genes, ] + ) + + scdata_filtered@misc$gene_annotations <- scdata_orig@misc$gene_annotations + + # check that are fewer genes in data + expect_lt(nrow(scdata_filtered[['RNA']]$data), nrow(scdata_filtered[['RNA']]$counts)) + + scdata <- format_obj2s(scdata_filtered, '1234') + + # check that are same genes after formatting + expect_equal(nrow(scdata[['RNA']]$data), nrow(scdata[['RNA']]$counts)) + + # check that row.names are correct + expect_setequal(row.names(scdata), logcount.genes) + + # check that gene_annotations was also corrected + expect_setequal(row.names(scdata@misc$gene_annotations), logcount.genes) + +}) + +test_that("format_obj2s adds required metadata", { scdata <- mock_scdata() - scdata <- format_seurat(scdata, '1234') + scdata <- format_obj2s(scdata, '1234') # added samples expect_true(all(scdata$samples == 'NA')) @@ -108,7 +147,7 @@ test_that("format_seurat adds requires metadata to a SeuratObject", { # added misc expect_equal(scdata@misc$experimentId, '1234') - expect_setequal(names(scdata@misc), c('experimentId', 'color_pool', 'ingestionDate')) + expect_setequal(names(scdata@misc), c('experimentId', 'color_pool', 'ingestionDate', 'gene_annotations')) # added required metadata columns expect_true(all(c('percent.mt', 'doublet_scores', 'doublet_class') %in% colnames(scdata@meta.data)))