Skip to content

Commit

Permalink
Merge pull request #372 from hms-dbmi-cellenics/fix-obj2s-tech
Browse files Browse the repository at this point in the history
use correct technology for qc_config for obj2s
  • Loading branch information
alexvpickering authored Sep 20, 2024
2 parents 0ea1260 + a8c8448 commit 61c3891
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 8 deletions.
15 changes: 11 additions & 4 deletions pipeline-runner/R/obj2s-3-upload_obj2s_to_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ upload_obj2s_to_aws <- function(input, pipeline_config, prev_out) {
scdata <- prev_out$scdata
config <- prev_out$config

scdata <- format_seurat(scdata, experiment_id)
scdata <- format_obj2s(scdata, experiment_id)

# change sample ids/names so that get sample cell sets
input <- add_samples_to_input(scdata, input)
Expand Down Expand Up @@ -45,8 +45,8 @@ upload_obj2s_to_aws <- function(input, pipeline_config, prev_out) {
key = experiment_id)

# replicate qc config for simplicity
# could also create a 'seurat_config' column in experiment table and change the ui/api around more
qc_config <- construct_qc_config(list(one = scdata), unfiltered_samples = 'one', technology="seurat")
# could also create an 'obj2s_config' column in the experiment table and change the ui/api around more
qc_config <- construct_qc_config(list(one = scdata), unfiltered_samples = 'one', technology = config$input$type)
qc_config$configureEmbedding$embeddingSettings$useSaved <- TRUE
qc_config$configureEmbedding$embeddingSettings$method <- SeuratObject::DefaultDimReduc(scdata)

Expand Down Expand Up @@ -247,7 +247,7 @@ find_group_columns <- function(metadata, remove.dups = TRUE) {
# add 'cells_id'
# 'samples' must be already added
# current input$metadata not yet implemented
format_seurat <- function(scdata, experiment_id) {
format_obj2s <- function(scdata, experiment_id) {

scdata <- add_samples_col(scdata)
scdata$cells_id <- seq_len(ncol(scdata))-1
Expand All @@ -261,6 +261,13 @@ format_seurat <- function(scdata, experiment_id) {
metadata_cols <- list('percent.mt' = 0, 'doublet_scores' = 0, 'doublet_class' = 'singlet')
scdata <- mock_metadata(scdata, metadata_cols)

# logcounts and counts need to have the same number of rows
common.genes <- intersect(row.names(scdata[['RNA']]$counts),
row.names(scdata[['RNA']]$data))

scdata <- scdata[common.genes, ]
scdata@misc$gene_annotations <- scdata@misc$gene_annotations[common.genes, ]

return(scdata)
}

Expand Down
47 changes: 43 additions & 4 deletions pipeline-runner/tests/testthat/test-obj2s-3-upload_obj2s_to_aws.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Test fixture: the `pbmc_small` dataset shipped with SeuratObject, with a
# `gene_annotations` data.frame stored in `misc`. The annotation table has one
# row per gene, and the input/name/original_name columns and the row names all
# hold the object's row names.
mock_scdata <- function() {
  data("pbmc_small", package = 'SeuratObject')

  gene_ids <- row.names(pbmc_small)
  annotations <- data.frame(
    input = gene_ids,
    name = gene_ids,
    original_name = gene_ids,
    row.names = gene_ids
  )

  pbmc_small@misc$gene_annotations <- annotations
  pbmc_small
}

Expand All @@ -22,7 +30,7 @@ test_that("upload_obj2s_to_aws completes successfully", {
scdata$seurat_clusters <- rep(letters[1:8], length.out = ncol(scdata))

input <- list(experimentId = '1234')
prev_out <- list(scdata = scdata, config = list())
prev_out <- list(scdata = scdata, config = list(input = list(type = 'seurat_object')))

expect_error(upload_obj2s_to_aws(input, NULL, prev_out), NA)
})
Expand Down Expand Up @@ -95,10 +103,41 @@ test_that("add_samples_col uses existing 'samples' or 'sample' metadata column",
})


test_that("format_seurat adds requires metadata to a SeuratObject", {
# Checks that format_obj2s() leaves the counts and data layers of the RNA assay
# with the same number of rows, and that both the object's row names and the
# row names of misc$gene_annotations end up as the genes kept in data.
test_that("format_obj2s ensures logcounts and counts have same nrow", {


# filter out genes in logcounts: only a random half of the genes is kept for
# the data layer (seed fixed so the sampled subset is reproducible)
set.seed(0)
scdata_orig <- mock_scdata()
logcount.genes <- sample(row.names(scdata_orig), nrow(scdata_orig)/2)

# rebuild the object with full counts but the gene-subsetted data matrix
scdata_filtered <- Seurat::CreateSeuratObject(
counts = scdata_orig[['RNA']]@counts,
data = scdata_orig[['RNA']]@data[logcount.genes, ]
)

# carry over the full (unfiltered) annotation table from the original fixture
scdata_filtered@misc$gene_annotations <- scdata_orig@misc$gene_annotations

# sanity check on the fixture: there are fewer genes in data than in counts
expect_lt(nrow(scdata_filtered[['RNA']]$data), nrow(scdata_filtered[['RNA']]$counts))

scdata <- format_obj2s(scdata_filtered, '1234')

# check that data and counts have the same number of genes after formatting
expect_equal(nrow(scdata[['RNA']]$data), nrow(scdata[['RNA']]$counts))

# check that the remaining row.names are exactly the genes kept in data
expect_setequal(row.names(scdata), logcount.genes)

# check that gene_annotations was also restricted to the same genes
expect_setequal(row.names(scdata@misc$gene_annotations), logcount.genes)

})

test_that("format_obj2s adds required metadata", {

scdata <- mock_scdata()
scdata <- format_seurat(scdata, '1234')
scdata <- format_obj2s(scdata, '1234')

# added samples
expect_true(all(scdata$samples == 'NA'))
Expand All @@ -108,7 +147,7 @@ test_that("format_seurat adds requires metadata to a SeuratObject", {

# added misc
expect_equal(scdata@misc$experimentId, '1234')
expect_setequal(names(scdata@misc), c('experimentId', 'color_pool', 'ingestionDate'))
expect_setequal(names(scdata@misc), c('experimentId', 'color_pool', 'ingestionDate', 'gene_annotations'))

# added required metadata columns
expect_true(all(c('percent.mt', 'doublet_scores', 'doublet_class') %in% colnames(scdata@meta.data)))
Expand Down

0 comments on commit 61c3891

Please sign in to comment.