Merge pull request #172 from biomage-ltd/test-prepare-experiment

Test prepare_experiment
hms-dbmi-cellenics · Oct 8, 2021 · fc9c079 · fc9c079
2 parents 1c02f3c + b7b2971
commit fc9c079
Show file tree

Hide file tree

Showing 2 changed files with 229 additions and 0 deletions.
diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md b/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md
@@ -0,0 +1,128 @@
+# prepare_experiment generates qc_config that matches snapshot
+
+    Code
+      str(task_out$qc_config)
+    Output
+      List of 7
+       $ cellSizeDistribution:List of 4
+        ..$ enabled       : logi FALSE
+        ..$ auto          : logi TRUE
+        ..$ filterSettings:List of 2
+        .. ..$ minCellSize: num 1080
+        .. ..$ binStep    : num 200
+        ..$ sample_a      :List of 4
+        .. ..$ enabled              : logi FALSE
+        .. ..$ auto                 : logi TRUE
+        .. ..$ filterSettings       :List of 2
+        .. .. ..$ minCellSize: num 10
+        .. .. ..$ binStep    : num 200
+        .. ..$ defaultFilterSettings:List of 2
+        .. .. ..$ minCellSize: num 10
+        .. .. ..$ binStep    : num 200
+       $ mitochondrialContent:List of 4
+        ..$ enabled       : logi TRUE
+        ..$ auto          : logi TRUE
+        ..$ filterSettings:List of 2
+        .. ..$ method        : chr "absolute_threshold"
+        .. ..$ methodSettings:List of 1
+        .. .. ..$ absolute_threshold:List of 2
+        .. .. .. ..$ maxFraction: num 0.1
+        .. .. .. ..$ binStep    : num 0.05
+        ..$ sample_a      :List of 3
+        .. ..$ auto                 : logi TRUE
+        .. ..$ filterSettings       :List of 2
+        .. .. ..$ method        : chr "absolute_threshold"
+        .. .. ..$ methodSettings:List of 1
+        .. .. .. ..$ absolute_threshold:List of 2
+        .. .. .. .. ..$ maxFraction: num 0.1
+        .. .. .. .. ..$ binStep    : num 0.05
+        .. ..$ defaultFilterSettings:List of 2
+        .. .. ..$ method        : chr "absolute_threshold"
+        .. .. ..$ methodSettings:List of 1
+        .. .. .. ..$ absolute_threshold:List of 2
+        .. .. .. .. ..$ maxFraction: num 0.1
+        .. .. .. .. ..$ binStep    : num 0.05
+       $ classifier          :List of 4
+        ..$ enabled       : logi TRUE
+        ..$ auto          : logi TRUE
+        ..$ filterSettings:List of 1
+        .. ..$ FDR: num 0.01
+        ..$ sample_a      :List of 4
+        .. ..$ enabled              : logi TRUE
+        .. ..$ auto                 : logi TRUE
+        .. ..$ filterSettings       :List of 1
+        .. .. ..$ FDR: num 0.01
+        .. ..$ defaultFilterSettings:List of 1
+        .. .. ..$ FDR: num 0.01
+       $ numGenesVsNumUmis   :List of 4
+        ..$ enabled       : logi TRUE
+        ..$ auto          : logi TRUE
+        ..$ filterSettings:List of 2
+        .. ..$ regressionType        : chr "gam"
+        .. ..$ regressionTypeSettings:List of 1
+        .. .. ..$ gam:List of 1
+        .. .. .. ..$ p.level: num 0.001
+        ..$ sample_a      :List of 4
+        .. ..$ enabled              : logi TRUE
+        .. ..$ auto                 : logi TRUE
+        .. ..$ filterSettings       :List of 2
+        .. .. ..$ regressionType        : chr "gam"
+        .. .. ..$ regressionTypeSettings:List of 1
+        .. .. .. ..$ gam:List of 1
+        .. .. .. .. ..$ p.level: num 0.00013
+        .. ..$ defaultFilterSettings:List of 2
+        .. .. ..$ regressionType        : chr "gam"
+        .. .. ..$ regressionTypeSettings:List of 1
+        .. .. .. ..$ gam:List of 1
+        .. .. .. .. ..$ p.level: num 0.00013
+       $ doubletScores       :List of 4
+        ..$ enabled       : logi TRUE
+        ..$ auto          : logi TRUE
+        ..$ filterSettings:List of 2
+        .. ..$ probabilityThreshold: num 0.5
+        .. ..$ binStep             : num 0.05
+        ..$ sample_a      :List of 4
+        .. ..$ enabled              : logi TRUE
+        .. ..$ auto                 : logi TRUE
+        .. ..$ filterSettings       :List of 2
+        .. .. ..$ probabilityThreshold: num 0.8
+        .. .. ..$ binStep             : num 0.05
+        .. ..$ defaultFilterSettings:List of 2
+        .. .. ..$ probabilityThreshold: num 0.8
+        .. .. ..$ binStep             : num 0.05
+       $ dataIntegration     :List of 2
+        ..$ dataIntegration        :List of 2
+        .. ..$ method        : chr "harmony"
+        .. ..$ methodSettings:List of 4
+        .. .. ..$ seuratv4 :List of 2
+        .. .. .. ..$ numGenes     : num 2000
+        .. .. .. ..$ normalisation: chr "logNormalize"
+        .. .. ..$ unisample:List of 2
+        .. .. .. ..$ numGenes     : num 2000
+        .. .. .. ..$ normalisation: chr "logNormalize"
+        .. .. ..$ harmony  :List of 2
+        .. .. .. ..$ numGenes     : num 2000
+        .. .. .. ..$ normalisation: chr "logNormalize"
+        .. .. ..$ fastmnn  :List of 2
+        .. .. .. ..$ numGenes     : num 2000
+        .. .. .. ..$ normalisation: chr "logNormalize"
+        ..$ dimensionalityReduction:List of 3
+        .. ..$ method               : chr "rpca"
+        .. ..$ numPCs               : num 30
+        .. ..$ excludeGeneCategories: list()
+       $ configureEmbedding  :List of 2
+        ..$ embeddingSettings :List of 2
+        .. ..$ method        : chr "umap"
+        .. ..$ methodSettings:List of 2
+        .. .. ..$ umap:List of 2
+        .. .. .. ..$ minimumDistance: num 0.3
+        .. .. .. ..$ distanceMetric : chr "cosine"
+        .. .. ..$ tsne:List of 2
+        .. .. .. ..$ perplexity  : num 30
+        .. .. .. ..$ learningRate: num 640
+        ..$ clusteringSettings:List of 2
+        .. ..$ method        : chr "louvain"
+        .. ..$ methodSettings:List of 1
+        .. .. ..$ louvain:List of 1
+        .. .. .. ..$ resolution: num 0.8
+
diff --git a/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R b/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R
@@ -0,0 +1,101 @@
+# Load the pbmc_raw.txt counts table that is installed with Seurat.
+#
+# Takes no arguments. Looks up "extdata/pbmc_raw.txt" inside the installed
+# Seurat package and reads it with read.table(as.is = TRUE).
+#
+# Returns the data.frame produced by read.table(). Errors if the file cannot
+# be found.
+mock_counts <- function() {
+  # mustWork = TRUE: fail right here with a clear "no file found" error
+  # instead of handing read.table() the empty path that system.file()
+  # returns when the lookup misses
+  counts_path <- system.file(
+    "extdata", "pbmc_raw.txt",
+    package = "Seurat",
+    mustWork = TRUE
+  )
+
+  read.table(file = counts_path, as.is = TRUE)
+}
+
+# Build a mock doublet scores table for a counts matrix.
+#
+# counts: matrix-like object; only its column count and column names are used.
+#
+# Returns a data.frame with one row per column of `counts` (row names are the
+# column names) and the columns `barcodes`, `doublet_class` and
+# `doublet_scores`. Scores are drawn with runif(), so they differ between
+# calls unless the caller sets a seed.
+mock_doublet_scores <- function(counts) {
+  barcodes <- colnames(counts)
+  scores <- runif(ncol(counts))
+
+  # scores below 0.8 are labelled singlets, everything else doublets
+  classes <- ifelse(scores < 0.8, "singlet", "doublet")
+
+  data.frame(
+    barcodes = barcodes,
+    doublet_class = classes,
+    doublet_scores = scores,
+    row.names = barcodes
+  )
+}
+
+# Build the `prev_out` list that is handed to prepare_experiment().
+#
+# samples: character vector of sample names. Every sample gets the same
+#   counts and the same emptyDrops result, plus its own mock doublet scores.
+# counts: counts matrix used for every sample. When NULL, counts are
+#   simulated with DropletUtils:::simCounts() and their columns are named
+#   "cell1", "cell2", ...
+#
+# Returns the `output` element of create_seurat() called on the mocked input.
+mock_prev_out <- function(samples = "sample_a", counts = NULL) {
+  if (is.null(counts)) {
+    counts <- DropletUtils:::simCounts()
+    colnames(counts) <- paste0("cell", seq_len(ncol(counts)))
+  }
+
+  # emptyDrops is run once and its result is shared by all samples
+  empty_drops_out <- DropletUtils::emptyDrops(counts)
+  gene_ids <- row.names(counts)
+
+  # same layout as the prev_out that create_seurat receives
+  create_seurat_input <- list(
+    counts_list = list(),
+    edrops = list(),
+    doublet_scores = list(),
+    annot = data.frame(name = gene_ids, input = gene_ids),
+    config = list(name = "project name")
+  )
+
+  for (sample_name in samples) {
+    create_seurat_input$counts_list[[sample_name]] <- counts
+    create_seurat_input$edrops[[sample_name]] <- empty_drops_out
+    create_seurat_input$doublet_scores[[sample_name]] <-
+      mock_doublet_scores(counts)
+  }
+
+  # the output of create_seurat is what prepare_experiment takes as prev_out
+  create_seurat(NULL, NULL, create_seurat_input)$output
+}
+
+
+
+# With several samples, the merged object must hold as many cells as all the
+# entries of scdata_list put together, and the call is expected to warn.
+test_that("prepare_experiment merges multiple SeuratObjects", {
+  prev_out <- mock_prev_out(samples = c("a", "b", "c"))
+  scdata_list <- prev_out$scdata_list
+
+  # Record and muffle warnings with a calling handler so the call runs to
+  # completion and its value is kept. Under the testthat 3rd edition,
+  # expect_warning() returns the captured condition rather than the value of
+  # the expression, so its result cannot be used as `task_out`.
+  saw_warning <- FALSE
+  task_out <- withCallingHandlers(
+    prepare_experiment(NULL, NULL, prev_out)$output,
+    warning = function(w) {
+      saw_warning <<- TRUE
+      invokeRestart("muffleWarning")
+    }
+  )
+  expect_true(saw_warning)
+
+  scdata <- task_out$scdata
+
+  # vapply() pins the result type, unlike sapply()
+  n_cells <- vapply(scdata_list, ncol, numeric(1))
+
+  expect_equal(ncol(scdata), sum(n_cells))
+})
+
+
+# The gene annotations stored in scdata@misc must follow the row order of
+# scdata, whatever order the annotation table arrives in.
+test_that("prepare_experiment ensures gene_annotations are indexed the same as scdata", {
+  prev_out <- mock_prev_out()
+
+  # Reverse the annotation rows instead of shuffling them at random: the
+  # permutation is the same on every run and is guaranteed to differ from
+  # the original order whenever there is more than one row.
+  annot <- prev_out$annot
+  prev_out$annot <- annot[rev(seq_len(nrow(annot))), ]
+
+  scdata <- prepare_experiment(NULL, NULL, prev_out)$output$scdata
+
+  expect_equal(row.names(scdata), scdata@misc$gene_annotations$input)
+})
+
+
+# Checks the entries added to scdata@misc and that the `cells_id` metadata
+# column counts up from 0, one id per cell.
+test_that("prepare_experiment adds 0 indexed cell_ids and other metadata to scdata", {
+  prev_out <- mock_prev_out()
+  input <- list(experimentId = "1234")
+  scdata <- prepare_experiment(input, NULL, prev_out)$output$scdata
+
+  added_to_misc <- c(
+    "gene_annotations", "color_pool", "experimentId", "ingestionDate"
+  )
+  expect_true(all(added_to_misc %in% names(scdata@misc)))
+
+  # seq_len() keeps the expectation right for an empty object, where
+  # seq(0, ncol(scdata) - 1) would count down and give c(0, -1)
+  added_ids <- unname(scdata$cells_id)
+  expected_ids <- seq_len(ncol(scdata)) - 1L
+  expect_equal(added_ids, expected_ids)
+})
+
+
+# Snapshot the structure of the qc_config generated for the default mocked
+# sample ("sample_a").
+test_that("prepare_experiment generates qc_config that matches snapshot", {
+  prev_out <- mock_prev_out()
+  task_out <- prepare_experiment(
+    list(experimentId = "1234"),
+    NULL,
+    prev_out
+  )$output
+
+  # keep this expression as is: expect_snapshot() records the code together
+  # with its output, so renaming `task_out` would invalidate the snapshot
+  expect_snapshot(str(task_out$qc_config))
+})