diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md b/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md new file mode 100644 index 00000000..e01a1a0d --- /dev/null +++ b/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md @@ -0,0 +1,128 @@ +# prepare_experiment generates qc_config that matches snapshot + + Code + str(task_out$qc_config) + Output + List of 7 + $ cellSizeDistribution:List of 4 + ..$ enabled : logi FALSE + ..$ auto : logi TRUE + ..$ filterSettings:List of 2 + .. ..$ minCellSize: num 1080 + .. ..$ binStep : num 200 + ..$ sample_a :List of 4 + .. ..$ enabled : logi FALSE + .. ..$ auto : logi TRUE + .. ..$ filterSettings :List of 2 + .. .. ..$ minCellSize: num 10 + .. .. ..$ binStep : num 200 + .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ minCellSize: num 10 + .. .. ..$ binStep : num 200 + $ mitochondrialContent:List of 4 + ..$ enabled : logi TRUE + ..$ auto : logi TRUE + ..$ filterSettings:List of 2 + .. ..$ method : chr "absolute_threshold" + .. ..$ methodSettings:List of 1 + .. .. ..$ absolute_threshold:List of 2 + .. .. .. ..$ maxFraction: num 0.1 + .. .. .. ..$ binStep : num 0.05 + ..$ sample_a :List of 3 + .. ..$ auto : logi TRUE + .. ..$ filterSettings :List of 2 + .. .. ..$ method : chr "absolute_threshold" + .. .. ..$ methodSettings:List of 1 + .. .. .. ..$ absolute_threshold:List of 2 + .. .. .. .. ..$ maxFraction: num 0.1 + .. .. .. .. ..$ binStep : num 0.05 + .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ method : chr "absolute_threshold" + .. .. ..$ methodSettings:List of 1 + .. .. .. ..$ absolute_threshold:List of 2 + .. .. .. .. ..$ maxFraction: num 0.1 + .. .. .. .. ..$ binStep : num 0.05 + $ classifier :List of 4 + ..$ enabled : logi TRUE + ..$ auto : logi TRUE + ..$ filterSettings:List of 1 + .. ..$ FDR: num 0.01 + ..$ sample_a :List of 4 + .. ..$ enabled : logi TRUE + .. ..$ auto : logi TRUE + .. ..$ filterSettings :List of 1 + .. .. ..$ FDR: num 0.01 + .. 
..$ defaultFilterSettings:List of 1 + .. .. ..$ FDR: num 0.01 + $ numGenesVsNumUmis :List of 4 + ..$ enabled : logi TRUE + ..$ auto : logi TRUE + ..$ filterSettings:List of 2 + .. ..$ regressionType : chr "gam" + .. ..$ regressionTypeSettings:List of 1 + .. .. ..$ gam:List of 1 + .. .. .. ..$ p.level: num 0.001 + ..$ sample_a :List of 4 + .. ..$ enabled : logi TRUE + .. ..$ auto : logi TRUE + .. ..$ filterSettings :List of 2 + .. .. ..$ regressionType : chr "gam" + .. .. ..$ regressionTypeSettings:List of 1 + .. .. .. ..$ gam:List of 1 + .. .. .. .. ..$ p.level: num 0.00013 + .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ regressionType : chr "gam" + .. .. ..$ regressionTypeSettings:List of 1 + .. .. .. ..$ gam:List of 1 + .. .. .. .. ..$ p.level: num 0.00013 + $ doubletScores :List of 4 + ..$ enabled : logi TRUE + ..$ auto : logi TRUE + ..$ filterSettings:List of 2 + .. ..$ probabilityThreshold: num 0.5 + .. ..$ binStep : num 0.05 + ..$ sample_a :List of 4 + .. ..$ enabled : logi TRUE + .. ..$ auto : logi TRUE + .. ..$ filterSettings :List of 2 + .. .. ..$ probabilityThreshold: num 0.8 + .. .. ..$ binStep : num 0.05 + .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ probabilityThreshold: num 0.8 + .. .. ..$ binStep : num 0.05 + $ dataIntegration :List of 2 + ..$ dataIntegration :List of 2 + .. ..$ method : chr "harmony" + .. ..$ methodSettings:List of 4 + .. .. ..$ seuratv4 :List of 2 + .. .. .. ..$ numGenes : num 2000 + .. .. .. ..$ normalisation: chr "logNormalize" + .. .. ..$ unisample:List of 2 + .. .. .. ..$ numGenes : num 2000 + .. .. .. ..$ normalisation: chr "logNormalize" + .. .. ..$ harmony :List of 2 + .. .. .. ..$ numGenes : num 2000 + .. .. .. ..$ normalisation: chr "logNormalize" + .. .. ..$ fastmnn :List of 2 + .. .. .. ..$ numGenes : num 2000 + .. .. .. ..$ normalisation: chr "logNormalize" + ..$ dimensionalityReduction:List of 3 + .. ..$ method : chr "rpca" + .. ..$ numPCs : num 30 + .. 
..$ excludeGeneCategories: list() + $ configureEmbedding :List of 2 + ..$ embeddingSettings :List of 2 + .. ..$ method : chr "umap" + .. ..$ methodSettings:List of 2 + .. .. ..$ umap:List of 2 + .. .. .. ..$ minimumDistance: num 0.3 + .. .. .. ..$ distanceMetric : chr "cosine" + .. .. ..$ tsne:List of 2 + .. .. .. ..$ perplexity : num 30 + .. .. .. ..$ learningRate: num 640 + ..$ clusteringSettings:List of 2 + .. ..$ method : chr "louvain" + .. ..$ methodSettings:List of 1 + .. .. ..$ louvain:List of 1 + .. .. .. ..$ resolution: num 0.8 +
diff --git a/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R b/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R
new file mode 100644
index 00000000..89b7cc8b
--- /dev/null
+++ b/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R
@@ -0,0 +1,109 @@
+# Read the raw PBMC counts table that ships with Seurat.
+mock_counts <- function() {
+  read.table(
+    file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"),
+    as.is = TRUE
+  )
+}
+
+# Build a doublet table with one row per column of `counts`. Scores are
+# uniform random draws; a score below 0.8 is labelled a singlet.
+mock_doublet_scores <- function(counts) {
+  doublet_scores <- runif(ncol(counts))
+  doublet_class <- ifelse(doublet_scores < 0.8, "singlet", "doublet")
+
+  data.frame(
+    row.names = colnames(counts),
+    barcodes = colnames(counts),
+    doublet_class = doublet_class,
+    doublet_scores = doublet_scores
+  )
+}
+
+# Run create_seurat() on mocked upstream output and return its `$output`.
+# Every name in `samples` shares the same counts matrix and emptyDrops
+# result but gets its own random doublet scores. When `counts` is NULL a
+# simulated matrix is used, with columns named cell1, cell2, ...
+mock_prev_out <- function(samples = "sample_a", counts = NULL) {
+  if (is.null(counts)) {
+    # NOTE(review): simCounts() is unexported (reached via `:::`); confirm it
+    # stays available when DropletUtils is upgraded.
+    counts <- DropletUtils:::simCounts()
+    colnames(counts) <- paste0("cell", seq_len(ncol(counts)))
+  }
+
+  eout <- DropletUtils::emptyDrops(counts)
+
+  counts_list <- list()
+  edrops <- list()
+  doublet_scores <- list()
+
+  for (sample in samples) {
+    counts_list[[sample]] <- counts
+    edrops[[sample]] <- eout
+    doublet_scores[[sample]] <- mock_doublet_scores(counts)
+  }
+
+  # as passed to create_seurat
+  prev_out <- list(
+    counts_list = counts_list,
+    edrops = edrops,
+    doublet_scores = doublet_scores,
+    annot = data.frame(name = row.names(counts), input = row.names(counts)),
+    config = list(name = "project name")
+  )
+
+  # call create_seurat to get prev_out to pass to prepare_experiment
+  create_seurat(NULL, NULL, prev_out)$output
+}
+
+
+test_that("prepare_experiment merges multiple SeuratObjects", {
+  prev_out <- mock_prev_out(samples = c("a", "b", "c"))
+  scdata_list <- prev_out$scdata_list
+
+  task_out <- expect_warning(prepare_experiment(NULL, NULL, prev_out)$output)
+  scdata <- task_out$scdata
+
+  # vapply() always yields a numeric vector, even for an empty list
+  expected_cells <- sum(vapply(scdata_list, ncol, numeric(1)))
+  expect_equal(ncol(scdata), expected_cells)
+})
+
+
+test_that("prepare_experiment ensures gene_annotations are indexed the same as scdata", {
+  prev_out <- mock_prev_out()
+
+  # shuffle gene order of annot
+  annot <- prev_out$annot
+  prev_out$annot <- annot[sample(nrow(annot)), ]
+
+  scdata <- prepare_experiment(NULL, NULL, prev_out)$output$scdata
+
+  expect_equal(row.names(scdata), scdata@misc$gene_annotations$input)
+})
+
+
+test_that("prepare_experiment adds 0 indexed cell_ids and other metadata to scdata", {
+  prev_out <- mock_prev_out()
+  input <- list(experimentId = "1234")
+  scdata <- prepare_experiment(input, NULL, prev_out)$output$scdata
+
+  added_to_misc <- c("gene_annotations", "color_pool", "experimentId", "ingestionDate")
+  expect_true(all(added_to_misc %in% names(scdata@misc)))
+
+  added_ids <- unname(scdata$cells_id)
+  # seq_len() yields an empty sequence for zero cells, where seq(0, -1)
+  # would count down to c(0, -1)
+  expected_ids <- seq_len(ncol(scdata)) - 1L
+  expect_equal(added_ids, expected_ids)
+})
+
+
+test_that("prepare_experiment generates qc_config that matches snapshot", {
+  prev_out <- mock_prev_out()
+  input <- list(experimentId = "1234")
+  task_out <- prepare_experiment(input, NULL, prev_out)$output
+
+  expect_snapshot(str(task_out$qc_config))
+})