diff --git a/pipeline-runner/R/gem2s-6-construct_qc_config.R b/pipeline-runner/R/gem2s-6-construct_qc_config.R index 1454c866..2d694d66 100644 --- a/pipeline-runner/R/gem2s-6-construct_qc_config.R +++ b/pipeline-runner/R/gem2s-6-construct_qc_config.R @@ -12,9 +12,8 @@ #' #' @return list of QC configuration parameters #' -construct_qc_config <- function(scdata_list, unfiltered_samples) { +construct_qc_config <- function(scdata_list, unfiltered_samples, technology) { samples <- names(scdata_list) - config_classifier <- add_custom_config_per_sample( customize_classifier_config, @@ -42,6 +41,7 @@ construct_qc_config <- function(scdata_list, unfiltered_samples) { customize_genes_vs_umis_config, processing_config_template[["genes_vs_umis"]], scdata_list, + technology = technology ) @@ -77,7 +77,8 @@ customize_classifier_config <- function(scdata, config, sample_name, - unfiltered_samples) { + unfiltered_samples, + technology) { config$enabled <- sample_name %in% unfiltered_samples config$prefiltered <- !(sample_name %in% unfiltered_samples) @@ -89,7 +90,8 @@ customize_cellsize_config <- function(scdata, config, sample_name, - unfiltered_samples) { + unfiltered_samples, + technology) { minCellSize <- generate_default_values_cellSizeDistribution(scdata, config) config$filterSettings$minCellSize <- minCellSize return(config) @@ -100,7 +102,8 @@ customize_mitochondrial_config <- function(scdata, config, sample_name, - unfiltered_samples) { + unfiltered_samples, + technology) { default_max_fraction <- generate_default_values_mitochondrialContent(scdata, config) config$filterSettings$methodSettings$absoluteThreshold$maxFraction <- default_max_fraction @@ -113,23 +116,27 @@ customize_doublet_config <- function(scdata, config, sample_name, - unfiltered_samples) { + unfiltered_samples, + technology) { probabilityThreshold <- generate_default_values_doubletScores(scdata) config$filterSettings$probabilityThreshold <- probabilityThreshold return(config) } - customize_genes_vs_umis_config <- function(scdata, config, sample_name, - unfiltered_samples) { + unfiltered_samples, + technology) { # Sensible values are based on the function "gene.vs.molecule.cell.filter" # from the pagoda2 package p.level <- min(0.001, 1 / ncol(scdata)) - regression_type <- config$filterSettings$regressionType + + regression_type <- ifelse( technology == "parse" , "spline" , config$filterSettings$regressionType) + + config$filterSettings$regressionType <- regression_type config$filterSettings$regressionTypeSettings[[regression_type]]$p.level <- p.level return(config) @@ -175,7 +182,8 @@ add_custom_config_per_sample <- function(customize_template_config, config_template, scdata_list, - unfiltered_samples = NA) { + unfiltered_samples = NA, + technology = NA) { config <- list() for (sample_name in names(scdata_list)) { # subset the Seurat object list to a single sample @@ -187,7 +195,8 @@ add_custom_config_per_sample <- sample_scdata, config_template, sample_name, - unfiltered_samples + unfiltered_samples, + technology ) # update sample config thresholds diff --git a/pipeline-runner/R/gem2s-6-prepare_experiment.R b/pipeline-runner/R/gem2s-6-prepare_experiment.R index 4575669a..8160c0db 100644 --- a/pipeline-runner/R/gem2s-6-prepare_experiment.R +++ b/pipeline-runner/R/gem2s-6-prepare_experiment.R @@ -34,7 +34,8 @@ prepare_experiment <- function(input, pipeline_config, prev_out) { # construct default QC config and update prev out message("Constructing default QC configuration...") unfiltered_samples <- names(prev_out$edrops[!is.null(prev_out$edrops)]) - prev_out$default_qc_config <- construct_qc_config(scdata_list, unfiltered_samples) + + prev_out$default_qc_config <- construct_qc_config(scdata_list, unfiltered_samples, input$input$type) # If we received a qc_config (subset pipeline case) then # we want to set that one as the custom config diff --git a/pipeline-runner/R/seurat-3-upload_seurat_to_aws.R b/pipeline-runner/R/seurat-3-upload_seurat_to_aws.R index 1e4c14d7..f9428424 100644 --- a/pipeline-runner/R/seurat-3-upload_seurat_to_aws.R +++ b/pipeline-runner/R/seurat-3-upload_seurat_to_aws.R @@ -46,7 +46,7 @@ upload_seurat_to_aws <- function(input, pipeline_config, prev_out) { # replicate qc config for simplicity # could also create a 'seurat_config' column in experiment table and change the ui/api around more - qc_config <- construct_qc_config(list(one = scdata), unfiltered_samples = 'one') + qc_config <- construct_qc_config(list(one = scdata), unfiltered_samples = 'one', technology="seurat") qc_config$configureEmbedding$embeddingSettings$useSaved <- TRUE qc_config$configureEmbedding$embeddingSettings$method <- SeuratObject::DefaultDimReduc(scdata) diff --git a/pipeline-runner/tests/testthat/test-gem2s-6-construct_qc_config.R b/pipeline-runner/tests/testthat/test-gem2s-6-construct_qc_config.R index 4c419bf7..e7528e61 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-6-construct_qc_config.R +++ b/pipeline-runner/tests/testthat/test-gem2s-6-construct_qc_config.R @@ -27,7 +27,7 @@ mock_scdata_list <- function() { test_that("cellsize filter is disabled by default and classifier is pre-filtered", { scdata_list <- mock_scdata_list() unfiltered_samples <- c("123abc") - qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples) + qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples, technology = "10X") for (sample in names(scdata_list)) { if (sample %in% unfiltered_samples) { @@ -46,7 +46,7 @@ test_that("cellsize filter is disabled by default and classifier is pre-filtered test_that("cellsize filter is disabled by default and classifier is not pre-filtered", { scdata_list <- mock_scdata_list() unfiltered_samples <- c() - qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples) + qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples, technology = "10X") for (sample in names(scdata_list)) { expect_false(qc_config$cellSizeDistribution[[sample]]$enabled) @@ -63,7 +63,7 @@ test_that("cellsize filter is disabled by default and classifier is not pre-filt test_that("customize_doublet_config sets threshold to 0 when there are no singlets", { scdata_list <- mock_scdata_list() unfiltered_samples <- c("123abc") - qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples) + qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples, technology = "10X") for (sample in names(scdata_list)) { scdata_list[[sample]]$doublet_class <- "doublet" @@ -76,7 +76,7 @@ test_that("customize_doublet_config sets threshold to 0 when there are no single test_that("classifier filter config is enabled for unfiltered samples and disabled for pre-filtered samples", { scdata_list <- mock_scdata_list() unfiltered_samples <- c("123abc") - qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples) + qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples, technology = "10X") for (sample in names(scdata_list)) { if (sample %in% unfiltered_samples) { @@ -88,3 +88,23 @@ test_that("classifier filter config is enabled for unfiltered samples and disabl } } }) + +test_that("NumGenesVsUmis filter config has spline as default for Parse Datasets", { + scdata_list <- mock_scdata_list() + unfiltered_samples <- c("123abc") + qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples, "parse") + + for (sample in names(scdata_list)) { + expect_true(qc_config$numGenesVsNumUmis[[sample]]$filterSettings$regressionType == "spline") + } +}) + +test_that("NumGenesVsUmis filter config has linear as default for 10x datasets", { + scdata_list <- mock_scdata_list() + unfiltered_samples <- c("123abc") + qc_config <- construct_qc_config(scdata_list, unfiltered_samples = unfiltered_samples, "10X") + + for (sample in names(scdata_list)) { + expect_true(qc_config$numGenesVsNumUmis[[sample]]$filterSettings$regressionType == "linear") + } +}) diff --git a/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R b/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R index 0eb9b49f..ddf7894c 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R +++ b/pipeline-runner/tests/testthat/test-gem2s-6-prepare_experiment.R @@ -55,12 +55,17 @@ mock_prev_out <- function(samples = "sample_a", counts = NULL, prev_out_config = create_seurat(NULL, NULL, prev_out)$output } +mock_input <- function(){ + input <- list(input = list(type="10X"), experimentId = "1234") +} test_that("prepare_experiment ensures gene_annotations are indexed correctly for each sample", { samples <- c("a", "b", "c") prev_out <- mock_prev_out(samples = samples) + input <- mock_input() + # remove some genes from each sample prev_out$counts_list$a <- prev_out$counts_list$a[-c(1:9), ] prev_out$counts_list$b <- prev_out$counts_list$b[-c(21:30), ] @@ -68,7 +73,7 @@ test_that("prepare_experiment ensures gene_annotations are indexed correctly for # re-create seurat object prev_out <- create_seurat(NULL, NULL, prev_out)$output - scdata_list <- prepare_experiment(NULL, NULL, prev_out)$output$scdata + scdata_list <- prepare_experiment(input, NULL, prev_out)$output$scdata # we expect that the input in gene_annotations is the same as the rownames of # each sample seurat object @@ -142,7 +147,7 @@ test_that("add_metadata_to_samples generated cell ids do not depend on sample or test_that("prepare_experiment generates qc_config that matches snapshot", { prev_out <- mock_prev_out() - input <- list(experimentId = "1234") + input <- mock_input() task_out <- prepare_experiment(input, NULL, prev_out)$output expect_snapshot(str(task_out$qc_config)) @@ -154,8 +159,9 @@ test_that("prepare_experiment creates a list of valid Seurat objects", { samples <- c("a", "b", "c") prev_out <- mock_prev_out(samples ) scdata_list <- prev_out$scdata_list + input <- mock_input() - task_out <- prepare_experiment(NULL, NULL, prev_out)$output + task_out <- prepare_experiment(input, NULL, prev_out)$output scdata_list <- task_out$scdata_list @@ -175,7 +181,9 @@ test_that("prepare_experiment properly populates the misc slot", { prev_out <- mock_prev_out(samples ) scdata_list <- prev_out$scdata_list - task_out <- prepare_experiment(NULL, NULL, prev_out)$output + input <- mock_input() + + task_out <- prepare_experiment(input, NULL, prev_out)$output scdata_list <- task_out$scdata_list for (sample in samples) { @@ -197,7 +205,9 @@ test_that("prepare_experiment properly populates the metadata slot", { prev_out <- mock_prev_out(samples) scdata_list <- prev_out$scdata_list - task_out <- prepare_experiment(NULL, NULL, prev_out)$output + input <- mock_input() + + task_out <- prepare_experiment(input, NULL, prev_out)$output scdata_list <- task_out$scdata_list for (sample in samples) { @@ -230,7 +240,9 @@ test_that("Mitochondrial percentage is correct", { prev_out <- mock_prev_out(samples) scdata_list <- prev_out$scdata_list - task_out <- prepare_experiment(NULL, NULL, prev_out)$output + input <- mock_input() + + task_out <- prepare_experiment(input, NULL, prev_out)$output scdata_list <- task_out$scdata_list for (sample in samples) { @@ -254,9 +266,11 @@ test_that("Skips qc config creation if it is already created in prev_out", { samples <- c("a", "b", "c") prev_out <- mock_prev_out(samples = samples, prev_out_config = c('mocked')) + input <- mock_input() + # re-create seurat object prev_out <- create_seurat(NULL, NULL, prev_out)$output - scdata_list <- prepare_experiment(NULL, NULL, prev_out) + scdata_list <- prepare_experiment(input, NULL, prev_out) expect_true(prev_out$qc_config == c('mocked')) }) diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R index 5b205228..b80fbdeb 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -15,7 +15,9 @@ mock_scdata_list <- function(config) { prev_out <- mock_prev_out(config) scdata_list <- prev_out$scdata_list - task_out <- prepare_experiment(NULL, NULL, prev_out)$output + input <- mock_input() + + task_out <- prepare_experiment(input, NULL, prev_out)$output scdata_list <- task_out$scdata_list } @@ -26,7 +28,8 @@ mock_input <- function(metadata = NULL) { sampleIds = list("123abc", "123def", "123ghi"), metadata = metadata, experimentId = "mock_experiment_id", - projectId = "mock_experiment_id" + projectId = "mock_experiment_id", + input = list( type= "10x") ) return(input) diff --git a/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R b/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R index 438e9719..e04d6ab9 100644 --- a/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R +++ b/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R @@ -28,7 +28,7 @@ mock_scdata_list <- function(samples = rep("mock_sample_1_id", 80)) { } mock_input <- function(parent_experiment_id, cellset_keys, samples = rep("mock_sample_1_id", 80), sample_ids = c("mock_sample_1_id")) { - parentProcessingConfig <- construct_qc_config(mock_scdata_list(samples), unfiltered_samples = sample_ids) + parentProcessingConfig <- construct_qc_config(mock_scdata_list(samples), unfiltered_samples = sample_ids, technology = "10x") list( parentExperimentId = parent_experiment_id, @@ -190,7 +190,7 @@ test_that("generate_subset_config works correctly", { parent_sample_ids <- c("sample-id-1", "sample-id-2", "sample-id-3", "sample-id-4") scdata_list <- mock_scdata_list(samples = rep(parent_sample_ids, 20)) - parent_processing_config <- construct_qc_config(scdata_list, unfiltered_samples = parent_sample_ids) + parent_processing_config <- construct_qc_config(scdata_list, unfiltered_samples = parent_sample_ids, technology = "10x") # Make some of the configs unique to each sample # so we can check that the translation preserves the configs