From 4d4923afece03e1bc850aacb9eb1fdcd9a4835d1 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Mon, 27 Sep 2021 11:18:12 -0700 Subject: [PATCH 01/17] test that sample number and metadata length are the same --- pipeline-runner/init.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pipeline-runner/init.R b/pipeline-runner/init.R index 17cac928..3595c0ec 100644 --- a/pipeline-runner/init.R +++ b/pipeline-runner/init.R @@ -184,11 +184,25 @@ run_gem2s_step <- function(task_name, input, pipeline_config, prev_out) { return(res) } +check_input <- function(input) { + + # check that metadata items length is same as number of samples + metadata <- input$metadata + + if (length(metadata)) { + nsamples <- length(input$sampleNames) + nmeta <- sapply(metadata, length) + if (!all(nmeta == nsamples)) stop ('Sample number and metadata length') + } +} + call_gem2s <- function(task_name, input, pipeline_config) { experiment_id <- input$experimentId if (!exists("prev_out")) assign("prev_out", NULL, pos = ".GlobalEnv") + check_input(input) + c(data, task_out) %<-% run_gem2s_step(task_name, input, pipeline_config, prev_out) assign("prev_out", task_out, pos = ".GlobalEnv") From 6dc8e04f985308734f0d6031e68dd1ae010632df Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Mon, 27 Sep 2021 11:30:34 -0700 Subject: [PATCH 02/17] use HOST_IPP for api url --- pipeline-runner/init.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline-runner/init.R b/pipeline-runner/init.R index 17cac928..2b03b8d6 100644 --- a/pipeline-runner/init.R +++ b/pipeline-runner/init.R @@ -76,7 +76,7 @@ load_config <- function(development_aws_server) { } if(config$cluster_env == 'development') { - config$api_url <- "http://host.docker.internal:3000" + config$api_url <- sprintf("http://%s:3000", development_aws_server) config$aws_config[['endpoint']] <- sprintf("http://%s:4566", development_aws_server) # DOCKER_GATEWAY_HOST config$aws_config[['credentials']] <- list( creds = list( From 57ff7bd927753652d5eb9c146051415f1ab69fe8 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Mon, 27 Sep 2021 12:57:29 -0700 Subject: [PATCH 03/17] move check_input to R and add tests --- pipeline-runner/R/gem2s-helpers.R | 12 ++++++++ pipeline-runner/init.R | 11 ------- .../tests/testthat/test-gem2s-helpers.R | 30 +++++++++++++++++++ 3 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 pipeline-runner/tests/testthat/test-gem2s-helpers.R diff --git a/pipeline-runner/R/gem2s-helpers.R b/pipeline-runner/R/gem2s-helpers.R index 7bedcfb2..71987796 100644 --- a/pipeline-runner/R/gem2s-helpers.R +++ b/pipeline-runner/R/gem2s-helpers.R @@ -6,6 +6,18 @@ check_prev_out <- function(prev_out, check_names) { } } +check_input <- function(input) { + + # check that metadata items length is same as number of samples + metadata <- input$metadata + + if (length(metadata)) { + nsamples <- length(input$sampleNames) + nmeta <- sapply(metadata, length) + if (!all(nmeta == nsamples)) stop ('Sample number differs from metadata length.') + } +} + get_color_pool <- function() { c( diff --git a/pipeline-runner/init.R b/pipeline-runner/init.R index 3595c0ec..6e678752 100644 --- a/pipeline-runner/init.R +++ b/pipeline-runner/init.R @@ -184,17 +184,6 @@ run_gem2s_step <- function(task_name, input, pipeline_config, prev_out) { return(res) } -check_input <- function(input) { - - # check that metadata items length is same as number of samples - metadata <- input$metadata - - if (length(metadata)) { - nsamples <- length(input$sampleNames) - nmeta <- sapply(metadata, length) - if (!all(nmeta == nsamples)) stop ('Sample number and metadata length') - } -} call_gem2s <- function(task_name, input, pipeline_config) { experiment_id <- input$experimentId diff --git a/pipeline-runner/tests/testthat/test-gem2s-helpers.R b/pipeline-runner/tests/testthat/test-gem2s-helpers.R new file mode 100644 index 00000000..9c0f52b9 --- /dev/null +++ b/pipeline-runner/tests/testthat/test-gem2s-helpers.R @@ -0,0 +1,30 @@ +test_that("check_input fails if sample number differs from metadata length", { + input <- list( + sampleNames = list('WT1', 'WT2'), + metadata = list('Group' = list('WT')) + ) + + expect_error(check_input(input)) +}) + + +test_that("check_input passes if sample number is the same as metadata length", { + input <- list( + sampleNames = list('WT1', 'WT2'), + metadata = list('Group' = list('WT', 'WT')) + ) + + expect_silent(check_input(input)) +}) + + +test_that("check_input passes with multiple metadata columns", { + input <- list( + sampleNames = list('WT1', 'WT2'), + metadata = list('Group' = list('WT', 'WT'), + 'Borg' = list('YES', 'NO')) + ) + + expect_silent(check_input(input)) +}) + From 052811b0776b300888aedb96236cbe6555cc5756 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Mon, 27 Sep 2021 13:16:01 -0700 Subject: [PATCH 04/17] add test for no metadata --- pipeline-runner/tests/testthat/test-gem2s-helpers.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pipeline-runner/tests/testthat/test-gem2s-helpers.R b/pipeline-runner/tests/testthat/test-gem2s-helpers.R index 9c0f52b9..58c07868 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-helpers.R +++ b/pipeline-runner/tests/testthat/test-gem2s-helpers.R @@ -28,3 +28,13 @@ test_that("check_input passes with multiple metadata columns", { expect_silent(check_input(input)) }) + +test_that("check_input passes without metadata", { + input <- list( + sampleNames = list('WT1', 'WT2')) + + expect_silent(check_input(input)) +}) + + + From 06b9419808ec10f629b30da149435c9dcb8695da Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Mon, 27 Sep 2021 14:19:34 -0700 Subject: [PATCH 05/17] test get_cell_sets and fix failures --- pipeline-runner/R/gem2s-7-upload_to_aws.R | 15 +- .../testthat/_snaps/gem2s-7-upload_to_aws.md | 21 +++ .../testthat/test-gem2s-7-upload_to_aws.R | 170 ++++++++++++++++++ 3 files changed, 201 insertions(+), 5 deletions(-) create mode 100644 pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.md create mode 100644 pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R diff --git a/pipeline-runner/R/gem2s-7-upload_to_aws.R b/pipeline-runner/R/gem2s-7-upload_to_aws.R index 6343bca4..37073461 100644 --- a/pipeline-runner/R/gem2s-7-upload_to_aws.R +++ b/pipeline-runner/R/gem2s-7-upload_to_aws.R @@ -128,8 +128,9 @@ meta_sets <- function(input, scdata, color_pool) { keys <- names(meta) # corresponding names stored in seurat object meta.data - seurat_keys <- scdata@misc$metadata_lookups[[keys]] + seurat_keys <- scdata@misc$metadata_lookups[keys] + color_index <- 1 for (i in seq_along(keys)) { key <- keys[i] seurat_key <- seurat_keys[i] @@ -144,16 +145,20 @@ meta_sets <- function(input, scdata, color_pool) { # values of current metadata track values <- unique(meta[[i]]) - for (i in seq_along(values)) { - value <- values[i] + + for (j in seq_along(values)) { + value <- values[j] cell_ids <- scdata$cells_id[scdata[[seurat_key]] == value] - cell_set$children[[i]] <- list( + cell_set$children[[j]] <- list( "key" = paste(key, value, sep = "-"), "name" = value, - "color" = color_pool[i], + "color" = color_pool[color_index], "cellIds" = unname(cell_ids) ) + + color_index <- color_index + 1 + } cell_set_list <- c(cell_set_list, list(cell_set)) } diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.md b/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.md new file mode 100644 index 00000000..59fd7b12 --- /dev/null +++ b/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.md @@ -0,0 +1,21 @@ +# get_cell_sets without metadata matches initial snapshot + + Code + cell_sets + Output + $key + [1] "scratchpad" + + $name + [1] "Custom cell sets" + + $rootNode + [1] TRUE + + $children + list() + + $type + [1] "cellSets" + + diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R new file mode 100644 index 00000000..1a3886d4 --- /dev/null +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -0,0 +1,170 @@ +mock_config <- function() { + config <- list( + sampleNames = list('WT1', 'WT2'), + sampleIds = list('123abc', '123def') + ) + + return(config) +} + + +mock_scdata <- function() { + pbmc_raw <- read.table( + file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"), + as.is = TRUE) + + scdata <- Seurat::CreateSeuratObject(counts = pbmc_raw) + + scdata$cells_id <- seq(0, ncol(scdata)-1) + + # add samples + scdata$samples <- rep(c('123abc', '123def'), each = 40) + return(scdata) +} + + + + +test_that("get_cell_sets creates scratchpad and sample sets if no metadata", { + scdata <- mock_scdata() + config <- mock_config() + + cell_sets <- get_cell_sets(scdata, config) + keys <- sapply(cell_sets$cellSets, `[[`, 'key') + + expect_setequal(keys, c('scratchpad', 'sample')) +}) + + +test_that("get_cell_sets adds correct cell ids for each sample", { + scdata <- mock_scdata() + config <- mock_config() + + cell_sets <- get_cell_sets(scdata, config) + sets_key <- sapply(cell_sets$cellSets, `[[`, 'key') + + sample_sets <- cell_sets$cellSets[[which(sets_key == 'sample')]] + samples_key <- sapply(sample_sets$children, `[[`, 'key') + + for (sample_id in config$sampleIds) { + sample_cells <- sample_sets$children[[which(samples_key == sample_id)]]$cellIds + expected_cells <- unname(scdata$cells_id)[scdata$samples == sample_id] + + expect_equal(sample_cells, expected_cells) + } +}) + + +test_that("get_cell_sets adds correct cell ids for each sample", { + scdata <- mock_scdata() + config <- mock_config() + + cell_sets <- get_cell_sets(scdata, config) + sets_key <- sapply(cell_sets$cellSets, `[[`, 'key') + + sample_sets <- cell_sets$cellSets[[which(sets_key == 'sample')]] + samples_key <- sapply(sample_sets$children, `[[`, 'key') + + # ids are correct for each child + for (sample_id in config$sampleIds) { + sample_cells <- sample_sets$children[[which(samples_key == sample_id)]]$cellIds + expected_cells <- unname(scdata$cells_id)[scdata$samples == sample_id] + + expect_equal(sample_cells, expected_cells) + } +}) + + +test_that("get_cell_sets without metadata matches initial snapshot", { + scdata <- mock_scdata() + config <- mock_config() + + cell_sets <- get_cell_sets(scdata, config)$cellSets[[1]] + expect_equal(cell_sets$type, 'cellSets') + expect_true(cell_sets$rootNode) + + expect_snapshot(cell_sets) +}) + + +test_that("get_cell_sets adds a single metadata column", { + scdata <- mock_scdata() + config <- mock_config() + + # mockup metadata and lookups + config$metadata <- list(Group = list('Hello', 'WT2')) + + scdata$Group <- NA + scdata$Group[scdata$samples == '123abc'] <- 'Hello' + scdata$Group[scdata$samples == '123def'] <- 'WT2' + + scdata@misc <- list(metadata_lookups = c(Group = 'Group')) + + cell_sets <- get_cell_sets(scdata, config) + + # have it as a key + keys <- sapply(cell_sets$cellSets, `[[`, 'key') + expect_setequal(keys, c('scratchpad', 'sample', 'Group')) + + group_set <- cell_sets$cellSets[[which(keys == 'Group')]] + group_names <- sapply(group_set$children, `[[`, 'name') + + + # cell ids are correct for each child + for (group_name in group_names) { + group_cells <- group_set$children[[which(group_names == group_name)]]$cellIds + expected_cells <- unname(scdata$cells_id)[scdata$Group == group_name] + + expect_equal(group_cells, expected_cells) + } +}) + +test_that("get_cell_sets adds two metadata columns", { + scdata <- mock_scdata() + config <- mock_config() + + # mockup metadata and lookups + config$metadata <- list(Group1 = list('Hello', 'WT2'), + Group2 = list('WT', 'WT')) + + scdata$Group1 <- NA + scdata$Group1[scdata$samples == '123abc'] <- 'Hello' + scdata$Group1[scdata$samples == '123def'] <- 'WT2' + scdata$Group2 <- 'WT' + + scdata@misc <- list(metadata_lookups = c(Group1 = 'Group1', Group2 = 'Group2')) + cell_sets <- get_cell_sets(scdata, config) + + # have as keys + keys <- sapply(cell_sets$cellSets, `[[`, 'key') + expect_setequal(keys, c('scratchpad', 'sample', 'Group1', 'Group2')) + + # check that Group2 has all cells + group2_set <- cell_sets$cellSets[[which(keys == 'Group2')]] + group2_cells <- group2_set$children[[1]]$cellIds + expect_equal(group2_cells, unname(scdata$cells_id)) +}) + + +test_that("get_cell_sets uses unique colors for each cell set", { + scdata <- mock_scdata() + config <- mock_config() + + # mockup metadata and lookups + config$metadata <- list(Group1 = list('Hello', 'WT2'), + Group2 = list('WT', 'WT')) + + scdata$Group1 <- NA + scdata$Group1[scdata$samples == '123abc'] <- 'Hello' + scdata$Group1[scdata$samples == '123def'] <- 'WT2' + scdata$Group2 <- 'WT' + + scdata@misc <- list(metadata_lookups = c(Group1 = 'Group1', Group2 = 'Group2')) + cell_sets <- get_cell_sets(scdata, config) + + flat_cell_sets <- unlist(cell_sets) + colors <- flat_cell_sets[grepl('[.]color', names(flat_cell_sets))] + colors <- unname(colors) + + expect_equal(unique(colors), colors) +}) From 46f97dc086eadc259a70587bf9447bb631b94a3b Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Mon, 27 Sep 2021 14:44:36 -0700 Subject: [PATCH 06/17] abstract out setup of metadata --- .../testthat/_snaps/gem2s-7-upload_to_aws.md | 100 ++++++++++++++--- .../testthat/test-gem2s-7-upload_to_aws.R | 101 +++++++++--------- 2 files changed, 135 insertions(+), 66 deletions(-) diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.md b/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.md index 59fd7b12..11a6afea 100644 --- a/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.md +++ b/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.md @@ -1,21 +1,87 @@ -# get_cell_sets without metadata matches initial snapshot +# get_cell_sets without metadata matches snapshot Code - cell_sets + str(cell_sets) Output - $key - [1] "scratchpad" - - $name - [1] "Custom cell sets" - - $rootNode - [1] TRUE - - $children - list() - - $type - [1] "cellSets" - + List of 1 + $ cellSets:List of 2 + ..$ :List of 5 + .. ..$ key : chr "scratchpad" + .. ..$ name : chr "Custom cell sets" + .. ..$ rootNode: logi TRUE + .. ..$ children: list() + .. ..$ type : chr "cellSets" + ..$ :List of 5 + .. ..$ key : chr "sample" + .. ..$ name : chr "Samples" + .. ..$ rootNode: logi TRUE + .. ..$ children:List of 2 + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "123abc" + .. .. .. ..$ name : chr "WT1" + .. .. .. ..$ color : chr "#e377c2" + .. .. .. ..$ cellIds: int [1:40] 0 1 2 3 4 5 6 7 8 9 ... + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "123def" + .. .. .. ..$ name : chr "WT2" + .. .. .. ..$ color : chr "#8c564b" + .. .. .. ..$ cellIds: int [1:40] 40 41 42 43 44 45 46 47 48 49 ... + .. ..$ type : chr "metadataCategorical" + +# get_cell_sets with two metadata groups matches snapshot + + Code + str(cell_sets) + Output + List of 1 + $ cellSets:List of 4 + ..$ :List of 5 + .. ..$ key : chr "scratchpad" + .. ..$ name : chr "Custom cell sets" + .. ..$ rootNode: logi TRUE + .. ..$ children: list() + .. ..$ type : chr "cellSets" + ..$ :List of 5 + .. ..$ key : chr "sample" + .. ..$ name : chr "Samples" + .. ..$ rootNode: logi TRUE + .. ..$ children:List of 2 + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "123abc" + .. .. .. ..$ name : chr "WT1" + .. .. .. ..$ color : chr "#e377c2" + .. .. .. ..$ cellIds: int [1:40] 0 1 2 3 4 5 6 7 8 9 ... + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "123def" + .. .. .. ..$ name : chr "WT2" + .. .. .. ..$ color : chr "#8c564b" + .. .. .. ..$ cellIds: int [1:40] 40 41 42 43 44 45 46 47 48 49 ... + .. ..$ type : chr "metadataCategorical" + ..$ :List of 5 + .. ..$ key : chr "Group1" + .. ..$ name : chr "Group1" + .. ..$ rootNode: logi TRUE + .. ..$ children:List of 2 + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "Group1-Hello" + .. .. .. ..$ name : chr "Hello" + .. .. .. ..$ color : chr "#d62728" + .. .. .. ..$ cellIds: int [1:40] 0 1 2 3 4 5 6 7 8 9 ... + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "Group1-WT2" + .. .. .. ..$ name : chr "WT2" + .. .. .. ..$ color : chr "#2ca02c" + .. .. .. ..$ cellIds: int [1:40] 40 41 42 43 44 45 46 47 48 49 ... + .. ..$ type : chr "metadataCategorical" + ..$ :List of 5 + .. ..$ key : chr "Group2" + .. ..$ name : chr "Group2" + .. ..$ rootNode: logi TRUE + .. ..$ children:List of 1 + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "Group2-WT" + .. .. .. ..$ name : chr "WT" + .. .. .. ..$ color : chr "#ff7f0e" + .. .. .. ..$ cellIds: int [1:80] 0 1 2 3 4 5 6 7 8 9 ... + .. ..$ type : chr "metadataCategorical" diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R index 1a3886d4..e6153ae8 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -1,14 +1,15 @@ -mock_config <- function() { +mock_config <- function(metadata = NULL) { config <- list( sampleNames = list('WT1', 'WT2'), - sampleIds = list('123abc', '123def') + sampleIds = list('123abc', '123def'), + metadata = metadata ) return(config) } -mock_scdata <- function() { +mock_scdata <- function(metadata = NULL) { pbmc_raw <- read.table( file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"), as.is = TRUE) @@ -18,7 +19,25 @@ mock_scdata <- function() { scdata$cells_id <- seq(0, ncol(scdata)-1) # add samples + samples <- c('123abc', '123def') scdata$samples <- rep(c('123abc', '123def'), each = 40) + + keys <- names(metadata) + for (i in seq_along(keys)) { + + key <- keys[i] + vals <- unlist(metadata[[i]]) + + scdata@meta.data[[key]] <- NA + scdata@meta.data[[key]][scdata$samples == samples[1]] <- vals[1] + scdata@meta.data[[key]][scdata$samples == samples[2]] <- vals[2] + } + + lookups <- make.names(keys) + names(lookups) <- keys + + scdata@misc$metadata_lookups <- lookups + return(scdata) } @@ -75,30 +94,10 @@ test_that("get_cell_sets adds correct cell ids for each sample", { }) -test_that("get_cell_sets without metadata matches initial snapshot", { - scdata <- mock_scdata() - config <- mock_config() - - cell_sets <- get_cell_sets(scdata, config)$cellSets[[1]] - expect_equal(cell_sets$type, 'cellSets') - expect_true(cell_sets$rootNode) - - expect_snapshot(cell_sets) -}) - - test_that("get_cell_sets adds a single metadata column", { - scdata <- mock_scdata() - config <- mock_config() - - # mockup metadata and lookups - config$metadata <- list(Group = list('Hello', 'WT2')) - - scdata$Group <- NA - scdata$Group[scdata$samples == '123abc'] <- 'Hello' - scdata$Group[scdata$samples == '123def'] <- 'WT2' - - scdata@misc <- list(metadata_lookups = c(Group = 'Group')) + metadata <- list(Group = list('Hello', 'WT2')) + scdata <- mock_scdata(metadata) + config <- mock_config(metadata) cell_sets <- get_cell_sets(scdata, config) @@ -120,19 +119,11 @@ test_that("get_cell_sets adds a single metadata column", { }) test_that("get_cell_sets adds two metadata columns", { - scdata <- mock_scdata() - config <- mock_config() - # mockup metadata and lookups - config$metadata <- list(Group1 = list('Hello', 'WT2'), - Group2 = list('WT', 'WT')) + metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) + scdata <- mock_scdata(metadata) + config <- mock_config(metadata) - scdata$Group1 <- NA - scdata$Group1[scdata$samples == '123abc'] <- 'Hello' - scdata$Group1[scdata$samples == '123def'] <- 'WT2' - scdata$Group2 <- 'WT' - - scdata@misc <- list(metadata_lookups = c(Group1 = 'Group1', Group2 = 'Group2')) cell_sets <- get_cell_sets(scdata, config) # have as keys @@ -147,19 +138,10 @@ test_that("get_cell_sets adds two metadata columns", { test_that("get_cell_sets uses unique colors for each cell set", { - scdata <- mock_scdata() - config <- mock_config() - - # mockup metadata and lookups - config$metadata <- list(Group1 = list('Hello', 'WT2'), - Group2 = list('WT', 'WT')) - - scdata$Group1 <- NA - scdata$Group1[scdata$samples == '123abc'] <- 'Hello' - scdata$Group1[scdata$samples == '123def'] <- 'WT2' - scdata$Group2 <- 'WT' + metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) + scdata <- mock_scdata(metadata) + config <- mock_config(metadata) - scdata@misc <- list(metadata_lookups = c(Group1 = 'Group1', Group2 = 'Group2')) cell_sets <- get_cell_sets(scdata, config) flat_cell_sets <- unlist(cell_sets) @@ -168,3 +150,24 @@ test_that("get_cell_sets uses unique colors for each cell set", { expect_equal(unique(colors), colors) }) + + +test_that("get_cell_sets without metadata matches snapshot", { + scdata <- mock_scdata() + config <- mock_config() + + cell_sets <- get_cell_sets(scdata, config) + expect_snapshot(str(cell_sets)) +}) + +test_that("get_cell_sets with two metadata groups matches snapshot", { + metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) + + scdata <- mock_scdata(metadata) + config <- mock_config(metadata) + + scdata@misc <- list(metadata_lookups = c(Group1 = 'Group1', Group2 = 'Group2')) + cell_sets <- get_cell_sets(scdata, config) + + expect_snapshot(str(cell_sets)) +}) From 46e6d3951a19846839646e15ed3481f24964925b Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Mon, 27 Sep 2021 14:49:18 -0700 Subject: [PATCH 07/17] same whitespace --- pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R index e6153ae8..1d074af4 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -42,8 +42,6 @@ mock_scdata <- function(metadata = NULL) { } - - test_that("get_cell_sets creates scratchpad and sample sets if no metadata", { scdata <- mock_scdata() config <- mock_config() @@ -118,6 +116,7 @@ test_that("get_cell_sets adds a single metadata column", { } }) + test_that("get_cell_sets adds two metadata columns", { metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) @@ -160,6 +159,7 @@ test_that("get_cell_sets without metadata matches snapshot", { expect_snapshot(str(cell_sets)) }) + test_that("get_cell_sets with two metadata groups matches snapshot", { metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) From cfbe715561c99257c91263f4c24fb47108f276b7 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 13:05:47 -0700 Subject: [PATCH 08/17] remove for loop --- .../testthat/test-gem2s-7-upload_to_aws.R | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R index 1d074af4..d5e209e1 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -9,7 +9,7 @@ mock_config <- function(metadata = NULL) { } -mock_scdata <- function(metadata = NULL) { +mock_scdata <- function(config = NULL) { pbmc_raw <- read.table( file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"), as.is = TRUE) @@ -22,15 +22,16 @@ mock_scdata <- function(metadata = NULL) { samples <- c('123abc', '123def') scdata$samples <- rep(c('123abc', '123def'), each = 40) - keys <- names(metadata) - for (i in seq_along(keys)) { + # add metadata + rest <- config$metadata + keys <- c('samples', names(rest)) + metadata <- data.frame(row.names = colnames(scdata), samples = scdata$samples) - key <- keys[i] - vals <- unlist(metadata[[i]]) - - scdata@meta.data[[key]] <- NA - scdata@meta.data[[key]][scdata$samples == samples[1]] <- vals[1] - scdata@meta.data[[key]][scdata$samples == samples[2]] <- vals[2] + # Add "metadata" if exists in config + if (!is.null(rest)) { + rest <- lapply(rest, unlist) + rest <- data.frame(rest, row.names = samples, check.names = FALSE) + scdata@meta.data[names(rest)] <- rest[scdata$samples, ] } lookups <- make.names(keys) @@ -54,8 +55,8 @@ test_that("get_cell_sets creates scratchpad and sample sets if no metadata", { test_that("get_cell_sets adds correct cell ids for each sample", { - scdata <- mock_scdata() config <- mock_config() + scdata <- mock_scdata() cell_sets <- get_cell_sets(scdata, config) sets_key <- sapply(cell_sets$cellSets, `[[`, 'key') @@ -73,8 +74,8 @@ test_that("get_cell_sets adds correct cell ids for each sample", { test_that("get_cell_sets adds correct cell ids for each sample", { - scdata <- mock_scdata() config <- mock_config() + scdata <- mock_scdata() cell_sets <- get_cell_sets(scdata, config) sets_key <- sapply(cell_sets$cellSets, `[[`, 'key') @@ -94,8 +95,8 @@ test_that("get_cell_sets adds correct cell ids for each sample", { test_that("get_cell_sets adds a single metadata column", { metadata <- list(Group = list('Hello', 'WT2')) - scdata <- mock_scdata(metadata) config <- mock_config(metadata) + scdata <- mock_scdata(config) cell_sets <- get_cell_sets(scdata, config) @@ -120,8 +121,8 @@ test_that("get_cell_sets adds a single metadata column", { test_that("get_cell_sets adds two metadata columns", { metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) - scdata <- mock_scdata(metadata) config <- mock_config(metadata) + scdata <- mock_scdata(config) cell_sets <- get_cell_sets(scdata, config) @@ -138,8 +139,8 @@ test_that("get_cell_sets adds two metadata columns", { test_that("get_cell_sets uses unique colors for each cell set", { metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) - scdata <- mock_scdata(metadata) config <- mock_config(metadata) + scdata <- mock_scdata(config) cell_sets <- get_cell_sets(scdata, config) @@ -163,8 +164,8 @@ test_that("get_cell_sets without metadata matches snapshot", { test_that("get_cell_sets with two metadata groups matches snapshot", { metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) - scdata <- mock_scdata(metadata) config <- mock_config(metadata) + scdata <- mock_scdata(config) scdata@misc <- list(metadata_lookups = c(Group1 = 'Group1', Group2 = 'Group2')) cell_sets <- get_cell_sets(scdata, config) From cf82578e6a62a6e3217e5d30e823a46161849958 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 13:06:39 -0700 Subject: [PATCH 09/17] Update pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R Co-authored-by: ogibson-biomage <76957896+ogibson-biomage@users.noreply.github.com> --- pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R index d5e209e1..598298ce 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -20,7 +20,7 @@ mock_scdata <- function(config = NULL) { # add samples samples <- c('123abc', '123def') - scdata$samples <- rep(c('123abc', '123def'), each = 40) + scdata$samples <- rep(samples, each = 40) # add metadata rest <- config$metadata From a73eaffbfd0e943e17d92ddd9376686cdf3c8e75 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 13:13:06 -0700 Subject: [PATCH 10/17] re-use samples from config --- .../tests/testthat/test-gem2s-7-upload_to_aws.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R index 598298ce..20c27709 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -9,7 +9,7 @@ mock_config <- function(metadata = NULL) { } -mock_scdata <- function(config = NULL) { +mock_scdata <- function(config) { pbmc_raw <- read.table( file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"), as.is = TRUE) @@ -19,7 +19,7 @@ mock_scdata <- function(config = NULL) { scdata$cells_id <- seq(0, ncol(scdata)-1) # add samples - samples <- c('123abc', '123def') + samples <- unlist(config$sampleIds) scdata$samples <- rep(samples, each = 40) # add metadata @@ -44,8 +44,8 @@ mock_scdata <- function(config = NULL) { test_that("get_cell_sets creates scratchpad and sample sets if no metadata", { - scdata <- mock_scdata() config <- mock_config() + scdata <- mock_scdata(config) cell_sets <- get_cell_sets(scdata, config) keys <- sapply(cell_sets$cellSets, `[[`, 'key') @@ -56,7 +56,7 @@ test_that("get_cell_sets creates scratchpad and sample sets if no metadata", { test_that("get_cell_sets adds correct cell ids for each sample", { config <- mock_config() - scdata <- mock_scdata() + scdata <- mock_scdata(config) cell_sets <- get_cell_sets(scdata, config) sets_key <- sapply(cell_sets$cellSets, `[[`, 'key') @@ -75,7 +75,7 @@ test_that("get_cell_sets adds correct cell ids for each sample", { test_that("get_cell_sets adds correct cell ids for each sample", { config <- mock_config() - scdata <- mock_scdata() + scdata <- mock_scdata(config) cell_sets <- get_cell_sets(scdata, config) sets_key <- sapply(cell_sets$cellSets, `[[`, 'key') @@ -153,8 +153,8 @@ test_that("get_cell_sets uses unique colors for each cell set", { test_that("get_cell_sets without metadata matches snapshot", { - scdata <- mock_scdata() config <- mock_config() + scdata <- mock_scdata(config) cell_sets <- get_cell_sets(scdata, config) expect_snapshot(str(cell_sets)) From c3ab64a35d6ad1e4c3995636b943dcc24a604f37 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 15:39:40 -0700 Subject: [PATCH 11/17] don't use @misc slot --- pipeline-runner/R/gem2s-5-create_seurat.R | 19 ++++++++----------- pipeline-runner/R/gem2s-7-upload_to_aws.R | 6 ++++-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/pipeline-runner/R/gem2s-5-create_seurat.R b/pipeline-runner/R/gem2s-5-create_seurat.R index bfda33da..468dd8cf 100644 --- a/pipeline-runner/R/gem2s-5-create_seurat.R +++ b/pipeline-runner/R/gem2s-5-create_seurat.R @@ -9,6 +9,8 @@ #' create_seurat <- function(input, pipeline_config, prev_out) { + save(input, pipeline_config, prev_out, file = '/debug/create_seurat.rda') + message("Creating Seurat Objects...") # NOTE: edrops can be empty list @@ -46,8 +48,6 @@ create_seurat <- function(input, pipeline_config, prev_out) { construct_scdata <- function(counts, doublet_score, edrops_out, sample, annot, config, min.cells = 3, min.features = 10) { metadata <- construct_metadata(counts, sample, config) - lookups <- get_metadata_lookups(metadata) - colnames(metadata) <- lookups scdata <- Seurat::CreateSeuratObject( counts, @@ -56,8 +56,6 @@ construct_scdata <- function(counts, doublet_score, edrops_out, sample, annot, c min.cells = min.cells, min.features = min.features) - scdata@misc$metadata_lookups <- lookups - scdata <- scdata %>% add_mito(annot) %>% add_dblscore(doublet_score) %>% @@ -66,19 +64,15 @@ construct_scdata <- function(counts, doublet_score, edrops_out, sample, annot, c return(scdata) } -# to find invalid metadata column names in SeuratObject (e.g. 'TRUE') -get_metadata_lookups <- function(metadata) { - user_values <- names(metadata) - lookups <- make.names(colnames(metadata), unique = TRUE) - names(lookups) <- user_values - return(lookups) -} +# NOTE: any changes here must be reflected in meta_sets + # construct metadata for each SeuratObject construct_metadata <- function(counts, sample, config) { message("Constructing metadata df...") metadata <- data.frame(row.names = colnames(counts), samples = rep(sample, ncol(counts))) + # Add "metadata" if exists in config rest <- config$metadata if (!is.null(rest)) { @@ -87,6 +81,9 @@ construct_metadata <- function(counts, sample, config) { metadata[names(rest)] <- rest[sample, ] } + # make syntactically valid column names + colnames(metadata) <- make.names(colnames(metadata), unique = TRUE) + return(metadata) } diff --git a/pipeline-runner/R/gem2s-7-upload_to_aws.R b/pipeline-runner/R/gem2s-7-upload_to_aws.R index 37073461..970df60d 100644 --- a/pipeline-runner/R/gem2s-7-upload_to_aws.R +++ b/pipeline-runner/R/gem2s-7-upload_to_aws.R @@ -1,5 +1,7 @@ upload_to_aws <- function(input, pipeline_config, prev_out) { + + save(input, pipeline_config, prev_out, file='/debug/upload_to_aws.rda') message('Uploading to AWS ...') check_names <- c('config', 'counts_list', 'annot', 'doublet_scores', 'scdata_list', 'scdata', 'qc_config') check_prev_out(prev_out, check_names) @@ -127,8 +129,8 @@ meta_sets <- function(input, scdata, color_pool) { # names of metadata tracks keys <- names(meta) - # corresponding names stored in seurat object meta.data - seurat_keys <- scdata@misc$metadata_lookups[keys] + # same values used in construct_metadata + seurat_keys <- make.names(c('samples', keys), unique = TRUE) color_index <- 1 for (i in seq_along(keys)) { From 6ff23baac6886f1a4da144dc13d88e3a109348a5 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 15:39:50 -0700 Subject: [PATCH 12/17] fix tests --- .../testthat/test-gem2s-5-create_seurat.R | 47 +++---------------- 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/pipeline-runner/tests/testthat/test-gem2s-5-create_seurat.R b/pipeline-runner/tests/testthat/test-gem2s-5-create_seurat.R index d9982e08..882c00a8 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-5-create_seurat.R +++ b/pipeline-runner/tests/testthat/test-gem2s-5-create_seurat.R @@ -62,52 +62,19 @@ test_that("construct_metadata works with syntactically invalid column names", { ) metadata <- construct_metadata(counts, sample, config) + valid_names <- make.names(colnames(metadata), unique = TRUE) - # doesn't change column names of metadata - expect_equal(colnames(metadata), c('samples', 'TRUE', '1first', 'with space', 'with-dash')) + # changes column names of metadata + expect_equal(colnames(metadata), valid_names) # stores correct values in metadata - expect_true(all(metadata[['TRUE']] == 'a')) - expect_true(all(metadata[['1first']] == 'b')) - expect_true(all(metadata[['with space']] == 'c')) - expect_true(all(metadata[['with-dash']] == 'd')) + expect_true(all(metadata$TRUE. == 'a')) + expect_true(all(metadata$X1first == 'b')) + expect_true(all(metadata$with.space == 'c')) + expect_true(all(metadata$with.dash == 'd')) }) -test_that("result of get_metadata_lookups can be used to access syntactically invalid slots", { - - sample <- 'hello' - counts <- mock_counts() - - config <- list( - samples = 'hello', - metadata = list('TRUE' = list('a'), - '1first' = list('b'), - 'with space' = list('c'), - 'with-dash' = list('d')) - ) - - metadata <- construct_metadata(counts, sample, config) - lookups <- get_metadata_lookups(metadata) - - # seurat stores as syntactically valid names - scdata <- SeuratObject::CreateSeuratObject(counts, meta.data = metadata) - expect_true(all(lookups %in% colnames(scdata@meta.data))) - - # use lookups to access - keys <- c('TRUE', '1first', 'with space', 'with-dash') - values <- c('a', 'b', 'c', 'd') - - for (i in seq_along(keys)) { - key <- keys[i] - val <- values[i] - seurat_key <- lookups[[key]] - - expect_true(seurat_key %in% colnames(scdata@meta.data)) - expect_true(all(scdata[[seurat_key]] == val)) - } -}) - test_that("create_seurat works without emptyDrops result", { From 27a4cc9b23dde846043d830238466e89e20ca555 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 15:40:44 -0700 Subject: [PATCH 13/17] remove saves --- pipeline-runner/R/gem2s-5-create_seurat.R | 3 --- pipeline-runner/R/gem2s-7-upload_to_aws.R | 2 -- 2 files changed, 5 deletions(-) diff --git a/pipeline-runner/R/gem2s-5-create_seurat.R b/pipeline-runner/R/gem2s-5-create_seurat.R index 468dd8cf..ba504925 100644 --- a/pipeline-runner/R/gem2s-5-create_seurat.R +++ b/pipeline-runner/R/gem2s-5-create_seurat.R @@ -8,9 +8,6 @@ #' @export #' create_seurat <- function(input, pipeline_config, prev_out) { - - save(input, pipeline_config, prev_out, file = '/debug/create_seurat.rda') - message("Creating Seurat Objects...") # NOTE: edrops can be empty list diff --git a/pipeline-runner/R/gem2s-7-upload_to_aws.R b/pipeline-runner/R/gem2s-7-upload_to_aws.R index 970df60d..620ef8a0 100644 --- a/pipeline-runner/R/gem2s-7-upload_to_aws.R +++ b/pipeline-runner/R/gem2s-7-upload_to_aws.R @@ -1,7 +1,5 @@ upload_to_aws <- function(input, pipeline_config, prev_out) { - - save(input, pipeline_config, prev_out, file='/debug/upload_to_aws.rda') message('Uploading to AWS ...') check_names <- c('config', 'counts_list', 'annot', 'doublet_scores', 'scdata_list', 'scdata', 'qc_config') check_prev_out(prev_out, check_names) From 995f87fa800218557fe883e52797aa8cc93698ff Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 15:51:26 -0700 Subject: [PATCH 14/17] fix seurat_keys creation --- pipeline-runner/R/gem2s-7-upload_to_aws.R | 2 +- pipeline-runner/man/call_read10x.Rd | 2 +- pipeline-runner/man/load_cellranger_files.Rd | 2 +- .../_snaps/gem2s-7-upload_to_aws.new.md | 87 +++++++++++++++++++ .../testthat/test-gem2s-7-upload_to_aws.R | 5 -- 5 files changed, 90 insertions(+), 8 deletions(-) create mode 100644 pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.new.md diff --git a/pipeline-runner/R/gem2s-7-upload_to_aws.R b/pipeline-runner/R/gem2s-7-upload_to_aws.R index 620ef8a0..6f68f4a6 100644 --- a/pipeline-runner/R/gem2s-7-upload_to_aws.R +++ b/pipeline-runner/R/gem2s-7-upload_to_aws.R @@ -128,7 +128,7 @@ meta_sets <- function(input, scdata, color_pool) { keys <- names(meta) # same values used in construct_metadata - seurat_keys <- make.names(c('samples', keys), unique = TRUE) + seurat_keys <- make.names(c('samples', keys), unique = TRUE)[-1] color_index <- 1 for (i in seq_along(keys)) { diff --git a/pipeline-runner/man/call_read10x.Rd b/pipeline-runner/man/call_read10x.Rd index e0c8abca..735b6ab9 100644 --- a/pipeline-runner/man/call_read10x.Rd +++ b/pipeline-runner/man/call_read10x.Rd @@ -4,7 +4,7 @@ \alias{call_read10x} \title{Calls Read10X} \usage{ -call_read10x(config) +call_read10x(config, input_dir) } \arguments{ \item{config}{experiment settings.} diff --git a/pipeline-runner/man/load_cellranger_files.Rd b/pipeline-runner/man/load_cellranger_files.Rd index d09f0faa..f2989c49 100644 --- a/pipeline-runner/man/load_cellranger_files.Rd +++ b/pipeline-runner/man/load_cellranger_files.Rd @@ -4,7 +4,7 @@ \alias{load_cellranger_files} \title{Read input folder of 10x data} \usage{ -load_cellranger_files(input, pipeline_config, prev_out) +load_cellranger_files(input, pipeline_config, prev_out, input_dir = "/input") } \arguments{ \item{input}{The input object from the request} diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.new.md b/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.new.md new file mode 100644 index 00000000..6a5f5364 --- /dev/null +++ b/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.new.md @@ -0,0 +1,87 @@ +# get_cell_sets without metadata matches snapshot + + Code + str(cell_sets) + Output + List of 1 + $ cellSets:List of 2 + ..$ :List of 5 + .. ..$ key : chr "scratchpad" + .. ..$ name : chr "Custom cell sets" + .. ..$ rootNode: logi TRUE + .. ..$ children: list() + .. ..$ type : chr "cellSets" + ..$ :List of 5 + .. ..$ key : chr "sample" + .. ..$ name : chr "Samples" + .. ..$ rootNode: logi TRUE + .. ..$ children:List of 2 + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "123abc" + .. .. .. ..$ name : chr "WT1" + .. .. .. ..$ color : chr "#e377c2" + .. .. .. ..$ cellIds: int [1:40] 0 1 2 3 4 5 6 7 8 9 ... + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "123def" + .. .. .. ..$ name : chr "WT2" + .. .. .. ..$ color : chr "#8c564b" + .. .. .. ..$ cellIds: int [1:40] 40 41 42 43 44 45 46 47 48 49 ... + .. ..$ type : chr "metadataCategorical" + +# get_cell_sets with two metadata groups matches snapshot + + Code + str(cell_sets) + Output + List of 1 + $ cellSets:List of 4 + ..$ :List of 5 + .. ..$ key : chr "scratchpad" + .. ..$ name : chr "Custom cell sets" + .. ..$ rootNode: logi TRUE + .. ..$ children: list() + .. ..$ type : chr "cellSets" + ..$ :List of 5 + .. ..$ key : chr "sample" + .. ..$ name : chr "Samples" + .. ..$ rootNode: logi TRUE + .. ..$ children:List of 2 + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "123abc" + .. .. .. ..$ name : chr "WT1" + .. .. .. ..$ color : chr "#e377c2" + .. .. .. ..$ cellIds: int [1:40] 0 1 2 3 4 5 6 7 8 9 ... + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "123def" + .. .. .. ..$ name : chr "WT2" + .. .. .. ..$ color : chr "#8c564b" + .. .. .. ..$ cellIds: int [1:40] 40 41 42 43 44 45 46 47 48 49 ... + .. ..$ type : chr "metadataCategorical" + ..$ :List of 5 + .. ..$ key : chr "Group1" + .. ..$ name : chr "Group1" + .. ..$ rootNode: logi TRUE + .. ..$ children:List of 2 + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "Group1-Hello" + .. .. .. ..$ name : chr "Hello" + .. .. .. ..$ color : chr "#d62728" + .. .. .. ..$ cellIds: int(0) + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "Group1-WT2" + .. .. .. ..$ name : chr "WT2" + .. .. .. ..$ color : chr "#2ca02c" + .. .. .. ..$ cellIds: int(0) + .. ..$ type : chr "metadataCategorical" + ..$ :List of 5 + .. ..$ key : chr "Group2" + .. ..$ name : chr "Group2" + .. ..$ rootNode: logi TRUE + .. ..$ children:List of 1 + .. .. ..$ :List of 4 + .. .. .. ..$ key : chr "Group2-WT" + .. .. .. ..$ name : chr "WT" + .. .. .. ..$ color : chr "#ff7f0e" + .. .. .. ..$ cellIds: int(0) + .. ..$ type : chr "metadataCategorical" + diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R index 20c27709..3a6e974f 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -34,11 +34,6 @@ mock_scdata <- function(config) { scdata@meta.data[names(rest)] <- rest[scdata$samples, ] } - lookups <- make.names(keys) - names(lookups) <- keys - - scdata@misc$metadata_lookups <- lookups - return(scdata) } From 2e65b41f20b7ab0be95268e3c2c35a8e093b4879 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 16:08:36 -0700 Subject: [PATCH 15/17] test for syntactic validity in upload_to_aws --- .../_snaps/gem2s-7-upload_to_aws.new.md | 87 ------------------- .../testthat/test-gem2s-7-upload_to_aws.R | 41 +++++++-- 2 files changed, 33 insertions(+), 95 deletions(-) delete mode 100644 pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.new.md diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.new.md b/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.new.md deleted file mode 100644 index 6a5f5364..00000000 --- a/pipeline-runner/tests/testthat/_snaps/gem2s-7-upload_to_aws.new.md +++ /dev/null @@ -1,87 +0,0 @@ -# get_cell_sets without metadata matches snapshot - - Code - str(cell_sets) - Output - List of 1 - $ cellSets:List of 2 - ..$ :List of 5 - .. ..$ key : chr "scratchpad" - .. ..$ name : chr "Custom cell sets" - .. ..$ rootNode: logi TRUE - .. ..$ children: list() - .. ..$ type : chr "cellSets" - ..$ :List of 5 - .. ..$ key : chr "sample" - .. ..$ name : chr "Samples" - .. ..$ rootNode: logi TRUE - .. ..$ children:List of 2 - .. .. ..$ :List of 4 - .. .. .. ..$ key : chr "123abc" - .. .. .. ..$ name : chr "WT1" - .. .. .. ..$ color : chr "#e377c2" - .. .. .. ..$ cellIds: int [1:40] 0 1 2 3 4 5 6 7 8 9 ... - .. .. ..$ :List of 4 - .. .. .. ..$ key : chr "123def" - .. .. .. ..$ name : chr "WT2" - .. .. .. ..$ color : chr "#8c564b" - .. .. .. ..$ cellIds: int [1:40] 40 41 42 43 44 45 46 47 48 49 ... - .. ..$ type : chr "metadataCategorical" - -# get_cell_sets with two metadata groups matches snapshot - - Code - str(cell_sets) - Output - List of 1 - $ cellSets:List of 4 - ..$ :List of 5 - .. ..$ key : chr "scratchpad" - .. ..$ name : chr "Custom cell sets" - .. ..$ rootNode: logi TRUE - .. ..$ children: list() - .. ..$ type : chr "cellSets" - ..$ :List of 5 - .. ..$ key : chr "sample" - .. ..$ name : chr "Samples" - .. ..$ rootNode: logi TRUE - .. ..$ children:List of 2 - .. .. ..$ :List of 4 - .. .. .. ..$ key : chr "123abc" - .. .. .. ..$ name : chr "WT1" - .. .. .. ..$ color : chr "#e377c2" - .. .. .. ..$ cellIds: int [1:40] 0 1 2 3 4 5 6 7 8 9 ... - .. .. ..$ :List of 4 - .. .. .. ..$ key : chr "123def" - .. .. .. ..$ name : chr "WT2" - .. .. .. ..$ color : chr "#8c564b" - .. .. .. ..$ cellIds: int [1:40] 40 41 42 43 44 45 46 47 48 49 ... - .. ..$ type : chr "metadataCategorical" - ..$ :List of 5 - .. ..$ key : chr "Group1" - .. ..$ name : chr "Group1" - .. ..$ rootNode: logi TRUE - .. ..$ children:List of 2 - .. .. ..$ :List of 4 - .. .. .. ..$ key : chr "Group1-Hello" - .. .. .. ..$ name : chr "Hello" - .. .. .. ..$ color : chr "#d62728" - .. .. .. ..$ cellIds: int(0) - .. .. ..$ :List of 4 - .. .. .. ..$ key : chr "Group1-WT2" - .. .. .. ..$ name : chr "WT2" - .. .. .. ..$ color : chr "#2ca02c" - .. .. .. ..$ cellIds: int(0) - .. ..$ type : chr "metadataCategorical" - ..$ :List of 5 - .. ..$ key : chr "Group2" - .. ..$ name : chr "Group2" - .. ..$ rootNode: logi TRUE - .. ..$ children:List of 1 - .. .. ..$ :List of 4 - .. .. .. ..$ key : chr "Group2-WT" - .. .. .. ..$ name : chr "WT" - .. .. .. ..$ color : chr "#ff7f0e" - .. .. .. ..$ cellIds: int(0) - .. ..$ type : chr "metadataCategorical" - diff --git a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R index 3a6e974f..62063998 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R +++ b/pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R @@ -14,26 +14,27 @@ mock_scdata <- function(config) { file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"), as.is = TRUE) - scdata <- Seurat::CreateSeuratObject(counts = pbmc_raw) - - scdata$cells_id <- seq(0, ncol(scdata)-1) - + # construct metadata # add samples samples <- unlist(config$sampleIds) - scdata$samples <- rep(samples, each = 40) - # add metadata rest <- config$metadata keys <- c('samples', names(rest)) - metadata <- data.frame(row.names = colnames(scdata), samples = scdata$samples) + metadata <- data.frame(row.names = colnames(pbmc_raw), samples = rep(samples, each = 40)) # Add "metadata" if exists in config if (!is.null(rest)) { rest <- lapply(rest, unlist) rest <- data.frame(rest, row.names = samples, check.names = FALSE) - scdata@meta.data[names(rest)] <- rest[scdata$samples, ] + metadata[names(rest)] <- rest[metadata$samples, ] } + # make syntactically valid + colnames(metadata) <- make.names(colnames(metadata), unique = TRUE) + + scdata <- Seurat::CreateSeuratObject(counts = pbmc_raw, meta.data = metadata) + + scdata$cells_id <- seq(0, ncol(scdata)-1) return(scdata) } @@ -113,6 +114,30 @@ test_that("get_cell_sets adds a single metadata column", { }) +test_that("get_cell_sets uses user-supplied syntactically invalid metadata column names", { + metadata <- list('TRUE' = list('Hello', 'WT2')) + config <- mock_config(metadata) + scdata <- mock_scdata(config) + + cell_sets <- get_cell_sets(scdata, config) + + # have TRUE as a key + keys <- sapply(cell_sets$cellSets, `[[`, 'key') + expect_true('TRUE' %in% keys) + + group_set <- cell_sets$cellSets[[which(keys == 'TRUE')]] + group_names <- sapply(group_set$children, `[[`, 'name') + + # cell ids are correct for each child + for (group_name in group_names) { + group_cells <- group_set$children[[which(group_names == group_name)]]$cellIds + expected_cells <- unname(scdata$cells_id)[scdata$TRUE. == group_name] + + expect_equal(group_cells, expected_cells) + } +}) + + test_that("get_cell_sets adds two metadata columns", { metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT')) From 994ff2825e29d64f6ec27218d0f7d19c6054a486 Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 16:37:22 -0700 Subject: [PATCH 16/17] add comments --- pipeline-runner/R/gem2s-7-upload_to_aws.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipeline-runner/R/gem2s-7-upload_to_aws.R b/pipeline-runner/R/gem2s-7-upload_to_aws.R index 6f68f4a6..f718ff32 100644 --- a/pipeline-runner/R/gem2s-7-upload_to_aws.R +++ b/pipeline-runner/R/gem2s-7-upload_to_aws.R @@ -124,10 +124,11 @@ meta_sets <- function(input, scdata, color_pool) { cell_set_list <- c() meta <- lapply(input$metadata, unlist) - # names of metadata tracks + # user-supplied metadata track names keys <- names(meta) - # same values used in construct_metadata + # syntactically valid metadata names as stored in scdata + # same names as used in construct_metadata including internal 'samples' column (dropped) seurat_keys <- make.names(c('samples', keys), unique = TRUE)[-1] color_index <- 1 From bbf32f83cfc108a3a790b8b4dc629b92c4a851fc Mon Sep 17 00:00:00 2001 From: Alex Pickering Date: Tue, 28 Sep 2021 16:56:10 -0700 Subject: [PATCH 17/17] add test that harmony works --- .../testthat/test-qc-6-integrate_scdata.R | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 pipeline-runner/tests/testthat/test-qc-6-integrate_scdata.R diff --git a/pipeline-runner/tests/testthat/test-qc-6-integrate_scdata.R b/pipeline-runner/tests/testthat/test-qc-6-integrate_scdata.R new file mode 100644 index 00000000..33f32584 --- /dev/null +++ b/pipeline-runner/tests/testthat/test-qc-6-integrate_scdata.R @@ -0,0 +1,21 @@ +mock_scdata <- function() { + pbmc_raw <- read.table( + file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"), + as.is = TRUE) + + scdata <- Seurat::CreateSeuratObject(counts = pbmc_raw) + + # add samples + scdata$samples <- rep(c('123abc', '123def'), each = 40) + return(scdata) +} + + +test_that("harmony integration works", { + scdata <- mock_scdata() + config <- list(dimensionalityReduction = list(numPCs = 2), + dataIntegration = list(method = 'harmony', methodSettings = list(harmony = list(numGenes = 10, normalisation = 'logNormalize')))) + + scdata <- run_dataIntegration(scdata, config) + expect_s4_class(scdata, 'Seurat') +})