From 7b80c1352aa04a15f94a9070b4a37845c3603522 Mon Sep 17 00:00:00 2001 From: cosa65 Date: Thu, 16 Nov 2023 12:35:00 -0300 Subject: [PATCH 1/3] Skip creating CLM cell sets that are empty Signed-off-by: cosa65 --- pipeline-runner/R/qc-7-embed_and_cluster.R | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pipeline-runner/R/qc-7-embed_and_cluster.R b/pipeline-runner/R/qc-7-embed_and_cluster.R index e72fd568..74305dbd 100644 --- a/pipeline-runner/R/qc-7-embed_and_cluster.R +++ b/pipeline-runner/R/qc-7-embed_and_cluster.R @@ -388,19 +388,28 @@ make_cl_metadata_cellclass <- function(variable, type, cl_metadata, color_pool) cell_ids <- cl_metadata[get(variable) == values[i] & cl_metadata$duplicate_barcode == "no", cells_id] + # Empty cell sets shouldn't be created based on cell level metadata + if (length(cell_ids) == 0) next + # do not add duplicate barcodes, except for the duplicate_barcode cellset if (variable == "duplicate_barcode") { cell_ids <- cl_metadata[get(variable) == values[i], cells_id] } - cl_metadata_cellset$children[[i]] <- list( - key = uuid::UUIDgenerate(), - name = as.character(values[i]), - rootNode = FALSE, - type = type, - color = color_pool[1], - cellIds = ensure_is_list_in_json(cell_ids) + cl_metadata_cellset$children <- append( + cl_metadata_cellset$children, + list( + list( + key = uuid::UUIDgenerate(), + name = as.character(values[i]), + rootNode = FALSE, + type = type, + color = color_pool[1], + cellIds = ensure_is_list_in_json(cell_ids) + ) + ) ) + color_pool <- color_pool[-1] } From f0eb71085105326230103292d3070741c47f7b13 Mon Sep 17 00:00:00 2001 From: cosa65 Date: Thu, 16 Nov 2023 13:34:53 -0300 Subject: [PATCH 2/3] Add test Signed-off-by: cosa65 --- .../testthat/test-qc-7-embed_and_cluster.R | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R b/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R index b95a6fb3..94bcd8a3 100644 --- a/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R +++ b/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R @@ -30,8 +30,9 @@ mock_color_pool <- function(n) { } -mock_cl_metadata <- function(scdata) { - barcode <- rownames(scdata@meta.data) +mock_cl_metadata <- function(scdata, optional_custom_barcodes) { + barcode <- if (missing(optional_custom_barcodes)) rownames(scdata@meta.data) else optional_custom_barcodes + samples <- rep_len(paste0("sample_", 1:4), length(barcode)) cell_type <- rep_len(paste0("cell_type_", 1:10), length(barcode)) group_var <- rep_len(paste0("group_", 1:2), length(barcode)) @@ -607,6 +608,34 @@ test_that("make_cl_metadata_cellsets makes cell-level metadata cellsets.", { } }) +test_that("make_cl_metadata_cellsets skips making cellsets that are empty (no cellIds).", { + config <- mock_config() + scdata <- mock_scdata() + cl_metadata <- mock_cl_metadata( + scdata, + c(rownames(scdata@meta.data), paste0(barcode, 1:50)) + ) + + local_mock_cl_metadata_table(cl_metadata, "mock_experiment_id") + + res <- stubbed_make_cl_metadata_cellsets(scdata, config) + withr::defer(unlink(file.path(".", basename(config$metadata_s3_path)))) + + expect_equal(length(res), 4) + expect_equal(length(res[[1]]$children), length(unique(cl_metadata$cell_type))) + expect_equal(length(res[[2]]$children), 1) + expect_equal(length(res[[3]]$children), length(unique(cl_metadata$group_var))) + expect_equal(length(res[[4]]$children), length(unique(cl_metadata$redundant_group_var))) + + cell_class_names <- c("key", "name", "rootNode", "type", "children") + purrr::walk(res, expect_named, cell_class_names) + + # cellsets have the same keys as cell classes except children, color and cellIds + for (i in seq_along(res)) { + purrr::walk(res[[i]]$children, expect_named, c(cell_class_names[-5], "color", "cellIds")) + } +}) + with_fake_http( test_that("replace_cl_metadata_through_api sends patch request", { @@ -635,6 +664,7 @@ test_that("make_cl_metadata_table works with duplicate barcodes", { expect_equal(length(res), 4) expect_equal(length(res[[1]]$children), length(unique(cl_metadata$cell_type))) + expect_equal(length(res[[2]]$children), 2) expect_equal(length(res[[3]]$children), length(unique(cl_metadata$group_var))) expect_equal(length(res[[4]]$children), length(unique(cl_metadata$redundant_group_var))) From f1631dbac30678f193e6e31c82fd50913f313f94 Mon Sep 17 00:00:00 2001 From: cosa65 Date: Thu, 16 Nov 2023 15:13:41 -0300 Subject: [PATCH 3/3] Add test Signed-off-by: cosa65 --- pipeline-runner/R/qc-7-embed_and_cluster.R | 6 +++--- .../testthat/test-qc-7-embed_and_cluster.R | 18 +++--------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/pipeline-runner/R/qc-7-embed_and_cluster.R b/pipeline-runner/R/qc-7-embed_and_cluster.R index 74305dbd..8eaaf020 100644 --- a/pipeline-runner/R/qc-7-embed_and_cluster.R +++ b/pipeline-runner/R/qc-7-embed_and_cluster.R @@ -388,14 +388,14 @@ make_cl_metadata_cellclass <- function(variable, type, cl_metadata, color_pool) cell_ids <- cl_metadata[get(variable) == values[i] & cl_metadata$duplicate_barcode == "no", cells_id] - # Empty cell sets shouldn't be created based on cell level metadata - if (length(cell_ids) == 0) next - # do not add duplicate barcodes, except for the duplicate_barcode cellset if (variable == "duplicate_barcode") { cell_ids <- cl_metadata[get(variable) == values[i], cells_id] } + # Empty cell sets shouldn't be created based on cell level metadata + if (length(cell_ids) == 0) next + cl_metadata_cellset$children <- append( cl_metadata_cellset$children, list( diff --git a/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R b/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R index 94bcd8a3..12ad078b 100644 --- a/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R +++ b/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R @@ -613,7 +613,7 @@ test_that("make_cl_metadata_cellsets skips making cellsets that are empty (no ce scdata <- mock_scdata() cl_metadata <- mock_cl_metadata( scdata, - c(rownames(scdata@meta.data), paste0(barcode, 1:50)) + c(paste0("missing-barcode", 1:50)) ) local_mock_cl_metadata_table(cl_metadata, "mock_experiment_id") @@ -621,19 +621,8 @@ test_that("make_cl_metadata_cellsets skips making cellsets that are empty (no ce res <- stubbed_make_cl_metadata_cellsets(scdata, config) withr::defer(unlink(file.path(".", basename(config$metadata_s3_path)))) - expect_equal(length(res), 4) - expect_equal(length(res[[1]]$children), length(unique(cl_metadata$cell_type))) - expect_equal(length(res[[2]]$children), 1) - expect_equal(length(res[[3]]$children), length(unique(cl_metadata$group_var))) - expect_equal(length(res[[4]]$children), length(unique(cl_metadata$redundant_group_var))) - - cell_class_names <- c("key", "name", "rootNode", "type", "children") - purrr::walk(res, expect_named, cell_class_names) - - # cellsets have the same keys as cell classes except children, color and cellIds - for (i in seq_along(res)) { - purrr::walk(res[[i]]$children, expect_named, c(cell_class_names[-5], "color", "cellIds")) - } + expect_equal(length(res[[1]]$children), 0) + expect_equal(length(res[[2]]$children), 0) }) @@ -664,7 +653,6 @@ test_that("make_cl_metadata_table works with duplicate barcodes", { expect_equal(length(res), 4) expect_equal(length(res[[1]]$children), length(unique(cl_metadata$cell_type))) - expect_equal(length(res[[2]]$children), 2) expect_equal(length(res[[3]]$children), length(unique(cl_metadata$group_var))) expect_equal(length(res[[4]]$children), length(unique(cl_metadata$redundant_group_var)))