diff --git a/pipeline-runner/R/seurat-3-upload_seurat_to_aws.R b/pipeline-runner/R/seurat-3-upload_seurat_to_aws.R index 9985f105..1e4c14d7 100644 --- a/pipeline-runner/R/seurat-3-upload_seurat_to_aws.R +++ b/pipeline-runner/R/seurat-3-upload_seurat_to_aws.R @@ -78,17 +78,18 @@ upload_seurat_to_aws <- function(input, pipeline_config, prev_out) { } find_cluster_columns <- function(scdata) { + meta <- scdata@meta.data # exclude all group columns, including duplicates - group_cols <- find_group_columns(scdata@meta.data, remove.dups = FALSE) - exclude_cols <- c(group_cols, 'samples') + group_cols <- find_group_columns(meta, remove.dups = FALSE) + group_cols <- c(group_cols, 'samples') + scdblfinder_cols <- grep('^scDblFinder', colnames(meta), value = TRUE) # order meta to indicate preference for louvain clusters - meta <- scdata@meta.data louvain_cols <- c('louvain', 'active.ident', 'seurat_clusters') meta <- meta |> dplyr::relocate(dplyr::any_of(louvain_cols)) - check_cols <- setdiff(colnames(meta), exclude_cols) + check_cols <- setdiff(colnames(meta), c(scdblfinder_cols, group_cols)) cluster_cols <- c() for (check_col in check_cols) { @@ -111,9 +112,9 @@ find_cluster_columns <- function(scdata) { # skip if col is same as samples or group column is_sample_col <- FALSE - for (exclude_col in exclude_cols) { - exclude_vals <- meta[[exclude_col]] - if (test_groups_equal(check_vals, exclude_vals)) { + for (group_col in group_cols) { + group_vals <- meta[[group_col]] + if (test_groups_equal(check_vals, group_vals)) { is_sample_col <- TRUE break } diff --git a/pipeline-runner/tests/testthat/test-seurat-3-upload_seurat_to_aws.R b/pipeline-runner/tests/testthat/test-seurat-3-upload_seurat_to_aws.R index e8fcef7e..c88ead69 100644 --- a/pipeline-runner/tests/testthat/test-seurat-3-upload_seurat_to_aws.R +++ b/pipeline-runner/tests/testthat/test-seurat-3-upload_seurat_to_aws.R @@ -285,3 +285,19 @@ test_that("find_cluster_columns puts 'louvain' column first if exists", { cluster_cols <- find_cluster_columns(scdata) expect_equal(cluster_cols[1], 'louvain') }) + + +test_that("find_cluster_columns omits columns that start with scDblFinder", { + + expected_cols <- c('RNA_snn_res.0.8', 'letter.idents', 'groups', 'RNA_snn_res.1') + + scdata <- mock_scdata() + sample_names <- c('A', 'B', 'C', 'D') + samples <- rep(sample_names, each = ncol(scdata)/4) + scdata$samples <- samples + + scdata$scDblFinder.cluster <- scdata$RNA_snn_res.0.8 + + cluster_cols <- find_cluster_columns(scdata) + expect_setequal(cluster_cols, expected_cols) +})