diff --git a/pipeline-runner/R/qc-7-embed_and_cluster.R b/pipeline-runner/R/qc-7-embed_and_cluster.R index 6c6c336f..7eb483c3 100644 --- a/pipeline-runner/R/qc-7-embed_and_cluster.R +++ b/pipeline-runner/R/qc-7-embed_and_cluster.R @@ -88,7 +88,7 @@ format_cell_sets_object <- function( type = "cellSets", children = list() ) - for (cluster in sort(unique(cell_sets$cluster))) { + for (cluster in sort_cluster_names(unique(cell_sets$cluster))) { cells <- cell_sets[cell_sets$cluster == cluster, "cell_ids"] is.num <- !is.na(as.numeric(cluster)) set_name <- ifelse(is.num, paste("Cluster", cluster), cluster) @@ -134,3 +134,23 @@ update_sets_through_api <- "Authorization" = auth_JWT) ) } + + +#' Sort cluster names +#' +#' Sorts cluster names naturally, i.e. Cluster 1, Cluster 2, Cluster 10 +#' +#' @param strings cluster names +#' +#' @return sorted vector +#' @export +#' +sort_cluster_names <- function(strings) { + # extract letters and digits + char <- gsub("\\d", "", strings) + nums <- gsub("\\D", "", strings) + + sorted_indices <- order(char, as.integer(nums)) + + return(strings[sorted_indices]) +} diff --git a/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R b/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R index 6487346e..acf96715 100644 --- a/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R +++ b/pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R @@ -228,18 +228,38 @@ test_that("format_cell_sets_object result has correct number of clusters",{ test_that("format_cell_sets_object returns empty children on empty cellset", { n_clusters <- 5 - cell_sets <- mock_cellset_object(0, n_clusters) + cell_sets <- mock_cellset_object(50, n_clusters) color_pool <- mock_color_pool(n_clusters) algos <- c("louvain", "leiden") for (algo in algos) { res <- format_cell_sets_object(cell_sets, algo, color_pool) - expect_equal(length(res$children), 0) + expect_equal(length(res$children), 5) } }) +test_that("format_cell_sets_object orders clusters lexicographically", { + n_clusters <- 5 + cell_sets <- mock_cellset_object(50, n_clusters) + troublesome_clusters <- c(11, 12, 21, 45) + more_cell_sets <- data.frame(cluster = troublesome_clusters, cell_ids = c(101, 102, 103, 104)) + cell_sets <- rbind(cell_sets, more_cell_sets) + + color_pool <- mock_color_pool(n_clusters) + algos <- c("louvain", "leiden") + + expected_order <- + list(louvain = paste0("louvain-", c(1:5, troublesome_clusters)), + leiden = paste0("leiden-", c(1:5, troublesome_clusters))) + + for (algo in algos) { + res <- format_cell_sets_object(cell_sets, algo, color_pool) + expect_equal(unlist(lapply(res$children, `[[`, "key")), expected_order[[algo]]) + } +}) + test_that("embed_and_cluster works", { scdata <- mock_scdata()