Skip to content

Commit

Permalink
Merge pull request #126 from biomage-org/2399-low-pc-clustering
Browse files Browse the repository at this point in the history
[BIOMAGE-2399] - lower clustering dimensions for low dimensional PCAs
  • Loading branch information
gerbeldo authored Feb 15, 2023
2 parents 46e82e1 + 24898dc commit 44db180
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 2 deletions.
18 changes: 16 additions & 2 deletions pipeline-runner/R/qc-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,16 @@ getClusters <- function(clustering_method, resolution, data) {
} else {
graph_name <- paste0(Seurat::DefaultAssay(data), "_snn")
if (!graph_name %in% names(data)) {
data <- Seurat::FindNeighbors(data, k.param = 20, annoy.metric = "cosine", verbose = FALSE, reduction = active.reduction)
# number of dimensions used must be lte to available dimensions
dims <- 1:min(10, length(data@reductions[[active.reduction]]))
data <-
Seurat::FindNeighbors(
data,
annoy.metric = "cosine",
reduction = active.reduction,
dims = dims,
verbose = FALSE,
)
}
data <- Seurat::FindClusters(data, resolution = resolution, verbose = FALSE, algorithm = algorithm, random.seed = RANDOM_SEED)
}
Expand All @@ -216,7 +225,12 @@ getSNNiGraph <- function(data, active.reduction) {

# if doesn't exist, run SNN
if (!snn_name %in% names(data)) {
data <- Seurat::FindNeighbors(data, reduction = active.reduction)
dims <- 1:min(10, length(data@reductions[[active.reduction]]))
data <-
Seurat::FindNeighbors(data,
reduction = active.reduction,
dims = dims,
verbose = FALSE)
}

# convert Seurat Graph object to igraph
Expand Down
39 changes: 39 additions & 0 deletions pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,3 +266,42 @@ test_that("embed_and_cluster works", {
withr::defer(unlink(cell_sets_bucket, recursive = TRUE))

})


test_that("runClusters does not crash with less than 10 dimensions available", {
algos <- c("louvain", "leiden")
scdata <- mock_scdata()
expected_keys <- c("cluster", "cell_ids")
resolution <- 0.8

# remove all pre-existing reductions and calculate low-PC PCA
scdata <- Seurat::DietSeurat(scdata, scale.data = T)
scdata <- suppressWarnings(Seurat::RunPCA(scdata, assay = "RNA", npcs = 2, verbose = F))

for (algo in algos) {
res <- runClusters(algo, resolution, scdata)
expect_equal(names(res), expected_keys)
}
})


test_that("getClusters uses the default value of 10 if there are enough PCs available",{
algos <- c("louvain", "leiden")
scdata <- mock_scdata()
resolution <- 0.8

# remove all pre-existing reductions and calculate low-PC PCA
scdata <- Seurat::DietSeurat(scdata, scale.data = T)
scdata@commands <- list()
scdata <- suppressWarnings(Seurat::RunPCA(scdata, assay = "RNA", npcs = 20, verbose = F))

for (algo in algos) {
clustered_scdata <- getClusters(algo, resolution, scdata)
if (algo == "louvain") expect_equal(clustered_scdata@commands$FindNeighbors.RNA.pca$dims, 1:10)
# difficult to test in leiden, so test internal state as proxy
if (algo == "leiden") expect_true("seurat_clusters" %in% names(clustered_scdata@meta.data))
}
})



0 comments on commit 44db180

Please sign in to comment.