Skip to content

Commit

Permalink
Merge pull request hms-dbmi-cellenics#365 from hms-dbmi-cellenics/unf…
Browse files Browse the repository at this point in the history
…iltered-seurat-integration2

Filter out cells in SeuratV4 integration
  • Loading branch information
gerbeldo authored Mar 26, 2024
2 parents 52f1a3f + 3ae4ef3 commit db832e6
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 10 deletions.
30 changes: 23 additions & 7 deletions pipeline-runner/R/qc-6-integrate_scdata-seuratv4.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#'
#' @param scdata_list list of SeuratObjects
#' @param config list of configuration parameters
#' @param cells_id named list (one element per sample) of cell ids to keep
#'
#' @return normalized and integrated Seurat object
#' @export
Expand All @@ -22,7 +23,6 @@ run_seuratv4 <- function(scdata_list, config, cells_id) {
}

reduction <- config$dimensionalityReduction$method
exclude_groups <- config$dimensionalityReduction$excludeGeneCategories

use_geosketch <- "downsampling" %in% names(config) && config$downsampling$method == "geosketch"

Expand All @@ -31,18 +31,13 @@ run_seuratv4 <- function(scdata_list, config, cells_id) {
# use the min of what the user wants and what can be calculated
npcs <- min(config$dimensionalityReduction$numPCs, npcs_for_pca)

scdata_list <- order_by_size(scdata_list)
scdata_list <- prepare_scdata_list_for_seurat_integration(scdata_list, config, cells_id)

# normalize single samples
for (i in 1:length(scdata_list)) {
# we need RNA assay to compute the integrated matrix
Seurat::DefaultAssay(scdata_list[[i]]) <- "RNA"

# remove cell cycle genes if needed
if (length(exclude_groups) > 0) {
scdata_list[[i]] <- remove_genes(scdata_list[[i]], exclude_groups)
}

if (normalization == "LogNormalize") {
scdata_list[[i]] <- scdata_list[[i]] |>
Seurat::NormalizeData(assay = "RNA", normalization.method = normalization, verbose = FALSE) |>
Expand Down Expand Up @@ -86,6 +81,27 @@ run_seuratv4 <- function(scdata_list, config, cells_id) {
}


#' Prepare the scdata list for Seurat integration
#'
#' Runs the preprocessing steps that precede integration: the list is passed
#' through `order_by_size()`, then through `remove_filtered_cells()` together
#' with `cells_id`, and finally, when
#' `config$dimensionalityReduction$excludeGeneCategories` is non-empty,
#' `remove_genes()` is applied to every element of the list.
#'
#' @inheritParams run_seuratv4
#' @return scdata list after ordering, cell filtering and (optional) gene
#'   removal
#' @export
#'
prepare_scdata_list_for_seurat_integration <- function(scdata_list, config, cells_id) {
  gene_categories_to_drop <- config$dimensionalityReduction$excludeGeneCategories

  prepared_list <- scdata_list |>
    order_by_size() |>
    remove_filtered_cells(cells_id)

  # nothing to exclude: hand back the ordered, cell-filtered list as is
  if (length(gene_categories_to_drop) == 0) {
    return(prepared_list)
  }

  # drop the requested gene categories from every sample
  lapply(
    prepared_list,
    \(scdata) remove_genes(scdata, gene_categories_to_drop)
  )
}


#' Find and integrate anchors
#'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,18 +317,43 @@ test_that("default assay in the integrated object matches normalisation method a
# mock a bigger dataset to run Seurat v4 integration without skipping it
c(scdata_list, sample_1_id, sample_2_id) %<-% suppressWarnings(mock_scdata(n_rep = 3))
cells_id <- list("123abc" = scdata_list$`123abc`$cells_id, "123def" = scdata_list$`123def`$cells_id)

normalisation_methods <- c("logNormalize", "SCT")

for (normalisation_method in normalisation_methods) {
config <- list(
dimensionalityReduction = list(numPCs = 10, method = "rpca"),
dataIntegration = list(method = "seuratv4", methodSettings = list(seuratv4 = list(numGenes = 10, normalisation = normalisation_method))),
downsampling = list(method = "geosketch", methodSettings = list(geosketch = list(percentageToKeep = 50))))

integrated_scdata <- suppressWarnings(integrate_scdata(scdata_list, config, "", cells_id, task_name = "dataIntegration")$data)
expect_s4_class(integrated_scdata, "Seurat")
expected_assay <- if (normalisation_method == "logNormalize") "RNA" else "SCT"
expect_equal(Seurat::DefaultAssay(integrated_scdata), expected_assay)
}
})

test_that("prepare_scdata_list_for_seurat_integration keeps cells_id cells only", {
  c(scdata_list, sample_1_id, sample_2_id) %<-% mock_scdata()

  cells_id <- mock_ids()
  sample_ids <- c(sample_1_id, sample_2_id)

  seuratv4_settings <- list(numGenes = 1000, normalisation = "logNormalize")
  config <- list(
    dimensionalityReduction = list(numPCs = 2, method = "rpca"),
    dataIntegration = list(
      method = "seuratv4",
      methodSettings = list(seuratv4 = seuratv4_settings)
    )
  )

  # simulate upstream filtering: keep only the first 10 cell ids per sample
  for (sample_id in sample_ids) {
    cells_id[[sample_id]] <- cells_id[[sample_id]][seq_len(10)]
  }

  prepared_list <- prepare_scdata_list_for_seurat_integration(
    scdata_list,
    config,
    cells_id
  )

  # every sample must end up with exactly as many cells as ids were kept
  for (sample_id in sample_ids) {
    expect_equal(
      ncol(prepared_list[[sample_id]]),
      length(cells_id[[sample_id]])
    )
  }
})

0 comments on commit db832e6

Please sign in to comment.