Skip to content

Commit

Permalink
Merge pull request #357 from biomage-org/fix-sct-geosketch-biomage
Browse files: browse the repository at this point in the history
Fix Seurat v4 integration issue with SCTransform and geosketch
  • Loading branch information
saracastel authored Feb 5, 2024
2 parents dddacd0 + 0161faf commit f54a56a
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
7 changes: 6 additions & 1 deletion pipeline-runner/R/qc-6-integrate_scdata-seuratv4.R
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,13 @@ seuratv4_geosketch_find_and_integrate_anchors <-
# merge
scdata <- create_scdata(scdata_list, cells_id, merge_data = TRUE)
# geosketch needs PCA to be run
if (Seurat::DefaultAssay(scdata) == "SCT") {
scdata_features <- Seurat::SelectIntegrationFeatures(object.list = scdata_list, nfeatures = 2000)
Seurat::VariableFeatures(scdata[["SCT"]]) <- scdata_features
} else {
scdata <- Seurat::FindVariableFeatures(scdata, assay = "RNA", nfeatures = 2000, verbose = FALSE)
}
scdata <- scdata |>
Seurat::FindVariableFeatures(assay = "RNA", nfeatures = 2000, verbose = FALSE) |>
Seurat::ScaleData(verbose = FALSE) |>
Seurat::RunPCA(npcs = npcs, verbose = FALSE)

Expand Down
2 changes: 1 addition & 1 deletion pipeline-runner/R/qc-6-subsample-geosketch.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ run_geosketch <- function(scdata, dims, perc_num_cells, reduction = "pca") {
embeddings <- scdata@reductions[[reduction]]@cell.embeddings[, 1:dims]
index <- unlist(geosketch$gs(embeddings, as.integer(num_cells), one_indexed = TRUE))
sketch <- scdata[, index]
Seurat::DefaultAssay(sketch) <- "RNA"

sketch@misc[["active.reduction"]] <- reduction

return(list(scdata = scdata, sketch = sketch))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,6 @@ test_that("create_scdata merge the data slots when merge_data is TRUE", {


test_that("misc slot is complete after Seurat V4 integration", {

# mock a bigger dataset to run Seurat v4 integration without skipping it
c(scdata_list, sample_1_id, sample_2_id) %<-% suppressWarnings(mock_scdata(n_rep = 3))
cells_id <- list("123abc" = scdata_list$`123abc`$cells_id, "123def" = scdata_list$`123def`$cells_id)
Expand Down Expand Up @@ -312,3 +311,24 @@ test_that("misc slot is complete after Seurat V4 integration with geosketch", {

expect_setequal(names(integrated_scdata@misc), expected_misc_names)
})


test_that("default assay in the integrated object matches normalisation method after Seurat V4 integration with geosketch", {
  # Build a mock dataset with n_rep = 3 so that Seurat v4 integration is
  # actually run rather than skipped.
  c(scdata_list, sample_1_id, sample_2_id) %<-% suppressWarnings(mock_scdata(n_rep = 3))
  cells_id <- list(
    "123abc" = scdata_list$`123abc`$cells_id,
    "123def" = scdata_list$`123def`$cells_id
  )

  # Normalisation method -> default assay expected on the integrated object.
  assay_by_method <- c(logNormalize = "RNA", SCT = "SCT")

  for (method_name in names(assay_by_method)) {
    seuratv4_settings <- list(numGenes = 10, normalisation = method_name)
    geosketch_settings <- list(percentageToKeep = 50)

    config <- list(
      dimensionalityReduction = list(numPCs = 10, method = "rpca"),
      dataIntegration = list(
        method = "seuratv4",
        methodSettings = list(seuratv4 = seuratv4_settings)
      ),
      downsampling = list(
        method = "geosketch",
        methodSettings = list(geosketch = geosketch_settings)
      )
    )

    # Run the integration task with geosketch downsampling; only the `data`
    # element of the result is inspected.
    integration_result <- suppressWarnings(
      integrate_scdata(scdata_list, config, "", cells_id, task_name = "dataIntegration")
    )
    integrated_scdata <- integration_result$data

    expect_s4_class(integrated_scdata, "Seurat")
    expect_equal(
      Seurat::DefaultAssay(integrated_scdata),
      assay_by_method[[method_name]]
    )
  }
})

0 comments on commit f54a56a

Please sign in to comment.