Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

iLISI function #95

Merged
merged 13 commits into from
Aug 17, 2022
21 changes: 19 additions & 2 deletions renv.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1909,6 +1909,23 @@
"Hash": "a0292a1cd346ecb09c6e7307470e87aa",
"Requirements": []
},
"lisi": {
"Package": "lisi",
"Version": "1.0",
"Source": "GitHub",
"RemoteType": "github",
"RemoteHost": "api.github.com",
"RemoteUsername": "immunogenomics",
"RemoteRepo": "lisi",
"RemoteRef": "master",
"RemoteSha": "a917556310d8d2c66833dcc35aa3d0f4d1b6e0f4",
"Hash": "dfa658397f8f8728fa60de3641247992",
"Requirements": [
"RANN",
"Rcpp",
"RcppArmadillo"
]
},
"listenv": {
"Package": "listenv",
"Version": "0.8.0",
Expand Down Expand Up @@ -2574,14 +2591,14 @@
"Package": "scpcaTools",
"Version": "0.1.8",
"Source": "GitHub",
"Remotes": "AlexsLemonade/scpcaData",
"RemoteType": "github",
"Remotes": "AlexsLemonade/scpcaData",
"RemoteHost": "api.github.com",
"RemoteRepo": "scpcaTools",
"RemoteUsername": "AlexsLemonade",
"RemoteRef": "HEAD",
"RemoteSha": "a822885e536e31a5dedeb4bacfa52f5aa16ed647",
"Hash": "f159c999586ae1de424b052ee047c9f2",
"Hash": "9a2152da57d055a4169094dda083d3d2",
"Requirements": [
"BiocGenerics",
"DropletUtils",
Expand Down
75 changes: 75 additions & 0 deletions scripts/utils/calculate-iLISI.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
suppressPackageStartupMessages({
library(lisi)
library(magrittr)
library(SingleCellExperiment)
})

# script with `check_integration_method()`
source(
here::here(
"scripts",
"utils",
"integration-helpers.R"
)
)

#' Function to calculate iLISI scores from an integrated SCE object
#'
#' @param integrated_sce The integrated SCE object
#' @param batch_column The variable in `integrated_sce` indicating batches. Default
#' is "batch".
#' @param integration_method The name of the method that was used for integration
#' to create `integrated_sce`. One of: fastMNN, harmony, rpca, cca, scvi, or scanorama
#'
#' @return Tibble with five columns with one row per cell. Columns are `ilisi_score`,
#' `cell_name` (combined barcode and library), `cell_barcode`, `library` and
#' `integration_method`
calculate_ilisi <- function(integrated_sce,
batch_column = "batch",
integration_method = NULL) {

# Check integration method
integration_method <- check_integration_method(integration_method)

# Create data frame with batch information to provide to `compute_lisi()`
batch_df <- data.frame(batch = colData(integrated_sce)[,batch_column])


# Extract the PCs (or similar) to provide to `compute_lisi()`

# If we are using either scanorama or scvi, there will be a different name:
if (integration_method == "scanorama") {
reduced_dim_name <- "scanorama_SVD"
} else if (integration_method == "scvi") {
reduced_dim_name <- "scvi_latent"
} else {
reduced_dim_name <- paste0(integration_method, "_PCA")
}
pcs <- reducedDim(integrated_sce, reduced_dim_name)

# `lisi_result` is a tibble with per-cell scores, the score roughly means:
# "how many different categories are represented in the local neighborhood of the given cell?"
# With 2 batches, then, values close to 2 indicate good integration
lisi_result <- lisi::compute_lisi(pcs,
# define the batches
batch_df,
# which variables in `batch_df` to compute lisi for
batch) %>%
tibble::as_tibble() %>%
# Rename the result column to `ilisi_score`
dplyr::rename(ilisi_score = batch) %>%
# Add in the cell, library ID, and integration method
dplyr::mutate(cell_name = colnames(integrated_sce),
integration_method = integration_method) %>%
# split cell into cell_barcode and library
tidyr::separate(cell_name,
into = c("cell_barcode", "library"),
sep = "-",
# keep the original cell!
remove = FALSE)


# Return the tibble
return(lisi_result)

}
6 changes: 3 additions & 3 deletions scripts/utils/integrate-fastmnn.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,14 @@ integrate_fastMNN <- function(combined_sce,

# Only add this information if all genes were used, meaning dimensions are compatible
if (is.null(gene_list)){
assay(combined_sce, "fastMNN_corrected") <- assay(integrated_sce, "reconstructed")
assay(combined_sce, "fastmnn_corrected") <- assay(integrated_sce, "reconstructed")
}

# Add in the PCs, regardless of the gene list
reducedDim(combined_sce, "fastMNN_PCA") <- reducedDim(integrated_sce, "corrected")
reducedDim(combined_sce, "fastmnn_PCA") <- reducedDim(integrated_sce, "corrected")

# Perform UMAP with the new PCs -----------------
combined_sce <- perform_dim_reduction(combined_sce, prefix = "fastMNN")
combined_sce <- perform_dim_reduction(combined_sce, prefix = "fastmnn")

# Return SCE object with fastMNN information ---------------
return(combined_sce)
Expand Down
25 changes: 25 additions & 0 deletions scripts/utils/integration-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -326,3 +326,28 @@ remove_uncorrected_expression <- function(sce_object,
}
return(sce_object)
}


#' Checks that a given integration method is acceptable
#'
#' @param integration_method The provided integration method to check
#'
#' @return An all-lowercase version of the given provided_method
check_integration_method <- function(integration_method) {

all_integration_methods <- c("fastMNN", "harmony", "rpca", "cca", "scvi", "scanorama")
if (is.null(integration_method)) {
stop("An `integration_method` must be provided.")
} else {
integration_method <- tolower(integration_method)
if (!(integration_method %in% tolower(all_integration_methods))) {
stop(
paste("The `integration_method` must be one of: ",
paste(all_integration_methods, collapse = ", ")
)
)
}
}
return(integration_method)
}

12 changes: 9 additions & 3 deletions scripts/utils/test_integration-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ source(file.path(util_dir, "integration-helpers.R"))
source(file.path(util_dir, "integrate-harmony.R"))
source(file.path(util_dir, "integrate-fastMNN.R"))
source(file.path(util_dir, "integrate-seurat.R"))
source(file.path(util_dir, "calculate-iLISI.R"))

library(magrittr) # pipe
library(scpcaTools) # for transformation of sce -> seurat
Expand Down Expand Up @@ -56,18 +57,18 @@ perform_dim_reduction(combined_sce,

# Test harmony:
integrate_harmony(combined_sce, "batch", from_pca=FALSE)
integrated_object <- integrate_harmony(combined_sce, "batch")
integrated_sce<- integrate_harmony(combined_sce, "batch")


# Test fastMNN:
integrated_object <-integrate_fastMNN(combined_sce)
integrated_sce <-integrate_fastMNN(combined_sce)


# plot UMAP pre and post integration
pre_integration <- scater::plotReducedDim(combined_sce,
dimred = "UMAP",
colour_by = "batch")
post_integration <- scater::plotReducedDim(integrated_object,
post_integration <- scater::plotReducedDim(integrated_sce,
dimred = "harmony_UMAP",
colour_by = "batch")

Expand Down Expand Up @@ -96,3 +97,8 @@ rpca_seurat_integrated_obj <- integrate_seurat(seurat_list, reduction_method = "

# Integrate data using canonical correlation analysis
cca_seurat_integrated_obj <- integrate_seurat(seurat_list, reduction_method = "cca")


# Test score calculation
integrated_sce <- readRDS("results/human_cell_atlas/integrated_sce/1M_Immune_Cells_integrated_harmony_sce.rds")
lisi <- calculate_ilisi(integrated_sce, "batch", "harmony")