From e81deafb34f1f103329f77de62d67b547b8e5521 Mon Sep 17 00:00:00 2001 From: kollo97 Date: Thu, 21 Nov 2024 17:22:33 +0100 Subject: [PATCH] Add examples again: trying to make BiocCheck work --- .DS_Store | Bin 0 -> 6148 bytes ._.DS_Store | Bin 0 -> 4096 bytes DESCRIPTION | 5 +- NAMESPACE | 5 +- R/anglemania.R | 15 +- R/anglemanise_utils.R | 63 ++++++- R/extract_angles.R | 20 +++ R/factorise.R | 22 +++ R/integrate_by_features.R | 25 ++- R/objects.R | 160 +++++++++++++++++- README.md | 1 + _pkgdown.yml | 2 +- inst/script/simulate_scRNA_data_splatter.R | 4 +- man/anglemania.Rd | 15 +- man/anglemaniaObject-methods.Rd | 128 +++++++++++++- man/big_mat_list_mean.Rd | 24 +++ man/create_anglemaniaObject.Rd | 12 ++ man/extract_angles.Rd | 22 +++ man/extract_rows_for_unique_genes.Rd | 2 +- man/factorise.Rd | 23 +++ man/get_dstat.Rd | 8 + man/get_list_stats.Rd | 13 ++ man/integrate_by_features.Rd | 24 ++- man/integrate_seurat_list.Rd | 9 + man/select_genes.Rd | 10 ++ man/sparse_to_fbm.Rd | 6 + .../testthat/_snaps/integrate_by_features.md | 32 ++++ tests/testthat/test-integrate_by_features.R | 5 +- vignettes/anglemania_tutorial.Rmd | 24 +-- 29 files changed, 628 insertions(+), 51 deletions(-) create mode 100755 .DS_Store create mode 100755 ._.DS_Store create mode 100644 tests/testthat/_snaps/integrate_by_features.md diff --git a/.DS_Store b/.DS_Store new file mode 100755 index 0000000000000000000000000000000000000000..ec20ea9e51377b340b07b225263ab8379a4df7b8 GIT binary patch literal 6148 zcmeH~F^a=L3`M^-E&|)QOgT*skQ)pkIYBNElEAVMND%^C&e8WvqbA<1g9%?Cy^+Sk z_AfjZ0NXw;8(;*mq&u;K?+C#DIf); zfE1XK0(p!tKWFqzdK4)j1?HiEe;*3nS(B|ZJ{=4(0+0jCVO+;7K^8BNHQ72@p;=B3 zmaP_Jh}WZ?EO}i`w$9!T%i+WF&gN4L&3ZemFriruC`bV*Fi~L9^U=@$J^jD=f6}5< z3P^!BQ^1DZVYlT=<=Oh@^}K$}s;?WJjLR9Gegc^IQM{sualiP2tjX5N3Qa!(A%lVx Hc&P%veykG0 literal 0 HcmV?d00001 diff --git a/._.DS_Store b/._.DS_Store new file mode 100755 index 0000000000000000000000000000000000000000..c3bcf45d31211ff55fda426c8fcdea44453db49b GIT binary patch 
literal 4096 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDJkFz{^v(m+1nBL)UWIhYCu0iY;W;207T zh0iuJU0;{4?!O;*H4S~@R7!85Z5Eu=C(GVC7fzc2c4S~@R7!85Z5TJ4hFapg3 zVK9&j$;d2LC`v8PFD*(=RY=P(%2vqCD@n~O$;{77%*m-#$Vp8rQAo;3%*zILb)mY3 QG==JaxL0Ht%") -export("angl_weights<-") -export("intersect_genes<-") -export("list_stats<-") -export("matrix_list<-") export(add_unique_batch_key) export(angl_weights) export(anglemania) @@ -58,4 +54,5 @@ importFrom(dplyr,select) importFrom(magrittr,"%>%") importFrom(pbapply,pblapply) importFrom(pbapply,pboptions) +importFrom(stats,quantile) importFrom(tidyr,unite) diff --git a/R/anglemania.R b/R/anglemania.R index 3890dfa..e239d90 100644 --- a/R/anglemania.R +++ b/R/anglemania.R @@ -54,9 +54,16 @@ #' \url{https://arxiv.org/abs/1306.0256} #' #' @examples -#' \dontrun{ +#' \donttest{ +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania")) #' -#' # Assuming you have an anglemania_object already created +#' angl <- create_anglemaniaObject(se, +#' batch_key = batch_key, +#' min_cells_per_gene = 1 +#' ) #' #' angl <- anglemania( #' angl, @@ -67,9 +74,9 @@ #' ) #' #' # Access the selected genes -#' selected_genes <- extract_integration_genes(angl) +#' selected_genes <- get_anglemania_genes(angl) #' } -#' +#' selected_genes[1:10] #' @export anglemania <- function( anglemania_object, diff --git a/R/anglemanise_utils.R b/R/anglemanise_utils.R index 45c1384..a60ce36 100644 --- a/R/anglemanise_utils.R +++ b/R/anglemanise_utils.R @@ -12,6 +12,11 @@ #' @return An \code{\link[bigstatsr]{FBM}} object from the \pkg{bigstatsr} #' package. 
#' @importFrom bigstatsr FBM +#' @examples +#' s_mat <- Matrix::rsparsematrix(nrow = 10, ncol = 5, density = 0.3) +#' # Convert the sparse matrix to an FBM using your function +#' fbm_mat <- sparse_to_fbm(s_mat) +#' fbm_mat #' @export sparse_to_fbm <- function(s_mat) { n <- nrow(s_mat) @@ -41,6 +46,13 @@ sparse_to_fbm <- function(s_mat) { #' @return A list with statistical measures including \code{mean}, \code{sd}, #' \code{var}, \code{sn}, \code{min}, and \code{max}. #' @importFrom bigstatsr big_apply +#' @examples +#' s_mat <- Matrix::rsparsematrix(nrow = 10, ncol = 5, density = 0.3) +#' # Convert the sparse matrix to an FBM using your function +#' fbm_mat <- sparse_to_fbm(s_mat) +#' result <- get_dstat(fbm_mat) +#' str(result) +#' result #' @seealso \code{\link[bigstatsr]{big_apply}}, \code{\link[bigstatsr]{FBM}} #' @export get_dstat <- function(corr_matrix) { @@ -129,6 +141,29 @@ get_dstat <- function(corr_matrix) { #' In this case, the FBMs are the angle matrices computed in \code{factorise}. #' @return A new \code{\link[bigstatsr]{FBM}} object containing the mean values. 
#' @importFrom bigstatsr FBM +#' @examples +#' \donttest{ +#' # Create FBMs +#' mat1 <- matrix(1:9, nrow = 3) +#' mat2 <- matrix(1:3, nrow = 3) +#' +#' fbm1 <- bigstatsr::FBM(nrow = nrow(mat1), ncol = ncol(mat1), init = mat1) +#' fbm2 <- bigstatsr::FBM(nrow = nrow(mat2), ncol = ncol(mat2), init = mat2) +#' +#' # Create weights +#' weights <- c(batch1 = 0.5, batch2 = 0.5) +#' +#' # Create the list of FBMs +#' fbm_list <- list(batch1 = fbm1, batch2 = fbm2) +#' +#' # Construct the anglemaniaObject +#' anglemania_object <- new( +#' "anglemaniaObject", +#' weights = weights, +#' matrix_list = fbm_list +#' ) +#' big_mat_list_mean(anglemania_object) +#' } #' @export big_mat_list_mean <- function(anglemania_object) { if (!inherits(anglemania_object, "anglemaniaObject")) { @@ -186,6 +221,18 @@ big_mat_list_mean <- function(anglemania_object) { #' @return A list containing three matrices: \code{mean_zscore}, #' \code{sds_zscore}, and \code{sn_zscore}. #' @importFrom bigstatsr FBM big_apply +#' @examples +#' \donttest{ +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania") +#' ) +#' anglemania_object <- create_anglemaniaObject(se, batch_key = "Batch") +#' anglemania_object <- anglemania(anglemania_object) +#' list_stats(anglemania_object) <- get_list_stats(anglemania_object) +#' str(list_stats(anglemania_object)) +#' } #' @seealso \code{\link[bigstatsr]{big_apply}}, \code{\link[bigstatsr]{FBM}} #' @export get_list_stats <- function(anglemania_object) { @@ -278,7 +325,7 @@ get_list_stats <- function(anglemania_object) { #' \code{max_n_genes} exceeds the number of unique genes available, all unique #' genes are returned. 
#' @examples -#' \dontrun{ +#' \donttest{ #' gene_pairs <- data.frame( #' geneA = c("Gene1", "Gene2", "Gene3", "Gene4"), #' geneB = c("Gene3", "Gene4", "Gene5", "Gene6") @@ -323,6 +370,7 @@ extract_rows_for_unique_genes <- function(dt, max_n_genes) { #' @return The input \code{anglemaniaObject} with the #' \code{integration_genes} slot updated to include the selected genes and #' their statistical information. +#' @importFrom stats quantile #' @details #' The function performs the following steps: #' \enumerate{ @@ -339,6 +387,15 @@ extract_rows_for_unique_genes <- function(dt, max_n_genes) { #' \code{anglemaniaObject} #' with the selected genes and their statistics. #' } +#' @examples +#' \donttest{ +#' angl <- select_genes(angl, +#' zscore_mean_threshold = 2, +#' zscore_sn_threshold = 2, +#' max_n_genes = 2000) +#' anglemania_genes <- get_anglemania_genes(angl) +#' # View the selected genes and use for integration +#' } #' @seealso \code{\link{extract_rows_for_unique_genes}}, #' \code{\link{intersect_genes}}, \code{\link{list_stats}} #' @export @@ -366,12 +423,12 @@ select_genes <- function( # Adjust thresholds if no genes passed the cutoff if (nrow(gene_ind) == 0) { message("No genes passed the cutoff.") - quantile95mean <- quantile( + quantile95mean <- stats::quantile( abs(list_stats(anglemania_object)$mean_zscore), 0.95, na.rm = TRUE ) - quantile95sn <- quantile( + quantile95sn <- stats::quantile( list_stats(anglemania_object)$sn_zscore, 0.95, na.rm = TRUE diff --git a/R/extract_angles.R b/R/extract_angles.R index 3daad23..765e65b 100644 --- a/R/extract_angles.R +++ b/R/extract_angles.R @@ -28,7 +28,27 @@ #' The diagonal elements are set to \code{NA}. 
#' #' @importFrom bigstatsr FBM big_apply big_transpose big_cor +#' @examples +#' \donttest{ +#' mat <- matrix( +#' c( +#' 5, 3, 0, 0, +#' 0, 0, 0, 3, +#' 2, 1, 3, 4, +#' 0, 0, 1, 0, +#' 1, 2, 1, 2, +#' 3, 4, 3, 4 +#' ), +#' nrow = 6, # 6 genes +#' ncol = 4, # 4 cells +#' byrow = TRUE +#' ) #' +#' mat <- bigstatsr::FBM(nrow = nrow(mat), ncol = ncol(mat), init = mat) +#' +#' angle_mat <- extract_angles(mat) +#' angle_mat[] +#' } #' @seealso #' \code{\link[bigstatsr]{big_apply}}, #' \code{\link[bigstatsr]{big_cor}}, diff --git a/R/factorise.R b/R/factorise.R index b5d47f1..d36b54a 100644 --- a/R/factorise.R +++ b/R/factorise.R @@ -45,6 +45,28 @@ #' @importFrom bigstatsr FBM big_apply #' @importFrom checkmate assertClass assertString assertChoice #' +#' @examples +#' \donttest{ +#' mat <- matrix( +#' c( +#' 5, 3, 0, 0, +#' 0, 0, 0, 3, +#' 2, 1, 3, 4, +#' 0, 0, 1, 0, +#' 1, 2, 1, 2, +#' 3, 4, 3, 4 +#' ), +#' nrow = 6, # 6 genes +#' ncol = 4, # 4 cells +#' byrow = TRUE +#' ) +#' +#' mat <- bigstatsr::FBM(nrow = nrow(mat), ncol = ncol(mat), init = mat) +#' +#' # Run factorise with method "pearson" and a fixed seed +#' result_fbm <- factorise(mat, method = "pearson", seed = 1) +#' result_fbm[] +#' } #' @seealso #' \code{\link{extract_angles}}, #' \code{\link{get_dstat}}, diff --git a/R/integrate_by_features.R b/R/integrate_by_features.R index 0bf81e6..e762403 100644 --- a/R/integrate_by_features.R +++ b/R/integrate_by_features.R @@ -10,7 +10,7 @@ #' `integrate_by_features` integrates samples or batches within a Seurat #' object using canonical correlation analysis (CCA) based on a set of #' selected features (genes). The function utilizes an `anglemaniaObject` to -#' extract integration genes and handles the integration process, including +#' extract anglemania genes and handles the integration process, including #' optional downstream processing steps such as scaling, PCA, and UMAP #' visualization. 
#' @@ -35,7 +35,7 @@ #' #' @param seurat_object A \code{\link[Seurat]{Seurat}} object containing #' all samples or batches to be integrated. -#' @param anglem_object An \code{\link{anglemaniaObject}} previously generated +#' @param anglemania_object An \code{\link{anglemaniaObject}} previously generated #' using \code{\link{create_anglemaniaObject}} and \code{\link{anglemania}}. #' It is important that the \code{dataset_key} and \code{batch_key} are #' correctly set in the \code{anglemaniaObject}. @@ -57,11 +57,19 @@ #' IntegrateData ScaleData RunPCA RunUMAP DefaultAssay #' @importFrom pbapply pblapply #' @importFrom checkmate assertClass assertLogical testFALSE -#' +#' @examples +#' \donttest{ +#' # Integrate samples using anglemaniaObject +#' # Automatically reads the batch key from anglemaniaObject +#' # splits the seurat object into batches and integrates them +#' # using CCA integration and anglemania genes previously extracted +#' # with anglemania() or select_genes() +#' integrated_object <- integrate_by_features(seurat_object, anglemania_object) +#' } #' @seealso #' \code{\link{create_anglemaniaObject}}, #' \code{\link{anglemania}}, -#' \code{\link{extract_integration_genes}}, +#' \code{\link{get_anglemania_genes}}, #' \code{\link{integrate_seurat_list}}, #' \code{\link[Seurat]{IntegrateData}}, #' \code{\link[Seurat]{FindIntegrationAnchors}} @@ -166,7 +174,14 @@ integrate_by_features <- function( #' ScaleData RunPCA RunUMAP DefaultAssay #' @importFrom pbapply pblapply #' @importFrom checkmate assertClass assertCharacter assertLogical -#' +#' @examples +#' \donttest{ +#' # Integrate a list of seurat object using selected +#' # features (e.g. 
anglemania genes or HVGs) +#' # and CCA integration method +#' seurat_list <- list(seurat_object1, seurat_object2) +#' integrated_seurat <- integrate_seurat_list(seurat_list, features) +#' } #' @seealso #' \code{\link{integrate_by_features}}, #' \code{\link[Seurat]{IntegrateData}}, diff --git a/R/objects.R b/R/objects.R index c8d0619..4125587 100644 --- a/R/objects.R +++ b/R/objects.R @@ -29,6 +29,21 @@ #' #' @name anglemaniaObject-methods #' @rdname anglemaniaObject-methods +#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania" +#' )) +#' +#' se[[]]$Dataset <- rep(c("A", "B"), each = ncol(se) / 2) +#' anglemania_object <- create_anglemaniaObject( +#' se, +#' dataset_key = "Dataset", +#' batch_key = "Batch", +#' min_cells_per_gene = 1 +#' ) +#' anglemania_object #' @seealso \code{\link{create_anglemaniaObject}}, \code{\link{anglemania}} #' @exportClass anglemaniaObject setClass( @@ -73,6 +88,19 @@ setClass( #' @param object An \code{anglemaniaObject}. #' @return Prints a summary to the console. #' @importFrom checkmate testString +#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania" +#' )) +#' +#' anglemania_object <- create_anglemaniaObject( +#' se, +#' batch_key = "Batch", +#' min_cells_per_gene = 1 +#' ) +#' show(anglemania_object) #' @describeIn anglemaniaObject-methods show anglemaniaObject info setMethod("show", "anglemaniaObject", function(object) { cat("anglemaniaObject\n") @@ -125,6 +153,19 @@ setMethod("show", "anglemaniaObject", function(object) { #' @param object An \code{anglemaniaObject} object. #' @return A list of \code{\link[bigstatsr]{FBM}} objects containing gene #' expression matrices. 
+#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania" +#' )) +#' +#' anglemania_object <- create_anglemaniaObject( +#' se, +#' batch_key = "Batch", +#' min_cells_per_gene = 1 +#' ) +#' str(matrix_list(anglemania_object)) #' @describeIn anglemaniaObject-methods Access matrix list #' @export setGeneric( @@ -141,7 +182,7 @@ setMethod("matrix_list", "anglemaniaObject", function(object) object@matrix_list #' @param value A list of \code{\link[bigstatsr]{FBM}} objects. #' @return The updated \code{anglemaniaObject}. #' @describeIn anglemaniaObject-methods set matrix list in anglemaniaObject -#' @export +#' @keywords internal setGeneric( "matrix_list<-", function(object, value) standardGeneric("matrix_list<-") @@ -157,6 +198,19 @@ setReplaceMethod("matrix_list", "anglemaniaObject", function(object, value) { #' #' @param object An \code{anglemaniaObject}. #' @return A character string representing the dataset key. +#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania" +#' )) +#' +#' anglemania_object <- create_anglemaniaObject( +#' se, +#' batch_key = "Batch", +#' min_cells_per_gene = 1 +#' ) +#' dataset_key(anglemania_object) #' @describeIn anglemaniaObject-methods Access dataset key of anglemaniaObject #' @export setGeneric( @@ -171,6 +225,19 @@ setMethod("dataset_key", "anglemaniaObject", function(object) object@dataset_key #' #' @param object An \code{anglemaniaObject}. #' @return A character string representing the batch key. 
+#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania" +#' )) +#' +#' anglemania_object <- create_anglemaniaObject( +#' se, +#' batch_key = "Batch", +#' min_cells_per_gene = 1 +#' ) +#' batch_key(anglemania_object) #' @describeIn anglemaniaObject-methods Access batch key of anglemaniaObject #' @export setGeneric( @@ -186,6 +253,19 @@ setMethod("batch_key", "anglemaniaObject", function(object) object@batch_key) #' #' @param object An \code{anglemaniaObject}. #' @return A data frame containing dataset and batch information. +#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania" +#' )) +#' +#' anglemania_object <- create_anglemaniaObject( +#' se, +#' batch_key = "Batch", +#' min_cells_per_gene = 1 +#' ) +#' data_info(anglemania_object) #' @describeIn anglemaniaObject-methods Access info of selected gene pairs #' @export setGeneric( @@ -200,6 +280,19 @@ setMethod("data_info", "anglemaniaObject", function(object) object@data_info) #' #' @param object An \code{anglemaniaObject}. #' @return A named numeric vector of weights. +#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania" +#' )) +#' +#' anglemania_object <- create_anglemaniaObject( +#' se, +#' batch_key = "Batch", +#' min_cells_per_gene = 1 +#' ) +#' angl_weights(anglemania_object) #' @describeIn anglemaniaObject-methods Access weights #' @export setGeneric("angl_weights", function(object) standardGeneric("angl_weights")) @@ -213,7 +306,7 @@ setMethod("angl_weights", "anglemaniaObject", function(object) object@weights) #' @param value A named numeric vector of weights. #' @return The updated \code{anglemaniaObject}. 
#' @describeIn anglemaniaObject-methods Set weights -#' @export +#' @keywords internal setGeneric("angl_weights<-", function(object, value) standardGeneric("angl_weights<-")) setReplaceMethod("angl_weights", "anglemaniaObject", function(object, value) { if (!is.numeric(value)) stop("weights must be numeric") @@ -233,7 +326,14 @@ setReplaceMethod("angl_weights", "anglemaniaObject", function(object, value) { #' @param object An \code{anglemaniaObject}. #' @return A list containing statistical matrices such as mean z-scores and SNR #' z-scores +#' @examples +#' \donttest{ +#' # list_stats extracts the statistical measures from the anglemaniaObject +#' # after running anglemania() +#' stats <- list_stats(anglemania_object) +#' } #' @describeIn anglemaniaObject-methods Access statistics of the gene-gene matrices +#' @seealso \code{\link{anglemania}} \code{\link{get_list_stats}} #' @export setGeneric("list_stats", function(object) standardGeneric("list_stats")) setMethod("list_stats", "anglemaniaObject", function(object) object@list_stats) @@ -246,7 +346,7 @@ setMethod("list_stats", "anglemaniaObject", function(object) object@list_stats) #' @param value A list containing statistical matrices. #' @return The updated \code{anglemaniaObject}. #' @describeIn anglemaniaObject-methods Set statistics of the gene-gene matrices -#' @export +#' @keywords internal setGeneric("list_stats<-", function(object, value) { standardGeneric("list_stats<-") }) @@ -262,8 +362,23 @@ setReplaceMethod("list_stats", "anglemaniaObject", function(object, value) { #' number of cells across all batches. #' #' @param object An \code{anglemaniaObject}. -#' @return A character vector of intersected gene names. -#' @describeIn anglemaniaObject-methods Access the intersection of genes of all batches +#' @return A character vector of intersected gene +#' names from multiple Seurat objects. 
+#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania" +#' )) +#' +#' anglemania_object <- create_anglemaniaObject( +#' se, +#' batch_key = "Batch", +#' min_cells_per_gene = 1 +#' ) +#' intersect_genes(anglemania_object) +#' @describeIn anglemaniaObject-methods +#' Access the intersection of genes of all batches #' @export setGeneric( "intersect_genes", @@ -280,8 +395,9 @@ setMethod("intersect_genes", "anglemaniaObject", function(object) { #' @param object An \code{anglemaniaObject}. #' @param value A character vector of gene names. #' @return The updated \code{anglemaniaObject} object. -#' @describeIn anglemaniaObject-methods Set the intersection of genes of all batches -#' @export +#' @describeIn anglemaniaObject-methods +#' Set the intersection of genes of all batches +#' @keywords internal setGeneric("intersect_genes<-", function(object, value) { standardGeneric("intersect_genes<-") }) @@ -296,6 +412,11 @@ setReplaceMethod("intersect_genes", "anglemaniaObject", function(object, value) #' #' @param object An \code{anglemaniaObject}. #' @return A character vector of integration gene names. +#' @examples +#' \donttest{ +#' # extract the genes identified by anglemania() +#' anglemania_genes <- get_anglemania_genes(anglemania_object) +#' } #' @describeIn anglemaniaObject-methods Access the genes extracted by anglemania #' @export setGeneric( @@ -326,6 +447,20 @@ setMethod("get_anglemania_genes", "anglemaniaObject", function(object) { #' column containing the unique batch key. 
#' #' @importFrom tidyr unite +#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania")) +#' +#' se[[]]$Dataset <- rep(c("A", "B"), each = ncol(se)/2) +#' seurat_object <- add_unique_batch_key( +#' seurat_object = se, +#' dataset_key = "Dataset", +#' batch_key = "Batch", +#' new_unique_batch_key = "batch" +#' ) +#' head(seurat_object[[]]) #' @describeIn anglemaniaObject-methods Temporarily add a unique batch key to the dataset #' @export add_unique_batch_key <- function( @@ -427,6 +562,17 @@ add_unique_batch_key <- function( #' \code{\link{add_unique_batch_key}}, #' \code{\link{anglemania}}, #' \code{\link[bigstatsr]{FBM}} +#' @examples +#' load(system.file( +#' "extdata", +#' "seurat_splatter_sim.RData", +#' package = "anglemania")) +#' +#' angl <- create_anglemaniaObject(se, +#' batch_key = batch_key, +#' min_cells_per_gene = 1 +#' ) +#' angl #' @export create_anglemaniaObject create_anglemaniaObject <- function( seurat_object, diff --git a/README.md b/README.md index 3fb54bc..44dd346 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ The novelty, as well as the cornerstone, of the proposed approach, is to use the | - | - | | Github | [![Github](https://github.com/BIMSBbioinfo/anglemania/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/BIMSBbioinfo/anglemania/actions/workflows/R-CMD-check.yaml) | | Bioc Release | [![Bioc Release](https://bioconductor.org/shields/years-in-bioc/anglemania.svg)](https://bioconductor.org/packages/anglemania) +| Coverage | [![Codecov test coverage](https://codecov.io/gh/BIMSBbioinfo/anglemania/graph/badge.svg)](https://app.codecov.io/gh/BIMSBbioinfo/anglemania) diff --git a/_pkgdown.yml b/_pkgdown.yml index 2ac54f0..1f64d9e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,6 @@ url: ~ template: - bootstrap: 3 + bootstrap: 5 reference: - title: Main diff --git a/inst/script/simulate_scRNA_data_splatter.R b/inst/script/simulate_scRNA_data_splatter.R index 
eee1098..436c880 100644 --- a/inst/script/simulate_scRNA_data_splatter.R +++ b/inst/script/simulate_scRNA_data_splatter.R @@ -9,12 +9,14 @@ batch.facLoc <- 0.4 de.facLoc <- 0.1 nBatches <- 4 nGroups <- 3 +nGenes <- 5000 groupCells <- 300 sim <- splatSimulate( batchCells = rep(300 * nGroups, nBatches), batch.facLoc = batch.facLoc, group.prob = rep(1/nGroups, nGroups), + nGenes = nGenes, batch.facScale = 0.1, method = "groups", verbose = FALSE, @@ -28,4 +30,4 @@ sim se <- CreateSeuratObject(counts = counts(sim), meta.data = as.data.frame(colData(sim))) -save(se, file = "../extdata/seurat_splatter_sim.RData") +save(se, file = "../extdata/seurat_splatter_sim.RData", version = 2) diff --git a/man/anglemania.Rd b/man/anglemania.Rd index 72c2e9c..e9c8b6c 100644 --- a/man/anglemania.Rd +++ b/man/anglemania.Rd @@ -57,9 +57,16 @@ The computed statistics and selected genes are added to the \code{anglemania_object}, which is returned. } \examples{ -\dontrun{ +\donttest{ +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania")) -# Assuming you have an anglemania_object already created +angl <- create_anglemaniaObject(se, + batch_key = batch_key, + min_cells_per_gene = 1 + ) angl <- anglemania( angl, @@ -70,9 +77,9 @@ angl <- anglemania( ) # Access the selected genes -selected_genes <- extract_integration_genes(angl) +selected_genes <- get_anglemania_genes(angl) } - +selected_genes[1:10] } \seealso{ \code{\link{create_anglemaniaObject}}, diff --git a/man/anglemaniaObject-methods.Rd b/man/anglemaniaObject-methods.Rd index f83a7e8..5f7c279 100644 --- a/man/anglemaniaObject-methods.Rd +++ b/man/anglemaniaObject-methods.Rd @@ -92,7 +92,8 @@ z-scores The updated \code{anglemaniaObject}. -A character vector of intersected gene names. +A character vector of intersected gene +names from multiple Seurat objects. The updated \code{anglemaniaObject} object. 
@@ -206,6 +207,131 @@ cells in which a gene must be expressed to be included in the analysis.} and their statistics.} }} +\examples{ +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania" +)) + +se[[]]$Dataset <- rep(c("A", "B"), each = ncol(se) / 2) +anglemania_object <- create_anglemaniaObject( + se, + dataset_key = "Dataset", + batch_key = "Batch", + min_cells_per_gene = 1 +) +anglemania_object +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania" +)) + +anglemania_object <- create_anglemaniaObject( + se, + batch_key = "Batch", + min_cells_per_gene = 1 +) +show(anglemania_object) +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania" +)) + +anglemania_object <- create_anglemaniaObject( + se, + batch_key = "Batch", + min_cells_per_gene = 1 +) +str(matrix_list(anglemania_object)) +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania" +)) + +anglemania_object <- create_anglemaniaObject( + se, + batch_key = "Batch", + min_cells_per_gene = 1 +) +dataset_key(anglemania_object) +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania" +)) + +anglemania_object <- create_anglemaniaObject( + se, + batch_key = "Batch", + min_cells_per_gene = 1 +) +batch_key(anglemania_object) +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania" +)) + +anglemania_object <- create_anglemaniaObject( + se, + batch_key = "Batch", + min_cells_per_gene = 1 +) +data_info(anglemania_object) +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania" +)) + +anglemania_object <- create_anglemaniaObject( + se, + batch_key = "Batch", + min_cells_per_gene = 1 +) +angl_weights(anglemania_object) +\donttest{ +# list_stats extracts the statistical measures from the anglemaniaObject +# after running anglemania() +stats <- list_stats(anglemania_object) +} +load(system.file( + "extdata", 
+ "seurat_splatter_sim.RData", + package = "anglemania" +)) + +anglemania_object <- create_anglemaniaObject( + se, + batch_key = "Batch", + min_cells_per_gene = 1 +) +intersect_genes(anglemania_object) +\donttest{ +# extract the genes identified by anglemania() +anglemania_genes <- get_anglemania_genes(anglemania_object) +} +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania")) + +se[[]]$Dataset <- rep(c("A", "B"), each = ncol(se)/2) +seurat_object <- add_unique_batch_key( + seurat_object = se, + dataset_key = "Dataset", + batch_key = "Batch", + new_unique_batch_key = "batch" + ) +head(seurat_object[[]]) +} \seealso{ \code{\link{create_anglemaniaObject}}, \code{\link{anglemania}} + +\code{\link{anglemania}} \code{\link{get_list_stats}} } +\keyword{internal} diff --git a/man/big_mat_list_mean.Rd b/man/big_mat_list_mean.Rd index 7d0a7a0..508cdd3 100644 --- a/man/big_mat_list_mean.Rd +++ b/man/big_mat_list_mean.Rd @@ -19,3 +19,27 @@ This function takes an \code{anglemaniaObject} containing a list of If the list is empty or the FBMs have different dimensions, it throws an error. } +\examples{ +\donttest{ +# Create FBMs +mat1 <- matrix(1:9, nrow = 3) +mat2 <- matrix(1:3, nrow = 3) + +fbm1 <- bigstatsr::FBM(nrow = nrow(mat1), ncol = ncol(mat1), init = mat1) +fbm2 <- bigstatsr::FBM(nrow = nrow(mat2), ncol = ncol(mat2), init = mat2) + +# Create weights +weights <- c(batch1 = 0.5, batch2 = 0.5) + +# Create the list of FBMs +fbm_list <- list(batch1 = fbm1, batch2 = fbm2) + +# Construct the anglemaniaObject +anglemania_object <- new( + "anglemaniaObject", + weights = weights, + matrix_list = fbm_list +) +big_mat_list_mean(anglemania_object) +} +} diff --git a/man/create_anglemaniaObject.Rd b/man/create_anglemaniaObject.Rd index dac8b52..9bbbcf9 100644 --- a/man/create_anglemaniaObject.Rd +++ b/man/create_anglemaniaObject.Rd @@ -65,6 +65,18 @@ The function performs the following steps: \item Computes weights for each batch or dataset. 
} } +\examples{ +load(system.file( + "extdata", + "seurat_splatter_sim.RData", + package = "anglemania")) + +angl <- create_anglemaniaObject(se, + batch_key = batch_key, + min_cells_per_gene = 1 + ) + angl +} \seealso{ \code{\link{anglemaniaObject}}, \code{\link{add_unique_batch_key}}, diff --git a/man/extract_angles.Rd b/man/extract_angles.Rd index f181c6e..20bb795 100644 --- a/man/extract_angles.Rd +++ b/man/extract_angles.Rd @@ -38,6 +38,28 @@ metrics. The function returns the gene-gene angle matrix as an \code{\link[bigstatsr]{FBM}} object. } +\examples{ +\donttest{ +mat <- matrix( + c( + 5, 3, 0, 0, + 0, 0, 0, 3, + 2, 1, 3, 4, + 0, 0, 1, 0, + 1, 2, 1, 2, + 3, 4, 3, 4 + ), + nrow = 6, # 6 genes + ncol = 4, # 4 cells + byrow = TRUE +) + +mat <- bigstatsr::FBM(nrow = nrow(mat), ncol = ncol(mat), init = mat) + +angle_mat <- extract_angles(mat) +angle_mat[] +} +} \seealso{ \code{\link[bigstatsr]{big_apply}}, \code{\link[bigstatsr]{big_cor}}, diff --git a/man/extract_rows_for_unique_genes.Rd b/man/extract_rows_for_unique_genes.Rd index 3ab386d..e5dff49 100644 --- a/man/extract_rows_for_unique_genes.Rd +++ b/man/extract_rows_for_unique_genes.Rd @@ -27,7 +27,7 @@ unique gene names, and returns the first \code{max_n_genes} genes. If genes are returned. } \examples{ -\dontrun{ +\donttest{ gene_pairs <- data.frame( geneA = c("Gene1", "Gene2", "Gene3", "Gene4"), geneB = c("Gene3", "Gene4", "Gene5", "Gene6") diff --git a/man/factorise.Rd b/man/factorise.Rd index c74f75b..ea98b45 100644 --- a/man/factorise.Rd +++ b/man/factorise.Rd @@ -52,6 +52,29 @@ This process allows for the identification of invariant gene-gene relationships by comparing them to a null distribution derived from the permuted data. 
} +\examples{ +\donttest{ +mat <- matrix( + c( + 5, 3, 0, 0, + 0, 0, 0, 3, + 2, 1, 3, 4, + 0, 0, 1, 0, + 1, 2, 1, 2, + 3, 4, 3, 4 + ), + nrow = 6, # 6 genes + ncol = 4, # 4 cells + byrow = TRUE +) + +mat <- bigstatsr::FBM(nrow = nrow(mat), ncol = ncol(mat), init = mat) + +# Run factorise with method "pearson" and a fixed seed +result_fbm <- factorise(mat, method = "pearson", seed = 1) +result_fbm[] +} +} \seealso{ \code{\link{extract_angles}}, \code{\link{get_dstat}}, diff --git a/man/get_dstat.Rd b/man/get_dstat.Rd index 59be657..af9477b 100644 --- a/man/get_dstat.Rd +++ b/man/get_dstat.Rd @@ -17,6 +17,14 @@ A list with statistical measures including \code{mean}, \code{sd}, Computes the mean and standard deviation of the correlation matrix using the \code{big_apply} function. } +\examples{ +s_mat <- Matrix::rsparsematrix(nrow = 10, ncol = 5, density = 0.3) +# Convert the sparse matrix to an FBM using your function +fbm_mat <- sparse_to_fbm(s_mat) +result <- get_dstat(fbm_mat) +str(result) +result +} \seealso{ \code{\link[bigstatsr]{big_apply}}, \code{\link[bigstatsr]{FBM}} } diff --git a/man/get_list_stats.Rd b/man/get_list_stats.Rd index 96d1284..435aa59 100644 --- a/man/get_list_stats.Rd +++ b/man/get_list_stats.Rd @@ -17,6 +17,19 @@ A list containing three matrices: \code{mean_zscore}, Computes the mean, standard deviations, and signal-to-noise ratio (SNR) for each element across a list of FBMs in an \code{anglemaniaObject}. 
} +\examples{ +\donttest{ +load(system.file( +"extdata", + "seurat_splatter_sim.RData", + package = "anglemania") +) +anglemania_object <- create_anglemaniaObject(se, batch_key = "Batch") +anglemania_object <- anglemania(anglemania_object) +list_stats(anglemania_object) <- get_list_stats(anglemania_object) +str(list_stats(anglemania_object)) +} +} \seealso{ \code{\link[bigstatsr]{big_apply}}, \code{\link[bigstatsr]{FBM}} } diff --git a/man/integrate_by_features.Rd b/man/integrate_by_features.Rd index 809453a..7316652 100644 --- a/man/integrate_by_features.Rd +++ b/man/integrate_by_features.Rd @@ -17,6 +17,11 @@ integrate_by_features( \item{seurat_object}{A \code{\link[Seurat]{Seurat}} object containing all samples or batches to be integrated.} +\item{anglemania_object}{An \code{\link{anglemaniaObject}} previously generated +using \code{\link{create_anglemaniaObject}} and \code{\link{anglemania}}. +It is important that the \code{dataset_key} and \code{batch_key} are +correctly set in the \code{anglemaniaObject}.} + \item{int_order}{An optional data frame specifying the integration order of samples within the Seurat list. See the \code{sample.tree} argument in \code{\link[Seurat]{IntegrateData}} for more details. If not @@ -29,11 +34,6 @@ embeddings). Default is \code{TRUE}.} \item{verbose}{Logical value indicating whether to display progress messages during integration. Default is \code{FALSE}.} - -\item{anglem_object}{An \code{\link{anglemaniaObject}} previously generated -using \code{\link{create_anglemaniaObject}} and \code{\link{anglemania}}. -It is important that the \code{dataset_key} and \code{batch_key} are -correctly set in the \code{anglemaniaObject}.} } \value{ A \code{\link[Seurat]{Seurat}} object containing the integrated @@ -43,7 +43,7 @@ data. The default assay is set to \code{"integrated"}. 
\code{integrate_by_features} integrates samples or batches within a Seurat object using canonical correlation analysis (CCA) based on a set of selected features (genes). The function utilizes an \code{anglemaniaObject} to -extract integration genes and handles the integration process, including +extract anglemania genes and handles the integration process, including optional downstream processing steps such as scaling, PCA, and UMAP visualization. } @@ -66,10 +66,20 @@ compatibility with small sample sizes (e.g., metacells or SEACells). If \code{process = TRUE}, the function will also scale the data, run PCA, and compute UMAP embeddings. } +\examples{ +\donttest{ +# Integrate samples using anglemaniaObject +# Automatically reads the batch key from anglemaniaObject, +# splits the Seurat object into batches and integrates them +# using CCA integration and anglemania genes previously extracted +# with anglemania() or select_genes() +integrated_object <- integrate_by_features(seurat_object, anglemania_object) +} +} \seealso{ \code{\link{create_anglemaniaObject}}, \code{\link{anglemania}}, -\code{\link{extract_integration_genes}}, +\code{\link{get_anglemania_genes}}, \code{\link{integrate_seurat_list}}, \code{\link[Seurat]{IntegrateData}}, \code{\link[Seurat]{FindIntegrationAnchors}} diff --git a/man/integrate_seurat_list.Rd b/man/integrate_seurat_list.Rd index 94414b6..66049c3 100644 --- a/man/integrate_seurat_list.Rd +++ b/man/integrate_seurat_list.Rd @@ -64,6 +64,15 @@ scales the data, runs PCA, and computes UMAP embeddings. The integration is performed using Seurat's CCA-based methods, and the function is designed to handle datasets with varying sizes efficiently. } +\examples{ +\donttest{ +# Integrate a list of Seurat objects using selected +# features (e.g.
anglemania genes or HVGs) +# and CCA integration method +seurat_list <- list(seurat_object1, seurat_object2) +integrated_seurat <- integrate_seurat_list(seurat_list, features) +} +} \seealso{ \code{\link{integrate_by_features}}, \code{\link[Seurat]{IntegrateData}}, diff --git a/man/select_genes.Rd b/man/select_genes.Rd index 1470fe1..9fe1d57 100644 --- a/man/select_genes.Rd +++ b/man/select_genes.Rd @@ -54,6 +54,16 @@ exceed the specified thresholds. with the selected genes and their statistics. } } +\examples{ +\donttest{ +angl <- select_genes(angl, + zscore_mean_threshold = 2, + zscore_sn_threshold = 2, + max_n_genes = 2000) +anglemania_genes <- get_anglemania_genes(angl) +# View the selected genes and use for integration +} +} \seealso{ \code{\link{extract_rows_for_unique_genes}}, \code{\link{intersect_genes}}, \code{\link{list_stats}} diff --git a/man/sparse_to_fbm.Rd b/man/sparse_to_fbm.Rd index 10bd454..91da8c5 100644 --- a/man/sparse_to_fbm.Rd +++ b/man/sparse_to_fbm.Rd @@ -18,3 +18,9 @@ package. Converts a sparse matrix into an \code{\link[bigstatsr]{FBM}} with efficient memory usage. } +\examples{ +s_mat <- Matrix::rsparsematrix(nrow = 10, ncol = 5, density = 0.3) +# Convert the sparse matrix to an FBM using sparse_to_fbm() +fbm_mat <- sparse_to_fbm(s_mat) +fbm_mat +} diff --git a/tests/testthat/_snaps/integrate_by_features.md b/tests/testthat/_snaps/integrate_by_features.md new file mode 100644 index 0000000..f8c2145 --- /dev/null +++ b/tests/testthat/_snaps/integrate_by_features.md @@ -0,0 +1,32 @@ +# integrate_by_features integrates Seurat objects correctly using selected features + + Code + SeuratObject::LayerData(se_integrated)[1:10, 1:10] + Output + 10 x 10 sparse Matrix of class "dgCMatrix" + Message + [[ suppressing 10 column names 'Cell1', 'Cell2', 'Cell3' ...
]] + Output + + Gene5069 3.6068415 3.449231 3.626969 3.374226 3.345079 3.3263644 3.547470 + Gene7918 1.6601722 1.574255 1.603757 2.108829 2.562399 2.0935116 1.932643 + Gene7403 2.4969626 2.463167 2.193241 2.255050 1.950832 2.5684881 2.018957 + Gene8577 1.1728437 1.186915 1.355507 1.403790 1.731002 0.7428343 1.402947 + Gene6669 2.3715965 1.712547 2.082252 1.643740 1.632191 1.5075971 2.018396 + Gene6647 2.2298116 1.342605 1.827615 2.122916 2.041227 1.5142538 1.948779 + Gene7493 2.6551437 2.720363 2.749127 3.035987 2.807025 2.8447610 2.522325 + Gene4746 0.6745218 1.506465 1.056221 1.125693 1.198738 1.6536617 1.221798 + Gene5153 2.3853269 1.864983 2.032089 2.001118 2.047578 1.5969688 1.667440 + Gene9940 2.4682466 2.336610 1.984139 2.403311 2.431990 2.0011201 1.985741 + + Gene5069 3.449989 3.8410487 3.285473 + Gene7918 2.074880 1.8073786 2.012896 + Gene7403 2.387687 2.2914329 2.500070 + Gene8577 1.761619 0.9356402 1.441561 + Gene6669 2.010614 2.1867443 1.519853 + Gene6647 2.227451 2.1253241 2.210208 + Gene7493 2.980621 2.7042113 2.908356 + Gene4746 1.331634 1.1309406 1.116862 + Gene5153 1.808125 1.7481938 1.891890 + Gene9940 2.540866 2.2968883 2.410737 + diff --git a/tests/testthat/test-integrate_by_features.R b/tests/testthat/test-integrate_by_features.R index 8beb214..5b1c81b 100644 --- a/tests/testthat/test-integrate_by_features.R +++ b/tests/testthat/test-integrate_by_features.R @@ -11,7 +11,10 @@ test_that("integrate_by_features integrates Seurat objects correctly using selec # Integrate samples using selected features options(future.globals.maxSize = 5000 * 1024^2) - se_integrated <- integrate_by_features(se, anglemania_object) + suppressWarnings({ + se_integrated <- integrate_by_features(se, anglemania_object) + }) + # Seurat gave too many unnecessary warnings.. 
Annoying in R-CMD-check # make snapshot of first few counts of integrated assay expect_snapshot(SeuratObject::LayerData(se_integrated)[1:10, 1:10]) diff --git a/vignettes/anglemania_tutorial.Rmd b/vignettes/anglemania_tutorial.Rmd index a9a69c6..7a2c143 100644 --- a/vignettes/anglemania_tutorial.Rmd +++ b/vignettes/anglemania_tutorial.Rmd @@ -38,7 +38,7 @@ se_unintegrated <- se_unintegrated %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() %>% - RunUMAP(dims = 1:30) + RunUMAP(dims = 1:30, verbose = FALSE) DimPlot(se_unintegrated, reduction = "umap", group.by = "Batch") DimPlot(se_unintegrated, reduction = "umap", group.by = "Group") ``` @@ -61,7 +61,7 @@ angl angl <- anglemania(angl, zscore_mean_threshold = 2.5, zscore_sn_threshold = 2.5, - max_n_genes = 3000 # optionally define a max number of genes. default is 2000 + max_n_genes = 2000 # optionally define a max number of genes. default is 2000 ) # Inspect the anglemania genes @@ -77,10 +77,12 @@ length(integration_genes) # if the Seurat FindIntegrationAnchors() function does not work, # change this to the specified size: options(future.globals.maxSize = 8000 * 1024^2) -seurat_integrated_angl <- integrate_by_features(se, - angl, - process = TRUE -) +suppressWarnings({ + seurat_integrated_angl <- integrate_by_features(se, + angl, + process = TRUE + ) +}) seurat_integrated_angl ``` @@ -101,10 +103,12 @@ hvg_features <- Seurat::SelectIntegrationFeatures(se_list, nfeatures = 2000) ## integration ```{r, integration option 2} -seurat_integrated_hvg <- integrate_seurat_list(se_list, - features = hvg_features, - process = TRUE -) +suppressWarnings({ + seurat_integrated_hvg <- integrate_seurat_list(se_list, + features = hvg_features, + process = TRUE + ) +}) seurat_integrated_hvg ```