diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index bd0730e0..54d22c28 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -51,6 +51,48 @@ the team agrees that it's needed. If you've found a bug, please file an issue th - We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. Contributions with test cases included are easier to accept. +## Testing + +Important (but not all) parts of `scdrake` are covered by [testthat](https://testthat.r-lib.org/) unit tests. +During the CI workflow, lighter of them are run automatically after the Docker image is built. + +Tests are heavily controlled by environment variables. Those are parsed in `tests/testthat/setup.R` +and displayed on the beginning of testing. To ease the manipulation with envvars, +there is a wrapper script `dev/run_tests.R`. It contains a CLI that transforms command line parameters +to envvars used in tests. See `$ Rscript dev/run_tests.R` for the list of CLI parameters and +at the same time, a list of envvars for tests. + +It's also possible to run tests from within your R session, using temporary envvars: + +```r +devtools::load_all() +withr::with_envvar( + c("SCDRAKE_TEST_RUN_PIPELINE_VIGNETTES = "TRUE""), + devtools::test(filter = "vignettes") +) +``` + +### End-to-end (e2e) tests + +In case you make bigger changes, a fuller testing is very much recommended. +This is covered by several end-to-end tests in `tests/testthat/test-run_pipeline.R` and +`tests/testthat/test-run_pipeline_vignettes.R`. +These tests are computationally demanding and cannot be run in the CI. + +The second thing is that even though e2e tests can succeed, there is no actual validation +of their outputs. Thus, it's needed to manually inspect at least the produced reports. +For this purpose, a persistent output directory can be set in `dev/run_tests.R`. + +You can use this short snippet to run e2e tests with preserved outputs: + +```bash +docker exec -it -u rstudio -w /home/rstudio/scdrake \ + r --interactive -L /usr/local/lib/R/site-library -t dev/run_tests.R \ + --no-test-single_sample-full-sct \ + --output-dir /home/rstudio/shared/test_outputs \ + --output-dir-pipeline-tests /home/rstudio/shared/test_outputs/pipeline_outputs +``` + ## Code of Conduct Please note that the `{scdrake}` project is released with a diff --git a/NAMESPACE b/NAMESPACE index 9033a6b5..794e4a04 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -55,6 +55,7 @@ export(create_dummy_plot) export(create_img_link) export(create_integration_dirs) export(create_seu_for_heatmaps) +export(create_signature_matrix_fn) export(create_single_sample_dirs) export(dimred_plots_cell_annotation_params_df_fn) export(dimred_plots_clustering_fn) @@ -137,6 +138,7 @@ export(markers_plots_top) export(markers_table_files) export(md_header) export(merge_sce_metadata) +export(meta_heatmap_ploting) export(na_empty) export(pca_phase_plots_fn) export(plotReducedDim_mod) @@ -152,6 +154,7 @@ export(run_graph_based_clustering) export(run_integration) export(run_integration_r) export(run_kmeans_clustering) +export(run_page_man_annotation) export(run_single_sample) export(run_single_sample_r) export(save_clustree) diff --git a/R/manual_cell_annotation.R b/R/manual_cell_annotation.R new file mode 100644 index 00000000..33e6724f --- /dev/null +++ b/R/manual_cell_annotation.R @@ -0,0 +1,446 @@ +## -- Common functions used for manual annotation of spots/cells. +#' @title Create signature matrix from provided file containing names with markers. +#' @param markers_file A csv file containing list of annotation names with selected markers. +#' @export +#' @concept manual_annotation +create_signature_matrix_fn <- function(markers_file) { + markers <- read.csv(markers_file) + + long_df <- markers %>% + tidyr::pivot_longer( + cols = everything(), + names_to = "types", + values_to = "gene" + ) %>% + dplyr::filter(gene != "") + + # Create a binary indicator for the presence of genes + + signature_matrix <- as.data.frame( + long_df %>% + dplyr::mutate(Presence = 1) %>% + tidyr::pivot_wider( + names_from = types, + values_from = Presence, + values_fill = list(Presence = 0) + ) + ) + rownames(signature_matrix) <- signature_matrix$gene + signature_matrix <- signature_matrix[, -1] + return(signature_matrix) +} + + +#' @title Calculate and run PAGE annotation. +#' @param sign_matrix precalculated signature matrix +#' @param sce A `SingleCellExperiment` object +#' @param values A expresion indicating which values use, logcounts as default +#' @export +#' @concept manual_annotation +run_page_man_annotation <- function(sign_matrix, + sce, + values = "logcounts", + # clustering, + scale = NULL, + overlap = 5, + reverse_log_scale = FALSE, + selected_annotation = NULL, + output_enrichment = "zscore") { + expr_values <- assay(sce, values) + + rownames(expr_values) <- rowData(sce)$SYMBOL + available_ct <- c() + + for (i in colnames(sign_matrix)) { + gene_i <- rownames(sign_matrix)[which(sign_matrix[, i] == 1)] + overlap_i <- intersect(gene_i, rownames(expr_values)) + if (length(overlap_i) <= overlap) { + output <- paste0("Warning, ", i, " only has ", length(overlap_i), " overlapped genes. Will remove it.") + print(output) + } else { + available_ct <- c(available_ct, i) + } + } + + if (length(selected_annotation) > 0) { + available_ct <- intersect(available_ct, selected_annotation) + output <- paste0("Warning, continuing only with selected annotation. Available annotation are ", available_ct) + print(output) + } + + if (length(available_ct) == 1) { + print(available_ct) + stop("Only one cell type available. Program will stop") + } + if (length(available_ct) < 1) { + stop("No cell type available for this experiment. Program will stop") + } + + interGene <- intersect(rownames(sign_matrix), rownames(expr_values)) + filterSig <- sign_matrix[interGene, available_ct] + signames <- rownames(filterSig)[which(filterSig[, 1] == 1)] + + # calculate mean gene expression + if (reverse_log_scale == TRUE) { + mean_gene_expr <- log(rowMeans(logbase^expr_values - 1, dims = 1) + 1) + } else { + mean_gene_expr <- Matrix::rowMeans(expr_values) + } + geneFold <- expr_values - mean_gene_expr + + cellColMean <- apply(geneFold, 2, mean) + cellColSd <- apply(geneFold, 2, stats::sd) + + # get enrichment scores + enrichment <- matrix( + data = NA, + nrow = dim(filterSig)[2], + ncol = length(cellColMean) + ) + for (i in (1:dim(filterSig)[2])) { + signames <- rownames(filterSig)[which(filterSig[, i] == 1)] + sigColMean <- apply(geneFold[signames, ], 2, mean) + m <- length(signames) + vectorX <- NULL + for (j in (1:length(cellColMean))) { + Sm <- sigColMean[j] + u <- cellColMean[j] + sigma <- cellColSd[j] + zscore <- (Sm - u) * m^(1 / 2) / sigma + vectorX <- append(vectorX, zscore) + } + enrichment[i, ] <- vectorX + } + ## + rownames(enrichment) <- colnames(filterSig) + colnames(enrichment) <- names(cellColMean) + enrichment <- t(enrichment) + + if (output_enrichment == "zscore") { + enrichment <- scale(enrichment) + } + + return(enrichment) +} + +#' @title Calculate metadata for manual cell/spot annotation for heatmap visualisation. +#' @param sce A `SingleCellExperiment` object +#' @param enrichment precalculated enrichment score for each cell/spot +#' @param clustering A vector of selected clustering used for annotation, inheritated from meta_heatmap plotting +#' @concept manual_annotation +calculate_metadata <- function(sce, enrichment, clustering) { + cell_types <- colnames(enrichment) + sce[[glue::glue("manual_annotation_{clustering}")]] <- colnames(enrichment)[apply(enrichment, 1, which.max)] + cell_metadata <- cbind(enrichment, sce[[clustering]]) + colnames(cell_metadata)[ncol(cell_metadata)] <- clustering + sce <- scdrake::sce_add_metadata(sce = sce, clustering_enrichment = cell_metadata) + + return(sce) +} + + +#' @title Manual annotation heatmap plotting +#' @param sce A `SingleCellAnnotation` object +#' @param clustering Selected clustering +#' @param spatial Logical vector, if include spot images for each anotation +#' @param make_cell_plot Logical vector, if include pseudotissue images, for spatial extension +#' @concept manual_annotation +#' @export +meta_heatmap_ploting <- + function(sce, + clus_cor_method = "pearson", + clus_cluster_method = "complete", + values_cor_method = "pearson", + values_cluster_method = "complete", + clustering, + show_value = "value", + # selection(c("value","zscores","zscores_rescaled")) + gradient_midpoint = 0, + gradient_limits = NULL, + x_text_size = 10, + x_text_angle = 45, + y_text_size = 10, + strip_text_size = 8, + low = "blue", + mid = "white", + high = "red", + spatial = FALSE, + dimred = dimred, + make_cell_plot = FALSE, + out_dir = NULL) { + cell_metadata <- metadata(sce)[["clustering_enrichment"]] + + cell_types <- + colnames(cell_metadata)[!colnames(cell_metadata) %in% clustering] + + cell_metadata_cols <- + colnames(cell_metadata)[-which(colnames(cell_metadata) %in% clustering)] + + cell_metadata <- tibble::as_tibble(cell_metadata) + + cell_metadata <- cell_metadata %>% + dplyr::mutate_at(clustering, factor) + + workdt <- cell_metadata %>% + dplyr::group_by(!!!rlang::syms(clustering)) %>% + dplyr::summarise(dplyr::across(all_of(cell_metadata_cols), mean, na.rm = TRUE)) + + page_enrichment <- workdt %>% + tidyr::pivot_longer( + cols = all_of(cell_metadata_cols), + names_to = "variable", + values_to = "value" + ) + + + ## plotMetaDataCellsHeatmap + metaDT <- page_enrichment + + # Step 1: Calculate Z-Scores + metaDT <- metaDT %>% + dplyr::group_by(variable) %>% + dplyr::mutate(zscores = c(scale(value))) + + # Step 2: Rescale Z-Scores to Range [-1, 1] + metaDT <- metaDT %>% + dplyr::group_by(variable) %>% + dplyr::mutate(zscores_rescaled_per_gene = c(scales::rescale(zscores, to = c(-1, 1)))) + + # Calculate means + main_factor <- clustering + + testmain <- metaDT %>% + dplyr::group_by(variable, !!!rlang::syms(clustering)) %>% + dplyr::summarise(mean_value = mean(!!!rlang::syms(show_value))) + + # Step 2: Define the dfunction + dfunction <- function(d, col_name1, col_name2, value.var) { + d %>% + tidyr::pivot_wider(names_from = {{ col_name2 }}, values_from = {{ value.var }}) + } + + # Step 3: Apply dfunction to testmain + testmain_matrix <- dfunction(d = testmain, col_name1 = variable, col_name2 = clustering, value.var = mean_value) + + testmain_mat <- as.matrix(testmain_matrix[, -1]) + + rownames(testmain_mat) <- testmain_matrix$variable + # for clusters + # + cormatrix <- stats::cor(x = testmain_mat, method = clus_cor_method) + cordist <- stats::as.dist(1 - cormatrix, diag = T, upper = T) + corclus <- stats::hclust(d = cordist, method = clus_cluster_method) + clus_names <- rownames(cormatrix) + names(clus_names) <- 1:length(clus_names) + clus_sort_names <- clus_names[corclus$order] + + # for genes + # + values_cormatrix <- stats::cor(x = t(testmain_mat), method = values_cor_method) + values_cordist <- stats::as.dist(1 - values_cormatrix, diag = T, upper = T) + values_corclus <- stats::hclust(d = values_cordist, method = values_cluster_method) + values_names <- rownames(values_cormatrix) + names(values_names) <- 1:length(values_names) + values_sort_names <- values_names[values_corclus$order] + + # data.table variables + # factor_column = variable = NULL + ## def not necesary part + # metaDT[, factor_column := factor(get(clustering), levels = clus_sort_names)] + # metaDT[, variable := factor(get('variable'), levels = values_sort_names)] + ### new part + metaDT <- metaDT %>% + dplyr::mutate(factor_column = factor(!!!rlang::syms(clustering))) # , levels = clus_sort_names)) + + # Convert variable column to a factor with specified levels + metaDT <- metaDT %>% + dplyr::mutate(variable = as.character(variable)) # , levels = values_sort_names)) + ## + + pl <- ggplot2::ggplot() + pl <- pl + ggplot2::geom_tile( + data = metaDT, + ggplot2::aes(x = factor_column, y = variable, fill = .data[[show_value]]), + color = "black" + ) + pl <- pl + ggplot2::scale_fill_gradient2( + low = low, + mid = mid, + high = high, + midpoint = gradient_midpoint + ) + pl <- pl + ggplot2::theme_classic() + pl <- pl + ggplot2::theme( + axis.text.x = ggplot2::element_text( + size = x_text_size, + angle = x_text_angle, + hjust = 1, + vjust = 1 + ), + axis.text.y = ggplot2::element_text(size = y_text_size), + legend.title = ggplot2::element_blank() + ) + pl <- pl + ggplot2::labs(x = clustering, y = "cell types") + # return(pl) + + + # output pdf + out_pdf_file <- fs::path(out_dir, glue::glue("manual_annotation_{clustering}.pdf")) + out_png_file <- out_pdf_file + fs::path_ext(out_png_file) <- "png" + # pl <- list(pl) + pl <- tryCatch( + { + scdrake::save_pdf(list(pl), out_pdf_file, stop_on_error = TRUE) + ggplot2::ggsave( + filename = out_png_file, + plot = pl, + device = "png", + dpi = 300 + ) + pl + }, + error = function(e) { + if (stringr::str_detect(e$message, "Viewport has zero dimension")) { + cli_alert_warning( + str_space( + "Error catched: 'Viewport has zero dimension(s)'.", + "There are probably too many levels and the legend doesn't fit into the plot.", + "Removing the legend before saving the plot image." + ) + ) + pl <- pl + theme(legend.position = "none") + scdrake::save_pdf(list(pl), out_pdf_file) + ggplot2::ggsave( + filename = out_png_file, + plot = pl, + device = "png", + dpi = 150 + ) + pl + } else { + cli::cli_abort(e$message) + } + } + ) + + par <- tibble::tibble( + title = as.character(glue::glue("manual_annotation_{clustering}.pdf")), + anot_plot = list(pl), + anot_plot_out_pdf_file = out_pdf_file, + anot_plot_out_png_file = out_png_file + ) + + p <- plotReducedDim_mod( + sce = sce, + dimred = dimred, + colour_by = glue::glue("manual_annotation_{clustering}"), + text_by = glue::glue("manual_annotation_{clustering}"), + title = glue::glue("manual_annotation_{clustering}_dimred.pdf"), + use_default_ggplot_palette = TRUE, + legend_title = "Cluster" + ) + + out_pdf_file <- + fs::path(out_dir, + glue::glue("manual_annotation_{clustering}_dimred.pdf")) + out_png_file <- out_pdf_file + fs::path_ext(out_png_file) <- "png" + pl <- tryCatch({ + scdrake::save_pdf(list(p), out_pdf_file, stop_on_error = TRUE) + ggplot2::ggsave( + filename = out_png_file, + plot = p, + device = "png", + dpi = 300 + ) + pl + }, + error = function(e) { + if (stringr::str_detect(e$message, "Viewport has zero dimension")) { + cli_alert_warning( + str_space( + "Error catched: 'Viewport has zero dimension(s)'.", + "There are probably too many levels and the legend doesn't fit into the plot.", + "Removing the legend before saving the plot image." + ) + ) + p <- p + theme(legend.position = "none") + scdrake::save_pdf(list(p), out_pdf_file) + ggplot2::ggsave( + filename = out_png_file, + plot = p, + device = "png", + dpi = 150 + ) + p + } else { + cli::cli_abort(e$message) + } + }) + dimred_par <- tibble::tibble( + title = as.character(glue::glue("manual_annotation_{clustering}_dimred.pdf")), + anot_plot = list(p), + anot_plot_out_pdf_file = out_pdf_file, + anot_plot_out_png_file = out_png_file + ) + par <- rbind(par, dimred_par) + if (spatial) { + man_anot_plot <- visualized_spots(sce, + cell_color = glue::glue("manual_annotation_{clustering}"), + point_size = 5, + color_as_factor = T, + legend_symbol_size = 3, + legend_text = 16 + ) + out_pdf_file <- fs::path(out_dir, "spatmananotplot.pdf") + scdrake::save_pdf(list(man_anot_plot), + out_pdf_file, + stop_on_error = TRUE, + width = 14, + height = 14 + ) + annot_par <- tibble::tibble( + title = "spatmananotplot.pdf", + anot_plot = list(man_anot_plot), + anot_plot_out_pdf_file = out_pdf_file, + anot_plot_out_png_file = NA + ) + par <- rbind(par, annot_par) + + if (make_cell_plot) { + cell_annotation_values <- cell_types + savelist <- list() + + for (annot in cell_annotation_values) { + enrich_plot <- visualized_spots( + scdrake::sce_add_colData(sce, cell_metadata), + cell_color = annot, + point_size = 1.5 + ) + savelist[[annot]] <- enrich_plot + } + combo_plot <- cowplot::plot_grid(plotlist = savelist) + out_pdf_file <- fs::path(out_dir, "spatcellplot.pdf") + scdrake::save_pdf( + list(combo_plot), + out_pdf_file, + stop_on_error = TRUE, + width = 14, + height = 14 + ) + # print(head(par)) + + cell_par <- tibble::tibble( + title = "spatcellplot.pdf", + anot_plot = list(combo_plot), + anot_plot_out_pdf_file = out_pdf_file, + anot_plot_out_png_file = NA + ) + # print(head(cell_par)) + + par <- rbind(par, cell_par) + } + } + par + } diff --git a/R/plans_common_clustering.R b/R/plans_common_clustering.R index 78d43883..eb554ddb 100644 --- a/R/plans_common_clustering.R +++ b/R/plans_common_clustering.R @@ -293,6 +293,7 @@ get_clustering_sc3_subplan <- function(sce_target_name, cluster_sc3_enabled, clu #' @param report_dimred_names A character vector: dimreds to use for plotting clustering results. #' @param dimred_plots_out_dir,other_plots_out_dir A character scalar: path to output directory to save plots. #' @param is_integration A logical scalar: if `TRUE`, clustering results will be named with `cluster_int_*` prefix. +#' @param spatial A logical scalar: if `TRUE`, enabling pseudotissue spatial visualization for spatial transcriptomics datasets. #' @param seed An integer scalar: random seed for SC3. #' @return A combined [drake::drake_plan()] from: #' @@ -310,13 +311,13 @@ get_clustering_subplan <- function(cfg, dimred_plots_out_dir, other_plots_out_dir, is_integration, + spatial, seed = 1) { any_clustering_enabled <- any( cfg$CLUSTER_GRAPH_LOUVAIN_ENABLED, cfg$CLUSTER_GRAPH_WALKTRAP_ENABLED, cfg$CLUSTER_GRAPH_LEIDEN_ENABLED, cfg$CLUSTER_KMEANS_K_ENABLED, cfg$CLUSTER_KMEANS_KBEST_ENABLED, cfg$CLUSTER_SC3_ENABLED ) - plan_clustering_graph <- get_clustering_graph_subplan( sce_target_name = sce_clustering_target_name, dimred = dimred, @@ -367,6 +368,7 @@ get_clustering_subplan <- function(cfg, !!sym(sce_dimred_plots_target_name), dimred_names = !!report_dimred_names, cluster_df = dplyr::select(clusters_all_df, -data), + spatial = !!spatial, out_dir = !!dimred_plots_out_dir ), diff --git a/R/plans_integration.R b/R/plans_integration.R index 475d3951..28377e72 100644 --- a/R/plans_integration.R +++ b/R/plans_integration.R @@ -284,6 +284,7 @@ get_int_clustering_subplan <- function(cfg, cfg_pipeline, cfg_main) { dimred_plots_out_dir = cfg$INT_CLUSTERING_DIMRED_PLOTS_OUT_DIR, other_plots_out_dir = cfg$INT_CLUSTERING_OTHER_PLOTS_OUT_DIR, is_integration = TRUE, + spatial = FALSE, seed = cfg_pipeline$SEED ) diff --git a/R/plans_single_sample.R b/R/plans_single_sample.R index d7cf945a..89aa2968 100644 --- a/R/plans_single_sample.R +++ b/R/plans_single_sample.R @@ -21,7 +21,8 @@ get_input_qc_subplan <- function(cfg, cfg_pipeline, cfg_main) { config_input_qc = !!cfg, ## -- Read raw Cell Ranger files. - sce_raw = sce_raw_fn(!!cfg$INPUT_DATA, input_data_subset = !!cfg$INPUT_DATA_SUBSET), + sce_orig = sce_raw_fn(!!cfg$INPUT_DATA, input_data_subset = !!cfg$INPUT_DATA_SUBSET), + sce_raw = sce_add_spatial_colData(sce_orig,!!cfg$SPATIAL_LOCKS,!!cfg$SPATIAL), sce_raw_info = save_object_info(sce_raw), ## -- Calculate barcode ranks (for knee plot). @@ -104,8 +105,8 @@ get_input_qc_subplan <- function(cfg, cfg_pipeline, cfg_main) { sce_custom_filter_genes_info = save_object_info(sce_custom_filter_genes), ## -- Create a history of cell and gene filtering. - sce_history = sce_history_fn(sce_unfiltered, sce_qc_filter_genes, sce_custom_filter_genes), - sce_history_plot = sce_history_plot_fn(sce_history), + sce_history = sce_history_fn(sce_unfiltered, sce_qc_filter_genes, sce_custom_filter_genes,!!cfg$SPATIAL), + sce_history_plot = sce_history_plot_fn(sce_history,!!cfg$SPATIAL), ## -- Create plots of filters. sce_qc_filter_genes_plotlist = list( @@ -230,6 +231,7 @@ get_norm_clustering_subplan <- function(cfg, cfg_pipeline, cfg_main) { hvg_selection = !!cfg$HVG_SELECTION, hvg_rm_cc_genes = !!cfg$HVG_RM_CC_GENES, hvg_cc_genes_var_expl_threshold = !!cfg$HVG_CC_GENES_VAR_EXPL_THRESHOLD, + spatial = !!cfg$SPATIAL, BPPARAM = ignore(BiocParallel::bpparam()) ), @@ -338,6 +340,7 @@ get_norm_clustering_subplan <- function(cfg, cfg_pipeline, cfg_main) { dimred_plots_out_dir = cfg$NORM_CLUSTERING_DIMRED_PLOTS_OUT_DIR, other_plots_out_dir = cfg$NORM_CLUSTERING_OTHER_PLOTS_OUT_DIR, is_integration = FALSE, + spatial = cfg$SPATIAL, seed = cfg_pipeline$SEED ) @@ -396,6 +399,39 @@ get_norm_clustering_subplan <- function(cfg, cfg_pipeline, cfg_main) { selected_markers_plots_files = NULL ) } + if (cfg$MANUAL_ANNOTATION) { + plan_manual_annotation <- drake::drake_plan( + signature_matrix = create_signature_matrix_fn(!!cfg$ANNOTATION_MARKERS), + sce_annotation_enrichment = run_page_man_annotation( + signature_matrix, + sce = sce_final_norm_clustering, + scale = !!cfg$SCALE_ANNOTATION, + overlap = !!cfg$OVERLAP, + values = "logcounts" + ), + annotation_metadata = calculate_metadata( + sce = sce_final_norm_clustering, + enrichment = sce_annotation_enrichment, + clustering = !!cfg$ANNOTATION_CLUSTERING + ), + plot_annotation = meta_heatmap_ploting( + annotation_metadata, + clustering = !!cfg$ANNOTATION_CLUSTERING, + show_value = !!cfg$SHOW_VALUE, + out_dir = !!cfg$NORM_CLUSTERING_OTHER_PLOTS_OUT_DIR, + spatial = !!cfg$SPATIAL, + dimred = !!cfg$HEATMAP_DIMRED, + make_cell_plot = !!cfg$MAKE_CELL_PLOT + ) + ) + } else { + plan_manual_annotation <- drake::drake_plan( + signature_matrix = NULL, + annotation_enrichment = NULL, + annotation_metadata = NULL, + plot_annotation = NULL + ) + } - drake::bind_plans(plan, plan_clustering, plan_cell_annotation, plan_dimred_plots_other_vars, plan_selected_markers) + drake::bind_plans(plan, plan_clustering, plan_cell_annotation, plan_dimred_plots_other_vars, plan_selected_markers,plan_manual_annotation) } diff --git a/R/sce.R b/R/sce.R index f2087791..1fc41027 100644 --- a/R/sce.R +++ b/R/sce.R @@ -53,6 +53,56 @@ sce_add_colData <- function(sce, df, replace = TRUE) { return(sce) } +#' @title Append new columns with spatial relevance to `colData` of a `SingleCellExperiment` object. +#' @param sce A `SingleCellExperiment` object. +#' @param spatial_locs A file contating spatial coordiantes +#' @param spatial Logical vector If true, add spatial coordinates +#' @concept spatial_sce +sce_add_spatial_colData <- function(sce, spatial_locs, spatial = FALSE) { + if (spatial) { + if (!file.exists(spatial_locs)) + stop("path to spatial locations does not exist") + spatial_locs <- readr::read_csv(file = spatial_locs) + colnames(spatial_locs) <- + c("Barcode", + "in_tissue", + "array_row", + "array_col", + "pixel_row", + "pixel_col") + spatial_locs <- dplyr::filter(spatial_locs, in_tissue == 1) + #rownames(spatial_locs) <- spatial_locs[,1] + # assert_that( + # nrow(spatial_locs) == ncol(sce), + # msg = "Number of rows in {.var spatial_locs} must be same as number of columns in {.var sce}." + # ) + #library(SingleCellExperiment) + ###try if in tissue! spatial_locs <- spatial_locs[spatial_locs$in_tissue == '1',] + ## [, c(1,3,4)] > 3,4 coordinate of the spot in the array + ## [, c(1,5,6)] > 5,6 the PIXEL coordinate of the center, 5 in row, 6 in column + spatial_locs <- spatial_locs[, c(1, 3, 4)] + colnames(spatial_locs) <- c("Barcode", "Dims_x", "Dims_y") + + colData(sce) <- + merge(colData(sce), + spatial_locs, + by = "Barcode", + all.x = TRUE) + sce <- sce[, !is.na(sce$Dims_x)] + #print(sce) + sce <- + scdrake::sce_add_metadata(sce, spatial_locs = colData(sce)[, c("Barcode", "Dims_x", "Dims_y")], + replace = FALSE) + #sce <- list(spatial_locs = colData(sce)[,c("Dims_x","Dims_y")]) + #SingleCellExperiment::coldata(sce) <- cbind(SingleCellExperiment::coldata(sce),spatial_locs,) + colnames(sce) <- colData(sce)$Barcode + return(sce) + } + else { + return(sce) + } +} + #' @title Append data to `metadata()` list of a `SingleCellExperiment` object. #' @description [utils::modifyList()] is used internally, so existing named items in `metadata()` can be overwritten. #' @param sce A `SingleCellExperiment` object. diff --git a/R/single_sample_input_qc.R b/R/single_sample_input_qc.R index 1940a226..5ba48251 100644 --- a/R/single_sample_input_qc.R +++ b/R/single_sample_input_qc.R @@ -190,42 +190,70 @@ get_gene_filter <- function(sce, min_ratio_cells, min_umi) { #' @param sce_unfiltered (*input target*) A `SingleCellExperiment` object. #' @param sce_qc_filter_genes (*input target*) A `SingleCellExperiment` object. #' @param sce_custom_filter_genes (*input target*) A `SingleCellExperiment` object. +#' @param spatial A logical vector: `TRUE` for spatial transcriptomics dataset. #' @return A tibble. *Output target*: `sce_history` #' #' @concept single_sample_input_qc_fn #' @export -sce_history_fn <- function(sce_unfiltered, sce_qc_filter_genes, sce_custom_filter_genes) { - tibble::tribble( - stats::formula("~filtering_type"), stats::formula("~n_cells"), stats::formula("~n_genes"), - "no_filtering", ncol(sce_unfiltered), nrow(sce_unfiltered), - "qc", ncol(sce_qc_filter_genes), nrow(sce_qc_filter_genes), - "custom", ncol(sce_custom_filter_genes), nrow(sce_custom_filter_genes) - ) %>% - dplyr::mutate( - filtering_type = factor(.data$filtering_type, levels = c("no_filtering", "qc", "custom")), - n_cells = as.integer(.data$n_cells), - n_genes = as.integer(.data$n_genes) - ) +sce_history_fn <- function(sce_unfiltered, sce_qc_filter_genes, sce_custom_filter_genes, spatial=FALSE) { + if (!spatial) { + tibble::tribble( + stats::formula("~filtering_type"), stats::formula("~n_cells"), stats::formula("~n_genes"), + "no_filtering", ncol(sce_unfiltered), nrow(sce_unfiltered), + "qc", ncol(sce_qc_filter_genes), nrow(sce_qc_filter_genes), + "custom", ncol(sce_custom_filter_genes), nrow(sce_custom_filter_genes) + ) %>% + dplyr::mutate( + filtering_type = factor(.data$filtering_type, levels = c("no_filtering", "qc", "custom")), + n_cells = as.integer(.data$n_cells), + n_genes = as.integer(.data$n_genes) + )} + else { + tibble::tribble( + stats::formula("~filtering_type"), stats::formula("~n_spots"), stats::formula("~n_genes"), + "no_filtering", ncol(sce_unfiltered), nrow(sce_unfiltered), + "qc", ncol(sce_qc_filter_genes), nrow(sce_qc_filter_genes), + "custom", ncol(sce_custom_filter_genes), nrow(sce_custom_filter_genes) + ) %>% + dplyr::mutate( + filtering_type = factor(.data$filtering_type, levels = c("no_filtering", "qc", "custom")), + n_spots = as.integer(.data$n_spots), + n_genes = as.integer(.data$n_genes) + )} } #' @title Plot history of cell and gene filtering. #' @param sce_history (*input target*) A tibble. +#' @param spatial A logical vector: `TRUE` for spatial transcriptomics dataset. #' @return A `patchwork` object. *Output target*: `sce_history_plot` #' #' @concept single_sample_input_qc_fn #' @export -sce_history_plot_fn <- function(sce_history) { - patchwork::wrap_plots( - ggplot(sce_history) + - ggplot2::geom_col(aes(x = .data$filtering_type, y = .data$n_cells, fill = .data$filtering_type)) + - ggplot2::theme_bw() + - ggtitle("Number of cells"), - ggplot(sce_history) + - ggplot2::geom_col(aes(x = .data$filtering_type, y = .data$n_genes, fill = .data$filtering_type)) + - ggplot2::theme_bw() + - ggtitle("Number of genes"), - guides = "collect" - ) +sce_history_plot_fn <- function(sce_history, spatial=FALSE) { + if (!spatial) { + patchwork::wrap_plots( + ggplot(sce_history) + + ggplot2::geom_col(aes(x = .data$filtering_type, y = .data$n_cells, fill = .data$filtering_type)) + + ggplot2::theme_bw() + + ggtitle("Number of cells"), + ggplot(sce_history) + + ggplot2::geom_col(aes(x = .data$filtering_type, y = .data$n_genes, fill = .data$filtering_type)) + + ggplot2::theme_bw() + + ggtitle("Number of genes"), + guides = "collect" + )} + else { + patchwork::wrap_plots( + ggplot(sce_history) + + ggplot2::geom_col(aes(x = .data$filtering_type, y = .data$n_spots, fill = .data$filtering_type)) + + ggplot2::theme_bw() + + ggtitle("Number of spots"), + ggplot(sce_history) + + ggplot2::geom_col(aes(x = .data$filtering_type, y = .data$n_genes, fill = .data$filtering_type)) + + ggplot2::theme_bw() + + ggtitle("Number of genes"), + guides = "collect" + )} } #' @title Select a `SingleCellExperiment` object which will proceed to the `02_norm_clustering` stage. diff --git a/R/single_sample_norm_clustering.R b/R/single_sample_norm_clustering.R index e8d11777..f1f59924 100644 --- a/R/single_sample_norm_clustering.R +++ b/R/single_sample_norm_clustering.R @@ -74,11 +74,11 @@ sce_cc_fn <- function(sce_final_input_qc, cc_genes, data = NULL) { seu <- Seurat::as.Seurat(sce_final_input_qc, data = data) ## -- CellCycleScoring() can fail if there are no cell cycle genes. seu_cc <- tryCatch( - Seurat::CellCycleScoring( - seu, - s.features = cc_genes[cc_genes$phase == "S", "ENSEMBL"], - g2m.features = cc_genes[cc_genes$phase == "G2M", "ENSEMBL"], - set.ident = TRUE + Seurat::CellCycleScoring( + seu, + s.features = cc_genes[cc_genes$phase == "S", "ENSEMBL"], + g2m.features = cc_genes[cc_genes$phase == "G2M", "ENSEMBL"], + set.ident = TRUE ), error = function(e) { cli_alert_warning(str_space( @@ -245,6 +245,7 @@ sctransform_normalization <- function(sce, #' `hvg_cc_genes_var_expl_threshold` prior to HVG selection. #' @param hvg_cc_genes_var_expl_threshold A numeric scalar: threshold for variance explained. #' Genes exceeding this threshold will be marked as CC-related. +#' @param spatial A logical scalar: if `TRUE`, add spatially variable genes extension #' @inheritParams bsparam_param #' @inheritParams bpparam_param #' @return A modified `sce_norm` object with added HVG data in `metadata()`. @@ -266,6 +267,7 @@ sce_norm_hvg_fn <- function(sce_norm, hvg_selection = c("top", "significance", "threshold"), hvg_rm_cc_genes = FALSE, hvg_cc_genes_var_expl_threshold = 5, + spatial = FALSE, BSPARAM = BiocSingular::IrlbaParam(), BPPARAM = BiocParallel::SerialParam()) { hvg_metric <- arg_match(hvg_metric) @@ -309,7 +311,30 @@ sce_norm_hvg_fn <- function(sce_norm, hvg_metric = hvg_metric, hvg_selection = hvg_selection ) + if (spatial) { + seu_sce_norm <- create_seu_for_heatmaps(sce_norm) + coord <- seu_sce_norm@meta.data[, c("Dims_x", "Dims_y")] + colnames(coord) <- c("imagerow", "imagecol") + # sfs from cfg file or some dummy ones? dummy could work... + sfs <- Seurat::scalefactors(spot = 230.6399514627273, fiducial = 372.5722292859441, hires = 0.058580592, lowres = 0.017574178) + seu_sce_norm@images$slice1 <- new( + Class = "VisiumV1", + assay = "RNA", + key = "slice1_", + coordinates = coord, + scale.factors = sfs + ) + seu_sce_norm <- Seurat::FindSpatiallyVariableFeatures( + seu_sce_norm, + assay = "RNA", + selection.method = "moransi", + nfeatures = hvg_selection_value + ) + svg_ids <- Seurat::SVFInfo(seu_sce_norm, selection.method = "moransi", ) + svg_ids <- rownames(svg_ids[svg_ids$moransi.spatially.variable == "TRUE", ]) + hvg_ids <- unique(c(hvg_ids, svg_ids)) + } if (length(hvg_ids) <= 100) { cli_alert_warning("Found a small number of HVGs ({length(hvg_ids)}). This may cause problems in downstream tasks, e.g. PCA.") } diff --git a/R/spatial_visualization.R b/R/spatial_visualization.R new file mode 100644 index 00000000..3f93b9f5 --- /dev/null +++ b/R/spatial_visualization.R @@ -0,0 +1,736 @@ +## -- Common functions related to visualization in spatial space. +#' @title A basic function for pseudotissue visualization +#' @description Adapted function from Giotto package [Dries et al, 2021], rewrite for use in scdrake package in a SingleCellExperiment object +#' @param sce A `SingleCellExperiment` object. +#' @param cell_color,color_as_factor,cell_color_code,... Passed to ggplot2 object in plot_spat_point_layer_ggplot function +#' @return A `ggplot2` object. +#' @concept spatial_visualization +visualized_spots <- function(sce, + sdimx = "Dims_x", + sdimy = "Dims_y", + spat_enr_names = NULL, + cell_color = NULL, + color_as_factor = F, + cell_color_code = NULL, + cell_color_gradient = c("navy", "lightcyan", "red"), + gradient_midpoint = NULL, + gradient_limits = NULL, + select_cells = NULL, + point_shape = c("border", "no_border"), + point_size = 3, + point_alpha = 1, + point_border_col = "black", + point_border_stroke = 0.1, + label_size = 4, + label_fontface = "bold", + show_other_cells = T, + other_cell_color = "lightgrey", + other_point_size = 1, + other_cells_alpha = 0.1, + coord_fix_ratio = NULL, + title = NULL, + show_legend = T, + legend_text = 8, + legend_symbol_size = 1, + background_color = "white", + axis_text = 8, + axis_title = 8) { + ## point shape ## + point_shape <- + match.arg(point_shape, choices = c("border", "no_border")) + + ## get spatial cell locations + cell_locations <- metadata(sce)$spatial_locs + + ## get cell metadata + cell_metadata <- + colData(sce)[, c("Barcode", cell_color, sdimx, sdimy)] + cell_metadata <- as.data.frame(cell_metadata) + if (nrow(cell_metadata) == 0) { + cell_locations_metadata <- cell_locations + } else { + cell_locations_metadata <- cell_metadata + } + + ## create subsets if needed + + if (!is.null(select_cells)) { + cat("You have selected individual cell IDs \n") + cell_locations_metadata_other <- + cell_locations_metadata[!cell_locations_metadata$Barcode %in% select_cells, ] + cell_locations_metadata_selected <- + cell_locations_metadata[cell_locations_metadata$Barcode %in% select_cells, ] + } else if (is.null(select_cells)) { + cell_locations_metadata_selected <- cell_locations_metadata + cell_locations_metadata_other <- NULL + } + + # data.table and ggplot variables + sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- x_start <- x_end <- y_start <- y_end <- NULL + + ### create 2D plot with ggplot ### + # cat('create 2D plot with ggplot \n') + + pl <- ggplot2::ggplot() + pl <- pl + ggplot2::theme_bw() + + ## plot point layer + if (point_shape == "border") { + pl <- plot_spat_point_layer_ggplot( + ggobject = pl, + sdimx = sdimx, + sdimy = sdimy, + cell_locations_metadata_selected = cell_locations_metadata_selected, + cell_locations_metadata_other = cell_locations_metadata_other, + cell_color = cell_color, + color_as_factor = color_as_factor, + cell_color_code = cell_color_code, + cell_color_gradient = cell_color_gradient, + gradient_midpoint = gradient_midpoint, + gradient_limits = gradient_limits, + select_cells = select_cells, + point_size = point_size, + point_alpha = point_alpha, + point_border_stroke = point_border_stroke, + point_border_col = point_border_col, + label_size = label_size, + label_fontface = label_fontface, + show_other_cells = show_other_cells, + other_cell_color = other_cell_color, + other_point_size = other_point_size, + show_legend = show_legend + ) + } else if (point_shape == "no_border") { + pl <- plot_spat_point_layer_ggplot( + ggobject = pl, + sdimx = sdimx, + sdimy = sdimy, + cell_locations_metadata_selected = cell_locations_metadata_selected, + cell_locations_metadata_other = cell_locations_metadata_other, + cell_color = cell_color, + color_as_factor = color_as_factor, + cell_color_code = cell_color_code, + cell_color_gradient = cell_color_gradient, + gradient_midpoint = gradient_midpoint, + gradient_limits = gradient_limits, + select_cells = select_cells, + point_size = point_size, + point_alpha = point_alpha, + label_size = label_size, + label_fontface = label_fontface, + show_other_cells = show_other_cells, + other_cell_color = other_cell_color, + other_point_size = other_point_size, + show_legend = show_legend + ) + } + + ## adjust theme settings + pl <- pl + ggplot2::theme( + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.text = ggplot2::element_text(size = legend_text), + axis.title = ggplot2::element_text(size = axis_title), + axis.text = ggplot2::element_text(size = axis_text), + panel.grid = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = background_color) + ) + + ## change symbol size of legend + if (color_as_factor == TRUE) { + if (point_shape == "border") { + pl <- + pl + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(size = legend_symbol_size))) + } else if (point_shape == "no_border") { + pl <- + pl + ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = legend_symbol_size))) + } + } + + # fix coord ratio + if (!is.null(coord_fix_ratio)) { + pl <- pl + ggplot2::coord_fixed(ratio = coord_fix_ratio) + } + + # provide x, y and plot titles + if (is.null(title)) { + title <- cell_color + } + pl <- + pl + ggplot2::labs(x = "x coordinates", y = "y coordinates", title = title) +} +#' @title A function for pseudotissue visualization +#' @description Adapted function from Giotto package [Dries et al, 2021], rewrite for use in scdrake package in a SingleCellExperiment object. +#' @param ggobject An inheriated object from visualized_spots. +#' @param cell_locations_metadata_selected,cell_locations_metadata_other,cell_color,... Inheriated, passed to ggplot2 object. +#' @return A `ggplot2` object. +#' @concept spatial_visualization +plot_spat_point_layer_ggplot <- function(ggobject, + sdimx = NULL, + sdimy = NULL, + cell_locations_metadata_selected, + cell_locations_metadata_other, + cell_color = NULL, + color_as_factor = T, + cell_color_code = NULL, + cell_color_gradient = c("yellow", "white", "red"), + gradient_midpoint = NULL, + gradient_limits = NULL, + select_cells = NULL, + point_size = 2, + point_alpha = 1, + point_border_col = "lightgrey", + point_border_stroke = 0.1, + label_size = 4, + label_fontface = "bold", + show_other_cells = T, + other_cell_color = "lightgrey", + other_point_size = 1, + show_legend = TRUE) { + ## specify spatial dimensions first + if (is.null(sdimx) | is.null(sdimy)) { + warning( + "plot_method = ggplot, but spatial dimensions for sdimx and/or sdimy are not specified. \n + It will default to the 'sdimx' and 'sdimy' " + ) + sdimx <- "Dims_x" + sdimy <- "Dims_y" + } + + ## ggplot object + pl <- ggobject + + ## first plot other non-selected cells + if (!is.null(select_cells) & show_other_cells == TRUE) { + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_other, + ggplot2::aes(x = .data[[sdimx]], y = .data[[sdimy]]), + color = other_cell_color, + show.legend = F, + size = other_point_size, + alpha = point_alpha + ) + } + + ## order of color + # 1. if NULL then default to lightblue + # 2. if character vector + # 2.1 if length of cell_color is longer than 1 and has colors + # 2.2 if not part of metadata then suppose its color + # 2.3 part of metadata + # 2.3.1 numerical column + # 2.3.2 factor column or character to factor + + # cell color default + if (is.null(cell_color)) { + cell_color <- "lightblue" + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_selected, + ggplot2::aes(x = .data[[sdimx]], y = .data[[sdimy]]), + show.legend = show_legend, + shape = 21, + fill = cell_color, + size = point_size, + stroke = point_border_stroke, + color = point_border_col, + alpha = point_alpha + ) + } else if (length(cell_color) > 1) { + if (is.numeric(cell_color) | is.factor(cell_color)) { + if (nrow(cell_locations_metadata_selected) != length(cell_color)) { + stop("\n vector needs to be the same lengths as number of cells \n") + } + cell_locations_metadata_selected[["temp_color"]] <- cell_color + + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_selected, + ggplot2::aes( + x = .data[[sdimx]], + y = .data[[sdimy]], + fill = "temp_color" + ), + show.legend = show_legend, + shape = 21, + size = point_size, + color = point_border_col, + stroke = point_border_stroke, + alpha = point_alpha + ) + } else if (is.character(cell_color)) { + if (!all(cell_color %in% grDevices::colors())) { + stop("cell_color is not numeric, a factor or vector of colors \n") + } + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_selected, + ggplot2::aes(x = .data[[sdimx]], y = .data[[sdimy]]), + show.legend = show_legend, + shape = 21, + fill = cell_color, + size = point_size, + color = point_border_col, + stroke = point_border_stroke, + alpha = point_alpha + ) + } + } else if (is.character(cell_color)) { + if (!cell_color %in% colnames(cell_locations_metadata_selected)) { + if (!cell_color %in% grDevices::colors()) { + stop(cell_color, " is not a color or a column name \n") + } + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_selected, + ggplot2::aes(x = .data[[sdimx]], y = .data[[sdimy]]), + show.legend = show_legend, + shape = 21, + fill = cell_color, + size = point_size, + color = point_border_col, + stroke = point_border_stroke, + alpha = point_alpha + ) + } else { + class_cell_color <- + class(cell_locations_metadata_selected[[cell_color]]) + + if ((class_cell_color == "integer" | + class_cell_color == "numeric") & + color_as_factor == FALSE) { + # set upper and lower limits + if (!is.null(gradient_limits) & + is.vector(gradient_limits) & + length(gradient_limits) == 2) { + lower_lim <- gradient_limits[[1]] + upper_lim <- gradient_limits[[2]] + + numeric_data <- + cell_locations_metadata_selected[[cell_color]] + limit_numeric_data <- + ifelse( + numeric_data > upper_lim, + upper_lim, + ifelse(numeric_data < lower_lim, lower_lim, numeric_data) + ) + cell_locations_metadata_selected[[cell_color]] <- + limit_numeric_data + } + + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_selected, + ggplot2::aes( + x = .data[[sdimx]], + y = .data[[sdimy]], + fill = .data[[cell_color]] + ), + show.legend = show_legend, + shape = 21, + size = point_size, + color = point_border_col, + stroke = point_border_stroke, + alpha = point_alpha + ) + } else { + # convert character or numeric to factor + if (color_as_factor == TRUE) { + factor_data <- factor(cell_locations_metadata_selected[[cell_color]]) + cell_locations_metadata_selected[[cell_color]] <- + factor_data + } + + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_selected, + ggplot2::aes( + x = .data[[sdimx]], + y = .data[[sdimy]], + fill = .data[[cell_color]] + ), + show.legend = show_legend, + shape = 21, + size = point_size, + color = point_border_col, + stroke = point_border_stroke, + alpha = point_alpha + ) + } + + ## specificy colors to use + if (!is.null(cell_color_code)) { + pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) + } else if (color_as_factor == T) { + number_colors <- length(unique(factor_data)) + cell_color_code <- getDistinctColors(n = number_colors) + names(cell_color_code) <- unique(factor_data) + pl <- + pl + ggplot2::scale_fill_manual(values = cell_color_code) + } else if (color_as_factor == F) { + if (is.null(gradient_midpoint)) { + gradient_midpoint <- + stats::median(cell_locations_metadata_selected[[cell_color]]) + } + + pl <- + pl + ggplot2::scale_fill_gradient2( + low = cell_color_gradient[[1]], + mid = cell_color_gradient[[2]], + high = cell_color_gradient[[3]], + midpoint = gradient_midpoint + ) + } + } + } + pl <- pl + ggplot2::scale_y_reverse() + return(pl) +} +#' @title A helper function for asigning colors in pseudotissue visualization +#' @description Adapted function from Giotto package [Dries et al, 2021], rewrite for use in scdrake package for a SingleCellExperiment object +#' @param n Number of desired colors. +#' @return A character vector of distinct colors. +#' @concept spatial_visualization +getDistinctColors <- function(n) { + qual_col_pals <- + RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category == "qual", ] + col_vector <- + unique(unlist( + mapply( + RColorBrewer::brewer.pal, + qual_col_pals$maxcolors, + rownames(qual_col_pals) + ) + )) + + if (n > length(col_vector)) { + # get all possible colors + all_colors <- grDevices::colors() + all_colors_no_grey <- + grep( + x = all_colors, + pattern = "grey|gray", + value = T, + invert = T + ) + grey_colors <- + grep( + x = all_colors, + pattern = "grey", + value = T, + invert = F + ) + admitted_grey_colors <- grey_colors[seq(1, 110, 10)] + broad_colors <- c(all_colors_no_grey, admitted_grey_colors) + + # if too many colors stop + if (n > length(broad_colors)) { + warning("\n not enough unique colors in R, maximum = 444 \n") + col_vector <- sample( + x = broad_colors, + size = n, + replace = T + ) + } else { + col_vector <- sample( + x = broad_colors, + size = n, + replace = F + ) + } + } else { + xxx <- grDevices::col2rgb(col_vector) + dist_mat <- as.matrix(stats::dist(t(xxx))) + diag(dist_mat) <- 1e10 + + while (length(col_vector) > n) { + minv <- apply(dist_mat, 1, function(x) { + min(x) + }) + + idx <- which(minv == min(minv))[1] + dist_mat <- dist_mat[-idx, -idx] + col_vector <- col_vector[-idx] + } + } + return(col_vector) +} +#' @title A function for visualization selected qc matrices in pseudotissue visualization +#' @description Adapted function from Giotto package [Dries et al, 2021], rewrite for use in scdrake package for a SingleCellExperiment object. Helper function for users, not in core scdrake package. +#' @param sce A `SingleCellExperiment` object. +#' @return A list of plots. +#' @concept spatial_visualization +plot_spat_visuals <- function(sce) { + to_plot <- c( + "detected", + "sum", + "subsets_mito_percent", + "subsets_ribo_percent" + ) + plist <- list() + n <- 1 + for (j in to_plot) { + plist[[n]] <- + visualized_spots( + sce = sce, + sdimx = "Dims_x", + sdimy = "Dims_y", + cell_color = j, + point_size = 2, + point_shape = "border", + color_as_factor = F, + point_alpha = 1, + show_legend = T + ) + n <- n + 1 + } + + return(plist) +} + +#' @title A function for visualization selected genes in pseudotissue visualization +#' @description Adapted function from Giotto package [Dries et al, 2021], rewrite for use in scdrake package for a SingleCellExperiment object +#' @param sce A `SingleCellExperiment` object. +#' @return A ggplot2 object. +#' @concept spatial_visualization +spatGenePlot2Dsce <- function(sce, + sdimx = "Dims_x", + sdimy = "Dims_y", + expression_values = c("counts", "logcounts"), + genes, + cell_color_gradient = c("blue", "white", "red"), + gradient_midpoint = NULL, + gradient_limits = NULL, + edge_alpha = NULL, + # midpoint = 0, + scale_alpha_with_expression = FALSE, + point_shape = c("border", "no_border"), + point_size = 1, + point_alpha = 1, + point_border_col = "black", + point_border_stroke = 0.1, + show_legend = T, + legend_text = 8, + background_color = "white", + axis_text = 8, + axis_title = 8, + cow_n_col = 2, + cow_rel_h = 1, + cow_rel_w = 1, + cow_align = "h") { + # data.table variables + Barcode <- NULL + + # point shape + point_shape <- + match.arg(point_shape, choices = c("border", "no_border")) + + # expression values + values <- match.arg(expression_values, c("counts", "logcounts")) + expr_values <- as.matrix(assay(sce, values)) + colnames(expr_values) <- colData(sce)[["Barcode"]] + # only keep genes that are in the dataset + selected_genes <- genes + selected_genes <- + selected_genes[selected_genes %in% rownames(expr_values)] + + if (length(selected_genes) == 1) { + selected_genes <- selected_genes[1] + subset_expr_data <- + expr_values[rownames(expr_values) %in% selected_genes, ] + + # t_sub_expr_data_DT = data.table::data.table('selected_gene' = subset_expr_data, 'Barcode' = colnames(expr_values)) + t_sub_expr_data_DT <- + tibble::tibble({{ selected_genes }} := unname(subset_expr_data), "Barcode" = names(subset_expr_data)) + + # data.table::setnames(t_sub_expr_data_DT, 'selected_gene', selected_genes) + } else { + subset_expr_data <- + expr_values[rownames(expr_values) %in% selected_genes, ] + t_sub_expr_data <- t(subset_expr_data) + t_sub_expr_data_DT <- tibble::as.tibble(t_sub_expr_data) + # t_sub_expr_data_DT <- data.table::as.data.table(t_sub_expr_data) + t_sub_expr_data_DT <- t_sub_expr_data_DT %>% + dplyr::mutate(Barcode = rownames(t_sub_expr_data)) + # t_sub_expr_data_DT[, Barcode := rownames(t_sub_expr_data)] + } + + ## get spatial cell locations + cell_locations <- metadata(sce)$spatial_locs + + + ## get cell metadata + cell_metadata <- colData(sce)[, c("Barcode", sdimx, sdimy)] + cell_metadata <- as.data.frame(cell_metadata) + if (nrow(cell_metadata) == 0) { + cell_locations_metadata <- cell_locations + } else { + cell_locations_metadata <- cell_metadata + } + + cell_locations_metadata_genes <- + merge(cell_locations_metadata, t_sub_expr_data_DT, by = "Barcode") + + + ## plotting ## + savelist <- list() + + for (gene in selected_genes) { + pl <- ggplot2::ggplot() + + ggplot2::scale_y_reverse() + pl <- pl + ggplot2::theme_classic() + + + + ### plot cells ### + + ## set gradient limits if needed ## + if (!is.null(gradient_limits) & + is.vector(gradient_limits) & length(gradient_limits) == 2) { + lower_lim <- gradient_limits[[1]] + upper_lim <- gradient_limits[[2]] + numeric_data <- cell_locations_metadata_genes[[gene]] + limit_numeric_data <- + ifelse( + numeric_data > upper_lim, + upper_lim, + ifelse(numeric_data < lower_lim, lower_lim, numeric_data) + ) + cell_locations_metadata_genes[[gene]] <- limit_numeric_data + } + + ## with border ## + if (point_shape == "border") { + if (scale_alpha_with_expression == TRUE) { + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_genes, + aes_string( + x = sdimx, + y = sdimy, + fill = gene, + alpha = gene + ), + shape = 21, + color = point_border_col, + size = point_size, + stroke = point_border_stroke, + show.legend = show_legend + ) + } else { + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_genes, + aes_string( + x = sdimx, + y = sdimy, + fill = gene + ), + shape = 21, + color = point_border_col, + size = point_size, + stroke = point_border_stroke, + show.legend = show_legend, + alpha = point_alpha + ) + } + + ## scale and labs ## + if (is.null(gradient_midpoint)) { + gradient_midpoint <- stats::median( + NA^(cell_locations_metadata_genes[[gene]] == 0) * cell_locations_metadata_genes[[gene]], + na.rm = TRUE + ) + } + pl <- pl + ggplot2::scale_alpha_continuous(guide = "none") + pl <- + pl + ggplot2::scale_fill_gradient2( + low = cell_color_gradient[[1]], + mid = cell_color_gradient[[2]], + high = cell_color_gradient[[3]], + midpoint = gradient_midpoint, + guide = ggplot2::guide_colorbar(title = "") + ) + pl <- + pl + ggplot2::labs(x = "coord x", y = "coord y", title = gene) + } + + ## no border ## + if (point_shape == "no_border") { + if (scale_alpha_with_expression == TRUE) { + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_genes, + aes_string( + x = sdimx, + y = sdimy, + color = gene, + alpha = gene + ), + shape = 19, + size = point_size, + show.legend = show_legend + ) + } else { + pl <- + pl + ggplot2::geom_point( + data = cell_locations_metadata_genes, + aes_string( + x = sdimx, + y = sdimy, + color = gene + ), + shape = 19, + size = point_size, + show.legend = show_legend, + alpha = point_alpha + ) + } + + ## scale and labs ## + + if (is.null(gradient_midpoint)) { + gradient_midpoint <- stats::median( + NA^(cell_locations_metadata_genes[[gene]] == 0) * cell_locations_metadata_genes[[gene]], + na.rm = TRUE + ) + } + + pl <- pl + ggplot2::scale_alpha_continuous(guide = "none") + pl <- + pl + ggplot2::scale_color_gradient2( + low = cell_color_gradient[[1]], + mid = cell_color_gradient[[2]], + high = cell_color_gradient[[3]], + midpoint = gradient_midpoint, + guide = ggplot2::guide_colorbar(title = "") + ) + pl <- + pl + ggplot2::labs(x = "coord x", y = "coord y", title = gene) + } + + ## theme ## + pl <- + pl + ggplot2::theme( + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.text = ggplot2::element_text(size = legend_text), + axis.title = ggplot2::element_text(size = axis_title), + axis.text = ggplot2::element_text(size = axis_text), + panel.grid = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = background_color) + ) + + savelist[[gene]] <- pl + } + + # combine plots with cowplot + combo_plot <- cowplot::plot_grid( + plotlist = savelist, + ncol = cow_n_col, + rel_heights = cow_rel_h, + rel_widths = cow_rel_w, + align = cow_align + ) +} diff --git a/R/visualization.R b/R/visualization.R index 046e4240..de4bf3c9 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -311,6 +311,7 @@ save_selected_markers_plots_files <- function(selected_markers_plots, selected_m #' @param sce_dimred A `SingleCellExperiment` object with computed dimreds specified in `dimred_names`. #' @param dimred_names A character vector: dimred names to use for plotting. #' @param cluster_df A tibble. +#' @param spatial A logical vector, TRUE for enable pseudotissue visualization for spatial transcriptomics datasets #' @param out_dir A character scalar: output directory in which PDF and PNG files will be saved. #' @return A tibble. *Output target*: `dimred_plots_clustering` #' @@ -319,15 +320,18 @@ save_selected_markers_plots_files <- function(selected_markers_plots, selected_m dimred_plots_clustering_fn <- function(sce_dimred, dimred_names, cluster_df, + spatial=FALSE, out_dir = NULL) { cluster_df <- tidyr::crossing(cluster_df, dimred_name = dimred_names) - + res <- lapply_rows(cluster_df, FUN = function(par) { dimred_name <- par$dimred_name dimred_name_upper <- str_to_upper(dimred_name) + cell_data <- tibble::tibble(x = par$cell_membership) + print(cell_data) colnames(cell_data) <- par$sce_column - + p <- plotReducedDim_mod( sce_add_colData(sce_dimred, cell_data), dimred = dimred_name, @@ -338,7 +342,14 @@ dimred_plots_clustering_fn <- function(sce_dimred, use_default_ggplot_palette = TRUE, legend_title = "Cluster" ) - + if (spatial == TRUE) { + palete <- c(scales::hue_pal()(par$n_clusters)) + p_spat <- visualized_spots(sce_add_colData(sce_dimred, cell_data), + cell_color = par$sce_column, color_as_factor = F, + point_shape = "border", cell_color_code = palete, show_legend = F + ) + p <- cowplot::plot_grid(p, p_spat, ncol = 2, nrow = 1, rel_widths = c(1, 1.5)) + } if (is_null(out_dir)) { out_pdf_file <- NA_character_ out_png_file <- NA_character_ @@ -346,9 +357,9 @@ dimred_plots_clustering_fn <- function(sce_dimred, out_pdf_file <- fs::path(out_dir, glue("{par$sce_column}_{dimred_name}.pdf")) out_png_file <- out_pdf_file fs::path_ext(out_png_file) <- "png" - + p <- tryCatch({ - save_pdf(list(p), out_pdf_file, stop_on_error = TRUE) + save_pdf(list(p), out_pdf_file, stop_on_error = TRUE,width=10) ggplot2::ggsave( filename = out_png_file, plot = p, @@ -356,38 +367,38 @@ dimred_plots_clustering_fn <- function(sce_dimred, dpi = 300 ) p - }, - - error = function(e) { - if (stringr::str_detect(e$message, "Viewport has zero dimension")) { - cli_alert_warning(str_space( - "Error catched: 'Viewport has zero dimension(s)'.", - "There are probably too many levels and the legend doesn't fit into the plot.", - "Removing the legend before saving the plot image." - )) - p <- p + theme(legend.position = "none") - save_pdf(list(p), out_pdf_file) - ggplot2::ggsave( - filename = out_png_file, - plot = p, - device = "png", - dpi = 150 - ) - p - } else { - cli_abort(e$message) - } + }, + + error = function(e) { + if (stringr::str_detect(e$message, "Viewport has zero dimension")) { + cli_alert_warning(str_space( + "Error catched: 'Viewport has zero dimension(s)'.", + "There are probably too many levels and the legend doesn't fit into the plot.", + "Removing the legend before saving the plot image." + )) + p <- p + theme(legend.position = "none") + save_pdf(list(p), out_pdf_file) + ggplot2::ggsave( + filename = out_png_file, + plot = p, + device = "png", + dpi = 150 + ) + p + } else { + cli_abort(e$message) } + } ) } - + par$dimred_plot <- list(p) par$dimred_plot_out_pdf_file <- out_pdf_file par$dimred_plot_out_png_file <- out_png_file - + par }) - + res } diff --git a/README.Rmd b/README.Rmd index aa166eac..ff9c0a2d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -25,7 +25,7 @@ knitr::opts_chunk$set( [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![Docker Image CI](https://github.com/bioinfocz/scdrake/actions/workflows/docker-ci.yml/badge.svg?branch=main)](https://github.com/bioinfocz/scdrake/actions/workflows/docker-ci.yml) -`{scdrake}` is a scalable and reproducible pipeline for secondary analysis of droplet-based single-cell RNA-seq data. +`{scdrake}` is a scalable and reproducible pipeline for secondary analysis of droplet-based single-cell RNA-seq data (scRNA-seq) and spot-based spatial transcriptomics data (SRT). `{scdrake}` is an R package built on top of the `{drake}` package, a [Make](https://www.gnu.org/software/make)-like pipeline toolkit for [R language](https://www.r-project.org). @@ -34,9 +34,13 @@ The main features of the `{scdrake}` pipeline are: - Import of scRNA-seq data: [10x Genomics Cell Ranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) output, delimited table, or `SingleCellExperiment` object. -- Quality control and filtering of cells and genes, removal of empty droplets. +- Import of SRT data: + [10x Genomics Space Ranger](https://www.10xgenomics.com/support/software/space-ranger/latest/getting-started/what-is-space-ranger) + output, delimited table, or `SingleCellExperiment` object, and tissue positions file as in Space ranger. +- Quality control and filtering of cells/spots and genes, removal of empty droplets. - Higly variable genes detection, cell cycle scoring, normalization, clustering, and dimensionality reduction. -- Cell type annotation. +- Spatially variable genes detection (for SRT data) +- Cell type annotation using reference sets, cell type annotation using user-provided marker genes. - Integration of multiple datasets. - Computation of cluster markers and differentially expressed genes between clusters (denoted as "contrasts"). - Rich graphical and HTML outputs based on customizable RMarkdown documents. @@ -378,7 +382,7 @@ By contributing to this project, you agree to abide by its terms. ### Funding This work was supported by [ELIXIR CZ](https://www.elixir-czech.cz) research infrastructure project -(MEYS Grant No: LM2018131) including access to computing and storage facilities. +(MEYS Grant No: LM2018131 and LM2023055) including access to computing and storage facilities. ### Software and methods used by `{scdrake}` diff --git a/README.md b/README.md index f080793b..533c2058 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ - # scdrake [![NEWS: @@ -18,7 +17,8 @@ experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](h CI](https://github.com/bioinfocz/scdrake/actions/workflows/docker-ci.yml/badge.svg?branch=main)](https://github.com/bioinfocz/scdrake/actions/workflows/docker-ci.yml) `{scdrake}` is a scalable and reproducible pipeline for secondary -analysis of droplet-based single-cell RNA-seq data. `{scdrake}` is an R +analysis of droplet-based single-cell RNA-seq data (scRNA-seq) and +spot-based spatial transcriptomics data (SRT). `{scdrake}` is an R package built on top of the `{drake}` package, a [Make](https://www.gnu.org/software/make)-like pipeline toolkit for [R language](https://www.r-project.org). @@ -28,11 +28,17 @@ The main features of the `{scdrake}` pipeline are: - Import of scRNA-seq data: [10x Genomics Cell Ranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) output, delimited table, or `SingleCellExperiment` object. -- Quality control and filtering of cells and genes, removal of empty - droplets. +- Import of SRT data: [10x Genomics Space + Ranger](https://www.10xgenomics.com/support/software/space-ranger/latest/getting-started/what-is-space-ranger) + output, delimited table, or `SingleCellExperiment` object, and + tissue positions file as in Space ranger. +- Quality control and filtering of cells/spots and genes, removal of + empty droplets. - Higly variable genes detection, cell cycle scoring, normalization, clustering, and dimensionality reduction. -- Cell type annotation. +- Spatially variable genes detection (for SRT data) +- Cell type annotation using reference sets, cell type annotation + using user-provided marker genes. - Integration of multiple datasets. - Computation of cluster markers and differentially expressed genes between clusters (denoted as “contrasts”). @@ -107,27 +113,21 @@ you can use `singularity pull docker-daemon:` You can pull the Docker image with the latest stable `{scdrake}` version using -``` bash -docker pull jirinovo/scdrake:1.5.2 -singularity pull docker:jirinovo/scdrake:1.5.2 -``` + docker pull jirinovo/scdrake:1.5.2 + singularity pull docker:jirinovo/scdrake:1.5.2 or list available versions in [our Docker Hub repository](https://hub.docker.com/r/jirinovo/scdrake/tags). For the latest development version use -``` bash -docker pull jirinovo/scdrake:latest -singularity pull docker:jirinovo/scdrake:latest -``` + docker pull jirinovo/scdrake:latest + singularity pull docker:jirinovo/scdrake:latest **Note for Mac users with M1/M2 chipsets**: until version 1.5.0 (inclusive), `arm64` images are available. -``` bash -docker pull jirinovo/scdrake:1.5.0-bioc3.15-arm64 -``` + docker pull jirinovo/scdrake:1.5.0-bioc3.15-arm64 ### Running the container @@ -136,39 +136,33 @@ and Windows or MacOS running Docker Desktop. First make a shared directory that will be mounted to the container: -``` bash -mkdir ~/scdrake_projects -cd ~/scdrake_projects -``` + mkdir ~/scdrake_projects + cd ~/scdrake_projects And run the image that will expose RStudio Server on port 8787 on your host: -``` bash -docker run -d \ - -v $(pwd):/home/rstudio/scdrake_projects \ - -p 8787:8787 \ - -e USERID=$(id -u) \ - -e GROUPID=$(id -g) \ - -e PASSWORD=1234 \ - jirinovo/scdrake:1.5.2 -``` + docker run -d \ + -v $(pwd):/home/rstudio/scdrake_projects \ + -p 8787:8787 \ + -e USERID=$(id -u) \ + -e GROUPID=$(id -g) \ + -e PASSWORD=1234 \ + jirinovo/scdrake:1.5.2 For Singularity, also make shared directories and execute the container (“run and forget”): -``` bash -mkdir -p ~/scdrake_singularity -cd ~/scdrake_singularity -mkdir -p home/${USER} scdrake_projects -singularity exec \ - -e \ - --no-home \ - --bind "home/${USER}/:/home/${USER},scdrake_projects/:/home/${USER}/scdrake_projects" \ - --pwd "/home/${USER}/scdrake_projects" \ - path/to/scdrake_image.sif \ - scdrake -``` + mkdir -p ~/scdrake_singularity + cd ~/scdrake_singularity + mkdir -p home/${USER} scdrake_projects + singularity exec \ + -e \ + --no-home \ + --bind "home/${USER}/:/home/${USER},scdrake_projects/:/home/${USER}/scdrake_projects" \ + --pwd "/home/${USER}/scdrake_projects" \ + path/to/scdrake_image.sif \ + scdrake ## Installing `{scdrake}` manually (not recommended) @@ -184,7 +178,7 @@ Click for details - For MacOS: `$ brew install libxml2 imagemagick@6 harfbuzz fribidi libgit2 geos pandoc` -### Install R \>= 4.2 +### Install R >= 4.2 See @@ -197,34 +191,26 @@ management of local R libraries. It is intended to be used on a per-project basis, i.e. each project should use its own library of R packages. -``` r -install.packages("renv") -``` + install.packages("renv") ### Initialize a new `{renv}` library Switch to directory where you will analyze data and initialize a new `{renv}` library: -``` r -renv::consent(TRUE) -renv::init() -``` + renv::consent(TRUE) + renv::init() Now exit and run again R. You should see a message that renv library has been activated. ### Install BiocManager -``` r -renv::install("BiocManager") -``` + renv::install("BiocManager") ### Install Bioconductor 3.15 -``` r -BiocManager::install(version = "3.15") -``` + BiocManager::install(version = "3.15") ### Restore `{scdrake}` dependencies from lockfile @@ -232,19 +218,15 @@ BiocManager::install(version = "3.15") packages (and other things) into a lockfile. Such lockfile is available for `{scdrake}` and you can use it to install all dependencies by -``` r -## -- This is a lockfile for the latest stable version of scdrake. -download.file("https://raw.githubusercontent.com/bioinfocz/scdrake/1.5.2/renv.lock") -## -- You can increase the number of CPU cores to speed up the installation. -options(Ncpus = 2) -renv::restore(lockfile = "renv.lock", repos = BiocManager::repositories()) -``` + ## -- This is a lockfile for the latest stable version of scdrake. + download.file("https://raw.githubusercontent.com/bioinfocz/scdrake/1.5.2/renv.lock") + ## -- You can increase the number of CPU cores to speed up the installation. + options(Ncpus = 2) + renv::restore(lockfile = "renv.lock", repos = BiocManager::repositories()) For the lockfile for the latest development version use -``` r -download.file("https://raw.githubusercontent.com/bioinfocz/scdrake/main/renv.lock") -``` + download.file("https://raw.githubusercontent.com/bioinfocz/scdrake/main/renv.lock") ### Install the `{scdrake}` package @@ -252,14 +234,12 @@ Now we can finally install the `{scdrake}` package, but using a non-standard approach - without its dependencies (which are already installed from the lockfile). -``` r -remotes::install_github( - "bioinfocz/scdrake@1.5.2", - dependencies = FALSE, upgrade = FALSE, - keep_source = TRUE, build_vignettes = TRUE, - repos = BiocManager::repositories() -) -``` + remotes::install_github( + "bioinfocz/scdrake@1.5.2", + dependencies = FALSE, upgrade = FALSE, + keep_source = TRUE, build_vignettes = TRUE, + repos = BiocManager::repositories() + ) For the latest development version use `"bioinfocz/scdrake"`. @@ -267,9 +247,7 @@ For the latest development version use `"bioinfocz/scdrake"`. Optionally, you can install `{scdrake}`’s CLI scripts with -``` r -scdrake::install_cli() -``` + scdrake::install_cli() CLI should be now accessible as a `scdrake` command. By default, the CLI is installed into `~/.local/bin`, which is usually present in the `PATH` @@ -341,28 +319,30 @@ website of the current development version. - General information: - Pipeline overview: `vignette("pipeline_overview")` - FAQ & Howtos: `vignette("scdrake_faq")` + - Spatial extention: `vignette("scdrake_spatial")` - Command line interface (CLI): `vignette("scdrake_cli")` - Config files (internals): `vignette("scdrake_config")` - Environment variables: `vignette("scdrake_envvars")` - General configs: - - Pipeline config -\> `vignette("config_pipeline")` - - Main config -\> `vignette("config_main")` + - Pipeline config -> `vignette("config_pipeline")` + - Main config -> `vignette("config_main")` - Pipelines and stages: - Single-sample pipeline: - Stage `01_input_qc`: reading in data, filtering, quality - control -\> `vignette("stage_input_qc")` + control -> `vignette("stage_input_qc")` - Stage `02_norm_clustering`: normalization, HVG selection, - dimensionality reduction, clustering, cell type annotation - -\> `vignette("stage_norm_clustering")` + SVG selection, dimensionality reduction, clustering, + (manual) cell type annotation -> + `vignette("stage_norm_clustering")` - Integration pipeline: - - Stage `01_integration`: reading in data and integration -\> - `vignette("stage_integration")` + - Stage `01_integration`: reading in data and integration + -> `vignette("stage_integration")` - Stage `02_int_clustering`: post-integration clustering and - cell annotation -\> `vignette("stage_int_clustering")` + cell annotation -> `vignette("stage_int_clustering")` - Common stages: - - Stage `cluster_markers` -\> + - Stage `cluster_markers` -> `vignette("stage_cluster_markers")` - - Stage `contrasts` (differential expression) -\> + - Stage `contrasts` (differential expression) -> `vignette("stage_contrasts")` We encourage all users to read @@ -384,9 +364,7 @@ Below is the citation output from using `citation("scdrake")` in R. Please run this yourself to check for any updates on how to cite **scdrake**. -``` r -print(citation("scdrake"), bibtex = TRUE) -``` + print(citation("scdrake"), bibtex = TRUE) To cite package ‘scdrake’ in publications use: @@ -422,7 +400,7 @@ but if you need e.g. a general help. If you want to contribute to `{scdrake}`, read the [contribution guide](.github/CONTRIBUTING.md), please. All pull requests are welcome! -:slightly_smiling_face: +:slightly\_smiling\_face: ## Code of Conduct @@ -436,8 +414,8 @@ contributing to this project, you agree to abide by its terms. ### Funding This work was supported by [ELIXIR CZ](https://www.elixir-czech.cz) -research infrastructure project (MEYS Grant No: LM2018131) including -access to computing and storage facilities. +research infrastructure project (MEYS Grant No: LM2018131 and LM2023055) +including access to computing and storage facilities. ### Software and methods used by `{scdrake}` diff --git a/_pkgdown.yml b/_pkgdown.yml index 1148974a..019ce220 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,7 +19,7 @@ repo: user: https://github.com/bioinfocz navbar: structure: - left: [home, intro, scdrake_integration, pipeline_overview, faq, articles, reference, news] + left: [home, intro, scdrake_integration, pipeline_overview, spatial, faq, articles, reference, news] right: [github] components: pipeline_overview: @@ -28,6 +28,9 @@ navbar: scdrake_integration: text: Integration pipeline guide href: articles/scdrake_integration.html + spatial: + text: Spatial extention + href: articles/scdrake_spatial.html faq: text: FAQ & Howtos href: articles/scdrake_faq.html @@ -49,6 +52,7 @@ articles: - scdrake_cli - scdrake_config - scdrake_envvars + - scdrake_spatial - title: General configs navbar: General configs @@ -152,6 +156,9 @@ reference: - subtitle: Cell type annotation - contents: - has_concept("sc_cell_annotation") + - subtitle: Manual annotation + - contents: + - has_concept("manual_annotation") - subtitle: Cluster markers - contents: - has_concept("sc_cluster_markers") @@ -171,10 +178,15 @@ reference: - subtitle: SingleCellExperiment object-related functions - contents: - has_concept("sc_sce") + - subtitle: SingleCellExperiment object-related spatial functions + - contents: + - has_concept("spatial_sce") - subtitle: Visualization - contents: - has_concept("sce_visualization") - + - subtitle: Spatial visualization + - contents: + - has_concept("spatial_visualization") - title: Misc functions - subtitle: HTML - contents: diff --git a/inst/Rmd/single_sample/01_input_qc_children/empty_droplets_spat.Rmd b/inst/Rmd/single_sample/01_input_qc_children/empty_droplets_spat.Rmd new file mode 100644 index 00000000..6375f456 --- /dev/null +++ b/inst/Rmd/single_sample/01_input_qc_children/empty_droplets_spat.Rmd @@ -0,0 +1,24 @@ +## Detecting empty spots + +In droplet-based Single-cell RNA-Seq`DropletUtils::emptyDrops()` computes Monte Carlo p-values based on a Dirichlet-multinomial model of sampling molecules into droplets. Same approach can be used for spots in spot-based spatial transcriptomics data. +`emptyDrops()` assumes that libraries with total UMI counts below a certain threshold (`r cfg$EMPTY_DROPLETS_LOWER` by default) +correspond to empty droplets/spots. +These are used to estimate the ambient expression profile against which the remaining libraries are tested. +Under this definition, these low-count libraries cannot be tissue containing spots and are excluded from the hypothesis testing. + +**Number of non-empty spots: `r sce_valid_cells_info$dim[2]`** + +Non-empty spots should show up with large negative log-probabilities or very large total UMI counts (based on the knee point). + +```{r} +is_cell <- empty_droplets$FDR <= cfg$EMPTY_DROPLETS_FDR_THRESHOLD +plot(empty_droplets$Total, -empty_droplets$LogProb, col = ifelse(is_cell, "red", "black"), xlab = "Total UMI count", ylab = "-Log Probability") +``` + +Spots with empty (spots) FDR > `r cfg$EMPTY_DROPLETS_FDR_THRESHOLD` have been removed. Filtered dataset summary: + +```{r} +cat(sce_valid_cells_info$str) +``` + +`r scdrake::format_used_functions("DropletUtils::emptyDrops()")` diff --git a/inst/Rmd/single_sample/01_input_qc_spatial.Rmd b/inst/Rmd/single_sample/01_input_qc_spatial.Rmd new file mode 100644 index 00000000..7a64898d --- /dev/null +++ b/inst/Rmd/single_sample/01_input_qc_spatial.Rmd @@ -0,0 +1,315 @@ +--- +title: "01 - Data load and QC" +author: "Made by the [scdrake pipeline](https://bioinfocz.github.io/scdrake) with spatial extension" +institute: | + Laboratory of Genomics and Bioinformatics + Institute of Molecular Genetics of the ASCR + https://img.cas.cz +date: "`r glue::glue('Document generated: {format(Sys.time(), \"%Y-%m-%d %H:%M:%S %Z%z\")}')`" +output: + html_document: + toc: true + toc_depth: 4 + toc_float: true + number_sections: false + theme: "flatly" + self_contained: true + code_download: true + df_print: "paged" +params: + css_file: !expr here::here("Rmd/common/stylesheet.css") + drake_cache_dir: !expr here::here(".drake") +css: "`r params$css_file`" +--- + +```{r, message = FALSE, warning = FALSE} +suppressPackageStartupMessages(library(magrittr)) +if (rlang::is_true(getOption("knitr.in.progress"))) { + params_ <- scdrake::scdrake_list(params) +} +drake_cache_dir <- params_$drake_cache_dir + +drake::loadd( + config_main, config_input_qc, empty_droplets, sce_valid_cells_info, barcode_ranks, + qc_filter, custom_filter, sce_qc_filter_rowSums, sce_custom_filter_rowSums, + path = drake_cache_dir +) + +cfg <- config_input_qc +empty_droplets_enabled <- cfg$EMPTY_DROPLETS_ENABLED +cell_filtering_enabled <- cfg$ENABLE_CELL_FILTERING +gene_filtering_enabled <- cfg$ENABLE_GENE_FILTERING + +input_type <- cfg$INPUT_DATA$type +filtering_type <- ifelse(cfg$SAVE_DATASET_SENSITIVE_FILTERING, "dataset-sensitive", "custom") +``` + +*** + +```{r, child = here::here("Rmd/common/_header.Rmd")} +``` + +*** + +```{r, results = "asis"} +if (input_type == "cellranger") { + scdrake::md_header("Input data: 10x Genomics Space Ranger data", 1) + cat(scdrake::str_space( + "The feature-barcode matrix was imported from", + "[Space Ranger](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger)", + "output (the official quantification tool from 10x Genomics)." + )) +} else if (input_type == "table") { + scdrake::md_header("Input data: delimited text (table)", 1) + cat("The feature-barcode matrix was imported from a delimited file.") +} else if (input_type == "sce") { + scdrake::md_header("Input data: `SingleCellExperiment` object", 1) + cat("The object holding experimental data (feature-barcode matrix, gene annotation etc.) was imported from a Rds file.") +} +``` + +Each row of feature-barcode matrix corresponds to a gene, while each column corresponds to a spot barcode. +Summary of imported data: + +```{r} +cat(drake::readd(sce_raw_info, path = drake_cache_dir)$str) +``` + +`r scdrake::format_used_functions("DropletUtils::read10xCounts()")` + +*** + +# Empty droplets + +In droplet-based single cell RNA-seq, empty droplets often contain RNA from the ambient solution, resulting in non-zero counts after debarcoding. In spot-based spatial transcriptomics, a residual tissue can be accidentally placed on the spots, resulting in non-zero counts in such spot. +It is desired to discard such droplets/spots. + +## Barcode rank plot + +A useful diagnostic for both droplet- and spot- based data is the barcode rank plot, which shows the total UMI (log-)count for each +barcode on the y-axis and the (log-)rank on the x-axis. +This is effectively a transposed empirical cumulative density plot with log-transformed axes. +It is useful as it allows examine the distribution of total UMI counts across barcodes, focusing on those with the largest counts. + +```{r, message = FALSE, warning = FALSE, results = "hold"} +uniq <- !duplicated(barcode_ranks$rank) +plot(barcode_ranks$rank[uniq], barcode_ranks$total[uniq], log = "xy", xlab = "Rank", ylab = "Total") +o <- order(barcode_ranks$rank) +lines(barcode_ranks$rank[o], barcode_ranks$fitted[o], col = "red") + +abline(h = metadata(barcode_ranks)$knee, col = "dodgerblue", lty = 2) +abline(h = metadata(barcode_ranks)$inflection, col = "forestgreen", lty = 2) +if (empty_droplets_enabled) { + abline(h = cfg$EMPTY_DROPLETS_LOWER, col = "firebrick", lty = 2) + legend( + "bottomleft", + lty = 2, + col = c("dodgerblue", "forestgreen", "firebrick"), + legend = c("knee", "inflection", "emptyDroplets lower bound") + ) +} else { + legend( + "bottomleft", + lty = 2, + col = c("dodgerblue", "forestgreen"), + legend = c("knee", "inflection") + ) +} +``` + +The knee and inflection points on the curve mark the transition between two components of the total UMI count distribution. +This is assumed to represent the difference between empty droplets with little RNA and spots containing much more RNA. + +```{r, results = "asis"} +if (empty_droplets_enabled) { + cat( + "The emptyDroplets lower bound specifies at or below which number of the total UMI count all barcodes", + "are assumed to correspond to empty droplets." + ) +} else { + cat("Removal of empty droplets was disabled. You can enable it by setting `EMPTY_DROPLETS_ENABLED` parameter to `TRUE`.") +} +``` + +```{r, child = here::here("Rmd/single_sample/01_input_qc_children/empty_droplets_spat.Rmd"), eval = tryCatch(empty_droplets_enabled, error = function(e){})} +``` + +*** + +# Gene + Spot quality filtering + +## Pre-filtering QC + +Given sets of mitochondrial and ribosomal genes in the data, the `scater` package automatically calculates +several per-spot QC metrics: + +- Number of UMI. +- Number of detected genes (non-zero UMI count). +- Percentage of expressed mitochondrial (ribosomal) genes ($\frac {UMI_{mitochondrial}} {UMI_{sum}} * 100$). + +Then we can use two different methods to filter spots based on the metrics above: + +- **Custom filtering**: a standard approach is to filter spots with low amount of reads as well as genes that are + present in at least a certain amount of spots, using fixed thresholds. While simple, using fixed thresholds requires + knowledge of the experiment and of the experimental protocol. +- **Dataset-sensitive filtering**: an alternative approach is to use adaptive, data-driven thresholds to identify + outlying spots, based on the set of QC metrics just calculated. We identify spots that are outliers for the various + QC metrics, based on the median absolute deviation (MAD) from the median value of each QC metric across all spots. + Specifically, a value is considered an outlier if it is more than `r cfg$MAD_THRESHOLD` MADs from the median in + the "problematic" direction. + +Doublets detection and/or removal is not recomended for spot-based spatial transcriptomics data. + +Now we can plot some of the QC features. spots are colored by `discard_qc`, meaning if a spot would be discarded by +MAD thresholding on a QC metric. + +```{r, fig.height = 10, fig.width = 8} +cowplot::plot_grid(plotlist = drake::readd(sce_unfiltered_plotlist, path = drake_cache_dir), ncol = 2) +``` + +Visualisation of prefiltering (raw) QC metrics in spatial coordinates. + +```{r, fig.height = 10, fig.width = 8} +pl <- plot_spat_visuals(drake::readd(sce_unfiltered, path = drake_cache_dir)) +cowplot::plot_grid(plotlist = pl,ncol=2) +``` + +`r scdrake::format_used_functions("scuttle::perCellQCMetrics()")` + +## Filtering {.tabset} + +### Dataset-sensitive filters + +#### Spot filtering + +```{r, child = here::here("Rmd/single_sample/01_input_qc_children/cell_filtering_qc.Rmd"), eval = tryCatch(cell_filtering_enabled, error = function(e){})} +``` + +```{r, results = "asis", eval = tryCatch(!cell_filtering_enabled, error = function(e){})} +cat("Spot filtering was disabled.") +``` + +#### Gene filtering + +```{r, child = here::here("Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd"), eval = tryCatch(gene_filtering_enabled, error = function(e){})} +``` + +```{r, results = "asis", eval = tryCatch(!gene_filtering_enabled, error = function(e){})} +cat("Gene filtering was disabled.") +``` + +### Custom filters + +#### Spot filtering + +```{r, child = here::here("Rmd/single_sample/01_input_qc_children/cell_filtering_custom.Rmd"), eval = tryCatch(cell_filtering_enabled, error = function(e){})} +``` + +```{r, results = "asis", eval = tryCatch(!cell_filtering_enabled, error = function(e){})} +cat("Spot filtering was disabled.") +``` + +#### Gene filtering + +```{r, child = here::here("Rmd/single_sample/01_input_qc_children/gene_filtering_custom.Rmd"), eval = tryCatch(gene_filtering_enabled, error = function(e){})} +``` + +```{r, results = "asis", eval = tryCatch(!gene_filtering_enabled, error = function(e){})} +cat("Gene filtering was disabled.") +``` + +*** + +## Post-filtering QC + +**Final filtering selection: using `r filtering_type` filtering.** + +```{r} +cat(drake::readd(sce_final_input_qc_info, path = drake_cache_dir)$str) +``` + +### Spot and gene number history + +```{r} +scdrake::render_bootstrap_table(drake::readd(sce_history, path = drake_cache_dir), full_width = FALSE, position = "left") +``` + +```{r} +print(drake::readd(sce_history_plot, path = drake_cache_dir)) +``` + +### Dataset-sensitive filtering + +Plots of QC metrics after dataset-sensitive filtering. +`discard_custom` means if given spot was discarded in **custom filtering**. + +```{r, fig.height = 10, fig.width = 8} +cowplot::plot_grid(plotlist = drake::readd(sce_qc_filter_genes_plotlist, path = drake_cache_dir), ncol = 2) +``` + +Plots in spatial coordinates of QC metrics after dataset-sensitive filtering. + +```{r, fig.height = 10, fig.width = 8} + pl <- plot_spat_visuals(drake::readd(sce_qc_filter, path = drake_cache_dir)) +cowplot::plot_grid(plotlist = pl,ncol=2) +``` + + +### Filtering based on custom filters + +Plots of QC metrics after custom filtering. +`discard_qc` means if given spot was discarded in **dataset-sensitive filtering**. + +```{r, fig.height = 10, fig.width = 8} +cowplot::plot_grid(plotlist = drake::readd(sce_custom_filter_genes_plotlist, path = drake_cache_dir), ncol = 2) +``` + +Plots in spatial coordinates of QC metrics after custom filtering. + +```{r, fig.height = 10, fig.width = 8} + pl <- plot_spat_visuals(drake::readd(sce_custom_filter, path = drake_cache_dir)) +cowplot::plot_grid(plotlist = pl,ncol=2) +``` + + +*** + +# Gene annotation + +- Used annotation package: `r config_main$ANNOTATION_PKG` + (v`r sessioninfo::package_info(config_main$ANNOTATION_PKG, dependencies = FALSE)$loadedversion`) +- If a single ENSEMBL ID has multiple symbols, gene descriptions, or ENTREZ IDs, they are collapsed by comma (`,`). +- ENSEMBL ID is used as a symbol for ENSEMBL IDs with unknown symbols. +- ENSEMBL ID is appended to symbols having multiple ENSEMBL IDs (e.g. TBCE has both ENSG00000285053 and ENSG00000284770 + ENSEMBL IDs assigned -> its symbol is changed to TBCE_ENSG00000285053 and TBCE_ENSG00000284770). + +```{r} +drake::readd(gene_annotation, path = drake_cache_dir) %>% + head() %>% + scdrake::render_bootstrap_table() +``` + +# + +*** + +
+ Show input parameters +
+

Main config

+ +```{r} +print(config_main) +``` + +
+

Input QC config

+ +```{r} +print(cfg) +``` +
+
+ +```{r, child = here::here("Rmd/common/_footer.Rmd")} +``` diff --git a/inst/Rmd/single_sample/02_norm_clustering.Rmd b/inst/Rmd/single_sample/02_norm_clustering.Rmd index 2bb080a6..c1d7f507 100644 --- a/inst/Rmd/single_sample/02_norm_clustering.Rmd +++ b/inst/Rmd/single_sample/02_norm_clustering.Rmd @@ -361,6 +361,32 @@ if (!is.null(cfg$CELL_ANNOTATION_SOURCES)) { } ``` +```{r, results = "asis"} +if (isTRUE(cfg$MANUAL_ANNOTATION)) { + scdrake::md_header("Manual cell annotation", 1, extra = "{.tabset}") + scdrake::catn( + glue::glue("**Annotation was done for {cfg$ANNOTATION_CLUSTERING}**")) + cat("\n\n") + cat("For manual annotation we modified an implemented function from the Giotto package. The enrichment Z score is calculated by using method (PAGE) from Kim SY et al., BMC bioinformatics, 2005 as $$ Z = \frac{((Sm – mu)*m^\frac{1}{2})}{delta} $$. \n + For each gene in each spot/cell, mu is the fold change values versus the mean expression + and delta is the standard deviation. Sm is the mean fold change value of a specific marker gene set + and m is the size of a given marker gene set.") +} + cat("\n\n") +if (isTRUE(cfg$SPATIAL)) { +scdrake::create_a_link(href = drake::readd(plot_annotation)$anot_plot_out_pdf_file[3],text = "**Cell annotation plot**", href_rel_start = fs::path_dir(cfg$NORM_CLUSTERING_REPORT_HTML_FILE),do_cat = TRUE) + cat("\n\n") + scdrake::create_a_link(href = drake::readd(plot_annotation)$anot_plot_out_pdf_file[4],text = "**Enrichment cells plot**", href_rel_start = fs::path_dir(cfg$NORM_CLUSTERING_REPORT_HTML_FILE),do_cat = TRUE) + cat("\n\n") +} +scdrake::create_img_link(drake::readd(plot_annotation)$anot_plot_out_pdf_file[1],img_src = drake::readd(plot_annotation)$anot_plot_out_png_file[1],img_width = "450px",href_rel_start = fs::path_dir(cfg$NORM_CLUSTERING_REPORT_HTML_FILE),do_cat = TRUE) + + +scdrake::create_img_link(drake::readd(plot_annotation)$anot_plot_out_pdf_file[2],img_src = drake::readd(plot_annotation)$anot_plot_out_png_file[2],img_width = "450px",href_rel_start = fs::path_dir(cfg$NORM_CLUSTERING_REPORT_HTML_FILE),do_cat = TRUE) + +``` + +
Show input parameters
diff --git a/inst/config/single_sample/01_input_qc.default.yaml b/inst/config/single_sample/01_input_qc.default.yaml index 4cf50b74..6d8c8b7f 100644 --- a/inst/config/single_sample/01_input_qc.default.yaml +++ b/inst/config/single_sample/01_input_qc.default.yaml @@ -6,6 +6,12 @@ INPUT_DATA: target_name: "target_name" INPUT_QC_REPORT_RMD_FILE: "Rmd/single_sample/01_input_qc.Rmd" +#INPUT_QC_REPORT_RMD_FILE: "Rmd/single_sample/01_input_qc_spatial.Rmd" +############################################################################### + +### Spatial experiment ######################################################## +SPATIAL: False +SPATIAL_LOCKS: null ############################################################################### ### Subset input data (if SingleCellExperiment object is imported) ############ diff --git a/inst/config/single_sample/02_norm_clustering.default.yaml b/inst/config/single_sample/02_norm_clustering.default.yaml index 31ccdbd9..c66bb3ab 100644 --- a/inst/config/single_sample/02_norm_clustering.default.yaml +++ b/inst/config/single_sample/02_norm_clustering.default.yaml @@ -8,6 +8,10 @@ SCT_VARS_TO_REGRESS: null SCT_N_HVG: 3000 ############################################################################### +### Spatial extension +SPATIAL: False +############################################################################### + ### Highly variable genes (HVGs) selection #################################### HVG_METRIC: "gene_var" HVG_SELECTION: "top" @@ -98,6 +102,17 @@ ADDITIONAL_CELL_DATA_FILE: null CELL_GROUPINGS: null ############################################################################### +### Manual cell annotation signatures ######################################### +MANUAL_ANNOTATION: False +ANNOTATION_MARKERS: "input_data/brain_markers.csv" +SCALE_ANNOTATION: False +OVERLAP: 5 +ANNOTATION_CLUSTERING: "cluster_kmeans_k4" +SHOW_VALUE: "value" +MAKE_CELL_PLOT: True +HEATMAP_DIMRED: "umap" +############################################################################### + ### Dimensionality reduction plots ############################################ NORM_CLUSTERING_REPORT_DIMRED_NAMES: ["umap", "pca", "tsne"] NORM_CLUSTERING_REPORT_DIMRED_PLOTS_OTHER: diff --git a/man/calculate_metadata.Rd b/man/calculate_metadata.Rd new file mode 100644 index 00000000..eae9dabe --- /dev/null +++ b/man/calculate_metadata.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/manual_cell_annotation.R +\name{calculate_metadata} +\alias{calculate_metadata} +\title{Calculate metadata for manual cell/spot annotation for heatmap visualisation.} +\usage{ +calculate_metadata(sce, enrichment, clustering) +} +\arguments{ +\item{sce}{A \code{SingleCellExperiment} object} + +\item{enrichment}{precalculated enrichment score for each cell/spot} + +\item{clustering}{A vector of selected clustering used for annotation, inheritated from meta_heatmap plotting} +} +\description{ +Calculate metadata for manual cell/spot annotation for heatmap visualisation. +} +\concept{manual_annotation} diff --git a/man/create_signature_matrix_fn.Rd b/man/create_signature_matrix_fn.Rd new file mode 100644 index 00000000..7e9545bf --- /dev/null +++ b/man/create_signature_matrix_fn.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/manual_cell_annotation.R +\name{create_signature_matrix_fn} +\alias{create_signature_matrix_fn} +\title{Create signature matrix from provided file containing names with markers.} +\usage{ +create_signature_matrix_fn(markers_file) +} +\arguments{ +\item{markers_file}{A csv file containing list of annotation names with selected markers.} +} +\description{ +Create signature matrix from provided file containing names with markers. +} +\concept{manual_annotation} diff --git a/man/dimred_plots_clustering_fn.Rd b/man/dimred_plots_clustering_fn.Rd index ed94f7d8..d2fead24 100644 --- a/man/dimred_plots_clustering_fn.Rd +++ b/man/dimred_plots_clustering_fn.Rd @@ -8,6 +8,7 @@ dimred_plots_clustering_fn( sce_dimred, dimred_names, cluster_df, + spatial = FALSE, out_dir = NULL ) } @@ -18,6 +19,8 @@ dimred_plots_clustering_fn( \item{cluster_df}{A tibble.} +\item{spatial}{A logical vector, TRUE for enable pseudotissue visualization for spatial transcriptomics datasets} + \item{out_dir}{A character scalar: output directory in which PDF and PNG files will be saved.} } \value{ diff --git a/man/generate_stage_report.Rd b/man/generate_stage_report.Rd index cbe54977..200ae4ec 100644 --- a/man/generate_stage_report.Rd +++ b/man/generate_stage_report.Rd @@ -80,8 +80,10 @@ This is a list of Rmd files currently bundled with the \code{scdrake} package: #> | +-- cell_filtering_custom.Rmd #> | +-- cell_filtering_qc.Rmd #> | +-- empty_droplets.Rmd +#> | +-- empty_droplets_spat.Rmd #> | +-- gene_filtering_custom.Rmd #> | \\-- gene_filtering_qc.Rmd +#> +-- 01_input_qc_spatial.Rmd #> +-- 02_norm_clustering.Rmd #> \\-- 02_norm_clustering_simple.Rmd }\if{html}{\out{}} diff --git a/man/getDistinctColors.Rd b/man/getDistinctColors.Rd new file mode 100644 index 00000000..8992c2de --- /dev/null +++ b/man/getDistinctColors.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_visualization.R +\name{getDistinctColors} +\alias{getDistinctColors} +\title{A helper function for asigning colors in pseudotissue visualization} +\usage{ +getDistinctColors(n) +} +\arguments{ +\item{n}{Number of desired colors.} +} +\value{ +A character vector of distinct colors. +} +\description{ +Adapted function from Giotto package \link{Dries et al, 2021}, rewrite for use in scdrake package for a SingleCellExperiment object +} +\concept{spatial_visualization} diff --git a/man/get_clustering_subplan.Rd b/man/get_clustering_subplan.Rd index 584d0177..65825755 100644 --- a/man/get_clustering_subplan.Rd +++ b/man/get_clustering_subplan.Rd @@ -13,6 +13,7 @@ get_clustering_subplan( dimred_plots_out_dir, other_plots_out_dir, is_integration, + spatial, seed = 1 ) } @@ -29,6 +30,8 @@ get_clustering_subplan( \item{is_integration}{A logical scalar: if \code{TRUE}, clustering results will be named with \verb{cluster_int_*} prefix.} +\item{spatial}{A logical scalar: if \code{TRUE}, enabling pseudotissue spatial visualization for spatial transcriptomics datasets.} + \item{seed}{An integer scalar: random seed for SC3.} } \value{ diff --git a/man/meta_heatmap_ploting.Rd b/man/meta_heatmap_ploting.Rd new file mode 100644 index 00000000..2de5dbce --- /dev/null +++ b/man/meta_heatmap_ploting.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/manual_cell_annotation.R +\name{meta_heatmap_ploting} +\alias{meta_heatmap_ploting} +\title{Manual annotation heatmap plotting} +\usage{ +meta_heatmap_ploting( + sce, + clus_cor_method = "pearson", + clus_cluster_method = "complete", + values_cor_method = "pearson", + values_cluster_method = "complete", + clustering, + show_value = "value", + gradient_midpoint = 0, + gradient_limits = NULL, + x_text_size = 10, + x_text_angle = 45, + y_text_size = 10, + strip_text_size = 8, + low = "blue", + mid = "white", + high = "red", + spatial = FALSE, + dimred = dimred, + make_cell_plot = FALSE, + out_dir = NULL +) +} +\arguments{ +\item{sce}{A \code{SingleCellAnnotation} object} + +\item{clustering}{Selected clustering} + +\item{spatial}{Logical vector, if include spot images for each anotation} + +\item{make_cell_plot}{Logical vector, if include pseudotissue images, for spatial extension} +} +\description{ +Manual annotation heatmap plotting +} +\concept{manual_annotation} diff --git a/man/plot_spat_point_layer_ggplot.Rd b/man/plot_spat_point_layer_ggplot.Rd new file mode 100644 index 00000000..ca48153d --- /dev/null +++ b/man/plot_spat_point_layer_ggplot.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_visualization.R +\name{plot_spat_point_layer_ggplot} +\alias{plot_spat_point_layer_ggplot} +\title{A function for pseudotissue visualization} +\usage{ +plot_spat_point_layer_ggplot( + ggobject, + sdimx = NULL, + sdimy = NULL, + cell_locations_metadata_selected, + cell_locations_metadata_other, + cell_color = NULL, + color_as_factor = T, + cell_color_code = NULL, + cell_color_gradient = c("yellow", "white", "red"), + gradient_midpoint = NULL, + gradient_limits = NULL, + select_cells = NULL, + point_size = 2, + point_alpha = 1, + point_border_col = "lightgrey", + point_border_stroke = 0.1, + label_size = 4, + label_fontface = "bold", + show_other_cells = T, + other_cell_color = "lightgrey", + other_point_size = 1, + show_legend = TRUE +) +} +\arguments{ +\item{ggobject}{An inheriated object from visualized_spots.} + +\item{cell_locations_metadata_selected, cell_locations_metadata_other, cell_color, ...}{Inheriated, passed to ggplot2 object.} +} +\value{ +A \code{ggplot2} object. +} +\description{ +Adapted function from Giotto package \link{Dries et al, 2021}, rewrite for use in scdrake package in a SingleCellExperiment object. +} +\concept{spatial_visualization} diff --git a/man/plot_spat_visuals.Rd b/man/plot_spat_visuals.Rd new file mode 100644 index 00000000..ee4dd82e --- /dev/null +++ b/man/plot_spat_visuals.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_visualization.R +\name{plot_spat_visuals} +\alias{plot_spat_visuals} +\title{A function for visualization selected qc matrices in pseudotissue visualization} +\usage{ +plot_spat_visuals(sce) +} +\arguments{ +\item{sce}{A \code{SingleCellExperiment} object.} +} +\value{ +A list of plots. +} +\description{ +Adapted function from Giotto package \link{Dries et al, 2021}, rewrite for use in scdrake package for a SingleCellExperiment object. Helper function for users, not in core scdrake package. +} +\concept{spatial_visualization} diff --git a/man/run_page_man_annotation.Rd b/man/run_page_man_annotation.Rd new file mode 100644 index 00000000..88971fa5 --- /dev/null +++ b/man/run_page_man_annotation.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/manual_cell_annotation.R +\name{run_page_man_annotation} +\alias{run_page_man_annotation} +\title{Calculate and run PAGE annotation.} +\usage{ +run_page_man_annotation( + sign_matrix, + sce, + values = "logcounts", + scale = NULL, + overlap = 5, + reverse_log_scale = FALSE, + selected_annotation = NULL, + output_enrichment = "zscore" +) +} +\arguments{ +\item{sign_matrix}{precalculated signature matrix} + +\item{sce}{A \code{SingleCellExperiment} object} + +\item{values}{A expresion indicating which values use, logcounts as default} +} +\description{ +Calculate and run PAGE annotation. +} +\concept{manual_annotation} diff --git a/man/sce_add_spatial_colData.Rd b/man/sce_add_spatial_colData.Rd new file mode 100644 index 00000000..35cc11fb --- /dev/null +++ b/man/sce_add_spatial_colData.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sce.R +\name{sce_add_spatial_colData} +\alias{sce_add_spatial_colData} +\title{Append new columns with spatial relevance to \code{colData} of a \code{SingleCellExperiment} object.} +\usage{ +sce_add_spatial_colData(sce, spatial_locs, spatial = FALSE) +} +\arguments{ +\item{sce}{A \code{SingleCellExperiment} object.} + +\item{spatial_locs}{A file contating spatial coordiantes} + +\item{spatial}{Logical vector If true, add spatial coordinates} +} +\description{ +Append new columns with spatial relevance to \code{colData} of a \code{SingleCellExperiment} object. +} +\concept{spatial_sce} diff --git a/man/sce_history_fn.Rd b/man/sce_history_fn.Rd index 155c5ead..3ae6b4fa 100644 --- a/man/sce_history_fn.Rd +++ b/man/sce_history_fn.Rd @@ -4,7 +4,12 @@ \alias{sce_history_fn} \title{Create a tibble with history of cell and gene filtering.} \usage{ -sce_history_fn(sce_unfiltered, sce_qc_filter_genes, sce_custom_filter_genes) +sce_history_fn( + sce_unfiltered, + sce_qc_filter_genes, + sce_custom_filter_genes, + spatial = FALSE +) } \arguments{ \item{sce_unfiltered}{(\emph{input target}) A \code{SingleCellExperiment} object.} @@ -12,6 +17,8 @@ sce_history_fn(sce_unfiltered, sce_qc_filter_genes, sce_custom_filter_genes) \item{sce_qc_filter_genes}{(\emph{input target}) A \code{SingleCellExperiment} object.} \item{sce_custom_filter_genes}{(\emph{input target}) A \code{SingleCellExperiment} object.} + +\item{spatial}{A logical vector: \code{TRUE} for spatial transcriptomics dataset.} } \value{ A tibble. \emph{Output target}: \code{sce_history} diff --git a/man/sce_history_plot_fn.Rd b/man/sce_history_plot_fn.Rd index 5f488458..f9783e20 100644 --- a/man/sce_history_plot_fn.Rd +++ b/man/sce_history_plot_fn.Rd @@ -4,10 +4,12 @@ \alias{sce_history_plot_fn} \title{Plot history of cell and gene filtering.} \usage{ -sce_history_plot_fn(sce_history) +sce_history_plot_fn(sce_history, spatial = FALSE) } \arguments{ \item{sce_history}{(\emph{input target}) A tibble.} + +\item{spatial}{A logical vector: \code{TRUE} for spatial transcriptomics dataset.} } \value{ A \code{patchwork} object. \emph{Output target}: \code{sce_history_plot} diff --git a/man/sce_norm_hvg_fn.Rd b/man/sce_norm_hvg_fn.Rd index 4f102aef..7533bdf8 100644 --- a/man/sce_norm_hvg_fn.Rd +++ b/man/sce_norm_hvg_fn.Rd @@ -11,6 +11,7 @@ sce_norm_hvg_fn( hvg_selection = c("top", "significance", "threshold"), hvg_rm_cc_genes = FALSE, hvg_cc_genes_var_expl_threshold = 5, + spatial = FALSE, BSPARAM = BiocSingular::IrlbaParam(), BPPARAM = BiocParallel::SerialParam() ) @@ -26,6 +27,8 @@ sce_norm_hvg_fn( \item{hvg_cc_genes_var_expl_threshold}{A numeric scalar: threshold for variance explained. Genes exceeding this threshold will be marked as CC-related.} +\item{spatial}{A logical scalar: if \code{TRUE}, add spatially variable genes extension} + \item{BSPARAM}{A \link[BiocSingular:BiocSingularParam]{BiocSingular::BiocSingularParam} object.} \item{BPPARAM}{A \link[BiocParallel:BiocParallelParam-class]{BiocParallel::BiocParallelParam} object.} diff --git a/man/spatGenePlot2Dsce.Rd b/man/spatGenePlot2Dsce.Rd new file mode 100644 index 00000000..0b26c462 --- /dev/null +++ b/man/spatGenePlot2Dsce.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_visualization.R +\name{spatGenePlot2Dsce} +\alias{spatGenePlot2Dsce} +\title{A function for visualization selected genes in pseudotissue visualization} +\usage{ +spatGenePlot2Dsce( + sce, + sdimx = "Dims_x", + sdimy = "Dims_y", + expression_values = c("counts", "logcounts"), + genes, + cell_color_gradient = c("blue", "white", "red"), + gradient_midpoint = NULL, + gradient_limits = NULL, + edge_alpha = NULL, + scale_alpha_with_expression = FALSE, + point_shape = c("border", "no_border"), + point_size = 1, + point_alpha = 1, + point_border_col = "black", + point_border_stroke = 0.1, + show_legend = T, + legend_text = 8, + background_color = "white", + axis_text = 8, + axis_title = 8, + cow_n_col = 2, + cow_rel_h = 1, + cow_rel_w = 1, + cow_align = "h" +) +} +\arguments{ +\item{sce}{A \code{SingleCellExperiment} object.} +} +\value{ +A ggplot2 object. +} +\description{ +Adapted function from Giotto package \link{Dries et al, 2021}, rewrite for use in scdrake package for a SingleCellExperiment object +} +\concept{spatial_visualization} diff --git a/man/visualized_spots.Rd b/man/visualized_spots.Rd new file mode 100644 index 00000000..e8e6f51a --- /dev/null +++ b/man/visualized_spots.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_visualization.R +\name{visualized_spots} +\alias{visualized_spots} +\title{A basic function for pseudotissue visualization} +\usage{ +visualized_spots( + sce, + sdimx = "Dims_x", + sdimy = "Dims_y", + spat_enr_names = NULL, + cell_color = NULL, + color_as_factor = F, + cell_color_code = NULL, + cell_color_gradient = c("navy", "lightcyan", "red"), + gradient_midpoint = NULL, + gradient_limits = NULL, + select_cells = NULL, + point_shape = c("border", "no_border"), + point_size = 3, + point_alpha = 1, + point_border_col = "black", + point_border_stroke = 0.1, + label_size = 4, + label_fontface = "bold", + show_other_cells = T, + other_cell_color = "lightgrey", + other_point_size = 1, + other_cells_alpha = 0.1, + coord_fix_ratio = NULL, + title = NULL, + show_legend = T, + legend_text = 8, + legend_symbol_size = 1, + background_color = "white", + axis_text = 8, + axis_title = 8 +) +} +\arguments{ +\item{sce}{A \code{SingleCellExperiment} object.} + +\item{cell_color, color_as_factor, cell_color_code, ...}{Passed to ggplot2 object in plot_spat_point_layer_ggplot function} +} +\value{ +A \code{ggplot2} object. +} +\description{ +Adapted function from Giotto package \link{Dries et al, 2021}, rewrite for use in scdrake package in a SingleCellExperiment object +} +\concept{spatial_visualization} diff --git a/renv.lock b/renv.lock index bc0c8b99..b8beda02 100644 --- a/renv.lock +++ b/renv.lock @@ -2,30 +2,6 @@ "R": { "Version": "4.2.1", "Repositories": [ - { - "Name": "BioCcontainers", - "URL": "https://bioconductor.org/packages/3.15/container-binaries/bioconductor_docker" - }, - { - "Name": "BioCsoft", - "URL": "https://bioconductor.org/packages/3.15/bioc" - }, - { - "Name": "BioCann", - "URL": "https://bioconductor.org/packages/3.15/data/annotation" - }, - { - "Name": "BioCexp", - "URL": "https://bioconductor.org/packages/3.15/data/experiment" - }, - { - "Name": "BioCworkflows", - "URL": "https://bioconductor.org/packages/3.15/workflows" - }, - { - "Name": "BioCbooks", - "URL": "https://bioconductor.org/packages/3.15/books" - }, { "Name": "CRAN", "URL": "https://packagemanager.rstudio.com/cran/latest" @@ -278,7 +254,7 @@ "Package": "DEoptimR", "Version": "1.0-11", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "b18b34cfbf932e17803c2e3e4b1a2d2a", "Requirements": [] }, @@ -664,7 +640,7 @@ "Package": "RColorBrewer", "Version": "1.1-3", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "45f0398006e83a5b10b72a90663d8d8c", "Requirements": [] }, @@ -682,7 +658,7 @@ "Package": "ROCR", "Version": "1.0-11", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "cc151930e20e16427bc3d0daec62b4a9", "Requirements": [ "gplots" @@ -706,10 +682,10 @@ }, "Rcpp": { "Package": "Rcpp", - "Version": "1.0.9", + "Version": "1.0.12", "Source": "Repository", "Repository": "RSPM", - "Hash": "e9c08b94391e9f3f97355841229124f2", + "Hash": "5ea2700d21e038ace58269ecdbeb9ec0", "Requirements": [] }, "RcppAnnoy": { @@ -1121,7 +1097,7 @@ "Package": "assertthat", "Version": "0.2.1", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "50c838a310445e954bc13f26f26a6ecf", "Requirements": [] }, @@ -1236,7 +1212,7 @@ "Package": "bit64", "Version": "4.0.5", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "9fe98599ca456d6552421db0d6772d8f", "Requirements": [ "bit" @@ -1254,7 +1230,7 @@ "Package": "blob", "Version": "1.2.3", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "10d231579bc9c06ab1c320618808d4ff", "Requirements": [ "rlang", @@ -1296,14 +1272,6 @@ "yaml" ] }, - "boot": { - "Package": "boot", - "Version": "1.3-28", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "0baa960e3b49c6176a4f42addcbacc59", - "Requirements": [] - }, "brew": { "Package": "brew", "Version": "1.0-8", @@ -1340,7 +1308,7 @@ "Package": "caTools", "Version": "1.18.2", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "34d90fa5845004236b9eacafc51d07b2", "Requirements": [ "bitops" @@ -1475,7 +1443,7 @@ "Package": "colorspace", "Version": "2.0-3", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "bb4341986bc8b914f0f0acf2e4a3f2f7", "Requirements": [] }, @@ -1561,7 +1529,7 @@ "Package": "crosstalk", "Version": "1.2.0", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "6aa54f69598c32177e920eb3402e8293", "Requirements": [ "R6", @@ -1679,37 +1647,6 @@ "Hash": "bf1cd206a5d170d132ef75c7537b9bdb", "Requirements": [] }, - "doParallel": { - "Package": "doParallel", - "Version": "1.0.17", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "451e5edf411987991ab6a5410c45011f", - "Requirements": [ - "foreach", - "iterators" - ] - }, - "doRNG": { - "Package": "doRNG", - "Version": "1.8.2", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "a32487a942bdf5fd34224ad46f786e67", - "Requirements": [ - "foreach", - "iterators", - "rngtools" - ] - }, - "docopt": { - "Package": "docopt", - "Version": "0.7.1", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "e9eeef7931ee99ca0093f3f20b88e09b", - "Requirements": [] - }, "downlit": { "Package": "downlit", "Version": "0.4.2", @@ -1915,25 +1852,6 @@ "rlang" ] }, - "foreach": { - "Package": "foreach", - "Version": "1.5.2", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "618609b42c9406731ead03adf5379850", - "Requirements": [ - "codetools", - "iterators" - ] - }, - "foreign": { - "Package": "foreign", - "Version": "0.8-82", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "32b25c97ce306a760c4d9f787991b5d9", - "Requirements": [] - }, "formatR": { "Package": "formatR", "Version": "1.12", @@ -2245,7 +2163,7 @@ "Package": "gridExtra", "Version": "2.3", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "7d7f283939f563670a697165b2cf5560", "Requirements": [ "gtable" @@ -2443,14 +2361,6 @@ "Hash": "cfdea9dea85c1a973991c8cbe299f4da", "Requirements": [] }, - "iterators": { - "Package": "iterators", - "Version": "1.0.14", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "8954069286b4b2b0d023d1b288dce978", - "Requirements": [] - }, "janitor": { "Package": "janitor", "Version": "2.1.0", @@ -2530,7 +2440,7 @@ "Package": "labeling", "Version": "0.4.2", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "3d5108641f47470611a32d0bdf357a72", "Requirements": [] }, @@ -2567,7 +2477,7 @@ "Package": "lazyeval", "Version": "0.2.2", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "d908914ae53b04d4c0c0fd72ecc35370", "Requirements": [] }, @@ -2733,7 +2643,7 @@ "Package": "munsell", "Version": "0.5.0", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "6dfe8bf774944bd5595785e3229d8771", "Requirements": [ "colorspace" @@ -2743,7 +2653,7 @@ "Package": "mvtnorm", "Version": "1.1-3", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "7a7541cc284cb2ba3ba7eae645892af5", "Requirements": [] }, @@ -2769,14 +2679,6 @@ "lattice" ] }, - "nnet": { - "Package": "nnet", - "Version": "7.3-17", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "cb1d8d9f300a7e536b89c8a88c53f610", - "Requirements": [] - }, "openssl": { "Package": "openssl", "Version": "2.0.5", @@ -3049,7 +2951,7 @@ "Package": "progress", "Version": "1.2.2", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061", "Requirements": [ "R6", @@ -3216,7 +3118,7 @@ "Package": "reshape2", "Version": "1.4.4", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "bb5996d0bd962d214a11140d77589917", "Requirements": [ "Rcpp", @@ -3296,7 +3198,7 @@ "Package": "rjson", "Version": "0.2.21", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "f9da75e6444e95a1baf8ca24909d63b9", "Requirements": [] }, @@ -3327,21 +3229,11 @@ "yaml" ] }, - "rngtools": { - "Package": "rngtools", - "Version": "1.5.2", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "367a915f939520767660671efa0e32bd", - "Requirements": [ - "digest" - ] - }, "robustbase": { "Package": "robustbase", "Version": "0.95-0", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "c918476150dd483a23f1d1cab6d17e76", "Requirements": [ "DEoptimR" @@ -3371,14 +3263,6 @@ "xml2" ] }, - "rpart": { - "Package": "rpart", - "Version": "4.1.16", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "ea3ca1d9473daabb3cd0f1b4f974c1ed", - "Requirements": [] - }, "rprojroot": { "Package": "rprojroot", "Version": "2.0.3", @@ -3780,14 +3664,6 @@ "matrixStats" ] }, - "spatial": { - "Package": "spatial", - "Version": "7.3-15", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c23666fdb7789c8a45e65340bb334607", - "Requirements": [] - }, "spatstat.data": { "Package": "spatstat.data", "Version": "3.0-0", @@ -4121,7 +3997,7 @@ "Package": "tzdb", "Version": "0.3.0", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e", "Requirements": [ "cpp11" @@ -4216,7 +4092,7 @@ "Package": "viridis", "Version": "0.6.2", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Hash": "ee96aee95a7a563e5496f8991e9fde4b", "Requirements": [ "ggplot2", diff --git a/tests/testthat/_snaps/project.md b/tests/testthat/_snaps/project.md index 5f40ac45..7ec3c2a6 100644 --- a/tests/testthat/_snaps/project.md +++ b/tests/testthat/_snaps/project.md @@ -22,37 +22,39 @@ [17] "Rmd/single_sample/01_input_qc_children/cell_filtering_custom.Rmd" [18] "Rmd/single_sample/01_input_qc_children/cell_filtering_qc.Rmd" [19] "Rmd/single_sample/01_input_qc_children/empty_droplets.Rmd" - [20] "Rmd/single_sample/01_input_qc_children/gene_filtering_custom.Rmd" - [21] "Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd" - [22] "Rmd/single_sample/02_norm_clustering.Rmd" - [23] "Rmd/single_sample/02_norm_clustering_simple.Rmd" - [24] "_drake_integration.R" - [25] "_drake_single_sample.R" - [26] "config/integration/00_main.default.yaml" - [27] "config/integration/00_main.yaml" - [28] "config/integration/01_integration.default.yaml" - [29] "config/integration/01_integration.yaml" - [30] "config/integration/02_int_clustering.default.yaml" - [31] "config/integration/02_int_clustering.yaml" - [32] "config/integration/cluster_markers.default.yaml" - [33] "config/integration/cluster_markers.yaml" - [34] "config/integration/contrasts.default.yaml" - [35] "config/integration/contrasts.yaml" - [36] "config/pipeline.default.yaml" - [37] "config/pipeline.yaml" - [38] "config/single_sample/00_main.default.yaml" - [39] "config/single_sample/00_main.yaml" - [40] "config/single_sample/01_input_qc.default.yaml" - [41] "config/single_sample/01_input_qc.yaml" - [42] "config/single_sample/02_norm_clustering.default.yaml" - [43] "config/single_sample/02_norm_clustering.yaml" - [44] "config/single_sample/cluster_markers.default.yaml" - [45] "config/single_sample/cluster_markers.yaml" - [46] "config/single_sample/contrasts.default.yaml" - [47] "config/single_sample/contrasts.yaml" - [48] "plan_custom.R" - [49] "renv.lock" - [50] "selected_markers.csv" + [20] "Rmd/single_sample/01_input_qc_children/empty_droplets_spat.Rmd" + [21] "Rmd/single_sample/01_input_qc_children/gene_filtering_custom.Rmd" + [22] "Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd" + [23] "Rmd/single_sample/01_input_qc_spatial.Rmd" + [24] "Rmd/single_sample/02_norm_clustering.Rmd" + [25] "Rmd/single_sample/02_norm_clustering_simple.Rmd" + [26] "_drake_integration.R" + [27] "_drake_single_sample.R" + [28] "config/integration/00_main.default.yaml" + [29] "config/integration/00_main.yaml" + [30] "config/integration/01_integration.default.yaml" + [31] "config/integration/01_integration.yaml" + [32] "config/integration/02_int_clustering.default.yaml" + [33] "config/integration/02_int_clustering.yaml" + [34] "config/integration/cluster_markers.default.yaml" + [35] "config/integration/cluster_markers.yaml" + [36] "config/integration/contrasts.default.yaml" + [37] "config/integration/contrasts.yaml" + [38] "config/pipeline.default.yaml" + [39] "config/pipeline.yaml" + [40] "config/single_sample/00_main.default.yaml" + [41] "config/single_sample/00_main.yaml" + [42] "config/single_sample/01_input_qc.default.yaml" + [43] "config/single_sample/01_input_qc.yaml" + [44] "config/single_sample/02_norm_clustering.default.yaml" + [45] "config/single_sample/02_norm_clustering.yaml" + [46] "config/single_sample/cluster_markers.default.yaml" + [47] "config/single_sample/cluster_markers.yaml" + [48] "config/single_sample/contrasts.default.yaml" + [49] "config/single_sample/contrasts.yaml" + [50] "plan_custom.R" + [51] "renv.lock" + [52] "selected_markers.csv" --- @@ -78,47 +80,49 @@ [17] "Rmd/single_sample/01_input_qc_children/cell_filtering_custom.Rmd" [18] "Rmd/single_sample/01_input_qc_children/cell_filtering_qc.Rmd" [19] "Rmd/single_sample/01_input_qc_children/empty_droplets.Rmd" - [20] "Rmd/single_sample/01_input_qc_children/gene_filtering_custom.Rmd" - [21] "Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd" - [22] "Rmd/single_sample/02_norm_clustering.Rmd" - [23] "Rmd/single_sample/02_norm_clustering_simple.Rmd" - [24] "_drake_integration.R" - [25] "_drake_single_sample.R" - [26] "config/integration/00_main.default.yaml" - [27] "config/integration/00_main.yaml" - [28] "config/integration/00_main.yaml.backup" - [29] "config/integration/01_integration.default.yaml" - [30] "config/integration/01_integration.yaml" - [31] "config/integration/01_integration.yaml.backup" - [32] "config/integration/02_int_clustering.default.yaml" - [33] "config/integration/02_int_clustering.yaml" - [34] "config/integration/02_int_clustering.yaml.backup" - [35] "config/integration/cluster_markers.default.yaml" - [36] "config/integration/cluster_markers.yaml" - [37] "config/integration/cluster_markers.yaml.backup" - [38] "config/integration/contrasts.default.yaml" - [39] "config/integration/contrasts.yaml" - [40] "config/integration/contrasts.yaml.backup" - [41] "config/pipeline.default.yaml" - [42] "config/pipeline.yaml" - [43] "config/pipeline.yaml.backup" - [44] "config/single_sample/00_main.default.yaml" - [45] "config/single_sample/00_main.yaml" - [46] "config/single_sample/00_main.yaml.backup" - [47] "config/single_sample/01_input_qc.default.yaml" - [48] "config/single_sample/01_input_qc.yaml" - [49] "config/single_sample/01_input_qc.yaml.backup" - [50] "config/single_sample/02_norm_clustering.default.yaml" - [51] "config/single_sample/02_norm_clustering.yaml" - [52] "config/single_sample/02_norm_clustering.yaml.backup" - [53] "config/single_sample/cluster_markers.default.yaml" - [54] "config/single_sample/cluster_markers.yaml" - [55] "config/single_sample/cluster_markers.yaml.backup" - [56] "config/single_sample/contrasts.default.yaml" - [57] "config/single_sample/contrasts.yaml" - [58] "config/single_sample/contrasts.yaml.backup" - [59] "plan_custom.R" - [60] "plan_custom.orig.R" - [61] "renv.lock" - [62] "selected_markers.csv" + [20] "Rmd/single_sample/01_input_qc_children/empty_droplets_spat.Rmd" + [21] "Rmd/single_sample/01_input_qc_children/gene_filtering_custom.Rmd" + [22] "Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd" + [23] "Rmd/single_sample/01_input_qc_spatial.Rmd" + [24] "Rmd/single_sample/02_norm_clustering.Rmd" + [25] "Rmd/single_sample/02_norm_clustering_simple.Rmd" + [26] "_drake_integration.R" + [27] "_drake_single_sample.R" + [28] "config/integration/00_main.default.yaml" + [29] "config/integration/00_main.yaml" + [30] "config/integration/00_main.yaml.backup" + [31] "config/integration/01_integration.default.yaml" + [32] "config/integration/01_integration.yaml" + [33] "config/integration/01_integration.yaml.backup" + [34] "config/integration/02_int_clustering.default.yaml" + [35] "config/integration/02_int_clustering.yaml" + [36] "config/integration/02_int_clustering.yaml.backup" + [37] "config/integration/cluster_markers.default.yaml" + [38] "config/integration/cluster_markers.yaml" + [39] "config/integration/cluster_markers.yaml.backup" + [40] "config/integration/contrasts.default.yaml" + [41] "config/integration/contrasts.yaml" + [42] "config/integration/contrasts.yaml.backup" + [43] "config/pipeline.default.yaml" + [44] "config/pipeline.yaml" + [45] "config/pipeline.yaml.backup" + [46] "config/single_sample/00_main.default.yaml" + [47] "config/single_sample/00_main.yaml" + [48] "config/single_sample/00_main.yaml.backup" + [49] "config/single_sample/01_input_qc.default.yaml" + [50] "config/single_sample/01_input_qc.yaml" + [51] "config/single_sample/01_input_qc.yaml.backup" + [52] "config/single_sample/02_norm_clustering.default.yaml" + [53] "config/single_sample/02_norm_clustering.yaml" + [54] "config/single_sample/02_norm_clustering.yaml.backup" + [55] "config/single_sample/cluster_markers.default.yaml" + [56] "config/single_sample/cluster_markers.yaml" + [57] "config/single_sample/cluster_markers.yaml.backup" + [58] "config/single_sample/contrasts.default.yaml" + [59] "config/single_sample/contrasts.yaml" + [60] "config/single_sample/contrasts.yaml.backup" + [61] "plan_custom.R" + [62] "plan_custom.orig.R" + [63] "renv.lock" + [64] "selected_markers.csv" diff --git a/tests/testthat/test-cli.R b/tests/testthat/test-cli.R index 93897b6b..0cc62197 100644 --- a/tests/testthat/test-cli.R +++ b/tests/testthat/test-cli.R @@ -75,7 +75,8 @@ test_that("run command works", { test_that("run command returns exit code 1", { withr::with_dir(project_dir, { cfg <- load_pipeline_config() - cfg$DRAKE_TARGETS <- c("sce_raw") + #sce_orig insted of sce_raw + cfg$DRAKE_TARGETS <- c("sce_orig") yaml::write_yaml(cfg, "config/pipeline.yaml") res <- .run_cli(args = c("--pipeline-type", "single_sample", "run")) expect_equal(res$status, 1) diff --git a/tests/testthat/test-plans.R b/tests/testthat/test-plans.R index 61a47cd7..a270c0fc 100644 --- a/tests/testthat/test-plans.R +++ b/tests/testthat/test-plans.R @@ -17,8 +17,13 @@ fs::dir_copy( system.file("config", package = "scdrake", mustWork = TRUE), fs::path(project_dir, "config") ) +fs::dir_copy( + system.file("Rmd", package = "scdrake", mustWork = TRUE), + fs::path(project_dir, "Rmd") +) withr::local_dir(project_dir) update_configs() +here::i_am("config/pipeline.yaml") test_that("single-sample plans can be generated", { cfg_pipeline <- load_pipeline_config() diff --git a/vignettes/_vignette_signpost.Rmd b/vignettes/_vignette_signpost.Rmd index 49b7a648..c183a412 100644 --- a/vignettes/_vignette_signpost.Rmd +++ b/vignettes/_vignette_signpost.Rmd @@ -10,6 +10,7 @@ - General information: - Pipeline overview: `vignette("pipeline_overview")` - FAQ & Howtos: `vignette("scdrake_faq")` + - Spatial extention: `vignette("scdrake_spatial")` - Command line interface (CLI): `vignette("scdrake_cli")` - Config files (internals): `vignette("scdrake_config")` - Environment variables: `vignette("scdrake_envvars")` @@ -19,7 +20,7 @@ - Pipelines and stages: - Single-sample pipeline: - Stage `01_input_qc`: reading in data, filtering, quality control -> `vignette("stage_input_qc")` - - Stage `02_norm_clustering`: normalization, HVG selection, dimensionality reduction, clustering, cell type annotation + - Stage `02_norm_clustering`: normalization, HVG selection, SVG selection, dimensionality reduction, clustering, (manual) cell type annotation -> `vignette("stage_norm_clustering")` - Integration pipeline: - Stage `01_integration`: reading in data and integration -> `vignette("stage_integration")` diff --git a/vignettes/scdrake_spatial.Rmd b/vignettes/scdrake_spatial.Rmd new file mode 100644 index 00000000..a44308d1 --- /dev/null +++ b/vignettes/scdrake_spatial.Rmd @@ -0,0 +1,44 @@ +--- +title: "Spatial extention" +date: "`r glue::glue('Document generated: {format(Sys.time(), \"%Y-%m-%d %H:%M:%S %Z%z\")}')`" +package: scdrake +output: + BiocStyle::html_document: + toc: true + toc_float: true +vignette: > + %\VignetteIndexEntry{Spatial extention} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +*** + +`{scdrake}` now offer spatial extension for the first stage (`01_input_qc`) and the second stage (`02_norm_clustering`) of the single-sample pipeline. The spatial possibility is aimed on Visium technology, respectively on spot-based technologies. Scdrake provides comparable results with Seurat, Giotto (R), as well as scanpy (Python). However, we strongly discourage usage of scdrake for other technologies than Visium. For futher analyses of spatial dataset we recommend [CARD](https://github.com/YMa-lab/CARD) for deconvolution and [CellChat2](https://github.com/SiYangming/CellChat2) or [IGAN](https://github.com/Zhu-JC/IGAN) for cell-cell interaction. + +This vignette should serve as a supplement to other vignettes, as `vignette("stage_input_qc")` and `vignette("stage_norm_clustering")`). + + +*** + +## Spatial extention functions + +*** + +### Spatial visualization + +For (`01_input_qc`) and (`02_norm_clustering`) of the single-sample pipeline we now offer visualization of tissue, as pseudo tissue spot visualization. Spatial extention will add spot coordinates (array_col and array_row) from SpaceRanger tissue_possitions.csv file, and will filter away all spots, that are by SpaceRanger labeled as not in tissue. Visualization function are implemented from the [Giotto package](https://drieslab.github.io/Giotto_website/). Visualization is automatically used for quality control and dimension reduction results. + +*** + +### Selection of spatially variable genes + +For spatial analyses in stage 02_norm_clustering `vignette("stage_norm_clustering")` when spatial option is enabled, spatially variable genes (SVGs) are automaticaly calculated together with HVGs. That is done using Seurat::SVFInfo, selection method MoransI. A straightforward union of SVGs and HVGs is taking to further processing, see for more details. + +*** + +### Manual annotation + +Manual annotation was implemented for both single-cell and spatial datasets. In summary, expression profiles and statistical metrics are computed for each cell/spot, the result is visualized using a heatmap and dimension reduction plot. For spatial datasets is enabled to visualized results in tissue coordinates, both enrichment plots for each annotation label (individual enrichment plots) and for overall results for each spot. Manual annotation is implemented from the [Giotto package](https://drieslab.github.io/Giotto_website/), the function is based on [Kim SY et al](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-6-144). + +*** diff --git a/vignettes/stage_input_qc.Rmd b/vignettes/stage_input_qc.Rmd index 712d51f3..b45a3d3d 100644 --- a/vignettes/stage_input_qc.Rmd +++ b/vignettes/stage_input_qc.Rmd @@ -120,7 +120,7 @@ INPUT_QC_REPORT_RMD_FILE: "Rmd/single_sample/01_input_qc.Rmd" **Type:** character scalar -A path to RMarkdown file used for HTML report of this pipeline stage. +A path to RMarkdown file used for HTML report of this pipeline stage. For spatial extention, the default RMarkdown file is `01_input_qc_spatial.Rmd` *** @@ -143,6 +143,31 @@ You can also negate the selection by specifying `negate: true`. *** +#### Spatial extention + + +```yaml +SPATIAL: False +``` + +**Type:** logical scalar + +If `True`, pipeline enables spatial extension. + +In the input_qc stage, the spatial extension consists only in enabling pseudo tissue visualization, and in adding coordinates (array_col and array_row from tissue_positions.csv) to the Cell Metadata. + +*** + +```yaml +SPATIAL_LOCKS: Null +``` + +**Type:** Null or character scalar + +A path to tissue_position.csv from SpaceRanger spatial output folder used for adding coordinates (array_col and array_row) to Cell Metadata. Only void when SPATIAL is enabled. + +*** + #### Removal of empty droplets See `?DropletUtils::emptyDrops` for more details. diff --git a/vignettes/stage_norm_clustering.Rmd b/vignettes/stage_norm_clustering.Rmd index 1cfc3a78..2194b317 100644 --- a/vignettes/stage_norm_clustering.Rmd +++ b/vignettes/stage_norm_clustering.Rmd @@ -117,6 +117,21 @@ several `{scdrake}` functions. *** +#### Spatial extention + + +```yaml +SPATIAL: False +``` + +**Type:** logical scalar + +If `True`, pipeline enables spatial extension. + +This option enables spatial extension as selection of spatially variable genes (SVGS), pseudo tissue visualization and other visualization options for manual annotation. + +*** + #### Normalization parameters ```yaml @@ -211,6 +226,7 @@ A metric used to find HVGs. See for more details. *** @@ -657,6 +673,97 @@ will override corresponding parameters in `TRAIN_PARAMS` in `CELL_ANNOTATION_SOU *** +*** + +#### Manual cell annotation signatures + +```yaml +MANUAL_ANNOTATION: False +``` + +**Type:** logical scalar + +Whether manual annotation is enabled. Manual annotation is based on expression profiles, markers are taken from user-defined file. Resulted annotation is stored in `annotation_metadata` target. Manual cell annotation implemented from Giotto package enrichment function, which is based on PAGE method from [Kim SY et al](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-6-144). + +*** + +```yaml +ANNOTATION_MARKERS: Null +``` + +**Type:** logical scalar + +Path to user-define file contacting markers for each expected cell type. +Each column name is one cell type, in rows are gene markers. Example: + +``` +Astrocytes,Endothelial,Neurons +GFAP,PECAM1,SLC6A1 +SLC1A3,A2M,CHAT +GLUL,,GAD2 +``` + +*** + +```yaml +SCALE_ANNOTATION: False +``` + +**Type:** logical scalar + +Whether to scale gene profiles. + +*** +```yaml +OVERLAP: 5 +``` + +**Type:** numeric scalar + +Minimal overlap of genes in cell/cell in selected clusters, to assign cell type. + + +*** +```yaml +ANNOTATION_CLUSTERING: "cluster_kmeans_k4" +``` + +**Type:** character scalar + +On which clustering (column of cell metadata) to perform annotation. + +*** + +```yaml +SHOW_VALUE: "value" (`"value"` | `"zscores"` | `"zscores_rescaled"`) +``` + +**Type:** character scalar + +Which value to show in resulting heatmap. + +*** + +```yaml +MAKE_CELL_PLOT: False +``` + +**Type:** character scalar + +Only toghether with spatial option. Whether to show pseudo tissue cell plots with enrichment of annotation. + +*** + +```yaml +HEATMAP_DIMRED: "umap" +``` + +**Type:** character scalar + +Which dimension reduction use to show result of manual annotation. + +*** + #### Cell grouping assignment Using the two parameters below, it is possible to add custom cell metadata or rearrange the current one into new groupings