diff --git a/DESCRIPTION b/DESCRIPTION index c384dd0c..bb944f9d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: scdrake Type: Package Title: A pipeline for droplet-based single-cell RNA-seq data secondary analysis implemented in the drake Make-like toolkit for R language -Version: 1.5.1 +Version: 1.5.2 Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 286a7e77..9033a6b5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -100,6 +100,7 @@ export(get_single_sample_plan) export(get_sys_env) export(get_tmp_dir) export(get_top_hvgs) +export(get_used_qc_filters_operator_desc) export(get_yq_default_path) export(glue0) export(glue0c) diff --git a/R/config_process_single_sample.R b/R/config_process_single_sample.R index 1aad289c..41a0804a 100644 --- a/R/config_process_single_sample.R +++ b/R/config_process_single_sample.R @@ -29,6 +29,15 @@ msg = "{.var input_data$type} must be {.vals possible_input_data_types}. Current value: {.val {cfg$INPUT_DATA$type}}" ) + possible_filters_operators <- c("&", "|") + for (param_name in c("DATASET_SENSITIVE_FILTERS_OPERATOR", "CUSTOM_FILTERS_OPERATOR")) { + val <- cfg[[param_name]] + assert_that_( + is.character(val) && length(val) == 1L && val %in% possible_filters_operators, + msg = "{.field {param_name}} must be one of {.vals {possible_filters_operators}}. Current value: {.val {val}}" + ) + } + assert_that_( !is_null(cfg$INPUT_DATA$path), msg = "{.field INPUT_DATA$type} is not set, data cannot be loaded later." diff --git a/R/plans_single_sample.R b/R/plans_single_sample.R index c32beb48..d7cf945a 100644 --- a/R/plans_single_sample.R +++ b/R/plans_single_sample.R @@ -60,7 +60,7 @@ get_input_qc_subplan <- function(cfg, cfg_pipeline, cfg_main) { ), qc_filters = purrr::map(qc_filters_raw, ~ as.logical(.) %>% tidyr::replace_na(replace = FALSE)), - ## -- Join filters by OR operator. - qc_filter = Reduce("|", qc_filters), + ## -- Join filters by the operator set in DATASET_SENSITIVE_FILTERS_OPERATOR ("&" or "|"). + qc_filter = Reduce(!!cfg$DATASET_SENSITIVE_FILTERS_OPERATOR, qc_filters), ## -- Custom filters. 
custom_filters_raw = list( @@ -71,7 +71,7 @@ get_input_qc_subplan <- function(cfg, cfg_pipeline, cfg_main) { # low_ribo = cell_qc$subsets_ribo_percent <= !!cfg$MIN_RIBO_RATIO * 100 ), custom_filters = purrr::map(custom_filters_raw, ~ as.logical(.) %>% tidyr::replace_na(replace = FALSE)), - custom_filter = Reduce("|", custom_filters), + custom_filter = Reduce(!!cfg$CUSTOM_FILTERS_OPERATOR, custom_filters), ## -- Add filters to sce and create Seurat object. sce_unfiltered = sce_add_colData( diff --git a/R/single_sample_input_qc.R b/R/single_sample_input_qc.R index 46ad5545..1940a226 100644 --- a/R/single_sample_input_qc.R +++ b/R/single_sample_input_qc.R @@ -263,3 +263,21 @@ sce_final_input_qc_fn <- function(sce_selected, gene_annotation) { return(sce_final_input_qc) } + +#' Return an informative message about the used operator to join cell QC filters +#' +#' @param operator A character scalar: used operator. +#' +#' @return A character scalar: the message +#' +#' @concept single_sample_input_qc_fn +#' @export +get_used_qc_filters_operator_desc <- function(operator = c("&", "|")) { + ## -- Collapse the default choices to a scalar (and reject unknown operators) so the `if ()` condition has length one. + operator <- match.arg(operator) + if (operator == "&") { + "Individual filters were considered jointly (using *AND* operator), i.e., a cell was removed only if violated all of the filters." + } else { + "Individual filters were considered individually (using *OR* operator), i.e., a cell was removed if violated at least one filter." 
+ } +} diff --git a/README.md b/README.md index 2970d401..f080793b 100644 --- a/README.md +++ b/README.md @@ -108,8 +108,8 @@ You can pull the Docker image with the latest stable `{scdrake}` version using ``` bash -docker pull jirinovo/scdrake:1.5.1 -singularity pull docker:jirinovo/scdrake:1.5.1 +docker pull jirinovo/scdrake:1.5.2 +singularity pull docker:jirinovo/scdrake:1.5.2 ``` or list available versions in [our Docker Hub @@ -151,7 +151,7 @@ docker run -d \ -e USERID=$(id -u) \ -e GROUPID=$(id -g) \ -e PASSWORD=1234 \ - jirinovo/scdrake:1.5.1 + jirinovo/scdrake:1.5.2 ``` For Singularity, also make shared directories and execute the container @@ -234,7 +234,7 @@ for `{scdrake}` and you can use it to install all dependencies by ``` r ## -- This is a lockfile for the latest stable version of scdrake. -download.file("https://raw.githubusercontent.com/bioinfocz/scdrake/1.5.1/renv.lock") +download.file("https://raw.githubusercontent.com/bioinfocz/scdrake/1.5.2/renv.lock", destfile = "renv.lock") ## -- You can increase the number of CPU cores to speed up the installation. options(Ncpus = 2) renv::restore(lockfile = "renv.lock", repos = BiocManager::repositories()) @@ -254,7 +254,7 @@ installed from the lockfile). 
``` r remotes::install_github( - "bioinfocz/scdrake@1.5.1", + "bioinfocz/scdrake@1.5.2", dependencies = FALSE, upgrade = FALSE, keep_source = TRUE, build_vignettes = TRUE, repos = BiocManager::repositories() @@ -321,7 +321,7 @@ vignette](https://bioinfocz.github.io/scdrake/articles/scdrake.html) ## Vignettes and other readings See for a documentation website of -the latest stable version (1.5.1) where links to vignettes below become +the latest stable version (1.5.2) where links to vignettes below become real :-) See for a documentation diff --git a/inst/Rmd/single_sample/01_input_qc_children/cell_filtering_custom.Rmd b/inst/Rmd/single_sample/01_input_qc_children/cell_filtering_custom.Rmd index 6b8cc869..9b6feac0 100644 --- a/inst/Rmd/single_sample/01_input_qc_children/cell_filtering_custom.Rmd +++ b/inst/Rmd/single_sample/01_input_qc_children/cell_filtering_custom.Rmd @@ -1,7 +1,9 @@ -Filter dataset based on custom (fixed) thresholds of QC metrics: +Filter cells based on custom (fixed) thresholds of QC metrics: - <Min; Max> UMI per cell: <`r cfg$MIN_UMI_CF`; `r as.character(cfg$MAX_UMI_CF)`> - Min. number of features (genes) detected: `r cfg$MIN_FEATURES` - Max. ratio of mitochondrial genes expression: `r cfg$MAX_MITO_RATIO` +`r get_used_qc_filters_operator_desc(cfg$CUSTOM_FILTERS_OPERATOR)` + **Removing `r sum(custom_filter)` low quality cells based on custom thresholds.** diff --git a/inst/Rmd/single_sample/01_input_qc_children/cell_filtering_qc.Rmd b/inst/Rmd/single_sample/01_input_qc_children/cell_filtering_qc.Rmd index 55bad048..e18da6da 100644 --- a/inst/Rmd/single_sample/01_input_qc_children/cell_filtering_qc.Rmd +++ b/inst/Rmd/single_sample/01_input_qc_children/cell_filtering_qc.Rmd @@ -4,4 +4,6 @@ Filter cells based on QC metrics and MAD threshold (`r cfg$MAD_THRESHOLD`): - Low number of detected genes (lower tail). - High expression of mitochondrial genes (upper tail). 
+`r get_used_qc_filters_operator_desc(cfg$DATASET_SENSITIVE_FILTERS_OPERATOR)` + **Removing `r sum(qc_filter)` low quality cells based on MAD.** diff --git a/inst/Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd b/inst/Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd index c0149e46..5ce0a078 100644 --- a/inst/Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd +++ b/inst/Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd @@ -13,7 +13,7 @@ janitor::tabyl(sce_qc_filter_rowSums == 0) %>% **Removing `r sum(drake::readd(sce_qc_gene_filter, path = drake_cache_dir))` genes with UMI per cell less than `r cfg$MIN_UMI` and expressed in less than `r cfg$MIN_RATIO_CELLS * 100` % of all cells.** -Info on filtered dataset: +Info on dataset-sensitive filtered dataset: ```{r} cat(drake::readd(sce_qc_filter_genes_info, path = drake_cache_dir)$str) diff --git a/inst/config/single_sample/01_input_qc.default.yaml b/inst/config/single_sample/01_input_qc.default.yaml index 9a8bdc0c..4cf50b74 100644 --- a/inst/config/single_sample/01_input_qc.default.yaml +++ b/inst/config/single_sample/01_input_qc.default.yaml @@ -30,6 +30,7 @@ SAVE_DATASET_SENSITIVE_FILTERING: True ### Dataset-sensitive cell filtering ########################################## MAD_THRESHOLD: 3 +DATASET_SENSITIVE_FILTERS_OPERATOR: "&" ############################################################################### ### Custom cell filtering ##################################################### @@ -37,6 +38,7 @@ MIN_UMI_CF: 1000 MAX_UMI_CF: 50000 MIN_FEATURES: 1000 MAX_MITO_RATIO: 0.2 +CUSTOM_FILTERS_OPERATOR: "&" ############################################################################### ### Gene filtering ############################################################ diff --git a/man/get_used_qc_filters_operator_desc.Rd b/man/get_used_qc_filters_operator_desc.Rd new file mode 100644 index 00000000..5e6db6d4 --- /dev/null +++ b/man/get_used_qc_filters_operator_desc.Rd @@ -0,0 +1,18 @@ +% 
Generated by roxygen2: do not edit by hand +% Please edit documentation in R/single_sample_input_qc.R +\name{get_used_qc_filters_operator_desc} +\alias{get_used_qc_filters_operator_desc} +\title{Return an informative message about the used operator to join cell QC filters} +\usage{ +get_used_qc_filters_operator_desc(operator = c("&", "|")) +} +\arguments{ +\item{operator}{A character scalar: used operator.} +} +\value{ +A character scalar: the message +} +\description{ +Return an informative message about the used operator to join cell QC filters +} +\concept{single_sample_input_qc_fn} diff --git a/vignettes/stage_input_qc.Rmd b/vignettes/stage_input_qc.Rmd index 7ea3e1eb..712d51f3 100644 --- a/vignettes/stage_input_qc.Rmd +++ b/vignettes/stage_input_qc.Rmd @@ -55,8 +55,8 @@ More information about scRNA-seq quality control can be found in For "number of UMI" metric, only a lower tail is used. - Custom filtering, based on fixed thresholds. For "number of UMI" metric, both upper and lower bounds are used ([theory](https://bioconductor.org/books/3.15/OSCA.basic/quality-control.html#fixed-qc). - - In both filtering types, violation of only one metric threshold leads to removal of a cell. - More technically, metric filters are reduced by **or** operator. + - For both filtering types you can choose how to join the filters: either jointly (using the *AND* operator) or + individually (using the *OR* operator). - Gene filtering (optional) based on a minimum number of UMI per cell and a minimum ratio of cells expressing a gene. - A gene is considered expressed when number of its UMI across all cells is greater than X and at the same time it is expressed in at least Y ratio of cells. @@ -245,6 +245,19 @@ Violation of only one metric threshold leads to removal of a cell. To disable the dataset-sensitive filtering, set `MAD_THRESHOLD: .inf`. That will force passing of each cell as every QC metric will be always lower than positive infinity MAD. 
+*** + +```yaml +DATASET_SENSITIVE_FILTERS_OPERATOR: "&" +``` + +**Type:** a character scalar (`"&" | "|"`) + +How to join the QC filters: + +- Jointly (*AND*/`&` operator), i.e., remove only cells that violate **ALL** filters (permissive) +- Individually (*OR*/`|` operator), i.e., remove cells that violate **AT LEAST ONE** filter (strict) + ##### Custom cell filtering ```yaml @@ -295,6 +308,19 @@ i.e. cells with mitochondrial genes detected in more than (`MAX_MITO_RATIO` * 10 To disable this filter, set `MAX_MITO_RATIO: .inf` +*** + +```yaml +CUSTOM_FILTERS_OPERATOR: "&" +``` + +**Type:** a character scalar (`"&" | "|"`) + +How to join the QC filters: + +- Jointly (*AND*/`&` operator), i.e., remove only cells that violate **ALL** filters (permissive) +- Individually (*OR*/`|` operator), i.e., remove cells that violate **AT LEAST ONE** filter (strict) + #### Gene filtering Gene filtering thresholds are applied in both types of cell filtering (after it is performed).