Skip to content

Commit

Permalink
Merge pull request #177 from biomage-ltd/separate-qc-steps
Browse files Browse the repository at this point in the history
Separate qc steps
  • Loading branch information
ogibson authored Nov 4, 2021
2 parents fe6bded + 2b1e198 commit e019c19
Show file tree
Hide file tree
Showing 45 changed files with 1,486 additions and 1,202 deletions.
1 change: 1 addition & 0 deletions pipeline-runner/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
.Rproj.user
tests/testthat/testthat-problems.rds
3 changes: 3 additions & 0 deletions pipeline-runner/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ export(filter_high_mito)
export(filter_low_cellsize)
export(generate_default_values_cellSizeDistribution)
export(generate_default_values_classifier)
export(generate_first_step_ids)
export(getClusters)
export(load_cellranger_files)
export(prepare_experiment)
export(runClusters)
export(run_emptydrops)
export(score_doublets)
import(data.table)
Expand Down
3 changes: 2 additions & 1 deletion pipeline-runner/R/gem2s-1-download_cellranger_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ download_cellranger_files <- function(input, pipeline_config, prev_out = list())
prev_out$config <- config
res <- list(
data = list(),
ouput = prev_out)
ouput = prev_out
)

message("\nDownloading of cellranger files step complete.")
return(res)
Expand Down
19 changes: 10 additions & 9 deletions pipeline-runner/R/gem2s-2-load_cellranger_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#' }
#' @export
#'
load_cellranger_files <- function(input, pipeline_config, prev_out, input_dir = '/input') {
load_cellranger_files <- function(input, pipeline_config, prev_out, input_dir = "/input") {
message("Loading cellranger output ...")
check_prev_out(prev_out, 'config')
check_prev_out(prev_out, "config")

# destructure previous output
config <- prev_out$config
Expand All @@ -20,7 +20,8 @@ load_cellranger_files <- function(input, pipeline_config, prev_out, input_dir =

res <- list(
data = list(),
output = output)
output = output
)

message("\nLoading of cellranger files step complete.")
return(res)
Expand All @@ -44,15 +45,15 @@ call_read10x <- function(config, input_dir) {
for (sample in samples) {
sample_dir <- file.path(input_dir, sample)
sample_fpaths <- list.files(sample_dir)
annot_fpath <- file.path(sample_dir, 'features.tsv.gz')
annot_fpath <- file.path(sample_dir, "features.tsv.gz")

message("\nSample --> ", sample)
message("Reading files from ", sample_dir, ' --> ', paste(sample_fpaths, collapse = ' - '))
message("Reading files from ", sample_dir, " --> ", paste(sample_fpaths, collapse = " - "))

counts <- Seurat::Read10X(sample_dir, gene.column = 1)

if(is(counts, 'list')) {
slot <- 'Gene Expression'
if (is(counts, "list")) {
slot <- "Gene Expression"
# questionable: grab first slot if no gene expression
if (!slot %in% names(counts)) slot <- names(counts)[1]
counts <- counts[[slot]]
Expand Down Expand Up @@ -86,8 +87,8 @@ format_annot <- function(annot_list) {
annot$original_name <- gname
is.dup <- duplicated(gname) | duplicated(gname, fromLast = TRUE)

#We need to convert the gene inputs from _ to - bc when we create the Seurat object we do this, and the match would return NA values if any of the inputs still has _.
annot$input <- gsub('_', '-', annot$input)
# We need to convert the gene inputs from _ to - bc when we create the Seurat object we do this, and the match would return NA values if any of the inputs still has _.
annot$input <- gsub("_", "-", annot$input)
annot$name[is.dup] <- paste(gname[is.dup], annot$input[is.dup], sep = " - ")

annot <- annot[!duplicated(annot$input), ]
Expand Down
5 changes: 3 additions & 2 deletions pipeline-runner/R/gem2s-3-run_emptydrops.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#'
run_emptydrops <- function(input, pipeline_config, prev_out) {
message("Testing if droplets are empty...")
check_prev_out(prev_out, c('config', 'counts_list', 'annot'))
check_prev_out(prev_out, c("config", "counts_list", "annot"))

# destructure previous output
counts_list <- prev_out$counts_list
Expand All @@ -25,7 +25,8 @@ run_emptydrops <- function(input, pipeline_config, prev_out) {
prev_out$edrops <- edrops
res <- list(
data = list(),
output = prev_out)
output = prev_out
)

message("\nRunning of emptydrops step complete.")
return(res)
Expand Down
6 changes: 3 additions & 3 deletions pipeline-runner/R/gem2s-4-score_doublets.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ score_doublets <- function(input, pipeline_config, prev_out) {
message("Calculating probability of droplets being doublets...")

# NOTE: edrops is not required
check_prev_out(prev_out, c('config', 'counts_list', 'annot'))
check_prev_out(prev_out, c("config", "counts_list", "annot"))

edrops_list <- prev_out$edrops
counts_list <- prev_out$counts_list
Expand Down Expand Up @@ -40,7 +40,8 @@ score_doublets <- function(input, pipeline_config, prev_out) {
prev_out$doublet_scores <- scores
res <- list(
data = list(),
output = prev_out)
output = prev_out
)

message("\nScoring doublets step complete.")
return(res)
Expand All @@ -54,7 +55,6 @@ score_doublets <- function(input, pipeline_config, prev_out) {
#' @return data.frame with doublet scores and assigned classes
#'
compute_sample_doublet_scores <- function(sample_counts) {

set.seed(0)
sce <- scDblFinder::scDblFinder(sample_counts)
doublet_res <- data.frame(
Expand Down
15 changes: 7 additions & 8 deletions pipeline-runner/R/gem2s-5-create_seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ create_seurat <- function(input, pipeline_config, prev_out) {
message("Creating Seurat Objects...")

# NOTE: edrops can be empty list
check_names <- c('config', 'counts_list', 'annot', 'doublet_scores', 'edrops')
check_names <- c("config", "counts_list", "annot", "doublet_scores", "edrops")
check_prev_out(prev_out, check_names)

# destructure previous output: config, counts_list, annot, and doublet_scores
Expand All @@ -28,30 +28,32 @@ create_seurat <- function(input, pipeline_config, prev_out) {
edrops_out = edrops[[sample]],
sample = sample,
annot = annot,
config = config)
config = config
)
}


prev_out$scdata_list <- scdata_list
res <- list(
data = list(),
output = prev_out)
output = prev_out
)

message("\nCreation of Seurat objects step complete.")
return(res)
}

# construct SeuratObject
construct_scdata <- function(counts, doublet_score, edrops_out, sample, annot, config, min.cells = 3, min.features = 10) {

metadata <- construct_metadata(counts, sample, config)

scdata <- Seurat::CreateSeuratObject(
counts,
meta.data = metadata,
project = config$name,
min.cells = min.cells,
min.features = min.features)
min.features = min.features
)

scdata <- scdata %>%
add_mito(annot) %>%
Expand Down Expand Up @@ -86,7 +88,6 @@ construct_metadata <- function(counts, sample, config) {

# add mitochondrial percent to SeuratObject
add_mito <- function(scdata, annot) {

if (any(grepl("^mt-", annot$name, ignore.case = TRUE))) {
message("Adding MT information...")
mt.features <- annot$input[grep("^mt-", annot$name, ignore.case = TRUE)]
Expand All @@ -102,7 +103,6 @@ add_mito <- function(scdata, annot) {

# add emptyDrops result to SeuratObject
add_edrops <- function(scdata, edout) {

scdata@tools$flag_filtered <- is.null(edout)

if (!scdata@tools$flag_filtered) {
Expand All @@ -120,7 +120,6 @@ add_edrops <- function(scdata, edout) {
rownames(meta.data) <- meta.data$barcode

scdata@meta.data <- meta.data

} else {
message("emptyDrops results not present, skipping...")
scdata@meta.data$emptyDrops_FDR <- NA
Expand Down
Loading

0 comments on commit e019c19

Please sign in to comment.