Skip to content

Commit

Permalink
Merge pull request #173 from biomage-ltd/fix-lookups
Browse files Browse the repository at this point in the history
Fix lookups
  • Loading branch information
ogibson authored Sep 29, 2021
2 parents 299322d + bbf32f8 commit cb8c49b
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 68 deletions.
18 changes: 6 additions & 12 deletions pipeline-runner/R/gem2s-5-create_seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#' @export
#'
create_seurat <- function(input, pipeline_config, prev_out) {

message("Creating Seurat Objects...")

# NOTE: edrops can be empty list
Expand Down Expand Up @@ -46,8 +45,6 @@ create_seurat <- function(input, pipeline_config, prev_out) {
construct_scdata <- function(counts, doublet_score, edrops_out, sample, annot, config, min.cells = 3, min.features = 10) {

metadata <- construct_metadata(counts, sample, config)
lookups <- get_metadata_lookups(metadata)
colnames(metadata) <- lookups

scdata <- Seurat::CreateSeuratObject(
counts,
Expand All @@ -56,8 +53,6 @@ construct_scdata <- function(counts, doublet_score, edrops_out, sample, annot, c
min.cells = min.cells,
min.features = min.features)

scdata@misc$metadata_lookups <- lookups

scdata <- scdata %>%
add_mito(annot) %>%
add_dblscore(doublet_score) %>%
Expand All @@ -66,19 +61,15 @@ construct_scdata <- function(counts, doublet_score, edrops_out, sample, annot, c
return(scdata)
}

# to find invalid metadata column names in SeuratObject (e.g. 'TRUE')
get_metadata_lookups <- function(metadata) {
user_values <- names(metadata)
lookups <- make.names(colnames(metadata), unique = TRUE)
names(lookups) <- user_values
return(lookups)
}


# NOTE: any changes here must be reflected in meta_sets

# construct metadata for each SeuratObject
construct_metadata <- function(counts, sample, config) {
message("Constructing metadata df...")
metadata <- data.frame(row.names = colnames(counts), samples = rep(sample, ncol(counts)))

# Add "metadata" if exists in config
rest <- config$metadata
if (!is.null(rest)) {
Expand All @@ -87,6 +78,9 @@ construct_metadata <- function(counts, sample, config) {
metadata[names(rest)] <- rest[sample, ]
}

# make syntactically valid column names
colnames(metadata) <- make.names(colnames(metadata), unique = TRUE)

return(metadata)
}

Expand Down
7 changes: 4 additions & 3 deletions pipeline-runner/R/gem2s-7-upload_to_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,12 @@ meta_sets <- function(input, scdata, color_pool) {
cell_set_list <- c()
meta <- lapply(input$metadata, unlist)

# names of metadata tracks
# user-supplied metadata track names
keys <- names(meta)

# corresponding names stored in seurat object meta.data
seurat_keys <- scdata@misc$metadata_lookups[keys]
# syntactically valid metadata names as stored in scdata
# same names as used in construct_metadata including internal 'samples' column (dropped)
seurat_keys <- make.names(c('samples', keys), unique = TRUE)[-1]

color_index <- 1
for (i in seq_along(keys)) {
Expand Down
2 changes: 1 addition & 1 deletion pipeline-runner/man/call_read10x.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pipeline-runner/man/load_cellranger_files.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 7 additions & 40 deletions pipeline-runner/tests/testthat/test-gem2s-5-create_seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,52 +62,19 @@ test_that("construct_metadata works with syntactically invalid column names", {
)

metadata <- construct_metadata(counts, sample, config)
valid_names <- make.names(colnames(metadata), unique = TRUE)

# doesn't change column names of metadata
expect_equal(colnames(metadata), c('samples', 'TRUE', '1first', 'with space', 'with-dash'))
# changes column names of metadata
expect_equal(colnames(metadata), valid_names)

# stores correct values in metadata
expect_true(all(metadata[['TRUE']] == 'a'))
expect_true(all(metadata[['1first']] == 'b'))
expect_true(all(metadata[['with space']] == 'c'))
expect_true(all(metadata[['with-dash']] == 'd'))
expect_true(all(metadata$TRUE. == 'a'))
expect_true(all(metadata$X1first == 'b'))
expect_true(all(metadata$with.space == 'c'))
expect_true(all(metadata$with.dash == 'd'))
})


test_that("result of get_metadata_lookups can be used to access syntactically invalid slots", {

sample <- 'hello'
counts <- mock_counts()

config <- list(
samples = 'hello',
metadata = list('TRUE' = list('a'),
'1first' = list('b'),
'with space' = list('c'),
'with-dash' = list('d'))
)

metadata <- construct_metadata(counts, sample, config)
lookups <- get_metadata_lookups(metadata)

# seurat stores as syntactically valid names
scdata <- SeuratObject::CreateSeuratObject(counts, meta.data = metadata)
expect_true(all(lookups %in% colnames(scdata@meta.data)))

# use lookups to access
keys <- c('TRUE', '1first', 'with space', 'with-dash')
values <- c('a', 'b', 'c', 'd')

for (i in seq_along(keys)) {
key <- keys[i]
val <- values[i]
seurat_key <- lookups[[key]]

expect_true(seurat_key %in% colnames(scdata@meta.data))
expect_true(all(scdata[[seurat_key]] == val))
}
})


test_that("create_seurat works without emptyDrops result", {

Expand Down
42 changes: 31 additions & 11 deletions pipeline-runner/tests/testthat/test-gem2s-7-upload_to_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,27 @@ mock_scdata <- function(config) {
file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"),
as.is = TRUE)

scdata <- Seurat::CreateSeuratObject(counts = pbmc_raw)

scdata$cells_id <- seq(0, ncol(scdata)-1)

# construct metadata
# add samples
samples <- unlist(config$sampleIds)
scdata$samples <- rep(samples, each = 40)

# add metadata
rest <- config$metadata
keys <- c('samples', names(rest))
metadata <- data.frame(row.names = colnames(scdata), samples = scdata$samples)
metadata <- data.frame(row.names = colnames(pbmc_raw), samples = rep(samples, each = 40))

# Add "metadata" if exists in config
if (!is.null(rest)) {
rest <- lapply(rest, unlist)
rest <- data.frame(rest, row.names = samples, check.names = FALSE)
scdata@meta.data[names(rest)] <- rest[scdata$samples, ]
metadata[names(rest)] <- rest[metadata$samples, ]
}

lookups <- make.names(keys)
names(lookups) <- keys
# make syntactically valid
colnames(metadata) <- make.names(colnames(metadata), unique = TRUE)

scdata@misc$metadata_lookups <- lookups
scdata <- Seurat::CreateSeuratObject(counts = pbmc_raw, meta.data = metadata)

scdata$cells_id <- seq(0, ncol(scdata)-1)
return(scdata)
}

Expand Down Expand Up @@ -118,6 +114,30 @@ test_that("get_cell_sets adds a single metadata column", {
})


test_that("get_cell_sets uses user-supplied syntactically invalid metadata column names", {
metadata <- list('TRUE' = list('Hello', 'WT2'))
config <- mock_config(metadata)
scdata <- mock_scdata(config)

cell_sets <- get_cell_sets(scdata, config)

# have TRUE as a key
keys <- sapply(cell_sets$cellSets, `[[`, 'key')
expect_true('TRUE' %in% keys)

group_set <- cell_sets$cellSets[[which(keys == 'TRUE')]]
group_names <- sapply(group_set$children, `[[`, 'name')

# cell ids are correct for each child
for (group_name in group_names) {
group_cells <- group_set$children[[which(group_names == group_name)]]$cellIds
expected_cells <- unname(scdata$cells_id)[scdata$TRUE. == group_name]

expect_equal(group_cells, expected_cells)
}
})


test_that("get_cell_sets adds two metadata columns", {

metadata <- list(Group1 = list('Hello', 'WT2'), Group2 = list('WT', 'WT'))
Expand Down
21 changes: 21 additions & 0 deletions pipeline-runner/tests/testthat/test-qc-6-integrate_scdata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
mock_scdata <- function() {
pbmc_raw <- read.table(
file = system.file("extdata", "pbmc_raw.txt", package = "Seurat"),
as.is = TRUE)

scdata <- Seurat::CreateSeuratObject(counts = pbmc_raw)

# add samples
scdata$samples <- rep(c('123abc', '123def'), each = 40)
return(scdata)
}


test_that("harmony integration works", {
scdata <- mock_scdata()
config <- list(dimensionalityReduction = list(numPCs = 2),
dataIntegration = list(method = 'harmony', methodSettings = list(harmony = list(numGenes = 10, normalisation = 'logNormalize'))))

scdata <- run_dataIntegration(scdata, config)
expect_s4_class(scdata, 'Seurat')
})

0 comments on commit cb8c49b

Please sign in to comment.