Skip to content

Commit

Permalink
Merge branch 'master' into pipeline-fail-api-request
Browse files Browse the repository at this point in the history
  • Loading branch information
StefanBabukov authored Nov 27, 2023
2 parents 55c72cd + f2256e4 commit b670580
Show file tree
Hide file tree
Showing 13 changed files with 245 additions and 23 deletions.
2 changes: 2 additions & 0 deletions pipeline-runner/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export(filter_low_cellsize)
export(filter_parent_cellsets)
export(filter_unnamed_features)
export(find_clm_columns)
export(find_group_columns_cl_metadata)
export(format_cl_metadata_cellsets)
export(generate_default_values_cellSizeDistribution)
export(generate_elbow_plot_data)
Expand Down Expand Up @@ -74,6 +75,7 @@ export(safe_cbind)
export(score_doublets)
export(seuratv4_find_and_integrate_anchors)
export(seuratv4_geosketch_find_and_integrate_anchors)
export(sort_cluster_names)
export(subset_experiment)
export(subset_ids)
export(subset_safe)
Expand Down
68 changes: 55 additions & 13 deletions pipeline-runner/R/qc-7-embed_and_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,17 @@ make_cl_metadata_cellsets <- function(scdata, config) {
make_cl_metadata_table(cl_metadata, barcode_cell_id_map)

var_types <- detect_variable_types(cl_metadata)
message("Detected cell-level metadata variable types:\n")
str(var_types)


# creates cell-level metadata cellsets, setting the correct type
cl_metadata_cellsets <-
format_cl_metadata_cellsets(cl_metadata, var_types, scdata@misc$color_pool)

message("cell-level metadata cellsets created:\n")
str(cl_metadata_cellsets)

return(cl_metadata_cellsets)
}

Expand Down Expand Up @@ -266,16 +272,21 @@ get_cell_id_barcode_map <- function(scdata) {
#'
#' @return data.table of cell_ids and cell-level metadata
#' @export
make_cl_metadata_table <- function(cl_metadata, barcode_cell_ids) {
make_cl_metadata_table <- function(cl_metadata, barcode_cell_id_map) {

join_cols <- c("barcode")

# remove Seurat sample suffix if present only in one of the tables
if (!all(grepl("_\\d+$", c(barcode_cell_id_map$barcode, cl_metadata$barcode)))) {
barcode_cell_id_map[, barcode := gsub("_\\d+$", "", barcode)]
}

if (!"samples" %in% names(cl_metadata)) {
join_cols <- setdiff(join_cols, "samples")
# remove samples from barcode-cell_id map if not used for join
barcode_cell_ids <- barcode_cell_ids[, -"samples"]
barcode_cell_id_map <- barcode_cell_id_map[, -"samples"]
}

cl_metadata[barcode_cell_ids, , on = join_cols]
cl_metadata[barcode_cell_id_map, ,on = join_cols, nomatch = NULL]
}


Expand Down Expand Up @@ -309,6 +320,27 @@ find_clm_columns <- function(check_vals) {
}


#' Find group columns for cell-level metadata
#'
#' Find columns that can be used to group cells with sample granularity in
#' cell-level metadata. The only requirement is that the column has the same
#' value for all cells in a sample.
#'
#' @param cl_metadata data.table
#'
#' @return vector of column names
#' @export
#'
find_group_columns_cl_metadata <- function(cl_metadata) {

# ignore duplicate_barcode column, manually defined as clm
ndistinct_sample <- get_n_distinct_per_sample(cl_metadata[,-"duplicate_barcode"])
one_per_sample <- apply(ndistinct_sample, 2, function(x) all(x == 1))
group_cols <- names(ndistinct_sample)[one_per_sample]

return(group_cols)
}

#' Detect cell-level metadata variable types
#'
#' detect cell level metadata types
Expand All @@ -326,7 +358,7 @@ detect_variable_types <- function(cl_metadata) {
# can only find group columns when samples are available
if ("samples" %in% names(cl_metadata)) {
# do not remove dups; if a user uploads some metadata it should be there
clm_per_sample_cols <- find_group_columns(cl_metadata, remove.dups = F)
clm_per_sample_cols <- find_group_columns_cl_metadata(cl_metadata)
} else {
clm_per_sample_cols <- character()
}
Expand All @@ -337,7 +369,8 @@ detect_variable_types <- function(cl_metadata) {
# remove samples var, useless from this point on
clm_cols <- grep("^samples$", clm_cols, value = T, invert = T)

clm_cols <- c(clm_cols, "duplicate_barcode")
# duplicate_barcode may be detected as CLMPerSample, manually define it as CLM
clm_cols <- union(clm_cols, "duplicate_barcode")

return(list(CLM = clm_cols, CLMPerSample = clm_per_sample_cols))
}
Expand Down Expand Up @@ -377,14 +410,23 @@ make_cl_metadata_cellclass <- function(variable, type, cl_metadata, color_pool)
cell_ids <- cl_metadata[get(variable) == values[i], cells_id]
}

cl_metadata_cellset$children[[i]] <- list(
key = uuid::UUIDgenerate(),
name = as.character(values[i]),
rootNode = FALSE,
type = type,
color = color_pool[1],
cellIds = ensure_is_list_in_json(cell_ids)
# Empty cell sets shouldn't be created based on cell level metadata
if (length(cell_ids) == 0) next

cl_metadata_cellset$children <- append(
cl_metadata_cellset$children,
list(
list(
key = uuid::UUIDgenerate(),
name = as.character(values[i]),
rootNode = FALSE,
type = type,
color = color_pool[1],
cellIds = ensure_is_list_in_json(cell_ids)
)
)
)

color_pool <- color_pool[-1]
}

Expand Down
26 changes: 26 additions & 0 deletions pipeline-runner/R/seurat-2-load_seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@ reconstruct_seurat <- function(dataset_fpath) {
names(user_scdata@reductions) <- tolower(names(user_scdata@reductions))
red_name <- SeuratObject::DefaultDimReduc(user_scdata)
check_type_is_safe(red_name)
red_match <- grep("umap|tsne", red_name, value = TRUE)

if (length(red_match) > 0 && !(red_match %in% c("umap", "tsne"))) {
is_umap <- grepl("umap", red_match)
is_tsne <- grepl("tsne", red_match)
new_red_name <- ifelse(is_umap, "umap", ifelse(is_tsne, "tsne", NA))

message("Found reduction name ", red_match," containing ", new_red_name)
user_scdata <- update_reduction_name(user_scdata, red_name, new_red_name)
red_name <- SeuratObject::DefaultDimReduc(user_scdata)
message("Updated default reduction: ", red_name)
}

stopifnot(red_name %in% c('umap', 'tsne'))

embedding <- user_scdata@reductions[[red_name]]@cell.embeddings
Expand Down Expand Up @@ -192,3 +205,16 @@ check_type_is_safe <- function(x) {
stop('Unexpected data type in uploaded .rds file.')
}
}


update_reduction_name <- function(scdata, red_name, new_name) {
current_names <- names(scdata@reductions)
if (new_name %in% current_names) {
message("Renaming existing reduction name ", names(scdata@reductions)[current_names == new_name], " to ", paste0(new_name, ".ori"))
names(scdata@reductions)[current_names == new_name] <- paste0(new_name, ".ori")
}
names(scdata@reductions)[current_names == red_name] <- new_name

return(scdata)
}

14 changes: 10 additions & 4 deletions pipeline-runner/R/seurat-3-upload_seurat_to_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,19 +171,25 @@ add_metadata_to_input <- function(scdata, input) {
return(input)
}

# get column names that are consistent with sample groups
find_group_columns <- function(metadata, remove.dups = TRUE) {

ndistinct_sample <- metadata |>
get_n_distinct_per_sample <- function(metadata) {
metadata |>
dplyr::group_by(samples) |>
dplyr::summarise_all(dplyr::n_distinct) |>
dplyr::select(colnames(metadata))
}


# get column names that are consistent with sample groups
find_group_columns <- function(metadata, remove.dups = TRUE) {

ndistinct_sample <- get_n_distinct_per_sample(metadata)

ndistinct <- metadata |>
dplyr::summarise_all(dplyr::n_distinct)

nsamples <- length(unique(metadata$samples))


# group columns must:
# - have fewer than the number of samples
# - have at least two values
Expand Down
2 changes: 1 addition & 1 deletion pipeline-runner/man/detect_variable_types.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pipeline-runner/man/embed_and_cluster.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions pipeline-runner/man/find_group_columns_cl_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pipeline-runner/man/make_cl_metadata_cellsets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pipeline-runner/man/make_cl_metadata_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions pipeline-runner/man/replace_cl_metadata_through_api.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions pipeline-runner/man/sort_cluster_names.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 37 additions & 2 deletions pipeline-runner/tests/testthat/test-qc-7-embed_and_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ mock_color_pool <- function(n) {
}


mock_cl_metadata <- function(scdata) {
barcode <- rownames(scdata@meta.data)
mock_cl_metadata <- function(scdata, optional_custom_barcodes) {
barcode <- if (missing(optional_custom_barcodes)) rownames(scdata@meta.data) else optional_custom_barcodes

samples <- rep_len(paste0("sample_", 1:4), length(barcode))
cell_type <- rep_len(paste0("cell_type_", 1:10), length(barcode))
group_var <- rep_len(paste0("group_", 1:2), length(barcode))
Expand Down Expand Up @@ -607,6 +608,23 @@ test_that("make_cl_metadata_cellsets makes cell-level metadata cellsets.", {
}
})

test_that("make_cl_metadata_cellsets skips making cellsets that are empty (no cellIds).", {
config <- mock_config()
scdata <- mock_scdata()
cl_metadata <- mock_cl_metadata(
scdata,
c(paste0("missing-barcode", 1:50))
)

local_mock_cl_metadata_table(cl_metadata, "mock_experiment_id")

res <- stubbed_make_cl_metadata_cellsets(scdata, config)
withr::defer(unlink(file.path(".", basename(config$metadata_s3_path))))

expect_equal(length(res[[1]]$children), 0)
expect_equal(length(res[[2]]$children), 0)
})


with_fake_http(
test_that("replace_cl_metadata_through_api sends patch request", {
Expand Down Expand Up @@ -648,11 +666,28 @@ test_that("make_cl_metadata_table works with duplicate barcodes", {
}
})


test_that("make_cl_metadata_table works when seurat object contains barcode suffix but the uploaded table does not.", {
config <- mock_config()
scdata <- mock_scdata()
cl_metadata <- mock_cl_metadata(scdata)

barcode_cell_id_map <- get_cell_id_barcode_map(scdata)
expected <- make_cl_metadata_table(cl_metadata, barcode_cell_id_map)
barcode_cell_id_map$barcode <- paste0(barcode_cell_id_map$barcode, "_1")

res <- make_cl_metadata_table(cl_metadata, barcode_cell_id_map)

expect_equal(res, expected)
})


test_that("add_duplicate_barcode_column handles empty input correctly", {
empty_cl_metadata <- data.frame(barcode = character(0))
expect_equal(nrow(add_duplicate_barcode_column(empty_cl_metadata)), 0)
})


test_that("add_duplicate_barcode_column handles unique barcodes correctly", {
unique_cl_metadata <- data.frame(barcode = c("BC01", "BC02", "BC03"))
unique_result <- add_duplicate_barcode_column(unique_cl_metadata)
Expand Down
Loading

0 comments on commit b670580

Please sign in to comment.