-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add conditional print method for objects that contain alternative experiments #89
base: master
Are you sure you want to change the base?
Changes from 103 commits
5ac207e
c1db700
5a5b71b
67a48f0
430cdff
5aaba6a
672ef06
af9204b
a8f1bb2
97dfdc9
7f41b60
d4d45c4
ec37354
4344098
bef9d3a
fd1c9bd
5dbdd24
8dbb158
6a9d4f6
41b2844
1e93081
65bda14
b4e3665
79c2001
6bd2829
a68f140
b6334a0
f860dd5
a3596a1
06ff606
6c9d669
263d481
86ce152
3b94f11
5859139
fb08e11
5287d24
b8bb0a4
33c8ab5
0d8f2ce
6d1b241
da5545d
1024f9e
d31fd7a
d018a60
5956947
d3be263
ab81fb0
eafe321
e39791f
0cea6fe
3fa4f8d
3d3db34
7091049
2d7673a
e64de6e
19c65f6
e97ee07
4fa5053
ca7b27b
203e1f2
8fe1ee2
6493ad9
5a64c2d
ca20f90
ed0400e
376b260
1c8a03d
7c774bf
a2cf4ab
8df074a
8965104
fe2ca4e
e678767
5dfc7be
2050f5d
2b908a2
8e0501c
830529f
d89e38f
2600e46
3272e66
347d0cf
4f6f1cb
fc4351b
a7dd8af
8d46133
608d85b
9ea6dce
4bf0c59
93e517d
2a4ea90
34047d3
5c62b25
a7f2046
4797b81
2fb771a
cd53a86
a8dc2a8
6df2a39
61c18d7
f400720
d1e9ebc
94fc572
de152a3
4107221
c7f84c4
5b82e36
e363132
5002180
98a5584
a592c71
06aa1ec
8776694
462a9b5
aa97269
767b2b5
8c0e87a
8aede5a
858828d
550c3b4
b5c5f43
c68e190
961e442
d26e02a
e269004
7013e1c
a641911
2ad486a
4be74fc
3fe42a9
443fda8
124ddd6
27b7623
0b2d328
5593de0
86734ba
6e9a84a
e1624fe
faf5fe0
222aeea
d1ce30f
ec24c7a
ace24f7
7ea1982
6ee25e3
aac9c73
27059cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,23 +41,25 @@ setMethod("join_features", "SingleCellExperiment", function(.data, | |
# CRAN Note | ||
.cell <- NULL | ||
.feature <- NULL | ||
|
||
arg_list <- c(mget(ls(environment(), sorted=F)), match.call(expand.dots=F)$...) | ||
all_assays <- get_all_assays(.data)$assay_id | ||
if(is.null(arg_list$assays)) assays_from_join_call <- all_assays | ||
# Shape is long | ||
if (shape == "long") { | ||
|
||
# Suppress generic data frame creation message produced by left_join | ||
suppressMessages({ | ||
.data <- | ||
.data %>% | ||
left_join( | ||
by=c_(.data)$name, | ||
get_abundance_sc_long( | ||
.data=.data, | ||
features=features, | ||
all=all, | ||
exclude_zeros=exclude_zeros)) %>% | ||
select(!!c_(.data)$symbol, .feature, | ||
contains(".abundance"), everything()) | ||
.data <- | ||
.data %>% | ||
left_join( | ||
by=c_(.data)$name, | ||
get_abundance_sc_long( | ||
.data=.data, | ||
features=features, | ||
all=all, | ||
exclude_zeros=exclude_zeros, | ||
...)) %>% | ||
select(!!c_(.data)$symbol, .feature, | ||
contains(".abundance"), everything()) | ||
}) | ||
|
||
# Provide data frame creation and abundance column message | ||
|
@@ -79,14 +81,16 @@ setMethod("join_features", "SingleCellExperiment", function(.data, | |
.data | ||
|
||
# Shape if wide | ||
} else { | ||
} else if (shape == "wide"){ | ||
if(is.null(arg_list$assays)) stop("Please provide assays") | ||
.data %>% | ||
left_join( | ||
by=c_(.data)$name, | ||
get_abundance_sc_wide( | ||
.data=.data, | ||
features=features, | ||
all=all, ...)) | ||
all=all, | ||
...)) | ||
} | ||
}) | ||
|
||
|
@@ -138,86 +142,158 @@ tidy.SingleCellExperiment <- function(object) { | |
#' @importFrom SummarizedExperiment assays assays<- assayNames | ||
#' @importFrom S4Vectors split | ||
#' @importFrom stringr str_remove | ||
#' @importFrom dplyr group_split | ||
#' @importFrom dplyr full_join | ||
#' @importFrom dplyr left_join | ||
#' @importFrom dplyr group_by | ||
#' @importFrom dplyr pick | ||
#' @importFrom dplyr group_rows | ||
#' @importFrom dplyr group_keys | ||
#' @importFrom dplyr bind_rows | ||
#' @importFrom dplyr pull | ||
#' @importFrom tidyr unite | ||
#' @importFrom tidyr separate | ||
#' @importFrom purrr reduce | ||
#' @importFrom purrr map | ||
#' @importFrom purrr set_names | ||
#' @importFrom purrr list_transpose | ||
#' | ||
#' | ||
#' @export | ||
setMethod("aggregate_cells", "SingleCellExperiment", function(.data, | ||
.sample=NULL, slot="data", assays=NULL, | ||
aggregation_function=Matrix::rowSums, | ||
...) { | ||
|
||
# Fix NOTEs | ||
feature <- NULL | ||
.sample <- enquo(.sample) | ||
|
||
# Subset only wanted assays | ||
if (!is.null(assays)) { | ||
assays(.data) <- assays(.data)[assays] | ||
} | ||
setMethod("aggregate_cells", "SingleCellExperiment", function(.data, | ||
.sample = NULL, assays = NULL, | ||
aggregation_function = Matrix::rowSums, | ||
...) { | ||
# Fix NOTEs | ||
feature <- NULL | ||
.sample <- enquo(.sample) | ||
|
||
arg_list <- c(mget(ls(environment(), sorted = F)), match.call(expand.dots = F)$...) | ||
assays_to_use <- eval(arg_list$assays) | ||
if (is.null(assays_to_use)) assays_to_use <- tail(names(assays(.data)), n = 1) | ||
|
||
sample_groups <- .data |> | ||
as_tibble() |> | ||
group_by(pick({{ .sample }})) | ||
|
||
sample_group_idx <- sample_groups |> | ||
group_rows() | ||
|
||
sample_group_keys <- sample_groups |> | ||
group_keys() | ||
|
||
grouping_factor = | ||
.data |> | ||
colData() |> | ||
as_tibble() |> | ||
select(!!.sample) |> | ||
suppressMessages() |> | ||
unite("my_id_to_split_by___", !!.sample, sep = "___") |> | ||
pull(my_id_to_split_by___) |> | ||
as.factor() | ||
|
||
list_count_cells = table(grouping_factor) |> as.list() | ||
|
||
# New method | ||
list_assays = | ||
.data |> | ||
assays() |> | ||
as.list() |> | ||
map(~ .x |> splitColData(grouping_factor)) |> | ||
unlist(recursive=FALSE) | ||
|
||
list_assays = | ||
list_assays |> | ||
map2(names(list_assays), ~ { | ||
# Get counts | ||
.x %>% | ||
aggregation_function(na.rm=TRUE) %>% | ||
enframe( | ||
name =".feature", | ||
value="x") %>% # sprintf("%s", .y)) %>% | ||
|
||
# In case we don't have rownames | ||
mutate(.feature=as.character(.feature)) | ||
}) |> | ||
enframe(name = ".sample") |> | ||
|
||
# Clean groups | ||
mutate(assay_name = assayNames(!!.data) |> rep(each=length(levels(grouping_factor)))) |> | ||
mutate(.sample = .sample |> str_remove(assay_name) |> str_remove("\\.")) |> | ||
group_split(.sample) |> | ||
map(~ .x |> unnest(value) |> pivot_wider(names_from = assay_name, values_from = x) ) |> | ||
|
||
# Add cell count | ||
map2( | ||
list_count_cells, | ||
~ .x |> mutate(.aggregated_cells = .y) | ||
) | ||
|
||
|
||
do.call(rbind, list_assays) |> | ||
|
||
left_join( | ||
.data |> | ||
colData() |> | ||
as_tibble() |> | ||
subset(!!.sample) |> | ||
unite("my_id_to_split_by___", !!.sample, remove=FALSE, sep = "___"), | ||
by= join_by(".sample" == "my_id_to_split_by___") | ||
) |> | ||
|
||
as_SummarizedExperiment( | ||
.sample=.sample, | ||
.transcript=.feature, | ||
.abundance=!!as.symbol(names(.data@assays))) | ||
.sample_names <- colnames(sample_group_keys) | ||
|
||
grouping_factor_names <- sample_group_keys |> | ||
unite(col = "grouping_factor", !!.sample, sep = "___") |> | ||
pull(grouping_factor) | ||
|
||
sce_split <- map(.x = seq_along(sample_group_idx), .f = \(.num) .data[, sample_group_idx[[.num]]]) |> | ||
purrr::set_names(grouping_factor_names) | ||
|
||
grouping_factor <- | ||
.data |> | ||
colData() |> | ||
as_tibble() |> | ||
select(!!.sample) |> | ||
suppressMessages() |> | ||
unite("my_id_to_split_by___", !!.sample, sep = "___") |> | ||
pull(my_id_to_split_by___) |> | ||
as.factor() | ||
|
||
list_count_cells <- table(grouping_factor) |> | ||
enframe(name = "grouping_factor", value = ".aggregated_cells") |> | ||
mutate(.aggregated_cells = as.integer(.aggregated_cells)) | ||
|
||
feature_df <- get_all_features(.data) | ||
selected_features <- feature_df[feature_df$assay_id %in% assays_to_use, ] | ||
selected_experiments_list <- split(x = selected_features, f = as.character(selected_features$exp_id)) | ||
if ("Main" %in% names(selected_experiments_list)) selected_experiments_list <- selected_experiments_list[c("Main", setdiff(names(selected_experiments_list), "Main"))] | ||
|
||
aggregate_assays_fun <- function(exp) { | ||
selected_exp <- unique(exp$exp_id) | ||
selected_assays <- exp |> distinct(assay_name, .keep_all = TRUE) | ||
if (selected_exp == "Main") { | ||
aggregate_sce_fun <- function(sce) { | ||
aggregated_vals <- assays(sce)[selected_assays$assay_name] |> | ||
as.list() |> | ||
map(.f = \(.list) aggregation_function(.list)) | ||
map(.x = seq_along(aggregated_vals), \(.num) enframe(x = aggregated_vals[[.num]], name = ".feature", value = selected_assays$assay_id[[.num]])) |> | ||
suppressMessages(reduce(full_join)) | ||
} | ||
aggregated_list <- lapply(sce_split, aggregate_sce_fun) |> | ||
purrr::list_transpose() |> | ||
map(.f = \(.list) .list |> bind_rows(.id = "grouping_factor")) | ||
interim_res <- map(.x = seq_along(aggregated_list), .f = \(.num) aggregated_list[[.num]] |> | ||
separate(col = grouping_factor, into = .sample_names, sep = "___")) |> | ||
purrr::set_names(nm = selected_exp) | ||
map(.x = seq_along(interim_res), .f = \(.num) interim_res[[.num]] |> mutate(assay_type = names(interim_res)[[.num]])) |> | ||
purrr::reduce(full_join) |> | ||
mutate(assay_type = ifelse(assay_type == "Main", yes = "RNA", no = assay_type)) |> | ||
select(assay_type, everything()) | ||
} else { | ||
aggregate_sce_fun <- function(sce) { | ||
aggregated_vals <- assays(altExps(sce)[[selected_exp]])[selected_assays$assay_name] |> | ||
as.list() |> | ||
set_names(selected_assays$assay_id) |> | ||
map(.f = \(.list) aggregation_function(.list)) | ||
map(.x = seq_along(aggregated_vals), \(.num) enframe(x = aggregated_vals[[.num]], name = ".feature", value = selected_assays$assay_id[[.num]])) |> | ||
suppressMessages(reduce(full_join)) | ||
} | ||
aggregated_list <- lapply(sce_split, aggregate_sce_fun) |> | ||
list_transpose() |> | ||
map(.f = \(.list) .list |> bind_rows(.id = "grouping_factor")) | ||
interim_res <- map(.x = seq_along(aggregated_list), .f = \(.num) aggregated_list[[.num]] |> | ||
separate(col = grouping_factor, into = .sample_names, sep = "___")) |> | ||
purrr::set_names(nm = selected_exp) | ||
map(.x = seq_along(interim_res), .f = \(.num) interim_res[[.num]] |> | ||
mutate(assay_type = names(interim_res)[[.num]])) |> | ||
purrr::reduce(full_join) |> | ||
mutate(assay_type = ifelse(assay_type == "Main", yes = "RNA", no = assay_type)) |> | ||
select(assay_type, everything()) | ||
} | ||
} | ||
se <- lapply(selected_experiments_list, aggregate_assays_fun) |> | ||
purrr::reduce(full_join) |> | ||
suppressMessages() | ||
|
||
if(se |> | ||
distinct(assay_type, .feature) |> | ||
pull(.feature) |> | ||
duplicated() |> | ||
any()) { | ||
warning("tidySingleCellExperiment says: The selected assays have overlapping feature names. The feature names have been combined with the selected assay_type, to keep the rownames of the SingleCellExperiment unique. You can find the original feature names in the orig.feature.names column of the rowData slot of your object.") | ||
orig_features <- se |> | ||
distinct(assay_type, .feature) | ||
dup_features <- orig_features |> | ||
filter(duplicated(.feature)) |> | ||
pull(.feature) | ||
se <- se |> | ||
mutate(.feature = case_when(.feature %in% dup_features ~ str_c(assay_type, .feature, sep = ".."), .default = .feature)) | ||
} | ||
|
||
se <- se |> | ||
tidybulk::as_SummarizedExperiment( | ||
.sample = .sample_names, | ||
.transcript = .feature, | ||
.abundance = setdiff(colnames(se), c("assay_type", .sample_names, ".feature"))) | ||
if(exists("assay_type", where = as.data.frame(colData(se)))) { | ||
rowData(se) <- rownames(se) |> | ||
enframe(name = NULL, value = "rowname") |> | ||
mutate(assay_type = unique(colData(se)$assay_type)) |> | ||
tibble::column_to_rownames() |> | ||
as.data.frame() |> | ||
as(Class = "DataFrame") | ||
colData(se)$assay_type <- NULL | ||
} | ||
if(rownames(se) |> grep(pattern = "\\.\\.") |> any()) { | ||
rowData(se) <- rowData(se) |> | ||
as.data.frame() |> | ||
rownames_to_column() |> | ||
mutate(orig.feature.names = rowname, | ||
orig.feature.names = str_remove_all(string = orig.feature.names, pattern = ".+(?=\\.\\.)"), | ||
orig.feature.names = str_remove_all(string = orig.feature.names, pattern = "^\\..")) |> | ||
column_to_rownames() |> | ||
as(Class = "DataFrame") | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add what this function is doing and why, in a comment above it? As a general comment, please comment on all your code blocks, so a developer in 5-10 years can navigate the code without having to interpret it. 🙏 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have now added plenty of comments to the method. Hopefully it should be clear what each code chunk is doing but do let me know if anything needs clarifying. I will also add descriptions to the other functions I've tweaked to clarify (e.g. join_features). |
||
return(se) | ||
}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Amazing we are close.
Please let's stick with the tidyomics style
feature_original
(no dots in names, no abbreviations).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. Done.