diff --git a/DESCRIPTION b/DESCRIPTION index b7bee05..bfeb39f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: signals Title: Single Cell Genomes with Allele Specificity -Version: 0.9.1 +Version: 0.10.0 Author@R: c(person("Marc", "Williams", email = "marcjwilliams1@gmail.com", role = c("aut", "cre")), person("Tyler", "Funnell", diff --git a/NEWS.md b/NEWS.md index 441cb69..1554d97 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# signals 0.10.0 + +* add chr string check +* remove acrocentric chromosomes in arm consensus +* minor changes to heatmap and plotting + # signals 0.9.1 * Update docker to install suggests packages diff --git a/R/callASCN.R b/R/callASCN.R index 9318813..7551ce4 100644 --- a/R/callASCN.R +++ b/R/callASCN.R @@ -232,6 +232,12 @@ callAlleleSpecificCN <- function(CNbins, maxCN <- max(CNbins$state) } + if (any(grepl("chr", CNbins$chr))){ + message("Removing chr string from chr column") + CNbins$chr <- sub("chr", "", CNbins$chr) + haplotypes$chr <- sub("chr", "", haplotypes$chr) + } + if (filterhaplotypes){ haplotypes <- filter_haplotypes(haplotypes, filterhaplotypes) } @@ -457,6 +463,11 @@ callAlleleSpecificCNfromHSCN <- function(hscn, CNbins <- hscn$data %>% dplyr::select(cell_id, chr, start, end, state, copy) + if (any(grepl("chr", CNbins$chr))){ + message("Removing chr string from chr column") + CNbins$chr <- sub("chr", "", CNbins$chr) + } + infloherror <- hscn$loherror CNBAF <- switch_alleles(hscn$data) diff --git a/R/callHSCN.R b/R/callHSCN.R index 3151252..baffbdd 100644 --- a/R/callHSCN.R +++ b/R/callHSCN.R @@ -714,6 +714,12 @@ callHaplotypeSpecificCN <- function(CNbins, maxCN <- max(CNbins$state) } + if (any(grepl("chr", CNbins$chr))){ + message("Removing chr string from chr column") + CNbins$chr <- sub("chr", "", CNbins$chr) + haplotypes$chr <- sub("chr", "", haplotypes$chr) + } + nhaplotypes <- haplotypes %>% dplyr::group_by(cell_id) %>% dplyr::summarize(n = sum(totalcounts)) %>% diff --git a/R/heatmap_plot.R b/R/heatmap_plot.R index 79ec8b5..4702e87 100644 --- a/R/heatmap_plot.R +++ b/R/heatmap_plot.R @@ -638,6 +638,9 @@ make_top_annotation_gain <- function(copynumber, plotfrequency = FALSE, cutoff = NULL, maxf = NULL, + frequency_height = 1.4, + sv_height = 0.7, + annofontsize = 10, SV = NULL) { ncells <- nrow(copynumber) @@ -660,7 +663,8 @@ make_top_annotation_gain <- function(copynumber, gp = grid::gpar(col = "#E34A33", fill = "#E34A33"), axis_param = list( at = c(round(maxf / 2, 2), maxf), - labels = c(paste0(round(maxf / 2, 2)), paste0(maxf)) + labels = c(paste0(round(maxf / 2, 2)), paste0(maxf)), + gp = grid::gpar(fontsize = annofontsize-1) ), ylim = c(0, maxf), border = FALSE, @@ -671,13 +675,14 @@ make_top_annotation_gain <- function(copynumber, gp = grid::gpar(col = "#3182BD", fill = "#3182BD"), axis_param = list( at = c(0.0, -round(maxf / 2, 2), -maxf), - labels = c("0", paste0(round(maxf / 2, 2)), paste0(maxf)) + labels = c("0", paste0(round(maxf / 2, 2)), paste0(maxf)), + gp = grid::gpar(fontsize = annofontsize-1) ), ylim = c(-maxf, 0), border = FALSE, ), show_annotation_name = FALSE, - height = grid::unit(1.4, "cm") + height = grid::unit(frequency_height, "cm") ) } else if (plotcol == "state_phase" & plotfrequency == TRUE) { f1a <- colSums(apply(copynumber, 2, function(x) grepl("A-Gained", x))) / ncells @@ -699,11 +704,11 @@ make_top_annotation_gain <- function(copynumber, bar_width = 1, gp = grid::gpar( col = c(scCNphase_colors["A-Gained"], scCNphase_colors["A-Hom"]), - fill = c(scCNphase_colors["A-Gained"], scCNphase_colors["A-Hom"]) - ), + fill = c(scCNphase_colors["A-Gained"], scCNphase_colors["A-Hom"])), axis_param = list( at = c(round(maxf / 2, 2), maxf), - labels = c(paste0(round(maxf / 2, 2)), paste0(maxf)) + labels = c(paste0(round(maxf / 2, 2)), paste0(maxf)), + gp = grid::gpar(fontsize = annofontsize-1) ), ylim = c(0, maxf), border = FALSE, @@ -713,17 +718,17 @@ make_top_annotation_gain <- function(copynumber, bar_width = 1, gp = grid::gpar( col = c(scCNphase_colors["B-Gained"], scCNphase_colors["B-Hom"]), - fill = c(scCNphase_colors["B-Gained"], scCNphase_colors["B-Hom"]) - ), + fill = c(scCNphase_colors["B-Gained"], scCNphase_colors["B-Hom"])), axis_param = list( at = c(0, -round(maxf / 2, 2), -maxf), - labels = c("0", paste0(round(maxf / 2, 2)), paste0(maxf)) + labels = c("0", paste0(round(maxf / 2, 2)), paste0(maxf)), + gp = grid::gpar(fontsize = annofontsize-1) ), ylim = c(-maxf, 0), border = FALSE, ), show_annotation_name = FALSE, - height = grid::unit(1.4, "cm") + height = grid::unit(frequency_height, "cm") ) } else if ((plotcol == "state_BAF" | plotcol == "BAF") & plotfrequency == TRUE) { @@ -742,7 +747,8 @@ make_top_annotation_gain <- function(copynumber, gp = grid::gpar(col = scCNphase_colors["A-Hom"], fill = scCNphase_colors["A-Hom"]), axis_param = list( at = c(round(maxf / 2, 2), maxf), - labels = c(paste0(round(maxf / 2, 2)), paste0(maxf)) + labels = c(paste0(round(maxf / 2, 2)), paste0(maxf)), + gp = grid::gpar(fontsize = annofontsize-1) ), ylim = c(0, maxf), border = FALSE, @@ -753,13 +759,14 @@ make_top_annotation_gain <- function(copynumber, gp = grid::gpar(col = scCNphase_colors["B-Hom"], fill = scCNphase_colors["B-Hom"]), axis_param = list( at = c(0.0, -round(maxf / 2, 2), -maxf), - labels = c("0", paste0(round(maxf / 2, 2)), paste0(maxf)) + labels = c("0", paste0(round(maxf / 2, 2)), paste0(maxf)), + gp = grid::gpar(fontsize = annofontsize-1) ), ylim = c(-maxf, 0), border = FALSE, ), show_annotation_name = FALSE, - height = grid::unit(1.4, "cm") + height = grid::unit(frequency_height, "cm") ) } else { @@ -781,7 +788,7 @@ make_top_annotation_gain <- function(copynumber, ), which = "column", show_annotation_name = TRUE, - height = grid::unit(0.7, "cm") + height = grid::unit(sv_height, "cm") ) } @@ -799,6 +806,7 @@ make_copynumber_heatmap <- function(copynumber, maxf = 1.0, plotcol = "state", plotfrequency = FALSE, + frequency_height = 1.4, show_legend = TRUE, show_library_label = TRUE, show_clone_label = TRUE, @@ -855,7 +863,9 @@ make_copynumber_heatmap <- function(copynumber, heatmap_legend_param = leg_params, top_annotation = make_top_annotation_gain(copynumber, cutoff = cutoff, maxf = maxf, - plotfrequency = plotfrequency, plotcol = plotcol, SV = SV + plotfrequency = plotfrequency, plotcol = plotcol, SV = SV, + frequency_height = frequency_height, + annofontsize = annofontsize ), use_raster = TRUE, raster_quality = rasterquality, @@ -897,6 +907,7 @@ getSVlegend <- function(include = NULL) { #' @param frequencycutoff default = 2 #' @param maxf Max frequency when plotting the frequency track, default = NULL infers this from the data #' @param plotfrequency Plot the frequency track of gains and losses across the genome +#' @param frequency_height height of the frequency track if using, default = 1.4 #' @param show_legend plot legend or not, boolean #' @param show_library_label show library label or not, boolean #' @param show_clone_label show clone label or not, boolean @@ -950,6 +961,7 @@ plotHeatmap <- function(cn, frequencycutoff = 2, maxf = NULL, plotfrequency = FALSE, + frequency_height = 1.4, show_legend = TRUE, show_library_label = TRUE, show_clone_label = TRUE, @@ -1196,6 +1208,7 @@ plotHeatmap <- function(cn, maxf = maxf, plotcol = plotcol, plotfrequency = plotfrequency, + frequency_height = frequency_height, show_legend = show_legend, show_library_label = show_library_label, show_clone_label = show_clone_label, diff --git a/R/plotting.R b/R/plotting.R index 96c0ade..d4fcfbd 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -576,7 +576,6 @@ get_bezier_df <- function(sv, cn, maxCN, homolog = FALSE) { #' @param positionticks set to TRUE to use position ticks rather than chromosome ticks #' @param genome genome to use, default = "hg19" (only used for ideogram) #' @param ideogram plot ideogram at the top, default = TRUE -#' @param ideogram_height height of the ideogram #' #' @return ggplot2 plot #' @@ -869,6 +868,8 @@ plotCNprofileBAFhomolog <- function(cn, chrend = NULL, shape = 16, positionticks = FALSE, + ideogram = FALSE, + genome = "hg19", ...) { if (!xaxis_order %in% c("bin", "genome_position")) { stop("xaxis_order must be either 'bin' or 'genome_position'") @@ -902,6 +903,12 @@ plotCNprofileBAFhomolog <- function(cn, if (!"BAF" %in% names(CNbins)) { stop("No BAF column in dataframe, first calculate the BAF per bin using combineBAFCN and then callAlleleSpecificCN") } + + if (ideogram == TRUE){ + miny <- -0.5 + } else{ + miny <- 0 + } statecolpal <- scCNstate_cols() @@ -954,7 +961,7 @@ plotCNprofileBAFhomolog <- function(cn, legend.position = "none" ) + ggplot2::scale_x_continuous(breaks = pl$chrticks, labels = pl$chrlabels, expand = c(0, 0), limits = c(pl$minidx, pl$maxidx), guide = ggplot2::guide_axis(check.overlap = TRUE)) + - ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(0, maxCN), trans = y_axis_trans) + + ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(miny, maxCN), trans = y_axis_trans) + ggplot2::xlab(xlab) + ggplot2::ylab("Copy Number") + cowplot::theme_cowplot(...) + @@ -984,7 +991,7 @@ plotCNprofileBAFhomolog <- function(cn, legend.position = "none" ) + ggplot2::scale_x_continuous(breaks = pl$chrticks, labels = pl$chrlabels, expand = c(0, 0), limits = c(pl$minidx, pl$maxidx), guide = ggplot2::guide_axis(check.overlap = TRUE)) + - ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(0, maxCN), trans = y_axis_trans) + + ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(miny, maxCN), trans = y_axis_trans) + ggplot2::xlab(xlab) + ggplot2::ylab("Copy Number") + cowplot::theme_cowplot(...) + @@ -1045,7 +1052,7 @@ plotCNprofileBAFhomolog <- function(cn, legend.position = "none" ) + ggplot2::scale_x_continuous(breaks = pl$chrticks, labels = pl$chrlabels, expand = c(0, 0), limits = c(pl$minidx, pl$maxidx), guide = ggplot2::guide_axis(check.overlap = TRUE)) + - ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(0 - offset, maxCN + offset), trans = y_axis_trans) + + ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(miny - offset, maxCN + offset), trans = y_axis_trans) + ggplot2::xlab(xlab) + ggplot2::ylab("Copy Number") + cowplot::theme_cowplot(...) + @@ -1121,6 +1128,40 @@ plotCNprofileBAFhomolog <- function(cn, gCN <- gCN + ggplot2::geom_vline(data = datidx, ggplot2::aes(xintercept = idx), lty = annotateregions_linetype, size = 0.3, alpha = 0.5) } + + if (ideogram == TRUE){ + binsize <- pl$CNbins$end[1] - pl$CNbins$start[1] + 1 + ideogram_dat <- cytoband_map[[genome]] + names(ideogram_dat) <- c("chr", "start", "end", "band", "colval") + ideogram_dat <- ideogram_dat %>% + dplyr::mutate(chr = stringr::str_remove(chr, "chr")) %>% + dplyr::mutate(start = round(start / binsize) * binsize + 1, + end = round(end / binsize) * binsize + 1) + + #create a dataframe that has the index of the start and end position + cnbin_idx_start <- pl$bins %>% + dplyr::select(chr, start, idx) %>% + dplyr::rename(idx_start = idx) + cnbin_idx_end <- pl$bins %>% + dplyr::select(chr, start, idx) %>% + dplyr::rename(end = start) %>% + dplyr::rename(idx_end = idx) + ideogram_dat <- dplyr::inner_join(ideogram_dat, + cnbin_idx_start, by = c("chr", "start")) %>% + dplyr::inner_join(cnbin_idx_end, by = c("chr", "end")) + + gCN <- gCN + + ggplot2::geom_rect(data = ideogram_dat, + ggplot2::aes(xmin = idx_start, + y = NULL, + x = NULL, + xmax = idx_end, + ymin = -0.5, + ymax = -0.15, fill = colval)) + + ggplot2::scale_fill_manual(values = cyto_colors) + + ggplot2::theme(legend.position = "none") + + } return(gCN) } @@ -1155,6 +1196,8 @@ plotCNprofileBAFhomolog <- function(cn, #' @param positionticks set to TRUE to use position ticks rather than chromosome ticks #' @param BAFcol state to use to colour BAF track, default = `state_phase` #' @param my_title string to use for title, if NULL cell_id is shown +#' @param ideogram plot ideogram at the top, default = TRUE +#' @param genome genome to use, default = "hg19" (only used for ideogram) #' #' #' @return ggplot2 plot @@ -1200,7 +1243,9 @@ plotCNprofileBAF <- function(cn, chrstart = NULL, chrend = NULL, shape = 16, + ideogram = FALSE, positionticks = FALSE, + genome = "hg19", ...) { if (homolog == TRUE) { ghomolog <- plotCNprofileBAFhomolog(cn, @@ -1229,6 +1274,8 @@ plotCNprofileBAF <- function(cn, chrend = chrend, shape = shape, positionticks = positionticks, + ideogram = ideogram, + genome = genome, ... ) return(ghomolog) @@ -1279,6 +1326,12 @@ plotCNprofileBAF <- function(cn, if (BAFcol == "state_AS") { BAFcolpal <- scCNAS_cols() } + + if (ideogram == TRUE){ + miny <- -0.5 + } else{ + miny <- 0 + } statecolpal <- scCNstate_cols() @@ -1354,7 +1407,7 @@ plotCNprofileBAF <- function(cn, legend.position = "none" ) + ggplot2::scale_x_continuous(breaks = pl$chrticks, labels = pl$chrlabels, expand = c(0, 0), limits = c(pl$minidx, pl$maxidx), guide = ggplot2::guide_axis(check.overlap = TRUE)) + - ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(0, maxCN), trans = y_axis_trans) + + ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(miny, maxCN), trans = y_axis_trans) + ggplot2::xlab(xlab) + ggplot2::ylab("Copy Number") + cowplot::theme_cowplot(...) + @@ -1418,7 +1471,7 @@ plotCNprofileBAF <- function(cn, legend.position = "none" ) + ggplot2::scale_x_continuous(breaks = pl$chrticks, labels = pl$chrlabels, expand = c(0, 0), limits = c(pl$minidx, pl$maxidx), guide = ggplot2::guide_axis(check.overlap = TRUE)) + - ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(0, maxCN), trans = y_axis_trans) + + ggplot2::scale_y_continuous(breaks = ybreaks, limits = c(miny, maxCN), trans = y_axis_trans) + ggplot2::xlab(xlab) + ggplot2::ylab("Copy Number") + cowplot::theme_cowplot(...) + @@ -1467,6 +1520,40 @@ plotCNprofileBAF <- function(cn, gCN <- gCN + ggplot2::geom_vline(data = datidx, ggplot2::aes(xintercept = idx), lty = annotateregions_linetype, size = 0.3, alpha = 0.5) } + + if (ideogram == TRUE){ + binsize <- pl$CNbins$end[1] - pl$CNbins$start[1] + 1 + ideogram_dat <- cytoband_map[[genome]] + names(ideogram_dat) <- c("chr", "start", "end", "band", "colval") + ideogram_dat <- ideogram_dat %>% + dplyr::mutate(chr = stringr::str_remove(chr, "chr")) %>% + dplyr::mutate(start = round(start / binsize) * binsize + 1, + end = round(end / binsize) * binsize + 1) + + #create a dataframe that has the index of the start and end position + cnbin_idx_start <- pl$bins %>% + dplyr::select(chr, start, idx) %>% + dplyr::rename(idx_start = idx) + cnbin_idx_end <- pl$bins %>% + dplyr::select(chr, start, idx) %>% + dplyr::rename(end = start) %>% + dplyr::rename(idx_end = idx) + ideogram_dat <- dplyr::inner_join(ideogram_dat, + cnbin_idx_start, by = c("chr", "start")) %>% + dplyr::inner_join(cnbin_idx_end, by = c("chr", "end")) + + gCN <- gCN + + ggplot2::geom_rect(data = ideogram_dat, + ggplot2::aes(xmin = idx_start, + y = NULL, + x = NULL, + xmax = idx_end, + ymin = -0.5, + ymax = -0.15, fill = colval)) + + ggplot2::scale_fill_manual(values = cyto_colors) + + ggplot2::theme(legend.position = "none") + + } g <- cowplot::plot_grid(gBAF, gCN, align = "v", ncol = 1, rel_heights = c(1, 1.2)) diff --git a/R/util.R b/R/util.R index 1b2953c..5b3ed2a 100644 --- a/R/util.R +++ b/R/util.R @@ -840,13 +840,14 @@ per_chrarm_cn <- function(hscn, arms = NULL) { dplyr::mutate(arm = coord_to_arm(chr, start, mergesmallarms = FALSE)) %>% dplyr::mutate(chrarm = paste0(chr, arm)) %>% as.data.table() %>% + filter(!chrarm %in% c("13p", "14p", "15p", "21p", "22p", "Yp")) %>% #remove acrocentric chromosomes .[, list( state = as.double(round(median(state, na.rm = TRUE))), copy = as.double(median(copy, na.rm = TRUE)), - A = as.double(floor(median(A))), - alleleA = sum(alleleA), - alleleB = sum(alleleB), - totalcounts = sum(totalcounts), + A = as.double(floor(median(A, na.rm = TRUE))), + alleleA = sum(alleleA, na.rm = TRUE), + alleleB = sum(alleleB, na.rm = TRUE), + totalcounts = sum(totalcounts, na.rm = TRUE), state_sd = sd(state, na.rm = TRUE), proportion = sum(state_AS_phased == Mode(state_AS_phased)) / .N ), by = c("chr", "arm", "chrarm", "cell_id")] %>% @@ -859,14 +860,15 @@ per_chrarm_cn <- function(hscn, arms = NULL) { dplyr::mutate(chrarm = paste0(chr, arm)) %>% dplyr::mutate(arm = ifelse(chrarm %in% arms, arm, "")) %>% dplyr::mutate(chrarm = paste0(chr, arm)) %>% + filter(!chrarm %in% c("13p", "14p", "15p", "21p", "22p", "Yp")) %>% as.data.table() %>% .[, list( state = as.double(round(median(state, na.rm = TRUE))), copy = as.double(median(copy, na.rm = TRUE)), - A = as.double(floor(median(A))), - alleleA = sum(alleleA), - alleleB = sum(alleleB), - totalcounts = sum(totalcounts), + A = as.double(floor(median(A, na.rm = TRUE))), + alleleA = sum(alleleA), na.rm = TRUE, + alleleB = sum(alleleB, na.rm = TRUE), + totalcounts = sum(totalcounts, na.rm = TRUE), state_sd = sd(state, na.rm = TRUE), proportion = sum(state_AS_phased == Mode(state_AS_phased)) / .N ), by = c("chr", "arm", "chrarm", "cell_id")] %>% @@ -928,10 +930,10 @@ per_chr_cn <- function(hscn, arms = NULL) { .[, list( state = as.double(round(median(state, na.rm = TRUE))), copy = as.double(median(copy, na.rm = TRUE)), - A = as.double(floor(median(A))), - alleleA = sum(alleleA), - alleleB = sum(alleleB), - totalcounts = sum(totalcounts), + A = as.double(floor(median(A, na.rm = TRUE))), + alleleA = sum(alleleA, na.rm = TRUE), + alleleB = sum(alleleB, na.rm = TRUE), + totalcounts = sum(totalcounts, na.rm = TRUE), state_sd = sd(state, na.rm = TRUE), proportion = sum(state_AS_phased == Mode(state_AS_phased)) / .N ), by = c("chr", "cell_id")] %>% @@ -1052,10 +1054,12 @@ createBAFassay <- function(seur, rna_ascn, ref = "hg19") { dplyr::filter(dplyr::row_number() == 1) %>% #hack, take first chr if there are > 1 as.data.frame() %>% dplyr::mutate(arm = paste0(chr, coord_to_arm(chr, start, assembly = ref))) - row.names(meta) <- meta$ensembl_gene_symbol - meta <- meta[stringr::str_remove(rownames(seur[["gBAF"]]), "BAF-"), ] + row.names(meta) <- paste0("BAF-", meta$ensembl_gene_symbol) + meta$rownames <- row.names(meta) meta <- merge(meta, snps_to_genes[, list(totalsnpcounts = sum(totalcounts)), by = "ensembl_gene_symbol"]) - row.names(meta) <- meta$ensembl_gene_symbol + row.names(meta) <- meta$rownames + meta<- subset(meta, select = -c(rownames)) + meta <- meta[row.names(seur[["gBAF"]]@data), ] seur[["gBAF"]]@meta.features <- meta gene_counts <- snps_to_genes %>% diff --git a/vignettes/ASCN-RNA.Rmd b/vignettes/ASCN-RNA.Rmd index 0295d82..215d5d9 100644 --- a/vignettes/ASCN-RNA.Rmd +++ b/vignettes/ASCN-RNA.Rmd @@ -86,7 +86,7 @@ segments <- create_segments(consensus, field = "state_phase") segments <- filter_segments(segments, binwidth = 10e6) ``` -Note, that if you have some other ways to obtain segments such as bulk whole genome sequencing then that would also work. The `segments` data frame needs to have the columns `chr`, `start` and `end`. Alternatively, if you don't have any data that can be used to identify segments a priori you can just use chromosome arms as the segmentation. To do this just leave the segments option in any of the following function empty and the functions will revert to the default of using chromosome arms. +Note, that if you have some other ways to obtain segments such as bulk whole genome sequencing then that would also work. The `segments` data frame needs to have the columns `chr`, `start` and `end`. Alternatively, if you don't have any data that can be used to identify segments a priori you can just use chromosome arms as the segmentation. To do this just leave the segments option in any of the following functions empty and the functions will revert to the default of using chromosome arms. Our inference scheme is based on using dirichilet process clustering using [Viber](https://github.com/caravagn/VIBER) to cluster cells that share similar BAF distributions across segments. We first compute BAF frequencies across each segment in each cell.