Skip to content

Commit

Permalink
Merge branch 'master' into heatmapfrequency
Browse files Browse the repository at this point in the history
  • Loading branch information
marcjwilliams1 authored Oct 2, 2024
2 parents 63de7bc + a41c54e commit ba3bd15
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 34 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,6 @@ export(widen_bins)
export(widen_haplotypebins)
import(data.table)
importFrom(Rcpp,sourceCpp)
importFrom(data.table,":=")
importFrom(magrittr,"%>%")
useDynLib(signals)
51 changes: 39 additions & 12 deletions R/callHSCN.R
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,7 @@ filter_haplotypes <- function(haplotypes, fraction){
#' @param fillmissing For bins with missing counts fill in values based on neighbouring bins
#' @param global_phasing_for_diploid When using cluster_per_chr, use all cells for phasing diploid regions within the cluster
#' @param chrs_for_global_phasing Which chromosomes to phase using all cells for diploid regions, default is NULL which uses all chromosomes
#' @param female Default is `TRUE`, if set to `FALSE` and patient is "XY", X chromosome states are set to A|0 where A=Hmmcopy state
#'
#' @return Haplotype specific copy number object
#'
Expand Down Expand Up @@ -677,8 +678,8 @@ callHaplotypeSpecificCN <- function(CNbins,
phasebyarm = FALSE,
minfrachaplotypes = 0.7,
likelihood = "auto",
minbins = 100,
minbinschr = 10,
minbins = 0,
minbinschr = 0,
phased_haplotypes = NULL,
clustering_method = "copy",
maxloherror = 0.035,
Expand All @@ -691,7 +692,8 @@ callHaplotypeSpecificCN <- function(CNbins,
smoothsingletons = TRUE,
fillmissing = TRUE,
global_phasing_for_balanced = FALSE,
chrs_for_global_phasing = NULL) {
chrs_for_global_phasing = NULL,
female = TRUE) {
if (!clustering_method %in% c("copy", "breakpoints")) {
stop("Clustering method must be one of copy or breakpoints")
}
Expand Down Expand Up @@ -720,6 +722,17 @@ callHaplotypeSpecificCN <- function(CNbins,
haplotypes$chr <- sub("chr", "", haplotypes$chr)
}

if (female == TRUE){
#do not infer states for chr "Y"
#haplotypes <- dplyr::filter(haplotypes, chr != "Y")
#CNbins <- dplyr::filter(CNbins, chr != "Y")
} else{
#do not infer states for chr "X" or "Y"
#haplotypes <- dplyr::filter(haplotypes, chr != "Y")
haplotypes <- dplyr::filter(haplotypes, chr != "X")
#CNbins <- dplyr::filter(CNbins, chr != "Y")
}

nhaplotypes <- haplotypes %>%
dplyr::group_by(cell_id) %>%
dplyr::summarize(n = sum(totalcounts)) %>%
Expand Down Expand Up @@ -794,18 +807,21 @@ callHaplotypeSpecificCN <- function(CNbins,
cells_to_remove <- ave_dist %>%
dplyr::filter(average_distance >= 0.05)
message(paste0("Removing ", dim(cells_to_remove)[1], " cells for phasing"))
ascn <- ascn %>% dplyr::filter(cell_id %in% cells_to_keep$cell_id)
haplotypes <- haplotypes %>% dplyr::filter(cell_id %in% cells_to_keep$cell_id)
ascn_filt <- ascn %>% dplyr::filter(cell_id %in% cells_to_keep$cell_id)
haplotypes_filt <- haplotypes %>% dplyr::filter(cell_id %in% cells_to_keep$cell_id)
} else{
ascn_filt <- ascn
haplotypes_filt <- haplotypes
}

infloherror <- ascn %>%
infloherror <- ascn_filt %>%
dplyr::filter(state_phase == "A-Hom") %>%
dplyr::summarise(err = weighted.mean(x = BAF, w = totalcounts, na.rm = TRUE)) %>%
dplyr::pull(err)
infloherror <- min(infloherror, maxloherror) # ensure loh error rate is < maxloherror

if (likelihood == "betabinomial" | likelihood == "auto") {
bbfit <- fitBB(ascn)
bbfit <- fitBB(ascn_filt)
if (bbfit$taronesZ > 5) {
likelihood <- "betabinomial"
message(paste0("Tarones Z-score: ", round(bbfit$taronesZ, 3), ", using ", likelihood, " model for inference."))
Expand All @@ -824,11 +840,11 @@ callHaplotypeSpecificCN <- function(CNbins,
)
}

ascn$balance <- ifelse(ascn$phase == "Balanced", 0, 1)
ascn_filt$balance <- ifelse(ascn_filt$phase == "Balanced", 0, 1)

if (is.null(phased_haplotypes)) {
chrlist <- proportion_imbalance(ascn,
haplotypes,
chrlist <- proportion_imbalance(ascn_filt,
haplotypes_filt,
phasebyarm = phasebyarm,
minfrachaplotypes = minfrachaplotypes,
mincells = mincells,
Expand All @@ -839,8 +855,8 @@ callHaplotypeSpecificCN <- function(CNbins,
propdf <- chrlist$propdf
chrlist <- chrlist$chrlist
phased_haplotypes <- phase_haplotypes_bychr(
ascn = ascn,
haplotypes = haplotypes,
ascn = ascn_filt,
haplotypes = haplotypes_filt,
chrlist = chrlist,
phasebyarm = phasebyarm,
global_phasing_for_balanced = global_phasing_for_balanced,
Expand Down Expand Up @@ -920,6 +936,17 @@ callHaplotypeSpecificCN <- function(CNbins,
tidyr::fill( c("state_min", "A", "B", "state_phase", "state_AS",
"state_AS_phased", "LOH", "state_BAF", "phase"), .direction = "downup") %>%
dplyr::ungroup()
#add 0|0 states for hom deletions
out[["data"]] <- out[["data"]] %>%
dplyr::mutate(A = ifelse(state == 0, 0, A)) %>%
dplyr::mutate(B = ifelse(state == 0, 0, B))
if (female == FALSE){
#for male patients set A == state and B == 0
out[["data"]] <- out[["data"]] %>%
dplyr::mutate(A = ifelse(chr == "X", state, A)) %>%
dplyr::mutate(B = ifelse(chr == "X", 0, B))
}
out[["data"]] <- add_states(out[["data"]])
out[["data"]] <- as.data.frame(out[["data"]])
}

Expand Down
3 changes: 2 additions & 1 deletion R/clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ umap_clustering <- function(CNbins,
ret_model = TRUE,
ret_nn = TRUE,
pca = pca,
fast_sgd = fast_sgd)
fast_sgd = fast_sgd,
pca_method = "svdr")
}
)

Expand Down
3 changes: 3 additions & 0 deletions R/import_packages.R
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
#' @import data.table
NULL

#' @importFrom data.table ":="
NULL
26 changes: 13 additions & 13 deletions R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -432,14 +432,14 @@ plot_umap <- function(clustering, bycol = NULL, alphavalue = 0.5, raster = FALSE
)
}
g <- ggplot2::ggplot(clustering, ggplot2::aes(x = umap1, y = umap2)) +
ggrastr::geom_point_rast(ggplot2::aes_string(col = bycol), alpha = alphavalue) +
ggrastr::geom_point_rast(ggplot2::aes(col = .data[[bycol]]), alpha = alphavalue) +
ggplot2::xlab("UMAP 1") +
ggplot2::ylab("UMAP 2") +
ggplot2::theme_bw() +
ggplot2::guides(colour = ggplot2::guide_legend(ncol = 3))
} else {
g <- ggplot2::ggplot(clustering, ggplot2::aes(x = umap1, y = umap2)) +
ggplot2::geom_point(ggplot2::aes_string(col = bycol), alpha = alphavalue) +
ggplot2::geom_point(ggplot2::aes(col = .data[[bycol]]), alpha = alphavalue) +
ggplot2::xlab("UMAP 1") +
ggplot2::ylab("UMAP 2") +
ggplot2::theme_bw() +
Expand Down Expand Up @@ -713,7 +713,7 @@ plotCNprofile <- function(CNbins,
dplyr::mutate(state = factor(paste0(state), levels = c(paste0(seq(0, 10, 1)), "11+"))) %>%
ggplot2::ggplot(ggplot2::aes(x = idx, y = copy)) +
ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) +
ggrastr::geom_point_rast(ggplot2::aes_string(col = statecol),
ggrastr::geom_point_rast(ggplot2::aes(col = .data[[statecol]]),
show.legend = TRUE,
size = pointsize,
alpha = alphaval,
Expand Down Expand Up @@ -747,9 +747,9 @@ plotCNprofile <- function(CNbins,
dplyr::mutate(state = factor(paste0(state), levels = c(paste0(seq(0, 10, 1)), "11+"))) %>%
ggplot2::ggplot(ggplot2::aes(x = idx, y = copy)) +
ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) +
ggplot2::geom_point(ggplot2::aes_string(col = statecol),
size = pointsize,
ggplot2::geom_point(ggplot2::aes(col = .data[[statecol]]),
show.legend = TRUE,
size = pointsize,
alpha = alphaval,
shape = 16) +
ggplot2::scale_color_manual(
Expand Down Expand Up @@ -1422,7 +1422,7 @@ plotCNprofileBAF <- function(cn,
dplyr::mutate(state_min = paste0(state_min)) %>%
ggplot2::ggplot(ggplot2::aes(x = idx, y = BAF)) +
ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) +
ggrastr::geom_point_rast(ggplot2::aes_string(col = BAFcol),
ggrastr::geom_point_rast(ggplot2::aes(col = .data[[BAFcol]]),
show.legend = TRUE,
size = pointsize,
alpha = alphaval,
Expand Down Expand Up @@ -1461,7 +1461,7 @@ plotCNprofileBAF <- function(cn,
dplyr::mutate(state_min = paste0(state_min)) %>%
ggplot2::ggplot(ggplot2::aes(x = idx, y = copy)) +
ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) +
ggrastr::geom_point_rast(ggplot2::aes_string(col = statecol),
ggrastr::geom_point_rast(ggplot2::aes(col = .data[[statecol]]),
show.legend = TRUE,
size = pointsize,
alpha = alphaval,
Expand Down Expand Up @@ -1494,7 +1494,7 @@ plotCNprofileBAF <- function(cn,
dplyr::mutate(state_min = paste0(state_min)) %>%
ggplot2::ggplot(ggplot2::aes(x = idx, y = BAF)) +
ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) +
ggplot2::geom_point(ggplot2::aes_string(col = BAFcol),
ggplot2::geom_point(ggplot2::aes(col = .data[[BAFcol]]),
show.legend = TRUE,
size = pointsize,
alpha = alphaval,
Expand Down Expand Up @@ -1533,10 +1533,10 @@ plotCNprofileBAF <- function(cn,
dplyr::mutate(state_min = paste0(state_min)) %>%
ggplot2::ggplot(ggplot2::aes(x = idx, y = copy)) +
ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) +
ggplot2::geom_point(ggplot2::aes_string(col = statecol),
ggplot2::geom_point(ggplot2::aes(col = .data[[statecol]]),
show.legend = TRUE,
size = pointsize,
alpha = alphaval,
size = pointsize,
alpha = alphaval,
shape = shape) +
ggplot2::scale_color_manual(
name = "Allele Specific CN",
Expand Down Expand Up @@ -1763,7 +1763,7 @@ plotBAFperstate <- function(cn, minpts = 250, minfrac = 0.01, maxstate = 10, den
cowplot::theme_cowplot() +
ggplot2::geom_crossbar(
data = allASstates, ggplot2::aes(y = cBAF, ymin = cBAF, ymax = cBAF),
alpha = 0.2, size = 0.2
alpha = 0.2, linewidth = 0.2
) +
ggplot2::geom_text(data = text_fraction, ggplot2::aes(x = state_AS_phased, y = y, label = pct)) +
ggplot2::xlab("") +
Expand Down Expand Up @@ -1809,7 +1809,7 @@ plot_density_histogram <- function(dat, mystate, rho, nbins = 30, frac = "NA", l
)

g <- ggplot2::ggplot(dat, ggplot2::aes(x = BAF)) +
ggplot2::geom_histogram(ggplot2::aes(y = (..density..)), bins = nbins, fill = "azure4", alpha = 0.8) +
ggplot2::geom_histogram(ggplot2::aes(y = ggplot2::after_stat(density)), bins = nbins, fill = "azure4", alpha = 0.8) +
ggplot2::geom_line(data = dffit_bb, stat = "density", ggplot2::aes(BAF, col = type), size = 0.5, adjust = 5) +
ggplot2::geom_line(data = dffit_b, stat = "density", ggplot2::aes(BAF, col = type), size = 0.5, adjust = 5, linetype = 2) +
ggplot2::scale_colour_manual(values = c(ggplot2::alpha("deepskyblue4", 0.6), ggplot2::alpha("firebrick4", 0.6))) +
Expand Down
2 changes: 1 addition & 1 deletion R/sim-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ simulate_cell <- function(nchr = 2,
}

data("dlpbins", envir = environment())
bins <- dlpbins
bins <- dlpbins %>% dplyr::filter(chr != "Y")
bins$state <- base_ploidy
bins$state_min <- round(base_ploidy / 2)
chrvec <- setdiff(unique(bins$chr), names(clonal_events))
Expand Down
3 changes: 2 additions & 1 deletion R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ createCNmatrix <- function(CNbins,

cnmatrix <- cnmatrix %>%
dplyr::as_tibble() %>%
tidyr::fill(., colnames, .direction = "updown")
tidyr::fill(., tidyr::all_of(colnames), .direction = "updown")
}

cnmatrix <- as.data.frame(cnmatrix)
Expand Down Expand Up @@ -962,6 +962,7 @@ per_chr_cn <- function(hscn, arms = NULL) {
#' @export
add_states <- function(df) {
df <- df %>%
data.table::as.data.table() %>%
.[, state_AS_phased := paste0(A, "|", B)] %>%
.[, state_AS := paste0(pmax(state - B, B), "|", pmin(state - B, B))] %>%
.[, state_min := pmin(A, B)] %>%
Expand Down
9 changes: 6 additions & 3 deletions man/callHaplotypeSpecificCN.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-util.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ test_that("Test widen bins functions", {

segs <- create_segments(sim_data_bb$CNbins)
test_that("Test create segments", {
expect_equal(dim(segs)[1], 24 * sum(clones_dist))
expect_equal(dim(segs)[1], 23 * sum(clones_dist))
})

segs_cn <- create_segments(consensuscopynumber(CNbins))
Expand Down
4 changes: 2 additions & 2 deletions vignettes/ASCN.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ There are also some function to plot per cell BAF and state plots. You can speci
plotCNprofileBAF(hscn, cellid = "SA921-A90554A-R03-C44")
```

Here we'll plot an equivalent plot merging data across cell clusters using the utility function `consensyscopynumber`. Here, the `cell_id` column becomes the
```{r, fig.show='hold', fig.height=5 , fig.width=10}
Here we'll plot an equivalent plot merging data across cell clusters using the utility function `consensyscopynumber`. Here, the `cell_id` column becomes the `clone_id`.

```{r, fig.show='hold', fig.height=5 , fig.width=10}
consensus_clusters <- consensuscopynumber(hscn$data, cl = cl$clustering)
plotCNprofileBAF(consensus_clusters, cellid = "A")
Expand Down

0 comments on commit ba3bd15

Please sign in to comment.