Skip to content

Commit

Permalink
work implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
kriemo committed Jul 26, 2023
1 parent 6132ed7 commit 8fe65da
Show file tree
Hide file tree
Showing 16 changed files with 380 additions and 85 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: raer
Type: Package
Title: RNA editing tools in R
Version: 0.99.6
Version: 0.99.7
Authors@R: c(
person("Kent", "Riemondy", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0750-1273")),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(annot_snps)
export(calc_AEI)
export(calc_confidence)
export(calc_edit_frequency)
export(calc_scAEI)
export(correct_strand)
export(download_GSE99249)
export(download_NA12878)
Expand All @@ -19,6 +20,7 @@ export(find_de_sites)
export(find_mispriming_sites)
export(find_scde_sites)
export(get_overlapping_snps)
export(get_scAEI_sites)
export(get_splice_sites)
export(make_de_object)
export(pileup_cells)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# raer 0.99.7

* added a single cell specific AEI calculation (`calc_scAEI()`)

# raer 0.99.6

* added method to count base consensus base when counting UMIs with `pileup_cells()` using the sum of base qualities to select consensus.
Expand Down
3 changes: 3 additions & 0 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#' comparing against the SNP db (which always returns sequences w.r.t the
#' plus strand).
#' @param drop If TRUE, remove sites overlapping SNPs
#' @param RLE If TRUE, columns added will returned as [S4Vectors::Rle()] vectors
#' to reduce memory
#'
#' @param ... For the generic, further arguments to pass to specific methods.
#' Unused for now.
#'
Expand Down
51 changes: 30 additions & 21 deletions R/annot_rse.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ annot_snps.GRanges <- function(obj,
col_to_aggr = "RefSNP_id",
drop = FALSE,
genome = NULL,
RLE = TRUE,
...) {
if (!is(dbsnp, "ODLT_SNPlocs")) {
cli::cli_abort("supplied dbSNP not valid SNP package, please install SNP db")
Expand Down Expand Up @@ -53,6 +54,11 @@ annot_snps.GRanges <- function(obj,
snp = unstrsplit(eval(parse(text = col_to_aggr)), ","),
drop = FALSE
)$snp

if(RLE) {
mcols(sites)[col_to_aggr] <- S4Vectors::Rle(mcols(sites)[[col_to_aggr]])
}

} else {
cols_exist <- any(c("snp_ref_allele", "snp_alt_alleles") %in%
colnames(mcols(obj)))
Expand All @@ -79,15 +85,22 @@ annot_snps.GRanges <- function(obj,
)

snp_info$grouping <- NULL
colnames(snp_info) <- c(col_to_aggr,
"snp_ref_allele",
"snp_alt_alleles")
snp_cols <- c(col_to_aggr,
"snp_ref_allele",
"snp_alt_alleles")
colnames(snp_info) <- snp_cols

mcols(sites) <- cbind(mcols(sites), snp_info)

if("ALT" %in% colnames(mcols(sites))) {
snp_cols <- c(snp_cols, "snp_matches_site")
mcols(sites)$snp_matches_site <- check_snp_match(sites)
}

if(RLE) {
mcols(sites)[snp_cols] <- lapply(mcols(sites)[snp_cols],
S4Vectors::Rle)
}
}

seqlevelsStyle(sites) <- in_style
Expand All @@ -102,36 +115,25 @@ annot_snps.GRanges <- function(obj,

#' @rdname annot_snps
#' @export
annot_snps.SummarizedExperiment <- function(obj,
dbsnp,
chrom = NULL,
col_to_aggr = "RefSNP_id",
drop = FALSE,
genome = NULL,
...) {
annot_snps.SummarizedExperiment <- function(obj, ...) {
gr <- rowRanges(obj)
res <- annot_snps.GRanges(gr,
dbsnp,
chrom = chrom,
col_to_aggr = col_to_aggr,
drop = drop,
genome = genome
)

res <- annot_snps.GRanges(gr, ...)
mcols(rowRanges(obj)) <- mcols(res)
obj
}

#' Annotate a RangedSummarizedExperiment using Granges objects
#' Annotate sites using GRanges object
#'
#' @description Utility function to map annotations from GRanges to rowData of
#' SummarizedExperiment or GRanges object. If multiple features overlap then
#' SummarizedExperiment or to mcols of GRanges object. If multiple features overlap then
#' they will be concatenated as comma separated values.
#'
#' @param obj RangedSummarizedExperiment or GRanges object
#' @param gr GRanges with annotations to map to obj
#' @param cols_to_map character vector of columns from gr to map to obj. If the
#' vector has names, the names will be the column names in the output obj
#' @param RLE If TRUE, columns added will returned as [S4Vectors::Rle()] vectors
#' to reduce memory
#' @param ... additional arguments to pass to [GenomicRanges::findOverlaps()]
#'
#' @return Either a SummarizedExperiment or GRanges object with additional
Expand All @@ -155,7 +157,7 @@ annot_snps.SummarizedExperiment <- function(obj,
#' @importFrom GenomeInfoDb seqlevelsStyle seqlevelsStyle<- seqlevels
#'
#' @export
annot_from_gr <- function(obj, gr, cols_to_map, ...) {
annot_from_gr <- function(obj, gr, cols_to_map, RLE = TRUE, ...) {
if (is(obj, "RangedSummarizedExperiment")) {
gr_sites <- rowRanges(obj)
return_se <- TRUE
Expand Down Expand Up @@ -193,6 +195,13 @@ annot_from_gr <- function(obj, gr, cols_to_map, ...) {
x$tmp <- ifelse(x$tmp == "", NA, x$tmp)
mcols(gr_sites)[[col_id]] <- x$tmp
}

if(RLE) {
col_nms <- names(cols_to_map)
rls_clms <- lapply(col_nms, function(x) S4Vectors::Rle(mcols(gr_sites)[[x]]))
mcols(gr_sites)[col_nms] <- rls_clms
}

if (return_se) {
mcols(rowRanges(obj)) <- mcols(gr_sites)
} else {
Expand Down
61 changes: 18 additions & 43 deletions R/calc_AEI.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' quantification of ADAR adenosine-to-inosine RNA editing activity. Nat Methods
#' 16, 1131–1138 (2019). https://doi.org/10.1038/s41592-019-0610-9
#'
#' @param bam_fns character vector of paths to indexed bam files. If a named
#' @param bamfiles character vector of paths to indexed bam files. If a named
#' character vector is supplied the names will be used in the output.
#' @param fasta_fn fasta filename
#' @param alu_ranges [GRanges] with regions to query for
Expand Down Expand Up @@ -63,7 +63,7 @@
#' @import GenomicRanges
#'
#' @export
calc_AEI <- function(bam_fns,
calc_AEI <- function(bamfiles,
fasta_fn,
alu_ranges = NULL,
txdb = NULL,
Expand All @@ -72,7 +72,14 @@ calc_AEI <- function(bam_fns,
BPPARAM = SerialParam(),
verbose = FALSE) {

chroms <- names(Rsamtools::scanBamHeader(bam_fns[1])[[1]]$targets)
if (!is(bamfiles, "BamFileList")) {
if (is.null(names(bamfiles))) {
names(bamfiles) <- bamfiles
}
bamfiles <- BamFileList(bamfiles)
}

chroms <- names(Rsamtools::scanBamHeader(bamfiles[[1]])$targets)

if (is.null(alu_ranges)) {
cli::cli_alert_warning(c(
Expand Down Expand Up @@ -158,21 +165,16 @@ calc_AEI <- function(bam_fns,
snps <- snp_lst[chroms]
}
res <- list()
for(i in seq_along(bam_fns)) {
bam_fn <- bam_fns[i]
for(i in seq_along(bamfiles)) {
bam_fn <- bamfiles[i]
aei <- .AEI_per_bam(bam_fn = bam_fn, fasta_fn = fasta_fn, chroms = chroms,
alu_ranges = alu_ranges, snps = snps, param = param,
genes_gr = genes_gr, verbose = verbose, BPPARAM = BPPARAM)
res[[bam_fn]] <- aei
res[[path(bam_fn)]] <- aei
}

aei <- do.call(rbind, lapply(res, '[[', 1))

if(is.null(names(bam_fns))){
rownames(aei) <- bam_fns
} else {
rownames(aei) <- names(bam_fns)
}
rownames(aei) <- names(bamfiles)

aei_per_chrom <- lapply(seq_along(res), function(i) {
x <- res[[i]][[2]]
Expand Down Expand Up @@ -407,22 +409,20 @@ correct_strand <- function(rse, genes_gr) {
# drop non-genic and multi-strand (overlapping annotations)
rse <- rse[!is.na(rowData(rse)$gene_strand), ]

n_strands <- lengths(regmatches(
rowData(rse)$gene_strand,
gregexpr(",", rowData(rse)$gene_strand)
))
gs <- decode(rowData(rse)$gene_strand)
n_strands <- lengths(regmatches(gs, gregexpr(",", gs)))
rse <- rse[n_strands == 0, ]

flip_rows <- as.vector(strand(rse) != rowData(rse)$gene_strand)

rowData(rse)$REF[flip_rows] <- BASE_MAP[rowData(rse)$REF[flip_rows]]
rowData(rse)$REF[flip_rows] <- comp_bases(rowData(rse)$REF[flip_rows])

flipped_variants <- vector(mode = "list", ncol(rse))
to_flip <- assay(rse, "ALT")[flip_rows, , drop = FALSE]
flipped_variants <- apply(to_flip, c(1, 2), function(x) {
vapply(
strsplit(x, ","), function(y) {
paste0(unname(ALLELE_MAP[y]),
paste0(comp_bases(y),
collapse = ","
)
},
Expand All @@ -432,7 +432,6 @@ correct_strand <- function(rse, genes_gr) {

assay(rse, "ALT")[flip_rows, ] <- flipped_variants


# complement the nucleotide counts by reordering the assays
assays_to_swap <- c("nA", "nT", "nC", "nG")
og_order <- rownames(rse)
Expand All @@ -451,27 +450,3 @@ correct_strand <- function(rse, genes_gr) {
rse
}


ALLELE_MAP <- c(
TA = "CT",
CA = "GT",
GA = "AT",
AT = "TC",
CT = "GC",
GT = "AC",
AC = "TG",
TC = "CG",
GC = "AG",
AG = "TA",
TG = "CA",
CG = "GA",
`-` = "-"
)

BASE_MAP <- c(
"A" = "T",
"G" = "C",
"C" = "G",
"T" = "A",
"N" = "N"
)
Loading

0 comments on commit 8fe65da

Please sign in to comment.