Skip to content

Commit

Permalink
Merge pull request #189 from BIMSBbioinfo/clean/reports
Browse files Browse the repository at this point in the history
clean/reports
  • Loading branch information
alexg9010 authored Jul 19, 2024
2 parents 598af04 + a4a48be commit 1f6d46b
Show file tree
Hide file tree
Showing 10 changed files with 235 additions and 292 deletions.
10 changes: 5 additions & 5 deletions etc/settings.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ locations:
input-dir: /path/to/reads/
output-dir: /path/to/output/
genome-fasta: /path/to/genome.fasta
# UCSC annotation tables fetched from table browser in BED format.
# [optional] CpG island predictions
cpgIsland-bedfile: ''
# [optional] Gene annotation, should be refGenes or knownGenes table
refGenes-bedfile: ''

general:
assembly: ''
Expand Down Expand Up @@ -36,11 +41,6 @@ general:
cores: 1
qvalue: 0.01
difference: 25
annotation:
cpgIsland-bedfile: ''
refGenes-bedfile: ''
# download cpgIsland-bedfile or refGenes-bedfile automatically if not given?
webfetch: no

# DManalyses:
# # The names of analyses can be anything but they have to be unique
Expand Down
156 changes: 75 additions & 81 deletions report_templates/diffmeth.Rmd.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ params:
chrom_seqlengths: ''
qvalue: 0.01
difference: 25
webfetch: ''

sampleids: ''
treatments: ''
Expand Down Expand Up @@ -99,7 +98,6 @@ nTop <- 5000

cpgIsland_bedfile <- params$cpgIsland_bedfile
refGenes_bedfile <- params$refGenes_bedfile
webfetch <- tolower(params$webfetch) %in% c("true", "yes")
chrom_seqlengths_file <- params$chrom_seqlengths

methylDiff_file <- params$methylDiff_file
Expand Down Expand Up @@ -146,7 +144,7 @@ AnnotateDiffMeth <- TRUE
ExportDMR <- TRUE
AnnotateDMR <- TRUE

if(! (all(file.exists(cpgIsland_bedfile,refGenes_bedfile)) | webfetch )) {
if(! (all(file.exists(cpgIsland_bedfile,refGenes_bedfile)))) {
AnnotateDiffMeth <- FALSE
}

Expand Down Expand Up @@ -301,7 +299,7 @@ The full table of methylKit calculateDiffMeth() results can be found at:
`r methylDiff_results_file`


```{r DiffMeth.load_methylDB, message=TRUE}
```{r DiffMeth.map_treatment, message=TRUE}


if (!setequal(control_group,treatment_group)) {
Expand All @@ -322,8 +320,9 @@ if (!setequal(control_group,treatment_group)) {
sep = ": ",collapse = "\n"))

}
```


```{r DiffMeth.load_methylDB, results='asis'}
methylDiffDB <- methylKit:::readMethylDiffDB(dbpath = methylDiff_file,
dbtype = "tabix",
sample.ids = sampleids,
Expand All @@ -341,13 +340,13 @@ methylDiff.obj.all <- getMethylDiff(.Object = methylDiffDB,
save.db = FALSE)

# Check if there are some differentially methylated cytosines
if (! nrow(methylDiff.obj.all)>1 ) {
cat("\n**There are no differentially methylated cytosines.**")
ExtractDiffMeth <- FALSE
AnnotateDiffMeth <- FALSE
if (nrow(methylDiff.obj.all) > 1) {
cat("\n**We found ", nrow(methylDiff.obj.all), " cytosines to be differentially methylated .**")
} else {
cat("\n**We found ",nrow(methylDiff.obj.all)," cytosines to be differentially methylated .**")
}
cat("\n**There are no differentially methylated cytosines.**")
ExtractDiffMeth <- FALSE
AnnotateDiffMeth <- FALSE
}

```

Expand All @@ -360,24 +359,7 @@ cat('- **DmC BED file**:\n',file.path(workdir,paste0(prefix,".dmc.bed")),'\n')

```

```{r Diffmeth.extract_dmc, eval=ExtractDiffMeth}

# Get hyper-methylated
methylDiff.obj.hyper <- getMethylDiff(.Object = methylDiff.obj.all,
difference = difference,
type = "hyper",
qvalue = qvalue)
# Get hypo-methylated
methylDiff.obj.hypo <- getMethylDiff(.Object = methylDiff.obj.all,
difference = difference,
type = "hypo",
qvalue=qvalue)

```


```{r DiffMeth.export_dmc, eval=ExtractDiffMeth, echo=FALSE}

```{r DiffMeth.export_bed, echo=FALSE}
#-------------------------------------------------------------------
#' Export windows to a BED file
#'
Expand Down Expand Up @@ -473,7 +455,25 @@ meth2bed<-function(windows,filename,
trackLine=as(trackLine, "BasicTrackLine"))
}
}
```

```{r Diffmeth.extract_dmc, eval=ExtractDiffMeth}

# Get hyper-methylated
methylDiff.obj.hyper <- getMethylDiff(.Object = methylDiff.obj.all,
difference = difference,
type = "hyper",
qvalue = qvalue)
# Get hypo-methylated
methylDiff.obj.hypo <- getMethylDiff(.Object = methylDiff.obj.all,
difference = difference,
type = "hypo",
qvalue=qvalue)

```


```{r Diffmeth.export_dmc, eval=ExtractDiffMeth}
# Export differentially methylated cytosines to a bed file
bedFile <- file.path(workdir,paste0(prefix,".dmc.bed"))
meth2bed(windows = methylDiff.obj.all,
Expand All @@ -484,11 +484,9 @@ meth2bed(windows = methylDiff.obj.all,
scoreCol = "meth.diff",
colramp=colorRamp(c("gray","green", "darkgreen")),
filename = bedFile)


```

```{r DiffMeth.find_dmr_text, results='asis', eval=ExtractDiffMeth}
```{r DiffMeth.find_dmr_text, results='asis'}
cat('## Finding Regions of Differential Methylation\n\n')

cat('We use the `methSeg()` function to perform an unsupervised segmentation of the methylation differences calculated from `calculateDiffMeth()`. Then we classify hyper and hypo methylated segments based on the methylation difference threshold and join neighbouring segments with the same class. For details about the segmentation see [Wreczycka, et al (2017)](https://www.sciencedirect.com/science/article/pii/S0168165617315936). ')
Expand All @@ -498,27 +496,31 @@ cat('To aggregate the region level statistics we use Stouffer’s weighted Z-met

```

```{r DiffMeth.find_dmr, eval=ExtractDiffMeth, echo=FALSE,warning=FALSE,message=FALSE}
```{r DiffMeth.find_dmr, results='asis',echo=FALSE,warning=FALSE,message=FALSE}
source(file.path(scripts_dir, "find_dmrs.R"))

pdf(file.path(workdir,paste0(prefix,".find_dmr.pdf")))
dmrs <- find_DMR(methylDiffDB,cores = cores)
invisible(dev.off())

if(! length(unique(dmrs$seg.group)) > 1 ) {
if (length(unique(dmrs$seg.group)) > 1) {
cat(
"\n**The segmentation was successful, out of ", length(dmrs), " regions ",
"we found ", sum(dmrs$seg.group == "hyper"), "to be hyper-methylated and ",
sum(dmrs$seg.group == "hypo"), " to be hypo-methylated.**"
)
} else {
# TODO: provide more detailed explanation what went wrong
# distinguish between "none" and other groups
cat(
"\n**The segments belong only to a single class,\n",
"This could indicate the unsupervised clustering of the ",
"segments was not successful or the difference in methylation ",
"is not high enough.**"
)
ExportDMR <- FALSE
AnnotateDMR <- FALSE
} else {
cat("\n**The segmentation was successful, out of ",length(dmrs)," regions ",
"we found ",sum(dmrs$seg.group == "hyper") ,"to be hyper-methylated and ",
sum(dmrs$seg.group == "hypo"), " to be hypo-methylated.**")
}
ExportDMR <- FALSE
AnnotateDMR <- FALSE
}
```

```{r DiffMeth.export_dmr_text, results='asis', eval=ExportDMR}
Expand Down Expand Up @@ -689,7 +691,7 @@ stats.df$variable <- factor(gsub("number.of.","",stats.df$variable),
```{r AnnotateDiffMeth.num_of_dmcs.plot, eval=ExtractDiffMeth,fig.width=8,fig.height=8}
p <- ggplot(stats.df, aes(x = chr, y = perc, fill = variable)) +
scale_fill_manual(values = c(hypo.col,hyper.col)) +
geom_bar(stat = "identity",position = "stack", color = "black") +
ggplot2::geom_bar(stat = "identity",position = "stack", color = "black") +
geom_text_repel(aes(label = count), position = position_stack(vjust = 0.5),
hjust = 0.1,size = 5) +
ggtitle(label = "Differentially methylated Cytosines per Chromosome") +
Expand Down Expand Up @@ -726,7 +728,7 @@ stats.DMC.df$variable <- factor(gsub("number.of.","",stats.DMC.df$variable),

p <- ggplot(stats.DMC.df, aes(x = chr, y = perc, fill = variable)) +
scale_fill_manual(values = c(hypo.col,hyper.col)) +
geom_bar(stat = "identity",position = "stack", color = "black") +
ggplot2::geom_bar(stat = "identity",position = "stack", color = "black") +
geom_text_repel(aes(label = count), position = position_stack(vjust = 0.5),
hjust = 0.1,size = 5) +
ggtitle(label = "Differentially methylated Cytosines per Chromosome") +
Expand All @@ -739,26 +741,31 @@ print(p)
```

```{r AnnotateDiffMeth.fetch_annotations, eval=AnnotateDiffMeth}
source(file.path(scripts_dir, "fetch_procedures.R"))
fetched.refgenes <- lookupBedFile(type = "knownGene",
filename = refGenes_bedfile,
assembly = assembly,
webfetch = webfetch )
fetch_refgen_success <- !is.null(fetched.refgenes)
fetched.cpgi <- lookupBedFile(type = "cpgIslandExt",
filename = cpgIsland_bedfile,
assembly = assembly,
webfetch = webfetch )

fetch_cpgi_success <- !is.null(fetched.cpgi)

refgenes.grl <-
tryCatch(
expr = readTranscriptFeatures(refGenes_bedfile),
error = function (msg) {
message(paste("Error while reading transcript features:",
msg))
return(NULL)
})
fetch_refgen_success <- !is.null(refgenes.grl)
# read the shores and flanking regions and name the flanks as shores
# and CpG islands as CpGi
cpg.obj <-
tryCatch(
expr = readFeatureFlank(cpgIsland_bedfile,
feature.flank.name=c("CpGi","shores")),
error = function (msg) {
message(paste("Error while reading CpG island annotation:",
msg))
return(NULL)
})
fetch_cpgi_success <- !is.null(cpg.obj)
```

```{r AnnotateDiffMeth.annotateWithGeneParts, eval=AnnotateDiffMeth, echo=FALSE, message=FALSE, results='asis'}
if( (fetch_refgen_success) & length(GRanges.diffmeth)!=0){
## now we parse the gene features
refgenes.grl <- readTranscriptFeatures(fetched.refgenes)

annot.gene <- annotateWithGeneParts(target = GRanges.diffmeth,
feature = refgenes.grl,
intersect.chr = TRUE)
Expand Down Expand Up @@ -812,7 +819,7 @@ if( (fetch_refgen_success) & length(GRanges.diffmeth)!=0){

p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
scale_fill_manual(values = rev(cols)) +
geom_bar(stat = "identity" ,color = "black", position = position_dodge()) +
ggplot2::geom_bar(stat = "identity" ,color = "black", position = position_dodge()) +
geom_text(aes(y = -10, label = count), hjust = 0,size = 5) +
ggtitle(label = "Differentially methylated Cytosines per Genomic Feature") +
labs(y = "Percentage", x = "Feature") +
Expand Down Expand Up @@ -882,7 +889,7 @@ fill_strip <- function(facet_ggplot, fills, plot=FALSE) {

p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
scale_fill_manual(values = rev(cols)) +
geom_bar(mapping = aes(group = type), stat = "identity" ,color = "black", position = position_dodge()) +
ggplot2::geom_bar(mapping = aes(group = type), stat = "identity" ,color = "black", position = position_dodge()) +
geom_text(mapping = aes(group = type, color = type , y = -10, label = count), size = 5,
position = position_dodge(width= 1)) +
scale_color_manual(values = c(hypo.col,hyper.col)) +
Expand Down Expand Up @@ -917,11 +924,6 @@ fill_strip <- function(facet_ggplot, fills, plot=FALSE) {

```{r AnnotateDiffMeth.annotateWithFeatureFlank, eval=AnnotateDiffMeth, echo=FALSE, warning=FALSE, message=FALSE ,results='asis'}
if( (fetch_cpgi_success) & length(GRanges.diffmeth)!=0){

# read the shores and flanking regions and name the flanks as shores
# and CpG islands as CpGi
cpg.obj=readFeatureFlank(fetched.cpgi,
feature.flank.name=c("CpGi","shores"))
#
# convert methylDiff object to GRanges and annotate
diffCpGann=annotateWithFeatureFlank(target = GRanges.diffmeth,
Expand Down Expand Up @@ -979,7 +981,7 @@ if( (fetch_cpgi_success) & length(GRanges.diffmeth)!=0){

p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
scale_fill_manual(values = c("white","gray","green")) +
geom_bar(stat = "identity" ,color = "black", position = position_dodge()) +
ggplot2::geom_bar(stat = "identity" ,color = "black", position = position_dodge()) +
geom_text(aes(y = -10, label = count), hjust = 0,size = 5) +
coord_flip() +
labs(y = "Percentage", x = "Feature") +
Expand Down Expand Up @@ -1018,7 +1020,7 @@ if( length(GRanges.diffmeth.hypo)!=0 & length(GRanges.diffmeth.hyper)!=0){

p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
scale_fill_manual(values = c("white","gray","green")) +
geom_bar(mapping = aes(group = type), stat = "identity" ,color = "black", position = position_dodge()) +
ggplot2::geom_bar(mapping = aes(group = type), stat = "identity" ,color = "black", position = position_dodge()) +
geom_text(mapping = aes(group = type, color = type , y = -10, label = count), size = 5,
position = position_dodge(width= 1)) +
scale_color_manual(values = c(hypo.col,hyper.col)) +
Expand Down Expand Up @@ -1137,10 +1139,7 @@ ideoDMC_hyper_hypo(hyper = GRanges.dmr.hyper,

```{r AnnotateDMR.annotateWithGeneParts, eval=AnnotateDMR, echo=FALSE, message=FALSE, results='asis'}
if( (fetch_refgen_success) & length(GRanges.dmr)!=0){
## now we parse the gene features
refgenes.grl <- readTranscriptFeatures(fetched.refgenes)

annot.dmr.gene <- annotateWithGeneParts(target = GRanges.dmr,
annot.dmr.gene <- genomation::annotateWithGeneParts(target = GRanges.dmr,
feature = refgenes.grl,
intersect.chr = TRUE)

Expand Down Expand Up @@ -1193,7 +1192,7 @@ if( (fetch_refgen_success) & length(GRanges.dmr)!=0){

p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
scale_fill_manual(values = rev(cols)) +
geom_bar(stat = "identity" ,color = "black", position = position_dodge()) +
ggplot2::geom_bar(stat = "identity" ,color = "black", position = position_dodge()) +
geom_text(aes(y = -10, label = count), hjust = 0,size = 5) +
ggtitle(label = "Differentially methylated Regions per Genomic Feature") +
labs(y = "Percentage", x = "Feature") +
Expand Down Expand Up @@ -1229,7 +1228,7 @@ if( (fetch_refgen_success) & length(GRanges.dmr)!=0){

p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
scale_fill_manual(values = rev(cols)) +
geom_bar(mapping = aes(group = type), stat = "identity" ,color = "black", position = position_dodge()) +
ggplot2::geom_bar(mapping = aes(group = type), stat = "identity" ,color = "black", position = position_dodge()) +
geom_text(mapping = aes(group = type, color = type , y = -10, label = count), size = 5,
position = position_dodge(width= 1)) +
scale_color_manual(values = c(hypo.col,hyper.col)) +
Expand Down Expand Up @@ -1265,11 +1264,6 @@ if( (fetch_refgen_success) & length(GRanges.dmr)!=0){
```{r AnnotateDMR.annotateWithFeatureFlank, eval=AnnotateDMR, echo=FALSE, warning=FALSE, message=FALSE ,results='asis'}
if( (fetch_cpgi_success) & length(GRanges.dmr)!=0){

# read the shores and flanking regions and name the flanks as shores
# and CpG islands as CpGi
cpg.obj=readFeatureFlank(fetched.cpgi,
feature.flank.name=c("CpGi","shores"))

# convert methylDiff object to GRanges and annotate
diffCpGann.dmr=annotateWithFeatureFlank(target = GRanges.dmr,
feature = cpg.obj$CpGi,
Expand Down Expand Up @@ -1326,7 +1320,7 @@ if( (fetch_cpgi_success) & length(GRanges.dmr)!=0){

p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
scale_fill_manual(values = c("white","gray","green")) +
geom_bar(stat = "identity" ,color = "black", position = position_dodge()) +
ggplot2::geom_bar(stat = "identity" ,color = "black", position = position_dodge()) +
geom_text(aes(y = -10, label = count), hjust = 0,size = 5) +
coord_flip() +
labs(y = "Percentage", x = "Feature") +
Expand Down Expand Up @@ -1365,7 +1359,7 @@ if( length(GRanges.dmr.hypo)!=0 & length(GRanges.dmr.hyper)!=0){

p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
scale_fill_manual(values = c("white","gray","green")) +
geom_bar(mapping = aes(group = type), stat = "identity" ,color = "black", position = position_dodge()) +
ggplot2::geom_bar(mapping = aes(group = type), stat = "identity" ,color = "black", position = position_dodge()) +
geom_text(mapping = aes(group = type, color = type , y = -10, label = count), size = 5,
position = position_dodge(width= 1)) +
scale_color_manual(values = c(hypo.col,hyper.col)) +
Expand Down
Loading

0 comments on commit 1f6d46b

Please sign in to comment.