From 6e57af5c1a742f49763eda12e5da618cff2e3eef Mon Sep 17 00:00:00 2001
From: Jochen Weile <jochenweile@gmail.com>
Date: Tue, 26 Jun 2018 16:39:02 -0400
Subject: [PATCH] genophenogram: more comments, replacing oversized error bars
 with crosses, stub for nucleotide-centric plot

---
 R/genophenogram.R | 227 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 226 insertions(+), 1 deletion(-)

diff --git a/R/genophenogram.R b/R/genophenogram.R
index e68bd3b..4578732 100644
--- a/R/genophenogram.R
+++ b/R/genophenogram.R
@@ -19,6 +19,7 @@
 genophenogram <- function(wt.aa, pos, mut.aa, score, syn.med, stop.med, 
 	error=NULL, a=0, grayBack=FALSE, img.width=12, tracks=NULL) {
 
+	#define bezier transformation function
 	bend <- function(x,a=0) {
 		if (is.na(x)) return(NA)
 		if (a == 0) return(x)
@@ -29,10 +30,12 @@ genophenogram <- function(wt.aa, pos, mut.aa, score, syn.med, stop.med,
 		t <- ifelse(a >= 0,1,-1) * sqrt(x/(2*a) + q^2) - q
 		(1+2*a)*t - 2*a*t^2
 	}
+	#if desired, apply the transformation to the scores
 	if (a != 0) {
 		score <- sapply(score,bend,a=a)
 	}
 
+	#calculate the relative space in the plot to be allocated for legends and feature tracks.
 	legend.share <- 1.4/img.width
 	# layout(cbind(c(3,1),c(4,2)),widths=c(9.5,.5),heights=c(2,9))
 	if (is.null(tracks)) {
@@ -41,17 +44,21 @@ genophenogram <- function(wt.aa, pos, mut.aa, score, syn.med, stop.med,
 		track.share <- tracks$num.tracks()/2.5
 		layout(cbind(c(4,3,1),c(5,5,2)),widths=c(1-legend.share,legend.share),heights=c(track.share,2,9))
 	}
+
 	#Main plot
 	###########
 	aas <- c("A","V","L","I","M","F","Y","W","R","H","K","D","E","S","T","N","Q","G","C","P","*")
+	#set up the empty plot space for the main panel
 	op <- par(cex=.6,las=1,mar=c(5,5,0,0)+.1)
 	plot(NA,type="n",
 		xlim=c(-3.5,length(wt.aa)+1),ylim=c(0,length(aas)+1),axes=FALSE,
 		xaxs="i",xlab="AA position",ylab="AA residue",main=""
 	)
+	#add x and y axes
 	axis(1,c(1,seq(5,length(wt.aa),5)))
 	axis(2,at=1:21,labels=rev(aas))
 
+	#add amino acid group labels
 	text(-1,c(17.5,9),c("hydrophobic","polar"),srt=90)
 	text(-2.33,c(9.5,12),c("-","+"),srt=90)
 	arrows(-1.66,c(4.6,13.6),-1.66,c(13.4,21.4),length=.02,angle=90,code=3)
@@ -75,6 +82,7 @@ genophenogram <- function(wt.aa, pos, mut.aa, score, syn.med, stop.med,
 		}
 	}
 
+	#calculate coordinates and colors for the main heatmap rectangles
 	x <- pos
 	y <- length(aas) - sapply(mut.aa,function(a) if (is.na(a)) NA else which(aas==a)) + 1
 	colRamp <- colorRampPalette(c("royalblue3","white","firebrick3"))(11)
@@ -97,13 +105,24 @@ genophenogram <- function(wt.aa, pos, mut.aa, score, syn.med, stop.med,
 	#change wt positions to gold color
 	cols[which(mut.aa==wt.aa[pos])] <- "lightgoldenrod1"
 
+	#draw the heatmap rectangles
 	rect(x-.5,y-.5,x+.5,y+.5,col=cols,border=NA)
 
+	#draw error bars (if provided)
 	if (!is.null(error)) {
-		# e <- .5 * error / max(error)
+		#normalize to distance between synonymous and nonsense
 		e <- .5 * error / (syn.med-stop.med)
+		#cut-off anything with error greater than the full distance
+		#(will be crossed out by a second bar below)
+		e[which(e>.5)] <- .5
+		#remove bars from wt, as they are just yellow anyway
 		e[which(mut.aa==wt.aa[pos])] <- NA
+		#draw the segements
 		segments(x-e,y-e,x+e,y+e)
+		#indices of boxes with excessive (>1 error)
+		e2i <- which(error > (syn.med-stop.med))
+		#cross out those boxes
+		segments(x[e2i]-.5,y[e2i]+.5,x[e2i]+.5,y[e2i]-.5)
 	}
 
 	par(op)
@@ -112,6 +131,7 @@ genophenogram <- function(wt.aa, pos, mut.aa, score, syn.med, stop.med,
 	###########
 	op <- par(cex=.6,mar=c(5,3,0,4)+.1)
 	plot(NA,type="n",xlim=c(-1,1),ylim=c(0,13),axes=FALSE,xlab="",ylab="")
+	#autodetect above wt-level scores and draw appropriate legend
 	if (any(score > syn.med, na.rm=TRUE)) {#with red colors
 		rect(0,0:11,1,1:12,col=c(colRamp[1:6],colRamp[6:11]),border=NA)
 		rect(0,12,1,13,col="lightgoldenrod1",border=NA)
@@ -188,3 +208,208 @@ genophenogram <- function(wt.aa, pos, mut.aa, score, syn.med, stop.med,
 	
 	return(invisible(NULL))
 }
+
+
+
+
+#' Draw genophenogram plot for nucleotide sequences
+#' 
+#' Draws a genophenogram plot from given data
+#' 
+#' @param wt.nc wildtype amino acid sequence as a vector of single characters.
+#' @param pos vector of amino acid positions
+#' @param mut.nc vector of mutant AAs
+#' @param score vector of scores
+#' @param error vector of stderr values
+#' @param a bezier transformation intensity (with -0.5 <= a <= 0.5)
+#' @param grayBack draw a gray background for incomplete maps
+#' @param img.width optional parameter to inform the drawing function of the chosen image width,
+#'      allowing it to adjust the size of the legend
+#' @param tracks an optional trackdrawer object to add structural information to the plot
+#' @export
+#' @return NULL
+genophenogram.nc <- function(wt.nc, pos, mut.nc, score, syn.med, stop.med, 
+	error=NULL, a=0, grayBack=FALSE, img.width=12, tracks=NULL) {
+
+	bend <- function(x,a=0) {
+		if (is.na(x)) return(NA)
+		if (a == 0) return(x)
+		if (abs(a) > 0.5) stop("parameter a must be between -0.5 and 0.5")
+		if (x < 0) x <- 0
+		if (x > 1) return(x)
+		q <- (1 - 2*a) / (4*a)
+		t <- ifelse(a >= 0,1,-1) * sqrt(x/(2*a) + q^2) - q
+		(1+2*a)*t - 2*a*t^2
+	}
+	if (a != 0) {
+		score <- sapply(score,bend,a=a)
+	}
+
+	legend.share <- 1.4/img.width
+	# layout(cbind(c(3,1),c(4,2)),widths=c(9.5,.5),heights=c(2,9))
+	if (is.null(tracks)) {
+		layout(cbind(c(3,1),c(4,2)),widths=c(1-legend.share,legend.share),heights=c(2,9))
+	} else {
+		track.share <- tracks$num.tracks()/2.5
+		layout(cbind(c(4,3,1),c(5,5,2)),widths=c(1-legend.share,legend.share),heights=c(track.share,2,9))
+	}
+	#Main plot
+	###########
+	# aas <- c("A","V","L","I","M","F","Y","W","R","H","K","D","E","S","T","N","Q","G","C","P","*")
+	triplets <- c(
+		"A--","C--","G--","T--",
+		"-A-","-C-","-G-","-T-",
+		"--A","--C","--G","--T"
+	)
+	op <- par(cex=.6,las=1,mar=c(5,5,0,0)+.1)
+	plot(NA,type="n",
+		xlim=c(-3.5,length(wt.nc)/3+1),ylim=c(0,length(triplets)+1),axes=FALSE,
+		xaxs="i",xlab="Codon position",ylab="mutation",main=""
+	)
+	axis(1,c(1,seq(5,length(wt.nc)/3+1,5)))
+	axis(2,at=1:length(triplets),labels=rev(triplets))
+
+	# text(-1,c(17.5,9),c("hydrophobic","polar"),srt=90)
+	# text(-2.33,c(9.5,12),c("-","+"),srt=90)
+	# arrows(-1.66,c(4.6,13.6),-1.66,c(13.4,21.4),length=.02,angle=90,code=3)
+	# arrows(-3,c(8.6,10.6),-3,c(10.4,13.4),length=.02,angle=90,code=3)
+
+	text(-1,c(2.5,6.5,10.5),c("3rd","2nd","1st"),srt=90)
+	arrows(-1.66,c(.6,4.6,8.6),-1.66,c(4.4,8.4,12.4),length=.02,angle=90,code=3)
+
+	#draw gray background if desired
+	if (grayBack) {
+		rect(.5,.5,length(wt.nc)/3+.5,length(triplets)+.5,col="gray",border=NA)
+	}
+
+	#supplement missing wt-positions
+	for (i in 1:length(wt.nc)) {
+		base <- wt.nc[[i]]
+		if (!any(pos==i & mut.nc==base,na.rm=TRUE)) {
+			pos <- c(pos,i)
+			mut.nc <- c(mut.nc,base)
+			score <- c(score,NA)
+			if (!is.null(error)) {
+				error <- c(error,NA)
+			}
+		}
+	}
+
+	x <- (pos-1) %/% 3 + 1
+	bases <- c("A","C","G","T")
+	y <- 13 - (sapply(mut.nc,function(base) which(bases==base)) + ((pos-1) %% 3)*4)
+	# y <- length(aas) - sapply(mut.aa,function(a) if (is.na(a)) NA else which(aas==a)) + 1
+
+	colRamp <- colorRampPalette(c("royalblue3","white","firebrick3"))(11)
+	scoreToColIdx <- function(score) sapply(score,function(s) {
+		if (is.na(s)) {
+			NA
+		} else if (s < stop.med) {
+			1
+		} else if (s < syn.med) {
+			round(5*(s-stop.med)/(syn.med-stop.med))+1
+		} else if (s < 2*syn.med-stop.med) {
+			round(5*(s-syn.med)/(syn.med-stop.med))+6
+		} else {
+			11
+		}
+	})
+	colIdx <- scoreToColIdx(score)
+	cols <- colRamp[colIdx]
+
+	#change wt positions to gold color
+	cols[which(mut.nc==wt.nc[pos])] <- "lightgoldenrod1"
+
+	rect(x-.5,y-.5,x+.5,y+.5,col=cols,border=NA)
+
+	segments(-.5,c(4.5,8.5),length(wt.nc)/3+1,c(4.5,8.5),lty="dashed")
+
+	if (!is.null(error)) {
+		# e <- .5 * error / max(error)
+		e <- .5 * error / (syn.med-stop.med)
+		e[which(mut.nc==wt.nc[pos])] <- NA
+		segments(x-e,y-e,x+e,y+e)
+	}
+
+	par(op)
+
+	#####Legend
+	###########
+	op <- par(cex=.6,mar=c(5,3,0,4)+.1)
+	plot(NA,type="n",xlim=c(-1,1),ylim=c(0,13),axes=FALSE,xlab="",ylab="")
+	if (any(score > syn.med, na.rm=TRUE)) {#with red colors
+		rect(0,0:11,1,1:12,col=c(colRamp[1:6],colRamp[6:11]),border=NA)
+		rect(0,12,1,13,col="lightgoldenrod1",border=NA)
+		axis(4,at=c(.5,6,11.5,12.5),labels=c("stop","syn","hyper","wt"))
+	} else {#without red colors
+		rect(0,seq(0,10,2),1,seq(2,12,2),col=colRamp[1:6],border=NA)
+		rect(0,12,1,13,col="lightgoldenrod1",border=NA)
+		axis(4,at=c(1,11,12.5),labels=c("stop","syn","wt"))
+	}
+	mtext("score",side=4,line=2,las=3,cex=0.7)
+	if(!is.null(error)) {
+		es <- seq(0,0.5,length.out=13)/2
+		ys <- 0:12+0.5
+		segments(-.5-es,ys-es,-.5+es,ys+es)
+		me <- syn.med-stop.med
+		axis(2,at=c(.5,6.5,12.5),labels=signif(c(0,me/2,me),2))
+		mtext("stderr",side=2,line=2,las=3,cex=0.7)
+	}
+	par(op)
+
+	#Summary bars
+	###########
+
+	barvals <- do.call(rbind,tapply(colIdx,x,function(idxs) {
+		table(factor(idxs,levels=1:11))
+	}))
+	# barvals <- apply(barvals,2,`/`,table(pos))
+	barvals <- t(apply(barvals,1,function(xs)xs/sum(xs)))
+	barcums <- cbind(0,t(apply(barvals,1,function(x)sapply(1:11,function(i)sum(x[1:i])))))
+
+	par(cex=.6,mar=c(0,5,1,0)+.1)
+	plot(
+		0,type="n",
+		xlim=c(-3.5,length(wt.nc)/3+1),
+		ylim=c(0,1),
+		axes=FALSE,xlab="",xaxs="i",
+		ylab="pos/neutral/neg"
+	)
+	n <- length(wt.nc)/3
+
+	#draw gray background if desired
+	if (grayBack) {
+		rect(.5,0,length(wt.nc)/3+.5,1,col="gray",border=NA)
+	}
+
+	for (i in 1:11) {
+		rect(1:n-.5,barcums[,i],1:n+.5,barcums[,i]+barvals[,i],col=colRamp[[i]],border=NA)	
+	}
+	axis(2)
+	par(op)
+
+
+	#####Tracks
+	#############
+	if (!is.null(tracks)) {
+
+		tracks$draw()
+
+		### Track legend
+		op <- par(cex=.6,mar=c(0,3,1,4)+.1)
+		plot(NA,type="n",xlim=c(-1,1),ylim=c(0,11),axes=FALSE,xlab="",ylab="")
+		orangeRamp <- colorRampPalette(c("white","orange"))(11)
+		blueRamp <- colorRampPalette(c("white","steelblue3"))(11)
+		rect(0,0:10,1,1:11,col=orangeRamp,border=NA)
+		rect(-1,0:10,0,1:11,col=blueRamp,border=NA)
+
+		axis(4,at=c(.5,5.5,10.5),labels=c("0%","50%","100%"))
+		mtext("relative interface burial",side=4,line=2,las=3,cex=0.7)
+		axis(2,at=c(.5,5.5,10.5),labels=c("0%","50%","100%"))
+		mtext("relative surface accessibility",side=2,line=2,las=3,cex=0.7)
+		par(op)
+
+	}
+	
+	return(invisible(NULL))
+}