From 44d504957ca80a7d8b066572591fc8a1064fc0d9 Mon Sep 17 00:00:00 2001 From: vinuesa Date: Sat, 14 Oct 2017 17:37:52 -0500 Subject: [PATCH 1/6] added options -a and -X to control leaf label angle and character eXpansion --- plot_matrix_heatmap.sh | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/plot_matrix_heatmap.sh b/plot_matrix_heatmap.sh index 38736be..34d9a9a 100755 --- a/plot_matrix_heatmap.sh +++ b/plot_matrix_heatmap.sh @@ -9,7 +9,8 @@ #: OUTPUT: svg and pdf; png not implemented yet progname=${0##*/} # plot_matrix_heatmap.sh -VERSION='v0.5_18Aug17' # added options -d (max no. decimals) and -x (filter matrix with regex) +VERSION='v0.6_14Oct17' # added options -X (charExp) and -a (label rotation angle) + #'v0.5_18Aug17' # added options -d (max no. decimals) and -x (filter matrix with regex) #'v0.4_17Aug17' # added option -r to remove column names and cell contents, and -k #'v0.3_13Apr16' # added option -c to filter input matrix by a maximum similarity cut-off value # to reduce excessive redundancy. Improved the help text printed with -M @@ -41,6 +42,7 @@ function print_help() -d maximum number of decimals in matrix display [0-2; def: $decimals] II) tweak the graphical output: + -a angle to rotate node angles [def $angle] -t text for plot title [def input_tab_file_name] -m margins_horizontal [def $margin_hor] -v margins_vertical [def $margin_vert] @@ -51,6 +53,7 @@ function print_help() -C do not reorder clusters [def reorder clusters and plot dendrogram] -r remove column names and cell contents [def names and cell contents are printed] -k text for scale X-axis [def "Value"] + -X character expansion factor [def $charExp] RUN NJ ANALYSIS using ANI matrix (average identity matrix) generated by get_homologues.pl -A -t 0 -M|G -N will compute a distance matrix from the input similarity matrix @@ -64,7 +67,7 @@ function print_help() -M prints gplot installation instructions and further usage information EXAMPLE: - $progname -i Avg_identity.tab -c 98.5 -t "Genus X ANIb (OMCL all clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc' + $progname -i Avg_identity.tab -c 98 -t "Genus X ANIb (OMCL all clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc' -d 1 -a 45 -X 0.9 #------------------------------------------------------------------------------------------------------------------ AIM: Plot ordered heatmaps with row and col. dendrogram, from squared numeric (distance or presence-absence) matrix, @@ -179,14 +182,18 @@ reorder_clusters=1 remove_colnames=0 key_xaxis="Value" decimals=0 +charExp=1.0 +angle=45 subset_matrix=0 # See bash cookbook 13.1 and 13.2 -while getopts ':c:i:d:t:m:o:p:v:x:H:W:k:hrMNC?:' OPTIONS +while getopts ':a:c:i:d:t:m:o:p:v:x:X:H:W:k:hrMNC?:' OPTIONS do case $OPTIONS in + a) angle=$OPTARG + ;; c) sim_cutoff=$OPTARG ;; d) decimals=$OPTARG @@ -217,6 +224,8 @@ do ;; x) regex=$OPTARG ;; + X) charExp=$OPTARG + ;; C) reorder_clusters=0 ;; \:) printf "argument missing from -%s option\n" $OPTARG @@ -270,6 +279,7 @@ cat << PARAMS input tab_file = $tab_file | sim_cutoff = $sim_cutoff | max_decimals = $decimals subset_matrix = $subset_matrix | regex = $regex text=$text|margin_hor=$margin_hor|margin_vert=$margin_vert|points=$points + angle=$angle|charExp=$charExp width=$width|height=$height|outformat=$outformat reorder_clusters=$reorder_clusters|remove_colnames=$remove_colnames|key_xaxis=$key_xaxis|do_bioNJ=$do_nj @@ -307,12 +317,8 @@ if($subset_matrix > 0 ){ include_list <- grep("$regex", rownames(mat_dat)) mat_dat <- mat_dat[include_list, ] mat_dat <- mat_dat[,include_list] - #mat_dat <- f.mat2 - #rm(f.mat) } - - if($sim_cutoff < 100) { tmp_mat = mat_dat @@ -325,26 +331,30 @@ if($reorder_clusters > 0){ if($remove_colnames > 0){ $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", - trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F) + trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F, + cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) dev.off() } else { $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", - trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5)) + trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), + cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) dev.off() } } else { if($remove_colnames > 0){ $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", labCol=F, key.xlab="$key_xaxis", - trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE) + trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE, + cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) dev.off() } else { $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", - trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE) + trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE, + cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) dev.off() } } From ee4a7ead57c18f56e70ee7925c6117993ae39f47 Mon Sep 17 00:00:00 2001 From: vinuesa Date: Sat, 14 Oct 2017 18:17:34 -0500 Subject: [PATCH 2/6] v0.5_14Oct17; added options -A and -X to control leaf label angle and character eXpansion --- hcluster_matrix.sh | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/hcluster_matrix.sh b/hcluster_matrix.sh index d15ac28..cf40515 100755 --- a/hcluster_matrix.sh +++ b/hcluster_matrix.sh @@ -12,7 +12,9 @@ progname=${0##*/} -VERSION='0.4_7Sep17' # v0.4_7Sep17; added options -x to select specific rows (genomes) +VERSION='0.5_14Oct17' #v'0.5_14Oct17'; added options -A and -X to control the angle + # and character eXpansion factor of leaf labels + #'0.4_7Sep17' # v0.4_7Sep17; added options -x to select specific rows (genomes) # from the input pangenome_matrix_t0.tab # -c <0|1> to print or not distances in heatmap cells # -f maximum number of decimals in matrix display (if -c 1) @@ -54,14 +56,19 @@ function print_help() -W ouptupt device width [def:$width] -N print Notes and exit [flag] + -A angle to rotate leaf labels [def $angle] + -X leaf label character expansion factor [def $charExp] + + Select genomes from input pangenome_matrix_t0.tab using regular expressions -x regex, like: 'Escherichia|Salmonella' [def $regex] + EXAMPLE: $progname -i pangenome_matrix_t0.tab -t "Pan-genome tree for Genus X" -a complete -d manhattan -o pdf -x 'maltoph|genosp' AIM: compute a distance matrix from a pangenome_matrix.tab file produced after running - get_homologues.pl and compare_clusters.pl with options -t 0 -m . + get_homologues.pl and compare_clusters.pl with options -t 0 -m . The pangenome_matrix.tab file processed by hclust(), and heatmap.2() OUTPUT: a newick file with extension .ph and svg|pdf output of hclust and heatmap.2 calls @@ -186,16 +193,22 @@ algorithm=ward.D2 distance=gower decimals=2 +charExp=1.0 +angle=45 +#colTax=1 + subset_matrix=0 # See bash cookbook 13.1 and 13.2 -while getopts ':a:c:i:d:t:m:o:p:f:v:x:H:W:R:hND?:' OPTIONS +while getopts ':a:A:c:i:d:t:m:o:p:f:v:x:X:H:W:R:hND?:' OPTIONS do case $OPTIONS in a) algorithm=$OPTARG ;; + A) angle=$OPTARG + ;; c) cell_note=$OPTARG ;; d) distance=$OPTARG @@ -216,6 +229,8 @@ do ;; x) regex=$OPTARG ;; + X) charExp=$OPTARG + ;; C) reorder_clusters=0 ;; H) height=$OPTARG @@ -294,6 +309,7 @@ cat << PARAMS distance:$distance|dist_cutoff:$dist_cutoff|hclustering_meth:$algorithm|cell_note:$cell_note text:$text|margin_hor:$margin_hor|margin_vert:$margin_vert|points:$points width:$width|height:$height|outformat:$outformat + angle:$angle|charExp:$charExp ############################################################################################## PARAMS @@ -348,14 +364,20 @@ dev.off() if($cell_note == 0){ $outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) - heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5)) + heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", + margins=c($margin_vert,$margin_hor), lhei = c(1,5), + cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) +) dev.off() } if($cell_note == 1){ $outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) - heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5)) - dev.off() + heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.", + notecol="black", density.info="none", trace="none", dendrogram="row", + margins=c($margin_vert,$margin_hor), lhei = c(1,5), + cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) + dev.off() } RCMD From b9d3381f3e24dcdff264d13969aeaf4d3f6eb51a Mon Sep 17 00:00:00 2001 From: vinuesa Date: Sat, 14 Oct 2017 18:30:57 -0500 Subject: [PATCH 3/6] v0.5_14Oct17; added options -A and -X to control leaf label angle and character eXpansion --- hcluster_matrix.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/hcluster_matrix.sh b/hcluster_matrix.sh index cf40515..122b74e 100755 --- a/hcluster_matrix.sh +++ b/hcluster_matrix.sh @@ -367,7 +367,6 @@ if($cell_note == 0){ heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5), cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) -) dev.off() } From 94567e16a8d6309b0b63a69bea9366c4a9c38253 Mon Sep 17 00:00:00 2001 From: vinuesa Date: Sun, 15 Oct 2017 11:27:24 -0500 Subject: [PATCH 4/6] added options -A and -X to control the angle and character eXpansion factor of leaf labels --- hcluster_matrix.sh | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/hcluster_matrix.sh b/hcluster_matrix.sh index 122b74e..53a1a38 100755 --- a/hcluster_matrix.sh +++ b/hcluster_matrix.sh @@ -56,7 +56,7 @@ function print_help() -W ouptupt device width [def:$width] -N print Notes and exit [flag] - -A angle to rotate leaf labels [def $angle] + -A <'integer,integer'> angle to rotate leaf labels [def $angle] -X leaf label character expansion factor [def $charExp] @@ -65,7 +65,7 @@ function print_help() EXAMPLE: - $progname -i pangenome_matrix_t0.tab -t "Pan-genome tree for Genus X" -a complete -d manhattan -o pdf -x 'maltoph|genosp' + $progname -i pangenome_matrix_t0.tab -t "Pan-genome tree" -a ward.D2 -d euclidean -o pdf -x 'maltoph|genosp' -A 'NULL,45' -X 0.8 AIM: compute a distance matrix from a pangenome_matrix.tab file produced after running get_homologues.pl and compare_clusters.pl with options -t 0 -m . @@ -194,7 +194,7 @@ distance=gower decimals=2 charExp=1.0 -angle=45 +angle='NULL,NULL' #colTax=1 subset_matrix=0 @@ -309,7 +309,7 @@ cat << PARAMS distance:$distance|dist_cutoff:$dist_cutoff|hclustering_meth:$algorithm|cell_note:$cell_note text:$text|margin_hor:$margin_hor|margin_vert:$margin_vert|points:$points width:$width|height:$height|outformat:$outformat - angle:$angle|charExp:$charExp + angle:"$angle"|charExp:$charExp ############################################################################################## PARAMS @@ -323,6 +323,9 @@ tree_file=${tree_file//\//_} newick_file="hclust_${distance}-${algorithm}_${tab_file%.*}_tree.ph" newick_file=${newick_file//\//_} +aRow=$(echo "$angle" | cut -d, -f1) +aCol=$(echo "$angle" | cut -d, -f2) + echo ">>> Plotting files $tree_file and $heatmap_outfile ..." echo " this will take some time, please be patient" echo @@ -346,6 +349,8 @@ if($subset_matrix > 0 ){ table <- table[include_list, ] } + + mat_dat <- data.matrix(table[,2:ncol(table)]) rnames <- table[,1] @@ -366,7 +371,8 @@ if($cell_note == 0){ $outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5), - cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) + cexRow=$charExp, cexCol=$charExp, + srtRow=$aRow, srtCol=$aCol) dev.off() } @@ -375,7 +381,8 @@ if($cell_note == 1){ heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5), - cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) + cexRow=$charExp, cexCol=$charExp, + srtRow=$aRow, srtCol=$aCol) dev.off() } From 9090b2f4bd63ec34e646aad047e226931d90466a Mon Sep 17 00:00:00 2001 From: vinuesa Date: Sun, 15 Oct 2017 11:33:12 -0500 Subject: [PATCH 5/6] added options -a and -X; improved documentation --- plot_matrix_heatmap.sh | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/plot_matrix_heatmap.sh b/plot_matrix_heatmap.sh index 34d9a9a..b6fc10c 100755 --- a/plot_matrix_heatmap.sh +++ b/plot_matrix_heatmap.sh @@ -42,18 +42,18 @@ function print_help() -d maximum number of decimals in matrix display [0-2; def: $decimals] II) tweak the graphical output: - -a angle to rotate node angles [def $angle] - -t text for plot title [def input_tab_file_name] - -m margins_horizontal [def $margin_hor] - -v margins_vertical [def $margin_vert] - -o output file format [def $outformat] - -p points for plotting device [def $points] - -H output device height [def $height] - -W output device width [def $width] - -C do not reorder clusters [def reorder clusters and plot dendrogram] - -r remove column names and cell contents [def names and cell contents are printed] - -k text for scale X-axis [def "Value"] - -X character expansion factor [def $charExp] + -a <'integer,integer'> angle to rotate row,col labels [def $angle] + -t text for plot title [def input_tab_file_name] + -m margins_horizontal [def $margin_hor] + -v margins_vertical [def $margin_vert] + -o output file format [def $outformat] + -p points for plotting device [def $points] + -H output device height [def $height] + -W output device width [def $width] + -C do not reorder clusters [def reorder clusters and plot dendrogram] + -r remove column names and cell contents [def names and cell contents are printed] + -k text for scale X-axis [def "Value"] + -X character expansion factor [def $charExp] RUN NJ ANALYSIS using ANI matrix (average identity matrix) generated by get_homologues.pl -A -t 0 -M|G -N will compute a distance matrix from the input similarity matrix @@ -67,7 +67,7 @@ function print_help() -M prints gplot installation instructions and further usage information EXAMPLE: - $progname -i Avg_identity.tab -c 98 -t "Genus X ANIb (OMCL all clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc' -d 1 -a 45 -X 0.9 + $progname -i Avg_identity.tab -c 99.1 -t "ANIb (OMCL core-clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc' -d 1 -a 'NULL,45' -X 0.9 #------------------------------------------------------------------------------------------------------------------ AIM: Plot ordered heatmaps with row and col. dendrogram, from squared numeric (distance or presence-absence) matrix, @@ -183,7 +183,7 @@ remove_colnames=0 key_xaxis="Value" decimals=0 charExp=1.0 -angle=45 +angle='NULL,NULL' subset_matrix=0 @@ -300,6 +300,11 @@ else nj_tree="${tab_file%.*}_BioNJ.ph" fi +aRow=$(echo "$angle" | cut -d, -f1) +aCol=$(echo "$angle" | cut -d, -f2) + + + # 2) call R using a heredoc and write the resulting script to file R --no-save -q < ${progname%.*}_script_run_at_${start_time}.R library("gplots") @@ -332,14 +337,14 @@ if($reorder_clusters > 0){ $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F, - cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) + cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol) dev.off() } else { $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), - cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) + cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol) dev.off() } } else { @@ -347,14 +352,14 @@ if($reorder_clusters > 0){ $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", labCol=F, key.xlab="$key_xaxis", trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE, - cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) + cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol) dev.off() } else { $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE, - cexRow=$charExp, cexCol=$charExp, srtRow=$angle, srtCol=$angle) + cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol) dev.off() } } From efef1689853b27999aae1468fb317f2d2cdbcf94 Mon Sep 17 00:00:00 2001 From: vinuesa Date: Sun, 15 Oct 2017 11:35:07 -0500 Subject: [PATCH 6/6] added options -a and -X; improved documentation --- hcluster_matrix.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hcluster_matrix.sh b/hcluster_matrix.sh index 53a1a38..891d59a 100755 --- a/hcluster_matrix.sh +++ b/hcluster_matrix.sh @@ -44,7 +44,8 @@ function print_help() OPTIONAL: -a algorithm/method for clustering [ward.D|ward.D2|single|complete|average(=UPGMA)] [def $algorithm] - -c 1|0 to display or not the distace values in the heatmap cells [def:$cell_note] + -c 1|0 to display or not the distace values [def:$cell_note] + in the heatmap cells -d distance type [euclidean|manhattan|gower] [def $distance] -f maximum number of decimals in matrix display [1,2; def:$decimals] -t text for Main title [def:$text] @@ -56,11 +57,11 @@ function print_help() -W ouptupt device width [def:$width] -N print Notes and exit [flag] - -A <'integer,integer'> angle to rotate leaf labels [def $angle] + -A <'integer,integer'> angle to rotate row,col labels [def $angle] -X leaf label character expansion factor [def $charExp] - Select genomes from input pangenome_matrix_t0.tab using regular expressions + Select genomes from input pangenome_matrix_t0.tab using regular expressions: -x regex, like: 'Escherichia|Salmonella' [def $regex]