diff --git a/hcluster_matrix.sh b/hcluster_matrix.sh index d15ac28..891d59a 100755 --- a/hcluster_matrix.sh +++ b/hcluster_matrix.sh @@ -12,7 +12,9 @@ progname=${0##*/} -VERSION='0.4_7Sep17' # v0.4_7Sep17; added options -x to select specific rows (genomes) +VERSION='0.5_14Oct17' #v'0.5_14Oct17'; added options -A and -X to control the angle + # and character eXpansion factor of leaf labels + #'0.4_7Sep17' # v0.4_7Sep17; added options -x to select specific rows (genomes) # from the input pangenome_matrix_t0.tab # -c <0|1> to print or not distances in heatmap cells # -f maximum number of decimals in matrix display (if -c 1) @@ -42,7 +44,8 @@ function print_help() OPTIONAL: -a algorithm/method for clustering [ward.D|ward.D2|single|complete|average(=UPGMA)] [def $algorithm] - -c 1|0 to display or not the distace values in the heatmap cells [def:$cell_note] + -c 1|0 to display or not the distace values [def:$cell_note] + in the heatmap cells -d distance type [euclidean|manhattan|gower] [def $distance] -f maximum number of decimals in matrix display [1,2; def:$decimals] -t text for Main title [def:$text] @@ -54,14 +57,19 @@ function print_help() -W ouptupt device width [def:$width] -N print Notes and exit [flag] - Select genomes from input pangenome_matrix_t0.tab using regular expressions + -A <'integer,integer'> angle to rotate row,col labels [def $angle] + -X leaf label character expansion factor [def $charExp] + + + Select genomes from input pangenome_matrix_t0.tab using regular expressions: -x regex, like: 'Escherichia|Salmonella' [def $regex] + EXAMPLE: - $progname -i pangenome_matrix_t0.tab -t "Pan-genome tree for Genus X" -a complete -d manhattan -o pdf -x 'maltoph|genosp' + $progname -i pangenome_matrix_t0.tab -t "Pan-genome tree" -a ward.D2 -d euclidean -o pdf -x 'maltoph|genosp' -A 'NULL,45' -X 0.8 AIM: compute a distance matrix from a pangenome_matrix.tab file produced after running - get_homologues.pl and compare_clusters.pl with options -t 0 -m . + get_homologues.pl and compare_clusters.pl with options -t 0 -m . The pangenome_matrix.tab file processed by hclust(), and heatmap.2() OUTPUT: a newick file with extension .ph and svg|pdf output of hclust and heatmap.2 calls @@ -186,16 +194,22 @@ algorithm=ward.D2 distance=gower decimals=2 +charExp=1.0 +angle='NULL,NULL' +#colTax=1 + subset_matrix=0 # See bash cookbook 13.1 and 13.2 -while getopts ':a:c:i:d:t:m:o:p:f:v:x:H:W:R:hND?:' OPTIONS +while getopts ':a:A:c:i:d:t:m:o:p:f:v:x:X:H:W:R:hND?:' OPTIONS do case $OPTIONS in a) algorithm=$OPTARG ;; + A) angle=$OPTARG + ;; c) cell_note=$OPTARG ;; d) distance=$OPTARG @@ -216,6 +230,8 @@ do ;; x) regex=$OPTARG ;; + X) charExp=$OPTARG + ;; C) reorder_clusters=0 ;; H) height=$OPTARG @@ -294,6 +310,7 @@ cat << PARAMS distance:$distance|dist_cutoff:$dist_cutoff|hclustering_meth:$algorithm|cell_note:$cell_note text:$text|margin_hor:$margin_hor|margin_vert:$margin_vert|points:$points width:$width|height:$height|outformat:$outformat + angle:"$angle"|charExp:$charExp ############################################################################################## PARAMS @@ -307,6 +324,9 @@ tree_file=${tree_file//\//_} newick_file="hclust_${distance}-${algorithm}_${tab_file%.*}_tree.ph" newick_file=${newick_file//\//_} +aRow=$(echo "$angle" | cut -d, -f1) +aCol=$(echo "$angle" | cut -d, -f2) + echo ">>> Plotting files $tree_file and $heatmap_outfile ..." echo " this will take some time, please be patient" echo @@ -330,6 +350,8 @@ if($subset_matrix > 0 ){ table <- table[include_list, ] } + + mat_dat <- data.matrix(table[,2:ncol(table)]) rnames <- table[,1] @@ -348,14 +370,21 @@ dev.off() if($cell_note == 0){ $outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) - heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5)) + heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", + margins=c($margin_vert,$margin_hor), lhei = c(1,5), + cexRow=$charExp, cexCol=$charExp, + srtRow=$aRow, srtCol=$aCol) dev.off() } if($cell_note == 1){ $outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) - heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5)) - dev.off() + heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.", + notecol="black", density.info="none", trace="none", dendrogram="row", + margins=c($margin_vert,$margin_hor), lhei = c(1,5), + cexRow=$charExp, cexCol=$charExp, + srtRow=$aRow, srtCol=$aCol) + dev.off() } RCMD diff --git a/plot_matrix_heatmap.sh b/plot_matrix_heatmap.sh index 38736be..b6fc10c 100755 --- a/plot_matrix_heatmap.sh +++ b/plot_matrix_heatmap.sh @@ -9,7 +9,8 @@ #: OUTPUT: svg and pdf; png not implemented yet progname=${0##*/} # plot_matrix_heatmap.sh -VERSION='v0.5_18Aug17' # added options -d (max no. decimals) and -x (filter matrix with regex) +VERSION='v0.6_14Oct17' # added options -X (charExp) and -a (label rotation angle) + #'v0.5_18Aug17' # added options -d (max no. decimals) and -x (filter matrix with regex) #'v0.4_17Aug17' # added option -r to remove column names and cell contents, and -k #'v0.3_13Apr16' # added option -c to filter input matrix by a maximum similarity cut-off value # to reduce excessive redundancy. Improved the help text printed with -M @@ -41,16 +42,18 @@ function print_help() -d maximum number of decimals in matrix display [0-2; def: $decimals] II) tweak the graphical output: - -t text for plot title [def input_tab_file_name] - -m margins_horizontal [def $margin_hor] - -v margins_vertical [def $margin_vert] - -o output file format [def $outformat] - -p points for plotting device [def $points] - -H output device height [def $height] - -W output device width [def $width] - -C do not reorder clusters [def reorder clusters and plot dendrogram] - -r remove column names and cell contents [def names and cell contents are printed] - -k text for scale X-axis [def "Value"] + -a <'integer,integer'> angle to rotate row,col labels [def $angle] + -t text for plot title [def input_tab_file_name] + -m margins_horizontal [def $margin_hor] + -v margins_vertical [def $margin_vert] + -o output file format [def $outformat] + -p points for plotting device [def $points] + -H output device height [def $height] + -W output device width [def $width] + -C do not reorder clusters [def reorder clusters and plot dendrogram] + -r remove column names and cell contents [def names and cell contents are printed] + -k text for scale X-axis [def "Value"] + -X character expansion factor [def $charExp] RUN NJ ANALYSIS using ANI matrix (average identity matrix) generated by get_homologues.pl -A -t 0 -M|G -N will compute a distance matrix from the input similarity matrix @@ -64,7 +67,7 @@ function print_help() -M prints gplot installation instructions and further usage information EXAMPLE: - $progname -i Avg_identity.tab -c 98.5 -t "Genus X ANIb (OMCL all clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc' + $progname -i Avg_identity.tab -c 99.1 -t "ANIb (OMCL core-clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc' -d 1 -a 'NULL,45' -X 0.9 #------------------------------------------------------------------------------------------------------------------ AIM: Plot ordered heatmaps with row and col. dendrogram, from squared numeric (distance or presence-absence) matrix, @@ -179,14 +182,18 @@ reorder_clusters=1 remove_colnames=0 key_xaxis="Value" decimals=0 +charExp=1.0 +angle='NULL,NULL' subset_matrix=0 # See bash cookbook 13.1 and 13.2 -while getopts ':c:i:d:t:m:o:p:v:x:H:W:k:hrMNC?:' OPTIONS +while getopts ':a:c:i:d:t:m:o:p:v:x:X:H:W:k:hrMNC?:' OPTIONS do case $OPTIONS in + a) angle=$OPTARG + ;; c) sim_cutoff=$OPTARG ;; d) decimals=$OPTARG @@ -217,6 +224,8 @@ do ;; x) regex=$OPTARG ;; + X) charExp=$OPTARG + ;; C) reorder_clusters=0 ;; \:) printf "argument missing from -%s option\n" $OPTARG @@ -270,6 +279,7 @@ cat << PARAMS input tab_file = $tab_file | sim_cutoff = $sim_cutoff | max_decimals = $decimals subset_matrix = $subset_matrix | regex = $regex text=$text|margin_hor=$margin_hor|margin_vert=$margin_vert|points=$points + angle=$angle|charExp=$charExp width=$width|height=$height|outformat=$outformat reorder_clusters=$reorder_clusters|remove_colnames=$remove_colnames|key_xaxis=$key_xaxis|do_bioNJ=$do_nj @@ -290,6 +300,11 @@ else nj_tree="${tab_file%.*}_BioNJ.ph" fi +aRow=$(echo "$angle" | cut -d, -f1) +aCol=$(echo "$angle" | cut -d, -f2) + + + # 2) call R using a heredoc and write the resulting script to file R --no-save -q < ${progname%.*}_script_run_at_${start_time}.R library("gplots") @@ -307,12 +322,8 @@ if($subset_matrix > 0 ){ include_list <- grep("$regex", rownames(mat_dat)) mat_dat <- mat_dat[include_list, ] mat_dat <- mat_dat[,include_list] - #mat_dat <- f.mat2 - #rm(f.mat) } - - if($sim_cutoff < 100) { tmp_mat = mat_dat @@ -325,26 +336,30 @@ if($reorder_clusters > 0){ if($remove_colnames > 0){ $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", - trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F) + trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F, + cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol) dev.off() } else { $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", - trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5)) + trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), + cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol) dev.off() } } else { if($remove_colnames > 0){ $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", labCol=F, key.xlab="$key_xaxis", - trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE) + trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE, + cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol) dev.off() } else { $outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize) heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis", - trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE) + trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE, + cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol) dev.off() } }