Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
eead-csic-compbio committed Oct 23, 2017
2 parents e40acb2 + efef168 commit 3ed63db
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 30 deletions.
47 changes: 38 additions & 9 deletions hcluster_matrix.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@


progname=${0##*/}
VERSION='0.4_7Sep17' # v0.4_7Sep17; added options -x <regex> to select specific rows (genomes)
VERSION='0.5_14Oct17' #v'0.5_14Oct17'; added options -A and -X to control the angle
# and character eXpansion factor of leaf labels
#'0.4_7Sep17' # v0.4_7Sep17; added options -x <regex> to select specific rows (genomes)
# from the input pangenome_matrix_t0.tab
# -c <0|1> to print or not distances in heatmap cells
# -f <int> maximum number of decimals in matrix display (if -c 1)
Expand Down Expand Up @@ -42,7 +44,8 @@ function print_help()
OPTIONAL:
-a <string> algorithm/method for clustering
[ward.D|ward.D2|single|complete|average(=UPGMA)] [def $algorithm]
-c <int> 1|0 to display or not the distace values in the heatmap cells [def:$cell_note]
-c <int> 1|0 to display or not the distace values [def:$cell_note]
in the heatmap cells
-d <string> distance type [euclidean|manhattan|gower] [def $distance]
-f <int> maximum number of decimals in matrix display [1,2; def:$decimals]
-t <string> text for Main title [def:$text]
Expand All @@ -54,14 +57,19 @@ function print_help()
-W <integer> ouptupt device width [def:$width]
-N <flag> print Notes and exit [flag]
Select genomes from input pangenome_matrix_t0.tab using regular expressions
-A <'integer,integer'> angle to rotate row,col labels [def $angle]
-X <float> leaf label character expansion factor [def $charExp]
Select genomes from input pangenome_matrix_t0.tab using regular expressions:
-x <string> regex, like: 'Escherichia|Salmonella' [def $regex]
EXAMPLE:
$progname -i pangenome_matrix_t0.tab -t "Pan-genome tree for Genus X" -a complete -d manhattan -o pdf -x 'maltoph|genosp'
$progname -i pangenome_matrix_t0.tab -t "Pan-genome tree" -a ward.D2 -d euclidean -o pdf -x 'maltoph|genosp' -A 'NULL,45' -X 0.8
AIM: compute a distance matrix from a pangenome_matrix.tab file produced after running
get_homologues.pl and compare_clusters.pl with options -t 0 -m .
get_homologues.pl and compare_clusters.pl with options -t 0 -m .
The pangenome_matrix.tab file processed by hclust(), and heatmap.2()
OUTPUT: a newick file with extension .ph and svg|pdf output of hclust and heatmap.2 calls
Expand Down Expand Up @@ -186,16 +194,22 @@ algorithm=ward.D2
distance=gower
decimals=2

charExp=1.0
angle='NULL,NULL'
#colTax=1

subset_matrix=0


# See bash cookbook 13.1 and 13.2
while getopts ':a:c:i:d:t:m:o:p:f:v:x:H:W:R:hND?:' OPTIONS
while getopts ':a:A:c:i:d:t:m:o:p:f:v:x:X:H:W:R:hND?:' OPTIONS
do
case $OPTIONS in

a) algorithm=$OPTARG
;;
A) angle=$OPTARG
;;
c) cell_note=$OPTARG
;;
d) distance=$OPTARG
Expand All @@ -216,6 +230,8 @@ do
;;
x) regex=$OPTARG
;;
X) charExp=$OPTARG
;;
C) reorder_clusters=0
;;
H) height=$OPTARG
Expand Down Expand Up @@ -294,6 +310,7 @@ cat << PARAMS
distance:$distance|dist_cutoff:$dist_cutoff|hclustering_meth:$algorithm|cell_note:$cell_note
text:$text|margin_hor:$margin_hor|margin_vert:$margin_vert|points:$points
width:$width|height:$height|outformat:$outformat
angle:"$angle"|charExp:$charExp
##############################################################################################
PARAMS
Expand All @@ -307,6 +324,9 @@ tree_file=${tree_file//\//_}
newick_file="hclust_${distance}-${algorithm}_${tab_file%.*}_tree.ph"
newick_file=${newick_file//\//_}

aRow=$(echo "$angle" | cut -d, -f1)
aCol=$(echo "$angle" | cut -d, -f2)

echo ">>> Plotting files $tree_file and $heatmap_outfile ..."
echo " this will take some time, please be patient"
echo
Expand All @@ -330,6 +350,8 @@ if($subset_matrix > 0 ){
table <- table[include_list, ]
}
mat_dat <- data.matrix(table[,2:ncol(table)])
rnames <- table[,1]
Expand All @@ -348,14 +370,21 @@ dev.off()
if($cell_note == 0){
$outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5))
heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row",
margins=c($margin_vert,$margin_hor), lhei = c(1,5),
cexRow=$charExp, cexCol=$charExp,
srtRow=$aRow, srtCol=$aCol)
dev.off()
}
if($cell_note == 1){
$outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5))
dev.off()
heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.",
notecol="black", density.info="none", trace="none", dendrogram="row",
margins=c($margin_vert,$margin_hor), lhei = c(1,5),
cexRow=$charExp, cexCol=$charExp,
srtRow=$aRow, srtCol=$aCol)
dev.off()
}
RCMD
Expand Down
57 changes: 36 additions & 21 deletions plot_matrix_heatmap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
#: OUTPUT: svg and pdf; png not implemented yet

progname=${0##*/} # plot_matrix_heatmap.sh
VERSION='v0.5_18Aug17' # added options -d (max no. decimals) and -x (filter matrix with regex)
VERSION='v0.6_14Oct17' # added options -X (charExp) and -a (label rotation angle)
#'v0.5_18Aug17' # added options -d (max no. decimals) and -x (filter matrix with regex)
#'v0.4_17Aug17' # added option -r to remove column names and cell contents, and -k
#'v0.3_13Apr16' # added option -c to filter input matrix by a maximum similarity cut-off value
# to reduce excessive redundancy. Improved the help text printed with -M
Expand Down Expand Up @@ -41,16 +42,18 @@ function print_help()
-d <int> maximum number of decimals in matrix display [0-2; def: $decimals]
II) tweak the graphical output:
-t <string> text for plot title [def input_tab_file_name]
-m <integer> margins_horizontal [def $margin_hor]
-v <integer> margins_vertical [def $margin_vert]
-o <string> output file format [def $outformat]
-p <integer> points for plotting device [def $points]
-H <integer> output device height [def $height]
-W <integer> output device width [def $width]
-C <flag> do not reorder clusters [def reorder clusters and plot dendrogram]
-r <flag> remove column names and cell contents [def names and cell contents are printed]
-k <string> text for scale X-axis [def "Value"]
-a <'integer,integer'> angle to rotate row,col labels [def $angle]
-t <string> text for plot title [def input_tab_file_name]
-m <integer> margins_horizontal [def $margin_hor]
-v <integer> margins_vertical [def $margin_vert]
-o <string> output file format [def $outformat]
-p <integer> points for plotting device [def $points]
-H <integer> output device height [def $height]
-W <integer> output device width [def $width]
-C <flag> do not reorder clusters [def reorder clusters and plot dendrogram]
-r <flag> remove column names and cell contents [def names and cell contents are printed]
-k <string> text for scale X-axis [def "Value"]
-X <float> character expansion factor [def $charExp]
RUN NJ ANALYSIS using ANI matrix (average identity matrix) generated by get_homologues.pl -A -t 0 -M|G
-N <flag> will compute a distance matrix from the input similarity matrix
Expand All @@ -64,7 +67,7 @@ function print_help()
-M <flag> prints gplot installation instructions and further usage information
EXAMPLE:
$progname -i Avg_identity.tab -c 98.5 -t "Genus X ANIb (OMCL all clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc'
$progname -i Avg_identity.tab -c 99.1 -t "ANIb (OMCL core-clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc' -d 1 -a 'NULL,45' -X 0.9
#------------------------------------------------------------------------------------------------------------------
AIM: Plot ordered heatmaps with row and col. dendrogram, from squared numeric (distance or presence-absence) matrix,
Expand Down Expand Up @@ -179,14 +182,18 @@ reorder_clusters=1
remove_colnames=0
key_xaxis="Value"
decimals=0
charExp=1.0
angle='NULL,NULL'

subset_matrix=0

# See bash cookbook 13.1 and 13.2
while getopts ':c:i:d:t:m:o:p:v:x:H:W:k:hrMNC?:' OPTIONS
while getopts ':a:c:i:d:t:m:o:p:v:x:X:H:W:k:hrMNC?:' OPTIONS
do
case $OPTIONS in

a) angle=$OPTARG
;;
c) sim_cutoff=$OPTARG
;;
d) decimals=$OPTARG
Expand Down Expand Up @@ -217,6 +224,8 @@ do
;;
x) regex=$OPTARG
;;
X) charExp=$OPTARG
;;
C) reorder_clusters=0
;;
\:) printf "argument missing from -%s option\n" $OPTARG
Expand Down Expand Up @@ -270,6 +279,7 @@ cat << PARAMS
input tab_file = $tab_file | sim_cutoff = $sim_cutoff | max_decimals = $decimals
subset_matrix = $subset_matrix | regex = $regex
text=$text|margin_hor=$margin_hor|margin_vert=$margin_vert|points=$points
angle=$angle|charExp=$charExp
width=$width|height=$height|outformat=$outformat
reorder_clusters=$reorder_clusters|remove_colnames=$remove_colnames|key_xaxis=$key_xaxis|do_bioNJ=$do_nj
Expand All @@ -290,6 +300,11 @@ else
nj_tree="${tab_file%.*}_BioNJ.ph"
fi

aRow=$(echo "$angle" | cut -d, -f1)
aCol=$(echo "$angle" | cut -d, -f2)



# 2) call R using a heredoc and write the resulting script to file
R --no-save -q <<RCMD > ${progname%.*}_script_run_at_${start_time}.R
library("gplots")
Expand All @@ -307,12 +322,8 @@ if($subset_matrix > 0 ){
include_list <- grep("$regex", rownames(mat_dat))
mat_dat <- mat_dat[include_list, ]
mat_dat <- mat_dat[,include_list]
#mat_dat <- f.mat2
#rm(f.mat)
}
if($sim_cutoff < 100)
{
tmp_mat = mat_dat
Expand All @@ -325,26 +336,30 @@ if($reorder_clusters > 0){
if($remove_colnames > 0){
$outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis",
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F)
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F,
cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol)
dev.off()
}
else {
$outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis",
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5))
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5),
cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol)
dev.off()
}
} else {
if($remove_colnames > 0){
$outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", labCol=F, key.xlab="$key_xaxis",
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE)
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE,
cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol)
dev.off()
}
else {
$outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis",
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE)
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE,
cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol)
dev.off()
}
}
Expand Down

0 comments on commit 3ed63db

Please sign in to comment.