Skip to content

Commit

Permalink
fixed bug in compare_clusters.pl when .cluster_list file is not parsed, due to previous changes in find_taxa_FASTA_array_headers
Browse files Browse the repository at this point in the history
  • Loading branch information
eead-csic-compbio committed Aug 28, 2017
1 parent 6b7cff9 commit f7983b9
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 14 deletions.
2 changes: 1 addition & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,4 +216,4 @@
17082017: updated manuals and plot_matrix_heatmap.sh with options -r (remove column names and cell contents) and -k (set name for color key X-axis)
19082017: added options -d (max no. decimals) and -x (filter matrix with regex) to plot_matrix_heatmap.sh
28082017: added parse_pangenome_matrix.pl -x to compute cluster intersection between taxa in a pangenome matrix (thanks Sean and John!)

28082017: fixed bug in compare_clusters.pl when .cluster_list file is not parsed, due to previous changes in find_taxa_FASTA_array_headers (thanks Audrey Bioteau)
32 changes: 19 additions & 13 deletions compare_clusters.pl
Original file line number Diff line number Diff line change
Expand Up @@ -232,26 +232,36 @@
next if(-d "$dir/$file"); #print "$dir/$file\n";

# read sequences in each cluster
my ($clusterkey,$cluster_data,$n_of_cluster_seqs) = ('','',0);
my ($clusterkey,$cluster_data,$n_of_cluster_seqs,$taxon_name) = ('','',0);
my (@choppedseqs,@clusterseqs,%cluster_taxa,@gis,@neighbors,@sorted_taxa);
my $cluster_ref = read_FASTA_file_array("$dir/$file");

if($taxa{$file}) # previosly read from cluster list file
if($taxa{$file}) # previously read from .cluster_list file
{
%cluster_taxa = %{$taxa{$file}};
delete($cluster_taxa{'sorted_taxa'}); # otherwise it would count as one extra taxa; it is conserved in %taxa
delete($cluster_taxa{'sorted_taxa'}); # otherwise it would count as one extra taxa; conserved in %taxa
#Uncultured_bacterium_plasmid_pRSB205.gb 1
}
else # automatically extracted from headers, error prone
{
%cluster_taxa = find_taxa_FASTA_array_headers($cluster_ref,1);
my %cluster_taxa_in_headers = find_taxa_FASTA_array_headers($cluster_ref,1);

foreach $taxon (keys(%cluster_taxa_in_headers))
{
$taxon_name = $taxon;
$taxon_name =~ s/\[|\]//g;
$cluster_taxa{$taxon_name} = $cluster_taxa_in_headers{$taxon}{'SIZE'};
}

foreach $seq (0 .. $#{$cluster_ref})
{
foreach $taxon (keys(%cluster_taxa))
foreach $taxon (keys(%cluster_taxa_in_headers))
{
if(grep(/^$seq$/,@{$cluster_taxa{$taxon}{'MEMBERS'}}))
if(grep(/^$seq$/,@{$cluster_taxa_in_headers{$taxon}{'MEMBERS'}}))
{
$taxon =~ s/\[|\]//g;
push(@{$taxa{$file}{'sorted_taxa'}},$taxon);
$taxon_name = $taxon;
$taxon_name =~ s/\[|\]//g;
push(@{$taxa{$file}{'sorted_taxa'}},$taxon_name);
}
}
}
Expand Down Expand Up @@ -470,7 +480,6 @@
my @intersection_keys;
foreach my $key (keys(%stats))
{

# intersection steps
next if($stats{$key}{'total'} != $n_of_dirs);

Expand Down Expand Up @@ -596,10 +605,7 @@
my $pangenome_fasta_file = $INP_output_dir . "/pangenome_matrix$params\.fasta";
my $pangenome_matrix_file = $INP_output_dir . "/pangenome_matrix$params\.tab";

# 1) ordena taxa por clustering jerarquico ,de la matriz pangenomica
# codigo en python en collective intelligence para hacer clusters y pintar dendrograma
# 2) ordena clusters (en horizontal) de mas frecuentes a menos, de core a pan

# 1) sort clusters
my @taxon_names = keys(%pangemat);
my (%cluster_names,$cluster_name,$file_number,%file_name);
for($taxon=0;$taxon<scalar(@taxon_names);$taxon++)
Expand Down

0 comments on commit f7983b9

Please sign in to comment.