-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathConvergePathway2Level2.pl
executable file
·111 lines (93 loc) · 2.89 KB
/
ConvergePathway2Level2.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#/usr/bin/perl
use strict;
use warnings;
local $SIG{__WARN__} = sub { die "ERROR in $0: ", $_[0] };
use Cwd 'abs_path';
use Getopt::Long;
use Data::Dumper;
#use Statistics::R;
(my $fmapPath = abs_path($0)) =~ s/\/[^\/]*$//;
GetOptions('h' => \(my $help = ''),
'p=s' => \(my $orthology2pathwayFile = "$fmapPath/FMAP_data/KEGG_Pathway2Level2.txt"),
'd=s' => \(my $pathwayDefinitionFile = "$fmapPath/FMAP_data/KEGG_module.txt"),
);
if($help || scalar(@ARGV) == 0) {
die <<EOF;
Usage: perl ConvergePathway2Level2.pl all.merged.abundance.KeepID.Pathway.txt > all.merged.abundance.KeepID.Pathway.Level2.txt
Options: -h display this help message
EOF
}
my ($input_file) = @ARGV;
foreach($input_file, $orthology2pathwayFile, $pathwayDefinitionFile) {
die "ERROR in $0: '$_' is not readable.\n" unless(-r $_);
}
sub sum_array{
my ($array1_ref, $array2_ref) = @_;
my @array1 = @{ $array1_ref};
my @array2 = @{ $array2_ref};
my @array_sum = ();
my $max = ($#array1 > $#array2) ? $#array1 : $#array2;
@array_sum = map { $array1[$_] + $array2[$_] } (0..$max);
return @array_sum;
}
my $header = '';
my (%KO_abundance, %module_abundance, %module2KO) = ();
open (FH, "<$orthology2pathwayFile") or die $!;
while(<FH>){
chomp;
my @array = split(/\t/);
if (!defined($module2KO{$array[1]})){
$module2KO{$array[1]} = $array[0];
}
else{
$module2KO{$array[1]} = $module2KO{$array[1]}. ','. $array[0];
}
}
close(FH);
open (FH2, "<$input_file") or die $!;
while(<FH2>){
chomp;
if ($. == 1){
$header = $_;
next;
}
my @array = split(/\t/);
my $KO_ID = shift(@array);
#print "AAAAAA", $KO_ID, "\n";
$KO_abundance{$KO_ID} = [@array];
}
close(FH2);
for (sort keys %KO_abundance ) {
my @value_array = @{$KO_abundance{$_}};
}
my %module_KO_counts = ();
for my $module_ID (sort keys %module2KO ) {
my @array = split (/,/, $module2KO{$module_ID});
my $module_KO_count = $#array + 1;
$module_KO_counts{$module_ID} = $module_KO_count;
my $element_count = 0;
foreach my $element2 (@array){
if (exists $KO_abundance{$element2}){
$element_count ++;
}
}
foreach my $element (@array){
if (defined $KO_abundance{$element}){
if (!defined $module_abundance{$module_ID}){
@{$module_abundance{$module_ID}} = @{$KO_abundance{$element}};
}else{
@{$module_abundance{$module_ID}} = sum_array(\@{$module_abundance{$module_ID}}, \@{$KO_abundance{$element}});
}
}else{
next;
}
}
}
#divide by the participated KO #.
print $header, "\n";
for my $module_existed (sort keys %module_abundance){
my @normalized_module_abundance = map {($_ / $module_KO_counts{$module_existed})} @{$module_abundance{$module_existed}};
#Use the non-normalized abundance for Level1 and Level2 KEGG annotation.
print $module_existed, "\t", join("\t", @{$module_abundance{$module_existed}}), "\n";
#print $module_existed, "\t", join("\t", @normalized_module_abundance), "\n";
}