-
Notifications
You must be signed in to change notification settings - Fork 1
/
get_annotated_regions_fromverdant.pl
68 lines (64 loc) · 1.75 KB
/
get_annotated_regions_fromverdant.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/perl -w
use strict;
use warnings;

# Extract the sequences of annotated regions from a guide plastome and append
# them, as FASTA records, to three per-class files (genes, tRNAs, rRNAs).
#
# USAGE:
#   get_annotated_regions_fromverdant.pl PLASTOME ANNOTATION PREFIX [TAG]
#
#   PLASTOME    FASTA file with the plastome sequence of the guide species.
#               All non-header lines are concatenated into one sequence.
#   ANNOTATION  Verdant annotation file of the guide species. Each line is
#               read as whitespace-separated columns: name, start, end,
#               strand (1-based inclusive coordinates; "-" = reverse strand).
#   PREFIX      Prefix of the output files:
#                 PREFIX_annotated_regions_fromverdant_genes.fsa
#                 PREFIX_annotated_regions_fromverdant_trnas.fsa
#                 PREFIX_annotated_regions_fromverdant_rrnas.fsa
#   TAG         Optional text written after the literal "XXX" separator in
#               every FASTA header (">nameXXXTAG"). Defaults to empty.
#
# The output files are opened in append mode, so repeated runs with the same
# PREFIX accumulate records instead of overwriting them.

my $USAGE = "Usage: $0 PLASTOME ANNOTATION PREFIX [TAG]\n";

@ARGV >= 3 or die $USAGE;
my ($plastome_path, $annotation_path, $out_prefix, $header_tag) = @ARGV;

# The tag is optional; an empty tag reproduces the header that an undefined
# fourth argument used to yield, without the "uninitialized" warning.
$header_tag = '' unless defined $header_tag;

my $plastome = read_plastome($plastome_path);
my %elements = collect_elements($annotation_path, $plastome);
write_elements($out_prefix, $header_tag, \%elements);

# Read a FASTA file and return all of its sequence lines joined together.
# Header lines (starting with ">") are skipped. Dies if the file cannot be
# read or contains no sequence.
sub read_plastome {
    my ($path) = @_;

    open my $fh, '<', $path
        or die "Cannot open plastome file '$path': $!\n";

    my $sequence = '';
    while (my $line = <$fh>) {
        # Strip LF as well as CRLF endings: a stray "\r" inside the
        # concatenated sequence would shift every annotation coordinate.
        $line =~ s/\r?\n\z//;
        next if $line =~ /^>/;
        $sequence .= $line;
    }
    close $fh or die "Cannot close plastome file '$path': $!\n";

    length $sequence
        or die "No sequence found in plastome file '$path'\n";
    return $sequence;
}

# Return the reverse complement of a nucleotide string. Case is preserved;
# IUPAC ambiguity codes are complemented too (S, W and N are their own
# complements and stay unchanged).
sub reverse_complement {
    my ($seq) = @_;

    my $revcomp = scalar reverse $seq;
    $revcomp =~ tr/ACGTRYKMBDHVacgtrykmbdhv/TGCAYRMKVHDBtgcayrmkvhdb/;
    return $revcomp;
}

# Decide whether an annotation line describes a region to extract.
#   $name  first column of the line
#   $line  the whole line (the structural keywords are matched against it)
# Wanted are:
#   * tRNA names of the form trnX-NNN, optionally followed by _exonN;
#   * names containing neither "-" nor "~", unless the line mentions one of
#     IRA, IRB, LSC, SSC, FULL or intron.
sub is_wanted_feature {
    my ($name, $line) = @_;

    return 1 if $name =~ /^trn\w+\-\w\w\w(?:_exon\d)?$/;
    return 0 if $name =~ /[-~]/;
    return $line !~ /IRA|IRB|LSC|SSC|FULL|intron/ ? 1 : 0;
}

# Parse the annotation file and return a hash (name => sequence) of every
# wanted region cut out of $plastome. Reverse-strand regions are reverse
# complemented. When a name occurs more than once, the last line wins.
# Lines with unusable coordinates are reported on STDERR and skipped.
sub collect_elements {
    my ($path, $plastome) = @_;

    my $genome_length = length $plastome;
    my %elements;

    open my $fh, '<', $path
        or die "Cannot open annotation file '$path': $!\n";

    LINE:
    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;
        next LINE if $line !~ /\S/;    # blank line

        # split ' ' ignores leading whitespace, so an indented line still
        # yields the name in the first field.
        my ($name, $start, $end, $strand) = split ' ', $line;
        next LINE unless is_wanted_feature($name, $line);

        unless (defined $end && $start =~ /\A\d+\z/ && $end =~ /\A\d+\z/) {
            warn "$path line $.: skipping '$name': "
               . "start/end are not integers\n";
            next LINE;
        }
        if ($start < 1 || $end < $start || $start > $genome_length) {
            warn "$path line $.: skipping '$name': coordinates "
               . "$start-$end do not fit a plastome of length "
               . "$genome_length\n";
            next LINE;
        }

        # Coordinates are 1-based and inclusive; substr is 0-based.
        my $seq = substr $plastome, $start - 1, $end - $start + 1;

        # A missing strand column is treated as forward strand.
        $seq = reverse_complement($seq)
            if defined $strand && $strand eq '-';

        $elements{$name} = $seq;
    }
    close $fh or die "Cannot close annotation file '$path': $!\n";

    return %elements;
}

# Append every element to the output file of its class as a two-line FASTA
# record (">nameXXXtag" followed by the sequence), in sorted name order.
# Names containing "trn" go to the tRNA file, names containing "rrn" to the
# rRNA file, everything else to the genes file. All three files are created
# even when a class has no records.
sub write_elements {
    my ($prefix, $tag, $elements) = @_;

    my %output_for;
    for my $class (qw(genes trnas rrnas)) {
        my $path = $prefix . "_annotated_regions_fromverdant_${class}.fsa";
        open my $fh, '>>', $path
            or die "Cannot open output file '$path': $!\n";
        $output_for{$class} = { fh => $fh, path => $path };
    }

    for my $gid (sort keys %{$elements}) {
        my $class;
        if ($gid =~ /trn/) {
            $class = 'trnas';
        }
        elsif ($gid =~ /rrn/) {
            $class = 'rrnas';
        }
        else {
            $class = 'genes';
        }

        print { $output_for{$class}{fh} }
            ">${gid}XXX${tag}\n$elements->{$gid}\n"
            or die "Cannot write to '$output_for{$class}{path}': $!\n";
    }

    # Buffered write errors only surface at close, so check it.
    for my $class (sort keys %output_for) {
        close $output_for{$class}{fh}
            or die "Cannot close '$output_for{$class}{path}': $!\n";
    }
    return;
}