From 624acfd0ac430393505f995a2ab08bdeb40bf71b Mon Sep 17 00:00:00 2001 From: daisie_local Date: Wed, 18 Mar 2015 13:53:50 -0700 Subject: [PATCH] simplify_genbank_array --- lib/Genbank.pm | 23 +++++++++++++++++++++-- lib/Subfunctions.pm | 2 +- parsing/genbank.pl | 3 ++- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/lib/Genbank.pm b/lib/Genbank.pm index b802134..45f38dd 100755 --- a/lib/Genbank.pm +++ b/lib/Genbank.pm @@ -18,7 +18,7 @@ BEGIN { # Inherit from Exporter to export functions and variables our @ISA = qw(Exporter); # Functions and variables which are exported by default - our @EXPORT = qw(parse_genbank sequence_for_interval sequin_feature stringify_feature parse_feature_desc parse_interval parse_qualifiers within_interval write_sequin_tbl feature_table_from_genbank set_sequence get_sequence get_name write_features_as_fasta clone_features); + our @EXPORT = qw(parse_genbank sequence_for_interval sequin_feature stringify_feature parse_feature_desc parse_interval parse_qualifiers within_interval write_sequin_tbl feature_table_from_genbank set_sequence get_sequence get_name write_features_as_fasta clone_features simplify_genbank_array); # Functions and variables which can be optionally exported our @EXPORT_OK = qw(); } @@ -99,6 +99,25 @@ sub parse_genbank { return \@gene_array; } +sub simplify_genbank_array { + my $gene_array = shift; + + my @simplified_array = (); + foreach my $feat (@$gene_array) { + if ($feat->{'type'} =~ /gene|source/) { + my @newcontains = (); + foreach my $subfeat (@{$feat->{'contains'}}) { + if ($subfeat->{'type'} !~ /exon|intron|STS/) { + push @newcontains, $subfeat; + } + } + $feat->{'contains'} = \@newcontains; + push @simplified_array, $feat; + } + } + return \@simplified_array; +} + sub parse_feature_sequences { my $gene_array = shift; @@ -244,7 +263,7 @@ sub write_sequin_tbl { sub feature_table_from_genbank { my $gbfile = shift; - my $gene_array = Genbank::parse_genbank($gbfile); + my $gene_array = Genbank::simplify_genbank_array(Genbank::parse_genbank($gbfile)); my @feature_array = (); foreach my $gene (@$gene_array) { diff --git a/lib/Subfunctions.pm b/lib/Subfunctions.pm index 1272f7f..0b49660 100644 --- a/lib/Subfunctions.pm +++ b/lib/Subfunctions.pm @@ -1144,7 +1144,7 @@ sub blast_to_genbank { my $gbfile = shift; my $fastafile = shift; - my $gene_array = Genbank::parse_genbank($gbfile); + my $gene_array = Genbank::simplify_genbank_array(Genbank::parse_genbank($gbfile)); my $refseq = Genbank::get_sequence(); my ($ref_hash, $region_array) = Genbank::clone_features($gene_array); my ($query_hash, $query_array) = parse_fasta($fastafile); diff --git a/parsing/genbank.pl b/parsing/genbank.pl index 16ed441..cec80f6 100755 --- a/parsing/genbank.pl +++ b/parsing/genbank.pl @@ -18,7 +18,8 @@ $outfile = $gbfile; } -my $gene_array = Genbank::parse_genbank($gbfile); +my $gene_array = Genbank::simplify_genbank_array(Genbank::parse_genbank($gbfile)); + open FASTA_FH, ">", "$outfile.fasta"; print FASTA_FH Genbank::write_features_as_fasta ($gene_array);