Skip to content

Commit

Permalink
remove non-standard gene symbols in compliant mode
Browse files Browse the repository at this point in the history
  • Loading branch information
oschwengers committed Nov 13, 2024
1 parent 7aa8ebc commit 30b3928
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
5 changes: 5 additions & 0 deletions bakta/io/gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import bakta.constants as bc
import bakta.io.fasta as fasta
import bakta.io.insdc as insdc
import bakta.features.annotation as ba
import bakta.so as so


Expand Down Expand Up @@ -165,6 +166,10 @@ def write_features(data: dict, features_by_sequence: Dict[str, dict], gff3_path:
'locus_tag': feat['locus'],
'gene': feat['gene']
}
if(ba.RE_GENE_SYMBOL.fullmatch(feat['gene'])): # discard non-standard ncRNA gene symbols
gene_annotations['gene'] = feat['gene']
else:
annotations.pop('gene')
if('truncated' in feat):
gene_annotations[bc.INSDC_FEATURE_PSEUDO] = True
gene_annotations = encode_annotations(gene_annotations)
Expand Down
11 changes: 9 additions & 2 deletions bakta/io/insdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import bakta
import bakta.config as cfg
import bakta.constants as bc
import bakta.features.annotation as ba
import bakta.so as so


Expand Down Expand Up @@ -258,8 +259,14 @@ def build_biopython_sequence_list(data: dict, features: Sequence[dict]):
'locus_tag': feature['locus']
}
if(feature.get('gene', None)):
qualifiers['gene'] = feature['gene']
gene_qualifier['gene'] = feature['gene']
if(cfg.compliant):
if(ba.RE_GENE_SYMBOL.fullmatch(feature['gene'])): # discard non-standard gene symbols
gene_qualifier['gene'] = feature['gene']
else:
qualifiers.pop('gene')
else:
qualifiers['gene'] = feature['gene']
gene_qualifier['gene'] = feature['gene']
if(bc.PSEUDOGENE in feature):
if(feature['type'] == bc.FEATURE_CDS):
gene_qualifier[bc.INSDC_FEATURE_PSEUDOGENE] = bc.INSDC_FEATURE_PSEUDOGENE_TYPE_UNPROCESSED if feature[bc.PSEUDOGENE]['paralog'] else bc.INSDC_FEATURE_PSEUDOGENE_TYPE_UNITARY
Expand Down

0 comments on commit 30b3928

Please sign in to comment.