From d5757b6e040a91c1aae8366a60967b226b202b19 Mon Sep 17 00:00:00 2001
From: KatharinaHoff <katharina.hoff@gmail.com>
Date: Thu, 15 Feb 2024 10:37:20 +0100
Subject: [PATCH] Fix concatenation of a string to a Seq object in a SeqRecord

---
 scripts/getAnnoFastaFromJoingenes.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/getAnnoFastaFromJoingenes.py b/scripts/getAnnoFastaFromJoingenes.py
index 9e4057da..eb2c8094 100755
--- a/scripts/getAnnoFastaFromJoingenes.py
+++ b/scripts/getAnnoFastaFromJoingenes.py
@@ -2,7 +2,7 @@
 
 # Author: Katharina J. Hoff
 # E-Mail: katharina.hoff@uni-greifswald.de
-# Last modified on August 26th 2019
+# Last modified on February 15th 2024
 #
 # This Python script extracts CDS features from a GTF file, excises
 # corresponding sequence windows from a genome FASTA file, stitches the
@@ -16,7 +16,8 @@
 # warning to STDOUT if such genes are in the GTF-file. The IDs of bad genes
 # are printed to a file bad_genes.lst. Option -s allows to exclude bad genes
 # from the FASTA output file, automatically.
-# Beware: the script assumes that the gtf input file is sorted by coordinates!
+# Beware: The script assumes that the GTF input file is sorted by coordinates!
+#         It also assumes the AUGUSTUS-specific GTF format.
 
 try:
     import argparse
@@ -161,8 +162,8 @@
                         if i == 0 and cds_line['strand'] == '+' and cds_line['frame'] != 0:
                             codingseq[tx].seq += Seq((3 - cds_line['frame'])
                                                      * 'N')
-                        codingseq[tx].seq += record.seq[cds_line['start'] -
-                                                        1:cds_line['end']]
+                        codingseq[tx].seq += Seq(record.seq[cds_line['start'] -
+                                                        1:cds_line['end']])
                         if i == (nCDS - 1):
                             if cds_line['strand'] == '+':
                                 if (len(codingseq[tx].seq) % 3) != 0:
@@ -175,8 +176,7 @@
                             if(len(codingseq[tx].seq) % 3) != 0:
                                 codingseq[tx].seq = Seq(
                                     (3 - (len(codingseq[tx].seq) % 3)) * 'N') + codingseq[tx].seq
-                            codingseq[tx].seq = codingseq[
-                                tx].seq.reverse_complement()
+                            codingseq[tx].seq = codingseq[tx].seq.reverse_complement()
 except IOError:
     print("Error: Failed to open file " + args.genome + "!")
     exit(1)