Skip to content

Commit

Permalink
Merge pull request #239 from replikation/split_fasta_improvement
Browse files (browse the repository at this point in the history)
Split fasta improvement
  • Loading branch information
replikation authored Oct 19, 2022
2 parents f52e379 + 31404e3 commit eab1c4f
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions bin/split_fasta.py
Original file line number | Diff line number | Diff line change
@@ -36,11 +36,11 @@ def log(string, newline_before=False):
if line.startswith('>'):

# new sequence
- seq_name = line.strip().split()[0][1:]
+ seq_name = line.strip()[1:]
assert seq_name != '', f'Empty header in file: {fasta_file}'

# sanitize
- seq_name = seq_name.replace('/', '_').replace(':', '_').replace('|','_')
+ seq_name = seq_name.replace(' ', '_').replace('/', '_').replace(':', '_').replace('|','_')

# handle duplicates
if seq_name in sequence_names:
@@ -59,7 +59,10 @@ def log(string, newline_before=False):
log(f'Writing {outfile}')
outfh = open(outfile, 'w')
outfh.write(f'>{seq_name}\n')


elif line in ['\n','\r\n']:
continue

else:
# write rest of lines (and fix windows line endings)
outfh.write(line.replace('\r',''))
Expand Down

0 comments on commit eab1c4f

Please sign in to comment.