Skip to content

Commit

Permalink
skip the empty genomes during download (fixes #32)
Browse files Browse the repository at this point in the history
  • Loading branch information
HadrienG committed Nov 6, 2017
1 parent 656112b commit 812ba9a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
7 changes: 7 additions & 0 deletions iss/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,17 @@ def ncbi(kingdom, n_genomes):
retmode='txt')
try:
record = SeqIO.read(genome_record, 'fasta')
n_count = record.seq.count('N') + record.seq.count('n')
assert n_count / len(record) != 1.0
except http.client.IncompleteRead as e:
logger.warning(
'Failed to read downloaded genome. Skipping')
continue
except AssertionError as e:
logger.warning(
'%s only contains Ns. Skipping'
% nucleotide_info['AccessionVersion'])
continue
genomes.append(record)
n += 1
else:
Expand Down
4 changes: 3 additions & 1 deletion iss/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def simulate_read(record, ErrorModel, i):
EXPERIMENTAL. SHOULD BE MULTI-THREADABLE
"""
logger = logging.getLogger(__name__)
sequence = record.seq
header = record.id

Expand All @@ -78,7 +79,8 @@ def simulate_read(record, ErrorModel, i):
0, len(record.seq) - (2 * read_length + insert_size))
except ValueError as e:
logger.error(
'%s too small for this ErrorModel:%s' % (record.id, e))
'%s shorter than template length for this ErrorModel:%s'
% (record.id, e))
sys.exit(1)

forward_end = forward_start + read_length
Expand Down

0 comments on commit 812ba9a

Please sign in to comment.