diff --git a/doc/iss/generate.rst b/doc/iss/generate.rst index 376c0c6..c6f6c50 100644 --- a/doc/iss/generate.rst +++ b/doc/iss/generate.rst @@ -135,13 +135,28 @@ Coverage distribution In the context of InSilicoSeq, the `abundance` is the proportion of reads in a sample, which since it does not acount for the length of the genome, does not necessarily reflect the number of organisms present in a sample. -The `coverage`and `coverage_file` options allow for simulating reads according to a coverage distribution instead of abundance. +The ``coverage`` and ``coverage_file`` options allow for simulating reads according to a coverage distribution instead of abundance. .. code-block:: bash iss generate --ncbi bacteria -U 50 --coverage lognormal -n 25M \ --model novaseq --output reads +The ``coverage_file`` option works similarly to the ``abundance_file`` option. +For two genomes A and B: + +.. code-block:: bash + + iss generate --genomes genomes.fasta --coverage_file coverage.txt \ + --model novaseq --output reads + +with, for a coverage of 20x for genome_A and 100x for genome_B, the coverage file `coverage.txt` will be: + +.. code-block:: bash + + genome_A 20 + genome_B 100 + GC bias ------- diff --git a/iss/app.py b/iss/app.py index 0dce452..24b7d8a 100644 --- a/iss/app.py +++ b/iss/app.py @@ -229,7 +229,8 @@ def generate_reads(args): f = open(genome_file, 'r') # re-opens the file with f: fasta_file = SeqIO.parse(f, 'fasta') - + total_reads_generated = 0 + total_reads_generated_unrouned = 0 for record in fasta_file: # generate reads for records try: @@ -252,9 +253,22 @@ def generate_reads(args): err_mod.read_length, genome_size ) - n_pairs = int(round( - (coverage * - len(record.seq)) / err_mod.read_length) / 2) + n_pairs_unrounded = ( + (coverage * len(record.seq)) / + err_mod.read_length) / 2 + n_pairs = round(n_pairs_unrounded) + + # check that the rounding does not cause to drop + # read pairs + total_reads_generated_unrouned += n_pairs_unrounded + total_reads_generated += n_pairs + if round(total_reads_generated_unrouned) > \ + total_reads_generated: + logger.debug( + "Adding a pair to correct rounding error") + n_pairs += 1 + total_reads_generated += 1 + # skip record if n_reads == 0 if n_pairs == 0: continue diff --git a/iss/version.py b/iss/version.py index 51ed7c4..c3b3841 100644 --- a/iss/version.py +++ b/iss/version.py @@ -1 +1 @@ -__version__ = '1.5.1' +__version__ = '1.5.2'