Merge pull request #237 from HadrienG/dev

1.6.0
HadrienG · Aug 10, 2023 · 1687d52
2 parents ddd93aa + b4b740e
commit 1687d52
Show file tree

Hide file tree

Showing 18 changed files with 589 additions and 300 deletions.
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
@@ -8,7 +8,7 @@ jobs:
         strategy:
             max-parallel: 4
             matrix:
-                python-version: [3.6, 3.7, 3.8]
+                python-version: ['3.9', '3.10', '3.11']
 
         steps:
             - uses: actions/checkout@v1
@@ -21,7 +21,7 @@ jobs:
                   python -m pip install --upgrade pip
                   pip install pipenv
                   pipenv install --dev
-            - name: Test with nose
+            - name: Test with pytest
               run: |
                   chmod -w data/read_only.fasta
                   pipenv run tests

diff --git a/Pipfile b/Pipfile
@@ -7,18 +7,20 @@ name = "pypi"
 future = "*"
 numpy = "*"
 scipy = "*"
-biopython = "==1.78"
+biopython = "*"
 joblib = "*"
-pysam = "==0.15.4"
+pysam = "*"
 requests = "*"
 urllib3 = ">=1.26.5"
 
 [dev-packages]
-nose = "*"
 codecov = "*"
 "pep8" = "*"
 pycodestyle = "*"
+pytest = "*"
+pytest-cov = "*"
+exceptiongroup = "*"
 
 [scripts]
 iss = "python -m iss"
-tests = "nosetests --with-coverage --cover-package=iss --cover-xml"
+tests = "pytest --cov=iss ."
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/iss/app.py b/iss/app.py
@@ -171,7 +171,7 @@ def generate_reads(args):
                                         k, v in coverage_dic.items()
                                         if k not in args.draft}
                 draft_dic = abundance.expand_draft_abundance(
-                    abundance_dic_short,
+                    coverage_dic,
                     args.draft,
                     mode="coverage")
                 abundance_dic = {**complete_genomes_dic,

diff --git a/iss/bam.py b/iss/bam.py
@@ -92,8 +92,8 @@ def write_to_file(model, read_length, mean_f, mean_r, hist_f, hist_r,
             insert_size=i_size,
             mean_count_forward=mean_f,
             mean_count_reverse=mean_r,
-            quality_hist_forward=hist_f,
-            quality_hist_reverse=hist_r,
+            quality_hist_forward=np.array(hist_f, dtype=object),
+            quality_hist_reverse=np.array(hist_r, dtype=object),
             subst_choices_forward=sub_f,
             subst_choices_reverse=sub_r,
             ins_forward=ins_f,

diff --git a/iss/error_models/__init__.py b/iss/error_models/__init__.py
@@ -6,8 +6,11 @@
 import sys
 import random
 import logging
+import _pickle
 import numpy as np
 
+from Bio.Seq import Seq, MutableSeq
+
 
 class ErrorModel(object):
     """Main ErrorModel Class
@@ -35,7 +38,7 @@ def load_npz(self, npz_path, model):
         try:
             error_profile = np.load(npz_path, allow_pickle=True)
             assert error_profile['model'] == model
-        except (OSError, IOError) as e:
+        except (OSError, IOError, EOFError, _pickle.UnpicklingError) as e:
             self.logger.error('Failed to read ErrorModel file: %s' % e)
             sys.exit(1)
         except AssertionError as e:
@@ -87,7 +90,7 @@ def mut_sequence(self, record, orientation):
         elif orientation == 'reverse':
             nucl_choices = self.subst_choices_rev
 
-        mutable_seq = record.seq.tomutable()
+        mutable_seq = MutableSeq(record.seq)
         quality_list = record.letter_annotations["phred_quality"]
         position = 0
         for nucl, qual in zip(mutable_seq, quality_list):
@@ -97,7 +100,7 @@ def mut_sequence(self, record, orientation):
                     nucl_choices[position][nucl.upper()][0],
                     p=nucl_choices[position][nucl.upper()][1]))
             position += 1
-        return mutable_seq.toseq()
+        return Seq(mutable_seq)
 
     def adjust_seq_length(self, mut_seq, orientation, full_sequence, bounds):
         """Truncate or Extend reads to make them fit the read length
@@ -120,11 +123,11 @@ def adjust_seq_length(self, mut_seq, orientation, full_sequence, bounds):
         """
         read_start, read_end = bounds
         if len(mut_seq) == self.read_length:
-            return mut_seq.toseq()
+            return Seq(mut_seq)
         elif len(mut_seq) > self.read_length:
             while len(mut_seq) > self.read_length:
                 mut_seq.pop()
-            return mut_seq.toseq()
+            return Seq(mut_seq)
         else:  # len smaller
             to_add = self.read_length - len(mut_seq)
             if orientation == 'forward':
@@ -142,7 +145,7 @@ def adjust_seq_length(self, mut_seq, orientation, full_sequence, bounds):
                         nucl_to_add = util.rev_comp(
                             full_sequence[read_end + i])
                     mut_seq.append(nucl_to_add)
-            return mut_seq.toseq()
+            return Seq(mut_seq)
 
     def introduce_indels(self, record, orientation, full_seq, bounds):
         """Introduce insertions or deletions in a sequence
@@ -171,7 +174,7 @@ def introduce_indels(self, record, orientation, full_seq, bounds):
             insertions = self.ins_rev
             deletions = self.del_rev
 
-        mutable_seq = record.seq.tomutable()
+        mutable_seq = MutableSeq(record.seq)
         position = 0
         for nucl in range(self.read_length - 1):
             try:

diff --git a/iss/generator.py b/iss/generator.py
@@ -5,7 +5,7 @@
 
 from Bio import SeqIO
 from Bio.Seq import Seq
-from Bio.SeqUtils import GC
+from Bio.SeqUtils import gc_fraction
 from Bio.SeqRecord import SeqRecord
 from shutil import copyfileobj
 
@@ -70,7 +70,7 @@ def reads(record, ErrorModel, n_pairs, cpu_number, output, seed,
         else:
             if gc_bias:
                 stiched_seq = forward.seq + reverse.seq
-                gc_content = GC(stiched_seq)
+                gc_content = gc_fraction(stiched_seq) * 100
                 if 40 < gc_content < 60:
                     read_tuple_list.append((forward, reverse))
                     i += 1

diff --git a/iss/modeller.py b/iss/modeller.py
@@ -53,7 +53,7 @@ def divide_qualities_into_bins(qualities, n_bins=4):
         which_array = 0
         for array in ranges:
             if mean in array:
-                read = np.fromiter((q[0] for q in quality), dtype=np.float)
+                read = np.fromiter((q[0] for q in quality), float)
                 bin_lists[which_array].append(read)
             which_array += 1
     return bin_lists

diff --git a/iss/test/__init__.py b/iss/test/__init__.py
diff --git a/iss/test/test_abundance.py b/iss/test/test_abundance.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import pytest
+
 from iss import util
 from iss import abundance
-from nose.tools import raises, assert_almost_equal, with_setup
 
 import numpy as np
 
@@ -12,9 +13,15 @@ def setup_function():
     output_file_prefix = 'data/.test'
 
 
-def teardown_function():
+def teardown_cleanup():
     util.cleanup(['data/test_abundance.txt'])
 
+@pytest.fixture
+def setup_and_teardown():
+    setup_function()
+    yield
+    teardown_cleanup()
+
 
 def test_parsing():
     abundance_dic = abundance.parse_abundance_file('data/abundance.txt')
@@ -27,19 +34,19 @@ def test_parsing():
     }
 
 
-@raises(SystemExit)
 def test_parsing_empty():
-    abundance_dic = abundance.parse_abundance_file('data/empty_file')
+    with pytest.raises(SystemExit):
+        abundance_dic = abundance.parse_abundance_file('data/empty_file')
 
 
-@raises(SystemExit)
 def test_parsing_no_exists():
-    abundance_dic = abundance.parse_abundance_file('data/does_not_exist')
+    with pytest.raises(SystemExit):
+        abundance_dic = abundance.parse_abundance_file('data/does_not_exist')
 
 
-@raises(SystemExit)
 def test_parsing_bad_abundance():
-    abundance_dic = abundance.parse_abundance_file('data/bad_abundance.txt')
+    with pytest.raises(SystemExit):
+        abundance_dic = abundance.parse_abundance_file('data/bad_abundance.txt')
 
 
 def test_cov_calc():
@@ -74,8 +81,7 @@ def test_distributions():
     assert round(zero_inflated_lognormal_dic['genome_A'], 2) == 0.44
 
 
-@with_setup(setup_function, teardown_function)
-def test_abunance_draft():
+def test_abunance_draft(setup_and_teardown):
     abundance_dic = {'genome_A': 0.15511887441170918,
                      'genome_T': 0.08220476760848751,
                      'genome_GC': 0.18039811160555874,

diff --git a/iss/test/test_bam.py b/iss/test/test_bam.py
@@ -1,20 +1,19 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from iss import bam
+import pytest
 
-from nose.tools import raises
+from iss import bam
 
 import os
-import sys
 
 
-@raises(SystemExit)
 def test_read_fail():
-    bam_file = 'data/empty_file'
-    bam_reader = bam.read_bam(bam_file)
-    for read in bam_reader:
-        print(read)
+    with pytest.raises(SystemExit):
+        bam_file = 'data/empty_file'
+        bam_reader = bam.read_bam(bam_file)
+        for read in bam_reader:
+            print(read)
 
 
 def test_to_model():

diff --git a/iss/test/test_download.py b/iss/test/test_download.py
@@ -1,29 +1,33 @@
 #!/usr/bin/env python2
 # -*- coding: utf-8 -*-
 
+import pytest
+
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
 
 from iss import download
 from iss.util import cleanup
 
-from nose.tools import with_setup
-
-
 def setup_function():
     output_file_prefix = 'data/.test'
 
 
-def teardown_function():
+def teardown_cleanup():
     cleanup(['data/test_download.fasta'])
 
 
-@with_setup(setup_function, teardown_function)
-def download_to_fasta():
+@pytest.fixture
+def setup_and_teardown():
+    setup_function()
+    yield
+    teardown_cleanup()
+
+
+def download_to_fasta(setup_and_teardown):
     ftp_url = 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/737/615/GCF_000737615.1_ASM73761v1/GCF_000737615.1_ASM73761v1_genomic.fna.gz'
     download.assembly_to_fasta(ftp_url, 'data/test_download.fasta')
 
 
-@with_setup(setup_function, teardown_function)
-def test_ncbi():
+def test_ncbi(setup_and_teardown):
     genome_list = download.ncbi('bacteria', 2, 'data/test_download.fasta')
diff --git a/iss/test/test_error_model.py b/iss/test/test_error_model.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import pytest
+
 from iss.error_models import ErrorModel, basic, kde, perfect
 
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
-from nose.tools import raises
 
 import random
 import numpy as np
@@ -88,6 +89,6 @@ def test_introduce_indels():
     assert read.seq[:10] == 'ATGATAATAT'
 
 
-@raises(SystemExit)
 def test_bad_err_mod():
-    err_mod = kde.KDErrorModel('data/empty_file')
+    with pytest.raises(SystemExit):
+        err_mod = kde.KDErrorModel('data/empty_file')