oxfordmmm · mcolpus · Mar 15, 2024 · Mar 15, 2024 · Mar 15, 2024
diff --git a/gumpy/difference.py b/gumpy/difference.py
@@ -17,6 +17,7 @@
         nucleotides to an array of codons.
     setup_codon_aa_dict() -> dict: Returns a dictionary mapping codon->amino_acid
 """
+
 import warnings
 from abc import ABC  # Python library for abstract classes
 from typing import Dict, List, Tuple

diff --git a/gumpy/gene.py b/gumpy/gene.py
@@ -1,6 +1,7 @@
 """
 Gene object
 """
+
 import copy
 import re
 
@@ -13,7 +14,6 @@
 
 # FIXME: problems with rrs, mfpB
 class Gene(object):
-
     """Gene object that uses underlying numpy arrays"""
 
     def __init__(
@@ -363,9 +363,11 @@ def minority_populations_GARC(
                 #   and mixed for mixed indels and SNPs
                 c = Counter(
                     [
-                        (nc_idx, "indel")
-                        if type_ in ["ins", "del"]
-                        else (nc_idx, type_)
+                        (
+                            (nc_idx, "indel")
+                            if type_ in ["ins", "del"]
+                            else (nc_idx, type_)
+                        )
                         for nc_idx, type_, bases, cov, frs in gene_pos_map[gene_pos]
                     ]
                 )
@@ -519,9 +521,11 @@ def __duplicate(self, index: int):
         # Check for promoters before the codons
         first_half = [self.nucleotide_number[i] for i in range(index)]
         second_half = [
-            self.nucleotide_number[i] + 1
-            if self.nucleotide_number[i] > 0
-            else self.nucleotide_number[i]
+            (
+                self.nucleotide_number[i] + 1
+                if self.nucleotide_number[i] > 0
+                else self.nucleotide_number[i]
+            )
             for i in range(index, len(self.nucleotide_number))
         ]
         self.nucleotide_number = numpy.array(

diff --git a/gumpy/genome.py b/gumpy/genome.py
@@ -1,6 +1,7 @@
 """
 Genome object
 """
+
 import copy
 import gzip
 import pathlib
@@ -16,7 +17,6 @@
 
 
 class Genome(object):
-
     """Genome object"""
 
     def __init__(

diff --git a/gumpy/variantfile.py b/gumpy/variantfile.py
@@ -1,6 +1,7 @@
 """
 Classes used to parse and store VCF data
 """
+
 import copy
 import pathlib
 import warnings
@@ -285,11 +286,7 @@ def __init__(
             for sample in record.samples.keys():
                 self.records.append(VCFRecord(record, sample))
 
-        # Ensure that only a single record exists for each position specified
-        assert len(self.records) == len(
-            set([record.pos for record in self.records])
-        ), "There must be 1 and only 1 record per position! "
-
+        # __find_calls() raises a ValueError if two calls share a position
         self.__find_calls()
 
         self.__get_variants()
@@ -465,6 +462,7 @@ def __find_calls(self):
         """
 
         self.calls = {}
+        record_positions = set()
 
         for record in self.records:
             # VCF files are 1 indexed but keep for now
@@ -483,17 +481,15 @@ def __find_calls(self):
                 continue
 
             # only proceed if a dictionary has been passed (otherwise defaults to None)
-            proceed = True
             if isinstance(self.format_fields_min_thresholds, dict):
                 # ok to just do since we've already check in the constructor that these
                 #   fields exist in the VCF
-                for i in self.format_fields_min_thresholds:
-                    proceed = (
-                        proceed
-                        and record.values[i] >= self.format_fields_min_thresholds[i]
-                    )
-            if not proceed:
-                continue
+                proceed = all(
+                    record.values[i] >= self.format_fields_min_thresholds[i]
+                    for i in self.format_fields_min_thresholds
+                )
+                if not proceed:
+                    continue
 
             if (
                 len(self.minor_population_indices) > 0
@@ -527,6 +523,12 @@ def __find_calls(self):
                 variant = record.ref
                 variant_type = "ref"
 
+            if index in record_positions:
+                raise ValueError(
+                    "Multiple calls at position " + str(index) + " in VCF file"
+                )
+            record_positions.add(index)
+
             # if the REF, ALT pair are the same length, check if we can decompose
             #   into SNPs
             if len(record.ref) == len(variant):

diff --git a/tests/unit/test_minor_populations.py b/tests/unit/test_minor_populations.py
@@ -1,5 +1,6 @@
 """Tests relating to minor populations
 """
+
 import pytest
 
 import gumpy