diff --git a/gumpy/difference.py b/gumpy/difference.py index 79f584b..a57e40e 100644 --- a/gumpy/difference.py +++ b/gumpy/difference.py @@ -17,6 +17,7 @@ nucleotides to an array of codons. setup_codon_aa_dict() -> dict: Returns a dictionary mapping codon->amino_acid """ + import warnings from abc import ABC # Python library for abstract classes from typing import Dict, List, Tuple diff --git a/gumpy/gene.py b/gumpy/gene.py index 9e8d16a..fc9613f 100644 --- a/gumpy/gene.py +++ b/gumpy/gene.py @@ -1,6 +1,7 @@ """ Gene object """ + import copy import re @@ -13,7 +14,6 @@ # FIXME: problems with rrs, mfpB class Gene(object): - """Gene object that uses underlying numpy arrays""" def __init__( @@ -363,9 +363,11 @@ def minority_populations_GARC( # and mixed for mixed indels and SNPs c = Counter( [ - (nc_idx, "indel") - if type_ in ["ins", "del"] - else (nc_idx, type_) + ( + (nc_idx, "indel") + if type_ in ["ins", "del"] + else (nc_idx, type_) + ) for nc_idx, type_, bases, cov, frs in gene_pos_map[gene_pos] ] ) @@ -519,9 +521,11 @@ def __duplicate(self, index: int): # Check for promoters before the codons first_half = [self.nucleotide_number[i] for i in range(index)] second_half = [ - self.nucleotide_number[i] + 1 - if self.nucleotide_number[i] > 0 - else self.nucleotide_number[i] + ( + self.nucleotide_number[i] + 1 + if self.nucleotide_number[i] > 0 + else self.nucleotide_number[i] + ) for i in range(index, len(self.nucleotide_number)) ] self.nucleotide_number = numpy.array( diff --git a/gumpy/genome.py b/gumpy/genome.py index cf5ba0b..c5a1313 100644 --- a/gumpy/genome.py +++ b/gumpy/genome.py @@ -1,6 +1,7 @@ """ Genome object """ + import copy import gzip import pathlib @@ -16,7 +17,6 @@ class Genome(object): - """Genome object""" def __init__( diff --git a/gumpy/variantfile.py b/gumpy/variantfile.py index 3aea312..1be2cbc 100644 --- a/gumpy/variantfile.py +++ b/gumpy/variantfile.py @@ -1,6 +1,7 @@ """ Classes used to parse and store VCF data """ + import copy import pathlib import warnings @@ -285,11 +286,7 @@ def __init__( for sample in record.samples.keys(): self.records.append(VCFRecord(record, sample)) - # Ensure that only a single record exists for each position specified - assert len(self.records) == len( - set([record.pos for record in self.records]) - ), "There must be 1 and only 1 record per position! " - + # Find calls will ensure that no calls have same position self.__find_calls() self.__get_variants() @@ -465,6 +462,7 @@ def __find_calls(self): """ self.calls = {} + record_positions = set() for record in self.records: # VCF files are 1 indexed but keep for now @@ -483,17 +481,15 @@ def __find_calls(self): continue # only proceed if a dictionary has been passed (otherwise defaults to None) - proceed = True if isinstance(self.format_fields_min_thresholds, dict): # ok to just do since we've already check in the constructor that these # fields exist in the VCF - for i in self.format_fields_min_thresholds: - proceed = ( - proceed - and record.values[i] >= self.format_fields_min_thresholds[i] - ) - if not proceed: - continue + proceed = all( + record.values[i] >= self.format_fields_min_thresholds[i] + for i in self.format_fields_min_thresholds + ) + if not proceed: + continue if ( len(self.minor_population_indices) > 0 @@ -527,6 +523,12 @@ def __find_calls(self): variant = record.ref variant_type = "ref" + if index in record_positions: + raise ValueError( + "Multiple calls at position " + str(index) + " in VCF file" + ) + record_positions.add(index) + # if the REF, ALT pair are the same length, check if we can decompose # into SNPs if len(record.ref) == len(variant): diff --git a/tests/unit/test_minor_populations.py b/tests/unit/test_minor_populations.py index 6df3b3f..2cc4487 100644 --- a/tests/unit/test_minor_populations.py +++ b/tests/unit/test_minor_populations.py @@ -1,5 +1,6 @@ """Tests relating to minor populations """ + import pytest import gumpy