Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: check clashes within get_calls #31

Merged
merged 2 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gumpy/difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
nucleotides to an array of codons.
setup_codon_aa_dict() -> dict: Returns a dictionary mapping codon->amino_acid
"""

import warnings
from abc import ABC # Python library for abstract classes
from typing import Dict, List, Tuple
Expand Down
18 changes: 11 additions & 7 deletions gumpy/gene.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Gene object
"""

import copy
import re

Expand All @@ -13,7 +14,6 @@

# FIXME: problems with rrs, mfpB
class Gene(object):

"""Gene object that uses underlying numpy arrays"""

def __init__(
Expand Down Expand Up @@ -363,9 +363,11 @@ def minority_populations_GARC(
# and mixed for mixed indels and SNPs
c = Counter(
[
(nc_idx, "indel")
if type_ in ["ins", "del"]
else (nc_idx, type_)
(
(nc_idx, "indel")
if type_ in ["ins", "del"]
else (nc_idx, type_)
)
for nc_idx, type_, bases, cov, frs in gene_pos_map[gene_pos]
]
)
Expand Down Expand Up @@ -519,9 +521,11 @@ def __duplicate(self, index: int):
# Check for promoters before the codons
first_half = [self.nucleotide_number[i] for i in range(index)]
second_half = [
self.nucleotide_number[i] + 1
if self.nucleotide_number[i] > 0
else self.nucleotide_number[i]
(
self.nucleotide_number[i] + 1
if self.nucleotide_number[i] > 0
else self.nucleotide_number[i]
)
for i in range(index, len(self.nucleotide_number))
]
self.nucleotide_number = numpy.array(
Expand Down
2 changes: 1 addition & 1 deletion gumpy/genome.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Genome object
"""

import copy
import gzip
import pathlib
Expand All @@ -16,7 +17,6 @@


class Genome(object):

"""Genome object"""

def __init__(
Expand Down
28 changes: 15 additions & 13 deletions gumpy/variantfile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Classes used to parse and store VCF data
"""

import copy
import pathlib
import warnings
Expand Down Expand Up @@ -285,11 +286,7 @@ def __init__(
for sample in record.samples.keys():
self.records.append(VCFRecord(record, sample))

# Ensure that only a single record exists for each position specified
assert len(self.records) == len(
set([record.pos for record in self.records])
), "There must be 1 and only 1 record per position! "

# __find_calls will ensure that no two calls share the same position
self.__find_calls()

self.__get_variants()
Expand Down Expand Up @@ -465,6 +462,7 @@ def __find_calls(self):
"""

self.calls = {}
record_positions = set()

for record in self.records:
# VCF files are 1 indexed but keep for now
Expand All @@ -483,17 +481,15 @@ def __find_calls(self):
continue

# only apply the thresholds if a dictionary has been passed (otherwise defaults to None)
proceed = True
if isinstance(self.format_fields_min_thresholds, dict):
# ok to just do this since we've already checked in the constructor that these
# fields exist in the VCF
for i in self.format_fields_min_thresholds:
proceed = (
proceed
and record.values[i] >= self.format_fields_min_thresholds[i]
)
if not proceed:
continue
proceed = all(
record.values[i] >= self.format_fields_min_thresholds[i]
for i in self.format_fields_min_thresholds
)
if not proceed:
continue

if (
len(self.minor_population_indices) > 0
Expand Down Expand Up @@ -527,6 +523,12 @@ def __find_calls(self):
variant = record.ref
variant_type = "ref"

if index in record_positions:
raise ValueError(
"Multiple calls at position " + str(index) + " in VCF file"
)
record_positions.add(index)

# if the REF, ALT pair are the same length, check if we can decompose
# into SNPs
if len(record.ref) == len(variant):
Expand Down
1 change: 1 addition & 0 deletions tests/unit/test_minor_populations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Tests relating to minor populations
"""

import pytest

import gumpy
Expand Down
Loading