Skip to content

Commit

Permalink
Merge pull request #31 from oxfordmmm/check_clashes_post_filter
Browse files Browse the repository at this point in the history
fix: check clashes within get_calls
  • Loading branch information
mcolpus authored Mar 15, 2024
2 parents 13dc4ac + 80de97d commit fc1ca05
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 21 deletions.
1 change: 1 addition & 0 deletions gumpy/difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
nucleotides to an array of codons.
setup_codon_aa_dict() -> dict: Returns a dictionary mapping codon->amino_acid
"""

import warnings
from abc import ABC # Python library for abstract classes
from typing import Dict, List, Tuple
Expand Down
18 changes: 11 additions & 7 deletions gumpy/gene.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Gene object
"""

import copy
import re

Expand All @@ -13,7 +14,6 @@

# FIXME: problems with rrs, mfpB
class Gene(object):

"""Gene object that uses underlying numpy arrays"""

def __init__(
Expand Down Expand Up @@ -363,9 +363,11 @@ def minority_populations_GARC(
# and mixed for mixed indels and SNPs
c = Counter(
[
(nc_idx, "indel")
if type_ in ["ins", "del"]
else (nc_idx, type_)
(
(nc_idx, "indel")
if type_ in ["ins", "del"]
else (nc_idx, type_)
)
for nc_idx, type_, bases, cov, frs in gene_pos_map[gene_pos]
]
)
Expand Down Expand Up @@ -519,9 +521,11 @@ def __duplicate(self, index: int):
# Check for promoters before the codons
first_half = [self.nucleotide_number[i] for i in range(index)]
second_half = [
self.nucleotide_number[i] + 1
if self.nucleotide_number[i] > 0
else self.nucleotide_number[i]
(
self.nucleotide_number[i] + 1
if self.nucleotide_number[i] > 0
else self.nucleotide_number[i]
)
for i in range(index, len(self.nucleotide_number))
]
self.nucleotide_number = numpy.array(
Expand Down
2 changes: 1 addition & 1 deletion gumpy/genome.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Genome object
"""

import copy
import gzip
import pathlib
Expand All @@ -16,7 +17,6 @@


class Genome(object):

"""Genome object"""

def __init__(
Expand Down
28 changes: 15 additions & 13 deletions gumpy/variantfile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Classes used to parse and store VCF data
"""

import copy
import pathlib
import warnings
Expand Down Expand Up @@ -285,11 +286,7 @@ def __init__(
for sample in record.samples.keys():
self.records.append(VCFRecord(record, sample))

# Ensure that only a single record exists for each position specified
assert len(self.records) == len(
set([record.pos for record in self.records])
), "There must be 1 and only 1 record per position! "

# __find_calls ensures that no two calls share the same position
self.__find_calls()

self.__get_variants()
Expand Down Expand Up @@ -465,6 +462,7 @@ def __find_calls(self):
"""

self.calls = {}
record_positions = set()

for record in self.records:
# VCF files are 1 indexed but keep for now
Expand All @@ -483,17 +481,15 @@ def __find_calls(self):
continue

# only proceed if a dictionary has been passed (otherwise defaults to None)
proceed = True
if isinstance(self.format_fields_min_thresholds, dict):
# ok to index directly since we've already checked in the constructor that
# these fields exist in the VCF
for i in self.format_fields_min_thresholds:
proceed = (
proceed
and record.values[i] >= self.format_fields_min_thresholds[i]
)
if not proceed:
continue
proceed = all(
record.values[i] >= self.format_fields_min_thresholds[i]
for i in self.format_fields_min_thresholds
)
if not proceed:
continue

if (
len(self.minor_population_indices) > 0
Expand Down Expand Up @@ -527,6 +523,12 @@ def __find_calls(self):
variant = record.ref
variant_type = "ref"

if index in record_positions:
raise ValueError(
"Multiple calls at position " + str(index) + " in VCF file"
)
record_positions.add(index)

# if the REF, ALT pair are the same length, check if we can decompose
# into SNPs
if len(record.ref) == len(variant):
Expand Down
1 change: 1 addition & 0 deletions tests/unit/test_minor_populations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Tests relating to minor populations
"""

import pytest

import gumpy
Expand Down

0 comments on commit fc1ca05

Please sign in to comment.