Skip to content

Commit

Permalink
Merge pull request #34 from oxfordmmm/fix/null-deletion-edge-case
Browse files: browse the repository at this point in the history
Fix/null deletion edge case
  • Loading branch information
JeremyWesthead authored Jun 28, 2024
2 parents e11c6e3 + fbfaf86 commit 4e1bd25
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 6 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
1.3.1
1.3.2

31 changes: 27 additions & 4 deletions gumpy/difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def __init__(self, genome1, genome2):

def get_gene_pos(
self, gene: str, idx: int, variant: str, start: int | None = None
) -> int:
) -> int | None:
"""Find the gene position of a given nucleotide index.
This is considerably faster than building a whole stacked_gene_pos array
(takes ~4.5mins for tb)
Expand All @@ -203,7 +203,8 @@ def get_gene_pos(
start (int | None): Start position. Defaults to None
Returns:
int: Gene position of this nucleotide index
int | None: Gene position of this nucleotide index
(or None if it lies outside of the gene)
"""
stacked_gene_mask = self.genome1.stacked_gene_name == gene
nc_idx = self.genome1.stacked_nucleotide_index[stacked_gene_mask]
Expand Down Expand Up @@ -239,6 +240,16 @@ def get_gene_pos(
if self.genome2.genes[gene]["reverse_complement"]:
nc_num = nc_num - dels + 1

# Edge case of deletion starting in revcomp gene and extending
# past gene start, so return None
if (
self.genome2.stacked_nucleotide_number[
self.genome2.stacked_gene_name == gene
][-1]
> nc_num
):
return None

return nc_num

def _get_vcf_idx(self, vcf_row: Dict) -> int | None:
Expand Down Expand Up @@ -403,7 +414,11 @@ def __get_variants(self):
continue
gene_name.append(gene)
gene_pos.append(self.get_gene_pos(gene, idx, variants[-1]))
if self.genome2.genes[gene]["codes_protein"] and gene_pos[-1] > 0:
if (
self.genome2.genes[gene]["codes_protein"]
and gene_pos[-1] is not None
and gene_pos[-1] > 0
):
# Get codon idx
nc_idx = self.genome1.stacked_nucleotide_index[
self.genome1.stacked_gene_name == gene
Expand Down Expand Up @@ -438,7 +453,11 @@ def __get_variants(self):
# Single gene, so pull out data
gene_pos.append(self.get_gene_pos(gene, idx, variants[-1]))

if self.genome2.genes[gene]["codes_protein"] and gene_pos[-1] > 0:
if (
self.genome2.genes[gene]["codes_protein"]
and gene_pos[-1] is not None
and gene_pos[-1] > 0
):
# Get codon idx
nc_idx = self.genome1.stacked_nucleotide_index[
self.genome1.stacked_gene_name == gene
Expand Down Expand Up @@ -553,6 +572,7 @@ def __get_variants(self):
)
if (
self.genome2.genes[gene]["codes_protein"]
and gene_pos[-1] is not None
and gene_pos[-1] > 0
):
# Get codon pos
Expand Down Expand Up @@ -599,6 +619,7 @@ def __get_variants(self):

if (
self.genome2.genes[gene]["codes_protein"]
and gene_pos[-1] is not None
and gene_pos[-1] > 0
):
# Get codon pos
Expand Down Expand Up @@ -710,6 +731,7 @@ def __get_variants(self):
)
if (
self.genome2.genes[gene]["codes_protein"]
and gene_pos[-1] is not None
and gene_pos[-1] > 0
):
# Get codon pos
Expand Down Expand Up @@ -756,6 +778,7 @@ def __get_variants(self):

if (
self.genome2.genes[gene]["codes_protein"]
and gene_pos[-1] is not None
and gene_pos[-1] > 0
):
# Get codon pos
Expand Down
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ long_description_content_type = text/markdown
url = https://github.com/oxfordmmm/gumpy
classifiers =
Programming Language :: Python :: 3
License :: OSI Approved :: MIT License
Operating System :: OS Independent
license = University of Oxford, see LICENSE.md

Expand Down

0 comments on commit 4e1bd25

Please sign in to comment.