From 9fc85efe4a54711b64f876ab6053715550a8c327 Mon Sep 17 00:00:00 2001 From: JeremyWesthead Date: Thu, 27 Jun 2024 15:49:23 +0100 Subject: [PATCH 1/5] fix: if deletion starts outside of revcomp promoter, null its gene pos --- gumpy/difference.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/gumpy/difference.py b/gumpy/difference.py index a57e40e..80e6d64 100644 --- a/gumpy/difference.py +++ b/gumpy/difference.py @@ -239,6 +239,16 @@ def get_gene_pos( if self.genome2.genes[gene]["reverse_complement"]: nc_num = nc_num - dels + 1 + # Edge case of deletion starting in revcomp gene and extending + # past gene start, so return None + if ( + self.genome2.stacked_nucleotide_number[ + self.genome2.stacked_gene_name == gene + ][-1] + > nc_num + ): + return None + return nc_num def _get_vcf_idx(self, vcf_row: Dict) -> int | None: @@ -403,7 +413,11 @@ def __get_variants(self): continue gene_name.append(gene) gene_pos.append(self.get_gene_pos(gene, idx, variants[-1])) - if self.genome2.genes[gene]["codes_protein"] and gene_pos[-1] > 0: + if ( + self.genome2.genes[gene]["codes_protein"] + and gene_pos[-1] is not None + and gene_pos[-1] > 0 + ): # Get codon idx nc_idx = self.genome1.stacked_nucleotide_index[ self.genome1.stacked_gene_name == gene @@ -438,7 +452,11 @@ def __get_variants(self): # Single gene, so pull out data gene_pos.append(self.get_gene_pos(gene, idx, variants[-1])) - if self.genome2.genes[gene]["codes_protein"] and gene_pos[-1] > 0: + if ( + self.genome2.genes[gene]["codes_protein"] + and gene_pos[-1] is not None + and gene_pos[-1] > 0 + ): # Get codon idx nc_idx = self.genome1.stacked_nucleotide_index[ self.genome1.stacked_gene_name == gene @@ -553,6 +571,7 @@ def __get_variants(self): ) if ( self.genome2.genes[gene]["codes_protein"] + and gene_pos[-1] is not None and gene_pos[-1] > 0 ): # Get codon pos @@ -599,6 +618,7 @@ def __get_variants(self): if ( self.genome2.genes[gene]["codes_protein"] + and gene_pos[-1] 
is not None and gene_pos[-1] > 0 ): # Get codon pos @@ -710,6 +730,7 @@ def __get_variants(self): ) if ( self.genome2.genes[gene]["codes_protein"] + and gene_pos[-1] is not None and gene_pos[-1] > 0 ): # Get codon pos @@ -756,6 +777,7 @@ def __get_variants(self): if ( self.genome2.genes[gene]["codes_protein"] + and gene_pos[-1] is not None and gene_pos[-1] > 0 ): # Get codon pos From 717d5e5c195d9bd34bf513b5c5b46a6b5c38b11b Mon Sep 17 00:00:00 2001 From: JeremyWesthead Date: Thu, 27 Jun 2024 16:01:41 +0100 Subject: [PATCH 2/5] style: appease mypy --- gumpy/difference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gumpy/difference.py b/gumpy/difference.py index 80e6d64..c26cefb 100644 --- a/gumpy/difference.py +++ b/gumpy/difference.py @@ -191,7 +191,7 @@ def __init__(self, genome1, genome2): def get_gene_pos( self, gene: str, idx: int, variant: str, start: int | None = None - ) -> int: + ) -> int | None: """Find the gene position of a given nucleotide index. This is considerably faster than building a whole stacked_gene_pos array (takes ~4.5mins for tb) @@ -203,7 +203,8 @@ def get_gene_pos( start (int): Start position. Defaults to None Returns: - int: Gene position of this nucleotide index + int | None: Gene position of this nucleotide index + (or None if it lies outside of the gene) """ stacked_gene_mask = self.genome1.stacked_gene_name == gene nc_idx = self.genome1.stacked_nucleotide_index[stacked_gene_mask] From 44baf0e49a83c49eb813fe31e2c59efcb65c2b70 Mon Sep 17 00:00:00 2001 From: JeremyWesthead Date: Thu, 27 Jun 2024 16:01:48 +0100 Subject: [PATCH 3/5] style: appease pc --- gumpy/difference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gumpy/difference.py b/gumpy/difference.py index c26cefb..3af894f 100644 --- a/gumpy/difference.py +++ b/gumpy/difference.py @@ -203,7 +203,7 @@ def get_gene_pos( start (int): Start position. 
Defaults to None Returns: - int | None: Gene position of this nucleotide index + int | None: Gene position of this nucleotide index (or None if it lies outside of the gene) """ stacked_gene_mask = self.genome1.stacked_gene_name == gene From f2b5e1a8ee0fde5d6f1d4b96aa3b6246fcd569b5 Mon Sep 17 00:00:00 2001 From: JeremyWesthead Date: Thu, 27 Jun 2024 16:02:21 +0100 Subject: [PATCH 4/5] bump: version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1527a8d..971711a 100644 --- a/VERSION +++ b/VERSION @@ -1,2 +1,2 @@ -1.3.1 +1.3.2 From fbfaf86ab127c10ae21ec7ab117a428c78754951 Mon Sep 17 00:00:00 2001 From: JeremyWesthead Date: Thu, 27 Jun 2024 16:12:18 +0100 Subject: [PATCH 5/5] build: remove inappropriate MIT classifier --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a003249..d8851d2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,7 +9,6 @@ long_description_content_type = text/markdown url = https://github.com/oxfordmmm/gumpy classifiers = Programming Language :: Python :: 3 - License :: OSI Approved :: MIT License Operating System :: OS Independent license = University of Oxford, see LICENSE.md