From 9f0c21a7ec5dc9934f97dca467bac20beec674ee Mon Sep 17 00:00:00 2001 From: Arya Massarat <23412689+aryarm@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:46:50 -0800 Subject: [PATCH 1/5] handle deprecation of binom_test in scipy 1.12.0 https://docs.scipy.org/doc/scipy/release/1.12.0-notes.html#expired-deprecations --- trtools/utils/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/trtools/utils/utils.py b/trtools/utils/utils.py index 6cddb784..c1d55955 100644 --- a/trtools/utils/utils.py +++ b/trtools/utils/utils.py @@ -318,7 +318,11 @@ def GetHardyWeinbergBinomialTest(allele_freqs, genotype_counts): if gt[1] not in allele_freqs.keys(): return np.nan if gt[0] == gt[1]: num_hom += genotype_counts[gt] - return scipy.stats.binom_test(num_hom, n=total_samples, p=exp_hom_frac) + try: + return scipy.stats.binom_test(num_hom, n=total_samples, p=exp_hom_frac) + except AttributeError: + # binom_test was removed in scipy 1.12.0 (after being deprecated); use binomtest instead + return scipy.stats.binomtest(num_hom, n=total_samples, p=exp_hom_frac).pvalue def GetHomopolymerRun(seq): r"""Compute the maximum homopolymer run length in a sequence From dfc17e1a707c66ff4a72086bcec24059969b7fc8 Mon Sep 17 00:00:00 2001 From: Arya Massarat <23412689+aryarm@users.noreply.github.com> Date: Wed, 24 Jan 2024 00:13:30 +0000 Subject: [PATCH 2/5] reraise cyvcf2 exception with more detailed message --- trtools/utils/tr_harmonizer.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/trtools/utils/tr_harmonizer.py b/trtools/utils/tr_harmonizer.py index 0b0c68a3..174cb478 100644 --- a/trtools/utils/tr_harmonizer.py +++ b/trtools/utils/tr_harmonizer.py @@ -1544,6 +1544,7 @@ class TRRecordHarmonizer: def __init__(self, vcffile: cyvcf2.VCF, vcftype: Union[str, VcfTypes] = "auto"): self.vcffile = vcffile self.vcftype = InferVCFType(vcffile, vcftype) + self._record_idx = None def MayHaveImpureRepeats(self) -> bool: """ @@ -1619,6 +1620,16 @@ def 
__iter__(self) -> Iterator[TRRecord]: def __next__(self) -> TRRecord: """Iterate over TRRecord produced from the underlying vcf.""" - return HarmonizeRecord(self.vcftype, next(self.vcffile)) + if self._record_idx is None: + self._record_idx = 1 + self._record_idx += 1 + try: + record = next(self.vcffile) + except Exception: + raise ValueError( + f"Encountered error when parsing the {self._record_idx}th tandem " + "repeat in the provided VCF. Check that it is properly formatted." + ) + return HarmonizeRecord(self.vcftype, record) # TODO check all users of this class for new options From 9da066e5646bf4f86df802b7bde456dca4cd4fa2 Mon Sep 17 00:00:00 2001 From: Arya Massarat <23412689+aryarm@users.noreply.github.com> Date: Wed, 24 Jan 2024 00:46:04 +0000 Subject: [PATCH 3/5] also allow StopIteration in TRRecordHarmonizer --- trtools/utils/tr_harmonizer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/trtools/utils/tr_harmonizer.py b/trtools/utils/tr_harmonizer.py index 174cb478..7a18133b 100644 --- a/trtools/utils/tr_harmonizer.py +++ b/trtools/utils/tr_harmonizer.py @@ -1625,9 +1625,11 @@ def __next__(self) -> TRRecord: self._record_idx += 1 try: record = next(self.vcffile) + except StopIteration: + raise except Exception: raise ValueError( - f"Encountered error when parsing the {self._record_idx}th tandem " + f"Unable to parse the {self._record_idx}th tandem " "repeat in the provided VCF. Check that it is properly formatted." 
) return HarmonizeRecord(self.vcftype, record) From 7f0745bf993e07a706e4782f64d6b7af16cb6951 Mon Sep 17 00:00:00 2001 From: Arya Massarat <23412689+aryarm@users.noreply.github.com> Date: Wed, 24 Jan 2024 00:48:54 +0000 Subject: [PATCH 4/5] catch ValueErrors in dumpSTR and qcSTR arising from exceptions in cyvcf2 --- trtools/dumpSTR/dumpSTR.py | 7 +++++++ trtools/qcSTR/qcSTR.py | 19 ++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/trtools/dumpSTR/dumpSTR.py b/trtools/dumpSTR/dumpSTR.py index c572e6d2..17b9ac21 100644 --- a/trtools/dumpSTR/dumpSTR.py +++ b/trtools/dumpSTR/dumpSTR.py @@ -1201,6 +1201,13 @@ def main(args): return 1 else: raise te + except ValueError as ve: + message = ve.args[0] + if 'properly formatted' in message: + common.WARNING("Could not parse VCF.\n" + message) + return 1 + else: + raise ve if args.verbose: common.MSG("Processing %s:%s"%(record.chrom, record.pos)) record_counter += 1 diff --git a/trtools/qcSTR/qcSTR.py b/trtools/qcSTR/qcSTR.py index f34bacaa..891f1e83 100644 --- a/trtools/qcSTR/qcSTR.py +++ b/trtools/qcSTR/qcSTR.py @@ -501,7 +501,24 @@ def main(args): # read the vcf numrecords = 0 - for trrecord in harmonizer: + while True: + try: + trrecord = next(harmonizer) + except StopIteration: break + except TypeError as te: + message = te.args[0] + if 'missing' in message and 'mandatory' in message: + common.WARNING("Could not parse VCF.\n" + message) + return 1 + else: + raise te + except ValueError as ve: + message = ve.args[0] + if 'properly formatted' in message: + common.WARNING("Could not parse VCF.\n" + message) + return 1 + else: + raise ve if args.numrecords is not None and numrecords >= args.numrecords: break if args.period is not None and len(trrecord.motif) != args.period: continue From 98cb8043922b140f02cc86f990640e6d3830d611 Mon Sep 17 00:00:00 2001 From: Arya Massarat <23412689+aryarm@users.noreply.github.com> Date: Fri, 26 Jan 2024 18:00:03 +0000 Subject: [PATCH 5/5] remove usage of f 
strings to retain support for py 3.6 --- trtools/utils/tr_harmonizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trtools/utils/tr_harmonizer.py b/trtools/utils/tr_harmonizer.py index 7a18133b..03e214b9 100644 --- a/trtools/utils/tr_harmonizer.py +++ b/trtools/utils/tr_harmonizer.py @@ -1629,7 +1629,7 @@ def __next__(self) -> TRRecord: raise except Exception: raise ValueError( - f"Unable to parse the {self._record_idx}th tandem " + "Unable to parse the "+str(self._record_idx)+"th tandem " "repeat in the provided VCF. Check that it is properly formatted." ) return HarmonizeRecord(self.vcftype, record)