From c3514549a4b2b7f67644da4a53c3587f36112ccb Mon Sep 17 00:00:00 2001 From: Melissa Gymrek Date: Thu, 29 Aug 2024 09:41:28 -0700 Subject: [PATCH] fix bug in beagle dosages with no alt alleles --- trtools/utils/tests/test_trharmonizer.py | 23 +++++++++++++++++++++++ trtools/utils/tr_harmonizer.py | 10 +++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/trtools/utils/tests/test_trharmonizer.py b/trtools/utils/tests/test_trharmonizer.py index a23aa42e..dba2f29f 100644 --- a/trtools/utils/tests/test_trharmonizer.py +++ b/trtools/utils/tests/test_trharmonizer.py @@ -97,6 +97,18 @@ def get_dummy_record_gts_allsamelen(): alt=["CAGCAACAG"] ) +dummy_record_gts_monomorphic = [ + [0, 0], + [0, 0], + [0, 0], + [0, 0]] +def get_dummy_record_gts_monomorphic(): + return DummyCyvcf2Record( + gts=dummy_record_gts_monomorphic, + ref="CAGCAGCAG", + alt=[] + ) + triploid_gts = np.array([ [0, 0, -2], [0, 0, -2], @@ -341,6 +353,17 @@ def test_TRRecord_GetGenotypes_Dosages(): assert np.all(rec.GetDosages(dosagetype=trh.TRDosageTypes.beagleap_norm) == true_beagle_norm_dosages) + monomorphic_record = get_dummy_record_gts_monomorphic() + monomorphic_record.FORMAT["AP1"] = np.array([[]]) + monomorphic_record.FORMAT["AP2"] = np.array([[]]) + rec = trh.TRRecord(monomorphic_record, monomorphic_record.REF, [], "CAG", "", None) + true_bestguess_norm_dosages = np.array([0, 0, 0, 0], dtype=np.float32) + assert np.all(rec.GetDosages(dosagetype=trh.TRDosageTypes.bestguess_norm) == + true_bestguess_norm_dosages) + true_beagleap_dosages = np.array([6, 6, 6, 6], dtype=np.float32) + assert np.all(rec.GetDosages(dosagetype=trh.TRDosageTypes.beagleap) == + true_beagleap_dosages) + # Test regular diploid record - Example with bestguess same as AP-based dosage diploid_record = get_dummy_record_diploid() diploid_record.FORMAT["AP1"] = np.array([[0, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0]]) diff --git a/trtools/utils/tr_harmonizer.py b/trtools/utils/tr_harmonizer.py index d48c287c..5fe8c8f7 100644 --- a/trtools/utils/tr_harmonizer.py +++ b/trtools/utils/tr_harmonizer.py @@ -1146,9 +1146,13 @@ def GetDosages(self, raise ValueError("Negative AP1 or AP2 fields detected") # Get haplotype dosages - max_alt_len = max(self.alt_allele_lengths) - h1_dos = np.clip(np.dot(ap1, self.alt_allele_lengths), 0, max_alt_len) - h2_dos = np.clip(np.dot(ap2, self.alt_allele_lengths), 0, max_alt_len) + if len(self.alt_allele_lengths) > 0: + max_alt_len = max(self.alt_allele_lengths) + h1_dos = np.clip(np.dot(ap1, self.alt_allele_lengths), 0, max_alt_len) + h2_dos = np.clip(np.dot(ap2, self.alt_allele_lengths), 0, max_alt_len) + else: + h1_dos = 0 + h2_dos = 0 ref1_dos = ref1*self.ref_allele_length ref2_dos = ref2*self.ref_allele_length