Skip to content

Commit

Permalink
fix: bug in beagle dosages with no alt alleles (#231)
Browse files Browse the repository at this point in the history
Co-authored-by: Melissa Gymrek <[email protected]>
  • Loading branch information
gymreklab and Melissa Gymrek authored Aug 29, 2024
1 parent 2b955be commit ae3ca15
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
23 changes: 23 additions & 0 deletions trtools/utils/tests/test_trharmonizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,18 @@ def get_dummy_record_gts_allsamelen():
alt=["CAGCAACAG"]
)

dummy_record_gts_monomorphic = [
[0, 0],
[0, 0],
[0, 0],
[0, 0]]
def get_dummy_record_gts_monomorphic():
return DummyCyvcf2Record(
gts=dummy_record_gts_monomorphic,
ref="CAGCAGCAG",
alt=[]
)

triploid_gts = np.array([
[0, 0, -2],
[0, 0, -2],
Expand Down Expand Up @@ -341,6 +353,17 @@ def test_TRRecord_GetGenotypes_Dosages():
assert np.all(rec.GetDosages(dosagetype=trh.TRDosageTypes.beagleap_norm) ==
true_beagle_norm_dosages)

monomorphic_record = get_dummy_record_gts_monomorphic()
monomorphic_record.FORMAT["AP1"] = np.array([[]])
monomorphic_record.FORMAT["AP2"] = np.array([[]])
rec = trh.TRRecord(monomorphic_record, monomorphic_record.REF, [], "CAG", "", None)
true_bestguess_norm_dosages = np.array([0, 0, 0, 0], dtype=np.float32)
assert np.all(rec.GetDosages(dosagetype=trh.TRDosageTypes.bestguess_norm) ==
true_bestguess_norm_dosages)
true_beagleap_dosages = np.array([6, 6, 6, 6], dtype=np.float32)
assert np.all(rec.GetDosages(dosagetype=trh.TRDosageTypes.beagleap) ==
true_beagleap_dosages)

# Test regular diploid record - Example with bestguess same as AP-based dosage
diploid_record = get_dummy_record_diploid()
diploid_record.FORMAT["AP1"] = np.array([[0, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0]])
Expand Down
10 changes: 7 additions & 3 deletions trtools/utils/tr_harmonizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,9 +1146,13 @@ def GetDosages(self,
raise ValueError("Negative AP1 or AP2 fields detected")

# Get haplotype dosages
max_alt_len = max(self.alt_allele_lengths)
h1_dos = np.clip(np.dot(ap1, self.alt_allele_lengths), 0, max_alt_len)
h2_dos = np.clip(np.dot(ap2, self.alt_allele_lengths), 0, max_alt_len)
if len(self.alt_allele_lengths) > 0:
max_alt_len = max(self.alt_allele_lengths)
h1_dos = np.clip(np.dot(ap1, self.alt_allele_lengths), 0, max_alt_len)
h2_dos = np.clip(np.dot(ap2, self.alt_allele_lengths), 0, max_alt_len)
else:
h1_dos = 0
h2_dos = 0
ref1_dos = ref1*self.ref_allele_length
ref2_dos = ref2*self.ref_allele_length

Expand Down

0 comments on commit ae3ca15

Please sign in to comment.