Skip to content

Commit

Permalink
fix(namedallelematcher): improve DPYD calling
Browse files Browse the repository at this point in the history
* handle undocumented variations in DPYD like the other toxic genes (treat them as reference)
* take partials into account to rule out a potential Reference haplotype match
  • Loading branch information
markwoon committed Jul 14, 2023
1 parent 70cc730 commit 57b2bd8
Show file tree
Hide file tree
Showing 5 changed files with 950 additions and 788 deletions.
1 change: 1 addition & 0 deletions docs/methods/NamedAlleleMatcher-201.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ A different approach is taken for genes for which the defined variants affect dr
variant calls are encountered, they will be treated as reference. This applies to:

* CACNA1S
* DPYD
* G6PD
* NUDT15
* RYR1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class NamedAlleleMatcher {
// CHANGES TO THIS LIST OF GENES SHOULD ALSO BE REFLECTED IN DOCUMENTATION IN NamedAlleleMatcher-201.md
public static final List<String> TREAT_UNDOCUMENTED_VARIATIONS_AS_REFERENCE = List.of(
"CACNA1S",
"DPYD",
"G6PD",
"NUDT15",
"RYR1",
Expand Down Expand Up @@ -294,31 +295,29 @@ private void callDpyd(String sampleId, SortedMap<String, SampleAllele> alleleMap
comboData = initializeCallData(sampleId, alleleMap, gene, false, true);
}
SortedSet<HaplotypeMatch> hapMatches = comboData.comparePermutations();
// Reference/Reference matches would have been handled above
// if we get to this point, it's combination that might include Reference
// if there's only 1 match and it's Reference match, it should be a no call
// if there are more than 2, strip out Reference because we prioritize non-Reference if possible
if (hapMatches.size() != 2) {
hapMatches = hapMatches.stream()
.filter(m -> !m.getName().equals("Reference"))
.collect(Collectors.toCollection(TreeSet::new));
}
if (hapMatches.size() == 0) {
resultBuilder.haplotypes(gene, refData, new ArrayList<>(hapMatches));
return;
}

// have to compute diplotypes so that we can check for homozygous and partials
List<DiplotypeMatch> matches = new DiplotypeMatcher(comboData)
.compute(true, getTopCandidateOnly(gene));
Set<String> homozygous = new HashSet<>();
int numPartials = 0;
for (DiplotypeMatch dm : matches) {
if (dm.getHaplotype1().getHaplotype().isPartial() ||
(dm.getHaplotype2() != null && dm.getHaplotype2().getHaplotype().isPartial())) {
numPartials += 1;
}

Map<String, Integer> haps = new HashMap<>();
for (String h : dm.getHaplotype1().getHaplotypeNames()) {
haps.compute(h, (k, v) -> v == null ? 1 : v + 1);
if (!dm.getHaplotype1().getHaplotype().isPartial() || !h.equals("Reference")) {
haps.compute(h, (k, v) -> v == null ? 1 : v + 1);
}
}
if (dm.getHaplotype2() != null) {
for (String h : dm.getHaplotype2().getHaplotypeNames()) {
haps.compute(h, (k, v) -> v == null ? 1 : v + 1);
if (!dm.getHaplotype2().getHaplotype().isPartial() || !h.equals("Reference")) {
haps.compute(h, (k, v) -> v == null ? 1 : v + 1);
}
}
}
for (String k : haps.keySet()) {
Expand All @@ -328,6 +327,16 @@ private void callDpyd(String sampleId, SortedMap<String, SampleAllele> alleleMap
}
}

// Reference/Reference matches would have been handled in the effectively phased section
// if we get to this point, it's a combination that might include Reference
// if there are more than 2 haplotype matches, strip out Reference because we prioritize non-Reference if possible
// with 2 or fewer haplotype matches, cannot have Reference if there's a partial
if (hapMatches.size() > 2 || numPartials > 0) {
hapMatches = hapMatches.stream()
.filter(m -> !m.getName().equals("Reference"))
.collect(Collectors.toCollection(TreeSet::new));
}

List<HaplotypeMatch> finalHaps = new ArrayList<>();
for (HaplotypeMatch hm : hapMatches) {
finalHaps.add(hm);
Expand Down
Loading

0 comments on commit 57b2bd8

Please sign in to comment.