Skip to content

Commit

Permalink
Fixed bug in calculation of p-values for exact test of Hardy-Weinberg…
Browse files Browse the repository at this point in the history
… equilibrium and corresponding unit tests.
  • Loading branch information
samuelklee committed Aug 4, 2021
1 parent 796af9f commit 17e096d
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 124 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,16 @@ public Map<String, Object> annotate(final ReferenceContext ref,
static Pair<Integer, Double> calculateEH(final VariantContext vc, final GenotypesContext genotypes) {
final GenotypeCounts t = GenotypeUtils.computeDiploidGenotypeCounts(vc, genotypes, ROUND_GENOTYPE_COUNTS);
// number of samples that have likelihoods
final int sampleCount = (int) genotypes.stream().filter(g->GenotypeUtils.isDiploidWithLikelihoods(g)).count();
final int sampleCount = (int) genotypes.stream().filter(GenotypeUtils::isDiploidWithLikelihoods).count();

return calculateEH(vc, t, sampleCount);
return calculateEH(t, sampleCount);
}

@VisibleForTesting
public static Pair<Integer, Double> calculateEH(final VariantContext vc, final GenotypeCounts t, final int sampleCount) {
final int refCount = (int)t.getRefs();
final int hetCount = (int)t.getHets();
final int homCount = (int)t.getHoms();
public static Pair<Integer, Double> calculateEH(final GenotypeCounts t, final int sampleCount) {
final int refCount = (int) t.getRefs();
final int hetCount = (int) t.getHets();
final int homCount = (int) t.getHoms();

final double pval = exactTest(hetCount, refCount, homCount);

Expand Down Expand Up @@ -137,11 +137,6 @@ static double exactTest(final int hetCount, final int refCount, final int homCou
final int rareCopies = 2 * obsHomR + hetCount;
final int N = hetCount + obsHomC + obsHomR;

//If the probability distribution has only 1 point, then the mid p-value is .5
if (rareCopies <= 1) {
return .5;
}

final double[] probs = new double[rareCopies + 1];

//Find (something close to the) mode for the midpoint
Expand Down Expand Up @@ -193,20 +188,19 @@ static double exactTest(final int hetCount, final int refCount, final int homCou
currHomC = currHomC - 1;
}

final double rightPval = probs[hetCount] / (2.0 * mysum);
final double rightPval = Math.min(1., probs[hetCount] / mysum);
//Check if we observed the highest possible number of hets
if (hetCount == rareCopies) {
return rightPval;
}
return rightPval + StatUtils.sum(Arrays.copyOfRange(probs, hetCount + 1, probs.length)) / mysum;
return Math.min(1., rightPval + StatUtils.sum(Arrays.copyOfRange(probs, hetCount + 1, probs.length)) / mysum);
}

@Override
public List<String> getKeyNames() {
return Collections.singletonList(GATKVCFConstants.EXCESS_HET_KEY);
}

//@Override
public String getRawKeyName() {
return GATKVCFConstants.RAW_GENOTYPE_COUNT_KEY;
}
Expand All @@ -218,8 +212,7 @@ public String getRawKeyName() {
* @param vc the variant context to annotate
* @param likelihoods likelihoods indexed by sample, allele, and read within sample
*/
//@Override
public Map<String, Object> annotateRawData(ReferenceContext ref, VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods) {
public static Map<String, Object> annotateRawData(ReferenceContext ref, VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods) {
return null;
}

Expand All @@ -231,8 +224,7 @@ public Map<String, Object> annotateRawData(ReferenceContext ref, VariantContext
* @param listOfRawData The raw data for all the variants being combined/merged
* @return A key, Object map containing the annotations generated by this combine operation
*/
//@Override
public Map<String, Object> combineRawData(List<Allele> allelesList, List<ReducibleAnnotationData<?>> listOfRawData) {
public static Map<String, Object> combineRawData(List<Allele> allelesList, List<ReducibleAnnotationData<?>> listOfRawData) {
return null;
}

Expand All @@ -243,15 +235,14 @@ public Map<String, Object> combineRawData(List<Allele> allelesList, List<Reducib
* @param originalVC -- used to get all the alleles for all gVCFs
* @return A key, Object map containing the finalized annotations generated by this operation to be added to the code
*/
//@Override
public Map<String, Object> finalizeRawData(VariantContext vc, VariantContext originalVC) {
if (vc.hasAttribute(getRawKeyName())) {
List<Integer> counts = vc.getAttributeAsIntList(getRawKeyName(), 0);
if (counts.size() != NUMBER_OF_GENOTYPE_COUNTS) {
throw new IllegalStateException("Genotype counts for ExcessHet (" + getRawKeyName() + ") should have three values: homozygous reference, heterozygous with one ref allele, and homozygous variant/heterozygous non-reference");
}
final GenotypeCounts t = new GenotypeCounts(counts.get(0), counts.get(1), counts.get(2));
final Pair<Integer, Double> sampleCountEH = calculateEH(vc, t, counts.get(0)+counts.get(1)+counts.get(2));
final Pair<Integer, Double> sampleCountEH = calculateEH(t, counts.get(0)+counts.get(1)+counts.get(2));
final int sampleCount = sampleCountEH.getLeft();
final double eh = sampleCountEH.getRight();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ else if (variant.hasAttribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)) {
final int refCount = Math.max(numCalledAlleles / 2 - gtCounts.get(1) - gtCounts.get(2), 0);
//homRefs don't get counted properly because ref blocks aren't annotated
gtCounts.set(0, refCount);
final Pair<Integer, Double> eh = ExcessHet.calculateEH(variant, new GenotypeCounts(gtCounts.get(0), gtCounts.get(1), gtCounts.get(2)), numCalledAlleles / 2);
final Pair<Integer, Double> eh = ExcessHet.calculateEH(new GenotypeCounts(gtCounts.get(0), gtCounts.get(1), gtCounts.get(2)), numCalledAlleles / 2);
vcfBuilder.attribute(GATKVCFConstants.EXCESS_HET_KEY, String.format("%.4f", eh.getRight()));
vcfBuilder.rmAttribute(GATKVCFConstants.RAW_GENOTYPE_COUNT_KEY);
if (annotationDBBuilder != null) {
Expand Down
Loading

0 comments on commit 17e096d

Please sign in to comment.