Cap likelihoods symmetrically (#6196)

* Cap likelihoods symmetrically to fix het GT calls with >90% alt reads

ldgauthier authored Oct 23, 2019
1 parent f1272b5 commit b2e219b
Showing 42 changed files with 952 additions and 211 deletions.
@@ -206,7 +206,8 @@ public static CachingIndexedFastaSequenceFile createReferenceReader(final String
      * @return never {@code null}.
      */
     public static ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine(final LikelihoodEngineArgumentCollection likelihoodArgs) {
-        final double log10GlobalReadMismappingRate = likelihoodArgs.phredScaledGlobalReadMismappingRate < 0 ? - Double.MAX_VALUE
+        //AlleleLikelihoods::normalizeLikelihoods uses Double.NEGATIVE_INFINITY as a flag to disable capping
+        final double log10GlobalReadMismappingRate = likelihoodArgs.phredScaledGlobalReadMismappingRate < 0 ? Double.NEGATIVE_INFINITY
                 : QualityUtils.qualToErrorProbLog10(likelihoodArgs.phredScaledGlobalReadMismappingRate);

         switch ( likelihoodArgs.likelihoodEngineImplementation) {
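For context, a minimal standalone sketch (illustrative only, not GATK's implementation) of why Double.NEGATIVE_INFINITY works as the "disable" value here. It assumes QualityUtils.qualToErrorProbLog10 follows the standard Phred definition Q = -10 * log10(p), so log10(p) = -Q / 10.

    // Sketch only, assuming qualToErrorProbLog10(Q) == -Q / 10 (standard Phred scale).
    static double log10GlobalReadMismappingRate(final double phredScaledRate) {
        // Negative Phred scores mean "do not cap". Per the commit's comment,
        // normalizeLikelihoods treats NEGATIVE_INFINITY as the disable flag; it is also
        // arithmetically safe, since a cap floor of best + NEGATIVE_INFINITY is -Infinity
        // and Math.max(likelihood, -Infinity) leaves every likelihood unchanged. The old
        // sentinel, -Double.MAX_VALUE, is finite and presumably failed the flag check.
        return phredScaledRate < 0 ? Double.NEGATIVE_INFINITY : -phredScaledRate / 10.0;
    }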
@@ -352,9 +352,10 @@ public void normalizeLikelihoods(final double maximumLikelihoodDifferenceCap) {
     private void normalizeLikelihoodsPerEvidence(final double maximumBestAltLikelihoodDifference,
                                                  final double[][] sampleValues, final int sampleIndex, final int evidenceIndex) {

-        final BestAllele bestAlternativeAllele = searchBestAllele(sampleIndex,evidenceIndex,false);
+        //allow the best allele to be the reference because asymmetry leads to strange artifacts like het calls with >90% alt reads
+        final BestAllele bestAllele = searchBestAllele(sampleIndex,evidenceIndex,true);

-        final double worstLikelihoodCap = bestAlternativeAllele.likelihood + maximumBestAltLikelihoodDifference;
+        final double worstLikelihoodCap = bestAllele.likelihood + maximumBestAltLikelihoodDifference;

         final int alleleCount = alleles.numberOfAlleles();
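In effect, the per-read cap now behaves like the standalone sketch below (names are illustrative, not GATK's). The floor is derived from the best likelihood over all alleles, reference included; previously it came from the best alternative allele only, so a read that strongly favored the reference was never capped, and a handful of such reads could dominate the genotype likelihoods — which is how het calls appeared at sites with >90% alt reads.

    // Illustrative standalone version of the symmetric cap. log10Likelihoods[a] is one read's
    // log10 likelihood under allele a; maxBestDiff is non-positive (the log10 global read
    // mismapping rate, or NEGATIVE_INFINITY to disable capping).
    static void capLikelihoodsSymmetrically(final double[] log10Likelihoods, final double maxBestDiff) {
        double best = Double.NEGATIVE_INFINITY;
        for (final double lk : log10Likelihoods) {
            best = Math.max(best, lk);               // best over ALL alleles, reference included
        }
        final double floor = best + maxBestDiff;     // -Infinity when capping is disabled
        for (int a = 0; a < log10Likelihoods.length; a++) {
            // no allele may sit more than |maxBestDiff| log10 units below the best allele
            log10Likelihoods[a] = Math.max(log10Likelihoods[a], floor);
        }
    }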
@@ -33,6 +33,12 @@
 import java.util.stream.Collectors;

 public class CombineGVCFsIntegrationTest extends CommandLineProgramTest {
+    // If true, update the expected outputs in tests that assert an exact match vs. prior output,
+    // instead of actually running the tests. Can be used with "./gradlew test -Dtest.single=CombineGVCFsIntegrationTest"
+    // to update all of the exact-match tests at once. After you do this, you should look at the
+    // diffs in the new expected outputs in git to confirm that they are consistent with expectations.
+    public static final boolean UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS = false;
+
     private static final List<String> NO_EXTRA_ARGS = Collections.emptyList();
     private static final List<String> ATTRIBUTES_TO_IGNORE = Arrays.asList(
             "RAW_MQ", //MQ data format and key have changed since GATK3
@@ -49,6 +55,14 @@ private static <T> void assertForEachElementInLists(final List<T> actual, final
         }
     }

+    /*
+     * Make sure that someone didn't leave the UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS toggle turned on
+     */
+    @Test
+    public void assertThatExpectedOutputUpdateToggleIsDisabled() {
+        Assert.assertFalse(UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS, "The toggle to update expected outputs should not be left enabled");
+    }
+
     @DataProvider
     public Object[][] gvcfsToCombine() {
         return new Object[][]{
@@ -134,8 +148,8 @@ public void compareToGATK3(File[] inputs, File outputFile, List<String> extraArg
     }

     @Test(dataProvider = "gvcfsToCombine")
-    public void compareToGATK3ExpectedResults(File[] inputs, File outputFile, List<String> extraArgs, String reference) throws IOException, NoSuchAlgorithmException {
-        assertVariantContextsMatch(Arrays.asList(inputs), outputFile, extraArgs, reference, ATTRIBUTES_TO_IGNORE);
+    public void compareToExpectedResults(File[] inputs, File expected, List<String> extraArgs, String reference) throws IOException, NoSuchAlgorithmException {
+        assertVariantContextsMatch(Arrays.asList(inputs), expected, extraArgs, reference, ATTRIBUTES_TO_IGNORE);
     }

     public static void runProcess(ProcessController processController, String[] command) {
@@ -161,7 +175,7 @@ public void runCombineGVCFSandAssertSomething(List<File> inputs, File expected,

         final ArgumentsBuilder args = new ArgumentsBuilder();
         args.addReference(new File(reference))
-                .addOutput(output);
+                .addOutput(UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expected : output);
         for (File input: inputs) {
             args.addArgument("V", input.getAbsolutePath());
         }
@@ -173,9 +187,11 @@ public void runCombineGVCFSandAssertSomething(List<File> inputs, File expected,
         Utils.resetRandomGenerator();
         runCommandLine(args);

-        final List<VariantContext> expectedVC = getVariantContexts(expected);
-        final List<VariantContext> actualVC = getVariantContexts(output);
-        assertForEachElementInLists(actualVC, expectedVC, assertion);
+        if (!UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
+            final List<VariantContext> expectedVC = getVariantContexts(expected);
+            final List<VariantContext> actualVC = getVariantContexts(output);
+            assertForEachElementInLists(actualVC, expectedVC, assertion);
+        }
     }
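Together, the changes above implement a regenerate-expected-outputs pattern. Condensed, the control flow is as follows (restating the test's own names, not new functionality):

    // Condensed restatement of the control flow above:
    final File target = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expected : output;
    args.addOutput(target);   // regeneration mode writes straight over the checked-in expected file
    runCommandLine(args);
    if (!UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
        // normal mode: fresh output must match the checked-in expected file
        assertForEachElementInLists(getVariantContexts(output), getVariantContexts(expected), assertion);
    }

The assertThatExpectedOutputUpdateToggleIsDisabled test added earlier guards the toggle itself: every exact-match test silently passes while it is enabled, so it must never be committed as true.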

@@ -30,6 +30,7 @@
 import org.broadinstitute.hellbender.utils.io.IOUtils;
 import org.broadinstitute.hellbender.utils.read.GATKRead;
 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
+import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
 import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
@@ -1220,6 +1221,35 @@ public void testMaxAlternateAlleles(final String bam, final String reference, fi
         }
     }

+    @Test
+    public void testContaminatedHomVarDeletions() {
+        final String bam = toolsTestDir + "haplotypecaller/deletion_sample.snippet.bam";
+        final String intervals = "chr3:46373452";
+
+        final File outputContaminatedHomVarDeletions = createTempFile("testContaminatedHomVarDeletions", ".vcf");
+
+        final ArgumentsBuilder args = new ArgumentsBuilder().addInput(new File(bam))
+                .addReference(hg38Reference)
+                .addInterval(new SimpleInterval(intervals))
+                .addOutput(outputContaminatedHomVarDeletions)
+                .addNumericArgument(IntervalArgumentCollection.INTERVAL_PADDING_LONG_NAME, 50);
+        runCommandLine(args);
+
+        final List<VariantContext> vcs = VariantContextTestUtils.getVariantContexts(outputContaminatedHomVarDeletions);
+
+        //check known homozygous deletion for correct genotype
+        for (final VariantContext vc : vcs) {
+            final Genotype gt = vc.getGenotype(0);
+            if (gt.hasAD()) {
+                final int[] ads = gt.getAD();
+                final double alleleBalance = ads[1] / (double) (ads[0] + ads[1]);
+                if (alleleBalance > 0.9) {
+                    Assert.assertTrue(gt.isHomVar());
+                }
+            }
+        }
+    }
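Note the allele-balance line: Genotype.getAD() returns an int[], so the division must be promoted to double (hence the cast) or integer division truncates the ratio to 0 for any mixed read count. As a worked illustration, the hypothetical helper below is not part of the commit:

    // Hypothetical helper, not in the commit: allele balance from the AD (allelic depth) field.
    static double alleleBalance(final int[] ad) {
        final int total = ad[0] + ad[1];
        // The cast promotes the division: AD = {3, 47} gives 47 / 50.0 = 0.94, which exceeds
        // the 0.9 threshold, so the test requires a hom-var call; integer division would
        // have produced 0 and silently skipped the assertion.
        return total == 0 ? 0.0 : (double) ad[1] / total;
    }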

@@ -282,19 +282,18 @@ public void testNormalizeCapWorstLK(final String[] samples, final Allele[] allel
             for (int a = 0; a < numberOfAlleles; a++)
                 newLikelihoods[s][a] = new double[sampleReadCount];
             for (int r = 0; r < sampleReadCount; r++) {
-                double bestAltLk = Double.NEGATIVE_INFINITY;
+                double bestAlleleLk = Double.NEGATIVE_INFINITY;
+                //best likelihood can be alt OR reference
                 for (int a = 0; a < numberOfAlleles; a++) {
-                    if (alleles[a].isReference())
-                        continue;
-                    bestAltLk = Math.max(bestAltLk, originalLikelihoods[s][a][r]);
+                    bestAlleleLk = Math.max(bestAlleleLk, originalLikelihoods[s][a][r]);
                 }
-                if (bestAltLk == Double.NEGATIVE_INFINITY)
+                if (bestAlleleLk == Double.NEGATIVE_INFINITY)
                     for (int a = 0; a < numberOfAlleles; a++) {
                         newLikelihoods[s][a][r] = originalLikelihoods[s][a][r];
                     }
                 else
                     for (int a = 0; a < numberOfAlleles; a++) {
-                        newLikelihoods[s][a][r] = Math.max(originalLikelihoods[s][a][r], bestAltLk - 0.001);
+                        newLikelihoods[s][a][r] = Math.max(originalLikelihoods[s][a][r], bestAlleleLk - 0.001);
                     }
             }
         }
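A concrete numeric illustration of the asymmetry this test now guards against (values invented for the example; a Phred-45 global mismapping rate corresponds to maxBestDiff = -4.5):

    // One contaminating read that favors the REFERENCE allele; log10 likelihoods {ref, alt}.
    final double[] lks = { -1.0, -20.0 };
    final double maxBestDiff = -4.5;                  // log10 of a Phred-45 mismapping rate

    // Old asymmetric cap: floor = bestAlt + maxBestDiff = -20.0 - 4.5 = -24.5, so nothing was
    // capped and this read contributed a 19-unit log10 gap toward the reference; a few such
    // reads could outweigh many alt-supporting reads and turn a hom-var site into a het call.
    // New symmetric cap: floor = bestAllele + maxBestDiff = -1.0 - 4.5 = -5.5.
    final double floor = Math.max(lks[0], lks[1]) + maxBestDiff;   // -5.5
    final double cappedAlt = Math.max(lks[1], floor);              // -20.0 raised to -5.5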
Binary file not shown.
Binary file not shown.