Skip to content

Commit

Permalink
fixed small gene panel edge case for CalculateContamination (#6137)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin authored Sep 5, 2019
1 parent e684080 commit b6af0fa
Show file tree
Hide file tree
Showing 3 changed files with 1,964 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class ContaminationModel {
public static final double UNSCRUPULOUS_HOM_REF_ALLELE_FRACTION = 0.15;
public static final double UNSCRUPULOUS_HOM_REF_FRACTION_TO_REMOVE_FOR_POSSIBLE_LOH = 0.1;
public static final double UNSCRUPULOUS_HOM_REF_PERCENTILE = 100 * ( 1 - UNSCRUPULOUS_HOM_REF_FRACTION_TO_REMOVE_FOR_POSSIBLE_LOH);
public static final double MINIMUM_UNSCRUPULOUS_HOM_REF_ALT_FRACTION_THRESHOLD = 0.1;

public static final double MAF_STEP_SIZE = 0.04;
private final double contamination;
private final double errorRate;
Expand Down Expand Up @@ -101,12 +103,13 @@ public Pair<Double, Double> calculateContaminationFromHoms(final List<PileupSumm
} else {
result = calculateContamination(Strategy.UNSCRUPULOUS_HOM_REF, tumorSites, minMaf);
}
if (result.getRight() < (result.getLeft() * MIN_RELATIVE_ERROR + MIN_ABSOLUTE_ERROR)) {
if (!Double.isNaN(result.getLeft()) && result.getRight() < (result.getLeft() * MIN_RELATIVE_ERROR + MIN_ABSOLUTE_ERROR)) {
return result;
}
}

return calculateContamination(Strategy.UNSCRUPULOUS_HOM_REF, tumorSites, 0);
final Pair<Double, Double> result = calculateContamination(Strategy.UNSCRUPULOUS_HOM_REF, tumorSites, 0);
return Double.isNaN(result.getLeft()) ? Pair.of(0.0, 1.0) : result;
}

/**
Expand Down Expand Up @@ -134,8 +137,9 @@ private Pair<Double, Double> calculateContamination(final Strategy strategy, fin
final List<PileupSummary> candidateHomRefs = tumorSites.stream()
.filter(site -> site.getAltFraction() < UNSCRUPULOUS_HOM_REF_ALLELE_FRACTION)
.collect(Collectors.toList());
final double threshold = new Percentile(UNSCRUPULOUS_HOM_REF_PERCENTILE).evaluate(candidateHomRefs.stream().mapToDouble(PileupSummary::getAltFraction).toArray());
genotypingHoms = candidateHomRefs.stream().filter(site -> site.getAltFraction() < threshold).collect(Collectors.toList());
final double altFractionThreshold = Math.max(MINIMUM_UNSCRUPULOUS_HOM_REF_ALT_FRACTION_THRESHOLD,
new Percentile(UNSCRUPULOUS_HOM_REF_PERCENTILE).evaluate(candidateHomRefs.stream().mapToDouble(PileupSummary::getAltFraction).toArray()));
genotypingHoms = candidateHomRefs.stream().filter(site -> site.getAltFraction() <= altFractionThreshold).collect(Collectors.toList());
}
final List<PileupSummary> homs = subsetSites(tumorSites, genotypingHoms);
final double tumorErrorRate = calculateErrorRate(tumorSites);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
* Created by David Benjamin on 2/16/17.
*/
public class CalculateContaminationIntegrationTest extends CommandLineProgramTest {
public static final File SPIKEIN_DATA_DIRECTORY = new File(getTestDataDir(), "calculatecontamination");
public static final File NA12891_1_PCT_NA12892_99_PCT = new File(SPIKEIN_DATA_DIRECTORY, "NA12891_0.01_NA12892_0.99.table");
public static final File NA12891_3_PCT_NA12892_97_PCT = new File(SPIKEIN_DATA_DIRECTORY, "NA12891_0.03_NA12892_0.97.table");
public static final File NA12891_5_PCT_NA12892_95_PCT = new File(SPIKEIN_DATA_DIRECTORY, "NA12891_0.05_NA12892_0.95.table");
public static final File NA12891_8_PCT_NA12892_92_PCT = new File(SPIKEIN_DATA_DIRECTORY, "NA12891_0.08_NA12892_0.92.table");
public static final File CONTAMINATION_TEST_DATA_DIRECTORY = new File(getTestDataDir(), "calculatecontamination");
public static final File NA12891_1_PCT_NA12892_99_PCT = new File(CONTAMINATION_TEST_DATA_DIRECTORY, "NA12891_0.01_NA12892_0.99.table");
public static final File NA12891_3_PCT_NA12892_97_PCT = new File(CONTAMINATION_TEST_DATA_DIRECTORY, "NA12891_0.03_NA12892_0.97.table");
public static final File NA12891_5_PCT_NA12892_95_PCT = new File(CONTAMINATION_TEST_DATA_DIRECTORY, "NA12891_0.05_NA12892_0.95.table");
public static final File NA12891_8_PCT_NA12892_92_PCT = new File(CONTAMINATION_TEST_DATA_DIRECTORY, "NA12891_0.08_NA12892_0.92.table");
public static final double BASELINE_CONTAMINATION_OF_NA12892 = 0.001;

@DataProvider(name = "includeHomAlts")
Expand Down Expand Up @@ -152,4 +152,21 @@ public void testMatchedNormal() {
final double calculatedContamination = ContaminationRecord.readFromFile(contaminationTable).get(0).getContamination();
Assert.assertEquals(calculatedContamination, contamination, 0.01);
}

@Test
public void testSmallGenePanelWithNoHomAlts() {
final File inputPileups = new File(CONTAMINATION_TEST_DATA_DIRECTORY, "small_gene_panel.pileups");
final File contaminationTable = createTempFile("contamination", ".table");

final String[] args = {
"-I", inputPileups.getAbsolutePath(),
"-O", contaminationTable.getAbsolutePath(),
};
runCommandLine(args);

final double calculatedContamination = ContaminationRecord.readFromFile(contaminationTable).get(0).getContamination();

Assert.assertFalse(Double.isNaN(calculatedContamination));
Assert.assertTrue(calculatedContamination < 0.2);
}
}
Loading

0 comments on commit b6af0fa

Please sign in to comment.