Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed M2 bug where triallelic normal artifacts were sometimes hidden from filtering #4809

Merged
merged 1 commit into from
May 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final String GERMLINE_RESOURCE_LONG_NAME = "germline-resource";
public static final String DEFAULT_AF_LONG_NAME = "af-of-alleles-not-in-resource";
public static final String DEFAULT_AF_SHORT_NAME = "default-af";
public static final String EMISSION_LOG_LONG_NAME = "tumor-lod-to-emit";
public static final String EMISSION_LOD_LONG_NAME = "tumor-lod-to-emit";
public static final String EMISSION_LOG_SHORT_NAME = "emit-lod";
public static final String INITIAL_TUMOR_LOD_LONG_NAME = "initial-tumor-lod";
public static final String INITIAL_TUMOR_LOD_SHORT_NAME = "init-lod";
Expand Down Expand Up @@ -105,14 +105,14 @@ public double getDefaultAlleleFrequency() {
* Default setting of 3 is permissive and will emit some amount of negative training data that
* {@link FilterMutectCalls} should then filter.
*/
@Argument(fullName = EMISSION_LOG_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit tumor variant to VCF.")
public double emissionLodThreshold = 3.0;
@Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit tumor variant to VCF.")
public double emissionLod = 3.0;

/**
* Only variants with estimated tumor LODs exceeding this threshold will be considered active.
*/
@Argument(fullName = INITIAL_TUMOR_LOD_LONG_NAME, shortName = INITIAL_TUMOR_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to consider pileup active.")
public double initialTumorLodThreshold = 2.0;
public double initialTumorLod = 2.0;

/**
* In tumor-only mode, we discard variants with population allele frequencies greater than this threshold.
Expand Down Expand Up @@ -140,7 +140,7 @@ public double getDefaultAlleleFrequency() {
* but may also increase calling false positive, i.e. germline, variants.
*/
@Argument(fullName = NORMAL_LOD_LONG_NAME, optional = true, doc = "LOD threshold for calling normal variant non-germline.")
public double normalLodThreshold = 2.2;
public double normalLod = 2.2;

/**
* Two or more phased substitutions separated by this distance or less are merged into MNPs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer
final double tumorLog10Odds = -QualityUtils.qualToErrorProbLog10(tumorAltCountAndQualSum.getSecond()) +
MathUtils.log10Factorial(tumorAltCount) + MathUtils.log10Factorial(tumorRefCount) - MathUtils.log10Factorial(tumorPileup.size() + 1);

if (tumorLog10Odds < MTAC.initialTumorLodThreshold) {
if (tumorLog10Odds < MTAC.initialTumorLod) {
return new ActivityProfileState(refInterval, 0.0);
} else if (hasNormal() && !MTAC.genotypeGermlineSites) {
final ReadPileup normalPileup = pileup.getPileupForSample(normalSample, header);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,30 +126,39 @@ public CalledHaplotypes callMutations(
final Optional<PerAlleleCollection<Double>> normalLog10Odds = getForNormal(() -> diploidAltLog10Odds(log10NormalMatrix.get()));
final Optional<PerAlleleCollection<Double>> normalArtifactLog10Odds = getForNormal(() -> somaticLog10Odds(log10NormalMatrix.get()));

final Set<Allele> allelesConsistentWithGivenAlleles = getAllelesConsistentWithGivenAlleles(givenAlleles, loc, mergedVC);
final List<Allele> somaticAltAlleles = mergedVC.getAlternateAlleles().stream()
.filter(allele -> allelesConsistentWithGivenAlleles.contains(allele) ||
((tumorLog10Odds.getAlt(allele) > MTAC.emissionLodThreshold) &&
(!hasNormal || MTAC.genotypeGermlineSites || normalLog10Odds.get().getAlt(allele) > MTAC.normalLodThreshold)))
final Set<Allele> forcedAlleles = getAllelesConsistentWithGivenAlleles(givenAlleles, loc, mergedVC);

final List<Allele> tumorAltAlleles = mergedVC.getAlternateAlleles().stream()
.filter(allele -> forcedAlleles.contains(allele) || tumorLog10Odds.getAlt(allele) > MTAC.emissionLod)
.collect(Collectors.toList());
final List<Allele> allSomaticAlleles = ListUtils.union(Arrays.asList(mergedVC.getReference()), somaticAltAlleles);
if (somaticAltAlleles.isEmpty()) {

final long somaticAltCount = tumorAltAlleles.stream()
.filter(allele -> forcedAlleles.contains(allele) || !hasNormal || MTAC.genotypeGermlineSites || normalLog10Odds.get().getAlt(allele) > MTAC.normalLod)
.count();

// if every alt allele is germline, skip this variant. However, if some alt alleles are germline and others
// are not we emit them all so that the filtering engine can see them
if (somaticAltCount == 0) {
continue;
}

final LikelihoodMatrix<Allele> subsettedLog10TumorMatrix = new SubsettedLikelihoodMatrix<>(log10TumorMatrix, allSomaticAlleles);

final List<Allele> allAllelesToEmit = ListUtils.union(Arrays.asList(mergedVC.getReference()), tumorAltAlleles);


final LikelihoodMatrix<Allele> subsettedLog10TumorMatrix = new SubsettedLikelihoodMatrix<>(log10TumorMatrix, allAllelesToEmit);
final Optional<LikelihoodMatrix<Allele>> subsettedLog10NormalMatrix =
getForNormal(() -> new SubsettedLikelihoodMatrix<>(log10NormalMatrix.get(), allSomaticAlleles));
getForNormal(() -> new SubsettedLikelihoodMatrix<>(log10NormalMatrix.get(), allAllelesToEmit));

final Map<String, Object> populationAFAnnotation = GermlineProbabilityCalculator.getPopulationAFAnnotation(featureContext.getValues(MTAC.germlineResource, loc), somaticAltAlleles, MTAC.getDefaultAlleleFrequency());
final Map<String, Object> populationAFAnnotation = GermlineProbabilityCalculator.getPopulationAFAnnotation(featureContext.getValues(MTAC.germlineResource, loc), tumorAltAlleles, MTAC.getDefaultAlleleFrequency());

final VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC)
.alleles(allSomaticAlleles)
.alleles(allAllelesToEmit)
.attributes(populationAFAnnotation)
.attribute(GATKVCFConstants.TUMOR_LOD_KEY, somaticAltAlleles.stream().mapToDouble(tumorLog10Odds::getAlt).toArray());
.attribute(GATKVCFConstants.TUMOR_LOD_KEY, tumorAltAlleles.stream().mapToDouble(tumorLog10Odds::getAlt).toArray());

normalLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, values.asDoubleArray(somaticAltAlleles)));
normalArtifactLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE, values.asDoubleArray(somaticAltAlleles)));
normalLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, values.asDoubleArray(tumorAltAlleles)));
normalArtifactLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE, values.asDoubleArray(tumorAltAlleles)));

if (!featureContext.getValues(MTAC.pon, mergedVC.getStart()).isEmpty()) {
callVcb.attribute(GATKVCFConstants.IN_PON_VCF_ATTRIBUTE, true);
Expand Down