Skip to content

Commit

Permalink
mutect2 expose param (#8447)
Browse files Browse the repository at this point in the history
Allow users to turn off fix from 4.1.9.0 for consistency's sake
  • Loading branch information
ldgauthier authored and rickymagner committed Nov 28, 2023
1 parent ed33e87 commit e11853a
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.FlowBasedAlignmentArgumentCollection;
import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.FilterMutectCalls;
Expand Down Expand Up @@ -50,6 +49,7 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final String CALLABLE_DEPTH_LONG_NAME = "callable-depth";
public static final String PCR_SNV_QUAL_LONG_NAME = "pcr-snv-qual";
public static final String PCR_INDEL_QUAL_LONG_NAME = "pcr-indel-qual";
public static final String MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION = "base-qual-correction-factor";
public static final String F1R2_TAR_GZ_NAME = "f1r2-tar-gz";

public static final double DEFAULT_AF_FOR_TUMOR_ONLY_CALLING = 5e-8;
Expand Down Expand Up @@ -249,6 +249,18 @@ public double getInitialLogOdds() {
@Argument(fullName = PCR_INDEL_QUAL_LONG_NAME, optional = true, doc = "Phred-scaled PCR indel qual for overlapping fragments")
public int pcrIndelQual = 40;

/**
* A scale factor to modify the base qualities reported by the sequencer and used in the Mutect2 substitution error model.
* Set to zero to turn off the error model changes included in GATK 4.1.9.0.
* Our pileup likelihoods models assume that the base quality (qual) corresponds to the probability that a ref base is misread
* as the *particular* alt base, whereas the qual actually means the probability of *any* substitution error.
* Since there are three possible substitutions for each ref base we must divide the error probability by three
* which corresponds to adding 10*log10(3) = 4.77 ~ 5 to the qual.
*/
@Advanced
@Argument(fullName = MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION, optional = true, doc = "Set to zero to turn off the error model changes included in GATK 4.1.9.0.")
public int activeRegionMultipleSubstitutionBaseQualCorrection = (int)Math.round(10 * Math.log10(3));

/**
* In tumor-only mode, we discard variants with population allele frequencies greater than this threshold.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator, AutoCloseab

private final Optional<F1R2CountsCollector> f1R2CountsCollector;

private PileupQualBuffer tumorPileupQualBuffer = new PileupQualBuffer();
private PileupQualBuffer normalPileupQualBuffer = new PileupQualBuffer();
private PileupQualBuffer tumorPileupQualBuffer;
private PileupQualBuffer normalPileupQualBuffer;

/**
* Create and initialize a new HaplotypeCallerEngine given a collection of HaplotypeCaller arguments, a reads header,
Expand All @@ -144,6 +144,9 @@ public Mutect2Engine(final M2ArgumentCollection MTAC, AssemblyRegionArgumentColl
aligner = SmithWatermanAligner.getAligner(MTAC.smithWatermanImplementation);
samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(header)));

tumorPileupQualBuffer = new PileupQualBuffer(MTAC.activeRegionMultipleSubstitutionBaseQualCorrection);
normalPileupQualBuffer = new PileupQualBuffer(MTAC.activeRegionMultipleSubstitutionBaseQualCorrection);

// optimize set operations for the common cases of no normal and one normal
if (MTAC.normalSamples.isEmpty()) {
normalSamples = Collections.emptySet();
Expand Down Expand Up @@ -671,16 +674,14 @@ private static class PileupQualBuffer {
private static final int OTHER_SUBSTITUTION = 4;
private static final int INDEL = 5;

// our pileup likelihoods models assume that the qual corresponds to the probability that a ref base is misread
// as the *particular* alt base, whereas the qual actually means the probability of *any* substitution error.
// since there are three possible substitutions for each ref base we must divide the error probability by three
// which corresponds to adding 10*log10(3) = 4.77 ~ 5 to the qual.
private static final int ONE_THIRD_QUAL_CORRECTION = 5;
private static double MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION;

// indices 0-3 are A,C,G,T; 4 is other substitution (just in case it's some exotic protocol); 5 is indel
private List<ByteArrayList> buffers = IntStream.range(0,6).mapToObj(n -> new ByteArrayList()).collect(Collectors.toList());

public PileupQualBuffer() { }
public PileupQualBuffer(final double multipleSubstitutionQualCorrection) {
MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION = multipleSubstitutionQualCorrection;
}

public void accumulateQuals(final ReadPileup pileup, final byte refBase, final int pcrErrorQual) {
clear();
Expand Down Expand Up @@ -731,7 +732,7 @@ private void accumulateSubstitution(final byte base, final byte qual) {
if (index == -1) { // -1 is the hard-coded value for non-simple bases in BaseUtils
buffers.get(OTHER_SUBSTITUTION).add(qual);
} else {
buffers.get(index).add((byte) FastMath.min(qual + ONE_THIRD_QUAL_CORRECTION, QualityUtils.MAX_QUAL));
buffers.get(index).add((byte) FastMath.min(qual + MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION, QualityUtils.MAX_QUAL));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,6 @@ public void testSmallNumAltExact(final int numRef, final double errorRate) {

Assert.assertEquals(calculated, expected, precision);
}



}

@DataProvider(name = "fewAltData")
Expand Down

0 comments on commit e11853a

Please sign in to comment.