Skip to content

Commit

Permalink
clean-up after rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin committed Feb 28, 2019
1 parent caf0bca commit 5a74c30
Show file tree
Hide file tree
Showing 21 changed files with 2,056 additions and 2,072 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
Expand All @@ -16,13 +19,13 @@
import org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
import org.broadinstitute.hellbender.tools.walkers.mutect.M2FiltersArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2FilteringEngine;
import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.StandardAnnotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.Mutect2FilteringEngine;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.*;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider;
import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.mutect.M2FiltersArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.M2FiltersArgumentCollection;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
Expand Down Expand Up @@ -118,7 +118,7 @@ public final class GenotypeGVCFs extends VariantLocusWalker {
*/
@Argument(fullName=M2ArgumentCollection.EMISSION_LOD_LONG_NAME, shortName = M2ArgumentCollection.EMISSION_LOG_SHORT_NAME,
doc = "LOD threshold to emit variant to VCF.")
protected double tlodThreshold = 0.8 * M2FiltersArgumentCollection.DEFAULT_TLOD_FILTER_THRESHOLD; //allow for some lower quality variants
protected double tlodThreshold = 3.5; //allow for some lower quality variants


/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,18 @@
import com.google.common.primitives.Ints;
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.*;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils;
import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AlleleSpecificAnnotationData;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotationData;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2FilteringEngine;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.Mutect2FilteringEngine;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
Expand Down Expand Up @@ -44,8 +47,6 @@ public final class ReferenceConfidenceVariantContextMerger {

private static final List<String> SOMATIC_FORMAT_ANNOTATIONS_TO_KEEP = Arrays.asList(
GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY,
GATKVCFConstants.STRAND_ARTIFACT_AF_KEY,
GATKVCFConstants.STRAND_ARTIFACT_POSTERIOR_KEY,
GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY,
GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY,
VCFConstants.PHASE_SET_KEY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator {
private static final List<String> STANDARD_MUTECT_INFO_FIELDS = Arrays.asList(GATKVCFConstants.NORMAL_LOD_KEY, GATKVCFConstants.TUMOR_LOD_KEY, GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE,
GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY, GATKVCFConstants.IN_PON_VCF_ATTRIBUTE, GATKVCFConstants.POPULATION_AF_VCF_ATTRIBUTE,
GATKVCFConstants.GERMLINE_QUAL_VCF_ATTRIBUTE, GATKVCFConstants.CONTAMINATION_QUAL_ATTRIBUTE, GATKVCFConstants.SEQUENCING_QUAL_VCF_ATTRIBUTE,
GATKVCFConstants.POLYMERASE_SLIPPAGE_QUAL_VCF_ATTRIBUTE,
GATKVCFConstants.STRAND_QUAL_VCF_ATTRIBUTE, GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, GATKVCFConstants.N_COUNT_KEY, GATKVCFConstants.UNIQUE_ALT_READ_SET_COUNT_KEY, GATKVCFConstants.STRAND_ARTIFACT_POSTERIOR_KEY);
GATKVCFConstants.POLYMERASE_SLIPPAGE_QUAL_VCF_ATTRIBUTE, GATKVCFConstants.FORWARD_STRAND_COUNT_KEY,
GATKVCFConstants.STRAND_QUAL_VCF_ATTRIBUTE, GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, GATKVCFConstants.N_COUNT_KEY, GATKVCFConstants.UNIQUE_ALT_READ_SET_COUNT_KEY);
private static final String MUTECT_VERSION = "2.2";

public static final String TUMOR_SAMPLE_KEY_IN_VCF_HEADER = "tumor_sample";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public FragmentLengthFilter(final double maxMedianFragmentLengthDifference) {

@Override
public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) {
final List<Integer> fragmentLengthByAllele = vc.getAttributeAsIntList(FragmentLength.KEY, 0);
final List<Integer> fragmentLengthByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_FRAGMENT_LENGTH_KEY, 0);

return Math.abs(fragmentLengthByAllele.get(1) - fragmentLengthByAllele.get(0)) > maxMedianFragmentLengthDifference;
}
Expand All @@ -30,5 +30,5 @@ public String filterName() {
}

@Override
protected List<String> requiredAnnotations() { return Collections.singletonList(FragmentLength.KEY); }
protected List<String> requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_FRAGMENT_LENGTH_KEY); }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public MappingQualityFilter(final double minMedianMappingQuality, final int long
public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) {
final List<Integer> indelLengths = vc.getIndelLengths();
final int indelLength = indelLengths == null ? 0 : indelLengths.stream().mapToInt(Math::abs).max().orElseGet(() -> 0);
final List<Integer> mappingQualityByAllele = vc.getAttributeAsIntList(MappingQuality.KEY, 0);
final List<Integer> mappingQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY, 0);

// we use the mapping quality annotation of the alt allele in most cases, but for long indels we use the reference
// annotation. We have to do this because the indel, even if it maps uniquely, gets a poor mapping quality
Expand All @@ -37,5 +37,5 @@ public String filterName() {
}

@Override
protected List<String> requiredAnnotations() { return Collections.singletonList(MappingQuality.KEY); }
protected List<String> requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY); }
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ public class Mutect2FilteringEngine {
private final List<Mutect2VariantFilter> filters = new ArrayList<>();
private final Set<String> normalSamples;

public static final List<String> STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING = Arrays.asList(
GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY,
GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY,
GATKVCFConstants.MEDIAN_READ_POSITON_KEY,
GATKVCFConstants.MEDIAN_FRAGMENT_LENGTH_KEY,
GATKVCFConstants.FORWARD_STRAND_COUNT_KEY
);

/**
* DATA ACCUMULATED AND LEARNED ON EACH PASS OF {@link FilterMutectCalls}
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public ReadPositionFilter(final double minMedianReadPosition) {

@Override
public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) {
final List<Integer> readPositionByAllele = vc.getAttributeAsIntList(ReadPosition.KEY, 0);
final List<Integer> readPositionByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_READ_POSITON_KEY, 0);

// a negative value is possible due to a bug: https://github.com/broadinstitute/gatk/issues/5492
return readPositionByAllele.get(0) > -1 && readPositionByAllele.get(0) < minMedianReadPosition;
Expand All @@ -31,5 +31,5 @@ public String filterName() {
}

@Override
protected List<String> requiredAnnotations() { return Collections.singletonList(ReadPosition.KEY); }
protected List<String> requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_READ_POSITON_KEY); }
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,6 @@ public double logProbability(int k) {
CombinatoricsUtils.binomialCoefficientLog(n,k) + Beta.logBeta(k+alpha, n - k + beta) - Beta.logBeta(alpha, beta);
}

/**
* @param k number of successes. Must be positive or zero.
* @return the value of the natural log pdf at k
*/
@Override
public double logProbability(int k) {

ParamUtils.isPositiveOrZero(k, "Number of successes must be greater than or equal to zero.");

// nchoosek * beta(k+alpha, n-k+beta)/ beta(alpha, beta)
// binomialcoefficient is "n choose k"
if (k > n) {
return Double.NEGATIVE_INFINITY;
}
return max(Double.NEGATIVE_INFINITY, CombinatoricsUtils.binomialCoefficientLog(n, k) + Beta.logBeta(k+alpha, n - k + beta) - Beta.logBeta(alpha, beta));
}

/**
* @param k number of successes. Must be positive or zero.
* @return the value of the cdf at k
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,11 @@ public final class GATKVCFConstants {
public static final String ORIGINAL_CONTIG_MISMATCH_KEY = "OCM";
public static final String N_COUNT_KEY = "NCount";
public static final String UNIQUE_ALT_READ_SET_COUNT_KEY = "UNIQ_ALT_READ_COUNT";
public static final String STRAND_ARTIFACT_POSTERIOR_KEY = "SAPP"; // Strand Artifact Filter
public static final String MEDIAN_BASE_QUALITY_KEY = "MBQ";
public static final String MEDIAN_MAPPING_QUALITY_KEY = "MMQ";
public static final String FORWARD_STRAND_COUNT_KEY = "FWD";
public static final String MEDIAN_FRAGMENT_LENGTH_KEY = "MFRL";
public static final String MEDIAN_READ_POSITON_KEY = "MPOS";

// FORMAT keys
public static final String ALLELE_BALANCE_KEY = "AB";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf
addFormatLine(new VCFFormatHeaderLine(ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele fractions of alternate alleles in the tumor"));
addFormatLine(new VCFFormatHeaderLine(F1R2_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Count of reads in F1R2 pair orientation supporting each allele"));
addFormatLine(new VCFFormatHeaderLine(F2R1_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Count of reads in F2R1 pair orientation supporting each allele"));
addFormatLine(new VCFFormatHeaderLine(STRAND_ARTIFACT_POSTERIOR_KEY, 3, VCFHeaderLineType.Float, "posterior probabilities of the presence of strand artifact"));
addFormatLine(new VCFFormatHeaderLine(ROF_POSTERIOR_KEY, 1, VCFHeaderLineType.Float, "posterior probability of read orientation-based artifacts"));
addFormatLine(new VCFFormatHeaderLine(ROF_PRIOR_KEY, 1, VCFHeaderLineType.Float, "prior probability of read orientation-based artifacts under the present referene context"));
addFormatLine(new VCFFormatHeaderLine(ROF_TYPE_KEY, 1, VCFHeaderLineType.String, "type of read orientation artifact (F1R2 or F2R1)"));
Expand Down Expand Up @@ -226,7 +225,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_CONTIG_MISMATCH_KEY, 1, VCFHeaderLineType.Integer, "Number of alt reads whose original alignment doesn't match the current contig."));
addInfoLine(new VCFInfoHeaderLine(N_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Count of N bases in the pileup"));
addInfoLine(new VCFInfoHeaderLine(UNIQUE_ALT_READ_SET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of ALT reads with unique start and mate end positions at a variant site"));
addInfoLine(new VCFInfoHeaderLine(STRAND_ARTIFACT_POSTERIOR_KEY, 3, VCFHeaderLineType.Float, "posterior probabilities of the presence of strand artifact"));
addInfoLine(new VCFInfoHeaderLine(FORWARD_STRAND_COUNT_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "forward strand counts of each allele"));
addInfoLine(new BaseQuality().getDescriptions().get(0));
addInfoLine(new FragmentLength().getDescriptions().get(0));
addInfoLine(new MappingQuality().getDescriptions().get(0));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,6 @@ public void testCombineSomaticGvcfs() throws Exception {
if (vc.getStart() == 14872) {
Assert.assertTrue(!vc.isFiltered());
Assert.assertTrue(!vc.getGenotype(0).isFiltered());
Assert.assertTrue(vc.getGenotype(1).getFilters().contains(GATKVCFConstants.TUMOR_LOD_FILTER_NAME));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.tools.walkers.mutect.M2FiltersArgumentCollection;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
Expand Down Expand Up @@ -49,7 +48,7 @@ public class GenotypeGVCFsIntegrationTest extends CommandLineProgramTest {
private static final String BASE_PAIR_EXPECTED = "gvcf.basepairResolution.gatk3.7_30_ga4f720357.output.vcf";
private static final String b38_reference_20_21 = largeFileTestDir + "Homo_sapiens_assembly38.20.21.fasta";
private static final String BASE_PAIR_GVCF = "gvcf.basepairResolution.gvcf";
private static final double TLOD_THRESHOLD = 0.8 * M2FiltersArgumentCollection.DEFAULT_TLOD_FILTER_THRESHOLD;
private static final double TLOD_THRESHOLD = 4.0;

private static final File CEUTRIO_20_21_GATK3_4_G_VCF = new File(largeFileTestDir, "gvcfs/CEUTrio.20.21.gatk3.4.g.vcf");
private static final String CEUTRIO_20_21_EXPECTED_VCF = "CEUTrio.20.21.gatk3.7_30_ga4f720357.expected.vcf";
Expand Down
Loading

0 comments on commit 5a74c30

Please sign in to comment.