Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed M2 bug for germline resources with AF=. #5442

Merged
merged 2 commits into from
Nov 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
import picard.cmdline.programgroups.IntervalsManipulationProgramGroup;
import org.broadinstitute.hellbender.engine.GATKTool;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.param.ParamUtils;
import picard.cmdline.programgroups.IntervalsManipulationProgramGroup;
import picard.util.IntervalList.IntervalListScatterMode;
import picard.util.IntervalList.IntervalListScatterer;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,17 @@ public static double[] calculateGermlineProbabilities(final double[] populationA

@VisibleForTesting
static double[] getGermlineAltAlleleFrequencies(final List<Allele> altAlleles, final Optional<VariantContext> germlineVC, final double afOfAllelesNotInGermlineResource) {
if (germlineVC.isPresent() && germlineVC.get().hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
final List<Double> germlineAltAFs = germlineVC.get().getAttributeAsDoubleList(VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource);
if (germlineVC.isPresent()) {
final List<Double> germlineAltAFs = Mutect2Engine.getAttributeAsDoubleList(germlineVC.get(), VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource);
return altAlleles.stream()
.mapToDouble(allele -> {
final OptionalInt germlineAltIndex = findAltAlleleIndex(germlineVC.get(), allele);
return germlineAltIndex.isPresent() ? germlineAltAFs.get(germlineAltIndex.getAsInt())
: afOfAllelesNotInGermlineResource;
}).toArray();
} else {
return Doubles.toArray(Collections.nCopies(altAlleles.size(), afOfAllelesNotInGermlineResource));
}

return Doubles.toArray(Collections.nCopies(altAlleles.size(), afOfAllelesNotInGermlineResource));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,9 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer
} else if (!MTAC.genotypeGermlineSites) {
final List<VariantContext> germline = featureContext.getValues(MTAC.germlineResource, refInterval);
if (!germline.isEmpty()){
final List<Double> germlineAlleleFrequencies = germline.get(0).getAttributeAsDoubleList(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
if (! germlineAlleleFrequencies.isEmpty() && germlineAlleleFrequencies.get(0) > MTAC.maxPopulationAlleleFrequency) {
final VariantContext germlineVariant = germline.get(0);
final List<Double> germlineAlleleFrequencies = getAttributeAsDoubleList(germlineVariant, VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
if (!germlineAlleleFrequencies.isEmpty() && germlineAlleleFrequencies.get(0) > MTAC.maxPopulationAlleleFrequency) {
return new ActivityProfileState(refInterval, 0.0);
}
}
Expand All @@ -302,6 +303,22 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer
return new ActivityProfileState( refInterval, 1.0, ActivityProfileState.Type.NONE, null);
}

// NOTE: this is a hack to get around an htsjdk bug: https://github.com/samtools/htsjdk/issues/1228
// htsjdk doesn't correctly detect the missing value string '.', so we have copied and fixed the htsjdk code
public static List<Double> getAttributeAsDoubleList(final VariantContext vc, final String key, final double defaultValue) {
return vc.getCommonInfo().getAttributeAsList(key).stream()
.map(x -> {
if (x == null) {
return defaultValue;
} else if (x instanceof Number) {
return ((Number) x).doubleValue();
} else {
String string = (String) x;
return string.equals(VCFConstants.MISSING_VALUE_v4) ? defaultValue : Double.valueOf(string); // throws an exception if this isn't a string
}
}).collect(Collectors.toList());
}

private static int getCurrentOrFollowingIndelLength(final PileupElement pe) {
return pe.isDeletion() ? pe.getCurrentCigarElement().getLength() : pe.getLengthOfImmediatelyFollowingIndel();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest {
private static final File DEEP_MITO_BAM = new File(largeFileTestDir, "mutect/highDPMTsnippet.bam");
private static final String DEEP_MITO_SAMPLE_NAME = "mixture";

private static final File GNOMAD_WITHOUT_AF_SNIPPET = new File(toolsTestDir, "mutect/gnomad-without-af.vcf");

/**
* Several DREAM challenge bams with synthetic truth data. In order to keep file sizes manageable, bams are restricted
* to chromosome 20, leaving ~100-200 variants, and then further restricted to 400-bp intervals centered around
Expand Down Expand Up @@ -351,6 +353,22 @@ public void testGivenAllelesZeroCoverage() throws Exception {
runCommandLine(args);
}

// make sure we have fixed a bug where germline resources with AF=. throw errors
@Test
public void testMissingAF() {
final File bam = new File(DREAM_BAMS_DIR, "tumor_4.bam");
final String sample = "synthetic.challenge.set4.tumour";
final File unfilteredVcf = createTempFile("unfiltered", ".vcf");
final List<String> args = Arrays.asList("-I", bam.getAbsolutePath(),
"-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, sample,
"-R", b37_reference_20_21,
"--" + M2ArgumentCollection.GERMLINE_RESOURCE_LONG_NAME, GNOMAD_WITHOUT_AF_SNIPPET.getAbsolutePath(),
"-L", "20:10086110",
"-L", "20:10837425-10837426",
"-O", unfilteredVcf.getAbsolutePath());
runCommandLine(args);
}

@Test
public void testContaminationFilter() throws Exception {
Utils.resetRandomGenerator();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
##fileformat=VCFv4.2
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
#CHROM POS ID REF ALT QUAL FILTER INFO
20 10086110 rs592026 G A 166.95 PASS AC=32788;AF=.
20 10837425 . TC T 305.46 PASS AC=1;AF=.
Binary file not shown.