diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java index c118bf28fe8..f32ef8a7b00 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java @@ -57,7 +57,7 @@ protected static VariantContext composeGivenAllelesVariantContextFromVariantList final List haplotypeSources = vcsAtLoc.stream().map(VariantContext::getSource).collect(Collectors.toList()); final VariantContext mergedVc = GATKVariantContextUtils.simpleMerge(vcsAtLoc, haplotypeSources, keepFiltered ? KEEP_UNCONDITIONAL : KEEP_IF_ANY_UNFILTERED, - GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); + GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false); return mergedVc; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java index c8422910e8a..821809c8c54 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java @@ -14,7 +14,6 @@ import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.hellbender.utils.QualityUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.clipping.ReadClipper; import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.hellbender.utils.fragments.FragmentCollection; @@ -227,7 +226,7 @@ public static VariantContext makeMergedVariantContext(final List final List haplotypeSources = vcs.stream().map(VariantContext::getSource).collect(Collectors.toList()); return GATKVariantContextUtils.simpleMerge(vcs, haplotypeSources, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); + GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 7d28ff4c6e9..9147e715b95 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -1,9 +1,6 @@ package org.broadinstitute.hellbender.utils.variant; -import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.util.CollectionUtil; -import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Locatable; import htsjdk.tribble.TribbleException; import htsjdk.variant.variantcontext.*; @@ -24,11 +21,9 @@ import org.broadinstitute.hellbender.tools.walkers.genotyper.*; import org.broadinstitute.hellbender.utils.BaseUtils; import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.param.ParamUtils; -import java.io.File; import java.io.Serializable; import java.util.*; import java.util.function.BiFunction; @@ -632,24 +627,16 @@ public static Genotype removePLsAndAD(final Genotype g) { * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs * @param filteredRecordMergeType merge type for filtered records * @param genotypeMergeOptions merge option for genotypes - * @param annotateOrigin should we annotate the set it came from? - * @param printMessages should we print messages? - * @param setKey the key name of the set * @param filteredAreUncalled are filtered records uncalled? - * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? * @return new VariantContext representing the merge of unsortedVCs */ public static VariantContext simpleMerge(final Collection unsortedVCs, final List priorityListOfVCs, final FilteredRecordMergeType filteredRecordMergeType, final GenotypeMergeType genotypeMergeOptions, - final boolean annotateOrigin, - final boolean printMessages, - final String setKey, - final boolean filteredAreUncalled, - final boolean mergeInfoWithMaxAC ) { + final boolean filteredAreUncalled) { int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size(); - return simpleMerge(unsortedVCs, priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, setKey, filteredAreUncalled, mergeInfoWithMaxAC); + return simpleMerge(unsortedVCs, priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType, genotypeMergeOptions, filteredAreUncalled); } /** @@ -665,11 +652,7 @@ public static VariantContext simpleMerge(final Collection unsort * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs * @param filteredRecordMergeType merge type for filtered records * @param genotypeMergeOptions merge option for genotypes - * @param annotateOrigin should we annotate the set it came from? - * @param printMessages should we print messages? - * @param setKey the key name of the set * @param filteredAreUncalled are filtered records uncalled? - * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? * @return new VariantContext representing the merge of unsortedVCs */ public static VariantContext simpleMerge(final Collection unsortedVCs, @@ -677,20 +660,13 @@ public static VariantContext simpleMerge(final Collection unsort final int originalNumOfVCs, final FilteredRecordMergeType filteredRecordMergeType, final GenotypeMergeType genotypeMergeOptions, - final boolean annotateOrigin, - final boolean printMessages, - final String setKey, - final boolean filteredAreUncalled, - final boolean mergeInfoWithMaxAC ) { + final boolean filteredAreUncalled) { if ( unsortedVCs == null || unsortedVCs.isEmpty() ) return null; if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size()) throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list"); - if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0) - throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts"); - final List preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); // Make sure all variant contexts are padded with reference base in case of indels if necessary List VCs = preFilteredVCs.stream() @@ -714,18 +690,13 @@ public static VariantContext simpleMerge(final Collection unsort VariantContext longestVC = first; int depth = 0; - int maxAC = -1; - final Map attributesWithMaxAC = new LinkedHashMap<>(); double log10PError = CommonInfo.NO_LOG10_PERROR; boolean anyVCHadFiltersApplied = false; - VariantContext vcWithMaxAC = null; GenotypesContext genotypes = GenotypesContext.create(); // counting the number of filtered and variant VCs int nFiltered = 0; - boolean remapped = false; - // cycle through and add info from the other VCs, making sure the loc/reference matches for ( final VariantContext vc : VCs ) { Utils.validate(longestVC.getStart() == vc.getStart(), () -> "BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); @@ -737,7 +708,6 @@ public static VariantContext simpleMerge(final Collection unsort if ( vc.isVariant() ) variantSources.add(vc.getSource()); AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles); - remapped = remapped || alleleMapping.needsRemapping(); alleles.addAll(alleleMapping.values()); @@ -758,26 +728,6 @@ public static VariantContext simpleMerge(final Collection unsort if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0); if ( vc.hasID() ) rsIDs.add(vc.getID()); - if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { - String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null); - // lets see if the string contains a "," separator - if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) { - final List alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)); - for (final String alleleCount : alleleCountArray) { - final int ac = Integer.valueOf(alleleCount.trim()); - if (ac > maxAC) { - maxAC = ac; - vcWithMaxAC = vc; - } - } - } else { - final int ac = Integer.valueOf(rawAlleleCounts); - if (ac > maxAC) { - maxAC = ac; - vcWithMaxAC = vc; - } - } - } for (final Map.Entry p : vc.getAttributes().entrySet()) { final String key = p.getKey(); @@ -817,40 +767,10 @@ public static VariantContext simpleMerge(final Collection unsort } } - // take the VC with the maxAC and pull the attributes into a modifiable map - if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) { - attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes()); - } - // if at least one record was unfiltered and we want a union, clear all of the filters if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL ) filters.clear(); - - if ( annotateOrigin ) { // we care about where the call came from - String setValue; - if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was unfiltered - setValue = MERGE_INTERSECTION; - else if ( nFiltered == VCs.size() ) // everything was filtered out - setValue = MERGE_FILTER_IN_ALL; - else if ( variantSources.isEmpty() ) // everyone was reference - setValue = MERGE_REF_IN_ALL; - else { - final LinkedHashSet s = new LinkedHashSet<>(); - for ( final VariantContext vc : VCs ) - if ( vc.isVariant() ) - s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() ); - setValue = Utils.join("-", s); - } - - if ( setKey != null ) { - attributes.put(setKey, setValue); - if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { - attributesWithMaxAC.put(setKey, setValue); - } - } - } - if ( depth > 0 ) attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth)); @@ -864,11 +784,10 @@ else if ( variantSources.isEmpty() ) // everyone was reference if ( anyVCHadFiltersApplied ) { builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters)); } - builder.attributes(new TreeMap<>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes)); + builder.attributes(new TreeMap<>(attributes)); // Trim the padded bases of all alleles if necessary final VariantContext merged = builder.make(); - if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); return merged; } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtilsUnitTest.java index fb093c30491..e974e069778 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -244,7 +244,7 @@ public void testMergeAlleles(MergeAllelesTest cfg) { final VariantContext merged = GATKVariantContextUtils.simpleMerge( inputs, priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); + GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false); Assert.assertEquals(merged.getAlleles().size(),cfg.expected.size()); Assert.assertEquals(new LinkedHashSet<>(merged.getAlleles()), new LinkedHashSet<>(cfg.expected)); //HACK this is a hack to get around a bug in the htsjdk. The method returns a list with an unspecified order. @@ -301,7 +301,7 @@ public void testRSIDMerge(SimpleMergeRSIDTest cfg) { final VariantContext merged = GATKVariantContextUtils.simpleMerge( inputs,null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); + GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false); Assert.assertEquals(merged.getID(), cfg.expected); } @@ -416,14 +416,11 @@ public Object[][] mergeFilteredData() { public void testMergeFiltered(MergeFilteredTest cfg) { final List priority = vcs2priority(cfg.inputs); final VariantContext merged = GATKVariantContextUtils.simpleMerge( - cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false); // test alleles are equal Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); - // test set field - Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected); - // test filter field Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); } @@ -557,7 +554,7 @@ public Object[][] mergeGenotypesData() { public void testMergeGenotypes(MergeGenotypesTest cfg) { final VariantContext merged = GATKVariantContextUtils.simpleMerge( cfg.inputs, cfg.priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false); // test alleles are equal Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); @@ -598,7 +595,7 @@ public void testMergeGenotypesUniquify() { final VariantContext merged = GATKVariantContextUtils.simpleMerge( Arrays.asList(vc1, vc2), null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); + GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false); // test genotypes Assert.assertEquals(merged.getSampleNames(), new LinkedHashSet<>(Arrays.asList("s1.1", "s1.2"))); @@ -620,27 +617,7 @@ public void testMergeGenotypesUniquify() { // Misc. tests // // -------------------------------------------------------------------------------- - - @Test - public void testAnnotationSet() { - for ( final boolean annotate : Arrays.asList(true, false)) { - for ( final String set : Arrays.asList("set", "combine", "x")) { - final List priority = Arrays.asList("1", "2"); - VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); - VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); - - final VariantContext merged = GATKVariantContextUtils.simpleMerge( - Arrays.asList(vc1, vc2), priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false); - - if ( annotate ) - Assert.assertEquals(merged.getAttribute(set), GATKVariantContextUtils.MERGE_INTERSECTION); - else - Assert.assertFalse(merged.hasAttribute(set)); - } - } - } - + private static List vcs2priority(final Collection vcs) { return vcs.stream() .map(VariantContext::getSource)