Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unused args in VCF merge #5745

Merged
merged 4 commits into from
Mar 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ protected static VariantContext composeGivenAllelesVariantContextFromVariantList
final List<String> haplotypeSources = vcsAtLoc.stream().map(VariantContext::getSource).collect(Collectors.toList());
final VariantContext mergedVc = GATKVariantContextUtils.simpleMerge(vcsAtLoc, haplotypeSources,
keepFiltered ? KEEP_UNCONDITIONAL : KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false);

return mergedVc;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.clipping.ReadClipper;
import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.hellbender.utils.fragments.FragmentCollection;
Expand Down Expand Up @@ -227,7 +226,7 @@ public static VariantContext makeMergedVariantContext(final List<VariantContext>
final List<String> haplotypeSources = vcs.stream().map(VariantContext::getSource).collect(Collectors.toList());
return GATKVariantContextUtils.simpleMerge(vcs, haplotypeSources,
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false);
}


Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
package org.broadinstitute.hellbender.utils.variant;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.TribbleException;
import htsjdk.variant.variantcontext.*;
Expand All @@ -24,11 +21,9 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.*;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;

import java.io.File;
import java.io.Serializable;
import java.util.*;
import java.util.function.BiFunction;
Expand Down Expand Up @@ -632,24 +627,16 @@ public static Genotype removePLsAndAD(final Genotype g) {
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
* @param filteredRecordMergeType merge type for filtered records
* @param genotypeMergeOptions merge option for genotypes
* @param annotateOrigin should we annotate the set it came from?
* @param printMessages should we print messages?
* @param setKey the key name of the set
* @param filteredAreUncalled are filtered records uncalled?
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
* @return new VariantContext representing the merge of unsortedVCs
*/
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
final List<String> priorityListOfVCs,
final FilteredRecordMergeType filteredRecordMergeType,
final GenotypeMergeType genotypeMergeOptions,
final boolean annotateOrigin,
final boolean printMessages,
final String setKey,
final boolean filteredAreUncalled,
final boolean mergeInfoWithMaxAC ) {
final boolean filteredAreUncalled) {
int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size();
return simpleMerge(unsortedVCs, priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, setKey, filteredAreUncalled, mergeInfoWithMaxAC);
return simpleMerge(unsortedVCs, priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType, genotypeMergeOptions, filteredAreUncalled);
}

/**
Expand All @@ -665,32 +652,21 @@ public static VariantContext simpleMerge(final Collection<VariantContext> unsort
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
* @param filteredRecordMergeType merge type for filtered records
* @param genotypeMergeOptions merge option for genotypes
* @param annotateOrigin should we annotate the set it came from?
* @param printMessages should we print messages?
* @param setKey the key name of the set
* @param filteredAreUncalled are filtered records uncalled?
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
* @return new VariantContext representing the merge of unsortedVCs
*/
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
final List<String> priorityListOfVCs,
final int originalNumOfVCs,
final FilteredRecordMergeType filteredRecordMergeType,
final GenotypeMergeType genotypeMergeOptions,
final boolean annotateOrigin,
final boolean printMessages,
final String setKey,
final boolean filteredAreUncalled,
final boolean mergeInfoWithMaxAC ) {
final boolean filteredAreUncalled) {
if ( unsortedVCs == null || unsortedVCs.isEmpty() )
return null;

if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size())
throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list");

if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0)
throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts");

final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
// Make sure all variant contexts are padded with reference base in case of indels if necessary
List<VariantContext> VCs = preFilteredVCs.stream()
Expand All @@ -714,18 +690,13 @@ public static VariantContext simpleMerge(final Collection<VariantContext> unsort

VariantContext longestVC = first;
int depth = 0;
int maxAC = -1;
final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<>();
double log10PError = CommonInfo.NO_LOG10_PERROR;
boolean anyVCHadFiltersApplied = false;
VariantContext vcWithMaxAC = null;
GenotypesContext genotypes = GenotypesContext.create();

// counting the number of filtered and variant VCs
int nFiltered = 0;

boolean remapped = false;

// cycle through and add info from the other VCs, making sure the loc/reference matches
for ( final VariantContext vc : VCs ) {
Utils.validate(longestVC.getStart() == vc.getStart(), () -> "BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
Expand All @@ -737,7 +708,6 @@ public static VariantContext simpleMerge(final Collection<VariantContext> unsort
if ( vc.isVariant() ) variantSources.add(vc.getSource());

AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
remapped = remapped || alleleMapping.needsRemapping();

alleles.addAll(alleleMapping.values());

Expand All @@ -758,26 +728,6 @@ public static VariantContext simpleMerge(final Collection<VariantContext> unsort
if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
if ( vc.hasID() ) rsIDs.add(vc.getID());
if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
// lets see if the string contains a "," separator
if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
final List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
for (final String alleleCount : alleleCountArray) {
final int ac = Integer.valueOf(alleleCount.trim());
if (ac > maxAC) {
maxAC = ac;
vcWithMaxAC = vc;
}
}
} else {
final int ac = Integer.valueOf(rawAlleleCounts);
if (ac > maxAC) {
maxAC = ac;
vcWithMaxAC = vc;
}
}
}

for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
final String key = p.getKey();
Expand Down Expand Up @@ -817,40 +767,10 @@ public static VariantContext simpleMerge(final Collection<VariantContext> unsort
}
}

// take the VC with the maxAC and pull the attributes into a modifiable map
if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
}

// if at least one record was unfiltered and we want a union, clear all of the filters
if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL )
filters.clear();


if ( annotateOrigin ) { // we care about where the call came from
String setValue;
if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was unfiltered
setValue = MERGE_INTERSECTION;
else if ( nFiltered == VCs.size() ) // everything was filtered out
setValue = MERGE_FILTER_IN_ALL;
else if ( variantSources.isEmpty() ) // everyone was reference
setValue = MERGE_REF_IN_ALL;
else {
final LinkedHashSet<String> s = new LinkedHashSet<>();
for ( final VariantContext vc : VCs )
if ( vc.isVariant() )
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
setValue = Utils.join("-", s);
}

if ( setKey != null ) {
attributes.put(setKey, setValue);
if( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
attributesWithMaxAC.put(setKey, setValue);
}
}
}

if ( depth > 0 )
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

Expand All @@ -864,11 +784,10 @@ else if ( variantSources.isEmpty() ) // everyone was reference
if ( anyVCHadFiltersApplied ) {
builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters));
}
builder.attributes(new TreeMap<>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));
builder.attributes(new TreeMap<>(attributes));

// Trim the padded bases of all alleles if necessary
final VariantContext merged = builder.make();
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
return merged;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ public void testMergeAlleles(MergeAllelesTest cfg) {
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
inputs, priority,
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false);

Assert.assertEquals(merged.getAlleles().size(),cfg.expected.size());
Assert.assertEquals(new LinkedHashSet<>(merged.getAlleles()), new LinkedHashSet<>(cfg.expected)); //HACK this is a hack to get around a bug in the htsjdk. The method returns a list with an unspecified order.
Expand Down Expand Up @@ -301,7 +301,7 @@ public void testRSIDMerge(SimpleMergeRSIDTest cfg) {
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
inputs,null,
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false);
Assert.assertEquals(merged.getID(), cfg.expected);
}

Expand Down Expand Up @@ -416,14 +416,11 @@ public Object[][] mergeFilteredData() {
public void testMergeFiltered(MergeFilteredTest cfg) {
final List<String> priority = vcs2priority(cfg.inputs);
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false);

// test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());

// test set field
Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected);

// test filter field
Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters());
}
Expand Down Expand Up @@ -557,7 +554,7 @@ public Object[][] mergeGenotypesData() {
public void testMergeGenotypes(MergeGenotypesTest cfg) {
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
cfg.inputs, cfg.priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false);

// test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
Expand Down Expand Up @@ -598,7 +595,7 @@ public void testMergeGenotypesUniquify() {

final VariantContext merged = GATKVariantContextUtils.simpleMerge(
Arrays.asList(vc1, vc2), null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false);

// test genotypes
Assert.assertEquals(merged.getSampleNames(), new LinkedHashSet<>(Arrays.asList("s1.1", "s1.2")));
Expand All @@ -620,27 +617,7 @@ public void testMergeGenotypesUniquify() {
// Misc. tests
//
// --------------------------------------------------------------------------------

@Test
public void testAnnotationSet() {
for ( final boolean annotate : Arrays.asList(true, false)) {
for ( final String set : Arrays.asList("set", "combine", "x")) {
final List<String> priority = Arrays.asList("1", "2");
VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);

final VariantContext merged = GATKVariantContextUtils.simpleMerge(
Arrays.asList(vc1, vc2), priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);

if ( annotate )
Assert.assertEquals(merged.getAttribute(set), GATKVariantContextUtils.MERGE_INTERSECTION);
else
Assert.assertFalse(merged.hasAttribute(set));
}
}
}


private static List<String> vcs2priority(final Collection<VariantContext> vcs) {
return vcs.stream()
.map(VariantContext::getSource)
Expand Down