Skip to content

Commit

Permalink
SelectVariants and VariantFiltration not updating AC, AN and AF for -…
Browse files Browse the repository at this point in the history
…-setFilteredGtToNocall
  • Loading branch information
ronlevine committed Sep 15, 2016
1 parent 4ec1b85 commit 88acbda
Show file tree
Hide file tree
Showing 19 changed files with 534 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.io.File;
import java.util.*;
Expand Down Expand Up @@ -204,6 +205,13 @@ private void initializeVcfWriter() {
final Set<VCFHeaderLine> hInfo = new LinkedHashSet<>();
hInfo.addAll(getHeaderForVariants().getMetaDataInInputOrder());

// need AC, AN and AF since output if set filtered genotypes to no-call
// If setting filtered genotypes to no-call, then allele counts (AC, AN and AF ) will be recomputed and these annotations
// need to be included in the header
if ( setFilteredGenotypesToNocall ) {
GATKVariantContextUtils.addChromosomeCountsToHeader(hInfo);
}

if ( clusterWindow > 0 ) {
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
}
Expand Down Expand Up @@ -288,7 +296,7 @@ private void filter(final VariantContext vc, final FeatureContext featureContext

// make new Genotypes based on filters
if ( !genotypeFilterExps.isEmpty() || setFilteredGenotypesToNocall ) {
builder.genotypes(makeGenotypes(vc));
GATKVariantContextUtils.setFilteredGenotypeToNocall(builder, vc, setFilteredGenotypesToNocall, this::getGenotypeFilters);
}

// make a new variant context based on filters
Expand Down Expand Up @@ -321,35 +329,27 @@ private void filter(final VariantContext vc, final FeatureContext featureContext
writer.add(builder.make());
}

private GenotypesContext makeGenotypes(final VariantContext vc) {
final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());

// for each genotype, check filters then create a new object
for ( final Genotype g : vc.getGenotypes() ) {
if ( g.isCalled() ) {
final List<String> filters = new ArrayList<>();
if ( g.isFiltered() ) {
filters.add(g.getFilters());
}

// Add if expression filters the variant context
for ( final JexlVCMatchExp exp : genotypeFilterExps ) {
if ( invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression) ) {
filters.add(exp.name);
}
}
/**
* Get the genotype filters
*
* @param vc the variant context
* @param g the genotype
* @return list of genotype filter names
*/
private List<String> getGenotypeFilters(final VariantContext vc, final Genotype g) {
final List<String> filters = new ArrayList<>();
if (g.isFiltered()) {
filters.addAll(Arrays.asList(g.getFilters().split(";")));
}

// if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
if ( !filters.isEmpty() && setFilteredGenotypesToNocall ) {
genotypes.add(new GenotypeBuilder(g).filters(filters).alleles(diploidNoCallAlleles).make());
} else {
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
}
} else {
genotypes.add(g);
// Add if expression filters the variant context
for (final JexlVCMatchExp exp : genotypeFilterExps) {
if (invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression)) {
filters.add(exp.name);
}
}
return genotypes;

return filters;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.broadinstitute.hellbender.engine.filters.VariantTypesVariantFilter;
import org.broadinstitute.hellbender.engine.VariantWalker;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.ChromosomeCounts;
import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.utils.commandline.HiddenOption;
Expand Down Expand Up @@ -562,7 +563,11 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext
initalizeAlleleAnyploidIndicesCache(vc);

final VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates);
final VariantContext filteredGenotypeToNocall = setFilteredGenotypesToNocall ? setFilteredGenotypeToNocall(sub) : sub;
final VariantContextBuilder builder = new VariantContextBuilder(vc);
if ( setFilteredGenotypesToNocall ) {
GATKVariantContextUtils.setFilteredGenotypeToNocall(builder, sub, setFilteredGenotypesToNocall, this::getGenotypeFilters);
}
final VariantContext filteredGenotypeToNocall = setFilteredGenotypesToNocall ? builder.make(): sub;

// Not excluding non-variants or subsetted polymorphic variants AND including filtered loci or subsetted variant is not filtered
if ((!XLnonVariants || filteredGenotypeToNocall.isPolymorphicInSamples()) && (!XLfiltered || !filteredGenotypeToNocall.isFiltered())) {
Expand Down Expand Up @@ -591,6 +596,22 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext
}
}

/**
* Get the genotype filters
*
* @param vc the variant context
* @param g the genotype
* @return list of genotype filter names
*/
private List<String> getGenotypeFilters(final VariantContext vc, final Genotype g) {
final List<String> filters = new ArrayList<>();
if (g.isFiltered()) {
filters.add(g.getFilters());
}

return filters;
}

/**
* Initialize cache of allele anyploid indices
*
Expand Down Expand Up @@ -738,6 +759,11 @@ private Set<VCFHeaderLine> createVCFHeaderLineList(Map<String, VCFHeader> vcfHea
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfHeaders.values(), true);
headerLines.add(new VCFHeaderLine("source", this.getClass().getSimpleName()));

// need AC, AN and AF since output if set filtered genotypes to no-call
if (setFilteredGenotypesToNocall) {
GATKVariantContextUtils.addChromosomeCountsToHeader(headerLines);
}

if (keepOriginalChrCounts) {
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AC_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AF_KEY));
Expand All @@ -747,7 +773,7 @@ private Set<VCFHeaderLine> createVCFHeaderLineList(Map<String, VCFHeader> vcfHea
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_DP_KEY));
}

headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));

return headerLines;
Expand Down Expand Up @@ -1006,7 +1032,7 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean prese

if (fractionGenotypes > 0) {
final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype :
new GenotypeBuilder(genotype).alleles(diploidNoCallAlleles).noGQ().make()).collect(Collectors.toList());
new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
newGC = GenotypesContext.create(new ArrayList<>(genotypes));
}

Expand All @@ -1020,20 +1046,6 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean prese
return preserveAlleles? subset : GATKVariantContextUtils.trimAlleles(subset,true,true);
}

/**
* If --setFilteredGtToNocall, set filtered genotypes to no-call
*
* @param vc the VariantContext record to set filtered genotypes to no-call
* @return the VariantContext with no-call genotypes if the sample was filtered
*/
private VariantContext setFilteredGenotypeToNocall(final VariantContext vc) {
final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
vc.getGenotypes().stream()
.map(g -> g.isCalled() && g.isFiltered() ? new GenotypeBuilder(g).alleles(diploidNoCallAlleles).make() : g)
.forEach(genotypes::add);
return new VariantContextBuilder(vc).genotypes(genotypes).make();
}

/**
* Get the ploidy number of NO-CALL alleles
*
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,20 @@
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.commons.io.FilenameUtils;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.utils.*;

import java.io.File;
import java.io.Serializable;
import java.util.*;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

public final class GATKVariantContextUtils {
Expand Down Expand Up @@ -1379,4 +1381,58 @@ private static int indexOfSameAllele(final VariantContext vc, final Allele allel

return -1;
}

/**
* Add chromosome counts (AC, AN and AF) to the VCF header lines
*
* @param headerLines the VCF header lines
*/
public static void addChromosomeCountsToHeader(final Set<VCFHeaderLine> headerLines) {
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
}

/**
* Set the builder's filtered genotypes to no-call and update AC, AN and AF
*
* @param builder builder for variant context
* @param vc the VariantContext record to set filtered genotypes to no-call
* @param setFilteredGenotypesToNocall flag to set filtered genotype to NO CALL
* @param filters the filters for each genotype
*/
public static void setFilteredGenotypeToNocall(final VariantContextBuilder builder, final VariantContext vc,
final boolean setFilteredGenotypesToNocall, BiFunction<VariantContext, Genotype, List<String>> filters) {
Utils.nonNull(vc);
Utils.nonNull(builder);
Utils.nonNull(filters);

final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());

// update genotypes if filtered genotypes are set to no-call
boolean haveFilteredNoCallAlleles = false;
for (final Genotype g : vc.getGenotypes()) {
if (g.isCalled()) {
List<String> filterNames = filters.apply(vc,g);
if (!filterNames.isEmpty() && setFilteredGenotypesToNocall) {
haveFilteredNoCallAlleles = true;
genotypes.add(new GenotypeBuilder(g).filters(filterNames).alleles(GATKVariantContextUtils.noCallAlleles((g.getPloidy()))).make());
} else {
genotypes.add(new GenotypeBuilder(g).filters(filterNames).make());
}
} else {
genotypes.add(g);
}
}

// if filtered genotypes are set to no-call, output recomputed AC, AN, AF
if ( haveFilteredNoCallAlleles ) {
final Map<String, Object> attributes = new LinkedHashMap<>(vc.getAttributes()); // need to make mutable
VariantContextUtils.calculateChromosomeCounts(builder.genotypes(genotypes).make(), attributes, true, vc.getSampleNames());
builder.attributes(attributes);
}

// update genotypes
builder.genotypes(genotypes);
}
}
Loading

0 comments on commit 88acbda

Please sign in to comment.