Skip to content

Commit

Permalink
Fix case where hom-ref "variant" with no data had wrong sized PLs and (
Browse files Browse the repository at this point in the history
…#7644)

* Fix case where hom-ref "variant" with no data had wrong sized PLs and
didn't merge with adjacent block

* Address review comments
  • Loading branch information
ldgauthier authored Jan 31, 2022
1 parent 4cbff5d commit d373a50
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ private AlleleSubsettingUtils() {} // prevent instantiation
* @param allelesToKeep the subset of alleles to use with the new Genotypes
* @param assignmentMethod assignment strategy for the (subsetted) PLs
* @param depth the original variant DP or 0 if there was no DP
* @param emitEmptyPLs force the output of a PL array even if there is no data
* @param suppressUninformativePLs force the output of a PL array even if there is no data
* @return a new non-null GenotypesContext
*/
public static GenotypesContext subsetAlleles(final GenotypesContext originalGs, final int defaultPloidy,
Expand All @@ -55,7 +55,7 @@ public static GenotypesContext subsetAlleles(final GenotypesContext originalGs,
final GenotypePriorCalculator gpc,
final GenotypeAssignmentMethod assignmentMethod,
final int depth,
final boolean emitEmptyPLs) {
final boolean suppressUninformativePLs) {
Utils.nonNull(originalGs, "original GenotypesContext must not be null");
Utils.nonNull(allelesToKeep, "allelesToKeep is null");
Utils.nonEmpty(allelesToKeep, "must keep at least one allele");
Expand Down Expand Up @@ -90,7 +90,7 @@ public static GenotypesContext subsetAlleles(final GenotypesContext originalGs,
newLog10GQ = -0.1*g.getGQ();
}

final boolean useNewLikelihoods = emitEmptyPLs ||
final boolean useNewLikelihoods = !suppressUninformativePLs ||
(newLikelihoods != null && (depth != 0 || GATKVariantContextUtils.isInformative(newLikelihoods)));

final GenotypeBuilder gb = new GenotypeBuilder(g);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype
if (maxAltAlleles < vc.getAlternateAlleles().size()) {
final List<Allele> allelesToKeep = AlleleSubsettingUtils.calculateMostLikelyAlleles(vc, defaultPloidy, maxAltAlleles);
final GenotypesContext reducedGenotypes = allelesToKeep.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) :
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, gpc, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, gpc, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
reducedVC = new VariantContextBuilder(vc).alleles(allelesToKeep).genotypes(reducedGenotypes).make();
}

Expand Down Expand Up @@ -181,7 +181,7 @@ && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles) && givenA
// create the genotypes
//TODO: omit subsetting if output alleles is not a proper subset of vc.getAlleles
final GenotypesContext genotypes = outputAlleles.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) :
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, gpc, configuration.genotypeArgs.genotypeAssignmentMethod, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, gpc, configuration.genotypeArgs.genotypeAssignmentMethod, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);

if (configuration.genotypeArgs.usePosteriorProbabilitiesToCalculateQual && hasPosteriors(genotypes)) {
final double log10NoVariantPosterior = phredNoVariantPosteriorProbability(outputAlleles, genotypes) * -.1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -507,9 +507,14 @@ protected GenotypeBuilder changeCallToHomRefVersusNonRef(final VariantContext lo
} else {
final List<Allele> bestAlleles = AlleleSubsettingUtils.calculateMostLikelyAlleles(lowQualVariant, genotype.getPloidy(), 1);
final Allele bestAlt = bestAlleles.stream().filter(a -> !a.isReference()).findFirst().orElse(Allele.NON_REF_ALLELE); //allow span dels
//we care about the best alt even though it's getting removed because NON_REF should get the best likelihoods
//it shouldn't matter that we're passing in different alt alleles since the GenotypesContext only knows
// the called alleles and this is a reference genotype that will stay hom-ref
final GenotypesContext context = AlleleSubsettingUtils.subsetAlleles(lowQualVariant.getGenotypes(),
genotype.getPloidy(), lowQualVariant.getAlleles(), Arrays.asList(inputRefAllele, bestAlt),
null, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, lowQualVariant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false); //BEST_MATCH to avoid no-calling low qual genotypes
null, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, //BEST_MATCH to avoid no-calling low qual genotypes
lowQualVariant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0),
false); //emitEmptyPLs = true to make sure we always subset
final Genotype subsetG = context.get(0);
gb = new GenotypeBuilder(subsetG).noAttributes(); //remove attributes because hom ref blocks shouldn't have posteriors
//subsetting may strip GQ and PLs for low qual genotypes
Expand Down Expand Up @@ -564,7 +569,7 @@ VariantContext cleanUpHighQualityVariant(final VariantContext variant) {
newAlleleSetUntrimmed.removeAll(allelesToDrop);
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(variant.getGenotypes(), genotype.getPloidy(), variant.getAlleles(),
newAlleleSetUntrimmed, null, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN,
variant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
variant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
if (gc.get(0).isHomRef() || !gc.get(0).hasGQ() || gc.get(0).getAlleles().contains(Allele.NO_CALL)) { //could be low quality or no-call after subsetting
if (dropLowQuals) {
return null;
Expand Down Expand Up @@ -775,7 +780,7 @@ private static void addQualAnnotations(final Map<String, Object> destination, fi
//TODO: this isn't going to work for DRAGEN's genotype posteriors
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(updatedAllelesVC.getGenotypes(),
updatedAllelesGenotype.getPloidy(), updatedAllelesVC.getAlleles(), Arrays.asList(updatedAllelesVC.getReference(), alt), null,
GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, 0, false);
GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, 0, true);
//assignment method doens't really matter as long as we don't zero out PLs; don't need depth to get PLs for quals

final Genotype subsettedGenotype = gc.get(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.VariantContextUtils;
Expand Down Expand Up @@ -34,9 +33,7 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.utils.samples.MendelianViolation;
import org.broadinstitute.hellbender.utils.samples.PedigreeValidationType;
import org.broadinstitute.hellbender.utils.samples.SampleDB;
import org.broadinstitute.hellbender.utils.samples.SampleDBBuilder;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.*;
Expand Down Expand Up @@ -1033,7 +1030,7 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean prese
// fix the PL and AD values if sub has fewer alleles than original vc
final GenotypesContext subGenotypesWithOldAlleles = sub.getGenotypes(); //we need sub for the right samples, but PLs still go with old alleles
newGC = sub.getNAlleles() == vc.getNAlleles() ? subGenotypesWithOldAlleles :
AlleleSubsettingUtils.subsetAlleles(subGenotypesWithOldAlleles, 0, vc.getAlleles(), sub.getAlleles(), null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
AlleleSubsettingUtils.subsetAlleles(subGenotypesWithOldAlleles, 0, vc.getAlleles(), sub.getAlleles(), null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
} else {
newGC = sub.getGenotypes();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.TribbleException;
import htsjdk.utils.ValidationUtils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.Options;
Expand Down Expand Up @@ -1688,7 +1687,7 @@ public static List<VariantContext> splitVariantContextToBiallelics(final Variant
genotypeAssignmentMethodUsed != GenotypeAssignmentMethod.SET_TO_NO_CALL)
AlleleSubsettingUtils.addInfoFieldAnnotations(vc, builder, keepOriginalChrCounts);

builder.genotypes(AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(), alleles, null, genotypeAssignmentMethodUsed,vc.getAttributeAsInt("DP",0), false));
builder.genotypes(AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(), alleles, null, genotypeAssignmentMethodUsed,vc.getAttributeAsInt("DP",0), true));
final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
biallelics.add(trimmed);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public void testUpdatePLsAndADData(final VariantContext originalVC,
AlleleSubsettingUtils.subsetAlleles(oldGs, 0, originalVC.getAlleles(),
selectedVCwithGTs.getAlleles(), null,
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES,
originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);

Assert.assertEquals(actual.size(), expectedGenotypes.size());
for ( final Genotype expected : expectedGenotypes ) {
Expand Down Expand Up @@ -327,7 +327,7 @@ public void testUninformativePLsAreKeptWhenDepthIsNotZero(){
final List<Allele> alleles = Arrays.asList(Aref);
final Genotype uniformativePL = new GenotypeBuilder("sample", alleles).PL(new int[] {0}).make();
final GenotypesContext result = AlleleSubsettingUtils.subsetAlleles(GenotypesContext.create(uniformativePL), 2,
alleles, alleles, null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, false);
alleles, alleles, null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, true);
final Genotype genotype = result.get(0);
Assert.assertTrue(genotype.hasPL());
Assert.assertEquals(genotype.getPL(), new int[]{0});
Expand Down Expand Up @@ -412,7 +412,7 @@ public void testAlleleReorderingBehavior() {

final GenotypesContext newGs = AlleleSubsettingUtils.subsetAlleles(GenotypesContext.create(g5),
2, threeAlleles, threeAllelesSorted, null,
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, false);
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, true);

Assert.assertEquals(newGs.get(0).getPL(), new int[] {50, 20, 0, 40, 10, 30});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ public void testLowQualVariantToGQ0HomRef() {
Assert.assertTrue(!modified.filtersWereApplied());
Assert.assertEquals(modified.getLog10PError(), VariantContext.NO_LOG10_PERROR);

final Genotype longPls = VariantContextTestUtils.makeG("sample1", Allele.NO_CALL, Allele.NO_CALL, 0,0,0,0,0,0);
final VariantContext lotsOfZeroPls = makeNoDepthVC("lowQualVar", Arrays.asList(LONG_REF, DELETION, Allele.NON_REF_ALLELE), LONG_REF.length(), longPls);
final VariantContext properlySubset = reblocker.lowQualVariantToGQ0HomRef(lotsOfZeroPls).make();
Assert.assertEquals(properlySubset.getGenotype(0).getPL().length, 3);
Assert.assertEquals(properlySubset.getGenotype(0).getPL(), new int[]{0,0,0});

//No-calls were throwing NPEs. Now they're not.
final Genotype g2 = VariantContextTestUtils.makeG("sample1", Allele.NO_CALL,Allele.NO_CALL);
final VariantContext noData = makeDeletionVC("noData", Arrays.asList(LONG_REF, DELETION, Allele.NON_REF_ALLELE), LONG_REF.length(), g2);
Expand Down Expand Up @@ -425,6 +431,13 @@ private VariantContext makeDeletionVC(final String source, final List<Allele> al
.genotypes(Arrays.asList(genotypes)).unfiltered().log10PError(-3.0).attribute(VCFConstants.DEPTH_KEY, EXAMPLE_DP).make();
}

private VariantContext makeNoDepthVC(final String source, final List<Allele> alleles, final int refLength, final Genotype... genotypes) {
final int start = DEFAULT_START;
final int stop = start+refLength-1;
return new VariantContextBuilder(source, "1", start, stop, alleles)
.genotypes(Arrays.asList(genotypes)).unfiltered().log10PError(-3.0).make();
}

private Genotype addAD(final Genotype g, final int... ads) {
return new GenotypeBuilder(g).AD(ads).make();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ public static VariantContext sortAlleles(final VariantContext vc, final VCFHeade
result.alleles(sortedAlleles);

GenotypesContext newGT = AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(),sortedAlleles, null,
GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY,0), false);
GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY,0), true);

// Asserting that the new genotypes were calculated properly in case AlleleSubsettingUtils behavior changes
if (newGT.getSampleNames().size() != vc.getGenotypes().size()) throw new IllegalStateException("Sorting this variant context resulted in a different number of genotype alleles, check that AlleleSubsettingUtils still supports reordering:" + vc.toString());
Expand Down

0 comments on commit d373a50

Please sign in to comment.