Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix case where hom-ref "variant" with no data had wrong sized PLs and #7644

Merged
merged 2 commits into from
Jan 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ private AlleleSubsettingUtils() {} // prevent instantiation
* @param allelesToKeep the subset of alleles to use with the new Genotypes
* @param assignmentMethod assignment strategy for the (subsetted) PLs
* @param depth the original variant DP or 0 if there was no DP
* @param emitEmptyPLs force the output of a PL array even if there is no data
* @param suppressUninformativePLs force the output of a PL array even if there is no data
* @return a new non-null GenotypesContext
*/
public static GenotypesContext subsetAlleles(final GenotypesContext originalGs, final int defaultPloidy,
Expand All @@ -55,7 +55,7 @@ public static GenotypesContext subsetAlleles(final GenotypesContext originalGs,
final GenotypePriorCalculator gpc,
final GenotypeAssignmentMethod assignmentMethod,
final int depth,
final boolean emitEmptyPLs) {
final boolean suppressUninformativePLs) {
Utils.nonNull(originalGs, "original GenotypesContext must not be null");
Utils.nonNull(allelesToKeep, "allelesToKeep is null");
Utils.nonEmpty(allelesToKeep, "must keep at least one allele");
Expand Down Expand Up @@ -90,7 +90,7 @@ public static GenotypesContext subsetAlleles(final GenotypesContext originalGs,
newLog10GQ = -0.1*g.getGQ();
}

final boolean useNewLikelihoods = emitEmptyPLs ||
final boolean useNewLikelihoods = !suppressUninformativePLs ||
(newLikelihoods != null && (depth != 0 || GATKVariantContextUtils.isInformative(newLikelihoods)));

final GenotypeBuilder gb = new GenotypeBuilder(g);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype
if (maxAltAlleles < vc.getAlternateAlleles().size()) {
final List<Allele> allelesToKeep = AlleleSubsettingUtils.calculateMostLikelyAlleles(vc, defaultPloidy, maxAltAlleles);
final GenotypesContext reducedGenotypes = allelesToKeep.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) :
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, gpc, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, gpc, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
reducedVC = new VariantContextBuilder(vc).alleles(allelesToKeep).genotypes(reducedGenotypes).make();
}

Expand Down Expand Up @@ -181,7 +181,7 @@ && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles) && givenA
// create the genotypes
//TODO: omit subsetting if output alleles is not a proper subset of vc.getAlleles
final GenotypesContext genotypes = outputAlleles.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) :
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, gpc, configuration.genotypeArgs.genotypeAssignmentMethod, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, gpc, configuration.genotypeArgs.genotypeAssignmentMethod, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);

if (configuration.genotypeArgs.usePosteriorProbabilitiesToCalculateQual && hasPosteriors(genotypes)) {
final double log10NoVariantPosterior = phredNoVariantPosteriorProbability(outputAlleles, genotypes) * -.1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -507,9 +507,14 @@ protected GenotypeBuilder changeCallToHomRefVersusNonRef(final VariantContext lo
} else {
final List<Allele> bestAlleles = AlleleSubsettingUtils.calculateMostLikelyAlleles(lowQualVariant, genotype.getPloidy(), 1);
final Allele bestAlt = bestAlleles.stream().filter(a -> !a.isReference()).findFirst().orElse(Allele.NON_REF_ALLELE); //allow span dels
//we care about the best alt even though it's getting removed because NON_REF should get the best likelihoods
//it shouldn't matter that we're passing in different alt alleles since the GenotypesContext only knows
// the called alleles and this is a reference genotype that will stay hom-ref
final GenotypesContext context = AlleleSubsettingUtils.subsetAlleles(lowQualVariant.getGenotypes(),
genotype.getPloidy(), lowQualVariant.getAlleles(), Arrays.asList(inputRefAllele, bestAlt),
null, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, lowQualVariant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false); //BEST_MATCH to avoid no-calling low qual genotypes
null, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, //BEST_MATCH to avoid no-calling low qual genotypes
lowQualVariant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0),
false); //emitEmptyPLs = true to make sure we always subset
final Genotype subsetG = context.get(0);
gb = new GenotypeBuilder(subsetG).noAttributes(); //remove attributes because hom ref blocks shouldn't have posteriors
//subsetting may strip GQ and PLs for low qual genotypes
Expand Down Expand Up @@ -564,7 +569,7 @@ VariantContext cleanUpHighQualityVariant(final VariantContext variant) {
newAlleleSetUntrimmed.removeAll(allelesToDrop);
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(variant.getGenotypes(), genotype.getPloidy(), variant.getAlleles(),
newAlleleSetUntrimmed, null, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN,
variant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
variant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
if (gc.get(0).isHomRef() || !gc.get(0).hasGQ() || gc.get(0).getAlleles().contains(Allele.NO_CALL)) { //could be low quality or no-call after subsetting
if (dropLowQuals) {
return null;
Expand Down Expand Up @@ -775,7 +780,7 @@ private static void addQualAnnotations(final Map<String, Object> destination, fi
//TODO: this isn't going to work for DRAGEN's genotype posteriors
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(updatedAllelesVC.getGenotypes(),
updatedAllelesGenotype.getPloidy(), updatedAllelesVC.getAlleles(), Arrays.asList(updatedAllelesVC.getReference(), alt), null,
GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, 0, false);
GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, 0, true);
//assignment method doens't really matter as long as we don't zero out PLs; don't need depth to get PLs for quals

final Genotype subsettedGenotype = gc.get(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.VariantContextUtils;
Expand Down Expand Up @@ -34,9 +33,7 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.utils.samples.MendelianViolation;
import org.broadinstitute.hellbender.utils.samples.PedigreeValidationType;
import org.broadinstitute.hellbender.utils.samples.SampleDB;
import org.broadinstitute.hellbender.utils.samples.SampleDBBuilder;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.*;
Expand Down Expand Up @@ -1033,7 +1030,7 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean prese
// fix the PL and AD values if sub has fewer alleles than original vc
final GenotypesContext subGenotypesWithOldAlleles = sub.getGenotypes(); //we need sub for the right samples, but PLs still go with old alleles
newGC = sub.getNAlleles() == vc.getNAlleles() ? subGenotypesWithOldAlleles :
AlleleSubsettingUtils.subsetAlleles(subGenotypesWithOldAlleles, 0, vc.getAlleles(), sub.getAlleles(), null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
AlleleSubsettingUtils.subsetAlleles(subGenotypesWithOldAlleles, 0, vc.getAlleles(), sub.getAlleles(), null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
} else {
newGC = sub.getGenotypes();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.TribbleException;
import htsjdk.utils.ValidationUtils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.Options;
Expand Down Expand Up @@ -1688,7 +1687,7 @@ public static List<VariantContext> splitVariantContextToBiallelics(final Variant
genotypeAssignmentMethodUsed != GenotypeAssignmentMethod.SET_TO_NO_CALL)
AlleleSubsettingUtils.addInfoFieldAnnotations(vc, builder, keepOriginalChrCounts);

builder.genotypes(AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(), alleles, null, genotypeAssignmentMethodUsed,vc.getAttributeAsInt("DP",0), false));
builder.genotypes(AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(), alleles, null, genotypeAssignmentMethodUsed,vc.getAttributeAsInt("DP",0), true));
final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
biallelics.add(trimmed);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public void testUpdatePLsAndADData(final VariantContext originalVC,
AlleleSubsettingUtils.subsetAlleles(oldGs, 0, originalVC.getAlleles(),
selectedVCwithGTs.getAlleles(), null,
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES,
originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);

Assert.assertEquals(actual.size(), expectedGenotypes.size());
for ( final Genotype expected : expectedGenotypes ) {
Expand Down Expand Up @@ -327,7 +327,7 @@ public void testUninformativePLsAreKeptWhenDepthIsNotZero(){
final List<Allele> alleles = Arrays.asList(Aref);
final Genotype uniformativePL = new GenotypeBuilder("sample", alleles).PL(new int[] {0}).make();
final GenotypesContext result = AlleleSubsettingUtils.subsetAlleles(GenotypesContext.create(uniformativePL), 2,
alleles, alleles, null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, false);
alleles, alleles, null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, true);
final Genotype genotype = result.get(0);
Assert.assertTrue(genotype.hasPL());
Assert.assertEquals(genotype.getPL(), new int[]{0});
Expand Down Expand Up @@ -412,7 +412,7 @@ public void testAlleleReorderingBehavior() {

final GenotypesContext newGs = AlleleSubsettingUtils.subsetAlleles(GenotypesContext.create(g5),
2, threeAlleles, threeAllelesSorted, null,
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, false);
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, true);

Assert.assertEquals(newGs.get(0).getPL(), new int[] {50, 20, 0, 40, 10, 30});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ public void testLowQualVariantToGQ0HomRef() {
Assert.assertTrue(!modified.filtersWereApplied());
Assert.assertEquals(modified.getLog10PError(), VariantContext.NO_LOG10_PERROR);

final Genotype longPls = VariantContextTestUtils.makeG("sample1", Allele.NO_CALL, Allele.NO_CALL, 0,0,0,0,0,0);
final VariantContext lotsOfZeroPls = makeNoDepthVC("lowQualVar", Arrays.asList(LONG_REF, DELETION, Allele.NON_REF_ALLELE), LONG_REF.length(), longPls);
final VariantContext properlySubset = reblocker.lowQualVariantToGQ0HomRef(lotsOfZeroPls).make();
Assert.assertEquals(properlySubset.getGenotype(0).getPL().length, 3);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you check the content of PL also is correnct? is it all 0s or '.'?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assert.assertEquals(...getPL(), new int [] { 0, 0, 0}) may just work.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does work, thanks. This is done.

Assert.assertEquals(properlySubset.getGenotype(0).getPL(), new int[]{0,0,0});

//No-calls were throwing NPEs. Now they're not.
final Genotype g2 = VariantContextTestUtils.makeG("sample1", Allele.NO_CALL,Allele.NO_CALL);
final VariantContext noData = makeDeletionVC("noData", Arrays.asList(LONG_REF, DELETION, Allele.NON_REF_ALLELE), LONG_REF.length(), g2);
Expand Down Expand Up @@ -425,6 +431,13 @@ private VariantContext makeDeletionVC(final String source, final List<Allele> al
.genotypes(Arrays.asList(genotypes)).unfiltered().log10PError(-3.0).attribute(VCFConstants.DEPTH_KEY, EXAMPLE_DP).make();
}

private VariantContext makeNoDepthVC(final String source, final List<Allele> alleles, final int refLength, final Genotype... genotypes) {
final int start = DEFAULT_START;
final int stop = start+refLength-1;
return new VariantContextBuilder(source, "1", start, stop, alleles)
.genotypes(Arrays.asList(genotypes)).unfiltered().log10PError(-3.0).make();
}

private Genotype addAD(final Genotype g, final int... ads) {
return new GenotypeBuilder(g).AD(ads).make();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ public static VariantContext sortAlleles(final VariantContext vc, final VCFHeade
result.alleles(sortedAlleles);

GenotypesContext newGT = AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(),sortedAlleles, null,
GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY,0), false);
GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY,0), true);

// Asserting that the new genotypes were calculated properly in case AlleleSubsettingUtils behavior changes
if (newGT.getSampleNames().size() != vc.getGenotypes().size()) throw new IllegalStateException("Sorting this variant context resulted in a different number of genotype alleles, check that AlleleSubsettingUtils still supports reordering:" + vc.toString());
Expand Down