From fcd82a01b81ee1c62a4f0eab87770885ed2b09d7 Mon Sep 17 00:00:00 2001 From: Megan Shand Date: Tue, 11 Jul 2023 10:57:24 -0400 Subject: [PATCH 1/2] ReblockGVCFs cleanup for dragen inputs --- .../walkers/variantutils/ReblockGVCF.java | 30 ++++++++++++++++- .../ReblockGVCFIntegrationTest.java | 32 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java index 847f0facd6e..1b21eff1950 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java @@ -108,6 +108,7 @@ public final class ReblockGVCF extends MultiVariantWalker { public static final String RGQ_THRESHOLD_SHORT_NAME = "rgq-threshold"; public static final String TREE_SCORE_THRESHOLD_LONG_NAME = "tree-score-threshold-to-no-call"; public static final String ANNOTATIONS_TO_KEEP_LONG_NAME = "annotations-to-keep"; + public static final String ANNOTATIONS_TO_REMOVE_LONG_NAME = "format-annotations-to-remove"; public static final String KEEP_ALL_ALTS_ARG_NAME = "keep-all-alts"; public static final String QUAL_APPROX_LONG_NAME = "do-qual-score-approximation"; public static final String QUAL_APPROX_SHORT_NAME = "do-qual-approx"; @@ -154,6 +155,10 @@ public final class ReblockGVCF extends MultiVariantWalker { @Argument(fullName=ANNOTATIONS_TO_KEEP_LONG_NAME, doc="Annotations that are not recognized by GATK to keep, that should be kept in final GVCF at variant sites.", optional = true) private List annotationsToKeep = new ArrayList<>(); + @Advanced + @Argument(fullName=ANNOTATIONS_TO_REMOVE_LONG_NAME, doc="FORMAT level annotations to remove from all genotypes in final GVCF.", optional = true) + private List annotationsToRemove = new ArrayList<>(); + @Advanced @Argument(fullName=QUAL_APPROX_LONG_NAME, shortName=QUAL_APPROX_SHORT_NAME, doc="Add necessary INFO field annotation to perform QUAL approximation downstream; required for GnarlyGenotyper", optional = true) protected boolean doQualApprox = false; @@ -227,6 +232,9 @@ public void onTraversalStart() { + ", but the " + GATKVCFConstants.TREE_SCORE + " annotation is not present in the input GVCF."); } + List missingAnnotationsToRemove = annotationsToRemove.stream().filter(a -> inputHeader.getFormatHeaderLine(a)==null).toList(); + missingAnnotationsToRemove.forEach(a -> logger.warn("FORMAT level annotation " + a + ", which was requested to be removed by --" + ANNOTATIONS_TO_REMOVE_LONG_NAME + ", not found in input GVCF header.")); + final Set inputHeaders = inputHeader.getMetaDataInSortedOrder(); final Set headerLines = new HashSet<>(inputHeaders); @@ -317,7 +325,27 @@ protected void createAnnotationEngine() { // get VariantContexts from input gVCFs and regenotype @Override public void apply(VariantContext variant, ReadsContext reads, ReferenceContext ref, FeatureContext features) { - regenotypeVC(variant); + if (!variant.hasAllele(Allele.NON_REF_ALLELE)) { + throw new GATKException("Variant Context at " + variant.getContig() + ":" + variant.getStart() + " does not contain a allele. This tool is only intended for use with GVCFs."); + } + VariantContext newVC = annotationsToRemove.size() > 0 ? vcFormatAnnotationsRemoved(variant) : variant; + regenotypeVC(newVC); + } + + /** + * Remove format level annotations from genotype in variant context. + * + * @param vc variant context to remove format annotations from + * @return variant context with format annotations removed from genotype + */ + private VariantContext vcFormatAnnotationsRemoved(final VariantContext vc) { + final Genotype genotype = vc.getGenotype(0); + Map extendedAttributes = genotype.getExtendedAttributes(); + for (String annotation : annotationsToRemove) { + extendedAttributes.remove(annotation); + } + final Genotype newGenotype = new GenotypeBuilder(genotype).noAttributes().attributes(extendedAttributes).make(); + return new VariantContextBuilder(vc).genotypes(newGenotype).make(); } /** diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java index a0f5aee0275..feaabde092b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java @@ -9,6 +9,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.FeatureDataSource; +import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.CommandLineProgramTester; @@ -575,4 +576,35 @@ public void testFilters() throws IOException { Assert.assertFalse(filteredRefBlockVC.isFiltered()); // Ref block is unfiltered even though the input RefBlock and low qual variant were both filtered Assert.assertEquals(filteredRefBlockVC.getGenotype(0).getDP(), 12); // Ref block is combination of filtered variant with depth 22 and filtered ref block with depth 1 } + + @Test + public void testRemovingFormatAnnotations() { + final File input = getTestFile("dragen.g.vcf"); + final File output = createTempFile("reblockedgvcf", ".vcf"); + final String priKey = "PRI"; + + final ArgumentsBuilder args = new ArgumentsBuilder(); + args.addReference(new File(hg38Reference)) + .add("V", input) + .add(ReblockGVCF.ANNOTATIONS_TO_REMOVE_LONG_NAME, priKey) + .addOutput(output); + runCommandLine(args); + + final List outVCs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight(); + for(VariantContext vc : outVCs){ + Assert.assertNull(vc.getGenotype(0).getExtendedAttribute(priKey)); + } + } + + @Test + public void testNonGVCFInput() { + final File output = createTempFile("reblockedgvcf", ".vcf"); + + final ArgumentsBuilder args = new ArgumentsBuilder(); + args.addReference(new File(b37_reference_20_21)) + .add("V", "src/test/resources/large/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.chr20.vcf") + .addOutput(output); + + Assert.assertThrows(GATKException.class, () -> runCommandLine(args)); + } } From 53a5741cbc1b69138a6214a6ef04b4a6fbe79430 Mon Sep 17 00:00:00 2001 From: Megan Shand Date: Fri, 14 Jul 2023 10:49:45 -0400 Subject: [PATCH 2/2] addressing comments --- .../hellbender/tools/walkers/variantutils/ReblockGVCF.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java index 1b21eff1950..8b965e93012 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java @@ -326,9 +326,9 @@ protected void createAnnotationEngine() { @Override public void apply(VariantContext variant, ReadsContext reads, ReferenceContext ref, FeatureContext features) { if (!variant.hasAllele(Allele.NON_REF_ALLELE)) { - throw new GATKException("Variant Context at " + variant.getContig() + ":" + variant.getStart() + " does not contain a allele. This tool is only intended for use with GVCFs."); + throw new UserException("Variant Context at " + variant.getContig() + ":" + variant.getStart() + " does not contain a allele. This tool is only intended for use with GVCFs."); } - VariantContext newVC = annotationsToRemove.size() > 0 ? vcFormatAnnotationsRemoved(variant) : variant; + VariantContext newVC = annotationsToRemove.size() > 0 ? removeVCFFormatAnnotations(variant) : variant; regenotypeVC(newVC); } @@ -338,7 +338,7 @@ public void apply(VariantContext variant, ReadsContext reads, ReferenceContext r * @param vc variant context to remove format annotations from * @return variant context with format annotations removed from genotype */ - private VariantContext vcFormatAnnotationsRemoved(final VariantContext vc) { + private VariantContext removeVCFFormatAnnotations(final VariantContext vc) { final Genotype genotype = vc.getGenotype(0); Map extendedAttributes = genotype.getExtendedAttributes(); for (String annotation : annotationsToRemove) {