Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ReblockGVCFs cleanup #8411

Merged
merged 2 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ public final class ReblockGVCF extends MultiVariantWalker {
// Command-line argument name constants. The ANNOTATIONS_TO_* and QUAL_APPROX_* names below are
// referenced from @Argument declarations on this tool's fields; the remaining names are presumably
// wired up the same way outside this hunk (TODO confirm).
public static final String RGQ_THRESHOLD_SHORT_NAME = "rgq-threshold";
public static final String TREE_SCORE_THRESHOLD_LONG_NAME = "tree-score-threshold-to-no-call";
// Full name of the argument listing annotations to keep at variant sites.
public static final String ANNOTATIONS_TO_KEEP_LONG_NAME = "annotations-to-keep";
// Full name of the argument listing FORMAT-level annotations to strip from genotypes.
public static final String ANNOTATIONS_TO_REMOVE_LONG_NAME = "format-annotations-to-remove";
public static final String KEEP_ALL_ALTS_ARG_NAME = "keep-all-alts";
// Full and short names of the flag that enables the QUAL approximation annotation.
public static final String QUAL_APPROX_LONG_NAME = "do-qual-score-approximation";
public static final String QUAL_APPROX_SHORT_NAME = "do-qual-approx";
Expand Down Expand Up @@ -154,6 +155,10 @@ public final class ReblockGVCF extends MultiVariantWalker {
// Optional list of annotation names to keep at variant sites; empty unless supplied on the command line.
@Argument(fullName=ANNOTATIONS_TO_KEEP_LONG_NAME, doc="Annotations that are not recognized by GATK to keep, that should be kept in final GVCF at variant sites.", optional = true)
private List<String> annotationsToKeep = new ArrayList<>();

// Optional list of FORMAT keys to strip from genotypes. When this list is empty, apply() hands the
// input variant to regenotypeVC() unchanged; otherwise the genotype attributes are filtered first.
@Advanced
@Argument(fullName=ANNOTATIONS_TO_REMOVE_LONG_NAME, doc="FORMAT level annotations to remove from all genotypes in final GVCF.", optional = true)
private List<String> annotationsToRemove = new ArrayList<>();

// Off by default; when set, requests the INFO annotation needed for downstream QUAL approximation
// (see the doc string on the argument).
@Advanced
@Argument(fullName=QUAL_APPROX_LONG_NAME, shortName=QUAL_APPROX_SHORT_NAME, doc="Add necessary INFO field annotation to perform QUAL approximation downstream; required for GnarlyGenotyper", optional = true)
protected boolean doQualApprox = false;
Expand Down Expand Up @@ -227,6 +232,9 @@ public void onTraversalStart() {
+ ", but the " + GATKVCFConstants.TREE_SCORE + " annotation is not present in the input GVCF.");
}

List<String> missingAnnotationsToRemove = annotationsToRemove.stream().filter(a -> inputHeader.getFormatHeaderLine(a)==null).toList();
missingAnnotationsToRemove.forEach(a -> logger.warn("FORMAT level annotation " + a + ", which was requested to be removed by --" + ANNOTATIONS_TO_REMOVE_LONG_NAME + ", not found in input GVCF header."));

final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder();

final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeaders);
Expand Down Expand Up @@ -317,7 +325,27 @@ protected void createAnnotationEngine() {
// Per-record entry point: reject records without a <NON_REF> allele, optionally strip the
// requested FORMAT annotations, then hand the record on for regenotyping.
@Override
public void apply(VariantContext variant, ReadsContext reads, ReferenceContext ref, FeatureContext features) {
    final boolean hasNonRefAllele = variant.hasAllele(Allele.NON_REF_ALLELE);
    if (!hasNonRefAllele) {
        throw new GATKException("Variant Context at " + variant.getContig() + ":" + variant.getStart() + " does not contain a <NON-REF> allele. This tool is only intended for use with GVCFs.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use a UserException instead -- I think it's a subclass

    }

    // Only rebuild the record when there is actually something to remove.
    final VariantContext vcToRegenotype;
    if (annotationsToRemove.isEmpty()) {
        vcToRegenotype = variant;
    } else {
        vcToRegenotype = vcFormatAnnotationsRemoved(variant);
    }
    regenotypeVC(vcToRegenotype);
}

/**
 * Build a copy of the given variant context whose genotype no longer carries the FORMAT
 * annotations listed in {@code annotationsToRemove}.
 *
 * <p>The genotype's extended attributes are copied into a fresh map before any key is removed, so
 * the input genotype is never modified and an unmodifiable attribute map cannot cause a failure.
 *
 * <p>NOTE(review): only the genotype at index 0 is read and the rebuilt context is given just that
 * one genotype -- this assumes single-sample input; confirm against the tool's contract.
 *
 * @param vc variant context to remove format annotations from
 * @return variant context with format annotations removed from genotype
 */
private VariantContext vcFormatAnnotationsRemoved(final VariantContext vc) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The grammar of this method name feels weird. Like usually they're verb phrases. I would prefer "removeVCFormatAnnotations"

    final Genotype genotype = vc.getGenotype(0);
    // Work on a private copy: the map handed back by the genotype may be its own backing map.
    final Map<String, Object> keptAttributes = new java.util.HashMap<>(genotype.getExtendedAttributes());
    for (final String annotation : annotationsToRemove) {
        keptAttributes.remove(annotation);
    }
    // Clear the attributes the builder copied from the source genotype, then install the filtered set.
    final Genotype newGenotype = new GenotypeBuilder(genotype).noAttributes().attributes(keptAttributes).make();
    return new VariantContextBuilder(vc).genotypes(newGenotype).make();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.CommandLineProgramTester;
Expand Down Expand Up @@ -575,4 +576,35 @@ public void testFilters() throws IOException {
Assert.assertFalse(filteredRefBlockVC.isFiltered()); // Ref block is unfiltered even though the input RefBlock and low qual variant were both filtered
Assert.assertEquals(filteredRefBlockVC.getGenotype(0).getDP(), 12); // Ref block is combination of filtered variant with depth 22 and filtered ref block with depth 1
}

/**
 * Runs the tool on dragen.g.vcf with the PRI FORMAT annotation requested for removal and checks
 * that no genotype in the output still carries that attribute.
 */
@Test
public void testRemovingFormatAnnotations() {
    final String priKey = "PRI";
    final File input = getTestFile("dragen.g.vcf");
    final File output = createTempFile("reblockedgvcf", ".vcf");

    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addReference(new File(hg38Reference))
            .add("V", input)
            .add(ReblockGVCF.ANNOTATIONS_TO_REMOVE_LONG_NAME, priKey)
            .addOutput(output);
    runCommandLine(args);

    final List<VariantContext> outVCs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight();
    // Guard against a vacuous pass: with no output records the loop below would assert nothing.
    Assert.assertFalse(outVCs.isEmpty(), "Expected at least one record in the reblocked output");
    for (final VariantContext vc : outVCs) {
        Assert.assertNull(vc.getGenotype(0).getExtendedAttribute(priKey));
    }
}

/**
 * Feeds the tool a VCF input and expects the run to fail with a {@link GATKException}.
 */
@Test
public void testNonGVCFInput() {
    final File reblockedOutput = createTempFile("reblockedgvcf", ".vcf");
    final File reference = new File(b37_reference_20_21);

    final ArgumentsBuilder nonGvcfArgs = new ArgumentsBuilder();
    nonGvcfArgs.addReference(reference);
    nonGvcfArgs.add("V", "src/test/resources/large/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.chr20.vcf");
    nonGvcfArgs.addOutput(reblockedOutput);

    Assert.assertThrows(GATKException.class, () -> runCommandLine(nonGvcfArgs));
}
}