diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ValidateVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ValidateVariants.java index c8901f82235..707c6708118 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ValidateVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ValidateVariants.java @@ -197,6 +197,7 @@ public enum ValidationType { // information to keep track of when validating a GVCF private SimpleInterval previousInterval; private int previousStart = -1; + private String previousContig = null; @Override public void onTraversalStart() { @@ -239,14 +240,7 @@ public void apply(final VariantContext vc, final ReadsContext readsContext, fina if (VALIDATE_GVCF) { final SimpleInterval refInterval = ref.getInterval(); - //if next VC refers to a previous genomic position, throw an error - //Note that HaplotypeCaller can emit variants that start inside of a deletion on another haplotype, - // making v2's start less than the deletion's end - if (previousStart > -1 && vc.getStart() < previousStart) { - final UserException e = new UserException(String.format("In a GVCF all records must ordered. Record: %s covers a position previously traversed.", - vc.toStringWithoutGenotypes())); - throwOrWarn(e); - } + validateVariantsOrder(vc); // GenomeLocSortedSet will automatically merge intervals that are overlapping when setting `mergeIfIntervalOverlaps` // to true. In a GVCF most blocks are adjacent to each other so they wouldn't normally get merged. We check @@ -347,6 +341,24 @@ private Collection calculateValidationTypesToApply(final List -1 && vc.getStart() < previousStart) { + final UserException e = new UserException(String.format("In a GVCF all records must ordered. Record: %s covers a position previously traversed.", + vc.toStringWithoutGenotypes())); + throwOrWarn(e); + } + } + private void validateGVCFVariant(final VariantContext vc) { if (!vc.hasAllele(Allele.NON_REF_ALLELE)) { final UserException e = new UserException(String.format("In a GVCF all records must contain a %s allele. Offending record: %s", diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/ValidateVariantsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/ValidateVariantsIntegrationTest.java index 3d8ef6313bf..99a68ff22cd 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/ValidateVariantsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/ValidateVariantsIntegrationTest.java @@ -355,4 +355,36 @@ public void testNonOverlappingRegionsBP_RESOLUTION() throws IOException { Collections.emptyList()); spec.executeTest("tests capture of non-complete region, on BP_RESOLUTION gvcf", this); } + + @Test + public void testGoodVariantsOrderTwoContigs() throws IOException { + IntegrationTestSpec spec = new IntegrationTestSpec( + baseTestString(false, "goodGVCF.inOrderTwoContigs.g.vcf", true, ALLELES, null, hg38Reference) + " -gvcf ", + Collections.emptyList()); + spec.executeTest("tests the variants order validation for a valid file including two contigs", this); + } + + @Test + public void testBadVariantsOrderTwoContigs() throws IOException { + IntegrationTestSpec spec = new IntegrationTestSpec( + baseTestString(false, "badGVCF.outOfOrderTwoContigs.g.vcf", true, ALLELES, null, hg38Reference) + " -gvcf ", + 0, UserException.class); + spec.executeTest("tests the variants order validation for an invalid file including two contigs", this); + } + + @Test + public void testGoodVariantsOrderThreeContigs() throws IOException { + IntegrationTestSpec spec = new IntegrationTestSpec( + baseTestString(false, "goodGVCF.inOrderThreeContigs.g.vcf", true, ALLELES, null, hg38Reference) + " -gvcf ", + Collections.emptyList()); + spec.executeTest("tests the variants order validation for a valid file including three contigs", this); + } + + @Test + public void testBadVariantsOrderThreeContigs() throws IOException { + IntegrationTestSpec spec = new IntegrationTestSpec( + baseTestString(false, "badGVCF.outOfOrderThreeContigs.g.vcf", true, ALLELES, null, hg38Reference) + " -gvcf ", + 0, UserException.class); + spec.executeTest("tests the variants order validation for an invalid file including three contigs", this); + } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/badGVCF.outOfOrderThreeContigs.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/badGVCF.outOfOrderThreeContigs.g.vcf new file mode 100644 index 00000000000..83238b47c68 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/badGVCF.outOfOrderThreeContigs.g.vcf @@ -0,0 +1,61 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GVCFBlock0-500=minGQ=0(inclusive),maxGQ=500(exclusive) +##GVCFBlock1000-1700=minGQ=1000(inclusive),maxGQ=1700(exclusive) +##GVCFBlock1700-1800=minGQ=1700(inclusive),maxGQ=1800(exclusive) +##GVCFBlock1800-1900=minGQ=1800(inclusive),maxGQ=1900(exclusive) +##GVCFBlock1900-2000=minGQ=1900(inclusive),maxGQ=2000(exclusive) +##GVCFBlock2000-2147483647=minGQ=2000(inclusive),maxGQ=2147483647(exclusive) +##GVCFBlock500-1000=minGQ=500(inclusive),maxGQ=1000(exclusive) +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##MutectVersion=2.1 +##contig= +##contig= +##contig= +##filtering_status=Warning: unfiltered Mutect 2 calls. Please run FilterMutectCalls to remove false positives. +##source=Mutect2 +##tumor_sample=NA12878 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 1 . G . . END=156 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chrM 152 . T C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chrM 153 . A . . END=16569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 1 . N . . END=99993569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993570 . A . . END=99993570 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993560 . C . . END=99993570 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993571 . A C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chr1 99993572 . A . . END=248956422 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr2 1 . N . . END=99993569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr2 59984617 . A . . END=59984617 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr2 59984617 . C . . END=59984617 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr2 59984618 . A C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chr2 59984619 . A . . END=242193529 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/badGVCF.outOfOrderTwoContigs.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/badGVCF.outOfOrderTwoContigs.g.vcf new file mode 100644 index 00000000000..2f5b9efec5b --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/badGVCF.outOfOrderTwoContigs.g.vcf @@ -0,0 +1,55 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GVCFBlock0-500=minGQ=0(inclusive),maxGQ=500(exclusive) +##GVCFBlock1000-1700=minGQ=1000(inclusive),maxGQ=1700(exclusive) +##GVCFBlock1700-1800=minGQ=1700(inclusive),maxGQ=1800(exclusive) +##GVCFBlock1800-1900=minGQ=1800(inclusive),maxGQ=1900(exclusive) +##GVCFBlock1900-2000=minGQ=1900(inclusive),maxGQ=2000(exclusive) +##GVCFBlock2000-2147483647=minGQ=2000(inclusive),maxGQ=2147483647(exclusive) +##GVCFBlock500-1000=minGQ=500(inclusive),maxGQ=1000(exclusive) +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##MutectVersion=2.1 +##contig= +##contig= +##filtering_status=Warning: unfiltered Mutect 2 calls. Please run FilterMutectCalls to remove false positives. +##source=Mutect2 +##tumor_sample=NA12878 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 1 . G . . END=156 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chrM 152 . T C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chrM 153 . A . . END=16569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 1 . N . . END=99993569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993570 . A . . END=99993570 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993560 . C . . END=99993570 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993571 . A C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chr1 99993572 . A . . END=248956422 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderThreeContigs.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderThreeContigs.g.vcf new file mode 100644 index 00000000000..ba5c1356387 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderThreeContigs.g.vcf @@ -0,0 +1,59 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GVCFBlock0-500=minGQ=0(inclusive),maxGQ=500(exclusive) +##GVCFBlock1000-1700=minGQ=1000(inclusive),maxGQ=1700(exclusive) +##GVCFBlock1700-1800=minGQ=1700(inclusive),maxGQ=1800(exclusive) +##GVCFBlock1800-1900=minGQ=1800(inclusive),maxGQ=1900(exclusive) +##GVCFBlock1900-2000=minGQ=1900(inclusive),maxGQ=2000(exclusive) +##GVCFBlock2000-2147483647=minGQ=2000(inclusive),maxGQ=2147483647(exclusive) +##GVCFBlock500-1000=minGQ=500(inclusive),maxGQ=1000(exclusive) +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##MutectVersion=2.1 +##contig= +##contig= +##contig= +##filtering_status=Warning: unfiltered Mutect 2 calls. Please run FilterMutectCalls to remove false positives. +##source=Mutect2 +##tumor_sample=NA12878 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 1 . G . . END=156 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chrM 152 . T C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chrM 153 . A . . END=16569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 1 . N . . END=99993569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993570 . A . . END=99993570 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993571 . A C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chr1 99993572 . A . . END=248956422 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr2 1 . N . . END=59984616 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr2 59984617 . A . . END=59984617 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr2 59984618 . A C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chr2 59984619 . A . . END=242193529 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderThreeContigs.g.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderThreeContigs.g.vcf.idx new file mode 100644 index 00000000000..3224ce799ed Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderThreeContigs.g.vcf.idx differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderTwoContigs.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderTwoContigs.g.vcf new file mode 100644 index 00000000000..692142062da --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderTwoContigs.g.vcf @@ -0,0 +1,54 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GVCFBlock0-500=minGQ=0(inclusive),maxGQ=500(exclusive) +##GVCFBlock1000-1700=minGQ=1000(inclusive),maxGQ=1700(exclusive) +##GVCFBlock1700-1800=minGQ=1700(inclusive),maxGQ=1800(exclusive) +##GVCFBlock1800-1900=minGQ=1800(inclusive),maxGQ=1900(exclusive) +##GVCFBlock1900-2000=minGQ=1900(inclusive),maxGQ=2000(exclusive) +##GVCFBlock2000-2147483647=minGQ=2000(inclusive),maxGQ=2147483647(exclusive) +##GVCFBlock500-1000=minGQ=500(inclusive),maxGQ=1000(exclusive) +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##MutectVersion=2.1 +##contig= +##contig= +##filtering_status=Warning: unfiltered Mutect 2 calls. Please run FilterMutectCalls to remove false positives. +##source=Mutect2 +##tumor_sample=NA12878 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 1 . G . . END=156 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chrM 152 . T C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chrM 153 . A . . END=16569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 1 . N . . END=99993569 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993570 . A . . END=99993570 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 +chr1 99993571 . A C, . . DP=1582;ECNT=2;POP_AF=5.000e-08,5.000e-08;TLOD=5265.15,-2.894e+00 GT:AD:AF:DP:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:PGT:PID:SA_MAP_AF:SA_POST_PROB 0/1/2:3,1556,0:0.997,6.372e-04:1559:2,777,0:1,779,0:30,30,0:16270,369,0:60,0:42,0:0|1:152_T_C:0.990,0.990,0.998:0.036,0.021,0.943 +chr1 99993572 . A . . END=248956422 GT:DP:MIN_DP:TLOD 0/0:1694:1680:1803.51 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderTwoContigs.g.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderTwoContigs.g.vcf.idx new file mode 100644 index 00000000000..1c1829f563f Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/ValidateVariants/goodGVCF.inOrderTwoContigs.g.vcf.idx differ