diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java index 1536ef7d113..641804ca641 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java @@ -412,14 +412,17 @@ public Object onTraversalSuccess() { return null; } - SimpleInterval interval = prevPos != null ? new SimpleInterval(prevPos.getContig(), prevPos.getStart(), variantContextsOverlappingCurrentMerge.stream().map(VariantContext::getEnd).max(Comparator.naturalOrder()).get()) : - storedReferenceContext.getInterval(); - - createIntermediateVariants(interval); - - // there shouldn't be any state left unless the user cut in the middle of a gVCF block if ( !variantContextsOverlappingCurrentMerge.isEmpty() ) { - logger.warn("You have asked for an interval that cuts in the middle of one or more gVCF blocks. Please note that this will cause you to lose records that don't end within your interval."); + // finish off the last blocks + final SimpleInterval lastInterval = new SimpleInterval( + variantContextsOverlappingCurrentMerge.get(0).getContig(), + variantContextsOverlappingCurrentMerge.get(0).getStart(), + variantContextsOverlappingCurrentMerge.stream().map(VariantContext::getEnd).max(Comparator.naturalOrder()).get()); + createIntermediateVariants(lastInterval); + // there shouldn't be any state left unless the user cut in the middle of a gVCF block + if ( !variantContextsOverlappingCurrentMerge.isEmpty() ) { + logger.warn("You have asked for an interval that cuts in the middle of one or more gVCF blocks. Please note that this will cause you to lose records that don't end within your interval."); + } } return null; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFsIntegrationTest.java index 658feaae272..f7a80540a79 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFsIntegrationTest.java @@ -74,7 +74,10 @@ public Object[][] gvcfsToCombine() { //Testing allele-specific annotations {new File[]{getTestFile("NA12878.AS.chr20snippet.g.vcf"), getTestFile("NA12892.AS.chr20snippet.g.vcf")}, getTestFile("testAlleleSpecificAnnotations.vcf"), Arrays.asList("-G", "Standard", "-G", "AS_Standard"), b37_reference_20_21}, //Testing allele-specific annotations missing AS_Standard Group - {new File[]{getTestFile("NA12878.AS.chr20snippet.g.vcf"), getTestFile("NA12892.AS.chr20snippet.g.vcf")}, getTestFile("testAlleleSpecificAnnotationsNoGroup.vcf"), Arrays.asList("-G", "Standard", "-G", "AS_Standard"), b37_reference_20_21},}; + {new File[]{getTestFile("NA12878.AS.chr20snippet.g.vcf"), getTestFile("NA12892.AS.chr20snippet.g.vcf")}, getTestFile("testAlleleSpecificAnnotationsNoGroup.vcf"), Arrays.asList("-G", "Standard", "-G", "AS_Standard"), b37_reference_20_21}, + //Test that trailing reference blocks are emitted with correct intervals + {new File[]{getTestFile("gvcfExample1WithTrailingReferenceBlocks.g.vcf"), getTestFile("gvcfExample2WithTrailingReferenceBlocks.g.vcf")}, getTestFile("gvcfWithTrailingReferenceBlocksExpected.g.vcf"), NO_EXTRA_ARGS, b38_reference_20_21}, + }; } @@ -118,11 +121,6 @@ public void compareToGATK3(File[] inputs, File outputFile, List extraArg } else { System.out.println("Found precomputed gatk3Result"); } - final Path expectedResultsDir = Paths.get(getToolTestDataDir()); - if ( !Files.exists(expectedResultsDir)) { - Files.createDirectory(expectedResultsDir); - } - Files.copy(gatk3Result.toPath(), expectedResultsDir.resolve(outputFile.getName()), StandardCopyOption.REPLACE_EXISTING); assertVariantContextsMatch(Arrays.asList(inputs), gatk3Result, extraArgs, reference); } @@ -151,7 +149,7 @@ private void assertVariantContextsMatch(List inputs, File expected, List inputs, File expected, List additionalArguments, BiConsumer assertion, String reference) throws IOException { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(reference)) @@ -195,7 +193,7 @@ private static VCFHeader getHeaderFromFile(final File vcfFile) throws IOExceptio @Test public void testOneStartsBeforeTwoAndEndsAfterwards() throws Exception { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37_reference_20_21)) @@ -227,7 +225,7 @@ public void testOneStartsBeforeTwoAndEndsAfterwards() throws Exception { @Test() public void testTetraploidRun() throws IOException { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37_reference_20_21)) @@ -248,7 +246,7 @@ public void testTetraploidRun() throws IOException { @Test public void testTwoSpansManyBlocksInOne() throws Exception { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37_reference_20_21)) @@ -267,7 +265,7 @@ public void testTwoSpansManyBlocksInOne() throws Exception { // Ensuring that no exception is thrown and that the resulting VCF is empty @Test public void testNoDataInInterval() throws Exception { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37_reference_20_21)) @@ -286,7 +284,7 @@ public void testNoDataInInterval() throws Exception { @Test public void testOneHasAltAndTwoHasNothing() throws Exception { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37_reference_20_21)) @@ -307,7 +305,7 @@ public void testOneHasAltAndTwoHasNothing() throws Exception { @Test public void testOneHasAltAndTwoHasRefBlock() throws Exception { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37_reference_20_21)) @@ -332,7 +330,7 @@ public void testOneHasAltAndTwoHasRefBlock() throws Exception { @Test public void testOneHasDeletionAndTwoHasRefBlock() throws Exception { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37_reference_20_21)) @@ -369,7 +367,7 @@ public void testOneHasDeletionAndTwoHasRefBlock() throws Exception { // This test asserts that the behavior is rational in the case of whole genome gvcfs which have variants starting at // the first and ending on the last base of each contig. public void testStartChromosome() throws Exception { - final File output = createTempFile("genotypegvcf", ".vcf"); + final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37_reference_20_21)) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfExample1WithTrailingReferenceBlocks.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfExample1WithTrailingReferenceBlocks.g.vcf new file mode 100644 index 00000000000..ec5f33cf951 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfExample1WithTrailingReferenceBlocks.g.vcf @@ -0,0 +1,47 @@ +##fileformat=VCFv4.1 +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive) +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 +chr20 69491 . G . . BLOCK_SIZE=20;END=69510 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:94:99:82:99:0,120,1800 +chr20 69511 . A G, 2253.77 . BaseQRankSum=1.169;DP=82;MQ=31.05;MQ0=0;MQRankSum=-0.866;ReadPosRankSum=1.689 GT:AD:DP:GQ:PL:SB 1/1:1,79,0:80:99:2284,207,0,2287,237,2316:0,1,46,33 +chr20 69512 . C . . BLOCK_SIZE=10;END=69521 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:96:99:82:99:0,120,1800 +chr20 69522 . T . . BLOCK_SIZE=27;END=69548 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:95:0:95:0:0,0,0 +chr20 69549 . G . . BLOCK_SIZE=86;END=69634 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:156:99:56:66:0,66,990 +chr20 69635 . G T, 60.77 . BaseQRankSum=0.937;DP=7;MQ=34.15;MQ0=0;MQRankSum=1.300;ReadPosRankSum=1.754 GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:89:89,0,119,101,128,229:0,4,0,3 +chr20 69762 . T . . BLOCK_SIZE=1;END=69762 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:18:7:18:0,18,270 +chr20 69763 . A . . BLOCK_SIZE=4;END=69766 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:21:7:21:0,21,253 +chr20 69767 . C . . BLOCK_SIZE=5;END=69771 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:12:7:12:0,12,180 +chr20 69772 . AAAGC A, 60.77 . BaseQRankSum=0.937;DP=7;MQ=34.15;MQ0=0;MQRankSum=1.300;ReadPosRankSum=1.754 GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:89:89,0,119,101,128,229:0,4,0,3 +chr20 69773 . A . . BLOCK_SIZE=10;END=69783 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:0:3:0:0,0,0 +chr21 1 . G . . END=46709983 GT:DP:GQ:MIN_DP:PL 0/0:0:0:0:0,0,0 +chr20_GL383577v2_alt 1 . G . . END=128386 GT:DP:GQ:MIN_DP:PL 0/0:0:0:0:0,0,0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfExample2WithTrailingReferenceBlocks.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfExample2WithTrailingReferenceBlocks.g.vcf new file mode 100644 index 00000000000..b3ad4bcb99c --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfExample2WithTrailingReferenceBlocks.g.vcf @@ -0,0 +1,41 @@ +##fileformat=VCFv4.1 +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive) +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA2 +chr20 69498 . C . . BLOCK_SIZE=9;END=69506 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:94:99:82:99:0,120,1800 +chr20 69514 . T . . BLOCK_SIZE=111;END=69624 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:96:99:82:99:0,120,1800 +chr20 69625 . A . . BLOCK_SIZE=51;END=69675 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:18:7:18:0,18,270 +chr20 69767 . C . . BLOCK_SIZE=8;END=69774 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:12:7:15:0,12,180 +chr20 69775 . C . . BLOCK_SIZE=17;END=69791 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:15:7:15:0,15,160 +chr21 1 . G . . END=46709983 GT:DP:GQ:MIN_DP:PL 0/0:0:0:0:0,0,0 +chr20_GL383577v2_alt 1 . G . . END=128386 GT:DP:GQ:MIN_DP:PL 0/0:0:0:0:0,0,0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfWithTrailingReferenceBlocksExpected.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfWithTrailingReferenceBlocksExpected.g.vcf new file mode 100644 index 00000000000..a041b98c6d9 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs/gvcfWithTrailingReferenceBlocksExpected.g.vcf @@ -0,0 +1,65 @@ +##fileformat=VCFv4.2 +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive) +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##source=CombineGVCFs +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 NA2 +chr20 69491 . G . . END=69497 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:94:99:82:99:0,120,1800 ./. +chr20 69498 . C . . END=69506 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:94:99:82:99:0,120,1800 ./.:94:99:82:99:0,120,1800 +chr20 69507 . T . . END=69510 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:94:99:82:99:0,120,1800 ./. +chr20 69511 . A G, . . BaseQRankSum=1.17;DP=82;MQ=31.05;MQ0=0;MQRankSum=-8.660e-01;ReadPosRankSum=1.69 GT:AD:DP:GQ:PL:SB ./.:1,79,0:80:99:2284,207,0,2287,237,2316:0,1,46,33 ./. +chr20 69512 . C . . END=69513 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:96:99:82:99:0,120,1800 ./. +chr20 69514 . T . . END=69521 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:96:99:82:99:0,120,1800 ./.:96:99:82:99:0,120,1800 +chr20 69522 . T . . END=69548 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:95:0:95:0:0,0,0 ./.:96:99:82:99:0,120,1800 +chr20 69549 . G . . END=69624 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:156:99:56:66:0,66,990 ./.:96:99:82:99:0,120,1800 +chr20 69625 . A . . END=69634 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:156:99:56:66:0,66,990 ./.:7:18:7:18:0,18,270 +chr20 69635 . G T, . . BaseQRankSum=0.937;DP=14;MQ=34.15;MQ0=0;MQRankSum=1.30;ReadPosRankSum=1.75 GT:AD:DP:GQ:MIN_DP:MIN_GQ:PL:SB ./.:4,3,0:7:89:.:.:89,0,119,101,128,229:0,4,0,3 ./.:.:7:18:7:18:0,18,270,18,270,270 +chr20 69636 . A . . END=69675 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./. ./.:7:18:7:18:0,18,270 +chr20 69762 . T . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:7:18:7:18:0,18,270 ./. +chr20 69763 . A . . END=69766 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:7:21:7:21:0,21,253 ./. +chr20 69767 . C . . END=69771 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:7:12:7:12:0,12,180 ./.:7:12:7:15:0,12,180 +chr20 69772 . AAAGC A, . . BaseQRankSum=0.937;DP=14;MQ=34.15;MQ0=0;MQRankSum=1.30;ReadPosRankSum=1.75 GT:AD:DP:GQ:MIN_DP:MIN_GQ:PL:SB ./.:4,3,0:7:89:.:.:89,0,119,101,128,229:0,4,0,3 ./.:.:7:12:7:15:0,12,180,12,180,180 +chr20 69773 . A . . END=69774 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:7:0:3:0:0,0,0 ./.:7:12:7:15:0,12,180 +chr20 69775 . C . . END=69783 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./.:7:0:3:0:0,0,0 ./.:7:15:7:15:0,15,160 +chr20 69784 . A . . END=69791 GT:DP:GQ:MIN_DP:MIN_GQ:PL ./. ./.:7:15:7:15:0,15,160 +chr21 1 . G . . END=46709983 GT:DP:GQ:MIN_DP:PL ./.:0:0:0:0,0,0 ./.:0:0:0:0,0,0 +chr20_GL383577v2_alt 1 . G . . END=128386 GT:DP:GQ:MIN_DP:PL ./.:0:0:0:0,0,0 ./.:0:0:0:0,0,0