From dee4fe33d7745d24f03e08b5d02c2f2fe30b0885 Mon Sep 17 00:00:00 2001 From: lichtens Date: Wed, 8 Nov 2017 11:04:38 -0500 Subject: [PATCH 1/9] First cut, not fully implemented. --- .../copynumber/CollectAllelicCountsSpark.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java new file mode 100644 index 00000000000..eaea3e68e3f --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -0,0 +1,50 @@ +package org.broadinstitute.hellbender.tools.copynumber; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.broadcast.Broadcast; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.spark.LocusWalkerContext; +import org.broadinstitute.hellbender.engine.spark.LocusWalkerSpark; +import org.broadinstitute.hellbender.tools.copynumber.allelic.alleliccount.AllelicCountCollector; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleMetadata; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleNameUtils; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleSampleMetadata; +import org.broadinstitute.hellbender.utils.Nucleotide; + +import java.util.Collections; +import java.util.Iterator; + +public class CollectAllelicCountsSpark extends LocusWalkerSpark { + + + @Override + protected void processAlignments(JavaRDD rdd, JavaSparkContext ctx) { + final String sampleName = SampleNameUtils.readSampleName(getHeaderForReads()); + final SampleMetadata sampleMetadata = new SimpleSampleMetadata(sampleName); + final Broadcast sampleMetadataBroadcast = ctx.broadcast(sampleMetadata); + // rdd.map(pileupFunction(metadata, outputInsertLength, showVerbose)).saveAsTextFile(outputFile); + rdd.mapPartitions(distributedCount(sampleMetadataBroadcast.getValue(), 20)).reduce(); + } + + private static FlatMapFunction, AllelicCountCollector> distributedCount(final SampleMetadata sampleMetadata, + final int minimumBaseQuality) { + return (FlatMapFunction, AllelicCountCollector>) contextIterator -> { + final AllelicCountCollector result = new AllelicCountCollector(sampleMetadata); + + contextIterator.forEachRemaining( ctx -> { + final ReferenceContext referenceContext = ctx.getReferenceContext(); + final byte refAsByte = referenceContext.getBase(); + result.collectAtLocus(Nucleotide.valueOf(refAsByte), ctx.getAlignmentContext().getBasePileup(), + ctx.getAlignmentContext().getLocation(), minimumBaseQuality); + } + ); + return Collections.singletonList(result).iterator(); + }; + } + + private static AllelicCountCollector combineAllelicCountCollectors(final AllelicCountCollector allelicCountCollector1, final AllelicCountCollector allelicCountCollector2) { + + } +} From 79a3348072de48ef075164431e8d1ee5fa0760ff Mon Sep 17 00:00:00 2001 From: lichtens Date: Thu, 9 Nov 2017 09:51:44 -0500 Subject: [PATCH 2/9] Spark version of collect allelic counts spark. --- .../copynumber/CollectAllelicCountsSpark.java | 44 +++++++++++++++++-- .../datacollection/AllelicCountCollector.java | 9 ++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java index eaea3e68e3f..e2f7494f7b2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -1,9 +1,13 @@ package org.broadinstitute.hellbender.tools.copynumber; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.broadcast.Broadcast; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.spark.LocusWalkerContext; import org.broadinstitute.hellbender.engine.spark.LocusWalkerSpark; @@ -13,11 +17,39 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleSampleMetadata; import org.broadinstitute.hellbender.utils.Nucleotide; +import java.io.File; import java.util.Collections; import java.util.Iterator; public class CollectAllelicCountsSpark extends LocusWalkerSpark { + private static final Logger logger = LogManager.getLogger(CollectAllelicCounts.class); + @Argument( + doc = "Output allelic-counts file.", + fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME + ) + private File outputAllelicCountsFile; + + @Argument( + doc = "Minimum base quality; base calls with lower quality will be filtered out of pileup.", + fullName = "minimumBaseQuality", + shortName = "minBQ", + minValue = 0, + optional = true + ) + private int minimumBaseQuality = 20; + + private static final int DEFAULT_MINIMUM_MAPPING_QUALITY = 30; + + @Override + public boolean emitEmptyLoci() {return true;} + + @Override + public boolean requiresReference() {return true;} + + @Override + public boolean requiresIntervals() {return true;} @Override protected void processAlignments(JavaRDD rdd, JavaSparkContext ctx) { @@ -25,7 +57,11 @@ protected void processAlignments(JavaRDD rdd, JavaSparkConte final SampleMetadata sampleMetadata = new SimpleSampleMetadata(sampleName); final Broadcast sampleMetadataBroadcast = ctx.broadcast(sampleMetadata); // rdd.map(pileupFunction(metadata, outputInsertLength, showVerbose)).saveAsTextFile(outputFile); - rdd.mapPartitions(distributedCount(sampleMetadataBroadcast.getValue(), 20)).reduce(); + final AllelicCountCollector finalAllelicCountCollector = + rdd.mapPartitions(distributedCount(sampleMetadataBroadcast.getValue(), minimumBaseQuality)) + .reduce((a1, a2) -> combineAllelicCountCollectors(a1, a2)); + + finalAllelicCountCollector.getAllelicCounts().write(outputAllelicCountsFile); } private static FlatMapFunction, AllelicCountCollector> distributedCount(final SampleMetadata sampleMetadata, @@ -44,7 +80,9 @@ private static FlatMapFunction, AllelicCountCollect }; } - private static AllelicCountCollector combineAllelicCountCollectors(final AllelicCountCollector allelicCountCollector1, final AllelicCountCollector allelicCountCollector2) { - + private static AllelicCountCollector combineAllelicCountCollectors(final AllelicCountCollector allelicCountCollector1, + final AllelicCountCollector allelicCountCollector2) { + allelicCountCollector1.collectFromCollector(allelicCountCollector2); + return allelicCountCollector1; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java index 2f92e778795..37e6d249d57 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java @@ -94,4 +94,13 @@ private static Nucleotide inferAltFromPileupBaseCounts(final Nucleotide.Counter .sorted((b1, b2) -> Long.compare(baseCounts.get(b2), baseCounts.get(b1))) .findFirst().get(); } + + /** + * Reminder that any additional information used through this method will not be able to enforce the minBaseQuality. + * + * @param allelicCountCollector input data to combine with this + */ + public void collectFromCollector(final AllelicCountCollector allelicCountCollector) { + this.allelicCounts.addAll(allelicCountCollector.getAllelicCounts().getRecords()); + } } From eba3a5be5d304d795c644cb41889291002cff36b Mon Sep 17 00:00:00 2001 From: lichtens Date: Thu, 9 Nov 2017 10:57:48 -0500 Subject: [PATCH 3/9] Tests not passing yet. --- .../copynumber/CollectAllelicCountsSpark.java | 30 ++++- .../datacollection/AllelicCountCollector.java | 20 ++- .../metadata/SimpleSampleMetadata.java | 7 +- ...lectAllelicCountsSparkIntegrationTest.java | 119 ++++++++++++++++++ 4 files changed, 169 insertions(+), 7 deletions(-) create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSparkIntegrationTest.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java index e2f7494f7b2..c4f7a7a47bd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -7,8 +7,12 @@ import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.broadcast.Broadcast; import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.filters.MappingQualityReadFilter; +import org.broadinstitute.hellbender.engine.filters.ReadFilter; import org.broadinstitute.hellbender.engine.spark.LocusWalkerContext; import org.broadinstitute.hellbender.engine.spark.LocusWalkerSpark; import org.broadinstitute.hellbender.tools.copynumber.allelic.alleliccount.AllelicCountCollector; @@ -18,9 +22,19 @@ import org.broadinstitute.hellbender.utils.Nucleotide; import java.io.File; +import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; +import java.util.List; +/** + * See {@link CollectAllelicCounts}. This behaves the same, except that it supports spark. + */ +@CommandLineProgramProperties( + summary = "Collects ref/alt counts at sites.", + oneLineSummary = "Collects ref/alt counts at sites.", + programGroup = CopyNumberProgramGroup.class +) public class CollectAllelicCountsSpark extends LocusWalkerSpark { private static final Logger logger = LogManager.getLogger(CollectAllelicCounts.class); @@ -59,7 +73,7 @@ protected void processAlignments(JavaRDD rdd, JavaSparkConte // rdd.map(pileupFunction(metadata, outputInsertLength, showVerbose)).saveAsTextFile(outputFile); final AllelicCountCollector finalAllelicCountCollector = rdd.mapPartitions(distributedCount(sampleMetadataBroadcast.getValue(), minimumBaseQuality)) - .reduce((a1, a2) -> combineAllelicCountCollectors(a1, a2)); + .reduce((a1, a2) -> combineAllelicCountCollectors(a1, a2, sampleMetadataBroadcast.getValue())); finalAllelicCountCollector.getAllelicCounts().write(outputAllelicCountsFile); } @@ -81,8 +95,16 @@ private static FlatMapFunction, AllelicCountCollect } private static AllelicCountCollector combineAllelicCountCollectors(final AllelicCountCollector allelicCountCollector1, - final AllelicCountCollector allelicCountCollector2) { - allelicCountCollector1.collectFromCollector(allelicCountCollector2); - return allelicCountCollector1; + final AllelicCountCollector allelicCountCollector2, + final SampleMetadata sampleMetadata) { + return AllelicCountCollector.combine(allelicCountCollector1, allelicCountCollector2, sampleMetadata); + } + + @Override + public List getDefaultReadFilters() { + final List initialReadFilters = new ArrayList<>(super.getDefaultReadFilters()); + initialReadFilters.add(new MappingQualityReadFilter(DEFAULT_MINIMUM_MAPPING_QUALITY)); + + return initialReadFilters; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java index 37e6d249d57..4ded8cf98b7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java @@ -98,9 +98,25 @@ private static Nucleotide inferAltFromPileupBaseCounts(final Nucleotide.Counter /** * Reminder that any additional information used through this method will not be able to enforce the minBaseQuality. * - * @param allelicCountCollector input data to combine with this + * @param allelicCountCollector input data to combine with this. */ public void collectFromCollector(final AllelicCountCollector allelicCountCollector) { - this.allelicCounts.addAll(allelicCountCollector.getAllelicCounts().getRecords()); + if (allelicCountCollector != null) { + this.allelicCounts.addAll(allelicCountCollector.getAllelicCounts().getRecords()); + } + } + + /** TODO: Docs and input parameter checking + * + * @param allelicCountCollector1 + * @param allelicCountCollector2 + * @return a new allelic count collector with the combined contents of the two inputs + */ + public static AllelicCountCollector combine(final AllelicCountCollector allelicCountCollector1, final AllelicCountCollector allelicCountCollector2, + final SampleMetadata sampleMetadata) { + final AllelicCountCollector result = new AllelicCountCollector(sampleMetadata); + result.collectFromCollector(allelicCountCollector1); + result.collectFromCollector(allelicCountCollector2); + return result; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/metadata/SimpleSampleMetadata.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/metadata/SimpleSampleMetadata.java index 831748b537a..46db2b12d08 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/metadata/SimpleSampleMetadata.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/metadata/SimpleSampleMetadata.java @@ -4,12 +4,17 @@ import htsjdk.samtools.SAMReadGroupRecord; import org.broadinstitute.hellbender.utils.Utils; +import java.io.Serializable; + /** * Metadata associated with a single sample. * * @author Samuel Lee <slee@broadinstitute.org> */ -public class SimpleSampleMetadata implements SampleMetadata { +public class SimpleSampleMetadata implements SampleMetadata, Serializable { + + private static final long serialVersionUID = 0L; + private final String sampleName; public SimpleSampleMetadata(final String sampleName) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSparkIntegrationTest.java new file mode 100644 index 00000000000..aa180578cac --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSparkIntegrationTest.java @@ -0,0 +1,119 @@ +package org.broadinstitute.hellbender.tools.copynumber; + +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.tools.copynumber.allelic.alleliccount.AllelicCount; +import org.broadinstitute.hellbender.tools.copynumber.allelic.alleliccount.AllelicCountCollection; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleSampleMetadata; +import org.broadinstitute.hellbender.utils.Nucleotide; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; + +/** + * Integration test for {@link CollectAllelicCountsSpark}. Uses a BAM with sites generated from hg19mini using wgsim. + * + * These tests should be identical for {@link CollectAllelicCounts} + * + */ +public final class CollectAllelicCountsSparkIntegrationTest extends CommandLineProgramTest { + + private static final String TEST_SUB_DIR = toolsTestDir + "copynumber/allelic"; + private static final File NORMAL_BAM_FILE = new File(TEST_SUB_DIR, "collect-allelic-counts-normal.bam"); + private static final File TUMOR_BAM_FILE = new File(TEST_SUB_DIR, "collect-allelic-counts-tumor.bam"); + private static final File SITES_FILE = new File(TEST_SUB_DIR, "collect-allelic-counts-sites.interval_list"); + private static final File REFERENCE_FILE = new File(hg19MiniReference); + private static final String NORMAL_SAMPLE_NAME_EXPECTED = "20"; + private static final String TUMOR_SAMPLE_NAME_EXPECTED = "20"; + + @DataProvider(name = "testData") + public Object[][] testData() { + //counts from IGV with minMQ = 30 and minBQ = 20 + final AllelicCountCollection normalCountsExpected = new AllelicCountCollection( + new SimpleSampleMetadata(NORMAL_SAMPLE_NAME_EXPECTED), + Arrays.asList( + new AllelicCount(new SimpleInterval("1", 10736, 10736), 0, 0, Nucleotide.G, Nucleotide.N), + new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A), + new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T), + new AllelicCount(new SimpleInterval("1", 12444, 12444), 0, 18, Nucleotide.T, Nucleotide.C), + new AllelicCount(new SimpleInterval("1", 13059, 13059), 0, 8, Nucleotide.C, Nucleotide.A), + new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G), + new AllelicCount(new SimpleInterval("1", 15204, 15204), 4, 4, Nucleotide.C, Nucleotide.A), + new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G), + new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C), + new AllelicCount(new SimpleInterval("2", 15110, 15110), 6, 0, Nucleotide.G, Nucleotide.N), + new AllelicCount(new SimpleInterval("2", 15629, 15629), 5, 3, Nucleotide.T, Nucleotide.A))); + + final AllelicCountCollection tumorCountsExpected = new AllelicCountCollection( + new SimpleSampleMetadata(TUMOR_SAMPLE_NAME_EXPECTED), + Arrays.asList( + new AllelicCount(new SimpleInterval("1", 10736, 10736), 0, 0, Nucleotide.G, Nucleotide.N), + new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A), + new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T), + new AllelicCount(new SimpleInterval("1", 12444, 12444), 0, 17, Nucleotide.T, Nucleotide.C), + new AllelicCount(new SimpleInterval("1", 13059, 13059), 0, 8, Nucleotide.C, Nucleotide.A), + new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G), + new AllelicCount(new SimpleInterval("1", 15204, 15204), 4, 3, Nucleotide.C, Nucleotide.A), + new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G), + new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C), + new AllelicCount(new SimpleInterval("2", 15110, 15110), 6, 0, Nucleotide.G, Nucleotide.N), + new AllelicCount(new SimpleInterval("2", 15629, 15629), 5, 3, Nucleotide.T, Nucleotide.A))); + + //counts from IGV with minMQ = 30 and minBQ = 20, without nucleotides + final AllelicCountCollection normalCountsExpectedWithoutNucleotides = new AllelicCountCollection( + new SimpleSampleMetadata(NORMAL_SAMPLE_NAME_EXPECTED), + Arrays.asList( + new AllelicCount(new SimpleInterval("1", 10736, 10736), 0, 0), + new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4), + new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6), + new AllelicCount(new SimpleInterval("1", 12444, 12444), 0, 18), + new AllelicCount(new SimpleInterval("1", 13059, 13059), 0, 8), + new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8), + new AllelicCount(new SimpleInterval("1", 15204, 15204), 4, 4), + new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9), + new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5), + new AllelicCount(new SimpleInterval("2", 15110, 15110), 6, 0), + new AllelicCount(new SimpleInterval("2", 15629, 15629), 5, 3))); + + final AllelicCountCollection tumorCountsExpectedWithoutNucleotides = new AllelicCountCollection( + new SimpleSampleMetadata(TUMOR_SAMPLE_NAME_EXPECTED), + Arrays.asList( + new AllelicCount(new SimpleInterval("1", 10736, 10736), 0, 0), + new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4), + new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6), + new AllelicCount(new SimpleInterval("1", 12444, 12444), 0, 17), + new AllelicCount(new SimpleInterval("1", 13059, 13059), 0, 8), + new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8), + new AllelicCount(new SimpleInterval("1", 15204, 15204), 4, 3), + new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9), + new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5), + new AllelicCount(new SimpleInterval("2", 15110, 15110), 6, 0), + new AllelicCount(new SimpleInterval("2", 15629, 15629), 5, 3))); + + return new Object[][]{ + {NORMAL_BAM_FILE, normalCountsExpected}, + {TUMOR_BAM_FILE, tumorCountsExpected}, + {NORMAL_BAM_FILE, normalCountsExpectedWithoutNucleotides}, + {TUMOR_BAM_FILE, tumorCountsExpectedWithoutNucleotides} + }; + } + + @Test(dataProvider = "testData") + public void test(final File inputBAMFile, + final AllelicCountCollection countsExpected) { + final File outputFile = createTempFile("collect-allelic-counts-test-output", ".tsv"); + final String[] arguments = { + "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, inputBAMFile.getAbsolutePath(), + "-L", SITES_FILE.getAbsolutePath(), + "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(), + "-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME, outputFile.getAbsolutePath() + }; + runCommandLine(arguments); + final AllelicCountCollection countsResult = new AllelicCountCollection(outputFile); + Assert.assertEquals(countsResult.getRecords(), countsExpected.getRecords()); + } +} \ No newline at end of file From 64cd52c4f666c25af4f1cbeebf53c8b65bcf9bad Mon Sep 17 00:00:00 2001 From: lichtens Date: Mon, 13 Nov 2017 09:11:06 -0500 Subject: [PATCH 4/9] Possible bugfix needed for LocusWalkerSpark. --- .../tools/copynumber/CollectAllelicCountsSpark.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java index c4f7a7a47bd..02676fb41d8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -10,7 +10,6 @@ import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; -import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.filters.MappingQualityReadFilter; import org.broadinstitute.hellbender.engine.filters.ReadFilter; import org.broadinstitute.hellbender.engine.spark.LocusWalkerContext; @@ -70,10 +69,11 @@ protected void processAlignments(JavaRDD rdd, JavaSparkConte final String sampleName = SampleNameUtils.readSampleName(getHeaderForReads()); final SampleMetadata sampleMetadata = new SimpleSampleMetadata(sampleName); final Broadcast sampleMetadataBroadcast = ctx.broadcast(sampleMetadata); - // rdd.map(pileupFunction(metadata, outputInsertLength, showVerbose)).saveAsTextFile(outputFile); + final AllelicCountCollector finalAllelicCountCollector = rdd.mapPartitions(distributedCount(sampleMetadataBroadcast.getValue(), minimumBaseQuality)) .reduce((a1, a2) -> combineAllelicCountCollectors(a1, a2, sampleMetadataBroadcast.getValue())); + final List tmp = rdd.collect(); finalAllelicCountCollector.getAllelicCounts().write(outputAllelicCountsFile); } @@ -84,8 +84,7 @@ private static FlatMapFunction, AllelicCountCollect final AllelicCountCollector result = new AllelicCountCollector(sampleMetadata); contextIterator.forEachRemaining( ctx -> { - final ReferenceContext referenceContext = ctx.getReferenceContext(); - final byte refAsByte = referenceContext.getBase(); + final byte refAsByte = ctx.getReferenceContext().getBase(); result.collectAtLocus(Nucleotide.valueOf(refAsByte), ctx.getAlignmentContext().getBasePileup(), ctx.getAlignmentContext().getLocation(), minimumBaseQuality); } From 4bd23d404d3393e56ec8bf67267214830b8b6579 Mon Sep 17 00:00:00 2001 From: lichtens Date: Mon, 13 Nov 2017 09:19:50 -0500 Subject: [PATCH 5/9] Added TODO regarding notes on bugfix. --- .../tools/copynumber/CollectAllelicCountsSpark.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java index 02676fb41d8..add786212a3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -73,7 +73,12 @@ protected void processAlignments(JavaRDD rdd, JavaSparkConte final AllelicCountCollector finalAllelicCountCollector = rdd.mapPartitions(distributedCount(sampleMetadataBroadcast.getValue(), minimumBaseQuality)) .reduce((a1, a2) -> combineAllelicCountCollectors(a1, a2, sampleMetadataBroadcast.getValue())); - final List tmp = rdd.collect(); + // TODO: In integration test, we come back with 8 entries, when we should get 11. Also, at least one of those 8 will have incorrect counts. + // TODO: Delete the next line (and other TODOs) when the integration test is fixed. + // final List tmp = rdd.collect(); + // TODO: mapPartitions --> map has same erroneous behavior + // TODO: emit empty loci = false has same errorneous behavior (though the counts will change as expected). + finalAllelicCountCollector.getAllelicCounts().write(outputAllelicCountsFile); } From ef1a30bcbd77ff6ffa27b57a7ed584541275330b Mon Sep 17 00:00:00 2001 From: lichtens Date: Mon, 13 Nov 2017 21:43:42 -0500 Subject: [PATCH 6/9] Fixing compile error --- .../hellbender/tools/copynumber/CollectAllelicCountsSpark.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java index add786212a3..9fd9017ec39 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -35,6 +35,9 @@ programGroup = CopyNumberProgramGroup.class ) public class CollectAllelicCountsSpark extends LocusWalkerSpark { + + private static final long serialVersionUID = 1L; + private static final Logger logger = LogManager.getLogger(CollectAllelicCounts.class); @Argument( From 426010eb3f6cf6d69d9706ce264a585baf24b502 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 18 Sep 2018 16:36:41 +0100 Subject: [PATCH 7/9] Make CollectAllelicCountsSpark run again. Reproduced failing test in https://github.com/broadinstitute/gatk/issues/3823 --- .../copynumber/CollectAllelicCountsSpark.java | 21 +++++++-------- .../datacollection/AllelicCountCollector.java | 2 +- ...lectAllelicCountsSparkIntegrationTest.java | 27 ++++++++++++------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java index 9fd9017ec39..f0268972aaf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -14,10 +14,8 @@ import org.broadinstitute.hellbender.engine.filters.ReadFilter; import org.broadinstitute.hellbender.engine.spark.LocusWalkerContext; import org.broadinstitute.hellbender.engine.spark.LocusWalkerSpark; -import org.broadinstitute.hellbender.tools.copynumber.allelic.alleliccount.AllelicCountCollector; -import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleMetadata; -import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleNameUtils; -import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleSampleMetadata; +import org.broadinstitute.hellbender.tools.copynumber.datacollection.AllelicCountCollector; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.*; import org.broadinstitute.hellbender.utils.Nucleotide; import java.io.File; @@ -69,12 +67,11 @@ public class CollectAllelicCountsSpark extends LocusWalkerSpark { @Override protected void processAlignments(JavaRDD rdd, JavaSparkContext ctx) { - final String sampleName = SampleNameUtils.readSampleName(getHeaderForReads()); - final SampleMetadata sampleMetadata = new SimpleSampleMetadata(sampleName); - final Broadcast sampleMetadataBroadcast = ctx.broadcast(sampleMetadata); + final SampleLocatableMetadata metadata = MetadataUtils.fromHeader(getHeaderForReads(), Metadata.Type.SAMPLE_LOCATABLE); + final Broadcast sampleMetadataBroadcast = ctx.broadcast(metadata); final AllelicCountCollector finalAllelicCountCollector = - rdd.mapPartitions(distributedCount(sampleMetadataBroadcast.getValue(), minimumBaseQuality)) + rdd.mapPartitions(distributedCount(sampleMetadataBroadcast, minimumBaseQuality)) .reduce((a1, a2) -> combineAllelicCountCollectors(a1, a2, sampleMetadataBroadcast.getValue())); // TODO: In integration test, we come back with 8 entries, when we should get 11. Also, at least one of those 8 will have incorrect counts. // TODO: Delete the next line (and other TODOs) when the integration test is fixed. @@ -86,14 +83,14 @@ protected void processAlignments(JavaRDD rdd, JavaSparkConte finalAllelicCountCollector.getAllelicCounts().write(outputAllelicCountsFile); } - private static FlatMapFunction, AllelicCountCollector> distributedCount(final SampleMetadata sampleMetadata, + private static FlatMapFunction, AllelicCountCollector> distributedCount(final Broadcast sampleMetadataBroadcast, final int minimumBaseQuality) { return (FlatMapFunction, AllelicCountCollector>) contextIterator -> { - final AllelicCountCollector result = new AllelicCountCollector(sampleMetadata); + final AllelicCountCollector result = new AllelicCountCollector(sampleMetadataBroadcast.getValue()); contextIterator.forEachRemaining( ctx -> { final byte refAsByte = ctx.getReferenceContext().getBase(); - result.collectAtLocus(Nucleotide.valueOf(refAsByte), ctx.getAlignmentContext().getBasePileup(), + result.collectAtLocus(Nucleotide.decode(refAsByte), ctx.getAlignmentContext().getBasePileup(), ctx.getAlignmentContext().getLocation(), minimumBaseQuality); } ); @@ -103,7 +100,7 @@ private static FlatMapFunction, AllelicCountCollect private static AllelicCountCollector combineAllelicCountCollectors(final AllelicCountCollector allelicCountCollector1, final AllelicCountCollector allelicCountCollector2, - final SampleMetadata sampleMetadata) { + final SampleLocatableMetadata sampleMetadata) { return AllelicCountCollector.combine(allelicCountCollector1, allelicCountCollector2, sampleMetadata); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java index 4ded8cf98b7..efa0b94899f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/datacollection/AllelicCountCollector.java @@ -113,7 +113,7 @@ public void collectFromCollector(final AllelicCountCollector allelicCountCollect * @return a new allelic count collector with the combined contents of the two inputs */ public static AllelicCountCollector combine(final AllelicCountCollector allelicCountCollector1, final AllelicCountCollector allelicCountCollector2, - final SampleMetadata sampleMetadata) { + final SampleLocatableMetadata sampleMetadata) { final AllelicCountCollector result = new AllelicCountCollector(sampleMetadata); result.collectFromCollector(allelicCountCollector1); result.collectFromCollector(allelicCountCollector2); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSparkIntegrationTest.java index aa180578cac..dfd118582a8 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSparkIntegrationTest.java @@ -1,10 +1,13 @@ package org.broadinstitute.hellbender.tools.copynumber; +import htsjdk.samtools.SAMSequenceDictionary; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; -import org.broadinstitute.hellbender.tools.copynumber.allelic.alleliccount.AllelicCount; -import org.broadinstitute.hellbender.tools.copynumber.allelic.alleliccount.AllelicCountCollection; -import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleSampleMetadata; +import org.broadinstitute.hellbender.engine.ReferenceDataSource; +import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AllelicCountCollection; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleSampleLocatableMetadata; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.AllelicCount; import org.broadinstitute.hellbender.utils.Nucleotide; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.testng.Assert; @@ -21,20 +24,26 @@ * */ public final class CollectAllelicCountsSparkIntegrationTest extends CommandLineProgramTest { - - private static final String TEST_SUB_DIR = toolsTestDir + "copynumber/allelic"; + private static final File TEST_SUB_DIR = new File(toolsTestDir, "copynumber"); private static final File NORMAL_BAM_FILE = new File(TEST_SUB_DIR, "collect-allelic-counts-normal.bam"); private static final File TUMOR_BAM_FILE = new File(TEST_SUB_DIR, "collect-allelic-counts-tumor.bam"); private static final File SITES_FILE = new File(TEST_SUB_DIR, "collect-allelic-counts-sites.interval_list"); private static final File REFERENCE_FILE = new File(hg19MiniReference); + private static final String NORMAL_SAMPLE_NAME_EXPECTED = "20"; private static final String TUMOR_SAMPLE_NAME_EXPECTED = "20"; + private static final SAMSequenceDictionary SEQUENCE_DICTIONARY = ReferenceDataSource.of(REFERENCE_FILE.toPath()).getSequenceDictionary(); + private static final SampleLocatableMetadata NORMAL_METADATA_EXPECTED = new SimpleSampleLocatableMetadata( + NORMAL_SAMPLE_NAME_EXPECTED, SEQUENCE_DICTIONARY); + + private static final SampleLocatableMetadata TUMOR_METADATA_EXPECTED = new SimpleSampleLocatableMetadata( + TUMOR_SAMPLE_NAME_EXPECTED, SEQUENCE_DICTIONARY); @DataProvider(name = "testData") public Object[][] testData() { //counts from IGV with minMQ = 30 and minBQ = 20 final AllelicCountCollection normalCountsExpected = new AllelicCountCollection( - new SimpleSampleMetadata(NORMAL_SAMPLE_NAME_EXPECTED), + NORMAL_METADATA_EXPECTED, Arrays.asList( new AllelicCount(new SimpleInterval("1", 10736, 10736), 0, 0, Nucleotide.G, Nucleotide.N), new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A), @@ -49,7 +58,7 @@ public Object[][] testData() { new AllelicCount(new SimpleInterval("2", 15629, 15629), 5, 3, Nucleotide.T, Nucleotide.A))); final AllelicCountCollection tumorCountsExpected = new AllelicCountCollection( - new SimpleSampleMetadata(TUMOR_SAMPLE_NAME_EXPECTED), + TUMOR_METADATA_EXPECTED, Arrays.asList( new AllelicCount(new SimpleInterval("1", 10736, 10736), 0, 0, Nucleotide.G, Nucleotide.N), new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A), @@ -65,7 +74,7 @@ public Object[][] testData() { //counts from IGV with minMQ = 30 and minBQ = 20, without nucleotides final AllelicCountCollection normalCountsExpectedWithoutNucleotides = new AllelicCountCollection( - new SimpleSampleMetadata(NORMAL_SAMPLE_NAME_EXPECTED), + NORMAL_METADATA_EXPECTED, Arrays.asList( new AllelicCount(new SimpleInterval("1", 10736, 10736), 0, 0), new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4), @@ -80,7 +89,7 @@ public Object[][] testData() { new AllelicCount(new SimpleInterval("2", 15629, 15629), 5, 3))); final AllelicCountCollection tumorCountsExpectedWithoutNucleotides = new AllelicCountCollection( - new SimpleSampleMetadata(TUMOR_SAMPLE_NAME_EXPECTED), + TUMOR_METADATA_EXPECTED, Arrays.asList( new AllelicCount(new SimpleInterval("1", 10736, 10736), 0, 0), new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4), From 73a115ce5f98d4a5b5e6c35e4f68d713003ee3f0 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 2 Oct 2018 17:31:46 +0100 Subject: [PATCH 8/9] Remove old TODOs --- .../tools/copynumber/CollectAllelicCountsSpark.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java index f0268972aaf..52d2641ab49 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -73,13 +73,6 @@ protected void processAlignments(JavaRDD rdd, JavaSparkConte final AllelicCountCollector finalAllelicCountCollector = rdd.mapPartitions(distributedCount(sampleMetadataBroadcast, minimumBaseQuality)) .reduce((a1, a2) -> combineAllelicCountCollectors(a1, a2, sampleMetadataBroadcast.getValue())); - // TODO: In integration test, we come back with 8 entries, when we should get 11. Also, at least one of those 8 will have incorrect counts. - // TODO: Delete the next line (and other TODOs) when the integration test is fixed. - // final List tmp = rdd.collect(); - // TODO: mapPartitions --> map has same erroneous behavior - // TODO: emit empty loci = false has same errorneous behavior (though the counts will change as expected). - - finalAllelicCountCollector.getAllelicCounts().write(outputAllelicCountsFile); } From 94c5a734e773266c9cb3cadf24ed73b0e9015ae3 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 25 Sep 2018 11:20:08 +0100 Subject: [PATCH 9/9] Add ExampleLocusWalkerSpark.java and test --- .../examples/ExampleLocusWalkerSpark.java | 82 +++++++++++++++++++ ...xampleLocusWalkerSparkIntegrationTest.java | 34 ++++++++ 2 files changed, 116 insertions(+) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSpark.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSparkIntegrationTest.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSpark.java new file mode 100644 index 00000000000..4edc6883c50 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSpark.java @@ -0,0 +1,82 @@ +package org.broadinstitute.hellbender.tools.examples; + +import htsjdk.variant.variantcontext.VariantContext; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup; +import org.broadinstitute.hellbender.engine.AlignmentContext; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.FeatureInput; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.spark.LocusWalkerContext; +import org.broadinstitute.hellbender.engine.spark.LocusWalkerSpark; +import org.broadinstitute.hellbender.utils.pileup.ReadPileup; + +import java.io.PrintStream; +import java.util.List; + +/** + * Example/toy program that shows how to implement the LocusWalker interface. Prints locus-based coverage from supplied + * reads, and reference bases/overlapping variants if provided + */ +@CommandLineProgramProperties( + summary = "Example tool that prints locus-based coverage from supplied read to the specified output file (stdout if none provided), along with overlapping reference bases/features (if provided)", + oneLineSummary = "Example tool that prints locus-based coverage with optional contextual data", + programGroup = ExampleProgramGroup.class, + omitFromCommandLine = true +) +public final class ExampleLocusWalkerSpark extends LocusWalkerSpark { + private static final long serialVersionUID = 1L; + + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "Output file (if not provided, defaults to STDOUT)", common = false, optional = true) + private String outputFile = null; + + @Argument(fullName = StandardArgumentDefinitions.VARIANT_LONG_NAME, shortName = StandardArgumentDefinitions.VARIANT_SHORT_NAME, doc = "One or more VCF files", optional = true) + private List> variants; + + private PrintStream outputStream = null; + + + @Override + protected void processAlignments(JavaRDD rdd, JavaSparkContext ctx) { + rdd.map(intervalFunction(variants)).saveAsTextFile(outputFile); + } + + private static Function intervalFunction(List> variants) { + return (Function) context -> { + AlignmentContext alignmentContext = context.getAlignmentContext(); + ReferenceContext referenceContext = context.getReferenceContext(); + FeatureContext featureContext = context.getFeatureContext(); + + StringBuilder sb = new StringBuilder(); + + // Get pileup and counts + ReadPileup pileup = alignmentContext.getBasePileup(); + // print the locus and coverage + sb.append(String.format("Current locus %s:%d (coverage=%s)\n", alignmentContext.getContig(), + alignmentContext.getPosition(), pileup.size())); + // print the reference context if available + if ( referenceContext.hasBackingDataSource() ) { + sb.append("\tReference base(s): " + new String(referenceContext.getBases())); + sb.append("\n"); + } + // print the overlapping variants if there are some + if(featureContext.hasBackingDataSource()) { + List vars = featureContext.getValues(variants); + if(!vars.isEmpty()) { + sb.append("\tOverlapping variant(s):\n"); + for (VariantContext variant : vars) { + sb.append(String.format("\t\t%s:%d-%d, Ref:%s, Alt(s):%s\n", variant.getContig(), variant.getStart(), + variant.getEnd(), variant.getReference(), variant.getAlternateAlleles())); + } + } + } + + return sb.toString(); + }; + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSparkIntegrationTest.java new file mode 100644 index 00000000000..ac1082d41d6 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSparkIntegrationTest.java @@ -0,0 +1,34 @@ +package org.broadinstitute.hellbender.tools.examples; + +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; + +public final class ExampleLocusWalkerSparkIntegrationTest extends CommandLineProgramTest { + private static final String TEST_DATA_DIRECTORY = publicTestDir + "org/broadinstitute/hellbender/engine/"; + private static final String TEST_OUTPUT_DIRECTORY = exampleTestDir; + + @Test + public void testExampleLocusWalker() throws IOException { + final File out = File.createTempFile("out", ".txt"); + out.delete(); + out.deleteOnExit(); + final ArgumentsBuilder args = new ArgumentsBuilder(); + args.add("-L 1"); + args.add("--input"); + args.add(TEST_DATA_DIRECTORY + "reads_data_source_test1.bam"); + args.add("-V"); + args.add(TEST_DATA_DIRECTORY + "feature_data_source_test.vcf"); + args.add("--output"); + args.add(out.getAbsolutePath()); + args.add("--reference"); + args.add(hg19MiniReference); + this.runCommandLine(args.getArgsArray()); + File expected = new File(TEST_OUTPUT_DIRECTORY, "expected_ExampleLocusWalkerIntegrationTest_output.txt"); + IntegrationTestSpec.assertEqualTextFiles(new File(out, "part-00000"), expected); + } +}