diff --git a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSpark.java new file mode 100644 index 00000000000..4edc6883c50 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSpark.java @@ -0,0 +1,82 @@ +package org.broadinstitute.hellbender.tools.examples; + +import htsjdk.variant.variantcontext.VariantContext; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup; +import org.broadinstitute.hellbender.engine.AlignmentContext; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.FeatureInput; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.spark.LocusWalkerContext; +import org.broadinstitute.hellbender.engine.spark.LocusWalkerSpark; +import org.broadinstitute.hellbender.utils.pileup.ReadPileup; + +import java.io.PrintStream; +import java.util.List; + +/** + * Example/toy program that shows how to implement the LocusWalker interface. Prints locus-based coverage from supplied + * reads, and reference bases/overlapping variants if provided + */ +@CommandLineProgramProperties( + summary = "Example tool that prints locus-based coverage from supplied read to the specified output file (stdout if none provided), along with overlapping reference bases/features (if provided)", + oneLineSummary = "Example tool that prints locus-based coverage with optional contextual data", + programGroup = ExampleProgramGroup.class, + omitFromCommandLine = true +) +public final class ExampleLocusWalkerSpark extends LocusWalkerSpark { + private static final long serialVersionUID = 1L; + + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "Output file (if not provided, defaults to STDOUT)", common = false, optional = true) + private String outputFile = null; + + @Argument(fullName = StandardArgumentDefinitions.VARIANT_LONG_NAME, shortName = StandardArgumentDefinitions.VARIANT_SHORT_NAME, doc = "One or more VCF files", optional = true) + private List> variants; + + private PrintStream outputStream = null; + + + @Override + protected void processAlignments(JavaRDD rdd, JavaSparkContext ctx) { + rdd.map(intervalFunction(variants)).saveAsTextFile(outputFile); + } + + private static Function intervalFunction(List> variants) { + return (Function) context -> { + AlignmentContext alignmentContext = context.getAlignmentContext(); + ReferenceContext referenceContext = context.getReferenceContext(); + FeatureContext featureContext = context.getFeatureContext(); + + StringBuilder sb = new StringBuilder(); + + // Get pileup and counts + ReadPileup pileup = alignmentContext.getBasePileup(); + // print the locus and coverage + sb.append(String.format("Current locus %s:%d (coverage=%s)\n", alignmentContext.getContig(), + alignmentContext.getPosition(), pileup.size())); + // print the reference context if available + if ( referenceContext.hasBackingDataSource() ) { + sb.append("\tReference base(s): " + new String(referenceContext.getBases())); + sb.append("\n"); + } + // print the overlapping variants if there are some + if(featureContext.hasBackingDataSource()) { + List vars = featureContext.getValues(variants); + if(!vars.isEmpty()) { + sb.append("\tOverlapping variant(s):\n"); + for (VariantContext variant : vars) { + sb.append(String.format("\t\t%s:%d-%d, Ref:%s, Alt(s):%s\n", variant.getContig(), variant.getStart(), + variant.getEnd(), variant.getReference(), variant.getAlternateAlleles())); + } + } + } + + return sb.toString(); + }; + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSparkIntegrationTest.java new file mode 100644 index 00000000000..ac1082d41d6 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleLocusWalkerSparkIntegrationTest.java @@ -0,0 +1,34 @@ +package org.broadinstitute.hellbender.tools.examples; + +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; + +public final class ExampleLocusWalkerSparkIntegrationTest extends CommandLineProgramTest { + private static final String TEST_DATA_DIRECTORY = publicTestDir + "org/broadinstitute/hellbender/engine/"; + private static final String TEST_OUTPUT_DIRECTORY = exampleTestDir; + + @Test + public void testExampleLocusWalker() throws IOException { + final File out = File.createTempFile("out", ".txt"); + out.delete(); + out.deleteOnExit(); + final ArgumentsBuilder args = new ArgumentsBuilder(); + args.add("-L 1"); + args.add("--input"); + args.add(TEST_DATA_DIRECTORY + "reads_data_source_test1.bam"); + args.add("-V"); + args.add(TEST_DATA_DIRECTORY + "feature_data_source_test.vcf"); + args.add("--output"); + args.add(out.getAbsolutePath()); + args.add("--reference"); + args.add(hg19MiniReference); + this.runCommandLine(args.getArgsArray()); + File expected = new File(TEST_OUTPUT_DIRECTORY, "expected_ExampleLocusWalkerIntegrationTest_output.txt"); + IntegrationTestSpec.assertEqualTextFiles(new File(out, "part-00000"), expected); + } +}