From 8d74d04bb59132cbb38bea8980f60da25b883958 Mon Sep 17 00:00:00 2001 From: Ted Sharpe Date: Mon, 11 Apr 2022 17:09:22 -0400 Subject: [PATCH 1/4] new LocusDepthtoBAF tool --- .../hellbender/tools/sv/BafEvidence.java | 8 + .../hellbender/tools/sv/LocusDepth.java | 46 ++-- .../hellbender/tools/sv/LocusDepthtoBAF.java | 200 ++++++++++++++++++ .../tools/walkers/sv/CollectSVEvidence.java | 9 +- .../utils/codecs/LocusDepthBCICodec.java | 13 +- .../utils/codecs/LocusDepthCodec.java | 11 +- .../tools/sv/LocusDepthtoBAFUnitTest.java | 25 +++ .../sv/PrintSVEvidenceIntegrationTest.java | 8 +- 8 files changed, 288 insertions(+), 32 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java index 730c1dfda6d..a33c31c5d4a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java @@ -24,6 +24,14 @@ public BafEvidence( final String sample, final String contig, this.value = value; } + // value-altering constructor + public BafEvidence( final BafEvidence that, final double value ) { + this.sample = that.sample; + this.contig = that.contig; + this.position = that.position; + this.value = value; + } + public String getSample() { return sample; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java index 869878741a3..1a018757756 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java @@ -2,6 +2,7 @@ import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.util.Locatable; +import org.broadinstitute.hellbender.utils.Utils; import java.util.*; @@ -11,25 +12,36 @@ public final class LocusDepth implements SVFeature { private final String contig; private final int position; private final String sample; - private final byte refCall; // index into nucleotideValues + // next three fields index "ACGT" + private final int refIndex; + private final int altIndex; private final int[] depths; + public final static String BCI_VERSION = "1.0"; - public LocusDepth( final Locatable loc, final String sample, final byte refCall ) { + public LocusDepth( final Locatable loc, final String sample, final int refIndex, final int altIndex ) { + Utils.validateArg(refIndex >= 0 && refIndex <= 3, "refIndex must be between 0 and 3"); + Utils.validateArg(altIndex >= 0 && altIndex <= 3, "altIndex must be between 0 and 3"); + Utils.validateArg(refIndex != altIndex, "refIndex and altIndex must be different"); this.contig = loc.getContig(); this.position = loc.getStart(); this.sample = sample; - this.refCall = refCall; + this.refIndex = refIndex; + this.altIndex = altIndex; this.depths = new int[4]; } public LocusDepth( final String contig, final int position, - final String sample, final byte refCall, + final String sample, final int refIndex, final int altIndex, final int aDepth, final int cDepth, final int gDepth, final int tDepth ) { + Utils.validateArg(refIndex >= 0 && refIndex <= 3, "refIndex must be between 0 and 3"); + Utils.validateArg(altIndex >= 0 && altIndex <= 3, "altIndex must be between 0 and 3"); + 
Utils.validateArg(refIndex != altIndex, "refIndex and altIndex must be different"); this.contig = contig; this.position = position; this.sample = sample; - this.refCall = refCall; + this.refIndex = refIndex; + this.altIndex = altIndex; this.depths = new int[4]; depths[0] = aDepth; depths[1] = cDepth; @@ -57,13 +69,19 @@ public int getStart() { } public String getSample() { return sample; } - public char getRefCall() { - return (char)refCall; + // index into "ACGT" + public int getRefIndex() { + return refIndex; + } + // index into "ACGT" + public int getAltIndex() { + return altIndex; } - public int getADepth() { return depths[0]; } - public int getCDepth() { return depths[1]; } - public int getGDepth() { return depths[2]; } - public int getTDepth() { return depths[3]; } + public int getTotalDepth() { return depths[0] + depths[1] + depths[2] + depths[3]; } + // idx is index into "ACGT" + public int getDepth( final int idx ) { return depths[idx]; } + public int getRefDepth() { return depths[refIndex]; } + public int getAltDepth() { return depths[altIndex]; } @Override public LocusDepth extractSamples( final Set sampleNames, final Object header ) { @@ -77,7 +95,7 @@ public boolean equals( final Object obj ) { } public boolean equals( final LocusDepth that ) { - return this.position == that.position && this.refCall == that.refCall && + return this.position == that.position && this.refIndex == that.refIndex && this.depths[0] == that.depths[0] && this.depths[1] == that.depths[1] && this.depths[2] == that.depths[2] && this.depths[3] == that.depths[3] && Objects.equals(this.contig, that.contig) && @@ -86,12 +104,12 @@ public boolean equals( final LocusDepth that ) { @Override public int hashCode() { - return Objects.hash(contig, position, sample, refCall, Arrays.hashCode(depths)); + return Objects.hash(contig, position, sample, refIndex, Arrays.hashCode(depths)); } @Override public String toString() { - return contig + "\t" + position + "\t" + sample + "\t" + (char)refCall + "\t" + + return contig + "\t" + position + "\t" + sample + "\t" + (char)refIndex + "\t" + depths[0] + "\t" + depths[1] + "\t" + depths[2] + "\t" + depths[3]; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java new file mode 100644 index 00000000000..7bcb2c6de8f --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java @@ -0,0 +1,200 @@ +package org.broadinstitute.hellbender.tools.sv; + +import com.google.common.annotations.VisibleForTesting; +import htsjdk.tribble.Feature; +import org.apache.commons.math3.distribution.ChiSquaredDistribution; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.engine.MultiFeatureWalker; +import org.broadinstitute.hellbender.engine.ReadsContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.MathUtils.RunningAverage; +import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodec; +import 
org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodecFinder; +import org.broadinstitute.hellbender.utils.codecs.FeatureSink; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/**
+ * <p>Merges locus-sorted LocusDepth evidence files, and calculates the bi-allelic frequency for each
+ * sample and site, and writes these values as a BafEvidence output file.</p>
+ *
+ * <h3>Inputs</h3>
+ * <ul>
+ *     <li>One or more LocusDepth evidence files to process.</li>
+ * </ul>
+ *
+ * <h3>Output</h3>
+ * <ul>
+ *     <li>An output file of BafEvidence for each sample and site.</li>
+ * </ul>
+ *
+ * <h3>Usage example</h3>
+ * <pre>
+ *     gatk LocusDepthtoBAF \
+ *       -F file1.ld.txt.gz [-F file2.ld.txt.gz ...] \
+ *       -O merged.baf.bci
+ * </pre>
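For a single sample, the per-site value is the alt-allele depth divided by the total depth, computed by calcBAF further down in this file; per-site values are then shifted so that their median is 0.5. A minimal sketch of the per-sample calculation, reusing the numbers from LocusDepthtoBAFUnitTest later in this patch (test scaffolding assumed; illustration only, not part of the change):

    // Depths A=6, C=4, G=0, T=0 with ref index 0 (A) and alt index 1 (C): the total depth of 10
    // meets the default min-total-depth, and the 6/4 split passes the default het check,
    // so calcBAF returns 4/10.
    final LocusDepthtoBAF instance = new LocusDepthtoBAF();
    final LocusDepth locusDepth = new LocusDepth("1", 1, "sample", 0, 1, 6, 4, 0, 0);
    final double baf = instance.calcBAF(locusDepth);   // 0.4 with the default thresholds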
+ * + * @author Ted Sharpe <tsharpe@broadinstitute.org> + */ +@CommandLineProgramProperties( + summary = "Convert LocusDepth to BafEvidence", + oneLineSummary = "Convert LocusDepth to BafEvidence", + programGroup = StructuralVariantDiscoveryProgramGroup.class +) +@ExperimentalFeature +public class LocusDepthtoBAF extends MultiFeatureWalker { + public static final String LOCUS_DEPTH_FILE_NAME = "locus-depth"; + public static final String SAMPLE_NAMES_NAME = "sample-names"; + public static final String BAF_EVIDENCE_FILE_NAME = "baf-evidence-output"; + public static final String MIN_TOTAL_DEPTH_NAME = "min-total-depth"; + public static final String MAX_STD_NAME = "max-std"; + public static final String MIN_HET_LIKELIHOOD = "min-het-likelihood"; + public static final String COMPRESSION_LEVEL_NAME = "compression-level"; + public static ChiSquaredDistribution chiSqDist = new ChiSquaredDistribution(null, 1.); + + @Argument( + doc = "Locus depth files to process", + fullName = LOCUS_DEPTH_FILE_NAME, + shortName = StandardArgumentDefinitions.FEATURE_SHORT_NAME ) + private List locusDepthFiles; + + @Argument(fullName = SAMPLE_NAMES_NAME, doc = "Sample names", optional = true) + private List sampleNames; + + @Argument( + doc = "BAF output file", + fullName = BAF_EVIDENCE_FILE_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) + private GATKPath bafOutputFile; + + @Argument( + doc = "Maximum standard deviation across bafs at a locus (otherwise locus will be ignored)", + fullName = MAX_STD_NAME, optional = true ) + private double maxStdDev = .2; + + @Argument( + doc = "Minimum total depth at a locus (otherwise locus will be ignored)", + fullName = MIN_TOTAL_DEPTH_NAME, optional = true ) + private int minTotalDepth = 10; + + @Argument( + doc = "Minimum likelihood of site being biallelic and heterozygous", + fullName = MIN_HET_LIKELIHOOD, optional = true ) + private double minHetLikelihood = .5; + + @Argument( + doc = "Output compression level", + fullName = COMPRESSION_LEVEL_NAME, + minValue = 0, maxValue = 9, optional = true ) + private int compressionLevel = 4; + + private FeatureSink writer; + private final List sameLocusBuffer = new ArrayList<>(); + + @Override + @SuppressWarnings("unchecked") + public void onTraversalStart() { + super.onTraversalStart(); + final FeatureOutputCodec> codec = + FeatureOutputCodecFinder.find(bafOutputFile); + if ( !codec.getFeatureType().isAssignableFrom(BafEvidence.class) ) { + throw new UserException("We're intending to write BafEvidence, but the feature type " + + "associated with the output file expects features of type " + + codec.getFeatureType().getSimpleName()); + } + writer = (FeatureSink)codec.makeSink(bafOutputFile, + getBestAvailableSequenceDictionary(), + sampleNames, compressionLevel); + } + + @Override + public void apply( LocusDepth locusDepth, Object header, + ReadsContext readsContext, ReferenceContext referenceContext ) { + final double baf = calcBAF(locusDepth); + if ( baf > 0. 
) { + final BafEvidence bafEvidence = + new BafEvidence(locusDepth.getSample(), locusDepth.getContig(), + locusDepth.getStart(), baf); + if ( !sameLocus(bafEvidence) ) { + processBuffer(); + } + sameLocusBuffer.add(bafEvidence); + } + } + + @Override + public Object onTraversalSuccess() { + super.onTraversalSuccess(); + writer.close(); + return null; + } + + @VisibleForTesting double calcBAF( final LocusDepth locusDepth ) { + final int totalDepth = locusDepth.getTotalDepth(); + if ( totalDepth < minTotalDepth ) { + return 0.; + } + double expectRefAlt = totalDepth / 2.; + final double refDiff = locusDepth.getRefDepth() - expectRefAlt; + final double altDiff = locusDepth.getAltDepth() - expectRefAlt; + double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt; + double fitProb = 1. - chiSqDist.cumulativeProbability(chiSq); + if ( fitProb < minHetLikelihood ) { + return 0.; + } + return (double)locusDepth.getAltDepth() / totalDepth; + } + + private boolean sameLocus( final BafEvidence bafEvidence ) { + if ( sameLocusBuffer.isEmpty() ) { + return true; + } + final BafEvidence curLocus = sameLocusBuffer.get(0); + return curLocus.getContig().equals(bafEvidence.getContig()) && + curLocus.getStart() == bafEvidence.getStart(); + } + + private void processBuffer() { + int nBafs = sameLocusBuffer.size(); + if ( nBafs == 1 ) { + writer.write(new BafEvidence(sameLocusBuffer.get(0), .5)); + } else { + final RunningAverage ra = new RunningAverage(); + for ( final BafEvidence bafEvidence : sameLocusBuffer ) { + ra.add(bafEvidence.getValue()); + } + final double stddev = ra.stddev(); + if ( stddev <= maxStdDev ) { + final double[] vals = new double[nBafs]; + for ( int idx = 0; idx != nBafs; ++idx ) { + vals[idx] = sameLocusBuffer.get(idx).getValue(); + } + Arrays.sort(vals); + int midIdx = nBafs / 2; + double median = + (nBafs & 1) != 0 ? 
vals[midIdx] : (vals[midIdx] + vals[midIdx - 1])/2.; + final double adj = .5 - median; + for ( final BafEvidence bafEvidence : sameLocusBuffer ) { + writer.write(new BafEvidence(bafEvidence, bafEvidence.getValue()+adj)); + } + } + } + sameLocusBuffer.clear(); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java index 1570a9f196e..aed4ef8ef4b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java @@ -5,6 +5,7 @@ import htsjdk.samtools.CigarOperator; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.Locatable; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.BetaFeature; @@ -694,14 +695,16 @@ private boolean readNextLocus() { return false; } VariantContext snp = snpSourceItr.next(); - while ( !snp.isSNP() ) { + while ( !snp.isSNP() || !snp.isBiallelic() ) { if ( !snpSourceItr.hasNext() ) { return false; } snp = snpSourceItr.next(); } - final byte[] refSeq = snp.getReference().getBases(); - final LocusDepth locusDepth = new LocusDepth(snp, sampleName, refSeq[0]); + final List alleles = snp.getAlleles(); + final int refIndex = Nucleotide.decode(alleles.get(0).getBases()[0]).ordinal(); + final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal(); + final LocusDepth locusDepth = new LocusDepth(snp, sampleName, refIndex, altIndex); locusDepthQueue.add(locusDepth); return true; } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java index 18c980e7d66..d9b0a5271b0 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java @@ -32,7 +32,7 @@ public LocusDepth decode( final Reader reader ) throws IOException { return new LocusDepth(reader.getDictionary().getSequence(dis.readInt()).getSequenceName(), dis.readInt(), reader.getSampleNames().get(dis.readInt()), - dis.readByte(), + dis.readByte(), dis.readByte(), dis.readInt(), dis.readInt(), dis.readInt(), dis.readInt()); } @@ -68,11 +68,12 @@ public void encode( final LocusDepth locusDepth, final Writer writer dos.writeInt(writer.getContigIndex(locusDepth.getContig())); dos.writeInt(locusDepth.getStart()); dos.writeInt(writer.getSampleIndex(locusDepth.getSample())); - dos.writeByte(locusDepth.getRefCall()); - dos.writeInt(locusDepth.getADepth()); - dos.writeInt(locusDepth.getCDepth()); - dos.writeInt(locusDepth.getGDepth()); - dos.writeInt(locusDepth.getTDepth()); + dos.writeByte(locusDepth.getRefIndex()); + dos.writeByte(locusDepth.getAltIndex()); + dos.writeInt(locusDepth.getDepth(0)); + dos.writeInt(locusDepth.getDepth(1)); + dos.writeInt(locusDepth.getDepth(2)); + dos.writeInt(locusDepth.getDepth(3)); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java index e1b77d34975..bcbd8a6983d 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java +++ 
b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java @@ -36,11 +36,12 @@ public LocusDepthCodec() { return new LocusDepth(tokens.get(0), Integer.parseUnsignedInt(tokens.get(1)) + 1, tokens.get(2), - (byte)tokens.get(3).charAt(0), + Integer.parseUnsignedInt(tokens.get(3)), Integer.parseUnsignedInt(tokens.get(4)), Integer.parseUnsignedInt(tokens.get(5)), Integer.parseUnsignedInt(tokens.get(6)), - Integer.parseUnsignedInt(tokens.get(7))); + Integer.parseUnsignedInt(tokens.get(7)), + Integer.parseUnsignedInt(tokens.get(8))); } @Override public Object readActualHeader( LineIterator reader ) { return null; } @@ -86,8 +87,8 @@ public FeatureSink makeSortMerger( final GATKPath path, public static String encode( final LocusDepth locusDepth ) { return locusDepth.getContig() + "\t" + (locusDepth.getStart() - 1) + "\t" + - locusDepth.getSample() + "\t" + locusDepth.getRefCall() + "\t" + - locusDepth.getADepth() + "\t" + locusDepth.getCDepth() + "\t" + - locusDepth.getGDepth() + "\t" + locusDepth.getTDepth(); + locusDepth.getSample() + "\t" + locusDepth.getRefIndex() + "\t" + + locusDepth.getAltIndex() + "\t" + locusDepth.getDepth(0) + "\t" + locusDepth.getDepth(1) + "\t" + + locusDepth.getDepth(2) + "\t" + locusDepth.getDepth(3); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java new file mode 100644 index 00000000000..ffcc27ecf86 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java @@ -0,0 +1,25 @@ +package org.broadinstitute.hellbender.tools.sv; + +import org.apache.commons.math3.distribution.ChiSquaredDistribution; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class LocusDepthtoBAFUnitTest { + @Test + public void testBAFCalc() { + final LocusDepthtoBAF instance = new LocusDepthtoBAF(); + final String tig = "1"; + final int pos = 1; + final int refIndex = 0; + final int altIndex = 1; + final String smpl = "s"; + // too shallow + Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, refIndex, altIndex, 4, 4, 0, 0)), 0.); + // just deep enough + Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, refIndex, altIndex, 5, 5, 0, 0)), .5); + // too unequal + Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, refIndex, altIndex, 7, 3, 0, 0)), 0.); + // sufficiently equal + Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, refIndex, altIndex, 6, 4, 0, 0)), .4); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java index 82d387b03ba..16c436068a4 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java @@ -161,8 +161,8 @@ public void testDepthUniquenessCriterion( int val1, int val2 ) { public void testLocusDepthViolateUniquenessCriterion() { final DummyFeatureSink sink = new DummyFeatureSink<>(); final LocusDepthSortMerger sortMerger = new LocusDepthSortMerger(dict, sink); - sortMerger.write(new LocusDepth("chr1", 1, "sample", (byte)'A', 1, 0, 0, 0)); - sortMerger.write(new LocusDepth("chr1", 1, "sample", (byte)'A', 0, 1, 0, 0)); + sortMerger.write(new LocusDepth("chr1", 1, "sample", 0, 1, 1, 0, 0, 0)); + sortMerger.write(new 
LocusDepth("chr1", 1, "sample", 0, 1, 0, 1, 0, 0)); sortMerger.close(); Assert.assertEquals(sink.nRecsWritten, 0); } @@ -171,8 +171,8 @@ public void testLocusDepthViolateUniquenessCriterion() { public void testLocusDepthUniquenessCriterion() { final DummyFeatureSink sink = new DummyFeatureSink<>(); final LocusDepthSortMerger sortMerger = new LocusDepthSortMerger(dict, sink); - sortMerger.write(new LocusDepth("chr1", 1, "sample1", (byte)'A', 1, 0, 0, 0)); - sortMerger.write(new LocusDepth("chr1", 1, "sample2", (byte)'A', 0, 1, 0, 0)); + sortMerger.write(new LocusDepth("chr1", 1, "sample1", 0, 1, 1, 0, 0, 0)); + sortMerger.write(new LocusDepth("chr1", 1, "sample2", 0, 1, 0, 1, 0, 0)); sortMerger.close(); Assert.assertEquals(sink.nRecsWritten, 2); } From 10682114b68e5ad537de2b1b3e9e53d4ef4bfe30 Mon Sep 17 00:00:00 2001 From: Ted Sharpe Date: Tue, 12 Apr 2022 13:41:51 -0400 Subject: [PATCH 2/4] tests --- .../hellbender/tools/sv/LocusDepthtoBAF.java | 10 ++-- .../utils/codecs/LocusDepthCodec.java | 2 +- .../sv/LocusDepthtoBAFIntegrationTest.java | 60 +++++++++++++++++++ .../tools/sv/LocusDepthtoBAFUnitTest.java | 1 - .../walkers/sv/LocusDepthtoBAF/test1.baf.txt | 0 .../walkers/sv/LocusDepthtoBAF/test1.ld.txt | 2 + .../walkers/sv/LocusDepthtoBAF/test2.baf.txt | 3 + .../walkers/sv/LocusDepthtoBAF/test2.ld.txt | 3 + .../walkers/sv/LocusDepthtoBAF/test3.baf.txt | 2 + .../walkers/sv/LocusDepthtoBAF/test3.ld.txt | 2 + 10 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.baf.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.baf.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java index 7bcb2c6de8f..d8d2cfbe3ec 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java @@ -8,10 +8,7 @@ import org.broadinstitute.barclay.argparser.ExperimentalFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; -import org.broadinstitute.hellbender.engine.GATKPath; -import org.broadinstitute.hellbender.engine.MultiFeatureWalker; -import org.broadinstitute.hellbender.engine.ReadsContext; -import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.*; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.MathUtils.RunningAverage; import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodec; @@ -72,7 +69,7 @@ public class LocusDepthtoBAF extends MultiFeatureWalker { doc = "Locus depth files to process", fullName = LOCUS_DEPTH_FILE_NAME, shortName = 
StandardArgumentDefinitions.FEATURE_SHORT_NAME ) - private List locusDepthFiles; + private List> locusDepthFiles; @Argument(fullName = SAMPLE_NAMES_NAME, doc = "Sample names", optional = true) private List sampleNames; @@ -141,6 +138,9 @@ public void apply( LocusDepth locusDepth, Object header, @Override public Object onTraversalSuccess() { super.onTraversalSuccess(); + if ( !sameLocusBuffer.isEmpty() ) { + processBuffer(); + } writer.close(); return null; } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java index bcbd8a6983d..42ad7483511 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java @@ -30,7 +30,7 @@ public LocusDepthCodec() { @Override public LocusDepth decode( final String line ) { final List tokens = splitter.splitToList(line); - if ( tokens.size() != 8 ) { + if ( tokens.size() != 9 ) { throw new IllegalArgumentException("Invalid number of columns: " + tokens.size()); } return new LocusDepth(tokens.get(0), diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java new file mode 100644 index 00000000000..67c4d25f5d6 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java @@ -0,0 +1,60 @@ +package org.broadinstitute.hellbender.tools.sv; + +import htsjdk.samtools.util.Log; +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.Collections; + +public class LocusDepthtoBAFIntegrationTest extends CommandLineProgramTest { + public static final String ld2bafTestDir = toolsTestDir + "walkers/sv/LocusDepthtoBAF/"; + + @Override public String getTestedToolName() { return "LocusDepthtoBAF"; } + + @Test + public void testStdDevTooBig() throws IOException { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); + argsBuilder.add(LocusDepthtoBAF.MIN_HET_LIKELIHOOD, "0."); + argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, hg19_chr1_1M_dict); + argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test1.ld.txt"); + argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); + final IntegrationTestSpec testSpec = + new IntegrationTestSpec(argsBuilder.getString(), + Collections.singletonList(ld2bafTestDir + "test1.baf.txt")); + testSpec.setOutputFileExtension("baf.txt"); + testSpec.executeTest("test1: standard deviation too big", this); + } + + @Test + public void testAOK() throws IOException { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); + argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, hg19_chr1_1M_dict); + argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test2.ld.txt"); + argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); + final 
IntegrationTestSpec testSpec = + new IntegrationTestSpec(argsBuilder.getString(), + Collections.singletonList(ld2bafTestDir + "test2.baf.txt")); + testSpec.setOutputFileExtension("baf.txt"); + testSpec.executeTest("test2: a-ok", this); + } + + @Test + public void testAdjustMedian() throws IOException { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); + argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, hg19_chr1_1M_dict); + argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test3.ld.txt"); + argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); + final IntegrationTestSpec testSpec = + new IntegrationTestSpec(argsBuilder.getString(), + Collections.singletonList(ld2bafTestDir + "test3.baf.txt")); + testSpec.setOutputFileExtension("baf.txt"); + testSpec.executeTest("test3: adjust median", this); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java index ffcc27ecf86..53f1ba5957c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java @@ -1,6 +1,5 @@ package org.broadinstitute.hellbender.tools.sv; -import org.apache.commons.math3.distribution.ChiSquaredDistribution; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.baf.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.baf.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt new file mode 100644 index 00000000000..39a4909bbf5 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt @@ -0,0 +1,2 @@ +1 1000 smpl1 0 1 3 7 0 0 +1 1000 smpl2 0 1 7 3 0 0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt new file mode 100644 index 00000000000..0e8759b815a --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt @@ -0,0 +1,3 @@ +1 1000 0.6 smpl1 +1 1000 0.5 smpl2 +1 1000 0.4 smpl3 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt new file mode 100644 index 00000000000..ebf91c3c71d --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt @@ -0,0 +1,3 @@ +1 1000 smpl1 0 3 4 0 0 6 +1 1000 smpl2 0 3 5 0 0 5 +1 1000 smpl3 0 3 6 0 0 4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.baf.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.baf.txt new file mode 100644 index 00000000000..c1009f969fd --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.baf.txt 
@@ -0,0 +1,2 @@ +1 1000 0.5 smpl1 +1 1000 0.5 smpl2 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt new file mode 100644 index 00000000000..7e4122c7d19 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt @@ -0,0 +1,2 @@ +1 1000 smpl1 1 3 0 4 0 6 +1 1000 smpl2 1 3 0 4 0 6 From 2d9e9a22453e2cf81c50f465f1aca481ca700f43 Mon Sep 17 00:00:00 2001 From: Ted Sharpe Date: Mon, 25 Apr 2022 14:19:34 -0400 Subject: [PATCH 3/4] address reviewer comments --- .../hellbender/tools/sv/BafEvidence.java | 4 + .../hellbender/tools/sv/LocusDepth.java | 52 +++---- .../hellbender/tools/sv/LocusDepthtoBAF.java | 144 +++++++++++++----- .../tools/walkers/sv/CollectSVEvidence.java | 20 ++- .../utils/codecs/LocusDepthBCICodec.java | 6 +- .../utils/codecs/LocusDepthCodec.java | 10 +- .../tools/sv/BafEvidenceUnitTest.java | 11 ++ .../sv/LocusDepthtoBAFIntegrationTest.java | 22 ++- .../tools/sv/LocusDepthtoBAFUnitTest.java | 8 +- .../sv/PrintSVEvidenceIntegrationTest.java | 8 +- .../walkers/sv/LocusDepthtoBAF/test1.ld.txt | 4 +- .../walkers/sv/LocusDepthtoBAF/test1.vcf | 4 + .../walkers/sv/LocusDepthtoBAF/test2.baf.txt | 3 + .../walkers/sv/LocusDepthtoBAF/test2.ld.txt | 9 +- .../walkers/sv/LocusDepthtoBAF/test2.vcf | 6 + .../walkers/sv/LocusDepthtoBAF/test2a.vcf | 6 + .../walkers/sv/LocusDepthtoBAF/test3.ld.txt | 4 +- .../walkers/sv/LocusDepthtoBAF/test3.vcf | 4 + 18 files changed, 218 insertions(+), 107 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2a.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java index a33c31c5d4a..050ec1b5003 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java @@ -13,11 +13,15 @@ public final class BafEvidence implements SVFeature { private final double value; public final static String BCI_VERSION = "1.0"; + public final static double MISSING_VALUE = -1.; public BafEvidence( final String sample, final String contig, final int position, final double value ) { Utils.nonNull(sample); Utils.nonNull(contig); + Utils.validateArg(position > 0, "starting position must be positive"); + Utils.validateArg(value >= 0. 
|| value == MISSING_VALUE, + "value must be non-negative or missing"); this.sample = sample; this.contig = contig; this.position = position; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java index 1a018757756..d3ac47ae9f1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java @@ -12,36 +12,26 @@ public final class LocusDepth implements SVFeature { private final String contig; private final int position; private final String sample; - // next three fields index "ACGT" - private final int refIndex; - private final int altIndex; - private final int[] depths; + private final int[] depths; // four slots for each call, ACGT public final static String BCI_VERSION = "1.0"; - public LocusDepth( final Locatable loc, final String sample, final int refIndex, final int altIndex ) { - Utils.validateArg(refIndex >= 0 && refIndex <= 3, "refIndex must be between 0 and 3"); - Utils.validateArg(altIndex >= 0 && altIndex <= 3, "altIndex must be between 0 and 3"); - Utils.validateArg(refIndex != altIndex, "refIndex and altIndex must be different"); - this.contig = loc.getContig(); - this.position = loc.getStart(); - this.sample = sample; - this.refIndex = refIndex; - this.altIndex = altIndex; - this.depths = new int[4]; + public LocusDepth( final Locatable loc, final String sample ) { + this(loc.getContig(), loc.getStart(), sample, 0, 0, 0, 0); } - public LocusDepth( final String contig, final int position, - final String sample, final int refIndex, final int altIndex, + public LocusDepth( final String contig, final int position, final String sample, final int aDepth, final int cDepth, final int gDepth, final int tDepth ) { - Utils.validateArg(refIndex >= 0 && refIndex <= 3, "refIndex must be between 0 and 3"); - Utils.validateArg(altIndex >= 0 && altIndex <= 3, "altIndex must be between 0 and 3"); - Utils.validateArg(refIndex != altIndex, "refIndex and altIndex must be different"); + Utils.nonNull(contig, "contig may not be null"); + Utils.validateArg(position > 0, "starting position must be positive"); + Utils.nonNull(sample, "sample must not be null"); + Utils.validateArg(aDepth >= 0, "depth must be non-negative"); + Utils.validateArg(cDepth >= 0, "depth must be non-negative"); + Utils.validateArg(gDepth >= 0, "depth must be non-negative"); + Utils.validateArg(tDepth >= 0, "depth must be non-negative"); this.contig = contig; this.position = position; this.sample = sample; - this.refIndex = refIndex; - this.altIndex = altIndex; this.depths = new int[4]; depths[0] = aDepth; depths[1] = cDepth; @@ -69,19 +59,11 @@ public int getStart() { } public String getSample() { return sample; } - // index into "ACGT" - public int getRefIndex() { - return refIndex; - } - // index into "ACGT" - public int getAltIndex() { - return altIndex; - } + public int getTotalDepth() { return depths[0] + depths[1] + depths[2] + depths[3]; } + // idx is index into "ACGT" public int getDepth( final int idx ) { return depths[idx]; } - public int getRefDepth() { return depths[refIndex]; } - public int getAltDepth() { return depths[altIndex]; } @Override public LocusDepth extractSamples( final Set sampleNames, final Object header ) { @@ -95,7 +77,7 @@ public boolean equals( final Object obj ) { } public boolean equals( final LocusDepth that ) { - return this.position == that.position && this.refIndex == that.refIndex && + return 
this.position == that.position && this.depths[0] == that.depths[0] && this.depths[1] == that.depths[1] && this.depths[2] == that.depths[2] && this.depths[3] == that.depths[3] && Objects.equals(this.contig, that.contig) && @@ -104,12 +86,12 @@ public boolean equals( final LocusDepth that ) { @Override public int hashCode() { - return Objects.hash(contig, position, sample, refIndex, Arrays.hashCode(depths)); + return Objects.hash(contig, position, sample, Arrays.hashCode(depths)); } @Override public String toString() { - return contig + "\t" + position + "\t" + sample + "\t" + (char)refIndex + "\t" + - depths[0] + "\t" + depths[1] + "\t" + depths[2] + "\t" + depths[3]; + return sample + "@" + contig + ":" + position + + "[" + depths[0] + "," + depths[1] + "," + depths[2] + "," + depths[3] + "]"; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java index d8d2cfbe3ec..23b4305a8db 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java @@ -1,7 +1,11 @@ package org.broadinstitute.hellbender.tools.sv; import com.google.common.annotations.VisibleForTesting; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.Locatable; import htsjdk.tribble.Feature; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.math3.distribution.ChiSquaredDistribution; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; @@ -11,17 +15,25 @@ import org.broadinstitute.hellbender.engine.*; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.MathUtils.RunningAverage; +import org.broadinstitute.hellbender.utils.Nucleotide; import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodec; import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodecFinder; import org.broadinstitute.hellbender.utils.codecs.FeatureSink; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; /** - *

<p>Merges locus-sorted LocusDepth evidence files, and calculates the bi-allelic frequency for each
- * sample and site, and writes these values as a BafEvidence output file.</p>
+ * <p>Merges locus-sorted LocusDepth evidence files, and calculates the bi-allelic frequency (baf)
+ * for each sample and site, and writes these values as a BafEvidence output file.</p>
+ * <p>Samples at sites that have too few depth counts (controlled by --min-total-depth), and samples
+ * failing a Pearson's chi square test for goodness of fit to a bi-allelic model are excluded. (I.e.,
+ * only samples that are judged to be heterozygous at a site are included.)</p>
+ * <p>Across all evaluable samples, sites where the samples have bafs with too much variance
+ * (controlled by --max-std) are excluded. The median baf value at each site is adjusted to be
+ * exactly 0.5.</p>
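To make the heterozygosity test described above concrete, here is a worked example using the same depth counts as LocusDepthtoBAFUnitTest elsewhere in this patch (illustration only, not part of the change). With ref and alt depths of 6 and 4, the expected count under a 50/50 het model is 10/2 = 5, so

    chiSq   = ((6 - 5)^2 + (4 - 5)^2) / 5 = 0.4
    fitProb = 1 - chiSqDist.cumulativeProbability(0.4), roughly 0.53, which clears the default min-het-probability of 0.5

and the sample contributes a baf of 4/10 = 0.4. With depths of 7 and 3, chiSq = 1.6 and fitProb is roughly 0.2, so that sample is treated as non-heterozygous and skipped.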

 *
 * <h3>Inputs</h3>
* @@ -57,11 +69,13 @@ @ExperimentalFeature public class LocusDepthtoBAF extends MultiFeatureWalker { public static final String LOCUS_DEPTH_FILE_NAME = "locus-depth"; + public static final String BAF_SITES_VCF_LONG_NAME = "baf-sites-vcf"; public static final String SAMPLE_NAMES_NAME = "sample-names"; public static final String BAF_EVIDENCE_FILE_NAME = "baf-evidence-output"; + public static final String BIALLELIC_ONLY_NAME = "biallelic-only"; public static final String MIN_TOTAL_DEPTH_NAME = "min-total-depth"; public static final String MAX_STD_NAME = "max-std"; - public static final String MIN_HET_LIKELIHOOD = "min-het-likelihood"; + public static final String MIN_HET_PROBABILITY = "min-het-probability"; public static final String COMPRESSION_LEVEL_NAME = "compression-level"; public static ChiSquaredDistribution chiSqDist = new ChiSquaredDistribution(null, 1.); @@ -71,7 +85,12 @@ public class LocusDepthtoBAF extends MultiFeatureWalker { shortName = StandardArgumentDefinitions.FEATURE_SHORT_NAME ) private List> locusDepthFiles; - @Argument(fullName = SAMPLE_NAMES_NAME, doc = "Sample names", optional = true) + @Argument(fullName = BAF_SITES_VCF_LONG_NAME, + doc = "Input VCF of SNPs marking loci for allele counts") + public GATKPath bafSitesFile; + + @Argument(fullName = SAMPLE_NAMES_NAME, + doc = "Sample names. Necessary only when writing a *.baf.bci output file.", optional = true) private List sampleNames; @Argument( @@ -80,6 +99,11 @@ public class LocusDepthtoBAF extends MultiFeatureWalker { shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) private GATKPath bafOutputFile; + @Argument( + doc = "Process only bi-allelic SNP sites", + fullName = BIALLELIC_ONLY_NAME, optional = true ) + private boolean biAllelicOnly = true; + @Argument( doc = "Maximum standard deviation across bafs at a locus (otherwise locus will be ignored)", fullName = MAX_STD_NAME, optional = true ) @@ -91,9 +115,9 @@ public class LocusDepthtoBAF extends MultiFeatureWalker { private int minTotalDepth = 10; @Argument( - doc = "Minimum likelihood of site being biallelic and heterozygous", - fullName = MIN_HET_LIKELIHOOD, optional = true ) - private double minHetLikelihood = .5; + doc = "Minimum probability of site being biallelic and heterozygous", + fullName = MIN_HET_PROBABILITY, optional = true ) + private double minHetProbability = .5; @Argument( doc = "Output compression level", @@ -101,13 +125,19 @@ public class LocusDepthtoBAF extends MultiFeatureWalker { minValue = 0, maxValue = 9, optional = true ) private int compressionLevel = 4; + private FeatureDataSource bafSitesSource; + private Iterator bafSitesIterator; private FeatureSink writer; - private final List sameLocusBuffer = new ArrayList<>(); + private final List sameLocusBuffer = new ArrayList<>(); @Override @SuppressWarnings("unchecked") public void onTraversalStart() { super.onTraversalStart(); + bafSitesSource = new FeatureDataSource<>(bafSitesFile.toPath().toString()); + bafSitesIterator = bafSitesSource.iterator(); + final SAMSequenceDictionary dict = getDictionary(); + dict.assertSameDictionary(bafSitesSource.getSequenceDictionary()); final FeatureOutputCodec> codec = FeatureOutputCodecFinder.find(bafOutputFile); if ( !codec.getFeatureType().isAssignableFrom(BafEvidence.class) ) { @@ -115,24 +145,17 @@ public void onTraversalStart() { "associated with the output file expects features of type " + codec.getFeatureType().getSimpleName()); } - writer = (FeatureSink)codec.makeSink(bafOutputFile, - getBestAvailableSequenceDictionary(), - sampleNames, 
compressionLevel); + writer = (FeatureSink)codec.makeSink(bafOutputFile, dict, sampleNames, + compressionLevel); } @Override public void apply( LocusDepth locusDepth, Object header, - ReadsContext readsContext, ReferenceContext referenceContext ) { - final double baf = calcBAF(locusDepth); - if ( baf > 0. ) { - final BafEvidence bafEvidence = - new BafEvidence(locusDepth.getSample(), locusDepth.getContig(), - locusDepth.getStart(), baf); - if ( !sameLocus(bafEvidence) ) { - processBuffer(); - } - sameLocusBuffer.add(bafEvidence); + ReadsContext reads, ReferenceContext ref ) { + if ( !sameLocus(locusDepth) ) { + processBuffer(); } + sameLocusBuffer.add(locusDepth); } @Override @@ -141,60 +164,107 @@ public Object onTraversalSuccess() { if ( !sameLocusBuffer.isEmpty() ) { processBuffer(); } + bafSitesSource.close(); writer.close(); return null; } - @VisibleForTesting double calcBAF( final LocusDepth locusDepth ) { + /** Performs a Pearson's chi square test for goodness of fit to a biallelic model to determine + * heterozygosity. If this fails, BafEvidence.MISSING_VALUE is returned. If the test succeeds, + * the depth of the alt call as a fraction of the total depth is returned. + */ + @VisibleForTesting double calcBAF( final LocusDepth locusDepth, + final int refIndex, final int altIndex ) { final int totalDepth = locusDepth.getTotalDepth(); if ( totalDepth < minTotalDepth ) { - return 0.; + return BafEvidence.MISSING_VALUE; } double expectRefAlt = totalDepth / 2.; - final double refDiff = locusDepth.getRefDepth() - expectRefAlt; - final double altDiff = locusDepth.getAltDepth() - expectRefAlt; + final double altDepth = locusDepth.getDepth(altIndex); + final double refDiff = locusDepth.getDepth(refIndex) - expectRefAlt; + final double altDiff = altDepth - expectRefAlt; double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt; double fitProb = 1. 
- chiSqDist.cumulativeProbability(chiSq); - if ( fitProb < minHetLikelihood ) { - return 0.; + if ( fitProb < minHetProbability ) { + return BafEvidence.MISSING_VALUE; } - return (double)locusDepth.getAltDepth() / totalDepth; + return altDepth / totalDepth; } - private boolean sameLocus( final BafEvidence bafEvidence ) { + private boolean sameLocus( final Locatable locus ) { if ( sameLocusBuffer.isEmpty() ) { return true; } - final BafEvidence curLocus = sameLocusBuffer.get(0); - return curLocus.getContig().equals(bafEvidence.getContig()) && - curLocus.getStart() == bafEvidence.getStart(); + final LocusDepth curLocus = sameLocusBuffer.get(0); + return curLocus.getContig().equals(locus.getContig()) && + curLocus.getStart() == locus.getStart(); } private void processBuffer() { - int nBafs = sameLocusBuffer.size(); + final VariantContext vc = nextSite(); + if ( !sameLocus(vc) ) { + final Locatable loc = sameLocusBuffer.get(0); + throw new UserException("expecting locus " + loc.getContig() + ":" + loc.getStart() + + ", but found locus " + vc.getContig() + ":" + vc.getStart() + " in baf sites vcf"); + } + final List alleles = vc.getAlleles(); + final int refIndex = Nucleotide.decode(alleles.get(0).getBases()[0]).ordinal(); + if ( refIndex > 3 ) { + throw new UserException("ref call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart()); + } + final int nAlleles = alleles.size(); + final int[] altIndices = new int[nAlleles - 1]; + for ( int alleleIndex = 1; alleleIndex < nAlleles; ++alleleIndex ) { + final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal(); + if ( altIndex > 3 ) { + throw new UserException("alt call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart()); + } + altIndices[alleleIndex - 1] = altIndex; + } + final List beList = new ArrayList<>(sameLocusBuffer.size()); + for ( final LocusDepth ld : sameLocusBuffer ) { + for ( final int altIndex : altIndices ) { + double baf = calcBAF(ld, refIndex, altIndex); + if ( baf != BafEvidence.MISSING_VALUE ) { + beList.add(new BafEvidence(ld.getSample(), ld.getContig(), ld.getStart(), baf)); + break; + } + } + } + int nBafs = beList.size(); if ( nBafs == 1 ) { - writer.write(new BafEvidence(sameLocusBuffer.get(0), .5)); + writer.write(new BafEvidence(beList.get(0), .5)); } else { final RunningAverage ra = new RunningAverage(); - for ( final BafEvidence bafEvidence : sameLocusBuffer ) { + for ( final BafEvidence bafEvidence : beList ) { ra.add(bafEvidence.getValue()); } final double stddev = ra.stddev(); if ( stddev <= maxStdDev ) { final double[] vals = new double[nBafs]; for ( int idx = 0; idx != nBafs; ++idx ) { - vals[idx] = sameLocusBuffer.get(idx).getValue(); + vals[idx] = beList.get(idx).getValue(); } Arrays.sort(vals); int midIdx = nBafs / 2; double median = (nBafs & 1) != 0 ? 
vals[midIdx] : (vals[midIdx] + vals[midIdx - 1])/2.; final double adj = .5 - median; - for ( final BafEvidence bafEvidence : sameLocusBuffer ) { + for ( final BafEvidence bafEvidence : beList ) { writer.write(new BafEvidence(bafEvidence, bafEvidence.getValue()+adj)); } } } sameLocusBuffer.clear(); } + + private VariantContext nextSite() { + while ( bafSitesIterator.hasNext() ) { + final VariantContext vc = bafSitesIterator.next(); + if ( vc.isSNP() && (!biAllelicOnly || vc.isBiallelic()) ) { + return vc; + } + } + throw new UserException("baf sites vcf exhausted before locus depth data"); + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java index aed4ef8ef4b..c2ab98ae64a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java @@ -5,7 +5,6 @@ import htsjdk.samtools.CigarOperator; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.Locatable; -import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.BetaFeature; @@ -97,6 +96,7 @@ public class CollectSVEvidence extends ReadWalker { public static final String ALLELE_COUNT_OUTPUT_ARGUMENT_LONG_NAME = "allele-count-file"; public static final String ALLELE_COUNT_INPUT_ARGUMENT_SHORT_NAME = "F"; public static final String ALLELE_COUNT_INPUT_ARGUMENT_LONG_NAME = "allele-count-vcf"; + public static final String BIALLELIC_ONLY_NAME = "biallelic-only"; public static final String SAMPLE_NAME_ARGUMENT_LONG_NAME = "sample-name"; public static final String COMPRESSION_LEVEL_ARGUMENT_LONG_NAME = "compression-level"; @@ -122,6 +122,12 @@ public class CollectSVEvidence extends ReadWalker { optional = true) public GATKPath alleleCountInputFilename; + + @Argument( + doc = "Process only bi-allelic SNP sites for locus depth", + fullName = BIALLELIC_ONLY_NAME, optional = true ) + private boolean biAllelicOnly = true; + @Argument(fullName = "allele-count-min-mapq", doc = "minimum mapping quality for read to be allele-counted", optional = true) @@ -166,7 +172,7 @@ public void onTraversalStart() { if ( alleleCountInputFilename != null && alleleCountOutputFilename != null ) { alleleCounter = new AlleleCounter(sequenceDictionary, sampleName, compressionLevel, alleleCountInputFilename, alleleCountOutputFilename, - minMapQ, minQ); + biAllelicOnly, minMapQ, minQ); } else if ( alleleCountInputFilename != null ) { throw new UserException("Having specified an allele-count-vcf input, " + "you must also supply an allele-count-file for output."); @@ -558,6 +564,7 @@ final static class AlleleCounter { private final SAMSequenceDictionary dict; private final String sampleName; private final FeatureSink writer; + private final boolean biAllelicOnly; private final int minMapQ; private final int minQ; private final Iterator snpSourceItr; @@ -568,6 +575,7 @@ public AlleleCounter( final SAMSequenceDictionary dict, final int compressionLevel, final GATKPath inputPath, final GATKPath outputPath, + final boolean biAllelicOnly, final int minMapQ, final int minQ ) { this.dict = dict; @@ -586,6 +594,7 @@ public AlleleCounter( final SAMSequenceDictionary dict, } this.writer = codec.makeSink(outputPath, dict, sampleNames, compressionLevel); } + this.biAllelicOnly = 
biAllelicOnly;
             this.minMapQ = minMapQ;
             this.minQ = minQ;
             final FeatureDataSource snpSource =
@@ -695,16 +704,13 @@ private boolean readNextLocus() {
                 return false;
             }
             VariantContext snp = snpSourceItr.next();
-            while ( !snp.isSNP() || !snp.isBiallelic() ) {
+            while ( !snp.isSNP() || (biAllelicOnly && !snp.isBiallelic()) ) {
                 if ( !snpSourceItr.hasNext() ) {
                     return false;
                 }
                 snp = snpSourceItr.next();
             }
-            final List alleles = snp.getAlleles();
-            final int refIndex = Nucleotide.decode(alleles.get(0).getBases()[0]).ordinal();
-            final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal();
-            final LocusDepth locusDepth = new LocusDepth(snp, sampleName, refIndex, altIndex);
+            final LocusDepth locusDepth = new LocusDepth(snp, sampleName);
             locusDepthQueue.add(locusDepth);
             return true;
         }
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java
index d9b0a5271b0..4d69ce73708 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java
@@ -30,9 +30,7 @@ public LocusDepth decode( final Reader reader ) throws IOException {
         }
         final DataInputStream dis = reader.getStream();
         return new LocusDepth(reader.getDictionary().getSequence(dis.readInt()).getSequenceName(),
-                dis.readInt(),
-                reader.getSampleNames().get(dis.readInt()),
-                dis.readByte(), dis.readByte(),
+                dis.readInt(), reader.getSampleNames().get(dis.readInt()),
                 dis.readInt(), dis.readInt(), dis.readInt(), dis.readInt());
     }
 
@@ -68,8 +66,6 @@ public void encode( final LocusDepth locusDepth, final Writer writer
         dos.writeInt(writer.getContigIndex(locusDepth.getContig()));
         dos.writeInt(locusDepth.getStart());
         dos.writeInt(writer.getSampleIndex(locusDepth.getSample()));
-        dos.writeByte(locusDepth.getRefIndex());
-        dos.writeByte(locusDepth.getAltIndex());
         dos.writeInt(locusDepth.getDepth(0));
         dos.writeInt(locusDepth.getDepth(1));
         dos.writeInt(locusDepth.getDepth(2));
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java
index 42ad7483511..87ac356d48e 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java
@@ -30,7 +30,7 @@ public LocusDepthCodec() {
 
     @Override public LocusDepth decode( final String line ) {
         final List tokens = splitter.splitToList(line);
-        if ( tokens.size() != 9 ) {
+        if ( tokens.size() != 7 ) {
             throw new IllegalArgumentException("Invalid number of columns: " + tokens.size());
         }
         return new LocusDepth(tokens.get(0),
@@ -39,9 +39,7 @@ public LocusDepthCodec() {
                 Integer.parseUnsignedInt(tokens.get(3)),
                 Integer.parseUnsignedInt(tokens.get(4)),
                 Integer.parseUnsignedInt(tokens.get(5)),
-                Integer.parseUnsignedInt(tokens.get(6)),
-                Integer.parseUnsignedInt(tokens.get(7)),
-                Integer.parseUnsignedInt(tokens.get(8)));
+                Integer.parseUnsignedInt(tokens.get(6)));
     }
 
     @Override public Object readActualHeader( LineIterator reader ) { return null; }
@@ -87,8 +85,8 @@ public FeatureSink makeSortMerger( final GATKPath path,
 
     public static String encode( final LocusDepth locusDepth ) {
         return locusDepth.getContig() + "\t" + (locusDepth.getStart() - 1) + "\t" +
-                locusDepth.getSample() + "\t" + locusDepth.getRefIndex() + "\t" +
-                locusDepth.getAltIndex() + "\t" + locusDepth.getDepth(0) + "\t" + locusDepth.getDepth(1) + "\t" +
+                locusDepth.getSample() + "\t" +
+                locusDepth.getDepth(0) + "\t" + locusDepth.getDepth(1) + "\t" +
                 locusDepth.getDepth(2) + "\t" + locusDepth.getDepth(3);
     }
 }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java
index 945b2adfd9c..0523f51ef64 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java
@@ -73,4 +73,15 @@ public void testBinaryRoundTrip() {
         }
         Assert.assertEquals(recoveredBafs, bafs);
     }
+
+    @Test
+    public void testValueAlteringConstructor() {
+        final BafEvidence bafEvidence = new BafEvidence("sample", "contig", 1234, .4);
+        final BafEvidence newValue = new BafEvidence(bafEvidence, .5);
+        Assert.assertEquals(newValue.getSample(), bafEvidence.getSample());
+        Assert.assertEquals(newValue.getContig(), bafEvidence.getContig());
+        Assert.assertEquals(newValue.getStart(), bafEvidence.getStart());
+        Assert.assertEquals(bafEvidence.getValue(), .4);
+        Assert.assertEquals(newValue.getValue(), .5);
+    }
 }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java
index 67c4d25f5d6..c437c0b8830 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java
@@ -3,6 +3,7 @@
 import htsjdk.samtools.util.Log;
 import org.broadinstitute.hellbender.CommandLineProgramTest;
 import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
+import org.broadinstitute.hellbender.exceptions.UserException;
 import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
 import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
 import org.testng.annotations.Test;
@@ -19,8 +20,9 @@ public class LocusDepthtoBAFIntegrationTest extends CommandLineProgramTest {
     public void testStdDevTooBig() throws IOException {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
         argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name());
-        argsBuilder.add(LocusDepthtoBAF.MIN_HET_LIKELIHOOD, "0.");
+        argsBuilder.add(LocusDepthtoBAF.MIN_HET_PROBABILITY, "0.");
         argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, hg19_chr1_1M_dict);
+        argsBuilder.add(LocusDepthtoBAF.BAF_SITES_VCF_LONG_NAME, ld2bafTestDir + "test1.vcf");
         argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test1.ld.txt");
         argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s");
         final IntegrationTestSpec testSpec =
@@ -34,7 +36,8 @@ public void testStdDevTooBig() throws IOException {
     public void testAOK() throws IOException {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
         argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name());
-        argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, hg19_chr1_1M_dict);
+        argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, publicTestDir + "hg19micro.dict");
+        argsBuilder.add(LocusDepthtoBAF.BAF_SITES_VCF_LONG_NAME, ld2bafTestDir + "test2.vcf");
         argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test2.ld.txt");
         argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s");
         final IntegrationTestSpec testSpec =
@@ -44,11 +47,26 @@ public void testAOK() throws IOException {
         testSpec.executeTest("test2: a-ok", this);
     }
 
+    @Test
+    public void testSitesMismatch() throws IOException {
+        final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
+        argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name());
+        argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, publicTestDir + "hg19micro.dict");
+        argsBuilder.add(LocusDepthtoBAF.BAF_SITES_VCF_LONG_NAME, ld2bafTestDir + "test2a.vcf");
+        argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test2.ld.txt");
+        argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s");
+        final IntegrationTestSpec testSpec =
+                new IntegrationTestSpec(argsBuilder.getString(), 1, UserException.class);
+        testSpec.setOutputFileExtension("baf.txt");
+        testSpec.executeTest("test2a: sites mismatch", this);
+    }
+
     @Test
     public void testAdjustMedian() throws IOException {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
         argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name());
         argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, hg19_chr1_1M_dict);
+        argsBuilder.add(LocusDepthtoBAF.BAF_SITES_VCF_LONG_NAME, ld2bafTestDir + "test3.vcf");
         argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test3.ld.txt");
         argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s");
         final IntegrationTestSpec testSpec =
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java
index 53f1ba5957c..b2c4bcc737d 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java
@@ -13,12 +13,12 @@ public void testBAFCalc() {
         final int altIndex = 1;
         final String smpl = "s";
         // too shallow
-        Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, refIndex, altIndex, 4, 4, 0, 0)), 0.);
+        Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, 4, 4, 0, 0), refIndex, altIndex), BafEvidence.MISSING_VALUE);
         // just deep enough
-        Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, refIndex, altIndex, 5, 5, 0, 0)), .5);
+        Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, 5, 5, 0, 0), refIndex, altIndex), .5);
         // too unequal
-        Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, refIndex, altIndex, 7, 3, 0, 0)), 0.);
+        Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, 7, 3, 0, 0), refIndex, altIndex), BafEvidence.MISSING_VALUE);
         // sufficiently equal
-        Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, refIndex, altIndex, 6, 4, 0, 0)), .4);
+        Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, 6, 4, 0, 0), refIndex, altIndex), .4);
     }
 }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java
index 16c436068a4..0e425c70405 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java
@@ -161,8 +161,8 @@ public void testDepthUniquenessCriterion( int val1, int val2 ) {
     public void testLocusDepthViolateUniquenessCriterion() {
         final DummyFeatureSink sink = new DummyFeatureSink<>();
         final LocusDepthSortMerger sortMerger = new LocusDepthSortMerger(dict, sink);
-        sortMerger.write(new LocusDepth("chr1", 1, "sample", 0, 1, 1, 0, 0, 0));
-        sortMerger.write(new LocusDepth("chr1", 1, "sample", 0, 1, 0, 1, 0, 0));
+        sortMerger.write(new LocusDepth("chr1", 1, "sample", 1, 0, 0, 0));
+        sortMerger.write(new LocusDepth("chr1", 1, "sample", 0, 1, 0, 0));
         sortMerger.close();
         Assert.assertEquals(sink.nRecsWritten, 0);
     }
@@ -171,8 +171,8 @@ public void testLocusDepthViolateUniquenessCriterion() {
     public void testLocusDepthUniquenessCriterion() {
         final DummyFeatureSink sink = new DummyFeatureSink<>();
         final LocusDepthSortMerger sortMerger = new LocusDepthSortMerger(dict, sink);
-        sortMerger.write(new LocusDepth("chr1", 1, "sample1", 0, 1, 1, 0, 0, 0));
-        sortMerger.write(new LocusDepth("chr1", 1, "sample2", 0, 1, 0, 1, 0, 0));
+        sortMerger.write(new LocusDepth("chr1", 1, "sample1", 1, 0, 0, 0));
+        sortMerger.write(new LocusDepth("chr1", 1, "sample2", 0, 1, 0, 0));
         sortMerger.close();
         Assert.assertEquals(sink.nRecsWritten, 2);
     }
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt
index 39a4909bbf5..1dcdbc6a01a 100644
--- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt
@@ -1,2 +1,2 @@
-1 1000 smpl1 0 1 3 7 0 0
-1 1000 smpl2 0 1 7 3 0 0
+1 1000 smpl1 3 7 0 0
+1 1000 smpl2 7 3 0 0
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.vcf
new file mode 100644
index 00000000000..43f4c1b5b83
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.vcf
@@ -0,0 +1,4 @@
+##fileformat=VCFv4.2
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1001 snp1 A C . . .
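After this series, a LocusDepth text record carries seven whitespace-delimited columns (contig, 0-based start, sample, then A/C/G/T depths), as the test1.ld.txt change above shows; the ref and alt nucleotides are no longer stored per record but are resolved from the bi-allelic SNP in the sites VCF. The sketch below is illustrative only, it is not part of the patch, and the class and method names are invented; it shows how such a line maps onto the four depth counters, and how a ref or alt base resolves to an index into "ACGT", mirroring the tool's use of Nucleotide.decode(...).ordinal() with an upper bound of 3.

    // Illustrative sketch only -- not part of this patch; names are hypothetical.
    // Parses a 7-column LocusDepth text record and resolves bases to "ACGT" indices.
    import java.util.Arrays;

    public class LocusDepthLineSketch {
        // e.g. "1 1000 smpl1 3 7 0 0" -> contig, 0-based start, sample, aDepth, cDepth, gDepth, tDepth
        static int[] depthsOf( final String line ) {
            final String[] tokens = line.split("\\s+");
            if ( tokens.length != 7 ) {
                throw new IllegalArgumentException("expected 7 columns, found " + tokens.length);
            }
            final int[] depths = new int[4];
            for ( int idx = 0; idx != 4; ++idx ) {
                depths[idx] = Integer.parseUnsignedInt(tokens[idx + 3]);
            }
            return depths;
        }

        // analogous to Nucleotide.decode(base).ordinal() for the bases A, C, G, and T
        static int baseIndex( final char base ) {
            final int idx = "ACGT".indexOf(base);
            if ( idx < 0 ) {
                throw new IllegalArgumentException("base is not one of ACGT: " + base);
            }
            return idx;
        }

        public static void main( final String[] args ) {
            System.out.println(Arrays.toString(depthsOf("1 1000 smpl1 3 7 0 0"))); // [3, 7, 0, 0]
            System.out.println(baseIndex('A') + " " + baseIndex('C'));             // 0 1
        }
    }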
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt
index 0e8759b815a..20d6e23b241 100644
--- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt
@@ -1,3 +1,6 @@
 1 1000 0.6 smpl1
 1 1000 0.5 smpl2
 1 1000 0.4 smpl3
+2 2000 0.6 smpl1
+2 2000 0.5 smpl2
+2 2000 0.4 smpl3
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt
index ebf91c3c71d..e9db3347d77 100644
--- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt
@@ -1,3 +1,6 @@
-1 1000 smpl1 0 3 4 0 0 6
-1 1000 smpl2 0 3 5 0 0 5
-1 1000 smpl3 0 3 6 0 0 4
+1 1000 smpl1 4 0 0 6
+1 1000 smpl2 5 0 0 5
+1 1000 smpl3 6 0 0 4
+2 2000 smpl1 4 0 0 6
+2 2000 smpl2 5 0 0 5
+2 2000 smpl3 6 0 0 4
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.vcf
new file mode 100644
index 00000000000..501d6a72e56
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.vcf
@@ -0,0 +1,6 @@
+##fileformat=VCFv4.2
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1001 snp1 A T . . .
+2 2001 snp2 A T . . .
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2a.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2a.vcf
new file mode 100644
index 00000000000..4e8dead4c1b
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2a.vcf
@@ -0,0 +1,6 @@
+##fileformat=VCFv4.2
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1001 snp1 A T . . .
+2 9999 snp2 A T . . .
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt
index 7e4122c7d19..24d1ae5eb0a 100644
--- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt
@@ -1,2 +1,2 @@
-1 1000 smpl1 1 3 0 4 0 6
-1 1000 smpl2 1 3 0 4 0 6
+1 1000 smpl1 0 4 0 6
+1 1000 smpl2 0 4 0 6
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.vcf
new file mode 100644
index 00000000000..8f657040a1d
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.vcf
@@ -0,0 +1,4 @@
+##fileformat=VCFv4.2
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1001 snp1 C T . . .
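The test3 resources above drive the median-adjustment path: both samples carry identical A/C/G/T depths of 0/4/0/6 at a C/T site, so they get the same raw BAF, and the tool recenters the per-locus BAFs so their median sits at 0.5. A rough standalone illustration of that arithmetic follows; it is a sketch under the assumption that the recentering is a plain additive shift, as suggested by the processBuffer hunk in the patch below, and the class and method names are invented.

    // Sketch only -- invented names; mirrors the median-centering visible in processBuffer below.
    import java.util.Arrays;

    public class MedianCenterSketch {
        static double[] centerOnHalf( final double[] bafs ) {
            final double[] sorted = bafs.clone();
            Arrays.sort(sorted);
            final int midIdx = sorted.length / 2;
            final double median = (sorted.length & 1) != 0 ?
                    sorted[midIdx] : (sorted[midIdx] + sorted[midIdx - 1]) / 2.;
            final double adj = .5 - median;                  // shift applied to every BAF at the locus
            final double[] adjusted = new double[bafs.length];
            for ( int idx = 0; idx != bafs.length; ++idx ) {
                adjusted[idx] = bafs[idx] + adj;
            }
            return adjusted;
        }

        public static void main( final String[] args ) {
            // test3-like inputs: two samples with the same raw BAF of 0.6 -> both recentered to 0.5
            System.out.println(Arrays.toString(centerOnHalf(new double[]{ .6, .6 })));
        }
    }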
From 964de7aa41398c89a946911eee8c233ff2cc9ea9 Mon Sep 17 00:00:00 2001
From: Ted Sharpe
Date: Thu, 28 Apr 2022 15:01:01 -0400
Subject: [PATCH 4/4] prohibit multi-allelic sites

---
 .../hellbender/tools/sv/LocusDepthtoBAF.java  | 44 +++++++------------
 .../tools/walkers/sv/CollectSVEvidence.java   | 19 +++-----
 2 files changed, 20 insertions(+), 43 deletions(-)

diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java
index 23b4305a8db..3a740c40cd6 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java
@@ -72,7 +72,6 @@ public class LocusDepthtoBAF extends MultiFeatureWalker {
     public static final String BAF_SITES_VCF_LONG_NAME = "baf-sites-vcf";
     public static final String SAMPLE_NAMES_NAME = "sample-names";
     public static final String BAF_EVIDENCE_FILE_NAME = "baf-evidence-output";
-    public static final String BIALLELIC_ONLY_NAME = "biallelic-only";
     public static final String MIN_TOTAL_DEPTH_NAME = "min-total-depth";
     public static final String MAX_STD_NAME = "max-std";
     public static final String MIN_HET_PROBABILITY = "min-het-probability";
@@ -99,11 +98,6 @@ public class LocusDepthtoBAF extends MultiFeatureWalker {
             shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME )
     private GATKPath bafOutputFile;
 
-    @Argument(
-            doc = "Process only bi-allelic SNP sites",
-            fullName = BIALLELIC_ONLY_NAME, optional = true )
-    private boolean biAllelicOnly = true;
-
     @Argument(
             doc = "Maximum standard deviation across bafs at a locus (otherwise locus will be ignored)",
             fullName = MAX_STD_NAME, optional = true )
@@ -150,8 +144,8 @@ public void onTraversalStart() {
     }
 
     @Override
-    public void apply( LocusDepth locusDepth, Object header,
-                       ReadsContext reads, ReferenceContext ref ) {
+    public void apply( final LocusDepth locusDepth, final Object header,
+                       final ReadsContext reads, final ReferenceContext ref ) {
         if ( !sameLocus(locusDepth) ) {
             processBuffer();
         }
@@ -179,12 +173,12 @@ public Object onTraversalSuccess() {
         if ( totalDepth < minTotalDepth ) {
             return BafEvidence.MISSING_VALUE;
         }
-        double expectRefAlt = totalDepth / 2.;
+        final double expectRefAlt = totalDepth / 2.;
         final double altDepth = locusDepth.getDepth(altIndex);
         final double refDiff = locusDepth.getDepth(refIndex) - expectRefAlt;
         final double altDiff = altDepth - expectRefAlt;
-        double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt;
-        double fitProb = 1. - chiSqDist.cumulativeProbability(chiSq);
+        final double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt;
+        final double fitProb = 1. - chiSqDist.cumulativeProbability(chiSq);
         if ( fitProb < minHetProbability ) {
             return BafEvidence.MISSING_VALUE;
         }
@@ -212,26 +206,18 @@ private void processBuffer() {
            if ( refIndex > 3 ) {
                throw new UserException("ref call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart());
            }
-           final int nAlleles = alleles.size();
-           final int[] altIndices = new int[nAlleles - 1];
-           for ( int alleleIndex = 1; alleleIndex < nAlleles; ++alleleIndex ) {
-               final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal();
-               if ( altIndex > 3 ) {
-                   throw new UserException("alt call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart());
-               }
-               altIndices[alleleIndex - 1] = altIndex;
+           final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal();
+           if ( altIndex > 3 ) {
+               throw new UserException("alt call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart());
            }
            final List beList = new ArrayList<>(sameLocusBuffer.size());
            for ( final LocusDepth ld : sameLocusBuffer ) {
-               for ( final int altIndex : altIndices ) {
-                   double baf = calcBAF(ld, refIndex, altIndex);
-                   if ( baf != BafEvidence.MISSING_VALUE ) {
-                       beList.add(new BafEvidence(ld.getSample(), ld.getContig(), ld.getStart(), baf));
-                       break;
-                   }
+               final double baf = calcBAF(ld, refIndex, altIndex);
+               if ( baf != BafEvidence.MISSING_VALUE ) {
+                   beList.add(new BafEvidence(ld.getSample(), ld.getContig(), ld.getStart(), baf));
               }
            }
-           int nBafs = beList.size();
+           final int nBafs = beList.size();
            if ( nBafs == 1 ) {
                writer.write(new BafEvidence(beList.get(0), .5));
            } else {
@@ -246,8 +232,8 @@ private void processBuffer() {
                    vals[idx] = beList.get(idx).getValue();
                }
                Arrays.sort(vals);
-               int midIdx = nBafs / 2;
-               double median =
+               final int midIdx = nBafs / 2;
+               final double median =
                        (nBafs & 1) != 0 ? vals[midIdx] : (vals[midIdx] + vals[midIdx - 1])/2.;
                final double adj = .5 - median;
                for ( final BafEvidence bafEvidence : beList ) {
@@ -261,7 +247,7 @@ private void processBuffer() {
     private VariantContext nextSite() {
         while ( bafSitesIterator.hasNext() ) {
             final VariantContext vc = bafSitesIterator.next();
-            if ( vc.isSNP() && (!biAllelicOnly || vc.isBiallelic()) ) {
+            if ( vc.isSNP() && vc.isBiallelic() ) {
                 return vc;
             }
         }
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java
index c2ab98ae64a..096c4554edb 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java
@@ -96,7 +96,6 @@ public class CollectSVEvidence extends ReadWalker {
     public static final String ALLELE_COUNT_OUTPUT_ARGUMENT_LONG_NAME = "allele-count-file";
     public static final String ALLELE_COUNT_INPUT_ARGUMENT_SHORT_NAME = "F";
     public static final String ALLELE_COUNT_INPUT_ARGUMENT_LONG_NAME = "allele-count-vcf";
-    public static final String BIALLELIC_ONLY_NAME = "biallelic-only";
     public static final String SAMPLE_NAME_ARGUMENT_LONG_NAME = "sample-name";
     public static final String COMPRESSION_LEVEL_ARGUMENT_LONG_NAME = "compression-level";
 
@@ -123,11 +122,6 @@ public class CollectSVEvidence extends ReadWalker {
     public GATKPath alleleCountInputFilename;
 
-    @Argument(
-            doc = "Process only bi-allelic SNP sites for locus depth",
-            fullName = BIALLELIC_ONLY_NAME, optional = true )
-    private boolean biAllelicOnly = true;
-
     @Argument(fullName = "allele-count-min-mapq",
             doc = "minimum mapping quality for read to be allele-counted",
             optional = true)
@@ -172,7 +166,7 @@ public void onTraversalStart() {
         if ( alleleCountInputFilename != null && alleleCountOutputFilename != null ) {
             alleleCounter = new AlleleCounter(sequenceDictionary, sampleName, compressionLevel,
                                                 alleleCountInputFilename, alleleCountOutputFilename,
-                                                biAllelicOnly, minMapQ, minQ);
+                                                minMapQ, minQ);
         } else if ( alleleCountInputFilename != null ) {
             throw new UserException("Having specified an allele-count-vcf input, " +
                     "you must also supply an allele-count-file for output.");
@@ -330,7 +324,7 @@ private void flushSplitCounts(final Predicate flushablePosition,
                                   final FeatureSink srWriter) {
         while (splitCounts.size() > 0 && flushablePosition.test(splitCounts.peek())) {
-            SplitPos pos = splitCounts.poll();
+            final SplitPos pos = splitCounts.poll();
             int countAtPos = 1;
             while (splitCounts.size() > 0 && splitCounts.peek().equals(pos)) {
                 countAtPos++;
@@ -341,7 +335,7 @@ private void flushSplitCounts(final Predicate flushablePosition,
         }
     }
 
-    private SplitPos getSplitPosition(GATKRead read) {
+    private SplitPos getSplitPosition( final GATKRead read ) {
        if (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.M) {
            final int matchLength = read.getCigar().getCigarElements().stream().filter(e -> e.getOperator().consumesReferenceBases()).mapToInt(CigarElement::getLength).sum();
            return new SplitPos(read.getStart() + matchLength, POSITION.RIGHT);
@@ -352,7 +346,7 @@ private SplitPos getSplitPosition(GATKRead read) {
         return new SplitPos(-1, POSITION.MIDDLE);
     }
 
-    private boolean isSoftClipped(final GATKRead read) {
+    private boolean isSoftClipped( final GATKRead read ) {
         final CigarOperator firstOperator = read.getCigar().getFirstCigarElement().getOperator();
         final CigarOperator lastOperator = read.getCigar().getLastCigarElement().getOperator();
         return (firstOperator == CigarOperator.SOFT_CLIP && lastOperator != CigarOperator.SOFT_CLIP) ||
@@ -564,7 +558,6 @@ final static class AlleleCounter {
         private final SAMSequenceDictionary dict;
         private final String sampleName;
         private final FeatureSink writer;
-        private final boolean biAllelicOnly;
         private final int minMapQ;
         private final int minQ;
         private final Iterator snpSourceItr;
@@ -575,7 +568,6 @@ public AlleleCounter( final SAMSequenceDictionary dict,
                               final int compressionLevel,
                               final GATKPath inputPath,
                               final GATKPath outputPath,
-                              final boolean biAllelicOnly,
                               final int minMapQ,
                               final int minQ ) {
             this.dict = dict;
@@ -594,7 +586,6 @@ public AlleleCounter( final SAMSequenceDictionary dict,
             }
             this.writer = codec.makeSink(outputPath, dict, sampleNames, compressionLevel);
             }
-            this.biAllelicOnly = biAllelicOnly;
             this.minMapQ = minMapQ;
             this.minQ = minQ;
             final FeatureDataSource snpSource =
@@ -704,7 +695,7 @@ private boolean readNextLocus() {
                 return false;
             }
             VariantContext snp = snpSourceItr.next();
-            while ( !snp.isSNP() || (biAllelicOnly && !snp.isBiallelic()) ) {
+            while ( !snp.isSNP() || !snp.isBiallelic() ) {
                 if ( !snpSourceItr.hasNext() ) {
                     return false;
                 }
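A note on the heterozygosity screen retained in calcBAF above: the ref and alt depths are compared to an expected 50/50 split with a chi-squared goodness-of-fit statistic (one degree of freedom for the two ref/alt categories), and the site is dropped when the resulting fit probability falls below the minimum het probability. The standalone sketch below reproduces that arithmetic outside the tool; it assumes Apache commons-math3, which the tool already uses for ChiSquaredDistribution, while the class name, the -1 sentinel, and the 0.5 threshold in main are invented for illustration.

    // Standalone sketch (not the tool's code) of the heterozygosity screen used in calcBAF:
    // chi-squared goodness-of-fit of ref/alt depths against an expected 50/50 split,
    // converted to a fit probability and compared to a minimum het probability.
    // Requires org.apache.commons:commons-math3 on the classpath.
    import org.apache.commons.math3.distribution.ChiSquaredDistribution;

    public class HetFitSketch {
        private static final ChiSquaredDistribution CHI_SQ_DIST = new ChiSquaredDistribution(1.);

        // returns the alt fraction, or -1. (this sketch's own sentinel) when the site fails the screen
        static double hetBAF( final int refDepth, final int altDepth, final double minHetProbability ) {
            final double totalDepth = refDepth + altDepth;
            final double expectRefAlt = totalDepth / 2.;
            final double refDiff = refDepth - expectRefAlt;
            final double altDiff = altDepth - expectRefAlt;
            final double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt;
            final double fitProb = 1. - CHI_SQ_DIST.cumulativeProbability(chiSq);
            return fitProb < minHetProbability ? -1. : altDepth / totalDepth;
        }

        public static void main( final String[] args ) {
            System.out.println(hetBAF(6, 4, .5));  // ~0.4, passes the screen
            System.out.println(hetBAF(7, 3, .5));  // too unequal, rejected: -1.0
        }
    }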