Assembly-based realignment filter
davidbenjamin committed Sep 6, 2019
1 parent 3b4278d commit 3b1c64a
Showing 10 changed files with 365 additions and 241 deletions.
@@ -7,9 +7,11 @@
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;


/**
@@ -24,11 +26,19 @@
*/
public abstract class MultiVariantWalkerGroupedOnStart extends MultiVariantWalker {
private List<VariantContext> currentVariants = new ArrayList<>();
private ReferenceContext spanningReferenceContext;
private int firstCurrentVariantStart;
private int lastCurrentVariantStart;
private List<ReadsContext> currentReadsContexts = new ArrayList<>();
private OverlapDetector<SimpleInterval> overlapDetector;

public static final String IGNORE_VARIANTS_THAT_START_OUTSIDE_INTERVAL = "ignore-variants-starting-outside-interval";

public static final String COMBINE_VARIANTS_DISTANCE = "combine-variants-distance";

public static final String MAX_COMBINED_DISTANCE = "max-distance";

public static final String REFERENCE_WINDOW_PADDING = "ref-padding";

/**
* this option has no effect unless intervals are specified.
* <p>
@@ -40,6 +50,33 @@ public abstract class MultiVariantWalkerGroupedOnStart extends MultiVariantWalke
optional = true)
private boolean ignoreIntervalsOutsideStart = false;

@Advanced
@Argument(fullName = COMBINE_VARIANTS_DISTANCE, doc = "Maximum distance between the starts of consecutive variants for them to be grouped together", optional = true)
protected int distanceToCombineVariants = defaultDistanceToGroupVariants();

@Advanced
@Argument(fullName = MAX_COMBINED_DISTANCE, doc = "Maximum distance from the first variant's start over which variants may be combined into one group", optional = true)
protected int maxCombinedDistance = defaultMaxGroupedSpan();

@Advanced
@Argument(fullName = REFERENCE_WINDOW_PADDING, doc = "Number of bases on either side to expand spanning reference window", optional = true)
protected int referenceWindowPadding = defaultReferenceWindowPadding();

// override to group variants that start nearby but not at the same locus
protected int defaultDistanceToGroupVariants() {
return 0;
}

// override to change reference padding
protected int defaultReferenceWindowPadding() {
return 1;
}

// override to cap the size span of combined variants
protected int defaultMaxGroupedSpan() {
return Integer.MAX_VALUE;
}

@Override
public boolean requiresReference() {
return true;
@@ -61,20 +98,20 @@ public final void apply(VariantContext variant, ReadsContext readsContext, Refer

// Collect all the variants that start at (or sufficiently near) the same locus into one group.
if (currentVariants.isEmpty()) {
currentVariants.add(variant);
firstCurrentVariantStart = variant.getStart();
} else if (!currentVariants.get(0).contigsMatch(variant)
|| currentVariants.get(0).getStart() < variant.getStart()) {
|| lastCurrentVariantStart < variant.getStart() - distanceToCombineVariants
|| firstCurrentVariantStart < variant.getStart() - maxCombinedDistance) {
// Flush the accumulated variants as one group before starting the next one
apply(new ArrayList<>(currentVariants), spanningReferenceContext);
apply(new ArrayList<>(currentVariants), currentReadsContexts);
currentVariants.clear();
currentVariants.add(variant);
} else {
currentVariants.add(variant);
}
if (referenceContext.hasBackingDataSource()){
referenceContext.setWindow(1, 1);
currentReadsContexts.clear();
firstCurrentVariantStart = variant.getStart();
}
spanningReferenceContext = getExpandedReferenceContext(currentVariants, spanningReferenceContext, referenceContext);

currentVariants.add(variant);
currentReadsContexts.add(readsContext);
lastCurrentVariantStart = variant.getStart();
}
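The grouping rule above can be read as a single predicate: a variant starts a new group when it lies on a different contig, starts more than distanceToCombineVariants past the previous variant's start, or would stretch the group more than maxCombinedDistance past the first variant's start. With the defaults (0 and Integer.MAX_VALUE) this effectively reduces to the old behavior of grouping only variants with identical starts. A minimal sketch of that predicate, written over plain ints rather than the walker's fields (illustration only, not part of the commit):

// Sketch: 'sameContig' stands in for currentVariants.get(0).contigsMatch(variant).
static boolean startsNewGroup(final boolean sameContig,
                              final int firstStartInGroup,
                              final int lastStartInGroup,
                              final int newStart,
                              final int distanceToCombineVariants,
                              final int maxCombinedDistance) {
    return !sameContig
            || lastStartInGroup < newStart - distanceToCombineVariants   // too far past the previous start
            || firstStartInGroup < newStart - maxCombinedDistance;       // group span would exceed the cap
}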

/**
@@ -83,28 +120,32 @@ public final void apply(VariantContext variant, ReadsContext readsContext, Refer
* This is the primary traversal for any MultiVariantWalkerGroupedOnStart walkers. Will traverse over input variant contexts
* and call #apply() exactly once for each unique reference start position. All variants starting at each locus
* across source files will be grouped and passed as a list of VariantContext objects.
*
* @param variantContexts VariantContexts from driving variants with matching start position
* @param variantContexts VariantContexts from driving variants with matching start position
* NOTE: This will never be empty
* @param referenceContext ReferenceContext object spanning all of the grouped VariantContexts,
* with referenceWindowPadding bases on either end as window padding.
* @param readsContexts ReadsContexts corresponding one-to-one with the entries of variantContexts
*/
public abstract void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext);
public abstract void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext, final List<ReadsContext> readsContexts);

public void apply(List<VariantContext> variantContexts, final List<ReadsContext> readsContexts) {
apply(variantContexts, makeSpanningReferenceContext(variantContexts, referenceWindowPadding), readsContexts);
}

/**
* Helper method that ensures the reference context it returns is adequate to span the length of all the accumulated
* VariantContexts. It makes the assumption that all variant contexts in currentVariants start at the same location and
that currentReferenceContext corresponds to the correct span for the longest of the first N-1 VariantContexts
* and that newReferenceContext corresponds to the correct span for the Nth item.
* VariantContexts. It assumes that all variant contexts in currentVariants have the same contig.
*/
@VisibleForTesting
final static ReferenceContext getExpandedReferenceContext(List<VariantContext> currentVariants,
ReferenceContext currentReferenceContext,
ReferenceContext newReferenceContext) {
if ((currentReferenceContext==null)||(currentVariants.size() == 1 || newReferenceContext.getWindow().getEnd() > currentReferenceContext.getWindow().getEnd())) {
return newReferenceContext;
}
return currentReferenceContext;
private ReferenceContext makeSpanningReferenceContext(final List<VariantContext> variantContexts, final int referenceWindowPadding) {
Utils.nonEmpty(variantContexts, "Must have at least one current variant context");
final List<String> contigs = variantContexts.stream().map(VariantContext::getContig).distinct().collect(Collectors.toList());
Utils.validate(contigs.size() == 1, "variant contexts should all have the same contig");
final int minStart = variantContexts.stream().mapToInt(VariantContext::getStart).min().getAsInt();
final int maxEnd = variantContexts.stream().mapToInt(VariantContext::getEnd).max().getAsInt();
final SimpleInterval combinedInterval = new SimpleInterval(contigs.get(0), minStart, maxEnd);

final ReferenceContext combinedReferenceContext = new ReferenceContext(reference, combinedInterval);
combinedReferenceContext.setWindow(referenceWindowPadding,referenceWindowPadding);
return combinedReferenceContext;
}
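A quick worked example of the arithmetic above, with invented coordinates (and assuming the padded window stays inside the contig):

// Illustration only: grouped variants at 20:1001-1099 and 20:1050-1150, referenceWindowPadding = 1
// minStart = min(1001, 1050) = 1001
// maxEnd   = max(1099, 1150) = 1150
// combinedInterval = 20:1001-1150, and setWindow(1, 1) expands the returned window to 20:1000-1151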

/**
@@ -147,7 +188,7 @@ private void afterTraverse() {
logger.warn("Error: The requested interval contained no data in source VCF files");

} else {
apply(currentVariants, spanningReferenceContext);
apply(currentVariants, currentReadsContexts);
}
}
}
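To see how the pieces fit together, here is a minimal hypothetical subclass (class name and parameter values invented, not part of the commit) that overrides the new grouping defaults and implements the three-argument apply:

import java.util.List;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;

// Hypothetical walker: groups variants whose starts are within 50 bp of each other,
// caps each group's span at 1000 bp, and pads the spanning reference window by 10 bases.
public final class ExampleGroupedVariantWalker extends MultiVariantWalkerGroupedOnStart {
    @Override protected int defaultDistanceToGroupVariants() { return 50; }
    @Override protected int defaultMaxGroupedSpan() { return 1000; }
    @Override protected int defaultReferenceWindowPadding() { return 10; }

    @Override
    public void apply(final List<VariantContext> variantContexts,
                      final ReferenceContext referenceContext,
                      final List<ReadsContext> readsContexts) {
        // called once per group; readsContexts is parallel to variantContexts
        logger.info(String.format("group of %d variant(s) starting at %s:%d, reference window %s",
                variantContexts.size(), variantContexts.get(0).getContig(),
                variantContexts.get(0).getStart(), referenceContext.getWindow()));
    }
}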
@@ -17,6 +17,7 @@
import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
@@ -140,7 +141,7 @@ public List<Class<? extends Annotation>> getDefaultVariantAnnotationGroups() {
private ReferenceContext storedReferenceContext;

@Override
public void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext) {
public void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext, final List<ReadsContext> readsContexts) {
// Check that the input variant contexts do not contain MNPs as these may not be properly merged
for (final VariantContext ctx : variantContexts) {
if (GATKVariantContextUtils.isUnmixedMnpIgnoringNonRef(ctx)) {

Large diffs are not rendered by default.

@@ -8,22 +8,16 @@ public class RealignmentArgumentCollection {
public static final double DEFAULT_DROP_RATIO = 0.2;
public static final double DEFAULT_SEED_SPLIT_FACTOR = 0.5;
public static final int DEFAULT_MAX_REASONABLE_FRAGMENT_LENGTH = 100000;
public static final int DEFAULT_MIN_ALIGNER_SCORE_DIFFERENCE = 20;
public static final double DEFAULT_MIN_MISMATCH_RATIO = 2.5;
public static final int DEFAULT_NUM_REGULAR_CONTIGS = 25;
public static final double DEFAULT_MIN_ALIGNER_SCORE_DIFFERENCE_PER_BASE = 0.2;
public static final double DEFAULT_MIN_MISMATCH_DIFFERENCE_PER_BASE = 0.02;
public static final int DEFAULT_NUM_REGULAR_CONTIGS = 250000;

/**
* BWA-mem index image created by {@link BwaMemIndexImageCreator}
*/
@Argument(fullName = "bwa-mem-index-image", shortName = "index", doc = "BWA-mem index image")
public String bwaMemIndexImage;

/**
* Turn off the default mate-aware realignment
*/
@Argument(fullName = "dont-use-mates", doc = "Realign individual reads without using their mates", optional = true)
public boolean dontUseMates = false;

/**
* Maximum fragment length to be considered a reasonable pair alignment
*/
@@ -33,14 +27,14 @@ public class RealignmentArgumentCollection {
/**
* Minimum difference per base between best and second-best alignment scores for a read to be considered well-mapped
*/
@Argument(fullName = "min-aligner-score-difference", doc = "Minimum difference between best and second-best alignment for a read to be considered well-mapped", optional = true)
public int minAlignerScoreDifference = DEFAULT_MIN_ALIGNER_SCORE_DIFFERENCE;
@Argument(fullName = "min-aligner-score-difference-per-base", doc = "Minimum difference between best and second-best alignment for a read to be considered well-mapped", optional = true)
public double minAlignerScoreDifferencePerBase = DEFAULT_MIN_ALIGNER_SCORE_DIFFERENCE_PER_BASE;

/**
* Minimum difference per base between the number of mismatches in the second-best and best alignments
*/
@Argument(fullName = "min-mismatch-ratio", doc = "Minimum ratio between the number of mismatches in the second best and best alignments", optional = true)
public double minMismatchRatio = DEFAULT_MIN_MISMATCH_RATIO;
@Argument(fullName = "min-mismatch-difference-per-base", doc = "Minimum ratio between the number of mismatches in the second best and best alignments", optional = true)
public double minMismatchDifferencePerBase = DEFAULT_MIN_MISMATCH_DIFFERENCE_PER_BASE;

/**
* Number of regular i.e. non-alt contigs
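These thresholds changed from absolute values to per-base values, so the required score and mismatch differences now scale with the length of the realigned sequence. The filter code that applies them lives in the large diff that is not rendered above, so the following is only a plausible sketch of how a per-base cutoff translates into an absolute one; the method and variable names are invented:

// Hypothetical illustration of applying a per-base aligner-score threshold.
static boolean wellMapped(final int bestAlignerScore, final int secondBestAlignerScore,
                          final int sequenceLength, final double minScoreDifferencePerBase) {
    // with the default 0.2 and a 150 bp sequence, a difference of at least 30 is required
    return bestAlignerScore - secondBestAlignerScore >= minScoreDifferencePerBase * sequenceLength;
}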
@@ -1,38 +1,31 @@
package org.broadinstitute.hellbender.tools.walkers.realignmentfilter;


import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFlag;
import htsjdk.samtools.util.OverlapDetector;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.math3.util.Pair;
import org.broadinstitute.hellbender.tools.spark.sv.utils.Strand;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.bwa.BwaMemAligner;
import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment;
import org.broadinstitute.hellbender.utils.bwa.BwaMemIndex;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.*;
import java.util.stream.Collectors;

public class RealignmentEngine {
private final BwaMemAligner aligner;
private final int maxReasonableFragmentLength;
private final int minAlignerScoreDifference;
private final double minMismatchRatio;
private final int numberOfRegularContigs;

public RealignmentEngine(final RealignmentArgumentCollection rfac) {
final BwaMemIndex index = new BwaMemIndex(rfac.bwaMemIndexImage);
maxReasonableFragmentLength = rfac.maxReasonableFragmentLength;
minAlignerScoreDifference = rfac.minAlignerScoreDifference;
minMismatchRatio = rfac.minMismatchRatio;
numberOfRegularContigs = rfac.numRegularContigs;
aligner = new BwaMemAligner(index);
aligner.setMinSeedLengthOption(rfac.minSeedLength);
@@ -83,57 +76,67 @@ private final static boolean mightSupportInsertion(final CigarElement cigarEleme
return cigarOperator == CigarOperator.I || cigarOperator == CigarOperator.S;
}

public RealignmentResult realign(final GATKRead read) {
final List<BwaMemAlignment> alignments = aligner.alignSeqs(Arrays.asList(read), GATKRead::getBasesNoCopy).get(0);
public List<BwaMemAlignment> realign(final byte[] sequence) {
final List<BwaMemAlignment> alignments = aligner.alignSeqs(Arrays.asList(sequence)).get(0).stream()
.filter(a -> a.getRefId() >= 0)
.collect(Collectors.toList());

final List<BwaMemAlignment> nonAltAlignments = alignments.size() == 1 ? alignments :
return alignments.size() == 1 ? alignments :
alignments.stream().filter(a -> a.getRefId() < numberOfRegularContigs).collect(Collectors.toList());
return checkAlignments(nonAltAlignments, minAlignerScoreDifference, minMismatchRatio);
}

@VisibleForTesting
static RealignmentResult checkAlignments(final List<BwaMemAlignment> alignments, final int minAlignerScoreDifference, final double minMismatchRatio) {
if (alignments.isEmpty()) {
return new RealignmentResult(false, Collections.emptyList());
} else if (alignments.get(0).getRefId() < 0) {
return new RealignmentResult(false, alignments);
} else if (alignments.size() == 1) {
return new RealignmentResult(true, alignments);
} else {
final int scoreDifference = alignments.get(0).getAlignerScore() - alignments.get(1).getAlignerScore();
final double mismatchRatio = (double) alignments.get(1).getNMismatches() / (Math.max(alignments.get(0).getNMismatches(),0.0001));
return new RealignmentResult(scoreDifference >= minAlignerScoreDifference && mismatchRatio > minMismatchRatio, alignments);
/**
* Find all common alignments of a set of unitigs. That is, find all alignments in which each unitig aligns within a small distance of the others
* @param allUnitigAlignments for each unitig, a list of possible alignments
* @param maxReasonableFragmentLength maximum distance between alignments of different unitigs for them to count as part of one joint alignment
* @return a list of joint alignments, each containing one alignment per unitig
*/
public static List<List<BwaMemAlignment>> findJointAlignments(List<List<BwaMemAlignment>> allUnitigAlignments, int maxReasonableFragmentLength) {
if (allUnitigAlignments.isEmpty()) {
return Collections.emptyList();
} else if (allUnitigAlignments.size() == 1) {
return allUnitigAlignments.get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
}

// TODO: we need to check that contig is the same or equivalent up to hg38 alt contig
// TODO: do this by looking at index.getReferenceContigNames() and bamHeader.getSequenceDictionary().getSequences()
// TODO: in IDE and seeing what the correspondence could be
// TODO: put in check that start position is within eg 10 Mb of original mapping
}
// it's more efficient to start from the unitig with the fewest alignments
Collections.sort(allUnitigAlignments, Comparator.comparingInt(List::size));

// note that we realigned the original aligned bases of the read and mate, not the raw sequenced bases
// thus one of them has been reverse-complemented and we look for pairs that map to the same strand
public static List<Pair<BwaMemAlignment, BwaMemAlignment>> findPlausiblePairs(List<BwaMemAlignment> readRealignments, List<BwaMemAlignment> mateRealignments, int maxReasonableFragmentLength) {
return readRealignments.stream()
.flatMap(r -> mateRealignments.stream()
.filter(m -> m.getRefId() == r.getRefId())
.filter(m -> Math.abs(m.getRefStart() - r.getRefStart()) < maxReasonableFragmentLength)
.filter(m -> SAMFlag.READ_REVERSE_STRAND.isSet(m.getSamFlag()) == SAMFlag.READ_REVERSE_STRAND.isSet(r.getSamFlag()))
.map(m -> new Pair<>(r,m)))
.collect(Collectors.toList());
}
// make an OverlapDetector for each unitig with padding given by {@code maxReasonableFragmentLength}
final List<OverlapDetector<BwaMemAlignment>> overlapDetectors = new ArrayList<>();
for (final List<BwaMemAlignment> unitigAlignments : allUnitigAlignments) {
final List<SimpleInterval> intervals = unitigAlignments.stream().map(RealignmentEngine::convertToInterval).collect(Collectors.toList());
final OverlapDetector<BwaMemAlignment> overlapDetector = new OverlapDetector<BwaMemAlignment>(-maxReasonableFragmentLength/2, -maxReasonableFragmentLength/2);
overlapDetector.addAll(unitigAlignments, intervals);
overlapDetectors.add(overlapDetector);
}

public static class RealignmentResult {
private final boolean mapsToSupposedLocation;
private final List<BwaMemAlignment> realignments;
final Set<List<BwaMemAlignment>> commonAlignments = new HashSet<>();
for (final BwaMemAlignment alignment : allUnitigAlignments.get(0)) {
final Strand strand = getStrand(alignment);
final SimpleInterval interval = convertToInterval(alignment);

public RealignmentResult(boolean mapsToSupposedLocation, List<BwaMemAlignment> realignments) {
this.mapsToSupposedLocation = mapsToSupposedLocation;
this.realignments = realignments;
// keep only alignments of the first unitig for which every unitig has a same-strand overlapping alignment
if (!overlapDetectors.stream().allMatch(detector -> detector.getOverlaps(interval).stream().anyMatch(a -> getStrand(a) == strand))) {
continue;
}

final List<BwaMemAlignment> alignments = overlapDetectors.stream()
.map(detector -> detector.getOverlaps(interval).stream()
.filter(a -> getStrand(a) == strand)
.max(Comparator.comparingInt(BwaMemAlignment::getAlignerScore)).get())
.collect(Collectors.toList());

commonAlignments.add(alignments);
}

public boolean isGood() { return mapsToSupposedLocation; }
return new ArrayList<>(commonAlignments);
}

private static SimpleInterval convertToInterval(final BwaMemAlignment alignment) {
return new SimpleInterval(Integer.toString(alignment.getRefId()), alignment.getRefStart(), alignment.getRefEnd());
}

public List<BwaMemAlignment> getRealignments() { return realignments; }
private static Strand getStrand(final BwaMemAlignment alignment) {
return SAMFlag.READ_REVERSE_STRAND.isSet(alignment.getSamFlag()) ? Strand.NEGATIVE : Strand.POSITIVE;
}
}
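A hedged sketch of how the new joint-alignment API might be called; the engine variable, the unitig byte arrays, and the interpretation of the count are assumptions rather than code from this commit:

// Assumed context: 'engine' is a RealignmentEngine and the byte arrays are unitig sequences
// produced by reassembly (the kind reported under the new UNITIGS annotation).
final List<List<BwaMemAlignment>> perUnitigAlignments = Arrays.asList(
        engine.realign(unitig1Bases),
        engine.realign(unitig2Bases));
final List<List<BwaMemAlignment>> joint =
        RealignmentEngine.findJointAlignments(perUnitigAlignments, 100_000);
// Each inner list holds one alignment per unitig, all on the same strand and within roughly
// maxReasonableFragmentLength of one another; joint.size() is the sort of count the new
// NALIGNS annotation appears to report.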
@@ -118,7 +118,6 @@ public final class GATKVCFConstants {
public static final String SEQUENCING_QUAL_KEY = "SEQQ";
public static final String POLYMERASE_SLIPPAGE_QUAL_KEY = "STRQ";
public static final String STRAND_QUAL_KEY = "STRANDQ";
public static final String REALIGNMENT_COUNTS_KEY = "RCNTS";
public static final String CONTAMINATION_QUAL_KEY = "CONTQ";
public static final String READ_ORIENTATION_QUAL_KEY = "ROQ";
public static final String ORIGINAL_CONTIG_MISMATCH_KEY = "OCM";
@@ -128,6 +127,9 @@ public final class GATKVCFConstants {
public static final String MEDIAN_MAPPING_QUALITY_KEY = "MMQ";
public static final String MEDIAN_FRAGMENT_LENGTH_KEY = "MFRL";
public static final String MEDIAN_READ_POSITON_KEY = "MPOS";
public static final String UNITIG_SIZES_KEY = "UNITIGS";
public static final String ALIGNMENT_SCORE_DIFFERENCE_KEY = "ALIGN_DIFF";
public static final String JOINT_ALIGNMENT_COUNT_KEY = "NALIGNS";

// Methylation-specific INFO Keys
public static final String UNCONVERTED_BASE_COVERAGE_KEY = "UNCONVERTED_BASE_COV";
@@ -218,7 +218,6 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf
// M2-related info lines
addInfoLine(new VCFInfoHeaderLine(EVENT_COUNT_IN_HAPLOTYPE_KEY, 1, VCFHeaderLineType.Integer, "Number of events in this haplotype"));
addInfoLine(new VCFInfoHeaderLine(NORMAL_LOG_10_ODDS_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Normal log 10 likelihood ratio of diploid het or hom alt genotypes"));
addInfoLine(new VCFInfoHeaderLine(REALIGNMENT_COUNTS_KEY, 2, VCFHeaderLineType.Integer, "Number of reads passing and failing realignment."));
addInfoLine(new VCFInfoHeaderLine(TUMOR_LOG_10_ODDS_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Log 10 likelihood ratio score of variant existing versus not existing"));
addFormatLine(new VCFFormatHeaderLine(TUMOR_LOG_10_ODDS_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Log 10 likelihood ratio score of variant existing versus not existing"));
addInfoLine(new VCFInfoHeaderLine(IN_PON_KEY, 0, VCFHeaderLineType.Flag, "site found in panel of normals"));
@@ -237,5 +236,8 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf
addInfoLine(new FragmentLength().getDescriptions().get(0));
addInfoLine(new MappingQuality().getDescriptions().get(0));
addInfoLine(new ReadPosition().getDescriptions().get(0));
addInfoLine(new VCFInfoHeaderLine(UNITIG_SIZES_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Sizes of reassembled unitigs"));
addInfoLine(new VCFInfoHeaderLine(JOINT_ALIGNMENT_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of joint alignments"));
addInfoLine(new VCFInfoHeaderLine(ALIGNMENT_SCORE_DIFFERENCE_KEY, 1, VCFHeaderLineType.Integer, "Difference in alignment score between best and next-best alignment"));
}
}
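For reference, the three added header lines render in an output VCF header (modulo htsjdk formatting details) as:

##INFO=<ID=UNITIGS,Number=.,Type=Integer,Description="Sizes of reassembled unitigs">
##INFO=<ID=NALIGNS,Number=1,Type=Integer,Description="Number of joint alignments">
##INFO=<ID=ALIGN_DIFF,Number=1,Type=Integer,Description="Difference in alignment score between best and next-best alignment">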
@@ -7,7 +7,6 @@
import org.apache.commons.collections.IteratorUtils;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
@@ -16,7 +15,6 @@
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

@@ -27,54 +25,10 @@ public class MultiVariantWalkerGroupedOnStartUnitTest extends GATKBaseTest {
public static final Allele REF = Allele.create("A", true);
public static final Allele ALT = Allele.create("C", false);

// This test is disabled until mocking classes with mockito is allowed in GATK4.
@Test(enabled = false )
public void testGetExpandedReferenceContext() {
ReferenceContext referenceContext1 = Mockito.mock(ReferenceContext.class);
when(referenceContext1.getWindow()).thenReturn(new SimpleInterval("20",1000,1100));
when(referenceContext1.getBases()).thenReturn(new byte[]{'A'});

ReferenceContext referenceContext2 = Mockito.mock(ReferenceContext.class);
when(referenceContext2.getWindow()).thenReturn(new SimpleInterval("20",1000,1050));
when(referenceContext2.getBases()).thenReturn(new byte[]{'C'});

ReferenceContext referenceContext3 = Mockito.mock(ReferenceContext.class);
when(referenceContext3.getWindow()).thenReturn(new SimpleInterval("20",1050,1100));
when(referenceContext3.getBases()).thenReturn(new byte[]{'G'});

ReferenceContext referenceContext4 = Mockito.mock(ReferenceContext.class);
when(referenceContext4.getWindow()).thenReturn(new SimpleInterval("20",1050,1150));
when(referenceContext4.getBases()).thenReturn(new byte[]{'T'});

List<VariantContext> VCs = new ArrayList<>();
VariantContext A = new VariantContextBuilder().loc("20",1001,1099).alleles(Arrays.asList(REF, Allele.NON_REF_ALLELE)).attribute(VCFConstants.END_KEY, 1099).make();

VCs.add(A);
ReferenceContext contextState = MultiVariantWalkerGroupedOnStart.getExpandedReferenceContext(VCs, null, referenceContext1);
Assert.assertEquals(contextState.getWindow().getEnd(),1100);
Assert.assertEquals(contextState.getBases()[0],'A');

VCs.add(A);
contextState = MultiVariantWalkerGroupedOnStart.getExpandedReferenceContext(VCs, contextState, referenceContext1);
Assert.assertEquals(contextState.getWindow().getEnd(),1100);
Assert.assertEquals(contextState.getBases()[0],'A');

VCs.clear();
VCs.add(A);
contextState = MultiVariantWalkerGroupedOnStart.getExpandedReferenceContext(VCs, contextState, referenceContext4);
Assert.assertEquals(contextState.getWindow().getEnd(),1150);
Assert.assertEquals(contextState.getBases()[0],'T');

VCs.add(A);
contextState = MultiVariantWalkerGroupedOnStart.getExpandedReferenceContext(VCs, contextState, referenceContext3);
Assert.assertEquals(contextState.getWindow().getEnd(),1150);
Assert.assertEquals(contextState.getBases()[0],'T');
}

private final class DummyExampleGroupingMultiVariantWalker extends MultiVariantWalkerGroupedOnStart {
List<List<VariantContext>> seenVariants = new ArrayList<>();
@Override
public void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext) {
public void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext, final List<ReadsContext> readsContexts) {
seenVariants.add(variantContexts);
}
}
@@ -27,6 +27,7 @@ public void testDreamTruthData() {
final File filteredVcf = createTempFile("filtered", ".vcf");

final String[] args = {
"-R", b37Reference,
"-I", tumorBam.getAbsolutePath(),
"-V", truthVcf.getAbsolutePath(),
"-L", "20",
@@ -37,4 +38,5 @@ public void testDreamTruthData() {

runCommandLine(args);
}

}
@@ -4,7 +4,8 @@
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.apache.commons.math3.util.Pair;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment;
import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
@@ -14,6 +15,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;

public class RealignmentEngineUnitTest {
private static final SAMFileHeader HEADER = ArtificialReadUtils.createArtificialSamHeader();
@@ -53,15 +55,13 @@ public void testSupportsVariant() {
}
}

@Test
public void testCheckRealignments() {
final BwaMemAlignment aln90 = makeAlignment(0,90, 1, false);
final BwaMemAlignment aln80 = makeAlignment(1,80, 1, true);
final BwaMemAlignment aln70 = makeAlignment(0,70, 1, false);
Assert.assertTrue(RealignmentEngine.checkAlignments(Arrays.asList(aln70), 1, 0.0).isGood());
Assert.assertTrue(RealignmentEngine.checkAlignments(Arrays.asList(aln90, aln80), 5, 0.0).isGood());
Assert.assertTrue(RealignmentEngine.checkAlignments(Arrays.asList(aln90, aln80, aln70), 5,0.0).isGood());
Assert.assertFalse(RealignmentEngine.checkAlignments(Arrays.asList(aln90, aln80, aln70), 11,0.0).isGood());
// note that we realigned the original aligned bases of the read and mate, not the raw sequenced bases
// thus one of them has been reverse-complemented and we look for pairs that map to the same strand
private static List<Pair<BwaMemAlignment, BwaMemAlignment>> findPlausiblePairs(List<BwaMemAlignment> readRealignments, List<BwaMemAlignment> mateRealignments, int maxReasonableFragmentLength) {
return RealignmentEngine.findJointAlignments(Arrays.asList(readRealignments, mateRealignments), maxReasonableFragmentLength).stream()
.filter(list -> list.size() == 2)
.map(list -> ImmutablePair.of(list.get(0), list.get(1)))
.collect(Collectors.toList());
}

@Test
@@ -81,23 +81,23 @@ public void testFindPlausiblePairs() {
// reads are 1,3,5,7,9; mates are 2,4,6,8,10 -- only pair is 9 & 10
final List<BwaMemAlignment> readAlignments1 = Arrays.asList(aln1, aln3, aln5, aln7, aln9);
final List<BwaMemAlignment> mateAlignments1 = Arrays.asList(aln2, aln4, aln6, aln8, aln10);
final List<Pair<BwaMemAlignment, BwaMemAlignment>> pairs1 = RealignmentEngine.findPlausiblePairs(readAlignments1, mateAlignments1, maxFragmentLength);
final List<Pair<BwaMemAlignment, BwaMemAlignment>> pairs1 = findPlausiblePairs(readAlignments1, mateAlignments1, maxFragmentLength);
Assert.assertEquals(pairs1.size(),1);
Assert.assertEquals(pairs1.get(0).getFirst(), aln9);
Assert.assertEquals(pairs1.get(0).getSecond(), aln10);
Assert.assertEquals(pairs1.get(0).getLeft(), aln9);
Assert.assertEquals(pairs1.get(0).getRight(), aln10);

// reads are 1,2,3,4,5; mates are 6,7,8,9,10 -- only pair is 5 & 7
final List<BwaMemAlignment> readAlignments2 = Arrays.asList(aln1, aln2, aln3, aln4, aln5);
final List<BwaMemAlignment> mateAlignments2 = Arrays.asList(aln6, aln7, aln8, aln9, aln10);
final List<Pair<BwaMemAlignment, BwaMemAlignment>> pairs2 = RealignmentEngine.findPlausiblePairs(readAlignments2, mateAlignments2, maxFragmentLength);
final List<Pair<BwaMemAlignment, BwaMemAlignment>> pairs2 = findPlausiblePairs(readAlignments2, mateAlignments2, maxFragmentLength);
Assert.assertEquals(pairs2.size(),1);
Assert.assertEquals(pairs2.get(0).getFirst(), aln5);
Assert.assertEquals(pairs2.get(0).getSecond(), aln7);
Assert.assertEquals(pairs2.get(0).getLeft(), aln5);
Assert.assertEquals(pairs2.get(0).getRight(), aln7);

// reads are 1,2,5,6,9; mates are 3,4,7,8,10 -- pairs are 1 & 3, 2 & 4, 5 & 7, 9 & 10
final List<BwaMemAlignment> readAlignments3 = Arrays.asList(aln1, aln2, aln5, aln6, aln9);
final List<BwaMemAlignment> mateAlignments3 = Arrays.asList(aln3, aln4, aln7, aln8, aln10);
final List<Pair<BwaMemAlignment, BwaMemAlignment>> pairs3 = RealignmentEngine.findPlausiblePairs(readAlignments3, mateAlignments3, maxFragmentLength);
final List<Pair<BwaMemAlignment, BwaMemAlignment>> pairs3 = findPlausiblePairs(readAlignments3, mateAlignments3, maxFragmentLength);
Assert.assertEquals(pairs3.size(),4);
}
