Skip to content

Commit

Permalink
rewrote getReadCoordinateForReferenceCoordinate
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin committed Mar 5, 2020
1 parent fc8f944 commit aa2f2ba
Show file tree
Hide file tree
Showing 70 changed files with 1,770 additions and 1,881 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static OptionalInt getBaseQuality(final GATKRead read, final VariantConte
if (vc.getStart() < read.getStart() || read.getEnd() < vc.getStart()) {
return OptionalInt.empty();
}
final OptionalDouble result = BaseQualityRankSumTest.getReadBaseQuality(read, vc.getStart());
final OptionalDouble result = BaseQualityRankSumTest.getReadBaseQuality(read, vc);
return result.isPresent() ? OptionalInt.of((int) FastMath.round(result.getAsDouble())) : OptionalInt.empty();
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
Expand All @@ -10,6 +11,7 @@

import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.OptionalDouble;


Expand All @@ -32,14 +34,13 @@ public final class BaseQualityRankSumTest extends RankSumTest implements Standar
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY); }

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
return getReadBaseQuality(read, refLoc);
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
return getReadBaseQuality(read, vc);
}

public static OptionalDouble getReadBaseQuality(final GATKRead read, final int refLoc) {
public static OptionalDouble getReadBaseQuality(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);

final int readCoordinate = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, refLoc, ClippingTail.RIGHT_TAIL, true);
return readCoordinate == ReadUtils.CLIPPING_GOAL_NOT_REACHED || readCoordinate < 0 || readCoordinate >= read.getLength() ? OptionalDouble.empty() : OptionalDouble.of(read.getBaseQuality(readCoordinate));
final Optional<Byte> readBaseQuality = ReadUtils.getReadBaseQualityAtReferenceCoordinate(read, vc.getStart());
return readBaseQuality.isPresent() ? OptionalDouble.of(readBaseQuality.get()) : OptionalDouble.empty();
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
Expand Down Expand Up @@ -33,7 +34,7 @@ public final class ClippingRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.CLIPPING_RANK_SUM_KEY); }

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);
return OptionalDouble.of(AlignmentUtils.getNumHardClippedBases(read));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,7 @@ public List<VCFInfoHeaderLine> getDescriptions() {

@VisibleForTesting
static Boolean doesReadHaveN(final GATKRead read, final VariantContext vc) {

if (vc.getStart() < read.getStart() || read.getEnd() < vc.getStart()) {
return false;
}

final Pair<Integer, Boolean> offsetAndInsideDeletion = ReadUtils.getReadCoordinateForReferenceCoordinate(read, vc.getStart(), true);

if (offsetAndInsideDeletion.getRight()) {
return false;
} else {
final int offset = offsetAndInsideDeletion.getLeft();
return !(offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED || offset < 0 || offset >= read.getLength() || AlignmentUtils.isInsideDeletion(read.getCigar(), offset))
&& read.getBase(offset) == 'N';
}
final Optional<Byte> readBase = ReadUtils.getReadBaseAtReferenceCoordinate(read, vc.getStart());
return readBase.isPresent() && readBase.get() == 'N';
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
Expand Down Expand Up @@ -31,7 +32,7 @@ public final class LikelihoodRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.LIKELIHOOD_RANK_SUM_KEY); }

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele) {
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele) {
Utils.nonNull(read, "read is null");
Utils.nonNull(bestAllele, "mostLikelyAllele is null");
if ( ! bestAllele.isInformative() ) {
Expand All @@ -41,7 +42,7 @@ protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc
}

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
// todo its possible this should throw, as This method should never have been called as getElementForRead(read,refloc,mostLikelyAllele) was overriden
return OptionalDouble.empty();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
Expand Down Expand Up @@ -36,7 +37,7 @@ public final class MappingQualityRankSumTest extends RankSumTest implements Stan
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY); }

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);
return OptionalDouble.of(read.getMappingQuality());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
import org.broadinstitute.hellbender.utils.read.ClippingTail;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
Expand Down Expand Up @@ -78,7 +77,7 @@ public void annotate(final ReferenceContext refContext,
.collect(Collectors.toMap(a -> a, a -> new MutableInt(0)));

Utils.stream(likelihoods.bestAllelesBreakingTies(g.getSampleName()))
.filter(ba -> ba.isInformative() && isUsableRead(ba.evidence) && getReadBaseQuality(ba.evidence, vc.getStart()).orElse(0) >= MINIMUM_BASE_QUALITY)
.filter(ba -> ba.isInformative() && isUsableRead(ba.evidence) && BaseQualityRankSumTest.getReadBaseQuality(ba.evidence, vc).orElse(0) >= MINIMUM_BASE_QUALITY)
.forEach(ba -> (ReadUtils.isF2R1(ba.evidence) ? f2r1Counts : f1r2Counts).get(ba.allele).increment());

final int[] f1r2 = vc.getAlleles().stream().mapToInt(a -> f1r2Counts.get(a).intValue()).toArray();
Expand Down Expand Up @@ -170,9 +169,4 @@ protected static boolean isUsableRead(final GATKRead read) {
return read.getMappingQuality() != 0 && read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE;
}

private static OptionalDouble getReadBaseQuality(final GATKRead read, final int refLoc) {
Utils.nonNull(read);
final int readCoord = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refLoc, ClippingTail.RIGHT_TAIL, true);
return readCoord < 0 || readCoord >= read.getLength() ? OptionalDouble.empty() : OptionalDouble.of(read.getBaseQuality(readCoord));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.*;
import java.util.stream.Collectors;


/**
Expand All @@ -36,10 +34,8 @@ public Map<String, Object> annotate(final ReferenceContext ref,
final List<Double> refQuals = new ArrayList<>();
final List<Double> altQuals = new ArrayList<>();

final int refLoc = vc.getStart();

if( likelihoods != null) {
fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals, refLoc);
fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals);
}


Expand All @@ -60,12 +56,12 @@ public Map<String, Object> annotate(final ReferenceContext ref,
}
}

protected void fillQualsFromLikelihood(VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods, List<Double> refQuals, List<Double> altQuals, int refLoc) {
protected void fillQualsFromLikelihood(VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods, List<Double> refQuals, List<Double> altQuals) {
for (final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele : likelihoods.bestAllelesBreakingTies()) {
final GATKRead read = bestAllele.evidence;
final Allele allele = bestAllele.allele;
if (bestAllele.isInformative() && isUsableRead(read, refLoc)) {
final OptionalDouble value = getElementForRead(read, refLoc, bestAllele);
if (bestAllele.isInformative() && isUsableRead(read, vc)) {
final OptionalDouble value = getElementForRead(read, vc, bestAllele);
// Bypass read if the clipping goal is not reached or the refloc is inside a spanning deletion
if (value.isPresent() && value.getAsDouble() != INVALID_ELEMENT_FROM_READ) {
if (allele.isReference()) {
Expand All @@ -82,31 +78,31 @@ protected void fillQualsFromLikelihood(VariantContext vc, AlleleLikelihoods<GATK
* Get the element for the given read at the given reference position
*
* @param read the read
* @param refLoc the reference position
* @param vc the variant to be annotated
* @param bestAllele the most likely allele for this read
* @return a Double representing the element to be used in the rank sum test, or null if it should not be used
*/
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele) {
return getElementForRead(read, refLoc);
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele) {
return getElementForRead(read, vc);
}

/**
* Get the element for the given read at the given reference position
*
* @param read the read
* @param refLoc the reference position
* @param vc the variant to be annotated
* @return an OptionalDouble representing the element to be used in the rank sum test, empty if it should not be used
*/
protected abstract OptionalDouble getElementForRead(final GATKRead read, final int refLoc);
protected abstract OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc);

/**
* Can the read be used in comparative tests between ref / alt bases?
*
* @param read the read to consider
* @param refLoc the reference location
* @param vc the variant to be annotated
* @return true if this read is meaningful for comparison, false otherwise
*/
protected boolean isUsableRead(final GATKRead read, final int refLoc) {
protected boolean isUsableRead(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);
return read.getMappingQuality() != 0 && read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
Expand Down Expand Up @@ -39,26 +41,31 @@ public final class ReadPosRankSumTest extends RankSumTest implements StandardAnn
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.READ_POS_RANK_SUM_KEY); }

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
return getReadPosition(read, refLoc);
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
return getReadPosition(read, vc);
}

@Override
public boolean isUsableRead(final GATKRead read, final int refLoc) {
public boolean isUsableRead(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);
return super.isUsableRead(read, refLoc) && read.getSoftEnd() >= refLoc;
// we use vc.getEnd() + 1 in case of a leading indel -- if this isn't relevant getReadPosition will return empty
return super.isUsableRead(read, vc) && read.getSoftStart() <= vc.getEnd() + 1 && read.getSoftEnd() >= vc.getStart();
}

public static OptionalDouble getReadPosition(final GATKRead read, final int refLoc) {
public static OptionalDouble getReadPosition(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refLoc, ClippingTail.RIGHT_TAIL, true);
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
return OptionalDouble.empty();

// edge case: if a read starts with an insertion then the variant context's end is the reference base BEFORE the read start,
// which appears like no overlap
if (read.getStart() == vc.getEnd() + 1 && read.getCigarElements().stream().map(CigarElement::getOperator)
.filter(o -> !o.isClipping()).findFirst().orElse(null) == CigarOperator.INSERTION) {
return OptionalDouble.of(0);
}

// If the offset inside a deletion, it does not lie on a read.
if ( AlignmentUtils.isInsideDeletion(read.getCigar(), offset) ) {
return OptionalDouble.of(INVALID_ELEMENT_FROM_READ);
final Pair<Integer, CigarOperator> offset = ReadUtils.getReadCoordinateForReferenceCoordinate(read, vc.getStart());

if (offset.getLeft() == ReadUtils.CLIPPING_GOAL_NOT_REACHED) {
return OptionalDouble.empty();
}

// hard clips at this point in the code are perfectly good bases that were clipped to make the read fit the assembly region
Expand All @@ -68,17 +75,9 @@ public static OptionalDouble getReadPosition(final GATKRead read, final int refL
final int leadingHardClips = firstElement.getOperator() == CigarOperator.HARD_CLIP ? firstElement.getLength() : 0;
final int trailingHardClips = lastElement.getOperator() == CigarOperator.HARD_CLIP ? lastElement.getLength() : 0;

if (offset >= cigar.getReadLength()) {
return OptionalDouble.empty();
}
int readPos = leadingHardClips + AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), offset, false, 0, 0);
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
final int numOriginalBases = numAlignedBases + leadingHardClips + trailingHardClips;

//After the middle of the read, we compute the postion from the end of the read.
if (readPos > numOriginalBases / 2) {
readPos = numOriginalBases - (readPos + 1);
}
return OptionalDouble.of(readPos);
final int leftDistance = leadingHardClips + offset.getLeft();
final int rightDistance = read.getLength() - 1 - offset.getLeft() + trailingHardClips;
return OptionalDouble.of(Math.min(leftDistance, rightDistance));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ protected OptionalInt getValueForRead(final GATKRead read, final VariantContext
if (vc.getStart() < read.getStart() || read.getEnd() < vc.getStart()) {
return OptionalInt.empty();
}
final OptionalDouble valueAsDouble = ReadPosRankSumTest.getReadPosition(read, vc.getStart());
final OptionalDouble valueAsDouble = ReadPosRankSumTest.getReadPosition(read, vc);
return valueAsDouble.isPresent() ? OptionalInt.of((int) valueAsDouble.getAsDouble()) : OptionalInt.empty();
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific;

import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.tools.walkers.annotator.BaseQualityRankSumTest;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
Expand Down Expand Up @@ -74,12 +75,12 @@ public List<String> getSecondaryRawKeys() {
* Get the element for the given read at the given reference position
*
* @param read the read
* @param refLoc the reference position
* @param vc the variant to be annotated
* @return a Double representing the element to be used in the rank sum test, or null if it should not be used
*/
@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
return BaseQualityRankSumTest.getReadBaseQuality(read, refLoc);
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
return BaseQualityRankSumTest.getReadBaseQuality(read, vc);
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific;

import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
Expand Down Expand Up @@ -68,7 +69,7 @@ public List<String> getSecondaryRawKeys() {
}

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);
return OptionalDouble.of(read.getMappingQuality());
}
Expand Down
Loading

0 comments on commit aa2f2ba

Please sign in to comment.