Skip to content

Commit

Permalink
documentation clarification to SVInterval, also requires constructor …
Browse files Browse the repository at this point in the history
…argument validator (#5157)
  • Loading branch information
SHuang-Broad authored Sep 6, 2018
1 parent 33af6de commit 2bfe742
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -373,9 +373,9 @@ public static int overlapOnRefSpan(final AlignmentInterval one, final AlignmentI
if (!one.referenceSpan.getContig().equals(two.referenceSpan.getContig())) return 0;

// dummy number for chr to be used in constructing SVInterval, since 2 input AI's both map to the same chr by this point
final int dummyChr = -1;
final int dummyChr = 0;
final SVInterval intOne = new SVInterval(dummyChr, one.referenceSpan.getStart(), one.referenceSpan.getEnd() + 1),
intTwo = new SVInterval(dummyChr, two.referenceSpan.getStart(), two.referenceSpan.getEnd() + 1);
intTwo = new SVInterval(dummyChr, two.referenceSpan.getStart(), two.referenceSpan.getEnd() + 1);

return intOne.overlapLen(intTwo);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ static boolean deletionConsistencyCheck(final VariantContext simple, final Set<S

final SimpleInterval deletedRange = new SimpleInterval(simple.getContig(), simple.getStart() + 1, simple.getEnd());
// dummy number for chr to be used in constructing SVInterval, since 2 input AI's both map to the same chr by this point
final int dummyChr = -1;
final int dummyChr = 0;
final SVInterval intervalOne = new SVInterval(dummyChr, deletedRange.getStart() - 1, deletedRange.getEnd());

for (final SimpleInterval missing : missingSegments) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -837,21 +837,24 @@ private static List<SVInterval> findHighCoverageSubintervalsAndLog(
final int minFlankingHighCovFactor = params.highDepthCoverageFactor;
final int minPeakHighCovFactor = params.highDepthCoveragePeakFactor;

int minFlankingHighCoverageValue = (int) (minFlankingHighCovFactor * broadcastMetadata.getValue().getCoverage());
int minPeakHighCoverageValue = (int) (minPeakHighCovFactor * broadcastMetadata.getValue().getCoverage());
final ReadMetadata shortReadMetadata = broadcastMetadata.getValue();
int minFlankingHighCoverageValue = (int) (minFlankingHighCovFactor * shortReadMetadata.getCoverage());
int minPeakHighCoverageValue = (int) (minPeakHighCovFactor * shortReadMetadata.getCoverage());
final List<SVInterval> result =
findHighCoverageSubIntervals(ctx, broadcastMetadata, intervals, unfilteredReads,
filter,
minFlankingHighCoverageValue,
minPeakHighCoverageValue);
log("Found " + result.size() + " sub-intervals with coverage over " + minFlankingHighCoverageValue + " and a peak coverage of over " + minPeakHighCoverageValue + ".", logger);
log("Found " + result.size() + " sub-intervals with coverage over " + minFlankingHighCoverageValue +
" and a peak coverage of over " + minPeakHighCoverageValue + ".", logger);

final String intervalFile = params.highCoverageIntervalsFile;
if (intervalFile != null) {
try (final OutputStreamWriter writer =
new OutputStreamWriter(new BufferedOutputStream(BucketUtils.createFile(intervalFile)))) {
for (SVInterval i : result) {
writer.write(i.toBedString(broadcastMetadata.getValue()) + "\n");
for (final SVInterval svInterval : result) {
final String bedLine = shortReadMetadata.getContigName(svInterval.getContig()) + "\t" + (svInterval.getStart() - 1) + "\t" + svInterval.getEnd() + "\n";
writer.write(bedLine);
}
} catch (final IOException ioe) {
throw new UserException.CouldNotCreateOutputFile("Can't write high coverage intervals file " + intervalFile, ioe);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,10 @@ private static class CigarQualityInfo {
private static double getGenomeIntervalsOverlap(final BreakpointEvidence evidence,
final FeatureDataSource<BEDFeature> genomeIntervals,
final ReadMetadata readMetadata) {
final SimpleInterval simpleInterval = evidence.getLocation().toSimpleInterval(readMetadata);
final SVInterval location = evidence.getLocation();
final SimpleInterval simpleInterval = new SimpleInterval(readMetadata.getContigName(location.getContig()),
location.getStart(),
location.getEnd() - 1); // "end - 1" because SimpleIntervals are closed on both ends
int overlap = 0;
for(final Iterator<BEDFeature> overlapperItr = genomeIntervals.query(simpleInterval); overlapperItr.hasNext();) {
final BEDFeature overlapper = overlapperItr.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,36 @@
import com.esotericsoftware.kryo.io.Output;
import com.google.common.annotations.VisibleForTesting;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;

/**
* Naturally collating, simple interval.
* Some methods assume that the interval is half-open.
* Naturally collating, simple interval
*
* WARNING: THIS IS NOT THE SAME AS THE BED COORDINATE SYSTEM OR {@link SimpleInterval} !!!!!
*
* <p>
* SVIntervals are comparable, and provide the same total order as coordinate-sorted BAMs.
* They can encode any contiguous stretch of bases on the reference,
* or strand-sensitively on its reverse complement.
* They can encode 0-length intervals (i.e., locations) to indicate things like the precise,
* unambiguous location of an insert between two reference bases.
* They're super light-weight, and cache friendly (no references),
* and they can be compared and tested for equality quickly and locally.
* They know how to serialize themselves with Kryo.
* </p>
*
* In general, flexibility is provided by SVInterval, and
* one should be responsible for carefully being self-consistent.
*
* So please refrain from putting methods here that
* makes any assumptions about coordinate system, 1-based or 0-based.
*
* <p>
* Some methods assume that the interval is half-open.
* Please read the documentations clearly before picking a convention and starting to construct the intervals.
* And one should stick to this convention.
* </p>
*/
@DefaultSerializer(SVInterval.Serializer.class)
@VisibleForTesting
Expand All @@ -20,7 +44,46 @@ public final class SVInterval implements Comparable<SVInterval> {
private final int start;
private final int end;

/**
* This constructor uses the {@link SVIntervalConstructorArgsValidator#RefuseNegativeContigAndCoordinates} as the validator.
*/
public SVInterval( final int contig, final int start, final int end ) {
this(contig, start, end, SVIntervalConstructorArgsValidator.RefuseNegativeContigAndCoordinates);
}

@FunctionalInterface
public interface SVIntervalConstructorArgsValidator {
void accept(final int contig, final int start, final int end);

default SVIntervalConstructorArgsValidator andThen(final SVIntervalConstructorArgsValidator after) {
Utils.nonNull(after);
return (final int contig, final int start, final int end) -> {
accept(contig, start, end); after.accept(contig, start, end);
};
}

SVIntervalConstructorArgsValidator ACCEPTS_ALL = ((contig, start, end) -> {});

SVIntervalConstructorArgsValidator RefuseNegativeContigs =
((contig, start, end) -> {
if (contig < 0)
throw new IllegalArgumentException("provided contig is negative: " + contig);
});

SVIntervalConstructorArgsValidator RefuseNegativeCoordinates =
((contig, start, end) -> {
if (start < 0)
throw new IllegalArgumentException("provided start is negative: " + start);
if (end < 0)
throw new IllegalArgumentException("provided end is negative: " + end);
});

SVIntervalConstructorArgsValidator RefuseNegativeContigAndCoordinates =
RefuseNegativeContigs.andThen(RefuseNegativeCoordinates);
}

public SVInterval( final int contig, final int start, final int end, final SVIntervalConstructorArgsValidator argsValidator) {
argsValidator.accept(contig, start, end);
this.contig = contig;
this.start = start;
this.end = end;
Expand All @@ -41,20 +104,28 @@ private void serialize( final Kryo kryo, final Output output ) {
public int getContig() { return contig; }
public int getStart() { return start; }
public int getEnd() { return end; }
public int getLength() { return end-start; }

// assumes the interval is half-open
public int getLength() { return end - start; }

public SVLocation getStartLocation() { return new SVLocation(contig, start); }

/** This definition is appropriate for half-open intervals.
* If you're building your intervals as closed, you're going to have trouble.
/**
* This definition is appropriate for half-open intervals.
* If you're building your intervals as closed, you're going to have trouble.
*/
public boolean overlaps( final SVInterval that ) {
return this.contig == that.contig && this.start < that.end && that.start < this.end;
}

// assumes the interval is half-open
public boolean isDisjointFrom( final SVInterval that ) {
return !overlaps(that);
}

/**
* Returns false if the two intervals overlap as judged by {@link #overlaps(SVInterval)}
*/
public boolean isUpstreamOf( final SVInterval that ) {
return this.contig < that.contig || (this.contig == that.contig && this.end <= that.start);
}
Expand All @@ -63,26 +134,35 @@ public boolean contains( final SVInterval that ) {
return this.contig == that.contig && this.start <= that.start && this.end >= that.end;
}

/**
* Assumes interval half-open and this {@link #isUpstreamOf(SVInterval)}.
*/
public int gapLen( final SVInterval that ) {
if ( this.contig != that.contig ) return Integer.MAX_VALUE;
return that.start - this.end;
}

// assumes half-open
public int overlapLen( final SVInterval that ) {
if ( this.contig != that.contig ) return 0;
return Math.max(0, Math.min(this.end, that.end) - Math.max(this.start, that.start));
}

// no assumption about half-open
public SVInterval join( final SVInterval that ) {
if ( this.contig != that.contig ) throw new GATKException("Joining across contigs.");
return new SVInterval(contig, Math.min(this.start, that.start), Math.max(this.end, that.end));
}

/**
* Returns {@code null} if the two intervals don't overlap as judged by {@link #overlaps(SVInterval)}.
*/
public SVInterval intersect(final SVInterval that) {
if (! this.overlaps(that)) return null;
return new SVInterval(this.getContig(), Math.max(this.start, that.start), Math.min(this.end, that.end));
}

// a floored mid point between start and end, e.g. if start == 1 and end == 2, the midpoint will be (1+2)/2 -> 1.
public int midpoint() {
return (start + end) / 2;
}
Expand All @@ -109,15 +189,6 @@ public int compareTo( final SVInterval that ) {
return result;
}

public String toBedString(final ReadMetadata metadata) {
return metadata.getContigName(this.contig)+"\t"+(start-1)+"\t"+end;
}

public SimpleInterval toSimpleInterval(final ReadMetadata readMetadata) {
// "end - 1" because SimpleIntervals are closed on both ends
return new SimpleInterval(readMetadata.getContigName(contig), start, end - 1);
}

@Override
public String toString() { return Integer.toString(contig)+"["+start+":"+end+"]"; }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public void putFindAndOrderTest() {
Assert.assertEquals(testTree.size(), intervals.length);

// putting a new interval into the tree ought to return the sentinel value and increase the size by 1
Assert.assertEquals(testTree.put(new SVInterval(-1, 0, 0), someValue), sentinel);
Assert.assertEquals(testTree.put(new SVInterval(-1, 0, 0, SVInterval.SVIntervalConstructorArgsValidator.RefuseNegativeCoordinates), someValue), sentinel);
Assert.assertEquals(testTree.size(), intervals.length+1);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,24 @@ private Object[][] forTestContainment() {
public void testContainment(final SVInterval one, final SVInterval two, final boolean expected) {
Assert.assertEquals(one.contains(two), expected);
}

@DataProvider
private Object[][] forTestCtorArgsValidators() {
final List<Object[]> data = new ArrayList<>(20);

data.add(new Object[]{-1, 0, 0, SVInterval.SVIntervalConstructorArgsValidator.RefuseNegativeContigs});
data.add(new Object[]{ 0, -1, 0, SVInterval.SVIntervalConstructorArgsValidator.RefuseNegativeCoordinates});
data.add(new Object[]{ 0, 0, -1, SVInterval.SVIntervalConstructorArgsValidator.RefuseNegativeCoordinates});

data.add(new Object[]{-1, 0, 0, SVInterval.SVIntervalConstructorArgsValidator.RefuseNegativeContigAndCoordinates});
data.add(new Object[]{ 0, -1, 0, SVInterval.SVIntervalConstructorArgsValidator.RefuseNegativeContigAndCoordinates});
data.add(new Object[]{ 0, 0, -1, SVInterval.SVIntervalConstructorArgsValidator.RefuseNegativeContigAndCoordinates});
data.add(new Object[]{-1, -1, -1, SVInterval.SVIntervalConstructorArgsValidator.RefuseNegativeContigAndCoordinates});

return data.toArray(new Object[data.size()][]);
}
@Test(groups = "sv", dataProvider = "forTestCtorArgsValidators", expectedExceptions = IllegalArgumentException.class)
public void test(final int contig, final int start, final int end, final SVInterval.SVIntervalConstructorArgsValidator argsValidator) {
new SVInterval(contig, start, end, argsValidator);
}
}

0 comments on commit 2bfe742

Please sign in to comment.