Skip to content

Commit

Permalink
Simplify BedToIntervalList by not reimplementing coordinate conversion (
Browse files Browse the repository at this point in the history
#1292)

* Simplify BedToIntervalList by not reimplementing coordiant conversion

* BedToIntervalList spends a lot of comments complaining about coordinate conversions after explicitely setting the codec to not perform the conversion from [0,..) to [1,..]
* Simplified by just using the baked in conversion instead of disabling it.
* add more tests and fix for zero-length edge case at the start of the contig
  • Loading branch information
lbergelson authored and gbggrant committed Apr 25, 2019
1 parent 7bb2862 commit d981f9c
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 15 deletions.
18 changes: 4 additions & 14 deletions src/main/java/picard/util/BedToIntervalList.java
Original file line number Diff line number Diff line change
Expand Up @@ -124,25 +124,14 @@ protected int doWork() {
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
final IntervalList intervalList = new IntervalList(header);

/**
* NB: BED is zero-based, but a BEDCodec by default (since it is returns tribble Features) has an offset of one,
* so it returns 1-based starts. Ugh. Set to zero.
*/
final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(BEDCodec.StartOffset.ZERO), false);
final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

while (iterator.hasNext()) {
final BEDFeature bedFeature = iterator.next();
final String sequenceName = bedFeature.getContig();
/**
* NB: BED is zero-based, so we need to add one here to make it one-based. Please observe we set the start
* offset to zero when creating the BEDCodec.
*/
final int start = bedFeature.getStart() + 1;
/**
* NB: BED is 0-based OPEN (which, for the end is equivalent to 1-based closed).
*/
final int start = bedFeature.getStart();
final int end = bedFeature.getEnd();
// NB: do not use an empty name within an interval
String name = bedFeature.getName();
Expand All @@ -157,7 +146,8 @@ protected int doWork() {
throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
} else if (sequenceRecord.getSequenceLength() < start) {
throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
} else if (end < 1) {
} else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
|| end < 0 ) {
throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
} else if (sequenceRecord.getSequenceLength() < end) {
throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
Expand Down
6 changes: 5 additions & 1 deletion src/test/java/picard/util/BedToIntervalListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ public Object[][] testBedToIntervalListDataProvider() {
{"overlapping.bed"},
{"extended.bed"},
{"one_base_interval.bed"},
{"zero_base_interval.bed"}
{"zero_base_interval.bed"},
{"first_base_in_contig.bed"},
{"zero_length_interval_at_first_position_in_contig.bed"},
{"last_base_in_contig.bed"},
{"multi_contig.bed"}
};
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr1 0 100
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@HD VN:1.6 SO:coordinate
@SQ SN:chr1 LN:1000000
@SQ SN:chr2 LN:1000000
@SQ SN:chr3 LN:1000000
@SQ SN:chr4 LN:1000000
@SQ SN:chr5 LN:1000000
@SQ SN:chr6 LN:1000000
@SQ SN:chr7 LN:1000000
@SQ SN:chr8 LN:1000000
chr1 1 100 + .
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr1 0 1000000
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@HD VN:1.6 SO:coordinate
@SQ SN:chr1 LN:1000000
@SQ SN:chr2 LN:1000000
@SQ SN:chr3 LN:1000000
@SQ SN:chr4 LN:1000000
@SQ SN:chr5 LN:1000000
@SQ SN:chr6 LN:1000000
@SQ SN:chr7 LN:1000000
@SQ SN:chr8 LN:1000000
chr1 1 1000000 + .
4 changes: 4 additions & 0 deletions testdata/picard/util/BedToIntervalListTest/multi_contig.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
chr1 100 2000 chr1_100_2000+ 11 +
chr1 3000 4000 chr1_3000_4000- 12 -
chr2 100 2000 chr2_100_2000+ 11 +
chr2 3000 4000 chr2_3000_4000- 12 -
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
@HD VN:1.6 SO:coordinate
@SQ SN:chr1 LN:1000000
@SQ SN:chr2 LN:1000000
@SQ SN:chr3 LN:1000000
@SQ SN:chr4 LN:1000000
@SQ SN:chr5 LN:1000000
@SQ SN:chr6 LN:1000000
@SQ SN:chr7 LN:1000000
@SQ SN:chr8 LN:1000000
chr1 101 2000 + chr1_100_2000+
chr1 3001 4000 - chr1_3000_4000-
chr2 101 2000 + chr2_100_2000+
chr2 3001 4000 - chr2_3000_4000-
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr1 0 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@HD VN:1.6 SO:coordinate
@SQ SN:chr1 LN:1000000
@SQ SN:chr2 LN:1000000
@SQ SN:chr3 LN:1000000
@SQ SN:chr4 LN:1000000
@SQ SN:chr5 LN:1000000
@SQ SN:chr6 LN:1000000
@SQ SN:chr7 LN:1000000
@SQ SN:chr8 LN:1000000
chr1 1 0 + .

0 comments on commit d981f9c

Please sign in to comment.