diff --git a/src/main/java/picard/util/BedToIntervalList.java b/src/main/java/picard/util/BedToIntervalList.java index 8cf17e6049..4ee65cd619 100644 --- a/src/main/java/picard/util/BedToIntervalList.java +++ b/src/main/java/picard/util/BedToIntervalList.java @@ -124,25 +124,14 @@ protected int doWork() { header.setSortOrder(SAMFileHeader.SortOrder.coordinate); final IntervalList intervalList = new IntervalList(header); - /** - * NB: BED is zero-based, but a BEDCodec by default (since it is returns tribble Features) has an offset of one, - * so it returns 1-based starts. Ugh. Set to zero. - */ - final FeatureReader bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(BEDCodec.StartOffset.ZERO), false); + final FeatureReader bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false); final CloseableTribbleIterator iterator = bedReader.iterator(); final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6); while (iterator.hasNext()) { final BEDFeature bedFeature = iterator.next(); final String sequenceName = bedFeature.getContig(); - /** - * NB: BED is zero-based, so we need to add one here to make it one-based. Please observe we set the start - * offset to zero when creating the BEDCodec. - */ - final int start = bedFeature.getStart() + 1; - /** - * NB: BED is 0-based OPEN (which, for the end is equivalent to 1-based closed). - */ + final int start = bedFeature.getStart(); final int end = bedFeature.getEnd(); // NB: do not use an empty name within an interval String name = bedFeature.getName(); @@ -157,7 +146,8 @@ protected int doWork() { throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start)); } else if (sequenceRecord.getSequenceLength() < start) { throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start)); - } else if (end < 1) { + } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig + || end < 0 ) { throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end)); } else if (sequenceRecord.getSequenceLength() < end) { throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end)); diff --git a/src/test/java/picard/util/BedToIntervalListTest.java b/src/test/java/picard/util/BedToIntervalListTest.java index 567b222de7..cf66516466 100644 --- a/src/test/java/picard/util/BedToIntervalListTest.java +++ b/src/test/java/picard/util/BedToIntervalListTest.java @@ -62,7 +62,11 @@ public Object[][] testBedToIntervalListDataProvider() { {"overlapping.bed"}, {"extended.bed"}, {"one_base_interval.bed"}, - {"zero_base_interval.bed"} + {"zero_base_interval.bed"}, + {"first_base_in_contig.bed"}, + {"zero_length_interval_at_first_position_in_contig.bed"}, + {"last_base_in_contig.bed"}, + {"multi_contig.bed"} }; } diff --git a/testdata/picard/util/BedToIntervalListTest/first_base_in_contig.bed b/testdata/picard/util/BedToIntervalListTest/first_base_in_contig.bed new file mode 100644 index 0000000000..babf5d47fe --- /dev/null +++ b/testdata/picard/util/BedToIntervalListTest/first_base_in_contig.bed @@ -0,0 +1 @@ +chr1 0 100 \ No newline at end of file diff --git a/testdata/picard/util/BedToIntervalListTest/first_base_in_contig.bed.interval_list b/testdata/picard/util/BedToIntervalListTest/first_base_in_contig.bed.interval_list new file mode 100644 index 0000000000..1abb970685 --- /dev/null +++ b/testdata/picard/util/BedToIntervalListTest/first_base_in_contig.bed.interval_list @@ -0,0 +1,10 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:chr1 LN:1000000 +@SQ SN:chr2 LN:1000000 +@SQ SN:chr3 LN:1000000 +@SQ SN:chr4 LN:1000000 +@SQ SN:chr5 LN:1000000 +@SQ SN:chr6 LN:1000000 +@SQ SN:chr7 LN:1000000 +@SQ SN:chr8 LN:1000000 +chr1 1 100 + . diff --git a/testdata/picard/util/BedToIntervalListTest/last_base_in_contig.bed b/testdata/picard/util/BedToIntervalListTest/last_base_in_contig.bed new file mode 100644 index 0000000000..3117ed9215 --- /dev/null +++ b/testdata/picard/util/BedToIntervalListTest/last_base_in_contig.bed @@ -0,0 +1 @@ +chr1 0 1000000 diff --git a/testdata/picard/util/BedToIntervalListTest/last_base_in_contig.bed.interval_list b/testdata/picard/util/BedToIntervalListTest/last_base_in_contig.bed.interval_list new file mode 100644 index 0000000000..dfca040b33 --- /dev/null +++ b/testdata/picard/util/BedToIntervalListTest/last_base_in_contig.bed.interval_list @@ -0,0 +1,10 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:chr1 LN:1000000 +@SQ SN:chr2 LN:1000000 +@SQ SN:chr3 LN:1000000 +@SQ SN:chr4 LN:1000000 +@SQ SN:chr5 LN:1000000 +@SQ SN:chr6 LN:1000000 +@SQ SN:chr7 LN:1000000 +@SQ SN:chr8 LN:1000000 +chr1 1 1000000 + . diff --git a/testdata/picard/util/BedToIntervalListTest/multi_contig.bed b/testdata/picard/util/BedToIntervalListTest/multi_contig.bed new file mode 100644 index 0000000000..79c7e844b6 --- /dev/null +++ b/testdata/picard/util/BedToIntervalListTest/multi_contig.bed @@ -0,0 +1,4 @@ +chr1 100 2000 chr1_100_2000+ 11 + +chr1 3000 4000 chr1_3000_4000- 12 - +chr2 100 2000 chr2_100_2000+ 11 + +chr2 3000 4000 chr2_3000_4000- 12 - diff --git a/testdata/picard/util/BedToIntervalListTest/multi_contig.bed.interval_list b/testdata/picard/util/BedToIntervalListTest/multi_contig.bed.interval_list new file mode 100644 index 0000000000..57c5a8ccb5 --- /dev/null +++ b/testdata/picard/util/BedToIntervalListTest/multi_contig.bed.interval_list @@ -0,0 +1,13 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:chr1 LN:1000000 +@SQ SN:chr2 LN:1000000 +@SQ SN:chr3 LN:1000000 +@SQ SN:chr4 LN:1000000 +@SQ SN:chr5 LN:1000000 +@SQ SN:chr6 LN:1000000 +@SQ SN:chr7 LN:1000000 +@SQ SN:chr8 LN:1000000 +chr1 101 2000 + chr1_100_2000+ +chr1 3001 4000 - chr1_3000_4000- +chr2 101 2000 + chr2_100_2000+ +chr2 3001 4000 - chr2_3000_4000- diff --git a/testdata/picard/util/BedToIntervalListTest/zero_length_interval_at_first_position_in_contig.bed b/testdata/picard/util/BedToIntervalListTest/zero_length_interval_at_first_position_in_contig.bed new file mode 100644 index 0000000000..3a149568d6 --- /dev/null +++ b/testdata/picard/util/BedToIntervalListTest/zero_length_interval_at_first_position_in_contig.bed @@ -0,0 +1 @@ +chr1 0 0 \ No newline at end of file diff --git a/testdata/picard/util/BedToIntervalListTest/zero_length_interval_at_first_position_in_contig.bed.interval_list b/testdata/picard/util/BedToIntervalListTest/zero_length_interval_at_first_position_in_contig.bed.interval_list new file mode 100644 index 0000000000..b8dc1800fd --- /dev/null +++ b/testdata/picard/util/BedToIntervalListTest/zero_length_interval_at_first_position_in_contig.bed.interval_list @@ -0,0 +1,10 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:chr1 LN:1000000 +@SQ SN:chr2 LN:1000000 +@SQ SN:chr3 LN:1000000 +@SQ SN:chr4 LN:1000000 +@SQ SN:chr5 LN:1000000 +@SQ SN:chr6 LN:1000000 +@SQ SN:chr7 LN:1000000 +@SQ SN:chr8 LN:1000000 +chr1 1 0 + .