Skip to content

Commit

Permalink
Add flag to keep zero length intervals when converting bed -> interva…
Browse files Browse the repository at this point in the history
…l_list (#1928)

* Add flag to keep zero length intervals when converting bed -> interval_list
  • Loading branch information
rickymagner authored Dec 15, 2023
1 parent 98c86c4 commit 5e2c94a
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 7 deletions.
31 changes: 29 additions & 2 deletions src/main/java/picard/util/BedToIntervalList.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,13 @@ public class BedToIntervalList extends CommandLineProgram {
@Argument(doc = "If true, entries that are on contig-names that are missing from the provided dictionary will be dropped.")
public boolean DROP_MISSING_CONTIGS = false;

// When false (the default), entries whose converted coordinates satisfy start == end + 1
// are logged and left out of the interval list; when true they are added like any other entry.
@Argument(doc = "If true, write length zero intervals in input bed file to resulting interval list file.")
public boolean KEEP_LENGTH_ZERO_INTERVALS = false;

private final Log LOG = Log.getInstance(getClass());
private int missingIntervals = 0;
private int missingRegion = 0;
// Number of input entries found to have length zero (start == end + 1). It is incremented
// whether or not the entry is written, and is reported in the summary log messages.
private int lengthZeroIntervals = 0;

@Override
protected int doWork() {
Expand Down Expand Up @@ -168,8 +172,18 @@ protected int doWork() {
}

final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
intervalList.add(interval);

// A length zero bed entry shows up here as start == end + 1, because the bed start
// gets shifted by 1 when using 1-based coordinates.
final boolean isLengthZero = (start == end + 1);

if (!isLengthZero || KEEP_LENGTH_ZERO_INTERVALS) {
    // Either a regular interval, or a length zero one the user asked to keep.
    intervalList.add(new Interval(sequenceName, start, end, isNegativeStrand, name));
} else {
    LOG.info(String.format("Skipping writing length zero interval at %s:%d-%d.", sequenceName, start, end));
}

// Counted regardless of the flag so the summary can report on both outcomes.
if (isLengthZero) {
    lengthZeroIntervals++;
}

progressLogger.record(sequenceName, start);
}
Expand All @@ -182,6 +196,19 @@ protected int doWork() {
LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
}
}

// Summarise how many length zero entries were seen and what happened to them.
if (KEEP_LENGTH_ZERO_INTERVALS) {
    // The entries were kept, so only speak up when there is something to warn about.
    if (lengthZeroIntervals > 0) {
        LOG.warn(String.format("Input file had %d entries with length zero. Run with the KEEP_LENGTH_ZERO_INTERVALS flag set to false to remove these.", lengthZeroIntervals));
    }
} else if (lengthZeroIntervals == 0) {
    LOG.info("No input regions had length zero, so none were skipped.");
} else {
    LOG.info(String.format("Skipped writing a total of %d entries with length zero in the input file.", lengthZeroIntervals));
}

// Sort and write the output
IntervalList out = intervalList;
if (SORT) {
Expand Down
23 changes: 18 additions & 5 deletions src/test/java/picard/util/BedToIntervalListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class BedToIntervalListTest {

private static final String TEST_DATA_DIR = "testdata/picard/util/BedToIntervalListTest";

private void doTest(final String inputBed, final String header) throws IOException, SAMException {
private void doTest(final String inputBed, final String header, boolean keepLengthZero) throws IOException, SAMException {
final File outputFile = File.createTempFile("bed_to_interval_list_test.", ".interval_list");
outputFile.deleteOnExit();
final BedToIntervalList program = new BedToIntervalList();
Expand All @@ -25,6 +25,7 @@ private void doTest(final String inputBed, final String header) throws IOExcepti
program.SEQUENCE_DICTIONARY = new File(TEST_DATA_DIR, header);
program.OUTPUT = outputFile;
program.UNIQUE = true;
program.KEEP_LENGTH_ZERO_INTERVALS = keepLengthZero;
program.doWork();

// Assert they are equal
Expand All @@ -34,25 +35,30 @@ private void doTest(final String inputBed, final String header) throws IOExcepti

// Runs doTest for each BED file from the data provider against header.sam, passing
// keepLengthZero = true so that KEEP_LENGTH_ZERO_INTERVALS is enabled for the run.
// NOTE(review): the two-argument doTest call below looks like the pre-change line of
// this diff shown next to its replacement - confirm against the repository.
@Test(dataProvider = "testBedToIntervalListDataProvider")
public void testBedToIntervalList(final String inputBed) throws IOException {
doTest(inputBed, "header.sam");
doTest(inputBed, "header.sam", true);
}

// Test a fixed bed file (seq_dict_test.bed) against each sequence dictionary supplied by
// the data provider, with KEEP_LENGTH_ZERO_INTERVALS enabled via keepLengthZero = true.
// NOTE(review): the two-argument doTest call below looks like the pre-change line of
// this diff shown next to its replacement - confirm against the repository.
@Test(dataProvider = "testBedToIntervalListSequenceDictionaryDataProvider")
public void testBedToIntervalListSequenceDictionary(final String dictionary) throws IOException {
doTest("seq_dict_test.bed", dictionary);
doTest("seq_dict_test.bed", dictionary, true);
}

// Test for bad dictionaries - we expect these to throw exceptions
// (either SAMException or PicardException, per expectedExceptions below).
// NOTE(review): the two-argument doTest call below looks like the pre-change line of
// this diff shown next to its replacement - confirm against the repository.
@Test(dataProvider = "testBedToIntervalListSequenceDictionaryBadDataProvider",
expectedExceptions = {SAMException.class, PicardException.class})
public void testBedToIntervalListBadSequenceDictionary(final String dictionary) throws IOException {
doTest("seq_dict_test.bed", dictionary);
doTest("seq_dict_test.bed", dictionary, true);
}

// Runs doTest on each BED file from the out-of-bounds data provider against header.sam
// and expects the run to fail with a PicardException.
// NOTE(review): the two-argument doTest call below looks like the pre-change line of
// this diff shown next to its replacement - confirm against the repository.
@Test(dataProvider = "testBedToIntervalListOutOfBoundsDataProvider", expectedExceptions = PicardException.class)
public void testBedToIntervalListOutOfBounds(final String inputBed) throws IOException {
doTest(inputBed, "header.sam");
doTest(inputBed, "header.sam", true);
}

/**
 * Runs doTest on each BED file from the data provider against header.sam with
 * KEEP_LENGTH_ZERO_INTERVALS disabled, i.e. the configuration in which length zero
 * entries are left out of the output.
 */
@Test(dataProvider = "testLengthZeroIntervalsSkippedProvider")
public void testLengthZeroIntervalsSkipped(final String inputBed) throws IOException {
    final boolean keepLengthZero = false;
    doTest(inputBed, "header.sam", keepLengthZero);
}

@DataProvider
Expand Down Expand Up @@ -103,4 +109,11 @@ public Object[][] testBedToIntervalListOutOfBoundsDataProvider() {
{"off_by_one_interval.bed"}
};
}

/**
 * Supplies the input for testLengthZeroIntervalsSkipped: a single case naming the
 * zero_length_test.bed fixture.
 */
@DataProvider
public Object[][] testLengthZeroIntervalsSkippedProvider() {
    final Object[] zeroLengthCase = {"zero_length_test.bed"};
    return new Object[][]{zeroLengthCase};
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
chr1 1000 1000
chr2 1000 2000
chr3 1000 1000
chr4 1000 2000
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@HD VN:1.6 SO:coordinate
@SQ SN:chr1 LN:1000000
@SQ SN:chr2 LN:1000000
@SQ SN:chr3 LN:1000000
@SQ SN:chr4 LN:1000000
@SQ SN:chr5 LN:1000000
@SQ SN:chr6 LN:1000000
@SQ SN:chr7 LN:1000000
@SQ SN:chr8 LN:1000000
chr2 1001 2000 + .
chr4 1001 2000 + .

0 comments on commit 5e2c94a

Please sign in to comment.