Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inverted SoftClippedReadFilter to conform to filtering logic #8888

Merged
merged 5 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@ private ReadFilterArgumentDefinitions(){}

public static final String KEEP_INTERVAL_NAME = "keep-intervals";

public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "soft-clipped-ratio-threshold";
public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "soft-clipped-leading-trailing-ratio";

public static final String INVERT_SOFT_CLIP_RATIO_FILTER = "invert-soft-clip-ratio-filter";
public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratio";
public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "max-soft-clipped-leading-trailing-ratio";

public static final String READ_FILTER_TAG = "read-filter-tag";
public static final String READ_FILTER_TAG_COMP = "read-filter-tag-comp";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,21 @@ public final class SoftClippedReadFilter extends ReadFilter {
static final long serialVersionUID = 1L;
private final Logger logger = LogManager.getLogger(this.getClass());

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.INVERT_SOFT_CLIP_RATIO_FILTER,
doc = "Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.",
optional = true
)
boolean doInvertFilter = false;

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD,
doc = "Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in read for read to be filtered.",
optional = true,
mutex = { ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD }
)
Double minimumSoftClippedRatio = null;
Double maximumSoftClippedRatio = null;

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD,
doc = "Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases in read for read to be filtered.",
optional = true,
mutex = {ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD}
)
Double minimumLeadingTrailingSoftClippedRatio = null;
Double maximumLeadingTrailingSoftClippedRatio = null;

// Command line parser requires a no-arg constructor
public SoftClippedReadFilter() {}
Expand All @@ -61,15 +54,15 @@ private boolean testMinSoftClippedRatio(final GATKRead read) {
totalLength += element.getLength();
}

final double softClipRatio = ((double)numSoftClippedBases / (double)totalLength);
final double softClipRatio = totalLength != 0 ? ((double)numSoftClippedBases / (double)totalLength) : 0.0;

return softClipRatio > minimumSoftClippedRatio;
return softClipRatio <= maximumSoftClippedRatio;
}

private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) {

if ( read.getCigarElements().size() < 1 ) {
return false;
return true; // NOTE: in this edge case the read should pass this filter, as there are no cigar elements that could carry leading/trailing soft-clipping.
}

// Get the index of the last cigar element:
Expand All @@ -90,24 +83,25 @@ private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) {
.sum();

// Calculate the ratio:
final double softClipRatio = ((double)numLeadingTrailingSoftClippedBases / (double)totalLength);
final double softClipRatio = totalLength != 0 ? ((double)numLeadingTrailingSoftClippedBases / (double)totalLength) : 0.0;

return softClipRatio > minimumLeadingTrailingSoftClippedRatio;
return softClipRatio <= maximumLeadingTrailingSoftClippedRatio;
}

@Override
// NOTE: for read filters we always return true if the read passes the filter, and false if it doesn't.
public boolean test(final GATKRead read) {

final boolean result;

// NOTE: Since we have mutex'd the args for the clipping ratios, we only need to see if they
// have been specified. If they have, that's the filter logic we're using.
// If we specified the clipping ratio, we use the overall soft-clipped ratio test:
if ( minimumSoftClippedRatio != null ) {
if ( maximumSoftClippedRatio != null ) {
result = testMinSoftClippedRatio(read);
}
// If we specified the leading/trailing clipping ratio, we use the leading/trailing soft-clipped ratio test:
else if ( minimumLeadingTrailingSoftClippedRatio != null ) {
else if ( maximumLeadingTrailingSoftClippedRatio != null ) {
result = testMinLeadingTrailingSoftClippedRatio(read);
}
else {
Expand All @@ -118,10 +112,6 @@ else if ( minimumLeadingTrailingSoftClippedRatio != null ) {
);
}

// Check whether we want to invert our results:
if ( doInvertFilter ) {
return !result;
}
return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,10 @@ public void testOverclippedSoftClipRatioFilter(final String cigarString,
final boolean expectedResult) {

final SoftClippedReadFilter filter = new SoftClippedReadFilter();
filter.minimumSoftClippedRatio = clipRatio;
filter.maximumSoftClippedRatio = clipRatio;

final GATKRead read = buildSAMRead(cigarString);
Assert.assertEquals(filter.test(read), expectedResult, cigarString);

filter.doInvertFilter = true;
Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString);
}

@Test(dataProvider= "SoftClippedLeadingTrailingRatioDataProvider")
Expand All @@ -52,13 +49,10 @@ public void testSoftClippedLeadingTrailingRatioFilter(final String cigarString,
final boolean expectedResult) {

final SoftClippedReadFilter filter = new SoftClippedReadFilter();
filter.minimumLeadingTrailingSoftClippedRatio = clipRatio;
filter.maximumLeadingTrailingSoftClippedRatio = clipRatio;

final GATKRead read = buildSAMRead(cigarString);
Assert.assertEquals(filter.test(read), expectedResult, cigarString);

filter.doInvertFilter = true;
Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString);
}

@DataProvider(name = "SoftClipRatioDataProvider")
Expand All @@ -67,25 +61,25 @@ public Iterator<Object[]> softClipRatioDataProvider() {

// ---------------------------------------
// Null / trivial cases:
testData.add(new Object[] { "", 0.1, false });
testData.add(new Object[] { "10H", 0.1, false });
testData.add(new Object[] { "", 0.1, true });
testData.add(new Object[] { "10H", 0.1, true });

// ---------------------------------------
// Soft clip ratio test:

testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280

// ---------------------------------------
// Soft clip placement:

testData.add(new Object[] { "101S100M", 0.5, true });
testData.add(new Object[] { "100M101S", 0.5, true });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true });
testData.add(new Object[] { "101S100M", 0.5, false });
testData.add(new Object[] { "100M101S", 0.5, false });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false });

return testData.iterator();
}
Expand All @@ -96,42 +90,42 @@ public Iterator<Object[]> softClippedLeadingTrailingRatioDataProvider() {

// ---------------------------------------
// Null / trivial cases:
testData.add(new Object[] { "", 0.1, false });
testData.add(new Object[] { "10H", 0.1, false });
testData.add(new Object[] { "", 0.1, true });
testData.add(new Object[] { "10H", 0.1, true });

// ---------------------------------------
// Soft clip ratio test:

// Non-leading/-trailing
testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280
testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280

// Leading:
testData.add(new Object[] { "2S1S1S16M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "3S1S1S16M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "4S1S1S16M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "5S1S1S16M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "6S1S1S16M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "7S1S1S16M", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "2S1S1S16M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "3S1S1S16M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "4S1S1S16M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "5S1S1S16M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "6S1S1S16M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "7S1S1S16M", 0.2, false }); // 7/25 = .280

// Trailing:
testData.add(new Object[] { "1M1S16M2S", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1M1S16M3S", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1M1S16M4S", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1M1S16M5S", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1M1S16M6S", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1M1S16M7S", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "1M1S16M2S", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1M1S16M3S", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1M1S16M4S", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1M1S16M5S", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1M1S16M6S", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1M1S16M7S", 0.2, false }); // 7/25 = .280

// ---------------------------------------
// Soft clip placement:

testData.add(new Object[] { "101S100M", 0.5, true });
testData.add(new Object[] { "100M101S", 0.5, true });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false });
testData.add(new Object[] { "101S100M", 0.5, false });
testData.add(new Object[] { "100M101S", 0.5, false });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true });

return testData.iterator();
}
Expand Down
Loading