From 3f85616df1f2997fd6d1c32c47dc5d922a972c65 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 24 Jun 2024 10:43:21 -0400 Subject: [PATCH 1/5] cleaned up SoftClippedReadFilter to conform to the logic for other filters --- .../ReadFilterArgumentDefinitions.java | 4 +- .../engine/filters/SoftClippedReadFilter.java | 25 +++--- .../SoftClippedReadFilterUnitTest.java | 80 +++++++++---------- 3 files changed, 55 insertions(+), 54 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java index 9b8d016e14d..9b6ff2287cb 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java @@ -53,8 +53,8 @@ private ReadFilterArgumentDefinitions(){} public static final String KEEP_INTERVAL_NAME = "keep-intervals"; - public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "soft-clipped-ratio-threshold"; - public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "soft-clipped-leading-trailing-ratio"; + public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratio-threshold"; + public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "max-soft-clipped-leading-trailing-ratio"; public static final String INVERT_SOFT_CLIP_RATIO_FILTER = "invert-soft-clip-ratio-filter"; diff --git a/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java b/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java index fa0421255fa..be3ab586001 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java @@ -25,12 +25,12 @@ public final class SoftClippedReadFilter extends ReadFilter { static final long serialVersionUID = 1L; private final Logger logger = LogManager.getLogger(this.getClass()); - @VisibleForTesting - @Argument(fullName = ReadFilterArgumentDefinitions.INVERT_SOFT_CLIP_RATIO_FILTER, - doc = "Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.", - optional = true - ) - boolean doInvertFilter = false; +// @VisibleForTesting +// @Argument(fullName = ReadFilterArgumentDefinitions.INVERT_SOFT_CLIP_RATIO_FILTER, +// doc = "Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.", +// optional = true +// ) +// boolean doInvertFilter = false; @VisibleForTesting @Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD, @@ -38,7 +38,7 @@ public final class SoftClippedReadFilter extends ReadFilter { optional = true, mutex = { ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD } ) - Double minimumSoftClippedRatio = null; + Double maximumSoftClippedRatio = null; @VisibleForTesting @Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD, @@ -46,7 +46,7 @@ public final class SoftClippedReadFilter extends ReadFilter { optional = true, mutex = {ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD} ) - Double minimumLeadingTrailingSoftClippedRatio = null; + Double maximumLeadingTrailingSoftClippedRatio = null; // Command line parser requires a no-arg constructor public SoftClippedReadFilter() {} @@ -63,7 +63,7 @@ private boolean testMinSoftClippedRatio(final GATKRead read) { final double softClipRatio = ((double)numSoftClippedBases / (double)totalLength); - return softClipRatio > minimumSoftClippedRatio; + return softClipRatio < maximumSoftClippedRatio; } private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) { @@ -92,10 +92,11 @@ private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) { // Calculate the ratio: final double softClipRatio = ((double)numLeadingTrailingSoftClippedBases / (double)totalLength); - return softClipRatio > minimumLeadingTrailingSoftClippedRatio; + return softClipRatio < maximumLeadingTrailingSoftClippedRatio; } @Override + // NOTE: for read filters we always return true if the read passes the filter, and false if it doesn't. public boolean test(final GATKRead read) { final boolean result; @@ -103,11 +104,11 @@ public boolean test(final GATKRead read) { // NOTE: Since we have mutex'd the args for the clipping ratios, we only need to see if they // have been specified. If they have, that's the filter logic we're using. // If we specified the clipping ratio, we use the min sequence length test: - if ( minimumSoftClippedRatio != null ) { + if ( maximumSoftClippedRatio != null ) { result = testMinSoftClippedRatio(read); } // If we specified the leading/trailing clipping ratio, we use the min sequence length test: - else if ( minimumLeadingTrailingSoftClippedRatio != null ) { + else if ( maximumLeadingTrailingSoftClippedRatio != null ) { result = testMinLeadingTrailingSoftClippedRatio(read); } else { diff --git a/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java index 668a1c62a92..72cb414ab59 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java @@ -37,13 +37,13 @@ public void testOverclippedSoftClipRatioFilter(final String cigarString, final boolean expectedResult) { final SoftClippedReadFilter filter = new SoftClippedReadFilter(); - filter.minimumSoftClippedRatio = clipRatio; + filter.maximumSoftClippedRatio = clipRatio; final GATKRead read = buildSAMRead(cigarString); Assert.assertEquals(filter.test(read), expectedResult, cigarString); - filter.doInvertFilter = true; - Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString); +// filter.doInvertFilter = true; +// Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString); } @Test(dataProvider= "SoftClippedLeadingTrailingRatioDataProvider") @@ -52,13 +52,13 @@ public void testSoftClippedLeadingTrailingRatioFilter(final String cigarString, final boolean expectedResult) { final SoftClippedReadFilter filter = new SoftClippedReadFilter(); - filter.minimumLeadingTrailingSoftClippedRatio = clipRatio; + filter.maximumLeadingTrailingSoftClippedRatio = clipRatio; final GATKRead read = buildSAMRead(cigarString); Assert.assertEquals(filter.test(read), expectedResult, cigarString); - filter.doInvertFilter = true; - Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString); +// filter.doInvertFilter = true; +// Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString); } @DataProvider(name = "SoftClipRatioDataProvider") @@ -67,25 +67,25 @@ public Iterator softClipRatioDataProvider() { // --------------------------------------- // Null / trivial cases: - testData.add(new Object[] { "", 0.1, false }); - testData.add(new Object[] { "10H", 0.1, false }); + testData.add(new Object[] { "", 0.1, true }); + testData.add(new Object[] { "10H", 0.1, true }); // --------------------------------------- // Soft clip ratio test: - testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100 - testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143 - testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182 - testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217 - testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250 - testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280 + testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100 + testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143 + testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182 + testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217 + testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250 + testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280 // --------------------------------------- // Soft clip placement: - testData.add(new Object[] { "101S100M", 0.5, true }); - testData.add(new Object[] { "100M101S", 0.5, true }); - testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true }); + testData.add(new Object[] { "101S100M", 0.5, false }); + testData.add(new Object[] { "100M101S", 0.5, false }); + testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false }); return testData.iterator(); } @@ -96,42 +96,42 @@ public Iterator softClippedLeadingTrailingRatioDataProvider() { // --------------------------------------- // Null / trivial cases: - testData.add(new Object[] { "", 0.1, false }); - testData.add(new Object[] { "10H", 0.1, false }); + testData.add(new Object[] { "", 0.1, true }); + testData.add(new Object[] { "10H", 0.1, true }); // --------------------------------------- // Soft clip ratio test: // Non-leading/-trailing - testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100 - testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143 - testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182 - testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217 - testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250 - testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280 + testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100 + testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143 + testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182 + testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217 + testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250 + testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280 // Leading: - testData.add(new Object[] { "2S1S1S16M", 0.2, false }); // 2/20 = .100 - testData.add(new Object[] { "3S1S1S16M", 0.2, false }); // 3/21 = .143 - testData.add(new Object[] { "4S1S1S16M", 0.2, false }); // 4/22 = .182 - testData.add(new Object[] { "5S1S1S16M", 0.2, true }); // 5/23 = .217 - testData.add(new Object[] { "6S1S1S16M", 0.2, true }); // 6/24 = .250 - testData.add(new Object[] { "7S1S1S16M", 0.2, true }); // 7/25 = .280 + testData.add(new Object[] { "2S1S1S16M", 0.2, true }); // 2/20 = .100 + testData.add(new Object[] { "3S1S1S16M", 0.2, true }); // 3/21 = .143 + testData.add(new Object[] { "4S1S1S16M", 0.2, true }); // 4/22 = .182 + testData.add(new Object[] { "5S1S1S16M", 0.2, false }); // 5/23 = .217 + testData.add(new Object[] { "6S1S1S16M", 0.2, false }); // 6/24 = .250 + testData.add(new Object[] { "7S1S1S16M", 0.2, false }); // 7/25 = .280 // Trailing: - testData.add(new Object[] { "1M1S16M2S", 0.2, false }); // 2/20 = .100 - testData.add(new Object[] { "1M1S16M3S", 0.2, false }); // 3/21 = .143 - testData.add(new Object[] { "1M1S16M4S", 0.2, false }); // 4/22 = .182 - testData.add(new Object[] { "1M1S16M5S", 0.2, true }); // 5/23 = .217 - testData.add(new Object[] { "1M1S16M6S", 0.2, true }); // 6/24 = .250 - testData.add(new Object[] { "1M1S16M7S", 0.2, true }); // 7/25 = .280 + testData.add(new Object[] { "1M1S16M2S", 0.2, true }); // 2/20 = .100 + testData.add(new Object[] { "1M1S16M3S", 0.2, true }); // 3/21 = .143 + testData.add(new Object[] { "1M1S16M4S", 0.2, true }); // 4/22 = .182 + testData.add(new Object[] { "1M1S16M5S", 0.2, false }); // 5/23 = .217 + testData.add(new Object[] { "1M1S16M6S", 0.2, false }); // 6/24 = .250 + testData.add(new Object[] { "1M1S16M7S", 0.2, false }); // 7/25 = .280 // --------------------------------------- // Soft clip placement: - testData.add(new Object[] { "101S100M", 0.5, true }); - testData.add(new Object[] { "100M101S", 0.5, true }); - testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false }); + testData.add(new Object[] { "101S100M", 0.5, false }); + testData.add(new Object[] { "100M101S", 0.5, false }); + testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true }); return testData.iterator(); } From 853c0087c5a02dc35a5d2fd6196f3313cdc96cb9 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 24 Jun 2024 15:01:09 -0400 Subject: [PATCH 2/5] more cleanup --- .../engine/filters/SoftClippedReadFilter.java | 21 +++++-------------- .../SoftClippedReadFilterUnitTest.java | 6 ------ 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java b/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java index be3ab586001..bc63df8e711 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java @@ -25,13 +25,6 @@ public final class SoftClippedReadFilter extends ReadFilter { static final long serialVersionUID = 1L; private final Logger logger = LogManager.getLogger(this.getClass()); -// @VisibleForTesting -// @Argument(fullName = ReadFilterArgumentDefinitions.INVERT_SOFT_CLIP_RATIO_FILTER, -// doc = "Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.", -// optional = true -// ) -// boolean doInvertFilter = false; - @VisibleForTesting @Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD, doc = "Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in read for read to be filtered.", @@ -61,15 +54,15 @@ private boolean testMinSoftClippedRatio(final GATKRead read) { totalLength += element.getLength(); } - final double softClipRatio = ((double)numSoftClippedBases / (double)totalLength); + final double softClipRatio = totalLength != 0 ? ((double)numSoftClippedBases / (double)totalLength) : 0.0; - return softClipRatio < maximumSoftClippedRatio; + return softClipRatio <= maximumSoftClippedRatio; } private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) { if ( read.getCigarElements().size() < 1 ) { - return false; + return true; //NOTE: in this edge case that the read should pass this filter as there are no cigar elements to have edge soft-clipping. } // Get the index of the last cigar element: @@ -90,9 +83,9 @@ private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) { .sum(); // Calculate the ratio: - final double softClipRatio = ((double)numLeadingTrailingSoftClippedBases / (double)totalLength); + final double softClipRatio = totalLength != 0 ? ((double)numLeadingTrailingSoftClippedBases / (double)totalLength) : 0.0; - return softClipRatio < maximumLeadingTrailingSoftClippedRatio; + return softClipRatio <= maximumLeadingTrailingSoftClippedRatio; } @Override @@ -119,10 +112,6 @@ else if ( maximumLeadingTrailingSoftClippedRatio != null ) { ); } - // Check for if we want to invert our results: - if ( doInvertFilter ) { - return !result; - } return result; } } diff --git a/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java index 72cb414ab59..b87062e56e7 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java @@ -41,9 +41,6 @@ public void testOverclippedSoftClipRatioFilter(final String cigarString, final GATKRead read = buildSAMRead(cigarString); Assert.assertEquals(filter.test(read), expectedResult, cigarString); - -// filter.doInvertFilter = true; -// Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString); } @Test(dataProvider= "SoftClippedLeadingTrailingRatioDataProvider") @@ -56,9 +53,6 @@ public void testSoftClippedLeadingTrailingRatioFilter(final String cigarString, final GATKRead read = buildSAMRead(cigarString); Assert.assertEquals(filter.test(read), expectedResult, cigarString); - -// filter.doInvertFilter = true; -// Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString); } @DataProvider(name = "SoftClipRatioDataProvider") From de5efe07b72b501ab13522a189fa4533073a05e4 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 27 Jun 2024 15:37:53 -0400 Subject: [PATCH 3/5] slight cleanup to the argument --- .../hellbender/cmdline/ReadFilterArgumentDefinitions.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java index 9b6ff2287cb..0591b8a48ca 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java @@ -53,11 +53,9 @@ private ReadFilterArgumentDefinitions(){} public static final String KEEP_INTERVAL_NAME = "keep-intervals"; - public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratio-threshold"; + public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratiogit "; public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "max-soft-clipped-leading-trailing-ratio"; - public static final String INVERT_SOFT_CLIP_RATIO_FILTER = "invert-soft-clip-ratio-filter"; - public static final String READ_FILTER_TAG = "read-filter-tag"; public static final String READ_FILTER_TAG_COMP = "read-filter-tag-comp"; public static final String READ_FILTER_TAG_OP = "read-filter-tag-op"; From c3eb022c4c3c6e4467e895690701cb9ebb32f63c Mon Sep 17 00:00:00 2001 From: James Date: Thu, 27 Jun 2024 15:43:11 -0400 Subject: [PATCH 4/5] typo --- .../hellbender/cmdline/ReadFilterArgumentDefinitions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java index 0591b8a48ca..47f4c6d0d34 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java @@ -53,7 +53,7 @@ private ReadFilterArgumentDefinitions(){} public static final String KEEP_INTERVAL_NAME = "keep-intervals"; - public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratiogit "; + public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratiogit"; public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "max-soft-clipped-leading-trailing-ratio"; public static final String READ_FILTER_TAG = "read-filter-tag"; From 742a45a0ecaf2dc0a61d4c40f09c0eb654613199 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 27 Jun 2024 15:50:34 -0400 Subject: [PATCH 5/5] typo undo? why is it so hard to tell me if its what is in git? --- .../hellbender/cmdline/ReadFilterArgumentDefinitions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java index 47f4c6d0d34..d873e545e62 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java @@ -53,7 +53,7 @@ private ReadFilterArgumentDefinitions(){} public static final String KEEP_INTERVAL_NAME = "keep-intervals"; - public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratiogit"; + public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratio"; public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "max-soft-clipped-leading-trailing-ratio"; public static final String READ_FILTER_TAG = "read-filter-tag";