Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added --inverted-read-filter argument to allow for selecting reads failing read filters from the command line easily #8724

Merged
merged 3 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.cmdline.GATKPlugin;

import com.google.common.collect.Lists;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
Expand All @@ -23,6 +24,11 @@ public class DefaultGATKReadFilterArgumentCollection extends GATKReadFilterArgum
doc="Read filters to be applied before analysis", optional=true, common = true)
public final List<String> userEnabledReadFilterNames = new ArrayList<>(); // preserve order

@Argument(fullName = ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I thought you were going to implement special handling of not in the read filter name which sounded complicated. This is much clearer.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered it but when we add a read filter named Not___ReadFilter i really didn't want to deal with NotNot___ReadFilters cropping up...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does mean that you can't interleave the inverted filters with regular filters in any arbitrary order however...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could potentially allow !ReadFilter because ! shouldn't be allowed in read filter names, but yeah, it would be more complicated I think.

shortName = ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_SHORT_NAME,
doc="Inverted (with flipped acceptance/failure conditions) read filters applied before analysis (after regular read filters).", optional=true, common = true)
public final List<String> userEnabledInvertedReadFilterNames = new ArrayList<>(); // preserve order

@Argument(fullName = ReadFilterArgumentDefinitions.DISABLE_READ_FILTER_LONG_NAME,
shortName = ReadFilterArgumentDefinitions.DISABLE_READ_FILTER_SHORT_NAME,
doc="Read filters to be disabled before analysis", optional=true, common = true)
Expand All @@ -34,12 +40,28 @@ public class DefaultGATKReadFilterArgumentCollection extends GATKReadFilterArgum
doc = "Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)", common = true, optional = true)
public boolean disableToolDefaultReadFilters = false;

private List<String> fullUserEnabledReadFilterNames = new ArrayList<>();
/** Returns the list with the union of inverted and non-inverted read filters provided by the user. Includes user default filters followed by the user inverted filters. */
public List<String> getAllUserEnabledReadFilterNames() {
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
if (fullUserEnabledReadFilterNames.isEmpty()) {
fullUserEnabledReadFilterNames.addAll(userEnabledReadFilterNames);
fullUserEnabledReadFilterNames.addAll(userEnabledInvertedReadFilterNames);
}
return fullUserEnabledReadFilterNames;
}

/** Returns the list with the read filters provided by the user, preserving the order. */
@Override
public List<String> getUserEnabledReadFilterNames() {
return userEnabledReadFilterNames;
}

/** Returns the list with the inverted read filters provided by the user, preserving the order. */
@Override
public List<String> getUserEnabledInvertedReadFilterNames() {
return userEnabledInvertedReadFilterNames;
}

/** Returns the set of filters disabled by the user. */
@Override
public List<String> getUserDisabledReadFilterNames() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ public abstract class GATKReadFilterArgumentCollection implements Serializable {
*/
public abstract List<String> getUserEnabledReadFilterNames();

/**
* Returns the inverted enabled read filters. Applied after the regular read filters. Order should be honored.
*/
public abstract List<String> getUserEnabledInvertedReadFilterNames();

/**
* Returns the union of all of the user enabled read filters and the user enabled inverted read filters.
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
*/
public abstract List<String> getAllUserEnabledReadFilterNames();

/**
* Returns the disabled read filter names. Order should be honored.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public boolean isDependentArgumentAllowed(final Class<?> predecessorClass) {
// for the user to subsequently disable the required predecessor. That case is caught during final
// validation done by the validateArguments method.
String predecessorName = predecessorClass.getSimpleName();
boolean isAllowed = userArgs.getUserEnabledReadFilterNames().contains(predecessorName)
boolean isAllowed = userArgs.getAllUserEnabledReadFilterNames().contains(predecessorName)
|| (toolDefaultReadFilters.get(predecessorName) != null);
if (isAllowed) {
// Keep track of the ones we allow so we can validate later that they weren't subsequently disabled
Expand Down Expand Up @@ -225,6 +225,10 @@ private List<ReadFilter> getUserEnabledInstances() {
ReadFilter rf = allDiscoveredReadFilters.get(s);
filters.add(rf);
});
userArgs.getUserEnabledInvertedReadFilterNames().forEach(s -> {
ReadFilter rf = allDiscoveredReadFilters.get(s).negate();
filters.add(rf);
});
return filters;
}

Expand Down Expand Up @@ -294,14 +298,23 @@ public void validateAndResolvePlugins() {
}

// throw if a filter is both enabled *and* disabled by the user
final Set<String> enabledAndDisabled = new HashSet<>(userArgs.getUserEnabledReadFilterNames());
final Set<String> enabledAndDisabled = new HashSet<>(userArgs.getAllUserEnabledReadFilterNames());
enabledAndDisabled.retainAll(userArgs.getUserDisabledReadFilterNames());
if (!enabledAndDisabled.isEmpty()) {
final String badFiltersList = Utils.join(", ", enabledAndDisabled);
throw new CommandLineException(
String.format("The read filter(s): %s are both enabled and disabled", badFiltersList));
}

// throw if a filter is enabled and inverted by the user
final Set<String> enabledAndInverted = new HashSet<>(userArgs.getUserEnabledReadFilterNames());
enabledAndInverted.retainAll(userArgs.getUserEnabledInvertedReadFilterNames());
if (!enabledAndInverted.isEmpty()) {
final String badFiltersList = Utils.join(", ", enabledAndDisabled);
throw new CommandLineException(
String.format("The read filter(s): %s are both enabled and inverted", badFiltersList));
}

// throw if a disabled filter doesn't exist; warn if it wasn't enabled by the tool in the first place
userArgs.getUserDisabledReadFilterNames().forEach(s -> {
if (!allDiscoveredReadFilters.containsKey(s)) {
Expand All @@ -319,6 +332,14 @@ public void validateAndResolvePlugins() {
logger.warn(String.format("Redundant enabled filter (%s) is enabled for this tool by default", s));
});

// throw if a filter is both default and inverted by the user as we do not want to inadvertently filter all reads from the input
final Set<String> redundantInv = new HashSet<>(toolDefaultReadFilters.keySet());
redundantInv.retainAll(userArgs.getUserEnabledInvertedReadFilterNames());
if (!userArgs.getDisableToolDefaultReadFilters() && redundantInv.size() > 0) {
throw new CommandLineException(
String.format("The read filter(s): %s exist as tool default filters and were inverted, disable tool default read filters in order to use this filter", redundantInv));
}

// Throw if args were specified for a filter that was also disabled, or that was not enabled by the
// tool by default.
//
Expand Down Expand Up @@ -346,7 +367,7 @@ public void validateAndResolvePlugins() {

// throw if a filter name was specified that has no corresponding instance
final Map<String, ReadFilter> requestedReadFilters = new HashMap<>();
userArgs.getUserEnabledReadFilterNames().forEach(s -> {
userArgs.getAllUserEnabledReadFilterNames().forEach(s -> {
ReadFilter trf = allDiscoveredReadFilters.get(s);
if (null == trf) {
if (!toolDefaultReadFilters.containsKey(s)) {
Expand All @@ -370,7 +391,7 @@ public void validateAndResolvePlugins() {
*/
public boolean isDisabledFilter(final String filterName) {
return userArgs.getUserDisabledReadFilterNames().contains(filterName)
|| (userArgs.getDisableToolDefaultReadFilters() && !userArgs.getUserEnabledReadFilterNames().contains(filterName));
|| (userArgs.getDisableToolDefaultReadFilters() && !userArgs.getAllUserEnabledReadFilterNames().contains(filterName));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ private ReadFilterArgumentDefinitions(){}
// GATKReadFilterPluginDescriptor arguments

public static final String READ_FILTER_LONG_NAME = "read-filter";
public static final String INVERTED_READ_FILTER_LONG_NAME = "inverted-read-filter";
public static final String DISABLE_READ_FILTER_LONG_NAME = "disable-read-filter";
public static final String DISABLE_TOOL_DEFAULT_READ_FILTERS = "disable-tool-default-read-filters";
public static final String READ_FILTER_SHORT_NAME = "RF";
public static final String INVERTED_READ_FILTER_SHORT_NAME = "XRF";
public static final String DISABLE_READ_FILTER_SHORT_NAME = "DF";

// ReadFilter arguments
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void resetFilteredCount() {
filteredCount = 0;
}

public String getName() {return delegateFilter.getClass().getSimpleName();}
public String getName() {return delegateFilter.getName();}

// Returns a summary line with filter counts organized by level
public String getSummaryLine() {return getSummaryLineForLevel(0);}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public abstract class ReadFilter implements Predicate<GATKRead>, Serializable {

public void setHeader(SAMFileHeader samHeader) { this.samHeader = samHeader; }

private static class ReadFilterNegate extends ReadFilter {
@VisibleForTesting
public static class ReadFilterNegate extends ReadFilter {
private static final long serialVersionUID = 1L;

private final ReadFilter delegate;
Expand All @@ -44,6 +45,11 @@ protected ReadFilterNegate(ReadFilter delegate) {
public boolean test( GATKRead read ) {
return !delegate.test(read);
}

@Override
public String getName() {
return "NOT " + delegate.getName();
}
}

protected abstract static class ReadFilterBinOp extends ReadFilter {
Expand Down Expand Up @@ -80,6 +86,11 @@ protected static class ReadFilterAnd extends ReadFilterBinOp {

@Override
public boolean test( GATKRead read ) { return lhs.test(read) && rhs.test(read); }

@Override
public String getName() {
return lhs.getName() + " AND " + rhs.getName();
}
}

private static class ReadFilterOr extends ReadFilterBinOp {
Expand All @@ -89,6 +100,11 @@ private static class ReadFilterOr extends ReadFilterBinOp {

@Override
public boolean test( GATKRead read ) { return lhs.test(read) || rhs.test(read);}

@Override
public String getName() {
return lhs.getName() + " OR " + rhs.getName();
}
}

// It turns out, this is necessary. Please don't remove it.
Expand Down Expand Up @@ -144,4 +160,11 @@ public ReadFilter or( ReadFilter other ) {

@Override
public abstract boolean test( GATKRead read );

/**
* Retruns the display name for logs for this filter
*/
String getName() {
return this.getClass().getSimpleName();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import javax.validation.constraints.AssertTrue;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
Expand Down Expand Up @@ -538,6 +539,44 @@ public void testEnableDisableConflict() {
"--" + ReadFilterArgumentDefinitions.DISABLE_READ_FILTER_LONG_NAME, "GoodCigarReadFilter"});
}

@Test
public void testDisableToolDefaultFiltersWithInvertedFilter() {
GATKReadFilterPluginDescriptor rfDesc = new GATKReadFilterPluginDescriptor(
Collections.singletonList(new ReadLengthReadFilter(1, 10)));
CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(rfDesc),
Collections.emptySet());
clp.parseArguments(nullMessageStream, new String[] {
"--" + ReadFilterArgumentDefinitions.DISABLE_TOOL_DEFAULT_READ_FILTERS,
"--" + ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME, ReadLengthReadFilter.class.getSimpleName()}
);
List<ReadFilter> allFilters = rfDesc.getResolvedInstances();
Assert.assertEquals(allFilters.size(), 1);
Assert.assertEquals(allFilters.get(0).getClass(), ReadFilter.ReadFilterNegate.class);
}

@Test(expectedExceptions = CommandLineException.class)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might either add a new more specific exception type or scrape the message. It's really easy to tests like this failing for other reasons like typos and not the one you expect.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

idk... commandline is a very specfic exception and i'm not super inclined to split the exception up into dozens of hyper specific commandline exceptions. Furthermore.... if you look up in the class this is already all over the place in these tests. No action

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm in favor of hyper specific exceptions all over the place but that's fine. Did you check that it's actually erroring for the reasons you expect? I says this from experience constructing commandline tests that turned out to be typos and not valid tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not 100% sure, but I think there are issues with inverted and disabled read filters. You should be allowed to
a) disable the tool defaults and add back one of them but inverted.
b) disable a specific read filter and add it back inverted

Could you add tests for those cases?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add a test for A, B is very thorny... i'm going to say thats an edge case and not handle it as it is a whole lot more complicated for a very niche case (if you want to invert a default filter you must construct your own filter list)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, that seems fine. I doubt we'll ever have someone complain.

public void testEnableInvertConflict() {
CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(new GATKReadFilterPluginDescriptor(null)),
Collections.emptySet());
clp.parseArguments(nullMessageStream, new String[] {
"--RF", "GoodCigarReadFilter",
"--" + ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME, "GoodCigarReadFilter"});
}

@Test(expectedExceptions = CommandLineException.class)
public void testInvertToolDefaultConflict() {
CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(new GATKReadFilterPluginDescriptor(List.of(new ReadFilterLibrary.GoodCigarReadFilter()))),
Collections.emptySet());
clp.parseArguments(nullMessageStream, new String[] {
"--" + ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME, "GoodCigarReadFilter"});
}

@Test
public void testPreserveCommandLineOrder() {
List<ReadFilter> orderedDefaults = new ArrayList<>();
Expand Down Expand Up @@ -665,6 +704,32 @@ public void testReadLengthFilter() {
Assert.assertTrue(rf.test(read));
}

@Test
public void testInvertReadLengthFilter() {
final SAMFileHeader header = createHeaderWithReadGroups();
final GATKRead read = simpleGoodRead(header);

CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(new GATKReadFilterPluginDescriptor(null)),
Collections.emptySet());
String[] args = {
"--" + ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME, ReadLengthReadFilter.class.getSimpleName(),
"--" + ReadFilterArgumentDefinitions.MIN_READ_LENGTH_ARG_NAME, "10",
"--" + ReadFilterArgumentDefinitions.MAX_READ_LENGTH_ARG_NAME, "20"
};
clp.parseArguments(nullMessageStream, args);
ReadFilter rf = instantiateFilter(clp, header);

read.setBases(new byte[5]);
Assert.assertTrue(rf.test(read));
read.setBases(new byte[25]);
Assert.assertTrue(rf.test(read));

read.setBases(new byte[15]);
Assert.assertFalse(rf.test(read));
}

final private static String readgroupName = "ReadGroup1";

@DataProvider(name="testDefaultFilters")
Expand Down
Loading