Skip to content

Commit

Permalink
added --inverted-read-filter argument to allow for selecting reads th…
Browse files Browse the repository at this point in the history
…at fail read filters from the command line easily (#8724)
  • Loading branch information
jamesemery authored Mar 11, 2024
1 parent 9af1be3 commit b81a638
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.cmdline.GATKPlugin;

import com.google.common.collect.Lists;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
Expand All @@ -23,6 +24,11 @@ public class DefaultGATKReadFilterArgumentCollection extends GATKReadFilterArgum
doc="Read filters to be applied before analysis", optional=true, common = true)
public final List<String> userEnabledReadFilterNames = new ArrayList<>(); // preserve order

@Argument(fullName = ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME,
shortName = ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_SHORT_NAME,
doc="Inverted (with flipped acceptance/failure conditions) read filters applied before analysis (after regular read filters).", optional=true, common = true)
public final List<String> userEnabledInvertedReadFilterNames = new ArrayList<>(); // preserve order

@Argument(fullName = ReadFilterArgumentDefinitions.DISABLE_READ_FILTER_LONG_NAME,
shortName = ReadFilterArgumentDefinitions.DISABLE_READ_FILTER_SHORT_NAME,
doc="Read filters to be disabled before analysis", optional=true, common = true)
Expand All @@ -34,12 +40,28 @@ public class DefaultGATKReadFilterArgumentCollection extends GATKReadFilterArgum
doc = "Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)", common = true, optional = true)
public boolean disableToolDefaultReadFilters = false;

private List<String> fullUserEnabledReadFilterNames = new ArrayList<>();
/** Returns the list with the union of inverted and non-inverted read filters provided by the user. Includes user default filters followed by the user inverted filters. */
public List<String> getAllUserEnabledReadFilterNames() {
if (fullUserEnabledReadFilterNames.isEmpty()) {
fullUserEnabledReadFilterNames.addAll(userEnabledReadFilterNames);
fullUserEnabledReadFilterNames.addAll(userEnabledInvertedReadFilterNames);
}
return fullUserEnabledReadFilterNames;
}

/** Returns the list with the read filters provided by the user, preserving the order. */
@Override
public List<String> getUserEnabledReadFilterNames() {
return userEnabledReadFilterNames;
}

/** Returns the list with the inverted read filters provided by the user, preserving the order. */
@Override
public List<String> getUserEnabledInvertedReadFilterNames() {
return userEnabledInvertedReadFilterNames;
}

/** Returns the set of filters disabled by the user. */
@Override
public List<String> getUserDisabledReadFilterNames() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ public abstract class GATKReadFilterArgumentCollection implements Serializable {
*/
public abstract List<String> getUserEnabledReadFilterNames();

/**
* Returns the inverted enabled read filters. Applied after the regular read filters. Order should be honored.
*/
public abstract List<String> getUserEnabledInvertedReadFilterNames();

/**
* Returns the union of all of the user enabled read filters and the user enabled inverted read filters.
*/
public abstract List<String> getAllUserEnabledReadFilterNames();

/**
* Returns the disabled read filter names. Order should be honored.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public boolean isDependentArgumentAllowed(final Class<?> predecessorClass) {
// for the user to subsequently disable the required predecessor. That case is caught during final
// validation done by the validateArguments method.
String predecessorName = predecessorClass.getSimpleName();
boolean isAllowed = userArgs.getUserEnabledReadFilterNames().contains(predecessorName)
boolean isAllowed = userArgs.getAllUserEnabledReadFilterNames().contains(predecessorName)
|| (toolDefaultReadFilters.get(predecessorName) != null);
if (isAllowed) {
// Keep track of the ones we allow so we can validate later that they weren't subsequently disabled
Expand Down Expand Up @@ -225,6 +225,10 @@ private List<ReadFilter> getUserEnabledInstances() {
ReadFilter rf = allDiscoveredReadFilters.get(s);
filters.add(rf);
});
userArgs.getUserEnabledInvertedReadFilterNames().forEach(s -> {
ReadFilter rf = allDiscoveredReadFilters.get(s).negate();
filters.add(rf);
});
return filters;
}

Expand Down Expand Up @@ -294,14 +298,23 @@ public void validateAndResolvePlugins() {
}

// throw if a filter is both enabled *and* disabled by the user
final Set<String> enabledAndDisabled = new HashSet<>(userArgs.getUserEnabledReadFilterNames());
final Set<String> enabledAndDisabled = new HashSet<>(userArgs.getAllUserEnabledReadFilterNames());
enabledAndDisabled.retainAll(userArgs.getUserDisabledReadFilterNames());
if (!enabledAndDisabled.isEmpty()) {
final String badFiltersList = Utils.join(", ", enabledAndDisabled);
throw new CommandLineException(
String.format("The read filter(s): %s are both enabled and disabled", badFiltersList));
}

// throw if a filter is enabled and inverted by the user
final Set<String> enabledAndInverted = new HashSet<>(userArgs.getUserEnabledReadFilterNames());
enabledAndInverted.retainAll(userArgs.getUserEnabledInvertedReadFilterNames());
if (!enabledAndInverted.isEmpty()) {
final String badFiltersList = Utils.join(", ", enabledAndDisabled);
throw new CommandLineException(
String.format("The read filter(s): %s are both enabled and inverted", badFiltersList));
}

// throw if a disabled filter doesn't exist; warn if it wasn't enabled by the tool in the first place
userArgs.getUserDisabledReadFilterNames().forEach(s -> {
if (!allDiscoveredReadFilters.containsKey(s)) {
Expand All @@ -319,6 +332,14 @@ public void validateAndResolvePlugins() {
logger.warn(String.format("Redundant enabled filter (%s) is enabled for this tool by default", s));
});

// throw if a filter is both default and inverted by the user as we do not want to inadvertently filter all reads from the input
final Set<String> redundantInv = new HashSet<>(toolDefaultReadFilters.keySet());
redundantInv.retainAll(userArgs.getUserEnabledInvertedReadFilterNames());
if (!userArgs.getDisableToolDefaultReadFilters() && redundantInv.size() > 0) {
throw new CommandLineException(
String.format("The read filter(s): %s exist as tool default filters and were inverted, disable tool default read filters in order to use this filter", redundantInv));
}

// Throw if args were specified for a filter that was also disabled, or that was not enabled by the
// tool by default.
//
Expand Down Expand Up @@ -346,7 +367,7 @@ public void validateAndResolvePlugins() {

// throw if a filter name was specified that has no corresponding instance
final Map<String, ReadFilter> requestedReadFilters = new HashMap<>();
userArgs.getUserEnabledReadFilterNames().forEach(s -> {
userArgs.getAllUserEnabledReadFilterNames().forEach(s -> {
ReadFilter trf = allDiscoveredReadFilters.get(s);
if (null == trf) {
if (!toolDefaultReadFilters.containsKey(s)) {
Expand All @@ -370,7 +391,7 @@ public void validateAndResolvePlugins() {
*/
public boolean isDisabledFilter(final String filterName) {
return userArgs.getUserDisabledReadFilterNames().contains(filterName)
|| (userArgs.getDisableToolDefaultReadFilters() && !userArgs.getUserEnabledReadFilterNames().contains(filterName));
|| (userArgs.getDisableToolDefaultReadFilters() && !userArgs.getAllUserEnabledReadFilterNames().contains(filterName));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ private ReadFilterArgumentDefinitions(){}
// GATKReadFilterPluginDescriptor arguments

public static final String READ_FILTER_LONG_NAME = "read-filter";
public static final String INVERTED_READ_FILTER_LONG_NAME = "inverted-read-filter";
public static final String DISABLE_READ_FILTER_LONG_NAME = "disable-read-filter";
public static final String DISABLE_TOOL_DEFAULT_READ_FILTERS = "disable-tool-default-read-filters";
public static final String READ_FILTER_SHORT_NAME = "RF";
public static final String INVERTED_READ_FILTER_SHORT_NAME = "XRF";
public static final String DISABLE_READ_FILTER_SHORT_NAME = "DF";

// ReadFilter arguments
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void resetFilteredCount() {
filteredCount = 0;
}

public String getName() {return delegateFilter.getClass().getSimpleName();}
public String getName() {return delegateFilter.getName();}

// Returns a summary line with filter counts organized by level
public String getSummaryLine() {return getSummaryLineForLevel(0);}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public abstract class ReadFilter implements Predicate<GATKRead>, Serializable {

public void setHeader(SAMFileHeader samHeader) { this.samHeader = samHeader; }

private static class ReadFilterNegate extends ReadFilter {
@VisibleForTesting
public static class ReadFilterNegate extends ReadFilter {
private static final long serialVersionUID = 1L;

private final ReadFilter delegate;
Expand All @@ -44,6 +45,11 @@ protected ReadFilterNegate(ReadFilter delegate) {
public boolean test( GATKRead read ) {
return !delegate.test(read);
}

@Override
public String getName() {
return "NOT " + delegate.getName();
}
}

protected abstract static class ReadFilterBinOp extends ReadFilter {
Expand Down Expand Up @@ -80,6 +86,11 @@ protected static class ReadFilterAnd extends ReadFilterBinOp {

@Override
public boolean test( GATKRead read ) { return lhs.test(read) && rhs.test(read); }

@Override
public String getName() {
return lhs.getName() + " AND " + rhs.getName();
}
}

private static class ReadFilterOr extends ReadFilterBinOp {
Expand All @@ -89,6 +100,11 @@ private static class ReadFilterOr extends ReadFilterBinOp {

@Override
public boolean test( GATKRead read ) { return lhs.test(read) || rhs.test(read);}

@Override
public String getName() {
return lhs.getName() + " OR " + rhs.getName();
}
}

// It turns out, this is necessary. Please don't remove it.
Expand Down Expand Up @@ -144,4 +160,11 @@ public ReadFilter or( ReadFilter other ) {

@Override
public abstract boolean test( GATKRead read );

/**
* Retruns the display name for logs for this filter
*/
String getName() {
return this.getClass().getSimpleName();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import javax.validation.constraints.AssertTrue;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
Expand Down Expand Up @@ -538,6 +539,44 @@ public void testEnableDisableConflict() {
"--" + ReadFilterArgumentDefinitions.DISABLE_READ_FILTER_LONG_NAME, "GoodCigarReadFilter"});
}

@Test
public void testDisableToolDefaultFiltersWithInvertedFilter() {
GATKReadFilterPluginDescriptor rfDesc = new GATKReadFilterPluginDescriptor(
Collections.singletonList(new ReadLengthReadFilter(1, 10)));
CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(rfDesc),
Collections.emptySet());
clp.parseArguments(nullMessageStream, new String[] {
"--" + ReadFilterArgumentDefinitions.DISABLE_TOOL_DEFAULT_READ_FILTERS,
"--" + ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME, ReadLengthReadFilter.class.getSimpleName()}
);
List<ReadFilter> allFilters = rfDesc.getResolvedInstances();
Assert.assertEquals(allFilters.size(), 1);
Assert.assertEquals(allFilters.get(0).getClass(), ReadFilter.ReadFilterNegate.class);
}

@Test(expectedExceptions = CommandLineException.class)
public void testEnableInvertConflict() {
CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(new GATKReadFilterPluginDescriptor(null)),
Collections.emptySet());
clp.parseArguments(nullMessageStream, new String[] {
"--RF", "GoodCigarReadFilter",
"--" + ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME, "GoodCigarReadFilter"});
}

@Test(expectedExceptions = CommandLineException.class)
public void testInvertToolDefaultConflict() {
CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(new GATKReadFilterPluginDescriptor(List.of(new ReadFilterLibrary.GoodCigarReadFilter()))),
Collections.emptySet());
clp.parseArguments(nullMessageStream, new String[] {
"--" + ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME, "GoodCigarReadFilter"});
}

@Test
public void testPreserveCommandLineOrder() {
List<ReadFilter> orderedDefaults = new ArrayList<>();
Expand Down Expand Up @@ -665,6 +704,32 @@ public void testReadLengthFilter() {
Assert.assertTrue(rf.test(read));
}

@Test
public void testInvertReadLengthFilter() {
final SAMFileHeader header = createHeaderWithReadGroups();
final GATKRead read = simpleGoodRead(header);

CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(new GATKReadFilterPluginDescriptor(null)),
Collections.emptySet());
String[] args = {
"--" + ReadFilterArgumentDefinitions.INVERTED_READ_FILTER_LONG_NAME, ReadLengthReadFilter.class.getSimpleName(),
"--" + ReadFilterArgumentDefinitions.MIN_READ_LENGTH_ARG_NAME, "10",
"--" + ReadFilterArgumentDefinitions.MAX_READ_LENGTH_ARG_NAME, "20"
};
clp.parseArguments(nullMessageStream, args);
ReadFilter rf = instantiateFilter(clp, header);

read.setBases(new byte[5]);
Assert.assertTrue(rf.test(read));
read.setBases(new byte[25]);
Assert.assertTrue(rf.test(read));

read.setBases(new byte[15]);
Assert.assertFalse(rf.test(read));
}

final private static String readgroupName = "ReadGroup1";

@DataProvider(name="testDefaultFilters")
Expand Down

0 comments on commit b81a638

Please sign in to comment.