Skip to content

Commit

Permalink
Added some generic read tag/expression filters for use on numeric tags (
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery authored May 10, 2022
1 parent bd640ea commit a3141f7
Show file tree
Hide file tree
Showing 7 changed files with 387 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,11 @@ private ReadFilterArgumentDefinitions(){}

public static final String INVERT_SOFT_CLIP_RATIO_FILTER = "invert-soft-clip-ratio-filter";

// Arguments of ReadTagValueFilter: the tag to look for in the read, the value the tag is
// compared to, and the comparison operator applied between the two.
public static final String READ_FILTER_TAG = "read-filter-tag";
public static final String READ_FILTER_TAG_COMP = "read-filter-tag-comp";
public static final String READ_FILTER_TAG_OP = "read-filter-tag-op";
// Argument of JexlExpressionReadTagValueFilter: one or more JEXL expressions used to filter reads.
public static final String READ_FILTER_EXPRESSION_LONG_NAME = "read-filter-expression";


public static final String MATE_TOO_DISTANT_LENGTH = "mate-too-distant-length";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package org.broadinstitute.hellbender.engine.filters;

import htsjdk.samtools.util.Lazy;
import htsjdk.variant.variantcontext.VariantContextUtils;
import org.apache.commons.jexl2.Expression;
import org.apache.commons.jexl2.JexlContext;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerEngine;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.function.BiFunction;

/**
 * Keep only reads whose attributes (tags) satisfy every one of a given set of JEXL expressions.
 *
 * <p>Each expression is evaluated against a context in which a variable name resolves to the
 * string value of the read attribute with the same name. A read passes only when every
 * expression evaluates to {@code Boolean.TRUE}; any other result, including {@code null},
 * rejects the read.</p>
 */
@DocumentedFeature(groupName = HelpConstants.DOC_CAT_READFILTERS, groupSummary = HelpConstants.DOC_CAT_READFILTERS_SUMMARY,
        summary = "Keep only reads that meet all given jexl expressions (on their attributes)")
public final class JexlExpressionReadTagValueFilter extends ReadFilter {
    private static final long serialVersionUID = 1L;
    private static final Logger logger = LogManager.getLogger(JexlExpressionReadTagValueFilter.class);

    @Argument(fullName=ReadFilterArgumentDefinitions.READ_FILTER_EXPRESSION_LONG_NAME, doc="One or more JEXL expressions used to filter", optional=false)
    public List<String> filterExpressions = new ArrayList<>();

    // Compiled lazily so that the expressions are built from whatever filterExpressions holds
    // at first use (i.e. after argument parsing or constructor assignment), and only once.
    // NOTE(review): this field is neither transient nor of a type known here to be serializable;
    // confirm the filter behaves as intended if it is ever serialized.
    private final Lazy<List<Expression>> jexlExprs = new Lazy<>(() -> {
        final List<Expression> compiled = new ArrayList<>(filterExpressions.size());
        for ( final String expr : filterExpressions ) {
            final Expression jexl = VariantContextUtils.engine.get().createExpression(expr);
            logger.info("created jexl: {}", jexl);
            compiled.add(jexl);
        }
        return compiled;
    });

    /**
     * Read-only JEXL context that exposes the attributes of a single read as variables.
     */
    private static class GATKReadJexlContext implements JexlContext {

        private final GATKRead read;

        GATKReadJexlContext(final GATKRead read) {
            this.read = read;
        }

        /** Resolves a variable to the string form of the read attribute with that name. */
        @Override
        public Object get(final String name) {
            return read.getAttributeAsString(name);
        }

        /** Always rejects: expressions are not allowed to modify the read. */
        @Override
        public void set(final String name, final Object value) {
            throw new IllegalArgumentException("setting attributes is not allowed");
        }

        /** A variable exists exactly when the read carries an attribute with that name. */
        @Override
        public boolean has(final String name) {
            return read.hasAttribute(name);
        }
    }

    public JexlExpressionReadTagValueFilter() {
    }

    /**
     * Convenience constructor for a single JEXL expression.
     *
     * @param jexlExpr the expression every read must satisfy
     */
    public JexlExpressionReadTagValueFilter(final String jexlExpr) {
        this.filterExpressions = Collections.singletonList(jexlExpr);
    }

    /**
     * Convenience constructor for multiple JEXL expressions.
     *
     * @param jexlExprs the expressions every read must satisfy (they are ANDed)
     */
    public JexlExpressionReadTagValueFilter(final List<String> jexlExprs) {
        this.filterExpressions = jexlExprs;
    }

    /**
     * Evaluates all expressions against the attributes of the given read.
     *
     * @param read the read to test
     * @return {@code true} iff every expression evaluates to {@code Boolean.TRUE}
     */
    @Override
    public boolean test(final GATKRead read) {
        // one context per read is enough; it is stateless apart from the read reference
        final JexlContext context = new GATKReadJexlContext(read);

        // expressions are ANDed: the first one that is not TRUE rejects the read
        for ( final Expression expr : jexlExprs.get() ) {
            // Boolean.TRUE.equals(...) is null-safe: an expression that evaluates to null
            // rejects the read instead of raising a NullPointerException
            if ( !Boolean.TRUE.equals(expr.evaluate(context)) ) {
                return false;
            }
        }

        // if here, all expressions matched
        return true;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package org.broadinstitute.hellbender.engine.filters;

import htsjdk.samtools.util.Lazy;
import htsjdk.variant.variantcontext.VariantContextUtils;
import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.JexlEngine;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import org.apache.commons.jexl2.Expression;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.function.BiFunction;

/**
 * Keep only reads that contain a tag with a value that agrees with parameters as specified.
 *
 * <p>A read passes when it carries the tag named by {@code read-filter-tag} and
 * {@code T OP V} holds, where T is the tag's value read as a float, OP is the operator given
 * by {@code read-filter-tag-op} and V is the value given by {@code read-filter-tag-comp}.</p>
 */
@DocumentedFeature(groupName = HelpConstants.DOC_CAT_READFILTERS, groupSummary = HelpConstants.DOC_CAT_READFILTERS_SUMMARY,
        summary = "Keep only reads that contains a tag with value that agrees with parameters")
public final class ReadTagValueFilter extends ReadFilter {
    private static final long serialVersionUID = 1L;

    @Argument(fullName = ReadFilterArgumentDefinitions.READ_FILTER_TAG,
            doc = "Look for this tag in read", optional=false)
    public String readFilterTagName = null;

    @Argument(fullName = ReadFilterArgumentDefinitions.READ_FILTER_TAG_COMP,
            doc = "Compare value in tag to this value", optional=true)
    public Float readFilterTagComp = 0F;

    /**
     * Comparison applied as {@code tagValue OP compValue}; the tag value is always the left operand.
     */
    public enum Operator {
        LESS((Float x, Float y) -> x < y),
        LESS_OR_EQUAL((Float x, Float y) -> x <= y),
        GREATER((Float x, Float y) -> x > y),
        GREATER_OR_EQUAL((Float x, Float y) -> x >= y),
        // EQUAL / NOT_EQUAL use Float.equals rather than the primitive == operator
        EQUAL(Float::equals),
        NOT_EQUAL((Float x, Float y) -> !x.equals(y));

        final BiFunction<Float, Float, Boolean> comp;

        Operator(BiFunction<Float, Float, Boolean> comp) {
            this.comp = comp;
        }
    }

    // The help text names read-filter-tag-comp as the source of V: read-filter-tag holds the
    // tag *name*, not the value the tag is compared against.
    @Argument(fullName = ReadFilterArgumentDefinitions.READ_FILTER_TAG_OP,
            doc = "Compare value in tag to value with this operator. " +
                    "If T is the value in the tag, OP is the operation provided, " +
                    "and V is the value in read-filter-tag-comp, then the " +
                    "read will pass the filter iff T OP V is true.", optional = true)
    public Operator readFilterTagOp = Operator.EQUAL;

    public ReadTagValueFilter() {
    }

    /**
     * Convenience constructor for using the {@code <tag operator value>} form.
     *
     * @param tagName  name of the tag to look for in each read
     * @param tagValue value the tag is compared against
     * @param operator comparison applied as {@code tag operator tagValue}
     */
    public ReadTagValueFilter(final String tagName, final float tagValue, final Operator operator) {
        this.readFilterTagName = tagName;
        this.readFilterTagComp = tagValue;
        this.readFilterTagOp = operator;
    }

    /**
     * Tests whether the read carries the configured tag and its value satisfies the comparison.
     *
     * @param read the read to test
     * @return {@code true} iff the tag is present, a comparison value is set, and
     *         {@code tagValue OP compValue} is true
     */
    @Override
    public boolean test(final GATKRead read) {

        // the presence check comes first so the float lookup is only made for reads with the tag
        return read.hasAttribute(this.readFilterTagName) &&
                this.readFilterTagComp != null &&
                this.readFilterTagOp.comp.apply(read.getAttributeAsFloat(this.readFilterTagName),
                        this.readFilterTagComp);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,16 @@ default Optional<SamPairUtil.PairOrientation> getPairOrientation() {
*/
Integer getAttributeAsInteger( final String attributeName );

/**
* Retrieve the value of a particular attribute typed as a floating point value.
*
* The result is a boxed {@code Float} that may be {@code null}; check for presence
* before unboxing it.
*
* @param attributeName name of the attribute to retrieve
* @return float value of the requested attribute, or {@code null} if the attribute is not present
* @throws GATKException.ReadAttributeTypeMismatch if the attribute
* value cannot be typed as a float
*/
Float getAttributeAsFloat( final String attributeName );

/**
* Retrieve the value of a particular attribute typed as a String.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,27 @@ else if ( attributeValue instanceof Integer ) {
}
}

/**
 * Retrieves an attribute of the underlying record as a {@code Float}.
 *
 * A stored {@code Float} is returned as is; any other value is converted by parsing its
 * string representation.
 *
 * @param attributeName name of the attribute to retrieve (validated for legality first)
 * @return the attribute value as a float, or {@code null} if the attribute is not present
 * @throws GATKException.ReadAttributeTypeMismatch if the value cannot be parsed as a float
 */
@Override
public Float getAttributeAsFloat( final String attributeName ) {
    ReadUtils.assertAttributeNameIsLegal(attributeName);
    final Object attributeValue = samRecord.getAttribute(attributeName);

    if ( attributeValue == null ) {
        return null;
    }
    if ( attributeValue instanceof Float ) {
        return (Float) attributeValue;
    }

    try {
        return Float.parseFloat(attributeValue.toString());
    }
    catch ( NumberFormatException e ) {
        // report the type that was actually requested ("float", not "integer")
        throw new GATKException.ReadAttributeTypeMismatch(attributeName, "float", e);
    }
}

@Override
public String getAttributeAsString( final String attributeName ) {
ReadUtils.assertAttributeNameIsLegal(attributeName);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package org.broadinstitute.hellbender.engine.filters;

import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

public class JexlExpressionReadTagValueFilterUnitTest extends GATKBaseTest {

    private static final String PROVIDER_NAME = "JexlExpressionReadTagValueFilterDataProvider";

    /**
     * Builds a filter from the given expressions, applies it to a read built from the given
     * cigar and attributes, and checks the outcome.
     *
     * When {@code expectedException} is {@code null} the filter result must equal
     * {@code expectedResult}; otherwise a throwable of that type must be raised.
     */
    @Test(dataProvider = PROVIDER_NAME)
    public void testJexlExpressionReadTagValueFilter(final String cigarString,
                                                     final Object[] attrsNameAndValue,
                                                     final String[] jexlExpr,
                                                     final boolean expectedResult,
                                                     final Class<? extends Throwable> expectedException) {

        // a single expression goes through the String constructor, anything else through
        // the List constructor, so both convenience constructors get exercised
        final JexlExpressionReadTagValueFilter filter = (jexlExpr.length == 1)
                ? new JexlExpressionReadTagValueFilter(jexlExpr[0])
                : new JexlExpressionReadTagValueFilter(Arrays.asList(jexlExpr));

        final GATKRead read = ReadTagValueFilterUnitTest.buildSAMRead(cigarString, attrsNameAndValue);
        try {
            Assert.assertEquals(filter.test(read), expectedResult, cigarString);
            // reaching this point means nothing was thrown, so nothing may have been expected
            Assert.assertNull(expectedException);
        } catch (Throwable thrown) {
            final boolean anticipated = expectedException != null && expectedException.isInstance(thrown);
            if ( !anticipated ) {
                throw thrown;
            }
        }
    }

    /**
     * Supplies the test cases: cigar, attribute name/value pairs, JEXL expressions,
     * expected filter result and expected exception type (or {@code null}).
     */
    @DataProvider(name = PROVIDER_NAME)
    public Iterator<Object[]> jexlExpressionReadTagValueFilterDataProvider() {
        final List<Object[]> cases = Arrays.<Object[]>asList(
                testCase("100M", new Object[] {"TM", 1.0f},
                        new String[] {"TM == 1.0"}, true, null),
                testCase("100M", new Object[] {"TM", 1.0f},
                        new String[] {"TM < 1.0"}, false, null),
                testCase("100M", new Object[] {"TM", 1.0f},
                        new String[] {"NO_SUCH < 1.0"}, false, IllegalArgumentException.class),
                testCase("100M", new Object[] {"TM", 1.0f, "TA", 2.0f},
                        new String[] {"TM >= 1.0", "TA <= 2.0"}, true, null),
                testCase("100M", new Object[] {"TM", 1.0f, "TA", 2.0f},
                        new String[] {"TM >= 1.0", "TA < 2.0"}, false, null)
        );
        return cases.iterator();
    }

    // Packs one test case into the positional Object[] layout the test method expects.
    private static Object[] testCase(final String cigar,
                                     final Object[] attributes,
                                     final String[] expressions,
                                     final boolean expected,
                                     final Class<? extends Throwable> exception) {
        return new Object[] {cigar, attributes, expressions, expected, exception};
    }

}
Loading

0 comments on commit a3141f7

Please sign in to comment.