Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GL-18 Funcotation filter for homvar & compound hets on two autosomal recessive genes #5843

Merged
merged 7 commits into from
Apr 2, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ if [ -z "${IS_NOT_RUN_UNIT_TESTS}" ] ; then
chmod -R a+w ${STAGING_ABSOLUTE_PATH}/src/test/resources

cp build.gradle build.gradle.backup
cp /scripts/docker/dockertest.gradle .
cp scripts/docker/dockertest.gradle .

echo docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}:/gatkCloneMountPoint -v ${STAGING_ABSOLUTE_PATH}/testJars:/jars -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}:/gatkCloneMountPoint -v ${STAGING_ABSOLUTE_PATH}/testJars:/jars -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.VariantWalker;
import org.broadinstitute.hellbender.engine.TwoPassVariantWalker;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ArHomvarFilter;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.AutosomalRecessiveConstants;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.FuncotationFilter;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter;
Expand All @@ -28,9 +30,11 @@
import java.io.File;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand All @@ -57,7 +61,7 @@
)
@DocumentedFeature
@ExperimentalFeature
public class FilterFuncotations extends VariantWalker {
public class FilterFuncotations extends TwoPassVariantWalker {

static final String ONE_LINE_SUMMARY = "Filter variants based on clinically-significant Funcotations.";
static final String SUMMARY = ONE_LINE_SUMMARY +
Expand Down Expand Up @@ -116,6 +120,8 @@ public enum AlleleFrequencyDataSource {
private VariantContextWriter outputVcfWriter;
private String[] funcotationKeys;
private final List<FuncotationFilter> funcotationFilters = new ArrayList<>();
private final List<VariantContext> arCompoundHetVariants = new ArrayList<>();
private final Map<String, List<VariantContext>> arHetVariantsByGene = new HashMap<>();

@Override
public void onTraversalStart() {
Expand All @@ -141,11 +147,30 @@ private void registerFilters() {
funcotationFilters.add(new ClinVarFilter(afDataSource));
funcotationFilters.add(new LofFilter(reference, afDataSource));
funcotationFilters.add(new LmmFilter());
funcotationFilters.add(new ArHomvarFilter());
}

@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
outputVcfWriter.add(applyFilters(variant, getMatchingFilters(variant)));
public void firstPassApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
buildArHetByGene(variant);
}

@Override
protected void afterFirstPass() {
arHetVariantsByGene.keySet().forEach(gene -> {
if(arHetVariantsByGene.get(gene).size() > 1) {
arCompoundHetVariants.addAll(arHetVariantsByGene.get(gene));
}
});
}

@Override
public void secondPassApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
Set<String> matchingFilters = getMatchingFilters(variant, funcotationFilters);
als364 marked this conversation as resolved.
Show resolved Hide resolved
if(arCompoundHetVariants.stream().anyMatch(hetVariant -> variantContextsMatch(hetVariant, variant))) {
matchingFilters.add(AutosomalRecessiveConstants.AR_INFO_VALUE);
}
outputVcfWriter.add(applyFilters(variant, matchingFilters));
}

/**
Expand All @@ -154,24 +179,17 @@ public void apply(final VariantContext variant, final ReadsContext readsContext,
* The filter will be treated as a match if it matches Funcotations for any of the transcripts in the
* variant's Funcotation map.
*/
private Set<String> getMatchingFilters(final VariantContext variant) {
private Set<String> getMatchingFilters(final VariantContext variant, final List<FuncotationFilter> funcotationFilters) {
final Set<String> matchingFilters = new HashSet<>();


final Map<Allele, FuncotationMap> funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute(
funcotationKeys, variant, "Gencode_" + reference.gencodeVersion + "_annotationTranscript", "FAKE_SOURCE");

funcs.values().forEach(funcotationMap -> {
final Stream<Set<Map.Entry<String, String>>> transcriptFuncotations = funcotationMap.getTranscriptList().stream()
.map(funcotationMap::get)
.map(funcotations -> funcotations.stream()
.flatMap(this::extractFuncotationFields)
.filter(entry -> entry.getValue() != null && !entry.getValue().isEmpty())
.collect(Collectors.toSet()));

transcriptFuncotations.forEach(funcotations -> {
getTranscriptFuncotations(funcotationMap).forEach(funcotations -> {
final Set<String> matches = funcotationFilters.stream()
.filter(f -> f.checkFilter(funcotations))
.filter(f -> f.checkFilter(funcotations, variant))
.map(FuncotationFilter::getFilterName)
.collect(Collectors.toSet());
matchingFilters.addAll(matches);
Expand Down Expand Up @@ -213,10 +231,56 @@ private VariantContext applyFilters(final VariantContext variant, final Set<Stri
return variantContextBuilder.make();
}

private void buildArHetByGene(VariantContext variant) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be in its own class?. Having AR specific code in the FilterFuncotations class seems like it breaks encapsulation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it could be. There's not currently a 'good' place for it as this can't be a FuncotationFilter.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah - it may be worth designing another class hierarchy for filters that require two passes. That way the next time someone has to make a filter that requires work on the first pass it'll be easier.

Maybe as a subclass of FuncotationFilter?

als364 marked this conversation as resolved.
Show resolved Hide resolved
final Map<Allele, FuncotationMap> funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute(
funcotationKeys, variant, "Gencode_" + reference.gencodeVersion + "_annotationTranscript", "FAKE_SOURCE");

funcs.values().forEach(funcotationMap -> {
getTranscriptFuncotations(funcotationMap).forEach(funcotations -> {
Optional<Map.Entry<String, String>> maybeGeneFuncotation = funcotations.stream().filter(funcotation -> funcotation.getKey().equals("Gencode_27_hugoSymbol")).findFirst();
als364 marked this conversation as resolved.
Show resolved Hide resolved
if (maybeGeneFuncotation.isPresent()) {
String gene = maybeGeneFuncotation.get().getValue();
if (AutosomalRecessiveConstants.AUTOSOMAL_RECESSIVE_GENES.contains(gene) && variant.getHetCount() > 0) {
if(arHetVariantsByGene.containsKey(gene)) {
arHetVariantsByGene.get(gene).add(variant);
}
else {
ArrayList<VariantContext> variants = new ArrayList<>();
variants.add(variant);
arHetVariantsByGene.put(gene, variants);
}
}
}
});
});
}


private Stream<Set<Map.Entry<String, String>>> getTranscriptFuncotations(final FuncotationMap funcotationMap) {
return funcotationMap.getTranscriptList().stream()
.map(funcotationMap::get)
.map(funcotations -> funcotations.stream()
.flatMap(this::extractFuncotationFields)
.filter(entry -> entry.getValue() != null && !entry.getValue().isEmpty())
.collect(Collectors.toSet()));
}

@Override
public void closeTool() {
if (outputVcfWriter != null) {
outputVcfWriter.close();
}
}

// We know these VariantContexts come from the same list of variants, so we should only need to check these things
// instead of these things plus attributes, filters, and qual scores.
private boolean variantContextsMatch(VariantContext v1, VariantContext v2) {
return v1.getContig().equals(v2.getContig())
&& v1.getStart() == v2.getStart()
&& v1.getEnd() == v2.getEnd()
&& v1.getReference() == v2.getReference()
&& v1.getReference() == v2.getReference()
&& v1.getAlternateAlleles().size() == v2.getAlternateAlleles().size()
&& v1.getAlternateAlleles().containsAll(v2.getAlternateAlleles());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ public abstract class AlleleFrequencyUtils {
public static FuncotationFiltrationRule buildMaxMafRule(final double maxMaf, final AlleleFrequencyDataSource afDataSource) {
ParamUtils.inRange(maxMaf, 0, 1, "MAF must be between 0 and 1");
if (afDataSource.equals(AlleleFrequencyDataSource.exac)) {
return funcotations -> AlleleFrequencyExacUtils.getMaxMinorAlleleFreq(funcotations) <= maxMaf;
return (funcotations, variant) -> AlleleFrequencyExacUtils.getMaxMinorAlleleFreq(funcotations) <= maxMaf;
}
else {
return funcotations -> {
return (funcotations, variant) -> {
final Map<String, String> condensedFuncotations = funcotations.stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
return (!AlleleFrequencyGnomadUtils.allFrequenciesFiltered(condensedFuncotations)
&& AlleleFrequencyGnomadUtils.getMaxMinorAlleleFreq(condensedFuncotations) <= maxMaf);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.broadinstitute.hellbender.tools.funcotator.filtrationRules;

import htsjdk.variant.variantcontext.VariantContext;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ArHomvarFilter extends FuncotationFilter {
public ArHomvarFilter() {
super(AutosomalRecessiveConstants.AR_INFO_VALUE);
}

@Override
List<FuncotationFiltrationRule> getRules() {
return Collections.singletonList(this::arHomvarRule);
}

private boolean arHomvarRule(Set<Map.Entry<String, String>> funcotations, VariantContext variantContext) {
// Is this gene part of the list of genes we care about?
boolean isInterestingGene = funcotations.stream()
.anyMatch(funcotation ->
funcotation.getKey().equals("Gencode_27_hugoSymbol")
als364 marked this conversation as resolved.
Show resolved Hide resolved
&& AutosomalRecessiveConstants.AUTOSOMAL_RECESSIVE_GENES.contains(funcotation.getValue())
);
// If so, is this a homvar?
if (isInterestingGene) {
return variantContext.getHomVarCount() > 0;
}

return false;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.broadinstitute.hellbender.tools.funcotator.filtrationRules;

import java.util.Arrays;
import java.util.List;

public class AutosomalRecessiveConstants {
public static final String AR_INFO_VALUE = "AR";
public static final List<String> AUTOSOMAL_RECESSIVE_GENES = Arrays.asList("ATP7B", "MUTYH");
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ public ClinVarFilter(final AlleleFrequencyDataSource afDataSource) {
@Override
List<FuncotationFiltrationRule> getRules() {
return Arrays.asList(
funcotations -> containsKey(funcotations, ACMG_DISEASE_FUNCOTATION),
funcotations -> {
(funcotations, variant) -> containsKey(funcotations, ACMG_DISEASE_FUNCOTATION),
(funcotations, variant) -> {
final Set<String> significance = matchOnKeyOrDefault(funcotations, CLINVAR_SIGNIFICANCE_FUNCOTATION, "")
.filter(value -> !value.isEmpty())
.collect(Collectors.toSet());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.funcotator.filtrationRules;

import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.hellbender.utils.Utils;

import java.util.Collections;
Expand Down Expand Up @@ -36,13 +37,15 @@ public String getFilterName() {
*
* @param prunedTranscriptFuncotations Funcotation values of a single transcript. Assumed to have
* been "pruned" to remove null / empty values. Never {@code null}
* @param variant VariantContext of this transcript.
*
* @return true if the Funcotations match all of this filter's rules, and false otherwise
*/
public Boolean checkFilter(final Set<Map.Entry<String, String>> prunedTranscriptFuncotations) {
public Boolean checkFilter(final Set<Map.Entry<String, String>> prunedTranscriptFuncotations, VariantContext variant) {
als364 marked this conversation as resolved.
Show resolved Hide resolved
Utils.nonNull(prunedTranscriptFuncotations);

return getRules().stream()
.map(rule -> rule.checkRule(prunedTranscriptFuncotations))
.map(rule -> rule.checkRule(prunedTranscriptFuncotations, variant))
.reduce(Boolean::logicalAnd)
.orElse(false);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.broadinstitute.hellbender.tools.funcotator.filtrationRules;

import htsjdk.variant.variantcontext.VariantContext;

import java.util.Map;
import java.util.Set;

Expand All @@ -11,5 +13,5 @@ interface FuncotationFiltrationRule {
/**
* Check if a set of Funcotations matches this rule.
*/
boolean checkRule(final Set<Map.Entry<String, String>> funcotations);
boolean checkRule(final Set<Map.Entry<String, String>> funcotations, VariantContext variant);
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public LmmFilter() {

@Override
List<FuncotationFiltrationRule> getRules() {
return Collections.singletonList(funcotations ->
return Collections.singletonList((funcotations, variant) ->
getMatchesOrDefault(funcotations, entry -> entry.getKey().equals(LMM_FLAGGED), "false")
.map(Boolean::valueOf)
.reduce(Boolean::logicalOr)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ public LofFilter(final FilterFuncotations.Reference ref, final AlleleFrequencyDa
@Override
List<FuncotationFiltrationRule> getRules() {
return Arrays.asList(
funcotations -> matchOnKeyOrDefault(funcotations, classificationFuncotation, "").anyMatch(CONSTANT_LOF_CLASSIFICATIONS::contains),
funcotations -> matchOnKeyOrDefault(funcotations, LOF_GENE_FUNCOTATION, "").anyMatch("YES"::equals),
(funcotations, variant) -> matchOnKeyOrDefault(funcotations, classificationFuncotation, "").anyMatch(CONSTANT_LOF_CLASSIFICATIONS::contains),
(funcotations, variant) -> matchOnKeyOrDefault(funcotations, LOF_GENE_FUNCOTATION, "").anyMatch("YES"::equals),
AlleleFrequencyUtils.buildMaxMafRule(LOF_MAX_MAF, afDataSource));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.AutosomalRecessiveConstants;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter;
Expand All @@ -23,16 +24,20 @@ public class FilterFuncotationsIntegrationTest extends CommandLineProgramTest {
private static final Path TEST_DATA_DIR = getTestDataDir().toPath().resolve("FilterFuncotations");

private static final Set<String> ALL_FILTERS = new HashSet<>(Arrays.asList(
ClinVarFilter.CLINSIG_INFO_VALUE, LofFilter.CLINSIG_INFO_VALUE, LmmFilter.CLINSIG_INFO_VALUE));
ClinVarFilter.CLINSIG_INFO_VALUE, LofFilter.CLINSIG_INFO_VALUE, LmmFilter.CLINSIG_INFO_VALUE, AutosomalRecessiveConstants.AR_INFO_VALUE));

@DataProvider(name = "uniformVcfProvider")
public Object[][] uniformVcfProvider() {
return new Object[][]{
{"all.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), ALL_FILTERS},
{"all_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), ALL_FILTERS},
{"ar_homvar.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(AutosomalRecessiveConstants.AR_INFO_VALUE)},
{"ar_hetvar.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)},
{"ar_compound_het.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(AutosomalRecessiveConstants.AR_INFO_VALUE)},
{"clinvar.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(ClinVarFilter.CLINSIG_INFO_VALUE)},
{"clinvar_gnomad.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), Collections.singleton(ClinVarFilter.CLINSIG_INFO_VALUE)},
{"gnomad_af_failing_cases.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)},
{"gnomad_af_failing_cases.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER),
Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)},
{"gnomad_af_passing_cases.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)},
{"lmm.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(LmmFilter.CLINSIG_INFO_VALUE)},
{"lof.vcf", FilterFuncotations.Reference.b37, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)},
Expand Down
Loading