likelihoods, final VariantContext vc){
final int[][] table = getContingencyTable(likelihoods, vc, MIN_COUNT);
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotation.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotation.java
index a3a611cf566..dd8d055c4a3 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotation.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotation.java
@@ -5,7 +5,7 @@
/**
* Superclass of all variant annotations.
*/
-public abstract class VariantAnnotation {
+public abstract class VariantAnnotation implements Annotation {
/**
* Return the keys
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotator.java
new file mode 100644
index 00000000000..55cfc01a0d7
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotator.java
@@ -0,0 +1,259 @@
+package org.broadinstitute.hellbender.tools.walkers.annotator;
+
+import com.google.common.collect.Lists;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.vcf.*;
+import org.broadinstitute.barclay.argparser.*;
+import org.broadinstitute.hellbender.cmdline.GATKPlugin.DefaultGATKVariantAnnotationArgumentCollection;
+import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationArgumentCollection;
+import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
+import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;
+import org.broadinstitute.hellbender.engine.*;
+import org.broadinstitute.hellbender.engine.filters.ReadFilter;
+import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
+import org.broadinstitute.hellbender.engine.filters.WellformedReadFilter;
+import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils;
+import org.broadinstitute.hellbender.utils.BaseUtils;
+import org.broadinstitute.hellbender.utils.Utils;
+import org.broadinstitute.hellbender.utils.genotyper.*;
+import org.broadinstitute.hellbender.utils.genotyper.SampleList;
+import org.broadinstitute.hellbender.utils.read.GATKRead;
+import picard.cmdline.programgroups.VariantManipulationProgramGroup;
+
+import java.io.File;
+import java.util.*;
+import java.util.stream.Collectors;
+
+
+/**
+ * Annotate variant calls with context information
+ *
+ *
+ * This tool is designed to annotate variant calls based on their context (as opposed to functional annotation).
+ * Various annotation modules are available; see the "Annotation Modules" page linked in the Tool Documentation sidebar for a complete list.
+ *
+ *
Input
+ *
+ * A variant set to annotate and optionally one or more BAM files.
+ *
+ *
+ * Output
+ *
+ * An annotated VCF.
+ *
+ *
+ * Usage examples
+ *
+ *
+ * Annotate a VCF with dbSNP IDs and depth of coverage for each sample
+ *
+ * VariantAnnotator \
+ * -R reference.fasta \
+ * -I input.bam \
+ * -V input.vcf \
+ * -o output.vcf \
+ * -A Coverage \
+ * --dbsnp dbsnp.vcf
+ *
+ *
+ * Annotate a VCF with allele frequency by an external resource. Annotation will only occur if there is allele concordance between the resource and the input VCF
+ *
+ * VariantAnnotator \
+ * -R reference.fasta \
+ * -I input.bam \
+ * -V input.vcf \
+ * -o output.vcf \
+ * -L anotherInput.vcf \
+ * --resource foo:resource.vcf \
+ * -E foo.AF \
+ * --resource-allele-concordance
+ *
+ *
+ * Annotate with AF and FILTER fields from an external resource
+ *
+ * VariantAnnotator \
+ * -R reference.fasta \
+ * -V input.vcf \
+ * -o output.vcf \
+ * --resource foo:resource.vcf \
+ * --expression foo.AF \
+ * --expression foo.FILTER
+ *
+ *
+ * Caveat
+ * This tool will not output every annotation as many cannot be made without computing the per-read AlleleLikelihoods,
+ * which is generated in either the HaplotypeCaller or Mutect2.
+ *
+ * Special note on RankSumTestAnnotations
+ * RankSumAnnotations produced by this tool are not the same as those produced by the HaplotypeCaller. Without the
+ * likelihoods, the tool resorts to a pileup heuristic to categorize reads which means that RankSumAnnotations will only
+ * be present for SNP variants.
+ */
+@CommandLineProgramProperties(summary="Tool for adding annotations to VCF files",
+ oneLineSummary = "Tool for adding annotations to VCF files",
+ programGroup = VariantManipulationProgramGroup.class)
+@BetaFeature
+public class VariantAnnotator extends VariantWalker {
+ private VariantContextWriter vcfWriter;
+
+ /**
+ * The rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. Note that dbSNP is not used in any way for the calculations themselves.
+ */
+ @ArgumentCollection
+ protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
+
+ /**
+ * If a call overlaps with a record from the provided comp track, the INFO field will be annotated
+ * as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field). Records that are
+ * filtered in the comp track will be ignored. Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
+ */
+ @Advanced
+ @Argument(fullName = "comp", doc = "Comparison VCF file(s)", optional = true)
+ public List> comps = new ArrayList<>();
+
+ /**
+ * An external resource VCF file or files from which to annotate.
+ *
+ * Use this option to add annotations from a resource file to the output.
+ * For example, if you want to annotate your callset with the AC field value from a VCF file named
+ * 'resource_file.vcf', you tag it with '-resource:my_resource resource_file.vcf' and you additionally specify
+ * '-E my_resource.AC' (-E is short for --expression, also documented on this page). In the resulting output
+ * VCF, any records for which there is a record at the same position in the resource file will be annotated with
+ * 'my_resource.AC=N'. Note that if there are multiple records in the resource file that overlap the given
+ * position, one is chosen randomly. Check for allele concordance if using --resource-allele-concordance, otherwise
+ * the match is based on position only.
+ */
+ @Argument(fullName="resource", doc="External resource VCF file", optional=true)
+ public List> resources;
+
+ @Argument(fullName= StandardArgumentDefinitions.OUTPUT_LONG_NAME,
+ shortName=StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
+ doc="The file to whcih variants should be written", optional=false)
+ protected File outputFile;
+
+ /**
+ * See the --list argument to view available annotations.
+ */
+ @ArgumentCollection
+ public GATKAnnotationArgumentCollection variantAnnotationArgumentCollection = new DefaultGATKVariantAnnotationArgumentCollection(
+ Collections.emptyList(),
+ Collections.emptyList(),
+ Collections.emptyList());
+
+ /**
+ * This option enables you to add annotations from one VCF to another.
+ *
+ * For example, if you want to annotate your callset with the AC field value from a VCF file named
+ * 'resource_file.vcf', you tag it with '-resource:my_resource resource_file.vcf' (see the -resource argument, also
+ * documented on this page) and you specify '-E my_resource.AC'. In the resulting output VCF, any records for
+ * which there is a record at the same position in the resource file will be annotated with 'my_resource.AC=N'.
+ * INFO field data, ID, ALT, and FILTER fields may be used as expression values.
+ * Note that if there are multiple records in the resource file that overlap the given position, one is chosen
+ * randomly.
+ */
+ @Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls", optional=true)
+ protected Set expressionsToUse = new HashSet<>();
+
+ /**
+ * If this argument is specified, add annotations (specified by --expression) from an external resource
+ * (specified by --resource) to the input VCF (specified by --variant) only if the alleles are
+ * concordant between input and the resource VCFs. Otherwise, always add the annotations.
+ */
+ @Argument(fullName="resource-allele-concordance", shortName="rac", doc="Check for allele concordances when using an external resource VCF file", optional=true)
+ protected Boolean expressionAlleleConcordance = false;
+
+ /**
+ * You can use the -XA argument in combination with this one to exclude specific annotations.Note that some
+ * annotations may not be actually applied if they are not applicable to the data provided or if they are
+ * unavailable to the tool (e.g. there are several annotations that are currently not hooked up to
+ * HaplotypeCaller). At present no error or warning message will be provided, the annotation will simply be
+ * skipped silently. You can check the output VCF header to see which annotations were actually applied (although
+ * this does not guarantee that the annotation was applied to all records in the VCF, since some annotations have
+ * additional requirements, e.g. minimum number of samples or heterozygous sites only -- see the documentation
+ * for individual annotations' requirements).
+ */
+ @Argument(fullName="use-all-annotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", optional=true)
+ protected Boolean USE_ALL_ANNOTATIONS = false;
+
+ private VariantAnnotatorEngine annotatorEngine;
+ private SampleList variantSamples;
+
+ /**
+ * Prepare the output file and the list of available features.
+ */
+ public void onTraversalStart() {
+ // get the list of all sample names from the variant VCF, if applicable
+ final List samples = getHeaderForVariants().getGenotypeSamples();
+ variantSamples = new IndexedSampleList(samples);
+
+ if ( USE_ALL_ANNOTATIONS ) {
+ annotatorEngine = VariantAnnotatorEngine.ofAllMinusExcluded(variantAnnotationArgumentCollection.getUserDisabledAnnotationNames(), dbsnp.dbsnp, comps, false);
+ } else {
+ annotatorEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(variantAnnotationArgumentCollection, dbsnp.dbsnp, comps, false);
+ }
+ annotatorEngine.addExpressions(expressionsToUse, resources, expressionAlleleConcordance );
+
+ // setup the header fields
+ // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
+ final Set hInfo = new HashSet<>();
+
+ hInfo.addAll(annotatorEngine.getVCFAnnotationDescriptions(false));
+ hInfo.addAll(getHeaderForVariants().getMetaDataInInputOrder());
+
+ // TODO ask reviewer, VCFUtils.withUpdatedContigs is what GATK3 calls into, it isn't used anywhere in 4 though so should it be used here?
+ VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
+ vcfWriter = createVCFWriter(outputFile);
+ vcfWriter.writeHeader(VCFUtils.withUpdatedContigs(vcfHeader, hasReference()? new File(referenceArguments.getReferenceFileName()): null, referenceArguments.getReferencePath()==null ? getBestAvailableSequenceDictionary(): getReferenceDictionary()));
+ }
+
+ /**
+ * For each site of interest, annotate based on the requested annotation types
+ *
+ * @param vc
+ * @param readsContext Reads overlapping the current variant. Will be an empty, but non-null, context object
+ * if there is no backing source of reads data (in which case all queries on it will return
+ * an empty array/iterator)
+ * @param refContext
+ * @param fc
+ */
+ @Override
+ public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) {
+
+ // if the reference is present and base is not ambiguous, we can annotate
+ if (refContext.getBases().length ==0 || BaseUtils.simpleBaseToBaseIndex(refContext.getBase()) != -1 ) {
+ //TODO remove this filter and update the tests, this implementation filters out reads that start in a spanning deleting according to variant context in order to match gatk3,
+ //TODO this will cause the reads to be assigned and annotated in a different manner than the haplotype caller.
+ final List reads = Utils.stream(readsContext).filter(r -> r.getStart() <= vc.getStart()).collect(Collectors.toList());
+
+ ReadLikelihoods likelihoods = new UnfilledReadsLikelihoods<>( variantSamples, new IndexedAlleleList<>(vc.getAlleles()),
+ AssemblyBasedCallerUtils.splitReadsBySample(variantSamples, getHeaderForReads(), reads));
+
+ VariantContext annotatedVC = annotatorEngine.annotateContext(vc, fc, refContext, likelihoods, a -> true);
+ vcfWriter.add(annotatedVC);
+
+ } else {
+ vcfWriter.add(vc);
+ }
+ }
+
+ public List getDefaultReadFilters() {
+ return Lists.newArrayList( new WellformedReadFilter(),
+ ReadFilterLibrary.NOT_DUPLICATE,
+ ReadFilterLibrary.PRIMARY_LINE,
+ ReadFilterLibrary.PASSES_VENDOR_QUALITY_CHECK,
+ ReadFilterLibrary.MAPPED);
+ }
+
+ /**
+ * Make sure that the writer is closed upon completing the tool.
+ */
+ @Override
+ public void closeTool() {
+ if (vcfWriter !=null) {
+ vcfWriter.close();
+ }
+ }
+}
+
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotatorEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotatorEngine.java
index e3be62e3af1..16271eab082 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotatorEngine.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotatorEngine.java
@@ -3,18 +3,20 @@
import com.google.common.collect.Sets;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.*;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.cmdline.GATKPlugin.DefaultGATKVariantAnnotationArgumentCollection;
import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationArgumentCollection;
-import org.broadinstitute.hellbender.engine.FeatureContext;
-import org.broadinstitute.hellbender.engine.FeatureInput;
-import org.broadinstitute.hellbender.engine.ReferenceContext;
+import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.exceptions.GATKException;
+import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotationData;
import org.broadinstitute.hellbender.utils.ClassUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
+import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.reflections.ReflectionUtils;
import java.util.*;
@@ -31,16 +33,23 @@ public final class VariantAnnotatorEngine {
private final List infoAnnotations;
private final List genotypeAnnotations;
private Set reducibleKeys;
+ private List expressions = new ArrayList<>();
private final VariantOverlapAnnotator variantOverlapAnnotator;
+ private boolean expressionAlleleConcordance;
+ private final boolean useRawAnnotations;
+
+ private final static Logger logger = LogManager.getLogger(VariantAnnotatorEngine.class);
private VariantAnnotatorEngine(final AnnotationManager annots,
final FeatureInput dbSNPInput,
- final List> featureInputs){
+ final List> featureInputs,
+ final boolean useRaw){
infoAnnotations = annots.createInfoFieldAnnotations();
genotypeAnnotations = annots.createGenotypeAnnotations();
variantOverlapAnnotator = initializeOverlapAnnotator(dbSNPInput, featureInputs);
reducibleKeys = new HashSet<>();
+ useRawAnnotations = useRaw;
for (InfoFieldAnnotation annot : infoAnnotations) {
if (annot instanceof ReducibleAnnotation) {
reducibleKeys.add(((ReducibleAnnotation) annot).getRawKeyName());
@@ -77,6 +86,7 @@ public VariantAnnotatorEngine(final List annotationList,
}
variantOverlapAnnotator = initializeOverlapAnnotator(dbSNPInput, featureInputs);
reducibleKeys = new HashSet<>();
+ useRawAnnotations = useRaw;
for (InfoFieldAnnotation annot : infoAnnotations) {
if (annot instanceof ReducibleAnnotation) {
reducibleKeys.add(((ReducibleAnnotation) annot).getRawKeyName());
@@ -98,10 +108,11 @@ public VariantAnnotatorEngine(final List annotationList,
*/
public static VariantAnnotatorEngine ofAllMinusExcluded(final List annotationsToExclude,
final FeatureInput dbSNPInput,
- final List> comparisonFeatureInputs) {
+ final List> comparisonFeatureInputs,
+ final boolean useRawAnnotations) {
Utils.nonNull(annotationsToExclude, "annotationsToExclude is null");
Utils.nonNull(comparisonFeatureInputs, "comparisonFeatureInputs is null");
- return new VariantAnnotatorEngine(AnnotationManager.ofAllMinusExcluded(annotationsToExclude), dbSNPInput, comparisonFeatureInputs);
+ return new VariantAnnotatorEngine(AnnotationManager.ofAllMinusExcluded(annotationsToExclude), dbSNPInput, comparisonFeatureInputs, useRawAnnotations);
}
/**
@@ -125,7 +136,33 @@ public static VariantAnnotatorEngine ofSelectedMinusExcluded(final List
Utils.nonNull(annotationsToUse, "annotationsToUse is null");
Utils.nonNull(annotationsToExclude, "annotationsToExclude is null");
Utils.nonNull(comparisonFeatureInputs, "comparisonFeatureInputs is null");
- return new VariantAnnotatorEngine(AnnotationManager.ofSelectedMinusExcluded(annotationGroupsToUse, annotationsToUse, annotationsToExclude), dbSNPInput, comparisonFeatureInputs);
+ return new VariantAnnotatorEngine(AnnotationManager.ofSelectedMinusExcluded(annotationGroupsToUse, annotationsToUse, annotationsToExclude), dbSNPInput, comparisonFeatureInputs, false);
+ }
+
+ /**
+ * Makes the engine for given annotation types and groups (minus the excluded ones).
+ * @param annotationGroupsToUse list of annotations groups to include
+ * @param annotationsToUse list of of annotations to include
+ * @param annotationsToExclude list of annotations to exclude
+ * @param dbSNPInput input for variants from a known set from DbSNP or null if not provided.
+ * The annotation engine will mark variants overlapping anything in this set using {@link htsjdk.variant.vcf.VCFConstants#DBSNP_KEY}.
+ * @param comparisonFeatureInputs list of inputs with known variants.
+ * The annotation engine will mark variants overlapping anything in those sets using the name given by {@link FeatureInput#getName()}.
+ * Note: the DBSNP FeatureInput should be passed in separately, and not as part of this List - an GATKException will be thrown otherwise.
+ * Note: there are no non-DBSNP comparison FeatureInputs an empty List should be passed in here, rather than null.
+ * @param useRawAnnotations Specify whether the annotation engine will attempt to output raw annotations for reducible annotations
+ */
+ public static VariantAnnotatorEngine ofSelectedMinusExcluded(final List annotationGroupsToUse,
+ final List annotationsToUse,
+ final List annotationsToExclude,
+ final FeatureInput dbSNPInput,
+ final List> comparisonFeatureInputs,
+ final boolean useRawAnnotations) {
+ Utils.nonNull(annotationGroupsToUse, "annotationGroupsToUse is null");
+ Utils.nonNull(annotationsToUse, "annotationsToUse is null");
+ Utils.nonNull(annotationsToExclude, "annotationsToExclude is null");
+ Utils.nonNull(comparisonFeatureInputs, "comparisonFeatureInputs is null");
+ return new VariantAnnotatorEngine(AnnotationManager.ofSelectedMinusExcluded(annotationGroupsToUse, annotationsToUse, annotationsToExclude), dbSNPInput, comparisonFeatureInputs, useRawAnnotations);
}
/**
@@ -142,11 +179,12 @@ public static VariantAnnotatorEngine ofSelectedMinusExcluded(final List
*/
public static VariantAnnotatorEngine ofSelectedMinusExcluded(final GATKAnnotationArgumentCollection argumentCollection,
final FeatureInput dbSNPInput,
- final List> comparisonFeatureInputs) {
+ final List> comparisonFeatureInputs,
+ final boolean useRawAnnotations) {
return ofSelectedMinusExcluded(argumentCollection.getUserEnabledAnnotationGroups(),
argumentCollection.getUserEnabledAnnotationNames(),
argumentCollection.getUserDisabledAnnotationNames(),
- dbSNPInput, comparisonFeatureInputs);
+ dbSNPInput, comparisonFeatureInputs, useRawAnnotations);
}
private VariantOverlapAnnotator initializeOverlapAnnotator(final FeatureInput dbSNPInput, final List> featureInputs) {
final Map, String> overlaps = new LinkedHashMap<>();
@@ -211,11 +249,38 @@ public Set getVCFAnnotationDescriptions(boolean useRaw) {
}
}
+ // Add header lines corresponding to the expression target files
+ for (final VariantAnnotatorEngine.VAExpression expression : getRequestedExpressions()) {
+ // special case the ID field
+ if (expression.fieldName.equals("ID")) {
+ descriptions.add(new VCFInfoHeaderLine(expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource"));
+ } else {
+ final VCFInfoHeaderLine targetHeaderLine = ((VCFHeader) new FeatureDataSource<>(expression.binding, 100, VariantContext.class).getHeader())
+ .getInfoHeaderLines().stream()
+ .filter(l -> l.getID().equals(expression.fieldName))
+ .findFirst().orElse(null);
+
+ VCFInfoHeaderLine lineToAdd;
+ if (targetHeaderLine != null) {
+ expression.sethInfo(targetHeaderLine);
+ if (targetHeaderLine.getCountType() == VCFHeaderLineCount.INTEGER) {
+ lineToAdd = new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCount(), targetHeaderLine.getType(), targetHeaderLine.getDescription());
+ } else {
+ lineToAdd = new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCountType(), targetHeaderLine.getType(), targetHeaderLine.getDescription());
+ }
+ } else {
+ lineToAdd = new VCFInfoHeaderLine(expression.fullName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource");
+ logger.warn(String.format("The requested expression attribute \"%s\" is missing from the header in its resource file %s", expression.fullName, expression.binding.getName()));
+ }
+ descriptions.add(lineToAdd);
+ expression.sethInfo(lineToAdd);
+ }
+ }
+
Utils.validate(!descriptions.contains(null), "getVCFAnnotationDescriptions should not contain null. This error is likely due to an incorrect implementation of getDescriptions() in one or more of the annotation classes");
return descriptions;
}
-
/**
* Combine (raw) data for reducible annotations (those that use raw data in gVCFs)
* Mutates annotationMap by removing the annotations that were combined
@@ -285,6 +350,7 @@ public VariantContext finalizeAnnotations(VariantContext vc, VariantContext orig
* @param ref the reference context of the variant to annotate or null if there is none
* @param likelihoods likelihoods indexed by sample, allele, and read within sample. May be null
* @param addAnnot function that indicates if the given annotation type should be added to the variant
+ *
*/
public VariantContext annotateContext(final VariantContext vc,
final FeatureContext features,
@@ -300,9 +366,16 @@ public VariantContext annotateContext(final VariantContext vc,
final VariantContext newGenotypeAnnotatedVC = builder.make();
final Map infoAnnotMap = new LinkedHashMap<>(newGenotypeAnnotatedVC.getAttributes());
+ annotateExpressions(vc, features, ref, infoAnnotMap);
+
for ( final InfoFieldAnnotation annotationType : this.infoAnnotations) {
if (addAnnot.test(annotationType)){
- final Map annotationsFromCurrentType = annotationType.annotate(ref, newGenotypeAnnotatedVC, likelihoods);
+ final Map annotationsFromCurrentType;
+ if (useRawAnnotations && annotationType instanceof ReducibleAnnotation) {
+ annotationsFromCurrentType = ((ReducibleAnnotation) annotationType).annotateRawData(ref, newGenotypeAnnotatedVC, likelihoods);
+ } else {
+ annotationsFromCurrentType = annotationType.annotate(ref, newGenotypeAnnotatedVC, likelihoods);
+ }
if ( annotationsFromCurrentType != null ) {
infoAnnotMap.putAll(annotationsFromCurrentType);
}
@@ -458,7 +531,195 @@ private List filterAnnotations(final List al
return Collections.unmodifiableList(new ArrayList<>(annotations));
}
+ }
+
+ /**
+ * A container object for storing the objects necessary for carrying over expression annotations.
+ * It holds onto the source feature input as well as any relevant header lines in order to alter the vcfHeader.
+ */
+ public static class VAExpression {
+
+ final private String fullName, fieldName;
+ final private FeatureInput binding;
+ private VCFInfoHeaderLine hInfo;
+
+ public VAExpression(String fullExpression, List> dataSourceList){
+ final int indexOfDot = fullExpression.lastIndexOf(".");
+ if ( indexOfDot == -1 ) {
+ throw new UserException.BadInput("The requested expression '"+fullExpression+"' is invalid, it should be in VCFFile.value format");
+ }
+
+ fullName = fullExpression;
+ fieldName = fullExpression.substring(indexOfDot+1);
+
+ final String bindingName = fullExpression.substring(0, indexOfDot);
+ Optional> binding = dataSourceList.stream().filter(ds -> ds.getName().equals(bindingName)).findFirst();
+ if (!binding.isPresent()) {
+ throw new UserException.BadInput("The requested expression '"+fullExpression+"' is invalid, could not find vcf input file");
+ }
+ this.binding = binding.get();
+ }
+
+ public void sethInfo(VCFInfoHeaderLine hInfo) {
+ this.hInfo = hInfo;
+ }
+ }
+
+ protected List getRequestedExpressions() { return expressions; }
+
+ // select specific expressions to use
+ public void addExpressions(Set expressionsToUse, List> dataSources, boolean expressionAlleleConcordance) {//, Set) {
+ // set up the expressions
+ for ( final String expression : expressionsToUse ) {
+ expressions.add(new VAExpression(expression, dataSources));
+ }
+ this.expressionAlleleConcordance = expressionAlleleConcordance;
+ }
+
+ /**
+ * Handles logic for expressions for variant contexts. Used to add annotations from one vcf file into the fields
+ * of another if the variant contexts match sufficiently between the two files.
+ *
+ * @param vc VariantContext to add annotations to
+ * @param features FeatureContext object containing extra VCF features to add to vc
+ * @param ref Reference context object corresponding to the region overlapping vc
+ * @param attributes running list of attributes into which to place new annotations
+ */
+ private void annotateExpressions(final VariantContext vc,
+ final FeatureContext features,
+ final ReferenceContext ref,
+ final Map attributes){
+ Utils.nonNull(vc);
+
+ // each requested expression
+ for ( final VAExpression expression : expressions ) {
+ List variantContexts = features.getValues(expression.binding, vc.getStart());
+
+ if (!variantContexts.isEmpty()) {
+ // get the expression's variant context
+ VariantContext expressionVC = variantContexts.iterator().next();
+
+ // special-case the ID field
+ if (expression.fieldName.equals("ID")) {
+ if (expressionVC.hasID()) {
+ attributes.put(expression.fullName, expressionVC.getID());
+ }
+ } else if (expression.fieldName.equals("ALT")) {
+ attributes.put(expression.fullName, expressionVC.getAlternateAllele(0).getDisplayString());
+ } else if (expression.fieldName.equals("FILTER")) {
+ final String filterString = expressionVC.isFiltered() ? expressionVC.getFilters().stream().collect(Collectors.joining(",")) : "PASS";
+ attributes.put(expression.fullName, filterString);
+ } else if (expressionVC.hasAttribute(expression.fieldName)) {
+ // find the info field
+ final VCFInfoHeaderLine hInfo = expression.hInfo;
+ if (hInfo == null) {
+ throw new UserException("Cannot annotate expression " + expression.fullName + " at " + ref.getInterval() + " for variant allele(s) " + vc.getAlleles() + ", missing header info");
+ }
+
+ //
+ // Add the info field annotations
+ //
+ final boolean useRefAndAltAlleles = VCFHeaderLineCount.R == hInfo.getCountType();
+ final boolean useAltAlleles = VCFHeaderLineCount.A == hInfo.getCountType();
+
+ // Annotation uses ref and/or alt alleles or enforce allele concordance
+ if ((useAltAlleles || useRefAndAltAlleles) || expressionAlleleConcordance) {
+
+ // remove brackets and spaces from expression value
+ final String cleanedExpressionValue = expressionVC.getAttribute(expression.fieldName,"").toString().replaceAll("[\\[\\]\\s]", "");
+
+ // get comma separated expression values
+ final ArrayList expressionValuesList = new ArrayList<>(Arrays.asList(cleanedExpressionValue.split(",")));
+
+ boolean canAnnotate = false;
+ // get the minimum biallelics without genotypes
+
+ final List minBiallelicVCs = getMinRepresentationBiallelics(vc);
+ final List minBiallelicExprVCs = getMinRepresentationBiallelics(expressionVC);
+
+ // check concordance
+ final List annotationValues = new ArrayList<>();
+ for (final VariantContext biallelicVC : minBiallelicVCs) {
+ // check that ref and alt alleles are the same
+ List exprAlleles = biallelicVC.getAlleles();
+ boolean isAlleleConcordant = false;
+ int i = 0;
+ for (final VariantContext biallelicExprVC : minBiallelicExprVCs) {
+ List alleles = biallelicExprVC.getAlleles();
+ // concordant
+ if (alleles.equals(exprAlleles)) {
+ // get the value for the reference if needed.
+ if (i == 0 && useRefAndAltAlleles) {
+ annotationValues.add(expressionValuesList.get(i++));
+ }
+ // use annotation expression and add to vc
+ annotationValues.add(expressionValuesList.get(i));
+ isAlleleConcordant = true;
+ canAnnotate = true;
+ break;
+ }
+ i++;
+ }
+
+ // can not find allele match so set to annotation value to zero
+ if (!isAlleleConcordant) {
+ annotationValues.add("0");
+ }
+ }
+
+ // some allele matches so add the annotation values
+ if (canAnnotate) {
+ attributes.put(expression.fullName, annotationValues);
+ }
+ } else {
+ // use all of the expression values
+ attributes.put(expression.fullName, expressionVC.getAttribute(expression.fieldName));
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Break the variant context into bialleles (reference and alternate alleles) and trim to a minimum representation
+ *
+ * @param vc variant context to annotate
+ * @return list of biallelics trimmed to a minimum representation
+ */
+ private List getMinRepresentationBiallelics(final VariantContext vc) {
+ final List minRepresentationBiallelicVCs = new ArrayList<>();
+ if (vc.getNAlleles() > 2) {
+ // TODO, this doesn't actually need to be done, we can simulate it at less cost
+ for (int i = 1; i < vc.getNAlleles(); i++) {
+ // Determining if the biallelic would have been considered a SNP
+ if (! (vc.getReference().length() == 1 && vc.getAlternateAllele(i-1).length() == 1) ) {
+ minRepresentationBiallelicVCs.add(GATKVariantContextUtils.trimAlleles(
+ new VariantContextBuilder(vc)
+ .alleles(Arrays.asList(vc.getReference(),vc.getAlternateAllele(i-1)))
+ .attributes(removeIrrelevantAttributes(vc.getAttributes())).make(), true, true));
+ } else {
+ minRepresentationBiallelicVCs.add(new VariantContextBuilder(vc)
+ .alleles(Arrays.asList(vc.getReference(),vc.getAlternateAllele(i-1)))
+ .attributes(removeIrrelevantAttributes(vc.getAttributes())).make());
+ }
+ }
+ } else {
+ minRepresentationBiallelicVCs.add(vc);
+ }
+
+ return minRepresentationBiallelicVCs;
+ }
+
+ private Map removeIrrelevantAttributes(Map attributes) {
+ // since the VC has been subset, remove the invalid attributes
+ Map ret = new HashMap<>(attributes);
+ for ( final String key : attributes.keySet() ) {
+ if ( !(key.equals(VCFConstants.ALLELE_COUNT_KEY) || key.equals(VCFConstants.ALLELE_FREQUENCY_KEY) || key.equals(VCFConstants.ALLELE_NUMBER_KEY)) ) {
+ ret.remove(key);
+ }
+ }
+ return ret;
}
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RMSMappingQuality.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RMSMappingQuality.java
index 9184f057a75..af378b5059d 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RMSMappingQuality.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RMSMappingQuality.java
@@ -4,18 +4,15 @@
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.vcf.VCFInfoHeaderLine;
-import org.apache.log4j.Logger;
import org.broadinstitute.barclay.help.DocumentedFeature;
-import org.broadinstitute.hellbender.engine.AlignmentContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.annotator.InfoFieldAnnotation;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
+import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
-import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import java.util.*;
@@ -49,7 +46,8 @@ public final class AS_RMSMappingQuality extends InfoFieldAnnotation implements A
private final String printFormat = "%.2f";
- private static final Logger logger = Logger.getLogger(AS_RMSMappingQuality.class);
+ private static final OneShotLogger allele_logger = new OneShotLogger(AS_RMSMappingQuality.class);
+ private static final OneShotLogger genotype_logger = new OneShotLogger(AS_RMSMappingQuality.class);
public static final String SPLIT_DELIM = "\\|"; //String.split takes a regex, so we need to escape the pipe
public static final String PRINT_DELIM = "|";
@@ -58,26 +56,51 @@ public final class AS_RMSMappingQuality extends InfoFieldAnnotation implements A
public Map annotate(final ReferenceContext ref,
final VariantContext vc,
final ReadLikelihoods likelihoods) {
- return annotateRawData(ref, vc, likelihoods);
+ Utils.nonNull(vc);
+ if ( likelihoods == null) {
+ return Collections.emptyMap();
+ }
+ final Map annotations = new HashMap<>();
+ final ReducibleAnnotationData myData = new ReducibleAnnotationData<>(null);
+ getRMSDataFromLikelihoods(likelihoods, myData);
+ final String annotationString = makeFinalizedAnnotationString(vc, myData.getAttributeMap());
+ annotations.put(getKeyNames().get(0), annotationString);
+ return annotations;
}
+
@Override
public Map annotateRawData(final ReferenceContext ref,
final VariantContext vc,
final ReadLikelihoods likelihoods ) {
Utils.nonNull(vc);
- if ( likelihoods == null) {
+ if ( likelihoods == null || !likelihoods.hasFilledLikelihoods()) {
return Collections.emptyMap();
}
final Map annotations = new LinkedHashMap<>();
- final ReducibleAnnotationData myData = new ReducibleAnnotationData<>(null);
- calculateRawData(vc, likelihoods, myData);
+ final ReducibleAnnotationData myData = new ReducibleAnnotationData<>(null);
+ getRMSDataFromLikelihoods(likelihoods, myData);
final String annotationString = makeRawAnnotationString(vc.getAlleles(), myData.getAttributeMap());
annotations.put(getRawKeyName(), annotationString);
return annotations;
}
+ @SuppressWarnings({"unchecked", "rawtypes"})//FIXME
+ public void calculateRawData(final VariantContext vc,
+ final ReadLikelihoods likelihoods,
+ final ReducibleAnnotationData myData){
+ //For the raw data here, we're only keeping track of the sum of the squares of our values
+ //When we go to reduce, we'll use the AD info to get the number of reads
+
+ //must use likelihoods for allele-specific annotations
+ if (likelihoods == null) {
+ return;
+ }
+ getRMSDataFromLikelihoods(likelihoods, myData);
+ }
+
+
/**
* For AS_RMSMappingQuality annotations, the annotations will simply consist of a list of the total value for
* every allele computed by parsing all of the individual AS_RMSMappingQuality Raw Key values as doubles
@@ -87,10 +110,10 @@ public Map annotateRawData(final ReferenceContext ref,
@SuppressWarnings({"unchecked", "rawtypes"})//FIXME generics here blow up
public Map combineRawData(final List vcAlleles, final List> annotationList) {
//VC already contains merged alleles from ReferenceConfidenceVariantContextMerger
- ReducibleAnnotationData combinedData = new AlleleSpecificAnnotationData(vcAlleles, null);
+ ReducibleAnnotationData combinedData = new AlleleSpecificAnnotationData(vcAlleles, null);
for (final ReducibleAnnotationData> currentValue : annotationList) {
- ReducibleAnnotationData value = (ReducibleAnnotationData)currentValue;
+ ReducibleAnnotationData value = (ReducibleAnnotationData)currentValue;
parseRawDataString(value);
combineAttributeMap(value, combinedData);
@@ -101,7 +124,7 @@ public Map combineRawData(final List vcAlleles, final Li
return annotations;
}
- public void combineAttributeMap(final ReducibleAnnotationData toAdd, final ReducibleAnnotationData combined) {
+ public void combineAttributeMap(final ReducibleAnnotationData toAdd, final ReducibleAnnotationData combined) {
//check that alleles match
for (final Allele currentAllele : combined.getAlleles()){
//combined is initialized with all alleles, but toAdd might have only a subset
@@ -115,7 +138,7 @@ public void combineAttributeMap(final ReducibleAnnotationData toAdd, fin
}
}
- protected void parseRawDataString(final ReducibleAnnotationData myData) {
+ protected void parseRawDataString(final ReducibleAnnotationData myData) {
final String rawDataString = myData.getRawData();
//get per-allele data by splitting on allele delimiter
final String[] rawDataPerAllele = rawDataString.split(SPLIT_DELIM);
@@ -152,19 +175,6 @@ public Map finalizeRawData(final VariantContext vc, final Varian
return annotations;
}
- @SuppressWarnings({"unchecked", "rawtypes"})//FIXME
- public void calculateRawData(final VariantContext vc,
- final ReadLikelihoods likelihoods,
- final ReducibleAnnotationData myData){
- //For the raw data here, we're only keeping track of the sum of the squares of our values
- //When we go to reduce, we'll use the AD info to get the number of reads
-
- //must use likelihoods for allele-specific annotations
- if (likelihoods == null) {
- return;
- }
- getRMSDataFromLikelihoods(likelihoods, myData);
- }
@Override
public List getKeyNames() { return Arrays.asList(GATKVCFConstants.AS_RMS_MAPPING_QUALITY_KEY); }
@@ -172,7 +182,7 @@ public void calculateRawData(final VariantContext vc,
@Override
public String getRawKeyName() { return GATKVCFConstants.AS_RAW_RMS_MAPPING_QUALITY_KEY; }
- private void getRMSDataFromLikelihoods(final ReadLikelihoods likelihoods, ReducibleAnnotationData myData) {
+ private void getRMSDataFromLikelihoods(final ReadLikelihoods likelihoods, ReducibleAnnotationData myData) {
for ( final ReadLikelihoods.BestAllele bestAllele : likelihoods.bestAlleles() ) {
if (bestAllele.isInformative()) {
final int mq = bestAllele.read.getMappingQuality();
@@ -184,7 +194,7 @@ private void getRMSDataFromLikelihoods(final ReadLikelihoods likelihoods
}
}
- private String makeRawAnnotationString(final List vcAlleles, final Map perAlleleValues) {
+ private String makeRawAnnotationString(final List vcAlleles, final Map perAlleleValues) {
String annotationString = "";
for (final Allele current : vcAlleles) {
if (!annotationString.isEmpty()) {
@@ -199,7 +209,7 @@ private String makeRawAnnotationString(final List vcAlleles, final Map perAlleleValues) {
+ private String makeFinalizedAnnotationString(final VariantContext vc, final Map perAlleleValues) {
final Map variantADs = getADcounts(vc);
String annotationString = "";
for (final Allele current : vc.getAlternateAlleles()) {
@@ -209,7 +219,7 @@ private String makeFinalizedAnnotationString(final VariantContext vc, final Map<
if (perAlleleValues.containsKey(current)) {
annotationString += String.format(printFormat, Math.sqrt((double) perAlleleValues.get(current) / variantADs.get(current)));
} else {
- logger.warn("ERROR: VC allele is not found in annotation alleles -- maybe there was trimming?");
+ allele_logger.warn("ERROR: VC allele is not found in annotation alleles -- maybe there was trimming?");
}
}
return annotationString;
@@ -218,7 +228,7 @@ private String makeFinalizedAnnotationString(final VariantContext vc, final Map<
private Map getADcounts(final VariantContext vc) {
final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 ) {
- logger.warn("VC does not have genotypes -- annotations were calculated in wrong order");
+ genotype_logger.warn("VC does not have genotypes -- annotations were calculated in wrong order");
return null;
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RankSumTest.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RankSumTest.java
index f1c39989edb..fa451a06898 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RankSumTest.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RankSumTest.java
@@ -1,6 +1,8 @@
package org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific;
+import com.google.common.primitives.Doubles;
import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
@@ -9,7 +11,9 @@
import org.broadinstitute.hellbender.utils.CompressedDataList;
import org.broadinstitute.hellbender.utils.Histogram;
import org.broadinstitute.hellbender.utils.MannWhitneyU;
+import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
+import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import java.util.*;
@@ -27,7 +31,40 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn
public Map annotate(final ReferenceContext ref,
final VariantContext vc,
final ReadLikelihoods likelihoods) {
- return annotateRawData(ref, vc, likelihoods);
+ Utils.nonNull(vc, "vc is null");
+
+ final GenotypesContext genotypes = vc.getGenotypes();
+ if (genotypes == null || genotypes.isEmpty()) {
+ return Collections.emptyMap();
+ }
+
+ final List refQuals = new ArrayList<>();
+ final List altQuals = new ArrayList<>();
+
+ final int refLoc = vc.getStart();
+
+ if( likelihoods != null) {
+ // Default to using the likelihoods to calculate the rank sum
+ if (likelihoods.hasFilledLikelihoods()) {
+ fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals, refLoc);
+ }
+ }
+
+ if ( refQuals.isEmpty() && altQuals.isEmpty() ) {
+ return Collections.emptyMap();
+ }
+
+ final MannWhitneyU mannWhitneyU = new MannWhitneyU();
+
+ // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
+ final MannWhitneyU.Result result = mannWhitneyU.test(Doubles.toArray(altQuals), Doubles.toArray(refQuals), MannWhitneyU.TestType.FIRST_DOMINATES);
+ final double zScore = result.getZ();
+
+ if (Double.isNaN(zScore)) {
+ return Collections.emptyMap();
+ } else {
+ return Collections.singletonMap(getKeyNames().get(0), String.format("%.3f", zScore));
+ }
}
/**
@@ -45,7 +82,7 @@ public Map annotate(final ReferenceContext ref,
public Map annotateRawData(final ReferenceContext ref,
final VariantContext vc,
final ReadLikelihoods likelihoods ) {
- if ( likelihoods == null) {
+ if ( likelihoods == null || !likelihoods.hasFilledLikelihoods()) {
return Collections.emptyMap();
}
@@ -112,7 +149,7 @@ protected String makeRawAnnotationString(final List vcAlleles, final Map
// Generates as CompressedDataList over integer values over each read
@SuppressWarnings({"unchecked", "rawtypes"})//FIXME generics here blow up
- public void calculateRawData(VariantContext vc, final ReadLikelihoods likelihoods, ReducibleAnnotationData myData) {
+ private void calculateRawData(VariantContext vc, final ReadLikelihoods likelihoods, ReducibleAnnotationData myData) {
if( vc.getGenotypes().getSampleNames().size() != 1) {
throw new IllegalStateException("Calculating raw data for allele-specific rank sums requires variant context input with exactly one sample, as in a gVCF.");
}
@@ -314,5 +351,9 @@ public String outputSingletonValueAsHistogram(final Double rankSumValue) {
return h.toString();
}
-
+ @Override
+ // Unnecessary for this implementation of Annotate
+ protected final OptionalDouble getElementForPileupElement(PileupElement p, int refLoc) {
+ return null;
+ }
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_ReadPosRankSumTest.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_ReadPosRankSumTest.java
index d01c1b0af45..007a91f2fbd 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_ReadPosRankSumTest.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_ReadPosRankSumTest.java
@@ -5,6 +5,7 @@
import org.broadinstitute.hellbender.tools.walkers.annotator.ReadPosRankSumTest;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
+import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_StrandBiasTest.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_StrandBiasTest.java
index 552a026b256..878bebf2764 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_StrandBiasTest.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_StrandBiasTest.java
@@ -3,14 +3,13 @@
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.log4j.Logger;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.annotator.StrandBiasTest;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
+import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
-import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import java.util.*;
@@ -36,13 +35,6 @@ public AS_StrandBiasTest(){
@Override
public String getRawKeyName() { return GATKVCFConstants.AS_SB_TABLE_KEY; }
- @Override
- public Map annotate(final ReferenceContext ref,
- final VariantContext vc,
- final ReadLikelihoods likelihoods) {
- return annotateRawData(ref, vc, likelihoods);
- }
-
/**
* Method which determines how the Strand Bias read direction allele data must be combined into a final annotation
* Must be overridden by client methods.
@@ -68,14 +60,14 @@ public Map annotateRawData(final ReferenceContext ref,
final ReadLikelihoods likelihoods ) {
//for allele-specific annotations we only call from HC and we only use likelihoods
- if ( likelihoods == null) {
+ if ( likelihoods == null || !likelihoods.hasFilledLikelihoods()) {
return Collections.emptyMap();
}
// calculate the annotation from the likelihoods
// likelihoods can come from HaplotypeCaller call to VariantAnnotatorEngine
final Map annotations = new HashMap<>();
final ReducibleAnnotationData> myData = new AlleleSpecificAnnotationData<>(vc.getAlleles(),null);
- calculateRawData(vc, likelihoods, myData);
+ getStrandCountsFromLikelihoodMap(vc, likelihoods, myData, MIN_COUNT);
final Map> perAlleleValues = myData.getAttributeMap();
final String annotationString = makeRawAnnotationString(vc.getAlleles(), perAlleleValues);
annotations.put(getRawKeyName(), annotationString);
@@ -201,18 +193,6 @@ protected void parseRawDataString(ReducibleAnnotationData> myData)
myData.setAttributeMap(perAlleleValues);
}
-
- @SuppressWarnings({"unchecked", "rawtypes"})//FIXME
- public void calculateRawData(final VariantContext vc,
- final ReadLikelihoods likelihoods,
- final ReducibleAnnotationData rawAnnotations) {
- if(likelihoods == null) {
- return;
- }
-
- getStrandCountsFromLikelihoodMap(vc, likelihoods, rawAnnotations, MIN_COUNT);
- }
-
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
@@ -308,6 +288,13 @@ protected Map calculateAnnotationFromGTfield(final GenotypesCont
return Collections.emptyMap();
}
+ @Override
+ //Allele-specific annotations cannot be called from walkers other than HaplotypeCaller
+ protected Map calculateAnnotationFromStratifiedContexts(final Map> stratifiedContexts,
+ final VariantContext vc){
+ return new HashMap<>();
+ }
+
public static String rawValueAsString(int[][] table) {
return table[0][0]+","+table[0][1]+ PRINT_DELIM +table[1][0]+","+table[1][1];
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java
index 70847054687..43d4e80782c 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java
@@ -171,7 +171,7 @@ private void initialize(boolean createBamOutIndex, final boolean createBamOutMD5
initializeActiveRegionEvaluationGenotyperEngine();
if (annotationEngine == null) {
- annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(hcArgs.defaultGATKVariantAnnotationArgumentCollection, hcArgs.dbsnp.dbsnp, hcArgs.comps);
+ annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(hcArgs.defaultGATKVariantAnnotationArgumentCollection, hcArgs.dbsnp.dbsnp, hcArgs.comps, emitReferenceConfidence());
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java
index f63c2610970..0d4086c7304 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java
@@ -120,7 +120,7 @@ private void initialize(final boolean createBamOutIndex, final boolean createBam
" sample name " + normalSampleName);
}
- annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(MTAC.defaultGATKVariantAnnotationArgumentCollection, null, Collections.emptyList());
+ annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(MTAC.defaultGATKVariantAnnotationArgumentCollection, null, Collections.emptyList(), false);
assemblyEngine = AssemblyBasedCallerUtils.createReadThreadingAssembler(MTAC);
likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(MTAC.likelihoodArgs);
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/genotyper/ReadLikelihoods.java b/src/main/java/org/broadinstitute/hellbender/utils/genotyper/ReadLikelihoods.java
index 19b0a099dc7..ea2fb878d91 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/genotyper/ReadLikelihoods.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/genotyper/ReadLikelihoods.java
@@ -8,10 +8,13 @@
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import org.apache.commons.collections.ListUtils;
import org.apache.commons.math3.stat.descriptive.rank.Median;
+import org.broadinstitute.hellbender.tools.walkers.qc.Pileup;
import org.broadinstitute.hellbender.utils.IndexRange;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.downsampling.AlleleBiasedDownsamplingUtils;
+import org.broadinstitute.hellbender.utils.pileup.PileupElement;
+import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import java.util.*;
@@ -27,7 +30,7 @@
*
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
*/
-public final class ReadLikelihoods implements SampleList, AlleleList {
+public class ReadLikelihoods implements SampleList, AlleleList {
/**
* Index indicaintg that the reference allele is missing.
@@ -37,7 +40,7 @@ public final class ReadLikelihoods implements SampleList, Alle
/**
* Reads by sample index. Each sub array contains reference to the reads of the ith sample.
*/
- private final GATKRead[][] readsBySampleIndex;
+ protected final GATKRead[][] readsBySampleIndex;
/**
* Indexed per sample, allele and finally read (within sample).
@@ -45,17 +48,17 @@ public final class ReadLikelihoods implements SampleList, Alle
* valuesBySampleIndex[s][a][r] == lnLk(R_r | A_a) where R_r comes from Sample s.
*