From dc59aabb72e2561da14a2af867fb359bbbcf824d Mon Sep 17 00:00:00 2001 From: Lee Lichtenstein Date: Fri, 28 Sep 2018 20:09:16 -0400 Subject: [PATCH] Adding command line exclusion lists, so that users can prune fields from output. (#5226) - Added support for the `--exclude-field` parameter, which can be specified multiple times. This will remove the named rendered fields from the output. Each field name must be given exactly as it is written in the output MAF or VCF (e.g. ClinVar_ALLELEID); only exact name matches are excluded. Closes #4359. - Does basic cleaning of field values for MAF. I.e. cleaning tabs and \n. Fixes #4693 --- .../tools/funcotator/Funcotation.java | 17 --- .../FuncotatorArgumentCollection.java | 7 + .../FuncotatorArgumentDefinitions.java | 5 + .../tools/funcotator/FuncotatorEngine.java | 4 +- .../tools/funcotator/FuncotatorUtils.java | 42 +++++- .../tools/funcotator/OutputRenderer.java | 33 ++++- .../dataSources/TableFuncotation.java | 24 ++-- .../gencode/GencodeFuncotation.java | 3 +- .../gencode/GencodeFuncotationBuilder.java | 2 +- .../mafOutput/MafOutputRenderer.java | 29 +++- .../vcfOutput/VcfOutputRenderer.java | 57 +++++--- .../funcotator/FuncotatorIntegrationTest.java | 129 +++++++++++++---- .../funcotator/FuncotatorUtilsUnitTest.java | 66 ++++++++- .../dataSources/TableFuncotationUnitTest.java | 8 +- .../gencode/GencodeFuncotationUnitTest.java | 22 +-- .../vcf/VcfFuncotationFactoryUnitTest.java | 21 +-- .../mafOutput/MafOutputRendererUnitTest.java | 76 +++++++++- .../vcfOutput/VcfOutputRendererUnitTest.java | 54 +++++++ .../utils/test/FuncotatorTestUtils.java | 136 ++++++++++++++++++ 19 files changed, 594 insertions(+), 141 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotation.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotation.java index c506dd7479d..db13a074588 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotation.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotation.java @@ -25,12 +25,6 @@ public interface Funcotation { */ void setFieldSerializationOverrideValue( final String fieldName, final String overrideValue ); - /** - * Converts this {@link Funcotation} to a string suitable for insertion into a VCF file. - * @return a {@link String} representing this {@link Funcotation} suitable for insertion into a VCF file. - */ - String serializeToVcfString(); - /** * @return The name of the data source behind the {@link DataSourceFuncotationFactory} used to create this {@link Funcotation}. */ @@ -76,17 +70,6 @@ default void setFieldSerializationOverrideValues(final Map fieldS } } - /** - * TODO: This interface should have nothing specific to a VCF. That should be the job of the VCFOutputRenderer to sanitize any strings. https://github.com/broadinstitute/gatk/issues/4797 - * Converts this {@link Funcotation} to a string suitable for insertion into a VCF file. - * {@code manualAnnotationString} should be written first, followed by the inherent annotations in this {@link Funcotation}. - * @param manualAnnotationString A {@link String} of manually-provided annotations to add to this {@link Funcotation}. - * @return a {@link String} representing this {@link Funcotation} suitable for insertion into a VCF file. - */ - default String serializeToVcfString(final String manualAnnotationString) { - return (manualAnnotationString == null ? "" : manualAnnotationString) + serializeToVcfString(); - } - /** * @return Return whether the field exists in this {@link Funcotation}. 
*/ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentCollection.java index af888d17a0a..f4bba99e271 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentCollection.java @@ -101,4 +101,11 @@ public class FuncotatorArgumentCollection implements Serializable { doc = "(Advanced / DO NOT USE*) If you select this flag, Funcotator will force a conversion of variant contig names from b37 to hg19. *This option is useful in integration tests (written by devs) only." ) public boolean forceB37ToHg19ContigNameConversion = false; + + @Argument( + fullName = FuncotatorArgumentDefinitions.EXCLUSION_FIELDS_LONG_NAME, + optional = true, + doc = "Fields that should not be rendered in the final output. Only exact name matches will be excluded." 
+ ) + public Set excludedFields = new HashSet<>(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java index 2954cb4bd98..3bf8c016508 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java @@ -48,6 +48,11 @@ public class FuncotatorArgumentDefinitions { */ public static final String ANNOTATION_OVERRIDES_LONG_NAME = "annotation-override"; + /** + * List of final rendered fields to exclude from a final rendering + */ + public static final String EXCLUSION_FIELDS_LONG_NAME = "exclude-field"; + public static final String HG19_REFERENCE_VERSION_STRING = "hg19"; public static final String HG38_REFERENCE_VERSION_STRING = "hg38"; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngine.java index 170b7ec3f3f..67d2b4f30fe 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngine.java @@ -192,7 +192,7 @@ public OutputRenderer createOutputRenderer(final LinkedHashMap a unaccountedForDefaultAnnotations, unaccountedForOverrideAnnotations, defaultToolVcfHeaderLines.stream().map(Object::toString).collect(Collectors.toCollection(LinkedHashSet::new)), - funcotatorArgs.referenceVersion); + funcotatorArgs.referenceVersion, funcotatorArgs.excludedFields); break; case VCF: @@ -202,7 +202,7 @@ public OutputRenderer createOutputRenderer(final LinkedHashMap a headerForVariants, unaccountedForDefaultAnnotations, unaccountedForOverrideAnnotations, - defaultToolVcfHeaderLines + defaultToolVcfHeaderLines, funcotatorArgs.excludedFields ); break; 
default: diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java index a17d15a140e..ab6a01fdf79 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java @@ -2077,14 +2077,25 @@ public static String[] extractFuncotatorKeysFromHeaderDescription(final String f } /** - * Make sure that an individual funcotation (i.e. single value of a funcotation) is sanitized for VCF consumption. + * Make sure that an individual funcotation field (i.e. single value of a funcotation) is sanitized for VCF consumption. * Particularly, make sure that it does not allow special characters that would interfere with VCF parsing. - * @param individualFuncotation value from a funcotation Never {@code null} - * @return input string with special characters replaced by _%HEX%_ where HEX is the 2 digit ascii hex code. + * @param individualFuncotationField value from a funcotation. Never {@code null} + * @return input string with special characters replaced by _%HEX%_ where HEX is the 2 digit ascii hex code. Never {@code null} */ - public static String sanitizeFuncotationForVcf(final String individualFuncotation) { - Utils.nonNull(individualFuncotation); - return StringUtils.replaceEach(individualFuncotation, new String[]{",", ";", "=", "\t", "|", " "}, new String[]{"_%2C_", "_%3B_", "_%3D_", "_%09_", "_%7C_", "_%20_"}); + public static String sanitizeFuncotationFieldForVcf(final String individualFuncotationField) { + Utils.nonNull(individualFuncotationField); + return StringUtils.replaceEach(individualFuncotationField, new String[]{",", ";", "=", "\t", "|", " ", "\n"}, new String[]{"_%2C_", "_%3B_", "_%3D_", "_%09_", "_%7C_", "_%20_", "_%0A_"}); + } + + /** + * Make sure that an individual funcotation field (i.e. 
single value of a funcotation) is sanitized for MAF consumption. + * Particularly, make sure that it does not allow special characters that would interfere with MAF parsing. + * @param individualFuncotationField value from a funcotation. Never {@code null} + * @return input string with special characters replaced by _%HEX%_ where HEX is the 2 digit ascii hex code. Never {@code null} + */ + public static String sanitizeFuncotationFieldForMaf(final String individualFuncotationField) { + Utils.nonNull(individualFuncotationField); + return StringUtils.replaceEach(individualFuncotationField, new String[]{"\t", "\n"}, new String[]{"_%09_", "_%0A_"}); } /** @@ -2175,5 +2186,24 @@ public static List createFuncotations(final VariantContext vc, fina return result; } + + /** + * @param funcotation Funcotation to render for a VCF. Never {@code null} + * @param includedFields List of fields to include. Any that match fields in the funcotation will be rendered. + * Never {@code null} + * @return string with the VCF representation of a {@link VcfOutputRenderer#FUNCOTATOR_VCF_FIELD_NAME} for the given + * Funcotation. Never {@code null}, but empty string is possible. 
+ */ + public static String renderSanitizedFuncotationForVcf(final Funcotation funcotation, final List includedFields) { + Utils.nonNull(funcotation); + Utils.nonNull(includedFields); + if (includedFields.size() == 0) { + return ""; + } + return funcotation.getFieldNames().stream() + .filter(f -> includedFields.contains(f)) + .map(field -> FuncotatorUtils.sanitizeFuncotationFieldForVcf(funcotation.getField(field))) + .collect(Collectors.joining(VcfOutputRenderer.FIELD_DELIMITER)); + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/OutputRenderer.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/OutputRenderer.java index 1b40adf6fef..956f287a839 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/OutputRenderer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/OutputRenderer.java @@ -1,6 +1,12 @@ package org.broadinstitute.hellbender.tools.funcotator; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeaderLineType; +import htsjdk.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation; +import org.broadinstitute.hellbender.tools.funcotator.metadata.VcfFuncotationMetadata; +import org.broadinstitute.hellbender.utils.Utils; import java.util.LinkedHashMap; import java.util.List; @@ -23,11 +29,6 @@ public abstract class OutputRenderer implements AutoCloseable { */ protected LinkedHashMap manualAnnotations; - /** - * {@link String} representation of {@link OutputRenderer#manualAnnotations} serialized to the output format of this {@link OutputRenderer}. - */ - protected String manualAnnotationSerializedString; - /** * {@link List} of the {@link DataSourceFuncotationFactory} objects that are being used in this run of {@link Funcotator}. 
*/ @@ -56,4 +57,26 @@ public String getDataSourceInfoString() { * @param txToFuncotationMap {@link FuncotationMap} to add to the given {@code variant} on output. */ public abstract void write(final VariantContext variant, final FuncotationMap txToFuncotationMap); + + /** + * Utility for output renderers. + * + * Given a {@link LinkedHashMap} of field:value pairs (String:String), create a funcotation (funcotation metadata included!) + * with the best representation possible. + * + * @param data Never {@code null} + * @param altAllele Never {@code null} + * @param datasourceName Name to use as the datasource. Never {@code null} + * @return A funcotation with all fields considered strings and a generic description. Funcotation metadata is populated. + * Never {@code null} + */ + public static Funcotation createFuncotationFromLinkedHashMap(final LinkedHashMap data, final Allele altAllele, final String datasourceName) { + Utils.nonNull(data); + Utils.nonNull(altAllele); + Utils.nonNull(datasourceName); + final List manualAnnotationHeaderLines = data.entrySet().stream() + .map(e -> new VCFInfoHeaderLine(e.getKey(), 1, VCFHeaderLineType.String, "Specified from map: " + e.getKey() + ":" + e.getValue() )) + .collect(Collectors.toList()); + return TableFuncotation.create(data, altAllele, datasourceName, VcfFuncotationMetadata.create(manualAnnotationHeaderLines)); + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotation.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotation.java index 3ed43760eb3..14f167639c3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotation.java @@ -5,12 +5,10 @@ import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; import 
org.broadinstitute.hellbender.tools.funcotator.Funcotation; -import org.broadinstitute.hellbender.tools.funcotator.FuncotatorUtils; import org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv.LocatableXsvFuncotationFactory; import org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv.SimpleKeyXsvFuncotationFactory; import org.broadinstitute.hellbender.tools.funcotator.metadata.FuncotationMetadata; import org.broadinstitute.hellbender.tools.funcotator.metadata.FuncotationMetadataUtils; -import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvTableFeature; @@ -101,13 +99,6 @@ public void setFieldSerializationOverrideValue(final String fieldName, final Str fieldMap.put(fieldName, overrideValue); } - @Override - public String serializeToVcfString() { - return fieldMap.values().stream() - .map(f -> (f == null ? "" : FuncotatorUtils.sanitizeFuncotationForVcf(f))) - .collect(Collectors.joining(VcfOutputRenderer.FIELD_DELIMITER)); - } - @Override public LinkedHashSet getFieldNames() { return new LinkedHashSet<>(fieldMap.keySet()); @@ -216,6 +207,21 @@ public static TableFuncotation create(final Map data, final Alle return create(fieldNames, fieldValues, altAllele, dataSourceName, metadata); } + /** + * See {@link TableFuncotation#create(List, List, Allele, String, FuncotationMetadata)} + * + * @param data Map for field name to field value. 
Never {@code null} + * @param altAllele See {@link TableFuncotation#create(List, List, Allele, String, FuncotationMetadata)} + * @param dataSourceName See {@link TableFuncotation#create(List, List, Allele, String, FuncotationMetadata)} + * @param metadata See {@link TableFuncotation#create(List, List, Allele, String, FuncotationMetadata)} + * @return See {@link TableFuncotation#create(List, List, Allele, String, FuncotationMetadata)} + */ + public static TableFuncotation create(final LinkedHashMap data, final Allele altAllele, final String dataSourceName, final FuncotationMetadata metadata ) { + final List fieldNames = new ArrayList<>(data.keySet()); + final List fieldValues = fieldNames.stream().map(f -> data.get(f)).collect(Collectors.toList()); + return create(fieldNames, fieldValues, altAllele, dataSourceName, metadata); + } + /** * See {@link TableFuncotation#create(List, List, Allele, String, FuncotationMetadata)} * diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotation.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotation.java index 9a8eac8c3c1..247b3f70d8e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotation.java @@ -180,7 +180,6 @@ public Allele getAltAllele() { return Allele.create(tumorSeqAllele2.getBytes(), false); } - @Override public String serializeToVcfString() { // Alias for the FIELD_DELIMITER so we can have nicer looking code: final String DELIMITER = VcfOutputRenderer.FIELD_DELIMITER; @@ -212,7 +211,7 @@ public String serializeToVcfString() { (otherTranscriptsSerializedOverride != null ? otherTranscriptsSerializedOverride : (otherTranscripts != null ? 
otherTranscripts.stream().map(Object::toString).collect(Collectors.joining(VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER)) : "")) ); - return funcotations.stream().map(FuncotatorUtils::sanitizeFuncotationForVcf).collect(Collectors.joining(DELIMITER)); + return funcotations.stream().map(FuncotatorUtils::sanitizeFuncotationFieldForVcf).collect(Collectors.joining(DELIMITER)); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationBuilder.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationBuilder.java index e8217ba5cf2..bd9930fb689 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationBuilder.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationBuilder.java @@ -35,7 +35,7 @@ public GencodeFuncotationBuilder(final GencodeFuncotation gf) { } public GencodeFuncotation build() { - // TODO: In the future, we will need a mechanism for populating the metadata (https://github.com/broadinstitute/gatk/issues/4857) + // TODO: In the future, we will need a mechanism for populating the metadata, especially if we want to support separate INFO fields in a VCF for each funcotation field (https://github.com/broadinstitute/gatk/issues/4857) gencodeFuncotation.setMetadata(FuncotationMetadataUtils.createWithUnknownAttributes(new ArrayList<>(gencodeFuncotation.getFieldNames()))); return gencodeFuncotation; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRenderer.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRenderer.java index 1c3774e1ff9..13b8bed70ba 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRenderer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRenderer.java @@ -9,6 +9,7 @@ 
import org.apache.commons.lang.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.funcotator.*; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; @@ -23,6 +24,7 @@ import java.nio.file.Path; import java.text.SimpleDateFormat; import java.util.*; +import java.util.function.Function; import java.util.stream.Collectors; /** @@ -144,6 +146,9 @@ public class MafOutputRenderer extends OutputRenderer { /** The version of the reference used to create annotations that will be output by this {@link MafOutputRenderer}.*/ private final String referenceVersion; + /** Fields that should be removed in the final MAF file. */ + private final Set excludedOutputFields; + //================================================================================================================== // Constructors: @@ -163,7 +168,7 @@ public MafOutputRenderer(final Path outputFilePath, final LinkedHashMap unaccountedForDefaultAnnotations, final LinkedHashMap unaccountedForOverrideAnnotations, final Set toolHeaderLines, - final String referenceVersion) { + final String referenceVersion, final Set excludedOutputFields) { // Set our internal variables from the input: this.outputFilePath = outputFilePath; @@ -253,9 +258,6 @@ public MafOutputRenderer(final Path outputFilePath, defaultMap.put(MafOutputRendererConstants.FieldName_Score, MafOutputRendererConstants.UNUSED_STRING); defaultMap.put(MafOutputRendererConstants.FieldName_BAM_File, MafOutputRendererConstants.UNUSED_STRING); - // Cache the manual annotation string so we can pass it easily into any Funcotations: - manualAnnotationSerializedString = (manualAnnotations.size() != 0 ? 
MafOutputRendererConstants.FIELD_DELIMITER + String.join( MafOutputRendererConstants.FIELD_DELIMITER, manualAnnotations.values() ) + MafOutputRendererConstants.FIELD_DELIMITER : ""); - // Open the output object: try { printWriter = new PrintWriter(Files.newOutputStream(outputFilePath)); @@ -263,6 +265,8 @@ public MafOutputRenderer(final Path outputFilePath, catch (final IOException ex) { throw new UserException("Error opening output file path: " + outputFilePath.toUri().toString(), ex); } + + this.excludedOutputFields = excludedOutputFields; } //================================================================================================================== @@ -327,12 +331,13 @@ public void write(final VariantContext variant, final FuncotationMap txToFuncota writeString(entry.getValue()); writeString(MafOutputRendererConstants.FIELD_DELIMITER); } - writeLine(manualAnnotationSerializedString); + writeLine(""); } } } - private LinkedHashMap createMafCompliantOutputMap(final Allele altAllele, final List funcotations) { + @VisibleForTesting + LinkedHashMap createMafCompliantOutputMap(final Allele altAllele, final List funcotations) { // Create our output maps: final LinkedHashMap outputMap = new LinkedHashMap<>(defaultMap); final LinkedHashMap extraFieldOutputMap = new LinkedHashMap<>(); @@ -366,7 +371,17 @@ private LinkedHashMap createMafCompliantOutputMap(final Allele a outputMap.putAll(extraFieldOutputMap); // Now translate fields to the field names that MAF likes: - return replaceFuncotationValuesWithMafCompliantValues(outputMap); + final LinkedHashMap mafCompliantMap = replaceFuncotationValuesWithMafCompliantValues(outputMap); + + // Remove any fields that are excluded and sanitize any field values. 
+ return mafCompliantMap.keySet().stream() + .filter(k -> !excludedOutputFields.contains(k)) + .collect(Collectors.toMap( + Function.identity(), k -> FuncotatorUtils.sanitizeFuncotationFieldForMaf(mafCompliantMap.get(k)), + (u, v) -> { + throw new GATKException.ShouldNeverReachHereException("Found duplicate keys for MAF output"); + }, + LinkedHashMap::new)); } //================================================================================================================== diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRenderer.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRenderer.java index 49521e9ef94..b90ae101ddd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRenderer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRenderer.java @@ -1,11 +1,11 @@ package org.broadinstitute.hellbender.tools.funcotator.vcfOutput; +import com.google.common.annotations.VisibleForTesting; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.*; -import org.apache.commons.lang.StringUtils; import org.broadinstitute.hellbender.tools.funcotator.*; import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; @@ -14,6 +14,7 @@ import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * A Funcotator output renderer for writing to VCF files. 
@@ -65,10 +66,15 @@ public class VcfOutputRenderer extends OutputRenderer { //================================================================================================================== private final VariantContextWriter vcfWriter; + + /** VCF Header that came in with the input VCF */ private final VCFHeader existingHeader; private final LinkedHashSet defaultToolVcfHeaderLines; + /** List of the fields that will get rendered in the funcotation annotation. Excluded fields have been removed. */ + private final List finalFuncotationFieldNames; + //================================================================================================================== public VcfOutputRenderer(final VariantContextWriter vcfWriter, @@ -82,15 +88,27 @@ public VcfOutputRenderer(final VariantContextWriter vcfWriter, final VCFHeader existingHeader, final LinkedHashMap unaccountedForDefaultAnnotations, final LinkedHashMap unaccountedForOverrideAnnotations) { - this(vcfWriter, dataSources, existingHeader, unaccountedForDefaultAnnotations, unaccountedForOverrideAnnotations, new LinkedHashSet<>()); + this(vcfWriter, dataSources, existingHeader, unaccountedForDefaultAnnotations, unaccountedForOverrideAnnotations, + new LinkedHashSet<>(), new LinkedHashSet<>()); } + @VisibleForTesting public VcfOutputRenderer(final VariantContextWriter vcfWriter, final List dataSources, final VCFHeader existingHeader, final LinkedHashMap unaccountedForDefaultAnnotations, final LinkedHashMap unaccountedForOverrideAnnotations, final Set defaultToolVcfHeaderLines) { + this(vcfWriter, dataSources, existingHeader, unaccountedForDefaultAnnotations, unaccountedForOverrideAnnotations, + defaultToolVcfHeaderLines, new HashSet<>()); + } + + public VcfOutputRenderer(final VariantContextWriter vcfWriter, + final List dataSources, + final VCFHeader existingHeader, + final LinkedHashMap unaccountedForDefaultAnnotations, + final LinkedHashMap unaccountedForOverrideAnnotations, + final Set 
defaultToolVcfHeaderLines, final Set excludedOutputFields) { this.vcfWriter = vcfWriter; this.existingHeader = existingHeader; @@ -104,8 +122,11 @@ public VcfOutputRenderer(final VariantContextWriter vcfWriter, // Get our default tool VCF header lines: this.defaultToolVcfHeaderLines = new LinkedHashSet<>(defaultToolVcfHeaderLines); - // Cache the manual annotation string so we can pass it easily into any Funcotations: - manualAnnotationSerializedString = (manualAnnotations.size() != 0 ? String.join( FIELD_DELIMITER, manualAnnotations.values() ) + FIELD_DELIMITER : ""); + // Please note that this assumes that there is no conversion between the name given by the datasource (or user) + // and the output name. + finalFuncotationFieldNames = Stream.concat(getDataSourceFieldNamesForHeaderAsList(dataSourceFactories).stream(), manualAnnotations.keySet().stream()) + .filter(f -> !excludedOutputFields.contains(f)) + .collect(Collectors.toList()); // Open the output file and set up the header: final VCFHeader newHeader = createVCFHeader(); @@ -151,15 +172,18 @@ public void write(final VariantContext variant, final FuncotationMap txToFuncota for (final String txId : txToFuncotationMap.getTranscriptList()) { funcotatorAnnotationStringBuilder.append(START_TRANSCRIPT_DELIMITER); final List funcotations = txToFuncotationMap.get(txId); + final Funcotation manualAnnotationFuncotation = createManualAnnotationFuncotation(altAllele); + funcotatorAnnotationStringBuilder.append( - funcotations.stream() + Stream.concat(funcotations.stream(), Stream.of(manualAnnotationFuncotation)) .filter(f -> f.getAltAllele().equals(altAllele)) .filter(f -> f.getFieldNames().size() > 0) .filter(f -> !f.getDataSourceName().equals(FuncotatorConstants.DATASOURCE_NAME_FOR_INPUT_VCFS)) .map(VcfOutputRenderer::adjustIndelAlleleInformation) - .map(f -> retrieveSanitizedFuncotation(f, manualAnnotationSerializedString)) + .map(f -> FuncotatorUtils.renderSanitizedFuncotationForVcf(f, finalFuncotationFieldNames)) 
.collect(Collectors.joining(FIELD_DELIMITER)) ); + funcotatorAnnotationStringBuilder.append(END_TRANSCRIPT_DELIMITER + ALL_TRANSCRIPT_DELIMITER); } // We have a trailing "#" - we need to remove it: @@ -180,6 +204,10 @@ public void write(final VariantContext variant, final FuncotationMap txToFuncota vcfWriter.add( variantContextOutputBuilder.make() ); } + private Funcotation createManualAnnotationFuncotation(final Allele altAllele) { + return OutputRenderer.createFuncotationFromLinkedHashMap(manualAnnotations, altAllele, "UnaccountedManualAnnotations"); + } + //================================================================================================================== /** @@ -252,12 +280,8 @@ private VCFHeader createVCFHeader() { // Add all lines of our existing VCF header: headerLines.addAll( existingHeader.getMetaDataInInputOrder() ); - final String dataSourceFields = getDataSourceFieldNamesForHeader(dataSourceFactories); - final String manualAnnotationFields = String.join( HEADER_LISTED_FIELD_DELIMITER, manualAnnotations.keySet() ); - // Construct (only) the field list delimited by HEADER_LISTED_FIELD_DELIMITER - final String delimitedFields = StringUtils.isEmpty(manualAnnotationFields) ? dataSourceFields : - manualAnnotationFields + HEADER_LISTED_FIELD_DELIMITER + dataSourceFields; + final String delimitedFields = String.join(HEADER_LISTED_FIELD_DELIMITER, finalFuncotationFieldNames); // Add in the lines about Funcotations: headerLines.addAll(defaultToolVcfHeaderLines); @@ -273,22 +297,17 @@ private VCFHeader createVCFHeader() { } /** - * Creates a {@link String} containing the field names from our {@link VcfOutputRenderer#dataSourceFactories} suitable for putting in the VCF header. + * Creates a {@link List} of {@link String} containing the field names from our {@link VcfOutputRenderer#dataSourceFactories} suitable for putting in the VCF header. * * Gencode annotations are put first and then the rest. 
* * @param dataSourceFactories A {@link List} of {@link DataSourceFuncotationFactory} objects from which to pull field names. * @return A {@link String} containing the field names from our {@link VcfOutputRenderer#dataSourceFactories} suitable for putting in the VCF header. */ - private static String getDataSourceFieldNamesForHeader(final List dataSourceFactories) { + private static List getDataSourceFieldNamesForHeaderAsList(final List dataSourceFactories) { return dataSourceFactories.stream().sorted(DataSourceUtils::datasourceComparator) .map(DataSourceFuncotationFactory::getSupportedFuncotationFields) .flatMap(LinkedHashSet::stream) - .map(Object::toString).collect(Collectors.joining(HEADER_LISTED_FIELD_DELIMITER)); - } - - private static String retrieveSanitizedFuncotation(final Funcotation funcotation, final String manualAnnotationSerializedString) { - return funcotation.serializeToVcfString(manualAnnotationSerializedString); - + .map(Object::toString).collect(Collectors.toList()); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java index 148b06676af..92e3c8a9386 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java @@ -1,6 +1,6 @@ package org.broadinstitute.hellbender.tools.funcotator; -import avro.shaded.com.google.common.collect.Sets; +import com.google.common.collect.Sets; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFCompoundHeaderLine; @@ -13,6 +13,10 @@ import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; +import 
org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.FuncotatorReferenceTestUtils; +import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedInterval; import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; @@ -21,10 +25,6 @@ import org.broadinstitute.hellbender.tools.funcotator.mafOutput.MafOutputRenderer; import org.broadinstitute.hellbender.tools.funcotator.mafOutput.MafOutputRendererConstants; import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; -import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; -import org.broadinstitute.hellbender.testutils.FuncotatorReferenceTestUtils; -import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; -import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -34,6 +34,7 @@ import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; import static org.broadinstitute.hellbender.tools.funcotator.FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription; @@ -163,6 +164,24 @@ private ArgumentsBuilder createBaselineArgumentsForFuncotator(final String varia final String refVer, final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType, final boolean shouldValidateSeqDicts) { + return createBaselineArgumentsForFuncotator(variantFileName, + outputFile, + referenceFileName, + dataSourcesPath, + refVer, + outputFormatType, + shouldValidateSeqDicts, + Collections.emptyList()); + } + + private ArgumentsBuilder 
createBaselineArgumentsForFuncotator(final String variantFileName, + final File outputFile, + final String referenceFileName, + final String dataSourcesPath, + final String refVer, + final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType, + final boolean shouldValidateSeqDicts, + final List excludedFields) { final ArgumentsBuilder arguments = new ArgumentsBuilder(); @@ -173,6 +192,7 @@ private ArgumentsBuilder createBaselineArgumentsForFuncotator(final String varia arguments.addArgument(FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, refVer); arguments.addArgument(FuncotatorArgumentDefinitions.OUTPUT_FORMAT_LONG_NAME, outputFormatType.toString()); arguments.addArgument("verbosity", "INFO"); + excludedFields.forEach(ef -> arguments.addArgument(FuncotatorArgumentDefinitions.EXCLUSION_FIELDS_LONG_NAME, ef)); if ( !shouldValidateSeqDicts ) { // Disable the sequence dictionary check for the tests: @@ -602,6 +622,44 @@ public void testCanAnnotateHg38ClinvarAndGencodeV28() { .count(), NUM_CLINVAR_HITS); } + @Test + public void testExclusionFromDatasourceVcfToVcf() { + // Clinvar datasource did go through one round of preprocessing to make contig names "1" --> "chr1" (for example). This is an issue with ClinVar, not GATK. 
+ final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF; + final File outputFile = getOutputFile(outputFormatType); + + final List excludedFields = Arrays.asList("dummy_ClinVar_VCF_DBVARID", "dummy_ClinVar_VCF_CLNVI"); + final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator( + PIK3CA_VCF_HG38, + outputFile, + hg38Chr3Ref, + DS_PIK3CA_DIR, + FuncotatorTestConstants.REFERENCE_VERSION_HG38, + outputFormatType, + false, excludedFields); + + runCommandLine(arguments); + + final Pair> tempVcf = VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath()); + + final String[] funcotatorKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME).getDescription()); + + // Ensure that the header does not contain the excluded fields + Stream.of(funcotatorKeys).forEach(k -> Assert.assertFalse(excludedFields.contains(k))); + + final List variantContexts = tempVcf.getRight(); + for (final VariantContext vc : variantContexts) { + final Map funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute( + funcotatorKeys, vc, "Gencode_28_annotationTranscript", "FAKE_SOURCE"); + for (final String txId: funcs.get(vc.getAlternateAllele(0)).getTranscriptList()) { + final List funcotations = funcs.get(vc.getAlternateAllele(0)).get(txId); + for (final Funcotation funcotation : funcotations) { + funcotation.getFieldNames().forEach(f -> Assert.assertFalse(excludedFields.contains(f))); + } + } + } + } + @DataProvider(name = "provideForMafVcfConcordance") final Object[][] provideForMafVcfConcordance() { return new Object[][]{ @@ -813,7 +871,7 @@ public void testVcfDatasourceAccountsForAltAlleles() { Assert.assertEquals(funcotations.size(), 1, "Found more than one funcotation in the funcotation map!"); final Funcotation funcotation = funcotations.get(0); - 
Assert.assertEquals(funcotation.getField("dummy_ClinVar_VCF_CLNDISDB"), FuncotatorUtils.sanitizeFuncotationForVcf(gtString), "Field (dummy_ClinVar_VCF_CLNDISDB) was unsanititzed: " + funcotation.getField("dummy_ClinVar_VCF_CLNDISDB")); + Assert.assertEquals(funcotation.getField("dummy_ClinVar_VCF_CLNDISDB"), FuncotatorUtils.sanitizeFuncotationFieldForVcf(gtString), "Field (dummy_ClinVar_VCF_CLNDISDB) was unsanititzed: " + funcotation.getField("dummy_ClinVar_VCF_CLNDISDB")); } } @@ -888,34 +946,15 @@ public void testNoSpanningDeletionWriteWithMAF() { @Test public void testVCFToMAFPreservesFields() { - final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.MAF; - final File outputFile = getOutputFile(outputFormatType); - - final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator( - PIK3CA_VCF_HG19, - outputFile, - b37Chr3Ref, - DS_PIK3CA_DIR, - FuncotatorTestConstants.REFERENCE_VERSION_HG19, - outputFormatType, - false); - - arguments.addArgument(FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_LONG_NAME, TranscriptSelectionMode.CANONICAL.toString()); - - // Disable the sequence dictionary check for the tests: - arguments.addBooleanArgument(FuncotatorArgumentDefinitions.FORCE_B37_TO_HG19_REFERENCE_CONTIG_CONVERSION, true); - - runCommandLine(arguments); - - final AnnotatedIntervalCollection maf = AnnotatedIntervalCollection.create(outputFile.toPath(), null); + final AnnotatedIntervalCollection maf = runPik3caHg19VcfToMaf(new HashSet<>()); Assert.assertTrue(maf.getRecords().size() > 0); Assert.assertTrue(maf.getRecords().stream().allMatch(r -> r.hasAnnotation("ILLUMINA_BUILD"))); Assert.assertTrue(maf.getRecords().stream().allMatch(r -> r.getAnnotationValue("ILLUMINA_BUILD").startsWith("37"))); // Needs to get aliases from the MAF, since AF (and maybe more) has its name changed. 
So create a dummy // MafOutputRenderer that mimics the one that is used in the command line invocation above and get the aliases. - final File dummyOutputFile = getOutputFile(outputFormatType); - final MafOutputRenderer dummyMafOutputRenderer = new MafOutputRenderer(dummyOutputFile.toPath(), Collections.emptyList(), new VCFHeader(), new LinkedHashMap<>(), new LinkedHashMap<>(), new HashSet<>(), "b37"); + final File dummyOutputFile = getOutputFile(FuncotatorArgumentDefinitions.OutputFormatType.MAF); + final MafOutputRenderer dummyMafOutputRenderer = new MafOutputRenderer(dummyOutputFile.toPath(), Collections.emptyList(), new VCFHeader(), new LinkedHashMap<>(), new LinkedHashMap<>(), new HashSet<>(), "b37", new HashSet()); final Map> mafAliasMap = dummyMafOutputRenderer.getReverseOutputFieldNameMap(); // Get all of the alias lists @@ -929,6 +968,40 @@ public void testVCFToMAFPreservesFields() { Assert.assertTrue(maf.getAnnotations().containsAll(vcfHeaderInfoSet)); } + private AnnotatedIntervalCollection runPik3caHg19VcfToMaf(final Set excludedFields) { + final File outputFile = getOutputFile(FuncotatorArgumentDefinitions.OutputFormatType.MAF); + + final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator( + PIK3CA_VCF_HG19, + outputFile, + b37Chr3Ref, + DS_PIK3CA_DIR, + FuncotatorTestConstants.REFERENCE_VERSION_HG19, + FuncotatorArgumentDefinitions.OutputFormatType.MAF, + false); + + arguments.addArgument(FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_LONG_NAME, TranscriptSelectionMode.CANONICAL.toString()); + + excludedFields.forEach(f -> arguments.addArgument(FuncotatorArgumentDefinitions.EXCLUSION_FIELDS_LONG_NAME, f)); + + // Disable the sequence dictionary check for the tests: + arguments.addBooleanArgument(FuncotatorArgumentDefinitions.FORCE_B37_TO_HG19_REFERENCE_CONTIG_CONVERSION, true); + + runCommandLine(arguments); + + return AnnotatedIntervalCollection.create(outputFile.toPath(), null); + } + + @Test + public void 
testVcfToMafHonorsExcludedFields() { + final String fieldToEnsureIsIncluded = "dummy_ClinVar_VCF_CLNVC"; + final HashSet excludedFields = com.google.common.collect.Sets.newHashSet("dummy_ClinVar_VCF_AF_EXAC", "dummy_ClinVar_VCF_CLNSIGCONF"); + final AnnotatedIntervalCollection maf = runPik3caHg19VcfToMaf(excludedFields); + Assert.assertTrue(maf.getRecords().size() > 0); + maf.getRecords().forEach(r -> Assert.assertTrue(r.hasAnnotation(fieldToEnsureIsIncluded))); + maf.getRecords().forEach(r -> Assert.assertEquals(Sets.intersection(r.getAnnotations().keySet(), excludedFields).size(), 0)); + } + @Test public void testVCFToVCFPreservesFields() { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtilsUnitTest.java index e0c65cc8275..355fd32e8da 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtilsUnitTest.java @@ -9,11 +9,13 @@ import htsjdk.variant.vcf.VCFHeaderLineCount; import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; +import org.apache.commons.collections.MapUtils; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.ReferenceDataSource; import org.broadinstitute.hellbender.engine.ReferenceFileSource; import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.testutils.FuncotatorReferenceTestUtils; import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotationBuilder; import org.broadinstitute.hellbender.tools.funcotator.metadata.FuncotationMetadata; @@ -23,7 +25,6 @@ import org.broadinstitute.hellbender.utils.Utils; 
import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.read.ReadUtils; -import org.broadinstitute.hellbender.testutils.FuncotatorReferenceTestUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -1610,4 +1611,67 @@ public void testCreateFuncotationsFromVariantContext(final VariantContext vc, fi Assert.assertEquals(funcotations.stream().map(f -> f.getAltAllele()).collect(Collectors.toSet()), new HashSet<>(vc.getAlternateAlleles())); Assert.assertEquals(funcotations.stream().map(f -> f.getMetadata()).collect(Collectors.toSet()), new HashSet<>(Collections.singletonList(metadata))); } + + @DataProvider + public Object[][] provideMafSanitizing() { + return new Object[][] { + {"\tHAHAHAH\t", "_%09_HAHAHAH_%09_"}, + {"\tHAHAHAH\n", "_%09_HAHAHAH_%0A_"}, + {"FOO", "FOO"} + }; + } + + @Test(dataProvider = "provideMafSanitizing") + public void testSanitizeFuncotationFieldForMaf(final String individualFuncotationField, final String gt) { + final String guess = FuncotatorUtils.sanitizeFuncotationFieldForMaf(individualFuncotationField); + Assert.assertEquals(guess, gt); + + } + + @SuppressWarnings("unchecked") + @DataProvider + public Object[][] provideForRenderSanitizedFuncotationForVcf() { + + return new Object[][]{ + // Test a very basic case where all fields are included + {OutputRenderer.createFuncotationFromLinkedHashMap( + (LinkedHashMap) MapUtils.putAll(new LinkedHashMap(), + new String[][]{{"FOO", "BAR"},{"BAZ", "HUH?"}}), + Allele.create("T"), "FAKEDATA"), Arrays.asList("FOO", "BAZ"), + "BAR|HUH?" 
+ }, + + // Test case where only one field is included + {OutputRenderer.createFuncotationFromLinkedHashMap( + (LinkedHashMap) MapUtils.putAll(new LinkedHashMap(), + new String[][]{{"FOO", "BAR"},{"BAZ", "HUH?"}}), + Allele.create("T"), "FAKEDATA"), Arrays.asList("FOO"), + "BAR" + }, + + // Make sure that specifying a non-existent included field (NOTHERE) has no effect on the output, + // even when another field is excluded. + {OutputRenderer.createFuncotationFromLinkedHashMap( + (LinkedHashMap) MapUtils.putAll(new LinkedHashMap(), + new String[][]{{"FOO", "BAR"},{"BAZ", "HUH?"}}), + Allele.create("T"), "FAKEDATA"), Arrays.asList("FOO", "NOTHERE"), + "BAR" + }, + + // Make sure that specifying a non-existent included field (NOTHERE) has no effect on the output, + // even when all fields are included.. + {OutputRenderer.createFuncotationFromLinkedHashMap( + (LinkedHashMap) MapUtils.putAll(new LinkedHashMap(), + new String[][]{{"FOO", "BAR"},{"BAZ", "HUH?"}}), + Allele.create("T"), "FAKEDATA"), Arrays.asList("FOO", "BAZ", "NOTHERE"), + "BAR|HUH?" 
+ } + }; + } + + @Test(dataProvider = "provideForRenderSanitizedFuncotationForVcf" ) + public void testRenderSanitizedFuncotationForVcf(final Funcotation funcotation, final List includedFields, final String gt) { + final String guess = FuncotatorUtils.renderSanitizedFuncotationForVcf(funcotation, includedFields); + Assert.assertEquals(guess, gt); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotationUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotationUnitTest.java index 95556d5a535..1b52658dcdc 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotationUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotationUnitTest.java @@ -3,14 +3,12 @@ import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.tools.funcotator.FuncotatorUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedHashSet; -import java.util.List; +import java.util.*; import java.util.stream.Collectors; /** @@ -263,7 +261,7 @@ public void testSetFieldSerializationOverrideValue(final TableFuncotation funcot @Test(dataProvider = "provideForTestSerializeToVcfString") public void testSerializeToVcfString(final TableFuncotation funcotation, final String expected) { - Assert.assertEquals( funcotation.serializeToVcfString(), expected ); + Assert.assertEquals(FuncotatorUtils.renderSanitizedFuncotationForVcf(funcotation, new ArrayList<>(funcotation.getFieldNames())), expected ); } @Test(dataProvider = "provideListOfStrings") diff --git 
a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationUnitTest.java index 0011659336a..fedef8b49a9 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationUnitTest.java @@ -238,8 +238,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - "CRUMB BUM!!!!!", - "CRUMB BUM!!!!!" + "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + + "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" @@ -250,7 +249,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + 
GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -261,7 +259,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -272,7 +269,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -283,7 +279,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + D + 50 + D + 60 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -294,7 +289,6 @@ Object[][] 
createGencodeFuncotationsAndStringSerializations() { null, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -305,7 +299,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, null, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -316,7 +309,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, null, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -327,7 +319,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "G", "C", null, "T1", "3'", 1, 1, "A", "ACC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + 
GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "G" + D + "G" + D + "C" + D + D + "T1" + D + @@ -338,7 +329,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", null, null, 1, 1, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + D + @@ -349,7 +339,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", null, null, "A", "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -360,7 +349,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, null, "ATC", "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + 
GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -371,7 +359,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", null, "Lys", 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -382,7 +369,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", null, 1.0, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -393,7 +379,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", null, "ATGCGCAT", Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ 
-404,7 +389,6 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { GencodeFuncotation.VariantClassification.NONSENSE, GencodeFuncotation.VariantClassification.INTRON, GencodeFuncotation.VariantType.SNP, "A", "T", "big_%20_changes", "T1", "3'", 1, 1, "A", "ATC", "Lys", 1.0, null, Arrays.asList("ONE", "TWO", "THREE")), - null, "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big_%20_changes" + D + "T1" + D + @@ -724,8 +708,8 @@ void testSerializationOverrides(final GencodeFuncotation gencodeFuncotation, fin } @Test(dataProvider = "createGencodeFuncotationsAndStringSerializations") - void testSerializeToVcfString(final GencodeFuncotation gencodeFuncotation, final String manualAnnotationString, final String expected) { - Assert.assertEquals(gencodeFuncotation.serializeToVcfString(manualAnnotationString), expected); + void testSerializeToVcfString(final GencodeFuncotation gencodeFuncotation, final String expected) { + Assert.assertEquals(gencodeFuncotation.serializeToVcfString(), expected); } @Test(dataProvider = "provideForTestGetFieldNames") diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactoryUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactoryUnitTest.java index 211bef0f6d8..1888e9ddc29 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactoryUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactoryUnitTest.java @@ -23,6 +23,7 @@ import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.io.IOUtils; 
+import org.broadinstitute.hellbender.utils.test.FuncotatorTestUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -108,25 +109,7 @@ public class VcfFuncotationFactoryUnitTest extends GATKBaseTest { //================================================================================================================== // Helper Methods: - private VariantContext createVariantContext(final String contig, - final int start, - final int end, - final String refString, - final String altString) { - - final Allele refAllele = Allele.create(refString, true); - final Allele altAllele = Allele.create(altString); - - final VariantContextBuilder variantContextBuilder = new VariantContextBuilder( - FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref(), - contig, - start, - end, - Arrays.asList(refAllele, altAllele) - ); - return variantContextBuilder.make(); - } private Object[] helpProvideForTestCreateFuncotations(final String contig, final int start, @@ -135,7 +118,7 @@ private Object[] helpProvideForTestCreateFuncotations(final String contig, final String altAlleleString, final List expected) { return new Object[]{ - createVariantContext(contig, start, end, refAlleleString, altAlleleString), + FuncotatorTestUtils.createSimpleVariantContext(FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref(), contig, start, end, refAlleleString, altAlleleString), new ReferenceContext(CHR3_REF_DATA_SOURCE, new SimpleInterval(contig, start, end)), expected }; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java index ed0676a0398..fb2075cec0b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java @@ -1,5 +1,6 @@ 
package org.broadinstitute.hellbender.tools.funcotator.mafOutput; +import htsjdk.tribble.annotation.Strand; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -8,7 +9,9 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.engine.DummyPlaceholderGatkTool; import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.testutils.FuncotatorReferenceTestUtils; import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection; import org.broadinstitute.hellbender.tools.funcotator.*; import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation; @@ -16,6 +19,7 @@ import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotationFactory; import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.test.FuncotatorTestUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -98,6 +102,10 @@ private List createFieldValuesFromNameList(final String prefix, final Li } private MafOutputRenderer createMafOutputRenderer(final File outputFile, final String referenceVersion) { + return createMafOutputRenderer(outputFile, referenceVersion, new HashSet<>()); + } + + private MafOutputRenderer createMafOutputRenderer(final File outputFile, final String referenceVersion, final Set excludedFields) { final Map configData = DataSourceUtils.getAndValidateDataSourcesFromPaths( @@ -124,7 +132,7 @@ private MafOutputRenderer createMafOutputRenderer(final File outputFile, final S new LinkedHashMap<>(), 
new LinkedHashMap<>(), new HashSet<>(), - referenceVersion + referenceVersion, excludedFields ); } @@ -1133,4 +1141,70 @@ public void testWrite(final List variants, final List excludedFields = Collections.singleton("FAKEDATA_FOO"); + try ( final MafOutputRenderer mafOutputRenderer = createMafOutputRenderer( outFile, "hg19", excludedFields) ) { + final FuncotationMap funcotationMap = FuncotationMap.createFromGencodeFuncotations(Collections.singletonList(dummyGencodeFuncotation)); + funcotationMap.add(dummyTranscriptName, createDummyTableFuncotation()); + mafOutputRenderer.write(dummyVariantContext, funcotationMap); + } + + final AnnotatedIntervalCollection maf = AnnotatedIntervalCollection.create(outFile.toPath(), null); + Assert.assertTrue(maf.getRecords().size() > 0); + maf.getRecords().forEach(r -> Assert.assertFalse(r.hasAnnotation("FAKEDATA_FOO"))); + maf.getRecords().forEach(r -> Assert.assertTrue(r.hasAnnotation("FAKEDATA_BAR"))); + } + + private static Funcotation createDummyGencodeFuncotation(final String dummyTranscriptName, final VariantContext dummyVariantContext) { + return FuncotatorTestUtils.createGencodeFuncotation("GENE","b37", dummyVariantContext.getContig(), dummyVariantContext.getStart(),dummyVariantContext.getEnd(), + GencodeFuncotation.VariantClassification.DE_NOVO_START_IN_FRAME, null, GencodeFuncotation.VariantType.SNP, + dummyVariantContext.getReference().getDisplayString(), + dummyVariantContext.getAlternateAllele(0).getDisplayString(), "g.1000000"+ dummyVariantContext.getReference().getDisplayString() + ">" + dummyVariantContext.getAlternateAllele(0).getDisplayString(), + dummyTranscriptName, Strand.FORWARD, + 1, 1500, + " ", " ", + "p.L300P", 0.5, + "ACTGATCGATCGA",Collections.singletonList("FAKE00002.5"), "27"); + } + + private static Funcotation createDummyTableFuncotation() { + final String datasourceName = "FAKEDATA"; + final LinkedHashMap data = new LinkedHashMap<>(); + data.put(datasourceName + "_FOO", "1"); + 
data.put(datasourceName + "_BAR", "2"); + data.put(datasourceName + "_BAZ", "\tYES\n"); + final Allele altAllele = Allele.create("T"); + return OutputRenderer.createFuncotationFromLinkedHashMap(data, altAllele, datasourceName); + } + + @Test + public void testCreateMafCompliantOutputMapSanitized() { + final File outFile = getSafeNonExistentFile("TestMafOutputSanitized.maf"); + final String dummyTranscriptName = "FAKE00001.1"; + final VariantContext dummyVariantContext = FuncotatorTestUtils.createSimpleVariantContext(FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref(),"3", 1000000, 1000000, "C", "T"); + final GencodeFuncotation dummyGencodeFuncotation = (GencodeFuncotation) createDummyGencodeFuncotation(dummyTranscriptName, dummyVariantContext); + final Set excludedFields = Collections.emptySet(); + try ( final MafOutputRenderer mafOutputRenderer = createMafOutputRenderer( outFile, "hg19", excludedFields) ) { + final FuncotationMap funcotationMap = FuncotationMap.createFromGencodeFuncotations(Collections.singletonList(dummyGencodeFuncotation)); + funcotationMap.add(dummyTranscriptName, createDummyTableFuncotation()); + mafOutputRenderer.write(dummyVariantContext, funcotationMap); + } + + final AnnotatedIntervalCollection maf = AnnotatedIntervalCollection.create(outFile.toPath(), null); + Assert.assertTrue(maf.getRecords().size() > 0); + maf.getRecords().forEach(r -> Assert.assertTrue(r.hasAnnotation("FAKEDATA_FOO"))); + maf.getRecords().forEach(r -> Assert.assertTrue(r.hasAnnotation("FAKEDATA_BAR"))); + maf.getRecords().forEach(r -> Assert.assertTrue(r.hasAnnotation("FAKEDATA_BAZ"))); + maf.getRecords().forEach(r -> Assert.assertEquals(r.getAnnotationValue("FAKEDATA_BAZ"), "_%09_YES_%0A_")); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRendererUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRendererUnitTest.java index c04c2d980b1..a893531223f 100644 --- 
a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRendererUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRendererUnitTest.java @@ -1,6 +1,19 @@ package org.broadinstitute.hellbender.tools.funcotator.vcfOutput; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFHeader; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.tools.funcotator.FuncotationMap; +import org.broadinstitute.hellbender.tools.funcotator.FuncotatorUtils; +import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.*; /** * Unit test class for the {@link VcfOutputRenderer} class. 
@@ -10,6 +23,8 @@ public class VcfOutputRendererUnitTest extends GATKBaseTest { //================================================================================================================== // Static Variables: + private static final String TEST_VCF = toolsTestDir + "/funcotator/hg38_trio.pik3ca.vcf"; + //================================================================================================================== // Helper Methods: @@ -18,4 +33,43 @@ public class VcfOutputRendererUnitTest extends GATKBaseTest { //================================================================================================================== // Tests: + + /** Test that the exclusion list overrides the manually specified annotations */ + @Test + public void testExclusionListOverridesManualDefaultAnnotations() { + final Pair> entireInputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(TEST_VCF); + final File outFile = createTempFile("vcf_output_renderer_exclusion", ".vcf"); + final VariantContextWriter vcfWriter = GATKVariantContextUtils.createVCFWriter(outFile,null, false); + + final LinkedHashMap dummyDefaults = new LinkedHashMap<>(); + dummyDefaults.put("FOO", "BAR"); + dummyDefaults.put("BAZ", "HUH?"); + + final VcfOutputRenderer vcfOutputRenderer = new VcfOutputRenderer(vcfWriter, + new ArrayList<>(), entireInputVcf.getLeft(), new LinkedHashMap<>(dummyDefaults), + new LinkedHashMap<>(), + new HashSet<>(), new HashSet<>(Arrays.asList("BAZ", "AC"))); + + final VariantContext variant = entireInputVcf.getRight().get(0); + + final FuncotationMap funcotationMap = FuncotationMap.createNoTranscriptInfo(Collections.emptyList()); + vcfOutputRenderer.write(variant, funcotationMap); + vcfOutputRenderer.close(); + + // Check the output + final Pair> tempVcf = VariantContextTestUtils.readEntireVCFIntoMemory(outFile.getAbsolutePath()); + final VariantContext tempVariant = tempVcf.getRight().get(0); + final String[] funcotatorKeys = 
FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine("FUNCOTATION").getDescription()); + Assert.assertEquals(funcotatorKeys.length,1); + Assert.assertEquals(funcotatorKeys[0],"FOO"); + final FuncotationMap tempFuncotationMap = + FuncotationMap.createAsAllTableFuncotationsFromVcf(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY, funcotatorKeys, + tempVariant.getAttributeAsString("FUNCOTATION", ""), tempVariant.getAlternateAllele(0), "TEST"); + Assert.assertTrue(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).hasField("FOO")); + Assert.assertEquals(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).getField("FOO"), "BAR"); + Assert.assertFalse(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).hasField("BAZ")); + + // IMPORTANT: If the field is not a proper funcotation in VCFs, it will not be excluded. I.e. if an input VCF has an excluded field, it will not be excluded. + Assert.assertEquals(tempVariant.getAttribute("AC"), "1"); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/test/FuncotatorTestUtils.java b/src/test/java/org/broadinstitute/hellbender/utils/test/FuncotatorTestUtils.java index 360f3f0c385..1c5e290ec81 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/test/FuncotatorTestUtils.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/test/FuncotatorTestUtils.java @@ -2,15 +2,25 @@ import com.google.common.annotations.VisibleForTesting; import htsjdk.tribble.Feature; +import htsjdk.tribble.annotation.Strand; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.engine.FeatureContext; import org.broadinstitute.hellbender.engine.FeatureInput; import org.broadinstitute.hellbender.engine.FeatureManager; +import 
org.broadinstitute.hellbender.testutils.FuncotatorReferenceTestUtils; import org.broadinstitute.hellbender.tools.funcotator.DataSourceFuncotationFactory; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotationBuilder; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotationFactory; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.param.ParamUtils; import java.nio.file.Path; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -49,4 +59,130 @@ public static FeatureContext createFeatureContext(final List= 1. + * @param transcriptPos 1-based position in the transcript genome sequence. In the Funcotator main code, this field takes into account the coding direction, so it is possible to have + * a higher transcript position that is a lower position in genomic coordinate space. + * This method will not check validity against the annotation transcript and transcript exon. Must be >= 1. + * @param cDnaChange This method will not check validity of the specified value. + * @param codonChange This method will not check validity of the specified value. + * @param proteinChange This method will not check validity of the specified value. + * @param gcContent This method will not check validity. Must be >= 0.0 and =< 1.0 + * @param referenceContext This method will not check validity of the specified value. This method will not even check that you are specifying valid bases. + * @param otherTranscripts This method will not check validity of the specified value. + * @param version Gencode version. Use "19" for hg19/b37 and "28" for hg38. This method will not check validity, + * but much of the rendering code will fail if this field is not valid. 
+ * @return a gencode funcotation representing the above parameters. Never {@code null} + */ + public static GencodeFuncotation createGencodeFuncotation(final String hugoSymbol, final String ncbiBuild, + final String chromosome, final int start, final int end, + final GencodeFuncotation.VariantClassification variantClassification, + final GencodeFuncotation.VariantClassification secondaryVariantClassification, + final GencodeFuncotation.VariantType variantType, + final String refAllele, + final String tumorSeqAllele2, final String genomeChange, + final String annotationTranscript, final Strand transcriptStrand, + final Integer transcriptExon, final Integer transcriptPos, + final String cDnaChange, final String codonChange, + final String proteinChange, final Double gcContent, + final String referenceContext, + final List otherTranscripts, final String version) { + + + ParamUtils.isPositive(start, "Start position is 1-based and must be greater that zero."); + ParamUtils.isPositive(end, "End position is 1-based and must be greater that zero."); + ParamUtils.isPositive(transcriptExon, "Transcript exon is a 1-based index."); + ParamUtils.isPositive(transcriptPos, "Transcript position is a 1-based index."); + ParamUtils.inRange(gcContent, 0.0, 1.0, "GC Content must be between 0.0 and 1.0."); + + final GencodeFuncotationBuilder funcotationBuilder = new GencodeFuncotationBuilder(); + + funcotationBuilder.setVersion(version); + funcotationBuilder.setDataSourceName(GencodeFuncotationFactory.DEFAULT_NAME); + + funcotationBuilder.setHugoSymbol( hugoSymbol ); + funcotationBuilder.setNcbiBuild( ncbiBuild ); + funcotationBuilder.setChromosome( chromosome ); + funcotationBuilder.setStart( start ); + funcotationBuilder.setEnd( end ); + funcotationBuilder.setVariantClassification( variantClassification ); + funcotationBuilder.setSecondaryVariantClassification(secondaryVariantClassification); + funcotationBuilder.setVariantType( variantType ); + 
funcotationBuilder.setRefAllele(Allele.create(refAllele)); + funcotationBuilder.setTumorSeqAllele2( tumorSeqAllele2 ); + + funcotationBuilder.setGenomeChange( genomeChange ); + funcotationBuilder.setAnnotationTranscript( annotationTranscript ); + funcotationBuilder.setStrand( transcriptStrand ); + funcotationBuilder.setTranscriptExonNumber( transcriptExon ); + funcotationBuilder.setTranscriptPos( transcriptPos ); + funcotationBuilder.setcDnaChange( cDnaChange ); + funcotationBuilder.setCodonChange( codonChange ); + funcotationBuilder.setProteinChange( proteinChange ); + funcotationBuilder.setGcContent( gcContent ); + funcotationBuilder.setReferenceContext( referenceContext ); + funcotationBuilder.setOtherTranscripts( otherTranscripts ); + + return funcotationBuilder.build(); + } + + /** + * Create a variant context with the following fields. Note that the genotype will be empty, as will + * INFO annotations. + * + * @param reference E.g. {@link FuncotatorReferenceTestUtils#retrieveHg19Chr3Ref} + * @param contig Never {@code null} + * @param start Must be positive. + * @param end Must be positive. + * @param refAlleleString Valid string for an allele. Do not include "*". Never {@code null} + * @param altAlleleString Valid string for an allele. Do not include "*". 
Never {@code null} + * @return a simple, biallelic variant context + */ + public static VariantContext createSimpleVariantContext(final String reference, final String contig, + final int start, + final int end, + final String refAlleleString, + final String altAlleleString) { + Utils.nonNull(contig); + Utils.nonNull(refAlleleString); + Utils.nonNull(altAlleleString); + ParamUtils.isPositive(start, "Invalid start position: " + start); + ParamUtils.isPositive(end, "Invalid end position: " + end); + + final Allele refAllele = Allele.create(refAlleleString, true); + final Allele altAllele = Allele.create(altAlleleString); + + final VariantContextBuilder variantContextBuilder = new VariantContextBuilder( + reference, + contig, + start, + end, + Arrays.asList(refAllele, altAllele) + ); + + return variantContextBuilder.make(); + } }