diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java
index a1b00c2fe15..40f5c45b430 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java
@@ -1,7 +1,10 @@
package org.broadinstitute.hellbender.tools.copynumber;
import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.Locatable;
+import htsjdk.tribble.bed.BEDFeature;
+import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
@@ -9,23 +12,25 @@
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup;
import org.broadinstitute.hellbender.engine.*;
+import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleLocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval;
-import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotationSet;
-import org.broadinstitute.hellbender.utils.IntervalMergingRule;
-import org.broadinstitute.hellbender.utils.Nucleotide;
-import org.broadinstitute.hellbender.utils.SimpleInterval;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations;
+import org.broadinstitute.hellbender.utils.*;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
+import java.util.stream.Collectors;
/**
- * Annotates intervals with GC content. The output may optionally be used as input to
- * {@link CreateReadCountPanelOfNormals} or {@link DenoiseReadCounts}. In the former case,
- * using the resulting panel as input to {@link DenoiseReadCounts} will perform explicit GC-bias correction.
+ * Annotates intervals with GC content, and optionally, mappability and segmental-duplication content.
+ * The output may optionally be used as input to {@link CreateReadCountPanelOfNormals}, {@link DenoiseReadCounts},
+ * and {@link GermlineCNVCaller}.
*
*
Inputs
*
@@ -39,46 +44,99 @@
* The argument {@code interval-merging-rule} must be set to {@link IntervalMergingRule#OVERLAPPING_ONLY}
* and all other common arguments for interval padding or merging must be set to their defaults.
*
+ *
+ * (Optional) Umap single-read mappability track.
+ * This is a BED file in .bed or .bed.gz format that identifies uniquely mappable regions of the genome.
+ * The track should correspond to the appropriate read length and overlapping intervals must be merged.
+ * See https://bismap.hoffmanlab.org/. If scores are provided,
+ * intervals will be annotated with the length-weighted average; scores may not be NaN. Otherwise, scores
+ * for covered and uncovered intervals will be taken as unity and zero, respectively.
+ *
+ *
+ * (Optional) Segmental-duplication track.
+ * This is a BED file in .bed or .bed.gz format that identifies segmental-duplication regions of the genome.
+ * Overlapping intervals must be merged. If scores are provided, intervals will be annotated with the
+ * length-weighted average; scores may not be NaN. Otherwise, scores for covered and uncovered intervals
+ * will be taken as unity and zero, respectively.
+ *
*
*
* Output
*
*
* -
- * GC-content annotated-intervals file.
+ * Annotated-intervals file.
* This is a tab-separated values (TSV) file with a SAM-style header containing a sequence dictionary,
- * a row specifying the column headers contained in {@link AnnotatedIntervalCollection.AnnotatedIntervalTableColumn},
- * and the corresponding entry rows.
+ * a row specifying the column headers for the contained annotations (see {@link CopyNumberAnnotations}
+ * for possible annotations), and the corresponding entry rows.
*
*
*
- * Usage example
+ * Usage examples
+ *
+ *
+ * gatk AnnotateIntervals \
+ * -R reference.fa \
+ * -L intervals.interval_list \
+ * --interval-merging-rule OVERLAPPING_ONLY \
+ * -O annotated_intervals.tsv
+ *
*
*
* gatk AnnotateIntervals \
* -R reference.fa \
* -L intervals.interval_list \
+ * --mappability-track mappability.bed.gz \
+ * --segmental-duplication-track segmental_duplication.bed.gz \
* --interval-merging-rule OVERLAPPING_ONLY \
* -O annotated_intervals.tsv
*
*
- * @author David Benjamin <davidben@broadinstitute.org>
* @author Samuel Lee <slee@broadinstitute.org>
*/
@CommandLineProgramProperties(
- summary = "Annotates intervals with GC content",
- oneLineSummary = "Annotates intervals with GC content",
+ summary = "Annotates intervals with GC content, mappability, and segmental-duplication content",
+ oneLineSummary = "Annotates intervals with GC content, mappability, and segmental-duplication content",
programGroup = CopyNumberProgramGroup.class
)
@DocumentedFeature
@BetaFeature
public final class AnnotateIntervals extends GATKTool {
+ private static final int DEFAULT_FEATURE_QUERY_LOOKAHEAD_IN_BP = 1_000_000;
+
+ public static final String MAPPABILITY_TRACK_PATH_LONG_NAME = "mappability-track";
+ public static final String SEGMENTAL_DUPLICATION_TRACK_PATH_LONG_NAME = "segmental-duplication-track";
+ public static final String FEATURE_QUERY_LOOKAHEAD = "feature-query-lookahead";
+
@Argument(
doc = "Output file for annotated intervals.",
fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME
)
- protected File outputAnnotatedIntervalsFile;
+ private File outputAnnotatedIntervalsFile;
+
+ @Argument(
+ doc = "Path to Umap single-read mappability track in .bed or .bed.gz format (see https://bismap.hoffmanlab.org/). " +
+ "Overlapping intervals must be merged.",
+ fullName = MAPPABILITY_TRACK_PATH_LONG_NAME,
+ optional = true
+ )
+ private FeatureInput mappabilityTrackPath;
+
+ // Optional BED track of segmental-duplication regions; when left null, no
+ // segmental-duplication annotator is added in onTraversalStart().
+ // The help text no longer cites the Umap/Bismap URL, which documents the mappability track only.
+ @Argument(
+ doc = "Path to segmental-duplication track in .bed or .bed.gz format. " +
+ "Overlapping intervals must be merged.",
+ fullName = SEGMENTAL_DUPLICATION_TRACK_PATH_LONG_NAME,
+ optional = true
+ )
+ private FeatureInput segmentalDuplicationTrackPath;
+
+ @Argument(
+ doc = "Number of bases to cache when querying feature tracks.",
+ fullName = FEATURE_QUERY_LOOKAHEAD,
+ optional = true
+ )
+ private int featureQueryLookahead = DEFAULT_FEATURE_QUERY_LOOKAHEAD_IN_BP;
@Override
public boolean requiresReference() {
@@ -93,7 +151,8 @@ public boolean requiresIntervals() {
private List intervals;
private SAMSequenceDictionary sequenceDictionary;
private ReferenceDataSource reference;
- private final GCContentAnnotator gcContentAnnotator = new GCContentAnnotator();
+ private FeatureManager features;
+ private List> annotators = new ArrayList<>();
private AnnotatedIntervalCollection annotatedIntervals;
@Override
@@ -103,45 +162,94 @@ public void onTraversalStart() {
logger.info("Loading intervals for annotation...");
sequenceDictionary = getBestAvailableSequenceDictionary();
intervals = intervalArgumentCollection.getIntervals(sequenceDictionary);
+
+ logger.info("Loading resources for annotation...");
reference = ReferenceDataSource.of(referenceArguments.getReferencePath()); //the GATKTool ReferenceDataSource is package-protected, so we cannot access it directly
+ features = new FeatureManager( //the GATKTool FeatureManager is package-protected, so we cannot access it directly
+ this,
+ featureQueryLookahead,
+ cloudPrefetchBuffer,
+ cloudIndexPrefetchBuffer,
+ referenceArguments.getReferencePath());
+
+ // always perform GC-content annotation
+ logger.info("Adding GC-content annotator...");
+ annotators.add(new GCContentAnnotator());
+
+ // add optional annotators
+ if (mappabilityTrackPath != null) {
+ logger.info("Adding mappability annotator...");
+ annotators.add(new MappabilityAnnotator(mappabilityTrackPath));
+ }
+ if (segmentalDuplicationTrackPath != null) {
+ logger.info("Adding segmental-duplication-content annotator...");
+ annotators.add(new SegmentalDuplicationContentAnnotator(segmentalDuplicationTrackPath));
+ }
+
logger.info("Annotating intervals...");
}
+ /**
+ * Annotates every interval with each configured annotator and stores the result in
+ * {@code annotatedIntervals}. For each interval a reference context and a feature context
+ * are built once and shared by all annotators.
+ */
@Override
public void traverse() {
final List annotatedIntervalList = new ArrayList<>(intervals.size());
- intervals.forEach(interval -> {
- annotatedIntervalList.add(new AnnotatedInterval(
- interval,
- new AnnotationSet(gcContentAnnotator.apply(
- interval, null, new ReferenceContext(reference, interval), null))));
+ for (final SimpleInterval interval : intervals) {
+ final ReferenceContext referenceContext = new ReferenceContext(reference, interval);
+ final FeatureContext featureContext = new FeatureContext(features, interval);
+ // one (key, validated value) pair per annotator, in the order the annotators were added
+ // NOTE(review): Collectors.mapping(..., toList()) is presumably used rather than
+ // map(...).collect(...) so the Pair type is inferred from the target type; confirm before simplifying
+ final AnnotationMap annotations = new AnnotationMap(annotators.stream()
+ .collect(Collectors.mapping(
+ a -> Pair.of(
+ a.getAnnotationKey(),
+ a.applyAndValidate(interval, referenceContext, featureContext)),
+ Collectors.toList())));
+ annotatedIntervalList.add(new AnnotatedInterval(interval, annotations));
progressMeter.update(interval);
- });
+ }
annotatedIntervals = new AnnotatedIntervalCollection(new SimpleLocatableMetadata(sequenceDictionary), annotatedIntervalList);
}
+ /**
+ * Closes the reference and feature resources opened in onTraversalStart(), then writes the
+ * annotated intervals to the output file.
+ */
@Override
public Object onTraversalSuccess() {
+ // NOTE(review): these resources are closed only here, i.e. on the success path;
+ // confirm whether they should also be released when traversal fails
+ reference.close();
+ features.close();
logger.info(String.format("Writing annotated intervals to %s...", outputAnnotatedIntervalsFile));
annotatedIntervals.write(outputAnnotatedIntervalsFile);
return super.onTraversalSuccess();
}
- //if additional annotators are added to this tool, they should follow this interface
- //(and validation that the required resources are available should be performed)
- private interface IntervalAnnotator {
- T apply(final Locatable interval,
- final ReadsContext readsContext,
- final ReferenceContext referenceContext,
- final FeatureContext featureContext);
+ /**
+ * Base class for all interval annotators used by this tool. Each annotator names the
+ * annotation it produces via {@link #getAnnotationKey} and computes its value in {@link #apply}.
+ * Validation that the required resources are available should be performed before
+ * calling {@link IntervalAnnotator#apply}.
+ */
+ abstract static class IntervalAnnotator {
+ /** Returns the key identifying (and validating) the annotation produced by this annotator. */
+ public abstract AnnotationKey getAnnotationKey();
+
+ /** Computes the raw, unvalidated annotation value for the given interval. */
+ abstract T apply(final Locatable interval,
+ final ReferenceContext referenceContext,
+ final FeatureContext featureContext);
+
+ /**
+ * Computes the annotation with {@link #apply} and passes it through the key's validation.
+ * An {@link IllegalArgumentException} raised while doing so is reported as a
+ * {@link UserException.BadInput}; the original exception is kept as the cause so that
+ * its stack trace is not lost.
+ */
+ T applyAndValidate(final Locatable interval,
+ final ReferenceContext referenceContext,
+ final FeatureContext featureContext) {
+ try {
+ return getAnnotationKey().validate(apply(interval, referenceContext, featureContext));
+ } catch (final IllegalArgumentException e) {
+ throw new UserException.BadInput(String.format("%s " +
+ "Feature track may contain overlapping intervals; these should be merged.", e.getMessage()), e);
+ }
+ }
}
- private class GCContentAnnotator implements IntervalAnnotator {
+ public static class GCContentAnnotator extends IntervalAnnotator {
+ @Override
+ public AnnotationKey getAnnotationKey() {
+ return CopyNumberAnnotations.GC_CONTENT;
+ }
+
@Override
- public Double apply(final Locatable interval,
- final ReadsContext readsContext,
- final ReferenceContext referenceContext,
- final FeatureContext featureContext) {
+ Double apply(final Locatable interval,
+ final ReferenceContext referenceContext,
+ final FeatureContext featureContext) {
final Nucleotide.Counter counter = new Nucleotide.Counter();
counter.addAll(referenceContext.getBases());
final long gcCount = counter.get(Nucleotide.C) + counter.get(Nucleotide.G);
@@ -150,4 +258,59 @@ public Double apply(final Locatable interval,
return totalCount == 0 ? Double.NaN : gcCount / (double) totalCount;
}
}
+
+ /**
+ * Annotates an interval with the length-weighted average score of the BED features that
+ * overlap it: the sum over features of (score x overlap length), divided by the interval length.
+ * A feature whose score is NaN is treated as having no score and is given a score of unity, so
+ * (assuming the features do not overlap one another) a track without scores yields the covered
+ * fraction of the interval. An interval with no overlapping features is annotated with zero.
+ */
+ abstract static class BEDLengthWeightedAnnotator extends IntervalAnnotator {
+ private final FeatureInput trackPath;
+
+ BEDLengthWeightedAnnotator(final FeatureInput trackPath) {
+ this.trackPath = trackPath;
+ }
+
+ @Override
+ Double apply(final Locatable interval,
+ final ReferenceContext referenceContext,
+ final FeatureContext featureContext) {
+ final int intervalLength = interval.getLengthOnReference();
+ if (intervalLength == 0) {
+ return Double.NaN; // nothing to average over; also avoids dividing by zero below
+ }
+ double lengthWeightedSum = 0.;
+ final List features = featureContext.getValues(trackPath);
+ for (final BEDFeature feature : features) {
+ final double scoreOrNaN = (double) feature.getScore();
+ final double score = Double.isNaN(scoreOrNaN) ? 1. : scoreOrNaN; // missing (NaN) score -> score = 1
+ // NOTE(review): the "- 1" assumes the feature coordinates need converting as marked below;
+ // confirm against the coordinate convention of the BED codec that produces these features
+ lengthWeightedSum += score *
+ CoordMath.getOverlap(
+ feature.getStart(), feature.getEnd() - 1, // zero-based
+ interval.getStart(), interval.getEnd()); // one-based
+ }
+ return lengthWeightedSum / intervalLength;
+ }
+ }
+
+ /**
+ * Length-weighted BED annotator that reads the mappability track and reports its result
+ * under {@link CopyNumberAnnotations#MAPPABILITY}.
+ */
+ public static class MappabilityAnnotator extends BEDLengthWeightedAnnotator {
+ MappabilityAnnotator(final FeatureInput mappabilityTrackPath) {
+ super(mappabilityTrackPath);
+ }
+
+ @Override
+ public AnnotationKey getAnnotationKey() {
+ return CopyNumberAnnotations.MAPPABILITY;
+ }
+ }
+
+ /**
+ * Length-weighted BED annotator that reads the segmental-duplication track and reports its
+ * result under {@link CopyNumberAnnotations#SEGMENTAL_DUPLICATION_CONTENT}.
+ */
+ public static class SegmentalDuplicationContentAnnotator extends BEDLengthWeightedAnnotator {
+ SegmentalDuplicationContentAnnotator(final FeatureInput segmentalDuplicationTrackPath) {
+ super(segmentalDuplicationTrackPath);
+ }
+
+ @Override
+ public AnnotationKey getAnnotationKey() {
+ return CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT;
+ }
+ }
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java
index c22b2438d9c..954777d72ea 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java
@@ -21,6 +21,7 @@
import org.broadinstitute.hellbender.tools.copynumber.denoising.HDF5SVDReadCountPanelOfNormals;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleCountCollection;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations;
import org.broadinstitute.hellbender.tools.copynumber.utils.HDF5Utils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
@@ -280,7 +281,9 @@ protected void runPipeline(final JavaSparkContext ctx) {
inputAnnotatedIntervalsFile, firstReadCounts, logger);
final double[] intervalGCContent = annotatedIntervals == null
? null
- : annotatedIntervals.getRecords().stream().mapToDouble(i -> i.getAnnotationSet().getGCContent()).toArray();
+ : annotatedIntervals.getRecords().stream()
+ .mapToDouble(i -> i.getAnnotationMap().getValue(CopyNumberAnnotations.GC_CONTENT))
+ .toArray();
//validate input read-counts files (i.e., check intervals and that only integer counts are contained)
//and aggregate as a RealMatrix with dimensions numIntervals x numSamples
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java
index 674b5eed31e..511363fa85b 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java
@@ -17,6 +17,7 @@
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleCountCollection;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import java.io.File;
@@ -210,7 +211,9 @@ protected Object doWork() {
inputAnnotatedIntervalsFile, readCounts, logger);
final double[] intervalGCContent = annotatedIntervals == null
? null
- : annotatedIntervals.getRecords().stream().mapToDouble(i -> i.getAnnotationSet().getGCContent()).toArray();
+ : annotatedIntervals.getRecords().stream()
+ .mapToDouble(i -> i.getAnnotationMap().getValue(CopyNumberAnnotations.GC_CONTENT))
+ .toArray();
if (intervalGCContent == null) {
logger.warn("Neither a panel of normals nor GC-content annotations were provided, so only standardization will be performed...");
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/CopyNumberFormatsUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/CopyNumberFormatsUtils.java
index 1cb10c5387d..bc800acd631 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/CopyNumberFormatsUtils.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/CopyNumberFormatsUtils.java
@@ -1,6 +1,18 @@
package org.broadinstitute.hellbender.tools.copynumber.formats;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.utils.io.IOUtils;
+import org.broadinstitute.hellbender.utils.text.XReadLines;
+import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection;
+import org.broadinstitute.hellbender.utils.tsv.TableUtils;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
public final class CopyNumberFormatsUtils {
+ public static final String COMMENT_PREFIX = "@"; //SAMTextHeaderCodec.HEADER_LINE_START; we need TableReader to treat SAM header as comment lines
public static final String DOUBLE_FORMAT = "%.6f";
private CopyNumberFormatsUtils() {}
@@ -8,4 +20,32 @@ private CopyNumberFormatsUtils() {}
public static String formatDouble(final double value) {
return String.format(DOUBLE_FORMAT, value);
}
+
+ /**
+ * Extracts column names from a TSV file. Lines starting with {@link #COMMENT_PREFIX} are
+ * skipped; the first other line is split on the table column separator and taken as the
+ * column header.
+ *
+ * @param inputFile TSV file to read; checked for readability first
+ * @return the column names, in file order
+ * @throws UserException.CouldNotReadInputFile if the file cannot be read (the underlying
+ * {@link IOException} is kept as the cause)
+ * @throws UserException.BadInput if no column-header line is found or if column names are not unique
+ */
+ public static TableColumnCollection readColumnsFromHeader(final File inputFile) {
+ IOUtils.canReadFile(inputFile);
+ List columns = null;
+ try (final XReadLines reader = new XReadLines(inputFile)) {
+ while (reader.hasNext()) {
+ final String nextLine = reader.next();
+ if (!nextLine.startsWith(COMMENT_PREFIX)) {
+ // first non-comment line is the column header; nothing after it is needed
+ columns = Arrays.asList(nextLine.split(TableUtils.COLUMN_SEPARATOR_STRING));
+ break;
+ }
+ }
+ } catch (final IOException e) {
+ throw new UserException.CouldNotReadInputFile(inputFile, e);
+ }
+ if (columns == null) {
+ // reached when the file is empty or every line is a comment line
+ throw new UserException.BadInput(String.format(
+ "The input file %s does not have a header (starting with comment character %s).",
+ inputFile.getAbsolutePath(), COMMENT_PREFIX));
+ }
+ if (columns.stream().distinct().count() != columns.size()) {
+ throw new UserException.BadInput("Column headers must all be unique.");
+ }
+ return new TableColumnCollection(columns);
+ }
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java
index 489c2b9dae6..cdd112163d3 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java
@@ -163,7 +163,7 @@ static String formatDouble(final double value) {
}
final class RecordCollectionReader extends TableReader {
- private static final String COMMENT_PREFIX = "@"; //SAMTextHeaderCodec.HEADER_LINE_START; we need TableReader to treat SAM header as comment lines
+ private static final String COMMENT_PREFIX = CopyNumberFormatsUtils.COMMENT_PREFIX; //SAMTextHeaderCodec.HEADER_LINE_START; we need TableReader to treat SAM header as comment lines
private final File file;
RecordCollectionReader(final File file) throws IOException {
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollection.java
index f03906c0eab..70116a2be28 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollection.java
@@ -1,56 +1,181 @@
package org.broadinstitute.hellbender.tools.copynumber.formats.collections;
+import org.apache.commons.collections4.ListUtils;
+import org.apache.commons.lang3.tuple.Pair;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.tools.copynumber.formats.CopyNumberFormatsUtils;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval;
-import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotationSet;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations;
import org.broadinstitute.hellbender.utils.SimpleInterval;
+import org.broadinstitute.hellbender.utils.Utils;
+import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.tsv.DataLine;
import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection;
import java.io.File;
-import java.util.List;
+import java.util.*;
import java.util.function.BiConsumer;
import java.util.function.Function;
+import java.util.stream.Collectors;
/**
+ * Represents a collection of intervals annotated with {@link CopyNumberAnnotations}.
+ * Supports {@link AnnotationKey}s of integer, long, double, and string type.
+ * Can be constructed from a TSV file that contains the standard interval column headers,
+ * any subset of the {@link CopyNumberAnnotations}, and additional columns (which are ignored).
+ *
* @author Samuel Lee <slee@broadinstitute.org>
*/
public final class AnnotatedIntervalCollection extends AbstractLocatableCollection {
//note to developers: repeat the column headers in Javadoc so that they are viewable when linked
/**
- * CONTIG, START, END, GC_CONTENT
+ * CONTIG, START, END; column headers for additional annotations can be specified
*/
enum AnnotatedIntervalTableColumn {
CONTIG,
START,
- END,
- GC_CONTENT;
+ END;
- static final TableColumnCollection COLUMNS = new TableColumnCollection((Object[]) values());
+ // the interval columns only; annotation columns are appended per collection by getColumns()
+ static final TableColumnCollection STANDARD_COLUMNS = new TableColumnCollection((Object[]) values());
+ }
+
+ /**
+ * Value types that can be written to and read from the annotation columns.
+ * The constant names are looked up with {@code valueOf(key.getType().getSimpleName())},
+ * so each name must equal the simple class name of the value type it stands for.
+ */
+ enum AnnotationValueType {
+ Integer,
+ Long,
+ Double,
+ String
}
- private static final Function ANNOTATED_INTERVAL_RECORD_FROM_DATA_LINE_DECODER = dataLine -> {
- final String contig = dataLine.get(AnnotatedIntervalTableColumn.CONTIG);
- final int start = dataLine.getInt(AnnotatedIntervalTableColumn.START);
- final int end = dataLine.getInt(AnnotatedIntervalTableColumn.END);
- final double gcContent = dataLine.getDouble(AnnotatedIntervalTableColumn.GC_CONTENT);
- final SimpleInterval interval = new SimpleInterval(contig, start, end);
- final AnnotationSet annotationSet = new AnnotationSet(gcContent);
- return new AnnotatedInterval(interval, annotationSet);
+ // Encoder: writes contig, start and end, then one column per annotation in the order
+ // returned by the record's AnnotationMap#getKeys. Doubles are written via formatDouble.
+ private static final BiConsumer ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER = (annotatedInterval, dataLine) -> {
+ dataLine.append(annotatedInterval.getInterval().getContig())
+ .append(annotatedInterval.getInterval().getStart())
+ .append(annotatedInterval.getInterval().getEnd());
+ final AnnotationMap annotations = annotatedInterval.getAnnotationMap();
+ for (final AnnotationKey> key : annotations.getKeys()) {
+ final String typeName = key.getType().getSimpleName();
+ final AnnotationValueType type;
+ try {
+ type = AnnotationValueType.valueOf(typeName);
+ } catch (final IllegalArgumentException e) {
+ // valueOf throws for an unknown name before the switch is reached, so the
+ // unsupported-type error has to be raised here rather than in the default branch
+ throw new UserException.BadInput(String.format("Unsupported annotation type: %s", typeName), e);
+ }
+ switch (type) {
+ case Integer:
+ dataLine.append((Integer) annotations.getValue(key));
+ break;
+ case Long:
+ dataLine.append((Long) annotations.getValue(key));
+ break;
+ case Double:
+ dataLine.append(formatDouble((Double) annotations.getValue(key)));
+ break;
+ case String:
+ dataLine.append((String) annotations.getValue(key));
+ break;
+ default:
+ // guards against a constant being added to AnnotationValueType without a case here
+ throw new UserException.BadInput(String.format("Unsupported annotation type: %s", type));
+ }
+ }
};
- private static final BiConsumer ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER = (annotatedInterval, dataLine) ->
- dataLine.append(annotatedInterval.getInterval().getContig())
- .append(annotatedInterval.getInterval().getStart())
- .append(annotatedInterval.getInterval().getEnd())
- .append(formatDouble(annotatedInterval.getAnnotationSet().getGCContent()));
-
+ /**
+ * Reads a collection from a TSV file. The column header is read first to determine which of
+ * the known annotations are present; the file is then parsed using those annotation keys.
+ */
public AnnotatedIntervalCollection(final File inputFile) {
- super(inputFile, AnnotatedIntervalCollection.AnnotatedIntervalTableColumn.COLUMNS, ANNOTATED_INTERVAL_RECORD_FROM_DATA_LINE_DECODER, ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER);
+ this(inputFile, getAnnotationKeys(CopyNumberFormatsUtils.readColumnsFromHeader(inputFile)));
+ }
+
+ // Reads the file using the standard interval columns plus one column per given annotation key;
+ // the decoder is built for exactly those keys.
+ private AnnotatedIntervalCollection(final File inputFile,
+ final List> annotationKeys) {
+ super(
+ inputFile,
+ getColumns(annotationKeys),
+ getAnnotatedIntervalRecordFromDataLineDecoder(annotationKeys),
+ ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER);
}
+ /**
+ * Builds a collection from in-memory records. The annotation columns are taken from the
+ * annotation keys of the first record (none if the list is empty).
+ */
public AnnotatedIntervalCollection(final LocatableMetadata metadata,
final List annotatedIntervals) {
- super(metadata, annotatedIntervals, AnnotatedIntervalCollection.AnnotatedIntervalTableColumn.COLUMNS, ANNOTATED_INTERVAL_RECORD_FROM_DATA_LINE_DECODER, ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER);
+ super(
+ metadata,
+ annotatedIntervals,
+ getColumns(getAnnotationKeys(annotatedIntervals)),
+ getAnnotatedIntervalRecordFromDataLineDecoder(getAnnotationKeys(annotatedIntervals)),
+ ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER);
+ }
+
+ // Returns the standard interval columns followed by the names of the given annotation keys, in order.
+ private static TableColumnCollection getColumns(final List> annotationKeys) {
+ return new TableColumnCollection(
+ ListUtils.union(
+ AnnotatedIntervalTableColumn.STANDARD_COLUMNS.names(),
+ annotationKeys.stream().map(AnnotationKey::getName).collect(Collectors.toList())));
+ }
+
+ // Validates that the columns are non-null, non-empty and include the standard interval columns,
+ // then returns those entries of CopyNumberAnnotations.ANNOTATIONS whose names appear among the
+ // columns (in the order of ANNOTATIONS). Columns matching no known annotation are ignored.
+ private static List> getAnnotationKeys(final TableColumnCollection columns) {
+ Utils.nonNull(columns);
+ Utils.validateArg(columns.columnCount() != 0, "TableColumnCollection cannot be empty.");
+ Utils.validateArg(columns.containsAll(AnnotatedIntervalTableColumn.STANDARD_COLUMNS.names()),
+ String.format("TableColumnCollection must contain standard columns: %s.",
+ AnnotatedIntervalTableColumn.STANDARD_COLUMNS.names()));
+ return CopyNumberAnnotations.ANNOTATIONS.stream()
+ .filter(a -> columns.contains(a.getName()))
+ .collect(Collectors.toList());
+ }
+
+ // Returns the annotation keys of the first record, or an empty list if there are no records.
+ // NOTE(review): only the first record is inspected here; confirm that records with differing
+ // key sets are rejected elsewhere.
+ private static List> getAnnotationKeys(final List annotatedIntervals) {
+ return annotatedIntervals.isEmpty() ? new ArrayList<>() : annotatedIntervals.get(0).getAnnotationMap().getKeys();
+ }
+
+ // Builds a decoder that reads contig, start and end from a data line and then, for each given
+ // annotation key, reads the column with the key's name using the accessor matching the key's type.
+ private static Function getAnnotatedIntervalRecordFromDataLineDecoder(
+ final List> annotationKeys) {
+ return dataLine -> {
+ final String contig = dataLine.get(AnnotatedIntervalTableColumn.CONTIG);
+ final int start = dataLine.getInt(AnnotatedIntervalTableColumn.START);
+ final int end = dataLine.getInt(AnnotatedIntervalTableColumn.END);
+ final SimpleInterval interval = new SimpleInterval(contig, start, end);
+ final List, Object>> annotations = new ArrayList<>(annotationKeys.size());
+ for (final AnnotationKey> key : annotationKeys) {
+ final String typeName = key.getType().getSimpleName();
+ final AnnotationValueType type;
+ try {
+ type = AnnotationValueType.valueOf(typeName);
+ } catch (final IllegalArgumentException e) {
+ // valueOf throws for an unknown name before the switch is reached, so the
+ // unsupported-type error has to be raised here rather than in the default branch
+ throw new UserException.BadInput(String.format("Unsupported annotation type: %s", typeName), e);
+ }
+ switch (type) {
+ case Integer:
+ annotations.add(Pair.of(key, dataLine.getInt(key.getName())));
+ break;
+ case Long:
+ annotations.add(Pair.of(key, dataLine.getLong(key.getName())));
+ break;
+ case Double:
+ annotations.add(Pair.of(key, dataLine.getDouble(key.getName())));
+ break;
+ case String:
+ annotations.add(Pair.of(key, dataLine.get(key.getName())));
+ break;
+ default:
+ // guards against a constant being added to AnnotationValueType without a case here
+ throw new UserException.BadInput(String.format("Unsupported annotation type: %s", type));
+ }
+ }
+ final AnnotationMap annotationMap = new AnnotationMap(annotations);
+ return new AnnotatedInterval(interval, annotationMap);
+ };
+ }
+
+ /**
+ * Two collections are equal when they are of the same class and have equal metadata and
+ * equal records. Columns, encoder, and decoder are not used.
+ */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ // the class check above makes this cast safe
+ final AbstractRecordCollection, ?> that = (AbstractRecordCollection, ?>) o;
+ return getMetadata().equals(that.getMetadata()) &&
+ getRecords().equals(that.getRecords());
+ }
+
+ /**
+ * Hash of the metadata and the records, matching the fields compared by equals.
+ * Columns, encoder, and decoder are not used.
+ */
+ @Override
+ public int hashCode() {
+ int result = getMetadata().hashCode();
+ result = 31 * result + getRecords().hashCode();
+ return result;
}
-}
+}
\ No newline at end of file
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CopyNumberPosteriorDistributionCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CopyNumberPosteriorDistributionCollection.java
index 8b5a3cf8b03..584e2321820 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CopyNumberPosteriorDistributionCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CopyNumberPosteriorDistributionCollection.java
@@ -1,6 +1,7 @@
package org.broadinstitute.hellbender.tools.copynumber.formats.collections;
import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.tools.copynumber.formats.CopyNumberFormatsUtils;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CopyNumberPosteriorDistribution;
import org.broadinstitute.hellbender.tools.copynumber.gcnv.GermlineCNVNamingConstants;
import org.broadinstitute.hellbender.tools.copynumber.gcnv.IntegerCopyNumberState;
@@ -84,13 +85,10 @@ private static class IntegerCopyNumberStateCollection {
private final List copyNumberStates;
private final TableColumnCollection columnCollection;
- private static final String COMMENT_PREFIX = "@";
-
+ // Reads the column header of the file via the shared CopyNumberFormatsUtils helper and
+ // parses every column name into an integer copy-number state, in column order.
IntegerCopyNumberStateCollection(final File inputFile) {
- final List copyNumberStatesColumns = extractCopyNumberColumnsFromHeader(inputFile);
- this.columnCollection = new TableColumnCollection(copyNumberStatesColumns);
+ this.columnCollection = CopyNumberFormatsUtils.readColumnsFromHeader(inputFile);
this.copyNumberStates = new ArrayList<>();
- copyNumberStatesColumns
+ columnCollection.names()
.forEach(copyNumberString -> copyNumberStates.add(parseIntegerCopyNumber(copyNumberString)));
}
@@ -137,29 +135,5 @@ private IntegerCopyNumberState parseIntegerCopyNumber(final String copyNumberSta
"Could not parse copy-number column string (%s) to an integer copy-number.", copyNumberStateString));
}
}
-
- /**
- * Extracts column names from a TSV file
- */
- private List extractCopyNumberColumnsFromHeader(final File inputFile) {
- List columns = null;
- try (final XReadLines reader = new XReadLines(inputFile)) {
- while (reader.hasNext()) {
- String nextLine = reader.next();
- if (!nextLine.startsWith(COMMENT_PREFIX)) {
- columns = Arrays.asList(nextLine.split(TableUtils.COLUMN_SEPARATOR_STRING));
- break;
- }
- }
- } catch (final IOException e) {
- throw new UserException.CouldNotReadInputFile(inputFile);
- }
- if (columns == null) {
- throw new UserException.BadInput(String.format(
- "The input file %s does not have a header (starting with comment character %s).",
- inputFile.getAbsolutePath(), COMMENT_PREFIX));
- }
- return columns;
- }
}
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotatedInterval.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotatedInterval.java
index 1bc35f57ed1..5888eff352a 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotatedInterval.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotatedInterval.java
@@ -1,7 +1,7 @@
package org.broadinstitute.hellbender.tools.copynumber.formats.records;
import htsjdk.samtools.util.Locatable;
-import htsjdk.tribble.Feature;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
@@ -10,14 +10,14 @@
*
* @author Samuel Lee <slee@broadinstitute.org>
*/
-public class AnnotatedInterval implements Locatable, Feature {
+public class AnnotatedInterval implements Locatable {
private final SimpleInterval interval;
- private final AnnotationSet annotationSet;
+ private final AnnotationMap annotationMap;
public AnnotatedInterval(final SimpleInterval interval,
- final AnnotationSet annotationSet) {
+ final AnnotationMap annotationMap) {
this.interval = Utils.nonNull(interval);
- this.annotationSet = Utils.nonNull(annotationSet);
+ this.annotationMap = Utils.nonNull(annotationMap);
}
@Override
@@ -39,8 +39,8 @@ public SimpleInterval getInterval() {
return interval;
}
- public AnnotationSet getAnnotationSet() {
- return annotationSet;
+ public AnnotationMap getAnnotationMap() {
+ return annotationMap;
}
@Override
@@ -53,13 +53,13 @@ public boolean equals(Object o) {
}
final AnnotatedInterval that = (AnnotatedInterval) o;
- return interval.equals(that.interval) && annotationSet.equals(that.annotationSet);
+ return interval.equals(that.interval) && annotationMap.equals(that.annotationMap);
}
@Override
public int hashCode() {
int result = interval.hashCode();
- result = 31 * result + annotationSet.hashCode();
+ result = 31 * result + annotationMap.hashCode();
return result;
}
@@ -67,7 +67,7 @@ public int hashCode() {
public String toString() {
return "AnnotatedInterval{" +
"interval=" + interval +
- ", annotationSet=" + annotationSet +
+ ", annotationMap=" + annotationMap +
'}';
}
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotationSet.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotationSet.java
deleted file mode 100644
index dd3a55beddb..00000000000
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotationSet.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package org.broadinstitute.hellbender.tools.copynumber.formats.records;
-
-import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection;
-import org.broadinstitute.hellbender.utils.Utils;
-
-/**
- * Represents a set of annotations for an interval. Currently, only GC content is represented.
- *
- * @author Samuel Lee <slee@broadinstitute.org>
- */
-public final class AnnotationSet {
- /**
- * If additional annotation fields are added here, then {@link AnnotatedIntervalCollection}
- * should be updated accordingly.
- */
- private final double gcContent;
-
- public AnnotationSet(final double gcContent) {
- Utils.validateArg((0. <= gcContent && gcContent <= 1.) || Double.isNaN(gcContent),
- "GC content must be in [0, 1] or NaN.");
- this.gcContent = gcContent;
- }
-
- public double getGCContent() {
- return gcContent;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- final AnnotationSet that = (AnnotationSet) o;
- return Double.compare(that.gcContent, gcContent) == 0;
- }
-
- @Override
- public int hashCode() {
- long temp = Double.doubleToLongBits(gcContent);
- return (int) (temp ^ (temp >>> 32));
- }
-
- @Override
- public String toString() {
- return "AnnotationSet{" +
- "gcContent=" + gcContent +
- '}';
- }
-}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationKey.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationKey.java
new file mode 100644
index 00000000000..5dfd8f02685
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationKey.java
@@ -0,0 +1,85 @@
+package org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation;
+
+import org.broadinstitute.hellbender.utils.Utils;
+
+import java.util.function.Function;
+
+/**
+ * Represents a key for a named, typed annotation.
+ *
+ * Equality and hashing use only the name and the value class; the validation function is not compared.
+ *
+ * @param <T> type of the values stored under this key
+ *
+ * @author Samuel Lee <slee@broadinstitute.org>
+ */
+public final class AnnotationKey<T> {
+    private final String name;
+    private final Class<T> clazz;
+    private final Function<T, Boolean> validateValue;
+
+    /**
+     * @param name          name of the annotation; checked with {@code Utils.nonEmpty}
+     * @param clazz         class of the annotation values; checked with {@code Utils.nonNull}
+     * @param validateValue function that returns {@code true} for acceptable values; checked with {@code Utils.nonNull}
+     */
+    public AnnotationKey(final String name,
+                         final Class<T> clazz,
+                         final Function<T, Boolean> validateValue) {
+        this.name = Utils.nonEmpty(name);
+        this.clazz = Utils.nonNull(clazz);
+        this.validateValue = Utils.nonNull(validateValue);
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * @return the class of the values stored under this key
+     */
+    public Class<T> getType() {
+        return clazz;
+    }
+
+    /**
+     * Applies the validation function to {@code value} and passes the result to {@code Utils.validateArg}.
+     *
+     * @param value candidate annotation value
+     * @return {@code value}, unchanged, so that the call can be used inline
+     */
+    public T validate(final T value) {
+        Utils.validateArg(validateValue.apply(value),
+                String.format("Invalid value %s for annotation %s.", value, name));
+        return value;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+
+        // The type argument of the other key is unknown here, so it is viewed through a wildcard.
+        final AnnotationKey<?> that = (AnnotationKey<?>) o;
+        return name.equals(that.name) && clazz.equals(that.clazz);
+    }
+
+    @Override
+    public int hashCode() {
+        int result = name.hashCode();
+        result = 31 * result + clazz.hashCode();
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        return "AnnotationKey{" +
+                "name='" + name + '\'' +
+                ", class=" + clazz +
+                '}';
+    }
+}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationMap.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationMap.java
new file mode 100644
index 00000000000..a5fe52e05e4
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationMap.java
@@ -0,0 +1,77 @@
+package org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.commons.lang3.tuple.Pair;
+import org.broadinstitute.hellbender.utils.Utils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Represents an immutable ordered collection of named, typed annotations for an interval.
+ *
+ * @author Samuel Lee <slee@broadinstitute.org>
+ */
+public final class AnnotationMap {
+
+    private final Map<AnnotationKey<?>, Object> annotationMap;
+
+    /**
+     * @param annotations list of (key, value) pairs, checked with {@code Utils.nonEmpty}; the entries are
+     *                    added to an {@link ImmutableMap} builder in list order
+     */
+    public AnnotationMap(final List<Pair<AnnotationKey<?>, Object>> annotations) {
+        Utils.nonEmpty(annotations);
+        final ImmutableMap.Builder<AnnotationKey<?>, Object> builder = new ImmutableMap.Builder<>();
+        annotations.forEach(a -> builder.put(a.getKey(), a.getValue()));
+        annotationMap = builder.build();
+    }
+
+    /**
+     * @return a new list holding the keys of this map; changes to the returned list do not affect the map
+     */
+    public List<AnnotationKey<?>> getKeys() {
+        return new ArrayList<>(annotationMap.keySet());
+    }
+
+    /**
+     * @param key key of the annotation to look up
+     * @param <T> value type declared by {@code key}
+     * @return the stored value, cast to the type declared by {@code key}
+     * @throws IllegalArgumentException if {@code key} is not contained in this map
+     */
+    public <T> T getValue(final AnnotationKey<T> key) {
+        Utils.nonNull(key);
+        if (!annotationMap.containsKey(key)) {
+            throw new IllegalArgumentException(
+                    String.format("Annotation %s not contained in AnnotationMap.", key.getName()));
+        }
+        return key.getType().cast(annotationMap.get(key));
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+
+        final AnnotationMap that = (AnnotationMap) o;
+        return annotationMap.equals(that.annotationMap);
+    }
+
+    @Override
+    public int hashCode() {
+        return annotationMap.hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return "AnnotationMap{" +
+                "annotationMap=" + annotationMap +
+                '}';
+    }
+}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/CopyNumberAnnotations.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/CopyNumberAnnotations.java
new file mode 100644
index 00000000000..74f8b55134f
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/CopyNumberAnnotations.java
@@ -0,0 +1,34 @@
+package org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.function.Function;
+
+/**
+ * Defines the annotation keys available for copy-number intervals. This class only holds constants.
+ */
+public final class CopyNumberAnnotations {
+    /**
+     * Accepts values in [0, 1] as well as NaN; shared by all of the keys below.
+     */
+    private static final Function<Double, Boolean> IS_FRACTION_OR_NAN =
+            value -> (0. <= value && value <= 1.) || Double.isNaN(value);
+
+    public static final AnnotationKey<Double> GC_CONTENT = new AnnotationKey<>(
+            "GC_CONTENT", Double.class, IS_FRACTION_OR_NAN);
+
+    public static final AnnotationKey<Double> MAPPABILITY = new AnnotationKey<>(
+            "MAPPABILITY", Double.class, IS_FRACTION_OR_NAN);
+
+    public static final AnnotationKey<Double> SEGMENTAL_DUPLICATION_CONTENT = new AnnotationKey<>(
+            "SEGMENTAL_DUPLICATION_CONTENT", Double.class, IS_FRACTION_OR_NAN);
+
+    /**
+     * This defines the canonical order of these annotations. The list is unmodifiable.
+     */
+    public static final List<AnnotationKey<?>> ANNOTATIONS = Collections.unmodifiableList(
+            Arrays.asList(GC_CONTENT, MAPPABILITY, SEGMENTAL_DUPLICATION_CONTENT));
+
+    private CopyNumberAnnotations() {}
+}
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/tsv/TableColumnCollection.java b/src/main/java/org/broadinstitute/hellbender/utils/tsv/TableColumnCollection.java
index b480418fa77..b43efd02d07 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/tsv/TableColumnCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/tsv/TableColumnCollection.java
@@ -338,4 +338,30 @@ public static String[] checkNames(final String[] columnNames,
 }
 return columnNames;
 }
+
+    /**
+     * Compares this collection with another object.
+     *
+     * @return {@code true} only when {@code o} has the same class and both {@code names}
+     *         and {@code indexByName} are equal
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this != o) {
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            final TableColumnCollection other = (TableColumnCollection) o;
+            return names.equals(other.names) && indexByName.equals(other.indexByName);
+        }
+        return true;
+    }
+
+    /**
+     * @return a hash built from {@code names} and {@code indexByName}, the fields compared by {@code equals}
+     */
+    @Override
+    public int hashCode() {
+        return 31 * names.hashCode() + indexByName.hashCode();
+    }
 }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervalsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervalsIntegrationTest.java
index 83ca68c19ee..b7ef0f40b4a 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervalsIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervalsIntegrationTest.java
@@ -1,15 +1,19 @@
package org.broadinstitute.hellbender.tools.copynumber;
import htsjdk.samtools.SAMSequenceDictionary;
+import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection;
+import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollectionUnitTest;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleLocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval;
-import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotationSet;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations;
import org.broadinstitute.hellbender.utils.IntervalMergingRule;
import org.broadinstitute.hellbender.utils.IntervalSetRule;
import org.broadinstitute.hellbender.utils.SimpleInterval;
@@ -18,7 +22,11 @@
import org.testng.annotations.Test;
import java.io.File;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
/**
* Integration tests for {@link AnnotateIntervals}.
@@ -29,34 +37,122 @@ public final class AnnotateIntervalsIntegrationTest extends CommandLineProgramTe
private static final File TEST_SUB_DIR = new File(toolsTestDir, "copynumber");
private static final File INTERVALS_FILE = new File(TEST_SUB_DIR, "annotate-intervals-test.interval_list");
private static final File REFERENCE_FILE = new File(b37_reference_20_21);
+ private static final File MAPPABILITY_TRACK_FILE = new File(TEST_SUB_DIR,
+ "annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz");
+ private static final File SEGMENTAL_DUPLICATION_TRACK_FILE = new File(TEST_SUB_DIR,
+ "annotate-intervals-hg19-segmental-duplication-20-21.bed.gz");
private static final SAMSequenceDictionary SEQUENCE_DICTIONARY = ReferenceDataSource.of(REFERENCE_FILE.toPath()).getSequenceDictionary();
private static final LocatableMetadata LOCATABLE_METADATA = new SimpleLocatableMetadata(SEQUENCE_DICTIONARY);
/**
- * Test that intervals are sorted according to {@link #SEQUENCE_DICTIONARY}
- * and adjacent intervals are not merged. GC content truth was taken from AnnotateTargets (a previous version of the tool).
+ * Test case checks that intervals are sorted according to {@link #SEQUENCE_DICTIONARY} and
+ * adjacent intervals are not merged. This test case is also used in {@link AnnotatedIntervalCollectionUnitTest}.
*/
+ private static final AnnotatedIntervalCollection EXPECTED_ALL_ANNOTATIONS = new AnnotatedIntervalCollection(
+ LOCATABLE_METADATA,
+ Arrays.asList(
+ new AnnotatedInterval(new SimpleInterval("20", 1000001, 1001000),
+ new AnnotationMap(Arrays.asList(
+ Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.49),
+ Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.),
+ Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+ new AnnotatedInterval(new SimpleInterval("20", 1001001, 1002000),
+ new AnnotationMap(Arrays.asList(
+ Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.483),
+ Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.),
+ Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+ new AnnotatedInterval(new SimpleInterval("20", 1002001, 1003000),
+ new AnnotationMap(Arrays.asList(
+ Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.401),
+ Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.),
+ Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+ new AnnotatedInterval(new SimpleInterval("20", 1003001, 1004000),
+ new AnnotationMap(Arrays.asList(
+ Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.448),
+ Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.),
+ Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+ new AnnotatedInterval(new SimpleInterval("21", 1, 100),
+ new AnnotationMap(Arrays.asList(
+ Pair.of(CopyNumberAnnotations.GC_CONTENT, Double.NaN),
+ Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.0),
+ Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+ new AnnotatedInterval(new SimpleInterval("21", 101, 200),
+ new AnnotationMap(Arrays.asList(
+ Pair.of(CopyNumberAnnotations.GC_CONTENT, Double.NaN),
+ Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.0),
+ Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.))))));
+
+ @Test
+ public void testGCContentOnly() {
+ final File outputFile = createTempFile("annotate-intervals-test", ".tsv");
+ final ArgumentsBuilder argsBuilder = new ArgumentsBuilder()
+ .addReference(REFERENCE_FILE)
+ .addArgument(StandardArgumentDefinitions.INTERVALS_LONG_NAME, INTERVALS_FILE.getAbsolutePath())
+ .addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString())
+ .addOutput(outputFile);
+ runCommandLine(argsBuilder);
+ final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(outputFile);
+ final AnnotatedIntervalCollection expected = AnnotatedIntervalCollectionUnitTest.subsetAnnotations(
+ EXPECTED_ALL_ANNOTATIONS,
+ Collections.singletonList(CopyNumberAnnotations.GC_CONTENT));
+ Assert.assertEquals(result, expected);
+ Assert.assertNotSame(result, expected);
+ }
+
@Test
- public void test() {
+ public void testMappability() {
final File outputFile = createTempFile("annotate-intervals-test", ".tsv");
final ArgumentsBuilder argsBuilder = new ArgumentsBuilder()
.addReference(REFERENCE_FILE)
+ .addFileArgument(AnnotateIntervals.MAPPABILITY_TRACK_PATH_LONG_NAME, MAPPABILITY_TRACK_FILE)
.addArgument(StandardArgumentDefinitions.INTERVALS_LONG_NAME, INTERVALS_FILE.getAbsolutePath())
.addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString())
.addOutput(outputFile);
runCommandLine(argsBuilder);
final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(outputFile);
+ final AnnotatedIntervalCollection expected = AnnotatedIntervalCollectionUnitTest.subsetAnnotations(
+ EXPECTED_ALL_ANNOTATIONS,
+ Arrays.asList(
+ CopyNumberAnnotations.GC_CONTENT,
+ CopyNumberAnnotations.MAPPABILITY));
+ Assert.assertEquals(result, expected);
+ Assert.assertNotSame(result, expected);
+ }
- final AnnotatedIntervalCollection expected = new AnnotatedIntervalCollection(
- LOCATABLE_METADATA,
+ @Test
+ public void testSegmentalDuplicationContent() {
+ final File outputFile = createTempFile("annotate-intervals-test", ".tsv");
+ final ArgumentsBuilder argsBuilder = new ArgumentsBuilder()
+ .addReference(REFERENCE_FILE)
+ .addFileArgument(AnnotateIntervals.SEGMENTAL_DUPLICATION_TRACK_PATH_LONG_NAME, SEGMENTAL_DUPLICATION_TRACK_FILE)
+ .addArgument(StandardArgumentDefinitions.INTERVALS_LONG_NAME, INTERVALS_FILE.getAbsolutePath())
+ .addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString())
+ .addOutput(outputFile);
+ runCommandLine(argsBuilder);
+ final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(outputFile);
+ final AnnotatedIntervalCollection expected = AnnotatedIntervalCollectionUnitTest.subsetAnnotations(
+ EXPECTED_ALL_ANNOTATIONS,
Arrays.asList(
- new AnnotatedInterval(new SimpleInterval("20", 1000001, 1001000), new AnnotationSet(0.49)),
- new AnnotatedInterval(new SimpleInterval("20", 1001001, 1002000), new AnnotationSet(0.483)),
- new AnnotatedInterval(new SimpleInterval("20", 1002001, 1003000), new AnnotationSet(0.401)),
- new AnnotatedInterval(new SimpleInterval("20", 1003001, 1004000), new AnnotationSet(0.448)),
- new AnnotatedInterval(new SimpleInterval("21", 1, 100), new AnnotationSet(Double.NaN)),
- new AnnotatedInterval(new SimpleInterval("21", 101, 200), new AnnotationSet(Double.NaN))));
+ CopyNumberAnnotations.GC_CONTENT,
+ CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT));
+ Assert.assertEquals(result, expected);
+ Assert.assertNotSame(result, expected);
+ }
+
+ @Test
+ public void testAllAnnotations() {
+ final File outputFile = createTempFile("annotate-intervals-test", ".tsv");
+ final ArgumentsBuilder argsBuilder = new ArgumentsBuilder()
+ .addReference(REFERENCE_FILE)
+ .addFileArgument(AnnotateIntervals.MAPPABILITY_TRACK_PATH_LONG_NAME, MAPPABILITY_TRACK_FILE)
+ .addFileArgument(AnnotateIntervals.SEGMENTAL_DUPLICATION_TRACK_PATH_LONG_NAME, SEGMENTAL_DUPLICATION_TRACK_FILE)
+ .addArgument(StandardArgumentDefinitions.INTERVALS_LONG_NAME, INTERVALS_FILE.getAbsolutePath())
+ .addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString())
+ .addOutput(outputFile);
+ runCommandLine(argsBuilder);
+ final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(outputFile);
+ final AnnotatedIntervalCollection expected = EXPECTED_ALL_ANNOTATIONS;
Assert.assertEquals(result, expected);
Assert.assertNotSame(result, expected);
}
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormalsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormalsIntegrationTest.java
index c21853d65b0..3c7b1038978 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormalsIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormalsIntegrationTest.java
@@ -3,6 +3,7 @@
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.apache.commons.collections4.ListUtils;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.linear.*;
import org.apache.commons.math3.random.RandomDataGenerator;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
@@ -18,8 +19,9 @@
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleLocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleSampleLocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval;
-import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotationSet;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.SimpleCount;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.testng.Assert;
@@ -102,7 +104,8 @@ public Object[][] dataPanelOfNormals() {
final AnnotatedIntervalCollection annotatedIntervals = new AnnotatedIntervalCollection(
new SimpleLocatableMetadata(SEQUENCE_DICTIONARY),
IntStream.range(0, NUM_INTERVALS)
- .mapToObj(i -> new AnnotatedInterval(intervals.get(i), new AnnotationSet(intervalGCContent[i])))
+ .mapToObj(i -> new AnnotatedInterval(intervals.get(i),
+ new AnnotationMap(Collections.singletonList(Pair.of(CopyNumberAnnotations.GC_CONTENT, intervalGCContent[i])))))
.collect(Collectors.toList()));
final File annotatedIntervalsFile = createTempFile("annotated-intervals", ".tsv");
annotatedIntervals.write(annotatedIntervalsFile);
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollectionUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollectionUnitTest.java
index fad7301632d..7db5675b833 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollectionUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollectionUnitTest.java
@@ -79,8 +79,7 @@ public void testReadMissingNucleotides() {
@Test
public void testWrite() throws IOException {
final File outputFile = createTempFile("allelic-count-collection-test-output", ".tsv");
- final AllelicCountCollection allelicCounts = new AllelicCountCollection(ALLELIC_COUNTS_FILE);
- allelicCounts.write(outputFile);
+ ALLELIC_COUNTS_EXPECTED.write(outputFile);
Assert.assertTrue(FileUtils.contentEquals(outputFile, ALLELIC_COUNTS_FILE));
}
}
\ No newline at end of file
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollectionUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollectionUnitTest.java
new file mode 100644
index 00000000000..8d238338c22
--- /dev/null
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollectionUnitTest.java
@@ -0,0 +1,148 @@
+package org.broadinstitute.hellbender.tools.copynumber.formats.collections;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.Pair;
+import org.broadinstitute.hellbender.GATKBaseTest;
+import org.broadinstitute.hellbender.engine.ReferenceDataSource;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata;
+import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleLocatableMetadata;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap;
+import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations;
+import org.broadinstitute.hellbender.utils.SimpleInterval;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Unit tests for reading and writing {@link AnnotatedIntervalCollection}.
+ */
+public final class AnnotatedIntervalCollectionUnitTest extends GATKBaseTest {
+    private static final File TEST_SUB_DIR = new File(toolsTestDir, "copynumber/formats/collections");
+    private static final File ANNOTATED_INTERVALS_ALL_ANNOTATIONS_FILE = new File(TEST_SUB_DIR,
+            "annotated-intervals-all-annotations.tsv");
+    private static final File ANNOTATED_INTERVALS_EXTRA_ANNOTATION_FILE = new File(TEST_SUB_DIR,
+            "annotated-intervals-extra-annotation.tsv");
+    private static final File ANNOTATED_INTERVALS_REPEATED_ANNOTATION_FILE = new File(TEST_SUB_DIR,
+            "annotated-intervals-repeated-annotation.tsv");
+    private static final File ANNOTATED_INTERVALS_GC_CONTENT_ONLY_FILE = new File(TEST_SUB_DIR,
+            "annotated-intervals-gc-content-only.tsv");
+    private static final File REFERENCE_FILE = new File(b37_reference_20_21);
+
+    private static final SAMSequenceDictionary SEQUENCE_DICTIONARY = ReferenceDataSource.of(REFERENCE_FILE.toPath()).getSequenceDictionary();
+    private static final LocatableMetadata LOCATABLE_METADATA = new SimpleLocatableMetadata(SEQUENCE_DICTIONARY);
+
+    private static final AnnotatedIntervalCollection EXPECTED_ALL_ANNOTATIONS = new AnnotatedIntervalCollection(
+            LOCATABLE_METADATA,
+            Arrays.asList(
+                    new AnnotatedInterval(new SimpleInterval("20", 1000001, 1001000),
+                            new AnnotationMap(Arrays.asList(
+                                    Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.49),
+                                    Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.),
+                                    Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+                    new AnnotatedInterval(new SimpleInterval("20", 1001001, 1002000),
+                            new AnnotationMap(Arrays.asList(
+                                    Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.483),
+                                    Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.),
+                                    Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+                    new AnnotatedInterval(new SimpleInterval("20", 1002001, 1003000),
+                            new AnnotationMap(Arrays.asList(
+                                    Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.401),
+                                    Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.),
+                                    Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+                    new AnnotatedInterval(new SimpleInterval("20", 1003001, 1004000),
+                            new AnnotationMap(Arrays.asList(
+                                    Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.448),
+                                    Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.),
+                                    Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+                    new AnnotatedInterval(new SimpleInterval("21", 1, 100),
+                            new AnnotationMap(Arrays.asList(
+                                    Pair.of(CopyNumberAnnotations.GC_CONTENT, Double.NaN),
+                                    Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.0),
+                                    Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))),
+                    new AnnotatedInterval(new SimpleInterval("21", 101, 200),
+                            new AnnotationMap(Arrays.asList(
+                                    Pair.of(CopyNumberAnnotations.GC_CONTENT, Double.NaN),
+                                    Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.0),
+                                    Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.))))));
+
+    @Test
+    public void testRead() {
+        final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(ANNOTATED_INTERVALS_ALL_ANNOTATIONS_FILE);
+        Assert.assertEquals(result, EXPECTED_ALL_ANNOTATIONS);
+        Assert.assertNotSame(result, EXPECTED_ALL_ANNOTATIONS);
+    }
+
+    /**
+     * Extra annotations not listed in {@link CopyNumberAnnotations} should be ignored.
+     */
+    @Test
+    public void testReadExtraAnnotation() {
+        final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(ANNOTATED_INTERVALS_EXTRA_ANNOTATION_FILE);
+        Assert.assertEquals(result, EXPECTED_ALL_ANNOTATIONS);
+        Assert.assertNotSame(result, EXPECTED_ALL_ANNOTATIONS);
+    }
+
+    /**
+     * Reading a file with a repeated annotation should fail with {@link UserException.BadInput}.
+     */
+    @Test(expectedExceptions = UserException.BadInput.class)
+    public void testReadRepeatedAnnotation() {
+        // Construction itself is expected to throw, so there is nothing to assert afterwards.
+        new AnnotatedIntervalCollection(ANNOTATED_INTERVALS_REPEATED_ANNOTATION_FILE);
+    }
+
+    @Test
+    public void testWrite() throws IOException {
+        final File outputFile = createTempFile("annotated-interval-collection-test-output", ".tsv");
+        EXPECTED_ALL_ANNOTATIONS.write(outputFile);
+        Assert.assertTrue(FileUtils.contentEquals(outputFile, ANNOTATED_INTERVALS_ALL_ANNOTATIONS_FILE));
+    }
+
+    @Test
+    public void testReadGCContentOnly() {
+        final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(ANNOTATED_INTERVALS_GC_CONTENT_ONLY_FILE);
+        final AnnotatedIntervalCollection expected = AnnotatedIntervalCollectionUnitTest.subsetAnnotations(
+                EXPECTED_ALL_ANNOTATIONS,
+                Collections.singletonList(CopyNumberAnnotations.GC_CONTENT));
+        Assert.assertEquals(result, expected);
+        Assert.assertNotSame(result, expected);
+    }
+
+    /**
+     * Returns a new interval with the same location that carries only the annotations for {@code annotationKeys}, in that order.
+     */
+    private static AnnotatedInterval subsetAnnotations(final AnnotatedInterval annotatedInterval,
+                                                       final List<AnnotationKey<?>> annotationKeys) {
+        final List<Pair<AnnotationKey<?>, Object>> subsetAnnotationEntries = new ArrayList<>();
+        for (final AnnotationKey<?> annotationKey : annotationKeys) {
+            subsetAnnotationEntries.add(Pair.of(
+                    annotationKey,
+                    annotatedInterval.getAnnotationMap().getValue(annotationKey)));
+        }
+        final AnnotationMap subsetAnnotationMap = new AnnotationMap(subsetAnnotationEntries);
+        return new AnnotatedInterval(annotatedInterval.getInterval(), subsetAnnotationMap);
+    }
+
+    /**
+     * Returns a new collection with the same metadata in which every record keeps only the annotations for {@code annotationKeys}.
+     */
+    public static AnnotatedIntervalCollection subsetAnnotations(final AnnotatedIntervalCollection annotatedIntervals,
+                                                                final List<AnnotationKey<?>> annotationKeys) {
+        return new AnnotatedIntervalCollection(
+                annotatedIntervals.getMetadata(),
+                annotatedIntervals.getRecords().stream()
+                        .map(i -> subsetAnnotations(i, annotationKeys))
+                        .collect(Collectors.toList()));
+    }
+}
\ No newline at end of file
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz
new file mode 100644
index 00000000000..4ee6aac759d
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz.tbi b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz.tbi
new file mode 100644
index 00000000000..f0555079e69
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz.tbi differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz
new file mode 100644
index 00000000000..24988af82c2
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz.tbi b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz.tbi
new file mode 100644
index 00000000000..5ac8e7128c3
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz.tbi differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-all-annotations.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-all-annotations.tsv
new file mode 100644
index 00000000000..f8d561dc983
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-all-annotations.tsv
@@ -0,0 +1,10 @@
+@HD VN:1.5
+@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta
+@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta
+CONTIG START END GC_CONTENT MAPPABILITY SEGMENTAL_DUPLICATION_CONTENT
+20 1000001 1001000 0.490000 1.000000 0.000000
+20 1001001 1002000 0.483000 1.000000 0.000000
+20 1002001 1003000 0.401000 1.000000 0.000000
+20 1003001 1004000 0.448000 1.000000 0.000000
+21 1 100 NaN 0.000000 0.000000
+21 101 200 NaN 0.000000 0.000000
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-extra-annotation.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-extra-annotation.tsv
new file mode 100644
index 00000000000..a3f8fcd6dfa
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-extra-annotation.tsv
@@ -0,0 +1,10 @@
+@HD VN:1.5
+@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta
+@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta
+CONTIG START END GC_CONTENT MAPPABILITY SEGMENTAL_DUPLICATION_CONTENT EXTRA_ANNOTATION
+20 1000001 1001000 0.490000 1.000000 0.000000 0.000000
+20 1001001 1002000 0.483000 1.000000 0.000000 0.000000
+20 1002001 1003000 0.401000 1.000000 0.000000 0.000000
+20 1003001 1004000 0.448000 1.000000 0.000000 0.000000
+21 1 100 NaN 0.000000 0.000000 0.000000
+21 101 200 NaN 0.000000 0.000000 0.000000
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-gc-content-only.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-gc-content-only.tsv
new file mode 100644
index 00000000000..7eae2e7f464
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-gc-content-only.tsv
@@ -0,0 +1,10 @@
+@HD VN:1.5
+@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta
+@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta
+CONTIG START END GC_CONTENT
+20 1000001 1001000 0.490000
+20 1001001 1002000 0.483000
+20 1002001 1003000 0.401000
+20 1003001 1004000 0.448000
+21 1 100 NaN
+21 101 200 NaN
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-repeated-annotation.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-repeated-annotation.tsv
new file mode 100644
index 00000000000..cb0f46c3790
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-repeated-annotation.tsv
@@ -0,0 +1,10 @@
+@HD VN:1.5
+@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta
+@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta
+CONTIG START END GC_CONTENT MAPPABILITY SEGMENTAL_DUPLICATION_CONTENT SEGMENTAL_DUPLICATION_CONTENT
+20 1000001 1001000 0.490000 1.000000 0.000000 0.000000
+20 1001001 1002000 0.483000 1.000000 0.000000 0.000000
+20 1002001 1003000 0.401000 1.000000 0.000000 0.000000
+20 1003001 1004000 0.448000 1.000000 0.000000 0.000000
+21 1 100 NaN 0.000000 0.000000 0.000000
+21 101 200 NaN 0.000000 0.000000 0.000000