diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java index a1b00c2fe15..40f5c45b430 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java @@ -1,7 +1,10 @@ package org.broadinstitute.hellbender.tools.copynumber; import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.CoordMath; import htsjdk.samtools.util.Locatable; +import htsjdk.tribble.bed.BEDFeature; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; @@ -9,23 +12,25 @@ import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; import org.broadinstitute.hellbender.engine.*; +import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleLocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval; -import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotationSet; -import org.broadinstitute.hellbender.utils.IntervalMergingRule; -import org.broadinstitute.hellbender.utils.Nucleotide; -import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey; +import 
org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations; +import org.broadinstitute.hellbender.utils.*; import java.io.File; import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; /** - * Annotates intervals with GC content. The output may optionally be used as input to - * {@link CreateReadCountPanelOfNormals} or {@link DenoiseReadCounts}. In the former case, - * using the resulting panel as input to {@link DenoiseReadCounts} will perform explicit GC-bias correction. + * Annotates intervals with GC content, and optionally, mappability and segmental-duplication content. + * The output may optionally be used as input to {@link CreateReadCountPanelOfNormals}, {@link DenoiseReadCounts}, + * and {@link GermlineCNVCaller}. * *

Inputs

* @@ -39,46 +44,99 @@ * The argument {@code interval-merging-rule} must be set to {@link IntervalMergingRule#OVERLAPPING_ONLY} * and all other common arguments for interval padding or merging must be set to their defaults. * + *
  • + * (Optional) Umap single-read mappability track. + * This is a BED file in .bed or .bed.gz format that identifies uniquely mappable regions of the genome. + * The track should correspond to the appropriate read length, and overlapping intervals must be merged. + * See https://bismap.hoffmanlab.org/. If scores are provided, + * each interval will be annotated with the length-weighted average score; scores may not be NaN, since NaN denotes a missing score. Otherwise, the covered and uncovered + * portions of each interval will be scored as unity and zero, respectively, so that the annotation is the fraction of the interval that is covered. + *
  • + *
  • + * (Optional) Segmental-duplication track. + * This is a BED file in .bed or .bed.gz format that identifies segmental-duplication regions of the genome. + * Overlapping intervals must be merged. If scores are provided, each interval will be annotated with the + * length-weighted average score; scores may not be NaN, since NaN denotes a missing score. Otherwise, the covered and uncovered portions + * of each interval will be scored as unity and zero, respectively, so that the annotation is the fraction of the interval that is covered. + *
  • * * *

    Output

    * * * - *

    Usage example

    + *

    Usage examples

    + * + *
    + *     gatk AnnotateIntervals \
    + *          -R reference.fa \
    + *          -L intervals.interval_list \
    + *          --interval-merging-rule OVERLAPPING_ONLY \
    + *          -O annotated_intervals.tsv
    + * 
    * *
      *     gatk AnnotateIntervals \
      *          -R reference.fa \
      *          -L intervals.interval_list \
    + *          --mappability-track mappability.bed.gz \
    + *          --segmental-duplication-track segmental_duplication.bed.gz \
      *          --interval-merging-rule OVERLAPPING_ONLY \
      *          -O annotated_intervals.tsv
      * 
    * - * @author David Benjamin <davidben@broadinstitute.org> * @author Samuel Lee <slee@broadinstitute.org> */ @CommandLineProgramProperties( - summary = "Annotates intervals with GC content", - oneLineSummary = "Annotates intervals with GC content", + summary = "Annotates intervals with GC content, mappability, and segmental-duplication content", + oneLineSummary = "Annotates intervals with GC content, mappability, and segmental-duplication content", programGroup = CopyNumberProgramGroup.class ) @DocumentedFeature @BetaFeature public final class AnnotateIntervals extends GATKTool { + private static final int DEFAULT_FEATURE_QUERY_LOOKAHEAD_IN_BP = 1_000_000; + + public static final String MAPPABILITY_TRACK_PATH_LONG_NAME = "mappability-track"; + public static final String SEGMENTAL_DUPLICATION_TRACK_PATH_LONG_NAME = "segmental-duplication-track"; + public static final String FEATURE_QUERY_LOOKAHEAD = "feature-query-lookahead"; + @Argument( doc = "Output file for annotated intervals.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) - protected File outputAnnotatedIntervalsFile; + private File outputAnnotatedIntervalsFile; + + @Argument( + doc = "Path to Umap single-read mappability track in .bed or .bed.gz format (see https://bismap.hoffmanlab.org/). " + + "Overlapping intervals must be merged.", + fullName = MAPPABILITY_TRACK_PATH_LONG_NAME, + optional = true + ) + private FeatureInput mappabilityTrackPath; + + @Argument( + doc = "Path to segmental-duplication track in .bed or .bed.gz format (see https://bismap.hoffmanlab.org/). 
" + + "Overlapping intervals must be merged.", + fullName = SEGMENTAL_DUPLICATION_TRACK_PATH_LONG_NAME, + optional = true + ) + private FeatureInput segmentalDuplicationTrackPath; + + @Argument( + doc = "Number of bases to cache when querying feature tracks.", + fullName = FEATURE_QUERY_LOOKAHEAD, + optional = true + ) + private int featureQueryLookahead = DEFAULT_FEATURE_QUERY_LOOKAHEAD_IN_BP; @Override public boolean requiresReference() { @@ -93,7 +151,8 @@ public boolean requiresIntervals() { private List intervals; private SAMSequenceDictionary sequenceDictionary; private ReferenceDataSource reference; - private final GCContentAnnotator gcContentAnnotator = new GCContentAnnotator(); + private FeatureManager features; + private List> annotators = new ArrayList<>(); private AnnotatedIntervalCollection annotatedIntervals; @Override @@ -103,45 +162,94 @@ public void onTraversalStart() { logger.info("Loading intervals for annotation..."); sequenceDictionary = getBestAvailableSequenceDictionary(); intervals = intervalArgumentCollection.getIntervals(sequenceDictionary); + + logger.info("Loading resources for annotation..."); reference = ReferenceDataSource.of(referenceArguments.getReferencePath()); //the GATKTool ReferenceDataSource is package-protected, so we cannot access it directly + features = new FeatureManager( //the GATKTool FeatureManager is package-protected, so we cannot access it directly + this, + featureQueryLookahead, + cloudPrefetchBuffer, + cloudIndexPrefetchBuffer, + referenceArguments.getReferencePath()); + + // always perform GC-content annotation + logger.info("Adding GC-content annotator..."); + annotators.add(new GCContentAnnotator()); + + // add optional annotators + if (mappabilityTrackPath != null) { + logger.info("Adding mappability annotator..."); + annotators.add(new MappabilityAnnotator(mappabilityTrackPath)); + } + if (segmentalDuplicationTrackPath != null) { + logger.info("Adding segmental-duplication-content annotator..."); + 
annotators.add(new SegmentalDuplicationContentAnnotator(segmentalDuplicationTrackPath)); + } + logger.info("Annotating intervals..."); } @Override public void traverse() { final List annotatedIntervalList = new ArrayList<>(intervals.size()); - intervals.forEach(interval -> { - annotatedIntervalList.add(new AnnotatedInterval( - interval, - new AnnotationSet(gcContentAnnotator.apply( - interval, null, new ReferenceContext(reference, interval), null)))); + for (final SimpleInterval interval : intervals) { + final ReferenceContext referenceContext = new ReferenceContext(reference, interval); + final FeatureContext featureContext = new FeatureContext(features, interval); + final AnnotationMap annotations = new AnnotationMap(annotators.stream() + .collect(Collectors.mapping( + a -> Pair.of( + a.getAnnotationKey(), + a.applyAndValidate(interval, referenceContext, featureContext)), + Collectors.toList()))); + annotatedIntervalList.add(new AnnotatedInterval(interval, annotations)); progressMeter.update(interval); - }); + } annotatedIntervals = new AnnotatedIntervalCollection(new SimpleLocatableMetadata(sequenceDictionary), annotatedIntervalList); } @Override public Object onTraversalSuccess() { + reference.close(); + features.close(); logger.info(String.format("Writing annotated intervals to %s...", outputAnnotatedIntervalsFile)); annotatedIntervals.write(outputAnnotatedIntervalsFile); return super.onTraversalSuccess(); } - //if additional annotators are added to this tool, they should follow this interface - //(and validation that the required resources are available should be performed) - private interface IntervalAnnotator { - T apply(final Locatable interval, - final ReadsContext readsContext, - final ReferenceContext referenceContext, - final FeatureContext featureContext); + /** + * If additional annotators are added to this tool, they should follow this interface. 
+ * Validation that the required resources are available should be performed before + * calling {@link IntervalAnnotator#apply}. + */ + abstract static class IntervalAnnotator { + public abstract AnnotationKey getAnnotationKey(); + + abstract T apply(final Locatable interval, + final ReferenceContext referenceContext, + final FeatureContext featureContext); + + T applyAndValidate(final Locatable interval, + final ReferenceContext referenceContext, + final FeatureContext featureContext) { + try { + return getAnnotationKey().validate(apply(interval, referenceContext, featureContext)); + } catch (final IllegalArgumentException e) { + throw new UserException.BadInput(String.format("%s " + + "Feature track may contain overlapping intervals; these should be merged.", e.getMessage())); + } + } } - private class GCContentAnnotator implements IntervalAnnotator { + public static class GCContentAnnotator extends IntervalAnnotator { + @Override + public AnnotationKey getAnnotationKey() { + return CopyNumberAnnotations.GC_CONTENT; + } + @Override - public Double apply(final Locatable interval, - final ReadsContext readsContext, - final ReferenceContext referenceContext, - final FeatureContext featureContext) { + Double apply(final Locatable interval, + final ReferenceContext referenceContext, + final FeatureContext featureContext) { final Nucleotide.Counter counter = new Nucleotide.Counter(); counter.addAll(referenceContext.getBases()); final long gcCount = counter.get(Nucleotide.C) + counter.get(Nucleotide.G); @@ -150,4 +258,59 @@ public Double apply(final Locatable interval, return totalCount == 0 ? Double.NaN : gcCount / (double) totalCount; } } + + /** + * If scores are provided, intervals will be annotated with the length-weighted average; scores may not be NaN. + * Otherwise, scores for covered and uncovered intervals will be taken as unity and zero, respectively. 
+ */ + abstract static class BEDLengthWeightedAnnotator extends IntervalAnnotator { + private final FeatureInput trackPath; + + BEDLengthWeightedAnnotator(final FeatureInput trackPath) { + this.trackPath = trackPath; + } + + @Override + Double apply(final Locatable interval, + final ReferenceContext referenceContext, + final FeatureContext featureContext) { + final int intervalLength = interval.getLengthOnReference(); + if (intervalLength == 0) { + return Double.NaN; + } + double lengthWeightedSum = 0.; + final List features = featureContext.getValues(trackPath); + for (final BEDFeature feature : features) { + final double scoreOrNaN = (double) feature.getScore(); + final double score = Double.isNaN(scoreOrNaN) ? 1. : scoreOrNaN; // missing score -> score = 1 + lengthWeightedSum += score * + CoordMath.getOverlap( + feature.getStart(), feature.getEnd() - 1, // zero-based + interval.getStart(), interval.getEnd()); // one-based + } + return lengthWeightedSum / interval.getLengthOnReference(); + } + } + + public static class MappabilityAnnotator extends BEDLengthWeightedAnnotator { + MappabilityAnnotator(final FeatureInput mappabilityTrackPath) { + super(mappabilityTrackPath); + } + + @Override + public AnnotationKey getAnnotationKey() { + return CopyNumberAnnotations.MAPPABILITY; + } + } + + public static class SegmentalDuplicationContentAnnotator extends BEDLengthWeightedAnnotator { + SegmentalDuplicationContentAnnotator(final FeatureInput segmentalDuplicationTrackPath) { + super(segmentalDuplicationTrackPath); + } + + @Override + public AnnotationKey getAnnotationKey() { + return CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT; + } + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java index c22b2438d9c..954777d72ea 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java @@ -21,6 +21,7 @@ import org.broadinstitute.hellbender.tools.copynumber.denoising.HDF5SVDReadCountPanelOfNormals; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleCountCollection; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations; import org.broadinstitute.hellbender.tools.copynumber.utils.HDF5Utils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -280,7 +281,9 @@ protected void runPipeline(final JavaSparkContext ctx) { inputAnnotatedIntervalsFile, firstReadCounts, logger); final double[] intervalGCContent = annotatedIntervals == null ? null - : annotatedIntervals.getRecords().stream().mapToDouble(i -> i.getAnnotationSet().getGCContent()).toArray(); + : annotatedIntervals.getRecords().stream() + .mapToDouble(i -> i.getAnnotationMap().getValue(CopyNumberAnnotations.GC_CONTENT)) + .toArray(); //validate input read-counts files (i.e., check intervals and that only integer counts are contained) //and aggregate as a RealMatrix with dimensions numIntervals x numSamples diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java index 674b5eed31e..511363fa85b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java @@ -17,6 +17,7 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection; import 
org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleCountCollection; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations; import org.broadinstitute.hellbender.utils.io.IOUtils; import java.io.File; @@ -210,7 +211,9 @@ protected Object doWork() { inputAnnotatedIntervalsFile, readCounts, logger); final double[] intervalGCContent = annotatedIntervals == null ? null - : annotatedIntervals.getRecords().stream().mapToDouble(i -> i.getAnnotationSet().getGCContent()).toArray(); + : annotatedIntervals.getRecords().stream() + .mapToDouble(i -> i.getAnnotationMap().getValue(CopyNumberAnnotations.GC_CONTENT)) + .toArray(); if (intervalGCContent == null) { logger.warn("Neither a panel of normals nor GC-content annotations were provided, so only standardization will be performed..."); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/CopyNumberFormatsUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/CopyNumberFormatsUtils.java index 1cb10c5387d..bc800acd631 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/CopyNumberFormatsUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/CopyNumberFormatsUtils.java @@ -1,6 +1,18 @@ package org.broadinstitute.hellbender.tools.copynumber.formats; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.text.XReadLines; +import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection; +import org.broadinstitute.hellbender.utils.tsv.TableUtils; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + public final class CopyNumberFormatsUtils { + public static final String COMMENT_PREFIX = "@"; 
//SAMTextHeaderCodec.HEADER_LINE_START; we need TableReader to treat SAM header as comment lines public static final String DOUBLE_FORMAT = "%.6f"; private CopyNumberFormatsUtils() {} @@ -8,4 +20,32 @@ private CopyNumberFormatsUtils() {} public static String formatDouble(final double value) { return String.format(DOUBLE_FORMAT, value); } + + /** + * Extracts column names from a TSV file + */ + public static TableColumnCollection readColumnsFromHeader(final File inputFile) { + IOUtils.canReadFile(inputFile); + List columns = null; + try (final XReadLines reader = new XReadLines(inputFile)) { + while (reader.hasNext()) { + String nextLine = reader.next(); + if (!nextLine.startsWith(COMMENT_PREFIX)) { + columns = Arrays.asList(nextLine.split(TableUtils.COLUMN_SEPARATOR_STRING)); + break; + } + } + } catch (final IOException e) { + throw new UserException.CouldNotReadInputFile(inputFile); + } + if (columns == null) { + throw new UserException.BadInput(String.format( + "The input file %s does not have a header (starting with comment character %s).", + inputFile.getAbsolutePath(), COMMENT_PREFIX)); + } + if (columns.stream().distinct().count() != columns.size()) { + throw new UserException.BadInput("Column headers must all be unique."); + } + return new TableColumnCollection(columns); + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java index 489c2b9dae6..cdd112163d3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java @@ -163,7 +163,7 @@ static String formatDouble(final double value) { } final class RecordCollectionReader extends TableReader { - private static final String COMMENT_PREFIX = "@"; 
//SAMTextHeaderCodec.HEADER_LINE_START; we need TableReader to treat SAM header as comment lines + private static final String COMMENT_PREFIX = CopyNumberFormatsUtils.COMMENT_PREFIX; //SAMTextHeaderCodec.HEADER_LINE_START; we need TableReader to treat SAM header as comment lines private final File file; RecordCollectionReader(final File file) throws IOException { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollection.java index f03906c0eab..70116a2be28 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollection.java @@ -1,56 +1,181 @@ package org.broadinstitute.hellbender.tools.copynumber.formats.collections; +import org.apache.commons.collections4.ListUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.copynumber.formats.CopyNumberFormatsUtils; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval; -import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotationSet; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations; import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.io.IOUtils; import 
org.broadinstitute.hellbender.utils.tsv.DataLine; import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection; import java.io.File; -import java.util.List; +import java.util.*; import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.stream.Collectors; /** + * Represents a collection of intervals annotated with {@link CopyNumberAnnotations}. + * Supports {@link AnnotationKey}s of integer, long, double, and string type. + * Can be constructed from a TSV file that contains the standard interval column headers, + * any subset of the {@link CopyNumberAnnotations}, and additional columns (which are ignored). + * * @author Samuel Lee <slee@broadinstitute.org> */ public final class AnnotatedIntervalCollection extends AbstractLocatableCollection { //note to developers: repeat the column headers in Javadoc so that they are viewable when linked /** - * CONTIG, START, END, GC_CONTENT + * CONTIG, START, END; column headers for additional annotations can be specified */ enum AnnotatedIntervalTableColumn { CONTIG, START, - END, - GC_CONTENT; + END; - static final TableColumnCollection COLUMNS = new TableColumnCollection((Object[]) values()); + static final TableColumnCollection STANDARD_COLUMNS = new TableColumnCollection((Object[]) values()); + } + + enum AnnotationValueType { + Integer, + Long, + Double, + String } - private static final Function ANNOTATED_INTERVAL_RECORD_FROM_DATA_LINE_DECODER = dataLine -> { - final String contig = dataLine.get(AnnotatedIntervalTableColumn.CONTIG); - final int start = dataLine.getInt(AnnotatedIntervalTableColumn.START); - final int end = dataLine.getInt(AnnotatedIntervalTableColumn.END); - final double gcContent = dataLine.getDouble(AnnotatedIntervalTableColumn.GC_CONTENT); - final SimpleInterval interval = new SimpleInterval(contig, start, end); - final AnnotationSet annotationSet = new AnnotationSet(gcContent); - return new AnnotatedInterval(interval, annotationSet); + private static final 
BiConsumer ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER = (annotatedInterval, dataLine) -> { + dataLine.append(annotatedInterval.getInterval().getContig()) + .append(annotatedInterval.getInterval().getStart()) + .append(annotatedInterval.getInterval().getEnd()); + final AnnotationMap annotations = annotatedInterval.getAnnotationMap(); + for (final AnnotationKey key : annotations.getKeys()) { + final AnnotationValueType type = AnnotationValueType.valueOf(key.getType().getSimpleName()); + switch (type) { + case Integer: + dataLine.append((Integer) annotations.getValue(key)); + break; + case Long: + dataLine.append((Long) annotations.getValue(key)); + break; + case Double: + dataLine.append(formatDouble((Double) annotations.getValue(key))); + break; + case String: + dataLine.append((String) annotations.getValue(key)); + break; + default: + throw new UserException.BadInput(String.format("Unsupported annotation type: %s", type)); + } + } }; - private static final BiConsumer ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER = (annotatedInterval, dataLine) -> - dataLine.append(annotatedInterval.getInterval().getContig()) - .append(annotatedInterval.getInterval().getStart()) - .append(annotatedInterval.getInterval().getEnd()) - .append(formatDouble(annotatedInterval.getAnnotationSet().getGCContent())); - public AnnotatedIntervalCollection(final File inputFile) { - super(inputFile, AnnotatedIntervalCollection.AnnotatedIntervalTableColumn.COLUMNS, ANNOTATED_INTERVAL_RECORD_FROM_DATA_LINE_DECODER, ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER); + this(inputFile, getAnnotationKeys(CopyNumberFormatsUtils.readColumnsFromHeader(inputFile))); + } + + private AnnotatedIntervalCollection(final File inputFile, + final List> annotationKeys) { + super( + inputFile, + getColumns(annotationKeys), + getAnnotatedIntervalRecordFromDataLineDecoder(annotationKeys), + ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER); } public AnnotatedIntervalCollection(final LocatableMetadata metadata, final 
List annotatedIntervals) { - super(metadata, annotatedIntervals, AnnotatedIntervalCollection.AnnotatedIntervalTableColumn.COLUMNS, ANNOTATED_INTERVAL_RECORD_FROM_DATA_LINE_DECODER, ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER); + super( + metadata, + annotatedIntervals, + getColumns(getAnnotationKeys(annotatedIntervals)), + getAnnotatedIntervalRecordFromDataLineDecoder(getAnnotationKeys(annotatedIntervals)), + ANNOTATED_INTERVAL_RECORD_TO_DATA_LINE_ENCODER); + } + + private static TableColumnCollection getColumns(final List> annotationKeys) { + return new TableColumnCollection( + ListUtils.union( + AnnotatedIntervalTableColumn.STANDARD_COLUMNS.names(), + annotationKeys.stream().map(AnnotationKey::getName).collect(Collectors.toList()))); + } + + private static List> getAnnotationKeys(final TableColumnCollection columns) { + Utils.nonNull(columns); + Utils.validateArg(columns.columnCount() != 0, "TableColumnCollection cannot be empty."); + Utils.validateArg(columns.containsAll(AnnotatedIntervalTableColumn.STANDARD_COLUMNS.names()), + String.format("TableColumnCollection must contain standard columns: %s.", + AnnotatedIntervalTableColumn.STANDARD_COLUMNS.names())); + return CopyNumberAnnotations.ANNOTATIONS.stream() + .filter(a -> columns.contains(a.getName())) + .collect(Collectors.toList()); + } + + private static List> getAnnotationKeys(final List annotatedIntervals) { + return annotatedIntervals.isEmpty() ? 
new ArrayList<>() : annotatedIntervals.get(0).getAnnotationMap().getKeys(); + } + + private static Function getAnnotatedIntervalRecordFromDataLineDecoder( + final List> annotationKeys) { + return dataLine -> { + final String contig = dataLine.get(AnnotatedIntervalTableColumn.CONTIG); + final int start = dataLine.getInt(AnnotatedIntervalTableColumn.START); + final int end = dataLine.getInt(AnnotatedIntervalTableColumn.END); + final SimpleInterval interval = new SimpleInterval(contig, start, end); + final List, Object>> annotations = new ArrayList<>(annotationKeys.size()); + for (final AnnotationKey key : annotationKeys) { + final AnnotationValueType type = AnnotationValueType.valueOf(key.getType().getSimpleName()); + switch (type) { + case Integer: + annotations.add(Pair.of(key, dataLine.getInt(key.getName()))); + break; + case Long: + annotations.add(Pair.of(key, dataLine.getLong(key.getName()))); + break; + case Double: + annotations.add(Pair.of(key, dataLine.getDouble(key.getName()))); + break; + case String: + annotations.add(Pair.of(key, dataLine.get(key.getName()))); + break; + default: + throw new UserException.BadInput(String.format("Unsupported annotation type: %s", type)); + } + } + final AnnotationMap annotationMap = new AnnotationMap(annotations); + return new AnnotatedInterval(interval, annotationMap); + }; + } + + /** + * Columns, encoder, and decoder are not used. + */ + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + final AbstractRecordCollection that = (AbstractRecordCollection) o; + return getMetadata().equals(that.getMetadata()) && + getRecords().equals(that.getRecords()); + } + + /** + * Columns, encoder, and decoder are not used. 
+ */ + @Override + public int hashCode() { + int result = getMetadata().hashCode(); + result = 31 * result + getRecords().hashCode(); + return result; } -} +} \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CopyNumberPosteriorDistributionCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CopyNumberPosteriorDistributionCollection.java index 8b5a3cf8b03..584e2321820 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CopyNumberPosteriorDistributionCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CopyNumberPosteriorDistributionCollection.java @@ -1,6 +1,7 @@ package org.broadinstitute.hellbender.tools.copynumber.formats.collections; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.copynumber.formats.CopyNumberFormatsUtils; import org.broadinstitute.hellbender.tools.copynumber.formats.records.CopyNumberPosteriorDistribution; import org.broadinstitute.hellbender.tools.copynumber.gcnv.GermlineCNVNamingConstants; import org.broadinstitute.hellbender.tools.copynumber.gcnv.IntegerCopyNumberState; @@ -84,13 +85,10 @@ private static class IntegerCopyNumberStateCollection { private final List copyNumberStates; private final TableColumnCollection columnCollection; - private static final String COMMENT_PREFIX = "@"; - IntegerCopyNumberStateCollection(final File inputFile) { - final List copyNumberStatesColumns = extractCopyNumberColumnsFromHeader(inputFile); - this.columnCollection = new TableColumnCollection(copyNumberStatesColumns); + this.columnCollection = CopyNumberFormatsUtils.readColumnsFromHeader(inputFile); this.copyNumberStates = new ArrayList<>(); - copyNumberStatesColumns + columnCollection.names() .forEach(copyNumberString -> copyNumberStates.add(parseIntegerCopyNumber(copyNumberString))); } @@ 
-137,29 +135,5 @@ private IntegerCopyNumberState parseIntegerCopyNumber(final String copyNumberSta "Could not parse copy-number column string (%s) to an integer copy-number.", copyNumberStateString)); } } - - /** - * Extracts column names from a TSV file - */ - private List extractCopyNumberColumnsFromHeader(final File inputFile) { - List columns = null; - try (final XReadLines reader = new XReadLines(inputFile)) { - while (reader.hasNext()) { - String nextLine = reader.next(); - if (!nextLine.startsWith(COMMENT_PREFIX)) { - columns = Arrays.asList(nextLine.split(TableUtils.COLUMN_SEPARATOR_STRING)); - break; - } - } - } catch (final IOException e) { - throw new UserException.CouldNotReadInputFile(inputFile); - } - if (columns == null) { - throw new UserException.BadInput(String.format( - "The input file %s does not have a header (starting with comment character %s).", - inputFile.getAbsolutePath(), COMMENT_PREFIX)); - } - return columns; - } } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotatedInterval.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotatedInterval.java index 1bc35f57ed1..5888eff352a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotatedInterval.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotatedInterval.java @@ -1,7 +1,7 @@ package org.broadinstitute.hellbender.tools.copynumber.formats.records; import htsjdk.samtools.util.Locatable; -import htsjdk.tribble.Feature; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -10,14 +10,14 @@ * * @author Samuel Lee <slee@broadinstitute.org> */ -public class AnnotatedInterval implements Locatable, Feature { +public class AnnotatedInterval implements Locatable { private final 
SimpleInterval interval; - private final AnnotationSet annotationSet; + private final AnnotationMap annotationMap; public AnnotatedInterval(final SimpleInterval interval, - final AnnotationSet annotationSet) { + final AnnotationMap annotationMap) { this.interval = Utils.nonNull(interval); - this.annotationSet = Utils.nonNull(annotationSet); + this.annotationMap = Utils.nonNull(annotationMap); } @Override @@ -39,8 +39,8 @@ public SimpleInterval getInterval() { return interval; } - public AnnotationSet getAnnotationSet() { - return annotationSet; + public AnnotationMap getAnnotationMap() { + return annotationMap; } @Override @@ -53,13 +53,13 @@ public boolean equals(Object o) { } final AnnotatedInterval that = (AnnotatedInterval) o; - return interval.equals(that.interval) && annotationSet.equals(that.annotationSet); + return interval.equals(that.interval) && annotationMap.equals(that.annotationMap); } @Override public int hashCode() { int result = interval.hashCode(); - result = 31 * result + annotationSet.hashCode(); + result = 31 * result + annotationMap.hashCode(); return result; } @@ -67,7 +67,7 @@ public int hashCode() { public String toString() { return "AnnotatedInterval{" + "interval=" + interval + - ", annotationSet=" + annotationSet + + ", annotationMap=" + annotationMap + '}'; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotationSet.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotationSet.java deleted file mode 100644 index dd3a55beddb..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/AnnotationSet.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.broadinstitute.hellbender.tools.copynumber.formats.records; - -import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection; -import org.broadinstitute.hellbender.utils.Utils; - -/** - * Represents a set of annotations for an interval. 
Currently, only GC content is represented. - * - * @author Samuel Lee <slee@broadinstitute.org> - */ -public final class AnnotationSet { - /** - * If additional annotation fields are added here, then {@link AnnotatedIntervalCollection} - * should be updated accordingly. - */ - private final double gcContent; - - public AnnotationSet(final double gcContent) { - Utils.validateArg((0. <= gcContent && gcContent <= 1.) || Double.isNaN(gcContent), - "GC content must be in [0, 1] or NaN."); - this.gcContent = gcContent; - } - - public double getGCContent() { - return gcContent; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - final AnnotationSet that = (AnnotationSet) o; - return Double.compare(that.gcContent, gcContent) == 0; - } - - @Override - public int hashCode() { - long temp = Double.doubleToLongBits(gcContent); - return (int) (temp ^ (temp >>> 32)); - } - - @Override - public String toString() { - return "AnnotationSet{" + - "gcContent=" + gcContent + - '}'; - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationKey.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationKey.java new file mode 100644 index 00000000000..5dfd8f02685 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationKey.java @@ -0,0 +1,66 @@ +package org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation; + +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.function.Function; + +/** + * Represents a key for a named, typed annotation. 
+ * + * @author Samuel Lee <slee@broadinstitute.org> + */ +public final class AnnotationKey { + private final String name; + private final Class clazz; + private final Function validateValue; + + public AnnotationKey(final String name, + final Class clazz, + final Function validateValue) { + this.name = Utils.nonEmpty(name); + this.clazz = Utils.nonNull(clazz); + this.validateValue = Utils.nonNull(validateValue); + } + + public String getName() { + return name; + } + + public Class getType() { + return clazz; + } + + public T validate(final T value) { + Utils.validateArg(validateValue.apply(value), + String.format("Invalid value %s for annotation %s.", value, name)); + return value; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + final AnnotationKey that = (AnnotationKey) o; + return name.equals(that.name) && clazz.equals(that.clazz); + } + + @Override + public int hashCode() { + int result = name.hashCode(); + result = 31 * result + clazz.hashCode(); + return result; + } + + @Override + public String toString() { + return "AnnotationKey{" + + "name='" + name + '\'' + + ", class=" + clazz + + '}'; + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationMap.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationMap.java new file mode 100644 index 00000000000..a5fe52e05e4 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/AnnotationMap.java @@ -0,0 +1,64 @@ +package org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation; + +import com.google.common.collect.ImmutableMap; +import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Represents an 
immutable ordered collection of named, typed annotations for an interval. + * + * @author Samuel Lee <slee@broadinstitute.org> + */ +public final class AnnotationMap { + + private final Map, Object> annotationMap; + + public AnnotationMap(final List, Object>> annotations) { + Utils.nonEmpty(annotations); + final ImmutableMap.Builder, Object> builder = new ImmutableMap.Builder<>(); + annotations.forEach(a -> builder.put(a.getKey(), a.getValue())); + annotationMap = builder.build(); + } + + public List> getKeys() { + return new ArrayList<>(annotationMap.keySet()); + } + + public T getValue(final AnnotationKey key) { + Utils.nonNull(key); + if (!annotationMap.containsKey(key)) { + throw new IllegalArgumentException( + String.format("Annotation %s not contained in AnnotationMap.", key.getName())); + } + return key.getType().cast(annotationMap.get(key)); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + final AnnotationMap that = (AnnotationMap) o; + return annotationMap.equals(that.annotationMap); + } + + @Override + public int hashCode() { + return annotationMap.hashCode(); + } + + @Override + public String toString() { + return "AnnotationMap{" + + "annotationMap=" + annotationMap + + '}'; + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/CopyNumberAnnotations.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/CopyNumberAnnotations.java new file mode 100644 index 00000000000..74f8b55134f --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/annotation/CopyNumberAnnotations.java @@ -0,0 +1,26 @@ +package org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation; + +import java.util.Arrays; +import java.util.List; + +public final class CopyNumberAnnotations { + public static AnnotationKey 
GC_CONTENT = new AnnotationKey<>( + "GC_CONTENT", + Double.class, + gcContent -> (0. <= gcContent && gcContent <= 1.) || Double.isNaN(gcContent)); + + public static AnnotationKey MAPPABILITY = new AnnotationKey<>( + "MAPPABILITY", + Double.class, + mappability -> (0. <= mappability && mappability <= 1.) || Double.isNaN(mappability)); + + public static AnnotationKey SEGMENTAL_DUPLICATION_CONTENT = new AnnotationKey<>( + "SEGMENTAL_DUPLICATION_CONTENT", + Double.class, + segmentalDuplicationContent -> (0. <= segmentalDuplicationContent && segmentalDuplicationContent <= 1.) || Double.isNaN(segmentalDuplicationContent)); + + /** + * This defines the canonical order of these annotations. + */ + public static List> ANNOTATIONS = Arrays.asList(GC_CONTENT, MAPPABILITY, SEGMENTAL_DUPLICATION_CONTENT); +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/tsv/TableColumnCollection.java b/src/main/java/org/broadinstitute/hellbender/utils/tsv/TableColumnCollection.java index b480418fa77..b43efd02d07 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/tsv/TableColumnCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/tsv/TableColumnCollection.java @@ -338,4 +338,24 @@ public static String[] checkNames(final String[] columnNames, } return columnNames; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + final TableColumnCollection that = (TableColumnCollection) o; + return names.equals(that.names) && indexByName.equals(that.indexByName); + } + + @Override + public int hashCode() { + int result = names.hashCode(); + result = 31 * result + indexByName.hashCode(); + return result; + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervalsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervalsIntegrationTest.java index 83ca68c19ee..b7ef0f40b4a 
100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervalsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervalsIntegrationTest.java @@ -1,15 +1,19 @@ package org.broadinstitute.hellbender.tools.copynumber; import htsjdk.samtools.SAMSequenceDictionary; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; import org.broadinstitute.hellbender.engine.ReferenceDataSource; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection; +import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollectionUnitTest; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleLocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval; -import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotationSet; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations; import org.broadinstitute.hellbender.utils.IntervalMergingRule; import org.broadinstitute.hellbender.utils.IntervalSetRule; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -18,7 +22,11 @@ import org.testng.annotations.Test; import java.io.File; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; /** * Integration tests for {@link 
AnnotateIntervals}. @@ -29,34 +37,122 @@ public final class AnnotateIntervalsIntegrationTest extends CommandLineProgramTe private static final File TEST_SUB_DIR = new File(toolsTestDir, "copynumber"); private static final File INTERVALS_FILE = new File(TEST_SUB_DIR, "annotate-intervals-test.interval_list"); private static final File REFERENCE_FILE = new File(b37_reference_20_21); + private static final File MAPPABILITY_TRACK_FILE = new File(TEST_SUB_DIR, + "annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz"); + private static final File SEGMENTAL_DUPLICATION_TRACK_FILE = new File(TEST_SUB_DIR, + "annotate-intervals-hg19-segmental-duplication-20-21.bed.gz"); private static final SAMSequenceDictionary SEQUENCE_DICTIONARY = ReferenceDataSource.of(REFERENCE_FILE.toPath()).getSequenceDictionary(); private static final LocatableMetadata LOCATABLE_METADATA = new SimpleLocatableMetadata(SEQUENCE_DICTIONARY); /** - * Test that intervals are sorted according to {@link #SEQUENCE_DICTIONARY} - * and adjacent intervals are not merged. GC content truth was taken from AnnotateTargets (a previous version of the tool). + * Test case checks that intervals are sorted according to {@link #SEQUENCE_DICTIONARY} and + * adjacent intervals are not merged. This test case is also used in {@link AnnotatedIntervalCollectionUnitTest}. 
*/ + private static final AnnotatedIntervalCollection EXPECTED_ALL_ANNOTATIONS = new AnnotatedIntervalCollection( + LOCATABLE_METADATA, + Arrays.asList( + new AnnotatedInterval(new SimpleInterval("20", 1000001, 1001000), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.49), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("20", 1001001, 1002000), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.483), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("20", 1002001, 1003000), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.401), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("20", 1003001, 1004000), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.448), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("21", 1, 100), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, Double.NaN), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.0), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("21", 101, 200), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, Double.NaN), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.0), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))))); + + @Test + public void testGCContentOnly() { + final File outputFile = createTempFile("annotate-intervals-test", ".tsv"); + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() + 
.addReference(REFERENCE_FILE) + .addArgument(StandardArgumentDefinitions.INTERVALS_LONG_NAME, INTERVALS_FILE.getAbsolutePath()) + .addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString()) + .addOutput(outputFile); + runCommandLine(argsBuilder); + final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(outputFile); + final AnnotatedIntervalCollection expected = AnnotatedIntervalCollectionUnitTest.subsetAnnotations( + EXPECTED_ALL_ANNOTATIONS, + Collections.singletonList(CopyNumberAnnotations.GC_CONTENT)); + Assert.assertEquals(result, expected); + Assert.assertNotSame(result, expected); + } + @Test - public void test() { + public void testMappability() { final File outputFile = createTempFile("annotate-intervals-test", ".tsv"); final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() .addReference(REFERENCE_FILE) + .addFileArgument(AnnotateIntervals.MAPPABILITY_TRACK_PATH_LONG_NAME, MAPPABILITY_TRACK_FILE) .addArgument(StandardArgumentDefinitions.INTERVALS_LONG_NAME, INTERVALS_FILE.getAbsolutePath()) .addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString()) .addOutput(outputFile); runCommandLine(argsBuilder); final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(outputFile); + final AnnotatedIntervalCollection expected = AnnotatedIntervalCollectionUnitTest.subsetAnnotations( + EXPECTED_ALL_ANNOTATIONS, + Arrays.asList( + CopyNumberAnnotations.GC_CONTENT, + CopyNumberAnnotations.MAPPABILITY)); + Assert.assertEquals(result, expected); + Assert.assertNotSame(result, expected); + } - final AnnotatedIntervalCollection expected = new AnnotatedIntervalCollection( - LOCATABLE_METADATA, + @Test + public void testSegmentalDuplicationContent() { + final File outputFile = createTempFile("annotate-intervals-test", ".tsv"); + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() + 
.addReference(REFERENCE_FILE) + .addFileArgument(AnnotateIntervals.SEGMENTAL_DUPLICATION_TRACK_PATH_LONG_NAME, SEGMENTAL_DUPLICATION_TRACK_FILE) + .addArgument(StandardArgumentDefinitions.INTERVALS_LONG_NAME, INTERVALS_FILE.getAbsolutePath()) + .addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString()) + .addOutput(outputFile); + runCommandLine(argsBuilder); + final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(outputFile); + final AnnotatedIntervalCollection expected = AnnotatedIntervalCollectionUnitTest.subsetAnnotations( + EXPECTED_ALL_ANNOTATIONS, Arrays.asList( - new AnnotatedInterval(new SimpleInterval("20", 1000001, 1001000), new AnnotationSet(0.49)), - new AnnotatedInterval(new SimpleInterval("20", 1001001, 1002000), new AnnotationSet(0.483)), - new AnnotatedInterval(new SimpleInterval("20", 1002001, 1003000), new AnnotationSet(0.401)), - new AnnotatedInterval(new SimpleInterval("20", 1003001, 1004000), new AnnotationSet(0.448)), - new AnnotatedInterval(new SimpleInterval("21", 1, 100), new AnnotationSet(Double.NaN)), - new AnnotatedInterval(new SimpleInterval("21", 101, 200), new AnnotationSet(Double.NaN)))); + CopyNumberAnnotations.GC_CONTENT, + CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT)); + Assert.assertEquals(result, expected); + Assert.assertNotSame(result, expected); + } + + @Test + public void testAllAnnotations() { + final File outputFile = createTempFile("annotate-intervals-test", ".tsv"); + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() + .addReference(REFERENCE_FILE) + .addFileArgument(AnnotateIntervals.MAPPABILITY_TRACK_PATH_LONG_NAME, MAPPABILITY_TRACK_FILE) + .addFileArgument(AnnotateIntervals.SEGMENTAL_DUPLICATION_TRACK_PATH_LONG_NAME, SEGMENTAL_DUPLICATION_TRACK_FILE) + .addArgument(StandardArgumentDefinitions.INTERVALS_LONG_NAME, INTERVALS_FILE.getAbsolutePath()) + 
.addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString()) + .addOutput(outputFile); + runCommandLine(argsBuilder); + final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(outputFile); + final AnnotatedIntervalCollection expected = EXPECTED_ALL_ANNOTATIONS; Assert.assertEquals(result, expected); Assert.assertNotSame(result, expected); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormalsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormalsIntegrationTest.java index c21853d65b0..3c7b1038978 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormalsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormalsIntegrationTest.java @@ -3,6 +3,7 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import org.apache.commons.collections4.ListUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.math3.linear.*; import org.apache.commons.math3.random.RandomDataGenerator; import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; @@ -18,8 +19,9 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleLocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleSampleLocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval; -import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotationSet; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap; import org.broadinstitute.hellbender.tools.copynumber.formats.records.SimpleCount; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations; import 
org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.testng.Assert; @@ -102,7 +104,8 @@ public Object[][] dataPanelOfNormals() { final AnnotatedIntervalCollection annotatedIntervals = new AnnotatedIntervalCollection( new SimpleLocatableMetadata(SEQUENCE_DICTIONARY), IntStream.range(0, NUM_INTERVALS) - .mapToObj(i -> new AnnotatedInterval(intervals.get(i), new AnnotationSet(intervalGCContent[i]))) + .mapToObj(i -> new AnnotatedInterval(intervals.get(i), + new AnnotationMap(Collections.singletonList(Pair.of(CopyNumberAnnotations.GC_CONTENT, intervalGCContent[i]))))) .collect(Collectors.toList())); final File annotatedIntervalsFile = createTempFile("annotated-intervals", ".tsv"); annotatedIntervals.write(annotatedIntervalsFile); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollectionUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollectionUnitTest.java index fad7301632d..7db5675b833 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollectionUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollectionUnitTest.java @@ -79,8 +79,7 @@ public void testReadMissingNucleotides() { @Test public void testWrite() throws IOException { final File outputFile = createTempFile("allelic-count-collection-test-output", ".tsv"); - final AllelicCountCollection allelicCounts = new AllelicCountCollection(ALLELIC_COUNTS_FILE); - allelicCounts.write(outputFile); + ALLELIC_COUNTS_EXPECTED.write(outputFile); Assert.assertTrue(FileUtils.contentEquals(outputFile, ALLELIC_COUNTS_FILE)); } } \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollectionUnitTest.java 
b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollectionUnitTest.java new file mode 100644 index 00000000000..8d238338c22 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AnnotatedIntervalCollectionUnitTest.java @@ -0,0 +1,137 @@ +package org.broadinstitute.hellbender.tools.copynumber.formats.collections; + +import htsjdk.samtools.SAMSequenceDictionary; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.engine.ReferenceDataSource; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata; +import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SimpleLocatableMetadata; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationMap; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public final class AnnotatedIntervalCollectionUnitTest extends GATKBaseTest { + private static final File TEST_SUB_DIR = new File(toolsTestDir + "copynumber/formats/collections"); + private static final File ANNOTATED_INTERVALS_ALL_ANNOTATIONS_FILE = new File(TEST_SUB_DIR, + "annotated-intervals-all-annotations.tsv"); + private static final File 
ANNOTATED_INTERVALS_EXTRA_ANNOTATION_FILE = new File(TEST_SUB_DIR, + "annotated-intervals-extra-annotation.tsv"); + private static final File ANNOTATED_INTERVALS_REPEATED_ANNOTATION_FILE = new File(TEST_SUB_DIR, + "annotated-intervals-repeated-annotation.tsv"); + private static final File ANNOTATED_INTERVALS_GC_CONTENT_ONLY_FILE = new File(TEST_SUB_DIR, + "annotated-intervals-gc-content-only.tsv"); + private static final File REFERENCE_FILE = new File(b37_reference_20_21); + + private static final SAMSequenceDictionary SEQUENCE_DICTIONARY = ReferenceDataSource.of(REFERENCE_FILE.toPath()).getSequenceDictionary(); + private static final LocatableMetadata LOCATABLE_METADATA = new SimpleLocatableMetadata(SEQUENCE_DICTIONARY); + + private static final AnnotatedIntervalCollection EXPECTED_ALL_ANNOTATIONS = new AnnotatedIntervalCollection( + LOCATABLE_METADATA, + Arrays.asList( + new AnnotatedInterval(new SimpleInterval("20", 1000001, 1001000), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.49), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("20", 1001001, 1002000), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.483), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("20", 1002001, 1003000), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.401), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("20", 1003001, 1004000), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.448), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 1.), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new 
AnnotatedInterval(new SimpleInterval("21", 1, 100), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, Double.NaN), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.0), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))), + new AnnotatedInterval(new SimpleInterval("21", 101, 200), + new AnnotationMap(Arrays.asList( + Pair.of(CopyNumberAnnotations.GC_CONTENT, Double.NaN), + Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.0), + Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.)))))); + + @Test + public void testRead() { + final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(ANNOTATED_INTERVALS_ALL_ANNOTATIONS_FILE); + Assert.assertEquals(result, EXPECTED_ALL_ANNOTATIONS); + Assert.assertNotSame(result, EXPECTED_ALL_ANNOTATIONS); + } + + /** + * Extra annotations not listed in {@link CopyNumberAnnotations} should be ignored. + */ + @Test + public void testReadExtraAnnotation() { + final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(ANNOTATED_INTERVALS_EXTRA_ANNOTATION_FILE); + Assert.assertEquals(result, EXPECTED_ALL_ANNOTATIONS); + Assert.assertNotSame(result, EXPECTED_ALL_ANNOTATIONS); + } + + @Test(expectedExceptions = UserException.BadInput.class) + public void testReadRepeatedAnnotation() { + final AnnotatedIntervalCollection result = new AnnotatedIntervalCollection(ANNOTATED_INTERVALS_REPEATED_ANNOTATION_FILE); + Assert.assertEquals(result, EXPECTED_ALL_ANNOTATIONS); + Assert.assertNotSame(result, EXPECTED_ALL_ANNOTATIONS); + } + + @Test + public void testWrite() throws IOException { + final File outputFile = createTempFile("annotated-interval-collection-test-output", ".tsv"); + EXPECTED_ALL_ANNOTATIONS.write(outputFile); + Assert.assertTrue(FileUtils.contentEquals(outputFile, ANNOTATED_INTERVALS_ALL_ANNOTATIONS_FILE)); + } + + @Test + public void testReadGCContentOnly() { + final AnnotatedIntervalCollection result = new 
AnnotatedIntervalCollection(ANNOTATED_INTERVALS_GC_CONTENT_ONLY_FILE); + final AnnotatedIntervalCollection expected = AnnotatedIntervalCollectionUnitTest.subsetAnnotations( + EXPECTED_ALL_ANNOTATIONS, + Collections.singletonList(CopyNumberAnnotations.GC_CONTENT)); + Assert.assertEquals(result, expected); + Assert.assertNotSame(result, expected); + } + + private static AnnotatedInterval subsetAnnotations(final AnnotatedInterval annotatedInterval, + final List> annotationKeys) { + final List, Object>> subsetAnnotationEntries = new ArrayList<>(); + for (final AnnotationKey annotationKey : annotationKeys) { + subsetAnnotationEntries.add(Pair.of( + annotationKey, + annotatedInterval.getAnnotationMap().getValue(annotationKey))); + } + final AnnotationMap subsetAnnotationMap = new AnnotationMap(subsetAnnotationEntries); + return new AnnotatedInterval(annotatedInterval.getInterval(), subsetAnnotationMap); + } + + public static AnnotatedIntervalCollection subsetAnnotations(final AnnotatedIntervalCollection annotatedIntervals, + final List> annotationKeys) { + return new AnnotatedIntervalCollection( + annotatedIntervals.getMetadata(), + annotatedIntervals.getRecords().stream() + .map(i -> subsetAnnotations(i, annotationKeys)) + .collect(Collectors.toList())); + } +} \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz new file mode 100644 index 00000000000..4ee6aac759d Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz.tbi 
b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz.tbi new file mode 100644 index 00000000000..f0555079e69 Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-segmental-duplication-20-21.bed.gz.tbi differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz new file mode 100644 index 00000000000..24988af82c2 Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz.tbi b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz.tbi new file mode 100644 index 00000000000..5ac8e7128c3 Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/annotate-intervals-hg19-umap-k100-single-read-mappability-merged-20-21.bed.gz.tbi differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-all-annotations.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-all-annotations.tsv new file mode 100644 index 00000000000..f8d561dc983 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-all-annotations.tsv @@ -0,0 +1,10 @@ +@HD VN:1.5 +@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f 
UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +CONTIG START END GC_CONTENT MAPPABILITY SEGMENTAL_DUPLICATION_CONTENT +20 1000001 1001000 0.490000 1.000000 0.000000 +20 1001001 1002000 0.483000 1.000000 0.000000 +20 1002001 1003000 0.401000 1.000000 0.000000 +20 1003001 1004000 0.448000 1.000000 0.000000 +21 1 100 NaN 0.000000 0.000000 +21 101 200 NaN 0.000000 0.000000 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-extra-annotation.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-extra-annotation.tsv new file mode 100644 index 00000000000..a3f8fcd6dfa --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-extra-annotation.tsv @@ -0,0 +1,10 @@ +@HD VN:1.5 +@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +CONTIG START END GC_CONTENT MAPPABILITY SEGMENTAL_DUPLICATION_CONTENT EXTRA_ANNOTATION +20 1000001 1001000 0.490000 1.000000 0.000000 0.000000 +20 1001001 1002000 0.483000 1.000000 0.000000 0.000000 +20 1002001 1003000 0.401000 1.000000 0.000000 0.000000 +20 1003001 1004000 0.448000 1.000000 0.000000 0.000000 +21 1 100 NaN 0.000000 0.000000 0.000000 +21 101 200 NaN 0.000000 0.000000 0.000000 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-gc-content-only.tsv 
b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-gc-content-only.tsv new file mode 100644 index 00000000000..7eae2e7f464 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-gc-content-only.tsv @@ -0,0 +1,10 @@ +@HD VN:1.5 +@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +CONTIG START END GC_CONTENT +20 1000001 1001000 0.490000 +20 1001001 1002000 0.483000 +20 1002001 1003000 0.401000 +20 1003001 1004000 0.448000 +21 1 100 NaN +21 101 200 NaN diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-repeated-annotation.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-repeated-annotation.tsv new file mode 100644 index 00000000000..cb0f46c3790 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/formats/collections/annotated-intervals-repeated-annotation.tsv @@ -0,0 +1,10 @@ +@HD VN:1.5 +@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +CONTIG START END GC_CONTENT MAPPABILITY SEGMENTAL_DUPLICATION_CONTENT SEGMENTAL_DUPLICATION_CONTENT +20 1000001 1001000 0.490000 1.000000 0.000000 0.000000 +20 1001001 1002000 0.483000 1.000000 0.000000 0.000000 +20 1002001 1003000 0.401000 1.000000 0.000000 0.000000 +20 1003001 1004000 0.448000 1.000000 0.000000 0.000000 +21 1 100 NaN 0.000000 0.000000 0.000000 +21 
101 200 NaN 0.000000 0.000000 0.000000