From 33f34b98c38ce689ee478a096f1f4302d5e0f9ba Mon Sep 17 00:00:00 2001 From: takutosato Date: Thu, 14 Mar 2019 14:56:52 -0400 Subject: [PATCH] rest of comment edits --- scripts/mutect2_wdl/mutect2.wdl | 10 +++---- .../LearnReadOrientationModel.java | 26 +++++++++++++------ .../LearnReadOrientationModelUnitTest.java | 4 +-- .../ReadOrientationModelIntegrationTest.java | 12 +++------ 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/scripts/mutect2_wdl/mutect2.wdl b/scripts/mutect2_wdl/mutect2.wdl index f63f97c2f42..015f09eaf5d 100755 --- a/scripts/mutect2_wdl/mutect2.wdl +++ b/scripts/mutect2_wdl/mutect2.wdl @@ -228,7 +228,7 @@ workflow Mutect2 { call LearnReadOrientationModel { input: alt_tables = CollectF1R2Counts.alt_table, - ref_histograms = CollectF1R2Counts.ref_histogram, + ref_histograms = CollectF1R2Counts.ref_histograms, alt_histograms = CollectF1R2Counts.alt_histograms, gatk_override = gatk_override, gatk_docker = gatk_docker, @@ -611,7 +611,7 @@ task M2 { gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${tumor_bam} -O tumor_name.txt -encode tumor_command_line="-I ${tumor_bam} -tumor `cat tumor_name.txt`" - if [[ -f "${normal_bam}" ]]; then + if [[ ! -z "${normal_bam}" ]]; then gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${normal_bam} -O normal_name.txt -encode normal_command_line="-I ${normal_bam} -normal `cat normal_name.txt`" fi @@ -634,11 +634,11 @@ task M2 { touch tumor-pileups.table touch normal-pileups.table - if [[ -f "${variants_for_contamination}" ]]; then + if [[ ! -z "${variants_for_contamination}" ]]; then gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \ -V ${variants_for_contamination} -L ${variants_for_contamination} -O tumor-pileups.table - if [[ -f ${normal_bam} ]]; then + if [[ ! -z ${normal_bam} ]]; then gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${normal_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \ -V ${variants_for_contamination} -L ${variants_for_contamination} -O normal-pileups.table fi @@ -915,7 +915,7 @@ task CollectF1R2Counts { output { File alt_table = glob("*-alt.tsv")[0] - File ref_histogram = glob("*-ref.metrics")[0] + File ref_histograms = glob("*-ref.metrics")[0] File alt_histograms = glob("*-alt-depth1.metrics")[0] String tumor_sample = read_string("tumor_name.txt") } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java index cb79b08b27a..5db70a10973 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java @@ -76,12 +76,10 @@ public class LearnReadOrientationModel extends CommandLineProgram { protected void onStartup(){ Utils.validate(refHistogramFiles.size() == altDataTables.size() && altDataTables.size() == altHistogramFiles.size(), "The numbers of ref histograms, alt histograms, and alt table files must all be the same"); - START HERE CHECK HEADER - - refHistograms = sumHistogramsFromFiles(refHistogramFiles); + refHistograms = sumHistogramsFromFiles(refHistogramFiles, true); if (altHistogramFiles != null) { - altHistograms = sumHistogramsFromFiles(altHistogramFiles); + altHistograms = sumHistogramsFromFiles(altHistogramFiles, false); } else { altHistograms = Collections.emptyList(); } @@ -258,16 +256,29 @@ public static MetricsFile readMetricsFile(File file){ return metricsFile; } - public static List> sumHistogramsFromFiles(final List files){ + public static List> sumHistogramsFromFiles(final List files, final boolean ref){ Utils.nonNull(files, "files may not be null"); - final List> histogramList = readMetricsFile(files.get(0)).getAllHistograms(); + if (ref){ + Utils.validate(histogramList.size() == F1R2FilterConstants.NUM_KMERS, + "The list of ref histograms need to include all kmers as enforced by CollectF1R2Counts"); + Utils.validate(histogramList.stream().allMatch(h -> F1R2FilterConstants.ALL_KMERS.contains(h.getValueLabel())), + "a histogram contains an unsupported, non-kmer header"); + } else { + Utils.validate(histogramList.size() == F1R2FilterConstants.NUM_KMERS * F1R2FilterConstants.numAltHistogramsPerContext, + "The list of alt histograms missing some (kmer, alt allele, f1r2) triple"); + + } + + + + for (int i = 1; i < files.size(); i++){ final List> ithHistograms = readMetricsFile(files.get(i)).getAllHistograms(); for (final Histogram jthHistogram : ithHistograms){ final String refContext = jthHistogram.getValueLabel(); final Optional> hist = histogramList.stream().filter(h -> h.getValueLabel().equals(refContext)).findAny(); - Utils.validate(hist.isPresent(),"Reference histogram is empty, which violates the invariant enforced by CollectF1R2Counts"); + Utils.validate(hist.isPresent(),"Missing histogram header for: " + refContext); hist.get().addHistogram(jthHistogram); } @@ -277,7 +288,6 @@ public static List> sumHistogramsFromFiles(final List f public static Pair> gatherAltSiteRecords(final List tables){ final int defaultInitialListSize = 1_000_000; - final Pair> sampleAndRecords = AltSiteRecord.readAltSiteRecords(tables.get(0), defaultInitialListSize); final String sample = sampleAndRecords.getLeft(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelUnitTest.java index ac5f158390d..128e159b5ae 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelUnitTest.java @@ -55,8 +55,8 @@ public void testCombineHistograms(){ CollectF1R2Counts.class.getSimpleName()); } - final List> ref = LearnReadOrientationModel.sumHistogramsFromFiles(Arrays.asList(refMetricsDir.listFiles())); - final List> alt = LearnReadOrientationModel.sumHistogramsFromFiles(Arrays.asList(altMetricsDir.listFiles())); + final List> ref = LearnReadOrientationModel.sumHistogramsFromFiles(Arrays.asList(refMetricsDir.listFiles()), true); + final List> alt = LearnReadOrientationModel.sumHistogramsFromFiles(Arrays.asList(altMetricsDir.listFiles()), false); final List altSites = LearnReadOrientationModel.gatherAltSiteRecords(Arrays.asList(altTableDir.listFiles())).getRight(); final List> refTruth = LearnReadOrientationModel.readMetricsFile(refHistTruthFile).getAllHistograms(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/ReadOrientationModelIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/ReadOrientationModelIntegrationTest.java index 2b3c2a107ff..35ca2042a4e 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/ReadOrientationModelIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/ReadOrientationModelIntegrationTest.java @@ -1,6 +1,5 @@ package org.broadinstitute.hellbender.tools.walkers.readorientation; -import htsjdk.samtools.util.Histogram; import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.lang3.tuple.ImmutableTriple; import org.apache.commons.lang3.tuple.Triple; @@ -13,15 +12,11 @@ import org.broadinstitute.hellbender.tools.walkers.SplitIntervals; import org.broadinstitute.hellbender.tools.walkers.annotator.ReferenceBases; import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.FilterMutectCalls; -import org.broadinstitute.hellbender.tools.walkers.contamination.GatherPileupSummaries; -import org.broadinstitute.hellbender.tools.walkers.contamination.GetPileupSummaries; import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.M2FiltersArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2; import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.testng.Assert; -import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -30,7 +25,6 @@ import java.nio.file.Files; import java.nio.file.Paths; import java.util.*; -import java.util.stream.IntStream; import java.util.stream.StreamSupport; public class ReadOrientationModelIntegrationTest extends CommandLineProgramTest { @@ -85,13 +79,13 @@ public void testOnRealBam(final int scatterCount) throws IOException { args.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, priorTable.getAbsolutePath()); final File[] refMetricsFiles = refMetricsDir.listFiles(); Arrays.stream(refMetricsFiles).forEach(f -> - args.addArgument(CollectF1R2Counts.REF_SITE_METRICS_SHORT_NAME, f.getAbsolutePath())); + args.addArgument(CollectF1R2Counts.REF_SITE_METRICS_LONG_NAME, f.getAbsolutePath())); final File[] altMetricsFiles = altMetricsDir.listFiles(); Arrays.stream(altMetricsFiles).forEach(f -> - args.addArgument(CollectF1R2Counts.ALT_DEPTH1_HISTOGRAM_SHORT_NAME, f.getAbsolutePath())); + args.addArgument(CollectF1R2Counts.ALT_DEPTH1_HISTOGRAM_LONG_NAME, f.getAbsolutePath())); final File[] altTableFiles = altTableDir.listFiles(); Arrays.stream(altTableFiles).forEach(f -> - args.addArgument(CollectF1R2Counts.ALT_DATA_TABLE_SHORT_NAME, f.getAbsolutePath())); + args.addArgument(CollectF1R2Counts.ALT_DATA_TABLE_LONG_NAME, f.getAbsolutePath())); runCommandLine(args.getArgsList(), LearnReadOrientationModel.class.getSimpleName()); final ArtifactPriorCollection artifactPriorCollection = ArtifactPriorCollection.readArtifactPriors(priorTable);