Skip to content

Commit

Permalink
rest of comment edits
Browse files Browse the repository at this point in the history
  • Loading branch information
takutosato committed Mar 14, 2019
1 parent 5ca8d0b commit 33f34b9
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 24 deletions.
10 changes: 5 additions & 5 deletions scripts/mutect2_wdl/mutect2.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ workflow Mutect2 {
call LearnReadOrientationModel {
input:
alt_tables = CollectF1R2Counts.alt_table,
ref_histograms = CollectF1R2Counts.ref_histogram,
ref_histograms = CollectF1R2Counts.ref_histograms,
alt_histograms = CollectF1R2Counts.alt_histograms,
gatk_override = gatk_override,
gatk_docker = gatk_docker,
Expand Down Expand Up @@ -611,7 +611,7 @@ task M2 {
gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${tumor_bam} -O tumor_name.txt -encode
tumor_command_line="-I ${tumor_bam} -tumor `cat tumor_name.txt`"

if [[ -f "${normal_bam}" ]]; then
if [[ ! -z "${normal_bam}" ]]; then
gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${normal_bam} -O normal_name.txt -encode
normal_command_line="-I ${normal_bam} -normal `cat normal_name.txt`"
fi
Expand All @@ -634,11 +634,11 @@ task M2 {
touch tumor-pileups.table
touch normal-pileups.table

if [[ -f "${variants_for_contamination}" ]]; then
if [[ ! -z "${variants_for_contamination}" ]]; then
gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-V ${variants_for_contamination} -L ${variants_for_contamination} -O tumor-pileups.table

if [[ -f ${normal_bam} ]]; then
if [[ ! -z ${normal_bam} ]]; then
gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${normal_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-V ${variants_for_contamination} -L ${variants_for_contamination} -O normal-pileups.table
fi
Expand Down Expand Up @@ -915,7 +915,7 @@ task CollectF1R2Counts {

output {
File alt_table = glob("*-alt.tsv")[0]
File ref_histogram = glob("*-ref.metrics")[0]
File ref_histograms = glob("*-ref.metrics")[0]
File alt_histograms = glob("*-alt-depth1.metrics")[0]
String tumor_sample = read_string("tumor_name.txt")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,10 @@ public class LearnReadOrientationModel extends CommandLineProgram {
protected void onStartup(){
Utils.validate(refHistogramFiles.size() == altDataTables.size() && altDataTables.size() == altHistogramFiles.size(),
"The numbers of ref histograms, alt histograms, and alt table files must all be the same");
START HERE CHECK HEADER

refHistograms = sumHistogramsFromFiles(refHistogramFiles);
refHistograms = sumHistogramsFromFiles(refHistogramFiles, true);

if (altHistogramFiles != null) {
altHistograms = sumHistogramsFromFiles(altHistogramFiles);
altHistograms = sumHistogramsFromFiles(altHistogramFiles, false);
} else {
altHistograms = Collections.emptyList();
}
Expand Down Expand Up @@ -258,16 +256,29 @@ public static MetricsFile<?, Integer> readMetricsFile(File file){
return metricsFile;
}

public static List<Histogram<Integer>> sumHistogramsFromFiles(final List<File> files){
public static List<Histogram<Integer>> sumHistogramsFromFiles(final List<File> files, final boolean ref){
Utils.nonNull(files, "files may not be null");

final List<Histogram<Integer>> histogramList = readMetricsFile(files.get(0)).getAllHistograms();
if (ref){
Utils.validate(histogramList.size() == F1R2FilterConstants.NUM_KMERS,
"The list of ref histograms need to include all kmers as enforced by CollectF1R2Counts");
Utils.validate(histogramList.stream().allMatch(h -> F1R2FilterConstants.ALL_KMERS.contains(h.getValueLabel())),
"a histogram contains an unsupported, non-kmer header");
} else {
Utils.validate(histogramList.size() == F1R2FilterConstants.NUM_KMERS * F1R2FilterConstants.numAltHistogramsPerContext,
"The list of alt histograms missing some (kmer, alt allele, f1r2) triple");

}




for (int i = 1; i < files.size(); i++){
final List<Histogram<Integer>> ithHistograms = readMetricsFile(files.get(i)).getAllHistograms();
for (final Histogram<Integer> jthHistogram : ithHistograms){
final String refContext = jthHistogram.getValueLabel();
final Optional<Histogram<Integer>> hist = histogramList.stream().filter(h -> h.getValueLabel().equals(refContext)).findAny();
Utils.validate(hist.isPresent(),"Reference histogram is empty, which violates the invariant enforced by CollectF1R2Counts");
Utils.validate(hist.isPresent(),"Missing histogram header for: " + refContext);

hist.get().addHistogram(jthHistogram);
}
Expand All @@ -277,7 +288,6 @@ public static List<Histogram<Integer>> sumHistogramsFromFiles(final List<File> f

public static Pair<String, List<AltSiteRecord>> gatherAltSiteRecords(final List<File> tables){
final int defaultInitialListSize = 1_000_000;

final Pair<String, List<AltSiteRecord>> sampleAndRecords = AltSiteRecord.readAltSiteRecords(tables.get(0), defaultInitialListSize);
final String sample = sampleAndRecords.getLeft();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ public void testCombineHistograms(){
CollectF1R2Counts.class.getSimpleName());
}

final List<Histogram<Integer>> ref = LearnReadOrientationModel.sumHistogramsFromFiles(Arrays.asList(refMetricsDir.listFiles()));
final List<Histogram<Integer>> alt = LearnReadOrientationModel.sumHistogramsFromFiles(Arrays.asList(altMetricsDir.listFiles()));
final List<Histogram<Integer>> ref = LearnReadOrientationModel.sumHistogramsFromFiles(Arrays.asList(refMetricsDir.listFiles()), true);
final List<Histogram<Integer>> alt = LearnReadOrientationModel.sumHistogramsFromFiles(Arrays.asList(altMetricsDir.listFiles()), false);
final List<AltSiteRecord> altSites = LearnReadOrientationModel.gatherAltSiteRecords(Arrays.asList(altTableDir.listFiles())).getRight();

final List<Histogram<Integer>> refTruth = LearnReadOrientationModel.readMetricsFile(refHistTruthFile).getAllHistograms();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.broadinstitute.hellbender.tools.walkers.readorientation;

import htsjdk.samtools.util.Histogram;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.lang3.tuple.ImmutableTriple;
import org.apache.commons.lang3.tuple.Triple;
Expand All @@ -13,15 +12,11 @@
import org.broadinstitute.hellbender.tools.walkers.SplitIntervals;
import org.broadinstitute.hellbender.tools.walkers.annotator.ReferenceBases;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.FilterMutectCalls;
import org.broadinstitute.hellbender.tools.walkers.contamination.GatherPileupSummaries;
import org.broadinstitute.hellbender.tools.walkers.contamination.GetPileupSummaries;
import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.M2FiltersArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

Expand All @@ -30,7 +25,6 @@
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;

public class ReadOrientationModelIntegrationTest extends CommandLineProgramTest {
Expand Down Expand Up @@ -85,13 +79,13 @@ public void testOnRealBam(final int scatterCount) throws IOException {
args.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, priorTable.getAbsolutePath());
final File[] refMetricsFiles = refMetricsDir.listFiles();
Arrays.stream(refMetricsFiles).forEach(f ->
args.addArgument(CollectF1R2Counts.REF_SITE_METRICS_SHORT_NAME, f.getAbsolutePath()));
args.addArgument(CollectF1R2Counts.REF_SITE_METRICS_LONG_NAME, f.getAbsolutePath()));
final File[] altMetricsFiles = altMetricsDir.listFiles();
Arrays.stream(altMetricsFiles).forEach(f ->
args.addArgument(CollectF1R2Counts.ALT_DEPTH1_HISTOGRAM_SHORT_NAME, f.getAbsolutePath()));
args.addArgument(CollectF1R2Counts.ALT_DEPTH1_HISTOGRAM_LONG_NAME, f.getAbsolutePath()));
final File[] altTableFiles = altTableDir.listFiles();
Arrays.stream(altTableFiles).forEach(f ->
args.addArgument(CollectF1R2Counts.ALT_DATA_TABLE_SHORT_NAME, f.getAbsolutePath()));
args.addArgument(CollectF1R2Counts.ALT_DATA_TABLE_LONG_NAME, f.getAbsolutePath()));
runCommandLine(args.getArgsList(), LearnReadOrientationModel.class.getSimpleName());

final ArtifactPriorCollection artifactPriorCollection = ArtifactPriorCollection.readArtifactPriors(priorTable);
Expand Down

0 comments on commit 33f34b9

Please sign in to comment.