Skip to content

Commit

Permalink
Command line fun
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin committed Dec 5, 2018
1 parent 25da765 commit ce8051c
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ public abstract class AssemblyBasedCallerArgumentCollection extends StandardCall
@ArgumentCollection
public AssemblyRegionTrimmerArgumentCollection assemblyRegionTrimmerArgs = new AssemblyRegionTrimmerArgumentCollection();

protected boolean useMutectAssemblerArgumentCollection() { return false; }

@ArgumentCollection
public ReadThreadingAssemblerArgumentCollection assemblerArgs = new ReadThreadingAssemblerArgumentCollection();
public ReadThreadingAssemblerArgumentCollection assemblerArgs = useMutectAssemblerArgumentCollection() ?
new MutectReadThreadingAssemblerArgumentCollection() : new HaplotypeCallerReadThreadingAssemblerArgumentCollection();

@ArgumentCollection
public LikelihoodEngineArgumentCollection likelihoodArgs = new LikelihoodEngineArgumentCollection();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,20 +189,10 @@ public static ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine(

public static ReadThreadingAssembler createReadThreadingAssembler(final AssemblyBasedCallerArgumentCollection args) {
final ReadThreadingAssemblerArgumentCollection rtaac = args.assemblerArgs;
final ReadThreadingAssembler assemblyEngine = new ReadThreadingAssembler(rtaac.maxNumHaplotypesInPopulation, rtaac.kmerSizes,
rtaac.dontIncreaseKmerSizesForCycles, rtaac.allowNonUniqueKmersInRef, rtaac.numPruningSamples, rtaac.minPruneFactor,
rtaac.useAdaptivePruning, rtaac.initialErrorRateForPruning, rtaac.pruningLog10OddsThreshold, rtaac.maxUnprunedVariants);
assemblyEngine.setErrorCorrectKmers(rtaac.errorCorrectKmers);
final ReadThreadingAssembler assemblyEngine = rtaac.makeReadThreadingAssembler();
assemblyEngine.setDebug(args.debug);
assemblyEngine.setDebugGraphTransformations(rtaac.debugGraphTransformations);
assemblyEngine.setRecoverDanglingBranches(!rtaac.doNotRecoverDanglingBranches);
assemblyEngine.setMinDanglingBranchLength(rtaac.minDanglingBranchLength);
assemblyEngine.setMinBaseQualityToUseInAssembly(args.minBaseQualityScore);

if ( rtaac.graphOutput != null ) {
assemblyEngine.setGraphWriter(new File(rtaac.graphOutput));
}

return assemblyEngine;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ private void validateAndInitializeArgs() {
hcArgs.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(hcArgs.CONTAMINATION_FRACTION_FILE, hcArgs.CONTAMINATION_FRACTION, sampleSet, logger));
}

if ( hcArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && hcArgs.assemblerArgs.consensusMode ) {
if ( hcArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && hcArgs.assemblerArgs.consensusMode() ) {
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode at the same time. Please choose one or the other.");
}

Expand Down Expand Up @@ -604,7 +604,7 @@ public List<VariantContext> callRegion(final AssemblyRegion region, final Featur
assemblyResult.getPaddedReferenceLoc(),
regionForGenotyping.getSpan(),
features,
(hcArgs.assemblerArgs.consensusMode ? Collections.<VariantContext>emptyList() : givenAlleles),
(hcArgs.assemblerArgs.consensusMode() ? Collections.<VariantContext>emptyList() : givenAlleles),
emitReferenceConfidence(),
hcArgs.maxMnpDistance,
readsHeader,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package org.broadinstitute.hellbender.tools.walkers.haplotypecaller;

import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;

import java.io.File;

/**
 * HaplotypeCaller-specific read threading assembler arguments.
 *
 * Adds the HaplotypeCaller-only command line flags (opt-in adaptive pruning, dangling branch recovery
 * controls, consensus mode) on top of the arguments inherited from
 * {@link ReadThreadingAssemblerArgumentCollection}, and builds the assembler from them.
 */
public class HaplotypeCallerReadThreadingAssemblerArgumentCollection extends ReadThreadingAssemblerArgumentCollection {
    // The parent collection is Serializable, so this subclass is too. Declare the stream version explicitly
    // rather than relying on the compiler-computed default, which changes whenever a member is added or removed.
    private static final long serialVersionUID = 1L;

    /**
     * A single edge multiplicity cutoff for pruning doesn't work in samples with variable depths, for example exomes
     * and RNA.  This parameter enables the probabilistic algorithm for pruning the assembly graph that considers the
     * likelihood that each chain in the graph comes from real variation.
     */
    @Advanced
    @Argument(fullName="adaptive-pruning", doc = "Use Mutect2's adaptive graph pruning algorithm", optional = true)
    public boolean useAdaptivePruning = false;

    /**
     * By default, the read threading assembler will attempt to recover dangling heads and tails. See the `minDanglingBranchLength` argument documentation for more details.
     */
    @Hidden
    @Argument(fullName="do-not-recover-dangling-branches", doc="Disable dangling head and tail recovery", optional = true)
    public boolean doNotRecoverDanglingBranches = false;

    /**
     * As of version 3.3, this argument is no longer needed because dangling end recovery is now the default behavior. See GATK 3.3 release notes for more details.
     */
    @Deprecated
    @Argument(fullName="recover-dangling-heads", doc="This argument is deprecated since version 3.3", optional = true)
    public boolean DEPRECATED_RecoverDanglingHeads = false;

    /**
     * This argument is specifically intended for 1000G consensus analysis mode. Setting this flag will inject all
     * provided alleles to the assembly graph but will not forcibly genotype all of them.
     */
    @Advanced
    @Argument(fullName="consensus", doc="1000G consensus mode", optional = true)
    public boolean consensusMode = false;

    /**
     * Builds a {@link ReadThreadingAssembler} configured from this argument collection.
     *
     * When {@code useAdaptivePruning} is set, 0 is passed to the constructor in place of
     * {@code minPruneFactor}; otherwise {@code minPruneFactor} is passed unchanged. Dangling branch
     * recovery is enabled unless {@code doNotRecoverDanglingBranches} is set, and a graph writer is
     * attached only when {@code graphOutput} is non-null.
     *
     * @return a newly constructed assembler
     */
    @Override
    public ReadThreadingAssembler makeReadThreadingAssembler() {
        final ReadThreadingAssembler assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes,
                dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef, numPruningSamples, useAdaptivePruning ? 0 : minPruneFactor,
                useAdaptivePruning, initialErrorRateForPruning, pruningLog10OddsThreshold, maxUnprunedVariants);
        assemblyEngine.setDebugGraphTransformations(debugGraphTransformations);
        // The command line flag is phrased negatively ("do not recover"), the setter positively.
        assemblyEngine.setRecoverDanglingBranches(!doNotRecoverDanglingBranches);
        assemblyEngine.setMinDanglingBranchLength(minDanglingBranchLength);

        if ( graphOutput != null ) {
            assemblyEngine.setGraphWriter(new File(graphOutput));
        }

        return assemblyEngine;
    }

    /**
     * @return the value of the {@code consensus} command line flag
     */
    @Override
    public boolean consensusMode() { return consensusMode; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.broadinstitute.hellbender.tools.walkers.haplotypecaller;

import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;

import java.io.File;

/**
 * Mutect-specific read threading assembler arguments.
 *
 * Adds the flag for opting out of adaptive pruning on top of the arguments inherited from
 * {@link ReadThreadingAssemblerArgumentCollection}, and builds the assembler from them.
 */
public class MutectReadThreadingAssemblerArgumentCollection extends ReadThreadingAssemblerArgumentCollection {
    // The parent collection is Serializable, so this subclass is too. Declare the stream version explicitly
    // rather than relying on the compiler-computed default, which changes whenever a member is added or removed.
    private static final long serialVersionUID = 1L;

    /**
     * A single edge multiplicity cutoff for pruning doesn't work in samples with variable depths, for example exomes
     * and RNA.  This parameter disables the probabilistic algorithm for pruning the assembly graph that considers the
     * likelihood that each chain in the graph comes from real variation, and instead uses a simple multiplicity cutoff.
     */
    @Advanced
    @Argument(fullName="disable-adaptive-pruning", doc = "Disable the adaptive algorithm for pruning paths in the graph", optional = true)
    public boolean disableAdaptivePruning = false;

    /**
     * Builds a {@link ReadThreadingAssembler} configured from this argument collection.
     *
     * Adaptive pruning is on unless {@code disableAdaptivePruning} is set. While it is on, 0 is passed
     * to the constructor in place of {@code minPruneFactor}; when it is disabled, {@code minPruneFactor}
     * is passed unchanged. Dangling branch recovery is always enabled here, and a graph writer is
     * attached only when {@code graphOutput} is non-null.
     *
     * @return a newly constructed assembler
     */
    @Override
    public ReadThreadingAssembler makeReadThreadingAssembler() {
        final ReadThreadingAssembler assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes,
                dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef, numPruningSamples, disableAdaptivePruning ? minPruneFactor : 0,
                !disableAdaptivePruning, initialErrorRateForPruning, pruningLog10OddsThreshold, maxUnprunedVariants);
        assemblyEngine.setDebugGraphTransformations(debugGraphTransformations);
        // This collection exposes no flag for turning dangling branch recovery off, so it is always on.
        assemblyEngine.setRecoverDanglingBranches(true);
        assemblyEngine.setMinDanglingBranchLength(minDanglingBranchLength);

        if ( graphOutput != null ) {
            assemblyEngine.setGraphWriter(new File(graphOutput));
        }

        return assemblyEngine;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;

import java.io.Serializable;
import java.util.List;

/**
* Set of arguments related to the {@link org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler}
*/
public final class ReadThreadingAssemblerArgumentCollection implements Serializable {
public abstract class ReadThreadingAssemblerArgumentCollection implements Serializable {
private static final long serialVersionUID = 1L;

// -----------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -48,20 +49,6 @@ public final class ReadThreadingAssemblerArgumentCollection implements Serializa
@Argument(fullName="num-pruning-samples", doc="Number of samples that must pass the minPruning threshold", optional = true)
public int numPruningSamples = 1;

/**
* As of version 3.3, this argument is no longer needed because dangling end recovery is now the default behavior. See GATK 3.3 release notes for more details.
*/
@Deprecated
@Argument(fullName="recover-dangling-heads", doc="This argument is deprecated since version 3.3", optional = true)
public boolean DEPRECATED_RecoverDanglingHeads = false;

/**
* By default, the read threading assembler will attempt to recover dangling heads and tails. See the `minDanglingBranchLength` argument documentation for more details.
*/
@Hidden
@Argument(fullName="do-not-recover-dangling-branches", doc="Disable dangling head and tail recovery", optional = true)
public boolean doNotRecoverDanglingBranches = false;

/**
* When constructing the assembly graph we are often left with "dangling" branches. The assembly engine attempts to rescue these branches
* by merging them back into the main graph. This argument describes the minimum length of a dangling branch needed for the engine to
Expand All @@ -71,13 +58,7 @@ public final class ReadThreadingAssemblerArgumentCollection implements Serializa
@Argument(fullName="min-dangling-branch-length", doc="Minimum length of a dangling branch to attempt recovery", optional = true)
public int minDanglingBranchLength = 4;

/**
* This argument is specifically intended for 1000G consensus analysis mode. Setting this flag will inject all
* provided alleles to the assembly graph but will not forcibly genotype all of them.
*/
@Advanced
@Argument(fullName="consensus", doc="1000G consensus mode", optional = true)
public boolean consensusMode = false;


/**
* The assembly graph can be quite complex, and could imply a very large number of possible haplotypes. Each haplotype
Expand All @@ -91,13 +72,6 @@ public final class ReadThreadingAssemblerArgumentCollection implements Serializa
@Argument(fullName="max-num-haplotypes-in-population", doc="Maximum number of haplotypes to consider for your population", optional = true)
public int maxNumHaplotypesInPopulation = 128;

/**
* Enabling this argument may cause fundamental problems with the assembly graph itself.
*/
@Hidden
@Argument(fullName="error-correct-kmers", doc = "Use an exploratory algorithm to error correct the kmers used during assembly", optional = true)
public boolean errorCorrectKmers = false;

/**
* Paths with fewer supporting kmers than the specified threshold will be pruned from the graph.
*
Expand All @@ -111,15 +85,6 @@ public final class ReadThreadingAssemblerArgumentCollection implements Serializa
@Argument(fullName="min-pruning", doc = "Minimum support to not prune paths in the graph", optional = true)
public int minPruneFactor = 2;

/**
* A single edge multiplicity cutoff for pruning doesn't work in samples with variable depths, for example exomes
* and RNA. This parameter activates a probabilistic algorithm for pruning the assembly graph that considers the
* likelihood that each chain in the graph comes from real variation.
*/
@Advanced
@Argument(fullName="adaptive-pruning", doc = "Use an adaptive algorithm for pruning paths in the graph", optional = true)
public boolean useAdaptivePruning = false;

/**
* Initial base error rate guess for the probabilistic adaptive pruning model. Results are not very sensitive to this
* parameter because it is only a starting point from which the algorithm discovers the true error rate.
Expand Down Expand Up @@ -168,4 +133,8 @@ public final class ReadThreadingAssemblerArgumentCollection implements Serializa
@Hidden
@Argument(fullName="min-observations-for-kmer-to-be-solid", doc = "A k-mer must be seen at least these times for it considered to be solid", optional = true)
public int minObservationsForKmerToBeSolid = 20;

public abstract ReadThreadingAssembler makeReadThreadingAssembler();

public boolean consensusMode() { return false; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ public final class ReadThreadingAssembler {
private int pruneFactor;
private final ChainPruner<MultiDeBruijnVertex, MultiSampleEdge> chainPruner;

protected boolean errorCorrectKmers = false;

private File debugGraphOutputPath = null; //Where to write debug graphs, if unset it defaults to the current working dir
private File graphOutputPath = null;

Expand Down Expand Up @@ -572,18 +570,6 @@ private void printGraphs(final List<SeqGraph> graphs) {
//
// -----------------------------------------------------------------------------------------------

public int getPruneFactor() {
return pruneFactor;
}

public boolean shouldErrorCorrectKmers() {
return errorCorrectKmers;
}

public void setErrorCorrectKmers(boolean errorCorrectKmers) {
this.errorCorrectKmers = errorCorrectKmers;
}

public void setGraphWriter(File graphOutputPath) {
this.graphOutputPath = graphOutputPath;
}
Expand All @@ -610,10 +596,6 @@ public boolean isDebugGraphTransformations() {

public boolean isRecoverDanglingBranches() { return recoverDanglingBranches; }

public void setPruneFactor(final int pruneFactor) {
this.pruneFactor = pruneFactor;
}

public void setDebugGraphTransformations(final boolean debugGraphTransformations) {
this.debugGraphTransformations = debugGraphTransformations;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final double DEFAULT_INITIAL_LOD = 2.0;
public static final double DEFAULT_MITO_INITIAL_LOD = 0;

@Override
protected boolean useMutectAssemblerArgumentCollection() { return true; }

//TODO: HACK ALERT HACK ALERT HACK ALERT
//TODO: GATK4 does not yet have a way to tag inputs, eg -I:tumor tumor.bam -I:normal normal.bam,
//TODO: so for now we require the user to specify bams *both* as inputs, with -I tumor.bam -I normal.bam
Expand Down
Loading

0 comments on commit ce8051c

Please sign in to comment.