Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GnarlyGenotyper tech debt #6075

Merged
merged 6 commits into from
Aug 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,9 @@ private <T extends Feature> List<T> subsetToStartPosition(final Collection<T> fe
* @param interval genomic interval for the result. Typically, this would be the interval of the variant. Never {@code null}.
* @param featureQueryLookahead When querying FeatureDataSources, cache this many extra bases of context beyond
* the end of query intervals in anticipation of future queries. Must be >= 0. If uncertain, use zero.
* @param cloudPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)} If uncertain, use zero.
* @param cloudIndexPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)} If uncertain, use zero.
* @param reference See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)} If uncertain, use {@code null}.
* @param cloudPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions)}. If uncertain, use zero.
* @param cloudIndexPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions)}. If uncertain, use zero.
* @param reference See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions)}. If uncertain, use {@code null}.
*/
@VisibleForTesting
public static FeatureContext createFeatureContextForTesting(final Map<FeatureInput<? extends Feature>, Class<? extends Feature>> featureInputsWithType, final String dummyToolInstanceName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.config.ConfigFactory;
import org.broadinstitute.hellbender.utils.config.GATKConfig;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.io.File;
import java.lang.reflect.Field;
Expand Down Expand Up @@ -140,35 +139,34 @@ public FeatureManager(final CommandLineProgram toolInstance, final int featureQu
* Create a FeatureManager given a CommandLineProgram tool instance, discovering all FeatureInput
* arguments in the tool and creating query-able FeatureDataSources for them. Allows control over
* how much caching is performed by each {@link FeatureDataSource}.
*
* @param toolInstance Instance of the tool to be run (potentially containing one or more FeatureInput arguments)
* @param toolInstance Instance of the tool to be run (potentially containing one or more FeatureInput arguments)
* Must have undergone command-line argument parsing and argument value injection already.
* @param featureQueryLookahead When querying FeatureDataSources, cache this many extra bases of context beyond
* the end of query intervals in anticipation of future queries (>= 0).
* @param cloudPrefetchBuffer MB size of caching/prefetching wrapper for the data, if on Google Cloud (0 to disable).
* @param cloudIndexPrefetchBuffer MB size of caching/prefetching wrapper for the index, if on Google Cloud (0 to disable).
* @param reference reference to use when opening feature files, may be null, currently only used by Genomics DB
* @param gdbOptions settings for GenomicsDB to use when reading from a GenomicsDB workspace
*
*/
public FeatureManager(final CommandLineProgram toolInstance, final int featureQueryLookahead, final int cloudPrefetchBuffer, final int cloudIndexPrefetchBuffer, final Path reference) {
public FeatureManager(final CommandLineProgram toolInstance, final int featureQueryLookahead, final int cloudPrefetchBuffer, final int cloudIndexPrefetchBuffer, final GenomicsDBOptions gdbOptions) {
this.toolInstanceSimpleClassName = toolInstance.getClass().getSimpleName();
this.featureSources = new LinkedHashMap<>();

initializeFeatureSources(featureQueryLookahead, toolInstance, cloudPrefetchBuffer, cloudIndexPrefetchBuffer, reference);
initializeFeatureSources(featureQueryLookahead, toolInstance, cloudPrefetchBuffer, cloudIndexPrefetchBuffer, gdbOptions);
}

/**
* Same as {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)}, except used when the
* Same as {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, GenomicsDBOptions)}, except used when the
* FeatureInputs (and associated types) are known.
*
* This constructor should only be used in test code.
*
* @param featureInputsToTypeMap {@link Map} of a {@link FeatureInput} to the output type that must extend {@link Feature}. Never {@code null}
* @param toolInstanceName See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)}
* @param featureQueryLookahead See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)}
* @param cloudPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)}
* @param cloudIndexPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)}
* @param reference See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, Path)}
* @param toolInstanceName See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, GenomicsDBOptions)}
* @param featureQueryLookahead See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, GenomicsDBOptions)}
* @param cloudPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, GenomicsDBOptions)}
* @param cloudIndexPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, GenomicsDBOptions)}
* @param reference See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, GenomicsDBOptions)}
*/
@VisibleForTesting
FeatureManager(final Map<FeatureInput<? extends Feature>, Class<? extends Feature>> featureInputsToTypeMap, final String toolInstanceName, final int featureQueryLookahead, final int cloudPrefetchBuffer, final int cloudIndexPrefetchBuffer, final Path reference) {
Expand All @@ -193,7 +191,7 @@ public FeatureManager(final CommandLineProgram toolInstance, final int featureQu
* @param cloudIndexPrefetchBuffer MB size of caching/prefetching wrapper for the index, if on Google Cloud (0 to disable).
*/
@SuppressWarnings({"unchecked", "rawtypes"})
private void initializeFeatureSources( final int featureQueryLookahead, final CommandLineProgram toolInstance, final int cloudPrefetchBuffer, final int cloudIndexPrefetchBuffer, final Path reference) {
private void initializeFeatureSources( final int featureQueryLookahead, final CommandLineProgram toolInstance, final int cloudPrefetchBuffer, final int cloudIndexPrefetchBuffer, final GenomicsDBOptions gdbOptions) {

// Discover all arguments of type FeatureInput (or Collections thereof) in our tool's class hierarchy
// (and associated ArgumentCollections). Arguments not specified by the user on the command line will
Expand All @@ -208,7 +206,7 @@ private void initializeFeatureSources( final int featureQueryLookahead, final Co
if ( featureInput != null ) {
final Class<? extends Feature> featureType = getFeatureTypeForFeatureInputField(featureArgument.getKey());
addToFeatureSources(featureQueryLookahead, featureInput, featureType, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
toolInstance instanceof VariantWalker ? ((VariantWalker) toolInstance).getGenomicsDBOptions() : null);
gdbOptions);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public boolean requiresFeatures(){
@Override
void initializeFeatures() {
features = new FeatureManager(this, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
referenceArguments.getReferencePath());
getGenomicsDBOptions());
initializeDrivingFeatures();
}

Expand Down
10 changes: 5 additions & 5 deletions src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -415,11 +415,11 @@ protected List<SimpleInterval> transformTraversalIntervals(final List<SimpleInte
}

/**
*
* @return By default, not every GATK tool can read from a GenomicsDB -- child classes can override
* Get the GenomicsDB read settings for the current tool.
* @return the GenomicsDB options for this tool; by default, vanilla options constructed from the tool's reference path
*/
protected GenomicsDBOptions getGenomicsDBOptions() {
throw new IllegalArgumentException("This tool does not take a GenomicsDB as a feature input.");
return new GenomicsDBOptions(referenceArguments.getReferencePath());
}

/**
Expand Down Expand Up @@ -481,8 +481,8 @@ private boolean hasCramInput() {
* By default, this method initializes the FeatureManager to use the lookahead cache of {@link FeatureDataSource#DEFAULT_QUERY_LOOKAHEAD_BASES} bases.
*/
void initializeFeatures() {
features = new FeatureManager(this, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
referenceArguments.getReferencePath());
features = new FeatureManager(this, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, cloudPrefetchBuffer,
cloudIndexPrefetchBuffer, getGenomicsDBOptions());
if ( features.isEmpty() ) { // No available sources of Features discovered for this tool
features = null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void initializeFeatures() {
// with ReadWalkers, typically), but with IntervalWalkers our query intervals are guaranteed
// to be non-overlapping, since our interval parsing code always merges overlapping intervals.
features = new FeatureManager(this, 0, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
referenceArguments.getReferencePath());
getGenomicsDBOptions());
if ( features.isEmpty() ) { // No available sources of Features for this tool
features = null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ void setReadTraversalBounds() {
void initializeFeatures() {
//We override this method to change lookahead of the cache
features = new FeatureManager(this, FEATURE_CACHE_LOOKAHEAD, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
referenceArguments.getReferencePath());
getGenomicsDBOptions());
if ( features.isEmpty() ) { // No available sources of Features discovered for this tool
features = null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ void initializeFeatures() {
// TODO: FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES will likely hurt performance for tools like VQSR,
// TODO: but let's test it
features = new FeatureManager(this, DEFAULT_DRIVING_VARIANTS_LOOKAHEAD_BASES, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
referenceArguments.getReferencePath());
getGenomicsDBOptions());
initializeDrivingVariants();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ public void onTraversalStart() {
featureQueryLookahead,
cloudPrefetchBuffer,
cloudIndexPrefetchBuffer,
referenceArguments.getReferencePath());
getGenomicsDBOptions());

// always perform GC-content annotation
logger.info("Adding GC-content annotator...");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,7 @@ public static String encodeAnyASList( final List<?> somethingList) {
* @return true if the annotation is expected to have values per-allele
*/
public static boolean isAlleleSpecific(final InfoFieldAnnotation annotation) {
if (annotation instanceof AS_RankSumTest) {
return true;
}
if (annotation instanceof AS_StrandBiasTest) {
return true;
}
if (annotation instanceof AS_RMSMappingQuality) {
return true;
}
if (annotation instanceof AS_StandardAnnotation) {
return true;
}
return false;
return annotation instanceof AlleleSpecificAnnotation;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
*/
//TODO: this can't extend InbreedingCoeff because that one is Standard and it would force this to be output all the time; should fix code duplication nonetheless
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Allele-specific likelihood-based test for the consanguinity among samples (AS_InbreedingCoeff)")
public final class AS_InbreedingCoeff extends InfoFieldAnnotation implements AS_StandardAnnotation {
public final class AS_InbreedingCoeff extends InfoFieldAnnotation implements AS_StandardAnnotation, AlleleSpecificAnnotation {

public static final int MIN_SAMPLES = 10;
private Set<String> founderIds; //TODO: either use this or enter a bug report
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
* </ul>
*/
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Allele-specific call confidence normalized by depth of sample reads supporting the allele (AS_QD)")
public class AS_QualByDepth extends InfoFieldAnnotation implements ReducibleAnnotation, AS_StandardAnnotation {
public class AS_QualByDepth extends InfoFieldAnnotation implements ReducibleAnnotation, AS_StandardAnnotation, AlleleSpecificAnnotation {

@Override
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.AS_QUAL_BY_DEPTH_KEY); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
* </ul>
*/
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Allele-specific root-mean-square of the mapping quality of reads across all samples (AS_MQ)")
public final class AS_RMSMappingQuality extends InfoFieldAnnotation implements AS_StandardAnnotation, ReducibleAnnotation {
public final class AS_RMSMappingQuality extends InfoFieldAnnotation implements AS_StandardAnnotation, ReducibleAnnotation, AlleleSpecificAnnotation {

private final String printFormat = "%.2f";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
/**
* Allele-specific implementation of rank sum test annotations
*/
public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnnotation {
public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnnotation, AlleleSpecificAnnotation {
private static final Logger logger = LogManager.getLogger(AS_RankSumTest.class);
public static final String RAW_DELIM = ",";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
/**
* Allele-specific implementation of strand bias annotations
*/
public abstract class AS_StrandBiasTest extends StrandBiasTest implements ReducibleAnnotation {
public abstract class AS_StrandBiasTest extends StrandBiasTest implements ReducibleAnnotation, AlleleSpecificAnnotation {
private final static Logger logger = LogManager.getLogger(AS_StrandBiasTest.class);
public static final String SPLIT_DELIM = "\\|"; //String.split takes a regex, so we need to escape the pipe
public static final String PRINT_DELIM = "|";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific;

import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;

/**
 * Marker interface used to indicate which annotations are allele-specific.
 *
 * <p>It declares no methods of its own; an annotation class opts in simply by implementing it,
 * which allows callers to recognize allele-specific annotations with an {@code instanceof} check.
 */
public interface AlleleSpecificAnnotation extends Annotation {
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public abstract class AssemblyBasedCallerArgumentCollection {

public static final String MIN_BASE_QUALITY_SCORE_LONG_NAME = "min-base-quality-score";
public static final String SMITH_WATERMAN_LONG_NAME = "smith-waterman";
public static final String EMIT_REFERENCE_CONFIDENCE_LONG_NAME = "emit-ref-confidence";

public ReadThreadingAssembler createReadThreadingAssembler() {
final ReadThreadingAssembler assemblyEngine = assemblerArgs.makeReadThreadingAssembler();
Expand Down Expand Up @@ -107,11 +108,12 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
public SmithWatermanAligner.Implementation smithWatermanImplementation = SmithWatermanAligner.Implementation.JAVA;

/**
* (BETA feature) The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference.
* This is similar to the HaplotypeCaller reference confidence/GVCF mode. See https://software.broadinstitute.org/gatk/documentation/article.php?id=4017 for information about GVCFs.
* The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference.
* See https://software.broadinstitute.org/gatk/documentation/article.php?id=4017 for information about GVCFs.
* For Mutect2, this is a BETA feature that functions similarly to the HaplotypeCaller reference confidence/GVCF mode.
*/
@Advanced
@Argument(fullName="emit-ref-confidence", shortName="ERC", doc="(BETA feature) Mode for emitting reference confidence scores", optional = true)
@Argument(fullName=EMIT_REFERENCE_CONFIDENCE_LONG_NAME, shortName="ERC", doc="Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true)
public ReferenceConfidenceMode emitReferenceConfidence = ReferenceConfidenceMode.NONE;

protected abstract int getDefaultMaxMnpDistance();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,12 @@ private void validateAndInitializeArgs() {
if ( emitReferenceConfidence() && samplesList.numberOfSamples() != 1 ) {
throw new CommandLineException.BadArgumentValue("--emit-ref-confidence", "Can only be used in single sample mode currently. Use the --sample-name argument to run on a single sample out of a multi-sample BAM file.");
}

if (hcArgs.floorBlocks && !emitReferenceConfidence()) {
throw new UserException(HaplotypeCallerArgumentCollection.OUTPUT_BLOCK_LOWER_BOUNDS + " refers to GVCF blocks," +
" so reference confidence mode (" + AssemblyBasedCallerArgumentCollection.EMIT_REFERENCE_CONFIDENCE_LONG_NAME +
") must be specified.");
}
}

private void initializeSamples() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ public void traverse() {
@Override
void initializeFeatures(){
features = new FeatureManager(this, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
referenceArguments.getReferencePath());
getGenomicsDBOptions());
}
}
Loading