Skip to content

Commit

Permalink
Check for and require AVX for CNNScoreVariants (#5291)
Browse files Browse the repository at this point in the history
  • Loading branch information
EdwardDixon authored and cmnbroad committed Nov 19, 2018
1 parent 197c4cb commit b7c0560
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 27 deletions.
13 changes: 5 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,10 @@ releases of the toolkit.
docker client, which can be found on the [docker website](https://www.docker.com/get-docker).
* Python Dependencies:<a name="python"></a>
* GATK4 uses the [Conda](https://conda.io/docs/index.html) package manager to establish and manage the
Python environment and dependencies required by GATK tools that have a Python dependency. There are two different
conda environments that can be used:
* The ```gatk``` environment, which has no special hardware requirements. The GATK Docker image comes with the
"gatk" environment pre-configured.
* The ```gatk-intel``` environment, which requires and uses Intel (AVX2 or AVX-512) hardware acceleration to
increase performance.
* To establish the conda environment when not using the Docker image, a conda environment must first be "created", and
Python environment and dependencies required by GATK tools that have a Python dependency. The ```gatk``` environment
requires hardware with AVX support for tools that depend on TensorFlow (e.g. CNNScoreVariants). The GATK Docker image
comes with the ```gatk``` environment pre-configured.
* To establish the environment when not using the Docker image, a conda environment must first be "created", and
then "activated":
* First, make sure [Miniconda or Conda](https://conda.io/docs/index.html) is installed (Miniconda is sufficient).
* To "create" the conda environment:
Expand Down Expand Up @@ -273,7 +270,7 @@ You can download and run pre-built versions of GATK4 from the following places:
* Examples:
```
```
./gatk PrintReadsSpark \
-I gs://my-gcs-bucket/path/to/input.bam \
-O gs://my-gcs-bucket/path/to/output.bam \
Expand Down
18 changes: 2 additions & 16 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -522,24 +522,10 @@ task condaStandardEnvironmentDefinition(type: Copy) {
}
}

// Copy task that produces the "gatk-intel" conda environment yml file: it copies the file matched by
// gatkCondaTemplate out of the "scripts" directory into buildDir, renaming and expanding it on the way.
task condaIntelEnvironmentDefinition(type: Copy) {
from "scripts"
into buildDir
include gatkCondaTemplate
// The copied file is written under the name held in gatkCondaIntelYML.
rename { file -> gatkCondaIntelYML }
// Properties substituted into the copied file; the TensorFlow dependency is a wheel URL on the
// anaconda.org "intel" channel, parameterized by tensorflowVersion.
expand(["condaEnvName":"gatk-intel",
"condaEnvDescription" : "Conda environment for GATK Python Tools running with Intel hardware acceleration",
"tensorFlowDependency" :
"https://anaconda.org/intel/tensorflow/$tensorflowVersion/download/tensorflow-$tensorflowVersion-cp36-cp36m-linux_x86_64.whl"])
doLast {
// Report the generated file name at lifecycle log level once the copy has run.
logger.lifecycle("Created Intel Conda environment yml file: $gatkCondaIntelYML")
}
}

// Create two GATK conda environment yml files from the conda env template
// (one for standard GATK and one for running GATK with Intel hardware).
// Create GATK conda environment yml file from the conda env template
// NOTE(review): two dependsOn declarations follow, one of which still references
// 'condaIntelEnvironmentDefinition'; this looks like both sides of a diff -- confirm which one applies.
task condaEnvironmentDefinition() {
dependsOn 'pythonPackageArchive', 'condaStandardEnvironmentDefinition', 'condaIntelEnvironmentDefinition'
dependsOn 'pythonPackageArchive', 'condaStandardEnvironmentDefinition'
}

// Create the Python package archive file
Expand Down
2 changes: 1 addition & 1 deletion scripts/gatkcondaenv.yml.template
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ dependencies:
- theano==0.9.0
- tqdm==4.19.4
- werkzeug==0.12.2
- gatkPythonPackageArchive.zip
- gatkPythonPackageArchive.zip
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
import org.broadinstitute.hellbender.utils.runtime.AsynchronousStreamWriter;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.exceptions.UserException;
import picard.cmdline.programgroups.VariantFilteringProgramGroup;

import com.intel.gkl.IntelGKLUtils;

import java.io.*;
import java.util.*;

Expand Down Expand Up @@ -108,6 +111,10 @@ public class CNNScoreVariants extends TwoPassVariantWalker {
"1D models will look at the reference sequence and variant annotations." +
"2D models look at aligned reads, reference sequence, and variant annotations." +
"2D models require a BAM file as input as well as the tensor-type argument to be set.";
/** Command-line name (used as both full and short name) of the argument that disables the AVX check. */
static final String DISABLE_AVX_CHECK_NAME = "disable-avx-check";
/**
 * Message template for the exception thrown when AVX support is not detected. The single {@code %s}
 * placeholder is filled with {@link #DISABLE_AVX_CHECK_NAME} via {@code String.format}.
 */
static final String AVXREQUIRED_ERROR = "This tool requires AVX instruction set support by default due to its dependency on recent versions of the TensorFlow library.\n" +
" If you have an older (pre-1.6) version of TensorFlow installed that does not require AVX you may attempt to re-run the tool with the %s argument to bypass this check.\n" +
" Note that such configurations are not officially supported.";

private static final int CONTIG_INDEX = 0;
private static final int POS_INDEX = 1;
Expand Down Expand Up @@ -160,6 +167,11 @@ public class CNNScoreVariants extends TwoPassVariantWalker {
@Argument(fullName = "output-tensor-dir", shortName = "output-tensor-dir", doc = "Optional directory where tensors can be saved for debugging or visualization.", optional = true)
private String outputTensorsDir = "";

// Optional opt-out switch, false by default: when set, onTraversalStart skips the AVX support check.
@Advanced
@Argument(fullName = DISABLE_AVX_CHECK_NAME, shortName = DISABLE_AVX_CHECK_NAME, doc = "If set, no check will be made for AVX support. " +
"Use only if you have installed a pre-1.6 TensorFlow build. ", optional = true)
private boolean disableAVXCheck = false;

@Hidden
@Argument(fullName = "enable-journal", shortName = "enable-journal", doc = "Enable streaming process journal.", optional = true)
private boolean enableJournal = false;
Expand Down Expand Up @@ -232,8 +244,14 @@ public List<ReadFilter> getDefaultReadFilters() {

@Override
public void onTraversalStart() {
if (getHeaderForVariants().getGenotypeSamples().size() > 1) {
logger.warn("CNNScoreVariants is a single sample tool, but the input VCF has more than 1 sample.");
// Users can disable the AVX check to allow an older version of TF that doesn't require AVX to be used.
if(this.disableAVXCheck == false) {
IntelGKLUtils utils = new IntelGKLUtils();
utils.load(null);
if (utils.isAvxSupported() == false) {
// Give user the bad news, suggest remedies.
throw new UserException.HardwareFeatureException(String.format(CNNScoreVariants.AVXREQUIRED_ERROR, DISABLE_AVX_CHECK_NAME));
}
}

// Start the Python process and initialize a stream writer for streaming data to the Python code
Expand Down

0 comments on commit b7c0560

Please sign in to comment.