Skip to content

Commit

Permalink
Merge pull request #1 from hasindu2008/gpu-varcall-update
Browse files Browse the repository at this point in the history
cleaned up GPU consensus calling
  • Loading branch information
iiSeymour authored Oct 3, 2019
2 parents 0cec8f9 + 896b806 commit 0fa62cd
Show file tree
Hide file tree
Showing 59 changed files with 33,253 additions and 1,929 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ include/
lib/
share/

hdf5-1.8.14.tar.gz
hdf5-1.*.tar.gz
3.2.5.tar.bz2
eigen/
local*
89 changes: 74 additions & 15 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,84 @@
# travis.yml for github.com/jts/nanopolish

dist: trusty
services: docker
sudo: false
language: generic
cache: apt
git:
depth: 1

.nanopolish.ci.matrix-definitions:
- &cron-only
if: type = cron OR env(CI_CRON) = true
- &arch
before_install:
- |
sed -i "/^FROM / s/arm64/${ARCH}/" Dockerfile-arm
- |
docker run --rm --privileged \
multiarch/qemu-user-static:register --reset && \
docker build --rm -t nanopolish -f Dockerfile-arm .
script:
- |
docker run --rm -t \
-e HDF5="${HDF5:-install}" \
-e H5_CFLAGS="${H5_CFLAGS}" \
-e HDF5_VERSION="1.10.4" \
-e H5_INCLUDE="${H5_INCLUDE}" \
-e LDFLAGS="${LDFLAGS}" \
nanopolish
matrix:
include:
# Set env for both nanopolish and its dependency hdf5.
- env:
- CC=gcc-4.8
- CXX=g++-4.8
- AR=gcc-ar-4.8
- NM=gcc-nm-4.8
- RANLIB=gcc-ranlib-4.8
- env:
- CC=gcc-8
- CXX=g++-8
- AR=gcc-ar-8
- NM=gcc-nm-8
- RANLIB=gcc-ranlib-8
# Set env for both nanopolish and its dependency hdf5.
- env:
- CC=gcc-4.8
- CXX=g++-4.8
- AR=gcc-ar-4.8
- NM=gcc-nm-4.8
- RANLIB=gcc-ranlib-4.8
- env:
- CC=gcc-8
- CXX=g++-8
- AR=gcc-ar-8
- NM=gcc-nm-8
- RANLIB=gcc-ranlib-8
# aarch64 - ARM 64-bit
- name: aarch64
sudo: required
env:
- ARCH=arm64
<<: *arch
<<: *cron-only
- name: aarch64-system-hdf5
sudo: required
env:
- ARCH=arm64
- HDF5="system"
- H5_INCLUDE="-I/usr/include/hdf5/serial"
- LDFLAGS="-L/usr/lib/aarch64-linux-gnu/hdf5/serial"
<<: *arch
# armv7l - ARM 32-bit
- name: armv7l
sudo: required
env:
- ARCH=armhf
<<: *arch
<<: *cron-only
- name: armv7l-system-hdf5
sudo: required
env:
- ARCH=armhf
- HDF5="system"
- H5_INCLUDE="-I/usr/include/hdf5/serial"
- LDFLAGS="-L/usr/lib/arm-linux-gnueabihf/hdf5/serial"
<<: *arch
allow_failures:
# The jobs that install hdf5 from source in docker finish with an error
# because they exceed the maximum time limit (50 minutes).
- name: aarch64
- name: armv7l

# Install and export newer gcc
before_install:
Expand All @@ -38,9 +95,11 @@ before_install:
sudo apt-get install -qq "${CXX}"
fi
script:
before_script:
# Suppress all compiler warnings from the hdf5 Makefile so that the log
# can be read on the Travis log page without downloading the raw log.
# Travis finishes with an error when the log exceeds the 4 MB length limit.
- export H5_CFLAGS="-w"
- make nanopolish && make test

script:
- make && make test
10 changes: 10 additions & 0 deletions DEVELOPERS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Developer Notes

## Updating Bioconda on tagged releases
The following is a quick step-by-step checklist for updating the bioconda release of nanopolish, to be done after each tagged release. It is a condensed/updated version of [these slides](https://monashbioinformaticsplatform.github.io/bioconda-tutorial/#/) by Andrew Perry.
1. On Github, fork `https://github.com/bioconda/bioconda-recipes` to `https://github.com/{USER}/bioconda-recipes` and clone the latter repository to a local directory; `cd` into the cloned directory.
2. Check out a new branch via `git branch nanopolish-bioconda-bump && git checkout nanopolish-bioconda-bump`.
3. Update the `bioconda-recipes/recipes/nanopolish/meta.yaml` file by editing the version tag and the SHA hash; the SHA256 hash can be obtained by running `sha256sum nanopolish-v{VERSION}.tar.gz` on the command line (where `{VERSION}` is the new, updated version tag); commit the changes to the `meta.yaml` file via, e.g., `git commit -a -m 'bump nanopolish to version {VERSION}'`.
4. Push the changes to your forked repo via `git push origin nanopolish-bioconda-bump`; then, make a pull request to merge the updates into the master branch of the upstream `bioconda-recipes` repository.
5. If all goes well, the automated TravisCI tests on the upstream repository will pass and an owner will merge the changes.
6. Otherwise, if further edits are requested or if the TravisCI tests fail, make further commits to the local cloned repository and push to the forked repository on Github; the changes should automatically appear in the pull request and will trigger an automated TravisCI check.
18 changes: 18 additions & 0 deletions Dockerfile-arm
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image used by CI to build and test nanopolish for ARM targets.
# NOTE: .travis.yml rewrites "arm64" on the FROM line to ${ARCH} with sed
# before building, so keep the architecture spelled exactly as "arm64" here.
FROM multiarch/ubuntu-debootstrap:arm64-bionic
# Print kernel and architecture details into the build log.
RUN uname -a
# Install the compiler toolchain and helper packages without suggested or
# recommended extras; software-properties-common provides add-apt-repository,
# which the next step uses.
RUN apt-get update -qq && \
apt-get install -yq --no-install-suggests --no-install-recommends \
bzip2 \
ca-certificates \
gcc \
g++ \
make \
software-properties-common
# Enable the "universe" repository and install the system HDF5 development package.
RUN add-apt-repository -y universe && \
apt-get update -qq && \
apt-get install -yq libhdf5-dev
# Log where the HDF5 header and static library ended up; `|| true` keeps a
# find error from failing the image build.
RUN find /usr/include -name "hdf5.h" || true
RUN find /usr/lib -name "libhdf5.a" || true
# Copy the whole build context (the repository checkout) into /nanopolish.
WORKDIR /nanopolish
COPY . .
# Default command: run `make`, then `make test`.
CMD exec bash -c "make && make test"
73 changes: 35 additions & 38 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#

# Sub directories containing source code, except for the main programs
SUBDIRS := src src/hmm src/thirdparty src/thirdparty/scrappie src/common src/alignment src/pore_model src/cuda_kernels
SUBDIRS := src src/hmm src/thirdparty src/thirdparty/scrappie src/common src/alignment src/pore_model

#
# Set libraries, paths, flags and options
Expand All @@ -10,19 +10,20 @@ SUBDIRS := src src/hmm src/thirdparty src/thirdparty/scrappie src/common src/ali
#Basic flags every build needs
LIBS = -lz
CXXFLAGS ?= -g -O3
CXXFLAGS += -std=c++11 -fopenmp -fsigned-char
CFLAGS ?= -std=c99 -O3
CXXFLAGS += -std=c++11 -fopenmp -fsigned-char -D_FILE_OFFSET_BITS=64 #D_FILE_OFFSET_BITS=64 makes nanopolish work in 32 bit systems
CFLAGS ?= -O3 -std=c99 -fsigned-char -D_FILE_OFFSET_BITS=64
LDFLAGS ?=
CXX ?= g++
CC ?= gcc
NVCC = nvcc
NVCCFLAGS ?= -std=c++11 -I. -I/usr/local/cuda-9.0include -O3 -use_fast_math --default-stream per-thread -restrict
CURTFLAGS ?= -L/usr/local/cuda-9.0/lib64 -lcudart

# Change the value of HDF5, EIGEN, or HTS below to any value to disable compilation of bundled code
HDF5 ?= install
EIGEN ?= install
HTS ?= install

HDF5_VERSION ?= 1.8.14
EIGEN_VERSION ?= 3.2.5

# Check operating system, OSX doesn't have -lrt
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
Expand All @@ -37,7 +38,7 @@ ifeq ($(HDF5), install)
else
# Use system-wide hdf5
H5_LIB =
H5_INCLUDE =
H5_INCLUDE ?=
LIBS += -lhdf5
endif

Expand Down Expand Up @@ -69,94 +70,90 @@ EIGEN_INCLUDE = -I./eigen/
# Include the src subdirectories
NP_INCLUDE = $(addprefix -I./, $(SUBDIRS))

CUDA_INCLUDE=-I/usr/local/cuda-9.0/include

# Add include flags
CPPFLAGS += $(H5_INCLUDE) $(HTS_INCLUDE) $(FAST5_INCLUDE) $(NP_INCLUDE) $(EIGEN_INCLUDE) $(CUDA_INCLUDE)
CPPFLAGS += $(H5_INCLUDE) $(HTS_INCLUDE) $(FAST5_INCLUDE) $(NP_INCLUDE) $(EIGEN_INCLUDE)

# Main programs to build
PROGRAM = nanopolish
TEST_PROGRAM = nanopolish_test

.PHONY: all
all: $(PROGRAM) $(TEST_PROGRAM)
all: depend $(PROGRAM)

#
# Build libhts
#
htslib/libhts.a:
cd htslib && make || exit 255
cd htslib && make htslib_default_libs="-lz -lm -lbz2" || exit 255

#
# If this library is a dependency the user wants HDF5 to be downloaded and built.
#
lib/libhdf5.a:
if [ ! -e hdf5-1.8.14.tar.gz ]; then \
wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.8/hdf5-1.8.14/src/hdf5-1.8.14.tar.gz; \
if [ ! -e hdf5-$(HDF5_VERSION).tar.gz ]; then \
version_major_minor=`echo "$(HDF5_VERSION)" | sed -E 's/\.[0-9]+$$//'`; \
wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-$${version_major_minor}/hdf5-$(HDF5_VERSION)/src/hdf5-$(HDF5_VERSION).tar.gz; \
fi
tar -xzf hdf5-1.8.14.tar.gz || exit 255
cd hdf5-1.8.14 && \
./configure --enable-threadsafe --prefix=`pwd`/.. && \

tar -xzf hdf5-$(HDF5_VERSION).tar.gz || exit 255
cd hdf5-$(HDF5_VERSION) && \
./configure --enable-threadsafe --disable-hl --libdir=`pwd`/../lib --includedir=`pwd`/../include --prefix=`pwd`/.. && \
make && make install

# Download and install eigen if not already downloaded
eigen/INSTALL:
if [ ! -e 3.2.5.tar.bz2 ]; then \
wget http://bitbucket.org/eigen/eigen/get/3.2.5.tar.bz2; \
if [ ! -e $(EIGEN_VERSION).tar.bz2 ]; then \
wget http://bitbucket.org/eigen/eigen/get/$(EIGEN_VERSION).tar.bz2; \
fi
tar -xjf 3.2.5.tar.bz2 || exit 255
mv eigen-eigen-bdd17ee3b1b3 eigen || exit 255
tar -xjf $(EIGEN_VERSION).tar.bz2 || exit 255
mv eigen-eigen-* eigen || exit 255

#
# Source files
#

# Find the source files by searching subdirectories
CPP_SRC := $(foreach dir, $(SUBDIRS), $(wildcard $(dir)/*.cpp))
CU_SRC := $(foreach dir, $(SUBDIRS), $(wildcard $(dir)/*.cu))
C_SRC := $(foreach dir, $(SUBDIRS), $(wildcard $(dir)/*.c))
EXE_SRC = src/main/nanopolish.cpp src/test/nanopolish_test.cpp

# Automatically generated object names
CPP_OBJ=$(CPP_SRC:.cpp=.o)
C_OBJ=$(C_SRC:.c=.o)
CU_OBJ=$(CU_SRC:.cu=.o)
CPP_OBJ = $(CPP_SRC:.cpp=.o)
C_OBJ = $(C_SRC:.c=.o)

.SUFFIXES: .cu
ifdef cuda
include cuda.mk
endif

# Generate dependencies
.PHONY: depend
depend: .depend

.depend: $(CPP_SRC) $(C_SRC) $(CU_SRC) $(EXE_SRC) $(H5_LIB) $(EIGEN_CHECK)
.depend: $(CPP_SRC) $(C_SRC) $(EXE_SRC) $(H5_LIB) $(EIGEN_CHECK)
rm -f ./.depend
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -MM $(CPP_SRC) $(C_SRC) > ./.depend;

include .depend

# Compile objects
.cpp.o:
$(CXX) -o $@ -c $(CXXFLAGS) $(CPPFLAGS) -fPIC $<

.c.o:
$(CC) -o $@ -c $(CFLAGS) $(CPPFLAGS) $(H5_INCLUDE) -fPIC $<

.cu.o:
$(NVCC) -o $@ -c $(NVCCFLAGS) $(CPPFLAGS) $<

# Link main executable
$(PROGRAM): src/main/nanopolish.o $(CU_OBJ) $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(EIGEN_CHECK)
$(CXX) -o $@ $(CXXFLAGS) $(CPPFLAGS) -fPIC $< $(CPP_OBJ) $(CU_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(LIBS) $(LDFLAGS) $(CURTFLAGS)
$(PROGRAM): src/main/nanopolish.o $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(EIGEN_CHECK)
$(CXX) -o $@ $(CXXFLAGS) $(CPPFLAGS) -fPIC $< $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(LIBS) $(LDFLAGS)

# Link test executable
$(TEST_PROGRAM): src/test/nanopolish_test.o $(CPP_OBJ) $(CU_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB)
$(CXX) -o $@ $(CXXFLAGS) $(CPPFLAGS) -fPIC $< $(CPP_OBJ) $(CU_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(LIBS) $(LDFLAGS) $(CURTFLAGS)
$(TEST_PROGRAM): src/test/nanopolish_test.o $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB)
$(CXX) -o $@ $(CXXFLAGS) $(CPPFLAGS) -fPIC $< $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(LIBS) $(LDFLAGS)

.PHONY: test
test: $(TEST_PROGRAM)
./$(TEST_PROGRAM)

.PHONY: clean
clean:
rm -f $(PROGRAM) $(TEST_PROGRAM) $(CPP_OBJ) $(CU_OBJ) $(C_OBJ) src/main/nanopolish.o src/test/nanopolish_test.o
src/main/nanopolish.o src/test/nanopolish_test.o
rm -f $(PROGRAM) $(TEST_PROGRAM) $(CPP_OBJ) $(C_OBJ) \
src/cuda_kernels/GpuAligner.o \
src/main/nanopolish.o src/test/nanopolish_test.o
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ Software package for signal-level analysis of Oxford Nanopore sequencing data. N

## Release notes

* 0.11.1: `nanopolish polya` now supports SQK-RNA-002 kits with automatic backwards-compatibility with SQK-RNA-001

* 0.11.0: support for multi-fast5 files. `nanopolish methyltrain` now subsamples input data, improving speed and memory usage

* 0.10.2: added new program `nanopolish polya` to estimate the length of poly-A tails on direct RNA reads (by @paultsw)

* 0.10.1: `nanopolish variants --consensus` now only outputs a VCF file instead of a fasta sequence. The VCF file describes the changes that need to be made to turn the draft sequence into the polished assembly. A new program, `nanopolish vcf2fasta`, is provided to generate the polished genome (this replaces `nanopolish_merge.py`, see usage instructions below). This change is to avoid issues when merging segments that end on repeat boundaries (reported by Michael Wykes and Chris Wright).
Expand Down Expand Up @@ -86,7 +90,7 @@ samtools index reads.sorted.bam
Now, we use nanopolish to compute the consensus sequence (the genome is polished in 50kb blocks and there will be one output file per block). We'll run this in parallel:

```
python nanopolish_makerange.py draft.fa | parallel --results nanopolish.results -P 8 \
python3 nanopolish_makerange.py draft.fa | parallel --results nanopolish.results -P 8 \
nanopolish variants --consensus -o polished.{1}.vcf -w {1} -r reads.fa -b reads.sorted.bam -g draft.fa -t 4 --min-candidate-frequency 0.1
```

Expand Down Expand Up @@ -116,10 +120,11 @@ docker run -v /path/to/local/data/data/:/data/ -it :image_id ./nanopolish event

## GPU acceleration

The nanopolish consensus improvement algorithm can be performed faster using CUDA-enabled GPU acceleration. This is an experimental feature, to try this feature run with the `--gpu` flag e.g:
The nanopolish consensus improvement algorithm can be performed faster using CUDA-enabled GPU acceleration. This is an experimental feature; to try it, run with the `--gpu=1` flag, e.g.:
```
nanopolish variants --consensus polished_gpu.fa -w "tig00000001:200000-230000" -r reads.fasta -b reads.sorted.bam -g draft.fa --threads=8 --gpu=1
```
Note that this feature requires nanopolish to be compiled with `make cuda=1`. You should have the [CUDA toolkit installed and configured](https://docs.nvidia.com/cuda/cuda-quick-start-guide/). If your CUDA installation is not in the default location, you can pass the relevant paths to make, e.g. `make cuda=1 NVCC=/path/to/nvidia_c_compiler CUDA_LIB=/path/to/cuda/lib CUDA_INCLUDE=/path/to/cuda/include`.

## Credits and Thanks

Expand Down
27 changes: 27 additions & 0 deletions cuda.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Makefile options for CUDA support.
#
# Included by the main Makefile only when `cuda` is defined (e.g. `make cuda=1`).
# Every knob assigned with ?= can be overridden from the environment or on the
# make command line, for example:
#   make cuda=1 NVCC=/path/to/nvcc CUDA_LIB=/path/to/cuda/lib CUDA_INCLUDE=/path/to/cuda/include

NVCC ?= nvcc
# Root of the CUDA toolkit; CUDA_LIB and CUDA_INCLUDE default to paths below it.
# Assigned with ?= (like the knobs derived from it) so that an exported
# CUDA_ROOT is honoured instead of being silently replaced.
CUDA_ROOT ?= /usr/local/cuda
CUDA_LIB ?= $(CUDA_ROOT)/lib64
CUDA_INCLUDE ?= $(CUDA_ROOT)/include
# Linker flags for the static CUDA runtime library.
CURTFLAGS = -L$(CUDA_LIB) -lcudart_static -lrt
NVCCFLAGS ?= -std=c++11 -I. -I$(CUDA_INCLUDE) -O3 -use_fast_math --default-stream per-thread -restrict

# Make the CUDA headers visible to the host compiler as well.
CPPFLAGS += -I$(CUDA_INCLUDE)
# Define HAVE_CUDA for the preprocessor (presumably used by the sources to
# enable the GPU code paths -- confirm against the code that tests it).
CPPFLAGS += -DHAVE_CUDA=1

# Sub directories containing CUDA source code
SUBDIRS += src/cuda_kernels
# Find the source files by searching subdirectories
CU_SRC := $(foreach dir, $(SUBDIRS), $(wildcard $(dir)/*.cu))
# Automatically generated object names
CU_OBJ = $(CU_SRC:.cu=.o)
# Append the CUDA objects to CPP_OBJ so the main Makefile's link and clean
# rules, which operate on $(CPP_OBJ), pick them up without further changes.
CPP_OBJ += $(CU_OBJ)
# Link against the CUDA runtime.
LDFLAGS += $(CURTFLAGS)

# Register .cu so the suffix rule below is recognised.
.SUFFIXES: .cu

# Compile objects (the recipe line must begin with a hard tab)
.cu.o:
	$(NVCC) -o $@ -c $(NVCCFLAGS) $(CPPFLAGS) $<

Loading

0 comments on commit 0fa62cd

Please sign in to comment.