From 29daf30b042d567df7eaed9ac5322f9245cd8f90 Mon Sep 17 00:00:00 2001 From: Tyghe Vallard Date: Wed, 24 Feb 2016 15:44:10 -0500 Subject: [PATCH] fixed bwa_index input/output. bwa_mem can accept ref.fasta.bwt or ref.fasta as input. interleave_index_filter now accepts stdin correctly. paired_to_interleave removed unused forward/revese options. created simple map_pipe to simulate mapping pipeline using many of our tools to show usage as well as do integration testing --- .travis.yml | 4 ++ jip_modules/bwa_index.jip | 15 +++-- jip_modules/bwa_mem.jip | 12 ++-- jip_modules/interleaved_index_filter.jip | 18 ++++- jip_modules/paired_to_interleave.jip | 2 - jip_modules/simplepipe.jip | 32 --------- jip_modules/trim_reads.jip | 2 +- tests/jip_modules/map_pipe.jip | 85 ++++++++++++++++++++++++ tests/map_pipe.sh | 6 ++ tests/testinput/f1.index.fastq | 16 +++++ tests/testinput/f2.index.fastq | 16 +++++ 11 files changed, 163 insertions(+), 45 deletions(-) delete mode 100755 jip_modules/simplepipe.jip create mode 100755 tests/jip_modules/map_pipe.jip create mode 100644 tests/map_pipe.sh create mode 100644 tests/testinput/f1.index.fastq create mode 100644 tests/testinput/f2.index.fastq diff --git a/.travis.yml b/.travis.yml index d759796..3cc04b7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,9 @@ sudo: false language: python python: - "2.7" +cache: + directories: + - .hypothesis install: # We do this conditionally because it saves us some downloading if the # version is the same. @@ -29,5 +32,6 @@ install: script: - nosetests --with-coverage --cover-erase --cover-package=bioframework - pybot tests/*.robot + - tests/map_pipe.sh after_success: - coveralls diff --git a/jip_modules/bwa_index.jip b/jip_modules/bwa_index.jip index 83d6f88..2b88274 100755 --- a/jip_modules/bwa_index.jip +++ b/jip_modules/bwa_index.jip @@ -2,19 +2,24 @@ # bwa index # # Usage: -# bwa_index -r [-o ] +# bwa_index [-o ] # # Options: -# -r, --reference The fasta file to index # -o, --output Where to create the index # [Default: Same path as reference argument] +# Help: +# The reference filepath to index #%begin init -options['output'].default = "${reference}.bwt" +options['output'].default = "${input|abs|name}.bwt" #%end #%begin validate #%end -cp ${reference|abs} ${output|ext} -bwa index ${output|ext} +cmd="cp ${input|abs} ${output|ext}" +echo $cmd >&2 +$cmd +cmd="bwa index ${output|ext}" +echo $cmd >&2 +$cmd diff --git a/jip_modules/bwa_mem.jip b/jip_modules/bwa_mem.jip index e8a97ad..4df5af6 100755 --- a/jip_modules/bwa_mem.jip +++ b/jip_modules/bwa_mem.jip @@ -2,16 +2,20 @@ # bwa mem # # Usage: -# bwa_mem -r -f ... [-p] [-o ] +# bwa_mem [...] -r [-p] [-o ] # # Options: # -r, --reference Fasta reference path # -o, --output Output path [default: stdout] -# -f, --fastq ... Fastq files to map to reference [default: stdin] # -p, --interleaved Flag to specify interleaved input [default: False] +# +# Help: +# Input fastq format #%begin init -options['fastq'].streamable = True +options['input'].streamable = True #%end -bwa mem ${interleaved|arg|else('')} ${reference} ${fastq|else("/dev/stdin")} ${output|arg(">")} +cmd="bwa mem ${interleaved|arg|else('')} ${reference|re('.bwt','')} ${input|else("/dev/stdin")} ${output|arg(">")}" +echo $cmd >&2 +$cmd diff --git a/jip_modules/interleaved_index_filter.jip b/jip_modules/interleaved_index_filter.jip index e05b474..aca5a77 100755 --- a/jip_modules/interleaved_index_filter.jip +++ b/jip_modules/interleaved_index_filter.jip @@ -6,11 +6,27 @@ # Options: # -o, --output= Output files [default: stdout] # -q, --minimum= Minimum quality of the index file +# Any index qualities less than this are filtered +# Qualities that are greater than or equal are kept # -1, --index1= Index file from MiSeq # -2, --index2= Index file from MiSeq # # Help: -# Interleaved fastq file +# Interleaved fastq file or read from stdin +# +# Filters out reads based on their matching index inside of supplied index1 and index2 +# fastq files. +# The identifiers in the index1 fastq file must match with reads inside of the forward +# reads in the input interleaved fastq. +# Similarily, the index2 fastq identifiers must match to reads inside the reverse +# reads in the input interleaved fastq. +# +# If either index is too low, both the forward and reverse will be filtered out +# to ensure that the interleaved output is kept intact. + +#%begin init +options['input'].streamable = True +#%end #%begin command python from bioframework.index_filter import filter_on_index_quality_interleaved diff --git a/jip_modules/paired_to_interleave.jip b/jip_modules/paired_to_interleave.jip index 90e90bb..f6db70d 100755 --- a/jip_modules/paired_to_interleave.jip +++ b/jip_modules/paired_to_interleave.jip @@ -6,8 +6,6 @@ # paired_to_interleave [...] [--out-format ] [-o ] # # Options: -# -f, --forward= The forward fastq input -# -r, --reverse= The reverse fastq input # -o, --output= The interleaved output [default: stdout] # --out-format= The output format [default: fastq] # diff --git a/jip_modules/simplepipe.jip b/jip_modules/simplepipe.jip deleted file mode 100755 index a50a0af..0000000 --- a/jip_modules/simplepipe.jip +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env jip -# -# Simple pipeline to run cutadapt, bwa index, bwa mem, samtools view -# -# Usage: -# simplepipe -r -f ... [-q ...] [-o ] [--interleave] -# -# Options: -# -r, --reference Reference file to index and map to -# -f, --fastq ... List of fastq files to process -# -q, --qualcutoff ... The quality cutoff for the read trimming -# [Default: 25] -# -o, --output The output bam file -# [Default: mapped.bam] -# -i, --interleave To use interleave format as much as possible for stream -# [Default: False] - -#%begin pipeline -if options['interleave']: - fastq = run('paired_to_interleave', forward=options['fastq'].value[0], reverse=options['fastq'].value[1]) -else: - fastq = options['fastq'] -trimmed = run('cutadapt', input=fastq, qualcutoff=options['qualcutoff'], interleave=options['interleave']) -index_ref = run('bwa_index', reference=options['reference']) -sam = run('bwa_mem', reference=reference, fastq=trimmed) -ubam = run('sam_to_bam', input=sam) -bam = run('sort_bam', input=ubam, output=options['output']) -run('plot_bam', input=bam, output=options['output']+'.svg', format='svg') - -index_ref >> sam - -#%end diff --git a/jip_modules/trim_reads.jip b/jip_modules/trim_reads.jip index e14c8ba..e10de90 100755 --- a/jip_modules/trim_reads.jip +++ b/jip_modules/trim_reads.jip @@ -8,7 +8,6 @@ # Options: # -p, --paired Indicates that the input is paired-interleave format otherwise # the input is assumed to be unpaired fastq -# [Default: stdin] # -o, --output= Output fastq. If input was interleave, output will be as well # [default: stdout] # -t, --trim-n Trim N's from ends of reads @@ -24,6 +23,7 @@ # Ex. -x -10 would remove 10 bases from the end # Ex. -x 10 -10 would remove 10 bases from front and end #%begin init +options['input'].streamable = True #%end #%begin validate diff --git a/tests/jip_modules/map_pipe.jip b/tests/jip_modules/map_pipe.jip new file mode 100755 index 0000000..c93657c --- /dev/null +++ b/tests/jip_modules/map_pipe.jip @@ -0,0 +1,85 @@ +#!/usr/bin/env jip +# +# Simple pipeline to run cutadapt, bwa index, bwa mem, samtools view +# +# Usage: +# simplepipe ... -r [--trimn] [--trimlq ...] [--removebases ] [--indexqual --indexes ...] [-o ] [--interleave] +# +# Options: +# -r, --reference= Reference file to index and map to +# --trimn Flag to indicate trimming of all N's from reads +# --trimlq=... The quality cutoff for the read trimming +# [Default: 25,25] +# --removebases= Number of bases to remove from ends of reads +# See trim_reads --help for more details of -x +# option +# --indexqual= The quality of the supplied read index files +# --indexes= Index fastq files for fastq files +# [Default: None] +# -o, --output= The output bam file +# [Default: mapped.bam] +# Help: +# Input can be multiple fastq/sff files + +#%begin validate +if options['indexqual'] and len(options['indexes'].value) != 2: + validation_error("You must supply 2 index files when using --indexqual") +#%end + +#%begin pipeline +# Convert format if needed +_inputs = [] +for fq in options['input'].value: + if fq.endswith('.sff'): + outpath = basename(fq) + '.fastq' + _x = run('convert_format', input=fq, output=outpath) + _inputs.append(outpath) + else: + _inputs.append(fq) + +if len(_inputs) > 1: + output = run('paired_to_interleave', input=_inputs) +else: + output = _inputs[0] + +# Run index qual filter +if options['indexqual']: + output = run( + 'interleaved_index_filter', + input=output, + index1=options['indexes'].value[0], + index2=options['indexes'].value[1], + minimum=options['indexqual'] + ) + +# Run general trimming +if options['trimn'] or options['trimlq'] or options['removebases']: + output = run( + 'trim_reads', + input=output, + trim_n=options['trimn'], + qualcutoff=options['trimlq'], + removebases=options['removebases'] + ) + +# Map fastq to reference +refindex = run('bwa_index', input=options['reference']) +output = run( + 'bwa_mem', + input=output, + interleaved=True, + reference=refindex +) + +bash('cat', input=output) + +#trimmed = run('cutadapt', input=fastq, qualcutoff=options['qualcutoff'], interleave=options['interleave']) +#index_ref = run('bwa_index', reference=options['reference']) +#sam = run('bwa_mem', reference=reference, fastq=trimmed) +#ubam = run('sam_to_bam', input=sam) +#bam = run('sort_bam', input=ubam, output=options['output']) +#run('plot_bam', input=bam, output=options['output']+'.svg', format='svg') + +#index_ref >> sam + +#%end diff --git a/tests/map_pipe.sh b/tests/map_pipe.sh new file mode 100644 index 0000000..5133832 --- /dev/null +++ b/tests/map_pipe.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +export JIP_PATH=$PWD/jip_modules + +# Super simple test to make sure pieces all work nicely together +tests/jip_modules/map_pipe.jip tests/testinput/f1.fastq tests/testinput/f1.fastq -r tests/testinput/ref.fasta --indexqual 21 --indexes tests/testinput/f1.index.fastq tests/testinput/f2.index.fastq --removebases 1 diff --git a/tests/testinput/f1.index.fastq b/tests/testinput/f1.index.fastq new file mode 100644 index 0000000..7e7b157 --- /dev/null +++ b/tests/testinput/f1.index.fastq @@ -0,0 +1,16 @@ +@read1 +ATGC ++ +IIII +@read2 +ATGC ++ +IIII +@read3 +ATGC ++ +II5I +@read4 +ATGC ++ +II5I diff --git a/tests/testinput/f2.index.fastq b/tests/testinput/f2.index.fastq new file mode 100644 index 0000000..5477953 --- /dev/null +++ b/tests/testinput/f2.index.fastq @@ -0,0 +1,16 @@ +@read1 +ATGC ++ +IIII +@read2 +ATGC ++ +II5I +@read3 +ATGC ++ +IIII +@read4 +ATGC ++ +II5I