Skip to content

Commit

Permalink
Single pipe from reads->cutadapt->bwa->samtools using interleaved format
Browse files Browse the repository at this point in the history
  • Loading branch information
necrolyte2 committed Jan 29, 2016
1 parent 9cc3027 commit d02c5d1
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 32 deletions.
13 changes: 9 additions & 4 deletions pyjip/bwa_mem.jip
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
# bwa mem
#
# Usage:
# bwa_mem -r <reference> -f <fastq>... [-o <output>]
# bwa_mem -r <reference> -f <fastq>... [-p] [-o <output>]
#
# Options:
# -r, --reference <reference> Fasta reference path
# -o, --output <output> Output path [Default: stdout]
# -f, --fastq <fastq>... Fastq files to map to reference
# -o, --output <output> Output path [default: stdout]
# -f, --fastq <fastq>... Fastq files to map to reference [default: stdin]
# -p, --interleaved Flag to specify interleaved input [default: False]

bwa mem ${reference} ${fastq}
#%begin init
options['fastq'].streamable = True
#%end

bwa mem ${interleaved|arg|else('')} ${reference} ${fastq|else("/dev/stdin")} ${output|arg(">")}
47 changes: 23 additions & 24 deletions pyjip/cutadapt.jip
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,39 @@
# Runs cutadapt with only quality filter
#
# usage:
# cutadapt -q <qualcutoff>... -f <fastq>... [-o <outr1>] [-p <outr2>] [--pipe]
# cutadapt -q <qualcutoff>... [-i <input>] [-o <output>] [-p <outr2>] [--interleave]
#
# Options:
# -f, --fastq <fastq> The input fastq file[s]
# -i, --input <input> The input fastq [Default: stdin]
# -q, --qualcutoff <qualcutoff> The quality cutoff [Default: 25]
# -o, --outr1 <outr1> R1 output fastq [Default: output_r1.cutadapt]
# -p, --outr2 <outr2> R2 output fastq
# --pipe Flag to create named pipes for output [Default: False]
# -o, --output <output> Output fastq for single read or interleaved(paired)[Default: stdout]
# -p, --outr2 <outr2> Reverse output for paired fastq(non-interleaved)
# --interleave Interleave output [Default: False]

#%begin init
add_output('output', [options['outr1']], nargs='+')
#%end

#%begin validate
#if options['interleave'] and len(options['fastq']) > 1:
# validation_error("Cannot specify more than 1 fastq file with --interleave")

if options['interleave'] and len(options['outr2']) > 1:
validation_error("Cannot specify outr2 and --interleave")

#if len(options['fastq']) != len(options['qualcutoff']):
# validation_error("qualcutoff must match number of fastq given")
#%end

#%begin setup
'''
if len(options['fastq']) == 2:
options['qualcutoff'].join = ','
if len(options['qualcutoff']) != len(options['fastq']):
qualcutoff = options['qualcutoff'].value[0]
options['qualcutoff'].set([qualcutoff,qualcutoff])
if not options['outr2']:
options['outr2'].set('output_r2.cutadapt')
options['output'].append(options['outr2'])
#%end

#%begin validate
assert len(options['fastq']) == len(options['qualcutoff']), validation_error('qualcuttoff must have same number of entries as fastq')
if not options['interleave']:
if not options['outr2']:
options['outr2'].set('output_r2.cutadapt')
options['output'].append(options['outr2'])
'''
#%end

if [ "${pipe|arg}" == "--pipe" ]
then
mkfifo ${outr1} ${outr2|else('')}
fi
cutadapt ${outr1|arg} ${outr2|arg|else('')} -q ${qualcutoff} ${fastq}
if [ "${pipe|arg}" == "--pipe" ]
then
rm ${outr1} ${outr2|else('')}
fi
cutadapt ${interleave|arg|else('')} ${output|arg(">")} ${outr2|arg|else('')} -q ${qualcutoff} -f fastq ${input|else("/dev/stdin")}
39 changes: 39 additions & 0 deletions pyjip/paired_to_interleave.jip
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env jip
#
# Converts 2 fastq paired files into single interleave
#
# Usage:
# paired_to_interleave -f <forward> -r <reverse> [--outformat <outformat>] [-o <output>]
#
# Options:
# -f, --forward <forward> The forward fastq input
# -r, --reverse <reverse> The reverse fastq input
# -o, --output <output> The interleaved output [default: stdout]
# --outformat <outformat> The output format [default: fastq]

#%begin validate
if options['outformat'] not in ('fasta', 'fastq'):
validation_error(
"output format can only be fasta or fastq. You provided '%s'"
% options['outformat']
)
#%end

#%begin command python
import itertools

from Bio import SeqIO

def interleave(iter1, iter2):
for forward, reverse in itertools.izip(iter1, iter2):
assert forward.id == reverse.id, "%s did not match %s" % \
(forward.id, reverse.id)
yield forward
yield reverse

f1, f2 = open("${forward}"), open("${reverse}")
records = interleave(SeqIO.parse(f1, 'fastq'), SeqIO.parse(f2, 'fastq'))
outfile = open("${output|else('/dev/stdout')}", 'w')
count = SeqIO.write(records, outfile, "${outformat}")
outfile.close()
#%end
12 changes: 8 additions & 4 deletions pyjip/simplepipe.jip
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Simple pipeline to run cutadapt, bwa index, bwa mem, samtools view
#
# Usage:
# simplepipe -r <reference> -f <fastq>... [-q <qualcutoff>...] [-o <output>] [--pipe]
# simplepipe -r <reference> -f <fastq>... [-q <qualcutoff>...] [-o <output>] [--interleave]
#
# Options:
# -r, --reference <reference> Reference file to index and map to
Expand All @@ -12,13 +12,17 @@
# [Default: 25]
# -o, --output <output> The output bam file
# [Default: mapped.bam]
# -p, --pipe To use named pipe(have to submit jip job for this to work)
# -i, --interleave To use interleave format as much as possible for stream
# [Default: False]

#%begin pipeline
trimmed = run('cutadapt', fastq=options['fastq'], qualcutoff=options['qualcutoff'], pipe=options['pipe'])
if options['interleave']:
fastq = run('paired_to_interleave', forward=options['fastq'].value[0], reverse=options['fastq'].value[1])
else:
fastq = options['fastq']
trimmed = run('cutadapt', input=fastq, qualcutoff=options['qualcutoff'], interleave=options['interleave'])
index_ref = run('bwa_index', reference=options['reference'])
sam = run('bwa_mem', reference=reference, fastq=['output_r1.cutadapt', 'output_r2.cutadapt'])
sam = run('bwa_mem', reference=reference, fastq=trimmed)
bam = run('sam_to_bam', input=sam, output=options['output'])

index_ref >> (sam | bam)
Expand Down

0 comments on commit d02c5d1

Please sign in to comment.