Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

many fixes that were part of the pre-develop CD stuff #244

Merged
merged 3 commits into from
Apr 13, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ Version X
- nfilter will now simply symlink if no options are supplied essentially skipping
itself
- nfilter utilizes threads from config file
- config file now has THREADS default
- fix for bug where some miseq reads were not identified correctly in tagreads
- convert functions now support output directory
- bug fix for nfilter symlinking
- fix for qsub job output from runsample

Version 1.4.2
+++++++++++++
Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ Indices and tables
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

11 changes: 7 additions & 4 deletions ngs_mapper/config.yaml.default
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
# Does not need the trailing /
NGSDATA: &NGSDATA /path/to/NGSDATA

# Default threads to use for any stage that supports it
THREADS: &THREADS 1

# All scripts by name should be top level items
# Sub items then are the option names(the dest portion of the add_arugment for the script)
# Each option needs to define the default as well as the help message
Expand All @@ -13,8 +16,8 @@ ngs_filter:
default: 0
help: 'The index for each associated MiSeq read must be equal or above this value.'
threads:
default: 1
help: 'How many threads to use for bwa[Default: %(default)s]'
default: *THREADS
help: 'How many threads to use[Default: %(default)s]'
platforms:
choices:
- MiSeq
Expand Down Expand Up @@ -81,7 +84,7 @@ run_bwa_on_samplename:
default: False
help: 'Flag to indicate that you want the temporary files kept instead of removing them[Default: %(default)s]'
threads:
default: 1
default: *THREADS
help: 'How many threads to use for bwa[Default: %(default)s]'
tagreads:
SM:
Expand Down Expand Up @@ -113,7 +116,7 @@ base_caller:
default: 10
help: 'What factor to bias high quality bases by. Must be an integer >= 1[Default: %(default)s]'
threads:
default: 1
default: *THREADS
help: 'How many threads to use when running base_caller.py[Default: %(default)s]'
miseq_sync:
ngsdata:
Expand Down
53 changes: 37 additions & 16 deletions ngs_mapper/file_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
drop_ext = lambda s: '.'.join(s.split('.')[:-1])
swap_ext = lambda ext: lambda s: drop_ext(s) + '.' + ext
find_ext = lambda ext: lambda dir: glob("%s/*%s" % (dir, ext))
swap_dir = lambda dir: lambda p: os.path.join(dir, os.path.basename(p))

def convert_sff(dir):
def convert_sff(dir, outdir):
sff_paths = find_ext('sff')(dir)
outnames = map(swap_ext('fastq'), sff_paths)
outnames = map(swap_dir(outdir), outnames)
def wrapped_conv(a, b):
logger.info('Converting {0} to {1}'.format(a, b))
n = 0
Expand All @@ -31,37 +33,56 @@ def wrapped_conv(a, b):
return n
return sum(map(wrapped_conv, sff_paths, outnames))

def convert_ab1(dir):
def convert_ab1(dir, outdir):
for abi in find_ext('ab1')(dir):
dest = swap_ext('fastq')(abi)
dest = swap_dir(outdir)(dest)
logger.info('Converting {0} to {1}'.format(abi, dest))
SeqIO.convert(abi, 'abi', dest, 'fastq')

def convert_gzips(dir):
def convert_gzips(dir, outdir):
for gz in find_ext('gz')(dir):
dest = drop_ext(gz)
dest = swap_dir(outdir)(drop_ext(gz))
with gzip.open( gz, 'rb' ) as input:
with open(dest, 'w') as output:
logger.info('Unpacking {0} to {1}'.format(gz, dest))
output.write(input.read())

def convert_formats(dir):
convert_gzips(dir)
convert_ab1(dir)
convert_sff(dir)

def get_dir_arg():
def link_fastqs(dir, outdir):
for fq in find_ext('fastq')(dir):
dest = swap_dir(outdir)(fq)
src = os.path.abspath(fq)
dst = os.path.abspath(dest)
if os.path.exists(dst):
logger.warning(
'Skipping symlink of {0} because {1} already exists.' \
'This can happen if you have the file compressed and also not ' \
'compressed in the input directory'.format(
src, dst
))
else:
logger.debug('Symlinking {0} to {1}'.format(src, dst))
os.symlink(src, dst)

def convert_formats(dir, outdir):
convert_gzips(dir, outdir)
convert_ab1(dir, outdir)
convert_sff(dir, outdir)
link_fastqs(dir, outdir)

def get_dir_args():
if os.path.isdir(sys.argv[1]):
return sys.argv[1]
indir, outdir = sys.argv[1], sys.argv[2]
os.mkdir(outdir)
return indir, outdir
else:
raise ValueError("Path %s is not a directory" % sys.argv[1])
raise ValueError("Path %s or %s is not a directory" % (sys.argv[1], sys.argv[2]))

def main_convert_formats():
convert_formats(get_dir_arg())
convert_formats(*get_dir_args())


def main_sff_convert():
convert_sff(get_dir_arg())
def main_sff_convert():
convert_sff(*get_dir_args())

# sff_names = filter(lambda x: x.endswith('sff'), os.listdir(dir))
# sff_paths = map(partial(os.path.join, dir), sff_names)
Expand Down
2 changes: 1 addition & 1 deletion ngs_mapper/nfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def write_filtered(readpath, idxQualMin, dropNs, outdir='.'):
results = make_filtered(readpath, idxQualMin, dropNs)
outpath = name_filtered(readpath, outdir)
if not idxQualMin and not dropNs:
os.symlink(readpath, outpath)
os.symlink(os.path.abspath(readpath), os.path.abspath(outpath))
logger.warn("Index Quality was %s and dropNs was set to %s, so file %s was copied to %s without filtering" % (idxQualMin, dropNs, readpath, outpath))
return outpath
try:
Expand Down
73 changes: 41 additions & 32 deletions ngs_mapper/runsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def parse_args( args=sys.argv[1:] ):
parser.add_argument(
'--drop-ns',
dest='drop_ns',
action='store_true',
default=_config['ngs_filter']['dropNs']['default'],
help=_config['ngs_filter']['dropNs']['help'],
)
Expand All @@ -243,7 +244,32 @@ def parse_args( args=sys.argv[1:] ):

args, rest = parser.parse_known_args(args)
args.config = configfile
return args,rest

# Parse qsub args if found
if rest and rest[0].startswith('--qsub'):
qsub_parser = argparse.ArgumentParser(add_help=False)
qsub_parser.add_argument(
'--qsub-help',
default=False,
action='store_true'
)
qsub_parser.add_argument(
'--qsub_l',
default='nodes=1:ppn=1',
)
qsub_parser.add_argument(
'--qsub_M',
default=None
)

qsub_args = qsub_parser.parse_args(rest)

if qsub_args.qsub_help:
qsub_parser.print_help()
sys.exit(1)
rest = qsub_args

return args, rest

def make_project_repo( projpath ):
'''
Expand Down Expand Up @@ -274,11 +300,11 @@ def run_cmd( cmdstr, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, scri
raise MissingCommand( "{0} is not an executable?".format(cmd[0]) )

def main():
args,rest = parse_args()
args,qsubargs = parse_args()
# Qsub job?
if rest and rest[0].startswith('--qsub'):
args, qsubargs = split_args(' '.join(sys.argv[1:]))
print pbs_job(args, qsubargs)
if qsubargs:
runsampleargs, _ = split_args(' '.join(sys.argv[1:]))
print pbs_job(runsampleargs, qsubargs)
sys.exit(1)
# So we can set the global logger
global logger
Expand Down Expand Up @@ -354,20 +380,23 @@ def select_keys(d, keys):
return dict( ((k, v) for k, v in d.items() if k in keys))

#convert sffs to fastq
convert_dir = os.path.join(tdir,'converted')

print sh.convert_formats(cmd_args['readsdir'], _out=sys.stdout, _err=sys.stderr)
print sh.convert_formats(cmd_args['readsdir'], convert_dir, _out=sys.stdout, _err=sys.stderr)
#print sh.sff_to_fastq(cmd_args['readsdir'], _out=sys.stdout, _err=sys.stderr)
try:
if cmd_args['config']:
__result = sh.ngs_filter(cmd_args['readsdir'], config=cmd_args['config'], outdir=cmd_args['filtered_dir'])
__result = sh.ngs_filter(convert_dir, config=cmd_args['config'], outdir=cmd_args['filtered_dir'])
else:
filter_args = select_keys(cmd_args, ["drop_ns", "platforms", "index_min"])
__result = sh.ngs_filter(cmd_args['readsdir'], outdir=cmd_args['filtered_dir'], **filter_args)
__result = sh.ngs_filter(convert_dir, outdir=cmd_args['filtered_dir'], **filter_args)
logger.debug( 'ngs_filter: %s' % __result )
except sh.ErrorReturnCode, e:
logger.error(e.stderr)
sys.exit(1)

#sh.rm(convert_dir, r=True)

#Trim reads
cmd = 'trim_reads {filtered_dir} -q {trim_qual} -o {trim_outdir} --head-crop {head_crop}'
if cmd_args['config']:
Expand Down Expand Up @@ -479,45 +508,25 @@ def pbs_job(runsampleargs, pbsargs):

:param string runsampleargs: args that are for runsample that originaly came
from sys.argv(any non --qsub\_)
:param string pbsargs: args for qsub(any --qsub\_)
:param Namespace pbsargs: parsed --qsub_* args
:return: pbs job file string
'''
qsub_parser = argparse.ArgumentParser(add_help=False)
qsub_parser.add_argument(
'--qsub-help',
default=False,
action='store_true'
)
qsub_parser.add_argument(
'--qsub_l',
default='nodes=1:ppn=1',
)
qsub_parser.add_argument(
'--qsub_M',
default=None
)
qsub_args = qsub_parser.parse_args(pbsargs)

if qsub_args.qsub_help:
qsub_parser.print_help()
return ''

samplename = runsampleargs[2]
template = '#!/bin/bash\n' \
'#PBS -N {samplename}-ngs_mapper\n' \
'#PBS -j oe\n' \
'#PBS -l {qsub_l}\n'
if qsub_args.qsub_M is not None:
if pbsargs.qsub_M is not None:
template += '#PBS -m abe\n' \
'#PBS -M ' + qsub_args.qsub_M + '\n'
'#PBS -M ' + pbsargs.qsub_M + '\n'

template += '\n' \
'cd $PBS_O_WORKDIR\n' \
'runsample {runsampleargs}\n'

return template.format(
samplename=samplename,
qsub_l=qsub_args.qsub_l,
qsub_l=pbsargs.qsub_l,
runsampleargs=' '.join(runsampleargs)
)

Expand Down
2 changes: 1 addition & 1 deletion ngs_mapper/tagreads.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class HeaderExists(Exception): pass
ID_MAP = (
re.compile( '[0-9A-Z]{14}' ),
re.compile( '[A-Z0-9]{5}:\d{1,}:\d{1,}' ),
re.compile( 'M[0-9]{5}:\d+:\d{9}-[A-Z0-9]{5}:\d:\d{4}:\d{4,5}:\d{4,5}' ),
re.compile( 'M[0-9]{5}:\d+:[\w\d-]+:\d:\d{4}:\d{4,5}:\d{4,5}' ),
re.compile( '.*' )
)
# Read Group Template
Expand Down
Loading