Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simple method for providing more params to blastn #57

Open
wants to merge 4 commits into
base: diagnostic_primers
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 29 additions & 7 deletions diagnostic_primers/blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,16 @@

import csv
import os
import re

from Bio.Blast.Applications import NcbiblastnCommandline

from diagnostic_primers import load_primers, write_primers


def build_commands(collection, blastexe, blastdb, outdir=None, existingfiles=[]):
def build_commands(collection, blastexe, blastdb, outdir=None,
existingfiles=[], extra_blast_params='',
max_target_seqs=1, perc_identity=90):
"""Builds and returns a list of BLASTN command lines for screening

The returned commands run BLASTN using primer sequences for each
Expand All @@ -73,7 +76,7 @@ def build_commands(collection, blastexe, blastdb, outdir=None, existingfiles=[])
sequences are placed in the specified output directory.
"""
clines = []

# Create output directory if required
if outdir:
os.makedirs(outdir, exist_ok=True)
Expand All @@ -89,13 +92,17 @@ def build_commands(collection, blastexe, blastdb, outdir=None, existingfiles=[])
fastafname = "_".join([stem, "primers.fasta"])
g.write_primers(fastafname)

cline = build_blastscreen_cmd(fastafname, blastexe, blastdb, outdir)
cline = build_blastscreen_cmd(fastafname, blastexe, blastdb, outdir,
max_target_seqs=max_target_seqs,
perc_identity=perc_identity)
if os.path.split(cline.out)[-1] not in existingfiles:
clines.append(cline)
cline = ' '.join([str(cline), extra_blast_params])
return clines


def build_blastscreen_cmd(queryfile, blastexe, blastdb, outdir=None):
def build_blastscreen_cmd(queryfile, blastexe, blastdb, outdir=None,
max_target_seqs=1, perc_identity=90):
"""Build and return a BLASTN command-line.

- queryfile Path to the primer sequences query file
Expand All @@ -118,6 +125,9 @@ def build_blastscreen_cmd(queryfile, blastexe, blastdb, outdir=None):
If the output directory is not specified, the output is written to the
same directory as the input, with the same filestem.
"""
outfmt = ' '.join(['6', 'qaccver','saccver','pident','length','mismatch',
'gapopen','qstart','qend','sstart','send','evalue','bitscore',
'qlen', 'sscinames'])
if outdir is None:
stem = os.path.splitext(queryfile)[0]
else:
Expand All @@ -130,13 +140,13 @@ def build_blastscreen_cmd(queryfile, blastexe, blastdb, outdir=None):
out=stem + ".blasttab",
task="blastn-short",
max_target_seqs=1,
outfmt=6,
outfmt=outfmt,
perc_identity=90,
ungapped=True,
)


def apply_screen(blastfile, primerjson, jsondir=None, maxaln=15):
def apply_screen(blastfile, primerjson, jsondir=None, maxaln=15, filter_list=None):
"""Apply the results from a BLASTN screen to a primer JSON file.

Loads the BLASTN .blasttab file, and the JSON file defining primers. Where
Expand All @@ -156,13 +166,25 @@ def apply_screen(blastfile, primerjson, jsondir=None, maxaln=15):
Primer pairs where one or more sequences has alignment length greater
than maxaln are removed from the set loaded in the JSON file
"""
# filtering blast
if filter_list is not None:
filter_list = [re.compile(x) for x in filter_list.split(',')]
else:
filter_list = []

# Parse BLASTN output and identify noncompliant primers
excluded = set()
with open(blastfile, "r") as bfh:
reader = csv.reader(bfh, delimiter="\t")
for row in reader:
if int(row[3]) > maxaln:
# filter by length
if float(row[3]) / float(row[-2]) > maxaln:
excluded.add(row[0][:-4])
# filter by sci-name
for regex in filter_list:
if regex.search(row[-1]):
excluded.add(row[0][:-4])
break

# Parse primer JSON and remove primer pairs found in excluded
primerdata = load_primers(primerjson, "json")
Expand Down
38 changes: 35 additions & 3 deletions diagnostic_primers/scripts/parsers/blastscreen_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,28 +61,31 @@ def build(subparsers, parents=None):
dest="bs_exe",
action="store",
default="blastn",
type=str,
help="path to BLASTN+ executable",
)
parser.add_argument(
"--db",
dest="bs_db",
action="store",
default="nr",
type=str,
help="path to BLASTN+ database",
)
parser.add_argument(
"--maxaln",
dest="maxaln",
action="store",
default=15,
type=int,
help="exclude primers with longer alignment",
default=0.75,
type=float,
help="BLASTn hits with aln length >X (frac.) of primer length are considered valid"
)
parser.add_argument(
"--outdir",
dest="bs_dir",
action="store",
default="blastn",
type=str,
help="path to directory for BLASTN+ output",
)
parser.add_argument(
Expand All @@ -100,4 +103,33 @@ def build(subparsers, parents=None):
default=False,
help="Overwrite old BLASTN+ output",
)
parser.add_argument(
"-x",
"--extra-blast-params",
dest="extra_blast_params",
default='',
type=str,
help="Extra parameters to pass to blastn",
)
parser.add_argument(
"--max-target-seqs",
dest="max_target_seqs",
default=1,
type=int,
help="BLASTn -max_target_seqs param"
)
parser.add_argument(
"--perc-identity",
dest="perc_identity",
default=90,
type=float,
help="BLASTn -perc_identity param",
)
parser.add_argument(
"--filter-list",
dest="filter_list",
default=None,
type=str,
help="comma-sep list of regex to filter hits by scientific name",
)
parser.set_defaults(func=subcommands.subcmd_blastscreen)
6 changes: 4 additions & 2 deletions diagnostic_primers/scripts/subcommands/subcmd_blastscreen.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def subcmd_blastscreen(args, logger):
# Run BLASTN search with primer sequences
logger.info("Building BLASTN screen command-lines...")
clines = blast.build_commands(
coll, args.bs_exe, args.bs_db, args.bs_dir, existingfiles
coll, args.bs_exe, args.bs_db, args.bs_dir, existingfiles, args.extra_blast_params,
args.max_target_seqs, args.perc_identity
)
if clines:
pretty_clines = [str(c).replace(" -", " \\\n -") for c in clines]
Expand All @@ -109,7 +110,8 @@ def subcmd_blastscreen(args, logger):
"Amending primer file %s with results from %s", indata.primers, blastout
)
newprimers = blast.apply_screen(
blastout, indata.primers, jsondir=args.bs_jsondir, maxaln=args.maxaln
blastout, indata.primers, jsondir=args.bs_jsondir, maxaln=args.maxaln,
filter_list=args.filter_list
)
logger.info("Screened primers placed in %s", newprimers)
indata.primers = newprimers
Expand Down