Skip to content

Commit

Permalink
Added option to specify 'none' as the aligner. This allows for alignment…
Browse files Browse the repository at this point in the history
… to be run separately as requested in #306
  • Loading branch information
gtonkinhill committed Sep 24, 2024
1 parent f3d664a commit 4dc499e
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 10 deletions.
9 changes: 5 additions & 4 deletions panaroo/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def get_options(args):
help=
"Specify an aligner. Options:'prank', 'clustal', and default: 'mafft'",
type=str,
choices=['prank', 'clustal', 'mafft'],
choices=['prank', 'clustal', 'mafft', 'none'],
default="mafft")
core.add_argument(
"--codons",
Expand Down Expand Up @@ -550,9 +550,10 @@ def main():
if args.verbose: print("generating pan genome MSAs...")
generate_pan_genome_alignment(G, temp_dir, args.output_dir, args.n_cpu,
args.alr, args.codons, isolate_names)
core_nodes = get_core_gene_nodes(G, args.core, len(args.input_files))
core_names = [G.nodes[x]["name"] for x in core_nodes]
concatenate_core_genome_alignments(core_names, args.output_dir, args.hc_threshold)
if args.alr!='none':
core_nodes = get_core_gene_nodes(G, args.core, len(args.input_files))
core_names = [G.nodes[x]["name"] for x in core_nodes]
concatenate_core_genome_alignments(core_names, args.output_dir, args.hc_threshold)
elif args.aln == "core":
if args.verbose: print("generating core genome MSAs...")
generate_core_genome_alignment(G, temp_dir, args.output_dir,
Expand Down
2 changes: 2 additions & 0 deletions panaroo/generate_alignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def check_aligner_install(aligner):
command = "prank -help"
elif aligner == "mafft":
command = "mafft --help"
elif aligner == "none":
return True
else:
sys.stderr.write("Incorrect aligner specification\n")
sys.exit()
Expand Down
19 changes: 19 additions & 0 deletions panaroo/generate_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,11 @@ def generate_pan_genome_alignment(G, temp_dir, output_dir, threads, aligner,
aligner,
threads)
else:
if aligner=='none':
temp_dir = output_dir + "unaligned_gene_sequences/"
if not os.path.exists(temp_dir):
os.mkdir(temp_dir)

#Multithread writing gene sequences to disk (temp directory) so aligners can find them
unaligned_sequence_files = Parallel(n_jobs=threads)(
delayed(output_sequence)(G.nodes[x], isolates, temp_dir, output_dir)
Expand All @@ -323,6 +328,10 @@ def generate_pan_genome_alignment(G, temp_dir, output_dir, threads, aligner,
#remove single sequence files
unaligned_sequence_files = filter(None, unaligned_sequence_files)

if aligner=='none':
print("No aligner specified. Returning unaligned gene fasta files.")
return

#Get Biopython command calls for each output gene sequences
commands = [
get_alignment_commands(fastafile, output_dir, aligner, threads)
Expand Down Expand Up @@ -531,10 +540,20 @@ def generate_core_genome_alignment(
output_dir, temp_dir,
aligner, threads)
else:
if aligner=='none':
temp_dir = output_dir + "unaligned_gene_sequences/"
if not os.path.exists(temp_dir):
os.mkdir(temp_dir)

#Output core node sequences
unaligned_sequence_files = Parallel(n_jobs=threads)(
delayed(output_sequence)(G.nodes[x], isolates, temp_dir, output_dir)
for x in tqdm(core_genes))

if aligner=='none':
print("No aligner specified. Returning unaligned gene fasta files.")
return

#remove single sequence files
unaligned_sequence_files = filter(None, unaligned_sequence_files)

Expand Down
13 changes: 7 additions & 6 deletions panaroo/post_run_alignment_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def get_options():
help=
"Specify an aligner. Options:'prank', 'clustal', and default: 'mafft'",
type=str,
choices={'prank', 'clustal', 'mafft'},
choices={'prank', 'clustal', 'mafft', 'none'},
default="mafft")
core.add_argument(
"--codons",
Expand Down Expand Up @@ -109,11 +109,12 @@ def main():
if args.verbose: print("generating pan genome MSAs...")
generate_pan_genome_alignment(G, temp_dir, args.output_dir, args.n_cpu,
args.alr, args.codons, isolate_names)

core_nodes = get_core_gene_nodes(G, args.core, len(isolate_names))
core_names = [G.nodes[x]["name"] for x in core_nodes]
concatenate_core_genome_alignments(core_names, args.output_dir,
args.hc_threshold)

if args.alr!='none':
core_nodes = get_core_gene_nodes(G, args.core, len(isolate_names))
core_names = [G.nodes[x]["name"] for x in core_nodes]
concatenate_core_genome_alignments(core_names, args.output_dir,
args.hc_threshold)
elif args.aln == "core":
if args.verbose: print("generating core genome MSAs...")
generate_core_genome_alignment(G, temp_dir, args.output_dir,
Expand Down

0 comments on commit 4dc499e

Please sign in to comment.