Skip to content

Commit

Permalink
das_tool: Use script not shell.
Browse files Browse the repository at this point in the history
  • Loading branch information
wwood committed Dec 13, 2023
1 parent 16e1c3e commit 1c352ad
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 21 deletions.
23 changes: 3 additions & 20 deletions aviary/modules/binning/binning.smk
Original file line number Diff line number Diff line change
Expand Up @@ -629,32 +629,15 @@ rule das_tool:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 512*1024*attempt),
runtime = lambda wildcards, attempt: 12*60*attempt,
output:
das_tool_done = "data/das_tool_bins_pre_refine/done"
touch("data/das_tool_bins_pre_refine/done")
conda:
"envs/das_tool.yaml"
log:
"logs/das_tool.log"
benchmark:
"benchmarks/das_tool.benchmark.txt"
shell:
"""
Fasta_to_Scaffolds2Bin.sh -i data/metabat_bins_sspec -e fa > data/metabat_bins_sspec.tsv 2> {log};
Fasta_to_Scaffolds2Bin.sh -i data/metabat_bins_ssens -e fa > data/metabat_bins_ssens.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/metabat_bins_sens -e fa > data/metabat_bins_sens.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/metabat_bins_spec -e fa > data/metabat_bins_spec.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/concoct_bins -e fa > data/concoct_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/maxbin2_bins -e fasta > data/maxbin_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/vamb_bins/bins -e fna > data/vamb_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/rosella_refined/final_bins/ -e fna > data/rosella_refined_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/metabat2_refined/final_bins/ -e fna > data/metabat2_refined_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/semibin_refined/final_bins/ -e fna > data/semibin_refined_bins.tsv 2>> {log};
scaffold2bin_files=$(find data/*bins*.tsv -not -empty -exec ls {{}} \; | tr "\n" ',' | sed "s/,$//g");
DAS_Tool --search_engine diamond --write_bin_evals 1 --write_bins 1 -t {threads} --score_threshold -42 \
-i $scaffold2bin_files \
-c {input.fasta} \
-o data/das_tool_bins_pre_refine/das_tool >> {log} 2>&1 && \
touch data/das_tool_bins_pre_refine/done
"""
script:
"scripts/das_tool.py"

rule refine_dastool:
input:
Expand Down
1 change: 1 addition & 0 deletions aviary/modules/binning/envs/das_tool.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ channels:
- bioconda
dependencies:
- das_tool = 1.1.2
- extern = 0.4.1
61 changes: 61 additions & 0 deletions aviary/modules/binning/scripts/das_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import logging
import os
import sys

import extern

def select_binners(skip_binners):
    """Return the bin sources DAS_Tool should use, honouring skipped binners.

    Args:
        skip_binners: collection of binner names to leave out (the
            ``skip_binners`` entry of the Snakemake config).

    Returns:
        A list of ``(bin_directory, extension, bin_definition_file)`` tuples,
        where ``bin_directory`` is relative to ``data/``, ``extension`` is the
        FASTA file extension of the bins in that directory, and
        ``bin_definition_file`` is the scaffold-to-bin TSV path to write.
    """
    # (binner name, FASTA extension, subdirectory of <binner>_bins/ that
    # actually holds the bin FASTA files).
    unrefined_binners_to_use = [
        ('concoct', 'fa', ''),
        ('maxbin2', 'fasta', ''),
        # VAMB bins live in data/vamb_bins/bins, as in the shell command this
        # script replaced; pointing at data/vamb_bins/ finds no .fna files.
        ('vamb', 'fna', 'bins/')]
    refined_binners_to_use = [
        ('rosella', 'fna'),
        ('semibin', 'fna')]

    # N.B. the original note here says that specifying "metabat" will skip
    # both MetaBAT1 and MetaBAT2.
    # NOTE(review): this function only matches the exact names below (and
    # 'metabat2'), so that expansion presumably happens before the config
    # reaches this script - confirm against the caller.
    metabats = ['metabat_sspec', 'metabat_ssens', 'metabat_sens', 'metabat_spec']

    binners = []
    for binner, extension, subdirectory in unrefined_binners_to_use:
        if binner not in skip_binners:
            binners.append((f'{binner}_bins/{subdirectory}', extension, f'data/{binner}_bins.tsv'))

    for binner, extension in refined_binners_to_use:
        if binner not in skip_binners:
            binners.append((f'{binner}_refined/final_bins/', extension, f'data/{binner}_refined_bins.tsv'))

    for metabat in metabats:
        if metabat not in skip_binners:
            # e.g. metabat_sspec -> data/metabat_bins_sspec
            binners.append((metabat.replace('metabat', 'metabat_bins'), 'fa', f'data/{metabat}_bins.tsv'))
    if 'metabat2' not in skip_binners:
        binners.append(('metabat2_refined/final_bins/', 'fna', 'data/metabat2_refined_bins.tsv'))

    return binners


if __name__ == '__main__':
    # `snakemake` is made available by Snakemake's `script:` directive.
    binners = select_binners(snakemake.config['skip_binners'])

    logfile = snakemake.log[0]
    logging.basicConfig(filename=logfile, level=logging.INFO)
    logging.info("Using the following binners: " + str(binners))

    if len(binners) == 0:
        logging.error("All binners have been skipped, so DAS_tool cannot be run.")
        sys.exit(1)
    if len(binners) == 1:
        logging.error("Only one binner has been specified, so DAS_tool cannot be run. Please specify at least two binners.")
        sys.exit(1)

    # Build one scaffold-to-bin table per binner, keeping only non-empty ones.
    bin_definition_files = []
    for binner, extension, bin_definition_file in binners:
        extern.run(f'Fasta_to_Scaffolds2Bin.sh -i data/{binner} -e {extension} >{bin_definition_file} 2>> {logfile}')
        if os.path.getsize(bin_definition_file) == 0:
            logging.warning(f'Bin definition file {bin_definition_file} is empty, suggesting that {binner} failed or did not create any output bins.')
        else:
            bin_definition_files.append(bin_definition_file)
    logging.info("Bin definition files created: " + str(bin_definition_files))

    # Without this guard DAS_Tool would be launched with an empty -i argument
    # and fail with a confusing message.
    if not bin_definition_files:
        logging.error("No binner produced any bins, so DAS_tool cannot be run.")
        sys.exit(1)

    scaffold2bin_files = ','.join(bin_definition_files)

    das_tool_command = (
        f'DAS_Tool --search_engine diamond --write_bin_evals 1 --write_bins 1 '
        f'-t {snakemake.threads} --score_threshold -42 '
        f'-i {scaffold2bin_files} '
        f'-c {snakemake.input.fasta} '
        f'-o data/das_tool_bins_pre_refine/das_tool >> {logfile} 2>&1')
    logging.info("Running DAS_Tool with command: " + das_tool_command)
    extern.run(das_tool_command)
2 changes: 1 addition & 1 deletion test/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_short_read_recovery_fast(self):
f"-1 {data}/wgsim.1.fq.gz "
f"-2 {data}/wgsim.2.fq.gz "
f"--skip-abundances "
f"--skip-binners concoct rosella vamb metabat maxbin "
f"--skip-binners concoct rosella vamb maxbin2 semibin "
f"--skip-qc "
f"--refinery-max-iterations 0 "
f"--conda-prefix {path_to_conda} "
Expand Down

0 comments on commit 1c352ad

Please sign in to comment.