Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

das_tool: Use script not shell. #187

Merged
merged 2 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 3 additions & 20 deletions aviary/modules/binning/binning.smk
Original file line number Diff line number Diff line change
Expand Up @@ -629,32 +629,15 @@ rule das_tool:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 512*1024*attempt),
runtime = lambda wildcards, attempt: 12*60*attempt,
output:
das_tool_done = "data/das_tool_bins_pre_refine/done"
touch("data/das_tool_bins_pre_refine/done")
conda:
"envs/das_tool.yaml"
log:
"logs/das_tool.log"
benchmark:
"benchmarks/das_tool.benchmark.txt"
shell:
"""
Fasta_to_Scaffolds2Bin.sh -i data/metabat_bins_sspec -e fa > data/metabat_bins_sspec.tsv 2> {log};
Fasta_to_Scaffolds2Bin.sh -i data/metabat_bins_ssens -e fa > data/metabat_bins_ssens.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/metabat_bins_sens -e fa > data/metabat_bins_sens.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/metabat_bins_spec -e fa > data/metabat_bins_spec.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/concoct_bins -e fa > data/concoct_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/maxbin2_bins -e fasta > data/maxbin_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/vamb_bins/bins -e fna > data/vamb_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/rosella_refined/final_bins/ -e fna > data/rosella_refined_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/metabat2_refined/final_bins/ -e fna > data/metabat2_refined_bins.tsv 2>> {log};
Fasta_to_Scaffolds2Bin.sh -i data/semibin_refined/final_bins/ -e fna > data/semibin_refined_bins.tsv 2>> {log};
scaffold2bin_files=$(find data/*bins*.tsv -not -empty -exec ls {{}} \; | tr "\n" ',' | sed "s/,$//g");
DAS_Tool --search_engine diamond --write_bin_evals 1 --write_bins 1 -t {threads} --score_threshold -42 \
-i $scaffold2bin_files \
-c {input.fasta} \
-o data/das_tool_bins_pre_refine/das_tool >> {log} 2>&1 && \
touch data/das_tool_bins_pre_refine/done
"""
script:
"scripts/das_tool.py"

rule refine_dastool:
input:
Expand Down
1 change: 1 addition & 0 deletions aviary/modules/binning/envs/das_tool.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ channels:
- bioconda
dependencies:
- das_tool = 1.1.2
- extern = 0.4.1
61 changes: 61 additions & 0 deletions aviary/modules/binning/scripts/das_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import logging
import os
import sys

import extern

# Binners whose bins are used exactly as the binner produced them:
# (name as it appears in config['skip_binners'], bin directory under data/,
# FASTA file extension).
UNREFINED_BINNERS = (
    ('concoct', 'concoct_bins/', 'fa'),
    ('maxbin2', 'maxbin2_bins/', 'fasta'),
    # The shell command this script replaced read VAMB bins from
    # data/vamb_bins/bins, so keep pointing at that subdirectory rather than
    # at data/vamb_bins/ itself.
    ('vamb', 'vamb_bins/bins', 'fna'),
)

# Binners whose bins are read from data/<name>_refined/final_bins/:
# (name as it appears in config['skip_binners'], FASTA file extension).
REFINED_BINNERS = (
    ('rosella', 'fna'),
    ('semibin', 'fna'),
)

# N.B. specifying "metabat" will skip both MetaBAT1 and MetaBAT2.
# NOTE(review): the checks below only match these exact names, so "metabat"
# is presumably expanded before it reaches this script -- TODO confirm.
METABAT1_MODES = ('metabat_sspec', 'metabat_ssens', 'metabat_sens', 'metabat_spec')


def select_binners(skip_binners):
    """Return the binners whose output should be handed to DAS_Tool.

    Args:
        skip_binners: Container of binner names to leave out.

    Returns:
        A list of ``(bin_directory, extension, bin_definition_file)`` tuples,
        where ``bin_directory`` is relative to ``data/``, ``extension`` is the
        FASTA extension of the bins in it, and ``bin_definition_file`` is the
        path of the ``.tsv`` to write for that binner.
    """
    binners = [
        (directory, extension, f'data/{name}_bins.tsv')
        for name, directory, extension in UNREFINED_BINNERS
        if name not in skip_binners
    ]
    binners.extend(
        (f'{name}_refined/final_bins/', extension, f'data/{name}_refined_bins.tsv')
        for name, extension in REFINED_BINNERS
        if name not in skip_binners
    )
    binners.extend(
        (mode.replace('metabat', 'metabat_bins'), 'fa', f'data/{mode}_bins.tsv')
        for mode in METABAT1_MODES
        if mode not in skip_binners
    )
    if 'metabat2' not in skip_binners:
        binners.append(('metabat2_refined/final_bins/', 'fna', 'data/metabat2_refined_bins.tsv'))
    return binners


def build_das_tool_command(bin_definition_files, threads, fasta, logfile):
    """Return the DAS_Tool shell command for the given bin definition files.

    Args:
        bin_definition_files: Paths of the non-empty ``.tsv`` files to pass,
            comma-separated, to ``-i``.
        threads: Value for ``-t``.
        fasta: Assembly path passed to ``-c``.
        logfile: File that stdout and stderr of DAS_Tool are appended to.
    """
    scaffold2bin_files = ','.join(bin_definition_files)
    return (
        'DAS_Tool --search_engine diamond --write_bin_evals 1 --write_bins 1 '
        f'-t {threads} --score_threshold -42 '
        f'-i {scaffold2bin_files} '
        f'-c {fasta} '
        f'-o data/das_tool_bins_pre_refine/das_tool >> {logfile} 2>&1'
    )


def main():
    """Write one bin definition file per selected binner, then run DAS_Tool.

    Reads ``snakemake.config['skip_binners']``, ``snakemake.log[0]``,
    ``snakemake.threads`` and ``snakemake.input.fasta``. Exits with status 1
    when fewer than two binners are selected or when none of the selected
    binners produced a non-empty bin definition file.
    """
    logfile = snakemake.log[0]
    logging.basicConfig(filename=logfile, level=logging.INFO)

    binners = select_binners(snakemake.config['skip_binners'])
    logging.info("Using the following binners: %s", binners)

    if not binners:
        logging.error("All binners have been skipped, so DAS_tool cannot be run.")
        sys.exit(1)
    if len(binners) == 1:
        logging.error("Only one binner has been specified, so DAS_tool cannot be run. Please specify at least two binners.")
        sys.exit(1)

    bin_definition_files = []
    for binner, extension, bin_definition_file in binners:
        extern.run(f'Fasta_to_Scaffolds2Bin.sh -i data/{binner} -e {extension} >{bin_definition_file} 2>> {logfile}')
        # An empty definition file would only confuse DAS_Tool, so it is
        # reported and left out of the -i list.
        if os.path.getsize(bin_definition_file) == 0:
            logging.warning(f'Bin definition file {bin_definition_file} is empty, suggesting that {binner} failed or did not create any output bins.')
        else:
            bin_definition_files.append(bin_definition_file)
    logging.info("Bin definition files created: %s", bin_definition_files)

    # Without this guard DAS_Tool would be launched with an empty -i argument.
    if not bin_definition_files:
        logging.error("None of the selected binners produced any bins, so DAS_tool cannot be run.")
        sys.exit(1)

    das_tool_command = build_das_tool_command(
        bin_definition_files,
        snakemake.threads,
        snakemake.input.fasta,
        logfile,
    )
    logging.info("Running DAS_Tool with command: %s", das_tool_command)
    extern.run(das_tool_command)


if __name__ == '__main__':
    main()
2 changes: 1 addition & 1 deletion test/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_short_read_recovery_fast(self):
f"-1 {data}/wgsim.1.fq.gz "
f"-2 {data}/wgsim.2.fq.gz "
f"--skip-abundances "
f"--skip-binners concoct rosella vamb metabat maxbin "
f"--skip-binners concoct rosella vamb maxbin2 semibin "
f"--skip-qc "
f"--refinery-max-iterations 0 "
f"--conda-prefix {path_to_conda} "
Expand Down
Loading