Merge pull request #129 from rhysnewell/add-resources
Better snakemake cluster submission support
AroneyS authored Sep 7, 2023
2 parents 2b0aaff + 0a2d7e0 commit de1a48a
Showing 27 changed files with 1,205 additions and 683 deletions.
aviary/aviary.py: 24 changes (22 additions, 2 deletions)
@@ -195,6 +195,23 @@ def main():
default=" "
)

base_group.add_argument(
'--snakemake-profile',
help='Snakemake profile (see https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles)\n'
'Create profile as `~/.config/snakemake/[CLUSTER_PROFILE]/config.yaml`. \n'
'Can be used to submit rules as jobs to cluster engine (see https://snakemake.readthedocs.io/en/stable/executing/cluster.html), \n'
'requires cluster, cluster-status, jobs, cluster-cancel. ',
dest='snakemake_profile',
default=""
)

base_group.add_argument(
'--cluster-retries',
help='Number of times to retry a failed job when using cluster submission (see `--snakemake-profile`). ',
dest='cluster_retries',
default=0
)
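
For context, the profile named by `--snakemake-profile` is a YAML file whose keys mirror snakemake's cluster options (cluster, cluster-status, jobs, cluster-cancel, as listed in the help text above). A minimal sketch for a hypothetical SLURM setup, using the mem_mb and runtime resources this commit adds to the rules; the scheduler commands and limits below are illustrative assumptions, not part of this commit:

# ~/.config/snakemake/slurm/config.yaml -- illustrative SLURM profile, not shipped with aviary
jobs: 64                                # maximum number of cluster jobs in flight
cluster: "sbatch --parsable --cpus-per-task={threads} --mem={resources.mem_mb} --time={resources.runtime}"
cluster-cancel: "scancel"               # lets snakemake kill submitted jobs
cluster-status: "slurm-status.py"       # user-supplied script that reports each job's state

With such a profile in place, `--snakemake-profile slurm --cluster-retries 3` submits each rule as its own cluster job and resubmits failed jobs up to three times.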

base_group.add_argument(
'--dry-run', '--dry_run', '--dryrun',
help='Perform snakemake dry run, tests workflow order and conda environments',
@@ -250,7 +267,7 @@ def main():
'--rerun-triggers', '--rerun_triggers',
help='Specify which kinds of modifications will trigger rules to rerun',\
dest='rerun_triggers',
default="mtime",
default=["mtime"],
nargs="*",
choices=["mtime","params","input","software-env","code"]
)
@@ -1182,7 +1199,10 @@ def main():
dryrun=args.dryrun,
clean=args.clean,
conda_frontend=args.conda_frontend,
snakemake_args=args.cmds)
snakemake_args=args.cmds,
rerun_triggers=args.rerun_triggers,
profile=args.snakemake_profile,
cluster_retries=args.cluster_retries)
else:
process_batch(args, prefix)
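
The three new keyword arguments above (rerun_triggers, profile, cluster_retries) are handed to aviary's snakemake wrapper, whose body is not shown in this diff. As a rough, hypothetical sketch of how such options are commonly forwarded to the snakemake CLI; the function and argument names here are illustrative, not aviary's actual implementation:

import subprocess

def run_snakemake(snakefile, cores, profile="", cluster_retries=0,
                  rerun_triggers=("mtime",), dryrun=False):
    # Illustrative only: build and run a snakemake invocation from the new CLI options.
    cmd = ["snakemake", "--snakefile", snakefile, "--cores", str(cores),
           "--use-conda", "--rerun-triggers", *rerun_triggers]
    if profile:
        # resolves to ~/.config/snakemake/<profile>/config.yaml
        cmd += ["--profile", profile]
    if cluster_retries:
        # resubmit failed jobs; each retry increments the rule's `attempt` value
        cmd += ["--retries", str(cluster_retries)]
    if dryrun:
        cmd.append("--dry-run")
    subprocess.run(cmd, check=True)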

aviary/modules/annotation/annotation.smk: 38 changes (26 additions, 12 deletions)
@@ -1,3 +1,4 @@
localrules: download_databases, download_eggnog_db, download_gtdb, download_checkm2, annotate

onstart:
import os
@@ -133,15 +134,21 @@ rule checkm2:
mag_extension = config['mag_extension'],
checkm2_db_path = config["checkm2_db_folder"]
threads:
config["max_threads"]
min(config["max_threads"], 16)
resources:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 128*1024*attempt),
runtime = lambda wildcards, attempt: 8*60*attempt,
log:
'logs/checkm2.log'
benchmark:
'benchmarks/checkm2.benchmark.txt'
conda:
"../../envs/checkm2.yaml"
shell:
'export CHECKM2DB={params.checkm2_db_path}/uniref100.KO.1.dmnd; '
'echo "Using CheckM2 database $CHECKM2DB"; '
'echo "Using CheckM2 database $CHECKM2DB" > {log}; '
'checkm2 predict -i {input.mag_folder}/ -x {params.mag_extension} -o {output.checkm2_folder} -t {threads} --force'
'>> {log} 2>&1 '
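
The resources blocks added in this file all follow the same pattern: `attempt` is 1 on the first run and increments each time a failed job is retried (e.g. via `--cluster-retries`), so the memory and runtime requests grow on each resubmission up to the `max_memory` ceiling. A generic sketch of the pattern, with illustrative numbers rather than any particular rule's values:

# Generic shape of the attempt-scaled resources used by the rules in this file (numbers are illustrative)
rule example:
    threads: min(config["max_threads"], 16)
    resources:
        # 32 GB on the first attempt, 64 GB on the second, ..., capped at max_memory
        mem_mb = lambda wildcards, attempt: min(int(config["max_memory"]) * 1024, 32 * 1024 * attempt),
        # 4 h on the first attempt, 8 h on the second, ... (minutes)
        runtime = lambda wildcards, attempt: 4 * 60 * attempt,
    shell:
        "echo mem_mb={resources.mem_mb} runtime={resources.runtime}"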

rule eggnog:
input:
@@ -151,13 +158,15 @@ rule eggnog:
mag_extension = config['mag_extension'],
eggnog_db = config['eggnog_folder'],
tmpdir = config["tmpdir"]
resources:
mem_mb=int(config["max_memory"])*512
group: 'annotation'
output:
done = 'data/eggnog/done'
threads:
config['max_threads']
min(config["max_threads"], 64)
resources:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 512*1024*attempt),
runtime = lambda wildcards, attempt: 24*60*attempt,
log:
'logs/eggnog.log'
benchmark:
'benchmarks/eggnog.benchmark.txt'
conda:
@@ -167,31 +176,36 @@ rule gtdbtk:
'mkdir -p data/eggnog/; '
'find {input.mag_folder}/*.{params.mag_extension} | parallel -j1 \'emapper.py --data_dir {params.eggnog_db} '
'--dmnd_db {params.eggnog_db}/*dmnd --cpu {threads} -m diamond --itype genome --genepred prodigal -i {{}} '
'--output_dir data/eggnog/ --temp_dir {params.tmpdir} -o {{/.}} || echo "Genome already annotated"\'; '
'--output_dir data/eggnog/ --temp_dir {params.tmpdir} -o {{/.}} || echo "Genome already annotated"\' '
'> {log} 2>&1; '
'touch data/eggnog/done; '

rule gtdbtk:
input:
mag_folder = config['mag_directory']
group: 'annotation'
output:
done = "data/gtdbtk/done"
params:
gtdbtk_folder = config['gtdbtk_folder'],
pplacer_threads = config["pplacer_threads"],
extension = config['mag_extension']
resources:
mem_mb=int(config["max_memory"])*1024
conda:
"../../envs/gtdbtk.yaml"
threads:
config["max_threads"]
min(config["max_threads"], 32)
resources:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 256*1024*attempt),
runtime = lambda wildcards, attempt: 12*60*attempt,
log:
'logs/gtdbtk.log'
benchmark:
'benchmarks/gtdbtk.benchmark.txt'
shell:
"export GTDBTK_DATA_PATH={params.gtdbtk_folder} && "
"gtdbtk classify_wf --skip_ani_screen --cpus {threads} --pplacer_cpus {params.pplacer_threads} --extension {params.extension} "
"--genome_dir {input.mag_folder} --out_dir data/gtdbtk && touch data/gtdbtk/done"
"--genome_dir {input.mag_folder} --out_dir data/gtdbtk "
"> {log} 2>&1 "
"&& touch data/gtdbtk/done"

rule annotate:
input: