Skip to content

Commit

Permalink
Merge pull request #142 from rhysnewell/dev
Browse files Browse the repository at this point in the history
Merge Dev into Main
  • Loading branch information
rhysnewell authored Sep 13, 2023
2 parents 83a2518 + 2c61ba1 commit 031f049
Show file tree
Hide file tree
Showing 38 changed files with 1,781 additions and 1,009 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@ that can seamlessly communicate with each other. Each module can be run independ

# Quick Installation

Your conda channels should be configured ideally in this order with strict channel priority order
turned on:
Ideally, your conda channels should be configured in this order:
```
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
conda config --set channel_priority strict
```

Your resulting `.condarc` file should look something like:
Expand All @@ -25,7 +23,6 @@ channels:
- conda-forge
- bioconda
- defaults
channel_priority: strict
```

#### Option 1) Install from Bioconda
Expand Down
2 changes: 1 addition & 1 deletion aviary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- anaconda
dependencies:
- python>=3.8
- snakemake>=6.0.5,<=7.17
- snakemake>=6.0.5,<=7.32.3
- ruamel.yaml>=0.15.99 # needs to be explicit
- numpy
- pandas
Expand Down
2 changes: 1 addition & 1 deletion aviary/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.2"
__version__ = "0.8.0"
74 changes: 55 additions & 19 deletions aviary/aviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,16 @@ def main():
default=250,
)

base_group.add_argument(
'--request-gpu', '--request_gpu',
help='Request a GPU for use with the pipeline. This will only work if the pipeline is run on a cluster',
type=str2bool,
nargs='?',
const=True,
dest='request_gpu',
default=False,
)

base_group.add_argument(
'-o', '--output',
help='Output directory',
Expand Down Expand Up @@ -195,6 +205,23 @@ def main():
default=" "
)

base_group.add_argument(
'--snakemake-profile',
help='Snakemake profile (see https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles)\n'
'Create profile as `~/.config/snakemake/[CLUSTER_PROFILE]/config.yaml`. \n'
'Can be used to submit rules as jobs to cluster engine (see https://snakemake.readthedocs.io/en/stable/executing/cluster.html), \n'
'requires cluster, cluster-status, jobs, cluster-cancel. ',
dest='snakemake_profile',
default=""
)

base_group.add_argument(
'--cluster-retries',
help='Number of times to retry a failed job when using cluster submission (see `--snakemake-profile`). ',
dest='cluster_retries',
default=0
)

base_group.add_argument(
'--dry-run', '--dry_run', '--dryrun',
help='Perform snakemake dry run, tests workflow order and conda environments',
Expand Down Expand Up @@ -250,7 +277,7 @@ def main():
'--rerun-triggers', '--rerun_triggers',
help='Specify which kinds of modifications will trigger rules to rerun',\
dest='rerun_triggers',
default="mtime",
default=["mtime"],
nargs="*",
choices=["mtime","params","input","software-env","code"]
)
Expand Down Expand Up @@ -286,32 +313,43 @@ def main():

qc_group.add_argument(
'-r', '--reference-filter', '--reference_filter',
help='Reference filter file to aid in the assembly',
help='One or more reference filter files to aid in the assembly. Remove contaminant reads from the assembly.',
dest="reference_filter",
default='none'
nargs='*',
default=['none']
)

qc_group.add_argument(
'--min-read-size', '--min_read_size',
help='Minimum long read size when filtering using Filtlong',
dest="min_read_size",
default=250
default=100
)

qc_group.add_argument(
'--min-mean-q', '--min_mean_q',
help='Minimum mean quality threshold',
dest="min_mean_q",
default=50
default=10
)

qc_group.add_argument(
'--keep-percent', '--keep_percent',
help='Percentage of reads passing quality thresholds kept by filtlong',
help='DEPRECATED: Percentage of reads passing quality thresholds kept by filtlong',
dest="keep_percent",
default=100
)

qc_group.add_argument(
'--skip-qc', '--skip_qc',
help='Skip quality control steps',
type=str2bool,
nargs='?',
const=True,
dest="skip_qc",
default=False
)


####################################################################

Expand All @@ -322,10 +360,9 @@ def main():
read_group_exclusive.add_argument(
'-1', '--pe-1', '--paired-reads-1', '--paired_reads_1', '--pe1',
help='A space separated list of forwards read files \n'
'NOTE: If performing assembly and multiple files and longreads \n'
' are provided then only the first file will be used for assembly. \n'
'NOTE: If performing assembly and multiple files are provided then only the first file will be used for assembly. \n'
' If no longreads are provided then all samples will be co-assembled \n'
' with megahit or metaspades depending on the --coassemble parameter\n',
' with megahit or metaspades depending on the --coassemble parameter',
dest='pe1',
nargs='*',
default="none"
Expand All @@ -334,8 +371,7 @@ def main():
short_read_group.add_argument(
'-2', '--pe-2', '--paired-reads-2', '--paired_reads_2', '--pe2',
help='A space separated list of reverse read files \n'
'NOTE: If performing assembly and multiple files and longreads \n'
' are provided then only the first file will be used for assembly. \n'
'NOTE: If performing assembly and multiple files are provided then only the first file will be used for assembly. \n'
' If no longreads are provided then all samples will be co-assembled \n'
' with megahit or metaspades depending on the --coassemble parameter',
dest='pe2',
Expand All @@ -346,8 +382,7 @@ def main():
read_group_exclusive.add_argument(
'-i','--interleaved',
help='A space separated list of interleaved read files \n'
'NOTE: If performing assembly and multiple files and longreads \n'
' are provided then only the first file will be used for assembly. \n'
'NOTE: If performing assembly and multiple files are provided then only the first file will be used for assembly. \n'
' If no longreads are provided then all samples will be co-assembled \n'
' with megahit or metaspades depending on the --coassemble parameter',
dest='interleaved',
Expand All @@ -358,8 +393,7 @@ def main():
read_group_exclusive.add_argument(
'-c', '--coupled',
help='Forward and reverse read files in a coupled space separated list. \n'
'NOTE: If performing assembly and multiple files and longreads \n'
' are provided then only the first file will be used for assembly. \n'
'NOTE: If performing assembly and multiple files are provided then only the first file will be used for assembly. \n'
' If no longreads are provided then all samples will be co-assembled \n'
' with megahit or metaspades depending on the --coassemble parameter',
dest='coupled',
Expand All @@ -382,8 +416,7 @@ def main():
long_read_group.add_argument(
'-l', '--longreads', '--long-reads', '--long_reads',
help='A space separated list of long-read read files. '
'NOTE: If performing assembly and multiple long read files are provided, \n'
' then only the first file is used for assembly. This behaviour might change in future.',
'NOTE: The first file will be used for assembly unless --coassemble is set to True. Then all files will be used.',
dest='longreads',
nargs='*',
default="none"
Expand Down Expand Up @@ -677,7 +710,7 @@ def main():
nargs='?',
const=True,
dest='coassemble',
default=True,
default=False,
)

assemble_group.add_argument(
Expand Down Expand Up @@ -1182,7 +1215,10 @@ def main():
dryrun=args.dryrun,
clean=args.clean,
conda_frontend=args.conda_frontend,
snakemake_args=args.cmds)
snakemake_args=args.cmds,
rerun_triggers=args.rerun_triggers,
profile=args.snakemake_profile,
cluster_retries=args.cluster_retries)
else:
process_batch(args, prefix)

Expand Down
2 changes: 2 additions & 0 deletions aviary/envs/coverm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ channels:
dependencies:
- coverm >= 0.6
- galah >= 0.3
- chopper >= 0.6
- pigz
- parallel
- dashing
- fastani
4 changes: 2 additions & 2 deletions aviary/modules/Snakefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ruleorder: skip_long_assembly > get_reads_list_ref > link_reads > short_only
ruleorder: filtlong_no_reference > link_reads
# ruleorder: skip_long_assembly > get_reads_list_ref > link_reads > short_only
# ruleorder: filtlong_no_reference > link_reads

onsuccess:
print("Aviary finished, no error")
Expand Down
39 changes: 27 additions & 12 deletions aviary/modules/annotation/annotation.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
localrules: download_databases, download_eggnog_db, download_gtdb, download_checkm2, annotate

onstart:
import os
Expand Down Expand Up @@ -133,15 +134,22 @@ rule checkm2:
mag_extension = config['mag_extension'],
checkm2_db_path = config["checkm2_db_folder"]
threads:
config["max_threads"]
min(config["max_threads"], 16)
resources:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 128*1024*attempt),
runtime = lambda wildcards, attempt: 8*60*attempt,
gpus = 1 if config["request_gpu"] else 0
log:
'logs/checkm2.log'
benchmark:
'benchmarks/checkm2.benchmark.txt'
conda:
"../../envs/checkm2.yaml"
shell:
'export CHECKM2DB={params.checkm2_db_path}/uniref100.KO.1.dmnd; '
'echo "Using CheckM2 database $CHECKM2DB"; '
'echo "Using CheckM2 database $CHECKM2DB" > {log}; '
'checkm2 predict -i {input.mag_folder}/ -x {params.mag_extension} -o {output.checkm2_folder} -t {threads} --force'
'>> {log} 2>&1 '

rule eggnog:
input:
Expand All @@ -151,13 +159,15 @@ rule eggnog:
mag_extension = config['mag_extension'],
eggnog_db = config['eggnog_folder'],
tmpdir = config["tmpdir"]
resources:
mem_mb=int(config["max_memory"])*512
group: 'annotation'
output:
done = 'data/eggnog/done'
threads:
config['max_threads']
min(config["max_threads"], 64)
resources:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 512*1024*attempt),
runtime = lambda wildcards, attempt: 24*60*attempt,
log:
'logs/eggnog.log'
benchmark:
'benchmarks/eggnog.benchmark.txt'
conda:
Expand All @@ -167,31 +177,36 @@ rule eggnog:
'mkdir -p data/eggnog/; '
'find {input.mag_folder}/*.{params.mag_extension} | parallel -j1 \'emapper.py --data_dir {params.eggnog_db} '
'--dmnd_db {params.eggnog_db}/*dmnd --cpu {threads} -m diamond --itype genome --genepred prodigal -i {{}} '
'--output_dir data/eggnog/ --temp_dir {params.tmpdir} -o {{/.}} || echo "Genome already annotated"\'; '
'--output_dir data/eggnog/ --temp_dir {params.tmpdir} -o {{/.}} || echo "Genome already annotated"\' '
'> {log} 2>&1; '
'touch data/eggnog/done; '

rule gtdbtk:
input:
mag_folder = config['mag_directory']
group: 'annotation'
output:
done = "data/gtdbtk/done"
params:
gtdbtk_folder = config['gtdbtk_folder'],
pplacer_threads = config["pplacer_threads"],
extension = config['mag_extension']
resources:
mem_mb=int(config["max_memory"])*1024
conda:
"../../envs/gtdbtk.yaml"
threads:
config["max_threads"]
min(config["max_threads"], 32)
resources:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 256*1024*attempt),
runtime = lambda wildcards, attempt: 12*60*attempt,
log:
'logs/gtdbtk.log'
benchmark:
'benchmarks/gtdbtk.benchmark.txt'
shell:
"export GTDBTK_DATA_PATH={params.gtdbtk_folder} && "
"gtdbtk classify_wf --skip_ani_screen --cpus {threads} --pplacer_cpus {params.pplacer_threads} --extension {params.extension} "
"--genome_dir {input.mag_folder} --out_dir data/gtdbtk && touch data/gtdbtk/done"
"--genome_dir {input.mag_folder} --out_dir data/gtdbtk "
"> {log} 2>&1 "
"&& touch data/gtdbtk/done"

rule annotate:
input:
Expand Down
Loading

0 comments on commit 031f049

Please sign in to comment.