Skip to content

Commit

Permalink
Merge pull request #196 from rhysnewell/dev
Browse files Browse the repository at this point in the history
release v0.9.0
  • Loading branch information
rhysnewell authored Mar 12, 2024
2 parents fafa03e + 7195d25 commit 14c7092
Show file tree
Hide file tree
Showing 27 changed files with 910 additions and 354 deletions.
5 changes: 3 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ authors:
given-names: Ben J.
orcid: https://orcid.org/0000-0003-0670-7480
title: "Aviary: Hybrid assembly and genome recovery from metagenomes with Aviary"
version: 0.8.2
date-released: 2023-11-05
version: 0.8.3
doi: 10.5281/zenodo.10158087
date-released: 2023-11-20
34 changes: 16 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/aviary/README.html)
![](https://anaconda.org/bioconda/aviary/badges/license.svg)
![](https://anaconda.org/bioconda/aviary/badges/version.svg)
![](https://anaconda.org/bioconda/aviary/badges/latest_release_relative_date.svg)
![](https://anaconda.org/bioconda/aviary/badges/platforms.svg)
[![DOI](https://zenodo.org/badge/271448699.svg)](https://zenodo.org/doi/10.5281/zenodo.10158086)


![](docs/_include/images/aviary_logo.png)

# Aviary
Expand Down Expand Up @@ -57,7 +65,12 @@ conda env create -n aviary -f aviary.yml
conda activate aviary
pip install -e .
```
The `aviary` executable can then be run from any directory. Since the code in
this directory is then used for running, any updates made there will be
immediately available. We recommend this mode for developing and debugging
aviary.

## Checking installation
Whatever option you choose, running `aviary --help` should return the following
output:

Expand Down Expand Up @@ -86,22 +99,6 @@ Utility modules:
```

Upon first running aviary you will be prompted to enter the location where you would like
your conda environments to be stored, the GTDB release installed on your system, the location of your
EnrichM database, and the location of your BUSCO database. These locations will be stored as environment
variables. For aviary to be able to use those environment variables, you will have to either source your .bashrc
or reactivate your conda environment, depending on whether or not you installed aviary within a conda environment:

```
conda deactivate; conda activate aviary
OR
source ~/.bashrc
```

These environment variables can be reset using `aviary configure`

## Databases

Aviary uses programs which require access to locally stored databases.
Expand All @@ -111,7 +108,7 @@ The **required** databases are as follows:
* [GTDB](https://gtdb.ecogenomic.org/downloads)
* [EggNog](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#setup)
* [CheckM2](https://github.com/chklovski/CheckM2)

* [SingleM](https://wwood.github.io/singlem/)

### Installing databases

Expand All @@ -121,7 +118,7 @@ ask you to set these environment variables upon first running and if they are no
the `aviary configure` subcommand to reset the environment variables:

```commandline
aviary configure -o logs/ --eggnog-db-path /shared/db/eggnog/ --gtdb-path /shared/db/gtdb/ --checkm2-db-path /shared/db/checkm2db/ --download
aviary configure -o logs/ --eggnog-db-path /shared/db/eggnog/ --gtdb-path /shared/db/gtdb/ --checkm2-db-path /shared/db/checkm2db/ --singlem-metapackage-path /shared/db/singlem/ --download
```

This command will check whether the databases exist at the given locations; if they don't, aviary will download them and change
Expand All @@ -141,6 +138,7 @@ These environment variables can also be configured manually, just set the follow
```
export GTDBTK_DATA_PATH=/path/to/gtdb/gtdb_release207/db/ # https://gtdb.ecogenomic.org/downloads
export EGGNOG_DATA_DIR=/path/to/eggnog-mapper/2.1.8/ # https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#setup
export SINGLEM_METAPACKAGE_PATH=/path/to/singlem_metapackage.smpkg/
export CHECKM2DB=/path/to/checkm2db/
export CONDA_ENV_PATH=/path/to/conda/envs/
```
Expand Down
2 changes: 1 addition & 1 deletion aviary/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.8.3"
__version__ = "0.9.0"
171 changes: 83 additions & 88 deletions aviary/aviary.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,17 @@ def str2bool(v):
else:
raise argparse.ArgumentTypeError('Boolean value expected.')

def add_workflow_arg(parser, default, help=None):
    """Register the shared ``-w`` / ``--workflow`` option on ``parser``.

    The option accepts one or more values (``nargs="+"``) and stores them
    under ``workflow``.

    :param parser: object exposing ``add_argument`` (an argparse parser or
        argument group).
    :param default: value used for ``workflow`` when the flag is not given.
    :param help: help text for the option; when ``None`` a generic
        description of the workflow flag is used instead.
    """
    help_text = (
        'Main workflow to run. This is the snakemake target rule to run.'
        if help is None
        else help
    )
    workflow_option = {
        'help': help_text,
        'dest': 'workflow',
        'nargs': "+",
        'default': default,
    }
    parser.add_argument('-w', '--workflow', **workflow_option)

def main():
if len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help':
phelp()
Expand Down Expand Up @@ -138,21 +149,14 @@ def main():

base_group.add_argument(
'-t', '--max-threads', '--max_threads',
help='Maximum number of threads given to any particular process',
help='Maximum number of threads given to any particular process. If max_threads > n_cores then n_cores will be bumped up to max_threads. Useful if you want more fine grain control over the number of threads used by each process.',
dest='max_threads',
default=8,
)

base_group.add_argument(
'-p', '--pplacer-threads', '--pplacer_threads',
help=argparse.SUPPRESS,
dest='pplacer_threads',
default=8,
)

base_group.add_argument(
'-n', '--n-cores', '--n_cores',
help='Maximum number of cores available for use. Must be >= to max_threads',
help='Maximum number of cores available for use. Setting to multiples of max_threads will allow for multiple processes to be run in parallel.',
dest='n_cores',
default=16,
)
Expand Down Expand Up @@ -266,11 +270,11 @@ def main():

base_group.add_argument(
'--download', '--download',
help='Downloads the required GTDB, EggNOG, & CheckM2 databases if required',
type=str2bool,
nargs='?',
const=True,
help='Downloads the requested GTDB, EggNOG, SingleM, & CheckM2 databases',
dest='download',
default=[],
nargs="*",
choices=["gtdb", "eggnog", "singlem", "checkm2"]
)

base_group.add_argument(
Expand Down Expand Up @@ -594,23 +598,65 @@ def main():
default=3
)

binning_group.add_argument(
'--extra-binners', '--extra_binners', '--extra-binner', '--extra_binner',
help='Optional list of extra binning algorithms to run. Can be any combination of: \n'
'maxbin, maxbin2, concoct \n'
'These binners are skipped by default as they can have long runtimes \n'
'N.B. specifying "maxbin" and "maxbin2" are equivalent \n',
dest='extra_binners',
nargs='*',
choices=["maxbin", "maxbin2", "concoct"]
)

binning_group.add_argument(
'--skip-binners', '--skip_binners', '--skip_binner', '--skip-binner',
help='Optional list of binning algorithms to skip. Can be any combination of: \n'
'rosella, semibin, metabat1, metabat2, metabat, vamb, concoct, maxbin2, maxbin \n'
'Capitals will be auto-corrected. N.B. specifying "metabat" will skip both \n'
'MetaBAT1 and MetaBAT2.',
'rosella, semibin, metabat1, metabat2, metabat, vamb \n'
'N.B. specifying "metabat" will skip both MetaBAT1 and MetaBAT2. \n',
dest='skip_binners',
nargs='*'
# default=["maxbin2"]
nargs='*',
choices=["rosella", "semibin", "metabat1", "metabat2", "metabat", "vamb"]
)

binning_group.add_argument(
'--binning-only', '--binning_only',
help='Only run up to the binning stage. Do not run SingleM, GTDB-tk, or CoverM',
type=str2bool,
nargs='?',
const=True,
dest='binning_only',
default=False,
)

binning_group.add_argument(
'--skip-abundances', '--skip_abundances',
help='Skip CoverM post-binning abundance calculations.',
dest='skip_abundances',
type=str2bool,
nargs='?',
const=True,
default=False,
)

binning_group.add_argument(
'--skip-taxonomy', '--skip_taxonomy',
help='Skip GTDB-tk post-binning taxonomy assignment.',
dest='skip_taxonomy',
type=str2bool,
nargs='?',
const=True,
default=False,
)

binning_group.add_argument(
'--skip-singlem', '--skip_singlem',
help='Skip SingleM post-binning recovery assessment.',
dest='skip_singlem',
type=str2bool,
nargs='?',
const=True,
default=False,
action="store_true",
)

####################################################################
Expand Down Expand Up @@ -839,13 +885,7 @@ def main():



assemble_options.add_argument(
'-w', '--workflow',
help='Main workflow to run',
dest='workflow',
nargs="+",
default=['complete_assembly_with_qc'],
)
add_workflow_arg(assemble_options, ['complete_assembly_with_qc'])

########################## ~ RECOVER ~ ###########################

Expand All @@ -869,13 +909,7 @@ def main():
required=False,
)

recover_options.add_argument(
'-w', '--workflow',
help='Main workflow to run',
dest='workflow',
nargs="+",
default=['recover_mags'],
)
add_workflow_arg(recover_options, ['recover_mags'])

recover_options.add_argument(
'--perform-strain-analysis', '--perform_strain_analysis',
Expand Down Expand Up @@ -909,13 +943,7 @@ def main():
required=False,
)

annotate_options.add_argument(
'-w', '--workflow',
help='Main workflow to run',
dest='workflow',
nargs="+",
default=['annotate'],
)
add_workflow_arg(annotate_options, ['annotate'])

########################## ~ diversity ~ ###########################

Expand All @@ -940,13 +968,7 @@ def main():
required=False,
)

diversity_options.add_argument(
'-w', '--workflow',
help='Main workflow to run',
dest='workflow',
nargs="+",
default=['lorikeet'],
)
add_workflow_arg(diversity_options, ['lorikeet'])

diversity_options.add_argument(
'--perform-strain-analysis', '--perform_strain_analysis',
Expand Down Expand Up @@ -982,13 +1004,7 @@ def main():
required=True,
)

cluster_options.add_argument(
'-w', '--workflow',
help='Main workflow to run',
dest='workflow',
nargs="+",
default=['complete_cluster'],
)
add_workflow_arg(cluster_options, ['complete_cluster'])

########################## ~ VIRAL ~ ###########################

Expand All @@ -1004,13 +1020,7 @@ def main():
''')

viral_options.add_argument(
'-w', '--workflow',
help='Main workflow to run',
dest='workflow',
nargs="+",
default=['create_webpage_genotype'],
)
add_workflow_arg(viral_options, ['create_webpage_genotype'])

########################## ~ COMPLETE ~ ###########################

Expand All @@ -1035,13 +1045,7 @@ def main():
required=False,
)

complete_options.add_argument(
'-w', '--workflow',
help='Main workflow to run',
dest='workflow',
nargs="+",
default=['get_bam_indices', 'recover_mags', 'annotate', 'lorikeet'],
)
add_workflow_arg(complete_options, ['get_bam_indices', 'recover_mags', 'annotate', 'lorikeet'])

########################## ~ ISOLATE ~ ###########################

Expand All @@ -1057,13 +1061,7 @@ def main():
''')

isolate_options.add_argument(
'-w', '--workflow',
help='Main workflows to run',
dest='workflow',
nargs="+",
default=['circlator'],
)
add_workflow_arg(isolate_options, ['circlator'])

########################## ~ BATCH ~ ###########################

Expand Down Expand Up @@ -1138,12 +1136,10 @@ def main():
default='95'
)

batch_options.add_argument(
'-w', '--workflow',
help='Main workflow to run for each sample',
dest='workflow',
nargs="+",
default=['get_bam_indices', 'recover_mags', 'annotate', 'lorikeet'],
add_workflow_arg(
batch_options,
['get_bam_indices', 'recover_mags', 'annotate', 'lorikeet'],
help='Main workflow (snakemake target rule) to run for each sample'
)

########################## ~ configure ~ ###########################
Expand Down Expand Up @@ -1195,13 +1191,7 @@ def main():
required=False,
)

configure_options.add_argument(
'-w', '--workflow',
help=argparse.SUPPRESS,
dest='workflow',
nargs="+",
default=['download_databases'],
)
add_workflow_arg(configure_options, ['download_databases'], help=argparse.SUPPRESS)

###########################################################################
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
Expand Down Expand Up @@ -1261,6 +1251,9 @@ def main():

# else:
args = manage_env_vars(args)
if int(args.max_threads) > int(args.n_cores):
args.n_cores = args.max_threads

prefix = args.output
if not os.path.exists(prefix):
os.makedirs(prefix)
Expand Down Expand Up @@ -1307,6 +1300,8 @@ def manage_env_vars(args):
args.eggnog_db_path = Config.get_software_db_path('EGGNOG_DATA_DIR', '--eggnog-db-path')
if args.checkm2_db_path is None:
args.checkm2_db_path = Config.get_software_db_path('CHECKM2DB', '--checkm2-db-path')
if args.singlem_metapackage_path is None:
args.singlem_db_path = Config.get_software_db_path('SINGLEM_METAPACKAGE_PATH', '--singlem-metapackage-path')
except AttributeError:
pass

Expand Down
Loading

0 comments on commit 14c7092

Please sign in to comment.