diff --git a/README.md b/README.md index e1691e10..127d25a7 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,7 @@ These environment variables can also be configured manually, just set the follow ``` export GTDBTK_DATA_PATH=/path/to/gtdb/gtdb_release207/db/ # https://gtdb.ecogenomic.org/downloads export EGGNOG_DATA_DIR=/path/to/eggnog-mapper/2.1.8/ # https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#setup +export SINGLEM_METAPACKAGE_PATH=/path/to/singlem_metapackage.smpkg/ export CHECKM2DB=/path/to/checkm2db/ export CONDA_ENV_PATH=/path/to/conda/envs/ ``` diff --git a/aviary/aviary.py b/aviary/aviary.py index 5adeba07..c1c62eff 100644 --- a/aviary/aviary.py +++ b/aviary/aviary.py @@ -266,11 +266,11 @@ def main(): base_group.add_argument( '--download', '--download', - help='Downloads the required GTDB, EggNOG, & CheckM2 databases if required', - type=str2bool, - nargs='?', - const=True, + help='Downloads the requested GTDB, EggNOG, SingleM, & CheckM2 databases', dest='download', + default=[], + nargs="*", + choices=["gtdb", "eggnog", "singlem", "checkm2"] ) base_group.add_argument( diff --git a/aviary/modules/annotation/annotation.smk b/aviary/modules/annotation/annotation.smk index 31e1d40a..0498c928 100644 --- a/aviary/modules/annotation/annotation.smk +++ b/aviary/modules/annotation/annotation.smk @@ -40,9 +40,10 @@ if config['mag_extension'] == 'none': rule download_databases: input: - 'logs/download_gtdb.log', - 'logs/download_eggnog.log', - 'logs/download_checkm2.log' + 'logs/download_gtdb.log' if "gtdb" in config["download"] else [], + 'logs/download_eggnog.log' if "eggnog" in config["download"] else [], + 'logs/download_singlem.log' if "singlem" in config["download"] else [], + 'logs/download_checkm2.log' if "checkm2" in config["download"] else [], threads: 1 log: temp("logs/download.log") @@ -111,6 +112,18 @@ rule download_gtdb: ' echo "[INFO] - Conda not found in PATH, please be sure to set the TARGET_DIR envrionment 
variable"; ' 'fi; ' +rule download_singlem_metapackage: + params: + metapackage_folder = os.path.expanduser(config['singlem_metapackage']).rstrip('/') # no trailing slash, so "<folder>_tmp" is a sibling directory + conda: + "../../envs/singlem.yaml" + threads: 1 + log: + 'logs/download_singlem.log' + shell: + 'singlem data --output-directory {params.metapackage_folder}_tmp 2> {log} && ' + 'mv {params.metapackage_folder}_tmp/*.smpkg.zb/payload_directory {params.metapackage_folder}' + rule download_checkm2: params: checkm2_folder = os.path.expanduser(config['checkm2_db_folder']) diff --git a/aviary/modules/processor.py b/aviary/modules/processor.py index 9a48e79c..2523f0c3 100644 --- a/aviary/modules/processor.py +++ b/aviary/modules/processor.py @@ -235,6 +235,8 @@ def __init__(self, except AttributeError: self.mag_directory = 'none' + self.download = getattr(args, 'download', []) + try: if args.gtdb_path is not None: self.gtdbtk = args.gtdb_path @@ -374,6 +376,7 @@ def make_config(self): conf["long_contig_size"] = self.long_contig_size conf["min_contig_size"] = int(self.min_contig_size) conf["min_bin_size"] = int(self.min_bin_size) + conf["download"] = self.download conf["gtdbtk_folder"] = self.gtdbtk conf["eggnog_folder"] = self.eggnog conf["singlem_metapackage"] = self.singlem diff --git a/docs/examples.md b/docs/examples.md index 033eea8a..cb9ed843 100755 --- a/docs/examples.md +++ b/docs/examples.md @@ -106,6 +106,9 @@ use the the `aviary configure` module to update the environment variables used b These environment variables can also be configured manually, just set the following variables in your `.bashrc` file: ``` GTDBTK_DATA_PATH +EGGNOG_DATA_DIR +SINGLEM_METAPACKAGE_PATH +CHECKM2DB CONDA_ENV_PATH ``` diff --git a/docs/faqs.md b/docs/faqs.md index a3451be1..e3d5ec16 100755 --- a/docs/faqs.md +++ b/docs/faqs.md @@ -66,6 +66,8 @@ the `activate.d/aviary.sh` or `.bashrc` files changing the specific paths: ``` export GTDBTK_DATA_PATH=/path/to/gtdb/gtdb_release207/db/ # https://gtdb.ecogenomic.org/downloads export 
EGGNOG_DATA_DIR=/path/to/eggnog-mapper/2.1.7/ # https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.7#setup +export SINGLEM_METAPACKAGE_PATH=/path/to/singlem_metapackage.smpkg/ +export CHECKM2DB=/path/to/checkm2db/ export CONDA_ENV_PATH=/path/to/conda/envs/ ``` diff --git a/docs/installation.md b/docs/installation.md index 2093c0af..b0214672 100755 --- a/docs/installation.md +++ b/docs/installation.md @@ -128,5 +128,7 @@ These environment variables can also be configured manually, just set the follow ``` export GTDBTK_DATA_PATH=/path/to/gtdb/gtdb_release207/db/ # https://gtdb.ecogenomic.org/downloads export EGGNOG_DATA_DIR=/path/to/eggnog-mapper/2.1.7/ # https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.7#setup +export SINGLEM_METAPACKAGE_PATH=/path/to/singlem_metapackage.smpkg/ +export CHECKM2DB=/path/to/checkm2db/ export CONDA_ENV_PATH=/path/to/conda/envs/ ```