From 16f5b573d777d3af1167736489fbbf76d22aca40 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Thu, 18 Apr 2024 13:00:25 +1000 Subject: [PATCH] fix tmpdir being set from base command --- aviary/aviary.py | 6 +----- aviary/modules/annotation/annotation.smk | 2 +- aviary/modules/assembly/assembly.smk | 6 +++--- .../assembly/scripts/assemble_short_reads.py | 13 +++++++++++-- aviary/modules/binning/scripts/get_coverage.py | 3 ++- aviary/modules/processor.py | 5 +++-- aviary/modules/strain_analysis/strain_analysis.smk | 1 - config.yaml | 2 +- 8 files changed, 22 insertions(+), 16 deletions(-) diff --git a/aviary/aviary.py b/aviary/aviary.py index 0364ab62..e3da82f7 100755 --- a/aviary/aviary.py +++ b/aviary/aviary.py @@ -35,7 +35,6 @@ import logging import os from datetime import datetime -import tempfile # Debug debug={1:logging.CRITICAL, @@ -198,7 +197,7 @@ def main(): help='Path to the location that will be treated used for temporary files. If none is specified, the TMPDIR \n' 'environment variable will be used. Can be configured within the `configure` subcommand', dest='tmpdir', - default=tempfile.gettempdir(), + default=None, ) base_group.add_argument( @@ -1290,9 +1289,6 @@ def manage_env_vars(args): if args.conda_prefix is None: args.conda_prefix = Config.get_software_db_path('CONDA_ENV_PATH', '--conda-prefix') - if args.tmpdir is None: - args.tmpdir = Config.get_software_db_path('TMPDIR', '--tmpdir') - try: if args.gtdb_path is None: args.gtdb_path = Config.get_software_db_path('GTDBTK_DATA_PATH', '--gtdb-path') diff --git a/aviary/modules/annotation/annotation.smk b/aviary/modules/annotation/annotation.smk index 99190c93..7e303163 100644 --- a/aviary/modules/annotation/annotation.smk +++ b/aviary/modules/annotation/annotation.smk @@ -171,7 +171,7 @@ rule eggnog: params: mag_extension = config['mag_extension'], eggnog_db = config['eggnog_folder'], - tmpdir = config["tmpdir"] + tmpdir = config["tmpdir"] if config["tmpdir"] else "$TMPDIR", output: done = 'data/eggnog/done' threads: diff --git a/aviary/modules/assembly/assembly.smk b/aviary/modules/assembly/assembly.smk index 00b1f258..acaf41c3 100644 --- a/aviary/modules/assembly/assembly.smk +++ b/aviary/modules/assembly/assembly.smk @@ -362,7 +362,7 @@ rule spades_assembly: max_memory = config["max_memory"], long_read_type = config["long_read_type"], kmer_sizes = " ".join(config["kmer_sizes"]), - tmpdir = config["tmpdir"] + tmpdir = config["tmpdir"] if config["tmpdir"] else "$TMPDIR" conda: "envs/spades.yaml" benchmark: @@ -458,14 +458,14 @@ rule spades_assembly_coverage: log: "logs/spades_assembly_coverage.log" params: - tmpdir = config["tmpdir"] + tmpdir = f"TMPDIR={config["tmpdir"]}" if config["tmpdir"] else "" conda: "../../envs/coverm.yaml" benchmark: "benchmarks/spades_assembly_coverage.benchmark.txt" shell: """ - TMPDIR={params.tmpdir} coverm contig -m metabat -t {threads} -r {input.fasta} --interleaved {input.fastq} --bam-file-cache-directory data/cached_bams/ > {output.assembly_cov} 2> {log}; + {params.tmpdir} coverm contig -m metabat -t {threads} -r {input.fasta} --interleaved {input.fastq} --bam-file-cache-directory data/cached_bams/ > {output.assembly_cov} 2> {log}; mv data/cached_bams/*.bam {output.bam} && samtools index -@ {threads} {output.bam} 2>> {log} """ diff --git a/aviary/modules/assembly/scripts/assemble_short_reads.py b/aviary/modules/assembly/scripts/assemble_short_reads.py index ed72caa4..c54ad4dd 100644 --- a/aviary/modules/assembly/scripts/assemble_short_reads.py +++ b/aviary/modules/assembly/scripts/assemble_short_reads.py @@ -29,6 +29,15 @@ def assemble_short_reads( :return: ''' + if not tmp_dir: + try: + tmp_dir = os.environ["TMPDIR"] + except KeyError: + tmp_dir_arg = "" + + if tmp_dir: + tmp_dir_arg = f"--tmp-dir {tmp_dir}" + # deal with read sets i.e. are we coassembling? Which assembler are we using? # Non co-assembled reads are handled the same for each assembler if read_set1 != 'none': @@ -86,7 +95,7 @@ def assemble_short_reads( # Run chosen assembler if use_megahit: max_memory_in_bytes = max_memory * 1024*1024*1024 - command = f"megahit {read_string} -t {threads} -m {max_memory_in_bytes} -o data/megahit_assembly --tmp-dir {tmp_dir}" + command = f"megahit {read_string} -t {threads} -m {max_memory_in_bytes} -o data/megahit_assembly {tmp_dir_arg}" with open(log, 'a') as logf: logf.write(f"Queueing command {command}\n") @@ -97,7 +106,7 @@ def assemble_short_reads( else: kmers = " ".join(kmer_sizes) command = f"spades.py --memory {max_memory} --meta -t {threads} " \ - f"-o data/short_read_assembly {read_string} -k {kmers} --tmp-dir {tmp_dir}" + f"-o data/short_read_assembly {read_string} -k {kmers} {tmp_dir_arg}" with open(log, 'a') as logf: logf.write(f"Queueing command {command}\n") subprocess.run(command.split(), stdout=logf, stderr=subprocess.STDOUT) diff --git a/aviary/modules/binning/scripts/get_coverage.py b/aviary/modules/binning/scripts/get_coverage.py index 2f093e8e..8fb7453b 100644 --- a/aviary/modules/binning/scripts/get_coverage.py +++ b/aviary/modules/binning/scripts/get_coverage.py @@ -12,7 +12,8 @@ def get_coverage( threads: int, log: str, ): - os.environ["TMPDIR"] = tmpdir + if tmpdir: os.environ["TMPDIR"] = tmpdir + if long_reads != "none" and not os.path.exists("data/long_cov.tsv"): if long_read_type in ["ont", "ont_hq"]: coverm_cmd = f"coverm contig -t {threads} -r {input_fasta} --single {' '.join(long_reads)} -p minimap2-ont -m length trimmed_mean variance --bam-file-cache-directory data/binning_bams/ --discard-unmapped --min-read-percent-identity 0.85 --output-file data/long_cov.tsv".split() diff --git a/aviary/modules/processor.py b/aviary/modules/processor.py index 57df8d40..ac156e9f 100644 --- a/aviary/modules/processor.py +++ b/aviary/modules/processor.py @@ -92,7 +92,7 @@ def __init__(self, self.conda_prefix = args.conda_prefix - self.tmpdir = os.path.abspath(args.tmpdir) + self.tmpdir = os.path.abspath(args.tmpdir) if args.tmpdir else None self.resources = args.resources self.output = os.path.abspath(args.output) self.threads = args.max_threads @@ -444,7 +444,8 @@ def run_workflow(self, cores=16, profile=None, cluster_retries=None, self._validate_config() cores = max(int(self.threads), cores) - os.environ["TMPDIR"] = self.tmpdir + if self.tmpdir is not None: + os.environ["TMPDIR"] = self.tmpdir for workflow in self.workflows: cmd = ( "snakemake --snakefile {snakefile} --directory {working_dir} " diff --git a/aviary/modules/strain_analysis/strain_analysis.smk b/aviary/modules/strain_analysis/strain_analysis.smk index be5e41d8..463f4016 100644 --- a/aviary/modules/strain_analysis/strain_analysis.smk +++ b/aviary/modules/strain_analysis/strain_analysis.smk @@ -65,7 +65,6 @@ rule lorikeet: params: mag_extension = config['mag_extension'], parallel_genomes = 8, - tmpdir = config['tmpdir'] resources: mem_mb=int(config["max_memory"])*512 threads: diff --git a/config.yaml b/config.yaml index 1697ff82..f8b5a9db 100644 --- a/config.yaml +++ b/config.yaml @@ -50,4 +50,4 @@ ani: none precluster_ani: none precluster_method: none pggb_params: none -tmpdir: /tmp +tmpdir: none