Skip to content

Commit

Permalink
Merge pull request #200 from rhysnewell/fix-tmpdir
Browse files Browse the repository at this point in the history
fix tmpdir being set from base command
  • Loading branch information
AroneyS authored May 7, 2024
2 parents 9d97ef3 + 8d3898c commit 691941f
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 40 deletions.
6 changes: 1 addition & 5 deletions aviary/aviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import logging
import os
from datetime import datetime
import tempfile

# Debug
debug={1:logging.CRITICAL,
Expand Down Expand Up @@ -198,7 +197,7 @@ def main():
help='Path to the location that will be treated used for temporary files. If none is specified, the TMPDIR \n'
'environment variable will be used. Can be configured within the `configure` subcommand',
dest='tmpdir',
default=tempfile.gettempdir(),
default=None,
)

base_group.add_argument(
Expand Down Expand Up @@ -1279,9 +1278,6 @@ def manage_env_vars(args):
if args.conda_prefix is None:
args.conda_prefix = Config.get_software_db_path('CONDA_ENV_PATH', '--conda-prefix')

if args.tmpdir is None:
args.tmpdir = Config.get_software_db_path('TMPDIR', '--tmpdir')

try:
if args.gtdb_path is None:
args.gtdb_path = Config.get_software_db_path('GTDBTK_DATA_PATH', '--gtdb-path')
Expand Down
2 changes: 1 addition & 1 deletion aviary/modules/annotation/annotation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ rule eggnog:
params:
mag_extension = config['mag_extension'],
eggnog_db = config['eggnog_folder'],
tmpdir = config["tmpdir"]
tmpdir = config["tmpdir"] if config["tmpdir"] else "$TMPDIR",
output:
done = 'data/eggnog/done'
threads:
Expand Down
31 changes: 4 additions & 27 deletions aviary/modules/assembly/assembly.smk
Original file line number Diff line number Diff line change
Expand Up @@ -367,31 +367,8 @@ rule spades_assembly:
"envs/spades.yaml"
benchmark:
"benchmarks/spades_assembly.benchmark.txt"
shell:
"""
rm -rf data/spades_assembly/tmp;
minimumsize=500000;
actualsize=$(stat -c%s data/short_reads.filt.fastq.gz);
if [ -d "data/spades_assembly/" ]
then
spades.py --restart-from last --memory {params.max_memory} -t {threads} -o data/spades_assembly -k {params.kmer_sizes} --tmp-dir {params.tmpdir} > {log} 2>&1 && \
cp data/spades_assembly/scaffolds.fasta data/spades_assembly.fasta
elif [ $actualsize -ge $minimumsize ]
then
if [ {params.long_read_type} = "ont" ] || [ {params.long_read_type} = "ont_hq" ]
then
spades.py --checkpoints all --memory {params.max_memory} --meta --nanopore {input.long_reads} --12 {input.fastq} \
-o data/spades_assembly -t {threads} -k {params.kmer_sizes} --tmp-dir {params.tmpdir} >> {log} 2>&1 && \
cp data/spades_assembly/scaffolds.fasta data/spades_assembly.fasta
else
spades.py --checkpoints all --memory {params.max_memory} --meta --pacbio {input.long_reads} --12 {input.fastq} \
-o data/spades_assembly -t {threads} -k {params.kmer_sizes} --tmp-dir {params.tmpdir} >> {log} 2>&1 && \
cp data/spades_assembly/scaffolds.fasta data/spades_assembly.fasta
fi
else
mkdir -p {output.spades_folder} && touch {output.fasta}
fi
"""
script:
"scripts/spades_assembly.py"


# Perform short read assembly only with no other steps
Expand Down Expand Up @@ -458,14 +435,14 @@ rule spades_assembly_coverage:
log:
"logs/spades_assembly_coverage.log"
params:
tmpdir = config["tmpdir"]
tmpdir = f"TMPDIR={config['tmpdir']}" if config["tmpdir"] else ""
conda:
"../../envs/coverm.yaml"
benchmark:
"benchmarks/spades_assembly_coverage.benchmark.txt"
shell:
"""
TMPDIR={params.tmpdir} coverm contig -m metabat -t {threads} -r {input.fasta} --interleaved {input.fastq} --bam-file-cache-directory data/cached_bams/ > {output.assembly_cov} 2> {log};
{params.tmpdir} coverm contig -m metabat -t {threads} -r {input.fasta} --interleaved {input.fastq} --bam-file-cache-directory data/cached_bams/ > {output.assembly_cov} 2> {log};
mv data/cached_bams/*.bam {output.bam} && samtools index -@ {threads} {output.bam} 2>> {log}
"""

Expand Down
13 changes: 11 additions & 2 deletions aviary/modules/assembly/scripts/assemble_short_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ def assemble_short_reads(
:return:
'''

if not tmp_dir:
try:
tmp_dir = os.environ["TMPDIR"]
except KeyError:
tmp_dir_arg = ""

if tmp_dir:
tmp_dir_arg = f"--tmp-dir {tmp_dir}"

# deal with read sets i.e. are we coassembling? Which assembler are we using?
# Non co-assembled reads are handled the same for each assembler
if read_set1 != 'none':
Expand Down Expand Up @@ -86,7 +95,7 @@ def assemble_short_reads(
# Run chosen assembler
if use_megahit:
max_memory_in_bytes = max_memory * 1024*1024*1024
command = f"megahit {read_string} -t {threads} -m {max_memory_in_bytes} -o data/megahit_assembly --tmp-dir {tmp_dir}"
command = f"megahit {read_string} -t {threads} -m {max_memory_in_bytes} -o data/megahit_assembly {tmp_dir_arg}"

with open(log, 'a') as logf:
logf.write(f"Queueing command {command}\n")
Expand All @@ -97,7 +106,7 @@ def assemble_short_reads(
else:
kmers = " ".join(kmer_sizes)
command = f"spades.py --memory {max_memory} --meta -t {threads} " \
f"-o data/short_read_assembly {read_string} -k {kmers} --tmp-dir {tmp_dir}"
f"-o data/short_read_assembly {read_string} -k {kmers} {tmp_dir_arg}"
with open(log, 'a') as logf:
logf.write(f"Queueing command {command}\n")
subprocess.run(command.split(), stdout=logf, stderr=subprocess.STDOUT)
Expand Down
89 changes: 89 additions & 0 deletions aviary/modules/assembly/scripts/spades_assembly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import subprocess
import os
import sys
from typing import List

def spades_asssembly(
input_fastq: str,
input_long_reads: str,
output_fasta: str,
output_spades_folder: str,
max_memory: int,
threads: int,
kmer_sizes: List[int],
tmp_dir: str,
long_read_type: str,
log: str):

'''
Assemble short reads and long reads (if any) using spades
:param input_fastq: short reads fastq file
:param input_long_reads: long reads fastq file
:param output_fasta: output fasta file
:param output_spades_folder: output spades folder
:param max_memory: maximum memory to use
:param threads: number of threads
:param tmpdir: temporary directory
:param kmer_sizes: list of kmer sizes
:param long_read_type: type of long reads
:param log: log file
:return:
'''
if tmp_dir:
tmp_dir_arg = f"--tmp-dir {tmp_dir}"
else:
try:
tmp_dir = os.environ["TMPDIR"]
tmp_dir_arg = f"--tmp-dir {tmp_dir}"
except KeyError:
tmp_dir_arg = ""

if os.path.exists("data/spades_assembly/tmp"):
with open(log, 'a') as logf:
subprocess.run("rm -rf data/spades_assembly/tmp".split(), stdout=logf, stderr=subprocess.STDOUT)
# remove existing temporary directory
minimumsize=500000
actualsize = int(subprocess.check_output('stat -c%s data/short_reads.filt.fastq.gz', shell=True))
# check if directory exists
if os.path.exists("data/spades_assembly"):
# resume previous assembly
command = f"spades.py --restart-from last --memory {max_memory} -t {threads} " \
f"-o data/spades_assembly -k {kmer_sizes} {tmp_dir_arg}"
# run cmd
with open(log, 'a') as logf:
logf.write(f"Queueing command {command}\n")
subprocess.run(command.split(), stdout=logf, stderr=subprocess.STDOUT)
subprocess.run("cp data/spades_assembly/scaffolds.fasta data/spades_assembly.fasta".split(), stdout=logf, stderr=subprocess.STDOUT)
elif actualsize >= minimumsize:
if long_read_type in ["ont","ont_hq"]:
command = f"spades.py --checkpoints all --memory {max_memory} --meta --nanopore {input_long_reads} --12 {input_fastq} "\
f"-o data/spades_assembly -t {threads} -k {kmer_sizes} {tmp_dir_arg} "
else:
command = f"spades.py --checkpoints all --memory {max_memory} --meta --pacbio {input_long_reads} --12 {input_fastq} "\
f"-o data/spades_assembly -t {threads} -k {kmer_sizes} {tmp_dir_arg} "
# run cmd
with open(log, 'a') as logf:
logf.write(f"Queueing command {command}\n")
subprocess.run(command.split(), stdout=logf, stderr=subprocess.STDOUT)
subprocess.run("cp data/spades_assembly/scaffolds.fasta data/spades_assembly.fasta".split(), stdout=logf, stderr=subprocess.STDOUT)
else:
with open(log, 'a') as logf:
subprocess.run(f"mkdir -p {output.spades_folder} && touch {output.fasta}".split(), stdout=logf, stderr=subprocess.STDOUT)


if __name__ == '__main__':
log = snakemake.log[0]
with open(log, 'w') as logf: pass

spades_asssembly(
snakemake.input.fastq,
snakemake.input.long_reads,
snakemake.output.fasta,
snakemake.output.spades_folder,
snakemake.params.max_memory,
snakemake.threads,
snakemake.params.kmer_sizes,
snakemake.params.tmpdir,
snakemake.params.long_read_type,
log
)
3 changes: 2 additions & 1 deletion aviary/modules/binning/scripts/get_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ def get_coverage(
threads: int,
log: str,
):
os.environ["TMPDIR"] = tmpdir
if tmpdir: os.environ["TMPDIR"] = tmpdir

if long_reads != "none" and not os.path.exists("data/long_cov.tsv"):
if long_read_type in ["ont", "ont_hq"]:
coverm_cmd = f"coverm contig -t {threads} -r {input_fasta} --single {' '.join(long_reads)} -p minimap2-ont -m length trimmed_mean variance --bam-file-cache-directory data/binning_bams/ --discard-unmapped --min-read-percent-identity 0.85 --output-file data/long_cov.tsv".split()
Expand Down
5 changes: 3 additions & 2 deletions aviary/modules/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def __init__(self,


self.conda_prefix = args.conda_prefix
self.tmpdir = os.path.abspath(args.tmpdir)
self.tmpdir = os.path.abspath(args.tmpdir) if args.tmpdir else None
self.resources = args.resources
self.output = os.path.abspath(args.output)
self.threads = args.max_threads
Expand Down Expand Up @@ -446,7 +446,8 @@ def run_workflow(self, cores=16, profile=None, cluster_retries=None,
self._validate_config()

cores = max(int(self.threads), cores)
os.environ["TMPDIR"] = self.tmpdir
if self.tmpdir is not None:
os.environ["TMPDIR"] = self.tmpdir
for workflow in self.workflows:
cmd = (
"snakemake --snakefile {snakefile} --directory {working_dir} "
Expand Down
1 change: 0 additions & 1 deletion aviary/modules/strain_analysis/strain_analysis.smk
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ rule lorikeet:
params:
mag_extension = config['mag_extension'],
parallel_genomes = 8,
tmpdir = config['tmpdir']
resources:
mem_mb=int(config["max_memory"])*512
threads:
Expand Down
2 changes: 1 addition & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ ani: none
precluster_ani: none
precluster_method: none
pggb_params: none
tmpdir: /tmp
tmpdir: none

0 comments on commit 691941f

Please sign in to comment.