diff --git a/mess/workflow/rules/processing/reads.smk b/mess/workflow/rules/processing/reads.smk
index 9974c74..68c4d98 100644
--- a/mess/workflow/rules/processing/reads.smk
+++ b/mess/workflow/rules/processing/reads.smk
@@ -239,9 +239,9 @@ if BAM:
 
     rule download_taxdump:
         output:
-            temp(os.path.join(TAXONKIT, "taxdump.tar.gz")),
+            os.path.join(TAXONKIT, "taxdump.tar.gz"),
         log:
-            os.path.join(dir.out.logs, "curl", "taxdump.log"),
+            os.path.join(dir.out.logs, "curl.log"),
         resources:
             mem_mb=config.resources.sml.mem,
             mem=str(config.resources.sml.mem) + "MB",
@@ -265,13 +265,11 @@ if BAM:
         params:
             dir=TAXONKIT,
         log:
-            os.path.join(dir.out.logs, "tar", "taxdump.log"),
+            os.path.join(dir.out.logs, "taxdump.log"),
         resources:
             mem_mb=config.resources.sml.mem,
             mem=str(config.resources.sml.mem) + "MB",
             time=config.resources.sml.time,
-        log:
-            os.path.join(dir.out.logs, "wget", "taxdump.log"),
         conda:
             os.path.join(dir.env, "utils.yml")
         shell:
@@ -482,16 +480,12 @@ if not SKIP_SHUFFLE:
             os.path.join(dir.out.fastq, "{sample}_R{p}.fq.gz")
             if PAIRED
             else os.path.join(dir.out.fastq, "{sample}.fq.gz"),
-        conda:
-            os.path.join(dir.env, "seqkit.yml")
         benchmark:
             os.path.join(
                 dir.out.bench, "seqkit", "anonymize", "{sample}_R{p}.txt"
             ) if PAIRED else os.path.join(
                 dir.out.bench, "seqkit", "anonymize", "{sample}.txt"
             )
-        params:
-            seqkit_replace,
         log:
             os.path.join(dir.out.logs, "seqkit", "replace", "{sample}_R{p}.log")
             if PAIRED
@@ -499,13 +493,45 @@ if not SKIP_SHUFFLE:
             os.path.join(dir.out.logs, "anonymized_reads", "{sample}_R{p}.tsv")
             if PAIRED
             else os.path.join(dir.out.logs, "anonymized_reads", "{sample}.tsv"),
+        params:
+            seqkit_replace,
         resources:
             mem_mb=config.resources.sml.mem,
             mem=str(config.resources.sml.mem) + "MB",
             time=config.resources.norm.time,
+        conda:
+            os.path.join(dir.env, "seqkit.yml")
         shell:
             """
             seqkit seq {input} | seqkit replace \\
             -p .+ -r "{params}" -o {output} 2> {log[0]}
             paste -d '\t' <(seqkit seq -n {output}) <(seqkit seq -n {input}) > {log[1]}
             """
+
+
+# Delete the intermediate processing directory once every listed input
+# exists, then touch a temp() marker so the step has a declarable output.
+rule cleanup_files:
+    input:
+        list_reads,
+        os.path.join(dir.out.base, "replicates.tsv"),
+        os.path.join(dir.out.base, "samples.tsv"),
+        os.path.join(dir.out.processing, "split"),
+    output:
+        temp(os.path.join(dir.out.base, "cleanup.done")),
+    resources:
+        mem_mb=config.resources.sml.mem,
+        mem=str(config.resources.sml.mem) + "MB",
+        time=config.resources.norm.time,
+    params:
+        # Referenced by name in the shell command, so reordering these
+        # entries can never point `rm -rf` at a different directory.
+        proc=dir.out.processing,
+        base=dir.out.base,
+    conda:
+        os.path.join(dir.env, "utils.yml")
+    shell:
+        """
+        rm -rf {params.proc}
+        touch {output}
+        """