From 1316da0474dcfae386af2ad0233063bbb1dc6b4c Mon Sep 17 00:00:00 2001 From: farchaab Date: Fri, 6 Dec 2024 12:55:57 +0100 Subject: [PATCH] remove tax_collapse --- zamp/DBprocess.Snakefile | 5 +-- .../DB_processing/DB_preprocessing.rules | 33 +++++++++---------- zamp/utils.py | 7 ---- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/zamp/DBprocess.Snakefile b/zamp/DBprocess.Snakefile index 530f1dd..45941d0 100644 --- a/zamp/DBprocess.Snakefile +++ b/zamp/DBprocess.Snakefile @@ -42,7 +42,6 @@ TAXONOMY = config.args.taxonomy CLASSIFIERS = config.args.classifier RDP_MEM = config.args.rdp_mem PROCESS = config.args.processing -TAX_COLLAPSE = dict(config.args.tax_collapse) ## Cutadapt args ERRORS = config.args.errors @@ -72,7 +71,9 @@ if "--use-singularity" in sys.argv: workflow.deployment_settings.apptainer_args += ( f" -B {workflow.basedir}:{workflow.basedir}" ) - workflow.deployment_settings.apptainer_args += f" -B {os.path.abspath(DBPATH)}:{os.path.abspath(DBPATH)}" + workflow.deployment_settings.apptainer_args += ( + f" -B {os.path.abspath(DBPATH)}:{os.path.abspath(DBPATH)}" + ) #### Load a dictionnary of singularity containers that will be called from each rule singularity_envs = yaml.safe_load( open(os.path.join(workflow.basedir, "envs", "singularity", "sing_envs.yml"), "r") diff --git a/zamp/rules/DB_processing/DB_preprocessing.rules b/zamp/rules/DB_processing/DB_preprocessing.rules index 8bb4d11..1ed186a 100644 --- a/zamp/rules/DB_processing/DB_preprocessing.rules +++ b/zamp/rules/DB_processing/DB_preprocessing.rules @@ -21,15 +21,15 @@ rule Extract_amplicons_cutadapt: threads: 3 shell: """ - cutadapt \ - --cores {threads} \ - --error-rate {params.errors} \ - --times 1 \ - -o {output} \ - -g "{params.adapter}" \ - --discard-untrimmed \ - --minimum-length {params.minlen} \ - --maximum-length {params.maxlen} \ + cutadapt \\ + --cores {threads} \\ + --error-rate {params.errors} \\ + --times 1 \\ + -o {output} \\ + -g "{params.adapter}" \\ + --discard-untrimmed \\ + --minimum-length {params.minlen} \\ + --maximum-length {params.maxlen} \\ {input} > {log} """ @@ -49,10 +49,10 @@ rule Dereplicate_amplicons: threads: 1 shell: """ - vsearch \ - --derep_fulllength {input} \ - --output {output.fa} \ - --uc {output.uc} \ + vsearch \\ + --derep_fulllength {input} \\ + --output {output.fa} \\ + --uc {output.uc} \\ 2> {log} """ @@ -66,14 +66,13 @@ rule Derep_and_merge_taxonomy: tax=os.path.join("{prefix}", "master", "original_tax.txt"), uc=os.path.join("{prefix}", "QIIME", "DB_amp.uc"), output: - formatted_tax=os.path.join("{prefix}", "QIIME", "DB_amp_taxonomy.txt"), + collapsed=os.path.join("{prefix}", "QIIME", "DB_amp_taxonomy.txt"), all=os.path.join("{prefix}", "QIIME", "DB_amp_all_taxonomy.txt"), - problematic=os.path.join("{prefix}", "QIIME", "problematic_taxa.txt"), + ambiguous=os.path.join("{prefix}", "QIIME", "ambiguous_taxa.txt"), log: os.path.join("{prefix}", "logs", "QIIME", "derep_and_merge.log"), params: db_name=DBNAME, - tax_collapse=TAX_COLLAPSE, threads: 1 script: os.path.join("scripts", "tax_formatting.py") @@ -83,7 +82,7 @@ rule Hash_derep_tax: input: os.path.join("{prefix}", "QIIME", "DB_amp_taxonomy.txt"), os.path.join("{prefix}", "QIIME", "DB_amp_all_taxonomy.txt"), - os.path.join("{prefix}", "QIIME", "problematic_taxa.txt"), + os.path.join("{prefix}", "QIIME", "ambiguous_taxa.txt"), os.path.join("{prefix}", "QIIME", "DB_amp.fasta"), output: os.path.join("{prefix}", "QIIME", "DB_formatted.hash"), diff --git a/zamp/utils.py b/zamp/utils.py index 6b236fd..b9ffa69 100644 --- a/zamp/utils.py +++ b/zamp/utils.py @@ -224,13 +224,6 @@ def db_options(func): help="Extract amplicon regions and merge taxonomy", show_default=True, ), - click.option( - "--tax-collapse", - help="Dictionary of number of ranks to print limit when collapsing names ", - default='{"Species": 5, "Genus": 6}', - callback=validate_dict, - show_default=True, - ), click.option( "--fw-primer", type=str,