From be970e813a9022260a4ee9e41d3dd4e83b57a9cc Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 10:37:25 +0200 Subject: [PATCH 1/9] install utils and ps with apt --- Dockerfile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index de9ea0d..99eddae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,15 @@ LABEL org.opencontainers.image.source=https://github.com/metagenlab/assembly_fin LABEL org.opencontainers.image.description="Snakemake-powered cli to download genomes with NCBI datasets" LABEL org.opencontainers.image.licenses=MIT ENV LANG=C.UTF-8 -ENV SHELL=/bin/bash +ENV SHELL=/bin/bash + +USER root +ENV APT_PKGS="curl rsync p7zip-full procps" +RUN apt-get update \ + && apt-get install -y --no-install-recommends ${APT_PKGS} \ + && apt-get clean \ + && rm -rf /var/lib/apt /var/lib/dpkg /var/lib/cache /var/lib/log +USER $MAMBA_USER COPY --chown=$MAMBA_USER:$MAMBA_USER . /pkg RUN micromamba config set extract_threads 1 && \ From 621e8b4ba0d2a33ef8d952ff49931b9b235cfb6e Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 10:37:51 +0200 Subject: [PATCH 2/9] removed curl, rsync and unzip --- env.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/env.yaml b/env.yaml index b632610..b9e0796 100644 --- a/env.yaml +++ b/env.yaml @@ -6,9 +6,6 @@ dependencies: - ncbi-datasets-cli =16.26.2 - taxonkit =0.17.0 - csvtk =0.30.0 - - rsync =3.3.0 - - curl =8.7.1 - - unzip =6.0 - python =3.12 - pip - snakemake-minimal =8.18.1 From 82c790406675c82135c6e53905cdc553d8a104fd Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 10:38:40 +0200 Subject: [PATCH 3/9] added taxon column in summary table --- assembly_finder/workflow/rules/download.smk | 1 + assembly_finder/workflow/scripts/select_assemblies.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/assembly_finder/workflow/rules/download.smk b/assembly_finder/workflow/rules/download.smk index f468b38..5cbe326 100644 --- a/assembly_finder/workflow/rules/download.smk +++ b/assembly_finder/workflow/rules/download.smk @@ -185,6 +185,7 @@ rule filter_genome_summaries: params: rank=RANK, nrank=NRANK, + taxon=TAXON, script: os.path.join(dir.scripts, "select_assemblies.py") diff --git a/assembly_finder/workflow/scripts/select_assemblies.py b/assembly_finder/workflow/scripts/select_assemblies.py index 2084a87..a7c7840 100644 --- a/assembly_finder/workflow/scripts/select_assemblies.py +++ b/assembly_finder/workflow/scripts/select_assemblies.py @@ -16,6 +16,7 @@ def read_json(file): # Params rank = snakemake.params.rank nrank = snakemake.params.nrank +taxon = snakemake.params.taxon # format summary column names summary_df.columns = ( @@ -73,6 +74,9 @@ def read_json(file): "species", ] +if taxon: + tax_cols.insert(0, "taxon") + df[tax_cols].to_csv(snakemake.output.tax, sep="\t", index=None) df[summary_df.columns].to_csv(snakemake.output.gen, sep="\t", index=None) df[["accession"]].drop_duplicates().to_csv( From d0fc28b6570d2b87ae456ad0e0fd0414e169e02a Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 10:39:45 +0200 Subject: [PATCH 4/9] return query when string and fixed args when accession --- assembly_finder/workflow/rules/functions.smk | 31 ++++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/assembly_finder/workflow/rules/functions.smk b/assembly_finder/workflow/rules/functions.smk index fc44693..a2d3d66 100644 --- a/assembly_finder/workflow/rules/functions.smk +++ b/assembly_finder/workflow/rules/functions.smk @@ -21,6 +21,8 @@ def convert_query(wildcards): ("GCF" not in wildcards.query) and ("GCA" not in wildcards.query) ): return wildcards.query.replace("_", " ") + else: + return wildcards.query def get_limit(wildcards, nbs, dic): @@ -43,19 +45,22 @@ def get_abs_path(indir, accessions): KEY = "" if API_KEY: KEY += f"--api-key {API_KEY} " -ARGS = "" -if ANNOTATED: - ARGS += "--annotated " -if ASM_LVL: - ARGS += f"--assembly-level {ASM_LVL} " -if SOURCE: - ARGS += f"--assembly-source {SOURCE} " -if ATYPICAL: - ARGS += "--exclude-atypical " -if MAG: - ARGS += f"--mag {MAG} " -if REFERENCE: - ARGS += "--reference " + +if not TAXON: + ARGS = "" +else: + if ANNOTATED: + ARGS += "--annotated " + if ASM_LVL: + ARGS += f"--assembly-level {ASM_LVL} " + if SOURCE: + ARGS += f"--assembly-source {SOURCE} " + if ATYPICAL: + ARGS += "--exclude-atypical " + if MAG: + ARGS += f"--mag {MAG} " + if REFERENCE: + ARGS += "--reference " GZIP = "" if COMPRESSED: GZIP = "--gzip" From 2621c5cd2e6f87b3f6ab3782390cec655b2020da Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 10:40:14 +0200 Subject: [PATCH 5/9] added tests for string input --- tests/test_assembly_finder.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/test_assembly_finder.py b/tests/test_assembly_finder.py index eb6457c..09a32c0 100644 --- a/tests/test_assembly_finder.py +++ b/tests/test_assembly_finder.py @@ -42,15 +42,31 @@ def test_cli(): exec_command("assembly_finder -v") -def test_taxons(): - """download genomes from taxons""" +def test_taxon_string(): + """download genomes from string input""" + exec_command( + f"assembly_finder --threads {threads} -i bacteria -nb 1 --output {outdir}" + ) + remove_directory(outdir) + + +def test_accession_string(): + """download genomes from string input""" + exec_command( + f"assembly_finder --threads {threads} -i GCF_000418345.1 --accession --output {outdir}" + ) + remove_directory(outdir) + + +def test_taxons_file(): + """download genomes from taxons file""" input = os.path.join(test_data_path, "taxons.tsv") exec_command(f"assembly_finder --threads {threads} -i {input} --output {outdir}") remove_directory(outdir) -def test_accessions(): - """download genomes from accessions""" +def test_accessions_file(): + """download genomes from accessions file""" input = os.path.join(test_data_path, "accessions.txt") exec_command( f"assembly_finder --threads {threads} -i {input} --accession --output {outdir}" From 066cf8a6749be42b40a35b7fabc339a75559cde3 Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 10:40:24 +0200 Subject: [PATCH 6/9] bumped version --- assembly_finder/assembly_finder.VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembly_finder/assembly_finder.VERSION b/assembly_finder/assembly_finder.VERSION index 4d01880..11d9d6c 100644 --- a/assembly_finder/assembly_finder.VERSION +++ b/assembly_finder/assembly_finder.VERSION @@ -1 +1 @@ -0.7.6 \ No newline at end of file +0.7.7 \ No newline at end of file From febca6f55d67b49775300c41130795351b7511df Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 11:37:25 +0200 Subject: [PATCH 7/9] fixed args definition --- assembly_finder/workflow/rules/functions.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembly_finder/workflow/rules/functions.smk b/assembly_finder/workflow/rules/functions.smk index a2d3d66..3735352 100644 --- a/assembly_finder/workflow/rules/functions.smk +++ b/assembly_finder/workflow/rules/functions.smk @@ -45,7 +45,7 @@ def get_abs_path(indir, accessions): KEY = "" if API_KEY: KEY += f"--api-key {API_KEY} " - +ARGS = "" if not TAXON: ARGS = "" else: From 937841b6240b081016c902f10134839eda9193ed Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 12:01:21 +0200 Subject: [PATCH 8/9] changed jobs names --- .github/workflows/unit-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 3de490b..4d71ed4 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -29,7 +29,7 @@ jobs: matrix: python-version: ["3.12"] fail-fast: false - name: Linux Python ${{ matrix.python-version }} + name: Tests Linux steps: - uses: actions/checkout@v4 with: @@ -63,7 +63,7 @@ jobs: python-version: ["3.12"] architecture: ["x64"] fail-fast: false - name: OSX Python ${{ matrix.python-version }} + name: Tests OSX steps: - uses: actions/checkout@v4 with: From 952987f5c28348e97a689bb2998c3302e4039263 Mon Sep 17 00:00:00 2001 From: farchaab Date: Wed, 28 Aug 2024 12:05:38 +0200 Subject: [PATCH 9/9] updated job names --- .github/workflows/unit-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 4d71ed4..10ca602 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -29,7 +29,7 @@ jobs: matrix: python-version: ["3.12"] fail-fast: false - name: Tests Linux + name: Linux steps: - uses: actions/checkout@v4 with: @@ -63,7 +63,7 @@ jobs: python-version: ["3.12"] architecture: ["x64"] fail-fast: false - name: Tests OSX + name: OSX steps: - uses: actions/checkout@v4 with: