From 3a4d077f4e19bd5eea162aeb6d02179a645764f9 Mon Sep 17 00:00:00 2001 From: lenakinzel Date: Thu, 6 Jan 2022 13:42:06 +0000 Subject: [PATCH 1/8] added use-data-handling flag in config file in order to define whether the reorganization and the archiving within the data-handling --- config/config.yaml | 5 +++ workflow/Snakefile | 48 +++++++++++++++-------------- workflow/schemas/config.schema.yaml | 3 ++ 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index ae6c6d16e..e2241b044 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -12,6 +12,11 @@ human-genome-download-path: - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.28_GRCh38.p13/GCA_000001405.28_GRCh38.p13_genomic.fna.gz data-handling: + # flag for using the following data-handling structure + # True: data-handling structure is used as shown down here + # False: only the sample sheet needs to be updated (manually) + # no data archiving is taking place + use-data-handling: True # path of incoming data, which is moved to the # data directory by the preprocessing script incoming: ../incoming/ diff --git a/workflow/Snakefile b/workflow/Snakefile index f9b5bf3c4..4e6a08194 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -47,34 +47,36 @@ include: "rules/variant_filtration.smk" include: "rules/variant_report.smk" include: "rules/generate_output.smk" include: "rules/benchmarking.smk" -include: "rules/preprocessing.smk" +if config["data-handling"]["use-data-handling"]: + include: "rules/preprocessing.smk" include: "rules/long_read.smk" -rule save_latest_run: - input: - expand( - "results/.indicators/{latest_run}.archived", - latest_run=get_latest_run_date(), - ), - output: - expand( - "".join( - ( - config["data-handling"]["archive"], - "{latest_run}/results_{latest_run}.tar.gz", - ) +if config["data-handling"]["use-data-handling"]: + rule save_latest_run: + input: + expand( + "results/.indicators/{latest_run}.archived", + latest_run=get_latest_run_date(), + ), + output: + expand( + "".join( + ( + config["data-handling"]["archive"], + "{latest_run}/results_{latest_run}.tar.gz", + ) + ), + latest_run=get_latest_run_date(), ), + params: latest_run=get_latest_run_date(), - ), - params: - latest_run=get_latest_run_date(), - log: - expand("logs/save-run/{latest_run}.log", latest_run=get_latest_run_date()), - conda: - "envs/unix.yaml" - shell: - "tar -zcvf {output} results/{params.latest_run} 2> {log} 2>&1" + log: + expand("logs/save-run/{latest_run}.log", latest_run=get_latest_run_date()), + conda: + "envs/unix.yaml" + shell: + "tar -zcvf {output} results/{params.latest_run} 2> {log} 2>&1" checkpoint all: diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml index 36c3fd622..d4d498813 100644 --- a/workflow/schemas/config.schema.yaml +++ b/workflow/schemas/config.schema.yaml @@ -21,6 +21,9 @@ properties: description: download path of human genome reference data-handling: properties: + use-data-handling: + type: boolean + description: flag whether to use data reorganization and archiving or not incoming: type: string description: path of incoming data, which is moved to the data directory by the preprocessing script From 22ec701d0ea68ed0dc9abf29825e6e267cacf2df Mon Sep 17 00:00:00 2001 From: lenakinzel Date: Thu, 6 Jan 2022 17:08:28 +0000 Subject: [PATCH 2/8] changes according to failing formatting test --- workflow/Snakefile | 3 +-- workflow/rules/preprocessing.smk | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 4e6a08194..74e6225a5 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -47,8 +47,7 @@ include: "rules/variant_filtration.smk" include: "rules/variant_report.smk" include: "rules/generate_output.smk" include: "rules/benchmarking.smk" -if config["data-handling"]["use-data-handling"]: - include: "rules/preprocessing.smk" +include: "rules/preprocessing.smk" include: "rules/long_read.smk" diff --git a/workflow/rules/preprocessing.smk b/workflow/rules/preprocessing.smk index 6a6350441..055172a89 100644 --- a/workflow/rules/preprocessing.smk +++ b/workflow/rules/preprocessing.smk @@ -4,12 +4,13 @@ # except according to those terms. -rule update_sample: - input: - "config/pep/samples.csv", - log: - "logs/sample_update/preprocessing/sample_csv_update.txt", - conda: - "../envs/python.yaml" - script: - "../scripts/update-sample-sheet.py" +if config["data-handling"]["use-data-handling"]: + rule update_sample: + input: + "config/pep/samples.csv", + log: + "logs/sample_update/preprocessing/sample_csv_update.txt", + conda: + "../envs/python.yaml" + script: + "../scripts/update-sample-sheet.py" From 337c2dc269c9b4be20647596169659d2256aa4c3 Mon Sep 17 00:00:00 2001 From: Thomas Battenfeld Date: Tue, 11 Jan 2022 15:06:20 +0100 Subject: [PATCH 3/8] fmt --- workflow/Snakefile | 1 + workflow/rules/preprocessing.smk | 1 + 2 files changed, 2 insertions(+) diff --git a/workflow/Snakefile b/workflow/Snakefile index 74e6225a5..d289c9c75 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -52,6 +52,7 @@ include: "rules/long_read.smk" if config["data-handling"]["use-data-handling"]: + rule save_latest_run: input: expand( diff --git a/workflow/rules/preprocessing.smk b/workflow/rules/preprocessing.smk index 055172a89..0a4c8b940 100644 --- a/workflow/rules/preprocessing.smk +++ b/workflow/rules/preprocessing.smk @@ -5,6 +5,7 @@ if config["data-handling"]["use-data-handling"]: + rule update_sample: input: "config/pep/samples.csv", From 7843899e433889870eca3e34cf538e77c3105f68 Mon Sep 17 00:00:00 2001 From: lenakinzel Date: Tue, 11 Jan 2022 14:33:10 +0000 Subject: [PATCH 4/8] solve key error by reverting changes done before in order to solve formatting error --- workflow/Snakefile | 3 ++- workflow/rules/preprocessing.smk | 20 +++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index d289c9c75..547f1d605 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -47,7 +47,8 @@ include: "rules/variant_filtration.smk" include: "rules/variant_report.smk" include: "rules/generate_output.smk" include: "rules/benchmarking.smk" -include: "rules/preprocessing.smk" +if config["data-handling"]["use-data-handling"]: + include: "rules/preprocessing.smk" include: "rules/long_read.smk" diff --git a/workflow/rules/preprocessing.smk b/workflow/rules/preprocessing.smk index 0a4c8b940..6a6350441 100644 --- a/workflow/rules/preprocessing.smk +++ b/workflow/rules/preprocessing.smk @@ -4,14 +4,12 @@ # except according to those terms. -if config["data-handling"]["use-data-handling"]: - - rule update_sample: - input: - "config/pep/samples.csv", - log: - "logs/sample_update/preprocessing/sample_csv_update.txt", - conda: - "../envs/python.yaml" - script: - "../scripts/update-sample-sheet.py" +rule update_sample: + input: + "config/pep/samples.csv", + log: + "logs/sample_update/preprocessing/sample_csv_update.txt", + conda: + "../envs/python.yaml" + script: + "../scripts/update-sample-sheet.py" From aed5b2c0865777276765ec394bef8a8355ea2302 Mon Sep 17 00:00:00 2001 From: lenakinzel Date: Fri, 11 Feb 2022 11:54:36 +0000 Subject: [PATCH 5/8] added second flag in order to switch archiving on/off independently --- config/config.yaml | 7 +++++-- workflow/Snakefile | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index e2241b044..83865f23d 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -13,10 +13,13 @@ human-genome-download-path: data-handling: # flag for using the following data-handling structure - # True: data-handling structure is used as shown down here + # True: data-handling structure is used as shown below # False: only the sample sheet needs to be updated (manually) - # no data archiving is taking place use-data-handling: True + # flag for archiving data + # True: data is archived in path defined below + # False: data is not archived + archive-data: True # path of incoming data, which is moved to the # data directory by the preprocessing script incoming: ../incoming/ diff --git a/workflow/Snakefile b/workflow/Snakefile index 547f1d605..de57cf743 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -47,12 +47,17 @@ include: "rules/variant_filtration.smk" include: "rules/variant_report.smk" include: "rules/generate_output.smk" include: "rules/benchmarking.smk" + + if config["data-handling"]["use-data-handling"]: + include: "rules/preprocessing.smk" + + include: "rules/long_read.smk" -if config["data-handling"]["use-data-handling"]: +if config["data-handling"]["archive-data"]: rule save_latest_run: input: From ea334df90f27d77010f1ba506802df82f88ba23d Mon Sep 17 00:00:00 2001 From: lenakinzel Date: Mon, 14 Feb 2022 10:07:39 +0000 Subject: [PATCH 6/8] added data-handling flags to .test/config --- .tests/config/config.yaml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.tests/config/config.yaml b/.tests/config/config.yaml index 24aaa2d61..2781db98a 100644 --- a/.tests/config/config.yaml +++ b/.tests/config/config.yaml @@ -24,11 +24,21 @@ human-genome-download-path: - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.28_GRCh38.p13/GCA_000001405.28_GRCh38.p13_genomic.fna.gz data-handling: - # path of incoming data + # flag for using the following data-handling structure + # True: data-handling structure is used as shown below + # False: only the sample sheet needs to be updated (manually) + use-data-handling: True + # flag for archiving data + # True: data is archived in path defined below + # False: data is not archived + archive-data: True + # path of incoming data, which is moved to the + # data directory by the preprocessing script incoming: ../incoming/ - # path to store data in the workflow + # path to store data within the workflow data: data/ - # path to archive data from incoming to + # path to archive data from incoming and + # the results from the latest run to archive: ../archive/ quality-criteria: From 1bf3bfa8b3ea3e5eda4c50d880cbbde6aa7bf266 Mon Sep 17 00:00:00 2001 From: lenakinzel Date: Mon, 14 Feb 2022 11:56:58 +0000 Subject: [PATCH 7/8] changed download link for fastq.gz-file in main.yml --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6f578f875..d30d03d1b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -86,7 +86,7 @@ jobs: curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz - curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR574/003/ERR5745913/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz + curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz echo sample_name,fq1,fq2,date,is_amplicon_data,technology > .tests/config/pep/samples.csv echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina >> .tests/config/pep/samples.csv echo ont-test,data/ont_reads.fastq.gz,,2022-01-01,$AMPLICON,ont >> .tests/config/pep/samples.csv From b5511b1d832516c42de89ae37270db87ed9145fb Mon Sep 17 00:00:00 2001 From: Thomas Battenfeld Date: Thu, 17 Feb 2022 08:48:56 +0100 Subject: [PATCH 8/8] fix(ci): changed branch to pull --- .github/workflows/release-please.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 3d08c975d..5061eb6b5 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -26,6 +26,7 @@ jobs: export PRNUMBER=$( echo '${{ steps.release.outputs.pr }}' | jq -r '.number' ) if [ "$PRNUMBER" -eq "$PRNUMBER" ] 2>/dev/null; then echo "::set-output name=pr::$(echo true)" + echo "::set-output name=number::$(echo $PRNUMBER)" else [ -n "$PRNUMBER" ] && echo "::set-output name=pr::$(echo false)" && exit 0 echo "::set-output name=pr::$(echo false)" @@ -82,7 +83,7 @@ jobs: git config user.name github-actions[bot] git config user.email github-actions[bot]@users.noreply.github.com git fetch origin - git pull origin pull/${{ steps.release.outputs.pr }}/head + git pull origin pull/${{ steps.pr_created.outputs.number }}/head - name: Export version as environment variable if: ${{ steps.pr_created.outputs.pr == 'true' || steps.release.outputs.release_created != 'true' }}