diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6f578f875..d30d03d1b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -86,7 +86,7 @@ jobs: curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz - curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR574/003/ERR5745913/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz + curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz echo sample_name,fq1,fq2,date,is_amplicon_data,technology > .tests/config/pep/samples.csv echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina >> .tests/config/pep/samples.csv echo ont-test,data/ont_reads.fastq.gz,,2022-01-01,$AMPLICON,ont >> .tests/config/pep/samples.csv diff --git a/.tests/config/config.yaml b/.tests/config/config.yaml index f7acb92b9..a08709b7c 100644 --- a/.tests/config/config.yaml +++ b/.tests/config/config.yaml @@ -24,11 +24,21 @@ human-genome-download-path: - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.28_GRCh38.p13/GCA_000001405.28_GRCh38.p13_genomic.fna.gz data-handling: - # path of incoming data + # flag for using the following data-handling structure + # True: data-handling structure is used as shown below + # False: only the sample sheet needs to be updated (manually) + use-data-handling: True + # flag for archiving data + # True: data is archived in path defined below + # False: data is not archived + archive-data: True + # path of incoming data, which is moved to the + # data directory by the preprocessing script incoming: ../incoming/ - # path to store data in the workflow + # path to store data 
within the workflow data: data/ - # path to archive data from incoming to + # path to archive data from incoming and + # the results from the latest run to archive: ../archive/ quality-criteria: diff --git a/config/config.yaml b/config/config.yaml index 120a27e48..78406e544 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -12,6 +12,14 @@ human-genome-download-path: - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.28_GRCh38.p13/GCA_000001405.28_GRCh38.p13_genomic.fna.gz data-handling: + # flag for using the following data-handling structure + # True: data-handling structure is used as shown below + # False: only the sample sheet needs to be updated (manually) + use-data-handling: True + # flag for archiving data + # True: data is archived in path defined below + # False: data is not archived + archive-data: True # path of incoming data, which is moved to the # data directory by the preprocessing script incoming: ../incoming/ diff --git a/workflow/Snakefile b/workflow/Snakefile index deb6f638f..fda56fc2d 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -47,35 +47,43 @@ include: "rules/variant_filtration.smk" include: "rules/variant_report.smk" include: "rules/generate_output.smk" include: "rules/benchmarking.smk" -include: "rules/preprocessing.smk" + + +if config["data-handling"]["use-data-handling"]: + + include: "rules/preprocessing.smk" + + include: "rules/long_read.smk" include: "rules/lineage_variant_calling.smk" -rule save_latest_run: - input: - expand( - "results/.indicators/{latest_run}.archived", - latest_run=get_latest_run_date(), - ), - output: - expand( - "".join( - ( - config["data-handling"]["archive"], - "{latest_run}/results_{latest_run}.tar.gz", - ) +if config["data-handling"]["archive-data"]: + + rule save_latest_run: + input: + expand( + "results/.indicators/{latest_run}.archived", + latest_run=get_latest_run_date(), ), + output: + expand( + "".join( + ( + config["data-handling"]["archive"], + 
"{latest_run}/results_{latest_run}.tar.gz", + ) + ), + latest_run=get_latest_run_date(), + ), + params: latest_run=get_latest_run_date(), - ), - params: - latest_run=get_latest_run_date(), - log: - expand("logs/save-run/{latest_run}.log", latest_run=get_latest_run_date()), - conda: - "envs/unix.yaml" - shell: - "tar -zcvf {output} results/{params.latest_run} 2> {log} 2>&1" + log: + expand("logs/save-run/{latest_run}.log", latest_run=get_latest_run_date()), + conda: + "envs/unix.yaml" + shell: + "tar -zcvf {output} results/{params.latest_run} > {log} 2>&1" checkpoint all: diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml index 36c3fd622..d4d498813 100644 --- a/workflow/schemas/config.schema.yaml +++ b/workflow/schemas/config.schema.yaml @@ -21,6 +21,9 @@ properties: description: download path of human genome reference data-handling: properties: + use-data-handling: + type: boolean + description: flag whether to use data reorganization and archiving or not incoming: type: string description: path of incoming data, which is moved to the data directory by the preprocessing script