Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make incoming and archive structure data-handling optional #443

Merged
merged 9 commits into from
Feb 16, 2022
5 changes: 5 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ human-genome-download-path:
- ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.28_GRCh38.p13/GCA_000001405.28_GRCh38.p13_genomic.fna.gz

data-handling:
# flag for enabling the data-handling structure configured below
# True: the data-handling structure is used as configured below
# False: only the sample sheet needs to be updated (manually);
# no data archiving takes place
use-data-handling: True
# path of incoming data, which is moved to the
# data directory by the preprocessing script
incoming: ../incoming/
Expand Down
49 changes: 26 additions & 23 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,34 +47,37 @@ include: "rules/variant_filtration.smk"
include: "rules/variant_report.smk"
include: "rules/generate_output.smk"
include: "rules/benchmarking.smk"
include: "rules/preprocessing.smk"
if config["data-handling"]["use-data-handling"]:
include: "rules/preprocessing.smk"
include: "rules/long_read.smk"


rule save_latest_run:
input:
expand(
"results/.indicators/{latest_run}.archived",
latest_run=get_latest_run_date(),
),
output:
expand(
"".join(
(
config["data-handling"]["archive"],
"{latest_run}/results_{latest_run}.tar.gz",
)
if config["data-handling"]["use-data-handling"]:

rule save_latest_run:
input:
expand(
"results/.indicators/{latest_run}.archived",
latest_run=get_latest_run_date(),
),
output:
expand(
"".join(
(
config["data-handling"]["archive"],
"{latest_run}/results_{latest_run}.tar.gz",
)
),
latest_run=get_latest_run_date(),
),
params:
latest_run=get_latest_run_date(),
),
params:
latest_run=get_latest_run_date(),
log:
expand("logs/save-run/{latest_run}.log", latest_run=get_latest_run_date()),
conda:
"envs/unix.yaml"
shell:
"tar -zcvf {output} results/{params.latest_run} 2> {log} 2>&1"
log:
expand("logs/save-run/{latest_run}.log", latest_run=get_latest_run_date()),
conda:
"envs/unix.yaml"
shell:
"tar -zcvf {output} results/{params.latest_run} 2> {log} 2>&1"


checkpoint all:
Expand Down
3 changes: 3 additions & 0 deletions workflow/schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ properties:
description: download path of human genome reference
data-handling:
properties:
use-data-handling:
type: boolean
description: flag whether to use data reorganization and archiving or not
incoming:
type: string
description: path of incoming data, which is moved to the data directory by the preprocessing script
Expand Down