IKIM-Essen · thomasbtf · Feb 16, 2022 · Jan 6, 2022 · Jan 6, 2022 · Jan 11, 2022
diff --git a/config/config.yaml b/config/config.yaml
@@ -12,6 +12,11 @@ human-genome-download-path:
   - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.28_GRCh38.p13/GCA_000001405.28_GRCh38.p13_genomic.fna.gz
 
 data-handling:
+  # flag that enables the data-handling structure configured below
+  # True: the data-handling structure below is used
+  # False: only the sample sheet needs to be updated (manually);
+  #        no data archiving takes place
+  use-data-handling: True
   # path of incoming data, which is moved to the
   # data directory by the preprocessing script
   incoming: ../incoming/

diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -47,34 +47,37 @@ include: "rules/variant_filtration.smk"
 include: "rules/variant_report.smk"
 include: "rules/generate_output.smk"
 include: "rules/benchmarking.smk"
-include: "rules/preprocessing.smk"
+if config["data-handling"]["use-data-handling"]:
+    include: "rules/preprocessing.smk"
 include: "rules/long_read.smk"
 
 
-rule save_latest_run:
-    input:
-        expand(
-            "results/.indicators/{latest_run}.archived",
-            latest_run=get_latest_run_date(),
-        ),
-    output:
-        expand(
-            "".join(
-                (
-                    config["data-handling"]["archive"],
-                    "{latest_run}/results_{latest_run}.tar.gz",
-                )
+if config["data-handling"]["use-data-handling"]:  # archiving is optional
+
+    rule save_latest_run:
+        input:
+            expand(
+                "results/.indicators/{latest_run}.archived",
+                latest_run=get_latest_run_date(),
             ),
+        output:
+            expand(
+                "".join(
+                    (
+                        config["data-handling"]["archive"],
+                        "{latest_run}/results_{latest_run}.tar.gz",
+                    )
+                ),
+                latest_run=get_latest_run_date(),
+            ),
+        params:
             latest_run=get_latest_run_date(),
-        ),
-    params:
-        latest_run=get_latest_run_date(),
-    log:
-        expand("logs/save-run/{latest_run}.log", latest_run=get_latest_run_date()),
-    conda:
-        "envs/unix.yaml"
-    shell:
-        "tar -zcvf {output} results/{params.latest_run} 2> {log} 2>&1"
+        log:
+            expand("logs/save-run/{latest_run}.log", latest_run=get_latest_run_date()),
+        conda:
+            "envs/unix.yaml"
+        shell:
+            "tar -zcvf {output} results/{params.latest_run} > {log} 2>&1"
 
 
 checkpoint all:

diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml
@@ -21,6 +21,9 @@ properties:
     description: download path of human genome reference
   data-handling:
     properties:
+      use-data-handling:
+        type: boolean
+        description: flag whether to use data reorganization and archiving or not
       incoming:
         type: string
         description: path of incoming data, which is moved to the data directory by the preprocessing script