From 56276321176f7a6e14875600e98503a4761b20ba Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 22 Jul 2024 07:19:13 +0200 Subject: [PATCH] feat: write manifest files to output (#92) (#99) * feat: write manifest files to output (#92) * wi * wip --- environment.yml | 1 + rules/output/annonars/alphamissense.smk | 12 ++++++ rules/output/annonars/cadd.smk | 10 +++++ rules/output/annonars/cons.smk | 10 +++++ rules/output/annonars/dbnsfp.smk | 12 ++++++ rules/output/annonars/dbscsnv.smk | 12 ++++++ rules/output/annonars/dbsnp.smk | 10 +++++ rules/output/annonars/functional.smk | 12 ++++++ rules/output/annonars/genes.smk | 13 +++++++ rules/output/annonars/gnomad_exomes.smk | 12 ++++++ rules/output/annonars/gnomad_genomes.smk | 12 ++++++ rules/output/annonars/gnomad_mtdna.smk | 12 ++++++ rules/output/annonars/gnomad_sv.smk | 48 ++++++++++++++++++++++++ rules/output/annonars/helix.smk | 12 ++++++ rules/output/annonars/regions.smk | 10 +++++ rules/output/mehari/freqs.smk | 13 +++++++ rules/reduced/annonars.smk | 10 +++++ rules/reduced/hpo.smk | 20 ++++++++++ rules/reduced/mehari.smk | 10 +++++ 19 files changed, 251 insertions(+) diff --git a/environment.yml b/environment.yml index 4b735ce..44d6a60 100644 --- a/environment.yml +++ b/environment.yml @@ -10,6 +10,7 @@ dependencies: - attrs - cattrs - click + - hashdeep - loguru - numpy - pydantic diff --git a/rules/output/annonars/alphamissense.smk b/rules/output/annonars/alphamissense.smk index 4217671..9b3fc10 100644 --- a/rules/output/annonars/alphamissense.smk +++ b/rules/output/annonars/alphamissense.smk @@ -20,6 +20,9 @@ rule output_annonars_alphamissense: # -- build AlphaMissense RocksDB with annon spec_yaml=( "output/full/annonars/alphamissense-{genome_release}-{v_alphamissense}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/annonars/alphamissense-{genome_release}-{v_alphamissense}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -58,4 +61,13 @@ rule output_annonars_alphamissense: # -- build AlphaMissense RocksDB with annon --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/cadd.smk b/rules/output/annonars/cadd.smk index 62c16be..f10e9bc 100644 --- a/rules/output/annonars/cadd.smk +++ b/rules/output/annonars/cadd.smk @@ -53,6 +53,7 @@ rule output_annonars_cadd: # -- build CADD RocksDB with annonars "output/full/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/rocksdb/IDENTITY" ), spec_yaml=("output/full/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/spec.yaml"), + manifest=("output/full/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/MANIFEST.txt"), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -93,4 +94,13 @@ rule output_annonars_cadd: # -- build CADD RocksDB with annonars --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/cons.smk b/rules/output/annonars/cons.smk index 3ea385d..788fe6f 100644 --- a/rules/output/annonars/cons.smk +++ b/rules/output/annonars/cons.smk @@ -11,6 +11,7 @@ rule output_annonars_cons: # -- build UCSC conservation track RocksDB with anno "output/full/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/rocksdb/IDENTITY" ), spec_yaml=("output/full/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/spec.yaml"), + manifest=("output/full/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/MANIFEST.txt"), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -37,4 +38,13 @@ rule output_annonars_cons: # -- build UCSC conservation track RocksDB with anno --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/dbnsfp.smk b/rules/output/annonars/dbnsfp.smk index f34ad60..1873c76 100644 --- a/rules/output/annonars/dbnsfp.smk +++ b/rules/output/annonars/dbnsfp.smk @@ -16,6 +16,9 @@ rule output_annonars_dbnsfp: # -- build dbNSFP RocksDB with annonars "output/full/annonars/dbnsfp-{genome_release}-{v_dbnsfp}+{v_annonars}/rocksdb/IDENTITY" ), spec_yaml=("output/full/annonars/dbnsfp-{genome_release}-{v_dbnsfp}+{v_annonars}/spec.yaml"), + manifest=( + "output/full/annonars/dbnsfp-{genome_release}-{v_dbnsfp}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -59,4 +62,13 @@ rule output_annonars_dbnsfp: # -- build dbNSFP RocksDB with annonars --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/dbscsnv.smk b/rules/output/annonars/dbscsnv.smk index 04c1496..9490149 100644 --- a/rules/output/annonars/dbscsnv.smk +++ b/rules/output/annonars/dbscsnv.smk @@ -18,6 +18,9 @@ rule output_annonars_dbscsnv: # -- build dbscSNV RocksDB with annonars spec_yaml=( "output/full/annonars/dbscsnv-{genome_release}-{v_dbscsnv}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/annonars/dbscsnv-{genome_release}-{v_dbscsnv}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -61,4 +64,13 @@ rule output_annonars_dbscsnv: # -- build dbscSNV RocksDB with annonars --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/dbsnp.smk b/rules/output/annonars/dbsnp.smk index 7724697..5226254 100644 --- a/rules/output/annonars/dbsnp.smk +++ b/rules/output/annonars/dbsnp.smk @@ -11,6 +11,7 @@ rule output_annonars_dbsnp: # -- build dbSNP RocksDB with annonars "output/full/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/rocksdb/IDENTITY" ), spec_yaml=("output/full/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/spec.yaml"), + manifest=("output/full/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/MANIFEST.txt"), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -37,4 +38,13 @@ rule output_annonars_dbsnp: # -- build dbSNP RocksDB with annonars --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/functional.smk b/rules/output/annonars/functional.smk index c4a6d13..325d2c0 100644 --- a/rules/output/annonars/functional.smk +++ b/rules/output/annonars/functional.smk @@ -39,6 +39,9 @@ rule output_annonars_functional: # -- build annonars functional RocksDB file spec_yaml=( "output/full/annonars/functional-{genome_release}-{v_refseq}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/annonars/functional-{genome_release}-{v_refseq}+{v_annonars}/MANIFEST.txt" + ), wildcard_constraints: v_refseq=RE_VERSION, v_annonars=RE_VERSION, @@ -64,4 +67,13 @@ rule output_annonars_functional: # -- build annonars functional RocksDB file --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/genes.smk b/rules/output/annonars/genes.smk index e72b8e7..d461f6f 100644 --- a/rules/output/annonars/genes.smk +++ b/rules/output/annonars/genes.smk @@ -28,6 +28,10 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file "output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{date}+{v_annonars}/" "spec.yaml" ), + manifest=( + "output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{date}+{v_annonars}/" + "MANIFEST.txt" + ), wildcard_constraints: v_acmg_sf=RE_VERSION, v_gnomad_constraints=RE_VERSION, @@ -73,4 +77,13 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/gnomad_exomes.smk b/rules/output/annonars/gnomad_exomes.smk index 0376970..a4e1291 100644 --- a/rules/output/annonars/gnomad_exomes.smk +++ b/rules/output/annonars/gnomad_exomes.smk @@ -13,6 +13,9 @@ rule output_annonars_gnomad_exomes: # -- build gnomAD-exomes RocksDB with annon spec_yaml=( "output/full/annonars/gnomad-exomes-{genome_release}-{v_gnomad}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/annonars/gnomad-exomes-{genome_release}-{v_gnomad}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -55,4 +58,13 @@ rule output_annonars_gnomad_exomes: # -- build gnomAD-exomes RocksDB with annon --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/gnomad_genomes.smk b/rules/output/annonars/gnomad_genomes.smk index 673f574..07c4825 100644 --- a/rules/output/annonars/gnomad_genomes.smk +++ b/rules/output/annonars/gnomad_genomes.smk @@ -13,6 +13,9 @@ rule output_annonars_gnomad_genomes: # -- build gnomAD-genomes RocksDB with ann spec_yaml=( "output/full/annonars/gnomad-genomes-{genome_release}-{v_gnomad}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/annonars/gnomad-genomes-{genome_release}-{v_gnomad}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -55,4 +58,13 @@ rule output_annonars_gnomad_genomes: # -- build gnomAD-genomes RocksDB with ann --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/gnomad_mtdna.smk b/rules/output/annonars/gnomad_mtdna.smk index bfbb712..befa299 100644 --- a/rules/output/annonars/gnomad_mtdna.smk +++ b/rules/output/annonars/gnomad_mtdna.smk @@ -13,6 +13,9 @@ rule output_annonars_gnomad_mtdna: # -- build gnomAD-mtDNA RocksDB with annonar spec_yaml=( "output/full/annonars/gnomad-mtdna-{genome_release}-{v_gnomad}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/annonars/gnomad-mtdna-{genome_release}-{v_gnomad}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -40,4 +43,13 @@ rule output_annonars_gnomad_mtdna: # -- build gnomAD-mtDNA RocksDB with annonar --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/gnomad_sv.smk b/rules/output/annonars/gnomad_sv.smk index 7f52a9f..b5f1473 100644 --- a/rules/output/annonars/gnomad_sv.smk +++ b/rules/output/annonars/gnomad_sv.smk @@ -12,6 +12,9 @@ rule output_annonars_gnomad_sv_grch37_exac: # -- build gnomAD-SV RocksDB with a "output/full/annonars/gnomad-sv-exomes-grch37-{v_gnomad}+{v_annonars}/rocksdb/IDENTITY", ), spec_yaml=("output/full/annonars/gnomad-sv-exomes-grch37-{v_gnomad}+{v_annonars}/spec.yaml"), + manifest=( + "output/full/annonars/gnomad-sv-exomes-grch37-{v_gnomad}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -39,6 +42,15 @@ rule output_annonars_gnomad_sv_grch37_exac: # -- build gnomAD-SV RocksDB with a --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ @@ -55,6 +67,9 @@ rule output_annonars_gnomad_sv_grch37_gnomad_sv2: # -- build gnomAD-SV RocksDB spec_yaml=( "output/full/annonars/gnomad-sv-genomes-grch37-{v_gnomad}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/annonars/gnomad-sv-genomes-grch37-{v_gnomad}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -82,6 +97,15 @@ rule output_annonars_gnomad_sv_grch37_gnomad_sv2: # -- build gnomAD-SV RocksDB --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ @@ -96,6 +120,9 @@ rule output_annonars_gnomad_sv_grch38_gnomad_cnv4: # -- build gnomAD-SV RocksDB "output/full/annonars/gnomad-sv-exomes-grch38-{v_gnomad}+{v_annonars}/rocksdb/IDENTITY", ), spec_yaml=("output/full/annonars/gnomad-sv-exomes-grch38-{v_gnomad}+{v_annonars}/spec.yaml"), + manifest=( + "output/full/annonars/gnomad-sv-exomes-grch38-{v_gnomad}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -123,6 +150,15 @@ rule output_annonars_gnomad_sv_grch38_gnomad_cnv4: # -- build gnomAD-SV RocksDB --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ @@ -136,6 +172,9 @@ rule output_annonars_gnomad_sv_grch38_gnomad_sv4: # -- build gnomAD-SV RocksDB spec_yaml=( "output/full/annonars/gnomad-sv-genomes-grch38-{v_gnomad}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/annonars/gnomad-sv-genomes-grch38-{v_gnomad}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -164,4 +203,13 @@ rule output_annonars_gnomad_sv_grch38_gnomad_sv4: # -- build gnomAD-SV RocksDB --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/helix.smk b/rules/output/annonars/helix.smk index 378ef69..b2db8c7 100644 --- a/rules/output/annonars/helix.smk +++ b/rules/output/annonars/helix.smk @@ -13,6 +13,9 @@ rule output_annonars_helixmtdb: # -- build HelixMtDb RocksDB with annonars spec_yaml=( "output/full/annonars/helixmtdb-{genome_release}-{v_helixmtdb}+{v_annonars}/spec.yaml", ), + manifest=( + "output/full/annonars/helixmtdb-{genome_release}-{v_helixmtdb}+{v_annonars}/MANIFEST.txt", + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -39,4 +42,13 @@ rule output_annonars_helixmtdb: # -- build HelixMtDb RocksDB with annonars --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/annonars/regions.smk b/rules/output/annonars/regions.smk index 1084571..fc17892 100644 --- a/rules/output/annonars/regions.smk +++ b/rules/output/annonars/regions.smk @@ -25,6 +25,7 @@ rule output_annonars_regions: # -- build annonars regions RocksDB file "output/full/annonars/regions-{genome_release}-{date}+{v_annonars}/" "rocksdb/IDENTITY" ), spec_yaml=("output/full/annonars/regions-{genome_release}-{date}+{v_annonars}/spec.yaml"), + manifest=("output/full/annonars/regions-{genome_release}-{date}+{v_annonars}/MANIFEST.txt"), wildcard_constraints: v_refseq=RE_VERSION, v_annonars=RE_VERSION, @@ -49,4 +50,13 @@ rule output_annonars_regions: # -- build annonars regions RocksDB file --value v_annonars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/output/mehari/freqs.smk b/rules/output/mehari/freqs.smk index b1f7483..2a5ebe2 100644 --- a/rules/output/mehari/freqs.smk +++ b/rules/output/mehari/freqs.smk @@ -18,6 +18,10 @@ rule output_mehari_freqs_build: # -- build frequency tables for mehari "output/full/mehari/freqs-{genome_release}-{v_gnomad_genomes}+{v_gnomad_exomes}+" "{v_gnomad_mtdna}+{v_helixmtdb}+{v_annonars}/spec.yaml" ), + manifest=( + "output/full/mehari/freqs-{genome_release}-{v_gnomad_genomes}+{v_gnomad_exomes}+" + "{v_gnomad_mtdna}+{v_helixmtdb}+{v_annonars}/MANIFEST.txt" + ), threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96")) resources: runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"), @@ -79,4 +83,13 @@ rule output_mehari_freqs_build: # -- build frequency tables for mehari --value v_annovars={wildcards.v_annonars} \ --value v_downloader={PV.downloader} \ > {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/reduced/annonars.smk b/rules/reduced/annonars.smk index 67c0b61..433de9a 100644 --- a/rules/reduced/annonars.smk +++ b/rules/reduced/annonars.smk @@ -32,6 +32,7 @@ rule subset_annonars: # -- create exomes subset output: rocksdb_identity="output/reduced-{set_name}/annonars/{name}-{genome_release}-{version_multi}/rocksdb/IDENTITY", spec_yaml="output/reduced-{set_name}/annonars/{name}-{genome_release}-{version_multi}/spec.yaml", + manifest="output/reduced-{set_name}/annonars/{name}-{genome_release}-{version_multi}/MANIFEST.txt", wildcard_constraints: name=RE_NAME, genome_release=RE_GENOME, @@ -51,4 +52,13 @@ rule subset_annonars: # -- create exomes subset --path-beds {input.bed} cp {input.spec_yaml} {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/reduced/hpo.smk b/rules/reduced/hpo.smk index b182c94..374ef6d 100644 --- a/rules/reduced/hpo.smk +++ b/rules/reduced/hpo.smk @@ -16,6 +16,7 @@ rule subset_viguno_pheno_exomes: # -- create exomes subset phenotype_to_genes="output/reduced-exomes/viguno/hpo-{v_hpo}+{v_viguno}/phenotype_to_genes.txt", bin="output/reduced-exomes/viguno/hpo-{v_hpo}+{v_viguno}/hpo.bin", spec_yaml="output/reduced-exomes/viguno/hpo-{v_hpo}+{v_viguno}/spec.yaml", + manifest="output/reduced-exomes/viguno/hpo-{v_hpo}+{v_viguno}/MANIFEST.txt", wildcard_constraints: v_hpo=RE_VERSION, v_viguno=RE_VERSION, @@ -26,6 +27,15 @@ rule subset_viguno_pheno_exomes: # -- create exomes subset cp -a {input.phenotype_to_genes} {output.phenotype_to_genes} cp -a {input.bin} {output.bin} cp -a {input.spec_yaml} {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ @@ -42,6 +52,7 @@ rule subset_worker_pheno_dev: # -- create development subset phenotype_to_genes="output/reduced-dev/viguno/hpo-{v_hpo}+{v_viguno}/phenotype_to_genes.txt", bin="output/reduced-dev/viguno/hpo-{v_hpo}+{v_viguno}/hpo.bin", spec_yaml="output/reduced-dev/viguno/hpo-{v_hpo}+{v_viguno}/spec.yaml", + manifest="output/reduced-dev/viguno/hpo-{v_hpo}+{v_viguno}/MANIFEST.txt", wildcard_constraints: v_hpo=RE_VERSION, v_viguno=RE_VERSION, @@ -55,4 +66,13 @@ rule subset_worker_pheno_dev: # -- create development subset cp -a {input.phenotype_to_genes} {output.phenotype_to_genes} cp -a {input.bin} {output.bin} cp -a {input.spec_yaml} {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """ diff --git a/rules/reduced/mehari.smk b/rules/reduced/mehari.smk index e7a1b16..5881f9d 100644 --- a/rules/reduced/mehari.smk +++ b/rules/reduced/mehari.smk @@ -32,6 +32,7 @@ rule subset_mehari: # -- create exomes subset output: rocksdb_identity="output/reduced-{set_name}/mehari/freqs-{genome_release}-{version_multi}/rocksdb/IDENTITY", spec_yaml="output/reduced-{set_name}/mehari/freqs-{genome_release}-{version_multi}/spec.yaml", + manifest="output/reduced-{set_name}/mehari/freqs-{genome_release}-{version_multi}/MANIFEST.txt", wildcard_constraints: genome_release=RE_GENOME, v_hpo=RE_VERSION, @@ -48,4 +49,13 @@ rule subset_mehari: # -- create exomes subset --path-beds {input.bed} cp {input.spec_yaml} {output.spec_yaml} + + export TMPDIR=$(mktemp -d) + pushd $(dirname {output.spec_yaml}) + rm -f MANIFEST.txt + hashdeep -l -r . >$TMPDIR/MANIFEST.txt + CHECKSUM=$(sha256sum $TMPDIR/MANIFEST.txt | cut -d ' ' -f 1) + echo "## EOF SHA256=$CHECKSUM" >> $TMPDIR/MANIFEST.txt + cp $TMPDIR/MANIFEST.txt MANIFEST.txt + popd """