diff --git a/_viash.yaml b/_viash.yaml
index 8255ca231..02a348ca5 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -11,5 +11,5 @@ config_mods: |
   .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task_grn_inference'
   .platforms[.type == "nextflow"].directives.tag := "$id"
   .platforms[.type == "nextflow"].auto.simplifyOutput := false
-  .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h", veryveryhightime : "time = 48.h" }
+  .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h", veryveryhightime : "time = 48.h", threedaystime : "time = 72.h", oneweektime : "time = 168.h" }
   .platforms[.type == "nextflow"].config.script := "process.errorStrategy = 'ignore'"
\ No newline at end of file
diff --git a/scripts/run_benchmark_single_omics.sh b/scripts/run_benchmark_single_omics.sh
index 3b8b1f5b6..a0421b5c1 100644
--- a/scripts/run_benchmark_single_omics.sh
+++ b/scripts/run_benchmark_single_omics.sh
@@ -2,12 +2,11 @@
 # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
 RUN_ID="single_omics_try1"
-# resources_dir="s3://openproblems-data/resources/grn"
-# publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
-
-resources_dir="./resources_test/"
-publish_dir="output/${RUN_ID}"
+resources_dir="s3://openproblems-data/resources/grn"
+publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
+# resources_dir="./resources_test/"
+# publish_dir="output/${RUN_ID}"
 
 reg_type=ridge
 subsample=-2
diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
index 9d8c2c911..c20f92480 100644
--- a/src/api/comp_method.yaml
+++ b/src/api/comp_method.yaml
@@ -31,6 +31,13 @@ functionality:
       type: integer
       direction: input
       default: 4
+    - name: --tf_all
+      type: file
+      example: resources/prior/tf_all.csv
+      required: false
+    - name: --max_n_links
+      type: integer
+      default: 50000
 
   test_resources:
     - type: python_script
diff --git a/src/methods/multi_omics/scglue/config.vsh.yaml b/src/methods/multi_omics/scglue/config.vsh.yaml
index b3b5f22e8..71e4ac0bd 100644
--- a/src/methods/multi_omics/scglue/config.vsh.yaml
+++ b/src/methods/multi_omics/scglue/config.vsh.yaml
@@ -41,4 +41,4 @@ platforms:
   - type: native
   - type: nextflow
     directives:
-      label: [veryveryhightime,midmem,midcpu]
+      label: [oneweektime,midmem,highcpu]
diff --git a/src/methods/multi_omics/scglue_ns/config.vsh.yaml b/src/methods/multi_omics/scglue_ns/config.vsh.yaml
index 9c888c2d3..805bc8b5b 100644
--- a/src/methods/multi_omics/scglue_ns/config.vsh.yaml
+++ b/src/methods/multi_omics/scglue_ns/config.vsh.yaml
@@ -26,4 +26,4 @@ functionality:
 platforms:
   - type: nextflow
     directives:
-      label: [ veryveryhightime, midmem, highcpu ]
+      label: [oneweektime,midmem,highcpu]
diff --git a/src/methods/single_omics/ennet/config.novsh.yaml b/src/methods/single_omics/ennet/config.novsh.yaml
deleted file mode 100644
index b274f064d..000000000
--- a/src/methods/single_omics/ennet/config.novsh.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-functionality:
-  name: ennet
-  info:
-    label: ennet
-    summary: "GRN inference using ENNET"
-    description: |
-      GRN inference using ENNET.
-    documentation_url: https://doi.org/10.1186/1752-0509-7-106
-  arguments:
-    - name: --multiomics_rna
-      type: file
-      example: resources/grn-benchmark/multiomics_rna.h5ad
-      info:
-        label: multiomics_rna
-        summary: "Multiomics RNA data"
-        file_type: rds
-        columns:
-          - name: dummpy
-            type: string
-            required: false
-      required: true
-      must_exist: true
-    - name: --tfs
-      type: file
-      example: resources/prior/tf_all.csv
-      info:
-        label: tfs
-        summary: "List of putative TFs"
-        file_type: csv
-      required: true
-      must_exist: true
-    - name: --prediction
-      __merge__: ../../../api/file_prediction.yaml
-      required: true
-      direction: output
-    - name: --max_n_links
-      type: integer
-      default: 50000
-    - name: --temp_dir
-      type: file
-      direction: output
-      default: 'output/ennet'
-    - name: --num_workers
-      type: integer
-      direction: input
-      default: 4
-  resources:
-    - type: r_script
-      path: script.R
-
-platforms:
-  - type: docker
-    image: janursa/figr:19-08-2024
-    setup:
-      - type: r
-        packages: [ foreach, plyr, anndata, dplyr ]
-      - type: docker
-        run: |
-          wget http://github.com/slawekj/ennet/archive/master.tar.gz -O ennet.tar.gz && \
-          tar -xvzf ennet.tar.gz && \
-          cd ennet-master && \
-          R CMD build ennet && \
-          R CMD INSTALL ennet
-  - type: native
-  - type: nextflow
-    directives:
-      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/ennet/config.vsh.yaml b/src/methods/single_omics/ennet/config.vsh.yaml
new file mode 100644
index 000000000..66e4e9835
--- /dev/null
+++ b/src/methods/single_omics/ennet/config.vsh.yaml
@@ -0,0 +1,32 @@
+__merge__: ../../../api/comp_method.yaml
+
+functionality:
+  name: ennet
+  namespace: "grn_methods"
+  info:
+    label: ennet
+    summary: "GRN inference using ENNET"
+    description: |
+      GRN inference using ENNET.
+    documentation_url: https://doi.org/10.1186/1752-0509-7-106
+  resources:
+    - type: r_script
+      path: script.R
+
+platforms:
+  - type: docker
+    image: janursa/figr:19-08-2024
+    setup:
+      - type: r
+        packages: [ foreach, plyr, anndata, dplyr ]
+      - type: docker
+        run: |
+          wget http://github.com/slawekj/ennet/archive/master.tar.gz -O ennet.tar.gz && \
+          tar -xvzf ennet.tar.gz && \
+          cd ennet-master && \
+          R CMD build ennet && \
+          R CMD INSTALL ennet
+  - type: native
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/ennet/script.R b/src/methods/single_omics/ennet/script.R
index 787e42c7a..f0d76a430 100644
--- a/src/methods/single_omics/ennet/script.R
+++ b/src/methods/single_omics/ennet/script.R
@@ -5,7 +5,7 @@ library(dplyr)
 ## VIASH START
 par <- list(
   "multiomics_rna" = 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
-  "tfs" = 'resources/prior/tf_all.csv',
+  "tf_all" = 'resources/prior/tf_all.csv',
   "prediction" = 'output/ennet/prediction.csv',
   "temp_dir": 'output/ennet',
   "max_n_links": 50000
@@ -29,7 +29,7 @@ mask <- (zero_proportion <= 0.9)
 X <- X[mask,]
 
 # Load list of putative TFs
-dat <- read.csv(par$tfs, header = FALSE)
+dat <- read.csv(par$tf_all, header = FALSE)
 Tf <- which(gene_names %in% dat$V1)
 
 # Run GRN inference method
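Note: the rename from `--tfs` to `--tf_all` in the script above (and in the other single-omics scripts below) lines the components up with the shared `--tf_all` argument now declared once in `src/api/comp_method.yaml`. A minimal Python sketch of the consumption pattern these scripts share, assuming the documented file layout (a headerless CSV with one TF symbol per row):

```python
import pandas as pd

# tf_all.csv is assumed headerless: one transcription-factor symbol per row
df = pd.read_csv("resources/prior/tf_all.csv", header=None, names=["gene_name"])
tf_all = set(df["gene_name"])

# restrict candidate regulators to TFs present in the expression matrix
gene_names = ["STAT1", "GATA3", "ACTB"]  # hypothetical gene annotation
tf_names = [g for g in gene_names if g in tf_all]
```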
diff --git a/src/methods/single_omics/genie3/config.novsh.yaml b/src/methods/single_omics/genie3/config.novsh.yaml
deleted file mode 100644
index 97a02557f..000000000
--- a/src/methods/single_omics/genie3/config.novsh.yaml
+++ /dev/null
@@ -1,63 +0,0 @@
-functionality:
-  name: genie3
-  info:
-    label: genie3
-    summary: "GRN inference using GENIE3"
-    description: |
-      GRN inference using GENIE3.
-    documentation_url: https://www.bioconductor.org/packages/release/bioc/html/GENIE3.html
-  arguments:
-    - name: --multiomics_rna
-      type: file
-      example: resources/grn-benchmark/multiomics_rna.h5ad
-      info:
-        label: multiomics_rna
-        summary: "Multiomics RNA data"
-        file_type: rds
-        columns:
-          - name: dummpy
-            type: string
-            required: false
-      required: true
-      must_exist: true
-    - name: --tfs
-      type: file
-      example: resources/prior/tf_all.csv
-      info:
-        label: tfs
-        summary: "List of putative TFs"
-        file_type: csv
-      required: true
-      must_exist: true
-    - name: --prediction
-      __merge__: ../../../api/file_prediction.yaml
-      required: true
-      direction: output
-    - name: --max_n_links
-      type: integer
-      default: 50000
-    - name: --temp_dir
-      type: file
-      direction: output
-      default: 'output/genie3'
-    - name: --num_workers
-      type: integer
-      direction: input
-      default: 4
-  resources:
-    - type: python_script
-      path: script.py
-
-platforms:
-  - type: docker
-    image: continuumio/anaconda3:2024.02-1
-    setup:
-      - type: docker
-        run: |
-          conda install -y -c bioconda arboreto pandas
-      - type: python
-        packages: [ anndata ]
-  - type: native
-  - type: nextflow
-    directives:
-      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/genie3/config.vsh.yaml b/src/methods/single_omics/genie3/config.vsh.yaml
new file mode 100644
index 000000000..6aa4cdbb9
--- /dev/null
+++ b/src/methods/single_omics/genie3/config.vsh.yaml
@@ -0,0 +1,29 @@
+__merge__: ../../../api/comp_method.yaml
+
+functionality:
+  name: genie3
+  namespace: "grn_methods"
+  info:
+    label: genie3
+
+    summary: "GRN inference using GENIE3"
+    description: |
+      GRN inference using GENIE3.
+    documentation_url: https://www.bioconductor.org/packages/release/bioc/html/GENIE3.html
+  resources:
+    - type: python_script
+      path: script.py
+
+platforms:
+  - type: docker
+    image: continuumio/anaconda3:2024.02-1
+    setup:
+      - type: docker
+        run: |
+          conda install -y -c bioconda arboreto pandas
+      - type: python
+        packages: [ anndata ]
+  - type: native
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/genie3/script.py b/src/methods/single_omics/genie3/script.py
index 50b527bf1..a3ab20ac2 100644
--- a/src/methods/single_omics/genie3/script.py
+++ b/src/methods/single_omics/genie3/script.py
@@ -10,7 +10,7 @@
 ## VIASH START
 par = {
   'multiomics_rna': 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
-  'tfs': 'resources/prior/tf_all.csv',
+  "tf_all": 'resources/prior/tf_all.csv',
   'prediction': 'output/genie3/prediction.csv',
   'max_n_links': 50000
 }
@@ -32,7 +32,7 @@ adata_rna = X[~mask, :]
 
 # Load list of putative TFs
-df = pd.read_csv(par['tfs'], header=None, names=['gene_name'])
+df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
 tfs = set(list(df['gene_name']))
 tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]
 
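Note: the GENIE3 and GRNBoost2 components install `arboreto` from bioconda (see the Docker setup above). A sketch of the call these scripts build up to, using arboreto's documented `arboreto.algo` entry points; the toy matrix and TF list are placeholders:

```python
import pandas as pd
from arboreto.algo import genie3  # grnboost2 has the same call shape

# toy expression matrix: rows = observations (cells), columns = genes
expression = pd.DataFrame(
    [[1.0, 0.2, 3.1], [0.5, 1.7, 2.2], [2.3, 0.0, 1.9]],
    columns=["TF1", "TF2", "G1"],
)

# returns a ranked edge list with columns TF, target, importance
network = genie3(expression_data=expression, tf_names=["TF1", "TF2"])
print(network.head())
```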
diff --git a/src/methods/single_omics/grnboost2/config.novsh.yaml b/src/methods/single_omics/grnboost2/config.novsh.yaml
deleted file mode 100644
index 48fdff137..000000000
--- a/src/methods/single_omics/grnboost2/config.novsh.yaml
+++ /dev/null
@@ -1,63 +0,0 @@
-functionality:
-  name: grnboost2
-  info:
-    label: grnboost2
-    summary: "GRN inference using GRNBoost2"
-    description: |
-      GRN inference using GRNBoost2.
-    documentation_url: https://arboreto.readthedocs.io/en/latest/algorithms.html#grnboost2
-  arguments:
-    - name: --multiomics_rna
-      type: file
-      example: resources/grn-benchmark/multiomics_rna.h5ad
-      info:
-        label: multiomics_rna
-        summary: "Multiomics RNA data"
-        file_type: rds
-        columns:
-          - name: dummpy
-            type: string
-            required: false
-      required: true
-      must_exist: true
-    - name: --tfs
-      type: file
-      example: resources/prior/tf_all.csv
-      info:
-        label: tfs
-        summary: "List of putative TFs"
-        file_type: csv
-      required: true
-      must_exist: true
-    - name: --prediction
-      __merge__: ../../../api/file_prediction.yaml
-      required: true
-      direction: output
-    - name: --max_n_links
-      type: integer
-      default: 50000
-    - name: --temp_dir
-      type: file
-      direction: output
-      default: 'output/grnboost2'
-    - name: --num_workers
-      type: integer
-      direction: input
-      default: 4
-  resources:
-    - type: python_script
-      path: script.py
-
-platforms:
-  - type: docker
-    image: continuumio/anaconda3:2024.02-1
-    setup:
-      - type: docker
-        run: |
-          conda install -y -c bioconda arboreto pandas
-      - type: python
-        packages: [ anndata ]
-  - type: native
-  - type: nextflow
-    directives:
-      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/grnboost2/config.vsh.yaml b/src/methods/single_omics/grnboost2/config.vsh.yaml
new file mode 100644
index 000000000..30fb59eb9
--- /dev/null
+++ b/src/methods/single_omics/grnboost2/config.vsh.yaml
@@ -0,0 +1,29 @@
+__merge__: ../../../api/comp_method.yaml
+
+functionality:
+  name: grnboost2
+  namespace: "grn_methods"
+  info:
+    label: grnboost2
+    summary: "GRN inference using GRNBoost2"
+    description: |
+      GRN inference using GRNBoost2.
+    documentation_url: https://arboreto.readthedocs.io/en/latest/algorithms.html#grnboost2
+
+  resources:
+    - type: python_script
+      path: script.py
+
+platforms:
+  - type: docker
+    image: continuumio/anaconda3:2024.02-1
+    setup:
+      - type: docker
+        run: |
+          conda install -y -c bioconda arboreto pandas
+      - type: python
+        packages: [ anndata ]
+  - type: native
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/grnboost2/script.py b/src/methods/single_omics/grnboost2/script.py
index 4ad59f8f2..510d273fb 100644
--- a/src/methods/single_omics/grnboost2/script.py
+++ b/src/methods/single_omics/grnboost2/script.py
@@ -10,7 +10,7 @@
 ## VIASH START
 par = {
   'multiomics_rna': 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
-  'tfs': 'resources/prior/tf_all.csv',
+  "tf_all": 'resources/prior/tf_all.csv',
   'prediction': 'output/grnboost2/prediction.csv',
   'max_n_links': 50000
 }
@@ -32,7 +32,7 @@ adata_rna = X[~mask, :]
 
 # Load list of putative TFs
-df = pd.read_csv(par['tfs'], header=None, names=['gene_name'])
+df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
 tfs = set(list(df['gene_name']))
 tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]
 
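Note: both the R and Python method scripts drop genes with more than 90% zero counts before inference (`zero_proportion <= 0.9` in the hunks above and below). A NumPy sketch of that filter; the genes-by-cells orientation is assumed from the R scripts:

```python
import numpy as np

# toy matrix: rows = genes, columns = cells (orientation assumed)
X = np.array([[0, 0, 0, 5],
              [2, 3, 1, 4],
              [0, 0, 0, 0]])

zero_proportion = (X == 0).mean(axis=1)
mask = zero_proportion <= 0.9  # keep genes with at most 90% zeros
X_filtered = X[mask, :]
print(X_filtered.shape)  # (2, 4): the all-zero gene is dropped
```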
diff --git a/src/methods/single_omics/pidc/config.novsh.yaml b/src/methods/single_omics/pidc/config.novsh.yaml
deleted file mode 100644
index 038ec66e9..000000000
--- a/src/methods/single_omics/pidc/config.novsh.yaml
+++ /dev/null
@@ -1,50 +0,0 @@
-functionality:
-  name: pidc
-  info:
-    label: pidc
-    summary: "GRN inference using PIDC"
-    description: |
-      GRN inference using PIDC.
-    documentation_url: https://rdrr.io/github/hmutpw/PIDC/
-  arguments:
-    - name: --multiomics_rna
-      type: file
-      example: resources/grn-benchmark/multiomics_rna.h5ad
-      info:
-        label: multiomics_rna
-        summary: "Multiomics RNA data"
-        file_type: rds
-        columns:
-          - name: dummpy
-            type: string
-            required: false
-      required: true
-      must_exist: true
-    - name: --prediction
-      __merge__: ../../../api/file_prediction.yaml
-      required: true
-      direction: output
-    - name: --max_n_links
-      type: integer
-      default: 50000
-    - name: --temp_dir
-      type: file
-      direction: output
-      default: 'output/pidc'
-    - name: --num_workers
-      type: integer
-      direction: input
-      default: 4
-  resources:
-    - type: python_script
-      path: script.py
-    - type: file
-      path: pidc.jl
-
-platforms:
-  - type: docker
-    image: apassemi/pidc:latest
-  - type: native
-  - type: nextflow
-    directives:
-      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/pidc/config.vsh.yaml b/src/methods/single_omics/pidc/config.vsh.yaml
new file mode 100644
index 000000000..00580a5b5
--- /dev/null
+++ b/src/methods/single_omics/pidc/config.vsh.yaml
@@ -0,0 +1,25 @@
+__merge__: ../../../api/comp_method.yaml
+
+functionality:
+  name: pidc
+  namespace: "grn_methods"
+  info:
+    label: pidc
+    summary: "GRN inference using PIDC"
+    description: |
+      GRN inference using PIDC.
+    documentation_url: https://rdrr.io/github/hmutpw/PIDC/
+
+  resources:
+    - type: python_script
+      path: script.py
+    - type: file
+      path: pidc.jl
+
+platforms:
+  - type: docker
+    image: apassemi/pidc:latest
+  - type: native
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/portia/config.vsh.yaml b/src/methods/single_omics/portia/config.vsh.yaml
index 7f4148d24..5b9af2e4f 100644
--- a/src/methods/single_omics/portia/config.vsh.yaml
+++ b/src/methods/single_omics/portia/config.vsh.yaml
@@ -9,14 +9,7 @@ functionality:
     description: |
       GRN inference using PORTIA.
     documentation_url: https://github.com/AntoinePassemiers/PORTIA
-  arguments:
-    - name: --tf_all
-      type: file
-      example: resources/prior/tf_all.csv
-      required: true
-    - name: --max_n_links
-      type: integer
-      default: 50000
+
 
   resources:
     - type: python_script
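Note: PORTIA's per-component `--tf_all` and `--max_n_links` arguments are dropped above because they now arrive via the `__merge__` of `src/api/comp_method.yaml` (default `max_n_links: 50000`). The cap is conventionally applied by keeping only the strongest-scoring edges; a hedged sketch with a hypothetical edge list:

```python
import pandas as pd

max_n_links = 50000  # default declared in src/api/comp_method.yaml

# hypothetical inferred network: one row per regulator -> target edge
net = pd.DataFrame({
    "source": ["TF1", "TF1", "TF2"],
    "target": ["G1", "G2", "G1"],
    "weight": [0.9, 0.1, 0.5],
})

# keep the top-scoring links only; nlargest tolerates n > len(net)
prediction = net.nlargest(max_n_links, "weight")
prediction.to_csv("prediction.csv", index=False)
```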
diff --git a/src/methods/single_omics/ppcor/config.novsh.yaml b/src/methods/single_omics/ppcor/config.novsh.yaml
deleted file mode 100644
index e74acb00f..000000000
--- a/src/methods/single_omics/ppcor/config.novsh.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-functionality:
-  name: ppcor
-  info:
-    label: ppcor
-    summary: "GRN inference using PPCOR"
-    description: |
-      GRN inference using PPCOR.
-    documentation_url: https://rdrr.io/cran/ppcor/man/
-  arguments:
-    - name: --multiomics_rna
-      type: file
-      example: resources/grn-benchmark/multiomics_rna.h5ad
-      info:
-        label: multiomics_rna
-        summary: "Multiomics RNA data"
-        file_type: rds
-        columns:
-          - name: dummpy
-            type: string
-            required: false
-      required: true
-      must_exist: true
-    - name: --prediction
-      __merge__: ../../../api/file_prediction.yaml
-      required: true
-      direction: output
-    - name: --max_n_links
-      type: integer
-      default: 50000
-    - name: --temp_dir
-      type: file
-      direction: output
-      default: 'output/ppcor'
-    - name: --num_workers
-      type: integer
-      direction: input
-      default: 4
-  resources:
-    - type: r_script
-      path: script.R
-
-platforms:
-  - type: docker
-    image: janursa/figr:19-08-2024
-    setup:
-      - type: r
-        packages: [ ppcor, anndata, dplyr ]
-  - type: native
-  - type: nextflow
-    directives:
-      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/ppcor/config.vsh.yaml b/src/methods/single_omics/ppcor/config.vsh.yaml
new file mode 100644
index 000000000..4e6474973
--- /dev/null
+++ b/src/methods/single_omics/ppcor/config.vsh.yaml
@@ -0,0 +1,26 @@
+__merge__: ../../../api/comp_method.yaml
+
+functionality:
+  name: ppcor
+  namespace: "grn_methods"
+  info:
+    label: ppcor
+    summary: "GRN inference using PPCOR"
+    description: |
+      GRN inference using PPCOR.
+    documentation_url: https://rdrr.io/cran/ppcor/man/
+
+  resources:
+    - type: r_script
+      path: script.R
+
+platforms:
+  - type: docker
+    image: janursa/figr:19-08-2024
+    setup:
+      - type: r
+        packages: [ ppcor, anndata, dplyr ]
+  - type: native
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/scsgl/config.novsh.yaml b/src/methods/single_omics/scsgl/config.vsh.yaml
similarity index 50%
rename from src/methods/single_omics/scsgl/config.novsh.yaml
rename to src/methods/single_omics/scsgl/config.vsh.yaml
index dcc592973..6dd5b532e 100644
--- a/src/methods/single_omics/scsgl/config.novsh.yaml
+++ b/src/methods/single_omics/scsgl/config.vsh.yaml
@@ -1,40 +1,15 @@
+__merge__: ../../../api/comp_method.yaml
+
 functionality:
   name: scsgl
+  namespace: "grn_methods"
   info:
     label: scsgl
     summary: "GRN inference using SCSGL"
     description: |
       GRN inference using SCSGL.
     documentation_url: https://doi.org/10.1101/2021.07.08.451697
-  arguments:
-    - name: --multiomics_rna
-      type: file
-      example: resources/grn-benchmark/multiomics_rna.h5ad
-      info:
-        label: multiomics_rna
-        summary: "Multiomics RNA data"
-        file_type: rds
-        columns:
-          - name: dummpy
-            type: string
-            required: false
-      required: true
-      must_exist: true
-    - name: --prediction
-      __merge__: ../../../api/file_prediction.yaml
-      required: true
-      direction: output
-    - name: --max_n_links
-      type: integer
-      default: 50000
-    - name: --temp_dir
-      type: file
-      direction: output
-      default: 'output/scsgl'
-    - name: --num_workers
-      type: integer
-      direction: input
-      default: 4
+
   resources:
     - type: python_script
       path: script.py
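Note: the rename to `config.vsh.yaml` plus the `__merge__` header is what lets scsgl (and the other components above) inherit the shared arguments instead of redeclaring them. A loose Python model of that merge semantics, not Viash's actual implementation: the component config is overlaid onto the API config, with nested maps merged recursively:

```python
def deep_merge(base: dict, overlay: dict) -> dict:
    """Simplified stand-in for Viash's __merge__: overlay wins on conflicts."""
    merged = dict(base)
    for key, value in overlay.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged

api = {"functionality": {"arguments": ["--multiomics_rna", "--tf_all", "--max_n_links"]}}
component = {"functionality": {"name": "scsgl", "namespace": "grn_methods"}}
print(deep_merge(api, component))
# {'functionality': {'arguments': [...], 'name': 'scsgl', 'namespace': 'grn_methods'}}
```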
diff --git a/src/methods/single_omics/tigress/config.novsh.yaml b/src/methods/single_omics/tigress/config.novsh.yaml
deleted file mode 100644
index 5a3721256..000000000
--- a/src/methods/single_omics/tigress/config.novsh.yaml
+++ /dev/null
@@ -1,63 +0,0 @@
-functionality:
-  name: tigress
-  info:
-    label: tigress
-    summary: "GRN inference using TIGRESS"
-    description: |
-      GRN inference using TIGRESS.
-    documentation_url: https://rdrr.io/github/jpvert/tigress/man/tigress.html
-  arguments:
-    - name: --multiomics_rna
-      type: file
-      example: resources/grn-benchmark/multiomics_rna.h5ad
-      info:
-        label: multiomics_rna
-        summary: "Multiomics RNA data"
-        file_type: rds
-        columns:
-          - name: dummpy
-            type: string
-            required: false
-      required: true
-      must_exist: true
-    - name: --tfs
-      type: file
-      example: resources/prior/tf_all.csv
-      info:
-        label: tfs
-        summary: "List of putative TFs"
-        file_type: csv
-      required: true
-      must_exist: true
-    - name: --prediction
-      __merge__: ../../../api/file_prediction.yaml
-      required: true
-      direction: output
-    - name: --max_n_links
-      type: integer
-      default: 50000
-    - name: --temp_dir
-      type: file
-      direction: output
-      default: 'output/tigress'
-    - name: --num_workers
-      type: integer
-      direction: input
-      default: 4
-  resources:
-    - type: r_script
-      path: script.R
-
-platforms:
-  - type: docker
-    image: janursa/figr:19-08-2024
-    setup:
-      - type: r
-        packages: [ devtools, foreach, plyr, doRNG, glmnet, randomForest, anndata, dplyr ]
-      - type: docker
-        run: |
-          Rscript -e 'library(devtools); install_github("jpvert/tigress")'
-  - type: native
-  - type: nextflow
-    directives:
-      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/tigress/config.vsh.yaml b/src/methods/single_omics/tigress/config.vsh.yaml
new file mode 100644
index 000000000..d88d6b5ec
--- /dev/null
+++ b/src/methods/single_omics/tigress/config.vsh.yaml
@@ -0,0 +1,29 @@
+__merge__: ../../../api/comp_method.yaml
+
+functionality:
+  name: tigress
+  namespace: "grn_methods"
+  info:
+    label: tigress
+    summary: "GRN inference using TIGRESS"
+    description: |
+      GRN inference using TIGRESS.
+    documentation_url: https://rdrr.io/github/jpvert/tigress/man/tigress.html
+
+  resources:
+    - type: r_script
+      path: script.R
+
+platforms:
+  - type: docker
+    image: janursa/figr:19-08-2024
+    setup:
+      - type: r
+        packages: [ devtools, foreach, plyr, doRNG, glmnet, randomForest, anndata, dplyr ]
+      - type: docker
+        run: |
+          Rscript -e 'library(devtools); install_github("jpvert/tigress")'
+  - type: native
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/tigress/script.R b/src/methods/single_omics/tigress/script.R
index 114703150..fd6bc50a6 100644
--- a/src/methods/single_omics/tigress/script.R
+++ b/src/methods/single_omics/tigress/script.R
@@ -5,7 +5,7 @@ library(dplyr)
 ## VIASH START
 par <- list(
   "multiomics_rna" = 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
-  "tfs" = 'resources/prior/tf_all.csv',
+  "tf_all" = 'resources/prior/tf_all.csv',
   "prediction" = 'output/tigress/prediction.csv',
   "temp_dir": 'output/tigress',
   "max_n_links": 50000
@@ -30,7 +30,7 @@ mask <- (zero_proportion <= 0.9)
 X <- X[mask,]
 
 # Load list of putative TFs
-dat <- read.csv(par$tfs, header = FALSE)
+dat <- read.csv(par$tf_all, header = FALSE)
 Tf <- intersect(gene_names, dat$V1)
 
 # Run GRN inference method
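Note: the workflow files that follow compose run identifiers by appending the method name, then the metric name, to the dataset id (the `id:` closures in `runEach`). A plain-Python model of that naming scheme; the dataset id is hypothetical:

```python
dataset_id = "op_single_omics"  # hypothetical
methods = ["portia", "pidc", "ppcor", "tigress"]
metrics = ["regression_1"]

# mirrors: id + "." + comp.config.functionality.name, applied twice
run_ids = [f"{dataset_id}.{m}.{s}" for m in methods for s in metrics]
print(run_ids[0])  # op_single_omics.portia.regression_1
```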
diff --git a/src/workflows/run_benchmark_single_omics/___main.nf b/src/workflows/run_benchmark_single_omics/___main.nf
new file mode 100644
index 000000000..2fa73958f
--- /dev/null
+++ b/src/workflows/run_benchmark_single_omics/___main.nf
@@ -0,0 +1,315 @@
+workflow auto {
+  findStatesTemp(params, meta.config)
+    | meta.workflow.run(
+      auto: [publish: "state"]
+    )
+}
+
+workflow run_wf {
+  take:
+  input_ch
+
+  main:
+
+  // construct list of methods
+  methods = [
+    portia
+  ]
+
+  // construct list of metrics
+  metrics = [
+    regression_1
+  ]
+
+  /****************************
+   * EXTRACT DATASET METADATA *
+   ****************************/
+  dataset_ch = input_ch
+    // store join id
+    | map{ id, state ->
+      [id, state + ["_meta": [join_id: id]]]
+    }
+
+    // extract the dataset metadata
+    // | extract_metadata.run(
+    //   fromState: [input: "input_solution"],
+    //   toState: { id, output, state ->
+    //     state + [
+    //       dataset_uns: readYaml(output.output).uns
+    //     ]
+    //   }
+    // )
+
+  /***************************
+   * RUN METHODS AND METRICS *
+   ***************************/
+  score_ch = dataset_ch
+
+    // run all methods
+    | runEach(
+      components: methods,
+
+      // use the 'filter' argument to only run a method on the normalisation the component is asking for
+      // filter: { id, state, comp ->
+      //   def norm = state.dataset_uns.normalization_id
+      //   def pref = comp.config.info.preferred_normalization
+      //   // if the preferred normalisation is none at all,
+      //   // we can pass whichever dataset we want
+      //   def norm_check = (norm == "log_cp10k" && pref == "counts") || norm == pref
+      //   def method_check = !state.method_ids || state.method_ids.contains(comp.config.name)
+
+      //   method_check && norm_check
+      // },
+
+      // define a new 'id' by appending the method name to the dataset id
+      id: { id, state, comp ->
+        id + "." + comp.config.name
+      },
+
+      // use 'fromState' to fetch the arguments the component requires from the overall state
+      fromState: { id, state, comp ->
+        def new_args = [
+          multiomics_rna: state.multiomics_rna,
+          tf_all: state.tf_all,
+        ]
+        if (comp.config.info.type == "control_method") {
+          new_args.input_solution = state.input_solution
+        }
+        new_args
+      },
+
+      // use 'toState' to publish that component's outputs to the overall state
+      toState: { id, output, state, comp ->
+        state + [
+          method_id: comp.config.name,
+          prediction: output.prediction
+        ]
+      }
+    )
+
+    // run all metrics
+    | runEach(
+      components: metrics,
+      id: { id, state, comp ->
+        id + "." + comp.config.name
+      },
+      // use 'fromState' to fetch the arguments the component requires from the overall state
+      fromState: [
+        perturbation_data: "perturbation_data",
+        prediction: "prediction",
+        subsample: "subsample",
+        reg_type: "reg_type",
+        max_workers: "max_workers",
+        consensus: "consensus",
+        tf_all: "tf_all"
+      ],
+      // use 'toState' to publish that component's outputs to the overall state
+      toState: { id, output, state, comp ->
+        state + [
+          metric_id: comp.config.name,
+          metric_output: output.score
+        ]
+      }
+    )
+
+
+  /******************************
+   * GENERATE OUTPUT YAML FILES *
+   ******************************/
+  // TODO: can we store everything below in a separate helper function?
+
+  // extract the dataset metadata
+  // dataset_meta_ch = dataset_ch
+  //   // // only keep one of the normalization methods
+  //   // | filter{ id, state ->
+  //   //   state.dataset_uns.normalization_id == "log_cp10k"
+  //   // }
+  //   | joinStates { ids, states ->
+  //     // store the dataset metadata in a file
+  //     // def dataset_uns = states.collect{state ->
+  //     //   def uns = state.dataset_uns.clone()
+  //     //   uns.remove("normalization_id")
+  //     //   uns
+  //     // }
+  //     def dataset_uns_yaml_blob = toYamlBlob(dataset_uns)
+  //     def dataset_uns_file = tempFile("dataset_uns.yaml")
+  //     dataset_uns_file.write(dataset_uns_yaml_blob)
+
+  //     // ["output", [output_dataset_info: dataset_uns_file]]
+  //   }
+
+  output_ch = score_ch
+
+    // extract the scores
+    | extract_metadata.run(
+      key: "extract_scores",
+      fromState: [input: "metric_output"],
+      toState: { id, output, state ->
+        state + [
+          score_uns: readYaml(output.output).uns
+        ]
+      }
+    )
+
+    | joinStates { ids, states ->
+      // store the method configs in a file
+      def method_configs = methods.collect{it.config}
+      def method_configs_yaml_blob = toYamlBlob(method_configs)
+      def method_configs_file = tempFile("method_configs.yaml")
+      method_configs_file.write(method_configs_yaml_blob)
+
+      // store the metric configs in a file
+      def metric_configs = metrics.collect{it.config}
+      def metric_configs_yaml_blob = toYamlBlob(metric_configs)
+      def metric_configs_file = tempFile("metric_configs.yaml")
+      metric_configs_file.write(metric_configs_yaml_blob)
+
+      def viash_file = meta.resources_dir.resolve("_viash.yaml")
+      def viash_file_content = toYamlBlob(readYaml(viash_file).info)
+      def task_info_file = tempFile("task_info.yaml")
+      task_info_file.write(viash_file_content)
+
+      // store the scores in a file
+      def score_uns = states.collect{it.score_uns}
+      def score_uns_yaml_blob = toYamlBlob(score_uns)
+      def score_uns_file = tempFile("score_uns.yaml")
+      score_uns_file.write(score_uns_yaml_blob)
+
+      def new_state = [
+        output_method_configs: method_configs_file,
+        output_metric_configs: metric_configs_file,
+        output_task_info: task_info_file,
+        output_scores: score_uns_file,
+        _meta: states[0]._meta
+      ]
+
+      ["output", new_state]
+    }
+
+    // merge all of the output data
+    // | mix(dataset_meta_ch)
+    | joinStates{ ids, states ->
+      def mergedStates = states.inject([:]) { acc, m -> acc + m }
+      [ids[0], mergedStates]
+    }
+
+  emit:
+  output_ch
+}
+
+// temp fix for rename_keys typo
+
+def findStatesTemp(Map params, Map config) {
+  def auto_config = deepClone(config)
+  def auto_params = deepClone(params)
+
+  auto_config = auto_config.clone()
+  // override arguments
+  auto_config.argument_groups = []
+  auto_config.arguments = [
+    [
+      type: "string",
+      name: "--id",
+      description: "A dummy identifier",
+      required: false
+    ],
+    [
+      type: "file",
+      name: "--input_states",
+      example: "/path/to/input/directory/**/state.yaml",
+      description: "Path to input directory containing the datasets to be integrated.",
+      required: true,
+      multiple: true,
+      multiple_sep: ";"
+    ],
+    [
+      type: "string",
+      name: "--filter",
+      example: "foo/.*/state.yaml",
+      description: "Regex to filter state files by path.",
+      required: false
+    ],
+    // to do: make this a yaml blob?
+    [
+      type: "string",
+      name: "--rename_keys",
+      example: ["newKey1:oldKey1", "newKey2:oldKey2"],
+      description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.",
+      required: false,
+      multiple: true,
+      multiple_sep: ";"
+    ],
+    [
+      type: "string",
+      name: "--settings",
+      example: '{"output_dataset": "dataset.h5ad", "k": 10}',
+      description: "Global arguments as a JSON glob to be passed to all components.",
+      required: false
+    ]
+  ]
+  if (!(auto_params.containsKey("id"))) {
+    auto_params["id"] = "auto"
+  }
+
+  // run auto config through processConfig once more
+  auto_config = processConfig(auto_config)
+
+  workflow findStatesTempWf {
+    helpMessage(auto_config)
+
+    output_ch =
+      channelFromParams(auto_params, auto_config)
+        | flatMap { autoId, args ->
+
+          def globalSettings = args.settings ? readYamlBlob(args.settings) : [:]
+
+          // look for state files in input dir
+          def stateFiles = args.input_states
+
+          // filter state files by regex
+          if (args.filter) {
+            stateFiles = stateFiles.findAll{ stateFile ->
+              def stateFileStr = stateFile.toString()
+              def matcher = stateFileStr =~ args.filter
+              matcher.matches()}
+          }
+
+          // read in states
+          def states = stateFiles.collect { stateFile ->
+            def state_ = readTaggedYaml(stateFile)
+            [state_.id, state_]
+          }
+
+          // construct renameMap
+          if (args.rename_keys) {
+            def renameMap = args.rename_keys.collectEntries{renameString ->
+              def split = renameString.split(":")
+              assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey;newKey:oldKey'"
+              split
+            }
+
+            // rename keys in state, only let states through which have all keys
+            // also add global settings
+            states = states.collectMany{id, state ->
+              def newState = [:]
+
+              for (key in renameMap.keySet()) {
+                def origKey = renameMap[key]
+                if (!(state.containsKey(origKey))) {
+                  return []
+                }
+                newState[key] = state[origKey]
+              }
+
+              [[id, globalSettings + newState]]
+            }
+          }
+
+          states
+        }
+  emit:
+  output_ch
+  }
+
+  return findStatesTempWf
+}
\ No newline at end of file
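Note: both workflow variants finish with the same Groovy fold, `states.inject([:]) { acc, m -> acc + m }`, to flatten the per-run states into one output state. A Python analogue of that reduction (later states overwrite earlier ones on key collisions):

```python
from functools import reduce

states = [
    {"scores": "score_uns.yaml"},
    {"method_configs": "method_configs.yaml", "_meta": {"join_id": "run1"}},
]

merged = reduce(lambda acc, m: {**acc, **m}, states, {})
print(merged)
```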
diff --git a/src/workflows/run_benchmark_single_omics/__main.nf b/src/workflows/run_benchmark_single_omics/__main.nf
new file mode 100644
index 000000000..1240d344f
--- /dev/null
+++ b/src/workflows/run_benchmark_single_omics/__main.nf
@@ -0,0 +1,246 @@
+// construct list of methods
+methods = [
+  portia
+]
+
+// construct list of metrics
+metrics = [
+  regression_1
+]
+
+// helper workflow for starting a workflow based on lists of yaml files
+workflow auto {
+  findStates(params, meta.config)
+    | meta.workflow.run(
+      auto: [publish: "state"]
+    )
+}
+
+// benchmarking workflow
+workflow run_wf {
+  take:
+  input_ch
+
+  main:
+
+  /***************************
+   * RUN METHODS AND METRICS *
+   ***************************/
+  score_ch = input_ch
+
+    | run_benchmark_fun(
+      methods: methods,
+      metrics: metrics,
+      methodFromState: { id, state, comp ->
+        def new_args = [
+          multiomics_rna: state.multiomics_rna,
+          tf_all: state.tf_all,
+          prediction: 'predictions/$id.$key.output.h5ad',
+          output_model: null
+        ]
+        if (comp.config.functionality.info.type == "control_method") {
+          new_args.de_test_h5ad = state.de_test_h5ad
+        }
+        new_args
+      },
+      methodToState: ["prediction": "prediction"],
+      metricFromState: [
+        perturbation_data: "perturbation_data",
+        prediction: "prediction",
+        subsample: "subsample",
+        reg_type: "reg_type",
+        max_workers: "max_workers",
+        consensus: "consensus",
+        tf_all: "tf_all"
+      ],
+      metricToState: ["metric_output": "output"],
+      methodAuto: [publish: "state"]
+    )
+    | joinStates { ids, states ->
+      def score_uns = states.collect{it.score_uns}
+      def score_uns_yaml_blob = toYamlBlob(score_uns)
+      def score_uns_file = tempFile("score_uns.yaml")
+      score_uns_file.write(score_uns_yaml_blob)
+
+      ["output", [scores: score_uns_file]]
+    }
+
+  /******************************
+   * GENERATE OUTPUT YAML FILES *
+   ******************************/
+  // create dataset, method and metric metadata files
+  metadata_ch = input_ch
+    | create_metadata_files(
+      datasetFromState: [input: "multiomics_rna"],
+      methods: methods,
+      metrics: metrics,
+      meta: meta
+    )
+
+  // merge all of the output data
+  output_ch = score_ch
+    | mix(metadata_ch)
+    | joinStates{ ids, states ->
+      def mergedStates = states.inject([:]) { acc, m -> acc + m }
+      [ids[0], mergedStates]
+    }
+
+  emit:
+  output_ch
+}
+
+
+
+
+
+def run_benchmark_fun(args) {
+  // required args
+  def methods_ = args.methods
+  def metrics_ = args.metrics
+  def methodFromState = args.methodFromState
+  def methodToState = args.methodToState
+  def metricFromState = args.metricFromState
+  def metricToState = args.metricToState
+
+  assert methods_, "methods must be defined"
+  assert metrics_, "metrics must be defined"
+  assert methodFromState, "methodFromState must be defined"
+  assert methodToState, "methodToState must be defined"
+  assert metricFromState, "metricFromState must be defined"
+  assert metricToState, "metricToState must be defined"
+
+  // optional args
+  def keyPrefix = args.keyPrefix ?: ""
+  def methodAuto = args.methodAuto ?: [:]
+  def metricAuto = args.metricAuto ?: [:]
+
+  // add the key prefix to the method and metric names
+  if (keyPrefix && keyPrefix != "") {
+    methods_ = methods.collect{ method ->
+      method.run(key: keyPrefix + method.config.functionality.name)
+    }
+    metrics_ = metrics.collect{ metric ->
+      metric.run(key: keyPrefix + metric.config.functionality.name)
+    }
+  }
+
+  workflow bench {
+    take: input_ch
+
+    main:
+    output_ch = input_ch
+      // run all methods
+      | runEach(
+        components: methods_,
+        filter: { id, state, comp ->
+          !state.method_ids || state.method_ids.contains(comp.config.functionality.name)
+        },
+        id: { id, state, comp ->
+          id + "." + comp.config.functionality.name
+        },
+        fromState: methodFromState,
+        toState: methodToState,
+        auto: methodAuto
+      )
+
+      // run all metrics
+      | runEach(
+        components: metrics_,
+        filter: { id, state, comp ->
+          !state.metric_ids || state.metric_ids.contains(comp.config.functionality.name)
+        },
+        id: { id, state, comp ->
+          id + "." + comp.config.functionality.name
+        },
+        fromState: metricFromState,
+        toState: metricToState,
+        auto: metricAuto
+      )
+
+      // extract the scores
+      | extract_metadata.run(
+        key: "${keyPrefix}score_uns",
+        fromState: [input: "metric_output"],
+        toState: { id, output, state ->
+          state + [
+            score_uns: readYaml(output.output).uns
+          ]
+        }
+      )
+
+    emit: output_ch
+  }
+  return bench
+}
+
+
+def create_metadata_files(args) {
+  // required args
+  def meta_ = args.meta
+  def methods_ = args.methods
+  def metrics_ = args.metrics
+  def datasetFromState = args.datasetFromState
+
+  assert meta_, "meta must be defined"
+  assert methods_, "methods must be defined"
+  assert metrics_, "metrics must be defined"
+  assert datasetFromState, "datasetFromState must be defined"
+
+  workflow metadata {
+    take: input_ch
+
+    main:
+    output_ch = input_ch
+
+      | map{ id, state ->
+        [id, state + ["_meta": [join_id: id]]]
+      }
+
+      | extract_metadata.run(
+        key: "dataset_uns",
+        fromState: args.datasetFromState,
+        toState: { id, output, state ->
+          state + [
+            dataset_info: readYaml(output.output).uns
+          ]
+        }
+      )
+
+      | joinStates { ids, states ->
+        assert states.size() > 0, "no states found"
+        assert states[0]._meta, "no _meta found in state[0]"
+        assert states.every{it.dataset_info}, "not all states have dataset_info"
+
+        // combine the dataset info into one file
+        def dataset_uns = states.collect{it.dataset_info}
+        def dataset_uns_yaml_blob = toYamlBlob(dataset_uns)
+        def dataset_uns_file = tempFile("dataset_uns.yaml")
+        dataset_uns_file.write(dataset_uns_yaml_blob)
+
+        // store the method configs in a file
+        def method_configs = methods_.collect{it.config}
+        def method_configs_yaml_blob = toYamlBlob(method_configs)
+        def method_configs_file = tempFile("method_configs.yaml")
+        method_configs_file.write(method_configs_yaml_blob)
+
+        // store the metric configs in a file
+        def metric_configs = metrics_.collect{it.config}
+        def metric_configs_yaml_blob = toYamlBlob(metric_configs)
+        def metric_configs_file = tempFile("metric_configs.yaml")
+        metric_configs_file.write(metric_configs_yaml_blob)
+
+        def task_info_file = meta_.resources_dir.resolve("task_info.yaml")
+
+        def new_state = [
+          dataset_uns: dataset_uns_file,
+          method_configs: method_configs_file,
+          metric_configs: metric_configs_file,
+          task_info: task_info_file,
+          _meta: states[0]._meta
+        ]
+        ["output", new_state]
+      }
+    emit: output_ch
+  }
+  return metadata
+}
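Note: the `joinStates` blocks above collect each run's `score_uns` record and serialise the list to a single `scores.yaml` via `toYamlBlob`. A PyYAML sketch of the equivalent step; the record fields and values are illustrative dummies, not real benchmark output:

```python
import yaml

score_uns = [  # one record per method x metric run (fields assumed)
    {"method_id": "portia", "metric_ids": ["regression_1"], "metric_values": [0.0]},
    {"method_id": "pidc", "metric_ids": ["regression_1"], "metric_values": [0.0]},
]

with open("score_uns.yaml", "w") as f:
    yaml.safe_dump(score_uns, f, sort_keys=False)
```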
diff --git a/src/workflows/run_benchmark_single_omics/config.vsh.yaml b/src/workflows/run_benchmark_single_omics/config.vsh.yaml
index a0d3e05bc..2395856e7 100644
--- a/src/workflows/run_benchmark_single_omics/config.vsh.yaml
+++ b/src/workflows/run_benchmark_single_omics/config.vsh.yaml
@@ -49,11 +49,11 @@ functionality:
         required: true
         direction: output
         default: "scores.yaml"
-      - name: "--metric_configs"
-        type: file
-        required: true
-        direction: output
-        default: metric_configs.yaml
+      # - name: "--metric_configs"
+      #   type: file
+      #   required: true
+      #   direction: output
+      #   default: metric_configs.yaml
 
   resources:
     - type: nextflow_script
@@ -69,6 +69,13 @@ functionality:
     - name: control_methods/positive_control
     - name: control_methods/negative_control
     - name: grn_methods/portia
+    - name: grn_methods/ennet
+    - name: grn_methods/genie3
+    - name: grn_methods/grnboost2
+    - name: grn_methods/pidc
+    - name: grn_methods/ppcor
+    - name: grn_methods/scsgl
+    - name: grn_methods/tigress
   repositories:
     - name: openproblemsv2
       type: github
@@ -77,4 +84,4 @@ functionality:
 platforms:
   - type: nextflow
     directives:
-      label: [ midtime, midmem, lowcpu ]
+      label: [ hightime, midmem, highcpu ]
diff --git a/src/workflows/run_benchmark_single_omics/main.nf b/src/workflows/run_benchmark_single_omics/main.nf
index 1240d344f..5b32ab115 100644
--- a/src/workflows/run_benchmark_single_omics/main.nf
+++ b/src/workflows/run_benchmark_single_omics/main.nf
@@ -1,14 +1,3 @@
-// construct list of methods
-methods = [
-  portia
-]
-
-// construct list of metrics
-metrics = [
-  regression_1
-]
-
-// helper workflow for starting a workflow based on lists of yaml files
 workflow auto {
   findStates(params, meta.config)
     | meta.workflow.run(
@@ -16,231 +5,194 @@ workflow auto {
   )
 }
 
-// benchmarking workflow
 workflow run_wf {
   take:
   input_ch
 
   main:
+  // construct list of methods
+  // methods = [
+  //   portia,
+  //   ennet,
+  //   genie3,
+  //   grnboost2,
+  //   pidc,
+  //   ppcor,
+  //   scsgl,
+  //   tigress
+  // ]
+
+  methods = [
+    portia,
+    pidc,
+    ppcor,
+    tigress
+  ]
+
+  // construct list of metrics
+  metrics = [
+    regression_1
+  ]
+
+  /****************************
+   * EXTRACT DATASET METADATA *
+   ****************************/
+  dataset_ch = input_ch
+    // store join id
+    | map{ id, state ->
+      [id, state + ["_meta": [join_id: id]]]
+    }
+
+  // // extract the dataset metadata
+  // | extract_metadata.run(
+  //   fromState: [input: "input_test"],
+  //   toState: { id, output, state ->
+  //     state + [
+  //       dataset_uns: readYaml(output.output).uns
+  //     ]
+  //   }
+  // )
+
   /***************************
    * RUN METHODS AND METRICS *
    ***************************/
-  score_ch = input_ch
-
-    | run_benchmark_fun(
-      methods: methods,
-      metrics: metrics,
-      methodFromState: { id, state, comp ->
-        def new_args = [
-          multiomics_rna: state.multiomics_rna,
-          tf_all: state.tf_all,
-          prediction: 'predictions/$id.$key.output.h5ad',
-          output_model: null
+  score_ch = dataset_ch
+
+    // run all methods
+    | runEach(
+      components: methods,
+
+      // use the 'filter' argument to only run a defined method or all methods
+      // filter: { id, state, comp ->
+      //   def method_check = !state.method_ids || state.method_ids.contains(comp.config.functionality.name)
+
+      //   method_check
+      // },
+
+      // define a new 'id' by appending the method name to the dataset id
+      id: { id, state, comp ->
+        id + "." + comp.config.functionality.name
+      },
+      // use 'fromState' to fetch the arguments the component requires from the overall state
+      fromState: [
+        multiomics_rna: "multiomics_rna",
+        tf_all: "tf_all",
+      ],
+      // use 'toState' to publish that component's outputs to the overall state
+      toState: { id, output, state, comp ->
+        state + [
+          method_id: comp.config.functionality.name,
+          prediction: output.prediction
         ]
-        if (comp.config.functionality.info.type == "control_method") {
-          new_args.de_test_h5ad = state.de_test_h5ad
-        }
-        new_args
+      }
+    )
+
+    // run all metrics
+    | runEach(
+      components: metrics,
+      id: { id, state, comp ->
+        id + "." + comp.config.functionality.name
       },
-      methodToState: ["prediction": "prediction"],
-      metricFromState: [
+      // use 'fromState' to fetch the arguments the component requires from the overall state
+      fromState: [
         perturbation_data: "perturbation_data",
         prediction: "prediction",
+        method_id: "method_id",
         subsample: "subsample",
         reg_type: "reg_type",
         max_workers: "max_workers",
         consensus: "consensus",
         tf_all: "tf_all"
       ],
-      metricToState: ["metric_output": "output"],
-      methodAuto: [publish: "state"]
+      // use 'toState' to publish that component's outputs to the overall state
+      toState: { id, output, state, comp ->
+        state + [
+          metric_id: comp.config.functionality.name,
+          metric_output: output.score
+        ]
+      }
     )
-    | joinStates { ids, states ->
-      def score_uns = states.collect{it.score_uns}
-      def score_uns_yaml_blob = toYamlBlob(score_uns)
-      def score_uns_file = tempFile("score_uns.yaml")
-      score_uns_file.write(score_uns_yaml_blob)
-
-      ["output", [scores: score_uns_file]]
-    }
 
   /******************************
    * GENERATE OUTPUT YAML FILES *
    ******************************/
-  // create dataset, method and metric metadata files
-  metadata_ch = input_ch
-    | create_metadata_files(
-      datasetFromState: [input: "multiomics_rna"],
-      methods: methods,
-      metrics: metrics,
-      meta: meta
-    )
+  // TODO: can we store everything below in a separate helper function?
+  // NOTE: the 'denoising' task doesn't use normalized data,
+  // so code related to normalization_ids is commented out
+
+  // extract the dataset metadata
+  // dataset_meta_ch = dataset_ch
+  //   // // only keep one of the normalization methods
+  //   // | filter{ id, state ->
+  //   //   state.dataset_uns.normalization_id == "log_cp10k"
+  //   // }
+  //   | joinStates { ids, states ->
+  //     // store the dataset metadata in a file
+  //     def dataset_uns = states.collect{state ->
+  //       def uns = state.dataset_uns.clone()
+  //       // uns.remove("normalization_id")
+  //       uns
+  //     }
+  //     def dataset_uns_yaml_blob = toYamlBlob(dataset_uns)
+  //     def dataset_uns_file = tempFile("dataset_uns.yaml")
+  //     dataset_uns_file.write(dataset_uns_yaml_blob)
 
-  // merge all of the output data
+  //     ["output", [output_dataset_info: dataset_uns_file]]
+  //   }
+
   output_ch = score_ch
-    | mix(metadata_ch)
-    | joinStates{ ids, states ->
-      def mergedStates = states.inject([:]) { acc, m -> acc + m }
-      [ids[0], mergedStates]
-    }
-
-  emit:
-  output_ch
-}
-
+    // extract the scores
+    | extract_metadata.run(
+      key: "extract_scores",
+      fromState: [input: "metric_output"],
+      toState: { id, output, state ->
+        state + [
+          score_uns: readYaml(output.output).uns
+        ]
+      }
+    )
 
+    | joinStates { ids, states ->
+      // store the method configs in a file
+      def method_configs = methods.collect{it.config}
+      def method_configs_yaml_blob = toYamlBlob(method_configs)
+      def method_configs_file = tempFile("method_configs.yaml")
+      method_configs_file.write(method_configs_yaml_blob)
 
+      // store the metric configs in a file
+      def metric_configs = metrics.collect{it.config}
+      def metric_configs_yaml_blob = toYamlBlob(metric_configs)
+      def metric_configs_file = tempFile("metric_configs.yaml")
+      metric_configs_file.write(metric_configs_yaml_blob)
 
-def run_benchmark_fun(args) {
-  // required args
-  def methods_ = args.methods
-  def metrics_ = args.metrics
-  def methodFromState = args.methodFromState
-  def methodToState = args.methodToState
-  def metricFromState = args.metricFromState
-  def metricToState = args.metricToState
+      def task_info_file = meta.resources_dir.resolve("task_info.yaml")
 
-  assert methods_, "methods must be defined"
-  assert metrics_, "metrics must be defined"
-  assert methodFromState, "methodFromState must be defined"
-  assert methodToState, "methodToState must be defined"
-  assert metricFromState, "metricFromState must be defined"
-  assert metricToState, "metricToState must be defined"
+      // store the scores in a file
+      def score_uns = states.collect{it.score_uns}
+      def score_uns_yaml_blob = toYamlBlob(score_uns)
+      def score_uns_file = tempFile("score_uns.yaml")
+      score_uns_file.write(score_uns_yaml_blob)
 
-  // optional args
-  def keyPrefix = args.keyPrefix ?: ""
-  def methodAuto = args.methodAuto ?: [:]
-  def metricAuto = args.metricAuto ?: [:]
+      def new_state = [
+        // output_method_configs: method_configs_file,
+        // output_metric_configs: metric_configs_file,
+        // output_task_info: task_info_file,
+        scores: score_uns_file,
+        _meta: states[0]._meta
+      ]
 
-  // add the key prefix to the method and metric names
-  if (keyPrefix && keyPrefix != "") {
-    methods_ = methods.collect{ method ->
-      method.run(key: keyPrefix + method.config.functionality.name)
-    }
-    metrics_ = metrics.collect{ metric ->
-      metric.run(key: keyPrefix + metric.config.functionality.name)
+      ["output", new_state]
     }
-  }
-
-  workflow bench {
-    take: input_ch
-
-    main:
-    output_ch = input_ch
-      // run all methods
-      | runEach(
-        components: methods_,
-        filter: { id, state, comp ->
-          !state.method_ids || state.method_ids.contains(comp.config.functionality.name)
-        },
-        id: { id, state, comp ->
-          id + "." + comp.config.functionality.name
-        },
-        fromState: methodFromState,
-        toState: methodToState,
-        auto: methodAuto
-      )
-
-      // run all metrics
-      | runEach(
-        components: metrics_,
-        filter: { id, state, comp ->
-          !state.metric_ids || state.metric_ids.contains(comp.config.functionality.name)
-        },
-        id: { id, state, comp ->
-          id + "." + comp.config.functionality.name
-        },
-        fromState: metricFromState,
-        toState: metricToState,
-        auto: metricAuto
-      )
-
-      // extract the scores
-      | extract_metadata.run(
-        key: "${keyPrefix}score_uns",
-        fromState: [input: "metric_output"],
-        toState: { id, output, state ->
-          state + [
-            score_uns: readYaml(output.output).uns
-          ]
-        }
-      )
-
-    emit: output_ch
-  }
-  return bench
-}
-
-
-def create_metadata_files(args) {
-  // required args
-  def meta_ = args.meta
-  def methods_ = args.methods
-  def metrics_ = args.metrics
-  def datasetFromState = args.datasetFromState
-
-  assert meta_, "meta must be defined"
-  assert methods_, "methods must be defined"
-  assert metrics_, "metrics must be defined"
-  assert datasetFromState, "datasetFromState must be defined"
-
-  workflow metadata {
-    take: input_ch
-
-    main:
-    output_ch = input_ch
-
-      | map{ id, state ->
-        [id, state + ["_meta": [join_id: id]]]
-      }
-
-      | extract_metadata.run(
-        key: "dataset_uns",
-        fromState: args.datasetFromState,
-        toState: { id, output, state ->
-          state + [
-            dataset_info: readYaml(output.output).uns
-          ]
-        }
-      )
-
-      | joinStates { ids, states ->
-        assert states.size() > 0, "no states found"
-        assert states[0]._meta, "no _meta found in state[0]"
-        assert states.every{it.dataset_info}, "not all states have dataset_info"
-
-        // combine the dataset info into one file
-        def dataset_uns = states.collect{it.dataset_info}
-        def dataset_uns_yaml_blob = toYamlBlob(dataset_uns)
-        def dataset_uns_file = tempFile("dataset_uns.yaml")
-        dataset_uns_file.write(dataset_uns_yaml_blob)
-
-        // store the method configs in a file
-        def method_configs = methods_.collect{it.config}
-        def method_configs_yaml_blob = toYamlBlob(method_configs)
-        def method_configs_file = tempFile("method_configs.yaml")
-        method_configs_file.write(method_configs_yaml_blob)
-
-        // store the metric configs in a file
-        def metric_configs = metrics_.collect{it.config}
-        def metric_configs_yaml_blob = toYamlBlob(metric_configs)
-        def metric_configs_file = tempFile("metric_configs.yaml")
-        metric_configs_file.write(metric_configs_yaml_blob)
-
-        def task_info_file = meta_.resources_dir.resolve("task_info.yaml")
-
-        def new_state = [
-          dataset_uns: dataset_uns_file,
-          method_configs: method_configs_file,
-          metric_configs: metric_configs_file,
-          task_info: task_info_file,
-          _meta: states[0]._meta
-        ]
-        ["output", new_state]
-      }
-    emit: output_ch
-  }
-  return metadata
-}
+
+  // merge all of the output data
+  // | mix(dataset_meta_ch)
+  | joinStates{ ids, states ->
+    def mergedStates = states.inject([:]) { acc, m -> acc + m }
+    [ids[0], mergedStates]
+  }
+
+  emit:
+  output_ch
+}
\ No newline at end of file
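Note: once the run publishes to `publish_dir` (see `scripts/run_benchmark_single_omics.sh`), the resulting `scores.yaml` can be tabulated downstream. A sketch, assuming the record fields used in the earlier snippet:

```python
import pandas as pd
import yaml

with open("scores.yaml") as f:
    records = yaml.safe_load(f)

rows = [
    {"method": r["method_id"], "metric": m, "value": v}
    for r in records
    for m, v in zip(r["metric_ids"], r["metric_values"])
]
print(pd.DataFrame(rows).pivot(index="method", columns="metric", values="value"))
```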