From 9c77313765b714beac0fc2a331f568bb81f4da10 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Mon, 9 Dec 2024 13:40:38 +0100 Subject: [PATCH] Prepare task for adding foundation models (#24) * Update common submodule * Use checkItemAllowed() for benchmark method check * Replace cxg_mouse_pancreas_atlas with cxg_immune_cell_atlas * Update README * Update CHANGELOG * Add a base method API schema * Update CHANGELOG * Add config check to base method schema * Add dataset_organism to training dataset files --- CHANGELOG.md | 18 +++++++++-- README.md | 31 +++++++++---------- common | 2 +- scripts/create_resources/test_resources.sh | 22 ++++++------- scripts/run_benchmark/run_test_local.sh | 6 ++-- scripts/run_benchmark/run_test_seqeracloud.sh | 6 ++-- src/api/base_method.yaml | 20 ++++++++++++ src/api/comp_control_method.yaml | 6 ++-- src/api/comp_data_processor.yaml | 4 +-- src/api/comp_method.yaml | 22 ++----------- src/api/comp_metric.yaml | 4 +-- src/api/file_common_dataset.yaml | 6 ++-- src/api/file_prediction.yaml | 4 +-- src/api/file_score.yaml | 4 +-- src/api/file_test.yaml | 6 ++-- src/api/file_train.yaml | 10 ++++-- .../perfect_denoising/script.py | 4 +-- src/data_processors/process_dataset/script.py | 5 +-- src/methods/alra/script.R | 2 +- src/methods/dca/script.py | 2 +- src/methods/knn_smoothing/script.py | 2 +- src/methods/magic/script.py | 3 +- src/methods/saver/script.R | 2 +- src/metrics/mse/script.py | 5 ++- src/metrics/poisson/script.py | 4 +-- src/workflows/run_benchmark/config.vsh.yaml | 21 +++++++++++-- src/workflows/run_benchmark/main.nf | 20 ++++++++---- 27 files changed, 141 insertions(+), 100 deletions(-) create mode 100644 src/api/base_method.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index b8556ac..3eb0ebe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,18 @@ * Update workflows to use core repository dependency (PR #20). +* Update the `common` submodule (PR #24) + +* Use the common `checkItemAllowed()` for the method check in the benchmark workflow (PR #24) + +* Use the `cxg_immune_cell_atlas` dataset instead of the `cxg_mouse_pancreas_atlas` for testing (PR #24) + +* Update `README` (PR #24) + +* Add a base method API schema (PR #24) + +* Add `dataset_organism` to training input files (PR #24) + ## BUG FIXES * Update the nextflow workflow dependencies (PR #17). @@ -57,7 +69,7 @@ * `process_dataset`: Added a component for processing common datasets into task-ready dataset objects. * `resources_test/denoising/pancreas` with `src/tasks/denoising/resources_test_scripts/pancreas.sh`. - + * `workflows/run`: Added nf-tower test script. (PR #205) ### V1 MIGRATION @@ -81,7 +93,7 @@ ### Changes from V1 * Anndata layers are used to store data instead of obsm - + * extended the use of sparse data in methods unless it was not possible -* process_dataset also removes unnecessary data from train and test datasets not needed by the methods and metrics. \ No newline at end of file +* process_dataset also removes unnecessary data from train and test datasets not needed by the methods and metrics. diff --git a/README.md b/README.md index 05210b1..a519d9f 100644 --- a/README.md +++ b/README.md @@ -45,16 +45,16 @@ dataset. 
## API ``` mermaid -flowchart LR - file_common_dataset("Common Dataset") - comp_data_processor[/"Data processor"/] - file_test("Test data") - file_train("Training data") - comp_control_method[/"Control Method"/] - comp_metric[/"Metric"/] - comp_method[/"Method"/] - file_prediction("Denoised data") - file_score("Score") +flowchart TB + file_common_dataset("Common Dataset") + comp_data_processor[/"Data processor"/] + file_test("Test data") + file_train("Training data") + comp_control_method[/"Control Method"/] + comp_metric[/"Metric"/] + comp_method[/"Method"/] + file_prediction("Denoised data") + file_score("Score") file_common_dataset---comp_data_processor comp_data_processor-->file_test comp_data_processor-->file_train @@ -72,8 +72,7 @@ flowchart LR A subset of the common dataset. -Example file: -`resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad` +Example file: `resources_test/common/cxg_immune_cell_atlas/dataset.h5ad` Format: @@ -125,7 +124,7 @@ Arguments: The subset of molecules used for the test dataset Example file: -`resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad` +`resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad` Format: @@ -160,7 +159,7 @@ Data structure: The subset of molecules used for the training dataset Example file: -`resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad` +`resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad` Format: @@ -235,7 +234,7 @@ Arguments: A denoised dataset as output by a method. Example file: -`resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad` +`resources_test/task_denoising/cxg_immune_cell_atlas/denoised.h5ad` Format: @@ -264,7 +263,7 @@ Data structure: File indicating the score of a metric. Example file: -`resources_test/task_denoising/cxg_mouse_pancreas_atlas/score.h5ad` +`resources_test/task_denoising/cxg_immune_cell_atlas/score.h5ad` Format: diff --git a/common b/common index e64f472..65e05af 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit e64f472b37f1bdbd383640098708ecf5c9f7fd7e +Subproject commit 65e05af68a11ee87853fcf7a3c6b579001f21abe diff --git a/scripts/create_resources/test_resources.sh b/scripts/create_resources/test_resources.sh index 866c924..4711d79 100755 --- a/scripts/create_resources/test_resources.sh +++ b/scripts/create_resources/test_resources.sh @@ -15,24 +15,24 @@ mkdir -p $DATASET_DIR # process dataset viash run src/data_processors/process_dataset/config.vsh.yaml -- \ - --input $RAW_DATA/cxg_mouse_pancreas_atlas/dataset.h5ad \ - --output_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \ - --output_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad + --input $RAW_DATA/cxg_immune_cell_atlas/dataset.h5ad \ + --output_train $DATASET_DIR/cxg_immune_cell_atlas/train.h5ad \ + --output_test $DATASET_DIR/cxg_immune_cell_atlas/test.h5ad # run one method viash run src/methods/magic/config.vsh.yaml -- \ - --input_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \ - --output $DATASET_DIR/cxg_mouse_pancreas_atlas/denoised.h5ad + --input_train $DATASET_DIR/cxg_immune_cell_atlas/train.h5ad \ + --output $DATASET_DIR/cxg_immune_cell_atlas/denoised.h5ad # run one metric viash run src/metrics/poisson/config.vsh.yaml -- \ - --input_prediction $DATASET_DIR/cxg_mouse_pancreas_atlas/denoised.h5ad \ - --input_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \ - --output $DATASET_DIR/cxg_mouse_pancreas_atlas/score.h5ad + --input_prediction $DATASET_DIR/cxg_immune_cell_atlas/denoised.h5ad \ + --input_test 
$DATASET_DIR/cxg_immune_cell_atlas/test.h5ad \ + --output $DATASET_DIR/cxg_immune_cell_atlas/score.h5ad # write manual state.yaml. this is not actually necessary but you never know it might be useful -cat > $DATASET_DIR/cxg_mouse_pancreas_atlas/state.yaml << HERE -id: cxg_mouse_pancreas_atlas +cat > $DATASET_DIR/cxg_immune_cell_atlas/state.yaml << HERE +id: cxg_immune_cell_atlas train: !file train.h5ad test: !file test.h5ad prediction: !file denoised.h5ad @@ -40,6 +40,6 @@ score: !file score.h5ad HERE # only run this if you have access to the openproblems-data bucket -aws s3 sync --profile OP \ +aws s3 sync --profile op \ "$DATASET_DIR" s3://openproblems-data/resources_test/task_denoising \ --delete --dryrun diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh index 30015fb..55580c0 100755 --- a/scripts/run_benchmark/run_test_local.sh +++ b/scripts/run_benchmark/run_test_local.sh @@ -20,8 +20,8 @@ nextflow run . \ -profile docker \ -resume \ -c common/nextflow_helpers/labels_ci.config \ - --id cxg_mouse_pancreas_atlas \ - --input_train resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad \ - --input_test resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad \ + --id cxg_immune_cell_atlas \ + --input_train resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad \ + --input_test resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad \ --output_state state.yaml \ --publish_dir "$publish_dir" diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh index 5e673e2..a728475 100755 --- a/scripts/run_benchmark/run_test_seqeracloud.sh +++ b/scripts/run_benchmark/run_test_seqeracloud.sh @@ -13,9 +13,9 @@ publish_dir_s3="s3://openproblems-nextflow/temp/results/task_denoising/$(date +% # write the parameters to file cat > /tmp/params.yaml << HERE -id: cxg_mouse_pancreas_atlas -input_train: $resources_test_s3/cxg_mouse_pancreas_atlas/train.h5ad -input_test: $resources_test_s3/cxg_mouse_pancreas_atlas/test.h5ad +id: cxg_immune_cell_atlas +input_train: $resources_test_s3/cxg_immune_cell_atlas/train.h5ad +input_test: $resources_test_s3/cxg_immune_cell_atlas/test.h5ad output_state: "state.yaml" publish_dir: $publish_dir_s3 HERE diff --git a/src/api/base_method.yaml b/src/api/base_method.yaml new file mode 100644 index 0000000..07d7481 --- /dev/null +++ b/src/api/base_method.yaml @@ -0,0 +1,20 @@ +namespace: "methods" +info: + type: method + type_info: + label: Method + summary: A method. + description: | + A denoising method to remove noise (i.e. technical artifacts) from a dataset. +arguments: + - name: --input_train + __merge__: file_train.yaml + required: true + direction: input + - name: --output + __merge__: file_prediction.yaml + required: true + direction: output +test_resources: + - type: python_script + path: /common/component_tests/check_config.py diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml index d64b5b0..2988eb5 100644 --- a/src/api/comp_control_method.yaml +++ b/src/api/comp_control_method.yaml @@ -9,7 +9,7 @@ info: but also receive the solution object as input. It serves as a starting point to test the relative accuracy of new methods in the task, and also as a quality control for the metrics defined - in the task. + in the task. 
arguments: - name: --input_train __merge__: file_train.yaml @@ -29,5 +29,5 @@ test_resources: - type: python_script path: /common/component_tests/check_config.py - path: /common/library.bib - - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas - dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas \ No newline at end of file + - path: /resources_test/task_denoising/cxg_immune_cell_atlas + dest: resources_test/task_denoising/cxg_immune_cell_atlas diff --git a/src/api/comp_data_processor.yaml b/src/api/comp_data_processor.yaml index a500c27..f2dcb66 100644 --- a/src/api/comp_data_processor.yaml +++ b/src/api/comp_data_processor.yaml @@ -22,5 +22,5 @@ arguments: test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - - path: /resources_test/common/cxg_mouse_pancreas_atlas - dest: resources_test/common/cxg_mouse_pancreas_atlas + - path: /resources_test/common/cxg_immune_cell_atlas + dest: resources_test/common/cxg_immune_cell_atlas diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml index bfbd4fd..da7c11d 100644 --- a/src/api/comp_method.yaml +++ b/src/api/comp_method.yaml @@ -1,25 +1,9 @@ -namespace: "methods" -info: - type: method - type_info: - label: Method - summary: A method. - description: | - A denoising method to remove noise (i.e. technical artifacts) from a dataset. -arguments: - - name: --input_train - __merge__: file_train.yaml - required: true - direction: input - - name: --output - __merge__: file_prediction.yaml - required: true - direction: output +__merge__: base_method.yaml test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - type: python_script path: /common/component_tests/check_config.py - path: /common/library.bib - - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas - dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas \ No newline at end of file + - path: /resources_test/task_denoising/cxg_immune_cell_atlas + dest: resources_test/task_denoising/cxg_immune_cell_atlas diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml index bfccf93..e113937 100644 --- a/src/api/comp_metric.yaml +++ b/src/api/comp_metric.yaml @@ -25,5 +25,5 @@ test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - path: /common/library.bib - - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas - dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas + - path: /resources_test/task_denoising/cxg_immune_cell_atlas + dest: resources_test/task_denoising/cxg_immune_cell_atlas diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml index e6174b9..57ff616 100644 --- a/src/api/file_common_dataset.yaml +++ b/src/api/file_common_dataset.yaml @@ -1,11 +1,11 @@ type: file -example: "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad" +example: "resources_test/common/cxg_immune_cell_atlas/dataset.h5ad" label: "Common Dataset" summary: A subset of the common dataset. 
info: format: type: h5ad - layers: + layers: - type: integer name: counts description: Raw counts @@ -15,7 +15,7 @@ info: name: batch description: Batch information required: false - + uns: - type: string name: dataset_id diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml index ecb55ff..f48a4b3 100644 --- a/src/api/file_prediction.yaml +++ b/src/api/file_prediction.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad" +example: "resources_test/task_denoising/cxg_immune_cell_atlas/denoised.h5ad" label: "Denoised data" summary: A denoised dataset as output by a method. info: @@ -18,4 +18,4 @@ info: - type: string name: method_id description: "A unique identifier for the method" - required: true \ No newline at end of file + required: true diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml index 103a7cd..bda2ede 100644 --- a/src/api/file_score.yaml +++ b/src/api/file_score.yaml @@ -1,5 +1,5 @@ type: file -example: resources_test/task_denoising/cxg_mouse_pancreas_atlas/score.h5ad +example: resources_test/task_denoising/cxg_immune_cell_atlas/score.h5ad label: Score summary: "File indicating the score of a metric." info: @@ -23,4 +23,4 @@ info: name: metric_values description: "The metric values obtained for the given prediction. Must be of same length as 'metric_ids'." multiple: true - required: true \ No newline at end of file + required: true diff --git a/src/api/file_test.yaml b/src/api/file_test.yaml index 1c6d202..b6db758 100644 --- a/src/api/file_test.yaml +++ b/src/api/file_test.yaml @@ -1,11 +1,11 @@ type: file -example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad" +example: "resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad" label: "Test data" summary: The subset of molecules used for the test dataset info: format: type: h5ad - layers: + layers: - type: integer name: counts description: Raw counts @@ -42,4 +42,4 @@ info: - name: train_sum type: integer description: The total number of counts in the training dataset. - required: true \ No newline at end of file + required: true diff --git a/src/api/file_train.yaml b/src/api/file_train.yaml index 6b60dc1..fc7ba7c 100644 --- a/src/api/file_train.yaml +++ b/src/api/file_train.yaml @@ -1,11 +1,11 @@ type: file -example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad" +example: "resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad" label: "Training data" summary: The subset of molecules used for the training dataset info: format: type: h5ad - layers: + layers: - type: integer name: counts description: Raw counts @@ -14,4 +14,8 @@ info: - type: string name: dataset_id description: "A unique identifier for the dataset" - required: true \ No newline at end of file + required: true + - name: dataset_organism + type: string + description: The organism of the sample in the dataset. 
+ required: false diff --git a/src/control_methods/perfect_denoising/script.py b/src/control_methods/perfect_denoising/script.py index 3ed780c..2960ed4 100644 --- a/src/control_methods/perfect_denoising/script.py +++ b/src/control_methods/perfect_denoising/script.py @@ -2,8 +2,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', - 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad', 'output': 'output_PD.h5ad', } meta = { diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py index b606975..8f3f5ac 100644 --- a/src/data_processors/process_dataset/script.py +++ b/src/data_processors/process_dataset/script.py @@ -45,7 +45,7 @@ obs_filt = np.ones(dtype=np.bool_, shape=adata_output.n_obs) obs_index = np.random.choice(np.where(obs_filt)[0], par["n_obs_limit"], replace=False) adata_output = adata_output[obs_index].copy() - + # remove all layers except for counts print(">> Remove all layers except for counts", flush=True) for key in list(adata_output.layers.keys()): @@ -70,11 +70,12 @@ # copy adata to train_set, test_set print(">> Create AnnData output objects", flush=True) +train_uns_keys = ["dataset_id", "dataset_organism"] output_train = ad.AnnData( layers={"counts": X_train}, obs=adata_output.obs[[]], var=adata_output.var[[]], - uns={"dataset_id": adata_output.uns["dataset_id"]} + uns={key: adata_output.uns[key] for key in train_uns_keys} ) test_uns_keys = ["dataset_id", "dataset_name", "dataset_url", "dataset_reference", "dataset_summary", "dataset_description", "dataset_organism"] output_test = ad.AnnData( diff --git a/src/methods/alra/script.R b/src/methods/alra/script.R index 9f7536a..49bba72 100644 --- a/src/methods/alra/script.R +++ b/src/methods/alra/script.R @@ -4,7 +4,7 @@ library(ALRA, warn.conflicts = FALSE) ## VIASH START par <- list( - input_train = "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", + input_train = "resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad", norm = "log", output = "output.h5ad" ) diff --git a/src/methods/dca/script.py b/src/methods/dca/script.py index a045ad4..32c2c84 100644 --- a/src/methods/dca/script.py +++ b/src/methods/dca/script.py @@ -3,7 +3,7 @@ ## VIASH START par = { - 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad', 'output': 'output_dca.h5ad', 'epochs': 300, } diff --git a/src/methods/knn_smoothing/script.py b/src/methods/knn_smoothing/script.py index 0fea7ed..a0b0fa3 100644 --- a/src/methods/knn_smoothing/script.py +++ b/src/methods/knn_smoothing/script.py @@ -3,7 +3,7 @@ ## VIASH START par = { - 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad', 'output': 'output_knn.h5ad', } meta = { diff --git a/src/methods/magic/script.py b/src/methods/magic/script.py index 2ca832d..67a25c6 100644 --- a/src/methods/magic/script.py +++ b/src/methods/magic/script.py @@ -7,7 +7,7 @@ ## VIASH START par = { - "input_train": "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", + "input_train": "resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad", "output": 
"output_magic.h5ad", "solver": "exact", "norm": "sqrt", @@ -73,4 +73,3 @@ print("Write Data", flush=True) output.write_h5ad(par["output"], compression="gzip") - diff --git a/src/methods/saver/script.R b/src/methods/saver/script.R index 8c6c320..ab1d658 100644 --- a/src/methods/saver/script.R +++ b/src/methods/saver/script.R @@ -5,7 +5,7 @@ library(Matrix, warn.conflicts = FALSE) ## VIASH START par <- list( - input_train = "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", + input_train = "resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad", norm = "log", output = "output.h5ad" ) diff --git a/src/metrics/mse/script.py b/src/metrics/mse/script.py index f5212b2..8d70589 100644 --- a/src/metrics/mse/script.py +++ b/src/metrics/mse/script.py @@ -5,8 +5,8 @@ ## VIASH START par = { - 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', - 'input_prediction': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad', + 'input_prediction': 'resources_test/task_denoising/cxg_immune_cell_atlas/denoised.h5ad', 'output': 'output_mse.h5ad' } meta = { @@ -48,4 +48,3 @@ print("Write adata to file", flush=True) output.write_h5ad(par['output'], compression="gzip") - diff --git a/src/metrics/poisson/script.py b/src/metrics/poisson/script.py index d8e2408..43caef3 100644 --- a/src/metrics/poisson/script.py +++ b/src/metrics/poisson/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', - 'input_prediction': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad', + 'input_prediction': 'resources_test/task_denoising/cxg_immune_cell_atlas/denoised.h5ad', 'output': 'output_poisson.h5ad' } meta = { diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index e162544..083dd30 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -39,18 +39,33 @@ argument_groups: required: true direction: output default: task_info.yaml - - name: Methods + - name: Method filtering + description: | + Use these arguments to filter methods by name. By default, all methods are + run. If `--methods_include` is defined, only those methods are run. If + `--methods_exclude` is defined, all methods except those specified are run. + These arguments are mutually exclusive, so only `--methods_include` OR + `--methods_exclude` can set but not both. arguments: - - name: "--method_ids" + - name: "--methods_include" type: string multiple: true - description: A list of method ids to run. If not specified, all methods will be run. + description: | + A list of method ids to include. If specified, only these methods will be run. + - name: "--methods_exclude" + type: string + multiple: true + description: | + A list of method ids to exclude. If specified, all methods except the ones listed will be run. 
+ resources: - type: nextflow_script path: main.nf entrypoint: run_wf - type: file path: /_viash.yaml + - path: /common/nextflow_helpers/helper.nf + dependencies: - name: h5ad/extract_uns_metadata repository: core diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index fe8defb..97155fb 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -1,3 +1,5 @@ +include { checkItemAllowed } from "${meta.resources_dir}/helper.nf" + workflow auto { findStates(params, meta.config) | meta.workflow.run( @@ -32,7 +34,7 @@ workflow run_wf { ****************************/ dataset_ch = input_ch // store join id - | map{ id, state -> + | map{ id, state -> [id, state + ["_meta": [join_id: id]]] } @@ -45,7 +47,7 @@ workflow run_wf { ] } ) - + /*************************** * RUN METHODS AND METRICS * ***************************/ @@ -57,7 +59,13 @@ workflow run_wf { // use the 'filter' argument to only run a defined method or all methods filter: { id, state, comp -> - def method_check = !state.method_ids || state.method_ids.contains(comp.config.name) + def method_check = checkItemAllowed( + comp.config.name, + state.methods_include, + state.methods_exclude, + "methods_include", + "methods_exclude" + ) method_check }, @@ -88,7 +96,7 @@ workflow run_wf { }, // use 'fromState' to fetch the arguments the component requires from the overall state fromState: [ - input_test: "input_test", + input_test: "input_test", input_prediction: "method_output" ], // use 'toState' to publish that component's outputs to the overall state @@ -117,7 +125,7 @@ workflow run_wf { def score_uns_yaml_blob = toYamlBlob(score_uns) def score_uns_file = tempFile("score_uns.yaml") score_uns_file.write(score_uns_yaml_blob) - + ["output", [output_scores: score_uns_file]] } @@ -171,7 +179,7 @@ workflow run_wf { ["output", new_state] } - // merge all of the output data + // merge all of the output data output_ch = score_ch | mix(meta_ch) | joinStates{ ids, states ->
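
Note on the new method filtering: the `filter` closure above delegates to `checkItemAllowed()` from the `common` submodule's `helper.nf`, whose exact implementation lives in that repository. The snippet below is only a hypothetical Python rendering of the semantics documented in the `Method filtering` argument group, added here as an illustration rather than a copy of the real helper.

``` python
# Hypothetical sketch (not the actual helper.nf implementation) of the
# include/exclude semantics used by the run_benchmark workflow:
# - neither list set -> every method runs
# - methods_include  -> only the listed methods run
# - methods_exclude  -> every method except the listed ones runs
# - both set         -> invalid, the workflow should raise an error
def check_item_allowed(item, include=None, exclude=None):
    if include and exclude:
        raise ValueError("methods_include and methods_exclude are mutually exclusive")
    if include:
        return item in include
    if exclude:
        return item not in exclude
    return True

# Example: with methods_include=["magic"], only the 'magic' component passes the filter.
assert check_item_allowed("magic", include=["magic"])
assert not check_item_allowed("alra", include=["magic"])
assert not check_item_allowed("magic", exclude=["magic"])
assert check_item_allowed("alra")
```

In the workflow itself the same check runs once per method component, with `comp.config.name` as the item and `state.methods_include` / `state.methods_exclude` as the two lists.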