From 9c77313765b714beac0fc2a331f568bb81f4da10 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Mon, 9 Dec 2024 13:40:38 +0100 Subject: [PATCH] Prepare task for adding foundation models (#24) * Update common submodule * Use checkItemAllowed() for benchmark method check * Replace cxg_mouse_pancreas_atlas with cxg_immune_cell_atlas * Update README * Update CHANGELOG * Add a base method API schema * Update CHANGELOG * Add config check to base method schema * Add dataset_organism to training dataset files --- CHANGELOG.md | 18 +++++++++-- README.md | 31 +++++++++---------- common | 2 +- scripts/create_resources/test_resources.sh | 22 ++++++------- scripts/run_benchmark/run_test_local.sh | 6 ++-- scripts/run_benchmark/run_test_seqeracloud.sh | 6 ++-- src/api/base_method.yaml | 20 ++++++++++++ src/api/comp_control_method.yaml | 6 ++-- src/api/comp_data_processor.yaml | 4 +-- src/api/comp_method.yaml | 22 ++----------- src/api/comp_metric.yaml | 4 +-- src/api/file_common_dataset.yaml | 6 ++-- src/api/file_prediction.yaml | 4 +-- src/api/file_score.yaml | 4 +-- src/api/file_test.yaml | 6 ++-- src/api/file_train.yaml | 10 ++++-- .../perfect_denoising/script.py | 4 +-- src/data_processors/process_dataset/script.py | 5 +-- src/methods/alra/script.R | 2 +- src/methods/dca/script.py | 2 +- src/methods/knn_smoothing/script.py | 2 +- src/methods/magic/script.py | 3 +- src/methods/saver/script.R | 2 +- src/metrics/mse/script.py | 5 ++- src/metrics/poisson/script.py | 4 +-- src/workflows/run_benchmark/config.vsh.yaml | 21 +++++++++++-- src/workflows/run_benchmark/main.nf | 20 ++++++++---- 27 files changed, 141 insertions(+), 100 deletions(-) create mode 100644 src/api/base_method.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index b8556ac..3eb0ebe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,18 @@ * Update workflows to use core repository dependency (PR #20). +* Update the `common` submodule (PR #24) + +* Use the common `checkItemAllowed()` for the method check in the benchmark workflow (PR #24) + +* Use the `cxg_immune_cell_atlas` dataset instead of the `cxg_mouse_pancreas_atlas` for testing (PR #24) + +* Update `README` (PR #24) + +* Add a base method API schema (PR #24) + +* Add `dataset_organism` to training input files (PR #24) + ## BUG FIXES * Update the nextflow workflow dependencies (PR #17). @@ -57,7 +69,7 @@ * `process_dataset`: Added a component for processing common datasets into task-ready dataset objects. * `resources_test/denoising/pancreas` with `src/tasks/denoising/resources_test_scripts/pancreas.sh`. - + * `workflows/run`: Added nf-tower test script. (PR #205) ### V1 MIGRATION @@ -81,7 +93,7 @@ ### Changes from V1 * Anndata layers are used to store data instead of obsm - + * extended the use of sparse data in methods unless it was not possible -* process_dataset also removes unnecessary data from train and test datasets not needed by the methods and metrics. \ No newline at end of file +* process_dataset also removes unnecessary data from train and test datasets not needed by the methods and metrics. diff --git a/README.md b/README.md index 05210b1..a519d9f 100644 --- a/README.md +++ b/README.md @@ -45,16 +45,16 @@ dataset. 
## API ``` mermaid -flowchart LR - file_common_dataset("Common Dataset") - comp_data_processor[/"Data processor"/] - file_test("Test data") - file_train("Training data") - comp_control_method[/"Control Method"/] - comp_metric[/"Metric"/] - comp_method[/"Method"/] - file_prediction("Denoised data") - file_score("Score") +flowchart TB + file_common_dataset("Common Dataset") + comp_data_processor[/"Data processor"/] + file_test("Test data") + file_train("Training data") + comp_control_method[/"Control Method"/] + comp_metric[/"Metric"/] + comp_method[/"Method"/] + file_prediction("Denoised data") + file_score("Score") file_common_dataset---comp_data_processor comp_data_processor-->file_test comp_data_processor-->file_train @@ -72,8 +72,7 @@ flowchart LR A subset of the common dataset. -Example file: -`resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad` +Example file: `resources_test/common/cxg_immune_cell_atlas/dataset.h5ad` Format: @@ -125,7 +124,7 @@ Arguments: The subset of molecules used for the test dataset Example file: -`resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad` +`resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad` Format: @@ -160,7 +159,7 @@ Data structure: The subset of molecules used for the training dataset Example file: -`resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad` +`resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad` Format: @@ -235,7 +234,7 @@ Arguments: A denoised dataset as output by a method. Example file: -`resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad` +`resources_test/task_denoising/cxg_immune_cell_atlas/denoised.h5ad` Format: @@ -264,7 +263,7 @@ Data structure: File indicating the score of a metric. Example file: -`resources_test/task_denoising/cxg_mouse_pancreas_atlas/score.h5ad` +`resources_test/task_denoising/cxg_immune_cell_atlas/score.h5ad` Format: diff --git a/common b/common index e64f472..65e05af 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit e64f472b37f1bdbd383640098708ecf5c9f7fd7e +Subproject commit 65e05af68a11ee87853fcf7a3c6b579001f21abe diff --git a/scripts/create_resources/test_resources.sh b/scripts/create_resources/test_resources.sh index 866c924..4711d79 100755 --- a/scripts/create_resources/test_resources.sh +++ b/scripts/create_resources/test_resources.sh @@ -15,24 +15,24 @@ mkdir -p $DATASET_DIR # process dataset viash run src/data_processors/process_dataset/config.vsh.yaml -- \ - --input $RAW_DATA/cxg_mouse_pancreas_atlas/dataset.h5ad \ - --output_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \ - --output_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad + --input $RAW_DATA/cxg_immune_cell_atlas/dataset.h5ad \ + --output_train $DATASET_DIR/cxg_immune_cell_atlas/train.h5ad \ + --output_test $DATASET_DIR/cxg_immune_cell_atlas/test.h5ad # run one method viash run src/methods/magic/config.vsh.yaml -- \ - --input_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \ - --output $DATASET_DIR/cxg_mouse_pancreas_atlas/denoised.h5ad + --input_train $DATASET_DIR/cxg_immune_cell_atlas/train.h5ad \ + --output $DATASET_DIR/cxg_immune_cell_atlas/denoised.h5ad # run one metric viash run src/metrics/poisson/config.vsh.yaml -- \ - --input_prediction $DATASET_DIR/cxg_mouse_pancreas_atlas/denoised.h5ad \ - --input_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \ - --output $DATASET_DIR/cxg_mouse_pancreas_atlas/score.h5ad + --input_prediction $DATASET_DIR/cxg_immune_cell_atlas/denoised.h5ad \ + --input_test 
$DATASET_DIR/cxg_immune_cell_atlas/test.h5ad \ + --output $DATASET_DIR/cxg_immune_cell_atlas/score.h5ad # write manual state.yaml. this is not actually necessary but you never know it might be useful -cat > $DATASET_DIR/cxg_mouse_pancreas_atlas/state.yaml << HERE -id: cxg_mouse_pancreas_atlas +cat > $DATASET_DIR/cxg_immune_cell_atlas/state.yaml << HERE +id: cxg_immune_cell_atlas train: !file train.h5ad test: !file test.h5ad prediction: !file denoised.h5ad @@ -40,6 +40,6 @@ score: !file score.h5ad HERE # only run this if you have access to the openproblems-data bucket -aws s3 sync --profile OP \ +aws s3 sync --profile op \ "$DATASET_DIR" s3://openproblems-data/resources_test/task_denoising \ --delete --dryrun diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh index 30015fb..55580c0 100755 --- a/scripts/run_benchmark/run_test_local.sh +++ b/scripts/run_benchmark/run_test_local.sh @@ -20,8 +20,8 @@ nextflow run . \ -profile docker \ -resume \ -c common/nextflow_helpers/labels_ci.config \ - --id cxg_mouse_pancreas_atlas \ - --input_train resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad \ - --input_test resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad \ + --id cxg_immune_cell_atlas \ + --input_train resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad \ + --input_test resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad \ --output_state state.yaml \ --publish_dir "$publish_dir" diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh index 5e673e2..a728475 100755 --- a/scripts/run_benchmark/run_test_seqeracloud.sh +++ b/scripts/run_benchmark/run_test_seqeracloud.sh @@ -13,9 +13,9 @@ publish_dir_s3="s3://openproblems-nextflow/temp/results/task_denoising/$(date +% # write the parameters to file cat > /tmp/params.yaml << HERE -id: cxg_mouse_pancreas_atlas -input_train: $resources_test_s3/cxg_mouse_pancreas_atlas/train.h5ad -input_test: $resources_test_s3/cxg_mouse_pancreas_atlas/test.h5ad +id: cxg_immune_cell_atlas +input_train: $resources_test_s3/cxg_immune_cell_atlas/train.h5ad +input_test: $resources_test_s3/cxg_immune_cell_atlas/test.h5ad output_state: "state.yaml" publish_dir: $publish_dir_s3 HERE diff --git a/src/api/base_method.yaml b/src/api/base_method.yaml new file mode 100644 index 0000000..07d7481 --- /dev/null +++ b/src/api/base_method.yaml @@ -0,0 +1,20 @@ +namespace: "methods" +info: + type: method + type_info: + label: Method + summary: A method. + description: | + A denoising method to remove noise (i.e. technical artifacts) from a dataset. +arguments: + - name: --input_train + __merge__: file_train.yaml + required: true + direction: input + - name: --output + __merge__: file_prediction.yaml + required: true + direction: output +test_resources: + - type: python_script + path: /common/component_tests/check_config.py diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml index d64b5b0..2988eb5 100644 --- a/src/api/comp_control_method.yaml +++ b/src/api/comp_control_method.yaml @@ -9,7 +9,7 @@ info: but also receive the solution object as input. It serves as a starting point to test the relative accuracy of new methods in the task, and also as a quality control for the metrics defined - in the task. + in the task. 
arguments: - name: --input_train __merge__: file_train.yaml @@ -29,5 +29,5 @@ test_resources: - type: python_script path: /common/component_tests/check_config.py - path: /common/library.bib - - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas - dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas \ No newline at end of file + - path: /resources_test/task_denoising/cxg_immune_cell_atlas + dest: resources_test/task_denoising/cxg_immune_cell_atlas diff --git a/src/api/comp_data_processor.yaml b/src/api/comp_data_processor.yaml index a500c27..f2dcb66 100644 --- a/src/api/comp_data_processor.yaml +++ b/src/api/comp_data_processor.yaml @@ -22,5 +22,5 @@ arguments: test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - - path: /resources_test/common/cxg_mouse_pancreas_atlas - dest: resources_test/common/cxg_mouse_pancreas_atlas + - path: /resources_test/common/cxg_immune_cell_atlas + dest: resources_test/common/cxg_immune_cell_atlas diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml index bfbd4fd..da7c11d 100644 --- a/src/api/comp_method.yaml +++ b/src/api/comp_method.yaml @@ -1,25 +1,9 @@ -namespace: "methods" -info: - type: method - type_info: - label: Method - summary: A method. - description: | - A denoising method to remove noise (i.e. technical artifacts) from a dataset. -arguments: - - name: --input_train - __merge__: file_train.yaml - required: true - direction: input - - name: --output - __merge__: file_prediction.yaml - required: true - direction: output +__merge__: base_method.yaml test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - type: python_script path: /common/component_tests/check_config.py - path: /common/library.bib - - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas - dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas \ No newline at end of file + - path: /resources_test/task_denoising/cxg_immune_cell_atlas + dest: resources_test/task_denoising/cxg_immune_cell_atlas diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml index bfccf93..e113937 100644 --- a/src/api/comp_metric.yaml +++ b/src/api/comp_metric.yaml @@ -25,5 +25,5 @@ test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - path: /common/library.bib - - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas - dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas + - path: /resources_test/task_denoising/cxg_immune_cell_atlas + dest: resources_test/task_denoising/cxg_immune_cell_atlas diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml index e6174b9..57ff616 100644 --- a/src/api/file_common_dataset.yaml +++ b/src/api/file_common_dataset.yaml @@ -1,11 +1,11 @@ type: file -example: "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad" +example: "resources_test/common/cxg_immune_cell_atlas/dataset.h5ad" label: "Common Dataset" summary: A subset of the common dataset. 
info: format: type: h5ad - layers: + layers: - type: integer name: counts description: Raw counts @@ -15,7 +15,7 @@ info: name: batch description: Batch information required: false - + uns: - type: string name: dataset_id diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml index ecb55ff..f48a4b3 100644 --- a/src/api/file_prediction.yaml +++ b/src/api/file_prediction.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad" +example: "resources_test/task_denoising/cxg_immune_cell_atlas/denoised.h5ad" label: "Denoised data" summary: A denoised dataset as output by a method. info: @@ -18,4 +18,4 @@ info: - type: string name: method_id description: "A unique identifier for the method" - required: true \ No newline at end of file + required: true diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml index 103a7cd..bda2ede 100644 --- a/src/api/file_score.yaml +++ b/src/api/file_score.yaml @@ -1,5 +1,5 @@ type: file -example: resources_test/task_denoising/cxg_mouse_pancreas_atlas/score.h5ad +example: resources_test/task_denoising/cxg_immune_cell_atlas/score.h5ad label: Score summary: "File indicating the score of a metric." info: @@ -23,4 +23,4 @@ info: name: metric_values description: "The metric values obtained for the given prediction. Must be of same length as 'metric_ids'." multiple: true - required: true \ No newline at end of file + required: true diff --git a/src/api/file_test.yaml b/src/api/file_test.yaml index 1c6d202..b6db758 100644 --- a/src/api/file_test.yaml +++ b/src/api/file_test.yaml @@ -1,11 +1,11 @@ type: file -example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad" +example: "resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad" label: "Test data" summary: The subset of molecules used for the test dataset info: format: type: h5ad - layers: + layers: - type: integer name: counts description: Raw counts @@ -42,4 +42,4 @@ info: - name: train_sum type: integer description: The total number of counts in the training dataset. - required: true \ No newline at end of file + required: true diff --git a/src/api/file_train.yaml b/src/api/file_train.yaml index 6b60dc1..fc7ba7c 100644 --- a/src/api/file_train.yaml +++ b/src/api/file_train.yaml @@ -1,11 +1,11 @@ type: file -example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad" +example: "resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad" label: "Training data" summary: The subset of molecules used for the training dataset info: format: type: h5ad - layers: + layers: - type: integer name: counts description: Raw counts @@ -14,4 +14,8 @@ info: - type: string name: dataset_id description: "A unique identifier for the dataset" - required: true \ No newline at end of file + required: true + - name: dataset_organism + type: string + description: The organism of the sample in the dataset. 
+ required: false diff --git a/src/control_methods/perfect_denoising/script.py b/src/control_methods/perfect_denoising/script.py index 3ed780c..2960ed4 100644 --- a/src/control_methods/perfect_denoising/script.py +++ b/src/control_methods/perfect_denoising/script.py @@ -2,8 +2,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', - 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad', 'output': 'output_PD.h5ad', } meta = { diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py index b606975..8f3f5ac 100644 --- a/src/data_processors/process_dataset/script.py +++ b/src/data_processors/process_dataset/script.py @@ -45,7 +45,7 @@ obs_filt = np.ones(dtype=np.bool_, shape=adata_output.n_obs) obs_index = np.random.choice(np.where(obs_filt)[0], par["n_obs_limit"], replace=False) adata_output = adata_output[obs_index].copy() - + # remove all layers except for counts print(">> Remove all layers except for counts", flush=True) for key in list(adata_output.layers.keys()): @@ -70,11 +70,12 @@ # copy adata to train_set, test_set print(">> Create AnnData output objects", flush=True) +train_uns_keys = ["dataset_id", "dataset_organism"] output_train = ad.AnnData( layers={"counts": X_train}, obs=adata_output.obs[[]], var=adata_output.var[[]], - uns={"dataset_id": adata_output.uns["dataset_id"]} + uns={key: adata_output.uns[key] for key in train_uns_keys} ) test_uns_keys = ["dataset_id", "dataset_name", "dataset_url", "dataset_reference", "dataset_summary", "dataset_description", "dataset_organism"] output_test = ad.AnnData( diff --git a/src/methods/alra/script.R b/src/methods/alra/script.R index 9f7536a..49bba72 100644 --- a/src/methods/alra/script.R +++ b/src/methods/alra/script.R @@ -4,7 +4,7 @@ library(ALRA, warn.conflicts = FALSE) ## VIASH START par <- list( - input_train = "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", + input_train = "resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad", norm = "log", output = "output.h5ad" ) diff --git a/src/methods/dca/script.py b/src/methods/dca/script.py index a045ad4..32c2c84 100644 --- a/src/methods/dca/script.py +++ b/src/methods/dca/script.py @@ -3,7 +3,7 @@ ## VIASH START par = { - 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad', 'output': 'output_dca.h5ad', 'epochs': 300, } diff --git a/src/methods/knn_smoothing/script.py b/src/methods/knn_smoothing/script.py index 0fea7ed..a0b0fa3 100644 --- a/src/methods/knn_smoothing/script.py +++ b/src/methods/knn_smoothing/script.py @@ -3,7 +3,7 @@ ## VIASH START par = { - 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad', 'output': 'output_knn.h5ad', } meta = { diff --git a/src/methods/magic/script.py b/src/methods/magic/script.py index 2ca832d..67a25c6 100644 --- a/src/methods/magic/script.py +++ b/src/methods/magic/script.py @@ -7,7 +7,7 @@ ## VIASH START par = { - "input_train": "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", + "input_train": "resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad", "output": 
"output_magic.h5ad", "solver": "exact", "norm": "sqrt", @@ -73,4 +73,3 @@ print("Write Data", flush=True) output.write_h5ad(par["output"], compression="gzip") - diff --git a/src/methods/saver/script.R b/src/methods/saver/script.R index 8c6c320..ab1d658 100644 --- a/src/methods/saver/script.R +++ b/src/methods/saver/script.R @@ -5,7 +5,7 @@ library(Matrix, warn.conflicts = FALSE) ## VIASH START par <- list( - input_train = "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", + input_train = "resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad", norm = "log", output = "output.h5ad" ) diff --git a/src/metrics/mse/script.py b/src/metrics/mse/script.py index f5212b2..8d70589 100644 --- a/src/metrics/mse/script.py +++ b/src/metrics/mse/script.py @@ -5,8 +5,8 @@ ## VIASH START par = { - 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', - 'input_prediction': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad', + 'input_prediction': 'resources_test/task_denoising/cxg_immune_cell_atlas/denoised.h5ad', 'output': 'output_mse.h5ad' } meta = { @@ -48,4 +48,3 @@ print("Write adata to file", flush=True) output.write_h5ad(par['output'], compression="gzip") - diff --git a/src/metrics/poisson/script.py b/src/metrics/poisson/script.py index d8e2408..43caef3 100644 --- a/src/metrics/poisson/script.py +++ b/src/metrics/poisson/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', - 'input_prediction': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_immune_cell_atlas/test.h5ad', + 'input_prediction': 'resources_test/task_denoising/cxg_immune_cell_atlas/denoised.h5ad', 'output': 'output_poisson.h5ad' } meta = { diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index e162544..083dd30 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -39,18 +39,33 @@ argument_groups: required: true direction: output default: task_info.yaml - - name: Methods + - name: Method filtering + description: | + Use these arguments to filter methods by name. By default, all methods are + run. If `--methods_include` is defined, only those methods are run. If + `--methods_exclude` is defined, all methods except those specified are run. + These arguments are mutually exclusive, so only `--methods_include` OR + `--methods_exclude` can set but not both. arguments: - - name: "--method_ids" + - name: "--methods_include" type: string multiple: true - description: A list of method ids to run. If not specified, all methods will be run. + description: | + A list of method ids to include. If specified, only these methods will be run. + - name: "--methods_exclude" + type: string + multiple: true + description: | + A list of method ids to exclude. If specified, all methods except the ones listed will be run. 
+ resources: - type: nextflow_script path: main.nf entrypoint: run_wf - type: file path: /_viash.yaml + - path: /common/nextflow_helpers/helper.nf + dependencies: - name: h5ad/extract_uns_metadata repository: core diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index fe8defb..97155fb 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -1,3 +1,5 @@ +include { checkItemAllowed } from "${meta.resources_dir}/helper.nf" + workflow auto { findStates(params, meta.config) | meta.workflow.run( @@ -32,7 +34,7 @@ workflow run_wf { ****************************/ dataset_ch = input_ch // store join id - | map{ id, state -> + | map{ id, state -> [id, state + ["_meta": [join_id: id]]] } @@ -45,7 +47,7 @@ workflow run_wf { ] } ) - + /*************************** * RUN METHODS AND METRICS * ***************************/ @@ -57,7 +59,13 @@ workflow run_wf { // use the 'filter' argument to only run a defined method or all methods filter: { id, state, comp -> - def method_check = !state.method_ids || state.method_ids.contains(comp.config.name) + def method_check = checkItemAllowed( + comp.config.name, + state.methods_include, + state.methods_exclude, + "methods_include", + "methods_exclude" + ) method_check }, @@ -88,7 +96,7 @@ workflow run_wf { }, // use 'fromState' to fetch the arguments the component requires from the overall state fromState: [ - input_test: "input_test", + input_test: "input_test", input_prediction: "method_output" ], // use 'toState' to publish that component's outputs to the overall state @@ -117,7 +125,7 @@ workflow run_wf { def score_uns_yaml_blob = toYamlBlob(score_uns) def score_uns_file = tempFile("score_uns.yaml") score_uns_file.write(score_uns_yaml_blob) - + ["output", [output_scores: score_uns_file]] } @@ -171,7 +179,7 @@ workflow run_wf { ["output", new_state] } - // merge all of the output data + // merge all of the output data output_ch = score_ch | mix(meta_ch) | joinStates{ ids, states ->
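
Note on the new method filtering: the `filter` closure above delegates to `checkItemAllowed()` from the `common` submodule's `helper.nf`, whose exact implementation lives in that repository. The snippet below is only a hypothetical Python rendering of the semantics documented in the `Method filtering` argument group, added here as an illustration rather than a copy of the real helper.

``` python
# Hypothetical sketch (not the actual helper.nf implementation) of the
# include/exclude semantics used by the run_benchmark workflow:
# - neither list set -> every method runs
# - methods_include  -> only the listed methods run
# - methods_exclude  -> every method except the listed ones runs
# - both set         -> invalid, the workflow should raise an error
def check_item_allowed(item, include=None, exclude=None):
    if include and exclude:
        raise ValueError("methods_include and methods_exclude are mutually exclusive")
    if include:
        return item in include
    if exclude:
        return item not in exclude
    return True

# Example: with methods_include=["magic"], only the 'magic' component passes the filter.
assert check_item_allowed("magic", include=["magic"])
assert not check_item_allowed("alra", include=["magic"])
assert not check_item_allowed("magic", exclude=["magic"])
assert check_item_allowed("alra")
```

In the workflow itself the same check runs once per method component, with `comp.config.name` as the item and `state.methods_include` / `state.methods_exclude` as the two lists.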