[pred_mod] Update dataset id (#389)

* Change id sent to module
* Add method_ids arg (ref #362)
* Add suggestion
openproblems-bio · Feb 27, 2024 · a6bf60f · a6bf60f
1 parent 524f389
commit a6bf60f
Show file tree

Hide file tree

Showing 4 changed files with 19 additions and 9 deletions.
diff --git a/src/tasks/predict_modality/workflows/process_datasets/main.nf b/src/tasks/predict_modality/workflows/process_datasets/main.nf
@@ -83,19 +83,20 @@ workflow run_wf {
         // Note: this id is added before the normalisation id  
         // Example old id: dataset_loader/dataset_id/normalization_id  
         // Example new id: dataset_loader/dataset_id/direction/normalization_id
-        def left = id.replaceAll("/${state.normalization_id}\$", "")
-        def right = id.replaceAll("^${left}", "")
-        def new_id = left + "/" + dir + right
+        def orig_dataset_id = id.replaceAll("/${state.normalization_id}$", "")
+        def normalization_id = id.replaceAll("^${orig_dataset_id}", "")
+        def new_dataset_id = orig_dataset_id + "/" + dir
+        def new_id = new_dataset_id + normalization_id
 
-        [new_id, state + [direction: dir, "_meta": [join_id: id]]]
+        [new_id, state + [dataset_id: new_dataset_id, direction: dir, "_meta": [join_id: id]]]
       }
     }
 
     | process_dataset.run(
       fromState: { id, state ->
         def swap_state = state.direction == "swap" ? true : false
         [
-          dataset_id: id,
+          dataset_id: state.dataset_id,
           input_mod1: state.dataset_mod1,
           input_mod2: state.dataset_mod2,
           output_train_mod1: state.output_train_mod1,

diff --git a/src/tasks/predict_modality/workflows/run_benchmark/config.vsh.yaml b/src/tasks/predict_modality/workflows/run_benchmark/config.vsh.yaml
@@ -48,6 +48,12 @@ functionality:
           required: true
           direction: output
           default: task_info.yaml
+    - name: Methods
+      arguments:
+        - name: "--method_ids"
+          type: string
+          multiple: true
+          description: A list of method ids to run. If not specified, all methods will be run.
   resources:
     - type: nextflow_script
       path: main.nf

diff --git a/src/tasks/predict_modality/workflows/run_benchmark/main.nf b/src/tasks/predict_modality/workflows/run_benchmark/main.nf
@@ -75,13 +75,16 @@ workflow run_wf {
     | runEach(
       components: methods,
 
-      // // use the 'filter' argument to only run a method on the normalisation the component is asking for
+      // use the 'filter' argument to only run a method on the normalisation the component is asking for
       filter: { id, state, comp ->
         def norm = state.rna_norm
         def pref = comp.config.functionality.info.preferred_normalization
         // if the preferred normalisation is none at all,
         // we can pass whichever dataset we want
-        (norm == "log_cp10k" && pref == "counts") || norm == pref
+        def norm_check = (norm == "log_cp10k" && pref == "counts") || norm == pref
+        def method_check = !state.method_ids || state.method_ids.contains(comp.config.functionality.name)
+
+        method_check && norm_check
       },
 
       // define a new 'id' by appending the method name to the dataset id

diff --git a/src/tasks/predict_modality/workflows/run_benchmark/run_test.sh b/src/tasks/predict_modality/workflows/run_benchmark/run_test.sh
@@ -8,7 +8,7 @@ cd "$REPO_ROOT"
 
 set -e
 
-DATASETS_DIR="resources_test/predict_modality/openproblems_neurips2021/bmmc_cite_GEX2ADT"
+DATASETS_DIR="resources_test/predict_modality/openproblems_neurips2021"
 OUTPUT_DIR="output/predict_modality"
 
 if [ ! -d "$OUTPUT_DIR" ]; then
@@ -24,7 +24,7 @@ nextflow run . \
   -entry auto \
   -with-trace \
   -c src/wf_utils/labels_ci.config \
-  --input_states "$DATASETS_DIR/state.yaml" \
+  --input_states "$DATASETS_DIR/**/state.yaml" \
   --rename_keys 'input_train_mod1:output_train_mod1,input_train_mod2:output_train_mod2,input_test_mod1:output_test_mod1,input_test_mod2:output_test_mod2' \
   --settings '{"output_scores": "scores.yaml", "output_dataset_info": "dataset_info.yaml", "output_method_configs": "method_configs.yaml", "output_metric_configs": "metric_configs.yaml", "output_task_info": "task_info.yaml"}' \
   --publish_dir "$OUTPUT_DIR" \