openproblems-bio · rcannood · Sep 19, 2024 · Aug 16, 2024 · Aug 16, 2024 · Aug 16, 2024
diff --git a/README.md b/README.md
@@ -8,75 +8,8 @@ Do not edit this file directly.
 
 Removing noise in sparse single-cell RNA-sequencing count data
 
-Path to source:
-[`src`](https://github.com/openproblems-bio/task_denoising/src)
-
-## README
-
-## Installation
-
-You need to have Docker, Java, and Viash installed. Follow [these
-instructions](https://openproblems.bio/documentation/fundamentals/requirements)
-to install the required dependencies.
-
-## Add a method
-
-To add a method to the repository, follow the instructions in the
-`scripts/add_a_method.sh` script.
-
-## Frequently used commands
-
-To get started, you can run the following commands:
-
-``` bash
-git clone git@github.com:openproblems-bio/task_denoising.git
-
-cd task_denoising
-
-# initialise submodule
-scripts/init_submodule.sh
-
-# download resources
-scripts/download_resources.sh
-```
-
-To run the benchmark, you first need to build the components.
-Afterwards, you can run the benchmark:
-
-``` bash
-viash ns build --parallel --setup cachedbuild
-
-scripts/run_benchmark.sh
-```
-
-After adding a component, it is recommended to run the tests to ensure
-that the component is working correctly:
-
-``` bash
-viash ns test --parallel
-```
-
-Optionally, you can provide the `--query` argument to test only a subset
-of components:
-
-``` bash
-viash ns test --parallel --query 'component_name'
-```
-
-## Motivation
-
-Single-cell RNA-Seq protocols only detect a fraction of the mRNA
-molecules present in each cell. As a result, the measurements (UMI
-counts) observed for each gene and each cell are associated with
-generally high levels of technical noise ([Grün et al.,
-2014](https://www.nature.com/articles/nmeth.2930)). Denoising describes
-the task of estimating the true expression level of each gene in each
-cell. In the single-cell literature, this task is also referred to as
-*imputation*, a term which is typically used for missing data problems
-in statistics. Similar to the use of the terms “dropout”, “missing
-data”, and “technical zeros”, this terminology can create confusion
-about the underlying measurement process ([Sarkar and Stephens,
-2020](https://www.biorxiv.org/content/10.1101/2020.04.07.030007v2)).
+Repository:
+[openproblems-bio/task_denoising](https://github.com/openproblems-bio/task_denoising)
 
 ## Description
 
@@ -115,23 +48,23 @@ dataset.
 flowchart LR
   file_common_dataset("Common Dataset")
   comp_process_dataset[/"Data processor"/]
-  file_train_h5ad("Training data")
   file_test_h5ad("Test data")
+  file_train_h5ad("Training data")
   comp_control_method[/"Control Method"/]
-  comp_method[/"Method"/]
   comp_metric[/"Metric"/]
+  comp_method[/"Method"/]
   file_prediction("Denoised data")
   file_score("Score")
   file_common_dataset---comp_process_dataset
-  comp_process_dataset-->file_train_h5ad
   comp_process_dataset-->file_test_h5ad
-  file_train_h5ad---comp_control_method
-  file_train_h5ad---comp_method
+  comp_process_dataset-->file_train_h5ad
   file_test_h5ad---comp_control_method
   file_test_h5ad---comp_metric
+  file_train_h5ad---comp_control_method
+  file_train_h5ad---comp_method
   comp_control_method-->file_prediction
-  comp_method-->file_prediction
   comp_metric-->file_score
+  comp_method-->file_prediction
   file_prediction---comp_metric
 ```
 
@@ -151,7 +84,7 @@ Format:
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
@@ -170,9 +103,6 @@ Slot description:
 
 ## Component type: Data processor
 
-Path:
-[`src/process_dataset`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/process_dataset)
-
 A denoising dataset processor.
 
 Arguments:
@@ -187,72 +117,69 @@ Arguments:
 
 </div>
 
-## File format: Training data
+## File format: Test data
 
-The subset of molecules used for the training dataset
+The subset of molecules used for the test dataset
 
-Example file: `resources_test/denoising/pancreas/train.h5ad`
+Example file: `resources_test/denoising/pancreas/test.h5ad`
 
 Format:
 
 <div class="small">
 
     AnnData object
      layers: 'counts'
-     uns: 'dataset_id'
+     uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'train_sum'
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
-| Slot                | Type      | Description                          |
-|:--------------------|:----------|:-------------------------------------|
-| `layers["counts"]`  | `integer` | Raw counts.                          |
-| `uns["dataset_id"]` | `string`  | A unique identifier for the dataset. |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `layers["counts"]` | `integer` | Raw counts. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["dataset_name"]` | `string` | Nicely formatted name. |
+| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. |
+| `uns["dataset_reference"]` | `string` | (*Optional*) Bibtex reference of the paper in which the dataset was published. |
+| `uns["dataset_summary"]` | `string` | Short description of the dataset. |
+| `uns["dataset_description"]` | `string` | Long description of the dataset. |
+| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. |
+| `uns["train_sum"]` | `integer` | The total number of counts in the training dataset. |
 
 </div>
 
-## File format: Test data
+## File format: Training data
 
-The subset of molecules used for the test dataset
+The subset of molecules used for the training dataset
 
-Example file: `resources_test/denoising/pancreas/test.h5ad`
+Example file: `resources_test/denoising/pancreas/train.h5ad`
 
 Format:
 
 <div class="small">
 
     AnnData object
      layers: 'counts'
-     uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'train_sum'
+     uns: 'dataset_id'
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
-| Slot | Type | Description |
-|:---|:---|:---|
-| `layers["counts"]` | `integer` | Raw counts. |
-| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
-| `uns["dataset_name"]` | `string` | Nicely formatted name. |
-| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. |
-| `uns["dataset_reference"]` | `string` | (*Optional*) Bibtex reference of the paper in which the dataset was published. |
-| `uns["dataset_summary"]` | `string` | Short description of the dataset. |
-| `uns["dataset_description"]` | `string` | Long description of the dataset. |
-| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. |
-| `uns["train_sum"]` | `integer` | The total number of counts in the training dataset. |
+| Slot                | Type      | Description                          |
+|:--------------------|:----------|:-------------------------------------|
+| `layers["counts"]`  | `integer` | Raw counts.                          |
+| `uns["dataset_id"]` | `string`  | A unique identifier for the dataset. |
 
 </div>
 
 ## Component type: Control Method
 
-Path:
-[`src/control_methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/control_methods)
-
 A control method.
 
 Arguments:
@@ -267,40 +194,34 @@ Arguments:
 
 </div>
 
-## Component type: Method
-
-Path:
-[`src/methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/methods)
+## Component type: Metric
 
-A method.
+A metric.
 
 Arguments:
 
 <div class="small">
 
 | Name | Type | Description |
 |:---|:---|:---|
-| `--input_train` | `file` | The subset of molecules used for the training dataset. |
-| `--output` | `file` | (*Output*) A denoised dataset as output by a method. |
+| `--input_test` | `file` | The subset of molecules used for the test dataset. |
+| `--input_prediction` | `file` | A denoised dataset as output by a method. |
+| `--output` | `file` | (*Output*) File indicating the score of a metric. |
 
 </div>
 
-## Component type: Metric
-
-Path:
-[`src/metrics`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/metrics)
+## Component type: Method
 
-A metric.
+A method.
 
 Arguments:
 
 <div class="small">
 
 | Name | Type | Description |
 |:---|:---|:---|
-| `--input_test` | `file` | The subset of molecules used for the test dataset. |
-| `--input_prediction` | `file` | A denoised dataset as output by a method. |
-| `--output` | `file` | (*Output*) File indicating the score of a metric. |
+| `--input_train` | `file` | The subset of molecules used for the training dataset. |
+| `--output` | `file` | (*Output*) A denoised dataset as output by a method. |
 
 </div>
 
@@ -320,7 +241,7 @@ Format:
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
@@ -347,7 +268,7 @@ Format:
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 

diff --git a/_viash.yaml b/_viash.yaml
@@ -1,32 +1,12 @@
 name: task_denoising
-version: dev
-
 organization: openproblems-bio
-description: |
-  Removing noise in sparse single-cell RNA-sequencing count data.
+version: dev
 license: MIT
-keywords: [single-cell, openproblems, benchmark, denoising]
-links:
-  issue_tracker: https://github.com/openproblems-bio/task_denoising/issues
-  repository: https://github.com/openproblems-bio/task_denoising
-  docker_registry: ghcr.io
 
-info:
-  label: Denoising
-  summary: "Removing noise in sparse single-cell RNA-sequencing count data"
-  image: /src/api/thumbnail.svg
-  motivation: |
-    Single-cell RNA-Seq protocols only detect a fraction of the mRNA molecules present
-    in each cell. As a result, the measurements (UMI counts) observed for each gene and each
-    cell are associated with generally high levels of technical noise ([Grün et al.,
-    2014](https://www.nature.com/articles/nmeth.2930)). Denoising describes the task of
-    estimating the true expression level of each gene in each cell. In the single-cell
-    literature, this task is also referred to as *imputation*, a term which is typically
-    used for missing data problems in statistics. Similar to the use of the terms "dropout",
-    "missing data", and "technical zeros", this terminology can create confusion about the
-    underlying measurement process ([Sarkar and Stephens,
-    2020](https://www.biorxiv.org/content/10.1101/2020.04.07.030007v2)).
-  description: |
+label: Denoising
+keywords: [single-cell, openproblems, benchmark, denoising]
+summary: "Removing noise in sparse single-cell RNA-sequencing count data"
+description: |
     A key challenge in evaluating denoising methods is the general lack of a ground truth. A
     recent benchmark study ([Hou et al.,
     2020](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02132-x))
@@ -43,13 +23,33 @@ info:
     accuracy is measured by comparing the result to the test dataset. The authors show that
     both in theory and in practice, the measured denoising accuracy is representative of the
     accuracy that would be obtained on a ground truth dataset.
+links:
+  issue_tracker: https://github.com/openproblems-bio/task_denoising/issues
+  repository: https://github.com/openproblems-bio/task_denoising
+  docker_registry: ghcr.io
+
+info:  
+  image: thumbnail.svg
+  motivation: |
+    Single-cell RNA-Seq protocols only detect a fraction of the mRNA molecules present
+    in each cell. As a result, the measurements (UMI counts) observed for each gene and each
+    cell are associated with generally high levels of technical noise ([Grün et al.,
+    2014](https://www.nature.com/articles/nmeth.2930)). Denoising describes the task of
+    estimating the true expression level of each gene in each cell. In the single-cell
+    literature, this task is also referred to as *imputation*, a term which is typically
+    used for missing data problems in statistics. Similar to the use of the terms "dropout",
+    "missing data", and "technical zeros", this terminology can create confusion about the
+    underlying measurement process ([Sarkar and Stephens,
+    2020](https://www.biorxiv.org/content/10.1101/2020.04.07.030007v2)).
+
   test_resources:
     - type: s3
       path: s3://openproblems-data/resources_test/denoising/
       dest: resources_test/denoising
     - type: s3
       path: s3://openproblems-data/resources_test/common/
       dest: resources_test/common
+
 authors: 
   - name: "Wesley Lewis"
     roles: [ author, maintainer ]

diff --git a/common b/common
diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml
@@ -27,7 +27,7 @@ test_resources:
   - type: python_script
     path: /common/component_tests/run_and_check_output.py
   - type: python_script
-    path: /common/component_tests/check_method_config.py
+    path: /common/component_tests/check_config.py
   - path: /common/library.bib
   - path: /resources_test/denoising/pancreas
     dest: resources_test/denoising/pancreas
diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
@@ -19,7 +19,7 @@ test_resources:
   - type: python_script
     path: /common/component_tests/run_and_check_output.py
   - type: python_script
-    path: /common/component_tests/check_method_config.py
+    path: /common/component_tests/check_config.py
   - path: /common/library.bib
   - path: /resources_test/denoising/pancreas
     dest: resources_test/denoising/pancreas
diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml
@@ -21,7 +21,7 @@ arguments:
     required: true
 test_resources:
   - type: python_script
-    path: /common/component_tests/check_metric_config.py
+    path: /common/component_tests/check_config.py
   - type: python_script
     path: /common/component_tests/run_and_check_output.py
   - path: /common/library.bib