move task_info to _viash

openproblems-bio · Jul 12, 2024 · 6b5f7b7
1 parent ef70e43
commit 6b5f7b7
Show file tree

Hide file tree

Showing 2 changed files with 55 additions and 58 deletions.
diff --git a/_viash.yaml b/_viash.yaml
@@ -1,9 +1,9 @@
-viash_version: 0.9.0-RC6
-
 name: task_denoising
+version: dev
+
 organization: openproblems-bio
 description: |
-  An OpenProblems benchmark task.
+  Removing noise in sparse single-cell RNA-sequencing count data.
 license: MIT
 keywords: [single-cell, openproblems, benchmark, denoising]
 links:
@@ -12,16 +12,66 @@ links:
   docker_registry: ghcr.io
 
 info:
+  label: Denoising
+  summary: "Removing noise in sparse single-cell RNA-sequencing count data"
+  image: /src/api/thumbnail.svg
+  motivation: |
+    Single-cell RNA-Seq protocols only detect a fraction of the mRNA molecules present
+    in each cell. As a result, the measurements (UMI counts) observed for each gene and each
+    cell are associated with generally high levels of technical noise ([Grün et al.,
+    2014](https://www.nature.com/articles/nmeth.2930)). Denoising describes the task of
+    estimating the true expression level of each gene in each cell. In the single-cell
+    literature, this task is also referred to as *imputation*, a term which is typically
+    used for missing data problems in statistics. Similar to the use of the terms "dropout",
+    "missing data", and "technical zeros", this terminology can create confusion about the
+    underlying measurement process ([Sarkar and Stephens,
+    2020](https://www.biorxiv.org/content/10.1101/2020.04.07.030007v2)).
+  description: |
+    A key challenge in evaluating denoising methods is the general lack of a ground truth. A
+    recent benchmark study ([Hou et al.,
+    2020](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02132-x))
+    relied on flow-sorted datasets, mixture control experiments ([Tian et al.,
+    2019](https://www.nature.com/articles/s41592-019-0425-8)), and comparisons with bulk
+    RNA-Seq data. Since each of these approaches suffers from specific limitations, it is
+    difficult to combine these different approaches into a single quantitative measure of
+    denoising accuracy. Here, we instead rely on an approach termed molecular
+    cross-validation (MCV), which was specifically developed to quantify denoising accuracy
+    in the absence of a ground truth ([Batson et al.,
+    2019](https://www.biorxiv.org/content/10.1101/786269v1)). In MCV, the observed molecules
+    in a given scRNA-Seq dataset are first partitioned between a *training* and a *test*
+    dataset. Next, a denoising method is applied to the training dataset. Finally, denoising
+    accuracy is measured by comparing the result to the test dataset. The authors show that
+    both in theory and in practice, the measured denoising accuracy is representative of the
+    accuracy that would be obtained on a ground truth dataset.
   test_resources:
     - type: s3
       path: s3://openproblems-data/resources_test/denoising/
       dest: resources_test/denoising
     - type: s3
       path: s3://openproblems-data/resources_test/common/
       dest: resources_test/common
+authors:
+  - name: "Wesley Lewis"
+    roles: [ author, maintainer ]
+    info:
+      github: wes-lewis
+  - name: "Scott Gigante"
+    roles: [ author, maintainer ]
+    info:
+      github: scottgigante
+      orcid: "0000-0002-4544-2764"
+  - name: Robrecht Cannoodt
+    roles: [ author ]
+    info:
+      github: rcannood
+      orcid: "0000-0003-3641-729X"
+  - name: Kai Waldrant
+    roles: [ contributor ]
+    info:
+      github: KaiWaldrant
+      orcid: "0009-0003-8555-1361"
 
-
-version: dev
+viash_version: 0.9.0-RC6
 
 config_mods: |
   .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" }
diff --git a/src/api/task_info.yaml b/src/api/task_info.yaml