From e0bfa5b29a1dcdcf5cdbc202660d9250017258bb Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Fri, 16 Aug 2024 16:28:48 +0200 Subject: [PATCH 01/27] update submodule --- common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common b/common index 25bfdf2..4343620 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 25bfdf258bf7ff4417964eb5647b2dc999f12cfd +Subproject commit 434362003da58bb42ed4d76cc8bda51f62b71236 From 9b4016137c92f4018086fb43248987485589308c Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Fri, 16 Aug 2024 16:29:01 +0200 Subject: [PATCH 02/27] update viash version --- _viash.yaml | 53 ++++++++++++++++++++++++++------------------------- thumbnail.svg | 1 + 2 files changed, 28 insertions(+), 26 deletions(-) create mode 100644 thumbnail.svg diff --git a/_viash.yaml b/_viash.yaml index 4b9a7d6..0ffc420 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,32 +1,14 @@ -name: task_denoising -version: dev +viash_version: 0.9.0-RC7 +name: task_denoising organization: openproblems-bio -description: | - Removing noise in sparse single-cell RNA-sequencing count data. +version: dev license: MIT -keywords: [single-cell, openproblems, benchmark, denoising] -links: - issue_tracker: https://github.com/openproblems-bio/task_denoising/issues - repository: https://github.com/openproblems-bio/task_denoising - docker_registry: ghcr.io -info: - label: Denoising - summary: "Removing noise in sparse single-cell RNA-sequencing count data" - image: /src/api/thumbnail.svg - motivation: | - Single-cell RNA-Seq protocols only detect a fraction of the mRNA molecules present - in each cell. As a result, the measurements (UMI counts) observed for each gene and each - cell are associated with generally high levels of technical noise ([Grün et al., - 2014](https://www.nature.com/articles/nmeth.2930)). Denoising describes the task of - estimating the true expression level of each gene in each cell. 
In the single-cell - literature, this task is also referred to as *imputation*, a term which is typically - used for missing data problems in statistics. Similar to the use of the terms "dropout", - "missing data", and "technical zeros", this terminology can create confusion about the - underlying measurement process ([Sarkar and Stephens, - 2020](https://www.biorxiv.org/content/10.1101/2020.04.07.030007v2)). - description: | +label: Denoising +keywords: [single-cell, openproblems, benchmark, denoising] +summary: "Removing noise in sparse single-cell RNA-sequencing count data" +description: | A key challenge in evaluating denoising methods is the general lack of a ground truth. A recent benchmark study ([Hou et al., 2020](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02132-x)) @@ -43,6 +25,25 @@ info: accuracy is measured by comparing the result to the test dataset. The authors show that both in theory and in practice, the measured denoising accuracy is representative of the accuracy that would be obtained on a ground truth dataset. +links: + issue_tracker: https://github.com/openproblems-bio/task_denoising/issues + repository: https://github.com/openproblems-bio/task_denoising + docker_registry: ghcr.io + +info: + image: thumbnail.svg + motivation: | + Single-cell RNA-Seq protocols only detect a fraction of the mRNA molecules present + in each cell. As a result, the measurements (UMI counts) observed for each gene and each + cell are associated with generally high levels of technical noise ([Grün et al., + 2014](https://www.nature.com/articles/nmeth.2930)). Denoising describes the task of + estimating the true expression level of each gene in each cell. In the single-cell + literature, this task is also referred to as *imputation*, a term which is typically + used for missing data problems in statistics. 
Similar to the use of the terms "dropout", + "missing data", and "technical zeros", this terminology can create confusion about the + underlying measurement process ([Sarkar and Stephens, + 2020](https://www.biorxiv.org/content/10.1101/2020.04.07.030007v2)). + test_resources: - type: s3 path: s3://openproblems-data/resources_test/denoising/ @@ -50,6 +51,7 @@ info: - type: s3 path: s3://openproblems-data/resources_test/common/ dest: resources_test/common + authors: - name: "Wesley Lewis" roles: [ author, maintainer ] @@ -71,7 +73,6 @@ authors: github: KaiWaldrant orcid: "0009-0003-8555-1361" -viash_version: 0.9.0-RC6 config_mods: | .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } diff --git a/thumbnail.svg b/thumbnail.svg new file mode 100644 index 0000000..65936f0 --- /dev/null +++ b/thumbnail.svg @@ -0,0 +1 @@ +dim-2dim-1dim-2dim-1 \ No newline at end of file From d6a46550445ccbb377df73bce18cd5287926a7c5 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Fri, 16 Aug 2024 16:29:25 +0200 Subject: [PATCH 03/27] relocate thumbnail --- src/api/thumbnail.svg | 1 - 1 file changed, 1 deletion(-) delete mode 100644 src/api/thumbnail.svg diff --git a/src/api/thumbnail.svg b/src/api/thumbnail.svg deleted file mode 100644 index 65936f0..0000000 --- a/src/api/thumbnail.svg +++ /dev/null @@ -1 +0,0 @@ -dim-2dim-1dim-2dim-1 \ No newline at end of file From edb908dd3b229a2263c1f15c435589cb3b422ffc Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Fri, 16 Aug 2024 16:51:01 +0200 Subject: [PATCH 04/27] update methods metadata --- src/methods/alra/config.vsh.yaml | 25 ++++++++++--------- src/methods/dca/config.vsh.yaml | 17 +++++++------ src/methods/knn_smoothing/config.vsh.yaml | 30 ++++++++++++----------- 
src/methods/magic/config.vsh.yaml | 29 +++++++++++----------- src/methods/saver/config.vsh.yaml | 25 ++++++++++--------- 5 files changed, 66 insertions(+), 60 deletions(-) diff --git a/src/methods/alra/config.vsh.yaml b/src/methods/alra/config.vsh.yaml index 4f956b4..d12fb44 100644 --- a/src/methods/alra/config.vsh.yaml +++ b/src/methods/alra/config.vsh.yaml @@ -1,19 +1,20 @@ __merge__: ../../api/comp_method.yaml name: "alra" +label: ALRA +summary: "ALRA imputes missing values in scRNA-seq data by computing rank-k approximation, thresholding by gene, and rescaling the matrix." +description: | + Adaptively-thresholded Low Rank Approximation (ALRA). + + ALRA is a method for imputation of missing values in single cell RNA-sequencing data, + described in the preprint, "Zero-preserving imputation of scRNA-seq data using low-rank approximation" + available [here](https://www.biorxiv.org/content/early/2018/08/22/397588). Given a + scRNA-seq expression matrix, ALRA first computes its rank-k approximation using randomized SVD. + Next, each row (gene) is thresholded by the magnitude of the most negative value of that gene. + Finally, the matrix is rescaled. +references: + doi: 10.1101/397588 info: - label: ALRA - summary: "ALRA imputes missing values in scRNA-seq data by computing rank-k approximation, thresholding by gene, and rescaling the matrix." - description: | - Adaptively-thresholded Low Rank Approximation (ALRA). - - ALRA is a method for imputation of missing values in single cell RNA-sequencing data, - described in the preprint, "Zero-preserving imputation of scRNA-seq data using low-rank approximation" - available [here](https://www.biorxiv.org/content/early/2018/08/22/397588). Given a - scRNA-seq expression matrix, ALRA first computes its rank-k approximation using randomized SVD. - Next, each row (gene) is thresholded by the magnitude of the most negative value of that gene. - Finally, the matrix is rescaled. 
- reference: "linderman2018zero" repository_url: "https://github.com/KlugerLab/ALRA" documentation_url: https://github.com/KlugerLab/ALRA/blob/master/README.md v1: diff --git a/src/methods/dca/config.vsh.yaml b/src/methods/dca/config.vsh.yaml index 3d62968..0853156 100644 --- a/src/methods/dca/config.vsh.yaml +++ b/src/methods/dca/config.vsh.yaml @@ -1,14 +1,15 @@ __merge__: ../../api/comp_method.yaml name: "dca" -info: - label: DCA - summary: "A deep autoencoder with ZINB loss function to address the dropout effect in count data" - description: | - "Deep Count Autoencoder +label: DCA +summary: "A deep autoencoder with ZINB loss function to address the dropout effect in count data" +description: | + "Deep Count Autoencoder - Removes the dropout effect by taking the count structure, overdispersed nature and sparsity of the data into account - using a deep autoencoder with zero-inflated negative binomial (ZINB) loss function." - reference: "eraslan2019single" + Removes the dropout effect by taking the count structure, overdispersed nature and sparsity of the data into account + using a deep autoencoder with zero-inflated negative binomial (ZINB) loss function." +references: + doi: 10.1038/s41467-018-07931-2 +info: documentation_url: "https://github.com/theislab/dca#readme" repository_url: "https://github.com/theislab/dca" v1: diff --git a/src/methods/knn_smoothing/config.vsh.yaml b/src/methods/knn_smoothing/config.vsh.yaml index fd7aab1..10c7d24 100644 --- a/src/methods/knn_smoothing/config.vsh.yaml +++ b/src/methods/knn_smoothing/config.vsh.yaml @@ -1,21 +1,22 @@ __merge__: ../../api/comp_method.yaml name: "knn_smoothing" +label: KNN Smoothing +summary: "Iterative kNN-smoothing denoises scRNA-seq data by iteratively increasing the size of neighbourhoods for smoothing until a maximum k value is reached." +description: "Iterative kNN-smoothing is a method to repair or denoise noisy scRNA-seq + expression matrices. 
Given a scRNA-seq expression matrix, KNN-smoothing first + applies initial normalisation and smoothing. Then, a chosen number of + principal components is used to calculate Euclidean distances between cells. + Minimally sized neighbourhoods are initially determined from these Euclidean + distances, and expression profiles are shared between neighbouring cells. + Then, the resultant smoothed matrix is used as input to the next step of + smoothing, where the size (k) of the considered neighbourhoods is increased, + leading to greater smoothing. This process continues until a chosen maximum k + value has been reached, at which point the iteratively smoothed object is + then optionally scaled to yield a final result." +references: + doi: 10.1101/217737 info: - label: KNN Smoothing - summary: "Iterative kNN-smoothing denoises scRNA-seq data by iteratively increasing the size of neighbourhoods for smoothing until a maximum k value is reached." - description: "Iterative kNN-smoothing is a method to repair or denoise noisy scRNA-seq - expression matrices. Given a scRNA-seq expression matrix, KNN-smoothing first - applies initial normalisation and smoothing. Then, a chosen number of - principal components is used to calculate Euclidean distances between cells. - Minimally sized neighbourhoods are initially determined from these Euclidean - distances, and expression profiles are shared between neighbouring cells. - Then, the resultant smoothed matrix is used as input to the next step of - smoothing, where the size (k) of the considered neighbourhoods is increased, - leading to greater smoothing. This process continues until a chosen maximum k - value has been reached, at which point the iteratively smoothed object is - then optionally scaled to yield a final result." 
- reference: "wagner2018knearest" documentation_url: "https://github.com/yanailab/knn-smoothing#readme" repository_url: "https://github.com/yanailab/knn-smoothing" v1: @@ -24,6 +25,7 @@ info: variants: knn_smoothing: preferred_normalization: counts + resources: - type: python_script path: script.py diff --git a/src/methods/magic/config.vsh.yaml b/src/methods/magic/config.vsh.yaml index 62b9c87..6077b1e 100644 --- a/src/methods/magic/config.vsh.yaml +++ b/src/methods/magic/config.vsh.yaml @@ -1,20 +1,21 @@ __merge__: ../../api/comp_method.yaml name: "magic" +label: MAGIC +summary: "MAGIC imputes and denoises scRNA-seq data that is noisy or dropout-prone." +description: "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for + imputation and denoising of noisy or dropout-prone single cell RNA-sequencing + data. Given a normalised scRNA-seq expression matrix, it first calculates + Euclidean distances between each pair of cells in the dataset, which is then + augmented using a Gaussian kernel (function) and row-normalised to give a + normalised affinity matrix. A t-step markov process is then calculated, by + powering this affinity matrix t times. Finally, the powered affinity matrix + is right-multiplied by the normalised data, causing the final imputed values + to take the value of a per-gene average weighted by the affinities of cells. + The resultant imputed matrix is then rescaled, to more closely match the + magnitude of measurements in the normalised (input) matrix." +references: + doi: 10.1016/j.cell.2018.05.061 info: - label: MAGIC - summary: "MAGIC imputes and denoises scRNA-seq data that is noisy or dropout-prone." - description: "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for - imputation and denoising of noisy or dropout-prone single cell RNA-sequencing - data. 
Given a normalised scRNA-seq expression matrix, it first calculates - Euclidean distances between each pair of cells in the dataset, which is then - augmented using a Gaussian kernel (function) and row-normalised to give a - normalised affinity matrix. A t-step markov process is then calculated, by - powering this affinity matrix t times. Finally, the powered affinity matrix - is right-multiplied by the normalised data, causing the final imputed values - to take the value of a per-gene average weighted by the affinities of cells. - The resultant imputed matrix is then rescaled, to more closely match the - magnitude of measurements in the normalised (input) matrix." - reference: "van2018recovering" documentation_url: "https://github.com/KrishnaswamyLab/MAGIC#readme" repository_url: "https://github.com/KrishnaswamyLab/MAGIC" v1: diff --git a/src/methods/saver/config.vsh.yaml b/src/methods/saver/config.vsh.yaml index 90717dc..e53505e 100644 --- a/src/methods/saver/config.vsh.yaml +++ b/src/methods/saver/config.vsh.yaml @@ -2,19 +2,20 @@ __merge__: ../../api/comp_method.yaml name: saver status: disabled +label: SAVER +summary: SAVER (Single-cell Analysis Via Expression Recovery) implements a regularized regression prediction and empirical Bayes method to recover the true gene expression profile. +description: | + SAVER takes advantage of gene-to-gene relationships to recover the true expression level of each gene in each cell, + removing technical variation while retaining biological variation across cells (https://github.com/mohuangx/SAVER). + SAVER uses a post-quality-control scRNA-seq dataset with UMI counts as input. SAVER assumes that the count of each + gene in each cell follows a Poisson-gamma mixture, also known as a negative binomial model. Instead of specifying + the gamma prior, we estimate the prior parameters in an empirical Bayes-like approach with a Poisson LASSO regression, + using the expression of other genes as predictors. 
Once the prior parameters are estimated, SAVER outputs the + posterior distribution of the true expression, which quantifies estimation uncertainty, and the posterior mean is + used as the SAVER recovered expression value. +references: + doi: 10.1038/s41592-018-0033-z info: - label: SAVER - summary: SAVER (Single-cell Analysis Via Expression Recovery) implements a regularized regression prediction and empirical Bayes method to recover the true gene expression profile. - description: | - SAVER takes advantage of gene-to-gene relationships to recover the true expression level of each gene in each cell, - removing technical variation while retaining biological variation across cells (https://github.com/mohuangx/SAVER). - SAVER uses a post-quality-control scRNA-seq dataset with UMI counts as input. SAVER assumes that the count of each - gene in each cell follows a Poisson-gamma mixture, also known as a negative binomial model. Instead of specifying - the gamma prior, we estimate the prior parameters in an empirical Bayes-like approach with a Poisson LASSO regression, - using the expression of other genes as predictors. Once the prior parameters are estimated, SAVER outputs the - posterior distribution of the true expression, which quantifies estimation uncertainty, and the posterior mean is - used as the SAVER recovered expression value. 
- reference: huang2018savergene repository_url: https://github.com/mohuangx/SAVER documentation_url: https://mohuangx.github.io/SAVER/index.html preferred_normalization: counts From dc1aaf88d699e808ebdfd56e58f60210751209ac Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Fri, 16 Aug 2024 16:53:17 +0200 Subject: [PATCH 05/27] update control_methods --- src/control_methods/no_denoising/config.vsh.yaml | 6 +++--- src/control_methods/perfect_denoising/config.vsh.yaml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/control_methods/no_denoising/config.vsh.yaml b/src/control_methods/no_denoising/config.vsh.yaml index c0364df..5f0272a 100644 --- a/src/control_methods/no_denoising/config.vsh.yaml +++ b/src/control_methods/no_denoising/config.vsh.yaml @@ -1,9 +1,9 @@ __merge__: ../../api/comp_control_method.yaml name: "no_denoising" +label: No Denoising +summary: "negative control by copying train counts" +description: "This method serves as a negative control, where the denoised data is a copy of the unaltered training data. This represents the scoring threshold if denoising was not performed on the data." info: - label: No Denoising - summary: "negative control by copying train counts" - description: "This method serves as a negative control, where the denoised data is a copy of the unaltered training data. This represents the scoring threshold if denoising was not performed on the data." 
v1: path: openproblems/tasks/denoising/methods/baseline.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 diff --git a/src/control_methods/perfect_denoising/config.vsh.yaml b/src/control_methods/perfect_denoising/config.vsh.yaml index e4f235d..47c3c5d 100644 --- a/src/control_methods/perfect_denoising/config.vsh.yaml +++ b/src/control_methods/perfect_denoising/config.vsh.yaml @@ -1,10 +1,10 @@ __merge__: ../../api/comp_control_method.yaml name: "perfect_denoising" +label: Perfect Denoising +summary: "Positive control by copying the test counts" +description: "This method serves as a positive control, where the test data is copied 1-to-1 to the denoised data. This makes it seem as if the data is perfectly denoised as it will be compared to the test data in the metrics." info: - label: Perfect Denoising - summary: "Positive control by copying the test counts" - description: "This method serves as a positive control, where the test data is copied 1-to-1 to the denoised data. This makes it seem as if the data is perfectly denoised as it will be compared to the test data in the metrics." v1: path: openproblems/tasks/denoising/methods/baseline.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 From 3c23b8daf28c9cb14f0f98ea69edf8ee604a9fae Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Fri, 16 Aug 2024 17:04:50 +0200 Subject: [PATCH 06/27] add file_type --- src/api/file_common_dataset.yaml | 12 ++++++++++++ src/api/file_prediction.yaml | 1 + src/api/file_test_h5ad.yaml | 1 + src/api/file_train_h5ad.yaml | 1 + 4 files changed, 15 insertions(+) diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml index ff913ce..e2ddfae 100644 --- a/src/api/file_common_dataset.yaml +++ b/src/api/file_common_dataset.yaml @@ -3,12 +3,24 @@ example: "resources_test/common/pancreas/dataset.h5ad" info: label: "Common Dataset" summary: A subset of the common dataset. 
+ file_type: h5ad slots: layers: - type: integer name: counts description: Raw counts required: true + obs: + - type: integer + name: soma_joinid + description: If the dataset was retrieved from CELLxGENE census, this is a unique identifier for the cell. + required: false + + - type: string + name: dataset + description: The names for the used datasets if the dataset is a combination of different experiments/studies. + required: false + uns: - type: string name: dataset_id diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml index 788fa1a..9a67079 100644 --- a/src/api/file_prediction.yaml +++ b/src/api/file_prediction.yaml @@ -3,6 +3,7 @@ example: "resources_test/denoising/pancreas/denoised.h5ad" info: label: "Denoised data" summary: A denoised dataset as output by a method. + file_type: h5ad slots: layers: - type: integer diff --git a/src/api/file_test_h5ad.yaml b/src/api/file_test_h5ad.yaml index 371b305..1436682 100644 --- a/src/api/file_test_h5ad.yaml +++ b/src/api/file_test_h5ad.yaml @@ -3,6 +3,7 @@ example: "resources_test/denoising/pancreas/test.h5ad" info: label: "Test data" summary: The subset of molecules used for the test dataset + file_type: h5ad slots: layers: - type: integer diff --git a/src/api/file_train_h5ad.yaml b/src/api/file_train_h5ad.yaml index 302eae2..70f3d19 100644 --- a/src/api/file_train_h5ad.yaml +++ b/src/api/file_train_h5ad.yaml @@ -3,6 +3,7 @@ example: "resources_test/denoising/pancreas/train.h5ad" info: label: "Training data" summary: The subset of molecules used for the training dataset + file_type: h5ad slots: layers: - type: integer From d5e398fde0b5b4dd7bffda0ec9fae36bb386ce7b Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Fri, 16 Aug 2024 17:08:39 +0200 Subject: [PATCH 07/27] remove obs layer --- src/api/file_common_dataset.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml index e2ddfae..1364461 100644 --- 
a/src/api/file_common_dataset.yaml +++ b/src/api/file_common_dataset.yaml @@ -10,17 +10,6 @@ info: name: counts description: Raw counts required: true - obs: - - type: integer - name: soma_joinid - description: If the dataset was retrieved from CELLxGENE census, this is a unique identifier for the cell. - required: false - - - type: string - name: dataset - description: The names for the used datasets if the dataset is a combination of different experiments/studies. - required: false - uns: - type: string name: dataset_id From 1f408563c98efc1546fd39ac177ec57f1be0a877 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Mon, 2 Sep 2024 10:50:05 +0200 Subject: [PATCH 08/27] Update common resources --- common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common b/common index 4343620..a140917 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 434362003da58bb42ed4d76cc8bda51f62b71236 +Subproject commit a1409176da317c8a7c9c65d1488bcf3b5afee3d6 From 628f15389d7ff1fafe529fa7cf647bec7647c1c6 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Mon, 2 Sep 2024 11:04:29 +0200 Subject: [PATCH 09/27] Update file* API --- src/api/file_common_dataset.yaml | 8 ++++---- src/api/file_prediction.yaml | 8 ++++---- src/api/file_score.yaml | 8 ++++---- src/api/file_test_h5ad.yaml | 8 ++++---- src/api/file_train_h5ad.yaml | 8 ++++---- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml index 1364461..4e5db66 100644 --- a/src/api/file_common_dataset.yaml +++ b/src/api/file_common_dataset.yaml @@ -1,10 +1,10 @@ type: file example: "resources_test/common/pancreas/dataset.h5ad" +label: "Common Dataset" +summary: A subset of the common dataset. info: - label: "Common Dataset" - summary: A subset of the common dataset. 
- file_type: h5ad - slots: + format: + type: h5ad layers: - type: integer name: counts diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml index 9a67079..e732d66 100644 --- a/src/api/file_prediction.yaml +++ b/src/api/file_prediction.yaml @@ -1,10 +1,10 @@ type: file example: "resources_test/denoising/pancreas/denoised.h5ad" +label: "Denoised data" +summary: A denoised dataset as output by a method. info: - label: "Denoised data" - summary: A denoised dataset as output by a method. - file_type: h5ad - slots: + format: + type: h5ad layers: - type: integer name: denoised diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml index 4a29744..3e80f6e 100644 --- a/src/api/file_score.yaml +++ b/src/api/file_score.yaml @@ -1,10 +1,10 @@ type: file example: resources_test/denoising/pancreas/score.h5ad +label: Score +summary: "File indicating the score of a metric." info: - label: Score - summary: "File indicating the score of a metric." - file_type: h5ad - slots: + format: + type: h5ad uns: - type: string name: dataset_id diff --git a/src/api/file_test_h5ad.yaml b/src/api/file_test_h5ad.yaml index 1436682..10dab87 100644 --- a/src/api/file_test_h5ad.yaml +++ b/src/api/file_test_h5ad.yaml @@ -1,10 +1,10 @@ type: file example: "resources_test/denoising/pancreas/test.h5ad" +label: "Test data" +summary: The subset of molecules used for the test dataset info: - label: "Test data" - summary: The subset of molecules used for the test dataset - file_type: h5ad - slots: + format: + type: h5ad layers: - type: integer name: counts diff --git a/src/api/file_train_h5ad.yaml b/src/api/file_train_h5ad.yaml index 70f3d19..0d12edb 100644 --- a/src/api/file_train_h5ad.yaml +++ b/src/api/file_train_h5ad.yaml @@ -1,10 +1,10 @@ type: file example: "resources_test/denoising/pancreas/train.h5ad" +label: "Training data" +summary: The subset of molecules used for the training dataset info: - label: "Training data" - summary: The subset of molecules used for the training 
dataset - file_type: h5ad - slots: + format: + type: h5ad layers: - type: integer name: counts From 6a71bdc61dd6e26316e714eb200477be2901c42c Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Mon, 2 Sep 2024 11:06:42 +0200 Subject: [PATCH 10/27] update README --- README.md | 169 +++++++++++++++--------------------------------------- 1 file changed, 45 insertions(+), 124 deletions(-) diff --git a/README.md b/README.md index cafd684..deac578 100644 --- a/README.md +++ b/README.md @@ -8,75 +8,8 @@ Do not edit this file directly. Removing noise in sparse single-cell RNA-sequencing count data -Path to source: -[`src`](https://github.com/openproblems-bio/task_denoising/src) - -## README - -## Installation - -You need to have Docker, Java, and Viash installed. Follow [these -instructions](https://openproblems.bio/documentation/fundamentals/requirements) -to install the required dependencies. - -## Add a method - -To add a method to the repository, follow the instructions in the -`scripts/add_a_method.sh` script. - -## Frequently used commands - -To get started, you can run the following commands: - -``` bash -git clone git@github.com:openproblems-bio/task_denoising.git - -cd task_denoising - -# initialise submodule -scripts/init_submodule.sh - -# download resources -scripts/download_resources.sh -``` - -To run the benchmark, you first need to build the components. -Afterwards, you can run the benchmark: - -``` bash -viash ns build --parallel --setup cachedbuild - -scripts/run_benchmark.sh -``` - -After adding a component, it is recommended to run the tests to ensure -that the component is working correctly: - -``` bash -viash ns test --parallel -``` - -Optionally, you can provide the `--query` argument to test only a subset -of components: - -``` bash -viash ns test --parallel --query 'component_name' -``` - -## Motivation - -Single-cell RNA-Seq protocols only detect a fraction of the mRNA -molecules present in each cell. 
As a result, the measurements (UMI -counts) observed for each gene and each cell are associated with -generally high levels of technical noise ([Grün et al., -2014](https://www.nature.com/articles/nmeth.2930)). Denoising describes -the task of estimating the true expression level of each gene in each -cell. In the single-cell literature, this task is also referred to as -*imputation*, a term which is typically used for missing data problems -in statistics. Similar to the use of the terms “dropout”, “missing -data”, and “technical zeros”, this terminology can create confusion -about the underlying measurement process ([Sarkar and Stephens, -2020](https://www.biorxiv.org/content/10.1101/2020.04.07.030007v2)). +Repository: +[openproblems-bio/task_denoising](https://github.com/openproblems-bio/task_denoising) ## Description @@ -115,23 +48,23 @@ dataset. flowchart LR file_common_dataset("Common Dataset") comp_process_dataset[/"Data processor"/] - file_train_h5ad("Training data") file_test_h5ad("Test data") + file_train_h5ad("Training data") comp_control_method[/"Control Method"/] - comp_method[/"Method"/] comp_metric[/"Metric"/] + comp_method[/"Method"/] file_prediction("Denoised data") file_score("Score") file_common_dataset---comp_process_dataset - comp_process_dataset-->file_train_h5ad comp_process_dataset-->file_test_h5ad - file_train_h5ad---comp_control_method - file_train_h5ad---comp_method + comp_process_dataset-->file_train_h5ad file_test_h5ad---comp_control_method file_test_h5ad---comp_metric + file_train_h5ad---comp_control_method + file_train_h5ad---comp_method comp_control_method-->file_prediction - comp_method-->file_prediction comp_metric-->file_score + comp_method-->file_prediction file_prediction---comp_metric ``` @@ -151,7 +84,7 @@ Format: -Slot description: +Data structure:
@@ -170,9 +103,6 @@ Slot description: ## Component type: Data processor -Path: -[`src/process_dataset`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/process_dataset) - A denoising dataset processor. Arguments: @@ -187,11 +117,11 @@ Arguments:
-## File format: Training data +## File format: Test data -The subset of molecules used for the training dataset +The subset of molecules used for the test dataset -Example file: `resources_test/denoising/pancreas/train.h5ad` +Example file: `resources_test/denoising/pancreas/test.h5ad` Format: @@ -199,26 +129,33 @@ Format: AnnData object layers: 'counts' - uns: 'dataset_id' + uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'train_sum' -Slot description: +Data structure:
-| Slot | Type | Description | -|:--------------------|:----------|:-------------------------------------| -| `layers["counts"]` | `integer` | Raw counts. | -| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. | +| Slot | Type | Description | +|:---|:---|:---| +| `layers["counts"]` | `integer` | Raw counts. | +| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. | +| `uns["dataset_name"]` | `string` | Nicely formatted name. | +| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. | +| `uns["dataset_reference"]` | `string` | (*Optional*) Bibtex reference of the paper in which the dataset was published. | +| `uns["dataset_summary"]` | `string` | Short description of the dataset. | +| `uns["dataset_description"]` | `string` | Long description of the dataset. | +| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. | +| `uns["train_sum"]` | `integer` | The total number of counts in the training dataset. |
-## File format: Test data +## File format: Training data -The subset of molecules used for the test dataset +The subset of molecules used for the training dataset -Example file: `resources_test/denoising/pancreas/test.h5ad` +Example file: `resources_test/denoising/pancreas/train.h5ad` Format: @@ -226,33 +163,23 @@ Format: AnnData object layers: 'counts' - uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'train_sum' + uns: 'dataset_id' -Slot description: +Data structure:
-| Slot | Type | Description | -|:---|:---|:---| -| `layers["counts"]` | `integer` | Raw counts. | -| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. | -| `uns["dataset_name"]` | `string` | Nicely formatted name. | -| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. | -| `uns["dataset_reference"]` | `string` | (*Optional*) Bibtex reference of the paper in which the dataset was published. | -| `uns["dataset_summary"]` | `string` | Short description of the dataset. | -| `uns["dataset_description"]` | `string` | Long description of the dataset. | -| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. | -| `uns["train_sum"]` | `integer` | The total number of counts in the training dataset. | +| Slot | Type | Description | +|:--------------------|:----------|:-------------------------------------| +| `layers["counts"]` | `integer` | Raw counts. | +| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
## Component type: Control Method -Path: -[`src/control_methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/control_methods) - A control method. Arguments: @@ -267,12 +194,9 @@ Arguments: -## Component type: Method - -Path: -[`src/methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/methods) +## Component type: Metric -A method. +A metric. Arguments: @@ -280,17 +204,15 @@ Arguments: | Name | Type | Description | |:---|:---|:---| -| `--input_train` | `file` | The subset of molecules used for the training dataset. | -| `--output` | `file` | (*Output*) A denoised dataset as output by a method. | +| `--input_test` | `file` | The subset of molecules used for the test dataset. | +| `--input_prediction` | `file` | A denoised dataset as output by a method. | +| `--output` | `file` | (*Output*) File indicating the score of a metric. | -## Component type: Metric - -Path: -[`src/metrics`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/metrics) +## Component type: Method -A metric. +A method. Arguments: @@ -298,9 +220,8 @@ Arguments: | Name | Type | Description | |:---|:---|:---| -| `--input_test` | `file` | The subset of molecules used for the test dataset. | -| `--input_prediction` | `file` | A denoised dataset as output by a method. | -| `--output` | `file` | (*Output*) File indicating the score of a metric. | +| `--input_train` | `file` | The subset of molecules used for the training dataset. | +| `--output` | `file` | (*Output*) A denoised dataset as output by a method. | @@ -320,7 +241,7 @@ Format: -Slot description: +Data structure:
@@ -347,7 +268,7 @@ Format:
-Slot description: +Data structure:
From 81ade3d234dbb1972b8256e1a6a5a28e2c9a6c0b Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Mon, 2 Sep 2024 11:12:52 +0200 Subject: [PATCH 11/27] fix component test fp --- src/api/comp_control_method.yaml | 2 +- src/api/comp_method.yaml | 2 +- src/api/comp_metric.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml index 1cee82a..e7f1fe1 100644 --- a/src/api/comp_control_method.yaml +++ b/src/api/comp_control_method.yaml @@ -27,7 +27,7 @@ test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - type: python_script - path: /common/component_tests/check_method_config.py + path: /common/component_tests/check_config.py - path: /common/library.bib - path: /resources_test/denoising/pancreas dest: resources_test/denoising/pancreas \ No newline at end of file diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml index 09fae19..9a35e00 100644 --- a/src/api/comp_method.yaml +++ b/src/api/comp_method.yaml @@ -19,7 +19,7 @@ test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - type: python_script - path: /common/component_tests/check_method_config.py + path: /common/component_tests/check_config.py - path: /common/library.bib - path: /resources_test/denoising/pancreas dest: resources_test/denoising/pancreas \ No newline at end of file diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml index 83435ab..9104459 100644 --- a/src/api/comp_metric.yaml +++ b/src/api/comp_metric.yaml @@ -21,7 +21,7 @@ arguments: required: true test_resources: - type: python_script - path: /common/component_tests/check_metric_config.py + path: /common/component_tests/check_config.py - type: python_script path: /common/component_tests/run_and_check_output.py - path: /common/library.bib From fd3d2180e03f15829a23d86114c0b1a01aa2c563 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Mon, 2 Sep 2024 11:17:36 +0200 
Subject: [PATCH 12/27] update metrics references api --- src/metrics/mse/config.vsh.yaml | 3 ++- src/metrics/poisson/config.vsh.yaml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/metrics/mse/config.vsh.yaml b/src/metrics/mse/config.vsh.yaml index 9068716..e97b67e 100644 --- a/src/metrics/mse/config.vsh.yaml +++ b/src/metrics/mse/config.vsh.yaml @@ -6,7 +6,8 @@ info: label: Mean-squared error summary: "The mean squared error between the denoised counts and the true counts." description: "The mean squared error between the denoised counts of the training dataset and the true counts of the test dataset after reweighing by the train/test ratio" - reference: batson2019molecular + references: + doi: 10.1101/786269 v1: path: openproblems/tasks/denoising/metrics/mse.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 diff --git a/src/metrics/poisson/config.vsh.yaml b/src/metrics/poisson/config.vsh.yaml index 9f8aab8..f77a156 100644 --- a/src/metrics/poisson/config.vsh.yaml +++ b/src/metrics/poisson/config.vsh.yaml @@ -6,7 +6,8 @@ info: label: Poisson Loss summary: "The Poisson log likelihood of the true counts observed in the distribution of denoised counts" description: "The Poisson log likelihood of observing the true counts of the test dataset given the distribution given in the denoised dataset." 
- reference: batson2019molecular + references: + doi: 10.1101/786269 v1: path: openproblems/tasks/denoising/metrics/poisson.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 From 1be13e368ac0d2e2d3704f3a955f785a44f56e31 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Mon, 2 Sep 2024 11:41:33 +0200 Subject: [PATCH 13/27] add openproblems package to DCA --- src/methods/dca/config.vsh.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/methods/dca/config.vsh.yaml b/src/methods/dca/config.vsh.yaml index 0853156..85114eb 100644 --- a/src/methods/dca/config.vsh.yaml +++ b/src/methods/dca/config.vsh.yaml @@ -40,6 +40,7 @@ engines: - requests - jsonschema - "git+https://github.com/scottgigante-immunai/dca.git@patch-1" + github: openproblems-bio/core#subdirectory=packages/python/openproblems runners: - type: executable - type: nextflow From ec6c32a9c68c66ba044dfaf6d88287d7572ee7d7 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Wed, 18 Sep 2024 11:49:45 +0200 Subject: [PATCH 14/27] update DCA method --- common | 2 +- src/methods/dca/config.vsh.yaml | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/common b/common index a140917..f264283 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit a1409176da317c8a7c9c65d1488bcf3b5afee3d6 +Subproject commit f2642835c89264e0a43e87e3f6c588c6be4902e7 diff --git a/src/methods/dca/config.vsh.yaml b/src/methods/dca/config.vsh.yaml index 85114eb..1c32b67 100644 --- a/src/methods/dca/config.vsh.yaml +++ b/src/methods/dca/config.vsh.yaml @@ -9,9 +9,10 @@ description: | using a deep autoencoder with zero-inflated negative binomial (ZINB) loss function." 
references: doi: 10.1038/s41467-018-07931-2 +links: + documentation: "https://github.com/theislab/dca#readme" + repository: "https://github.com/theislab/dca" info: - documentation_url: "https://github.com/theislab/dca#readme" - repository_url: "https://github.com/theislab/dca" v1: path: openproblems/tasks/denoising/methods/dca.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 @@ -32,6 +33,9 @@ engines: setup: - type: apt packages: procps + - type: python + github: + - openproblems-bio/core@feature/no-ref/add-support-py3.9#subdirectory=packages/python/openproblems - type: python packages: - anndata~=0.8.0 @@ -40,7 +44,7 @@ engines: - requests - jsonschema - "git+https://github.com/scottgigante-immunai/dca.git@patch-1" - github: openproblems-bio/core#subdirectory=packages/python/openproblems + - numpy<2 runners: - type: executable - type: nextflow From 72dd68e476edf2a4876eeb403f891efd236e0987 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Wed, 18 Sep 2024 16:39:39 +0200 Subject: [PATCH 15/27] update submodule --- common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common b/common index f264283..79006d5 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit f2642835c89264e0a43e87e3f6c588c6be4902e7 +Subproject commit 79006d5f737a0697dafc98935b1256d3a4682853 From cf430177e0d961a35115d5a59bfef3608fdb2d86 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Wed, 18 Sep 2024 16:41:44 +0200 Subject: [PATCH 16/27] update dca --- src/methods/dca/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/dca/config.vsh.yaml b/src/methods/dca/config.vsh.yaml index 1c32b67..343a032 100644 --- a/src/methods/dca/config.vsh.yaml +++ b/src/methods/dca/config.vsh.yaml @@ -35,7 +35,7 @@ engines: packages: procps - type: python github: - - openproblems-bio/core@feature/no-ref/add-support-py3.9#subdirectory=packages/python/openproblems + - openproblems-bio/core#subdirectory=packages/python/openproblems - type: python 
packages: - anndata~=0.8.0 From 781295fb48765439435166df52135aedfdb80ce2 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Wed, 18 Sep 2024 16:45:08 +0200 Subject: [PATCH 17/27] update links --- src/methods/alra/config.vsh.yaml | 5 +++-- src/methods/knn_smoothing/config.vsh.yaml | 5 +++-- src/methods/magic/config.vsh.yaml | 5 +++-- src/methods/saver/config.vsh.yaml | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/methods/alra/config.vsh.yaml b/src/methods/alra/config.vsh.yaml index d12fb44..7598429 100644 --- a/src/methods/alra/config.vsh.yaml +++ b/src/methods/alra/config.vsh.yaml @@ -14,9 +14,10 @@ description: | Finally, the matrix is rescaled. references: doi: 10.1101/397588 +links: + documentation: https://github.com/KlugerLab/ALRA/blob/master/README.md + repository: https://github.com/KlugerLab/ALRA info: - repository_url: "https://github.com/KlugerLab/ALRA" - documentation_url: https://github.com/KlugerLab/ALRA/blob/master/README.md v1: path: openproblems/tasks/denoising/methods/alra.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 diff --git a/src/methods/knn_smoothing/config.vsh.yaml b/src/methods/knn_smoothing/config.vsh.yaml index 10c7d24..d2a4e82 100644 --- a/src/methods/knn_smoothing/config.vsh.yaml +++ b/src/methods/knn_smoothing/config.vsh.yaml @@ -16,9 +16,10 @@ description: "Iterative kNN-smoothing is a method to repair or denoise noisy scR then optionally scaled to yield a final result." 
references: doi: 10.1101/217737 +links: + documentation: https://github.com/yanailab/knn-smoothing#readme + repository: https://github.com/yanailab/knn-smoothing info: - documentation_url: "https://github.com/yanailab/knn-smoothing#readme" - repository_url: "https://github.com/yanailab/knn-smoothing" v1: path: openproblems/tasks/denoising/methods/knn_smoothing.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 diff --git a/src/methods/magic/config.vsh.yaml b/src/methods/magic/config.vsh.yaml index 6077b1e..da5667c 100644 --- a/src/methods/magic/config.vsh.yaml +++ b/src/methods/magic/config.vsh.yaml @@ -15,9 +15,10 @@ description: "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a metho magnitude of measurements in the normalised (input) matrix." references: doi: 10.1016/j.cell.2018.05.061 +links: + documentation: https://github.com/KrishnaswamyLab/MAGIC#readme + repository: https://github.com/KrishnaswamyLab/MAGIC info: - documentation_url: "https://github.com/KrishnaswamyLab/MAGIC#readme" - repository_url: "https://github.com/KrishnaswamyLab/MAGIC" v1: path: openproblems/tasks/denoising/methods/magic.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 diff --git a/src/methods/saver/config.vsh.yaml b/src/methods/saver/config.vsh.yaml index e53505e..3d07668 100644 --- a/src/methods/saver/config.vsh.yaml +++ b/src/methods/saver/config.vsh.yaml @@ -15,9 +15,10 @@ description: | used as the SAVER recovered expression value. 
references: doi: 10.1038/s41592-018-0033-z +links: + documentation: https://mohuangx.github.io/SAVER/index.html + repository: https://github.com/mohuangx/SAVER info: - repository_url: https://github.com/mohuangx/SAVER - documentation_url: https://mohuangx.github.io/SAVER/index.html preferred_normalization: counts resources: - type: r_script From c0cbc94a6a1e87ed714add28892962e0457c80bd Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 10:30:41 +0200 Subject: [PATCH 18/27] set numpy<2 --- src/methods/magic/config.vsh.yaml | 2 +- src/metrics/mse/config.vsh.yaml | 1 + src/metrics/poisson/config.vsh.yaml | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/methods/magic/config.vsh.yaml b/src/methods/magic/config.vsh.yaml index da5667c..1bb3a94 100644 --- a/src/methods/magic/config.vsh.yaml +++ b/src/methods/magic/config.vsh.yaml @@ -58,7 +58,7 @@ engines: image: openproblems/base_python:1.0.0 setup: - type: python - pip: [scprep, magic-impute, scipy, scikit-learn<1.2] + pip: [scprep, magic-impute, scipy, scikit-learn<1.2, numpy<2] runners: - type: executable - type: nextflow diff --git a/src/metrics/mse/config.vsh.yaml b/src/metrics/mse/config.vsh.yaml index e97b67e..94e800a 100644 --- a/src/metrics/mse/config.vsh.yaml +++ b/src/metrics/mse/config.vsh.yaml @@ -25,6 +25,7 @@ engines: pypi: - scikit-learn - scprep + - numpy<2 runners: - type: executable - type: nextflow diff --git a/src/metrics/poisson/config.vsh.yaml b/src/metrics/poisson/config.vsh.yaml index f77a156..47742a7 100644 --- a/src/metrics/poisson/config.vsh.yaml +++ b/src/metrics/poisson/config.vsh.yaml @@ -23,7 +23,8 @@ engines: setup: - type: python pypi: - - scprep + - scprep + - numpy<2 runners: - type: executable - type: nextflow From 0a01503b131eb59a85bbbedc1dedf303c4595d41 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 10:32:58 +0200 Subject: [PATCH 19/27] update fapi file name --- src/api/comp_control_method.yaml | 4 ++-- 
src/api/comp_method.yaml | 2 +- src/api/comp_metric.yaml | 2 +- src/api/comp_process_dataset.yaml | 4 ++-- src/api/{file_test_h5ad.yaml => file_test.yaml} | 0 src/api/{file_train_h5ad.yaml => file_train.yaml} | 0 src/workflows/process_datasets/config.vsh.yaml | 4 ++-- src/workflows/run_benchmark/config.vsh.yaml | 4 ++-- 8 files changed, 10 insertions(+), 10 deletions(-) rename src/api/{file_test_h5ad.yaml => file_test.yaml} (100%) rename src/api/{file_train_h5ad.yaml => file_train.yaml} (100%) diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml index e7f1fe1..0378baa 100644 --- a/src/api/comp_control_method.yaml +++ b/src/api/comp_control_method.yaml @@ -12,11 +12,11 @@ info: in the task. arguments: - name: --input_train - __merge__: file_train_h5ad.yaml + __merge__: file_train.yaml required: true direction: input - name: --input_test - __merge__: file_test_h5ad.yaml + __merge__: file_test.yaml required: true direction: input - name: --output diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml index 9a35e00..ef04c12 100644 --- a/src/api/comp_method.yaml +++ b/src/api/comp_method.yaml @@ -8,7 +8,7 @@ info: A denoising method to remove noise (i.e. technical artifacts) from a dataset. arguments: - name: --input_train - __merge__: file_train_h5ad.yaml + __merge__: file_train.yaml required: true direction: input - name: --output diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml index 9104459..354d0f4 100644 --- a/src/api/comp_metric.yaml +++ b/src/api/comp_metric.yaml @@ -8,7 +8,7 @@ info: A metric for evaluating denoised datasets. 
arguments: - name: "--input_test" - __merge__: file_test_h5ad.yaml + __merge__: file_test.yaml direction: input required: true - name: "--input_prediction" diff --git a/src/api/comp_process_dataset.yaml b/src/api/comp_process_dataset.yaml index b5e7416..9383ac8 100644 --- a/src/api/comp_process_dataset.yaml +++ b/src/api/comp_process_dataset.yaml @@ -12,11 +12,11 @@ arguments: direction: input required: true - name: "--output_train" - __merge__: file_train_h5ad.yaml + __merge__: file_train.yaml direction: output required: true - name: "--output_test" - __merge__: file_test_h5ad.yaml + __merge__: file_test.yaml direction: output required: true test_resources: diff --git a/src/api/file_test_h5ad.yaml b/src/api/file_test.yaml similarity index 100% rename from src/api/file_test_h5ad.yaml rename to src/api/file_test.yaml diff --git a/src/api/file_train_h5ad.yaml b/src/api/file_train.yaml similarity index 100% rename from src/api/file_train_h5ad.yaml rename to src/api/file_train.yaml diff --git a/src/workflows/process_datasets/config.vsh.yaml b/src/workflows/process_datasets/config.vsh.yaml index 22765f2..6041a5c 100644 --- a/src/workflows/process_datasets/config.vsh.yaml +++ b/src/workflows/process_datasets/config.vsh.yaml @@ -10,11 +10,11 @@ argument_groups: - name: Outputs arguments: - name: "--output_train" - __merge__: "/src/api/file_train_h5ad.yaml" + __merge__: "/src/api/file_train.yaml" direction: output required: true - name: "--output_test" - __merge__: "/src/api/file_test_h5ad.yaml" + __merge__: "/src/api/file_test.yaml" direction: output required: true resources: diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index 3d1b6bc..da35f2b 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -4,11 +4,11 @@ argument_groups: - name: Inputs arguments: - name: "--input_train" - __merge__: "/src/api/file_train_h5ad.yaml" + __merge__: "/src/api/file_train.yaml" 
required: true direction: input - name: "--input_test" - __merge__: "/src/api/file_test_h5ad.yaml" + __merge__: "/src/api/file_test.yaml" required: true direction: input - name: Outputs From ba3c7eb4329916f3ee6041aceb7af52afc2ee026 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 10:37:05 +0200 Subject: [PATCH 20/27] update create_readme script --- scripts/create_readme.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/create_readme.sh b/scripts/create_readme.sh index e5dec6f..b43731f 100755 --- a/scripts/create_readme.sh +++ b/scripts/create_readme.sh @@ -1,5 +1,5 @@ #!/bin/bash -common/create_task_readme/create_task_readme \ - --task_dir src \ - --output README.md +set -e + +common/scripts/create_task_readme --input src/api \ No newline at end of file From 888d5b58d6265a1de9d5d32c5883758b88fa38c5 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 10:37:11 +0200 Subject: [PATCH 21/27] update readme --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index deac578..a9e9a05 100644 --- a/README.md +++ b/README.md @@ -48,20 +48,20 @@ dataset. 
flowchart LR file_common_dataset("Common Dataset") comp_process_dataset[/"Data processor"/] - file_test_h5ad("Test data") - file_train_h5ad("Training data") + file_test("Test data") + file_train("Training data") comp_control_method[/"Control Method"/] comp_metric[/"Metric"/] comp_method[/"Method"/] file_prediction("Denoised data") file_score("Score") file_common_dataset---comp_process_dataset - comp_process_dataset-->file_test_h5ad - comp_process_dataset-->file_train_h5ad - file_test_h5ad---comp_control_method - file_test_h5ad---comp_metric - file_train_h5ad---comp_control_method - file_train_h5ad---comp_method + comp_process_dataset-->file_test + comp_process_dataset-->file_train + file_test---comp_control_method + file_test---comp_metric + file_train---comp_control_method + file_train---comp_method comp_control_method-->file_prediction comp_metric-->file_score comp_method-->file_prediction From 4ab3149dc31e6c1632c38555d40393288de211d4 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 10:55:45 +0200 Subject: [PATCH 22/27] relocate process datasets --- src/api/{comp_process_dataset.yaml => comp_data_processor.yaml} | 2 +- src/{ => data_processors}/process_dataset/config.vsh.yaml | 0 src/{ => data_processors}/process_dataset/helper.py | 0 src/{ => data_processors}/process_dataset/script.py | 0 4 files changed, 1 insertion(+), 1 deletion(-) rename src/api/{comp_process_dataset.yaml => comp_data_processor.yaml} (96%) rename src/{ => data_processors}/process_dataset/config.vsh.yaml (100%) rename src/{ => data_processors}/process_dataset/helper.py (100%) rename src/{ => data_processors}/process_dataset/script.py (100%) diff --git a/src/api/comp_process_dataset.yaml b/src/api/comp_data_processor.yaml similarity index 96% rename from src/api/comp_process_dataset.yaml rename to src/api/comp_data_processor.yaml index 9383ac8..d3d24bb 100644 --- a/src/api/comp_process_dataset.yaml +++ b/src/api/comp_data_processor.yaml @@ -1,4 +1,4 @@ -namespace: 
"process_dataset" +namespace: "data_processors" info: type: process_dataset type_info: diff --git a/src/process_dataset/config.vsh.yaml b/src/data_processors/process_dataset/config.vsh.yaml similarity index 100% rename from src/process_dataset/config.vsh.yaml rename to src/data_processors/process_dataset/config.vsh.yaml diff --git a/src/process_dataset/helper.py b/src/data_processors/process_dataset/helper.py similarity index 100% rename from src/process_dataset/helper.py rename to src/data_processors/process_dataset/helper.py diff --git a/src/process_dataset/script.py b/src/data_processors/process_dataset/script.py similarity index 100% rename from src/process_dataset/script.py rename to src/data_processors/process_dataset/script.py From dbec1efcf76f9aff6676e9294749c69649bd11c3 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 10:56:02 +0200 Subject: [PATCH 23/27] Update scripts dir --- scripts/.gitignore | 3 -- scripts/create_component/.gitignore | 2 + .../resources.sh} | 26 ++++------ scripts/create_resources/test_resources.sh | 52 +++++++++++++++++++ scripts/create_test_resources.sh | 38 -------------- scripts/download_resources.sh | 15 ------ scripts/project/build_all_components.sh | 6 +++ .../project/build_all_docker_containers.sh | 7 +++ scripts/{ => project}/test_all_components.sh | 4 +- scripts/run_benchmark/run_full_local.sh | 40 ++++++++++++++ .../run_full_seqeracloud.sh} | 13 ++++- scripts/run_benchmark/run_test_local.sh | 27 ++++++++++ scripts/run_benchmark/run_test_seqeracloud.sh | 31 +++++++++++ scripts/run_benchmark_test.sh | 19 ------- scripts/sync_resources.sh | 5 ++ 15 files changed, 193 insertions(+), 95 deletions(-) delete mode 100644 scripts/.gitignore create mode 100644 scripts/create_component/.gitignore rename scripts/{process_datasets.sh => create_resources/resources.sh} (60%) create mode 100755 scripts/create_resources/test_resources.sh delete mode 100755 scripts/create_test_resources.sh delete mode 100755 
scripts/download_resources.sh create mode 100755 scripts/project/build_all_components.sh create mode 100755 scripts/project/build_all_docker_containers.sh rename scripts/{ => project}/test_all_components.sh (75%) create mode 100755 scripts/run_benchmark/run_full_local.sh rename scripts/{run_benchmark.sh => run_benchmark/run_full_seqeracloud.sh} (75%) create mode 100755 scripts/run_benchmark/run_test_local.sh create mode 100755 scripts/run_benchmark/run_test_seqeracloud.sh delete mode 100755 scripts/run_benchmark_test.sh create mode 100755 scripts/sync_resources.sh diff --git a/scripts/.gitignore b/scripts/.gitignore deleted file mode 100644 index 2f7ffd3..0000000 --- a/scripts/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -add_a_method.sh -add_a_control_method.sh -add_a_metric.sh \ No newline at end of file diff --git a/scripts/create_component/.gitignore b/scripts/create_component/.gitignore new file mode 100644 index 0000000..09380f9 --- /dev/null +++ b/scripts/create_component/.gitignore @@ -0,0 +1,2 @@ +# if users change the scripts, the changes should not be committed. 
+/create_*_*.sh \ No newline at end of file diff --git a/scripts/process_datasets.sh b/scripts/create_resources/resources.sh similarity index 60% rename from scripts/process_datasets.sh rename to scripts/create_resources/resources.sh index 85c0559..a289f00 100755 --- a/scripts/process_datasets.sh +++ b/scripts/create_resources/resources.sh @@ -1,7 +1,12 @@ #!/bin/bash +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + cat > /tmp/params.yaml << 'HERE' -id: denoising_process_datasets input_states: s3://openproblems-data/resources/datasets/**/log_cp10k/state.yaml rename_keys: 'input:output_dataset' settings: '{"output_train": "$id/train.h5ad", "output_test": "$id/test.h5ad"}' @@ -9,20 +14,7 @@ output_state: "$id/state.yaml" publish_dir: s3://openproblems-data/resources/denoising/datasets HERE -cat > /tmp/nextflow.config << HERE -process { - executor = 'awsbatch' - withName:'.*publishStatesProc' { - memory = '16GB' - disk = '100GB' - } - withLabel:highmem { - memory = '350GB' - } -} -HERE - -tw launch https://github.com/openproblems-bio/task_denoising.git \ +tw launch https://github.com/openproblems-bio/task_template.git \ --revision build/main \ --pull-latest \ --main-script target/nextflow/workflows/process_datasets/main.nf \ @@ -30,5 +22,5 @@ tw launch https://github.com/openproblems-bio/task_denoising.git \ --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ --params-file /tmp/params.yaml \ --entry-name auto \ - --config /tmp/nextflow.config \ - --labels denoising,process_datasets \ No newline at end of file + --config common/nextflow_helpers/labels_tw.config \ + --labels denoising,process_datasets diff --git a/scripts/create_resources/test_resources.sh b/scripts/create_resources/test_resources.sh new file mode 100755 index 0000000..980d179 --- /dev/null +++ b/scripts/create_resources/test_resources.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# get the root of the 
directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +# # remove this when you have implemented the script +# echo "TODO: replace the commands in this script with the sequence of components that you need to run to generate test_resources." +# echo " Inside this script, you will need to place commands to generate example files for each of the 'src/api/file_*.yaml' files." +# exit 1 + +set -e + +RAW_DATA=resources_test/common +DATASET_DIR=resources_test/denoising + +mkdir -p $DATASET_DIR + +# process dataset +viash run src/data_processors/process_dataset/config.vsh.yaml -- \ + --input $RAW_DATA/cxg_mouse_pancreas_atlas/dataset.h5ad \ + --output_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \ + --output_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \ + --output_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad + +# run one method +viash run src/methods/magic/config.vsh.yaml -- \ + --input_train $DATASET_DIR/pancreas/train.h5ad \ + --output $DATASET_DIR/pancreas/denoised.h5ad + +# run one metric +viash run src/metrics/poisson/config.vsh.yaml -- \ + --input_denoised $DATASET_DIR/pancreas/denoised.h5ad \ + --input_test $DATASET_DIR/pancreas/test.h5ad \ + --output $DATASET_DIR/pancreas/score.h5ad + +# write manual state.yaml. 
this is not actually necessary but you never know it might be useful +cat > $DATASET_DIR/cxg_mouse_pancreas_atlas/state.yaml << HERE +id: cxg_mouse_pancreas_atlas +train: !file train.h5ad +test: !file test.h5ad +solution: !file solution.h5ad +prediction: !file denoised.h5ad +score: !file score.h5ad +HERE + +# only run this if you have access to the openproblems-data bucket +# aws s3 sync --profile op \ +# "$DATASET_DIR" s3://openproblems-data/resources_test/denoising \ +# --delete --dryrun diff --git a/scripts/create_test_resources.sh b/scripts/create_test_resources.sh deleted file mode 100755 index deec9dc..0000000 --- a/scripts/create_test_resources.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -# get the root of the directory -REPO_ROOT=$(git rev-parse --show-toplevel) - -# ensure that the command below is run from the root of the repository -cd "$REPO_ROOT" - -set -e - -RAW_DATA=resources_test/common -DATASET_DIR=resources_test/denoising - -mkdir -p $DATASET_DIR - -# process dataset -echo Running process_dataset -nextflow run . 
\ - -main-script target/nextflow/workflows/process_datasets/main.nf \ - -profile docker \ - -entry auto \ - --input_states "$RAW_DATA/**/state.yaml" \ - --rename_keys 'input:output_dataset' \ - --settings '{"output_train": "$id/train.h5ad", "output_test": "$id/test.h5ad"}' \ - --publish_dir "$DATASET_DIR" \ - --output_state '$id/state.yaml' - -# run one method -viash run src/methods/magic/config.vsh.yaml -- \ - --input_train $DATASET_DIR/pancreas/train.h5ad \ - --output $DATASET_DIR/pancreas/denoised.h5ad - -# run one metric -viash run src/metrics/poisson/config.vsh.yaml -- \ - --input_denoised $DATASET_DIR/pancreas/denoised.h5ad \ - --input_test $DATASET_DIR/pancreas/test.h5ad \ - --output $DATASET_DIR/pancreas/score.h5ad - diff --git a/scripts/download_resources.sh b/scripts/download_resources.sh deleted file mode 100755 index c621323..0000000 --- a/scripts/download_resources.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -e - -echo ">> Downloading resources" - -common/sync_resources/sync_resources \ - --input "s3://openproblems-data/resources_test/common/" \ - --output "resources_test/common" \ - --delete - -common/sync_resources/sync_resources \ - --input "s3://openproblems-data/resources_test/denoising/" \ - --output "resources_test/denoising" \ - --delete \ No newline at end of file diff --git a/scripts/project/build_all_components.sh b/scripts/project/build_all_components.sh new file mode 100755 index 0000000..4e90d91 --- /dev/null +++ b/scripts/project/build_all_components.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -e + +# Build all components in a namespace (refer https://viash.io/reference/cli/ns_build.html) +viash ns build --parallel diff --git a/scripts/project/build_all_docker_containers.sh b/scripts/project/build_all_docker_containers.sh new file mode 100755 index 0000000..5d43639 --- /dev/null +++ b/scripts/project/build_all_docker_containers.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -e + +# Build all components in a namespace (refer 
https://viash.io/reference/cli/ns_build.html) +# and set up the container via a cached build +viash ns build --parallel --setup cachedbuild diff --git a/scripts/test_all_components.sh b/scripts/project/test_all_components.sh similarity index 75% rename from scripts/test_all_components.sh rename to scripts/project/test_all_components.sh index cd016e9..8a08afd 100755 --- a/scripts/test_all_components.sh +++ b/scripts/project/test_all_components.sh @@ -1,4 +1,6 @@ #!/bin/bash +set -e + # Test all components in a namespace (refer https://viash.io/reference/cli/ns_test.html) -viash ns test --parallel \ No newline at end of file +viash ns test --parallel diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh new file mode 100755 index 0000000..da7f291 --- /dev/null +++ b/scripts/run_benchmark/run_full_local.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +# NOTE: depending on the datasets and components, you may need to launch this workflow +# on a different compute platform (e.g. a HPC, AWS Cloud, Azure Cloud, Google Cloud). +# please refer to the nextflow information for more details: +# https://www.nextflow.io/docs/latest/ + + +set -e + +echo "Running benchmark on test data" +echo " Make sure to run 'scripts/project/build_all_docker_containers.sh'!"
+ +# generate a unique id +RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" +publish_dir="resources/results/${RUN_ID}" + +# write the parameters to file +cat > /tmp/params.yaml << HERE +input_states: resources/datasets/**/state.yaml +rename_keys: 'input_train:output_train;input_test:output_test' +output_state: "state.yaml" +publish_dir: "$publish_dir" +HERE + +# run the benchmark +nextflow run openproblems-bio/task_template \ + --revision build/main \ + -main-script target/nextflow/workflows/run_benchmark/main.nf \ + -profile docker \ + -resume \ + -entry auto \ + -c common/nextflow_helpers/labels_ci.config \ + -params-file /tmp/params.yaml diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark/run_full_seqeracloud.sh similarity index 75% rename from scripts/run_benchmark.sh rename to scripts/run_benchmark/run_full_seqeracloud.sh index 73eb674..aa5a3c8 100755 --- a/scripts/run_benchmark.sh +++ b/scripts/run_benchmark/run_full_seqeracloud.sh @@ -1,11 +1,20 @@ #!/bin/bash +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +set -e + +# generate a unique id RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" publish_dir="s3://openproblems-data/resources/denoising/results/${RUN_ID}" -# make sure only log_cp10k is used +# write the parameters to file cat > /tmp/params.yaml << HERE -input_states: s3://openproblems-data/resources/denoising/datasets/**/log_cp10k/state.yaml +input_states: s3://openproblems-data/resources/denoising/datasets/**/state.yaml rename_keys: 'input_train:output_train;input_test:output_test' output_state: "state.yaml" publish_dir: "$publish_dir" diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh new file mode 100755 index 0000000..a85bf75 --- /dev/null +++ b/scripts/run_benchmark/run_test_local.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) 
+ +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +set -e + +echo "Running benchmark on test data" +echo " Make sure to run 'scripts/project/build_all_docker_containers.sh'!" + +# generate a unique id +RUN_ID="testrun_$(date +%Y-%m-%d_%H-%M-%S)" +publish_dir="temp/results/${RUN_ID}" + +nextflow run . \ + -main-script target/nextflow/workflows/run_benchmark/main.nf \ + -profile docker \ + -resume \ + -c common/nextflow_helpers/labels_ci.config \ + --id cxg_mouse_pancreas_atlas \ + --input_train resources_test/denoising/cxg_mouse_pancreas_atlas/train.h5ad \ + --input_test resources_test/denoising/cxg_mouse_pancreas_atlas/test.h5ad \ + --output_state state.yaml \ + --publish_dir "$publish_dir" diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh new file mode 100755 index 0000000..428eda3 --- /dev/null +++ b/scripts/run_benchmark/run_test_seqeracloud.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +set -e + +resources_test_s3=s3://openproblems-data/resources_test/denoising +publish_dir_s3="s3://openproblems-nextflow/temp/results/denoising/$(date +%Y-%m-%d_%H-%M-%S)" + +# write the parameters to file +cat > /tmp/params.yaml << HERE +id: cxg_mouse_pancreas_atlas +input_train: $resources_test_s3/cxg_mouse_pancreas_atlas/train.h5ad +input_test: $resources_test_s3/cxg_mouse_pancreas_atlas/test.h5ad +output_state: "state.yaml" +publish_dir: $publish_dir_s3 +HERE + +tw launch https://github.com/openproblems-bio/task_denoising.git \ + --revision build/main \ + --pull-latest \ + --main-script target/nextflow/workflows/run_benchmark/main.nf \ + --workspace 53907369739130 \ + --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ + --params-file /tmp/params.yaml \ + --config common/nextflow_helpers/labels_tw.config \ + --labels denoising,test 
diff --git a/scripts/run_benchmark_test.sh b/scripts/run_benchmark_test.sh deleted file mode 100755 index 9e4d01c..0000000 --- a/scripts/run_benchmark_test.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -cat > /tmp/params.yaml << 'HERE' -input_states: s3://openproblems-data/resources_test/denoising/**/state.yaml -rename_keys: 'input_train:output_train;input_test:output_test' -output_state: "state.yaml" -publish_dir: s3://openproblems-nextflow/temp/denoising/ -HERE - -tw launch https://github.com/openproblems-bio/task_denoising.git \ - --revision build/main \ - --pull-latest \ - --main-script target/nextflow/workflows/run_benchmark/main.nf \ - --workspace 53907369739130 \ - --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ - --params-file /tmp/params.yaml \ - --entry-name auto \ - --config common/nextflow_helpers/labels_tw.config \ - --labels denoising,test \ No newline at end of file diff --git a/scripts/sync_resources.sh b/scripts/sync_resources.sh new file mode 100755 index 0000000..20b87e7 --- /dev/null +++ b/scripts/sync_resources.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -e + +common/scripts/sync_resources From eaff41e6fedca6af9f23d517294b63f53bdc8b63 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 11:04:46 +0200 Subject: [PATCH 24/27] update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb12c68..6c817fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,18 @@ * Directory structure has been updated. +* Update to viash 0.9.0 (PR #13). + ## NEW FUNCTIONALITY * Add `CHANGELOG.md` (PR #7). +## MAJOR CHANGES + +* Revamp `scripts` directory (PR #13). + +* Relocated `process_datasets` to `data_processors/process_datasets` (PR #13). + ## MINOR CHANGES * Remove dtype parameter in `.Anndata()` (PR #6). @@ -20,6 +28,11 @@ * Update docker containers used in components (PR #12). +* Set `numpy<2` for some failing methods (PR #13). + +* Small changes to api file names (PR #13). 
+ + ## transfer from openproblems-v2 repository ### NEW FUNCTIONALITY From 2713a3ae631577c06785ee604ab08bac4ed55ed4 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 11:05:10 +0200 Subject: [PATCH 25/27] update readme --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a9e9a05..c5def4a 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ dataset. ``` mermaid flowchart LR file_common_dataset("Common Dataset") - comp_process_dataset[/"Data processor"/] + comp_data_processor[/"Data processor"/] file_test("Test data") file_train("Training data") comp_control_method[/"Control Method"/] @@ -55,9 +55,9 @@ flowchart LR comp_method[/"Method"/] file_prediction("Denoised data") file_score("Score") - file_common_dataset---comp_process_dataset - comp_process_dataset-->file_test - comp_process_dataset-->file_train + file_common_dataset---comp_data_processor + comp_data_processor-->file_test + comp_data_processor-->file_train file_test---comp_control_method file_test---comp_metric file_train---comp_control_method From e06814ea044f533735758de4d858de6842c5f1b6 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 11:12:08 +0200 Subject: [PATCH 26/27] update process_datasets merge path --- src/data_processors/process_dataset/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_processors/process_dataset/config.vsh.yaml b/src/data_processors/process_dataset/config.vsh.yaml index f167d39..d0da376 100644 --- a/src/data_processors/process_dataset/config.vsh.yaml +++ b/src/data_processors/process_dataset/config.vsh.yaml @@ -1,4 +1,4 @@ -__merge__: ../api/comp_process_dataset.yaml +__merge__: ../../api/comp_process_dataset.yaml name: "process_dataset" description: | Split data using molecular cross-validation. 
From cc9c32453dfa84dbe14097c9fb6914821a58ba16 Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Thu, 19 Sep 2024 11:33:38 +0200 Subject: [PATCH 27/27] fix processor config --- src/data_processors/process_dataset/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_processors/process_dataset/config.vsh.yaml b/src/data_processors/process_dataset/config.vsh.yaml index d0da376..3914251 100644 --- a/src/data_processors/process_dataset/config.vsh.yaml +++ b/src/data_processors/process_dataset/config.vsh.yaml @@ -1,4 +1,4 @@ -__merge__: ../../api/comp_process_dataset.yaml +__merge__: ../../api/comp_data_processor.yaml name: "process_dataset" description: | Split data using molecular cross-validation.