single omics workflow added

openproblems-bio · Aug 29, 2024 · 376e5b4 · 376e5b4
1 parent 3282935
commit 376e5b4
Show file tree

Hide file tree

Showing 27 changed files with 926 additions and 619 deletions.
diff --git a/_viash.yaml b/_viash.yaml
@@ -11,5 +11,5 @@ config_mods: |
   .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task_grn_inference'
   .platforms[.type == "nextflow"].directives.tag := "$id"
   .platforms[.type == "nextflow"].auto.simplifyOutput := false
-  .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h",  veryveryhightime : "time = 48.h" }
+  .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h",  veryveryhightime : "time = 48.h", threedaystime : "time = 72.h", oneweektime : "time = 168.h" }
   .platforms[.type == "nextflow"].config.script := "process.errorStrategy = 'ignore'"
diff --git a/scripts/run_benchmark_single_omics.sh b/scripts/run_benchmark_single_omics.sh
@@ -2,12 +2,11 @@
 
 # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
 RUN_ID="single_omics_try1"
-# resources_dir="s3://openproblems-data/resources/grn"
-# publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
-
-resources_dir="./resources_test/"
-publish_dir="output/${RUN_ID}"
+resources_dir="s3://openproblems-data/resources/grn"
+publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
 
+# resources_dir="./resources_test/"
+# publish_dir="output/${RUN_ID}"
 
 reg_type=ridge
 subsample=-2

diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
@@ -31,6 +31,13 @@ functionality:
       type: integer
       direction: input
       default: 4
+    # Headerless CSV of putative transcription factors; the first column holds
+    # the gene names. Optional at the API level, but methods that read it do so
+    # unconditionally, so they must be given a path.
+    - name: --tf_all
+      type: file
+      direction: input
+      example: resources/prior/tf_all.csv
+      required: false
+    # NOTE(review): presumably caps the number of links kept in the prediction;
+    # confirm against the method scripts.
+    - name: --max_n_links
+      type: integer
+      direction: input
+      default: 50000
 
   test_resources:
     - type: python_script

diff --git a/src/methods/multi_omics/scglue/config.vsh.yaml b/src/methods/multi_omics/scglue/config.vsh.yaml
@@ -41,4 +41,4 @@ platforms:
   - type: native
   - type: nextflow
     directives:
-      label: [veryveryhightime,midmem,midcpu]
+      label: [oneweektime,midmem,highcpu]
diff --git a/src/methods/multi_omics/scglue_ns/config.vsh.yaml b/src/methods/multi_omics/scglue_ns/config.vsh.yaml
@@ -26,4 +26,4 @@ functionality:
 platforms:
   - type: nextflow
     directives:
-      label: [ veryveryhightime, midmem, highcpu ]
+      label: [oneweektime,midmem,highcpu]
diff --git a/src/methods/single_omics/ennet/config.novsh.yaml b/src/methods/single_omics/ennet/config.novsh.yaml
diff --git a/src/methods/single_omics/ennet/config.vsh.yaml b/src/methods/single_omics/ennet/config.vsh.yaml
@@ -0,0 +1,32 @@
+# Viash component config for the ENNET GRN inference method.
+# Shared arguments and test resources are merged in from the common method API.
+__merge__: ../../../api/comp_method.yaml
+
+functionality:
+  name: ennet
+  namespace: "grn_methods"
+  info:
+    label: ennet
+    summary: "GRN inference using ENNET"
+    description: |
+      GRN inference using ENNET.
+    documentation_url: https://doi.org/10.1186/1752-0509-7-106
+  resources:
+    - type: r_script
+      path: script.R
+
+platforms:
+  - type: docker
+    image: janursa/figr:19-08-2024
+    setup:
+      - type: r
+        packages: [ foreach, plyr, anndata, dplyr ]
+      # ennet is built and installed from the upstream source tarball.
+      # Fetch it over HTTPS so the code that gets installed is not transferred
+      # in plaintext.
+      # NOTE(review): this tracks the moving `master` branch; consider pinning
+      # a commit or tag for reproducible image builds.
+      - type: docker
+        run: |
+          wget https://github.com/slawekj/ennet/archive/master.tar.gz -O ennet.tar.gz && \
+          tar -xvzf ennet.tar.gz && \
+          cd ennet-master && \
+          R CMD build ennet && \
+          R CMD INSTALL ennet
+  - type: native
+  - type: nextflow
+    directives:
+      # Resource labels are defined in _viash.yaml
+      # (midtime = 4.h, midmem = 50.Gb, midcpu = 15 cpus).
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/ennet/script.R b/src/methods/single_omics/ennet/script.R
@@ -5,7 +5,7 @@ library(dplyr)
 ## VIASH START
 par <- list(
     "multiomics_rna" = 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
-    "tfs" = 'resources/prior/tf_all.csv',
+    "tf_all" = 'resources/prior/tf_all.csv',
     "prediction" = 'output/ennet/prediction.csv',
     "temp_dir": 'output/ennet',
     "max_n_links": 50000
@@ -29,7 +29,7 @@ mask <- (zero_proportion <= 0.9)
 X <- X[mask,]
 
 # Load list of putative TFs
-dat <- read.csv(par$tfs, header = FALSE)
+dat <- read.csv(par$tf_all, header = FALSE)
 Tf <- which(gene_names %in% dat$V1)
 
 # Run GRN inference method

diff --git a/src/methods/single_omics/genie3/config.novsh.yaml b/src/methods/single_omics/genie3/config.novsh.yaml
diff --git a/src/methods/single_omics/genie3/config.vsh.yaml b/src/methods/single_omics/genie3/config.vsh.yaml
@@ -0,0 +1,29 @@
+# Viash component config for the GENIE3 GRN inference method.
+# The shared method API is merged in from comp_method.yaml.
+__merge__: '../../../api/comp_method.yaml'
+
+functionality:
+  name: genie3
+  namespace: grn_methods
+  info:
+    label: genie3
+    summary: GRN inference using GENIE3
+    description: |
+      GRN inference using GENIE3.
+    documentation_url: 'https://www.bioconductor.org/packages/release/bioc/html/GENIE3.html'
+  resources:
+    - type: python_script
+      path: script.py
+
+platforms:
+  - type: docker
+    image: 'continuumio/anaconda3:2024.02-1'
+    setup:
+      # arboreto and pandas are installed with conda from the bioconda channel.
+      - type: docker
+        run: |
+          conda install -y -c bioconda arboreto pandas
+      - type: python
+        packages:
+          - anndata
+  - type: native
+  - type: nextflow
+    directives:
+      label:
+        - midtime
+        - midmem
+        - midcpu
diff --git a/src/methods/single_omics/genie3/script.py b/src/methods/single_omics/genie3/script.py
@@ -10,7 +10,7 @@
 ## VIASH START
 par = {
   'multiomics_rna': 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
-  'tfs': 'resources/prior/tf_all.csv',
+  "tf_all": 'resources/prior/tf_all.csv',
   'prediction': 'output/genie3/prediction.csv',
   'max_n_links': 50000
 }
@@ -32,7 +32,7 @@
 adata_rna = X[~mask, :]
 
 # Load list of putative TFs
-df = pd.read_csv(par['tfs'], header=None, names=['gene_name'])
+df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
 tfs = set(list(df['gene_name']))
 tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]
 

diff --git a/src/methods/single_omics/grnboost2/config.novsh.yaml b/src/methods/single_omics/grnboost2/config.novsh.yaml
diff --git a/src/methods/single_omics/grnboost2/config.vsh.yaml b/src/methods/single_omics/grnboost2/config.vsh.yaml
@@ -0,0 +1,29 @@
+# Viash component config for the GRNBoost2 GRN inference method.
+# The shared method API is merged in from comp_method.yaml.
+__merge__: '../../../api/comp_method.yaml'
+
+functionality:
+  name: grnboost2
+  namespace: grn_methods
+  info:
+    label: grnboost2
+    summary: GRN inference using GRNBoost2
+    description: |
+      GRN inference using GRNBoost2.
+    documentation_url: 'https://arboreto.readthedocs.io/en/latest/algorithms.html#grnboost2'
+  resources:
+    - type: python_script
+      path: script.py
+
+platforms:
+  - type: docker
+    image: 'continuumio/anaconda3:2024.02-1'
+    setup:
+      # arboreto and pandas are installed with conda from the bioconda channel.
+      - type: docker
+        run: |
+          conda install -y -c bioconda arboreto pandas
+      - type: python
+        packages:
+          - anndata
+  - type: native
+  - type: nextflow
+    directives:
+      label:
+        - midtime
+        - midmem
+        - midcpu
diff --git a/src/methods/single_omics/grnboost2/script.py b/src/methods/single_omics/grnboost2/script.py
@@ -10,7 +10,7 @@
 ## VIASH START
 par = {
   'multiomics_rna': 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
-  'tfs': 'resources/prior/tf_all.csv',
+  "tf_all": 'resources/prior/tf_all.csv',
   'prediction': 'output/grnboost2/prediction.csv',
   'max_n_links': 50000
 }
@@ -32,7 +32,7 @@
 adata_rna = X[~mask, :]
 
 # Load list of putative TFs
-df = pd.read_csv(par['tfs'], header=None, names=['gene_name'])
+df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
 tfs = set(list(df['gene_name']))
 tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]