openproblems-bio · janursa · Oct 5, 2024 · Oct 5, 2024
diff --git a/scripts/extract_resources.sh → scripts/repo/extract_resources.sh b/scripts/extract_resources.sh → scripts/repo/extract_resources.sh
diff --git a/scripts/run_pc_vs_nc.sh → scripts/repo/run_pc_vs_nc.sh b/scripts/run_pc_vs_nc.sh → scripts/repo/run_pc_vs_nc.sh
diff --git a/scripts/run_robust_analys.sh → scripts/repo/run_robust_analys.sh b/scripts/run_robust_analys.sh → scripts/repo/run_robust_analys.sh
diff --git a/scripts/run_robust_analys_causal.sh → scripts/repo/run_robust_analys_causal.sh b/scripts/run_robust_analys_causal.sh → scripts/repo/run_robust_analys_causal.sh
diff --git a/scripts/run_process_multiomics_dataset.sh b/scripts/run_process_multiomics_dataset.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Writes params/${RUN_ID}.yaml and runs the process_multiomics workflow locally
+# with Nextflow (docker profile). Relative paths (params/, resources/, src/,
+# target/) are resolved against the current working directory.
+
+# Stop at the first failing command or unset variable so that nextflow is never
+# started with a missing or stale params file.
+set -euo pipefail
+
+RUN_ID="process_multiomics"
+# No trailing slash here: the paths below add their own "/" separator.
+# resources_dir="s3://openproblems-data/resources/grn"
+resources_dir="resources"
+publish_dir="${resources_dir}/results/${RUN_ID}"
+params_file="params/${RUN_ID}.yaml"
+
+# The redirection below fails when the params directory does not exist yet.
+mkdir -p "$(dirname "${params_file}")"
+
+# The heredoc delimiter is unquoted, so the shell expands the variables.
+cat > "${params_file}" << HERE
+param_list:
+  - id: process_multiomics
+    multiome_counts: ${resources_dir}/datasets_raw/multiome_counts.h5ad
+
+output_state: "state.yaml"
+publish_dir: "${publish_dir}"
+HERE
+
+
+# Alternative launch through the tw CLI, kept for reference. The trailing
+# backticks look like PowerShell line continuations; use "\" under bash.
+# ./tw-windows-x86_64.exe launch  https://github.com/openproblems-bio/task_grn_inference.git `
+#     --revision build/main --pull-latest `
+#     --main-script target/nextflow/workflows/process_multiomics/main.nf `
+#     --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
+#     --params-file ./params/process_multiomics.yaml `
+#     --config src/common/nextflow_helpers/labels_tw.config
+
+
+nextflow run . \
+  -main-script target/nextflow/workflows/process_multiomics/main.nf \
+  -profile docker \
+  -with-trace \
+  -c src/common/nextflow_helpers/labels_ci.config \
+  -params-file "${params_file}"
diff --git a/scripts/run_process_perturbation_dataset.sh b/scripts/run_process_perturbation_dataset.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# Writes params/${RUN_ID}.yaml and runs the process_perturbation workflow with
+# Nextflow (docker profile). The input path and publish_dir both point at the
+# S3 location in resources_dir; params/, src/ and target/ are resolved against
+# the current working directory.
+
+# Stop at the first failing command or unset variable so that nextflow is never
+# started with a missing or stale params file.
+set -euo pipefail
+
+RUN_ID="process_perturbation"
+# No trailing slash: the paths below add their own "/" separator, and S3 keys
+# containing "//" name different objects than keys with a single "/".
+resources_dir="s3://openproblems-data/resources/grn"
+publish_dir="${resources_dir}/results/${RUN_ID}"
+params_file="params/${RUN_ID}.yaml"
+
+# The redirection below fails when the params directory does not exist yet.
+mkdir -p "$(dirname "${params_file}")"
+
+# The heredoc delimiter is unquoted, so the shell expands the variables.
+cat > "${params_file}" << HERE
+param_list:
+  - id: test_process_perturbation
+    perturbation_counts: ${resources_dir}/datasets_raw/perturbation_counts.h5ad
+
+output_state: "state.yaml"
+publish_dir: "${publish_dir}"
+HERE
+
+
+# Alternative launch through the tw CLI, kept for reference. The trailing
+# backticks look like PowerShell line continuations; use "\" under bash.
+# ./tw-windows-x86_64.exe launch  https://github.com/openproblems-bio/task_grn_inference.git `
+#     --revision build/main --pull-latest `
+#     --main-script target/nextflow/workflows/process_perturbation/main.nf `
+#     --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
+#     --params-file ./params/process_perturbation.yaml `
+#     --config src/common/nextflow_helpers/labels_tw.config
+
+
+nextflow run . \
+  -main-script target/nextflow/workflows/process_perturbation/main.nf \
+  -profile docker \
+  -with-trace \
+  -c src/common/nextflow_helpers/labels_ci.config \
+  -params-file "${params_file}"
diff --git a/scripts/run_process_perturbation_tw.sh b/scripts/run_process_perturbation_tw.sh
diff --git a/src/methods/multi_omics/figr/script.R b/src/methods/multi_omics/figr/script.R
@@ -27,6 +27,7 @@ dir.create(par$temp_dir, recursive = TRUE, showWarnings = TRUE)
 atac = readRDS(par$multiomics_atac_r)
 rna  = readRDS(par$multiomics_rna_r)
 
+
 colnames(atac) <- gsub("-", "", colnames(atac))
 colnames(rna) <- gsub("-", "", colnames(rna))
 

diff --git a/src/process_data/multiomics/batch_correction/script.py b/src/process_data/multiomics/batch_correction/script.py
diff --git a/src/process_data/multiomics/format_data/config.vsh.yaml b/src/process_data/multiomics/format_data/config.vsh.yaml
@@ -8,26 +8,15 @@ functionality:
   arguments:
     - name: --multiome_counts
       type: file 
-      required: false
+      required: true
       direction: input
       example: resources/datasets_raw/multiome_counts.h5ad
+
     - name: --multiomics_rna
       type: file 
       required: false
       direction: output
       example: resources/grn-benchmark/multiomics_rna.h5ad
-    - name: --multiomics_rna_d0
-      type: file 
-      required: false
-      direction: output
-      example: resources/grn-benchmark/multiomics_rna_d0.h5ad
-
-    - name: --multiomics_rna_d0_hvg
-      type: file 
-      required: false
-      direction: output
-      example: resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad
-
     - name: --multiomics_atac
       type: file 
       required: false

diff --git a/src/process_data/multiomics/format_data/script.py b/src/process_data/multiomics/format_data/script.py
@@ -1,5 +1,6 @@
 import anndata as ad
 import scanpy as sc
+import numpy as np
 ## VIASH START
 par = {
     # 'multiome_counts': 'resources/datasets_raw/multiome_counts.h5ad',
@@ -26,22 +27,14 @@
 multiomics_rna = multiomics[:,multiomics.var.feature_types=='Gene Expression']
 multiomics_rna.var = multiomics_rna.var[['gene_ids', 'interval']]
 
-def high_coverage(adata):
-    threshold = 0.1
-    mask = adata.X!=0
-    mask_obs = (np.sum(mask, axis=1).A.flatten()/mask.shape[1])>threshold
-    mask_var = (np.sum(mask, axis=0).A.flatten()/mask.shape[0])>threshold
-    adata.obs['high_coverage'] = mask_obs
-    adata.var['high_coverage'] = mask_var
-high_coverage(multiomics_rna)
-
-#  hvgs
-var = sc.pp.highly_variable_genes(multiomics_rna, flavor='seurat_v3', n_top_genes=7000, inplace=False)
-multiomics_rna.var['highly_variable'] = var.highly_variable
-
-# subset to donor 0
-multiomics_rna_d0 = multiomics_rna[multiomics_rna.obs.donor_id=='donor_0', :]
-multiomics_rna_d0_hvg = multiomics_rna[multiomics_rna.obs.donor_id=='donor_0', multiomics_rna.var.highly_variable]
+# def high_coverage(adata):
+#     threshold = 0.1
+#     mask = adata.X!=0
+#     mask_obs = (np.sum(mask, axis=1).A.flatten()/mask.shape[1])>threshold
+#     mask_var = (np.sum(mask, axis=0).A.flatten()/mask.shape[0])>threshold
+#     adata.obs['high_coverage'] = mask_obs
+#     adata.var['high_coverage'] = mask_var
+# high_coverage(multiomics_rna)
 #------ ATAC
 multiomics_atac = multiomics[:,multiomics.var.feature_types=='Peaks']
 multiomics_atac.var = multiomics_atac.var[[]]
@@ -62,6 +55,4 @@ def high_coverage(adata):
 multiomics_atac.obs['donor_id'] = multiomics_atac.obs['donor_id'].map(donor_map)
 
 multiomics_rna.write(par['multiomics_rna'])
-multiomics_rna_h0.write(par['multiomics_rna_h0'])
-multiomics_rna_h0_hvg.write(par['multiomics_rna_h0_hvg'])
 multiomics_atac.write(par['multiomics_atac'])
diff --git a/src/process_data/multiomics/format_resources_r/config.vsh.yaml b/src/process_data/multiomics/format_resources_r/config.vsh.yaml
@@ -8,50 +8,45 @@ functionality:
   arguments:
     - name: --rna_matrix
       type: file
-      required: false
+      required: true
       direction: input
-      default: output/scRNA/X_matrix.mtx
-
+      example: output/scRNA/X_matrix.mtx
     - name: --atac_matrix
       type: file
-      required: false
+      required: true
       direction: input
-      default: output/scATAC/X_matrix.mtx 
-
+      example: output/scATAC/X_matrix.mtx 
     - name: --rna_gene_annot
       type: file
-      required: false
+      required: true
       direction: input
-      default: output/scRNA/annotation_gene.csv
-
+      example: output/scRNA/annotation_gene.csv
     - name: --rna_cell_annot
       type: file
-      required: false
+      required: true
       direction: input
-      default: output/scRNA/annotation_cell.csv 
-
+      example: output/scRNA/annotation_cell.csv 
     - name: --atac_peak_annot
       type: file
-      required: false
+      required: true
       direction: input
-      default: output/scATAC/annotation_gene.csv
-
+      example: output/scATAC/annotation_gene.csv
     - name: --atac_cell_annot
       type: file
-      required: false
+      required: true
       direction: input
-      default: output/scATAC/annotation_cell.csv
+      example: output/scATAC/annotation_cell.csv
 
     - name: --rna_rds
       type: file
       required: false
       direction: output
-      default: resources/grn-benchmark/multiomics_r/rna.rds
+      example: resources/grn-benchmark/multiomics_r/rna.rds
     - name: --atac_rds
       type: file
       required: false
       direction: output
-      default: resources/grn-benchmark/multiomics_r/atac.rds
+      example: resources/grn-benchmark/multiomics_r/atac.rds
 
 
 

diff --git a/src/process_data/multiomics/format_resources_r/script.R b/src/process_data/multiomics/format_resources_r/script.R
@@ -32,6 +32,7 @@ annotation_peak_filtered <- annotation_peak[filter_indices, ]
 # Filter the rows in X
 X_filtered <- X[filter_indices, ]
 
+
 # Create the SummarizedExperiment object with the filtered data
 atac <- SummarizedExperiment(assays = list(counts = X_filtered), 
                              rowRanges = GRanges(annotation_peak_filtered$seqname,

diff --git a/src/process_data/multiomics/multiome_matrix/config.vsh.yaml b/src/process_data/multiomics/multiome_matrix/config.vsh.yaml
@@ -8,51 +8,50 @@ functionality:
   arguments:
     - name: --multiomics_rna
       type: file
-      required: false
+      required: true
       direction: input
-      default: resources/grn-benchmark/multiomics_rna.h5ad
+      example: resources/grn-benchmark/multiomics_rna.h5ad
 
     - name: --multiomics_atac
       type: file
-      required: false
+      required: true
       direction: input
-      default: resources/grn-benchmark/multiomics_atac.h5ad
+      example: resources/grn-benchmark/multiomics_atac.h5ad
 
     - name: --rna_matrix
       type: file
       required: false
       direction: output
-      default: output/scRNA/X_matrix.mtx
-
+      example: output/scRNA/X_matrix.mtx
     - name: --atac_matrix
       type: file
       required: false
       direction: output
-      default: output/scATAC/X_matrix.mtx 
+      example: output/scATAC/X_matrix.mtx 
 
     - name: --rna_gene_annot
       type: file
       required: false
       direction: output
-      default: output/scRNA/annotation_gene.csv
+      example: output/scRNA/annotation_gene.csv
 
     - name: --rna_cell_annot
       type: file
       required: false
       direction: output
-      default: output/scRNA/annotation_cell.csv 
+      example: output/scRNA/annotation_cell.csv 
 
     - name: --atac_peak_annot
       type: file
       required: false
       direction: output
-      default: output/scATAC/annotation_gene.csv
+      example: output/scATAC/annotation_gene.csv
 
     - name: --atac_cell_annot
       type: file
       required: false
       direction: output
-      default: output/scATAC/annotation_cell.csv
+      example: output/scATAC/annotation_cell.csv
   resources:
     - type: python_script
       path: script.py

diff --git a/src/process_data/multiomics/subset_hvg/config.vsh.yaml b/src/process_data/multiomics/subset_hvg/config.vsh.yaml
@@ -0,0 +1,52 @@
+# Viash component config: declares the subset_hvg component's arguments,
+# script resource and target platforms.
+functionality:
+  name: subset_hvg
+  namespace: multiomics
+  info:
+    label: subset_hvg
+    summary: 'Receives multiomics data and subsets it for hvg'
+  arguments:
+    # Inputs: both files must be supplied.
+    - name: --multiomics_rna
+      type: file
+      required: true
+      direction: input
+      example: resources/grn-benchmark/multiomics_rna.h5ad
+    - name: --multiomics_atac
+      type: file
+      required: true
+      direction: input
+      example: resources/grn-benchmark/multiomics_atac.h5ad
+
+    # Outputs: declared as not required.
+    - name: --multiomics_rna_d0_hvg
+      type: file
+      required: false
+      direction: output
+      example: resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad
+    - name: --multiomics_atac_d0
+      type: file
+      required: false
+      direction: output
+      example: resources/grn-benchmark/multiomics_atac_d0.h5ad
+
+  resources:
+    - type: python_script
+      path: script.py
+
+platforms:
+  - type: docker
+    image: ghcr.io/openproblems-bio/base_python:1.0.4
+    setup:
+      - type: python
+        # NOTE(review): presumably needed by the HVG step in script.py — confirm.
+        packages:
+          - scikit-misc
+  - type: native
+  - type: nextflow
+    directives:
+      label:
+        - midtime
+        - midmem
+        - midcpu