scenic added

openproblems-bio · Sep 14, 2024 · 56460b4 · 56460b4
1 parent b5a6f0a
commit 56460b4
Show file tree

Hide file tree

Showing 6 changed files with 117 additions and 4 deletions.
diff --git a/runs.ipynb b/runs.ipynb
@@ -2583,11 +2583,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 131,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/state.yaml to rsesources/results/single_omics_inference/state.yaml\n",
+      "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/trace.txt to rsesources/results/single_omics_inference/trace.txt\n",
+      "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/scores.yaml to rsesources/results/single_omics_inference/scores.yaml\n",
+      "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.ennet.ennet.prediction.csv to rsesources/results/single_omics_inference/ridge.ennet.ennet.prediction.csv\n",
+      "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.pidc.pidc.prediction.csv to rsesources/results/single_omics_inference/ridge.pidc.pidc.prediction.csv\n",
+      "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.tigress.tigress.prediction.csv to rsesources/results/single_omics_inference/ridge.tigress.tigress.prediction.csv\n"
+     ]
+    }
+   ],
    "source": [
-    "!aws s3 sync s3://openproblems-data/resources/grn/results/single_omics_all resources/results/single_omics_all"
+    "!aws s3 sync s3://openproblems-data/resources_test/grn/results/single_omics_inference rsesources/results/single_omics_inference"
    ]
   },
   {

diff --git a/scripts/run_benchmark_single_omics.sh b/scripts/run_benchmark_single_omics.sh
@@ -3,7 +3,7 @@
 # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
 RUN_ID="single_omics_inference"
 # resources_dir="./resources_test/"
-resources_dir="s3://openproblems-data/resources_test/grn"
+resources_dir="s3://openproblems-data/resources/grn"
 publish_dir="${resources_dir}/results/${RUN_ID}"
 
 

diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh
@@ -36,6 +36,7 @@ baseline_models=(
     baseline_pearson_causal
     baseline_pearson_causal_celltype
     baseline_pearson_causal_metacell
+    baseline_pearson_causal_impute
     positive_control
     )
 # Start writing to the YAML file

diff --git a/src/methods/single_omics/scenic/config.vsh.yaml b/src/methods/single_omics/scenic/config.vsh.yaml
@@ -0,0 +1,27 @@
+__merge__: ../../../api/comp_method.yaml  # merge in the definitions from the shared comp_method.yaml
+
+functionality:
+  name: scenic
+  namespace: "grn_methods"
+  info:
+    label: scenic
+    summary: "GRN inference using scenic"
+
+
+  resources:
+    - type: python_script
+      path: script.py  # the script this component runs, located next to this config
+
+platforms:
+  - type: docker
+    image: aertslab/pyscenic:0.12.1  # base image; the version is pinned to 0.12.1
+    setup:  # the commented-out docker step below is disabled; only the python step is active
+    #   - type: docker
+    #     run: |
+    #       conda install -y -c bioconda arboreto pandas
+      - type: python
+        packages: [ anndata ]  # installed on top of the base image
+  - type: native
+  - type: nextflow
+    directives:
+      label: [onedaytime, midmem, midcpu]  # presumably resource labels defined by the pipeline - confirm
diff --git a/src/methods/single_omics/scenic/script.py b/src/methods/single_omics/scenic/script.py
@@ -0,0 +1,71 @@
+"""Infer a gene regulatory network with the pySCENIC CLI (grn, then ctx)."""
+import os
+import subprocess
+
+import anndata
+import numpy as np
+import pandas as pd
+from arboreto.algo import grnboost2
+from distributed import Client
+
+## VIASH START
+# Debug defaults covering every key this script reads from `par`.
+par = {
+  'multiomics_rna': 'resources_test/grn-benchmark/multiomics_rna.h5ad',
+  "tf_all": 'resources/prior/tf_all.csv',
+  'prediction': 'output/scenic/prediction.csv',
+  'temp_dir': 'output/scenic',
+  'max_workers': 4,
+  'max_n_links': 50000
+}
+## VIASH END
+os.makedirs(par['temp_dir'], exist_ok=True)
+# subprocess only accepts string arguments, so convert the worker count once.
+num_workers = str(par['max_workers'])
+
+# Load scRNA-seq data
+adata_rna = anndata.read_h5ad(par['multiomics_rna'])
+gene_names = adata_rna.var_names.to_numpy()
+# X may be sparse or already dense; only sparse matrices provide toarray().
+X = adata_rna.X.toarray() if hasattr(adata_rna.X, 'toarray') else np.asarray(adata_rna.X)
+
+# Write the cells x genes matrix with the cell IDs as first column: pyscenic
+# reads that column as the row index, so without it the first gene is lost.
+expression_data = f"{par['temp_dir']}/expression_data.tsv"
+pd.DataFrame(X, index=adata_rna.obs_names, columns=gene_names).to_csv(expression_data, sep='\t')
+
+# Run grn
+expr_mat_adjacencies = f"{par['temp_dir']}/expr_mat_adjacencies.tsv"
+command = [
+    "pyscenic", "grn",
+    "--num_workers", num_workers,
+    "-o", expr_mat_adjacencies,
+    expression_data,
+    par['tf_all']
+]
+subprocess.run(command, check=True)
+
+# Run prune
+# NOTE(review): confirm these /data databases exist in the container image.
+regulons = f"{par['temp_dir']}/regulons.csv"
+annotations_fname = "/data/motifs-v9-nr.hgnc-m0.001-o0.0.tbl"
+ranking_1 = "/data/hg19-tss-centered-5kb-7species.mc9nr.genes_vs_motifs.rankings.feather"
+ranking_2 = "/data/hg19-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather"
+command = [
+    "pyscenic", "ctx",
+    expr_mat_adjacencies, ranking_1, ranking_2,
+    "--annotations_fname", annotations_fname,
+    "--expression_mtx_fname", expression_data,
+    "--mode", "custom_multiprocessing",
+    "--output", regulons,
+    "--num_workers", num_workers
+]
+subprocess.run(command, check=True)
+
+# Save inferred GRN
+# NOTE(review): saves the unpruned grn adjacencies; `regulons` and max_n_links are unused - confirm.
+print(expr_mat_adjacencies)
+network = pd.read_csv(expr_mat_adjacencies,  sep='\t')
+network.to_csv(par['prediction'], sep=',')
+
+print('Finished.')
diff --git a/src/methods/single_omics/scenic/test.sh b/src/methods/single_omics/scenic/test.sh
@@ -0,0 +1 @@
+viash run src/methods/single_omics/scenic/config.vsh.yaml -- --multiomics_rna resources_test/grn-benchmark/multiomics_rna.h5ad --tf_all resources/prior/tf_all.csv --prediction output/scenic_prediction.csv --temp_dir output/scenic
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		viash run src/methods/single_omics/scenic/config.vsh.yaml -- --multiomics_rna resources_test/grn-benchmark/multiomics_rna.h5ad --tf_all resources/prior/tf_all.csv --prediction output/scenic_prediction.csv --temp_dir output/scenic