diff --git a/runs.ipynb b/runs.ipynb
index 9ed0558e8..43b653e7b 100644
--- a/runs.ipynb
+++ b/runs.ipynb
@@ -1,8 +1,32 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Commands"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 99,
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Completed 257.3 MiB/746.3 MiB (10.5 MiB/s) with 3 file(s) remaining \r"
+ ]
+ }
+ ],
+ "source": [
+ "!aws s3 sync resources/grn-benchmark s3://openproblems-data/resources/grn/grn-benchmark "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -14,6 +38,37 @@
"grn_models = ['collectri','granie', 'figr', 'celloracle', 'scglue', 'scenicplus']"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# To be classified "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import anndata as ad \n",
+ "import numpy as np \n",
+ "adata_rna = ad.read_h5ad('resources/grn-benchmark/multiomics_rna.h5ad')\n",
+ "X = adata_rna.X.todense()\n",
+ "\n",
+ "if True:\n",
+ " print('qc')\n",
+ " print('Data shape before QC: ', adata_rna.shape)\n",
+ " # Remove genes with >=90% of zeros\n",
+ " mask_gene = (np.mean(X == 0, axis=0) >= 0.9)\n",
+ " adata_rna = adata_rna[:, ~mask_gene]\n",
+ " # Remove samples with >=90% of zeros\n",
+ " mask_cells = (np.mean(X == 0, axis=1) >= 0.9)\n",
+ " adata_rna = adata_rna[~mask_cells, :]\n",
+ " print('Data shape after QC: ', adata_rna.shape)\n",
+ "adata_rna.write('resources/grn-benchmark/multiomics_rna_qc.h5ad')"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -3079,6 +3134,501 @@
"df_all['Rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n",
"df_all.style.background_gradient()"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "df = pd.read_csv('output/scenic_default/expr_mat_adjacencies.tsv', sep='\\t')\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "qc\n",
+ "Data shape before QC: (25551, 22787)\n",
+ "Data shape after QC: (4632, 5439)\n"
+ ]
+ }
+ ],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "atac-emb.h5ad\t\t\t peaks.bed\n",
+ "atac.h5ad\t\t\t prediction.csv\n",
+ "ctx_annotation.tsv\t\t pruned_grn.csv\n",
+ "draft_grn.csv\t\t\t rna-emb.h5ad\n",
+ "gene2peak.links\t\t\t rna.h5ad\n",
+ "glue\t\t\t\t rna.loom\n",
+ "glue.dill\t\t\t supp.genes_vs_tracks.rankings.feather\n",
+ "glue.genes_vs_tracks.rankings.feather tfs.txt\n",
+ "guidance.graphml.gz\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls output/scglue/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !pip install pyarrow"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " source | \n",
+ " target | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " IRF9 | \n",
+ " RSAD2 | \n",
+ " 0.729921 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " IRF9 | \n",
+ " SAMD9L | \n",
+ " 0.812882 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " IRF9 | \n",
+ " HERC6 | \n",
+ " 1.465909 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " IRF9 | \n",
+ " XRCC2 | \n",
+ " 0.437663 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " IRF9 | \n",
+ " EXOC3L1 | \n",
+ " 0.875641 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 593 | \n",
+ " 593 | \n",
+ " SP2 | \n",
+ " PDE9A | \n",
+ " 0.929089 | \n",
+ "
\n",
+ " \n",
+ " 594 | \n",
+ " 594 | \n",
+ " SP2 | \n",
+ " ANKRD13B | \n",
+ " 2.120112 | \n",
+ "
\n",
+ " \n",
+ " 595 | \n",
+ " 595 | \n",
+ " SP2 | \n",
+ " IQANK1 | \n",
+ " 0.639963 | \n",
+ "
\n",
+ " \n",
+ " 596 | \n",
+ " 596 | \n",
+ " SP2 | \n",
+ " FAM120A | \n",
+ " 0.321937 | \n",
+ "
\n",
+ " \n",
+ " 597 | \n",
+ " 597 | \n",
+ " SP2 | \n",
+ " GCC2-AS1 | \n",
+ " 1.377123 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
598 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 source target weight\n",
+ "0 0 IRF9 RSAD2 0.729921\n",
+ "1 1 IRF9 SAMD9L 0.812882\n",
+ "2 2 IRF9 HERC6 1.465909\n",
+ "3 3 IRF9 XRCC2 0.437663\n",
+ "4 4 IRF9 EXOC3L1 0.875641\n",
+ ".. ... ... ... ...\n",
+ "593 593 SP2 PDE9A 0.929089\n",
+ "594 594 SP2 ANKRD13B 2.120112\n",
+ "595 595 SP2 IQANK1 0.639963\n",
+ "596 596 SP2 FAM120A 0.321937\n",
+ "597 597 SP2 GCC2-AS1 1.377123\n",
+ "\n",
+ "[598 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.read_csv('output/scglue/prediction.csv')\n",
+ "df # default"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " source | \n",
+ " target | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " BCL6 | \n",
+ " FBP1 | \n",
+ " 0.544815 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " BCL6 | \n",
+ " FAM157C | \n",
+ " 1.530639 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " BCL6 | \n",
+ " OLIG1 | \n",
+ " 0.508618 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " BCL6 | \n",
+ " BIRC5 | \n",
+ " 0.915793 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " BCL6 | \n",
+ " TMED5 | \n",
+ " 0.523122 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 5686 | \n",
+ " 5686 | \n",
+ " ZNF816 | \n",
+ " FLII | \n",
+ " 0.160327 | \n",
+ "
\n",
+ " \n",
+ " 5687 | \n",
+ " 5687 | \n",
+ " ZNF816 | \n",
+ " KNOP1 | \n",
+ " 0.167249 | \n",
+ "
\n",
+ " \n",
+ " 5688 | \n",
+ " 5688 | \n",
+ " ZNF816 | \n",
+ " SCO1 | \n",
+ " 0.184201 | \n",
+ "
\n",
+ " \n",
+ " 5689 | \n",
+ " 5689 | \n",
+ " ZNF816 | \n",
+ " SH2D4B | \n",
+ " 0.072754 | \n",
+ "
\n",
+ " \n",
+ " 5690 | \n",
+ " 5690 | \n",
+ " ZNF816 | \n",
+ " SLC13A3 | \n",
+ " 0.313595 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5691 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 source target weight\n",
+ "0 0 BCL6 FBP1 0.544815\n",
+ "1 1 BCL6 FAM157C 1.530639\n",
+ "2 2 BCL6 OLIG1 0.508618\n",
+ "3 3 BCL6 BIRC5 0.915793\n",
+ "4 4 BCL6 TMED5 0.523122\n",
+ "... ... ... ... ...\n",
+ "5686 5686 ZNF816 FLII 0.160327\n",
+ "5687 5687 ZNF816 KNOP1 0.167249\n",
+ "5688 5688 ZNF816 SCO1 0.184201\n",
+ "5689 5689 ZNF816 SH2D4B 0.072754\n",
+ "5690 5690 ZNF816 SLC13A3 0.313595\n",
+ "\n",
+ "[5691 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "df_relaxed = pd.read_csv('output/scglue/prediction.csv')\n",
+ "df_relaxed "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " source | \n",
+ " target | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ACAA1 | \n",
+ " ARHGEF2 | \n",
+ " 1.038998 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " ACAA1 | \n",
+ " TUBA1A | \n",
+ " 1.779427 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " ACAA1 | \n",
+ " HMBOX1 | \n",
+ " 4.849021 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " ACAA1 | \n",
+ " BAX | \n",
+ " 1.048583 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " ACAA1 | \n",
+ " SAMHD1 | \n",
+ " 0.571366 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 16816 | \n",
+ " 16816 | \n",
+ " ZXDC | \n",
+ " PRKAA1 | \n",
+ " 0.663177 | \n",
+ "
\n",
+ " \n",
+ " 16817 | \n",
+ " 16817 | \n",
+ " ZXDC | \n",
+ " ARMC8 | \n",
+ " 0.551332 | \n",
+ "
\n",
+ " \n",
+ " 16818 | \n",
+ " 16818 | \n",
+ " ZXDC | \n",
+ " ANKRD26 | \n",
+ " 1.600681 | \n",
+ "
\n",
+ " \n",
+ " 16819 | \n",
+ " 16819 | \n",
+ " ZXDC | \n",
+ " YTHDF2 | \n",
+ " 0.550480 | \n",
+ "
\n",
+ " \n",
+ " 16820 | \n",
+ " 16820 | \n",
+ " ZXDC | \n",
+ " HTT | \n",
+ " 0.626333 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
16821 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 source target weight\n",
+ "0 0 ACAA1 ARHGEF2 1.038998\n",
+ "1 1 ACAA1 TUBA1A 1.779427\n",
+ "2 2 ACAA1 HMBOX1 4.849021\n",
+ "3 3 ACAA1 BAX 1.048583\n",
+ "4 4 ACAA1 SAMHD1 0.571366\n",
+ "... ... ... ... ...\n",
+ "16816 16816 ZXDC PRKAA1 0.663177\n",
+ "16817 16817 ZXDC ARMC8 0.551332\n",
+ "16818 16818 ZXDC ANKRD26 1.600681\n",
+ "16819 16819 ZXDC YTHDF2 0.550480\n",
+ "16820 16820 ZXDC HTT 0.626333\n",
+ "\n",
+ "[16821 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_scenic = pd.read_csv('output/scenic/scenic.csv')\n",
+ "df_scenic "
+ ]
}
],
"metadata": {
diff --git a/batch.sh b/scripts/batch_scenic.sh
similarity index 81%
rename from batch.sh
rename to scripts/batch_scenic.sh
index 4861ee8fb..e8da2a0ec 100644
--- a/batch.sh
+++ b/scripts/batch_scenic.sh
@@ -1,11 +1,11 @@
#!/bin/bash
#SBATCH --time=12:00:00
-#SBATCH --job-name=scenic_donor_0
+#SBATCH --job-name=scenic
#SBATCH --output=logs/%j.out
#SBATCH --error=logs/%j.err
#SBATCH --mail-type=END
#SBATCH --mail-user=jalil.nourisa@gmail.com
-#SBATCH --cpus-per-task=10
+#SBATCH --cpus-per-task=20
#SBATCH --mem=64G
singularity exec ../../images/scenic python src/methods/single_omics/scenic/script.py
diff --git a/scripts/batch_scglue.sh b/scripts/batch_scglue.sh
new file mode 100644
index 000000000..147ad7728
--- /dev/null
+++ b/scripts/batch_scglue.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#SBATCH --time=12:00:00
+#SBATCH --job-name=scglue_donor_0
+#SBATCH --output=logs/%j.out
+#SBATCH --error=logs/%j.err
+#SBATCH --mail-type=END
+#SBATCH --mail-user=jalil.nourisa@gmail.com
+#SBATCH --cpus-per-task=20
+#SBATCH --mem=64G
+
+singularity exec ../../images/scglue python src/methods/multi_omics/scglue/script.py
diff --git a/src/exp_analysis/config.vsh.yaml b/src/exp_analysis/config.vsh.yaml
index 5b3336052..4e50b6a2a 100644
--- a/src/exp_analysis/config.vsh.yaml
+++ b/src/exp_analysis/config.vsh.yaml
@@ -47,6 +47,12 @@ functionality:
required: false
direction: output
default: output/stats.json
+ - name: --reg_weight_distribution
+ type: file
+ required: false
+ direction: output
+ default: output/reg_weight_distribution.png
+
resources:
- type: python_script
diff --git a/src/exp_analysis/helper.py b/src/exp_analysis/helper.py
index 975b8bb5b..68e5f0809 100644
--- a/src/exp_analysis/helper.py
+++ b/src/exp_analysis/helper.py
@@ -11,7 +11,6 @@ def degree_centrality(net, source='source', target='target', normalize=False):
total_targets = net[target].nunique()
counts = counts/total_targets
return counts
-
def plot_cumulative_density(data, title='', ax=None, s=1, **kwdgs):
# Step 1: Sort the data
sorted_data = np.sort(data)
@@ -25,11 +24,14 @@ def plot_cumulative_density(data, title='', ax=None, s=1, **kwdgs):
else:
fig = None
ax.step(sorted_data, cdf, where='post', label=title, **kwdgs)
- ax.set_xlabel('Data')
+ ax.set_xlabel('Weight')
ax.set_ylabel('Cumulative Density')
ax.set_title(title)
- # ax.grid(True)
+ ax.grid(True)
return fig, ax
+
+
class Connectivity:
def __init__(self, net, **kwargs):
self.out_deg = degree_centrality(net, source='source', target='target', **kwargs)
diff --git a/src/exp_analysis/script.py b/src/exp_analysis/script.py
index a9b810c4f..c4f6b5522 100644
--- a/src/exp_analysis/script.py
+++ b/src/exp_analysis/script.py
@@ -5,23 +5,18 @@
## VIASH START
par = {
- "perturbation_data": "resources/grn-benchmark/pertubation_data.h5ad",
- "prediction": "resources/grn-benchmark/grn_models/figr.csv",
+ "perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad",
+ "prediction": "output/scenic_default/scenic.csv",
# "peak_gene_net": "resources/grn-benchmark/peak_gene_models/figr.csv",
- "annot_peak_database": "resources/grn-benchmark/supp/annot_peak_database.csv",
- "annot_gene_database": "resources/grn-benchmark/supp/annot_gene_database.csv",
- "hvgs": "resources/grn-benchmark/supp/hvgs.txt"
-
+ "annot_peak_database": "resources/supplementary/annot_peak_database.csv",
+ "annot_gene_database": "resources/supplementary/annot_gene_database.csv",
+ "stats": "output/stats.json",
+ "reg_weight_distribution": "output/reg_weight_distribution.png",
+ "tf_gene_indegee_fig": "output/tf_gene_indegee_fig.png",
+ "tf_gene_outdegee_fig": "output/tf_gene_outdegee_fig.png"
}
## VIASH END
-# meta = {
-# "resources_dir":'resources'
-# }
-sys.path.append(meta["resources_dir"])
-from helper import Explanatory_analysis
-
-print('Reading input files', flush=True)
perturbation_data = ad.read_h5ad(par["perturbation_data"])
prediction = pd.read_csv(par["prediction"])
@@ -29,6 +24,17 @@
# annot_peak_database = pd.read_csv(par["annot_peak_database"])
# hvgs = pd.read_csv(par["hvgs"])
+meta = {
+ "resources_dir":'src/exp_analysis/'
+}
+sys.path.append(meta["resources_dir"])
+from helper import Explanatory_analysis, plot_cumulative_density
+
+print('Plotting cumulative density of weights: ', par['reg_weight_distribution'], flush=True)
+fig, ax = plot_cumulative_density(prediction.weight, title='Cumulative density of reg. weights')
+fig.savefig(par['reg_weight_distribution'])
+
+print('Reading input files', flush=True)
# peak_gene_net['source'] = peak_gene_net['peak']
info_obj = Explanatory_analysis(net=prediction)
print("Calculate basic stats")
diff --git a/src/exp_analysis/test.sh b/src/exp_analysis/test.sh
index 004b91d18..3e97b0053 100644
--- a/src/exp_analysis/test.sh
+++ b/src/exp_analysis/test.sh
@@ -1,4 +1,4 @@
viash run src/exp_analysis/config.vsh.yaml -- \
--perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
- --prediction resources/grn_models/baselines/positive_control.csv \
+ --prediction output/scenic/scenic.csv \
diff --git a/src/methods/multi_omics/scglue/main.py b/src/methods/multi_omics/scglue/main.py
index b71a208e2..ad3ba5f19 100644
--- a/src/methods/multi_omics/scglue/main.py
+++ b/src/methods/multi_omics/scglue/main.py
@@ -109,7 +109,7 @@ def training(par):
nx.write_graphml(guidance, f"{par['temp_dir']}/guidance.graphml.gz")
-def cis_inference(par):
+def run_grn(par):
''' Infers gene2peak connections
'''
rna = ad.read_h5ad(f"{par['temp_dir']}/rna-emb.h5ad")
@@ -155,13 +155,12 @@ def cis_inference(par):
np.savetxt(f"{par['temp_dir']}/tfs.txt", tfs, fmt="%s")
# Construct the command
- command = (
- f"pyscenic grn {par['temp_dir']}/rna.loom {par['temp_dir']}/tfs.txt "
- f"-o {par['temp_dir']}/draft_grn.csv --seed 0 --num_workers {par['num_workers']} "
- "--cell_id_attribute obs_id --gene_attribute name"
- )
-
- result = subprocess.run(command, shell=True, capture_output=True, text=True)
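+    # Build the pyscenic grn call as an argument list (no shell=True); with check=True a
+    # non-zero exit raises instead of failing silently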
+ command = ['pyscenic', 'grn', f"{par['temp_dir']}/rna.loom",
+ f"{par['temp_dir']}/tfs.txt", '-o', f"{par['temp_dir']}/draft_grn.csv",
+ '--seed', '0', '--num_workers', f"{par['num_workers']}",
+ '--cell_id_attribute', 'obs_id', '--gene_attribute', 'name']
+ print('Run grn')
+    result = subprocess.run(command, check=True, capture_output=True, text=True)
print("Output:")
print(result.stdout)
@@ -193,7 +192,7 @@ def cis_inference(par):
n_samples=0
)
- ### Prune coexpression network using cis-regulatory ranking
+ ### Prepare data for pruning
gene2tf_rank_glue.columns = gene2tf_rank_glue.columns + "_glue"
gene2tf_rank_supp.columns = gene2tf_rank_supp.columns + "_supp"
@@ -215,19 +214,29 @@ def cis_inference(par):
orthologous_identity=1.0,
description="placeholder"
).to_csv(f"{par['temp_dir']}/ctx_annotation.tsv", sep="\t", index=False)
-
+def prune_grn(par):
# Construct the command
#TODO: be sure that obs_id is in obs and name is in var
print("Run pscenic ctx", flush=True)
- command = (
- f" pyscenic ctx {par['temp_dir']}/draft_grn.csv {par['temp_dir']}/glue.genes_vs_tracks.rankings.feather "
- f" {par['temp_dir']}/supp.genes_vs_tracks.rankings.feather --annotations_fname {par['temp_dir']}/ctx_annotation.tsv "
- f" --expression_mtx_fname {par['temp_dir']}/rna.loom --output {par['temp_dir']}/pruned_grn.csv "
- f" --rank_threshold 500 --min_genes 1 --num_workers {par['num_workers']} "
- " --cell_id_attribute obs_id --gene_attribute name"
- )
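+    # These ctx thresholds are deliberately more permissive than the usual pySCENIC defaults
+    # (rank_threshold 5000, auc_threshold 0.05, nes_threshold 3.0), so more links survive pruning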
+ command = [
+ "pyscenic", "ctx",
+ f"{par['temp_dir']}/draft_grn.csv",
+ f"{par['temp_dir']}/glue.genes_vs_tracks.rankings.feather",
+ f"{par['temp_dir']}/supp.genes_vs_tracks.rankings.feather",
+ "--annotations_fname", f"{par['temp_dir']}/ctx_annotation.tsv",
+ "--expression_mtx_fname", f"{par['temp_dir']}/rna.loom",
+ "--output", f"{par['temp_dir']}/pruned_grn.csv",
+ "--rank_threshold", "10000",
+ "--auc_threshold", "0.1",
+ "--nes_threshold", "2",
+ "--mask_dropouts",
+ "--min_genes", "1",
+ "--num_workers", f"{par['num_workers']}",
+ "--cell_id_attribute", "obs_id",
+ "--gene_attribute", "name"
+ ]
- result = subprocess.run(command, shell=True, capture_output=True, text=True)
+    result = subprocess.run(command, check=True, capture_output=True, text=True)
print("Output:")
print(result.stdout)
@@ -262,15 +271,16 @@ def main(par):
print("Number of GPUs:", torch.cuda.device_count())
os.makedirs(par['temp_dir'], exist_ok=True)
- print('Reading input files', flush=True)
- rna = ad.read_h5ad(par['multiomics_rna'])
- atac = ad.read_h5ad(par['multiomics_atac'])
-
- print('Preprocess data', flush=True)
- preprocess(rna, atac, par)
- print('Train a model', flush=True)
- training(par)
- cis_inference(par)
+ # print('Reading input files', flush=True)
+ # rna = ad.read_h5ad(par['multiomics_rna'])
+ # atac = ad.read_h5ad(par['multiomics_atac'])
+
+ # print('Preprocess data', flush=True)
+ # preprocess(rna, atac, par)
+ # print('Train a model', flush=True)
+ # training(par)
+ # run_grn(par)
+ prune_grn(par)
print('Curate predictions', flush=True)
pruned_grn = pd.read_csv(
f"{par['temp_dir']}/pruned_grn.csv", header=None, skiprows=3,
diff --git a/src/methods/multi_omics/scglue/script.py b/src/methods/multi_omics/scglue/script.py
index 25728e583..47a11f38c 100644
--- a/src/methods/multi_omics/scglue/script.py
+++ b/src/methods/multi_omics/scglue/script.py
@@ -5,17 +5,17 @@
## VIASH START
par = {
- "multiomics_rna": "resources_test/grn-benchmark/multiomics_rna.h5ad",
- "multiomics_atac": "resources_test/grn-benchmark/multiomics_atac.h5ad",
+ "multiomics_rna": "resources/grn-benchmark/multiomics_rna_0.h5ad",
+ "multiomics_atac": "resources/grn-benchmark/multiomics_atac_0.h5ad",
"motif_file": "resources/supplementary/JASPAR2022-hg38.bed.gz",
"annotation_file": "resources/supplementary/gencode.v45.annotation.gtf.gz",
"temp_dir": 'output/scglue/',
- "num_workers": 10,
- "prediction": "output/prediction.csv",
+ "num_workers": 20,
+ "prediction": "output/scglue/prediction.csv",
}
## VIASH END
meta = {
- "resources_dir": 'resources'
+ "resources_dir": 'src/methods/multi_omics/scglue/'
}
diff --git a/src/methods/single_omics/ennet/script.R b/src/methods/single_omics/ennet/script.R
index 87e787795..64e5c076a 100644
--- a/src/methods/single_omics/ennet/script.R
+++ b/src/methods/single_omics/ennet/script.R
@@ -18,16 +18,16 @@ ad <- anndata::read_h5ad(par$multiomics_rna)
X <- as.matrix(ad$X)
gene_names <- colnames(ad)
-# Remove genes with > 90% of zeros
-zero_proportion <- colMeans(X == 0)
-mask <- (zero_proportion <= 0.9)
-X <- X[, mask]
-gene_names <- gene_names[mask]
-
-# Remove samples with > 90% of zeros
-zero_proportion <- rowMeans(X == 0)
-mask <- (zero_proportion <= 0.9)
-X <- X[mask,]
+# # Remove genes with > 90% of zeros
+# zero_proportion <- colMeans(X == 0)
+# mask <- (zero_proportion <= 0.9)
+# X <- X[, mask]
+# gene_names <- gene_names[mask]
+
+# # Remove samples with > 90% of zeros
+# zero_proportion <- rowMeans(X == 0)
+# mask <- (zero_proportion <= 0.9)
+# X <- X[mask,]
# Load list of putative TFs
dat <- read.csv(par$tf_all, header = FALSE)
diff --git a/src/methods/single_omics/genie3/script.py b/src/methods/single_omics/genie3/script.py
index a3ab20ac2..9e632553e 100644
--- a/src/methods/single_omics/genie3/script.py
+++ b/src/methods/single_omics/genie3/script.py
@@ -22,14 +22,15 @@
gene_names = adata_rna.var.gene_ids.index.to_numpy()
X = adata_rna.X.toarray()
-# Remove genes with >=90% of zeros
-mask = (np.mean(X == 0, axis=0) >= 0.9)
-X = X[:, ~mask]
-gene_names = gene_names[~mask]
-
-# Remove samples with >=90% of zeros
-mask = (np.mean(X == 0, axis=1) >= 0.9)
-adata_rna = X[~mask, :]
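+# QC filtering disabled here; low-expression genes/cells are assumed to be filtered upstream
+# (see the QC cell in runs.ipynb that writes multiomics_rna_qc.h5ad)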
+if False:
+ # Remove genes with >=90% of zeros
+ mask = (np.mean(X == 0, axis=0) >= 0.9)
+ X = X[:, ~mask]
+ gene_names = gene_names[~mask]
+
+ # Remove samples with >=90% of zeros
+ mask = (np.mean(X == 0, axis=1) >= 0.9)
+ adata_rna = X[~mask, :]
# Load list of putative TFs
df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
diff --git a/src/methods/single_omics/grnboost2/script.py b/src/methods/single_omics/grnboost2/script.py
index 510d273fb..cf983a9e2 100644
--- a/src/methods/single_omics/grnboost2/script.py
+++ b/src/methods/single_omics/grnboost2/script.py
@@ -22,14 +22,15 @@
gene_names = adata_rna.var.gene_ids.index.to_numpy()
X = adata_rna.X.toarray()
-# Remove genes with >=90% of zeros
-mask = (np.mean(X == 0, axis=0) >= 0.9)
-X = X[:, ~mask]
-gene_names = gene_names[~mask]
-
-# Remove samples with >=90% of zeros
-mask = (np.mean(X == 0, axis=1) >= 0.9)
-adata_rna = X[~mask, :]
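+# QC filtering disabled here; low-expression genes/cells are assumed to be filtered upstream
+# (see the QC cell in runs.ipynb that writes multiomics_rna_qc.h5ad)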
+if False:
+ # Remove genes with >=90% of zeros
+ mask = (np.mean(X == 0, axis=0) >= 0.9)
+ X = X[:, ~mask]
+ gene_names = gene_names[~mask]
+
+ # Remove samples with >=90% of zeros
+ mask = (np.mean(X == 0, axis=1) >= 0.9)
+ adata_rna = X[~mask, :]
# Load list of putative TFs
df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
diff --git a/src/methods/single_omics/pidc/config.vsh.yaml b/src/methods/single_omics/pidc/config.vsh.yaml
new file mode 100644
index 000000000..80db737e3
--- /dev/null
+++ b/src/methods/single_omics/pidc/config.vsh.yaml
@@ -0,0 +1,50 @@
+functionality:
+ name: pidc
+ info:
+ label: pidc
+ summary: "GRN inference using PIDC"
+ description: |
+ GRN inference using PIDC.
+ documentation_url: https://rdrr.io/github/hmutpw/PIDC/
+ arguments:
+ - name: --multiomics_rna
+ type: file
+ example: resources/grn-benchmark/multiomics_rna.h5ad
+ info:
+ label: multiomics_rna
+ summary: "Multiomics RNA data"
+ file_type: h5ad
+ columns:
+ - name: dummy
+ type: string
+ required: false
+ required: true
+ must_exist: true
+ - name: --prediction
+ __merge__: ../../api/file_prediction.yaml
+ required: true
+ direction: output
+ - name: --max_n_links
+ type: integer
+ default: 50000
+ - name: --temp_dir
+ type: file
+ direction: output
+ default: 'output/pidc'
+ - name: --num_workers
+ type: integer
+ direction: input
+ default: 4
+ resources:
+ - type: python_script
+ path: script.py
+ - type: file
+ path: pidc.jl
+
+platforms:
+ - type: docker
+ image: apassemi/pidc:latest
+ - type: native
+ - type: nextflow
+ directives:
+ label: [midtime,midmem,midcpu]
diff --git a/src/methods/single_omics/pidc/pidc.jl b/src/methods/single_omics/pidc/pidc.jl
new file mode 100644
index 000000000..fd4a51808
--- /dev/null
+++ b/src/methods/single_omics/pidc/pidc.jl
@@ -0,0 +1,11 @@
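+# PIDC inference driver: ARGS[1] = path to a (genes x samples) TSV expression matrix,
+# ARGS[2] = output path for the inferred edge list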
+using NetworkInference
+using LightGraphs
+
+algorithm = PIDCNetworkInference()
+dataset_name = string(ARGS[1])
+
+genes = get_nodes(dataset_name);
+
+network = InferredNetwork(algorithm, genes);
+
+write_network_file(string(ARGS[2]), network);
diff --git a/src/methods/single_omics/pidc/script.py b/src/methods/single_omics/pidc/script.py
new file mode 100644
index 000000000..24ada09e3
--- /dev/null
+++ b/src/methods/single_omics/pidc/script.py
@@ -0,0 +1,58 @@
+import os
+import subprocess
+import pathlib
+
+import anndata
+import numpy as np
+import pandas as pd
+
+
+## VIASH START
+par = {
+ 'multiomics_rna': 'resources_test/grn-benchmark/multiomics_rna.h5ad',
+ 'tf_all': 'resources_test/prior/tf_all.csv',
+ 'prediction': 'output/pidc/prediction.csv',
+ 'temp_dir': 'output/pidc',
+ 'max_n_links': 50000
+}
+## VIASH END
+
+# Load scRNA-seq data
+adata_rna = anndata.read_h5ad(par['multiomics_rna'])
+gene_names = adata_rna.var.gene_ids.index.to_numpy()
+df_rna = pd.DataFrame(adata_rna.X.toarray(), index=adata_rna.obs.index, columns=adata_rna.var.index)
+
+# Remove genes with >=90% of zeros
+# mask = (np.mean(df_rna.to_numpy() == 0, axis=0) >= 0.9)
+# df_rna = df_rna.loc[:, ~mask]
+
+# # Remove samples with >=90% of zeros
+# mask = (np.mean(df_rna.to_numpy() == 0, axis=1) >= 0.9)
+# df_rna = df_rna.loc[~mask, :]
+
+# (genes x samples) format is needed
+df_rna = df_rna.transpose()
+
+# Save expression data
+if not os.path.exists(os.path.join(par['temp_dir'], 'multiomics_rna.tsv')):
+ os.makedirs(par['temp_dir'], exist_ok=True)
+ df_rna.to_csv(os.path.join(par['temp_dir'], 'multiomics_rna.tsv'), sep='\t', index=True, header=True)
+
+# Run PIDC
+# meta is injected by viash; for standalone runs, set it manually:
+# meta = {"resources_dir": 'src/methods/single_omics/pidc/'}
+JL_SCRIPT_PATH = os.path.join(meta['resources_dir'], 'pidc.jl')
+subprocess.run([
+ 'julia',
+ JL_SCRIPT_PATH,
+ os.path.join(par['temp_dir'], 'multiomics_rna.tsv'),
+ os.path.join(par['temp_dir'], 'output.tsv'),
+])
+
+# Re-format output
+df = pd.read_csv(os.path.join(par['temp_dir'], 'output.tsv'), header=None, names=['source', 'target', 'weight'], sep='\t')
+df = df.head(par['max_n_links'])
+df.to_csv(par['prediction'], header=True, sep=',')
+
+print('Finished.')
diff --git a/src/methods/single_omics/pidc/test.sh b/src/methods/single_omics/pidc/test.sh
new file mode 100644
index 000000000..8c47f3a0d
--- /dev/null
+++ b/src/methods/single_omics/pidc/test.sh
@@ -0,0 +1,2 @@
+viash build src/methods/single_omics/pidc/config.vsh.yaml -p docker -o bin/pidc && bin/pidc/pidc --multiomics_rna resources/resources_test/grn-benchmark/multiomics_rna.h5ad --prediction output/pidc/prediction.csv
+#viash run src/methods/single_omics/pidc/config.vsh.yaml -p docker -- ---setup build && bin/pidc/pidc --multiomics_rna resources/resources_test/grn-benchmark/multiomics_rna.h5ad --prediction output/pidc/prediction.csv
\ No newline at end of file
diff --git a/src/methods/single_omics/portia/script.py b/src/methods/single_omics/portia/script.py
index 9aecdaf39..542df3b34 100644
--- a/src/methods/single_omics/portia/script.py
+++ b/src/methods/single_omics/portia/script.py
@@ -23,13 +23,15 @@
X = adata_rna.X.toarray() if scipy.sparse.issparse(adata_rna.X) else adata_rna.X
# Remove genes with >=90% of zeros
-mask = (np.mean(X == 0, axis=0) >= 0.9)
-X = X[:, ~mask]
-gene_names = gene_names[~mask]
-
-# Remove samples with >=90% of zeros
-mask = (np.mean(X == 0, axis=1) >= 0.9)
-adata_rna = X[~mask, :]
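+# QC filtering disabled here; low-expression genes/cells are assumed to be filtered upstream
+# (see the QC cell in runs.ipynb that writes multiomics_rna_qc.h5ad)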
+if False:
+ # Remove genes with >=90% of zeros
+ mask = (np.mean(X == 0, axis=0) >= 0.9)
+ X = X[:, ~mask]
+ gene_names = gene_names[~mask]
+
+ # Remove samples with >=90% of zeros
+ mask = (np.mean(X == 0, axis=1) >= 0.9)
+ adata_rna = X[~mask, :]
# Load list of putative TFs
df = pd.read_csv(par['tf_all'], header=None, names=['gene_name'])
diff --git a/src/methods/single_omics/ppcor/script.R b/src/methods/single_omics/ppcor/script.R
index 7d458e92f..c05516136 100644
--- a/src/methods/single_omics/ppcor/script.R
+++ b/src/methods/single_omics/ppcor/script.R
@@ -18,14 +18,15 @@ geneNames <- colnames(inputExpr)
colnames(inputExpr) <- c(geneNames)
X <- as.matrix(inputExpr)
-# Keep genes with less than 10% of zeros
-mask <- (apply(X, 2, function(x) mean(x != 0)) >= 0.1)
-X <- X[, mask]
-geneNames <- geneNames[mask]
-
-# Keep samples with less than 10% of zeros
-mask <- (apply(X, 1, function(x) mean(x != 0)) >= 0.1)
-X <- X[mask,]
+
+# # Keep genes with less than 10% of zeros
+# mask <- (apply(X, 2, function(x) mean(x != 0)) >= 0.1)
+# X <- X[, mask]
+# geneNames <- geneNames[mask]
+
+# # Keep samples with less than 10% of zeros
+# mask <- (apply(X, 1, function(x) mean(x != 0)) >= 0.1)
+# X <- X[mask,]
# Run GRN inference method
pcorResults = pcor(x = X, method = "pearson")
diff --git a/src/methods/single_omics/scenic/script.py b/src/methods/single_omics/scenic/script.py
index 28f4cd46d..fb244917f 100644
--- a/src/methods/single_omics/scenic/script.py
+++ b/src/methods/single_omics/scenic/script.py
@@ -1,14 +1,8 @@
import os
-
import anndata
import numpy as np
import pandas as pd
-# from contextlib_chdir import chdir
import subprocess
-# from urllib.request import urlretrieve
-import zipfile
-from arboreto.algo import grnboost2
-from distributed import Client
import ast
# wget https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather
@@ -26,150 +20,77 @@
'motif_annotation': 'resources_local/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl',
'genes_vs_motifs_10k': 'resources_local/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather',
'genes_vs_motifs_500': 'resources_local/hg38_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather',
-
'max_n_links': 50000,
'seed': "32"
}
## VIASH END
os.makedirs(par['temp_dir'], exist_ok=True)
-out_dir = 'resources_local'
-RANKINGS_DB_PATH = os.path.join(out_dir, 'cistarget-db', 'db.regions_vs_motifs.rankings.feather')
-SCORES_DB_PATH = os.path.join(out_dir, 'cistarget-db', 'db.regions_vs_motifs.scores.feather')
-
-# if not (os.path.exists(RANKINGS_DB_PATH) and os.path.exists(SCORES_DB_PATH)):
-
-# # Download create_cisTarget_databases
-# os.makedirs(os.path.join(out_dir, 'cistarget-db'), exist_ok=True)
-# if not os.path.exists(os.path.join(out_dir, 'cistarget-db', 'create_cisTarget_databases')):
-# with chdir(os.path.join(out_dir, 'cistarget-db')):
-# subprocess.run(['git', 'clone', 'https://github.com/aertslab/create_cisTarget_databases'])
-
-# # Download cluster-buster
-# if not os.path.exists(os.path.join(out_dir, 'cistarget-db', 'cbust')):
-# urlretrieve('https://resources.aertslab.org/cistarget/programs/cbust', os.path.join(out_dir, 'cistarget-db', 'cbust'))
-# subprocess.run(['chmod', 'a+x', os.path.join(out_dir, 'cistarget-db', 'cbust')])
-
-# # Download motif collection
-# if not os.path.exists(os.path.join(out_dir, 'cistarget-db', 'v10nr_clust_public')):
-# urlretrieve(
-# 'https://resources.aertslab.org/cistarget/motif_collections/v10nr_clust_public/v10nr_clust_public.zip',
-# os.path.join(out_dir, 'cistarget-db', 'v10nr_clust_public.zip')
-# )
-# with zipfile.ZipFile(os.path.join(out_dir, 'cistarget-db', 'v10nr_clust_public.zip'), 'r') as zip_ref:
-# zip_ref.extractall(os.path.join(out_dir, 'cistarget-db'))
-
-# # Download chromosome sizes
-# if not os.path.exists(os.path.join(out_dir, 'cistarget-db', 'hg38.chrom.sizes')):
-# urlretrieve(
-# 'https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes',
-# os.path.join(out_dir, 'cistarget-db', 'hg38.chrom.sizes')
-# )
-
-# # Download reference genome
-# if not os.path.exists(os.path.join(out_dir, 'cistarget-db', 'hg38.fa')):
-# print('Downloading reference genome', flush=True)
-# if not os.path.exists(os.path.join(out_dir, 'cistarget-db', 'hg38.fa.gz')):
-# urlretrieve(
-# 'https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz',
-# os.path.join(out_dir, 'cistarget-db', 'hg38.fa.gz')
-# )
-# with gzip.open(os.path.join(out_dir, 'cistarget-db', 'hg38.fa.gz'), 'rb') as f_in:
-# with open(os.path.join(out_dir, 'cistarget-db', 'hg38.fa'), 'wb') as f_out:
-# shutil.copyfileobj(f_in, f_out)
-
-# # Prepare fasta from consensus regions
-# if not os.path.exists(os.path.join(out_dir, 'cistarget-db', 'hg38.with_1kb_bg_padding.fa')):
-# subprocess.run([
-# os.path.join(out_dir, 'cistarget-db', 'create_cisTarget_databases', 'create_fasta_with_padded_bg_from_bed.sh'),
-# os.path.join(out_dir, 'cistarget-db', 'hg38.fa'),
-# os.path.join(out_dir, 'cistarget-db', 'hg38.chrom.sizes'),
-# os.path.join(out_dir, 'consensus_peak_calling', 'consensus_regions.bed'),
-# os.path.join(out_dir, 'cistarget-db', 'hg38.with_1kb_bg_padding.fa'),
-# '1000',
-# 'yes'
-# ])
-
-# # Create cistarget databases
-# with open(os.path.join(out_dir, 'cistarget-db', 'motifs.txt'), 'w') as f:
-# for filename in os.listdir(os.path.join(out_dir, 'cistarget-db', 'v10nr_clust_public', 'singletons')):
-# f.write(f'{filename}\n')
-# with chdir(os.path.join(out_dir, 'cistarget-db')):
-# subprocess.run([
-# 'python',
-# os.path.join(out_dir, 'cistarget-db', 'create_cisTarget_databases', 'create_cistarget_motif_databases.py'),
-# '-f', os.path.join(out_dir, 'cistarget-db', 'hg38.with_1kb_bg_padding.fa'),
-# '-M', os.path.join(out_dir, 'cistarget-db', 'v10nr_clust_public', 'singletons'),
-# '-m', os.path.join(out_dir, 'cistarget-db', 'motifs.txt'),
-# '-c', os.path.join(out_dir, 'cistarget-db', 'cbust'),
-# '-o', 'db',
-# '--bgpadding', '1000',
-# '-t', str(par['num_workers'])
-# ])
-
-
-
-
-# Load scRNA-seq data
-adata_rna = anndata.read_h5ad(par['multiomics_rna'])
-adata_rna = adata_rna[adata_rna.obs.donor_id=='donor_0',] #TODO: to go
-gene_names = adata_rna.var.gene_ids.index.to_numpy()
-X = adata_rna.X.toarray()
-
# Load list of putative TFs
# df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
# tfs = set(list(df['gene_name']))
# tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]
-# format output
-expression_data = os.path.join(par['temp_dir'], "expression_data.tsv")
-
-pd.DataFrame(X, columns=gene_names).to_csv(expression_data, sep='\t', index=False)
expr_mat_adjacencies = os.path.join(par['temp_dir'], "expr_mat_adjacencies.tsv")
-
-
-print('Run grn')
-command = [
- "pyscenic", "grn",
- "--num_workers", par['max_workers'],
- "--seed", par['seed'],
- "-o", expr_mat_adjacencies,
- expression_data,
- par['tf_all'],
-
-]
-subprocess.run(command, check=True)
-
-
-print('Run prune')
+expression_data = os.path.join(par['temp_dir'], "expression_data.tsv")
regulons = f"{par['temp_dir']}/regulons.csv"
-command = [
- "pyscenic", "ctx",
- expr_mat_adjacencies, par['genes_vs_motifs_500'], par['genes_vs_motifs_10k'],
- "--annotations_fname", par['motif_annotation'],
- "--expression_mtx_fname", expression_data,
- "--mode", "custom_multiprocessing",
- "--output", regulons,
- "--num_workers", par['max_workers']
-]
-subprocess.run(command, check=True)
-
-print('Format regulons')
-regulons = pd.read_csv(regulons, index_col=0, skiprows=1)['TargetGenes'].reset_index().iloc[1:,:]
-
-def format_it(df):
- values = df['TargetGenes'].values[0]
- genes = []
- weights = []
- for value in ast.literal_eval(values):
- genes.append(value[0])
- weights.append(value[1])
- return pd.DataFrame({'target':genes, 'weight':weights})
-grn = regulons.groupby('index').apply(lambda df: format_it(df)).reset_index().rename(columns={'index':'source'})
-network = grn[['source','target','weight']]
-
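+# SCENIC pipeline split into stages so each can be rerun independently:
+# format_data -> run_grn (GRNBoost2 adjacencies) -> prune_grn (motif-based ctx pruning) -> format_grn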
+def format_data(par):
+ print('Read data')
+ adata_rna = anndata.read_h5ad(par['multiomics_rna'])
+ gene_names = adata_rna.var_names
+ pd.DataFrame(adata_rna.X.todense(), columns=gene_names).to_csv(expression_data, sep='\t', index=False)
+
+
+def run_grn(par):
+ print('Run grn')
+ command = [
+ "pyscenic", "grn",
+ "--num_workers", par['max_workers'],
+ "--seed", par['seed'],
+ "-o", expr_mat_adjacencies,
+ "--method", "grnboost2",
+ expression_data,
+ par['tf_all']
+ ]
+ subprocess.run(command, check=True)
+
+def prune_grn(par):
+ print('Run prune')
+
+ command = [
+ "pyscenic", "ctx",
+ expr_mat_adjacencies, par['genes_vs_motifs_500'], par['genes_vs_motifs_10k'],
+ "--annotations_fname", par['motif_annotation'],
+ "--expression_mtx_fname", expression_data,
+ "--mode", "custom_multiprocessing",
+ "--output", regulons,
+ "--num_workers", par['max_workers']
+ ]
+
+    subprocess.run(command, check=True)
+
+def format_grn(par):
+
+ print('Format regulons')
+ regulons_df = pd.read_csv(regulons, index_col=0, skiprows=1)['TargetGenes'].reset_index().iloc[1:,:]
+
+ def format_it(df):
+ values = df['TargetGenes'].values[0]
+ genes = []
+ weights = []
+ for value in ast.literal_eval(values):
+ genes.append(value[0])
+ weights.append(value[1])
+ return pd.DataFrame({'target':genes, 'weight':weights})
+ grn = regulons_df.groupby('index').apply(lambda df: format_it(df)).reset_index().rename(columns={'index':'source'})
+ network = grn[['source','target','weight']]
+ return network
+
+format_data(par)
+run_grn(par)
+prune_grn(par)
+network = format_grn(par)
network.to_csv(par['prediction'], sep=',')
print('Finished.')
diff --git a/src/methods/single_omics/scsgl/script.py b/src/methods/single_omics/scsgl/script.py
index 4f61c5b51..11cc9cc5d 100644
--- a/src/methods/single_omics/scsgl/script.py
+++ b/src/methods/single_omics/scsgl/script.py
@@ -30,13 +30,13 @@
from pysrc.graphlearning import learn_signed_graph
# Remove genes with >=90% of zeros
-mask = (np.mean(X == 0, axis=0) >= 0.75)
-X = X[:, ~mask]
-gene_names = gene_names[~mask]
+# mask = (np.mean(X == 0, axis=0) >= 0.75)
+# X = X[:, ~mask]
+# gene_names = gene_names[~mask]
-# Remove samples with >=90% of zeros
-mask = (np.mean(X == 0, axis=1) >= 0.75)
-X = X[~mask, :]
+# # Remove samples with >=90% of zeros
+# mask = (np.mean(X == 0, axis=1) >= 0.75)
+# X = X[~mask, :]
# Run scSGL
print('Starting scSGL', flush=True)
diff --git a/src/methods/single_omics/tigress/script.R b/src/methods/single_omics/tigress/script.R
index 7427c38dc..766c768e7 100644
--- a/src/methods/single_omics/tigress/script.R
+++ b/src/methods/single_omics/tigress/script.R
@@ -19,16 +19,16 @@ X <- as.matrix(ad$X)
gene_names <- colnames(ad)
# Remove genes with > 90% of zeros
-zero_proportion <- colMeans(X == 0)
-mask <- (zero_proportion <= 0.9)
-X <- X[, mask]
-gene_names <- gene_names[mask]
-colnames(X) <- gene_names
-
-# Remove samples with > 90% of zeros
-zero_proportion <- rowMeans(X == 0)
-mask <- (zero_proportion <= 0.9)
-X <- X[mask,]
+# zero_proportion <- colMeans(X == 0)
+# mask <- (zero_proportion <= 0.9)
+# X <- X[, mask]
+# gene_names <- gene_names[mask]
+# colnames(X) <- gene_names
+
+# # Remove samples with > 90% of zeros
+# zero_proportion <- rowMeans(X == 0)
+# mask <- (zero_proportion <= 0.9)
+# X <- X[mask,]
# Load list of putative TFs
dat <- read.csv(par$tf_all, header = FALSE)
diff --git a/src/metrics/regression_1/config.vsh.yaml b/src/metrics/regression_1/config.vsh.yaml
index a6fefcba0..25729eb86 100644
--- a/src/metrics/regression_1/config.vsh.yaml
+++ b/src/metrics/regression_1/config.vsh.yaml
@@ -9,10 +9,11 @@ functionality:
Calculates R2 score using regression approach 1.
arguments:
- name: --min_tf
- type: integer
+ type: boolean
direction: input
description: calculate the scores for the given min tfs in addition to the default
required: false
+ default: false
resources:
- type: python_script
path: script.py
diff --git a/src/metrics/regression_1/main.py b/src/metrics/regression_1/main.py
index c5de93f7a..a94315cb2 100644
--- a/src/metrics/regression_1/main.py
+++ b/src/metrics/regression_1/main.py
@@ -152,8 +152,8 @@ def regression_1(
net_sub = net[net.cell_type==cell_type]
else:
net_sub = net
-
- net_sub = select_top_links(net_sub, par) #only top links are considered
+ if len(net_sub)>par['max_n_links']:
+ net_sub = select_top_links(net_sub, par) #only top links are considered
prturb_adata_sub = prturb_adata[prturb_adata.obs.cell_type==cell_type,:]
y_true_sub, y_pred_sub = cross_validation(net_sub, prturb_adata_sub, par)
@@ -242,7 +242,7 @@ def main(par):
print(f'Compute metrics for layer: {layer}', flush=True)
tfs_cases = [-1]
if par['min_tf']:
- tfs_cases += par['min_tf']
+        tfs_cases += [30]
layer_results = {} # Store results for this layer
for exclude_missing_genes in [False, True]: # two settings on target gene
for tf_n in tfs_cases: # two settings on tfs
diff --git a/src/metrics/regression_1/script.py b/src/metrics/regression_1/script.py
index cba39bbdb..646d2b115 100644
--- a/src/metrics/regression_1/script.py
+++ b/src/metrics/regression_1/script.py
@@ -6,17 +6,22 @@
## VIASH START
par = {
"perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad",
- "prediction": "resources/grn-benchmark/grn_models/collectri.csv",
+ "tf_all": "resources/prior/tf_all.csv",
+ "prediction": "output/scenic/scenic.csv",
+ "method_id": "scenic",
+ "min_tf": False,
+ "max_n_links": 50000,
+ "apply_tf": "true",
'score': 'output/score.h5ad',
'reg_type': 'ridge',
'layer': 'pearson',
'subsample': 200,
'max_workers': 4,
}
-print(par)
-
-
## VIASH END
+meta = {
+ "resources_dir":'src/metrics/regression_1/'
+}
sys.path.append(meta["resources_dir"])
from main import main
diff --git a/src/workflows/run_benchmark_single_omics/___main.nf b/src/workflows/run_benchmark_single_omics/___main.nf
deleted file mode 100644
index 2fa73958f..000000000
--- a/src/workflows/run_benchmark_single_omics/___main.nf
+++ /dev/null
@@ -1,315 +0,0 @@
-workflow auto {
- findStatesTemp(params, meta.config)
- | meta.workflow.run(
- auto: [publish: "state"]
- )
-}
-
-workflow run_wf {
- take:
- input_ch
-
- main:
-
- // construct list of methods
- methods = [
- portia
- ]
-
- // construct list of metrics
- metrics = [
- regression_1
- ]
-
- /****************************
- * EXTRACT DATASET METADATA *
- ****************************/
- dataset_ch = input_ch
- // store join id
- | map{ id, state ->
- [id, state + ["_meta": [join_id: id]]]
- }
-
- // extract the dataset metadata
- // | extract_metadata.run(
- // fromState: [input: "input_solution"],
- // toState: { id, output, state ->
- // state + [
- // dataset_uns: readYaml(output.output).uns
- // ]
- // }
- // )
-
- /***************************
- * RUN METHODS AND METRICS *
- ***************************/
- score_ch = dataset_ch
-
- // run all methods
- | runEach(
- components: methods,
-
- // use the 'filter' argument to only run a method on the normalisation the component is asking for
- // filter: { id, state, comp ->
- // def norm = state.dataset_uns.normalization_id
- // def pref = comp.config.info.preferred_normalization
- // // if the preferred normalisation is none at all,
- // // we can pass whichever dataset we want
- // def norm_check = (norm == "log_cp10k" && pref == "counts") || norm == pref
- // def method_check = !state.method_ids || state.method_ids.contains(comp.config.name)
-
- // method_check && norm_check
- // },
-
- // define a new 'id' by appending the method name to the dataset id
- id: { id, state, comp ->
- id + "." + comp.config.name
- },
-
- // use 'fromState' to fetch the arguments the component requires from the overall state
- fromState: { id, state, comp ->
- def new_args = [
- multiomics_rna: state.multiomics_rna,
- tf_all: state.tf_all,
- ]
- if (comp.config.info.type == "control_method") {
- new_args.input_solution = state.input_solution
- }
- new_args
- },
-
- // use 'toState' to publish that component's outputs to the overall state
- toState: { id, output, state, comp ->
- state + [
- method_id: comp.config.name,
- prediction: output.prediction
- ]
- }
- )
-
- // run all metrics
- | runEach(
- components: metrics,
- id: { id, state, comp ->
- id + "." + comp.config.name
- },
- // use 'fromState' to fetch the arguments the component requires from the overall state
- fromState: [
- perturbation_data: "perturbation_data",
- prediction: "prediction",
- subsample: "subsample",
- reg_type: "reg_type",
- max_workers: "max_workers",
- consensus: "consensus",
- tf_all: "tf_all"
- ],
- // use 'toState' to publish that component's outputs to the overall state
- toState: { id, output, state, comp ->
- state + [
- metric_id: comp.config.name,
- metric_output: output.score
- ]
- }
- )
-
-
- /******************************
- * GENERATE OUTPUT YAML FILES *
- ******************************/
- // TODO: can we store everything below in a separate helper function?
-
- // extract the dataset metadata
- // dataset_meta_ch = dataset_ch
- // // // only keep one of the normalization methods
- // // | filter{ id, state ->
- // // state.dataset_uns.normalization_id == "log_cp10k"
- // // }
- // | joinStates { ids, states ->
- // // store the dataset metadata in a file
- // // def dataset_uns = states.collect{state ->
- // // def uns = state.dataset_uns.clone()
- // // uns.remove("normalization_id")
- // // uns
- // // }
- // def dataset_uns_yaml_blob = toYamlBlob(dataset_uns)
- // def dataset_uns_file = tempFile("dataset_uns.yaml")
- // dataset_uns_file.write(dataset_uns_yaml_blob)
-
- // // ["output", [output_dataset_info: dataset_uns_file]]
- // }
-
- output_ch = score_ch
-
- // extract the scores
- | extract_metadata.run(
- key: "extract_scores",
- fromState: [input: "metric_output"],
- toState: { id, output, state ->
- state + [
- score_uns: readYaml(output.output).uns
- ]
- }
- )
-
- | joinStates { ids, states ->
- // store the method configs in a file
- def method_configs = methods.collect{it.config}
- def method_configs_yaml_blob = toYamlBlob(method_configs)
- def method_configs_file = tempFile("method_configs.yaml")
- method_configs_file.write(method_configs_yaml_blob)
-
- // store the metric configs in a file
- def metric_configs = metrics.collect{it.config}
- def metric_configs_yaml_blob = toYamlBlob(metric_configs)
- def metric_configs_file = tempFile("metric_configs.yaml")
- metric_configs_file.write(metric_configs_yaml_blob)
-
- def viash_file = meta.resources_dir.resolve("_viash.yaml")
- def viash_file_content = toYamlBlob(readYaml(viash_file).info)
- def task_info_file = tempFile("task_info.yaml")
- task_info_file.write(viash_file_content)
-
- // store the scores in a file
- def score_uns = states.collect{it.score_uns}
- def score_uns_yaml_blob = toYamlBlob(score_uns)
- def score_uns_file = tempFile("score_uns.yaml")
- score_uns_file.write(score_uns_yaml_blob)
-
- def new_state = [
- output_method_configs: method_configs_file,
- output_metric_configs: metric_configs_file,
- output_task_info: task_info_file,
- output_scores: score_uns_file,
- _meta: states[0]._meta
- ]
-
- ["output", new_state]
- }
-
- // merge all of the output data
- // | mix(dataset_meta_ch)
- | joinStates{ ids, states ->
- def mergedStates = states.inject([:]) { acc, m -> acc + m }
- [ids[0], mergedStates]
- }
-
- emit:
- output_ch
-}
-
-// temp fix for rename_keys typo
-
-def findStatesTemp(Map params, Map config) {
- def auto_config = deepClone(config)
- def auto_params = deepClone(params)
-
- auto_config = auto_config.clone()
- // override arguments
- auto_config.argument_groups = []
- auto_config.arguments = [
- [
- type: "string",
- name: "--id",
- description: "A dummy identifier",
- required: false
- ],
- [
- type: "file",
- name: "--input_states",
- example: "/path/to/input/directory/**/state.yaml",
- description: "Path to input directory containing the datasets to be integrated.",
- required: true,
- multiple: true,
- multiple_sep: ";"
- ],
- [
- type: "string",
- name: "--filter",
- example: "foo/.*/state.yaml",
- description: "Regex to filter state files by path.",
- required: false
- ],
- // to do: make this a yaml blob?
- [
- type: "string",
- name: "--rename_keys",
- example: ["newKey1:oldKey1", "newKey2:oldKey2"],
- description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.",
- required: false,
- multiple: true,
- multiple_sep: ";"
- ],
- [
- type: "string",
- name: "--settings",
- example: '{"output_dataset": "dataset.h5ad", "k": 10}',
- description: "Global arguments as a JSON glob to be passed to all components.",
- required: false
- ]
- ]
- if (!(auto_params.containsKey("id"))) {
- auto_params["id"] = "auto"
- }
-
- // run auto config through processConfig once more
- auto_config = processConfig(auto_config)
-
- workflow findStatesTempWf {
- helpMessage(auto_config)
-
- output_ch =
- channelFromParams(auto_params, auto_config)
- | flatMap { autoId, args ->
-
- def globalSettings = args.settings ? readYamlBlob(args.settings) : [:]
-
- // look for state files in input dir
- def stateFiles = args.input_states
-
- // filter state files by regex
- if (args.filter) {
- stateFiles = stateFiles.findAll{ stateFile ->
- def stateFileStr = stateFile.toString()
- def matcher = stateFileStr =~ args.filter
- matcher.matches()}
- }
-
- // read in states
- def states = stateFiles.collect { stateFile ->
- def state_ = readTaggedYaml(stateFile)
- [state_.id, state_]
- }
-
- // construct renameMap
- if (args.rename_keys) {
- def renameMap = args.rename_keys.collectEntries{renameString ->
- def split = renameString.split(":")
- assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey;newKey:oldKey'"
- split
- }
-
- // rename keys in state, only let states through which have all keys
- // also add global settings
- states = states.collectMany{id, state ->
- def newState = [:]
-
- for (key in renameMap.keySet()) {
- def origKey = renameMap[key]
- if (!(state.containsKey(origKey))) {
- return []
- }
- newState[key] = state[origKey]
- }
-
- [[id, globalSettings + newState]]
- }
- }
-
- states
- }
- emit:
- output_ch
- }
-
- return findStatesTempWf
-}
\ No newline at end of file
diff --git a/src/workflows/run_benchmark_single_omics/__main.nf b/src/workflows/run_benchmark_single_omics/__main.nf
deleted file mode 100644
index 1240d344f..000000000
--- a/src/workflows/run_benchmark_single_omics/__main.nf
+++ /dev/null
@@ -1,246 +0,0 @@
-// construct list of methods
-methods = [
- portia
-]
-
-// construct list of metrics
-metrics = [
- regression_1
-]
-
-// helper workflow for starting a workflow based on lists of yaml files
-workflow auto {
- findStates(params, meta.config)
- | meta.workflow.run(
- auto: [publish: "state"]
- )
-}
-
-// benchmarking workflow
-workflow run_wf {
- take:
- input_ch
-
- main:
-
- /***************************
- * RUN METHODS AND METRICS *
- ***************************/
- score_ch = input_ch
-
- | run_benchmark_fun(
- methods: methods,
- metrics: metrics,
- methodFromState: { id, state, comp ->
- def new_args = [
- multiomics_rna: state.multiomics_rna,
- tf_all: state.tf_all,
- prediction: 'predictions/$id.$key.output.h5ad',
- output_model: null
- ]
- if (comp.config.functionality.info.type == "control_method") {
- new_args.de_test_h5ad = state.de_test_h5ad
- }
- new_args
- },
- methodToState: ["prediction": "prediction"],
- metricFromState: [
- perturbation_data: "perturbation_data",
- prediction: "prediction",
- subsample: "subsample",
- reg_type: "reg_type",
- max_workers: "max_workers",
- consensus: "consensus",
- tf_all: "tf_all"
- ],
- metricToState: ["metric_output": "output"],
- methodAuto: [publish: "state"]
- )
- | joinStates { ids, states ->
- def score_uns = states.collect{it.score_uns}
- def score_uns_yaml_blob = toYamlBlob(score_uns)
- def score_uns_file = tempFile("score_uns.yaml")
- score_uns_file.write(score_uns_yaml_blob)
-
- ["output", [scores: score_uns_file]]
- }
-
- /******************************
- * GENERATE OUTPUT YAML FILES *
- ******************************/
- // create dataset, method and metric metadata files
- metadata_ch = input_ch
- | create_metadata_files(
- datasetFromState: [input: "multiomics_rna"],
- methods: methods,
- metrics: metrics,
- meta: meta
- )
-
- // merge all of the output data
- output_ch = score_ch
- | mix(metadata_ch)
- | joinStates{ ids, states ->
- def mergedStates = states.inject([:]) { acc, m -> acc + m }
- [ids[0], mergedStates]
- }
-
- emit:
- output_ch
-}
-
-
-
-
-
-def run_benchmark_fun(args) {
- // required args
- def methods_ = args.methods
- def metrics_ = args.metrics
- def methodFromState = args.methodFromState
- def methodToState = args.methodToState
- def metricFromState = args.metricFromState
- def metricToState = args.metricToState
-
- assert methods_, "methods must be defined"
- assert metrics_, "metrics must be defined"
- assert methodFromState, "methodFromState must be defined"
- assert methodToState, "methodToState must be defined"
- assert metricFromState, "metricFromState must be defined"
- assert metricToState, "metricToState must be defined"
-
- // optional args
- def keyPrefix = args.keyPrefix ?: ""
- def methodAuto = args.methodAuto ?: [:]
- def metricAuto = args.metricAuto ?: [:]
-
- // add the key prefix to the method and metric names
- if (keyPrefix && keyPrefix != "") {
- methods_ = methods.collect{ method ->
- method.run(key: keyPrefix + method.config.functionality.name)
- }
- metrics_ = metrics.collect{ metric ->
- metric.run(key: keyPrefix + metric.config.functionality.name)
- }
- }
-
- workflow bench {
- take: input_ch
-
- main:
- output_ch = input_ch
- // run all methods
- | runEach(
- components: methods_,
- filter: { id, state, comp ->
- !state.method_ids || state.method_ids.contains(comp.config.functionality.name)
- },
- id: { id, state, comp ->
- id + "." + comp.config.functionality.name
- },
- fromState: methodFromState,
- toState: methodToState,
- auto: methodAuto
- )
-
- // run all metrics
- | runEach(
- components: metrics_,
- filter: { id, state, comp ->
- !state.metric_ids || state.metric_ids.contains(comp.config.functionality.name)
- },
- id: { id, state, comp ->
- id + "." + comp.config.functionality.name
- },
- fromState: metricFromState,
- toState: metricToState,
- auto: metricAuto
- )
-
- // extract the scores
- | extract_metadata.run(
- key: "${keyPrefix}score_uns",
- fromState: [input: "metric_output"],
- toState: { id, output, state ->
- state + [
- score_uns: readYaml(output.output).uns
- ]
- }
- )
-
- emit: output_ch
- }
- return bench
-}
-
-
-def create_metadata_files(args) {
- // required args
- def meta_ = args.meta
- def methods_ = args.methods
- def metrics_ = args.metrics
- def datasetFromState = args.datasetFromState
-
- assert meta_, "meta must be defined"
- assert methods_, "methods must be defined"
- assert metrics_, "metrics must be defined"
- assert datasetFromState, "datasetFromState must be defined"
-
- workflow metadata {
- take: input_ch
-
- main:
- output_ch = input_ch
-
- | map{ id, state ->
- [id, state + ["_meta": [join_id: id]]]
- }
-
- | extract_metadata.run(
- key: "dataset_uns",
- fromState: args.datasetFromState,
- toState: { id, output, state ->
- state + [
- dataset_info: readYaml(output.output).uns
- ]
- }
- )
-
- | joinStates { ids, states ->
- assert states.size() > 0, "no states found"
- assert states[0]._meta, "no _meta found in state[0]"
- assert states.every{it.dataset_info}, "not all states have dataset_info"
-
- // combine the dataset info into one file
- def dataset_uns = states.collect{it.dataset_info}
- def dataset_uns_yaml_blob = toYamlBlob(dataset_uns)
- def dataset_uns_file = tempFile("dataset_uns.yaml")
- dataset_uns_file.write(dataset_uns_yaml_blob)
-
- // store the method configs in a file
- def method_configs = methods_.collect{it.config}
- def method_configs_yaml_blob = toYamlBlob(method_configs)
- def method_configs_file = tempFile("method_configs.yaml")
- method_configs_file.write(method_configs_yaml_blob)
-
- // store the metric configs in a file
- def metric_configs = metrics_.collect{it.config}
- def metric_configs_yaml_blob = toYamlBlob(metric_configs)
- def metric_configs_file = tempFile("metric_configs.yaml")
- metric_configs_file.write(metric_configs_yaml_blob)
-
- def task_info_file = meta_.resources_dir.resolve("task_info.yaml")
-
- def new_state = [
- dataset_uns: dataset_uns_file,
- method_configs: method_configs_file,
- metric_configs: metric_configs_file,
- task_info: task_info_file,
- _meta: states[0]._meta
- ]
- ["output", new_state]
- }
- emit: output_ch
- }
- return metadata
-}
diff --git a/src/workflows/run_benchmark_single_omics/_main.nf b/src/workflows/run_benchmark_single_omics/_main.nf
deleted file mode 100644
index d3eb8ee16..000000000
--- a/src/workflows/run_benchmark_single_omics/_main.nf
+++ /dev/null
@@ -1,129 +0,0 @@
-
-workflow auto {
- findStatesTemp(params, meta.config)
- | meta.workflow.run(
- auto: [publish: "state"]
- )
-}
-
-workflow run_wf {
- take:
- input_ch
-
- main:
-
- // construct list of metrics
- metrics = [
- regression_1
- ]
-
- /***************************
- * RUN METRICS *
- ***************************/
- score_ch = input_ch
- | map{ id, state ->
- [id, state + ["_meta": [join_id: id]]]
- }
-
- | positive_control.run(
- runIf: { id, state ->
- state.method_id == 'positive_control'
- },
- fromState: [
- perturbation_data: "perturbation_data",
- layer: "layer",
- tf_all: "tf_all"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | negative_control.run(
- runIf: { id, state ->
- state.method_id == 'negative_control'
- },
- fromState: [
- perturbation_data: "perturbation_data"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
-
- // run all metrics
- | runEach(
- components: metrics,
- id: { id, state, comp ->
- id + "." + comp.config.functionality.name
- },
- // use 'fromState' to fetch the arguments the component requires from the overall state
- fromState: [
- perturbation_data: "perturbation_data",
- layer: "layer",
- prediction: "prediction",
- subsample: "subsample",
- reg_type: "reg_type",
- method_id: "method_id",
- max_workers: "max_workers",
- consensus: "consensus"
- ],
- // use 'toState' to publish that component's outputs to the overall state
- toState: { id, output, state, comp ->
- state + [
- metric_id: comp.config.functionality.name,
- metric_output: output.score
- ]
- }
- )
-
- output_ch = score_ch
-
- // extract the scores
- | extract_metadata.run(
- key: "extract_scores",
- fromState: [input: "metric_output"],
- toState: { id, output, state ->
- state + [
- score_uns: readYaml(output.output).uns
- ]
- }
- )
-
- | joinStates { ids, states ->
- assert states[0]._meta, "no _meta found in state[0]"
- // store the metric configs in a file
- def metric_configs = metrics.collect{it.config}
- def metric_configs_yaml_blob = toYamlBlob(metric_configs)
- def metric_configs_file = tempFile("metric_configs.yaml")
- metric_configs_file.write(metric_configs_yaml_blob)
-
- def task_info_file = meta.resources_dir.resolve("task_info.yaml")
-
- // store the scores in a file
- def score_uns = states.collect{it.score_uns}
- def score_uns_yaml_blob = toYamlBlob(score_uns)
- def score_uns_file = tempFile("score_uns.yaml")
- score_uns_file.write(score_uns_yaml_blob)
-
- def new_state = [
- metric_configs: metric_configs_file,
- scores: score_uns_file,
- _meta: states[0]._meta
- ]
-
- ["output", new_state]
- }
-
- // merge all of the output data
- | joinStates{ ids, states ->
- def mergedStates = states.inject([:]) { acc, m -> acc + m }
- [ids[0], mergedStates]
- }
-
- emit:
- output_ch
-}
diff --git a/src/workflows/run_benchmark_single_omics/config.vsh.yaml b/src/workflows/run_benchmark_single_omics/config.vsh.yaml
index aa6261341..8821ad9fb 100644
--- a/src/workflows/run_benchmark_single_omics/config.vsh.yaml
+++ b/src/workflows/run_benchmark_single_omics/config.vsh.yaml
@@ -93,6 +93,7 @@ functionality:
- name: grn_methods/scsgl
- name: grn_methods/tigress
- name: grn_methods/scgpt
+ - name: grn_methods/scenic
repositories:
- name: openproblems
type: github