Skip to content

Commit

Permalink
scenic added
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 14, 2024
1 parent b5a6f0a commit 56460b4
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 4 deletions.
19 changes: 16 additions & 3 deletions runs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2583,11 +2583,24 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 131,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/state.yaml to rsesources/results/single_omics_inference/state.yaml\n",
"download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/trace.txt to rsesources/results/single_omics_inference/trace.txt\n",
"download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/scores.yaml to rsesources/results/single_omics_inference/scores.yaml\n",
"download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.ennet.ennet.prediction.csv to rsesources/results/single_omics_inference/ridge.ennet.ennet.prediction.csv\n",
"download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.pidc.pidc.prediction.csv to rsesources/results/single_omics_inference/ridge.pidc.pidc.prediction.csv\n",
"download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.tigress.tigress.prediction.csv to rsesources/results/single_omics_inference/ridge.tigress.tigress.prediction.csv\n"
]
}
],
"source": [
"!aws s3 sync s3://openproblems-data/resources/grn/results/single_omics_all resources/results/single_omics_all"
"!aws s3 sync s3://openproblems-data/resources_test/grn/results/single_omics_inference rsesources/results/single_omics_inference"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion scripts/run_benchmark_single_omics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
RUN_ID="single_omics_inference"
# resources_dir="./resources_test/"
resources_dir="s3://openproblems-data/resources_test/grn"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="${resources_dir}/results/${RUN_ID}"


Expand Down
1 change: 1 addition & 0 deletions scripts/run_grn_evaluation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ baseline_models=(
baseline_pearson_causal
baseline_pearson_causal_celltype
baseline_pearson_causal_metacell
baseline_pearson_causal_impute
positive_control
)
# Start writing to the YAML file
Expand Down
27 changes: 27 additions & 0 deletions src/methods/single_omics/scenic/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Viash component configuration for the "scenic" GRN inference method.
# Shared definitions are pulled in from the common method API file below
# (presumably the input/output arguments used by script.py — TODO confirm
# against comp_method.yaml).
__merge__: ../../../api/comp_method.yaml

functionality:
name: scenic
namespace: "grn_methods"
info:
label: scenic
summary: "GRN inference using scenic"


# script.py (same directory) is the entry point executed by this component.
resources:
- type: python_script
path: script.py

platforms:
# Docker platform: the base image provides pyscenic 0.12.1; script.py shells
# out to the `pyscenic` command-line tool.
- type: docker
image: aertslab/pyscenic:0.12.1
setup:
# Disabled conda-based setup step, kept for reference.
# - type: docker
# run: |
# conda install -y -c bioconda arboreto pandas
# anndata is imported by script.py to read the input .h5ad file.
- type: python
packages: [ anndata ]
- type: native
- type: nextflow
directives:
# Resource labels; presumably resolved by the pipeline's Nextflow label
# configuration — verify the available label names there.
label: [onedaytime, midmem, midcpu]
71 changes: 71 additions & 0 deletions src/methods/single_omics/scenic/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os

import anndata
import numpy as np
import pandas as pd
from arboreto.algo import grnboost2
from distributed import Client


# Run the pySCENIC command-line pipeline (``pyscenic grn`` followed by
# ``pyscenic ctx``) on an scRNA-seq AnnData file and write the inferred
# TF-target adjacencies to ``par['prediction']`` as CSV.
import subprocess  # kept inside the script body so this block is self-contained

## VIASH START
# Defaults used when the script is run directly. Viash is expected to replace
# this section with the real command-line values at build time, so every key
# read below must be present here for standalone runs.
par = {
    'multiomics_rna': 'resources_test/grn-benchmark/multiomics_rna.h5ad',
    "tf_all": 'resources/prior/tf_all.csv',
    'prediction': 'output/grnboost2/prediction.csv',
    'max_n_links': 50000,
    'temp_dir': 'output/scenic',
    'max_workers': 4,
}
## VIASH END
os.makedirs(par['temp_dir'], exist_ok=True)

# subprocess only accepts str/bytes/path arguments; the worker count may
# arrive as an integer, so normalise it once.
num_workers = str(par['max_workers'])

# Load scRNA-seq data
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
gene_names = adata_rna.var.gene_ids.index.to_numpy()
X = adata_rna.X
if hasattr(X, 'toarray'):
    # Sparse matrices must be densified before being written as a table;
    # a dense array has no .toarray() and is used as is.
    X = X.toarray()

# Load list of putative TFs
# (not needed here: the TF list file is handed to `pyscenic grn` directly)
# df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
# tfs = set(list(df['gene_name']))
# tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]

# Write the expression matrix (rows of X, one column per gene) as TSV so the
# pyscenic CLI can read it.
expression_data = f"{par['temp_dir']}/expression_data.tsv"
pd.DataFrame(X, columns=gene_names).to_csv(expression_data, sep='\t', index=False)

expr_mat_adjacencies = f"{par['temp_dir']}/expr_mat_adjacencies.tsv"
command = [
    "pyscenic", "grn",
    "--num_workers", num_workers,
    "-o", expr_mat_adjacencies,
    expression_data,
    par['tf_all'],
]

# Run grn; check=True aborts the script if pyscenic exits with an error.
subprocess.run(command, check=True)


# Run prune
regulons = f"{par['temp_dir']}/regulons.csv"
annotations_fname = "/data/motifs-v9-nr.hgnc-m0.001-o0.0.tbl"
ranking_1 = "/data/hg19-tss-centered-5kb-7species.mc9nr.genes_vs_motifs.rankings.feather"
ranking_2 = "/data/hg19-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather"
command = [
    "pyscenic", "ctx",
    expr_mat_adjacencies, ranking_1, ranking_2,
    "--annotations_fname", annotations_fname,
    "--expression_mtx_fname", expression_data,
    "--mode", "custom_multiprocessing",
    "--output", regulons,
    "--num_workers", num_workers,
]
subprocess.run(command, check=True)

# Save inferred GRN
# NOTE(review): the prediction is built from the `grn` adjacencies, not from
# the pruned regulons written by `ctx` — confirm this is intended.
print(expr_mat_adjacencies)
network = pd.read_csv(expr_mat_adjacencies, sep='\t')
network.to_csv(par['prediction'], sep=',')

print('Finished.')


1 change: 1 addition & 0 deletions src/methods/single_omics/scenic/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Smoke-run the scenic component through viash on the test resources.
viash run src/methods/single_omics/scenic/config.vsh.yaml -- \
  --multiomics_rna resources_test/grn-benchmark/multiomics_rna.h5ad \
  --tf_all resources/prior/tf_all.csv \
  --prediction output/scenic_prediction.csv \
  --temp_dir output/scenic

0 comments on commit 56460b4

Please sign in to comment.