Commit

small changes to single omics

janursa committed Sep 30, 2024
1 parent 14b161a commit 8258d0a
Showing 12 changed files with 640 additions and 163 deletions.
519 changes: 438 additions & 81 deletions runs.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion scripts/sbatch/calculate_scores.sh
@@ -8,4 +8,4 @@
#SBATCH --mem=64G
#SBATCH --cpus-per-task=20

-python src/metrics/script_all.py
+python src/metrics/script_all.py
37 changes: 31 additions & 6 deletions src/control_methods/script_all.py
@@ -6,7 +6,7 @@
import random

par = {
-    'write_dir': "resources/grn_models/d0_hvgs",
+    'models_dir': "resources/grn_models/d0_hvgs",
    "multiomics_rna": "resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad",

    "perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad",
@@ -18,30 +18,55 @@
    'causal': True
}

+import argparse
+parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
+parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file')
+parser.add_argument('--prediction', type=str, help='Path to the prediction file')
+parser.add_argument('--resources_dir', type=str, help='Path to the resources directory')
+parser.add_argument('--tf_all', type=str, help='Path to the tf_all file')
+parser.add_argument('--num_workers', type=str, help='Number of cores')
+parser.add_argument('--models_dir', type=str, help='Where to write the models')
+args = parser.parse_args()
+
+if args.multiomics_rna:
+    par['multiomics_rna'] = args.multiomics_rna
+if args.tf_all:
+    par['tf_all'] = args.tf_all
+if args.num_workers:
+    par['num_workers'] = args.num_workers
+if args.models_dir:
+    par['models_dir'] = args.models_dir

meta = {
    "resources_dir": 'src/control_methods',
    "util": 'src/utils'
}
+if args.resources_dir:
+    meta['resources_dir'] = args.resources_dir
sys.path.append(meta["resources_dir"])
sys.path.append(meta["util"])

-os.makedirs(par['write_dir'], exist_ok=True)
+os.makedirs(par['models_dir'], exist_ok=True)

from util import create_corr_net


+print(par)
#---- run for pearson_corr
-par['prediction'] = f"{par['write_dir']}/pearson_corr.csv"
+print("run for pearson_corr")
+par['prediction'] = f"{par['models_dir']}/pearson_corr.csv"
par['causal'] = True
net = create_corr_net(par)
net.to_csv(par['prediction'])
#---- run for negative control
+print("run for negative control")
from negative_control.main import main
-par['prediction'] = f"{par['write_dir']}/negative_control.csv"
+par['prediction'] = f"{par['models_dir']}/negative_control.csv"
net = main(par)
net.to_csv(par['prediction'])
#---- run for positive_control
+print("run for positive control")
par['multiomics_rna'] = par['perturbation_data']
-par['prediction'] = f"{par['write_dir']}/positive_control.csv"
+par['prediction'] = f"{par['models_dir']}/positive_control.csv"
net = create_corr_net(par)
net.to_csv(par['prediction'])
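Note: create_corr_net, imported from src/utils/util.py, is not part of this diff. The sketch below is a rough, hypothetical stand-in for a causal (TF-restricted) Pearson baseline like pearson_corr; the function name, the top_k cutoff, and the TF filter are illustrative assumptions, not the repository's actual implementation.

import anndata
import numpy as np
import pandas as pd

def corr_net_sketch(rna_path: str, tf_all: list, top_k: int = 50, causal: bool = True) -> pd.DataFrame:
    # Hypothetical stand-in for util.create_corr_net (illustrative only).
    adata = anndata.read_h5ad(rna_path)
    X = adata.X.toarray() if hasattr(adata.X, 'toarray') else adata.X
    genes = np.asarray(adata.var_names)
    corr = np.corrcoef(X.T)  # gene-by-gene Pearson correlation from a cells-x-genes matrix
    tfs = set(tf_all)
    sources = [g for g in genes if g in tfs] if causal else list(genes)
    rows = []
    for tf in sources:
        i = int(np.where(genes == tf)[0][0])
        top = np.argsort(-np.abs(corr[i]))[:top_k]  # strongest partners by absolute correlation
        rows += [(tf, genes[j], corr[i, j]) for j in top if j != i]
    return pd.DataFrame(rows, columns=['source', 'target', 'weight'])

The positive control runs the same correlation on the perturbation (evaluation) data itself, which is what the par['multiomics_rna'] = par['perturbation_data'] swap above suggests.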
63 changes: 60 additions & 3 deletions src/exp_analysis/helper.py
@@ -65,6 +65,63 @@ def plot_cumulative_density(data, title='', ax=None, s=1, **kwdgs):
# self.out_deg = degree_centrality(net, source='source', target='target', **kwargs)
# self.in_deg = degree_centrality(net, source='target', target='source', **kwargs)

+class VSA_analysis:
+    '''Vester's sensitivity analysis.
+    - active_sum: the sum along the rows of the influence matrix; it indicates how much a variable influences all the others.
+    - passive_sum: the sum along the columns of the influence matrix; it indicates how sensitive a variable is, i.e. how strongly it reacts to the influence of the others.
+    '''
+    def __init__(self, sample: pd.DataFrame, control: pd.DataFrame, mode='weight', top_q_net=0.75, critical_change_q_t: float = 0.75):
+        # - keep only the top-quantile links
+        control = control[control.weight > control.weight.quantile(top_q_net)]
+        sample = sample[sample.weight > sample.weight.quantile(top_q_net)]
+
+        print('Determine roles')
+        self.vsa_control = self.determine_roles(control)
+        self.vsa_sample = self.determine_roles(sample)
+        print('Determine role change')
+        self.oo = self.diff_roles(self.vsa_control, self.vsa_sample, critical_change_q_t)
+
+    @staticmethod
+    def determine_roles(net, mode='weight', top_q_role=0.9) -> pd.DataFrame:
+        # active sum and passive sum
+        gene_names = np.union1d(net.source.unique(), net.target.unique())
+        if mode == 'weight':
+            active_sum = np.asarray([sum(net.query(f"source == '{gene}'")['weight']) for gene in gene_names])
+            passive_sum = np.asarray([sum(net.query(f"target == '{gene}'")['weight']) for gene in gene_names])
+        else:
+            raise ValueError('define first')
+
+        # define the roles ['Buffering', 'Passive', 'Active', 'Critical'] -> [0, 1, 2, 3]
+        active_sum_threshold = np.quantile(active_sum, top_q_role)
+        passive_sum_threshold = np.quantile(passive_sum, top_q_role)
+        # active_sum, passive_sum = standardize_array(active_sum), standardize_array(passive_sum)
+        roles = [2*as_flag + ps_flag for as_flag, ps_flag in zip(active_sum > active_sum_threshold, passive_sum > passive_sum_threshold)]
+        roles = pd.DataFrame(data={'gene_name': gene_names, 'active_sum': active_sum, 'passive_sum': passive_sum, 'role': roles})
+        roles.set_index('gene_name', inplace=True)
+        return roles
+
+    @staticmethod
+    def diff_roles(control: pd.DataFrame, sample: pd.DataFrame, critical_change_q_t: float = .75) -> pd.DataFrame:
+        '''
+        Find the distance in role from control to sample, and flag the top changes.
+        '''
+        # - match the indices
+        common_genes = pd.Index(np.union1d(sample.index, control.index))
+        control = control.reindex(common_genes).fillna(0)
+        sample = sample.reindex(common_genes).fillna(0)
+        # - calculate the distances
+        as_distance = (sample['active_sum'] - control['active_sum'])
+        ps_distance = (sample['passive_sum'] - control['passive_sum'])
+        overall_distance = as_distance**2 + ps_distance**2
+        df_distance = pd.DataFrame({'as_distance': as_distance, 'ps_distance': ps_distance, 'overall_distance': overall_distance}, index=common_genes)
+
+        # df_distance = df_distance.assign(passive_sum_c=control['passive_sum'], active_sum_c=control['active_sum'])
+        # df_distance = df_distance.assign(passive_sum_s=sample['passive_sum'], active_sum_s=sample['active_sum'])
+
+        # - use the top percentile as the cut-off to flag a critical role change
+        # df_distance['critical_change_as'] = df_distance['as_distance'] > df_distance['as_distance'].quantile(critical_change_q_t)
+        # df_distance['critical_change_ps'] = df_distance['ps_distance'] > df_distance['ps_distance'].quantile(critical_change_q_t)
+        # df_distance['critical_change_overall'] = df_distance['overall_distance'] > df_distance['overall_distance'].quantile(critical_change_q_t)
+        return df_distance
class Exp_analysis:
    '''
    This class provides functions for exploratory analysis of GRNs.
@@ -101,8 +158,6 @@ def plot_centrality(self, df, title='', ax=None, xlabel='Degree', ylabel='Gene',
        if colors is not None:
            for label, color in zip(ax.get_yticklabels(), colors):
                label.set_color(color)
-
-
    def top_edges(self, quantile=0.95):
        grn = self.net
        grn = grn[grn.weight > grn.weight.quantile(quantile)]
@@ -118,7 +173,6 @@ def determine_source_properties(self):
        c_weight = determine_centrality(self.net, mode='weight')

        self.sources = c_weight.merge(c_degree, left_index=True, right_index=True)
-
    # def calculate_centrality_stats(self) -> None:
    #     '''Calculate network centrality metrics such as in and out degree
    #     '''
Expand Down Expand Up @@ -164,6 +218,9 @@ def annotate_genes(self, gene_annotation_df) -> dict[str, float]:
# gene_annot.values
self.gene_annot = {key:round((value*100/len(self.targets)), 1) for key, value in gene_annot.items()}
return self.gene_annot
def analyse_roles(self, ref_net, **kwargs):
self.vsa_obj = VSA_analysis(self.net, ref_net,**kwargs)
self.vsa_results = self.vsa_obj.oo
def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_degree=None, color_map=None) -> plt.Figure:
    """Upset plot of interactions
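For orientation, here is a minimal usage sketch of the new class, assuming two GRNs stored as CSVs with source, target, and weight columns (the file paths are hypothetical):

import pandas as pd
from helper import VSA_analysis  # src/exp_analysis/helper.py

control_net = pd.read_csv('control_grn.csv')  # hypothetical input files
sample_net = pd.read_csv('sample_grn.csv')

# Compare gene roles between the two networks, keeping the top 25% of links.
vsa = VSA_analysis(sample_net, control_net, top_q_net=0.75)
print(vsa.vsa_control.head())  # per-gene active_sum, passive_sum, and role
print(vsa.oo.sort_values('overall_distance', ascending=False).head(10))  # largest role changes

The same comparison is exposed on Exp_analysis through the new analyse_roles(ref_net) method, which stores the distance table in self.vsa_results.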
1 change: 1 addition & 0 deletions src/methods/single_omics/genie3/config.vsh.yaml
@@ -15,6 +15,7 @@ functionality:
      path: script.py
    - path: /src/utils/util.py
      dest: util.py


platforms:
  - type: docker
3 changes: 3 additions & 0 deletions src/methods/single_omics/scenic/config.vsh.yaml
@@ -27,6 +27,9 @@ functionality:
  resources:
    - type: python_script
      path: script.py
+    - path: /src/utils/util.py
+      dest: util.py
+    # - path: main.py

platforms:
  - type: docker
Empty file.
100 changes: 50 additions & 50 deletions src/methods/single_omics/scenic/script.py
@@ -6,6 +6,9 @@
import ast
import requests
import scipy.sparse as sp
+import sys



# wget https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather

@@ -27,13 +30,6 @@
    'normalize': False
}
## VIASH END

-import sys
-meta= {
-    "resources_dir": 'src/utils/'
-}

import argparse
parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file')
@@ -53,58 +49,45 @@
    par['num_workers'] = args.num_workers

if args.resources_dir:
-    meta['resources_dir'] = args.resources_dir
+    meta = {'resources_dir': args.resources_dir}

sys.path.append(meta["resources_dir"])
from util import process_links

-os.makedirs(par['temp_dir'], exist_ok=True)
-
-databases = f"{par['temp_dir']}/databases/"
-os.makedirs(databases, exist_ok=True)
-
-par['motif_annotation'] = f'{databases}/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl'
-par['genes_vs_motifs_10k'] = f'{databases}/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather'
-par['genes_vs_motifs_500'] = f'{databases}/hg38_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather'
-
-if not (os.path.exists(par['motif_annotation'])):
-    print('downloading motif_annotation')
-    response = requests.get("https://resources.aertslab.org/cistarget/motif2tf/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl")
-    with open(par['motif_annotation'], "wb") as file:
-        file.write(response.content)
-if not (os.path.exists(par['genes_vs_motifs_10k'])):
-    print('downloading genes_vs_motifs_10k')
-    response = requests.get("https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather")
-    with open(par['genes_vs_motifs_10k'], "wb") as file:
-        file.write(response.content)
-if not (os.path.exists(par['genes_vs_motifs_500'])):
-    print('downloading genes_vs_motifs_500')
-    response = requests.get("https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather")
-    with open(par['genes_vs_motifs_500'], "wb") as file:
-        file.write(response.content)
-
-expr_mat_adjacencies = os.path.join(par['temp_dir'], "expr_mat_adjacencies.tsv")
-expression_data = os.path.join(par['temp_dir'], "expression_data.tsv")
-regulons = f"{par['temp_dir']}/regulons.csv"
+def download_prior(par):
+    if not (os.path.exists(par['motif_annotation'])):
+        print('downloading motif_annotation')
+        response = requests.get("https://resources.aertslab.org/cistarget/motif2tf/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl")
+        with open(par['motif_annotation'], "wb") as file:
+            file.write(response.content)
+    if not (os.path.exists(par['genes_vs_motifs_10k'])):
+        print('downloading genes_vs_motifs_10k')
+        response = requests.get("https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather")
+        with open(par['genes_vs_motifs_10k'], "wb") as file:
+            file.write(response.content)
+    if not (os.path.exists(par['genes_vs_motifs_500'])):
+        print('downloading genes_vs_motifs_500')
+        response = requests.get("https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather")
+        with open(par['genes_vs_motifs_500'], "wb") as file:
+            file.write(response.content)

def format_data(par):
    print('Read data')
    adata_rna = anndata.read_h5ad(par['multiomics_rna'])
    gene_names = adata_rna.var_names
    if sp.issparse(adata_rna.X):
        adata_rna.X = adata_rna.X.toarray()
-    pd.DataFrame(adata_rna.X, columns=gene_names).to_csv(expression_data, sep='\t', index=False)
+    pd.DataFrame(adata_rna.X, columns=gene_names).to_csv(par['expression_data'], sep='\t', index=False)


def run_grn(par):
    print('Run grn')
    command = [
        "pyscenic", "grn",
        "--num_workers", str(par['num_workers']),
        "--seed", str(par['seed']),
-        "-o", expr_mat_adjacencies,
+        "-o", str(par['expr_mat_adjacencies']),
        "--method", "grnboost2",
-        expression_data,
+        str(par['expression_data']),
        par['tf_all']
    ]
    subprocess.run(command, check=True)
@@ -114,11 +97,11 @@ def prune_grn(par):

    command = [
        "pyscenic", "ctx",
-        expr_mat_adjacencies, par['genes_vs_motifs_500'], par['genes_vs_motifs_10k'],
+        par['expr_mat_adjacencies'], par['genes_vs_motifs_500'], par['genes_vs_motifs_10k'],
        "--annotations_fname", par['motif_annotation'],
-        "--expression_mtx_fname", expression_data,
+        "--expression_mtx_fname", par['expression_data'],
        "--mode", "custom_multiprocessing",
-        "--output", regulons,
+        "--output", str(par['regulons']),
        "--num_workers", str(par['num_workers']),
        "--rank_threshold", str(par['rank_threshold']),
        "--auc_threshold", str(par['auc_threshold']),
@@ -129,7 +112,7 @@
def format_grn(par):

    print('Format regulons')
-    regulons_df = pd.read_csv(regulons, index_col=0, skiprows=1)['TargetGenes'].reset_index().iloc[1:,:]
+    regulons_df = pd.read_csv(par['regulons'], index_col=0, skiprows=1)['TargetGenes'].reset_index().iloc[1:,:]

    def format_it(df):
        values = df['TargetGenes'].values[0]
@@ -142,13 +125,30 @@ def format_it(df):
    grn = regulons_df.groupby('index').apply(lambda df: format_it(df)).reset_index().rename(columns={'index':'source'})
    network = grn[['source','target','weight']]
    return network
+def main(par):
+    databases = f"{par['temp_dir']}/databases/"
+    os.makedirs(databases, exist_ok=True)
+
+    par['motif_annotation'] = f'{databases}/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl'
+    par['genes_vs_motifs_10k'] = f'{databases}/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather'
+    par['genes_vs_motifs_500'] = f'{databases}/hg38_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather'
+
+    par['expr_mat_adjacencies'] = os.path.join(par['temp_dir'], "expr_mat_adjacencies.tsv")
+    par['expression_data'] = os.path.join(par['temp_dir'], "expression_data.tsv")
+    par['regulons'] = f"{par['temp_dir']}/regulons.csv"
+
+    # format_data(par)
+    # run_grn(par)
+    prune_grn(par)
+    network = format_grn(par)
+    return network

+if __name__=='__main__':
+    network = main(par)
+    os.makedirs(par['temp_dir'], exist_ok=True)
-format_data(par)
-run_grn(par)
-prune_grn(par)
-network = format_grn(par)
-network.to_csv(par['prediction'], sep=',')
+    network.to_csv(par['prediction'], sep=',')
-print('Finished.')
+    print('Finished.')
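With the driver code folded into main(par), the scenic step can be imported and run programmatically instead of only as a script. Note that format_data and run_grn are commented out inside main in this commit, so the expression and adjacency files must already exist in temp_dir. The invocation below is a sketch; the par keys and values are assumptions read off the script above, not a documented API.

import sys
sys.path.append('src/methods/single_omics/scenic')  # illustrative path
from script import main

par = {
    'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad',
    'tf_all': 'resources/prior/tf_all.csv',  # assumed location of the TF list
    'temp_dir': 'output/scenic',
    'num_workers': 20,
    'seed': 32,              # used by the grnboost2 step when enabled
    'rank_threshold': 5000,  # example pyscenic ctx settings
    'auc_threshold': 0.05,
}
network = main(par)  # runs prune_grn + format_grn over the files in temp_dir
network.to_csv('output/scenic/prediction.csv', sep=',')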


20 changes: 15 additions & 5 deletions src/metrics/regression_2/consensus/script.py
@@ -11,11 +11,21 @@
## VIASH START
par = {
    'perturbation_data': 'resources/grn-benchmark/perturbation_data.h5ad',
-    'grn_folder': 'resources/grn-benchmark/grn_models/d0_hvg',
-    'grns': 'pearson_corr, pearson_causal, portia, ppcor, genie3, grnboost2, scenic, scglue, celloracle',
+    # 'models_dir': 'resources/grn-benchmark/grn_models/d0_hvg',
+    # 'models': [pearson_corr, pearson_causal, portia, ppcor, genie3, grnboost2, scenic, scglue, celloracle],
    'output': 'resources/grn-benchmark/consensus-num-regulators.json'
}
## VIASH END
+import argparse
+parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
+parser.add_argument('--models_dir', type=str, help='where to read the models')
+parser.add_argument('--models', nargs='+', help="List of models to process", required=True)
+
+args = parser.parse_args()
+if args.models_dir:
+    par['models_dir'] = args.models_dir
+if args.models:
+    par['models'] = args.models


# Load perturbation data
@@ -26,12 +36,12 @@
gene_names = adata_rna.var.index.to_numpy()

print(par['perturbation_data'])
-print(par['grns'])
+print(par['models'])

# Load inferred GRNs
grns = []
-for filename in par['grns'].split(','):
-    filepath = os.path.join(par['grn_folder'], f'{filename}.csv')
+for filename in par['models']:
+    filepath = os.path.join(par['models_dir'], f'{filename}.csv')
    gene_dict = {gene_name: i for i, gene_name in enumerate(gene_names)}
    A = np.zeros((len(gene_names), len(gene_names)), dtype=float)
    df = pd.read_csv(filepath, sep=',', header='infer', index_col=0)
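The consensus script can now be driven from the command line as well. A sketch using subprocess, in the same style as the scenic script's pyscenic calls (model names and paths are illustrative):

import subprocess

# --models is required (nargs='+'); --models_dir falls back to the value in par.
subprocess.run([
    'python', 'src/metrics/regression_2/consensus/script.py',
    '--models_dir', 'resources/grn_models/d0_hvgs',
    '--models', 'pearson_corr', 'negative_control', 'positive_control',
], check=True)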