Commit

workflow bug fixed
janursa committed Nov 28, 2024
1 parent 4cd5eac commit b76d6c0
Showing 6 changed files with 74 additions and 63 deletions.
23 changes: 10 additions & 13 deletions scripts/run_benchmark_all.sh
@@ -1,18 +1,17 @@
#!/bin/bash

RUN_ID="d0_hvgs_baseline"
dataset="op"
RUN_ID=${dataset}
# resources_dir="./resources/"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="${resources_dir}/results/${RUN_ID}"

reg_type=ridge
subsample=-2
subsample=-1
num_workers=10
layer='scgen_pearson'
layer='X_norm'
metric_ids="[regression_1, regression_2]"
cell_type_specific=false #for controls
normalize=false
method_ids="[pearson_corr, positive_control]"
method_ids="[negative_control, pearson_corr, positive_control, portia, scgpt]"

param_file="./params/${RUN_ID}.yaml"

@@ -22,17 +21,15 @@ param_list:
- id: ${reg_type}
metric_ids: $metric_ids
method_ids: $method_ids
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna_d0_hvg.h5ad
multiomics_atac: ${resources_dir}/grn-benchmark/multiomics_atac_d0.h5ad
evaluation_data: ${resources_dir}/evaluation_datasets/${dataset}_perturbation.h5ad
rna: ${resources_dir}/inference_datasets/${dataset}_rna.h5ad
atac: ${resources_dir}/inference_datasets/${dataset}_atac.h5ad
reg_type: $reg_type
subsample: $subsample
num_workers: $num_workers
layer: $layer
consensus: ${resources_dir}/prior/consensus-num-regulators.json
consensus: ${resources_dir}/prior/${dataset}_consensus-num-regulators.json
tf_all: ${resources_dir}/prior/tf_all.csv
cell_type_specific: ${cell_type_specific}
normalize: ${normalize}
output_state: "state.yaml"
publish_dir: "$publish_dir"
@@ -60,6 +57,6 @@ HERE
--pull-latest \
--main-script target/nextflow/workflows/run_benchmark/main.nf \
--workspace 53907369739130 \
--compute-env 6TeIFgV5OY4pJCk8I0bfOh \
--compute-env 5DwwhQoBi0knMSGcwThnlF \
--params-file ${param_file} \
--config src/common/nextflow_helpers/labels_tw.config
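
As an aside (illustrative only, not part of this commit), a minimal Python sketch of the param_list entry that the heredoc above renders for dataset="op". It assumes PyYAML is installed, and the top-level placement of output_state/publish_dir is an assumption about the generated file's nesting:

# Illustrative sketch: mirrors the heredoc fields above for dataset="op".
# Assumes PyYAML; nesting of output_state/publish_dir is an assumption.
import yaml

dataset = "op"
resources_dir = "s3://openproblems-data/resources/grn"
params = {
    "param_list": [{
        "id": "ridge",
        "metric_ids": ["regression_1", "regression_2"],
        "method_ids": ["negative_control", "pearson_corr", "positive_control", "portia", "scgpt"],
        "evaluation_data": f"{resources_dir}/evaluation_datasets/{dataset}_perturbation.h5ad",
        "rna": f"{resources_dir}/inference_datasets/{dataset}_rna.h5ad",
        "atac": f"{resources_dir}/inference_datasets/{dataset}_atac.h5ad",
        "reg_type": "ridge",
        "subsample": -1,
        "num_workers": 10,
        "layer": "X_norm",
        "consensus": f"{resources_dir}/prior/{dataset}_consensus-num-regulators.json",
        "tf_all": f"{resources_dir}/prior/tf_all.csv",
    }],
    "output_state": "state.yaml",
    "publish_dir": f"{resources_dir}/results/{dataset}",
}
print(yaml.safe_dump(params, sort_keys=False))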
105 changes: 57 additions & 48 deletions src/metrics/script_all.py
@@ -5,39 +5,36 @@
import os


def define_par(dataset):
def define_par(dataset, global_models=False):

par = {
'reg_type': 'ridge',
'models_dir': f"resources/grn_models/{dataset}",
'scores_dir': f"output/temp/{dataset}",

'models': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus'],
'models': [ 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus'],

# 'models': [ 'positive_control', 'pearson_corr'],
'global_models': [
'ANANSE_tissue/networks/lung.parquet',
'ANANSE_tissue/networks/stomach.parquet',
'ANANSE_tissue/networks/heart.parquet',
'ANANSE_tissue/networks/bone_marrow.parquet',

'gtex_rna/networks/Whole_Blood.parquet',
'gtex_rna/networks/Brain_Amygdala.parquet',
'gtex_rna/networks/Breast_Mammary_Tissue.parquet',
'gtex_rna/networks/Lung.parquet',
'gtex_rna/networks/Stomach.parquet',


'cellnet_human_Hg1332/networks/bcell.parquet',
'cellnet_human_Hg1332/networks/tcell.parquet',
'cellnet_human_Hg1332/networks/skin.parquet',
'cellnet_human_Hg1332/networks/neuron.parquet',
'cellnet_human_Hg1332/networks/heart.parquet',
'collectri',
'Ananse:Lung',
'Ananse:Stomach',
'Ananse:Heart',
'Ananse:Bone marrow',
'Gtex:Whole blood',
'Gtex:Brain amygdala',
'Gtex:Breast mammary tissue',
'Gtex:Lung',
'Gtex:Stomach',
'Cellnet:Bcell',
'Cellnet:Tcell',
'Cellnet:Skin',
'Cellnet:Neuron',
'Cellnet:Heart'
],
'global_models_dir': '../eric/network_collection/networks/',
'global_models_dir': 'resources/grn_models/global/',

"evaluation_data": f"resources/evaluation_datasets/{dataset}_perturbation.h5ad",
'consensus': f'resources/prior/{dataset}_consensus-num-regulators.json',
'consensus': f'resources/prior/{dataset}_consensus-num-regulators.json',

'layer': 'X_norm',

@@ -48,6 +45,32 @@ def define_par(dataset):
'verbose': 4,
'num_workers': 20
}
if global_models:
import shutil

temp_grn_dir = 'output/models/'
os.makedirs(temp_grn_dir, exist_ok=True)

grn_file_list = []
for model in par['global_models']:
grn_file = f"{par['global_models_dir']}/{model}.csv"
grn_file_list.append(grn_file)

for model in par['models']:
grn_file = f"{par['models_dir']}/{model}.csv"
grn_file_list.append(grn_file)

par['models'] = par['models'] + par['global_models']
par['models_dir'] = temp_grn_dir
par['consensus'] = f'{temp_grn_dir}/{dataset}_consensus-num-regulators.json'
for grn_file in grn_file_list:
try:
shutil.copy(grn_file, temp_grn_dir)
print(f"Copied {grn_file} to {temp_grn_dir}")
except FileNotFoundError:
print(f"File not found: {grn_file}")
except Exception as e:
print(f"Error copying {grn_file}: {e}")
return par


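A hypothetical usage sketch (not part of the commit) of the new global_models branch: when enabled, define_par stages both the dataset-specific and the global networks as CSVs under one temporary directory and merges the two model lists, so the downstream loop can treat every network uniformly.

# Hypothetical usage of define_par with global models enabled;
# assumes define_par from this file is importable/defined.
par = define_par("op", global_models=True)

# par['models'] is now the union of local and global model names, and every
# network is expected at f"{par['models_dir']}/{model}.csv",
# e.g. output/models/pearson_corr.csv or output/models/Ananse:Lung.csv.
for model in par["models"]:
    print(f"{par['models_dir']}/{model}.csv")
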
@@ -66,45 +89,31 @@ def define_par(dataset):
from consensus.script import main as main_consensus

# - run general models
global_models = False
global_models = True

# - run metrics
for dataset in ['op', 'replogle2', 'nakatake', 'norman', 'adamson']: #'op', 'replogle2', 'nakatake', 'norman', 'adamson'
for dataset in ['op']: #'op', 'replogle2', 'nakatake', 'norman', 'adamson'
print('------ ', dataset, '------')
par = define_par(dataset)
os.makedirs(par['scores_dir'], exist_ok=True)
par = define_par(dataset, global_models=global_models)
main_consensus(par)
for binarize in [True]:
par['binarize'] = binarize
for max_n_links in [10000]:
for max_n_links in [50000]:
par['max_n_links'] = max_n_links
for apply_skeleton in [False]:
par['apply_skeleton'] = apply_skeleton
# - determines models to run
grn_files_dict = {}
# - add global models
if global_models:
for model in par['global_models']:
temp_dir = f"{par['scores_dir']}/nets/"
os.makedirs(temp_dir, exist_ok=True)
net = pd.read_parquet(f"{par['global_models_dir']}/{model}")
net.columns = ['source','target','weight']
net = process_links(net, par)
if par['binarize']:
net['weight'] = net['weight'].apply(binarize_weight)
model = model.replace('/','_')
grn_file = f'{temp_dir}/{model}.csv'
net.to_csv(grn_file)
grn_files_dict[model] = grn_file
else:
# - add actual models
for model in par['models']:
print(model)
grn_file = f"{par['models_dir']}/{model}.csv"
if not os.path.exists(grn_file):
print(f"{grn_file} doesnt exist. Skipped.")
continue
grn_files_dict[model] = grn_file
# - add models
for model in par['models']:
print(model)
grn_file = f"{par['models_dir']}/{model}.csv"
if not os.path.exists(grn_file):
print(f"{grn_file} doesnt exist. Skipped.")
continue
grn_files_dict[model] = grn_file

# - actual runs
i = 0
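
Note (illustrative, and an assumption about the data layout): because the copy step builds paths as f"{par['global_models_dir']}/{model}.csv", each renamed global network is expected as a CSV file named exactly like its list entry, colons and spaces included. A quick check could look like:

# Illustrative check of the assumed global-network file layout.
import os

global_models_dir = "resources/grn_models/global"
for name in ["collectri", "Ananse:Lung", "Gtex:Whole blood", "Cellnet:Bcell"]:
    path = os.path.join(global_models_dir, f"{name}.csv")
    print(path, "->", "found" if os.path.exists(path) else "missing")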
3 changes: 3 additions & 0 deletions src/workflows/run_benchmark/config.vsh.yaml
@@ -95,6 +95,9 @@ functionality:
# ---- multiomics
- name: grn_methods/celloracle
- name: grn_methods/scglue
- name: grn_methods/figr
- name: grn_methods/scenicplus
- name: grn_methods/granie
# ---- baselines
- name: control_methods/pearson_corr
- name: control_methods/negative_control
6 changes: 4 additions & 2 deletions src/workflows/run_benchmark/main.nf
@@ -14,7 +14,6 @@ workflow run_wf {
// construct list of methods
methods = [
portia,
genie3,
grnboost2,
ppcor,
scenic,
@@ -26,7 +25,10 @@ positive_control,
positive_control,

celloracle,
scglue
scglue,
granie,
figr,
scenicplus
]

