Skip to content

Commit

Permalink
single omics workflow updated
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Aug 30, 2024
1 parent 144813f commit dd969e0
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 26 deletions.
10 changes: 5 additions & 5 deletions scripts/run_benchmark_single_omics.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/bin/bash

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
RUN_ID="single_omics_try1"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
RUN_ID="single_omics"
# resources_dir="s3://openproblems-data/resources_test/grn"
# publish_dir="s3://openproblems-data/resources_test/grn/results/${RUN_ID}"

# resources_dir="./resources_test/"
# publish_dir="output/${RUN_ID}"
resources_dir="./resources_test/"
publish_dir="output/${RUN_ID}"

reg_type=ridge
subsample=-2
Expand Down
4 changes: 4 additions & 0 deletions src/api/comp_metric.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ functionality:
type: file
direction: input
default: 'resources/prior/tf_all.csv'
- name: --apply_tf
type: boolean
required: false
default: true



Expand Down
2 changes: 1 addition & 1 deletion src/methods/single_omics/tigress/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ platforms:
- type: native
- type: nextflow
directives:
label: [midtime,midmem,midcpu]
label: [midtime, midmem, highcpu]
5 changes: 5 additions & 0 deletions src/metrics/regression_1/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ functionality:
direction: input
required: false
default: pearson
- name: --min_tf
type: integer
direction: input
description: Calculate the scores for the given minimum number of TFs, in addition to the default setting.
required: false
resources:
- type: python_script
path: script.py
Expand Down
22 changes: 13 additions & 9 deletions src/metrics/regression_1/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ def main(par):
gene_names = perturbation_data.var.index.to_numpy()
net = pd.read_csv(par['prediction'])
# subset to keep only those links with source as tf
net = net[net.source.isin(tf_all)]
if par['apply_tf']:
net = net[net.source.isin(tf_all)]

subsample = par['subsample']
reg_type = par['reg_type']
Expand Down Expand Up @@ -210,13 +211,15 @@ def main(par):
net_processed = process_net(net.copy(), gene_names, manipulate)

print(f'Compute metrics for layer: {layer}', flush=True)
tfs_cases = [-1]
if par['min_tf']:
tfs_cases += par['min_tf']
layer_results = {} # Store results for this layer
for exclude_missing_genes in [False]: # two settings on target gene
for tf_n in [-1]: # two settings on tfs
for exclude_missing_genes in [False, True]: # two settings on target gene
for tf_n in tfs_cases: # two settings on tfs
run_key = f'ex({exclude_missing_genes})_tf({tf_n})'
print(run_key)
net_subset = net_processed.copy()

# Subset TFs
if tf_n == -1:
degrees = net_subset.abs().sum(axis=0)
Expand All @@ -234,11 +237,12 @@ def main(par):

# Convert results to DataFrame
df_results = pd.DataFrame(layer_results)
# if 'ex(True)_tf(140)' not in df_results.columns:
# df_results['ex(True)_tf(140)'] = df_results['ex(True)_tf(-1)']
# if 'ex(False)_tf(140)' not in df_results.columns:
# df_results['ex(False)_tf(140)'] = df_results['ex(False)_tf(-1)']

if par['min_tf']:
if 'ex(True)_tf(140)' not in df_results.columns:
df_results['ex(True)_tf(140)'] = df_results['ex(True)_tf(-1)']
if 'ex(False)_tf(140)' not in df_results.columns:
df_results['ex(False)_tf(140)'] = df_results['ex(False)_tf(-1)']

df_results['Mean'] = df_results.mean(axis=1)

return df_results
5 changes: 3 additions & 2 deletions src/metrics/regression_2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,9 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
n_features_theta_max = np.asarray([data[gene_name]['1'] for gene_name in gene_names], dtype=int)

# Load list of putative TFs
df = pd.read_csv(par['tf_all'], header=None, names=['gene_name'])
tf_names = set(list(df['gene_name'].to_numpy()))
tf_names = np.loadtxt(par['tf_all'], dtype=str)
if par['apply_tf']==False:
tf_names = gene_names

# Evaluate GRN
print(f'Compute metrics for layer: {layer}', flush=True)
Expand Down
12 changes: 6 additions & 6 deletions src/process_data/test_data/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,33 @@ functionality:
arguments:
- name: --multiomics_rna
type: file
required: true
required: false
direction: input
default: resources/grn-benchmark/multiomics_rna.h5ad
- name: --multiomics_rna_test
type: file
required: true
required: false
direction: output
default: resources_test/grn-benchmark/multiomics_rna.h5ad

- name: --multiomics_atac
type: file
required: true
required: false
direction: input
default: resources/grn-benchmark/multiomics_atac.h5ad
- name: --multiomics_atac_test
type: file
required: true
required: false
direction: input
default: resources_test/grn-benchmark/multiomics_atac.h5ad
- name: --perturbation_data
type: file
required: true
required: false
direction: input
default: resources/grn-benchmark/perturbation_data.h5ad
- name: --perturbation_data_test
type: file
required: true
required: false
direction: output
default: resources_test/grn-benchmark/perturbation_data.h5ad
resources:
Expand Down
2 changes: 1 addition & 1 deletion src/process_data/test_data/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,4 @@

# shorten perturbation
adata_bulk = ad.read_h5ad(par['perturbation_data'])
adata_bulk[:200, adata_bulk.var_names.isin(adata_rna_s.var_names)].write(par['perturbation_data_test'])
adata_bulk[:600, adata_bulk.var_names.isin(adata_rna_s.var_names)].write(par['perturbation_data_test'])
7 changes: 5 additions & 2 deletions src/workflows/run_benchmark_single_omics/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,17 @@ workflow run_wf {

methods = [
portia,
pidc,
ennet,
grnboost2,
scsgl,
ppcor,
tigress
]

// construct list of metrics
metrics = [
regression_1
regression_1,
regression_2
]

/****************************
Expand Down

0 comments on commit dd969e0

Please sign in to comment.