Skip to content

Commit

Permalink
workflows updated
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 10, 2024
1 parent a63a2eb commit a8da63f
Show file tree
Hide file tree
Showing 15 changed files with 251 additions and 127 deletions.
19 changes: 19 additions & 0 deletions runs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2184,6 +2184,25 @@
"!aws s3 sync s3://openproblems-data/resources/grn/results/single_omics_all resources/results/single_omics_all"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"download: s3://openproblems-data/resources/grn/results/grn_evaluation_so_ridge/scores.yaml to resources/results/grn_evaluation_so_ridge/scores.yaml\n",
"download: s3://openproblems-data/resources/grn/results/grn_evaluation_so_ridge/trace.txt to resources/results/grn_evaluation_so_ridge/trace.txt\n",
"download: s3://openproblems-data/resources/grn/results/grn_evaluation_so_ridge/metric_configs.yaml to resources/results/grn_evaluation_so_ridge/metric_configs.yaml\n"
]
}
],
"source": [
"!aws s3 sync s3://openproblems-data/resources/grn/results/grn_evaluation_so_ridge resources/results/grn_evaluation_so_ridge"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
72 changes: 40 additions & 32 deletions scripts/run_grn_evaluation.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
#!/bin/bash

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
reg_type=${1} #GB, ridge
# reg_type=${1} #GB, ridge
reg_type=ridge

RUN_ID="grn_evaluation_so_${reg_type}"
RUN_ID="grn_evaluation_so_all_${reg_type}"
# resources_dir="s3://openproblems-data/resources/grn"
resources_dir="./resources"
publish_dir="${resources_dir}/results/${RUN_ID}"
grn_models_folder="${resources_dir}/grn_models"

subsample=-2
max_workers=10
layer=pearson
metric_ids="[regression_1]"
layer=scgen_pearson
metric_ids="[regression_1, regression_2]"

param_file="./params/${RUN_ID}.yaml"

grn_names=(
"scglue"
"scenicplus"
"celloracle"
"granie"
"figr"
"collectri"
"genie3"
"grnboost2"
"ppcor"
"portia"
)
# Start writing to the YAML file
cat > $param_file << HERE
Expand All @@ -26,47 +36,45 @@ HERE

append_entry() {
cat >> $param_file << HERE
- id: ${reg_type}_${1}_${3}
- id: ${reg_type}_${1}
metric_ids: ${metric_ids}
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
reg_type: $reg_type
method_id: $1
subsample: $subsample
max_workers: $max_workers
tf_all: ${resources_dir}/prior/tf_all.csv
layer: ${3}
layer: ${layer}
consensus: ${resources_dir}/prior/consensus-num-regulators.json
HERE

# Conditionally append the prediction line if the second argument is "true"
if [[ $2 == "true" ]]; then
cat >> $param_file << HERE
prediction: ${grn_models_folder}/$1.csv
HERE
fi
}

# Loop through grn_names and layers

for grn_name in "${grn_names[@]}"; do
append_entry "$grn_name" "true" "$layer"
done


# # Append negative control
# grn_name="negative_control"
# for layer in "${layers[@]}"; do
# append_entry "$grn_name" "false" "$layer"
# done


# # Append positive controls
# grn_name="positive_control"
# for layer in "${layers[@]}"; do
# append_entry "$grn_name" "false" "$layer"
# #Loop through grn_names and layers
# for grn_name in "${grn_names[@]}"; do
# append_entry "$grn_name"
# done

#######################################
# Append one control-method entry to the params YAML file.
# Globals (read): param_file, reg_type, metric_ids, resources_dir,
#                 subsample, max_workers, layer
# Arguments:
#   $1 - method id; also used as the suffix of the entry id
#   $2 - value written to the entry's "causal" key (e.g. True / False)
# Outputs:
#   Appends the entry text to "$param_file".
# Returns:
#   Exit status of the cat/redirection.
# NOTE(review): the entry keys sit at column 0 in this view of the script;
# confirm the intended YAML nesting against the checked-in file.
#######################################
append_entry_control() {
  local method_id=$1
  local causal=$2

  # The redirect target is quoted: unquoted, a params path containing
  # whitespace makes bash abort the write with "ambiguous redirect".
  cat >> "$param_file" << HERE
- id: ${reg_type}_${method_id}
metric_ids: ${metric_ids}
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
reg_type: $reg_type
method_id: $method_id
subsample: $subsample
max_workers: $max_workers
tf_all: ${resources_dir}/prior/tf_all.csv
layer: ${layer}
consensus: ${resources_dir}/prior/consensus-num-regulators.json
causal: ${causal}
HERE
}
# controls
# append_entry_control "negative_control" ""
# append_entry_control "positive_control" ""
append_entry_control "baseline_corr_causal" "True"
append_entry_control "baseline_corr" "False"

# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
Expand All @@ -88,7 +96,7 @@ nextflow run . \
# --main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
# --workspace 53907369739130 `
# --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/scgen_pearson_gb_pcs.yaml `
# --params-file ./params/grn_evaluation_so_ridge.yaml `
# --config src/common/nextflow_helpers/labels_tw.config


82 changes: 56 additions & 26 deletions scripts/run_robust_analys_causal.sh
Original file line number Diff line number Diff line change
@@ -1,55 +1,85 @@
#!/bin/bash
# viash ns build --parallel
RUN_ID="robust_analy_causal"
# resources_dir="resources"
resources_dir="s3://openproblems-data/resources/grn"

RUN_ID="robust_analy_causal_1"
resources_dir="resources"
# resources_dir="s3://openproblems-data/resources/grn"
publish_dir="${resources_dir}/results/${RUN_ID}"



reg_type=ridge
subsample=-2
max_workers=10

params_list_file="params/list_${RUN_ID}.yaml"
layer=(scgen_pearson)
metric_ids="[regression_1]"

param_file="./params/${RUN_ID}.yaml"
# Start the params file from scratch. Truncating with ">" (instead of
# appending with ">>") keeps a re-run from stacking a second "param_list:"
# header and duplicate entries on top of a previous run's output; every
# later write in this script appends to the file created here.
cat > "$param_file" << HERE
param_list:
HERE

# Append the causal baseline entry (method baseline_corr_causal, causal: True).
# The redirect target is quoted so that a params path containing whitespace
# is not word-split into an "ambiguous redirect".
cat >> "$param_file" << HERE
- id: corr-causal
metric_ids: ${metric_ids}
multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
reg_type: $reg_type
method_id: baseline_corr_causal
layer: ${layer}
subsample: $subsample
max_workers: $max_workers
consensus: ${resources_dir}/prior/consensus-num-regulators.json
tf_all: ${resources_dir}/prior/tf_all.csv
causal: True
HERE

append_entry() {
cat >> $params_list_file << HERE
cat >> $param_file << HERE
- id: corr-${1}
metric_ids: ${metric_ids}
multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
reg_type: $reg_type
method_id: corr-${1}
layer: ${2}
method_id: baseline_corr-${1}
layer: ${layer}
subsample: $subsample
max_workers: $max_workers
consensus: ${resources_dir}/prior/consensus-num-regulators.json
tf_all: ${resources_dir}/prior/tf_all.csv
causal: False
HERE
}
# Loop through layers/iterations and append one entry for each
layers=("pearson") # Array containing the layer(s)

for layer in "${layers[@]}"; do # Iterate over each layer in the array
for iter in {1..10}; do # Loop from 1 to 100 iterations
append_entry "$iter" "$layer" # Execute the append_entry command
done

# Append one entry per iteration by calling append_entry with the
# iteration number as its first argument.
for iter in {1..2}; do # Loop over iterations 1 to 2
append_entry "$iter" # Append the entry for this iteration
done

aws s3 sync params/ s3://openproblems-data/resources/grn/params
# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
param_list: "${resources_dir}/${params_list_file}"
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

# nextflow run . \
# -main-script target/nextflow/workflows/run_robustness_analysis_causal/main.nf \
# -profile docker \
# -with-trace \
# -c src/common/nextflow_helpers/labels_ci.config \
# -params-file ${param_file}
# params_list_file="params/list_${RUN_ID}.yaml"

# param_file="./params/${RUN_ID}.yaml"


# # Loop through grn_names and layers
# layers=("pearson") # Array containing the layer(s)



# aws s3 sync params/ s3://openproblems-data/resources/grn/params
# # Append the remaining output_state and publish_dir to the YAML file
# cat >> $param_file << HERE
# param_list: "${resources_dir}/${params_list_file}"
# output_state: "state.yaml"
# publish_dir: "$publish_dir"
# HERE

nextflow run . \
-main-script target/nextflow/workflows/run_robustness_analysis_causal/main.nf \
-profile docker \
-with-trace \
-c src/common/nextflow_helpers/labels_ci.config \
-params-file ${param_file}
11 changes: 9 additions & 2 deletions src/api/comp_control_method.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@ functionality:
arguments:
- name: --perturbation_data
__merge__: file_perturbation_h5ad.yaml
required: true
required: false
direction: input
default: resources/grn-benchmark/perturbation_data.h5ad
- name: --multiomics_rna
__merge__: file_multiomics_rna_h5ad.yaml
required: false
direction: input
default: resources/grn-benchmark/multiomics_rna.h5ad
- name: --layer
type: string
direction: input
Expand All @@ -21,13 +27,14 @@ functionality:
required: false
- name: --prediction
__merge__: file_prediction.yaml
required: true
required: false
direction: output
- name: --tf_all
type: file
required: false
direction: input
example: resources_test/prior/tf_all.csv
default: resources/prior/tf_all.csv


test_resources:
Expand Down
2 changes: 2 additions & 0 deletions src/api/comp_method.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ functionality:
__merge__: file_multiomics_rna_h5ad.yaml
required: false
direction: input
default: resources/grn-benchmark/multiomics_rna.h5ad
- name: --multiomics_atac
__merge__: file_multiomics_atac_h5ad.yaml
required: false
direction: input
must_exist: false
default: resources/grn-benchmark/multiomics_atac.h5ad
- name: --prediction
__merge__: file_prediction.yaml
required: false
Expand Down
5 changes: 5 additions & 0 deletions src/api/comp_metric.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ functionality:
type: boolean
required: false
default: true
- name: --clip_scores
type: boolean
required: false
default: true
description: clips the r2 scores for each gene to make them within [0, 1]



Expand Down
32 changes: 32 additions & 0 deletions src/control_methods/baseline_corr/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
__merge__: ../../api/comp_control_method.yaml

functionality:
name: baseline_corr
info:
label: baseline_corr
summary: "Baseline based on Pearson corr"

arguments:
- name: --causal
type: boolean
direction: input
default: false
- name: --seed
type: integer
direction: input


resources:
- type: python_script
path: script.py

platforms:
- type: docker
image: ghcr.io/openproblems-bio/base_python:1.0.4
setup:
- type: python
packages: [ ]
- type: native
- type: nextflow
directives:
label: [midtime, midmem, midcpu]
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,10 @@
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler


## VIASH START
par = {

}

## VIASH END

def create_corr_net(X: np.ndarray, groups: np.ndarray):
grns = []
for group in tqdm(np.unique(groups), desc="Processing groups"):
Expand All @@ -22,10 +18,10 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray):
grn = np.dot(X_sub.T, X_sub) / X_sub.shape[0]
grns.append(grn)
return np.mean(grns, axis=0)


print('Read data')
multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
# print('subsetting: remove this')
# multiomics_rna = multiomics_rna[:5000, :5000]
gene_names = multiomics_rna.var_names.to_numpy()
tf_all = np.loadtxt(par['tf_all'], dtype=str)
groups = multiomics_rna.obs.cell_type
Expand All @@ -39,14 +35,15 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray):
print('Create corr net')
net = create_corr_net(multiomics_rna.X, groups)
net = pd.DataFrame(net, index=gene_names, columns=gene_names)

if par['causal']:
net_corr = net[tf_all]
net = net[tf_all]
else:
net_corr = net.sample(len(tf_all), axis=1)
net_corr = net_corr.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
net_corr.rename(columns={'index': 'target'}, inplace=True)
net = net.sample(len(tf_all), axis=1, random_state=par['seed'])

net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
net.rename(columns={'index': 'target'}, inplace=True)


print('Output GRN')
net_corr.to_csv(par['prediction'])

net.to_csv(par['prediction'])
Loading

0 comments on commit a8da63f

Please sign in to comment.