
Commit

local runs updated
janursa committed Sep 23, 2024
1 parent 3ee6fba commit 347b160
Showing 13 changed files with 2,688 additions and 3,178 deletions.
1,106 changes: 0 additions & 1,106 deletions NN-grn-inference.ipynb

This file was deleted.

4,349 changes: 2,416 additions & 1,933 deletions runs.ipynb

Large diffs are not rendered by default.

21 changes: 15 additions & 6 deletions scripts/run_robust_analys.sh
@@ -73,10 +73,19 @@ output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

nextflow run . \
-main-script target/nextflow/workflows/run_robustness_analysis/main.nf \
-profile docker \
-with-trace \
-c src/common/nextflow_helpers/labels_ci.config \
-params-file ${param_file}
# nextflow run . \
# -main-script target/nextflow/workflows/run_robustness_analysis/main.nf \
# -profile docker \
# -with-trace \
# -c src/common/nextflow_helpers/labels_ci.config \
# -params-file ${param_file}

./tw launch https://github.com/openproblems-bio/run_robustness_analysis \
--revision build/main \
--pull-latest \
--main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
--workspace 53907369739130 \
--compute-env 6TeIFgV5OY4pJCk8I0bfOh \
--params-file ${param_file} \
--config src/common/nextflow_helpers/labels_tw.config

5 changes: 3 additions & 2 deletions scripts/sbatch/calculate_scores.sh
@@ -1,5 +1,5 @@
#!/bin/bash
#SBATCH --job-name=calculate-scores
#SBATCH --job-name=robustness
#SBATCH --time=48:00:00
#SBATCH --output=logs/%j.out
#SBATCH --error=logs/%j.err
@@ -8,4 +8,5 @@
#SBATCH --mem=64G
#SBATCH --cpus-per-task=20

python src/metrics/regression_1/script_all.py
# python src/metrics/script_all.py
python src/robustness_analysis/script_all.py
2 changes: 1 addition & 1 deletion src/api/comp_method.yaml
@@ -46,7 +46,7 @@ functionality:
- name: --cell_type_specific
type: boolean
direction: input
default: true
default: false
- name: --normalize
type: boolean
direction: input
33 changes: 29 additions & 4 deletions src/methods/single_omics/grnboost2/script.py
@@ -7,7 +7,16 @@
from distributed import Client, LocalCluster
from tqdm import tqdm
import subprocess
import argparse
import sys

# Handle command-line arguments
parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file')
parser.add_argument('--prediction', type=str, help='Path to the prediction file')
parser.add_argument('--resources_dir', type=str, help='Path to the resources directory')
parser.add_argument('--tf_all', type=str, help='Path to the tf_all file')
args = parser.parse_args()


## VIASH START
@@ -21,18 +30,33 @@
}
## VIASH END

import sys
meta = {
    "resources_dir": 'src/utils/'
}

# Update par with any values passed on the command line
if args.multiomics_rna:
    par['multiomics_rna'] = args.multiomics_rna
if args.prediction:
    par['prediction'] = args.prediction
if args.tf_all:
    par['tf_all'] = args.tf_all

if args.resources_dir:
    meta['resources_dir'] = args.resources_dir

print(par)

sys.path.append(meta["resources_dir"])
from util import process_links
from util import process_links, basic_qc
# Load scRNA-seq data
print('Reading data')
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
print('Shape before QC: ', adata_rna.shape)
adata_rna = basic_qc(adata_rna)
print('Shape after QC: ', adata_rna.shape)

groups = adata_rna.obs.cell_type
gene_names = adata_rna.var.gene_ids.index.to_numpy()
gene_names = adata_rna.var_names
X = adata_rna.X

# Load list of putative TFs
@@ -54,6 +78,7 @@ def infer_grn(X, par):

# par['cell_type_specific'] = False
if par['cell_type_specific']:
    groups = adata_rna.obs.cell_type
    i = 0
    for group in tqdm(np.unique(groups), desc="Processing groups"):
        X_sub = X[groups == group, :]
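The new basic_qc call above is imported from util in src/utils/, which this diff does not show. As a rough sketch only, a QC helper of this kind typically drops low-quality cells and rarely detected genes; the scanpy calls and thresholds below are assumptions, not the repository's actual implementation:

import anndata as ad
import scanpy as sc

def basic_qc(adata: ad.AnnData, min_genes: int = 200, min_cells: int = 3) -> ad.AnnData:
    # Drop cells with too few detected genes, then genes detected in too few cells
    sc.pp.filter_cells(adata, min_genes=min_genes)
    sc.pp.filter_genes(adata, min_cells=min_cells)
    return adata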
3 changes: 0 additions & 3 deletions src/metrics/regression_2/main.py
@@ -254,7 +254,6 @@ def static_approach(


def main(par: Dict[str, Any]) -> pd.DataFrame:

# Set global seed for reproducibility purposes
random_state = SEED
np.random.seed(random_state)
@@ -282,8 +281,6 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
n_genes = len(gene_names)
groups = LabelEncoder().fit_transform(perturbation_data.obs.plate_name)



grn = load_grn(par['prediction'], gene_names, par)

# Load and standardize perturbation data
53 changes: 0 additions & 53 deletions src/metrics/regression_2/script_all.py

This file was deleted.

@@ -10,11 +10,13 @@
'read_dir': "resources/grn_models/d0_hvgs",
'write_dir': "resources/results/scores",
'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'pearson_causal', 'portia', 'ppcor', 'genie3', 'grnboost2', 'scenic', 'scglue', 'celloracle'],
'layers': ['lognorm', 'pearson', 'scgen_lognorm', 'scgen_pearson'],
# 'layers': ['lognorm', 'pearson', 'seurat_lognorm', 'seurat_pearson', 'scgen_lognorm', 'scgen_pearson'],
'layers': ['seurat_lognorm', 'seurat_pearson'],

# 'layers': ['scgen_pearson'],

"perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad",
"tf_all": "resources/prior/tf_all.csv",
"min_tf": False,
"max_n_links": 50000,
"apply_tf": "true",
'subsample': -2,
@@ -44,12 +46,12 @@
reg1 = main(par)
from regression_2.main import main
reg2 = main(par)
prediction = pd.concat([reg1, reg2], axis=1)
prediction.index = [method]
score = pd.concat([reg1, reg2], axis=1)
score.index = [method]
if i==0:
df_all = prediction
df_all = score
else:
df_all = pd.concat([df_all, prediction])
df_all = pd.concat([df_all, score])
df_all.to_csv(f"{par['write_dir']}/{layer}-{par['reg_type']}.csv")
print(df_all)
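The fragments above come from a loop that scores every GRN method on every layer with both regression metrics. The hunk does not show the enclosing control flow, so the following is only a sketch under assumptions: the regression_1 import mirrors the visible regression_2 one, and the {read_dir}/{method}.csv prediction layout is a guess.

import pandas as pd
from regression_1.main import main as reg1_main  # assumed, by analogy with regression_2
from regression_2.main import main as reg2_main

for layer in par['layers']:
    for i, method in enumerate(par['methods']):
        par['prediction'] = f"{par['read_dir']}/{method}.csv"  # hypothetical path layout
        reg1 = reg1_main(par)
        reg2 = reg2_main(par)
        # One row per method, with both metrics' columns side by side
        score = pd.concat([reg1, reg2], axis=1)
        score.index = [method]
        df_all = score if i == 0 else pd.concat([df_all, score])
    df_all.to_csv(f"{par['write_dir']}/{layer}-{par['reg_type']}.csv")
    print(df_all)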

67 changes: 67 additions & 0 deletions src/robustness_analysis/permute_grn/main.py
@@ -0,0 +1,67 @@

import os
import pandas as pd
import numpy as np

def main(par):
    degree = par['degree'] / 100
    type = par['noise_type']

    prediction = pd.read_csv(par['prediction'])

    if type == 'weight':  # add noise to the weights
        assert 'weight' in prediction.columns
        print('Add noise to weight')
        std_dev = prediction['weight'].std()
        noise = np.random.normal(loc=0, scale=degree * std_dev, size=prediction['weight'].shape)
        prediction['weight'] += noise

    elif type == 'net':  # shuffle the source-target matrix
        print('Permute links')

        # 1. Pivot the GRN with target as index and source as columns
        pivot_df = prediction.pivot(index='target', columns='source', values='weight')

        # Fill missing source-target pairs with 0
        pivot_df.fillna(0, inplace=True)

        # 2. Randomly choose degree% of the matrix entries to shuffle
        matrix_flattened = pivot_df.values.flatten()
        n_elements = len(matrix_flattened)
        n_shuffle = int(n_elements * degree)

        # Randomly select that fraction of the matrix elements' indices
        shuffle_indices = np.random.choice(n_elements, n_shuffle, replace=False)

        # Get the values that will be shuffled
        shuffle_values = matrix_flattened[shuffle_indices]

        # 3. Shuffle the selected values
        np.random.shuffle(shuffle_values)

        # Assign the shuffled values back to the selected positions
        matrix_flattened[shuffle_indices] = shuffle_values

        # Reshape the flattened array back into the matrix
        pivot_df_shuffled = pd.DataFrame(matrix_flattened.reshape(pivot_df.shape),
                                         index=pivot_df.index,
                                         columns=pivot_df.columns)

        flat_df = pivot_df_shuffled.reset_index()

        # Melt the DataFrame back into long form (source-target-weight)
        prediction = flat_df.melt(id_vars='target', var_name='source', value_name='weight')
        prediction = prediction[prediction['weight'] != 0].reset_index(drop=True)

    elif type == 'sign':  # flip the regulatory sign
        num_rows = len(prediction)
        num_to_modify = int(num_rows * degree)
        # Randomly select indices to modify
        random_indices = np.random.choice(prediction.index, size=num_to_modify, replace=False)
        # Flip the sign of the selected rows
        prediction.loc[random_indices, 'weight'] *= -1

    elif type == 'binary':  # binarize the weights
        prediction['weight'] = np.where(prediction['weight'] > 0, 1, -1)

    else:
        raise ValueError(f'Unknown noise_type: {type}')

    return prediction
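A quick way to sanity-check the new main() is to run it on a tiny GRN. This is a toy sketch: the import path assumes the repository root as working directory, and toy_grn.csv is a hypothetical scratch file.

import pandas as pd
from src.robustness_analysis.permute_grn.main import main  # assumes repo-root cwd

toy = pd.DataFrame({
    'source': ['TF1', 'TF1', 'TF2'],
    'target': ['geneA', 'geneB', 'geneA'],
    'weight': [0.8, -0.3, 0.5],
})
toy.to_csv('toy_grn.csv', index=False)  # hypothetical scratch file

par = {'prediction': 'toy_grn.csv', 'degree': 50, 'noise_type': 'sign'}
noised = main(par)  # flips the sign of int(0.5 * n_edges) edges
print(noised)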
65 changes: 1 addition & 64 deletions src/robustness_analysis/permute_grn/script.py
@@ -9,71 +9,8 @@
'degree': 20,
'noise_type': 'links'
}

## VIASH END

degree = par['degree']/100
type = par['noise_type']


prediction = pd.read_csv(par['prediction'])


if type == 'weight': # add noise to weight
assert 'weight' in prediction.columns
print('Add noise to weight')
std_dev = prediction['weight'].std()
noise = np.random.normal(loc=0, scale=degree * std_dev, size=prediction['weight'].shape)
prediction['weight'] += noise

elif type == 'net': # shuffle source-target matrix
print('Permute links')

# 1. Pivot the GRN with target as index and source as columns
pivot_df = prediction.pivot(index='target', columns='source', values='weight')

# Fill NaNs with 0 or a value of your choice
pivot_df.fillna(0, inplace=True)

# 2. Randomly choose degree% of the matrix to shuffle
matrix_flattened = pivot_df.values.flatten()
n_elements = len(matrix_flattened)
n_shuffle = int(n_elements * degree)

# Randomly select 20% of the matrix elements' indices
shuffle_indices = np.random.choice(n_elements, n_shuffle, replace=False)

# Get the values that will be shuffled
shuffle_values = matrix_flattened[shuffle_indices]

# 3. Shuffle the selected values
np.random.shuffle(shuffle_values)

# Assign the shuffled values back to the selected positions
matrix_flattened[shuffle_indices] = shuffle_values

# Reshape the flattened array back into the matrix
pivot_df_shuffled = pd.DataFrame(matrix_flattened.reshape(pivot_df.shape),
index=pivot_df.index,
columns=pivot_df.columns)

flat_df = pivot_df_shuffled.reset_index()

# Melt the DataFrame to turn it back into long-form (source-target-weight)
prediction = flat_df.melt(id_vars='target', var_name='source', value_name='weight')
prediction = prediction[prediction['weight'] !=0 ].reset_index(drop=True)
elif type == 'sign': # change the regulatory sign
num_rows = len(prediction)
num_to_modify = int(num_rows * degree)
# 2. Randomly select indices to modify
random_indices = np.random.choice(prediction.index, size=num_to_modify, replace=False)
# 3. Change the sign of the selected rows
prediction.loc[random_indices, 'weight'] *= -1
elif type == 'binary': # change the regulatory sign
prediction['weight'] = np.where(prediction['weight'] > 0, 1, -1)
else:
raise ValueError(f'Wrong type ({type}) for adding noise')

print('Output noised GRN')
prediction = main(par)
prediction.to_csv(par['prediction_n'])
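With the permutation logic moved into main.py, script.py reduces to a thin wrapper. The import of main is not visible in the rendered hunk, so the reconstruction below is an assumption about how the refactored script fits together; the paths in par are placeholders.

import pandas as pd
import numpy as np

## VIASH START
par = {
    'prediction': 'resources/grn_models/grnboost2.csv',  # placeholder path
    'prediction_n': 'output/grnboost2_noised.csv',       # placeholder path
    'degree': 20,
    'noise_type': 'net',  # one of: weight, net, sign, binary
}
## VIASH END

from main import main  # assumed import; not shown in the visible diff

print('Output noised GRN')
prediction = main(par)
prediction.to_csv(par['prediction_n'])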

