
Commit

local runs updated
janursa committed Sep 23, 2024
1 parent 3ee6fba commit 347b160
Showing 13 changed files with 2,688 additions and 3,178 deletions.
1,106 changes: 0 additions & 1,106 deletions NN-grn-inference.ipynb

This file was deleted.

4,349 changes: 2,416 additions & 1,933 deletions runs.ipynb

Large diffs are not rendered by default.

21 changes: 15 additions & 6 deletions scripts/run_robust_analys.sh
@@ -73,10 +73,19 @@ output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

nextflow run . \
-main-script target/nextflow/workflows/run_robustness_analysis/main.nf \
-profile docker \
-with-trace \
-c src/common/nextflow_helpers/labels_ci.config \
-params-file ${param_file}
# nextflow run . \
# -main-script target/nextflow/workflows/run_robustness_analysis/main.nf \
# -profile docker \
# -with-trace \
# -c src/common/nextflow_helpers/labels_ci.config \
# -params-file ${param_file}

./tw launch https://github.com/openproblems-bio/run_robustness_analysis \
--revision build/main \
--pull-latest \
--main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
--workspace 53907369739130 \
--compute-env 6TeIFgV5OY4pJCk8I0bfOh \
--params-file ${param_file} \
--config src/common/nextflow_helpers/labels_tw.config

5 changes: 3 additions & 2 deletions scripts/sbatch/calculate_scores.sh
@@ -1,5 +1,5 @@
#!/bin/bash
#SBATCH --job-name=calculate-scores
#SBATCH --job-name=robustness
#SBATCH --time=48:00:00
#SBATCH --output=logs/%j.out
#SBATCH --error=logs/%j.err
@@ -8,4 +8,5 @@
#SBATCH --mem=64G
#SBATCH --cpus-per-task=20

python src/metrics/regression_1/script_all.py
# python src/metrics/script_all.py
python src/robustness_analysis/script_all.py
2 changes: 1 addition & 1 deletion src/api/comp_method.yaml
@@ -46,7 +46,7 @@ functionality:
- name: --cell_type_specific
type: boolean
direction: input
default: true
default: false
- name: --normalize
type: boolean
direction: input
33 changes: 29 additions & 4 deletions src/methods/single_omics/grnboost2/script.py
@@ -7,7 +7,16 @@
from distributed import Client, LocalCluster
from tqdm import tqdm
import subprocess
import argparse
import sys

# Handle command-line arguments
parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file')
parser.add_argument('--prediction', type=str, help='Path to the prediction file')
parser.add_argument('--resources_dir', type=str, help='Path to the resources directory')
parser.add_argument('--tf_all', type=str, help='Path to the tf_all file')
args = parser.parse_args()


## VIASH START
@@ -21,18 +30,33 @@
}
## VIASH END

import sys
meta = {
    "resources_dir": 'src/utils/'
}

# Update par with any values passed on the command line
if args.multiomics_rna:
    par['multiomics_rna'] = args.multiomics_rna
if args.prediction:
    par['prediction'] = args.prediction
if args.tf_all:
    par['tf_all'] = args.tf_all

if args.resources_dir:
    meta['resources_dir'] = args.resources_dir

print(par)

sys.path.append(meta["resources_dir"])
from util import process_links
from util import process_links, basic_qc
# Load scRNA-seq data
print('Reading data')
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
print('Shape before QC: ', adata_rna.shape)
adata_rna = basic_qc(adata_rna)
print('Shape after QC: ', adata_rna.shape)

groups = adata_rna.obs.cell_type
gene_names = adata_rna.var.gene_ids.index.to_numpy()
gene_names = adata_rna.var_names
X = adata_rna.X

# Load list of putative TFs
@@ -54,6 +78,7 @@ def infer_grn(X, par):

# par['cell_type_specific'] = False
if par['cell_type_specific']:
    groups = adata_rna.obs.cell_type
    i = 0
    for group in tqdm(np.unique(groups), desc="Processing groups"):
        X_sub = X[groups == group, :]
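The new basic_qc call above is imported from util in src/utils/, which this diff does not show. As a rough sketch only, a QC helper of this kind typically drops low-quality cells and rarely detected genes; the scanpy calls and thresholds below are assumptions, not the repository's actual implementation:

import anndata as ad
import scanpy as sc

def basic_qc(adata: ad.AnnData, min_genes: int = 200, min_cells: int = 3) -> ad.AnnData:
    # Drop cells with too few detected genes, then genes detected in too few cells
    sc.pp.filter_cells(adata, min_genes=min_genes)
    sc.pp.filter_genes(adata, min_cells=min_cells)
    return adata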
3 changes: 0 additions & 3 deletions src/metrics/regression_2/main.py
@@ -254,7 +254,6 @@ def static_approach(


def main(par: Dict[str, Any]) -> pd.DataFrame:

# Set global seed for reproducibility purposes
random_state = SEED
np.random.seed(random_state)
@@ -282,8 +281,6 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
n_genes = len(gene_names)
groups = LabelEncoder().fit_transform(perturbation_data.obs.plate_name)



grn = load_grn(par['prediction'], gene_names, par)

# Load and standardize perturbation data
53 changes: 0 additions & 53 deletions src/metrics/regression_2/script_all.py

This file was deleted.

@@ -10,11 +10,13 @@
'read_dir': "resources/grn_models/d0_hvgs",
'write_dir': "resources/results/scores",
'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'pearson_causal', 'portia', 'ppcor', 'genie3', 'grnboost2', 'scenic', 'scglue', 'celloracle'],
'layers': ['lognorm', 'pearson', 'scgen_lognorm', 'scgen_pearson'],
# 'layers': ['lognorm', 'pearson', 'seurat_lognorm', 'seurat_pearson', 'scgen_lognorm', 'scgen_pearson'],
'layers': ['seurat_lognorm', 'seurat_pearson'],

# 'layers': ['scgen_pearson'],

"perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad",
"tf_all": "resources/prior/tf_all.csv",
"min_tf": False,
"max_n_links": 50000,
"apply_tf": "true",
'subsample': -2,
@@ -44,12 +46,12 @@
reg1 = main(par)
from regression_2.main import main
reg2 = main(par)
prediction = pd.concat([reg1, reg2], axis=1)
prediction.index = [method]
score = pd.concat([reg1, reg2], axis=1)
score.index = [method]
if i==0:
df_all = prediction
df_all = score
else:
df_all = pd.concat([df_all, prediction])
df_all = pd.concat([df_all, score])
df_all.to_csv(f"{par['write_dir']}/{layer}-{par['reg_type']}.csv")
print(df_all)
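The fragments above come from a loop that scores every GRN method on every layer with both regression metrics. The hunk does not show the enclosing control flow, so the following is only a sketch under assumptions: the regression_1 import mirrors the visible regression_2 one, and the {read_dir}/{method}.csv prediction layout is a guess.

import pandas as pd
from regression_1.main import main as reg1_main  # assumed, by analogy with regression_2
from regression_2.main import main as reg2_main

for layer in par['layers']:
    for i, method in enumerate(par['methods']):
        par['prediction'] = f"{par['read_dir']}/{method}.csv"  # hypothetical path layout
        reg1 = reg1_main(par)
        reg2 = reg2_main(par)
        # One row per method, with both metrics' columns side by side
        score = pd.concat([reg1, reg2], axis=1)
        score.index = [method]
        df_all = score if i == 0 else pd.concat([df_all, score])
    df_all.to_csv(f"{par['write_dir']}/{layer}-{par['reg_type']}.csv")
    print(df_all)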

67 changes: 67 additions & 0 deletions src/robustness_analysis/permute_grn/main.py
@@ -0,0 +1,67 @@

import os
import pandas as pd
import numpy as np

def main(par):
    degree = par['degree'] / 100
    type = par['noise_type']

    prediction = pd.read_csv(par['prediction'])

    if type == 'weight':  # add noise to the weights
        assert 'weight' in prediction.columns
        print('Add noise to weight')
        std_dev = prediction['weight'].std()
        noise = np.random.normal(loc=0, scale=degree * std_dev, size=prediction['weight'].shape)
        prediction['weight'] += noise

    elif type == 'net':  # shuffle the source-target matrix
        print('Permute links')

        # 1. Pivot the GRN with target as index and source as columns
        pivot_df = prediction.pivot(index='target', columns='source', values='weight')

        # Fill missing source-target pairs with 0
        pivot_df.fillna(0, inplace=True)

        # 2. Randomly choose degree% of the matrix entries to shuffle
        matrix_flattened = pivot_df.values.flatten()
        n_elements = len(matrix_flattened)
        n_shuffle = int(n_elements * degree)

        # Randomly select that fraction of the matrix elements' indices
        shuffle_indices = np.random.choice(n_elements, n_shuffle, replace=False)

        # Get the values that will be shuffled
        shuffle_values = matrix_flattened[shuffle_indices]

        # 3. Shuffle the selected values
        np.random.shuffle(shuffle_values)

        # Assign the shuffled values back to the selected positions
        matrix_flattened[shuffle_indices] = shuffle_values

        # Reshape the flattened array back into the matrix
        pivot_df_shuffled = pd.DataFrame(matrix_flattened.reshape(pivot_df.shape),
                                         index=pivot_df.index,
                                         columns=pivot_df.columns)

        flat_df = pivot_df_shuffled.reset_index()

        # Melt the DataFrame back into long form (source-target-weight)
        prediction = flat_df.melt(id_vars='target', var_name='source', value_name='weight')
        prediction = prediction[prediction['weight'] != 0].reset_index(drop=True)

    elif type == 'sign':  # flip the regulatory sign
        num_rows = len(prediction)
        num_to_modify = int(num_rows * degree)
        # Randomly select indices to modify
        random_indices = np.random.choice(prediction.index, size=num_to_modify, replace=False)
        # Flip the sign of the selected rows
        prediction.loc[random_indices, 'weight'] *= -1

    elif type == 'binary':  # binarize the weights
        prediction['weight'] = np.where(prediction['weight'] > 0, 1, -1)

    else:
        raise ValueError(f'Unknown noise_type: {type}')

    return prediction
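A quick way to sanity-check the new main() is to run it on a tiny GRN. This is a toy sketch: the import path assumes the repository root as working directory, and toy_grn.csv is a hypothetical scratch file.

import pandas as pd
from src.robustness_analysis.permute_grn.main import main  # assumes repo-root cwd

toy = pd.DataFrame({
    'source': ['TF1', 'TF1', 'TF2'],
    'target': ['geneA', 'geneB', 'geneA'],
    'weight': [0.8, -0.3, 0.5],
})
toy.to_csv('toy_grn.csv', index=False)  # hypothetical scratch file

par = {'prediction': 'toy_grn.csv', 'degree': 50, 'noise_type': 'sign'}
noised = main(par)  # flips the sign of int(0.5 * n_edges) edges
print(noised)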
65 changes: 1 addition & 64 deletions src/robustness_analysis/permute_grn/script.py
@@ -9,71 +9,8 @@
'degree': 20,
'noise_type': 'links'
}

## VIASH END

degree = par['degree']/100
type = par['noise_type']


prediction = pd.read_csv(par['prediction'])


if type == 'weight': # add noise to weight
assert 'weight' in prediction.columns
print('Add noise to weight')
std_dev = prediction['weight'].std()
noise = np.random.normal(loc=0, scale=degree * std_dev, size=prediction['weight'].shape)
prediction['weight'] += noise

elif type == 'net': # shuffle source-target matrix
print('Permute links')

# 1. Pivot the GRN with target as index and source as columns
pivot_df = prediction.pivot(index='target', columns='source', values='weight')

# Fill NaNs with 0 or a value of your choice
pivot_df.fillna(0, inplace=True)

# 2. Randomly choose degree% of the matrix to shuffle
matrix_flattened = pivot_df.values.flatten()
n_elements = len(matrix_flattened)
n_shuffle = int(n_elements * degree)

# Randomly select 20% of the matrix elements' indices
shuffle_indices = np.random.choice(n_elements, n_shuffle, replace=False)

# Get the values that will be shuffled
shuffle_values = matrix_flattened[shuffle_indices]

# 3. Shuffle the selected values
np.random.shuffle(shuffle_values)

# Assign the shuffled values back to the selected positions
matrix_flattened[shuffle_indices] = shuffle_values

# Reshape the flattened array back into the matrix
pivot_df_shuffled = pd.DataFrame(matrix_flattened.reshape(pivot_df.shape),
index=pivot_df.index,
columns=pivot_df.columns)

flat_df = pivot_df_shuffled.reset_index()

# Melt the DataFrame to turn it back into long-form (source-target-weight)
prediction = flat_df.melt(id_vars='target', var_name='source', value_name='weight')
prediction = prediction[prediction['weight'] !=0 ].reset_index(drop=True)
elif type == 'sign': # change the regulatory sign
num_rows = len(prediction)
num_to_modify = int(num_rows * degree)
# 2. Randomly select indices to modify
random_indices = np.random.choice(prediction.index, size=num_to_modify, replace=False)
# 3. Change the sign of the selected rows
prediction.loc[random_indices, 'weight'] *= -1
elif type == 'binary': # change the regulatory sign
prediction['weight'] = np.where(prediction['weight'] > 0, 1, -1)
else:
raise ValueError(f'Wrong type ({type}) for adding noise')

print('Output noised GRN')
prediction = main(par)
prediction.to_csv(par['prediction_n'])
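With the permutation logic moved into main.py, script.py reduces to a thin wrapper. The import of main is not visible in the rendered hunk, so the reconstruction below is an assumption about how the refactored script fits together; the paths in par are placeholders.

import pandas as pd
import numpy as np

## VIASH START
par = {
    'prediction': 'resources/grn_models/grnboost2.csv',  # placeholder path
    'prediction_n': 'output/grnboost2_noised.csv',       # placeholder path
    'degree': 20,
    'noise_type': 'net',  # one of: weight, net, sign, binary
}
## VIASH END

from main import main  # assumed import; not shown in the visible diff

print('Output noised GRN')
prediction = main(par)
prediction.to_csv(par['prediction_n'])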

