scglue and granie updated

openproblems-bio · Aug 22, 2024 · f98576c · f98576c
1 parent 7a90f77
commit f98576c
Show file tree

Hide file tree

Showing 8 changed files with 75 additions and 50 deletions.
diff --git a/scripts/run_process_perturbation_tw.sh b/scripts/run_process_perturbation_tw.sh
@@ -13,20 +13,16 @@ output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-# ./tw-windows-x86_64.exe launch openproblems-bio/task_grn_benchmark \
-#   --revision build/main \
-#   --pull-latest \
-#   --main-script target/nextflow/workflows/process_perturbation/main.nf \
-#   --workspace 53907369739130 \
-#   --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
-#   --params-file /tmp/params.yaml \
-#   --config src/common/nextflow_helpers/labels_tw.config
-
-
 
   ./tw-windows-x86_64.exe launch  https://github.com/openproblems-bio/task_grn_benchmark.git `
      --revision build/main --pull-latest `
      --main-script target/nextflow/workflows/process_perturbation/main.nf `
      --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
      --params-file ./params/process_perturbation.yaml `
-     --config src/common/nextflow_helpers/labels_tw.config
+     --config src/common/nextflow_helpers/labels_tw.config
+
+
+nextflow run .   \
+  -main-script  target/nextflow/workflows/grn_inference_granie/main.nf  \
+  -profile docker     -with-trace     -c src/common/nextflow_helpers/labels_ci.config  \
+  -params-file params/granie.yaml
diff --git a/src/methods/multi_omics/granie_ns/config.vsh.yaml b/src/methods/multi_omics/granie_ns/config.vsh.yaml
@@ -1,5 +1,5 @@
 
-__merge__: ../../../api/comp_method.yaml
+__merge__: ../../../api/comp_method_r.yaml
 
 functionality:
   name: grn_inference_granie

diff --git a/src/methods/multi_omics/granie_ns/main.nf b/src/methods/multi_omics/granie_ns/main.nf
@@ -7,8 +7,8 @@ workflow run_wf {
 
     | granie.run(
       fromState: [
-              multiomics_rna: "multiomics_rna",
-              multiomics_atac: "multiomics_atac",
+              multiomics_rna_r: "multiomics_rna_r",
+              multiomics_ata_r: "multiomics_ata_r",
               num_workers: "num_workers"
               ],
       toState: [prediction:"prediction"]

diff --git a/src/methods/multi_omics/scenicplus/script.py b/src/methods/multi_omics/scenicplus/script.py
@@ -22,12 +22,12 @@
   'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad',
   'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad',
   'cistopic_out': 'output/pycistopic',
-  'out_dir': 'output/scenicplus',
+  'temp_dir': 'output/scenicplus',
   'prediction': 'output/prediction.csv',
 }
 ## VIASH END
 
-work_dir = par['out_dir']
+work_dir = par['temp_dir']
 os.makedirs(os.path.join(work_dir, 'scRNA'), exist_ok=True)
 
 # Download databases
@@ -119,7 +119,7 @@ def download_checksum(url: str, filepath: str) -> str:
 # Init scenicplus pipeline
 os.makedirs(os.path.join(work_dir, 'scplus_pipeline'), exist_ok=True)
 os.makedirs(os.path.join(work_dir, 'scplus_pipeline', 'temp'), exist_ok=True)
-subprocess.run(['scenicplus', 'init_snakemake', '--out_dir', os.path.join(work_dir, 'scplus_pipeline')])
+subprocess.run(['scenicplus', 'init_snakemake', '--out_dir', os.path.join(work_dir, 'scplus_pipeline')])  # the scenicplus CLI flag stays --out_dir; only the par key was renamed to temp_dir
 
 # Load pipeline settings
 with open(os.path.join(work_dir, 'scplus_pipeline', 'Snakemake', 'config', 'config.yaml'), 'r') as f:

diff --git a/src/methods/multi_omics/scglue/config.vsh.yaml b/src/methods/multi_omics/scglue/config.vsh.yaml
@@ -41,4 +41,4 @@ platforms:
   - type: native
   - type: nextflow
     directives:
-      label: [hightime,midmem,midcpu]
+      label: [veryhightime,midmem,midcpu]
diff --git a/src/methods/multi_omics/scglue_ns/config.vsh.yaml b/src/methods/multi_omics/scglue_ns/config.vsh.yaml
@@ -26,4 +26,4 @@ functionality:
 platforms:
   - type: nextflow
     directives:
-      label: [ hightime, midmem, highcpu ]
+      label: [ veryhightime, midmem, highcpu ]
diff --git a/src/metrics/regression_1/script_all.py b/src/metrics/regression_1/script_all.py
@@ -12,8 +12,10 @@
   'reg_type': 'ridge',
   'subsample': -2,
   "tf_all":  "./resources/prior/tf_all.csv",
-  "temp_dir": "output"
+  "temp_dir": "output/ridge/noised"
 }
+# grn_models_folder = 'resources/grn_models'  # alternative models folder; swap with the line below to use it
+grn_models_folder = 'resources/supplementary/grn_models_noised'  # active: folder the per-model prediction CSVs are read from
 
 def create_positive_control(X: np.ndarray, groups: np.ndarray):
     grns = []
@@ -42,9 +44,11 @@ def create_negative_control(gene_names) -> np.ndarray:
 }
 sys.path.append(meta["resources_dir"])
 from main import main 
-layers = ['pearson', 'lognorm', 'scgen_pearson', 'scgen_lognorm', 'seurat_lognorm', 'seurat_pearson']
+# layers = ['pearson', 'lognorm', 'scgen_pearson', 'scgen_lognorm', 'seurat_lognorm', 'seurat_pearson']
+layers = ['pearson']
 grn_models = ['scenicplus', 'celloracle', 'figr', 'granie', 'scglue', 'collectri']
-controls = ['negative_control', 'positive_control']
+# controls = ['negative_control', 'positive_control']
+controls = []
 
 os.makedirs(par['temp_dir'], exist_ok=True)
 for grn_model in controls + grn_models :
@@ -76,18 +80,18 @@ def create_negative_control(gene_names) -> np.ndarray:
       par['prediction'] = f"{par['temp_dir']}/negative_control.csv"
       pivoted_net.to_csv(par['prediction'])
     else:
-      par['prediction'] = f"resources/grn_models/{grn_model}.csv"
-    # output = main(par) 
-    # output.index = [layer]
+      par['prediction'] = f"{grn_models_folder}/{grn_model}.csv"
+    output = main(par) 
+    output.index = [layer]
 
-    # if ii == 0:
-    #   score = output
-    # else:
-    #   score = pd.concat([score, output], axis=0)
+    if ii == 0:
+      score = output
+    else:
+      score = pd.concat([score, output], axis=0)
 
-    # print("Write output to file", flush=True)
-    # print(grn_model, layer, score)
+    print("Write output to file", flush=True)
+    print(grn_model, layer, score)
 
-  # print("Write output to file", flush=True)
-  # score.to_csv(par['score'])
+  print("Write output to file", flush=True)
+  score.to_csv(par['score'])
 
diff --git a/src/robustness_analysis/add_noise_grn.py b/src/robustness_analysis/add_noise_grn.py
@@ -6,25 +6,50 @@
 grn_folder = 'resources/grn_models'
 grn_folder_noised = 'resources/supplementary/grn_models_noised'
 noise_ratio = 0.2
+permute_ratio = 0.2  # fraction of rows whose 'weight' values are shuffled by the permutation branch below
 
 # Ensure the output folder exists
 os.makedirs(grn_folder_noised, exist_ok=True)
 
+if True: # add noise
+    # Loop through all files in the grn_folder
+    for file_name in os.listdir(grn_folder):
+        if file_name.endswith('.csv'):
+            # Read the CSV file
+            file_path = os.path.join(grn_folder, file_name)
+            df = pd.read_csv(file_path)
+
+            # Add noise to the 'weight' column
+            if 'weight' in df.columns:
+                std_dev = df['weight'].std()
+                noise = np.random.normal(0, noise_ratio * std_dev, size=df['weight'].shape)
+                df['weight'] += noise
+
+            # Save the noised DataFrame to the new folder
+            noised_file_path = os.path.join(grn_folder_noised, file_name)
+            df.to_csv(noised_file_path, index=False)
+
+    print("Noise added to all GRN models and saved successfully.")
 # Loop through all files in the grn_folder
-for file_name in os.listdir(grn_folder):
-    if file_name.endswith('.csv'):
-        # Read the CSV file
-        file_path = os.path.join(grn_folder, file_name)
-        df = pd.read_csv(file_path)
-
-        # Add noise to the 'weight' column
-        if 'weight' in df.columns:
-            std_dev = df['weight'].std()
-            noise = np.random.normal(0, noise_ratio * std_dev, size=df['weight'].shape)
-            df['weight'] += noise
-
-        # Save the noised DataFrame to the new folder
-        noised_file_path = os.path.join(grn_folder_noised, file_name)
-        df.to_csv(noised_file_path, index=False)
-
-print("Noise added to all GRN models and saved successfully.")
+else:
+    for file_name in os.listdir(grn_folder):
+        if file_name.endswith('.csv'):
+            # Read the CSV file
+            file_path = os.path.join(grn_folder, file_name)
+            df = pd.read_csv(file_path)
+
+            # Permute a `permute_ratio` fraction of the rows in the 'weight' column
+            if 'weight' in df.columns:
+                num_rows_to_permute = int(len(df) * permute_ratio)
+
+                # Randomly select that many row indices (without replacement) to permute
+                permute_indices = np.random.choice(df.index, size=num_rows_to_permute, replace=False)
+
+                # Shuffle the selected rows in 'weight' column
+                df.loc[permute_indices, 'weight'] = np.random.permutation(df.loc[permute_indices, 'weight'].values)
+
+            # Save the modified DataFrame to the new folder
+            noised_file_path = os.path.join(grn_folder_noised, file_name)
+            df.to_csv(noised_file_path, index=False)
+
+    print("20% of the 'weight' column rows have been permuted for all GRN models and saved successfully.")