diff --git a/scripts/run_process_perturbation_tw.sh b/scripts/run_process_perturbation_tw.sh index 6b5c151d0..9b3c9c5f9 100644 --- a/scripts/run_process_perturbation_tw.sh +++ b/scripts/run_process_perturbation_tw.sh @@ -13,20 +13,16 @@ output_state: "state.yaml" publish_dir: "$publish_dir" HERE -# ./tw-windows-x86_64.exe launch openproblems-bio/task_grn_benchmark \ -# --revision build/main \ -# --pull-latest \ -# --main-script target/nextflow/workflows/process_perturbation/main.nf \ -# --workspace 53907369739130 \ -# --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ -# --params-file /tmp/params.yaml \ -# --config src/common/nextflow_helpers/labels_tw.config - - ./tw-windows-x86_64.exe launch https://github.com/openproblems-bio/task_grn_benchmark.git ` --revision build/main --pull-latest ` --main-script target/nextflow/workflows/process_perturbation/main.nf ` --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh ` --params-file ./params/process_perturbation.yaml ` - --config src/common/nextflow_helpers/labels_tw.config \ No newline at end of file + --config src/common/nextflow_helpers/labels_tw.config + + +nextflow run . \ + -main-script target/nextflow/workflows/grn_inference_granie/main.nf \ + -profile docker -with-trace -c src/common/nextflow_helpers/labels_ci.config \ + -params-file params/granie.yaml \ No newline at end of file diff --git a/src/methods/multi_omics/granie_ns/config.vsh.yaml b/src/methods/multi_omics/granie_ns/config.vsh.yaml index 187ef77af..d32a2a264 100644 --- a/src/methods/multi_omics/granie_ns/config.vsh.yaml +++ b/src/methods/multi_omics/granie_ns/config.vsh.yaml @@ -1,5 +1,5 @@ -__merge__: ../../../api/comp_method.yaml +__merge__: ../../../api/comp_method_r.yaml functionality: name: grn_inference_granie diff --git a/src/methods/multi_omics/granie_ns/main.nf b/src/methods/multi_omics/granie_ns/main.nf index b4c4c9bdc..ce360506f 100644 --- a/src/methods/multi_omics/granie_ns/main.nf +++ b/src/methods/multi_omics/granie_ns/main.nf @@ -7,8 +7,8 @@ workflow run_wf { | granie.run( fromState: [ - multiomics_rna: "multiomics_rna", - multiomics_atac: "multiomics_atac", + multiomics_rna_r: "multiomics_rna_r", + multiomics_ata_r: "multiomics_ata_r", num_workers: "num_workers" ], toState: [prediction:"prediction"] diff --git a/src/methods/multi_omics/scenicplus/script.py b/src/methods/multi_omics/scenicplus/script.py index 2bb347b3a..9131d8763 100644 --- a/src/methods/multi_omics/scenicplus/script.py +++ b/src/methods/multi_omics/scenicplus/script.py @@ -22,12 +22,12 @@ 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad', 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad', 'cistopic_out': 'output/pycistopic', - 'out_dir': 'output/scenicplus', + 'temp_dir': 'output/scenicplus', 'prediction': 'output/prediction.csv', } ## VIASH END -work_dir = par['out_dir'] +work_dir = par['temp_dir'] os.makedirs(os.path.join(work_dir, 'scRNA'), exist_ok=True) # Download databases @@ -119,7 +119,7 @@ def download_checksum(url: str, filepath: str) -> str: # Init scenicplus pipeline os.makedirs(os.path.join(work_dir, 'scplus_pipeline'), exist_ok=True) os.makedirs(os.path.join(work_dir, 'scplus_pipeline', 'temp'), exist_ok=True) -subprocess.run(['scenicplus', 'init_snakemake', '--out_dir', os.path.join(work_dir, 'scplus_pipeline')]) +subprocess.run(['scenicplus', 'init_snakemake', '--temp_dir', os.path.join(work_dir, 'scplus_pipeline')]) # Load pipeline settings with open(os.path.join(work_dir, 'scplus_pipeline', 'Snakemake', 'config', 'config.yaml'), 'r') as f: diff --git a/src/methods/multi_omics/scglue/config.vsh.yaml b/src/methods/multi_omics/scglue/config.vsh.yaml index 5724c7723..b57e4b156 100644 --- a/src/methods/multi_omics/scglue/config.vsh.yaml +++ b/src/methods/multi_omics/scglue/config.vsh.yaml @@ -41,4 +41,4 @@ platforms: - type: native - type: nextflow directives: - label: [hightime,midmem,midcpu] + label: [veryhightime,midmem,midcpu] diff --git a/src/methods/multi_omics/scglue_ns/config.vsh.yaml b/src/methods/multi_omics/scglue_ns/config.vsh.yaml index 0e406c0f0..2bee22a97 100644 --- a/src/methods/multi_omics/scglue_ns/config.vsh.yaml +++ b/src/methods/multi_omics/scglue_ns/config.vsh.yaml @@ -26,4 +26,4 @@ functionality: platforms: - type: nextflow directives: - label: [ hightime, midmem, highcpu ] + label: [ veryhightime, midmem, highcpu ] diff --git a/src/metrics/regression_1/script_all.py b/src/metrics/regression_1/script_all.py index 9af3cfe67..58e1d499b 100644 --- a/src/metrics/regression_1/script_all.py +++ b/src/metrics/regression_1/script_all.py @@ -12,8 +12,10 @@ 'reg_type': 'ridge', 'subsample': -2, "tf_all": "./resources/prior/tf_all.csv", - "temp_dir": "output" + "temp_dir": "output/ridge/noised" } +grn_models_folder = 'resources/grn_models' +grn_models_folder = 'resources/supplementary/grn_models_noised' def create_positive_control(X: np.ndarray, groups: np.ndarray): grns = [] @@ -42,9 +44,11 @@ def create_negative_control(gene_names) -> np.ndarray: } sys.path.append(meta["resources_dir"]) from main import main -layers = ['pearson', 'lognorm', 'scgen_pearson', 'scgen_lognorm', 'seurat_lognorm', 'seurat_pearson'] +# layers = ['pearson', 'lognorm', 'scgen_pearson', 'scgen_lognorm', 'seurat_lognorm', 'seurat_pearson'] +layers = ['pearson'] grn_models = ['scenicplus', 'celloracle', 'figr', 'granie', 'scglue', 'collectri'] -controls = ['negative_control', 'positive_control'] +# controls = ['negative_control', 'positive_control'] +controls = [] os.makedirs(par['temp_dir'], exist_ok=True) for grn_model in controls + grn_models : @@ -76,18 +80,18 @@ def create_negative_control(gene_names) -> np.ndarray: par['prediction'] = f"{par['temp_dir']}/negative_control.csv" pivoted_net.to_csv(par['prediction']) else: - par['prediction'] = f"resources/grn_models/{grn_model}.csv" - # output = main(par) - # output.index = [layer] + par['prediction'] = f"{grn_models_folder}/{grn_model}.csv" + output = main(par) + output.index = [layer] - # if ii == 0: - # score = output - # else: - # score = pd.concat([score, output], axis=0) + if ii == 0: + score = output + else: + score = pd.concat([score, output], axis=0) - # print("Write output to file", flush=True) - # print(grn_model, layer, score) + print("Write output to file", flush=True) + print(grn_model, layer, score) - # print("Write output to file", flush=True) - # score.to_csv(par['score']) + print("Write output to file", flush=True) + score.to_csv(par['score']) diff --git a/src/robustness_analysis/add_noise_grn.py b/src/robustness_analysis/add_noise_grn.py index f475607cc..ad43332bf 100644 --- a/src/robustness_analysis/add_noise_grn.py +++ b/src/robustness_analysis/add_noise_grn.py @@ -6,25 +6,50 @@ grn_folder = 'resources/grn_models' grn_folder_noised = 'resources/supplementary/grn_models_noised' noise_ratio = 0.2 +# permute_ratio = 0.2 # Ensure the output folder exists os.makedirs(grn_folder_noised, exist_ok=True) +if True: # add noise + # Loop through all files in the grn_folder + for file_name in os.listdir(grn_folder): + if file_name.endswith('.csv'): + # Read the CSV file + file_path = os.path.join(grn_folder, file_name) + df = pd.read_csv(file_path) + + # Add noise to the 'weight' column + if 'weight' in df.columns: + std_dev = df['weight'].std() + noise = np.random.normal(0, noise_ratio * std_dev, size=df['weight'].shape) + df['weight'] += noise + + # Save the noised DataFrame to the new folder + noised_file_path = os.path.join(grn_folder_noised, file_name) + df.to_csv(noised_file_path, index=False) + + print("Noise added to all GRN models and saved successfully.") # Loop through all files in the grn_folder -for file_name in os.listdir(grn_folder): - if file_name.endswith('.csv'): - # Read the CSV file - file_path = os.path.join(grn_folder, file_name) - df = pd.read_csv(file_path) - - # Add noise to the 'weight' column - if 'weight' in df.columns: - std_dev = df['weight'].std() - noise = np.random.normal(0, noise_ratio * std_dev, size=df['weight'].shape) - df['weight'] += noise - - # Save the noised DataFrame to the new folder - noised_file_path = os.path.join(grn_folder_noised, file_name) - df.to_csv(noised_file_path, index=False) - -print("Noise added to all GRN models and saved successfully.") +else: + for file_name in os.listdir(grn_folder): + if file_name.endswith('.csv'): + # Read the CSV file + file_path = os.path.join(grn_folder, file_name) + df = pd.read_csv(file_path) + + # Permute 20% of the rows in the 'weight' column + if 'weight' in df.columns: + num_rows_to_permute = int(len(df) * permute_ratio) + + # Randomly select 20% of the row indices to permute + permute_indices = np.random.choice(df.index, size=num_rows_to_permute, replace=False) + + # Shuffle the selected rows in 'weight' column + df.loc[permute_indices, 'weight'] = np.random.permutation(df.loc[permute_indices, 'weight'].values) + + # Save the modified DataFrame to the new folder + noised_file_path = os.path.join(grn_folder_noised, file_name) + df.to_csv(noised_file_path, index=False) + + print("20% of the 'weight' column rows have been permuted for all GRN models and saved successfully.") \ No newline at end of file