diff --git a/runs.ipynb b/runs.ipynb
index 2b31db782..3c74734a5 100644
--- a/runs.ipynb
+++ b/runs.ipynb
@@ -175,14 +175,14 @@
   },
  {
   "cell_type": "code",
-  "execution_count": 6,
+  "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "Submitted batch job 7838763\n"
+     "Submitted batch job 7838786\n"
     ]
    }
   ],
@@ -194,6 +194,86 @@
    " calculate_scores()"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": 13,
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stderr",
+    "output_type": "stream",
+    "text": [
+     "--2024-11-18 22:11:00--  https://drive.google.com/uc?export=download&id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9\n",
+     "Resolving drive.google.com (drive.google.com)... 173.194.79.113, 173.194.79.139, 173.194.79.138, ...\n",
+     "Connecting to drive.google.com (drive.google.com)|173.194.79.113|:443... connected.\n",
+     "HTTP request sent, awaiting response... 303 See Other\n",
+     "Location: https://drive.usercontent.google.com/download?id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9&export=download [following]\n",
+     "--2024-11-18 22:11:00--  https://drive.usercontent.google.com/download?id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9&export=download\n",
+     "Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 173.194.69.132, 2a00:1450:4013:c04::84\n",
+     "Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|173.194.69.132|:443... connected.\n",
+     "HTTP request sent, awaiting response... 200 OK\n",
+     "Length: 2425 (2.4K) [text/html]\n",
+     "Saving to: ‘output/best_model.pt’\n",
+     "\n",
+     "     0K ..                                                    100% 44.8M=0s\n",
+     "\n",
+     "2024-11-18 22:11:00 (44.8 MB/s) - ‘output/best_model.pt’ saved [2425/2425]\n",
+     "\n",
+     "--2024-11-18 22:11:00--  https://drive.google.com/file/d/1Qzb6Y9UB342a2QxmY-BCubSvcmYZ5jw3/view?usp=drive_link\n",
+     "Resolving drive.google.com (drive.google.com)... 173.194.79.139, 173.194.79.138, 173.194.79.102, ...\n",
+     "Connecting to drive.google.com (drive.google.com)|173.194.79.139|:443... connected.\n",
+     "HTTP request sent, awaiting response... 200 OK\n",
+     "Length: unspecified [text/html]\n",
+     "Saving to: ‘output/vocab.json’\n",
+     "\n",
+     "     0K .......... .......... .......... .......... .......... 2.51M\n",
+     "    50K .......... .......... .......... .......... .          5.45M=0.03s\n",
+     "\n",
+     "2024-11-18 22:11:01 (3.32 MB/s) - ‘output/vocab.json’ saved [93749]\n",
+     "\n",
+     "--2024-11-18 22:11:01--  https://drive.google.com/file/d/1VwPGHuSorVAXyTreMFI1yzMougtUDeUt/view?usp=drive_link\n",
+     "Resolving drive.google.com (drive.google.com)... 173.194.79.138, 173.194.79.102, 173.194.79.100, ...\n",
+     "Connecting to drive.google.com (drive.google.com)|173.194.79.138|:443... connected.\n",
+     "HTTP request sent, awaiting response... 200 OK\n",
+     "Length: unspecified [text/html]\n",
+     "Saving to: ‘output/args.json’\n",
+     "\n",
+     "     0K .......... .......... .......... .......... .......... 2.49M\n",
+     "    50K .......... .......... .......... .......... .          5.16M=0.03s\n",
+     "\n",
+     "2024-11-18 22:11:01 (3.25 MB/s) - ‘output/args.json’ saved [93798]\n",
+     "\n"
+    ]
+   },
+   {
+    "data": {
+     "text/plain": [
+      "CompletedProcess(args=\"wget --no-check-certificate 'https://drive.google.com/file/d/1VwPGHuSorVAXyTreMFI1yzMougtUDeUt/view?usp=drive_link' -O output/args.json\", returncode=0)"
+     ]
+    },
+    "execution_count": 13,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
+  "source": [
+   "par = {'temp_dir': 'output'}\n",
+   "\n",
+   "par['model_file'] = f\"{par['temp_dir']}/best_model.pt\"\n",
+   "par['model_config_file'] = f\"{par['temp_dir']}/args.json\"\n",
+   "par['vocab_file'] = f\"{par['temp_dir']}/vocab.json\"\n",
+   "\n",
+   "\n",
+   "command = f\"wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9' -O {par['model_file']}\"\n",
+   "subprocess.run(command, shell=True, check=True)\n",
+   "\n",
+   "command = f\"wget --no-check-certificate 'https://drive.google.com/file/d/1Qzb6Y9UB342a2QxmY-BCubSvcmYZ5jw3/view?usp=drive_link' -O {par['vocab_file']}\"\n",
+   "subprocess.run(command, shell=True, check=True)\n",
+   "\n",
+   "command = f\"wget --no-check-certificate 'https://drive.google.com/file/d/1VwPGHuSorVAXyTreMFI1yzMougtUDeUt/view?usp=drive_link' -O {par['model_config_file']}\"\n",
+   "subprocess.run(command, shell=True, check=True)\n"
+  ]
+ },
 {
  "cell_type": "code",
  "execution_count": 19,
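Note on the cell above: the wget log it captures shows that none of the three downloads actually fetched the intended file. Only the first URL (the `uc?export=download` form) is a direct-download link, and even it returned a 2,425-byte `text/html` document saved as `best_model.pt`; the two `file/d/.../view?usp=drive_link` URLs fetch the Drive preview page (`Length: unspecified [text/html]`) rather than `vocab.json` and `args.json`. A minimal sketch of a sturdier alternative, assuming the `gdown` package is available (it is not something this PR adds):

```python
# Sketch only: fetch the three artifacts via gdown, which handles Google
# Drive's redirect/confirmation pages that plain wget trips over here.
import gdown

files = {
    "output/best_model.pt": "1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9",
    "output/vocab.json": "1Qzb6Y9UB342a2QxmY-BCubSvcmYZ5jw3",
    "output/args.json": "1VwPGHuSorVAXyTreMFI1yzMougtUDeUt",
}
for path, file_id in files.items():
    gdown.download(f"https://drive.google.com/uc?id={file_id}", path, quiet=False)
```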
diff --git a/src/methods/single_omics/scgpt/config.novsh.yaml b/src/methods/single_omics/scgpt/config.vsh.yaml
similarity index 56%
rename from src/methods/single_omics/scgpt/config.novsh.yaml
rename to src/methods/single_omics/scgpt/config.vsh.yaml
index 2ec20365b..53abd4826 100644
--- a/src/methods/single_omics/scgpt/config.novsh.yaml
+++ b/src/methods/single_omics/scgpt/config.vsh.yaml
@@ -10,21 +10,6 @@ functionality:
       GRN inference using scGPT.
     documentation_url: https://github.com/bowang-lab/scGPT/blob/main/tutorials/Tutorial_Attention_GRN.ipynb
   arguments:
-    - name: --model_file
-      type: file
-      direction: input
-      example: resources_test/supplementary/finetuned_scGPT_adamson/best_model.pt
-      default: resources_test/supplementary/finetuned_scGPT_adamson/best_model.pt
-    - name: --model_config_file
-      type: file
-      direction: input
-      example: resources_test/supplementary/finetuned_scGPT_adamson/args.json
-      default: resources_test/supplementary/finetuned_scGPT_adamson/args.json
-    - name: --vocab_file
-      type: file
-      direction: input
-      example: resources_test/supplementary/finetuned_scGPT_adamson/vocab.json
-      default: resources_test/supplementary/finetuned_scGPT_adamson/vocab.json
     - name: --n_bins
       type: integer
       direction: input
diff --git a/src/methods/single_omics/scgpt/script.py b/src/methods/single_omics/scgpt/script.py
index d251fdd24..95dec679d 100644
--- a/src/methods/single_omics/scgpt/script.py
+++ b/src/methods/single_omics/scgpt/script.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 import sys
 import warnings
+import subprocess
 
 import torch
 from anndata import AnnData
@@ -57,6 +58,22 @@
 }
 ## VIASH END
 
+# Download datasets
+par['model_file'] = f"{par['temp_dir']}/best_model.pt"
+par['model_config_file'] = f"{par['temp_dir']}/args.json"
+par['vocab_file'] = f"{par['temp_dir']}/vocab.json"
+
+
+command = f"wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9' -O {par['model_file']}"
+subprocess.run(command, shell=True, check=True)
+
+command = f"wget --no-check-certificate 'https://drive.google.com/file/d/1Qzb6Y9UB342a2QxmY-BCubSvcmYZ5jw3/view?usp=drive_link' -O {par['vocab_file']}"
+subprocess.run(command, shell=True, check=True)
+
+command = f"wget --no-check-certificate 'https://drive.google.com/file/d/1VwPGHuSorVAXyTreMFI1yzMougtUDeUt/view?usp=drive_link' -O {par['model_config_file']}"
+subprocess.run(command, shell=True, check=True)
+
+
 # os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:50"
 initial_memory = torch.cuda.memory_allocated()
 def monitor_memory():
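Because the scGPT component now downloads its checkpoint at run time instead of taking `--model_file` and friends as arguments, a redirected or quota-blocked download silently leaves an HTML page where `best_model.pt` should be, which is exactly what the notebook log above records. A small guard before the files are consumed would fail fast; this is a sketch with a hypothetical helper name, not part of the PR:

```python
# Hypothetical post-download check. A Drive error page starts with markup,
# while a modern torch checkpoint is a zip archive beginning with b"PK".
def assert_not_html(path: str) -> None:
    with open(path, "rb") as f:
        head = f.read(64).lstrip().lower()
    if head.startswith((b"<!doctype", b"<html")):
        raise RuntimeError(f"{path} looks like an HTML page, not the requested file")

for f in (par['model_file'], par['model_config_file'], par['vocab_file']):
    assert_not_html(f)
```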
diff --git a/src/metrics/script_all.py b/src/metrics/script_all.py
index 2c2a9809b..521b77789 100644
--- a/src/metrics/script_all.py
+++ b/src/metrics/script_all.py
@@ -16,7 +16,7 @@ def define_par(dataset):
         raise ValueError('define first')
 
     par = {
-        'reg_type': 'ridge',
+        'reg_type': 'GB',
         'models_dir': f"resources/grn_models/{dataset}",
         'scores_dir': f"output/temp/{dataset}",
 
@@ -77,7 +77,7 @@ def define_par(dataset):
 global_models = False
 
 # - run metrics
-for dataset in ['norman', 'adamson']: #'replogle2', 'nakatake', norman
+for dataset in ['op','replogle2', 'nakatake', 'norman', 'adamson']: #'replogle2', 'nakatake', norman
     print('------ ', dataset, '------')
     par = define_par(dataset)
     os.makedirs(par['scores_dir'], exist_ok=True)
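The metrics script's default regressor flips here from 'ridge' to 'GB'. The string is consumed elsewhere in `src/metrics`; assuming it selects a scikit-learn estimator in the usual way, the mapping would look roughly like this sketch (the helper name is hypothetical, and the actual metric code may differ):

```python
# Illustrative only: how a reg_type flag typically maps to an estimator.
from sklearn.linear_model import Ridge
from sklearn.ensemble import GradientBoostingRegressor

def make_regressor(reg_type: str):
    if reg_type == 'ridge':
        return Ridge()  # fast linear baseline
    if reg_type == 'GB':
        return GradientBoostingRegressor()  # non-linear, slower, often stronger
    raise ValueError(f"unknown reg_type: {reg_type}")
```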
diff --git a/src/process_data/perturbation/batch_correction_evaluation/config.vsh.yaml b/src/process_data/perturbation/batch_correction_evaluation/config.novsh.yaml
similarity index 100%
rename from src/process_data/perturbation/batch_correction_evaluation/config.vsh.yaml
rename to src/process_data/perturbation/batch_correction_evaluation/config.novsh.yaml
diff --git a/src/process_data/perturbation/batch_correction_scgen/config.vsh.yaml b/src/process_data/perturbation/batch_correction_scgen/config.novsh.yaml
similarity index 100%
rename from src/process_data/perturbation/batch_correction_scgen/config.vsh.yaml
rename to src/process_data/perturbation/batch_correction_scgen/config.novsh.yaml
diff --git a/src/process_data/perturbation/batch_correction_seurat/config.vsh.yaml b/src/process_data/perturbation/batch_correction_seurat/config.novsh.yaml
similarity index 100%
rename from src/process_data/perturbation/batch_correction_seurat/config.vsh.yaml
rename to src/process_data/perturbation/batch_correction_seurat/config.novsh.yaml
diff --git a/src/workflows/process_perturbation/config.vsh.yaml b/src/workflows/process_perturbation/config.vsh.yaml
index 370169604..75f5273e2 100644
--- a/src/workflows/process_perturbation/config.vsh.yaml
+++ b/src/workflows/process_perturbation/config.vsh.yaml
@@ -12,10 +12,10 @@ functionality:
         type: file
         required: true
         direction: input
-        default: resources/datasets_raw/perturbation_counts.h5ad
+        example: resources_test/datasets_raw/op_perturbation_counts.h5ad
         description: single cell perturbation data
 
-      - name: --perturbation_data_bc
+      - name: --perturbation_data_n
         __merge__: ../../api/file_evaluation_h5ad.yaml
         required: false
         direction: output
@@ -28,8 +28,8 @@ functionality:
   dependencies:
     - name: perturbation/sc_counts
     - name: perturbation/normalization
-    - name: perturbation/batch_correction_scgen
-    - name: perturbation/batch_correction_seurat
+    # - name: perturbation/batch_correction_scgen
+    # - name: perturbation/batch_correction_seurat
 
 platforms:
   - type: nextflow
diff --git a/src/workflows/process_perturbation/main.nf b/src/workflows/process_perturbation/main.nf
index e109f4641..45a1e4971 100644
--- a/src/workflows/process_perturbation/main.nf
+++ b/src/workflows/process_perturbation/main.nf
@@ -14,18 +14,9 @@ workflow run_wf {
       fromState: [pseudobulked_data_f: "pseudobulked_data_f"],
       toState: [perturbation_data_n: "perturbation_data_n"]
     )
-
-    | batch_correction_scgen.run(
-      fromState: [perturbation_data_n: "perturbation_data_n"],
-      toState: [perturbation_data_bc: "perturbation_data_bc"]
-    )
-    | batch_correction_seurat.run(
-      fromState: [perturbation_data_n: "perturbation_data_bc"],
-      toState: [perturbation_data_bc: "perturbation_data_bc"]
-    )
-    | setState(["perturbation_data_bc"])
+    | setState(["perturbation_data_n"])
 
   emit:
   output_ch
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index a47948402..33887f28b 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -9,13 +9,13 @@ functionality:
   argument_groups:
     - name: Inputs
       arguments:
-        - name: --multiomics_rna
+        - name: --rna
           type: file
           direction: input
-        - name: --multiomics_atac
+        - name: --atac
           type: file
           direction: input
-        - name: --perturbation_data
+        - name: --evaluation_data
           type: file
           direction: input
         - name: --prediction
@@ -25,7 +25,7 @@ functionality:
         - name: --subsample
           type: integer
           direction: input
-          default: 2
+          default: -1
         - name: --reg_type
          type: string
          direction: input
@@ -49,15 +49,7 @@ functionality:
           required: false
           direction: input
           default: pearson
-        - name: --cell_type_specific
-          type: boolean
-          required: false
-          direction: input
-          default: true
-        - name: --normalize
-          type: boolean
-          required: false
-          direction: input
+
     - name: Outputs
       arguments:
         - name: "--scores"
@@ -96,10 +88,11 @@ functionality:
     - name: grn_methods/portia
     - name: grn_methods/grnboost2
     - name: grn_methods/scenic
+    # - name: grn_methods/genie3
     - name: grn_methods/ppcor #needs docker image
 
-    # - name: grn_methods/scgpt
+    - name: grn_methods/scgpt
 
     # ---- multiomics
     - name: grn_methods/celloracle
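In the run_benchmark config above, `--subsample` now defaults to -1 instead of 2. The config does not spell out the sentinel; assuming the common "non-positive means use everything" convention, the consuming metric would behave like this sketch (the helper name is hypothetical, and the real logic lives in the metric components):

```python
# Illustration of the assumed subsample semantics.
import numpy as np

def maybe_subsample(adata, subsample: int, seed: int = 0):
    if subsample <= 0:  # -1: evaluate on all observations
        return adata
    rng = np.random.default_rng(seed)
    idx = rng.choice(adata.n_obs, size=min(subsample, adata.n_obs), replace=False)
    return adata[idx]
```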
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 670554b67..5bf7a4621 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -18,6 +18,7 @@ workflow run_wf {
     grnboost2,
     ppcor,
     scenic,
+    scglue,
     pearson_corr,
     negative_control,
 
@@ -77,12 +78,9 @@ workflow run_wf {
       },
       // use 'fromState' to fetch the arguments the component requires from the overall state
       fromState: [
-        multiomics_rna: "multiomics_rna",
-        multiomics_atac: "multiomics_atac",
+        rna: "rna",
+        atac: "atac",
         tf_all: "tf_all",
-        perturbation_data:"perturbation_data",
-        cell_type_specific:"cell_type_specific",
-        normalize:"normalize",
         num_workers:"num_workers"
       ],
@@ -107,7 +105,7 @@ workflow run_wf {
       },
       // use 'fromState' to fetch the arguments the component requires from the overall state
       fromState: [
-        perturbation_data: "perturbation_data",
+        evaluation_data: "evaluation_data",
         prediction: "prediction",
         method_id: "method_id",
         subsample: "subsample",
@@ -116,7 +114,6 @@ workflow run_wf {
         consensus: "consensus",
         tf_all: "tf_all",
         layer:"layer",
-        cell_type_specific:"cell_type_specific"
       ],
       // use 'toState' to publish that component's outputs to the overall state
       toState: { id, output, state, comp ->
diff --git a/src/workflows/run_robustness_analysis_causal/config.novsh.yaml b/src/workflows/run_robustness_analysis_causal/config.novsh.yaml
deleted file mode 100644
index be89033a5..000000000
--- a/src/workflows/run_robustness_analysis_causal/config.novsh.yaml
+++ /dev/null
@@ -1,94 +0,0 @@
-functionality:
-  name: run_robustness_analysis_causal
-  namespace: "workflows"
-  info:
-    label: run_robustness_analysis_causal
-    summary: "Evaluates GRNs and provides scores using regression analysis."
-  argument_groups:
-    - name: Inputs
-      arguments:
-        - name: --multiomics_rna
-          type: file
-          direction: input
-        - name: --perturbation_data
-          type: file
-          direction: input
-        - name: --layer
-          type: string
-          direction: input
-        - name: --subsample
-          type: integer
-          direction: input
-          default: 200
-        - name: --reg_type
-          type: string
-          direction: input
-          default: ridge
-        - name: --method_id
-          type: string
-          direction: input
-          required: True
-          example: collectri
-        - name: --num_workers
-          type: integer
-          direction: input
-          required: True
-        - name: --consensus
-          type: file
-          required: false
-          direction: input
-          default: resources/prior/consensus.json
-        - name: --tf_all
-          type: file
-          required: false
-          direction: input
-        - name: --causal
-          type: boolean
-          required: false
-          direction: input
-        - name: --seed
-          type: integer
-          required: false
-          direction: input
-
-
-    - name: Outputs
-      arguments:
-        - name: "--scores"
-          type: file
-          required: true
-          direction: output
-          default: "scores.yaml"
-        - name: "--metric_configs"
-          type: file
-          required: true
-          direction: output
-          default: metric_configs.yaml
-    - name: Arguments
-      arguments:
-        - name: "--metric_ids"
-          type: string
-          multiple: true
-          description: A list of metric ids to run. If not specified, all metric will be run.
-
-  resources:
-    - type: nextflow_script
-      path: main.nf
-      entrypoint: run_wf
-    - type: file
-      path: ../../api/task_info.yaml
-  dependencies:
-    - name: common/extract_metadata
-      repository: openproblems
-    - name: metrics/regression_1
-    - name: metrics/regression_2
-    - name: control_methods/baseline_corr
-  repositories:
-    - name: openproblems
-      type: github
-      repo: openproblems-bio/openproblems
-      tag: v2.0.0
-platforms:
-  - type: nextflow
-    directives:
-      label: [ midtime, midmem, lowcpu ]
diff --git a/src/workflows/run_robustness_analysis_causal/main.nf b/src/workflows/run_robustness_analysis_causal/main.nf
deleted file mode 100644
index aaf3a1275..000000000
--- a/src/workflows/run_robustness_analysis_causal/main.nf
+++ /dev/null
@@ -1,118 +0,0 @@
-
-workflow auto {
-  findStatesTemp(params, meta.config)
-    | meta.workflow.run(
-      auto: [publish: "state"]
-    )
-}
-
-workflow run_wf {
-  take:
-  input_ch
-
-  main:
-
-  // construct list of metrics
-  metrics = [
-    regression_1,
-    regression_2
-  ]
-
-  /***************************
-   * RUN METRICS             *
-   ***************************/
-  score_ch = input_ch
-    | map{ id, state ->
-      [id, state + ["_meta": [join_id: id]]]
-    }
-
-    | baseline_corr.run(
-      fromState: [
-        multiomics_rna: "multiomics_rna",
-        tf_all: "tf_all",
-        causal:"causal"
-      ],
-      toState: {id, output, state ->
-        state + [
-          prediction: output.prediction
-        ]
-      }
-    )
-
-    // run all metrics
-    | runEach(
-      components: metrics,
-      filter: { id, state, comp ->
-        !state.metric_ids || state.metric_ids.contains(comp.config.functionality.name)
-      },
-      id: { id, state, comp ->
-        id + "." + comp.config.functionality.name
-      },
-      // use 'fromState' to fetch the arguments the component requires from the overall state
-      fromState: [
-        perturbation_data: "perturbation_data",
-        prediction: "prediction",
-        layer: "layer",
-        subsample: "subsample",
-        reg_type: "reg_type",
-        method_id: "method_id",
-        num_workers: "num_workers",
-        consensus: "consensus",
-        tf_all: "tf_all"
-      ],
-      // use 'toState' to publish that component's outputs to the overall state
-      toState: { id, output, state, comp ->
-        state + [
-          metric_id: comp.config.functionality.name,
-          metric_output: output.score
-        ]
-      }
-    )
-
-  output_ch = score_ch
-
-    // extract the scores
-    | extract_metadata.run(
-      key: "extract_scores",
-      fromState: [input: "metric_output"],
-      toState: { id, output, state ->
-        state + [
-          score_uns: readYaml(output.output).uns
-        ]
-      }
-    )
-
-    | joinStates { ids, states ->
-      assert states[0]._meta, "no _meta found in state[0]"
-      // store the metric configs in a file
-      def metric_configs = metrics.collect{it.config}
-      def metric_configs_yaml_blob = toYamlBlob(metric_configs)
-      def metric_configs_file = tempFile("metric_configs.yaml")
-      metric_configs_file.write(metric_configs_yaml_blob)
-
-      def task_info_file = meta.resources_dir.resolve("task_info.yaml")
-
-      // store the scores in a file
-      def score_uns = states.collect{it.score_uns}
-      def score_uns_yaml_blob = toYamlBlob(score_uns)
-      def score_uns_file = tempFile("score_uns.yaml")
-      score_uns_file.write(score_uns_yaml_blob)
-
-      def new_state = [
-        metric_configs: metric_configs_file,
-        scores: score_uns_file,
-        _meta: states[0]._meta
-      ]
-
-      ["output", new_state]
-    }
-
-  // merge all of the output data
-  | joinStates{ ids, states ->
-    def mergedStates = states.inject([:]) { acc, m -> acc + m }
-    [ids[0], mergedStates]
-  }
-
-  emit:
-  output_ch
-}