diff --git a/runs.ipynb b/runs.ipynb index 658343c22..d01d933d3 100644 --- a/runs.ipynb +++ b/runs.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -153,9 +153,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines/state.yaml to resources/results/benchmark_donor_0_baselines/state.yaml\n", - "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines/scores.yaml to resources/results/benchmark_donor_0_baselines/scores.yaml\n", "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines/trace.txt to resources/results/benchmark_donor_0_baselines/trace.txt\n", + "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines/scores.yaml to resources/results/benchmark_donor_0_baselines/scores.yaml\n", "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines/ridge.positive_control.positive_control.prediction.csv to resources/results/benchmark_donor_0_baselines/ridge.positive_control.positive_control.prediction.csv\n", "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines/ridge.pearson_corr.pearson_corr.prediction.csv to resources/results/benchmark_donor_0_baselines/ridge.pearson_corr.pearson_corr.prediction.csv\n", "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines/ridge.pearson_causal.pearson_causal.prediction.csv to resources/results/benchmark_donor_0_baselines/ridge.pearson_causal.pearson_causal.prediction.csv\n" @@ -165,68 +164,68 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 ex(False)_tf(-1)ex(True)_tf(-1)static-theta-0.0static-theta-0.5ex(False)_tf(-1)ex(True)_tf(-1)static-theta-0.0static-theta-0.5
pearson_corr0.2396200.5182170.5295020.524232pearson_corr0.2867280.5436110.7390060.553125
pearson_causal0.3646560.5924570.7413280.560490pearson_causal0.1522080.4365370.6321540.520246
positive_control0.1973070.5792380.5308480.584694positive_control0.1032430.5102310.5813970.543740
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -250,85 +249,90 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines_nonspecific_notnormalized/trace.txt to resources/results/benchmark_donor_0_baselines_nonspecific_notnormalized/trace.txt\n" + "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines_nonspecific_notnormalized/trace.txt to resources/results/benchmark_donor_0_baselines_nonspecific_notnormalized/trace.txt\n", + "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines_nonspecific_notnormalized/scores.yaml to resources/results/benchmark_donor_0_baselines_nonspecific_notnormalized/scores.yaml\n", + "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines_nonspecific_notnormalized/state.yaml to resources/results/benchmark_donor_0_baselines_nonspecific_notnormalized/state.yaml\n", + "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines_nonspecific_notnormalized/ridge.pearson_causal.pearson_causal.prediction.csv to resources/results/benchmark_donor_0_baselines_nonspecific_notnormalized/ridge.pearson_causal.pearson_causal.prediction.csv\n", + "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines_nonspecific_notnormalized/ridge.positive_control.positive_control.prediction.csv to resources/results/benchmark_donor_0_baselines_nonspecific_notnormalized/ridge.positive_control.positive_control.prediction.csv\n", + "download: s3://openproblems-data/resources/grn/results/benchmark_donor_0_baselines_nonspecific_notnormalized/ridge.pearson_corr.pearson_corr.prediction.csv to resources/results/benchmark_donor_0_baselines_nonspecific_notnormalized/ridge.pearson_corr.pearson_corr.prediction.csv\n" ] }, { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 ex(False)_tf(-1)ex(True)_tf(-1)static-theta-0.0static-theta-0.5ex(False)_tf(-1)ex(True)_tf(-1)static-theta-0.0static-theta-0.5
pearson_corr0.2867280.5436110.7390060.553125pearson_corr0.2867280.5436110.7390060.553125
pearson_causal0.1522080.4365370.6321540.520246pearson_causal0.1522080.4365370.6321540.520246
positive_control0.1032430.5102310.5813970.543740positive_control0.1032430.5102310.5813970.543740
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } diff --git a/scripts/run_benchmark_all.sh b/scripts/run_benchmark_all.sh index d7b41ba0e..9c26c4e1c 100644 --- a/scripts/run_benchmark_all.sh +++ b/scripts/run_benchmark_all.sh @@ -1,9 +1,9 @@ #!/bin/bash # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" -RUN_ID="benchmark_donor_0_baselines" -resources_dir="./resources/" -# resources_dir="s3://openproblems-data/resources/grn" +RUN_ID="benchmark_donor_0_baselines_nonspecific_notnormalized" +# resources_dir="./resources/" +resources_dir="s3://openproblems-data/resources/grn" publish_dir="${resources_dir}/results/${RUN_ID}" reg_type=ridge @@ -12,7 +12,7 @@ max_workers=10 layer='scgen_pearson' metric_ids="[regression_1, regression_2]" cell_type_specific=false #for controls -normalize=true +normalize=false only_hvgs=false # method_ids="[tigress, ennet, scsgl, pidc]" method_ids="[pearson_corr, pearson_causal, positive_control]" @@ -42,12 +42,12 @@ output_state: "state.yaml" publish_dir: "$publish_dir" HERE -nextflow run . \ - -main-script target/nextflow/workflows/run_benchmark/main.nf \ - -profile docker \ - -with-trace \ - -c src/common/nextflow_helpers/labels_ci.config \ - -params-file ${param_file} +# nextflow run . \ +# -main-script target/nextflow/workflows/run_benchmark/main.nf \ +# -profile docker \ +# -with-trace \ +# -c src/common/nextflow_helpers/labels_ci.config \ +# -params-file ${param_file} # ./tw-windows-x86_64.exe launch ` # https://github.com/openproblems-bio/task_grn_inference.git ` @@ -59,11 +59,11 @@ nextflow run . \ # --params-file ./params/benchmark_donor_0_default.yaml ` # --config src/common/nextflow_helpers/labels_tw.config -# ./tw launch https://github.com/openproblems-bio/task_grn_inference \ -# --revision build/main \ -# --pull-latest \ -# --main-script target/nextflow/workflows/run_benchmark/main.nf \ -# --workspace 53907369739130 \ -# --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ -# --params-file ${param_file} \ -# --config src/common/nextflow_helpers/labels_tw.config +./tw launch https://github.com/openproblems-bio/task_grn_inference \ + --revision build/main \ + --pull-latest \ + --main-script target/nextflow/workflows/run_benchmark/main.nf \ + --workspace 53907369739130 \ + --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ + --params-file ${param_file} \ + --config src/common/nextflow_helpers/labels_tw.config diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml index 9d070e42e..8d6e6b453 100644 --- a/src/api/comp_metric.yaml +++ b/src/api/comp_metric.yaml @@ -36,7 +36,7 @@ functionality: direction: input default: -2 description: number of samples randomly drawn from perturbation data - - name: --max_workers + - name: --num_workers type: integer direction: input default: 4 diff --git a/src/metrics/regression_1/main.py b/src/metrics/regression_1/main.py index f0aac022a..f2477c04e 100644 --- a/src/metrics/regression_1/main.py +++ b/src/metrics/regression_1/main.py @@ -217,7 +217,7 @@ def main(par): # net = net.groupby(['source', 'target']).mean().reset_index() subsample = par['subsample'] - max_workers = par['max_workers'] + max_workers = par['num_workers'] layer = par["layer"] if subsample == -1: pass diff --git a/src/metrics/regression_1/script.py b/src/metrics/regression_1/script.py index 4d1ef6183..54e95ef92 100644 --- a/src/metrics/regression_1/script.py +++ b/src/metrics/regression_1/script.py @@ -16,7 +16,7 @@ 'reg_type': 'ridge', 'layer': 'scgen_pearson', 'subsample': -2, - 'max_workers': 4, + 'num_workers': 4, } ## VIASH END # meta = { diff --git a/src/metrics/regression_2/main.py b/src/metrics/regression_2/main.py index e3aa94939..352e63cd3 100644 --- a/src/metrics/regression_2/main.py +++ b/src/metrics/regression_2/main.py @@ -316,11 +316,11 @@ def main(par: Dict[str, Any]) -> pd.DataFrame: print(f'Compute metrics for layer: {layer}', flush=True) # print(f'Dynamic approach:', flush=True) print(f'Static approach (theta=0):', flush=True) - score_static_min = static_approach(grn, n_features_theta_min, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['max_workers'], n_features_dict=n_features_dict, clip_scores=clip_scores) + score_static_min = static_approach(grn, n_features_theta_min, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers'], n_features_dict=n_features_dict, clip_scores=clip_scores) print(f'Static approach (theta=0.5):', flush=True) - score_static_median = static_approach(grn, n_features_theta_median, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['max_workers'], n_features_dict=n_features_dict, clip_scores=clip_scores) + score_static_median = static_approach(grn, n_features_theta_median, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers'], n_features_dict=n_features_dict, clip_scores=clip_scores) # print(f'Static approach (theta=1):', flush=True) - # score_static_max = static_approach(grn, n_features_theta_max, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['max_workers']) + # score_static_max = static_approach(grn, n_features_theta_max, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers']) # TODO: find a mathematically sound way to combine Z-scores and r2 scores results = { diff --git a/src/metrics/regression_2/script.py b/src/metrics/regression_2/script.py index 2c2368424..ad13c49c6 100644 --- a/src/metrics/regression_2/script.py +++ b/src/metrics/regression_2/script.py @@ -17,7 +17,7 @@ 'static_only': True, 'layer': 'scgen_pearson', 'subsample': -2, - 'max_workers': 4, + 'num_workers': 4, 'apply_tf': True, 'clip_scores': True, 'method_id': 'grnboost' diff --git a/src/utils/util.py b/src/utils/util.py index 0b78914e3..4d9d2e1ce 100644 --- a/src/utils/util.py +++ b/src/utils/util.py @@ -19,9 +19,9 @@ def corr_net(X, gene_names, par, tf_all, causal=False): net = np.dot(X.T, X) / X.shape[0] net = pd.DataFrame(net, index=gene_names, columns=gene_names) if causal: - net = net.sample(len(tf_all), axis=1, random_state=par['seed']) - else: net = net[tf_all] + else: + net = net.sample(len(tf_all), axis=1, random_state=par['seed']) net = net.reset_index() index_name = net.columns[0] net = net.melt(id_vars=index_name, var_name='source', value_name='weight') @@ -60,6 +60,7 @@ def create_corr_net(par): X = multiomics_rna.X if par['cell_type_specific']: + print('cell_type_specific') i = 0 for group in tqdm(np.unique(groups), desc="Processing groups"): X_sub = X[groups == group, :] diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index ff319a572..d1604cc86 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -30,7 +30,7 @@ functionality: type: string direction: input default: ridge - - name: --max_workers + - name: --num_workers type: integer direction: input required: True diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index 31e03c8ab..4520e47bf 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -83,7 +83,10 @@ workflow run_wf { multiomics_atac: "multiomics_atac", tf_all: "tf_all", perturbation_data:"perturbation_data", - cell_type_specific:"cell_type_specific" + cell_type_specific:"cell_type_specific", + normalize:"normalize", + only_hvgs:"only_hvgs", + num_workers:"num_workers" ], // use 'toState' to publish that component's outputs to the overall state @@ -112,7 +115,7 @@ workflow run_wf { method_id: "method_id", subsample: "subsample", reg_type: "reg_type", - max_workers: "max_workers", + num_workers: "num_workers", consensus: "consensus", tf_all: "tf_all", layer:"layer", diff --git a/src/workflows/run_grn_evaluation/config.vsh.yaml b/src/workflows/run_grn_evaluation/config.vsh.yaml index 2b1d7e3f8..f1ae053fc 100644 --- a/src/workflows/run_grn_evaluation/config.vsh.yaml +++ b/src/workflows/run_grn_evaluation/config.vsh.yaml @@ -32,7 +32,7 @@ functionality: direction: input required: True example: collectri - - name: --max_workers + - name: --num_workers type: integer direction: input required: True @@ -46,30 +46,6 @@ functionality: required: false direction: input default: resources/prior/consensus.json - # - name: --causal - # type: boolean - # required: false - # direction: input - # - name: --corr_method - # type: string - # required: false - # direction: input - # default: pearson - # - name: --cell_type_specific - # type: boolean - # required: false - # direction: input - # default: false - # - name: --metacell - # type: boolean - # required: false - # direction: input - # default: false - # - name: --impute - # type: boolean - # required: false - # direction: input - # default: false - name: Outputs arguments: diff --git a/src/workflows/run_grn_evaluation/main.nf b/src/workflows/run_grn_evaluation/main.nf index 0964bb183..32d9017f9 100644 --- a/src/workflows/run_grn_evaluation/main.nf +++ b/src/workflows/run_grn_evaluation/main.nf @@ -59,7 +59,7 @@ workflow run_wf { subsample: "subsample", reg_type: "reg_type", method_id: "method_id", - max_workers: "max_workers", + num_workers: "num_workers", consensus: "consensus", layer: "layer", tf_all: "tf_all" diff --git a/src/workflows/run_robustness_analysis/config.novsh.yaml b/src/workflows/run_robustness_analysis/config.novsh.yaml index 90ac3583c..a69cc1ba1 100644 --- a/src/workflows/run_robustness_analysis/config.novsh.yaml +++ b/src/workflows/run_robustness_analysis/config.novsh.yaml @@ -30,7 +30,7 @@ functionality: direction: input required: True example: collectri - - name: --max_workers + - name: --num_workers type: integer direction: input required: True diff --git a/src/workflows/run_robustness_analysis/main.nf b/src/workflows/run_robustness_analysis/main.nf index 83268f26a..1b38effed 100644 --- a/src/workflows/run_robustness_analysis/main.nf +++ b/src/workflows/run_robustness_analysis/main.nf @@ -49,7 +49,7 @@ workflow run_wf { subsample: "subsample", reg_type: "reg_type", method_id: "method_id", - max_workers: "max_workers", + num_workers: "num_workers", consensus: "consensus", tf_all: "tf_all" ], diff --git a/src/workflows/run_robustness_analysis_causal/config.novsh.yaml b/src/workflows/run_robustness_analysis_causal/config.novsh.yaml index 1641bcf4a..be89033a5 100644 --- a/src/workflows/run_robustness_analysis_causal/config.novsh.yaml +++ b/src/workflows/run_robustness_analysis_causal/config.novsh.yaml @@ -29,7 +29,7 @@ functionality: direction: input required: True example: collectri - - name: --max_workers + - name: --num_workers type: integer direction: input required: True diff --git a/src/workflows/run_robustness_analysis_causal/main.nf b/src/workflows/run_robustness_analysis_causal/main.nf index 31d82ccb2..aaf3a1275 100644 --- a/src/workflows/run_robustness_analysis_causal/main.nf +++ b/src/workflows/run_robustness_analysis_causal/main.nf @@ -56,7 +56,7 @@ workflow run_wf { subsample: "subsample", reg_type: "reg_type", method_id: "method_id", - max_workers: "max_workers", + num_workers: "num_workers", consensus: "consensus", tf_all: "tf_all" ],