diff --git a/runs.ipynb b/runs.ipynb
index 26833540d..5ade52ba0 100644
--- a/runs.ipynb
+++ b/runs.ipynb
@@ -25,11 +25,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 53,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "upload: resources/grn_models/default/negative_control.csv to s3://openproblems-data/resources/grn/grn_models/default/negative_control.csv\n",
+ "upload: resources/grn_models/d0_hvgs/negative_control.csv to s3://openproblems-data/resources/grn/grn_models/d0_hvgs/negative_control.csv\n",
+ "upload: resources/results/d0_hvgs/trace.txt to s3://openproblems-data/resources/grn/results/d0_hvgs/trace.txt\n",
+ "delete: s3://openproblems-data/resources/grn/results/d0_hvgs_baseline/trace.txt\n"
+ ]
+ }
+ ],
"source": [
- "\n",
"!aws s3 sync resources/grn-benchmark s3://openproblems-data/resources/grn/grn-benchmark --delete\n",
"!aws s3 sync resources/grn_models/ s3://openproblems-data/resources/grn/grn_models --delete\n",
"!aws s3 sync resources/prior/ s3://openproblems-data/resources/grn/prior --delete\n",
@@ -135,105 +145,26 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 54,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "zhao-KDunion. adata shape: (36199, 8442), GT size: (105136, 3), Gene overlap: (8425,)\n",
- "/viash_automount/tmp/viash-run-regression_1-8RZIHd.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.020129 0.03817 0.009021\n",
- "(3,) (3,)\n",
- "zhao-chipunion. adata shape: (36199, 8442), GT size: (60662, 3), Gene overlap: (8378,)\n",
- "/viash_automount/tmp/viash-run-regression_1-rLuWXt.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.006815 0.064118 0.028652\n",
- "(3,) (3,)\n",
- "zhao-chipunion_KDUnion_intersect. adata shape: (36199, 8442), GT size: (9019, 3), Gene overlap: (4493,)\n",
- "/viash_automount/tmp/viash-run-regression_1-nsE1Jg.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.004249 0.000488 -0.00188\n",
- "(3,) (3,)\n",
- "shalek-KDunion. adata shape: (1211, 9411), GT size: (148047, 3), Gene overlap: (8784,)\n",
- "/viash_automount/tmp/viash-run-regression_1-54532G.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.010243 0.104809 0.047283\n",
- "(3,) (3,)\n",
- "shalek-chipunion. adata shape: (1211, 9411), GT size: (96383, 3), Gene overlap: (8670,)\n",
- "/viash_automount/tmp/viash-run-regression_1-9tO5h1.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.003727 0.042922 0.019598\n",
- "(3,) (3,)\n",
- "shalek-chipunion_KDUnion_intersect. adata shape: (1211, 9411), GT size: (67705, 3), Gene overlap: (7852,)\n",
- "/viash_automount/tmp/viash-run-regression_1-5SN7ZR.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.002252 0.094459 0.046103\n",
- "(3,) (3,)\n",
- "han-KDunion. adata shape: (5520, 7465), GT size: (77400, 3), Gene overlap: (7387,)\n",
- "/viash_automount/tmp/viash-run-regression_1-eSKe1T.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.019109 0.031554 0.006222\n",
- "(3,) (3,)\n",
- "han-chipunion. adata shape: (5520, 7465), GT size: (160038, 3), Gene overlap: (7458,)\n",
- "/viash_automount/tmp/viash-run-regression_1-VnCHsA.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.013187 0.071785 0.029299\n",
- "(3,) (3,)\n",
- "han-chipunion_KDUnion_intersect. adata shape: (5520, 7465), GT size: (8463, 3), Gene overlap: (4141,)\n",
- "/viash_automount/tmp/viash-run-regression_1-0OpbTu.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.004217 0.004052 -0.000082\n",
- "(3,) (3,)\n",
- "jackson-KDunion. adata shape: (17396, 5736), GT size: (27433, 3), Gene overlap: (4785,)\n",
- "/viash_automount/tmp/viash-run-regression_1-wvATsv.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.078523 0.238183 0.07983\n",
- "(3,) (3,)\n",
- "jackson-chipunion. adata shape: (17396, 5736), GT size: (24481, 3), Gene overlap: (4898,)\n",
- "/viash_automount/tmp/viash-run-regression_1-GudAWm.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 -0.027772 0.186608 0.079418\n",
- "(3,) (3,)\n",
- "jackson-chipunion_KDUnion_intersect. adata shape: (17396, 5736), GT size: (2661, 3), Gene overlap: (1515,)\n",
- "/viash_automount/tmp/viash-run-regression_1-lrY1xV.py:56: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
- " output = ad.AnnData(\n",
- " ex(False)_tf(-1) ex(True)_tf(-1) Mean\n",
- "0 0.004694 0.321477 0.163086\n",
- "(3,) (3,)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "import subprocess\n",
- "import anndata as ad \n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "for cell_type in ['zhao', 'shalek', 'han', 'jackson']:\n",
- " adata = ad.read_h5ad(f'resources_local/mccalla_extended/{cell_type}.h5ad')\n",
- " adata.layers['norm'] = adata.X\n",
- " adata.obs['cell_type'] = 'onecelltype'\n",
- " adata.write(f'resources_local/mccalla_extended/{cell_type}.h5ad')\n",
- " subsample = min([10000, len(adata)])\n",
- " for GT in ['KDunion', 'chipunion', 'chipunion_KDUnion_intersect']:\n",
- " GT_df = pd.read_csv(f'resources_local/mccalla_extended/{cell_type}_{GT}.csv')\n",
- " gene_overlap = np.intersect1d(adata.var_names, GT_df.target.unique()).shape\n",
- " print(f\"{cell_type}-{GT}. adata shape: {adata.shape}, GT size: {GT_df.shape}, Gene overlap: {gene_overlap}\")\n",
- " command = f\"viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources_local/mccalla_extended/{cell_type}.h5ad --prediction resources_local/mccalla_extended/{cell_type}_{GT}.csv --layer norm --subsample {subsample} --apply_tf false --tf_all resources/prior/tf_all.csv --max_n_links -1 --verbose 1 --score output/{cell_type}_{GT}.h5ad\"\n",
- " subprocess.run(command, shell=True, check=True)"
+ "# import subprocess\n",
+ "# import anndata as ad \n",
+ "# import pandas as pd\n",
+ "# import numpy as np\n",
+ "# for cell_type in ['zhao', 'shalek', 'han', 'jackson']:\n",
+ "# adata = ad.read_h5ad(f'resources_local/mccalla_extended/{cell_type}.h5ad')\n",
+ "# adata.layers['norm'] = adata.X\n",
+ "# adata.obs['cell_type'] = 'onecelltype'\n",
+ "# adata.write(f'resources_local/mccalla_extended/{cell_type}.h5ad')\n",
+ "# subsample = min([10000, len(adata)])\n",
+ "# for GT in ['KDunion', 'chipunion', 'chipunion_KDUnion_intersect']:\n",
+ "# GT_df = pd.read_csv(f'resources_local/mccalla_extended/{cell_type}_{GT}.csv')\n",
+ "# gene_overlap = np.intersect1d(adata.var_names, GT_df.target.unique()).shape\n",
+ "# print(f\"{cell_type}-{GT}. adata shape: {adata.shape}, GT size: {GT_df.shape}, Gene overlap: {gene_overlap}\")\n",
+ "# command = f\"viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources_local/mccalla_extended/{cell_type}.h5ad --prediction resources_local/mccalla_extended/{cell_type}_{GT}.csv --layer norm --subsample {subsample} --apply_tf false --tf_all resources/prior/tf_all.csv --max_n_links -1 --verbose 1 --score output/{cell_type}_{GT}.h5ad\"\n",
+ "# subprocess.run(command, shell=True, check=True)"
]
},
{
@@ -243,297 +174,312 @@
"# d0_hvgs"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Scores"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
- "methods = [ 'pearson_corr', 'pearson_causal', 'positive_control', 'portia', 'ppcor', 'genie3', 'grnboost2', 'scenic', 'scglue', 'celloracle']"
+ "methods = [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'pearson_causal', 'portia', 'ppcor', 'genie3', 'grnboost2', 'scenic', 'scglue', 'celloracle']"
]
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 33,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "download: s3://openproblems-data/resources/grn/results/d0_hvgs/scores.yaml to resources/results/d0_hvgs/scores.yaml\n",
- "download: s3://openproblems-data/resources/grn/results/d0_hvgs/trace.txt to resources/results/d0_hvgs/trace.txt\n",
- "download: s3://openproblems-data/resources/grn/results/d0_hvgs/metric_configs.yaml to resources/results/d0_hvgs/metric_configs.yaml\n",
- "download: s3://openproblems-data/resources/grn/results/d0_hvgs/state.yaml to resources/results/d0_hvgs/state.yaml\n"
- ]
- },
{
"data": {
"text/html": [
"\n",
- "
\n",
+ "\n",
" \n",
" \n",
" | \n",
- " ex(False)_tf(-1) | \n",
- " ex(True)_tf(-1) | \n",
- " static-theta-0.0 | \n",
- " static-theta-0.5 | \n",
+ " ex(False)_tf(-1) | \n",
+ " ex(True)_tf(-1) | \n",
+ " static-theta-0.0 | \n",
+ " static-theta-0.5 | \n",
"
\n",
" \n",
" \n",
" \n",
- " pearson_corr | \n",
- " 0.238664 | \n",
- " 0.514612 | \n",
- " 0.529502 | \n",
- " 0.524232 | \n",
+ " collectri | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ "
\n",
+ " \n",
+ " negative_control | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
"
\n",
" \n",
- " pearson_causal | \n",
- " 0.355256 | \n",
- " 0.578753 | \n",
- " 0.741328 | \n",
- " 0.560490 | \n",
+ " positive_control | \n",
+ " 0.489147 | \n",
+ " 0.677155 | \n",
+ " 0.655407 | \n",
+ " 0.574608 | \n",
"
\n",
" \n",
- " positive_control | \n",
- " 0.489147 | \n",
- " 0.677155 | \n",
- " 0.655407 | \n",
- " 0.574608 | \n",
+ " pearson_corr | \n",
+ " 0.238664 | \n",
+ " 0.514612 | \n",
+ " 0.529502 | \n",
+ " 0.524232 | \n",
"
\n",
" \n",
- " portia | \n",
- " 0.148941 | \n",
- " 0.227248 | \n",
- " 0.451256 | \n",
- " 0.518048 | \n",
+ " pearson_causal | \n",
+ " 0.355256 | \n",
+ " 0.578753 | \n",
+ " 0.741328 | \n",
+ " 0.560490 | \n",
"
\n",
" \n",
- " ppcor | \n",
- " 0.022846 | \n",
- " 0.094107 | \n",
- " 0.396680 | \n",
- " 0.509874 | \n",
+ " portia | \n",
+ " 0.148941 | \n",
+ " 0.227248 | \n",
+ " 0.451256 | \n",
+ " 0.518048 | \n",
"
\n",
" \n",
- " genie3 | \n",
- " 0.372641 | \n",
- " 0.490357 | \n",
- " 0.754073 | \n",
- " 0.576580 | \n",
+ " ppcor | \n",
+ " 0.022846 | \n",
+ " 0.094107 | \n",
+ " 0.396680 | \n",
+ " 0.509874 | \n",
"
\n",
" \n",
- " grnboost2 | \n",
- " 0.381032 | \n",
- " 0.459860 | \n",
- " 0.781852 | \n",
- " 0.609075 | \n",
+ " genie3 | \n",
+ " 0.372641 | \n",
+ " 0.490357 | \n",
+ " 0.754073 | \n",
+ " 0.576580 | \n",
"
\n",
" \n",
- " scenic | \n",
- " 0.154399 | \n",
- " 0.206571 | \n",
- " 0.600839 | \n",
- " 0.574294 | \n",
+ " grnboost2 | \n",
+ " 0.381032 | \n",
+ " 0.459860 | \n",
+ " 0.781852 | \n",
+ " 0.609075 | \n",
"
\n",
" \n",
- " scglue | \n",
- " 0.078309 | \n",
- " 0.238859 | \n",
- " 0.448617 | \n",
- " 0.527076 | \n",
+ " scenic | \n",
+ " 0.154399 | \n",
+ " 0.206571 | \n",
+ " 0.600839 | \n",
+ " 0.574294 | \n",
"
\n",
" \n",
- " celloracle | \n",
- " 0.216897 | \n",
- " 0.311451 | \n",
- " 0.639556 | \n",
- " 0.580147 | \n",
+ " scglue | \n",
+ " 0.078309 | \n",
+ " 0.238859 | \n",
+ " 0.448617 | \n",
+ " 0.527076 | \n",
+ "
\n",
+ " \n",
+ " celloracle | \n",
+ " 0.216897 | \n",
+ " 0.311451 | \n",
+ " 0.639556 | \n",
+ " 0.580147 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 23,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"RUN_ID=\"d0_hvgs\" \n",
- "df_all = process_data(RUN_ID, models_all=methods)\n",
- "df_all.style.background_gradient()"
+ "df_scores = process_data(RUN_ID, models_all=methods)\n",
+ "df_scores.style.background_gradient()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Format resourcs used"
+ "## Format resourcs used"
]
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "/vol/tmp/users/jnourisa/ipykernel_2913022/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
+ "/vol/tmp/users/jnourisa/ipykernel_1653246/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
" df = pd.read_csv(io.StringIO(output), delim_whitespace=True)\n",
- "/vol/tmp/users/jnourisa/ipykernel_2913022/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
+ "/vol/tmp/users/jnourisa/ipykernel_1653246/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
" df = pd.read_csv(io.StringIO(output), delim_whitespace=True)\n",
- "/vol/tmp/users/jnourisa/ipykernel_2913022/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
+ "/vol/tmp/users/jnourisa/ipykernel_1653246/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
" df = pd.read_csv(io.StringIO(output), delim_whitespace=True)\n",
- "/vol/tmp/users/jnourisa/ipykernel_2913022/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
+ "/vol/tmp/users/jnourisa/ipykernel_1653246/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
" df = pd.read_csv(io.StringIO(output), delim_whitespace=True)\n",
- "/vol/tmp/users/jnourisa/ipykernel_2913022/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
+ "/vol/tmp/users/jnourisa/ipykernel_1653246/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
" df = pd.read_csv(io.StringIO(output), delim_whitespace=True)\n",
- "/vol/tmp/users/jnourisa/ipykernel_2913022/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
+ "/vol/tmp/users/jnourisa/ipykernel_1653246/1716007044.py:19: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n",
" df = pd.read_csv(io.StringIO(output), delim_whitespace=True)\n"
]
},
@@ -650,7 +596,7 @@
"scglue 35.933720 "
]
},
- "execution_count": 45,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -702,7 +648,7 @@
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -728,6 +674,7 @@
" | \n",
" %cpu | \n",
" peak_rss | \n",
+ " peak_vmem | \n",
" rchar | \n",
" wchar | \n",
" duration | \n",
@@ -738,6 +685,7 @@
" celloracle | \n",
" 799.6 | \n",
" 14.9 | \n",
+ " 35.4 | \n",
" 18.0 | \n",
" 86.1 | \n",
" 1.472222 | \n",
@@ -747,17 +695,16 @@
""
],
"text/plain": [
- " %cpu peak_rss rchar wchar duration\n",
- "celloracle 799.6 14.9 18.0 86.1 1.472222"
+ " %cpu peak_rss peak_vmem rchar wchar duration\n",
+ "celloracle 799.6 14.9 35.4 18.0 86.1 1.472222"
]
},
- "execution_count": 47,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "\n",
"# sequra runs\n",
"base_dir = 'resources/results/d0_hvgs_res/'\n",
"models = ['celloracle']\n",
@@ -780,11 +727,12 @@
"\n",
"\n",
"for i, method in enumerate(models):\n",
- " df = pd.read_csv(f'{base_dir}/{method}.txt', sep='\\t')[cols]\n",
+ " df = pd.read_csv(f'{base_dir}/{method}.txt', sep='\\t')\n",
+ " df = df[['%cpu', 'peak_rss', 'peak_vmem', 'rchar', 'wchar', 'duration']]\n",
" df = df.drop(1)\n",
"\n",
"\n",
- " for col in cols:\n",
+ " for col in df.columns:\n",
" if col=='%cpu':\n",
" df[col] = df[col].str.replace('%', '').astype(float)\n",
" elif col=='duration':\n",
@@ -803,7 +751,7 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -827,8 +775,8 @@
" \n",
" \n",
" | \n",
- " Peak memory | \n",
- " Duration | \n",
+ " Peak memory (GB) | \n",
+ " Duration (hour) | \n",
"
\n",
" \n",
" \n",
@@ -872,17 +820,17 @@
""
],
"text/plain": [
- " Peak memory Duration\n",
- "celloracle 14.900000 1.472222\n",
- "portia 46.943497 0.110556\n",
- "grnboost2 3.067471 1.568056\n",
- "scenic 30.356461 1.908056\n",
- "genie3 13.105103 16.682500\n",
- "ppcor 3.909119 0.556667\n",
- "scglue 29.917423 4.380278"
+ " Peak memory (GB) Duration (hour)\n",
+ "celloracle 14.900000 1.472222\n",
+ "portia 46.943497 0.110556\n",
+ "grnboost2 3.067471 1.568056\n",
+ "scenic 30.356461 1.908056\n",
+ "genie3 13.105103 16.682500\n",
+ "ppcor 3.909119 0.556667\n",
+ "scglue 29.917423 4.380278"
]
},
- "execution_count": 48,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -904,6 +852,479 @@
"df_res"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Merge scores with resources"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ex(False)_tf(-1) | \n",
+ " ex(True)_tf(-1) | \n",
+ " static-theta-0.0 | \n",
+ " static-theta-0.5 | \n",
+ " overall_score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " collectri | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " negative_control | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " positive_control | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.838274 | \n",
+ " 0.943410 | \n",
+ " 0.945421 | \n",
+ "
\n",
+ " \n",
+ " pearson_corr | \n",
+ " 0.487919 | \n",
+ " 0.759962 | \n",
+ " 0.677241 | \n",
+ " 0.860702 | \n",
+ " 0.696456 | \n",
+ "
\n",
+ " \n",
+ " pearson_causal | \n",
+ " 0.726277 | \n",
+ " 0.854683 | \n",
+ " 0.948169 | \n",
+ " 0.920231 | \n",
+ " 0.862340 | \n",
+ "
\n",
+ " \n",
+ " portia | \n",
+ " 0.304491 | \n",
+ " 0.335592 | \n",
+ " 0.577163 | \n",
+ " 0.850549 | \n",
+ " 0.516949 | \n",
+ "
\n",
+ " \n",
+ " ppcor | \n",
+ " 0.046705 | \n",
+ " 0.138974 | \n",
+ " 0.507359 | \n",
+ " 0.837129 | \n",
+ " 0.382542 | \n",
+ "
\n",
+ " \n",
+ " genie3 | \n",
+ " 0.761818 | \n",
+ " 0.724142 | \n",
+ " 0.964470 | \n",
+ " 0.946649 | \n",
+ " 0.849270 | \n",
+ "
\n",
+ " \n",
+ " grnboost2 | \n",
+ " 0.778974 | \n",
+ " 0.679105 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.864520 | \n",
+ "
\n",
+ " \n",
+ " scenic | \n",
+ " 0.315650 | \n",
+ " 0.305056 | \n",
+ " 0.768481 | \n",
+ " 0.942895 | \n",
+ " 0.583021 | \n",
+ "
\n",
+ " \n",
+ " scglue | \n",
+ " 0.160093 | \n",
+ " 0.352738 | \n",
+ " 0.573787 | \n",
+ " 0.865371 | \n",
+ " 0.487997 | \n",
+ "
\n",
+ " \n",
+ " celloracle | \n",
+ " 0.443420 | \n",
+ " 0.459941 | \n",
+ " 0.818001 | \n",
+ " 0.952505 | \n",
+ " 0.668467 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ex(False)_tf(-1) ex(True)_tf(-1) static-theta-0.0 \\\n",
+ "collectri 0.000000 0.000000 0.000000 \n",
+ "negative_control 0.000000 0.000000 0.000000 \n",
+ "positive_control 1.000000 1.000000 0.838274 \n",
+ "pearson_corr 0.487919 0.759962 0.677241 \n",
+ "pearson_causal 0.726277 0.854683 0.948169 \n",
+ "portia 0.304491 0.335592 0.577163 \n",
+ "ppcor 0.046705 0.138974 0.507359 \n",
+ "genie3 0.761818 0.724142 0.964470 \n",
+ "grnboost2 0.778974 0.679105 1.000000 \n",
+ "scenic 0.315650 0.305056 0.768481 \n",
+ "scglue 0.160093 0.352738 0.573787 \n",
+ "celloracle 0.443420 0.459941 0.818001 \n",
+ "\n",
+ " static-theta-0.5 overall_score \n",
+ "collectri 0.000000 0.000000 \n",
+ "negative_control 0.000000 0.000000 \n",
+ "positive_control 0.943410 0.945421 \n",
+ "pearson_corr 0.860702 0.696456 \n",
+ "pearson_causal 0.920231 0.862340 \n",
+ "portia 0.850549 0.516949 \n",
+ "ppcor 0.837129 0.382542 \n",
+ "genie3 0.946649 0.849270 \n",
+ "grnboost2 1.000000 0.864520 \n",
+ "scenic 0.942895 0.583021 \n",
+ "scglue 0.865371 0.487997 \n",
+ "celloracle 0.952505 0.668467 "
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# create ranking \n",
+ "df_scores = df_scores.fillna(0)\n",
+ "df_scores[df_scores < 0]=0\n",
+ "df_scores = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n",
+ "df_scores['overall_score'] = df_scores.mean(axis=1)\n",
+ "# df_scores['rank'] = df_scores.mean(axis=1).rank(ascending=False).astype(int)\n",
+ "\n",
+ "df_scores"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Peak memory (GB) | \n",
+ " Duration (hour) | \n",
+ " ex(False)_tf(-1) | \n",
+ " ex(True)_tf(-1) | \n",
+ " static-theta-0.0 | \n",
+ " static-theta-0.5 | \n",
+ " overall_score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " collectri | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " negative_control | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " positive_control | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.838274 | \n",
+ " 0.943410 | \n",
+ " 0.945421 | \n",
+ "
\n",
+ " \n",
+ " pearson_corr | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.487919 | \n",
+ " 0.759962 | \n",
+ " 0.677241 | \n",
+ " 0.860702 | \n",
+ " 0.696456 | \n",
+ "
\n",
+ " \n",
+ " pearson_causal | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.726277 | \n",
+ " 0.854683 | \n",
+ " 0.948169 | \n",
+ " 0.920231 | \n",
+ " 0.862340 | \n",
+ "
\n",
+ " \n",
+ " portia | \n",
+ " 46.943497 | \n",
+ " 0.110556 | \n",
+ " 0.304491 | \n",
+ " 0.335592 | \n",
+ " 0.577163 | \n",
+ " 0.850549 | \n",
+ " 0.516949 | \n",
+ "
\n",
+ " \n",
+ " ppcor | \n",
+ " 3.909119 | \n",
+ " 0.556667 | \n",
+ " 0.046705 | \n",
+ " 0.138974 | \n",
+ " 0.507359 | \n",
+ " 0.837129 | \n",
+ " 0.382542 | \n",
+ "
\n",
+ " \n",
+ " genie3 | \n",
+ " 13.105103 | \n",
+ " 16.682500 | \n",
+ " 0.761818 | \n",
+ " 0.724142 | \n",
+ " 0.964470 | \n",
+ " 0.946649 | \n",
+ " 0.849270 | \n",
+ "
\n",
+ " \n",
+ " grnboost2 | \n",
+ " 3.067471 | \n",
+ " 1.568056 | \n",
+ " 0.778974 | \n",
+ " 0.679105 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.864520 | \n",
+ "
\n",
+ " \n",
+ " scenic | \n",
+ " 30.356461 | \n",
+ " 1.908056 | \n",
+ " 0.315650 | \n",
+ " 0.305056 | \n",
+ " 0.768481 | \n",
+ " 0.942895 | \n",
+ " 0.583021 | \n",
+ "
\n",
+ " \n",
+ " scglue | \n",
+ " 29.917423 | \n",
+ " 4.380278 | \n",
+ " 0.160093 | \n",
+ " 0.352738 | \n",
+ " 0.573787 | \n",
+ " 0.865371 | \n",
+ " 0.487997 | \n",
+ "
\n",
+ " \n",
+ " celloracle | \n",
+ " 14.900000 | \n",
+ " 1.472222 | \n",
+ " 0.443420 | \n",
+ " 0.459941 | \n",
+ " 0.818001 | \n",
+ " 0.952505 | \n",
+ " 0.668467 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Peak memory (GB) Duration (hour) ex(False)_tf(-1) \\\n",
+ "collectri 0.000000 0.000000 0.000000 \n",
+ "negative_control 0.000000 0.000000 0.000000 \n",
+ "positive_control 0.000000 0.000000 1.000000 \n",
+ "pearson_corr 0.000000 0.000000 0.487919 \n",
+ "pearson_causal 0.000000 0.000000 0.726277 \n",
+ "portia 46.943497 0.110556 0.304491 \n",
+ "ppcor 3.909119 0.556667 0.046705 \n",
+ "genie3 13.105103 16.682500 0.761818 \n",
+ "grnboost2 3.067471 1.568056 0.778974 \n",
+ "scenic 30.356461 1.908056 0.315650 \n",
+ "scglue 29.917423 4.380278 0.160093 \n",
+ "celloracle 14.900000 1.472222 0.443420 \n",
+ "\n",
+ " ex(True)_tf(-1) static-theta-0.0 static-theta-0.5 \\\n",
+ "collectri 0.000000 0.000000 0.000000 \n",
+ "negative_control 0.000000 0.000000 0.000000 \n",
+ "positive_control 1.000000 0.838274 0.943410 \n",
+ "pearson_corr 0.759962 0.677241 0.860702 \n",
+ "pearson_causal 0.854683 0.948169 0.920231 \n",
+ "portia 0.335592 0.577163 0.850549 \n",
+ "ppcor 0.138974 0.507359 0.837129 \n",
+ "genie3 0.724142 0.964470 0.946649 \n",
+ "grnboost2 0.679105 1.000000 1.000000 \n",
+ "scenic 0.305056 0.768481 0.942895 \n",
+ "scglue 0.352738 0.573787 0.865371 \n",
+ "celloracle 0.459941 0.818001 0.952505 \n",
+ "\n",
+ " overall_score \n",
+ "collectri 0.000000 \n",
+ "negative_control 0.000000 \n",
+ "positive_control 0.945421 \n",
+ "pearson_corr 0.696456 \n",
+ "pearson_causal 0.862340 \n",
+ "portia 0.516949 \n",
+ "ppcor 0.382542 \n",
+ "genie3 0.849270 \n",
+ "grnboost2 0.864520 \n",
+ "scenic 0.583021 \n",
+ "scglue 0.487997 \n",
+ "celloracle 0.668467 "
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_combined = pd.concat([df_res, df_scores], axis=1).fillna(0)\n",
+ "df_combined = df_combined.reindex(methods)\n",
+ "df_combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/jnourisa/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/core/arraylike.py:399: RuntimeWarning: divide by zero encountered in log\n",
+ " result = getattr(ufunc, method)(*inputs, **kwargs)\n",
+ "/home/jnourisa/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/core/arraylike.py:399: RuntimeWarning: divide by zero encountered in log\n",
+ " result = getattr(ufunc, method)(*inputs, **kwargs)\n"
+ ]
+ }
+ ],
+ "source": [
+ "summary_all_file = 'output/summary_d0_hvgs.tsv'\n",
+ "summary_plot = 'output/summary_d0_hvgs.pdf'\n",
+ "df_all = df_combined.copy()\n",
+ "# df_combined.index = df_combined.index.map(surragate_names)\n",
+ "\n",
+ "\n",
+ "df_all.index.name = 'method_name' \n",
+ "# df_all['mean_cpu_pct_scaled'] = df_all['%CPU']/df_all['%CPU'].max()\n",
+ "df_all['mean_peak_memory_log_scaled'] = np.log(df_all['Peak memory (GB)'])\n",
+ "# df_all['mean_peak_memory_str'] = [f\"{int(value/ 1E9)}\" for value in df_all['Peak memory']]\n",
+ "df_all['mean_peak_memory_str'] = df_all['Peak memory (GB)']\n",
+ "# df_all[\"mean_disk_read_log_scaled\"] = np.log(df_all['Disk read'])\n",
+ "# df_all[\"mean_disk_read_str\"] = [f\"{int(value/ 1E9)}\" for value in df_all['Disk read']]\n",
+ "# df_all[\"mean_disk_write_log_scaled\"] = np.log(df_all['Disk write'])\n",
+ "# df_all[\"mean_disk_write_str\"] = [f\"{int(value/ 1E9)}\" for value in df_all['Disk write']]\n",
+ "df_all[\"mean_duration_log_scaled\"] = np.log(df_all['Duration (hour)'])\n",
+ "# df_all[\"mean_duration_str\"] = [f\"{int(value/ (60*60))}\" for value in df_all['Duration']]\n",
+ "df_all[\"mean_duration_str\"] = df_all['Duration (hour)']\n",
+ "\n",
+ "\n",
+ "df_all = df_all.reset_index()\n",
+ "df_all.to_csv(summary_all_file, sep='\\t')\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "── \u001b[1mAttaching packages\u001b[22m ─────────────────────────────────────── tidyverse 1.3.1 ──\n",
+ "\u001b[32m✔\u001b[39m \u001b[34mggplot2\u001b[39m 3.3.6 \u001b[32m✔\u001b[39m \u001b[34mpurrr \u001b[39m 0.3.4\n",
+ "\u001b[32m✔\u001b[39m \u001b[34mtibble \u001b[39m 3.1.7 \u001b[32m✔\u001b[39m \u001b[34mdplyr \u001b[39m 1.0.9\n",
+ "\u001b[32m✔\u001b[39m \u001b[34mtidyr \u001b[39m 1.2.0 \u001b[32m✔\u001b[39m \u001b[34mstringr\u001b[39m 1.4.0\n",
+ "\u001b[32m✔\u001b[39m \u001b[34mreadr \u001b[39m 2.1.2 \u001b[32m✔\u001b[39m \u001b[34mforcats\u001b[39m 0.5.1\n",
+ "── \u001b[1mConflicts\u001b[22m ────────────────────────────────────────── tidyverse_conflicts() ──\n",
+ "\u001b[31m✖\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mfilter()\u001b[39m masks \u001b[34mstats\u001b[39m::filter()\n",
+ "\u001b[31m✖\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mlag()\u001b[39m masks \u001b[34mstats\u001b[39m::lag()\n",
+ "\u001b[?25hError in library(funkyheatmap) : \n",
+ " there is no package called ‘funkyheatmap’\n",
+ "Execution halted\n",
+ "\u001b[?25h"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "!Rscript ../grn_benchmark/src/summary_figure.R {summary_all_file} {summary_plot}\n"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
diff --git a/scripts/run_benchmark_all.sh b/scripts/run_benchmark_all.sh
index 44b382edf..86fb2fbc3 100644
--- a/scripts/run_benchmark_all.sh
+++ b/scripts/run_benchmark_all.sh
@@ -1,7 +1,6 @@
#!/bin/bash
-# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
-RUN_ID="celloracle_d0_hvg"
+RUN_ID="d0_hvgs_baseline"
# resources_dir="./resources/"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="${resources_dir}/results/${RUN_ID}"
@@ -13,10 +12,7 @@ layer='scgen_pearson'
metric_ids="[regression_1, regression_2]"
cell_type_specific=false #for controls
normalize=false
-only_hvgs=true
-# method_ids="[tigress, ennet, scsgl, pidc]"
-# method_ids="[pearson_corr, pearson_causal, positive_control]"
-method_ids="[celloracle]"
+method_ids="[pearson_corr, pearson_causal, positive_control]"
param_file="./params/${RUN_ID}.yaml"
@@ -27,7 +23,7 @@ param_list:
metric_ids: $metric_ids
method_ids: $method_ids
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
- multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna_0.h5ad
+ multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna_0_hvgs.h5ad
multiomics_atac: ${resources_dir}/grn-benchmark/multiomics_atac_0.h5ad
reg_type: $reg_type
subsample: $subsample
@@ -37,7 +33,6 @@ param_list:
tf_all: ${resources_dir}/prior/tf_all.csv
cell_type_specific: ${cell_type_specific}
normalize: ${normalize}
- only_hvgs: ${only_hvgs}
output_state: "state.yaml"
publish_dir: "$publish_dir"
diff --git a/src/control_methods/pearson/script.py b/src/control_methods/pearson/script.py
index dde473edd..1674312c1 100644
--- a/src/control_methods/pearson/script.py
+++ b/src/control_methods/pearson/script.py
@@ -7,7 +7,6 @@
'max_n_links': 50000,
'prediction': 'resources/grn_models/donor_0_default/pearson.csv',
"seed": 32,
- 'only_hvgs': True,
'normalize': False
}
## VIASH END
diff --git a/src/utils/util.py b/src/utils/util.py
index d8c395edc..9339483ce 100644
--- a/src/utils/util.py
+++ b/src/utils/util.py
@@ -50,10 +50,6 @@ def process_data(adata, par):
adata.X = adata.X.toarray() # You can also use .todense(), but .toarray() gives a NumPy array directly
else:
print("adata.X is already dense.")
- if par['only_hvgs']:
- print('Subsetting data to hvgs')
- adata = adata[:, adata.var.hvg_counts]
- print('New dimension of data: ', adata.shape)
def create_corr_net(par):
print(par)
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index d1604cc86..ed8361f9c 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -58,10 +58,7 @@ functionality:
type: boolean
required: false
direction: input
- - name: --only_hvgs
- type: boolean
- required: false
- direction: input
+
- name: Outputs
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 4520e47bf..17ee2fb90 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -85,7 +85,6 @@ workflow run_wf {
perturbation_data:"perturbation_data",
cell_type_specific:"cell_type_specific",
normalize:"normalize",
- only_hvgs:"only_hvgs",
num_workers:"num_workers"
],