Simplify Pearson correlation baseline: fold pearson_causal into pearson

openproblems-bio · Sep 23, 2024 · 59b4e9f · 59b4e9f
1 parent 347b160
commit 59b4e9f
Show file tree

Hide file tree

Showing 10 changed files with 17 additions and 271 deletions.
diff --git a/runs.ipynb b/runs.ipynb
@@ -2332,24 +2332,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'reg_type': 'ridge', 'write_dir': 'resources/results/robustness_analysis', 'perturbation_data': 'resources/grn-benchmark/perturbation_data.h5ad', 'cell_type_specific': False, 'normalize': False, 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad', 'tf_all': 'resources/prior/tf_all.csv', 'max_n_links': 50000, 'apply_tf': False, 'subsample': -2, 'verbose': 2, 'binarize': True, 'num_workers': 20, 'consensus': 'resources/prior/consensus-num-regulators.json', 'static_only': True, 'clip_scores': True, 'layer': 'scgen_pearson', 'prediction': 'resources/results/robustness_analysis/corr/corr_causal.csv', 'causal': False}\n",
+      "{'reg_type': 'ridge', 'write_dir': 'resources/results/robustness_analysis', 'perturbation_data': 'resources/grn-benchmark/perturbation_data.h5ad', 'cell_type_specific': False, 'normalize': False, 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad', 'tf_all': 'resources/prior/tf_all.csv', 'max_n_links': 50000, 'apply_tf': False, 'subsample': -2, 'verbose': 2, 'binarize': True, 'num_workers': 20, 'consensus': 'resources/prior/consensus-num-regulators.json', 'static_only': True, 'clip_scores': True, 'layer': 'scgen_pearson', 'prediction': 'resources/results/robustness_analysis/corr/corr_causal.csv', 'causal': True}\n",
       "Read data\n",
-      "Random subsetting\n",
+      "Causal subsetting\n",
+      "       target  source    weight\n",
+      "0  AC107068.2   NFXL1  0.831974\n",
+      "1        FBN1   NR4A3  0.763036\n",
+      "2       BIRC3   NR4A3  0.751467\n",
+      "3      MFSD12  HMG20B  0.747598\n",
+      "4   STX17-AS1   NR4A3  0.744736\n",
       "Traceback (most recent call last):\n",
-      "  File \"/home/jnourisa/projs/ongoing/task_grn_inference/src/robustness_analysis/script_all.py\", line 126, in <module>\n",
-      "    net = create_corr_net(par)\n",
-      "  File \"/home/jnourisa/projs/ongoing/task_grn_inference/src/utils/util.py\", line 97, in create_corr_net\n",
-      "    grn = corr_net(X, gene_names, par, tf_all, par['causal'])\n",
-      "  File \"/home/jnourisa/projs/ongoing/task_grn_inference/src/utils/util.py\", line 49, in corr_net\n",
-      "    net = net.sample(len(tf_all), axis=1, random_state=par['seed'])\n",
-      "KeyError: 'seed'\n"
+      "  File \"/home/jnourisa/projs/ongoing/task_grn_inference/src/robustness_analysis/script_all.py\", line 128, in <module>\n",
+      "    aa\n",
+      "NameError: name 'aa' is not defined\n"
      ]
     }
    ],
@@ -2375,205 +2377,6 @@
     "scores_causal = pd.read_csv(\"resources/results/robustness_analysis/corr/scores_causal.csv\", index_col=0)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>S1</th>\n",
-       "      <th>S2</th>\n",
-       "      <th>static-theta-0.0</th>\n",
-       "      <th>static-theta-0.5</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.129438</td>\n",
-       "      <td>0.146122</td>\n",
-       "      <td>0.395947</td>\n",
-       "      <td>0.531279</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "         S1        S2  static-theta-0.0  static-theta-0.5\n",
-       "0  0.129438  0.146122          0.395947          0.531279"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "scores_causal"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>S1</th>\n",
-       "      <th>S2</th>\n",
-       "      <th>static-theta-0.0</th>\n",
-       "      <th>static-theta-0.5</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.308404</td>\n",
-       "      <td>0.530900</td>\n",
-       "      <td>0.696251</td>\n",
-       "      <td>0.543549</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.265614</td>\n",
-       "      <td>0.475809</td>\n",
-       "      <td>0.523504</td>\n",
-       "      <td>0.541446</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.290910</td>\n",
-       "      <td>0.491693</td>\n",
-       "      <td>0.678848</td>\n",
-       "      <td>0.545381</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.279864</td>\n",
-       "      <td>0.488906</td>\n",
-       "      <td>0.594799</td>\n",
-       "      <td>0.533744</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.287134</td>\n",
-       "      <td>0.505098</td>\n",
-       "      <td>0.763991</td>\n",
-       "      <td>0.550709</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.275764</td>\n",
-       "      <td>0.503644</td>\n",
-       "      <td>0.463909</td>\n",
-       "      <td>0.535066</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.279247</td>\n",
-       "      <td>0.498071</td>\n",
-       "      <td>0.609557</td>\n",
-       "      <td>0.534484</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.271350</td>\n",
-       "      <td>0.468998</td>\n",
-       "      <td>0.721520</td>\n",
-       "      <td>0.538930</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.277122</td>\n",
-       "      <td>0.484159</td>\n",
-       "      <td>0.652754</td>\n",
-       "      <td>0.537736</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.299330</td>\n",
-       "      <td>0.537139</td>\n",
-       "      <td>0.710375</td>\n",
-       "      <td>0.540515</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>100 rows × 4 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "          S1        S2  static-theta-0.0  static-theta-0.5\n",
-       "0   0.308404  0.530900          0.696251          0.543549\n",
-       "0   0.265614  0.475809          0.523504          0.541446\n",
-       "0   0.290910  0.491693          0.678848          0.545381\n",
-       "0   0.279864  0.488906          0.594799          0.533744\n",
-       "0   0.287134  0.505098          0.763991          0.550709\n",
-       "..       ...       ...               ...               ...\n",
-       "0   0.275764  0.503644          0.463909          0.535066\n",
-       "0   0.279247  0.498071          0.609557          0.534484\n",
-       "0   0.271350  0.468998          0.721520          0.538930\n",
-       "0   0.277122  0.484159          0.652754          0.537736\n",
-       "0   0.299330  0.537139          0.710375          0.540515\n",
-       "\n",
-       "[100 rows x 4 columns]"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "scores_corr"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 14,

diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
@@ -52,11 +52,6 @@ functionality:
       direction: input
       default: false
       description: normalize rna seq data before inference. currently, it's only applicable to baseline models
-    - name: --only_hvgs
-      type: boolean
-      direction: input
-      default: false
-      description: subset rna seq data to only 7000 hvgs to reduce dimensionality
 
   test_resources:
     - type: python_script

diff --git a/src/control_methods/pearson/script.py b/src/control_methods/pearson/script.py
@@ -14,7 +14,7 @@
 sys.path.append(meta["resources_dir"])
 from util import create_corr_net
 
-par['causal'] = False
+par['causal'] = True
 net = create_corr_net(par)
 print('Output GRN')
 net.to_csv(par['prediction'])
diff --git a/src/control_methods/pearson_causal/config.vsh.yaml b/src/control_methods/pearson_causal/config.vsh.yaml
diff --git a/src/control_methods/pearson_causal/script.py b/src/control_methods/pearson_causal/script.py
diff --git a/src/control_methods/pearson_causal/test.sh b/src/control_methods/pearson_causal/test.sh
diff --git a/src/metrics/regression_2/config.vsh.yaml b/src/metrics/regression_2/config.vsh.yaml
@@ -10,6 +10,8 @@ functionality:
     - type: python_script
       path: script.py
     - path: main.py
+    - path: /src/utils/util.py
+      dest: util.py
   arguments:
     - name: --consensus
       type: file

diff --git a/src/robustness_analysis/script_all.py b/src/robustness_analysis/script_all.py
@@ -43,7 +43,6 @@
 def run_reg(par):
   from metrics.regression_1.main import main 
   reg1 = main(par)
-  return reg1
   from metrics.regression_2.main import main 
   reg2 = main(par)
   score = pd.concat([reg1, reg2], axis=1)
@@ -98,6 +97,7 @@ def run_reg(par):
     'static_only': True,
     'clip_scores': True,
     'layer': 'scgen_pearson',
+    'seed': 32
   }
 
   # # run for corr 
@@ -124,7 +124,7 @@ def run_reg(par):
   par['prediction'] = f"{par['write_dir']}/corr/corr_causal.csv"
   par['causal'] = True
   net = create_corr_net(par)
+
   net.to_csv(par['prediction'])
   score = run_reg(par)
-  print(score)
   score.to_csv(f"{par['write_dir']}/corr/scores_causal.csv")
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
@@ -109,7 +109,6 @@ functionality:
     - name: grn_methods/scglue
     # ---- baselines
     - name: control_methods/pearson_corr
-    - name: control_methods/pearson_causal
     - name: control_methods/negative_control
     - name: control_methods/positive_control
     # -- needs development

diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
@@ -21,7 +21,6 @@ workflow run_wf {
     scenic,
 
     pearson_corr,
-    pearson_causal,
     negative_control,
     positive_control,