Commit

initial commit for proliferation screen
jykr committed Nov 9, 2023
1 parent bf071a8 commit 568c599
Showing 7 changed files with 345 additions and 544 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -248,7 +248,7 @@ bean-filter my_sorting_screen.h5ad \

## `bean-run`: Quantify variant effects
BEAN uses a Bayesian network to incorporate gRNA editing outcomes and provide a posterior estimate of the variant phenotype. The Bayesian network reflects the data generation process. Briefly,
-1. Cellular phenotype is modeled as a Gaussian mixture of the wild-type phenotype and the variant phenotype.
+1. Cellular phenotype (either the quantity cells are sorted on in a sorting screen, or log(proliferation rate) in a survival screen) is modeled as a Gaussian mixture of the wild-type phenotype and the variant phenotype.
2. The weights of the mixture components are inferred from the reporter editing outcome and the chromatin accessibility of the loci.
3. Cells carrying each gRNA, which follow this mixture distribution, are sorted by phenotypic quantile to produce the gRNA counts (a toy sketch of this process is given below).
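A toy NumPy sketch of this generative process (illustrative only, not BEAN's Pyro implementation; `edit_rate`, the variant effect size, and the five-bin sorting scheme below are made-up values):

```python
import numpy as np

rng = np.random.default_rng(0)
n_cells = 100_000

# Step 2 (assumed value): mixture weight, i.e. the fraction of cells carrying the edit,
# which BEAN infers from the reporter editing outcome and chromatin accessibility.
edit_rate = 0.4

# Step 1: phenotype of cells with one gRNA is a mixture of wild-type and variant Gaussians.
mu_wt, sd_wt = 0.0, 1.0            # wild-type component (standardized)
mu_variant, sd_variant = 0.8, 1.0  # hypothetical variant effect
is_edited = rng.random(n_cells) < edit_rate
phenotype = np.where(
    is_edited,
    rng.normal(mu_variant, sd_variant, n_cells),
    rng.normal(mu_wt, sd_wt, n_cells),
)

# Step 3: cells are sorted into phenotypic quantile bins (defined on the bulk,
# mostly wild-type population), yielding the per-bin gRNA counts.
bulk = rng.normal(mu_wt, sd_wt, n_cells)
edges = np.quantile(bulk, [0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
edges[0], edges[-1] = -np.inf, np.inf
counts_per_bin, _ = np.histogram(phenotype, bins=edges)
print(counts_per_bin)  # a positive-effect variant is enriched in the upper bins
```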

@@ -258,7 +258,7 @@ For the full detail, see the method section of the [BEAN manuscript](https://www

<br></br>
```bash
-bean-run variant[tiling] my_sorting_screen_filtered.h5ad \
+bean-run sorting[survival] variant[tiling] my_sorting_screen_filtered.h5ad \
[--uniform-edit, --scale-by-acc [--acc-bw-path accessibility_signal.bw, --acc-col accessibility]] \
-o output_prefix/ \
--fit-negctrl
1 change: 0 additions & 1 deletion bean/model/model.py
@@ -91,7 +91,6 @@ def NormalModel(
                obs=data.X_masked.permute(0, 2, 1),
            )
            if use_bcmatch:
-               print(f"Use_bcmatch:{use_bcmatch}")
                a_bcmatch = get_alpha(
                    expected_guide_p,
                    data.size_factor_bcmatch,
39 changes: 24 additions & 15 deletions bean/model/readwrite.py
@@ -57,41 +57,50 @@ def write_result_table(
    adjust_confidence_negatives: np.ndarray = None,
    guide_index: Optional[Sequence[str]] = None,
    guide_acc: Optional[Sequence] = None,
+   sd_is_fitted: bool = True,
    return_result: bool = False,
) -> Union[pd.DataFrame, None]:
    """Combine target information and scores to write result table to a csv file or return it."""
    if param_hist_dict["params"]["mu_loc"].dim() == 2:
        mu = param_hist_dict["params"]["mu_loc"].detach()[:, 0].cpu().numpy()
        mu_sd = param_hist_dict["params"]["mu_scale"].detach()[:, 0].cpu().numpy()
-       sd = param_hist_dict["params"]["sd_loc"].detach().exp()[:, 0].cpu().numpy()
+       if sd_is_fitted:
+           sd = param_hist_dict["params"]["sd_loc"].detach().exp()[:, 0].cpu().numpy()
    elif param_hist_dict["params"]["mu_loc"].dim() == 1:
        mu = param_hist_dict["params"]["mu_loc"].detach().cpu().numpy()
        mu_sd = param_hist_dict["params"]["mu_scale"].detach().cpu().numpy()
-       sd = param_hist_dict["params"]["sd_loc"].detach().exp().cpu().numpy()
+       if sd_is_fitted:
+           sd = param_hist_dict["params"]["sd_loc"].detach().exp().cpu().numpy()
    else:
        raise ValueError(
            f'`mu_loc` has invalid shape of {param_hist_dict["params"]["mu_loc"].shape}'
        )
-   fit_df = pd.DataFrame(
-       {
-           "mu": mu,
-           "mu_sd": mu_sd,
-           "mu_z": mu / mu_sd,
-           "sd": sd,
-       }
-   )
+   param_dict = {
+       "mu": mu,
+       "mu_sd": mu_sd,
+       "mu_z": mu / mu_sd,
+   }
+   if sd_is_fitted:
+       param_dict["sd"] = sd
+   fit_df = pd.DataFrame(param_dict)
    fit_df["novl"] = get_novl(fit_df, "mu", "mu_sd")
    if "negctrl" in param_hist_dict.keys():
        print("Normalizing with common negative control distribution")
        mu0 = param_hist_dict["negctrl"]["params"]["mu_loc"].detach().cpu().numpy()
-       sd0 = (
-           param_hist_dict["negctrl"]["params"]["sd_loc"].detach().exp().cpu().numpy()
-       )
-       print(f"Fitted mu0={mu0}, sd0={sd0}.")
+       if sd_is_fitted:
+           sd0 = (
+               param_hist_dict["negctrl"]["params"]["sd_loc"]
+               .detach()
+               .exp()
+               .cpu()
+               .numpy()
+           )
+       print(f"Fitted mu0={mu0}" + (f", sd0={sd0}." if sd_is_fitted else ""))
        fit_df["mu_scaled"] = (mu - mu0) / sd0
        fit_df["mu_sd_scaled"] = mu_sd / sd0
        fit_df["mu_z_scaled"] = fit_df.mu_scaled / fit_df.mu_sd_scaled
-       fit_df["sd_scaled"] = sd / sd0
+       if sd_is_fitted:
+           fit_df["sd_scaled"] = sd / sd0
        fit_df["novl_scaled"] = get_novl(fit_df, "mu_scaled", "mu_sd_scaled")

    fit_df = pd.concat(
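For reference, the scaling applied in the hunk above standardizes each variant's estimate against the fitted negative-control distribution. A minimal pandas sketch with made-up numbers (assumes `sd_is_fitted=True`, so `sd0` is available):

```python
import numpy as np
import pandas as pd

mu = np.array([0.9, -0.2, 0.05])       # per-variant posterior means (hypothetical)
mu_sd = np.array([0.30, 0.25, 0.20])   # posterior standard deviations of mu
mu0, sd0 = 0.1, 0.5                    # fitted negative-control mean and sd

fit_df = pd.DataFrame({"mu": mu, "mu_sd": mu_sd, "mu_z": mu / mu_sd})
fit_df["mu_scaled"] = (fit_df["mu"] - mu0) / sd0   # center and scale by negative controls
fit_df["mu_sd_scaled"] = fit_df["mu_sd"] / sd0
fit_df["mu_z_scaled"] = fit_df.mu_scaled / fit_df.mu_sd_scaled
print(fit_df)
```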