AllenCell · jcass11 · Dec 6, 2024 · Nov 20, 2024 · Nov 22, 2024 · Dec 5, 2024
diff --git a/...orph_analysis/analyses/density/extra_checks/check_that_density_works_for_perturbations.py b/...orph_analysis/analyses/density/extra_checks/check_that_density_works_for_perturbations.py
@@ -0,0 +1,103 @@
+# %%
+from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import load_dataset_with_features
+from nuc_morph_analysis.lib.preprocessing import filter_data
+import matplotlib.pyplot as plt
+import matplotlib.pyplot as plt
+import matplotlib
+from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
+from nuc_morph_analysis.lib.visualization.reference_points import COLONY_COLORS, COLONY_LABELS
+from nuc_morph_analysis.analyses.inhibitors.dataset_info import get_drug_perturbation_details_from_colony_name
+
+matplotlib.rcParams["pdf.fonttype"] = 42
+plt.rcParams["font.family"] = "Arial"
+
+#%%
+def try_to_get_drug_name(colony_name):
+    """
+    Return a display name for a colony, preferring its drug string.
+
+    Parameters
+    ----------
+    colony_name : str
+        Name passed to get_drug_perturbation_details_from_colony_name.
+
+    Returns
+    -------
+    The "drugs_string" entry of the perturbation details when the lookup
+    succeeds, otherwise colony_name unchanged.
+    """
+    try:
+        return get_drug_perturbation_details_from_colony_name(colony_name)["drugs_string"]
+    except Exception:
+        # Best-effort lookup: colonies without perturbation details fall back to
+        # their own name. `Exception` (rather than a bare `except`) keeps
+        # KeyboardInterrupt and SystemExit propagating.
+        return colony_name
+
+def plot_perturbation_densities(
+    df,
+    figdir,
+    time_axis="real_time",
+    error="percentile",
+    show_legend=True,
+    interval=5,
+    titlestr="",
+):
+    """
+    Plot the per-colony mean of "2d_area_nuc_cell_ratio" over time with an error band.
+
+    One line (mean) and one shaded band (error) are drawn per colony on a single
+    axis. Timepoints with 15 or fewer non-null values are not plotted.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Must contain the columns "colony", "index_sequence" and
+        "2d_area_nuc_cell_ratio", plus "colony_time" when time_axis="colony_time".
+    figdir : str
+        Currently unused because saving is disabled (see the commented-out call
+        at the end of the function).
+    time_axis : str
+        "real_time" (uses the "index_sequence" column) or
+        "colony_time" (uses the "colony_time" column).
+    error : str
+        "std" for mean +/- one standard deviation, or
+        "percentile" for the 5th to 95th percentile band.
+    show_legend : bool
+        Whether to draw the legend to the right of the axis.
+    interval : float
+        Time values are multiplied by interval and divided by 60 before plotting;
+        presumably minutes per timepoint, giving hours (confirm against dataset info).
+    titlestr : str
+        Title of the plot.
+
+    Raises
+    ------
+    ValueError
+        If time_axis or error is not one of the supported options.
+    """
+    time_axis_options = {
+        "real_time": ("index_sequence", "Real Time (hr)"),
+        "colony_time": ("colony_time", "Aligned Colony Time (hr)"),
+    }
+    error_options = ("std", "percentile")
+    # validate up front so a typo fails with a clear message instead of an
+    # unbound-variable error halfway through plotting
+    if time_axis not in time_axis_options:
+        raise ValueError(
+            f"time_axis must be one of {sorted(time_axis_options)}, got {time_axis!r}"
+        )
+    if error not in error_options:
+        raise ValueError(f"error must be one of {error_options}, got {error!r}")
+    # resolved once, outside the loop, so x_label exists even when df has no colonies
+    time_col, x_label = time_axis_options[time_axis]
+
+    # timepoints need more than this many non-null values to be plotted
+    min_cells = 15
+
+    fig, ax = plt.subplots(1, 1, figsize=(10, 4))
+
+    feature_col = "2d_area_nuc_cell_ratio"
+    scale, _, units, _ = get_plot_labels_for_metric(feature_col)
+
+    new_colors = plt.cm.tab20(range(20))
+    for ci, (colony, df_colony) in enumerate(df.groupby("colony")):
+        df_colony = df_colony.sort_values("index_sequence")
+
+        # the fallback is evaluated even for known colonies, so wrap the palette
+        # index to stay in range when there are more than 20 colonies
+        color = COLONY_COLORS.get(colony, new_colors[ci % len(new_colors)])
+
+        grouper = df_colony[[time_col, feature_col]].groupby(time_col)[feature_col]
+
+        # boolean mask (indexed by timepoint) of timepoints with enough cells
+        enough_cells = grouper.count() > min_cells
+
+        mean_density = (grouper.mean() * scale)[enough_cells]
+        if error == "std":
+            std_density = (grouper.std() * scale)[enough_cells]
+            lower = mean_density - std_density
+            upper = mean_density + std_density
+        else:  # error == "percentile"
+            lower = (grouper.quantile(0.05) * scale)[enough_cells]
+            upper = (grouper.quantile(0.95) * scale)[enough_cells]
+
+        # derive time from the filtered index; indexing a numpy array with the
+        # timepoint labels would be positional and only correct when the labels
+        # happen to be 0..N-1 without gaps
+        time = mean_density.index.values * interval / 60
+
+        ax.fill_between(
+            time,
+            lower,
+            upper,
+            alpha=0.12,
+            color=color,
+            zorder=0,
+            edgecolor="none",
+            label=COLONY_LABELS.get(colony, try_to_get_drug_name(colony)),
+        )
+        ax.plot(
+            time, mean_density, linewidth=1.2, color=color, label="", zorder=20
+        )
+
+    ax.set_ylabel(f"Average Density \n Across Colony {units}")
+    ax.set_xlabel(x_label)
+    if show_legend:
+        # put legend outside to the right
+        ax.legend(loc="center left", bbox_to_anchor=(1.1, 0.5), frameon=False)
+    ax.set_title(titlestr)
+    fig.tight_layout()
+    # save_and_show_plot(
+    #     f"{figdir}/avg_density_colony_{time_axis}_alignment-{feature_col}",
+    #     file_extension=".pdf",
+    #     dpi=300,
+    #     transparent=True,
+    # )
+
+# Plot the density curves for each dataset to check that the density feature is
+# present and plottable for perturbation, feeding-control and baseline data.
+# (load_dataset_with_features is already imported at the top of the file.)
+for dataset in ["all_drug_perturbation","all_feeding_control","all_baseline"]:
+    df0 = load_dataset_with_features(dataset,load_local=True)
+    df = filter_data.all_timepoints_minimal_filtering(df0)
+    # figdir is passed through but nothing is written while saving is disabled
+    figdir = f"figures/{dataset}/density_plots"
+    plot_perturbation_densities(df, figdir, time_axis = 'real_time', error="percentile", show_legend=True, interval=5,titlestr=dataset)
+
+
+#%%
diff --git a/nuc_morph_analysis/lib/preprocessing/all_datasets.py b/nuc_morph_analysis/lib/preprocessing/all_datasets.py
@@ -22,7 +22,7 @@
     "all_baseline": {  # this is the common info for all baseline datasets
         # FMS ID for 2024-07-08_main_manifest.parquet generated from morflowgenesis v0.3.0
         # with generate_main_manifest.py at commit 6e9eb0962343113ab3999ce6b59d8331ddab9a45
-        "fmsid": "443ac819f633494f936ff410c14c21ed",  # morflowgenesis v0.3.0 updated with 2d_area from watershed PR (9/19/24)
+        "fmsid": "443ac819f633494f936ff410c14c21ed",  # morflowgenesis v0.3.0 updated with new density from watershed PR (9/19/24)
         "s3_path": INTERMEDIATE_MANIFEST_DIR / "2024-06-25_baseline_intermediate_manifest.parquet",
         "pixel_size": PIXEL_SIZE_YX_100x,
         "time_interval": 5,  # min
@@ -35,7 +35,7 @@
     "all_feeding_control": {  # this is the common info for all "feeding_control" datasets
         # FMS ID for 2024-06-16_feeding_control_main_manifest.parquet generated from morflowgenesis v0.3.0
         # with generate_perturbation_manifest.py at commit ebe76b5e84c9ca24617e4d04aed8acc1c2c3bb62
-        "fmsid": "8ecd9b04329b490baec500859e276fbe",  # morflowgenesis v 0.3.0
+        "fmsid": "f95429aa9d084a699d9e591afd2f7792",  # morflowgenesis v 0.3.0 updated with new density from density_calc PR (12/5/24)
         "s3_path": INTERMEDIATE_MANIFEST_DIR
         / "2024-06-14_feeding_control_intermediate_manifest.parquet",
         "pixel_size": PIXEL_SIZE_YX_100x,
@@ -49,7 +49,7 @@
     "all_drug_perturbation": {  # this is the common info for all "drug_perturbation" datasets
         # FMS ID for 2024-06-24_drug_perturbation_main_manifest.parquet generated from morflowgenesis v0.3.0
         # with generate_perturbation_manifest.py at commit 725ed45a6413391b9927610649e6209c04bcae9f
-        "fmsid": "19e1125fd9c4413e8babe2e9de8d9b87",  # morflowgenesis v 0.3.0
+        "fmsid": "5e8170e7881a4ad09c236e3e0c056d75",  # morflowgenesis v 0.3.0 updated with new density from density_calc PR (12/5/24)
         "s3_path": INTERMEDIATE_MANIFEST_DIR
         / "2024-06-24_inhibitor_perturbation_intermediate_manifest.parquet",
         "pixel_size": PIXEL_SIZE_YX_100x,

diff --git a/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py b/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py
@@ -18,6 +18,7 @@
     match_and_update_dataframe,
     FRAMES_TO_SHIFT,
 )
+from nuc_morph_analysis.lib.preprocessing.twoD_zMIP_area import watershed_workflow
 
 
 # %%
@@ -86,8 +87,23 @@ def generate_manifest_one_colony(morflowgenesis_df, dataset, experiments=None):
     # --------------------------
     # add_colony_metrics features
     logging.info("Calculating colony metrics")
-    return add_colony_metrics(step5_df)
+    step5_df = add_colony_metrics(step5_df)
 
+    # --------------------------
+    # STEP 6: calculate 2D object-based density
+    # --------------------------
+    logging.info("Calculating image-based density metrics")
+    step6_df = step5_df.copy()
+    density_df = watershed_workflow.get_pseudo_cell_boundaries_for_movie(dataset, parallel=True)
+    # now merge the density_df with the main dataframe
+    step6_df = pd.merge(step6_df,
+                            density_df,
+                            on=['colony','index_sequence','label_img'],
+                            suffixes=('', '__dup_col'),
+                            how='left')
+    # now remove columns with __dup_col suffix
+    step6_df = step6_df[step6_df.columns.drop(list(step6_df.filter(regex='__dup_col')))]
+    return step6_df
 
 def get_combined_manifest(experiments):
     """
@@ -107,7 +123,11 @@ def get_combined_manifest(experiments):
 
 
 # %%
-for experiments in ["feeding_control", "drug_perturbation"]:
-    df = get_combined_manifest(experiments)
-    write_result(df, f"{experiments}_main_manifest", format="parquet")
+def run_workflow():
+    """
+    Build and write the combined main manifest for each perturbation experiment group.
+
+    For "feeding_control" and "drug_perturbation", the dataframe returned by
+    get_combined_manifest is handed to write_result under the name
+    "<experiments>_main_manifest" in parquet format.
+    """
+    for experiments in ("feeding_control", "drug_perturbation"):
+        manifest_name = f"{experiments}_main_manifest"
+        write_result(get_combined_manifest(experiments), manifest_name, format="parquet")
 # %%
+
+# Only build and write the manifests when this file is executed as a script,
+# not when it is imported.
+if __name__ == "__main__":
+    run_workflow()
diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
@@ -393,7 +393,7 @@ def merge_datasets(df_all, df_full):
     'resolution_level',
     '2d_area_cyto',
     'inv_cyto_density',
-    'density'
+    'density',
 
     # created in add_groth_features.fit_tracks_to_model()
     'tscale_exponentialfit_volume',
@@ -438,6 +438,8 @@ def remove_columns(df, column_list=COLUMNS_TO_DROP):
     df : pandas.DataFrame
         The dataframe with the columns removed.
     """
+    column_list = [col for col in column_list if col in df.columns]
+
     df = df.drop(columns=column_list)
     return df
 

diff --git a/run_all_manuscript_workflows.py b/run_all_manuscript_workflows.py
@@ -3,7 +3,6 @@
 from nuc_morph_analysis.analyses.colony_area import colony_area_workflow
 from nuc_morph_analysis.analyses.segmentation_model_validation import seg_model_validation_figure_workflow
 
-
 class Workflows:
     def figure_1_dataset():
         import nuc_morph_analysis.analyses.dataset_images_for_figures.figure_1_workflow