Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Quick run_all_manuscript_workflow error catching #53

Merged
merged 10 commits into from
Dec 6, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# %%
from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import load_dataset_with_features
from nuc_morph_analysis.lib.preprocessing import filter_data
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import matplotlib
from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot
from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
from nuc_morph_analysis.lib.visualization.reference_points import COLONY_COLORS, COLONY_LABELS
from nuc_morph_analysis.analyses.inhibitors.dataset_info import get_drug_perturbation_details_from_colony_name

matplotlib.rcParams["pdf.fonttype"] = 42
plt.rcParams["font.family"] = "Arial"

#%%
def try_to_get_drug_name(colony_name):
    """Return the drug string for a colony, falling back to the colony name.

    Looks up the perturbation details for ``colony_name`` and returns their
    ``"drugs_string"`` entry. If the lookup fails for any reason (for example
    the colony is not a drug-perturbation colony), ``colony_name`` itself is
    returned so it can still be used as a legend label.
    """
    try:
        return get_drug_perturbation_details_from_colony_name(colony_name)["drugs_string"]
    except Exception:
        # Best-effort lookup: any ordinary failure falls back to the raw name.
        # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        return colony_name

def plot_perturbation_densities(df, figdir, time_axis = 'real_time', error="percentile", show_legend=True, interval=5,titlestr=""):
    """Plot the mean of "2d_area_nuc_cell_ratio" over time for every colony.

    One line (the per-timepoint mean) and one shaded band (the error range)
    are drawn per colony on a single axis. Timepoints with 15 or fewer
    measurements of the feature are left out.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "colony", "index_sequence" and
        "2d_area_nuc_cell_ratio", plus "colony_time" when
        ``time_axis="colony_time"``.
    figdir : str
        Output directory for the figure. Currently unused because saving is
        disabled (see the note at the end of the function).
    time_axis : {"real_time", "colony_time"}
        Which column is used as the time axis: "index_sequence" or
        "colony_time".
    error : {"percentile", "std"}
        "percentile" shades the 5th-95th percentile range; "std" shades
        mean +/- one standard deviation.
    show_legend : bool
        Draw the legend to the right of the axis when True.
    interval : number
        Time values are multiplied by ``interval / 60`` before plotting on an
        axis labelled in hours, i.e. this is treated as minutes per time step.
    titlestr : str
        Title of the plot.

    Raises
    ------
    ValueError
        If ``time_axis`` or ``error`` is not one of the supported values.
    """
    # Resolve the options up front so an unsupported value fails with a clear
    # message (and before a figure is created) instead of an unbound-variable
    # error part-way through plotting.
    if time_axis == "real_time":
        time_col = "index_sequence"
        x_label = "Real Time (hr)"
    elif time_axis == "colony_time":
        time_col = "colony_time"
        x_label = "Aligned Colony Time (hr)"
    else:
        raise ValueError(
            f"time_axis must be 'real_time' or 'colony_time', got {time_axis!r}"
        )
    if error not in ("std", "percentile"):
        raise ValueError(f"error must be 'std' or 'percentile', got {error!r}")

    fig, ax = plt.subplots(1, 1, figsize=(10, 4))

    feature_col = "2d_area_nuc_cell_ratio"
    scale, _, units, _ = get_plot_labels_for_metric(feature_col)

    # Fallback palette for colonies without an entry in COLONY_COLORS.
    fallback_colors = plt.cm.tab20(range(20))
    for ci, (colony, df_colony) in enumerate(df.groupby("colony")):
        df_colony = df_colony.sort_values("index_sequence")

        # Wrap around the palette so more than 20 colonies cannot raise an
        # IndexError (the fallback is evaluated even when it is not used).
        color = COLONY_COLORS.get(colony, fallback_colors[ci % len(fallback_colors)])

        grouper = df_colony[[time_col, feature_col]].groupby(time_col)[feature_col]

        # Keep only timepoints with more than 15 cells. A boolean mask is used
        # because it shares the time-point index of the aggregated series;
        # indexing a plain NumPy array with the time-point labels would be
        # positional and is wrong whenever the labels are not exactly 0..n-1.
        enough_cells = grouper.count() > 15

        mean_density = (grouper.mean() * scale)[enough_cells]
        if error == "std":
            std_density = (grouper.std() * scale)[enough_cells]
            lower = mean_density - std_density
            upper = mean_density + std_density
        else:  # error == "percentile"
            lower = (grouper.quantile(0.05) * scale)[enough_cells]
            upper = (grouper.quantile(0.95) * scale)[enough_cells]

        # Derive the time axis from the already-filtered series so every
        # plotted array has the same length and ordering.
        time = mean_density.index.values * interval / 60

        ax.fill_between(
            time,
            lower,
            upper,
            alpha=0.12,
            color=color,
            zorder=0,
            edgecolor="none",
            label=COLONY_LABELS.get(colony, try_to_get_drug_name(colony)),
        )
        ax.plot(
            time, mean_density, linewidth=1.2, color=color, label="", zorder=20
        )

    ax.set_ylabel(f"Average Density \n Across Colony {units}")
    ax.set_xlabel(x_label)
    if show_legend is True:
        # put legend outside to the right
        ax.legend(loc="center left", bbox_to_anchor=(1.1, 0.5), frameon=False)
    ax.set_title(titlestr)
    fig.tight_layout()
    # NOTE: saving is currently disabled, so `figdir` is unused.
    # save_and_show_plot(
    #     f"{figdir}/avg_density_colony_{time_axis}_alignment-{feature_col}",
    #     file_extension=".pdf",
    #     dpi=300,
    #     transparent=True,
    # )

# Draw one density plot per dataset collection, titled with the dataset name.
# `load_dataset_with_features` is already imported at the top of this file, so
# it is not imported again here.
for dataset in ["all_drug_perturbation", "all_feeding_control", "all_baseline"]:
    df0 = load_dataset_with_features(dataset, load_local=True)
    df = filter_data.all_timepoints_minimal_filtering(df0)
    figdir = f"figures/{dataset}/density_plots"
    plot_perturbation_densities(
        df,
        figdir,
        time_axis="real_time",
        error="percentile",
        show_legend=True,
        interval=5,
        titlestr=dataset,
    )


#%%
6 changes: 3 additions & 3 deletions nuc_morph_analysis/lib/preprocessing/all_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"all_baseline": { # this is the common info for all baseline datasets
# FMS ID for 2024-07-08_main_manifest.parquet generated from morflowgenesis v0.3.0
# with generate_main_manifest.py at commit 6e9eb0962343113ab3999ce6b59d8331ddab9a45
"fmsid": "443ac819f633494f936ff410c14c21ed", # morflowgenesis v0.3.0 updated with 2d_area from watershed PR (9/19/24)
"fmsid": "443ac819f633494f936ff410c14c21ed", # morflowgenesis v0.3.0 updated with new density from watershed PR (9/19/24)
"s3_path": INTERMEDIATE_MANIFEST_DIR / "2024-06-25_baseline_intermediate_manifest.parquet",
"pixel_size": PIXEL_SIZE_YX_100x,
"time_interval": 5, # min
Expand All @@ -35,7 +35,7 @@
"all_feeding_control": { # this is the common info for all "feeding_control" datasets
# FMS ID for 2024-06-16_feeding_control_main_manifest.parquet generated from morflowgenesis v0.3.0
# with generate_perturbation_manifest.py at commit ebe76b5e84c9ca24617e4d04aed8acc1c2c3bb62
"fmsid": "8ecd9b04329b490baec500859e276fbe", # morflowgenesis v 0.3.0
"fmsid": "f95429aa9d084a699d9e591afd2f7792", # morflowgenesis v 0.3.0 updated with new density from density_calc PR (12/5/24)
"s3_path": INTERMEDIATE_MANIFEST_DIR
/ "2024-06-14_feeding_control_intermediate_manifest.parquet",
"pixel_size": PIXEL_SIZE_YX_100x,
Expand All @@ -49,7 +49,7 @@
"all_drug_perturbation": { # this is the common info for all "drug_perturbation" datasets
# FMS ID for 2024-06-24_drug_perturbation_main_manifest.parquet generated from morflowgenesis v0.3.0
# with generate_perturbation_manifest.py at commit 725ed45a6413391b9927610649e6209c04bcae9f
"fmsid": "19e1125fd9c4413e8babe2e9de8d9b87", # morflowgenesis v 0.3.0
"fmsid": "5e8170e7881a4ad09c236e3e0c056d75", # morflowgenesis v 0.3.0 updated with new density from density_calc PR (12/5/24)
"s3_path": INTERMEDIATE_MANIFEST_DIR
/ "2024-06-24_inhibitor_perturbation_intermediate_manifest.parquet",
"pixel_size": PIXEL_SIZE_YX_100x,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
match_and_update_dataframe,
FRAMES_TO_SHIFT,
)
from nuc_morph_analysis.lib.preprocessing.twoD_zMIP_area import watershed_workflow


# %%
Expand Down Expand Up @@ -86,8 +87,23 @@ def generate_manifest_one_colony(morflowgenesis_df, dataset, experiments=None):
# --------------------------
# add_colony_metrics features
logging.info("Calculating colony metrics")
return add_colony_metrics(step5_df)
step5_df = add_colony_metrics(step5_df)

# --------------------------
# STEP 6: calculate 2D object-based density
# --------------------------
logging.info("Calculating image-based density metrics")
step6_df = step5_df.copy()
density_df = watershed_workflow.get_pseudo_cell_boundaries_for_movie(dataset, parallel=True)
# now merge the density_df with the main dataframe
step6_df = pd.merge(step6_df,
density_df,
on=['colony','index_sequence','label_img'],
suffixes=('', '__dup_col'),
how='left')
# now remove columns with __dup_col suffix
step6_df = step6_df[step6_df.columns.drop(list(step6_df.filter(regex='__dup_col')))]
return step6_df

def get_combined_manifest(experiments):
"""
Expand All @@ -107,7 +123,11 @@ def get_combined_manifest(experiments):


# %%
for experiments in ["feeding_control", "drug_perturbation"]:
df = get_combined_manifest(experiments)
write_result(df, f"{experiments}_main_manifest", format="parquet")
def run_workflow():
    """Build the combined manifest for each experiment set and write it out as parquet."""
    experiment_sets = ("feeding_control", "drug_perturbation")
    for experiments in experiment_sets:
        combined_manifest = get_combined_manifest(experiments)
        output_name = f"{experiments}_main_manifest"
        write_result(combined_manifest, output_name, format="parquet")
# %%

if __name__ == "__main__":
    run_workflow()
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def merge_datasets(df_all, df_full):
'resolution_level',
'2d_area_cyto',
'inv_cyto_density',
'density'
'density',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you!


# created in add_growth_features.fit_tracks_to_model()
'tscale_exponentialfit_volume',
Expand Down Expand Up @@ -438,6 +438,8 @@ def remove_columns(df, column_list=COLUMNS_TO_DROP):
df : pandas.DataFrame
The dataframe with the columns removed.
"""
column_list = [col for col in column_list if col in df.columns]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you!


df = df.drop(columns=column_list)
return df

Expand Down
1 change: 0 additions & 1 deletion run_all_manuscript_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from nuc_morph_analysis.analyses.colony_area import colony_area_workflow
from nuc_morph_analysis.analyses.segmentation_model_validation import seg_model_validation_figure_workflow


class Workflows:
def figure_1_dataset():
import nuc_morph_analysis.analyses.dataset_images_for_figures.figure_1_workflow
Expand Down
Loading