diff --git a/nuc_morph_analysis/analyses/linear_regression/supplemental_lrm_figure_workflow.py b/nuc_morph_analysis/analyses/linear_regression/supplemental_lrm_figure_workflow.py
index 6cf8d000..f1f12a2a 100644
--- a/nuc_morph_analysis/analyses/linear_regression/supplemental_lrm_figure_workflow.py
+++ b/nuc_morph_analysis/analyses/linear_regression/supplemental_lrm_figure_workflow.py
@@ -1,7 +1,5 @@
 #%%
-import warnings
 import numpy as np
-import pandas as pd
 from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, filter_data
 from nuc_morph_analysis.analyses.linear_regression.linear_regression import fit_linear_regression
 from nuc_morph_analysis.analyses.linear_regression.analysis_plots import (run_regression_workflow,
diff --git a/nuc_morph_analysis/lib/preprocessing/add_features.py b/nuc_morph_analysis/lib/preprocessing/add_features.py
index 2e498c45..9d211e6b 100644
--- a/nuc_morph_analysis/lib/preprocessing/add_features.py
+++ b/nuc_morph_analysis/lib/preprocessing/add_features.py
@@ -1,4 +1,5 @@
 from nuc_morph_analysis.analyses.lineage.get_features import lineage_trees
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
 import numpy as np
 
 FRAME_COL = {"Ff": "A", "frame_transition": "B", "Fb": "C"}
@@ -625,3 +626,61 @@ def add_perimeter_ratio(df):
     """
     df['2d_perimeter_nuc_cell_ratio'] = df['2d_perimeter_nucleus'] / df['2d_perimeter_pseudo_cell']
     return df
+
+def add_features_at_transition(df,
+                               feature_list=['xy_aspect',
+                                             'SA_vol_ratio',
+                                             'neighbor_avg_lrm_volume_90um',
+                                             'neighbor_avg_lrm_height_90um',
+                                             'neighbor_avg_lrm_xy_aspect_90um',
+                                             'neighbor_avg_lrm_mesh_sa_90um',
+                                             'neighbor_avg_dxdt_48_volume_90um',
+                                             'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um']
+                               ):
+    """
+    Add feature measurements at transition that are used in the linear regression analysis.
+    Features should be pre-calculated and not need to be scaled.
+
+    Parameters
+    ----------
+    df : DataFrame
+        The dataframe containing full trajectories
+    feature_list : list
+        List of column names
+
+    Returns
+    -------
+    df : DataFrame
+        The dataframe with the added feature columns
+    """
+
+    for feature in feature_list:
+        df = add_feature_at(df, "frame_transition", feature, feature)
+    return df
+
+def add_mean_features(df,
+                      feature_list=['neighbor_avg_dxdt_48_volume_90um',
+                                    'neighbor_avg_lrm_volume_90um',
+                                    'neighbor_avg_lrm_height_90um',
+                                    'neighbor_avg_lrm_xy_aspect_90um',
+                                    'neighbor_avg_lrm_mesh_sa_90um',
+                                    'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um']
+                      ):
+    """
+    Add mean feature measurements over the growth trajectory that are used in the linear regression analysis.
+
+    Parameters
+    ----------
+    df : DataFrame
+        The dataframe containing full trajectories
+    feature_list : list
+        List of column names
+
+    Returns
+    -------
+    df : DataFrame
+        The dataframe with the added mean feature columns
+    """
+    multiplier_list = [get_plot_labels_for_metric(x)[0] for x in feature_list]
+    df = add_mean_feature_over_trajectory(df, feature_list, multiplier_list)
+    return df
diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
index 44c4aabf..30eb05cf 100644
--- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
+++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
@@ -202,7 +202,7 @@ def process_all_tracks(df, dataset, remove_growth_outliers, num_workers):
     df = add_neighborhood_avg_features.run_script(df, num_workers=num_workers)
     df = add_neighborhood_avg_features_lrm.run_script(df, num_workers=num_workers,
                                                       feature_list=["volume", "height", "xy_aspect", "mesh_sa", "2d_area_nuc_cell_ratio"],
-                                                      exclude_outliers=False)
+                                                      exclude_outliers=True)
 
     if dataset == "all_baseline":
         df = add_colony_time_all_datasets(df)
@@ -264,26 +264,12 @@ def process_full_tracks(df_all, thresh, pix_size, interval):
 
     # For LRM
     df_full = add_features.add_lineage_features(df_full, feature_list=['volume_at_B', 'duration_BC', 'volume_at_C', 'delta_volume_BC'])
-    df_full = add_features.add_feature_at(df_full, "frame_transition", 'height', 'height_percentile', pix_size)
-    for feature in ['xy_aspect', 'SA_vol_ratio', 'neighbor_avg_lrm_volume_90um', 'neighbor_avg_lrm_height_90um',
-                    'neighbor_avg_lrm_xy_aspect_90um','neighbor_avg_lrm_mesh_sa_90um']:
-        df_full = add_features.add_feature_at(df_full, "frame_transition", feature, feature)
-
+    df_full = add_features.add_features_at_transition(df_full)
 
     df_full = add_features.get_early_transient_gr_of_neighborhood(df_full, scale=get_plot_labels_for_metric('neighbor_avg_dxdt_48_volume_90um')[0])
     df_full = add_features.sum_mitotic_events_along_full_track(df_full)
     df_full = add_features.normalize_sum_events(df_full)
-
-    ft_list = ['neighbor_avg_dxdt_48_volume_90um',
-               'neighbor_avg_lrm_volume_90um',
-               'neighbor_avg_lrm_height_90um',
-               'neighbor_avg_lrm_xy_aspect_90um',
-               'neighbor_avg_lrm_mesh_sa_90um',
-               'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um',]
-    multiplier_list = [get_plot_labels_for_metric(x)[0] for x in ft_list]
-    df_full = add_features.add_mean_feature_over_trajectory(df_full, ft_list, multiplier_list)
-    for feat in ft_list:
-        df_full = add_features.add_feature_at(df_full, "frame_transition", feat, feat)
+    df_full = add_features.add_mean_features(df_full)
 
     # Add flag for use after merging back to main manifest
     df_full = add_features.add_full_track_flag(df_full)