Skip to content

Commit

Permalink
PR review changes to global dataset filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
chantelleleveille committed Oct 28, 2024
1 parent 0c6fb0f commit 2bff19f
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#%%
import warnings
import numpy as np
import pandas as pd
from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, filter_data
from nuc_morph_analysis.analyses.linear_regression.linear_regression import fit_linear_regression
from nuc_morph_analysis.analyses.linear_regression.analysis_plots import (run_regression_workflow,
Expand Down
58 changes: 58 additions & 0 deletions nuc_morph_analysis/lib/preprocessing/add_features.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from nuc_morph_analysis.analyses.lineage.get_features import lineage_trees
from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
import numpy as np

FRAME_COL = {"Ff": "A", "frame_transition": "B", "Fb": "C"}
Expand Down Expand Up @@ -625,3 +626,60 @@ def add_perimeter_ratio(df):
"""
df['2d_perimeter_nuc_cell_ratio'] = df['2d_perimeter_nucleus'] / df['2d_perimeter_pseudo_cell']
return df

def add_features_at_transition(df, feature_list=None):
    """
    Add feature measurements at transition that are used in the linear regression analysis.
    Features should be pre-calculated and not need to be scaled.

    Parameters
    ----------
    df : DataFrame
        The dataframe containing full trajectories
    feature_list : list, optional
        List of column names. When omitted (or None), the default set of
        columns used by the linear regression analysis is used.

    Returns
    -------
    df : DataFrame
        The dataframe as returned by the successive ``add_feature_at`` calls,
        one call per entry of ``feature_list``.
    """
    # Build the default inside the function so that no mutable list object is
    # shared between calls through the signature.
    if feature_list is None:
        feature_list = [
            'xy_aspect',
            'SA_vol_ratio',
            'neighbor_avg_lrm_volume_90um',
            'neighbor_avg_lrm_height_90um',
            'neighbor_avg_lrm_xy_aspect_90um',
            'neighbor_avg_lrm_mesh_sa_90um',
            'neighbor_avg_dxdt_48_volume_90um',
            'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um',
        ]

    for feature in feature_list:
        # The column name is passed for both the third and fourth arguments and
        # no multiplier is supplied (features are expected to need no scaling).
        df = add_feature_at(df, "frame_transition", feature, feature)
    return df

def add_mean_features(df, feature_list=None):
    """
    Add mean feature measurements over the growth trajectory that are used in the linear regression analysis.

    Parameters
    ----------
    df : DataFrame
        The dataframe containing full trajectories
    feature_list : list, optional
        List of column names. When omitted (or None), the default set of
        neighborhood-average columns used by the linear regression analysis
        is used.

    Returns
    -------
    df : DataFrame
        The dataframe with the added mean feature columns
    """
    # Build the default inside the function so that no mutable list object is
    # shared between calls through the signature.
    if feature_list is None:
        feature_list = [
            'neighbor_avg_dxdt_48_volume_90um',
            'neighbor_avg_lrm_volume_90um',
            'neighbor_avg_lrm_height_90um',
            'neighbor_avg_lrm_xy_aspect_90um',
            'neighbor_avg_lrm_mesh_sa_90um',
            'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um',
        ]

    # The first element returned by get_plot_labels_for_metric is used as the
    # per-feature multiplier, one entry per column in feature_list.
    multiplier_list = [get_plot_labels_for_metric(x)[0] for x in feature_list]
    df = add_mean_feature_over_trajectory(df, feature_list, multiplier_list)
    # Callers assign the result (df = add_mean_features(df)), so the dataframe
    # must be returned explicitly; without this the caller would receive None.
    return df
20 changes: 3 additions & 17 deletions nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def process_all_tracks(df, dataset, remove_growth_outliers, num_workers):
df = add_neighborhood_avg_features.run_script(df, num_workers=num_workers)
df = add_neighborhood_avg_features_lrm.run_script(df, num_workers=num_workers,
feature_list=["volume", "height", "xy_aspect", "mesh_sa", "2d_area_nuc_cell_ratio"],
exclude_outliers=False)
exclude_outliers=True)

if dataset == "all_baseline":
df = add_colony_time_all_datasets(df)
Expand Down Expand Up @@ -264,26 +264,12 @@ def process_full_tracks(df_all, thresh, pix_size, interval):

# For LRM
df_full = add_features.add_lineage_features(df_full, feature_list=['volume_at_B', 'duration_BC', 'volume_at_C', 'delta_volume_BC'])

df_full = add_features.add_feature_at(df_full, "frame_transition", 'height', 'height_percentile', pix_size)
for feature in ['xy_aspect', 'SA_vol_ratio', 'neighbor_avg_lrm_volume_90um', 'neighbor_avg_lrm_height_90um',
'neighbor_avg_lrm_xy_aspect_90um','neighbor_avg_lrm_mesh_sa_90um']:
df_full = add_features.add_feature_at(df_full, "frame_transition", feature, feature)

df_full = add_features.add_features_at_transition(df_full)
df_full = add_features.get_early_transient_gr_of_neighborhood(df_full, scale=get_plot_labels_for_metric('neighbor_avg_dxdt_48_volume_90um')[0])
df_full = add_features.sum_mitotic_events_along_full_track(df_full)
df_full = add_features.normalize_sum_events(df_full)

ft_list = ['neighbor_avg_dxdt_48_volume_90um',
'neighbor_avg_lrm_volume_90um',
'neighbor_avg_lrm_height_90um',
'neighbor_avg_lrm_xy_aspect_90um',
'neighbor_avg_lrm_mesh_sa_90um',
'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um',]
multiplier_list = [get_plot_labels_for_metric(x)[0] for x in ft_list]
df_full = add_features.add_mean_feature_over_trajectory(df_full, ft_list, multiplier_list)
for feat in ft_list:
df_full = add_features.add_feature_at(df_full, "frame_transition", feat, feat)
df_full = add_features.add_mean_features(df_full)

# Add flag for use after merging back to main manifest
df_full = add_features.add_full_track_flag(df_full)
Expand Down

0 comments on commit 2bff19f

Please sign in to comment.