diff --git a/applications/contrastive_phenotyping/evaluation/ALFI_displacement.py b/applications/contrastive_phenotyping/evaluation/ALFI_displacement.py index ba36e84d..595f283f 100644 --- a/applications/contrastive_phenotyping/evaluation/ALFI_displacement.py +++ b/applications/contrastive_phenotyping/evaluation/ALFI_displacement.py @@ -5,6 +5,9 @@ from viscy.representation.embedding_writer import read_embedding_dataset from viscy.representation.evaluation.distance import ( calculate_normalized_euclidean_distance_cell, + compute_displacement, + compute_dynamic_range, + compute_rms_per_track, ) from collections import defaultdict from tabulate import tabulate @@ -13,156 +16,10 @@ from sklearn.metrics.pairwise import cosine_similarity from collections import OrderedDict -# %% function +# %% function -def compute_displacement_mean_std_full(embedding_dataset, max_tau=10): - """ - Compute the mean and standard deviation of displacements between embeddings at time t and t + tau for all cells. - - Parameters: - embedding_dataset : xarray.Dataset - The dataset containing embeddings, timepoints, fov_name, and track_id. - max_tau : int, optional - The maximum tau value to compute displacements for. Default is 10. - - Returns: - tuple - - mean_displacement_per_tau (dict): Mean displacement for each tau. - - std_displacement_per_tau (dict): Standard deviation of displacements for each tau. - """ - fov_names = embedding_dataset["fov_name"].values - track_ids = embedding_dataset["track_id"].values - timepoints = embedding_dataset["t"].values - embeddings = embedding_dataset["features"].values - - cell_identifiers = np.array( - list(zip(fov_names, track_ids)), - dtype=[("fov_name", "O"), ("track_id", "int64")], - ) - - unique_cells = np.unique(cell_identifiers) - - displacement_per_tau = defaultdict(list) - - for cell in unique_cells: - fov_name = cell["fov_name"] - track_id = cell["track_id"] - - indices = np.where((fov_names == fov_name) & (track_ids == track_id))[0] - - cell_timepoints = timepoints[indices] - cell_embeddings = embeddings[indices] - - sorted_indices = np.argsort(cell_timepoints) - cell_timepoints = cell_timepoints[sorted_indices] - - cell_embeddings = cell_embeddings[sorted_indices] - - for i in range(len(cell_timepoints)): - current_time = cell_timepoints[i] - - current_embedding = cell_embeddings[i] - - current_embedding = current_embedding / np.linalg.norm(current_embedding) - - for tau in range(0, max_tau + 1): - future_time = current_time + tau - - - future_index = np.where(cell_timepoints == future_time)[0] - - if len(future_index) >= 1: - - future_embedding = cell_embeddings[future_index[0]] - future_embedding = future_embedding / np.linalg.norm( - future_embedding - ) - - distance = np.linalg.norm(current_embedding - future_embedding) - - displacement_per_tau[tau].append(distance) - - mean_displacement_per_tau = { - tau: np.mean(displacements) - for tau, displacements in displacement_per_tau.items() - } - std_displacement_per_tau = { - tau: np.std(displacements) - for tau, displacements in displacement_per_tau.items() - } - - return mean_displacement_per_tau, std_displacement_per_tau - - -def compute_dynamic_range(mean_displacement_per_tau): - """ - Compute the dynamic range as the difference between the maximum - and minimum mean displacement per τ. 
-
-    Parameters:
-    mean_displacement_per_tau: dict with τ as key and mean displacement as value
-
-    Returns:
-        float: dynamic range (max displacement - min displacement)
-    """
-    displacements = mean_displacement_per_tau.values()
-    return max(displacements) - min(displacements)
-
-
-def compute_rms_per_track(embedding_dataset):
-    """
-    Compute RMS of the time derivative of embeddings per track.
-
-    Parameters:
-    embedding_dataset : xarray.Dataset
-        The dataset containing embeddings, timepoints, fov_name, and track_id.
-
-    Returns:
-    list: A list of RMS values, one for each track.
-    """
-    fov_names = embedding_dataset["fov_name"].values
-    track_ids = embedding_dataset["track_id"].values
-    timepoints = embedding_dataset["t"].values
-    embeddings = embedding_dataset["features"].values
-
-    cell_identifiers = np.array(
-        list(zip(fov_names, track_ids)),
-        dtype=[("fov_name", "O"), ("track_id", "int64")],
-    )
-
-    unique_cells = np.unique(cell_identifiers)
-
-    rms_values = []
-
-    for cell in unique_cells:
-        fov_name = cell["fov_name"]
-        track_id = cell["track_id"]
-
-        indices = np.where((fov_names == fov_name) & (track_ids == track_id))[0]
-
-        cell_timepoints = timepoints[indices]
-        cell_embeddings = embeddings[indices]
-        #print(cell_embeddings.shape)
-
-        if len(cell_embeddings) < 2:
-            continue
-
-        sorted_indices = np.argsort(cell_timepoints)
-        cell_embeddings = cell_embeddings[sorted_indices]
-
-        # Compute differences between consecutive embeddings
-        differences = np.diff(cell_embeddings, axis=0)  # Shape: (T-1, 768)
-
-        if differences.shape[0] == 0:
-            continue
-
-        # Compute RMS for this track
-        norms = np.linalg.norm(differences, axis=1)
-        if len(norms) > 0:
-            rms = np.sqrt(np.mean(norms**2))
-            rms_values.append(rms)
-
-    return rms_values
+# compute_displacement, compute_dynamic_range, and compute_rms_per_track are
+# now imported from viscy.representation.evaluation.distance (see imports above)


 def plot_rms_histogram(rms_values, label, bins=30):
@@ -188,7 +45,10 @@ def plot_rms_histogram(rms_values, label, bins=30):
     plt.grid(True)
     plt.show()

-def plot_displacement(mean_displacement, std_displacement, label, metrics_no_track=None):
+
+def plot_displacement(
+    mean_displacement, std_displacement, label, metrics_no_track=None
+):
     """
     Plot embedding displacement over time with mean and standard deviation.
@@ -247,6 +107,7 @@ def plot_displacement(mean_displacement, std_displacement, label, metrics_no_tra
     plt.legend(fontsize=12)
     plt.show()

+
 def plot_overlay_displacement(overlay_displacement_data):
     """
     Plot embedding displacement over time for all datasets in one plot.
@@ -263,7 +124,7 @@ def plot_overlay_displacement(overlay_displacement_data):
         taus = list(mean_displacement.keys())
         mean_values = list(mean_displacement.values())
         plt.plot(taus, mean_values, marker="o", label=label)
-    
+
     plt.xlabel("Time Shift (τ)", fontsize=14)
     plt.ylabel("Euclidean Distance", fontsize=14)
     plt.title("Overlayed Embedding Displacement Over Time", fontsize=16)
@@ -271,7 +132,8 @@ def plot_overlay_displacement(overlay_displacement_data):
     plt.legend(fontsize=12)
    plt.show()

-# %% hist stats
+
+# %% hist stats
 def plot_boxplot_rms_across_models(datasets_rms):
     """
     Plot a boxplot for the distribution of RMS values across models.
@@ -290,11 +152,13 @@ def plot_boxplot_rms_across_models(datasets_rms): print(data) # Plot the boxplot plt.boxplot(data, tick_labels=labels, patch_artist=True, showmeans=True) - - plt.title("Distribution of RMS of Rate of Change of Embedding Across Models", fontsize=16) + + plt.title( + "Distribution of RMS of Rate of Change of Embedding Across Models", fontsize=16 + ) plt.ylabel("RMS of Time Derivative", fontsize=14) plt.xticks(rotation=45, fontsize=12) - plt.grid(axis='y', linestyle='--', alpha=0.7) + plt.grid(axis="y", linestyle="--", alpha=0.7) plt.tight_layout() plt.show() @@ -313,7 +177,7 @@ def plot_histogram_absolute_differences(datasets_abs_diff): plt.figure(figsize=(12, 6)) for label, abs_diff in datasets_abs_diff.items(): plt.hist(abs_diff, bins=50, alpha=0.5, label=label, density=True) - + plt.title("Histograms of Absolute Differences Across Models", fontsize=16) plt.xlabel("Absolute Difference", fontsize=14) plt.ylabel("Density", fontsize=14) @@ -322,6 +186,7 @@ def plot_histogram_absolute_differences(datasets_abs_diff): plt.tight_layout() plt.show() + # %% Paths to datasets feature_paths = { "7 min interval": "/hpc/projects/organelle_phenotyping/ALFI_benchmarking/predictions_final/ALFI_opp_7mins.zarr", @@ -345,8 +210,8 @@ def plot_histogram_absolute_differences(datasets_abs_diff): features_path_no_track = Path(no_track_path) embedding_dataset_no_track = read_embedding_dataset(features_path_no_track) -mean_displacement_no_track, std_displacement_no_track = compute_displacement_mean_std_full( - embedding_dataset_no_track, max_tau=max_tau +mean_displacement_no_track, std_displacement_no_track = compute_displacement( + embedding_dataset_no_track, max_tau=max_tau, return_mean_std=True ) dynamic_range_no_track = compute_dynamic_range(mean_displacement_no_track) metrics["No Tracking"] = { @@ -360,11 +225,7 @@ def plot_histogram_absolute_differences(datasets_abs_diff): print("\nProcessing No Tracking dataset...") print(f"Dynamic Range for No Tracking: {dynamic_range_no_track}") -plot_displacement( - mean_displacement_no_track, - std_displacement_no_track, - "No Tracking" -) +plot_displacement(mean_displacement_no_track, std_displacement_no_track, "No Tracking") rms_values_no_track = compute_rms_per_track(embedding_dataset_no_track) datasets_rms["No Tracking"] = rms_values_no_track @@ -373,14 +234,18 @@ def plot_histogram_absolute_differences(datasets_abs_diff): plot_rms_histogram(rms_values_no_track, "No Tracking", bins=30) # Compute absolute differences for "No Tracking" -abs_diff_no_track = np.concatenate([ - np.linalg.norm( - np.diff(embedding_dataset_no_track["features"].values[indices], axis=0), - axis=-1 - ) - for indices in np.split(np.arange(len(embedding_dataset_no_track["track_id"])), - np.where(np.diff(embedding_dataset_no_track["track_id"]) != 0)[0] + 1) -]) +abs_diff_no_track = np.concatenate( + [ + np.linalg.norm( + np.diff(embedding_dataset_no_track["features"].values[indices], axis=0), + axis=-1, + ) + for indices in np.split( + np.arange(len(embedding_dataset_no_track["track_id"])), + np.where(np.diff(embedding_dataset_no_track["track_id"]) != 0)[0] + 1, + ) + ] +) datasets_abs_diff["No Tracking"] = abs_diff_no_track # Process other datasets @@ -390,8 +255,8 @@ def plot_histogram_absolute_differences(datasets_abs_diff): features_path = Path(path) embedding_dataset = read_embedding_dataset(features_path) - mean_displacement, std_displacement = compute_displacement_mean_std_full( - embedding_dataset, max_tau=max_tau + mean_displacement, std_displacement = 
compute_displacement( + embedding_dataset, max_tau=max_tau, return_mean_std=True ) dynamic_range = compute_dynamic_range(mean_displacement) metrics[label] = { @@ -417,16 +282,17 @@ def plot_histogram_absolute_differences(datasets_abs_diff): print(f"Plotting histogram of RMS values for {label}...") plot_rms_histogram(rms_values, label, bins=30) - abs_diff = np.concatenate([ - np.linalg.norm( - np.diff(embedding_dataset["features"].values[indices], axis=0), - axis=-1 - ) - for indices in np.split( - np.arange(len(embedding_dataset["track_id"])), - np.where(np.diff(embedding_dataset["track_id"]) != 0)[0] + 1 - ) - ]) + abs_diff = np.concatenate( + [ + np.linalg.norm( + np.diff(embedding_dataset["features"].values[indices], axis=0), axis=-1 + ) + for indices in np.split( + np.arange(len(embedding_dataset["track_id"])), + np.where(np.diff(embedding_dataset["track_id"]) != 0)[0] + 1, + ) + ] + ) datasets_abs_diff[label] = abs_diff print("\nPlotting overlayed displacement for all datasets...") @@ -443,3 +309,4 @@ def plot_histogram_absolute_differences(datasets_abs_diff): plot_histogram_absolute_differences(datasets_abs_diff) +# %% diff --git a/viscy/representation/evaluation/distance.py b/viscy/representation/evaluation/distance.py index 5968fb03..68a794b1 100644 --- a/viscy/representation/evaluation/distance.py +++ b/viscy/representation/evaluation/distance.py @@ -1,134 +1,22 @@ from collections import defaultdict - import numpy as np from sklearn.metrics.pairwise import cosine_similarity def calculate_cosine_similarity_cell(embedding_dataset, fov_name, track_id): - """ - Calculate cosine similarities for a specific cell over time. - - Parameters: - embedding_dataset : xarray.Dataset - The dataset containing embeddings, timepoints, fov_name, and track_id. - fov_name : str - Field of view name to identify the specific cell. - track_id : int - Track ID to identify the specific cell. - - Returns: - tuple - - time_points (array): Array of time points for the cell. - - cosine_similarities (list): Cosine similarities between the embedding - at the first time point and each subsequent time point. - """ - # Filter the dataset for the specific infected cell + """Extract embeddings and calculate cosine similarities for a specific cell""" filtered_data = embedding_dataset.where( (embedding_dataset["fov_name"] == fov_name) & (embedding_dataset["track_id"] == track_id), drop=True, ) - - # Extract the feature embeddings and time points features = filtered_data["features"].values # (sample, features) time_points = filtered_data["t"].values # (sample,) - - # Get the first time point's embedding first_time_point_embedding = features[0].reshape(1, -1) - - # Calculate cosine similarity between each time point and the first time point - cosine_similarities = [] - for i in range(len(time_points)): - similarity = cosine_similarity( - first_time_point_embedding, features[i].reshape(1, -1) - ) - cosine_similarities.append(similarity[0][0]) - - return time_points, cosine_similarities - - -def compute_displacement_mean_std( - embedding_dataset, max_tau=10, use_cosine=False, use_dissimilarity=False -): - """ - Compute the mean and standard deviation of displacements between embeddings at time t and t + tau for each tau. - - Parameters: - embedding_dataset : xarray.Dataset - The dataset containing embeddings, timepoints, fov_name, and track_id. - max_tau : int, optional - The maximum tau value to compute displacements for. Default is 10. 
- use_cosine : bool, optional - If True, compute cosine similarity instead of Euclidean distance. Default is False. - use_dissimilarity : bool, optional - If True and use_cosine is True, compute cosine dissimilarity (1 - similarity). - Default is False. - - Returns: - tuple - - mean_displacement_per_tau (dict): Mean displacement for each tau. - - std_displacement_per_tau (dict): Standard deviation of displacements for each tau. - """ - # Get the arrays of (fov_name, track_id, t, and embeddings) - fov_names = embedding_dataset["fov_name"].values - track_ids = embedding_dataset["track_id"].values - timepoints = embedding_dataset["t"].values - embeddings = embedding_dataset["features"].values - - # Dictionary to store displacements for each tau - displacement_per_tau = defaultdict(list) - - # Iterate over all entries in the dataset - for i in range(len(fov_names)): - fov_name = fov_names[i] - track_id = track_ids[i] - current_time = timepoints[i] - current_embedding = embeddings[i] - - # For each time point t, compute displacements for t + tau - for tau in range(1, max_tau + 1): - future_time = current_time + tau - - # Find if future_time exists for the same (fov_name, track_id) - matching_indices = np.where( - (fov_names == fov_name) - & (track_ids == track_id) - & (timepoints == future_time) - )[0] - - if len(matching_indices) == 1: - # Get the embedding at t + tau - future_embedding = embeddings[matching_indices[0]] - - if use_cosine: - # Compute cosine similarity - similarity = cosine_similarity( - current_embedding.reshape(1, -1), - future_embedding.reshape(1, -1), - )[0][0] - # Choose whether to use similarity or dissimilarity - if use_dissimilarity: - displacement = 1 - similarity # Cosine dissimilarity - else: - displacement = similarity # Cosine similarity - else: - # Compute the Euclidean distance, elementwise square on difference - displacement = np.sum((current_embedding - future_embedding) ** 2) - - # Store the displacement for the given tau - displacement_per_tau[tau].append(displacement) - - # Compute mean and std displacement for each tau by averaging the displacements - mean_displacement_per_tau = { - tau: np.mean(displacements) - for tau, displacements in displacement_per_tau.items() - } - std_displacement_per_tau = { - tau: np.std(displacements) - for tau, displacements in displacement_per_tau.items() - } - - return mean_displacement_per_tau, std_displacement_per_tau + cosine_similarities = cosine_similarity( + first_time_point_embedding, features + ).flatten() + return time_points, cosine_similarities.tolist() def compute_displacement( @@ -137,55 +25,30 @@ def compute_displacement( use_cosine=False, use_dissimilarity=False, use_umap=False, + return_mean_std=False, ): - """ - Compute the displacements between embeddings at time t and t + tau for each tau. - - Parameters: - embedding_dataset : xarray.Dataset - The dataset containing embeddings, timepoints, fov_name, and track_id. - max_tau : int, optional - The maximum tau value to compute displacements for. Default is 10. - use_cosine : bool, optional - If True, compute cosine similarity instead of Euclidean distance. Default is False. - use_dissimilarity : bool, optional - If True and use_cosine is True, compute cosine dissimilarity (1 - similarity). - Default is False. - use_umap : bool, optional - If True, use UMAP embeddings instead of feature embeddings. Default is False. - - Returns: - dict - A dictionary where the key is tau and the value is a list of displacements - for all cells at that tau. 
-    """
-    # Get the arrays of (fov_name, track_id, t, and embeddings)
+    """Compute displacements between embeddings at t and t + tau: squared Euclidean distance by default, cosine (dis)similarity if use_cosine."""
     fov_names = embedding_dataset["fov_name"].values
     track_ids = embedding_dataset["track_id"].values
     timepoints = embedding_dataset["t"].values

     if use_umap:
-        umap1 = embedding_dataset["UMAP1"].values
-        umap2 = embedding_dataset["UMAP2"].values
-        embeddings = np.vstack((umap1, umap2)).T
+        embeddings = np.vstack(
+            (embedding_dataset["UMAP1"].values, embedding_dataset["UMAP2"].values)
+        ).T
     else:
         embeddings = embedding_dataset["features"].values

-    # Dictionary to store displacements for each tau
     displacement_per_tau = defaultdict(list)

-    # Iterate over all entries in the dataset
     for i in range(len(fov_names)):
         fov_name = fov_names[i]
         track_id = track_ids[i]
         current_time = timepoints[i]
-        current_embedding = embeddings[i]
+        current_embedding = embeddings[i].reshape(1, -1)

-        # For each time point t, compute displacements for t + tau
         for tau in range(1, max_tau + 1):
             future_time = current_time + tau
-
-            # Find if future_time exists for the same (fov_name, track_id)
             matching_indices = np.where(
                 (fov_names == fov_name)
                 & (track_ids == track_id)
@@ -193,86 +56,57 @@
             )[0]

             if len(matching_indices) == 1:
-                # Get the embedding at t + tau
-                future_embedding = embeddings[matching_indices[0]]
+                future_embedding = embeddings[matching_indices[0]].reshape(1, -1)

                 if use_cosine:
-                    # Compute cosine similarity
-                    similarity = cosine_similarity(
-                        current_embedding.reshape(1, -1),
-                        future_embedding.reshape(1, -1),
-                    )[0][0]
-                    # Choose whether to use similarity or dissimilarity
-                    if use_dissimilarity:
-                        displacement = 1 - similarity  # Cosine dissimilarity
-                    else:
-                        displacement = similarity  # Cosine similarity
+                    similarity = cosine_similarity(current_embedding, future_embedding)[
+                        0
+                    ][0]
+                    displacement = 1 - similarity if use_dissimilarity else similarity
                 else:
-                    # Compute the Euclidean distance, elementwise square on difference
                     displacement = np.sum((current_embedding - future_embedding) ** 2)

-                # Store the displacement for the given tau
                 displacement_per_tau[tau].append(displacement)

+    if return_mean_std:
+        mean_displacement_per_tau = {
+            tau: np.mean(displacements)
+            for tau, displacements in displacement_per_tau.items()
+        }
+        std_displacement_per_tau = {
+            tau: np.std(displacements)
+            for tau, displacements in displacement_per_tau.items()
+        }
+        return mean_displacement_per_tau, std_displacement_per_tau
+
     return displacement_per_tau


-def calculate_normalized_euclidean_distance_cell(embedding_dataset, fov_name, track_id):
+def compute_dynamic_range(mean_displacement_per_tau):
     """
-    Calculate the normalized Euclidean distance between the embedding at the first time point and each subsequent time point for a specific cell.
+    Compute the dynamic range as the difference between the maximum
+    and minimum mean displacement per τ.

     Parameters:
-    embedding_dataset : xarray.Dataset
-        The dataset containing embeddings, timepoints, fov_name, and track_id.
-    fov_name : str
-        Field of view name to identify the specific cell.
-    track_id : int
-        Track ID to identify the specific cell.
+    mean_displacement_per_tau: dict with τ as key and mean displacement as value

     Returns:
-    tuple
-        - time_points (array): Array of time points for the cell.
-        - euclidean_distances (list): Normalized Euclidean distances between the embedding
-          at the first time point and each subsequent time point.
+ float: dynamic range (max displacement - min displacement) """ - filtered_data = embedding_dataset.where( - (embedding_dataset["fov_name"] == fov_name) - & (embedding_dataset["track_id"] == track_id), - drop=True, - ) + displacements = list(mean_displacement_per_tau.values()) + return max(displacements) - min(displacements) - features = filtered_data["features"].values # (sample, features) - time_points = filtered_data["t"].values # (sample,) - - normalized_features = features / np.linalg.norm(features, axis=1, keepdims=True) - - # Get the first time point's normalized embedding - first_time_point_embedding = normalized_features[0].reshape(1, -1) - - euclidean_distances = [] - for i in range(len(time_points)): - distance = np.linalg.norm( - first_time_point_embedding - normalized_features[i].reshape(1, -1) - ) - euclidean_distances.append(distance) - - return time_points, euclidean_distances - -def compute_displacement_mean_std_full(embedding_dataset, max_tau=10): +def compute_rms_per_track(embedding_dataset): """ - Compute the mean and standard deviation of displacements between embeddings at time t and t + tau for all cells. + Compute RMS of the time derivative of embeddings per track. Parameters: embedding_dataset : xarray.Dataset The dataset containing embeddings, timepoints, fov_name, and track_id. - max_tau : int, optional - The maximum tau value to compute displacements for. Default is 10. Returns: - tuple - - mean_displacement_per_tau (dict): Mean displacement for each tau. - - std_displacement_per_tau (dict): Standard deviation of displacements for each tau. + list: A list of RMS values, one for each track. """ fov_names = embedding_dataset["fov_name"].values track_ids = embedding_dataset["track_id"].values @@ -283,90 +117,45 @@ def compute_displacement_mean_std_full(embedding_dataset, max_tau=10): list(zip(fov_names, track_ids)), dtype=[("fov_name", "O"), ("track_id", "int64")], ) - unique_cells = np.unique(cell_identifiers) - displacement_per_tau = defaultdict(list) + rms_values = [] for cell in unique_cells: fov_name = cell["fov_name"] track_id = cell["track_id"] - indices = np.where((fov_names == fov_name) & (track_ids == track_id))[0] - cell_timepoints = timepoints[indices] cell_embeddings = embeddings[indices] + if len(cell_embeddings) < 2: + continue + sorted_indices = np.argsort(cell_timepoints) - cell_timepoints = cell_timepoints[sorted_indices] cell_embeddings = cell_embeddings[sorted_indices] + differences = np.diff(cell_embeddings, axis=0) - for i in range(len(cell_timepoints)): - current_time = cell_timepoints[i] - current_embedding = cell_embeddings[i] - - current_embedding = current_embedding / np.linalg.norm(current_embedding) - for tau in range(0, max_tau + 1): - future_time = current_time + tau + if differences.shape[0] == 0: + continue - future_index = np.where(cell_timepoints == future_time)[0] + norms = np.linalg.norm(differences, axis=1) + rms = np.sqrt(np.mean(norms**2)) + rms_values.append(rms) - if len(future_index) >= 1: + return rms_values - future_embedding = cell_embeddings[future_index[0]] - future_embedding = future_embedding / np.linalg.norm( - future_embedding - ) - distance = np.linalg.norm(current_embedding - future_embedding) - - displacement_per_tau[tau].append(distance) - - mean_displacement_per_tau = { - tau: np.mean(displacements) - for tau, displacements in displacement_per_tau.items() - } - std_displacement_per_tau = { - tau: np.std(displacements) - for tau, displacements in displacement_per_tau.items() - } - - return 
mean_displacement_per_tau, std_displacement_per_tau
-
-
-# Function to compute metrics for dynamic range and smoothness
-def compute_dynamic_smoothness_metrics(mean_displacement_per_tau):
-    """
-    Compute dynamic range and smoothness metrics for displacement curves.
-
-    Parameters:
-    mean_displacement_per_tau: dict with tau as key and mean displacement as value
-
-    Returns:
-    tuple: (dynamic_range, smoothness)
-        - dynamic_range: max displacement - min displacement
-        - smoothness: RMS of second differences of normalized curve
-    """
-    taus = np.array(sorted(mean_displacement_per_tau.keys()))
-    displacements = np.array([mean_displacement_per_tau[tau] for tau in taus])
-
-    dynamic_range = np.max(displacements) - np.min(displacements)
-
-    if np.max(displacements) != np.min(displacements):
-        displacements_normalized = (displacements - np.min(displacements)) / (
-            np.max(displacements) - np.min(displacements)
-        )
-    else:
-        displacements_normalized = displacements - np.min(
-            displacements
-        )  # Handle constant case
-
-    first_diff = np.diff(displacements_normalized)
-
-    second_diff = np.diff(first_diff)
-
-    # Compute RMS of second differences as smoothness metric
-    # Lower values indicate smoother curves
-    smoothness = np.sqrt(np.mean(second_diff**2))
-
-    return dynamic_range, smoothness


+def calculate_normalized_euclidean_distance_cell(embedding_dataset, fov_name, track_id):
+    """Euclidean distance between a cell's first-timepoint embedding and each later timepoint, computed on L2-normalized embeddings."""
+    filtered_data = embedding_dataset.where(
+        (embedding_dataset["fov_name"] == fov_name)
+        & (embedding_dataset["track_id"] == track_id),
+        drop=True,
+    )
+    features = filtered_data["features"].values  # (sample, features)
+    time_points = filtered_data["t"].values  # (sample,)
+    normalized_features = features / np.linalg.norm(features, axis=1, keepdims=True)
+    first_time_point_embedding = normalized_features[0].reshape(1, -1)
+    euclidean_distances = np.linalg.norm(
+        first_time_point_embedding - normalized_features, axis=1
+    )
+    return time_points, euclidean_distances.tolist()
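
Usage sketch for the consolidated API (illustrative only, not part of the patch): the zarr path below is hypothetical, but the imports and call signatures match what this diff adds. Note that compute_displacement with return_mean_std=True replaces the per-tau mean/std summary that compute_displacement_mean_std_full used to produce.

# Minimal usage sketch; only the dataset path is an assumption.
from pathlib import Path

from viscy.representation.embedding_writer import read_embedding_dataset
from viscy.representation.evaluation.distance import (
    compute_displacement,
    compute_dynamic_range,
    compute_rms_per_track,
)

# Hypothetical embeddings store; substitute a real prediction zarr.
dataset = read_embedding_dataset(Path("/path/to/embeddings.zarr"))

# Raw displacements: dict mapping tau -> list of squared Euclidean distances.
displacements = compute_displacement(dataset, max_tau=10)

# Same traversal, reduced to per-tau mean and standard deviation in one call.
mean_disp, std_disp = compute_displacement(dataset, max_tau=10, return_mean_std=True)

# Spread of the mean-displacement curve over tau (max minus min).
print(f"Dynamic range: {compute_dynamic_range(mean_disp)}")

# One RMS value per track: RMS norm of consecutive embedding differences.
rms_values = compute_rms_per_track(dataset)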